taxonifi 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +3 -3
- data/.ruby-version +1 -1
- data/Gemfile.lock +18 -11
- data/lib/taxonifi/lumper.rb +59 -54
- data/lib/taxonifi/splitter/builder.rb +5 -2
- data/lib/taxonifi/version.rb +1 -1
- data/taxonifi.gemspec +6 -4
- data/test/test_lumper_names.rb +36 -10
- metadata +21 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db4b4d80d379ca1799481144f811c996353461ce352bab60a9d39c895165cd0a
|
4
|
+
data.tar.gz: 0f01744e01b482ac700a8ce4816aa7d7e679b36b0b6bb584898223bbed18132c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35e1cea4409390f2655ec9ba87f648cd8f5073d305c3a702a36797897f9863bdfac38d612cfd2ac10b966adee4a2e570e9190e2b737bf7d44e3c01fd9e4aff65
|
7
|
+
data.tar.gz: c8de99997d43fc409db58697c9060529c38b10d3cce06ffa3786ba0f31d6c67dd1da955640eda336e54ceefc33c4fdfa26b2a85aa74e4f19efcef1232e11e0c0
|
data/.github/workflows/ruby.yml
CHANGED
@@ -20,15 +20,15 @@ jobs:
|
|
20
20
|
strategy:
|
21
21
|
matrix:
|
22
22
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
23
|
-
ruby: [
|
23
|
+
ruby: [3.0, 3.4]
|
24
24
|
|
25
25
|
steps:
|
26
26
|
- uses: actions/checkout@v2
|
27
27
|
- name: Set up Ruby
|
28
28
|
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
29
29
|
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
30
|
-
|
31
|
-
|
30
|
+
uses: ruby/setup-ruby@v1
|
31
|
+
# uses: ruby/setup-ruby@fdcfbcf14ec9672f6f615cb9589a1bc5dd69d262
|
32
32
|
with:
|
33
33
|
ruby-version: ${{ matrix.ruby }}
|
34
34
|
- name: Install dependencies
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.4.4
|
data/Gemfile.lock
CHANGED
@@ -1,38 +1,45 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
taxonifi (0.6.
|
4
|
+
taxonifi (0.6.1)
|
5
|
+
csv (~> 3.3.5)
|
5
6
|
require_all (~> 3.0)
|
6
7
|
|
7
8
|
GEM
|
8
9
|
remote: https://rubygems.org/
|
9
10
|
specs:
|
10
11
|
awesome_print (1.9.2)
|
11
|
-
builder (3.
|
12
|
+
builder (3.3.0)
|
12
13
|
byebug (11.1.3)
|
13
|
-
|
14
|
-
|
14
|
+
csv (3.3.5)
|
15
|
+
date (3.4.1)
|
16
|
+
erb (5.0.2)
|
17
|
+
power_assert (2.0.5)
|
18
|
+
psych (5.2.6)
|
19
|
+
date
|
15
20
|
stringio
|
16
|
-
rake (13.0
|
17
|
-
rdoc (6.
|
21
|
+
rake (13.3.0)
|
22
|
+
rdoc (6.14.2)
|
23
|
+
erb
|
18
24
|
psych (>= 4.0.0)
|
19
25
|
require_all (3.0.0)
|
20
|
-
stringio (3.
|
21
|
-
test-unit (3.
|
26
|
+
stringio (3.1.7)
|
27
|
+
test-unit (3.7.0)
|
22
28
|
power_assert
|
23
29
|
|
24
30
|
PLATFORMS
|
25
31
|
ruby
|
32
|
+
x86_64-darwin-24
|
26
33
|
|
27
34
|
DEPENDENCIES
|
28
35
|
awesome_print (~> 1.8)
|
29
36
|
builder (~> 3.2)
|
30
|
-
bundler (~> 2.
|
37
|
+
bundler (~> 2.7)
|
31
38
|
byebug (~> 11)
|
32
39
|
rake (~> 13.0)
|
33
40
|
rdoc (~> 6.2)
|
34
41
|
taxonifi!
|
35
|
-
test-unit (~> 3.
|
42
|
+
test-unit (~> 3.5)
|
36
43
|
|
37
44
|
BUNDLED WITH
|
38
|
-
2.
|
45
|
+
2.7.1
|
data/lib/taxonifi/lumper.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# The lumper lumps! Tools for recognizing and using
|
2
|
-
# combinations of column types.
|
3
|
-
module Taxonifi::Lumper
|
1
|
+
# The lumper lumps! Tools for recognizing and using
|
2
|
+
# combinations of column types.
|
3
|
+
module Taxonifi::Lumper
|
4
4
|
# Define groups of columns/fields and include
|
5
5
|
# functionality to determine whether your
|
6
6
|
# columns match a given set.
|
@@ -13,8 +13,8 @@ module Taxonifi::Lumper
|
|
13
13
|
# !! Todo: map DwC URIs to these labels (at present they largely correlate with Tokens,
|
14
14
|
# perhaps map URIs to tokens!?)
|
15
15
|
QUAD = ['genus', 'subgenus', 'species', 'subspecies']
|
16
|
-
|
17
|
-
# Columns representing author and year
|
16
|
+
|
17
|
+
# Columns representing author and year
|
18
18
|
AUTHOR_YEAR = ['author', 'year']
|
19
19
|
|
20
20
|
# A Hash of named column combinations
|
@@ -33,15 +33,15 @@ module Taxonifi::Lumper
|
|
33
33
|
|
34
34
|
# Authors, Year, Title, Publication, Volume_Number Pages Cited_Page
|
35
35
|
|
36
|
-
# Lumps for which all columns are represented
|
37
|
-
# TODO: This is really an assessor method
|
36
|
+
# Lumps for which all columns are represented
|
37
|
+
# TODO: This is really an assessor method
|
38
38
|
def self.available_lumps(columns)
|
39
39
|
raise Taxonifi::Lumper::LumperError, 'Array not passed to Lumper.available_lumps.' if !(columns.class == Array)
|
40
40
|
LUMPS.keys.select{|k| (LUMPS[k] - columns) == []}
|
41
41
|
end
|
42
42
|
|
43
|
-
# Lumps for which any column is represented
|
44
|
-
# # TODO: This is really an assessor method
|
43
|
+
# Lumps for which any column is represented
|
44
|
+
# # TODO: This is really an assessor method
|
45
45
|
def self.intersecting_lumps(columns)
|
46
46
|
raise Taxonifi::Lumper::LumperError, 'Array not passed to Lumper.intersecting_lumps.' if !(columns.class == Array)
|
47
47
|
intersections = []
|
@@ -51,7 +51,7 @@ module Taxonifi::Lumper
|
|
51
51
|
intersections
|
52
52
|
end
|
53
53
|
|
54
|
-
|
54
|
+
|
55
55
|
# return [Taxonifi::Model::NameCollection] from a csv file.
|
56
56
|
def self.create_name_collection(options = {})
|
57
57
|
opts = {
|
@@ -59,24 +59,24 @@ module Taxonifi::Lumper
|
|
59
59
|
:initial_id => 0,
|
60
60
|
:capture_related_fields => true # Stores other column values in (column_header => value) pairs in Name#properties
|
61
61
|
}.merge!(options)
|
62
|
-
|
62
|
+
|
63
63
|
csv = opts[:csv]
|
64
64
|
raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_name_collection.' if csv.class != CSV::Table
|
65
|
-
|
65
|
+
|
66
66
|
nc = Taxonifi::Model::NameCollection.new(:initial_id => opts[:initial_id])
|
67
67
|
row_size = csv.size
|
68
68
|
|
69
69
|
# The row index contains a vector of parent ids like
|
70
70
|
# [0, 4, 29]
|
71
71
|
# This implies that Name with #id 29 has Parent with #id 4
|
72
|
-
# Initialize an empty index.
|
72
|
+
# Initialize an empty index.
|
73
73
|
row_index = Taxonifi::Utils::Array.build_array_of_empty_arrays(row_size)
|
74
74
|
|
75
75
|
# The name_index keeps track of unique name per rank like
|
76
76
|
# :genus => {'Foo' => [0,2]}
|
77
77
|
# This says that "Foo" is instantiated two times in the
|
78
78
|
# name collection, with id 0, and id 2.
|
79
|
-
name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
|
79
|
+
name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
|
80
80
|
|
81
81
|
has_ref_fields = ([:citation_basic, :citation_small] & Taxonifi::Lumper.intersecting_lumps(csv.headers)).size > 0
|
82
82
|
unused_fields = csv.headers - Taxonifi::Lumper::LUMPS[:names]
|
@@ -87,21 +87,21 @@ module Taxonifi::Lumper
|
|
87
87
|
name_index[rank] = {}
|
88
88
|
csv.each_with_index do |row, i|
|
89
89
|
shares_rank = (rank == Taxonifi::Assessor::RowAssessor.lump_name_rank(row).to_s)
|
90
|
-
name = row[rank]
|
90
|
+
name = row[rank]
|
91
91
|
|
92
92
|
if !name.nil? # cell has data
|
93
93
|
n = nil # a Name if necessary
|
94
|
-
name_id = nil # index the new or existing Name
|
94
|
+
name_id = nil # index the new or existing Name
|
95
95
|
|
96
96
|
exists = false
|
97
97
|
if name_index[rank][name] # A matching name (String) has been previously added
|
98
98
|
name_index[rank][name].each do |id|
|
99
99
|
# Compare vectors of parent_ids for name presence
|
100
|
-
if nc.parent_id_vector(id) == row_index[i]
|
100
|
+
if nc.parent_id_vector(id) == row_index[i]
|
101
101
|
exists = true
|
102
102
|
name_id = id
|
103
|
-
break
|
104
|
-
end
|
103
|
+
break
|
104
|
+
end
|
105
105
|
end
|
106
106
|
end # end name exists
|
107
107
|
|
@@ -110,22 +110,27 @@ module Taxonifi::Lumper
|
|
110
110
|
unused_data = row.to_hash.select{|f| unused_fields.include?(f)}
|
111
111
|
row_identifier = (row['identifier'] ? row['identifier'] : i)
|
112
112
|
|
113
|
-
# Populate the new name if created. Previously matched names are not effected.
|
114
|
-
if !n.nil?
|
113
|
+
# Populate the new name if created. Previously matched names are not effected.
|
114
|
+
if !n.nil?
|
115
115
|
n.rank = rank
|
116
116
|
n.name = name
|
117
|
-
n.parent = nc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
117
|
+
n.parent = nc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
118
118
|
n.row_number = i
|
119
119
|
|
120
120
|
# Name/year needs to be standardized / cased out
|
121
121
|
# headers are overlapping at times
|
122
122
|
|
123
123
|
# Check to see if metadata (e.g. author year) apply to this rank, attach if so.
|
124
|
-
if shares_rank
|
125
|
-
if row['author_year']
|
126
|
-
|
124
|
+
if shares_rank
|
125
|
+
if row['author_year']
|
126
|
+
begin
|
127
|
+
builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
|
128
|
+
rescue Taxonifi::Splitter::SplitterError => e
|
129
|
+
# Map i to user's row number: +1 for 1-based, +1 to account for header row.
|
130
|
+
raise LumperError, "Failed to parse author_year string '#{row['author_year']}' in row #{i + 2}", e.backtrace
|
131
|
+
end
|
127
132
|
n.authors = builder.people # was author!?
|
128
|
-
n.year = builder.year
|
133
|
+
n.year = builder.year
|
129
134
|
n.parens = builder.parens
|
130
135
|
end
|
131
136
|
|
@@ -135,29 +140,29 @@ module Taxonifi::Lumper
|
|
135
140
|
|
136
141
|
name_id = nc.add_object(n).id
|
137
142
|
name_index[rank][name] ||= []
|
138
|
-
name_index[rank][name].push name_id
|
143
|
+
name_index[rank][name].push name_id
|
139
144
|
|
140
145
|
$DEBUG && $stderr.puts("added #{nc.collection.size - 1} | #{n.name} | #{n.rank} | #{n.parent ? n.parent.name : '-'} | #{n.parent ? n.parent.id : '-'}")
|
141
146
|
else
|
142
147
|
$DEBUG && $stderr.puts("already present #{rank} | #{name}")
|
143
|
-
if shares_rank
|
144
|
-
# original::
|
148
|
+
if shares_rank
|
149
|
+
# original::
|
145
150
|
nc.add_duplicate_entry_metadata(name_id, row_identifier, unused_data)
|
146
151
|
|
147
|
-
# hack
|
148
|
-
# nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
|
149
|
-
|
152
|
+
# hack
|
153
|
+
# nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
|
154
|
+
|
150
155
|
end
|
151
156
|
end
|
152
157
|
|
153
158
|
# build a by row vector of parent child relationships
|
154
|
-
row_index[i].push name_id
|
159
|
+
row_index[i].push name_id
|
155
160
|
end # end cell has data
|
156
161
|
|
157
162
|
end
|
158
163
|
end
|
159
164
|
nc
|
160
|
-
end
|
165
|
+
end
|
161
166
|
|
162
167
|
# return [Taxonifi::Model::RefCollection] from a CSV file.
|
163
168
|
def self.create_ref_collection(options = {})
|
@@ -180,7 +185,7 @@ module Taxonifi::Lumper
|
|
180
185
|
:year => row['year'],
|
181
186
|
:title => row['title'],
|
182
187
|
:publication => row['publication']
|
183
|
-
)
|
188
|
+
)
|
184
189
|
|
185
190
|
# TODO: break out each of these lexes to a builder
|
186
191
|
if row['authors'] && !row['authors'].empty?
|
@@ -216,16 +221,16 @@ module Taxonifi::Lumper
|
|
216
221
|
r.pages = row['pages']
|
217
222
|
end
|
218
223
|
end
|
219
|
-
|
224
|
+
|
220
225
|
r.add_properties(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
|
221
226
|
|
222
227
|
# Do some indexing.
|
223
|
-
ref_str = r.compact_string
|
228
|
+
ref_str = r.compact_string
|
224
229
|
if !ref_index.keys.include?(ref_str)
|
225
230
|
ref_id = rc.add_object(r).id
|
226
231
|
ref_index.merge!(ref_str => ref_id)
|
227
232
|
# puts "#{i} : #{ref_id}"
|
228
|
-
rc.row_index[i] = r
|
233
|
+
rc.row_index[i] = r
|
229
234
|
else
|
230
235
|
rc.row_index[i] = rc.object_by_id(ref_index[ref_str])
|
231
236
|
# puts "#{i} : #{ref_index[ref_str]}"
|
@@ -267,9 +272,9 @@ module Taxonifi::Lumper
|
|
267
272
|
name = row[rank]
|
268
273
|
if !name.nil? && !name.empty? # cell has data
|
269
274
|
o = nil # a Name if necessary
|
270
|
-
name_id = nil # index the new or existing name
|
275
|
+
name_id = nil # index the new or existing name
|
271
276
|
|
272
|
-
if name_index[rank][name] # Matching name is found
|
277
|
+
if name_index[rank][name] # Matching name is found
|
273
278
|
|
274
279
|
exists = false
|
275
280
|
name_index[rank][name].each do |id|
|
@@ -287,25 +292,25 @@ module Taxonifi::Lumper
|
|
287
292
|
o = Taxonifi::Model::GenericObject.new()
|
288
293
|
end
|
289
294
|
|
290
|
-
if !o.nil?
|
295
|
+
if !o.nil?
|
291
296
|
o.name = name
|
292
297
|
o.rank = rank
|
293
298
|
o.row_number = i
|
294
|
-
o.parent = c.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
299
|
+
o.parent = c.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
295
300
|
|
296
|
-
name_id = c.add_object(o).id
|
301
|
+
name_id = c.add_object(o).id
|
297
302
|
name_index[rank][name] ||= []
|
298
|
-
name_index[rank][name].push name_id
|
303
|
+
name_index[rank][name].push name_id
|
299
304
|
end
|
300
305
|
|
301
|
-
row_index[i].push name_id
|
306
|
+
row_index[i].push name_id
|
302
307
|
end
|
303
308
|
end
|
304
309
|
end
|
305
310
|
c
|
306
311
|
end
|
307
312
|
|
308
|
-
# Return a geog collection from a csv file.
|
313
|
+
# Return a geog collection from a csv file.
|
309
314
|
def self.create_geog_collection(csv)
|
310
315
|
raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_geog_collection.' if csv.class != CSV::Table
|
311
316
|
gc = Taxonifi::Model::GeogCollection.new
|
@@ -320,7 +325,7 @@ module Taxonifi::Lumper
|
|
320
325
|
end
|
321
326
|
|
322
327
|
# We don't have the same problems as with taxon names, i.e.
|
323
|
-
# boo in
|
328
|
+
# boo in
|
324
329
|
# Foo nil boo
|
325
330
|
# Foo bar boo
|
326
331
|
# is the same thing wrt geography, not the case for taxon names.
|
@@ -331,28 +336,28 @@ module Taxonifi::Lumper
|
|
331
336
|
name = row[level]
|
332
337
|
if !name.nil? && !name.empty? # cell has data
|
333
338
|
g = nil # a Name if necessary
|
334
|
-
name_id = nil # index the new or existing name
|
339
|
+
name_id = nil # index the new or existing name
|
335
340
|
|
336
341
|
if name_index[level][name] # name exists
|
337
|
-
name_id = name_index[level][name]
|
342
|
+
name_id = name_index[level][name]
|
338
343
|
else
|
339
344
|
g = Taxonifi::Model::Geog.new()
|
340
345
|
name_id = gc.add_object(g).id
|
341
346
|
end
|
342
347
|
|
343
|
-
if !g.nil?
|
348
|
+
if !g.nil?
|
344
349
|
g.name = name
|
345
350
|
g.rank = level
|
346
|
-
g.parent = gc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
351
|
+
g.parent = gc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
|
347
352
|
end
|
348
353
|
|
349
354
|
name_index[level][name] = name_id
|
350
|
-
row_index[i].push name_id
|
355
|
+
row_index[i].push name_id
|
351
356
|
end
|
352
357
|
end
|
353
358
|
end
|
354
359
|
gc
|
355
|
-
end
|
360
|
+
end
|
356
361
|
|
357
|
-
end # end Lumper Module
|
362
|
+
end # end Lumper Module
|
358
363
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Builder functionality for parsing/lexing framework.
|
1
|
+
# Builder functionality for parsing/lexing framework.
|
2
2
|
module Taxonifi::Splitter::Builder
|
3
3
|
|
4
4
|
# Load all builders (= models)
|
@@ -7,8 +7,11 @@ module Taxonifi::Splitter::Builder
|
|
7
7
|
|
8
8
|
# Build and return Taxonifi::Model::AuthorYear from a string.
|
9
9
|
def self.build_author_year(text)
|
10
|
-
|
10
|
+
text = text&.strip
|
11
11
|
builder = Taxonifi::Model::AuthorYear.new
|
12
|
+
return builder if text.nil? || text.empty?
|
13
|
+
|
14
|
+
lexer = Taxonifi::Splitter::Lexer.new(text)
|
12
15
|
Taxonifi::Splitter::Parser.new(lexer, builder).parse_author_year
|
13
16
|
builder
|
14
17
|
end
|
data/lib/taxonifi/version.rb
CHANGED
data/taxonifi.gemspec
CHANGED
@@ -25,17 +25,19 @@ Gem::Specification.new do |s|
|
|
25
25
|
s.homepage = "https://github.com/SpeciesFile/taxonifi"
|
26
26
|
s.licenses = ["MIT"]
|
27
27
|
s.require_paths = ["lib"]
|
28
|
-
s.rubygems_version = "2.
|
28
|
+
s.rubygems_version = "3.2.22"
|
29
29
|
s.metadata = { "source_code_uri" => "https://github.com/SpeciesFileGroup/taxonifi" }
|
30
30
|
|
31
31
|
s.add_dependency "require_all", "~> 3.0"
|
32
|
-
s.
|
32
|
+
s.add_dependency "csv", "~> 3.3.5"
|
33
|
+
|
34
|
+
s.required_ruby_version = '>= 3.0', '< 4'
|
33
35
|
|
34
36
|
s.add_development_dependency "rake", '~> 13.0'
|
35
37
|
s.add_development_dependency "byebug", "~> 11"
|
36
|
-
s.add_development_dependency "bundler", "~> 2.
|
38
|
+
s.add_development_dependency "bundler", "~> 2.7"
|
37
39
|
s.add_development_dependency 'awesome_print', '~> 1.8'
|
38
|
-
s.add_development_dependency 'test-unit', '~> 3.
|
40
|
+
s.add_development_dependency 'test-unit', '~> 3.5'
|
39
41
|
s.add_development_dependency "rdoc", "~> 6.2"
|
40
42
|
s.add_development_dependency "builder", "~> 3.2"
|
41
43
|
|
data/test/test_lumper_names.rb
CHANGED
@@ -25,7 +25,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
25
25
|
def test_available_lumps
|
26
26
|
assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::QUAD ).include?(:quadrinomial)
|
27
27
|
assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:quad_author_year)
|
28
|
-
assert !Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:names)
|
28
|
+
assert !Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:names)
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_create_name_collection_creates_a_name_collection
|
@@ -49,7 +49,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
49
49
|
assert_equal 0, nc.collection.first.row_number
|
50
50
|
assert_equal 0, nc.collection.last.row_number
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
def test_that_create_name_collection_parentifies
|
54
54
|
nc = Taxonifi::Lumper.create_name_collection(:csv => @csv)
|
55
55
|
assert_equal nc.collection[0], nc.collection[1].parent
|
@@ -87,18 +87,18 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
87
87
|
csv << ["Fooidae", "Foo", "bar", "Smith, 1854"]
|
88
88
|
csv << ["Fooidae", "Foo", "foo", "(Smith and Jones, 1854)"]
|
89
89
|
end
|
90
|
-
|
90
|
+
|
91
91
|
# 0 Fooidae
|
92
92
|
# 1 Foo
|
93
93
|
# 2 bar
|
94
|
-
# 3 foo
|
94
|
+
# 3 foo
|
95
95
|
|
96
96
|
csv = CSV.parse(string, headers: true)
|
97
97
|
nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
|
98
98
|
assert_equal 2, nc.collection[3].authors.size
|
99
99
|
assert_equal 'Smith', nc.collection[3].authors.first.last_name
|
100
100
|
assert_equal 1854, nc.collection[3].year
|
101
|
-
|
101
|
+
|
102
102
|
assert_equal 'Smith, 1854', nc.collection[2].author_year
|
103
103
|
assert_equal 'Smith', nc.collection[2].author_with_parens
|
104
104
|
|
@@ -115,12 +115,38 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
115
115
|
assert_equal true, nc.collection[3].parens
|
116
116
|
end
|
117
117
|
|
118
|
+
def test_that_create_a_name_collection_ignores_whitespace_author_year
|
119
|
+
string = CSV.generate() do |csv|
|
120
|
+
csv << %w{family genus species author_year}
|
121
|
+
csv << ["Fooidae", "Foo", "bar", " "]
|
122
|
+
end
|
123
|
+
|
124
|
+
csv = CSV.parse(string, headers: true)
|
125
|
+
nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
|
126
|
+
assert_equal 0, nc.collection[2].authors.size
|
127
|
+
assert_equal nil, nc.collection[2].author_year
|
128
|
+
end
|
129
|
+
|
130
|
+
def test_that_create_a_name_collection_throws_lumper_error_on_bad_author_year
|
131
|
+
string = CSV.generate() do |csv|
|
132
|
+
csv << %w{family genus species author_year}
|
133
|
+
csv << ["Fooidae", "Foo", "bar", ")Smith)"]
|
134
|
+
end
|
135
|
+
|
136
|
+
csv = CSV.parse(string, headers: true)
|
137
|
+
e = assert_raises Taxonifi::Lumper::LumperError do
|
138
|
+
Taxonifi::Lumper.create_name_collection(:csv => csv)
|
139
|
+
end
|
140
|
+
|
141
|
+
assert_match("')Smith)' in row 2", e.message)
|
142
|
+
end
|
143
|
+
|
118
144
|
def test_that_create_a_name_collection_handles_related_columns
|
119
145
|
string = CSV.generate() do |csv|
|
120
146
|
csv << %w{family genus species author_year foo bar Stuff}
|
121
147
|
csv << ["Fooidae", "Foo", "bar", "Smith, 1854" , nil, 1 , "one"]
|
122
148
|
end
|
123
|
-
|
149
|
+
|
124
150
|
# 0 Fooidae
|
125
151
|
# 1 Foo
|
126
152
|
# 2 bar
|
@@ -145,9 +171,9 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
145
171
|
# 1 Foo
|
146
172
|
# 2 bar
|
147
173
|
# 3 foo
|
148
|
-
# 4 varblorf
|
149
|
-
# 5 varblorf
|
150
|
-
# 6 varbliff
|
174
|
+
# 4 varblorf
|
175
|
+
# 5 varblorf
|
176
|
+
# 6 varbliff
|
151
177
|
|
152
178
|
csv = CSV.parse(string, headers: true)
|
153
179
|
nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
|
@@ -176,5 +202,5 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
|
|
176
202
|
|
177
203
|
#--- reference collections
|
178
204
|
|
179
|
-
end
|
205
|
+
end
|
180
206
|
|
metadata
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taxonifi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Yoder
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
10
|
date: 2013-03-27 00:00:00.000000000 Z
|
@@ -24,6 +23,20 @@ dependencies:
|
|
24
23
|
- - "~>"
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '3.0'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: csv
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 3.3.5
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 3.3.5
|
27
40
|
- !ruby/object:Gem::Dependency
|
28
41
|
name: rake
|
29
42
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +71,14 @@ dependencies:
|
|
58
71
|
requirements:
|
59
72
|
- - "~>"
|
60
73
|
- !ruby/object:Gem::Version
|
61
|
-
version: '2.
|
74
|
+
version: '2.7'
|
62
75
|
type: :development
|
63
76
|
prerelease: false
|
64
77
|
version_requirements: !ruby/object:Gem::Requirement
|
65
78
|
requirements:
|
66
79
|
- - "~>"
|
67
80
|
- !ruby/object:Gem::Version
|
68
|
-
version: '2.
|
81
|
+
version: '2.7'
|
69
82
|
- !ruby/object:Gem::Dependency
|
70
83
|
name: awesome_print
|
71
84
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,14 +99,14 @@ dependencies:
|
|
86
99
|
requirements:
|
87
100
|
- - "~>"
|
88
101
|
- !ruby/object:Gem::Version
|
89
|
-
version: 3.
|
102
|
+
version: '3.5'
|
90
103
|
type: :development
|
91
104
|
prerelease: false
|
92
105
|
version_requirements: !ruby/object:Gem::Requirement
|
93
106
|
requirements:
|
94
107
|
- - "~>"
|
95
108
|
- !ruby/object:Gem::Version
|
96
|
-
version: 3.
|
109
|
+
version: '3.5'
|
97
110
|
- !ruby/object:Gem::Dependency
|
98
111
|
name: rdoc
|
99
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -207,7 +220,6 @@ licenses:
|
|
207
220
|
- MIT
|
208
221
|
metadata:
|
209
222
|
source_code_uri: https://github.com/SpeciesFileGroup/taxonifi
|
210
|
-
post_install_message:
|
211
223
|
rdoc_options: []
|
212
224
|
require_paths:
|
213
225
|
- lib
|
@@ -215,7 +227,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
215
227
|
requirements:
|
216
228
|
- - ">="
|
217
229
|
- !ruby/object:Gem::Version
|
218
|
-
version: '
|
230
|
+
version: '3.0'
|
219
231
|
- - "<"
|
220
232
|
- !ruby/object:Gem::Version
|
221
233
|
version: '4'
|
@@ -225,8 +237,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
225
237
|
- !ruby/object:Gem::Version
|
226
238
|
version: '0'
|
227
239
|
requirements: []
|
228
|
-
rubygems_version: 3.
|
229
|
-
signing_key:
|
240
|
+
rubygems_version: 3.7.1
|
230
241
|
specification_version: 4
|
231
242
|
summary: A general purpose framework for scripted handling of taxonomic names or other
|
232
243
|
hierarchical metadata.
|