taxonifi 0.5.5 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1aea1ca5d3bba09864edf09cbc5430ac58f6605a9247371c8245e63220b8a34
4
- data.tar.gz: c1007d26c853d41b9042410d3f206779ff70bcd7a5fbf0073709d33c6577d30e
3
+ metadata.gz: db4b4d80d379ca1799481144f811c996353461ce352bab60a9d39c895165cd0a
4
+ data.tar.gz: 0f01744e01b482ac700a8ce4816aa7d7e679b36b0b6bb584898223bbed18132c
5
5
  SHA512:
6
- metadata.gz: 4ea36b3f86d07ebb09b860fe64989657b5dff8ad42b0cf66a1f4884497f5d96b8cd72a285d47808b5420ee0d7b838fcfd25e49914646912e31887da9ec3a2b26
7
- data.tar.gz: f061c76e564eb3be9ce29fa47bc6874c3e89eb18d0ba4bcaabf0fc576126cf55e8843a7a00730359b217f8a9e6e1e942b0fb7a6ac1b723cc7a8adee55d43ec12
6
+ metadata.gz: 35e1cea4409390f2655ec9ba87f648cd8f5073d305c3a702a36797897f9863bdfac38d612cfd2ac10b966adee4a2e570e9190e2b737bf7d44e3c01fd9e4aff65
7
+ data.tar.gz: c8de99997d43fc409db58697c9060529c38b10d3cce06ffa3786ba0f31d6c67dd1da955640eda336e54ceefc33c4fdfa26b2a85aa74e4f19efcef1232e11e0c0
@@ -0,0 +1,41 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ master, ruby3 ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ${{ matrix.os }}
20
+ strategy:
21
+ matrix:
22
+ os: [ubuntu-latest, macos-latest, windows-latest]
23
+ ruby: [3.0, 3.4]
24
+
25
+ steps:
26
+ - uses: actions/checkout@v2
27
+ - name: Set up Ruby
28
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
29
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
30
+ uses: ruby/setup-ruby@v1
31
+ # uses: ruby/setup-ruby@fdcfbcf14ec9672f6f615cb9589a1bc5dd69d262
32
+ with:
33
+ ruby-version: ${{ matrix.ruby }}
34
+ - name: Install dependencies
35
+ run: bundle install
36
+ - name: Show environment
37
+ run: gem environment
38
+ - name: Run tests
39
+ run: bundle exec rake
40
+ - name: Checking install works
41
+ run: rake install
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.5
1
+ 3.4.4
data/Gemfile.lock CHANGED
@@ -1,34 +1,45 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- taxonifi (0.5.5)
4
+ taxonifi (0.6.1)
5
+ csv (~> 3.3.5)
5
6
  require_all (~> 3.0)
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
9
10
  specs:
10
- awesome_print (1.8.0)
11
- builder (3.2.4)
12
- byebug (11.1.1)
13
- power_assert (1.1.5)
14
- rake (13.0.1)
15
- rdoc (6.2.1)
11
+ awesome_print (1.9.2)
12
+ builder (3.3.0)
13
+ byebug (11.1.3)
14
+ csv (3.3.5)
15
+ date (3.4.1)
16
+ erb (5.0.2)
17
+ power_assert (2.0.5)
18
+ psych (5.2.6)
19
+ date
20
+ stringio
21
+ rake (13.3.0)
22
+ rdoc (6.14.2)
23
+ erb
24
+ psych (>= 4.0.0)
16
25
  require_all (3.0.0)
17
- test-unit (3.3.5)
26
+ stringio (3.1.7)
27
+ test-unit (3.7.0)
18
28
  power_assert
19
29
 
20
30
  PLATFORMS
21
31
  ruby
32
+ x86_64-darwin-24
22
33
 
23
34
  DEPENDENCIES
24
35
  awesome_print (~> 1.8)
25
36
  builder (~> 3.2)
26
- bundler (~> 2.1)
37
+ bundler (~> 2.7)
27
38
  byebug (~> 11)
28
39
  rake (~> 13.0)
29
40
  rdoc (~> 6.2)
30
41
  taxonifi!
31
- test-unit (~> 3.3.5)
42
+ test-unit (~> 3.5)
32
43
 
33
44
  BUNDLED WITH
34
- 2.1.4
45
+ 2.7.1
data/README.md CHANGED
@@ -112,11 +112,11 @@ The following is an example that translates a DwC style input format as exported
112
112
  require 'taxonifi'
113
113
  file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Lygaeoidea-csv.tsv'))
114
114
 
115
- csv = CSV.read(file, {
115
+ csv = CSV.read(file,
116
116
  headers: true,
117
117
  col_sep: "\t",
118
118
  header_converters: :downcase
119
- } )
119
+ )
120
120
 
121
121
  nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
122
122
  e = Taxonifi::Export::SpeciesFile.new(:nc => nc, :authorized_user_id => 1)
@@ -131,10 +131,10 @@ Reading files
131
131
  taxonifi feeds on Ruby's CSV. read your files with header true, and downcased, e.g.:
132
132
 
133
133
  ```
134
- csv = CSV.read('input/my_data.tab', {
134
+ csv = CSV.read('input/my_data.tab',
135
135
  headers: true,
136
136
  header_converters: :downcase,
137
- col_sep: "\t" } )
137
+ col_sep: "\t")
138
138
  ```
139
139
 
140
140
  # Code organization
@@ -79,6 +79,7 @@ module Taxonifi
79
79
  # this far? bad
80
80
  # raise RowAssessor::RowAssessorError
81
81
 
82
+ # TODO: add an error message that can be relayed on here and elsewhere
82
83
  raise RowAssessorError
83
84
  end
84
85
 
@@ -1,6 +1,6 @@
1
- # The lumper lumps! Tools for recognizing and using
2
- # combinations of column types.
3
- module Taxonifi::Lumper
1
+ # The lumper lumps! Tools for recognizing and using
2
+ # combinations of column types.
3
+ module Taxonifi::Lumper
4
4
  # Define groups of columns/fields and include
5
5
  # functionality to determine whether your
6
6
  # columns match a given set.
@@ -13,8 +13,8 @@ module Taxonifi::Lumper
13
13
  # !! Todo: map DwC URIs to these labels (at present they largely correllate with Tokens,
14
14
  # perhaps map URIs to tokens!?)
15
15
  QUAD = ['genus', 'subgenus', 'species', 'subspecies']
16
-
17
- # Columns representing author and year
16
+
17
+ # Columns representing author and year
18
18
  AUTHOR_YEAR = ['author', 'year']
19
19
 
20
20
  # A Hash of named column combinations
@@ -33,15 +33,15 @@ module Taxonifi::Lumper
33
33
 
34
34
  # Authors, Year, Title, Publication, Volume_Number Pages Cited_Page
35
35
 
36
- # Lumps for which all columns are represented
37
- # TODO: This is really an assessor method
36
+ # Lumps for which all columns are represented
37
+ # TODO: This is really an assessor method
38
38
  def self.available_lumps(columns)
39
39
  raise Taxonifi::Lumper::LumperError, 'Array not passed to Lumper.available_lumps.' if !(columns.class == Array)
40
40
  LUMPS.keys.select{|k| (LUMPS[k] - columns) == []}
41
41
  end
42
42
 
43
- # Lumps for which any column is represented
44
- # # TODO: This is really an assessor method
43
+ # Lumps for which any column is represented
44
+ # # TODO: This is really an assessor method
45
45
  def self.intersecting_lumps(columns)
46
46
  raise Taxonifi::Lumper::LumperError, 'Array not passed to Lumper.intersecting_lumps.' if !(columns.class == Array)
47
47
  intersections = []
@@ -51,7 +51,7 @@ module Taxonifi::Lumper
51
51
  intersections
52
52
  end
53
53
 
54
-
54
+
55
55
  # return [Taxonifi::Model::NameCollection] from a csv file.
56
56
  def self.create_name_collection(options = {})
57
57
  opts = {
@@ -59,24 +59,24 @@ module Taxonifi::Lumper
59
59
  :initial_id => 0,
60
60
  :capture_related_fields => true # Stores other column values in (column_header => value) pairs in Name#properties
61
61
  }.merge!(options)
62
-
62
+
63
63
  csv = opts[:csv]
64
64
  raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_name_collection.' if csv.class != CSV::Table
65
-
65
+
66
66
  nc = Taxonifi::Model::NameCollection.new(:initial_id => opts[:initial_id])
67
67
  row_size = csv.size
68
68
 
69
69
  # The row index contains a vector of parent ids like
70
70
  # [0, 4, 29]
71
71
  # This implies that Name with #id 29 has Parent with #id 4
72
- # Initialize an empty index.
72
+ # Initialize an empty index.
73
73
  row_index = Taxonifi::Utils::Array.build_array_of_empty_arrays(row_size)
74
74
 
75
75
  # The name_index keeps track of unique name per rank like
76
76
  # :genus => {'Foo' => [0,2]}
77
77
  # This says that "Foo" is instantiated two times in the
78
78
  # name collection, with id 0, and id 2.
79
- name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
79
+ name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
80
80
 
81
81
  has_ref_fields = ([:citation_basic, :citation_small] & Taxonifi::Lumper.intersecting_lumps(csv.headers)).size > 0
82
82
  unused_fields = csv.headers - Taxonifi::Lumper::LUMPS[:names]
@@ -87,21 +87,21 @@ module Taxonifi::Lumper
87
87
  name_index[rank] = {}
88
88
  csv.each_with_index do |row, i|
89
89
  shares_rank = (rank == Taxonifi::Assessor::RowAssessor.lump_name_rank(row).to_s)
90
- name = row[rank]
90
+ name = row[rank]
91
91
 
92
92
  if !name.nil? # cell has data
93
93
  n = nil # a Name if necessary
94
- name_id = nil # index the new or existing Name
94
+ name_id = nil # index the new or existing Name
95
95
 
96
96
  exists = false
97
97
  if name_index[rank][name] # A matching name (String) has been previously added
98
98
  name_index[rank][name].each do |id|
99
99
  # Compare vectors of parent_ids for name presence
100
- if nc.parent_id_vector(id) == row_index[i]
100
+ if nc.parent_id_vector(id) == row_index[i]
101
101
  exists = true
102
102
  name_id = id
103
- break
104
- end
103
+ break
104
+ end
105
105
  end
106
106
  end # end name exists
107
107
 
@@ -110,22 +110,27 @@ module Taxonifi::Lumper
110
110
  unused_data = row.to_hash.select{|f| unused_fields.include?(f)}
111
111
  row_identifier = (row['identifier'] ? row['identifier'] : i)
112
112
 
113
- # Populate the new name if created. Previously matched names are not effected.
114
- if !n.nil?
113
+ # Populate the new name if created. Previously matched names are not effected.
114
+ if !n.nil?
115
115
  n.rank = rank
116
116
  n.name = name
117
- n.parent = nc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
117
+ n.parent = nc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
118
118
  n.row_number = i
119
119
 
120
120
  # Name/year needs to be standardized / cased out
121
121
  # headers are overlapping at times
122
122
 
123
123
  # Check to see if metadata (e.g. author year) apply to this rank, attach if so.
124
- if shares_rank
125
- if row['author_year']
126
- builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
124
+ if shares_rank
125
+ if row['author_year']
126
+ begin
127
+ builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
128
+ rescue Taxonifi::Splitter::SplitterError => e
129
+ # Map i to user's row number: +1 for 1-based, +1 to account for header row.
130
+ raise LumperError, "Failed to parse author_year string '#{row['author_year']}' in row #{i + 2}", e.backtrace
131
+ end
127
132
  n.authors = builder.people # was author!?
128
- n.year = builder.year
133
+ n.year = builder.year
129
134
  n.parens = builder.parens
130
135
  end
131
136
 
@@ -135,29 +140,29 @@ module Taxonifi::Lumper
135
140
 
136
141
  name_id = nc.add_object(n).id
137
142
  name_index[rank][name] ||= []
138
- name_index[rank][name].push name_id
143
+ name_index[rank][name].push name_id
139
144
 
140
145
  $DEBUG && $stderr.puts("added #{nc.collection.size - 1} | #{n.name} | #{n.rank} | #{n.parent ? n.parent.name : '-'} | #{n.parent ? n.parent.id : '-'}")
141
146
  else
142
147
  $DEBUG && $stderr.puts("already present #{rank} | #{name}")
143
- if shares_rank
144
- # original::
148
+ if shares_rank
149
+ # original::
145
150
  nc.add_duplicate_entry_metadata(name_id, row_identifier, unused_data)
146
151
 
147
- # hack
148
- # nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
149
-
152
+ # hack
153
+ # nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
154
+
150
155
  end
151
156
  end
152
157
 
153
158
  # build a by row vector of parent child relationships
154
- row_index[i].push name_id
159
+ row_index[i].push name_id
155
160
  end # end cell has data
156
161
 
157
162
  end
158
163
  end
159
164
  nc
160
- end
165
+ end
161
166
 
162
167
  # return [Taxonifi::Model::RefCollection] from a CSV file.
163
168
  def self.create_ref_collection(options = {})
@@ -180,7 +185,7 @@ module Taxonifi::Lumper
180
185
  :year => row['year'],
181
186
  :title => row['title'],
182
187
  :publication => row['publication']
183
- )
188
+ )
184
189
 
185
190
  # TODO: break out each of these lexes to a builder
186
191
  if row['authors'] && !row['authors'].empty?
@@ -216,16 +221,16 @@ module Taxonifi::Lumper
216
221
  r.pages = row['pages']
217
222
  end
218
223
  end
219
-
224
+
220
225
  r.add_properties(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
221
226
 
222
227
  # Do some indexing.
223
- ref_str = r.compact_string
228
+ ref_str = r.compact_string
224
229
  if !ref_index.keys.include?(ref_str)
225
230
  ref_id = rc.add_object(r).id
226
231
  ref_index.merge!(ref_str => ref_id)
227
232
  # puts "#{i} : #{ref_id}"
228
- rc.row_index[i] = r
233
+ rc.row_index[i] = r
229
234
  else
230
235
  rc.row_index[i] = rc.object_by_id(ref_index[ref_str])
231
236
  # puts "#{i} : #{ref_index[ref_str]}"
@@ -267,9 +272,9 @@ module Taxonifi::Lumper
267
272
  name = row[rank]
268
273
  if !name.nil? && !name.empty? # cell has data
269
274
  o = nil # a Name if necessary
270
- name_id = nil # index the new or existing name
275
+ name_id = nil # index the new or existing name
271
276
 
272
- if name_index[rank][name] # Matching name is found
277
+ if name_index[rank][name] # Matching name is found
273
278
 
274
279
  exists = false
275
280
  name_index[rank][name].each do |id|
@@ -287,25 +292,25 @@ module Taxonifi::Lumper
287
292
  o = Taxonifi::Model::GenericObject.new()
288
293
  end
289
294
 
290
- if !o.nil?
295
+ if !o.nil?
291
296
  o.name = name
292
297
  o.rank = rank
293
298
  o.row_number = i
294
- o.parent = c.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
299
+ o.parent = c.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
295
300
 
296
- name_id = c.add_object(o).id
301
+ name_id = c.add_object(o).id
297
302
  name_index[rank][name] ||= []
298
- name_index[rank][name].push name_id
303
+ name_index[rank][name].push name_id
299
304
  end
300
305
 
301
- row_index[i].push name_id
306
+ row_index[i].push name_id
302
307
  end
303
308
  end
304
309
  end
305
310
  c
306
311
  end
307
312
 
308
- # Return a geog collection from a csv file.
313
+ # Return a geog collection from a csv file.
309
314
  def self.create_geog_collection(csv)
310
315
  raise Taxonifi::Lumper::LumperError, 'Something that is not a CSV::Table was passed to Lumper.create_geog_collection.' if csv.class != CSV::Table
311
316
  gc = Taxonifi::Model::GeogCollection.new
@@ -320,7 +325,7 @@ module Taxonifi::Lumper
320
325
  end
321
326
 
322
327
  # We don't have the same problems as with taxon names, i.e.
323
- # boo in
328
+ # boo in
324
329
  # Foo nil boo
325
330
  # Foo bar boo
326
331
  # is the same thing wrt geography, not the case for taxon names.
@@ -331,28 +336,28 @@ module Taxonifi::Lumper
331
336
  name = row[level]
332
337
  if !name.nil? && !name.empty? # cell has data
333
338
  g = nil # a Name if necessary
334
- name_id = nil # index the new or existing name
339
+ name_id = nil # index the new or existing name
335
340
 
336
341
  if name_index[level][name] # name exists
337
- name_id = name_index[level][name]
342
+ name_id = name_index[level][name]
338
343
  else
339
344
  g = Taxonifi::Model::Geog.new()
340
345
  name_id = gc.add_object(g).id
341
346
  end
342
347
 
343
- if !g.nil?
348
+ if !g.nil?
344
349
  g.name = name
345
350
  g.rank = level
346
- g.parent = gc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
351
+ g.parent = gc.object_by_id(row_index[i].last) if row_index[i].size > 0 # it's parent is the previous id in this row
347
352
  end
348
353
 
349
354
  name_index[level][name] = name_id
350
- row_index[i].push name_id
355
+ row_index[i].push name_id
351
356
  end
352
357
  end
353
358
  end
354
359
  gc
355
- end
360
+ end
356
361
 
357
- end # end Lumper Module
362
+ end # end Lumper Module
358
363
 
@@ -1,4 +1,4 @@
1
- # Builder functionality for parsing/lexing framework.
1
+ # Builder functionality for parsing/lexing framework.
2
2
  module Taxonifi::Splitter::Builder
3
3
 
4
4
  # Load all builders (= models)
@@ -7,8 +7,11 @@ module Taxonifi::Splitter::Builder
7
7
 
8
8
  # Build and return Taxonifi::Model::AuthorYear from a string.
9
9
  def self.build_author_year(text)
10
- lexer = Taxonifi::Splitter::Lexer.new(text)
10
+ text = text&.strip
11
11
  builder = Taxonifi::Model::AuthorYear.new
12
+ return builder if text.nil? || text.empty?
13
+
14
+ lexer = Taxonifi::Splitter::Lexer.new(text)
12
15
  Taxonifi::Splitter::Parser.new(lexer, builder).parse_author_year
13
16
  builder
14
17
  end
@@ -1,3 +1,3 @@
1
1
  module Taxonifi
2
- VERSION = "0.5.5"
2
+ VERSION = "0.6.1"
3
3
  end
data/taxonifi.gemspec CHANGED
@@ -25,17 +25,19 @@ Gem::Specification.new do |s|
25
25
  s.homepage = "https://github.com/SpeciesFile/taxonifi"
26
26
  s.licenses = ["MIT"]
27
27
  s.require_paths = ["lib"]
28
- s.rubygems_version = "2.4.5"
28
+ s.rubygems_version = "3.2.22"
29
29
  s.metadata = { "source_code_uri" => "https://github.com/SpeciesFileGroup/taxonifi" }
30
30
 
31
31
  s.add_dependency "require_all", "~> 3.0"
32
- s.required_ruby_version = '~> 2.6'
32
+ s.add_dependency "csv", "~> 3.3.5"
33
+
34
+ s.required_ruby_version = '>= 3.0', '< 4'
33
35
 
34
36
  s.add_development_dependency "rake", '~> 13.0'
35
37
  s.add_development_dependency "byebug", "~> 11"
36
- s.add_development_dependency "bundler", "~> 2.1"
38
+ s.add_development_dependency "bundler", "~> 2.7"
37
39
  s.add_development_dependency 'awesome_print', '~> 1.8'
38
- s.add_development_dependency 'test-unit', '~> 3.3.5'
40
+ s.add_development_dependency 'test-unit', '~> 3.5'
39
41
  s.add_development_dependency "rdoc", "~> 6.2"
40
42
  s.add_development_dependency "builder", "~> 3.2"
41
43
 
data/test/helper.rb CHANGED
@@ -22,16 +22,16 @@ def generic_csv_with_names
22
22
  csv << [6, "3", "Foo bar stuff (Guy, 1921)", "species", "Foo bar blorf (Guy, 1921)"] # initial subspecies rank data had rank blank, assuming they will be called species
23
23
  end
24
24
 
25
- @csv = CSV.parse(@csv_string, {headers: true})
25
+ @csv = CSV.parse(@csv_string, headers: true)
26
26
  end
27
27
 
28
28
  def names
29
29
  file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/names.csv'))
30
30
 
31
- csv = CSV.read(file, {
31
+ csv = CSV.read(file,
32
32
  headers: true,
33
33
  col_sep: ",",
34
34
  header_converters: :downcase
35
- } )
35
+ )
36
36
  nc = Taxonifi::Lumper.create_name_collection(csv: csv, initial_id: 1)
37
37
  end
@@ -16,11 +16,11 @@ class Test_TaxonifiExports < Test::Unit::TestCase
16
16
  def test_big_file
17
17
  file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Lygaeoidea.csv'))
18
18
 
19
- csv = CSV.read(file, {
19
+ csv = CSV.read(file,
20
20
  headers: true,
21
21
  col_sep: ",",
22
22
  header_converters: :downcase
23
- } )
23
+ )
24
24
 
25
25
  nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
26
26
  nc.generate_ref_collection(1)
@@ -32,11 +32,11 @@ class Test_TaxonifiExports < Test::Unit::TestCase
32
32
  def test_little_file_linkages
33
33
  file = File.expand_path(File.join(File.dirname(__FILE__), 'file_fixtures/Fossil.csv'))
34
34
 
35
- csv = CSV.read(file, {
35
+ csv = CSV.read(file,
36
36
  headers: true,
37
37
  col_sep: ",",
38
38
  header_converters: :downcase
39
- } )
39
+ )
40
40
 
41
41
  nc = Taxonifi::Lumper.create_name_collection(:csv => csv, :initial_id => 1)
42
42
  rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
@@ -9,7 +9,7 @@ class Test_TaxonifiLumperClump < Test::Unit::TestCase
9
9
  csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
10
10
  end
11
11
 
12
- @csv = CSV.parse(@csv_string, {headers: true})
12
+ @csv = CSV.parse(@csv_string, headers: true)
13
13
  end
14
14
 
15
15
  def test_new_clump_without_params_can_be_created
@@ -23,7 +23,7 @@ class Test_TaxonifiLumperGeogs < Test::Unit::TestCase
23
23
  # The name_index looks like
24
24
  # {:country => {"Canada" => 0, "USA" => 2, "Utopia" => 5} ... etc.
25
25
 
26
- @csv = CSV.parse(@csv_string, {headers: true})
26
+ @csv = CSV.parse(@csv_string, headers: true)
27
27
  end
28
28
 
29
29
  def _create_a_collection
@@ -9,7 +9,7 @@ class Test_TaxonifiLumperHierarchicalCollection < Test::Unit::TestCase
9
9
  csv << %w{a b c}
10
10
  end
11
11
 
12
- @csv = CSV.parse(@csv_string, {headers: true})
12
+ @csv = CSV.parse(@csv_string, headers: true)
13
13
  end
14
14
 
15
15
  def test_that_create_hierarchical_collection_creates_collection
@@ -48,7 +48,7 @@ class Test_TaxonifiLumperHierarchicalCollection < Test::Unit::TestCase
48
48
  csv << @headers
49
49
  csv << ["a", nil, "c"]
50
50
  end
51
- csv = CSV.parse(csv_string, {headers: true})
51
+ csv = CSV.parse(csv_string, headers: true)
52
52
  c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
53
53
  assert_equal nil, c.collection.first.parent
54
54
  assert_equal "a", c.collection[1].parent.name
@@ -61,7 +61,7 @@ class Test_TaxonifiLumperHierarchicalCollection < Test::Unit::TestCase
61
61
  csv << ["a", "b", "d"]
62
62
  csv << ["e", "b", "f"]
63
63
  end
64
- csv = CSV.parse(csv_string, {headers: true})
64
+ csv = CSV.parse(csv_string, headers: true)
65
65
  c = Taxonifi::Lumper.create_hierarchical_collection(csv, %w{a b c})
66
66
  assert_equal %w{a b c d e b f}, c.collection.collect{|o| o.name}
67
67
  assert_equal 7, c.collection.size
@@ -9,7 +9,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
9
9
  csv << ["Fooidae", "Foo", "bar", "Smith", "1854"]
10
10
  end
11
11
 
12
- @csv = CSV.parse(@csv_string, {headers: true})
12
+ @csv = CSV.parse(@csv_string, headers: true)
13
13
  end
14
14
 
15
15
  def test_that_setup_setups
@@ -25,7 +25,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
25
25
  def test_available_lumps
26
26
  assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::QUAD ).include?(:quadrinomial)
27
27
  assert Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:quad_author_year)
28
- assert !Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:names)
28
+ assert !Taxonifi::Lumper.available_lumps( Taxonifi::Lumper::AUTHOR_YEAR + Taxonifi::Lumper::QUAD ).include?(:names)
29
29
  end
30
30
 
31
31
  def test_create_name_collection_creates_a_name_collection
@@ -49,7 +49,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
49
49
  assert_equal 0, nc.collection.first.row_number
50
50
  assert_equal 0, nc.collection.last.row_number
51
51
  end
52
-
52
+
53
53
  def test_that_create_name_collection_parentifies
54
54
  nc = Taxonifi::Lumper.create_name_collection(:csv => @csv)
55
55
  assert_equal nc.collection[0], nc.collection[1].parent
@@ -69,7 +69,7 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
69
69
  # 1 3 6
70
70
  # 0 4 7
71
71
 
72
- csv = CSV.parse(string, {headers: true})
72
+ csv = CSV.parse(string, headers: true)
73
73
  nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
74
74
 
75
75
  assert_equal nc.collection[2], nc.collection[5].parent
@@ -87,18 +87,18 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
87
87
  csv << ["Fooidae", "Foo", "bar", "Smith, 1854"]
88
88
  csv << ["Fooidae", "Foo", "foo", "(Smith and Jones, 1854)"]
89
89
  end
90
-
90
+
91
91
  # 0 Fooidae
92
92
  # 1 Foo
93
93
  # 2 bar
94
- # 3 foo
94
+ # 3 foo
95
95
 
96
- csv = CSV.parse(string, {headers: true})
96
+ csv = CSV.parse(string, headers: true)
97
97
  nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
98
98
  assert_equal 2, nc.collection[3].authors.size
99
99
  assert_equal 'Smith', nc.collection[3].authors.first.last_name
100
100
  assert_equal 1854, nc.collection[3].year
101
-
101
+
102
102
  assert_equal 'Smith, 1854', nc.collection[2].author_year
103
103
  assert_equal 'Smith', nc.collection[2].author_with_parens
104
104
 
@@ -115,17 +115,43 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
115
115
  assert_equal true, nc.collection[3].parens
116
116
  end
117
117
 
118
+ def test_that_create_a_name_collection_ignores_whitespace_author_year
119
+ string = CSV.generate() do |csv|
120
+ csv << %w{family genus species author_year}
121
+ csv << ["Fooidae", "Foo", "bar", " "]
122
+ end
123
+
124
+ csv = CSV.parse(string, headers: true)
125
+ nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
126
+ assert_equal 0, nc.collection[2].authors.size
127
+ assert_equal nil, nc.collection[2].author_year
128
+ end
129
+
130
+ def test_that_create_a_name_collection_throws_lumper_error_on_bad_author_year
131
+ string = CSV.generate() do |csv|
132
+ csv << %w{family genus species author_year}
133
+ csv << ["Fooidae", "Foo", "bar", ")Smith)"]
134
+ end
135
+
136
+ csv = CSV.parse(string, headers: true)
137
+ e = assert_raises Taxonifi::Lumper::LumperError do
138
+ Taxonifi::Lumper.create_name_collection(:csv => csv)
139
+ end
140
+
141
+ assert_match("')Smith)' in row 2", e.message)
142
+ end
143
+
118
144
  def test_that_create_a_name_collection_handles_related_columns
119
145
  string = CSV.generate() do |csv|
120
146
  csv << %w{family genus species author_year foo bar Stuff}
121
147
  csv << ["Fooidae", "Foo", "bar", "Smith, 1854" , nil, 1 , "one"]
122
148
  end
123
-
149
+
124
150
  # 0 Fooidae
125
151
  # 1 Foo
126
152
  # 2 bar
127
153
 
128
- csv = CSV.parse(string, {headers: true})
154
+ csv = CSV.parse(string, headers: true)
129
155
  nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
130
156
  assert_equal nil, nc.collection[2].properties['foo']
131
157
  assert_equal "1", nc.collection[2].properties['bar'] # !!! everything converted to String
@@ -145,11 +171,11 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
145
171
  # 1 Foo
146
172
  # 2 bar
147
173
  # 3 foo
148
- # 4 varblorf
149
- # 5 varblorf
150
- # 6 varbliff
174
+ # 4 varblorf
175
+ # 5 varblorf
176
+ # 6 varbliff
151
177
 
152
- csv = CSV.parse(string, {headers: true})
178
+ csv = CSV.parse(string, headers: true)
153
179
  nc = Taxonifi::Lumper.create_name_collection(:csv => csv)
154
180
 
155
181
  assert_equal nc.collection[1], nc.collection[2].parent
@@ -176,5 +202,5 @@ class Test_TaxonifiLumperNames < Test::Unit::TestCase
176
202
 
177
203
  #--- reference collections
178
204
 
179
- end
205
+ end
180
206
 
@@ -14,7 +14,7 @@ class Test_TaxonifiLumperParentChildNameCollection < Test::Unit::TestCase
14
14
  csv << [5, "0", "Bidae", "Family", nil ]
15
15
  end
16
16
 
17
- @csv = CSV.parse(@csv_string, {headers: true})
17
+ @csv = CSV.parse(@csv_string, headers: true)
18
18
  end
19
19
 
20
20
  def create_a_collection
@@ -54,7 +54,7 @@ class Test_TaxonifiLumperParentChildNameCollection < Test::Unit::TestCase
54
54
  # csv << [7, 3, 'Neortholomus (Neortholomus) blorf (Say, 1832)','species']
55
55
  # csv << [8, 3, 'Neortholomus (Neortholomus) blorf (Say, 1832)','species']
56
56
  end
57
- csv = CSV.parse(csv_string, {headers: true})
57
+ csv = CSV.parse(csv_string, headers: true)
58
58
 
59
59
  nc = Taxonifi::Lumper::Lumps::ParentChildNameCollection.name_collection(csv)
60
60
 
@@ -9,7 +9,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
9
9
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
10
10
  end
11
11
 
12
- @csv = CSV.parse(@csv_string, {headers: true})
12
+ @csv = CSV.parse(@csv_string, headers: true)
13
13
  end
14
14
 
15
15
  def test_available_lumps
@@ -23,7 +23,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
23
23
  csv << ["Smith J. and Barnes S."]
24
24
  end
25
25
 
26
- csv = CSV.parse(csv_string, {headers: true})
26
+ csv = CSV.parse(csv_string, headers: true)
27
27
 
28
28
  assert_equal [:citation_basic, :citation_small], Taxonifi::Lumper.intersecting_lumps(csv.headers)
29
29
  assert_equal [], Taxonifi::Lumper.available_lumps(csv.headers)
@@ -56,7 +56,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
56
56
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
57
57
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
58
58
  end
59
- csv = CSV.parse(csv_string, {headers: true})
59
+ csv = CSV.parse(csv_string, headers: true)
60
60
  rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
61
61
  assert_equal 1, rc.collection.size
62
62
  end
@@ -68,7 +68,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
68
68
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
69
69
  csv << ["Smith J. and Bartes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
70
70
  end
71
- csv = CSV.parse(csv_string, {headers: true})
71
+ csv = CSV.parse(csv_string, headers: true)
72
72
  rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
73
73
  assert_equal 2, rc.collection.size
74
74
  end
@@ -79,7 +79,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
79
79
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
80
80
  csv << ["Smith J.", "2012", "Foo and bar", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)" ]
81
81
  end
82
- csv = CSV.parse(csv_string, {headers: true})
82
+ csv = CSV.parse(csv_string, headers: true)
83
83
  rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
84
84
  assert_equal "Foo and bar", rc.object_from_row(1).title
85
85
  end
@@ -90,7 +90,7 @@ class Test_TaxonifiLumperRefs < Test::Unit::TestCase
90
90
  csv << ["Smith J. and Barnes S.", "2012", "Bar and foo", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)", "foo value", 1 ]
91
91
  csv << ["Smith J.", "2012", "Foo and bar", "Journal of Foo", "2", "3", "2-3, 190", nil, "2", "4", "2(4)", nil, "bar value" ]
92
92
  end
93
- csv = CSV.parse(csv_string, {headers: true})
93
+ csv = CSV.parse(csv_string, headers: true)
94
94
  rc = Taxonifi::Lumper.create_ref_collection(:csv => csv)
95
95
 
96
96
  assert_equal "foo value", rc.collection.first.properties['foo']
@@ -14,7 +14,7 @@ class Test_TaxonifiAccessor < Test::Unit::TestCase
14
14
  csv << ["Fooidae", "Bar", nil, "bar", nil, "varbar", "Smith", "1854"] # 5
15
15
  end
16
16
 
17
- @csv = CSV.parse(@csv_string, {headers: true, header_converters: :downcase})
17
+ @csv = CSV.parse(@csv_string, headers: true, header_converters: :downcase)
18
18
  end
19
19
 
20
20
  def test_first_available
@@ -46,7 +46,7 @@ class Test_TaxonifiAccessor < Test::Unit::TestCase
46
46
  csv << headers
47
47
  csv << ["Smith J. and Barnes S."]
48
48
  end
49
- csv = CSV.parse(csv_string, {headers: true})
49
+ csv = CSV.parse(csv_string, headers: true)
50
50
  assert_equal [:citation_basic, :citation_small], Taxonifi::Assessor::RowAssessor.intersecting_lumps_with_data(csv.first)
51
51
  end
52
52
 
@@ -57,7 +57,7 @@ class Test_TaxonifiAccessor < Test::Unit::TestCase
57
57
  csv << ["Smith J. and Barnes S.", 1912, "Foo", "Bar", "3(4)", "1-2"]
58
58
  end
59
59
 
60
- csv = CSV.parse(csv_string, {headers: true})
60
+ csv = CSV.parse(csv_string, headers: true)
61
61
 
62
62
  assert_equal [:citation_small], Taxonifi::Assessor::RowAssessor.lumps_with_data(csv.first)
63
63
  end
metadata CHANGED
@@ -1,11 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxonifi
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Yoder
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
10
  date: 2013-03-27 00:00:00.000000000 Z
@@ -24,6 +23,20 @@ dependencies:
24
23
  - - "~>"
25
24
  - !ruby/object:Gem::Version
26
25
  version: '3.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: csv
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: 3.3.5
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 3.3.5
27
40
  - !ruby/object:Gem::Dependency
28
41
  name: rake
29
42
  requirement: !ruby/object:Gem::Requirement
@@ -58,14 +71,14 @@ dependencies:
58
71
  requirements:
59
72
  - - "~>"
60
73
  - !ruby/object:Gem::Version
61
- version: '2.1'
74
+ version: '2.7'
62
75
  type: :development
63
76
  prerelease: false
64
77
  version_requirements: !ruby/object:Gem::Requirement
65
78
  requirements:
66
79
  - - "~>"
67
80
  - !ruby/object:Gem::Version
68
- version: '2.1'
81
+ version: '2.7'
69
82
  - !ruby/object:Gem::Dependency
70
83
  name: awesome_print
71
84
  requirement: !ruby/object:Gem::Requirement
@@ -86,14 +99,14 @@ dependencies:
86
99
  requirements:
87
100
  - - "~>"
88
101
  - !ruby/object:Gem::Version
89
- version: 3.3.5
102
+ version: '3.5'
90
103
  type: :development
91
104
  prerelease: false
92
105
  version_requirements: !ruby/object:Gem::Requirement
93
106
  requirements:
94
107
  - - "~>"
95
108
  - !ruby/object:Gem::Version
96
- version: 3.3.5
109
+ version: '3.5'
97
110
  - !ruby/object:Gem::Dependency
98
111
  name: rdoc
99
112
  requirement: !ruby/object:Gem::Requirement
@@ -132,6 +145,7 @@ extra_rdoc_files:
132
145
  - README.md
133
146
  files:
134
147
  - ".document"
148
+ - ".github/workflows/ruby.yml"
135
149
  - ".gitignore"
136
150
  - ".ruby-version"
137
151
  - ".travis.yml"
@@ -206,23 +220,24 @@ licenses:
206
220
  - MIT
207
221
  metadata:
208
222
  source_code_uri: https://github.com/SpeciesFileGroup/taxonifi
209
- post_install_message:
210
223
  rdoc_options: []
211
224
  require_paths:
212
225
  - lib
213
226
  required_ruby_version: !ruby/object:Gem::Requirement
214
227
  requirements:
215
- - - "~>"
228
+ - - ">="
229
+ - !ruby/object:Gem::Version
230
+ version: '3.0'
231
+ - - "<"
216
232
  - !ruby/object:Gem::Version
217
- version: '2.6'
233
+ version: '4'
218
234
  required_rubygems_version: !ruby/object:Gem::Requirement
219
235
  requirements:
220
236
  - - ">="
221
237
  - !ruby/object:Gem::Version
222
238
  version: '0'
223
239
  requirements: []
224
- rubygems_version: 3.0.6
225
- signing_key:
240
+ rubygems_version: 3.7.1
226
241
  specification_version: 4
227
242
  summary: A general purpose framework for scripted handling of taxonomic names or other
228
243
  heirarchical metadata.