taxonifi 0.2.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +59 -0
  3. data/.travis.yml +11 -0
  4. data/Gemfile +5 -17
  5. data/Gemfile.lock +22 -40
  6. data/README.md +192 -0
  7. data/Rakefile +35 -26
  8. data/lib/export/format/base.rb +1 -1
  9. data/lib/export/format/species_file.rb +154 -152
  10. data/lib/lumper/clump.rb +1 -1
  11. data/lib/lumper/lumper.rb +22 -18
  12. data/lib/lumper/lumps/parent_child_name_collection.rb +1 -2
  13. data/lib/lumper/name_index.rb +21 -0
  14. data/lib/{models → model}/author_year.rb +2 -2
  15. data/lib/{models → model}/base.rb +35 -5
  16. data/lib/{models → model}/collection.rb +8 -1
  17. data/lib/{models → model}/name.rb +128 -36
  18. data/lib/{models → model}/name_collection.rb +134 -33
  19. data/lib/{models → model}/person.rb +1 -1
  20. data/lib/{models → model}/ref.rb +4 -2
  21. data/lib/model/ref_collection.rb +171 -0
  22. data/lib/{models → model}/species_name.rb +24 -3
  23. data/lib/splitter/builder.rb +1 -1
  24. data/lib/splitter/parser.rb +5 -0
  25. data/lib/splitter/tokens.rb +54 -9
  26. data/lib/taxonifi/version.rb +3 -0
  27. data/lib/taxonifi.rb +5 -9
  28. data/taxonifi.gemspec +29 -99
  29. data/test/helper.rb +1 -1
  30. data/test/test_exporter.rb +1 -1
  31. data/test/test_lumper_names.rb +9 -9
  32. data/test/test_lumper_refs.rb +4 -4
  33. data/test/test_parser.rb +97 -26
  34. data/test/test_splitter_tokens.rb +25 -4
  35. data/test/test_taxonifi_base.rb +1 -1
  36. data/test/test_taxonifi_geog.rb +1 -1
  37. data/test/test_taxonifi_name.rb +13 -14
  38. data/test/test_taxonifi_name_collection.rb +11 -5
  39. data/test/test_taxonifi_ref.rb +1 -1
  40. data/test/test_taxonifi_ref_collection.rb +40 -3
  41. data/test/test_taxonifi_species_name.rb +51 -1
  42. data/travis/before_install.sh +2 -0
  43. metadata +96 -66
  44. data/README.rdoc +0 -154
  45. data/VERSION +0 -1
  46. data/lib/models/ref_collection.rb +0 -107
  47. /data/lib/{models → model}/generic_object.rb +0 -0
  48. /data/lib/{models → model}/geog.rb +0 -0
  49. /data/lib/{models → model}/geog_collection.rb +0 -0
  50. /data/lib/{models → model}/shared_class_methods.rb +0 -0
@@ -8,6 +8,7 @@ module Taxonifi::Export
8
8
 
9
9
  # tblRanks 5/17/2012
10
10
  SPECIES_FILE_RANKS = {
11
+ 'variety' => 5, # there is no variety rank per se in SFs, they are handled this way according to DE
11
12
  'subspecies' => 5,
12
13
  'species' => 10,
13
14
  'species subgroup' => 11,
@@ -55,18 +56,17 @@ module Taxonifi::Export
55
56
  attr_accessor :name_collection
56
57
  attr_accessor :ref_collection
57
58
  attr_accessor :pub_collection
58
- attr_accessor :author_index
59
59
  attr_accessor :genus_names, :species_names, :nomenclator
60
60
  attr_accessor :authorized_user_id, :time
61
- attr_accessor :starting_ref_id
61
+
62
+ attr_accessor :built_nomenclators
62
63
 
63
64
  def initialize(options = {})
64
65
  opts = {
65
66
  :nc => Taxonifi::Model::NameCollection.new,
66
67
  :export_folder => 'species_file',
67
68
  :authorized_user_id => nil,
68
- :starting_ref_id => 1, # should be configured elsewhere... but
69
- :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites}
69
+ :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites tblTypeSpecies}
70
70
  }.merge!(options)
71
71
 
72
72
  @manifest = opts[:manifest]
@@ -77,9 +77,7 @@ module Taxonifi::Export
77
77
  @name_collection = opts[:nc]
78
78
  @pub_collection = {} # title => id
79
79
  @authorized_user_id = opts[:authorized_user_id]
80
- @author_index = {}
81
- @starting_ref_id = opts[:starting_ref_id]
82
-
80
+
83
81
  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
84
82
  # because it's all internal. There is a strong potential for key collisions if this pipeline is modified to
85
83
  # include references external to the initialized name_collection. See also export_references.
@@ -93,11 +91,6 @@ module Taxonifi::Export
93
91
  @empty_quotes = ""
94
92
  end
95
93
 
96
- # Assumes names that are the same are the same person.
97
- def build_author_index
98
- @author_index = @name_collection.ref_collection.unique_authors.inject({}){|hsh, a| hsh.merge!(a.compact_string => a)}
99
- end
100
-
101
94
  def export()
102
95
  super
103
96
  # You must have
@@ -109,9 +102,6 @@ module Taxonifi::Export
109
102
  # Give authors unique ids:
110
103
  # @name_collection.ref_collection.uniquify_authors(1)
111
104
 
112
- if @name_collection.ref_collection
113
- build_author_index
114
- end
115
105
 
116
106
  # raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?
117
107
 
@@ -122,14 +112,28 @@ module Taxonifi::Export
122
112
  @name_collection.names_at_rank('subgenus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
123
113
  @name_collection.names_at_rank('species').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
124
114
  @name_collection.names_at_rank('subspecies').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
115
+ @name_collection.names_at_rank('variety').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
116
+
117
+ # Add combinations of names from nomenclators/citations as well
118
+
119
+ @name_collection.nomenclators.keys.each do |k|
120
+ @genus_names.merge!(@name_collection.nomenclators[k][0] => nil)
121
+ @genus_names.merge!(@name_collection.nomenclators[k][1] => nil)
122
+ @species_names.merge!(@name_collection.nomenclators[k][2] => nil)
123
+ @species_names.merge!(@name_collection.nomenclators[k][3] => nil)
124
+ @species_names.merge!(@name_collection.nomenclators[k][4] => nil)
125
+ end
125
126
 
127
+ @genus_names.delete_if{|key,value| key.nil? || key.length == 0}
128
+ @species_names.delete_if{|key,value| key.nil? || key.length == 0}
129
+
126
130
  str = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
127
131
  @manifest.each do |f|
128
132
  str << send(f)
129
133
  end
130
134
  str << ['COMMIT', 'END TRY', 'BEGIN CATCH',
131
- 'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
132
- 'ROLLBACK', 'END CATCH']
135
+ 'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
136
+ 'ROLLBACK', 'END CATCH']
133
137
  write_file('everything.sql', str.join("\n\n"))
134
138
  true
135
139
  end
@@ -142,56 +146,56 @@ module Taxonifi::Export
142
146
  # nc.ref_collection = Taxonifi::Model::RefCollection.new
143
147
  # etc.
144
148
  def export_references(options = {})
145
- raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest: to achieve a similar result.'
146
- #opts = {
147
- # :starting_ref_id => 0,
148
- # :starting_author_id => 0
149
- #}
150
-
151
- #configure_folders
152
- #build_author_index
153
-
154
- ## order matters
155
- #['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
156
- # write_file(t, send(t))
157
- #end
158
- end
149
+ raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest to achieve a similar result.'
150
+ #configure_folders
151
+ end
159
152
 
160
- # Get's the reference for a name as referenced
161
- # by .related[:link_to_ref_from_row]
153
+ # Gets the reference for a name as referenced
154
+ # by .properties[:link_to_ref_from_row]
162
155
  def get_ref(name)
163
- if not name.related[:link_to_ref_from_row].nil?
164
- return @name_collection.ref_collection.object_from_row(name.related[:link_to_ref_from_row])
165
- end
166
- nil
156
+ # if not name.properties[:link_to_ref_from_row].nil?
157
+ # return @name_collection.ref_collection.object_from_row(name.properties[:link_to_ref_from_row])
158
+ # end
159
+ # nil
160
+ name.original_description_reference ? name.original_description_reference : nil
167
161
  end
168
162
 
169
163
  def tblTaxa
170
- @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
164
+ @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode Extinct NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
171
165
  sql = []
172
- @name_collection.collection.each do |n|
173
- $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
174
-
175
- ref = get_ref(n)
176
- cols = {
177
- TaxonNameID: n.id,
178
- TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
179
- RankID: SPECIES_FILE_RANKS[n.rank],
180
- Name: n.name,
181
- Parens: (n.parens ? 1 : 0),
182
- AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id), # !! SF folks like to pre-populate with zeros
183
- RefID: (ref ? ref.id : 0),
184
- DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
185
- AccessCode: 0,
186
- NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
187
- StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
188
- OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
189
- LastUpdate: @time,
190
- ModifiedBy: @authorized_user_id,
191
- }
192
- sql << sql_insert_statement('tblTaxa', cols)
166
+ sql_above = []
167
+
168
+ # Need to add by rank for FK constraint handling
169
+
170
+ Taxonifi::RANKS.each do |rank|
171
+ @name_collection.names_at_rank(rank).each do |n|
172
+ $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
173
+
174
+ # ref = get_ref(n)
175
+ cols = {
176
+ TaxonNameID: n.id,
177
+ TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
178
+ RankID: SPECIES_FILE_RANKS[n.rank],
179
+ Name: n.name,
180
+ Parens: (n.parens ? 1 : 0),
181
+ AboveID: 0,
182
+ RefID: (n.original_description_reference ? n.original_description_reference.id : 0),
183
+ DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
184
+ AccessCode: 0,
185
+ Extinct: (n.properties && n.properties['extinct'] == 'true' ? 1 : 0),
186
+ NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
187
+ StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
188
+ OriginalGenusID: (n.properties && !n.properties['original_genus_id'].nil? ? n.properties['original_genus_id'] : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
189
+ LastUpdate: @time,
190
+ ModifiedBy: @authorized_user_id,
191
+ }
192
+ sql << sql_insert_statement('tblTaxa', cols)
193
+ above_id = (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id)
194
+ sql_above.push "UPDATE tblTaxa SET AboveID = #{above_id} where TaxonNameID = #{n.id};"
195
+ end
193
196
  end
194
- sql.join("\n")
197
+
198
+ sql.join("\n") + sql_above.join("\n")
195
199
  end
196
200
 
197
201
  # Generate a tblRefs string.
@@ -202,6 +206,16 @@ module Taxonifi::Export
202
206
  # Assumes the 0 "null" pub id is there
203
207
  pub_id = @pub_collection[r.publication] ? @pub_collection[r.publication] : 0
204
208
 
209
+ # Build a note based on "unused" properties
210
+ note = []
211
+ if r.properties
212
+ r.properties.keys.each do |k|
213
+ note.push "#{k}: #{r.properties[k]}" if r.properties[k] && r.properties.length > 0
214
+ end
215
+ end
216
+ note = note.join("; ")
217
+ note = @empty_quotes if note.length == 0
218
+
205
219
  cols = {
206
220
  RefID: r.id,
207
221
  ContainingRefID: 0,
@@ -210,12 +224,12 @@ module Taxonifi::Export
210
224
  Series: @empty_quotes,
211
225
  Volume: (r.volume ? r.volume : @empty_quotes),
212
226
  Issue: (r.number ? r.number : @empty_quotes),
213
- RefPages: r.page_string, # always a string
227
+ RefPages: r.page_string, # always a strings
214
228
  ActualYear: (r.year ? r.year : @empty_quotes),
215
229
  StatedYear: @empty_quotes,
216
230
  AccessCode: 0,
217
231
  Flags: 0,
218
- Note: @empty_quotes,
232
+ Note: note,
219
233
  LastUpdate: @time,
220
234
  LinkID: 0,
221
235
  ModifiedBy: @authorized_user_id,
@@ -231,7 +245,7 @@ module Taxonifi::Export
231
245
  def tblPubs
232
246
  sql = []
233
247
  @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}
234
-
248
+
235
249
  # Hackish should build this elsewhere, but degrades OK
236
250
  pubs = @name_collection.ref_collection.collection.collect{|r| r.publication}.compact.uniq
237
251
 
@@ -263,9 +277,7 @@ module Taxonifi::Export
263
277
  def tblPeople
264
278
  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
265
279
  sql = []
266
- @author_index.keys.each_with_index do |k,i|
267
- a = @author_index[k]
268
- # a.id = i + 1
280
+ @name_collection.ref_collection.all_authors.each do |a|
269
281
  cols = {
270
282
  PersonID: a.id,
271
283
  FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"),
@@ -287,12 +299,11 @@ module Taxonifi::Export
287
299
  sql = []
288
300
  @name_collection.ref_collection.collection.each do |r|
289
301
  r.authors.each_with_index do |x, i|
290
- a = @author_index[x.compact_string]
291
302
  cols = {
292
303
  RefID: r.id,
293
- PersonID: a.id,
304
+ PersonID: x.id,
294
305
  SeqNum: i + 1,
295
- AuthorCount: r.authors.size,
306
+ AuthorCount: r.authors.size + 1,
296
307
  LastUpdate: @time,
297
308
  ModifiedBy: @authorized_user_id
298
309
  }
@@ -306,35 +317,62 @@ module Taxonifi::Export
306
317
  def tblCites
307
318
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
308
319
  sql = []
309
-
310
- @name_collection.collection.each do |n|
311
- next if @nomenclator[n.nomenclator_name].nil? # Only create nomenclator records if they are original citations, otherwise not !! Might need updating in future imports
312
- ref = get_ref(n)
313
320
 
314
- # ref = @by_author_reference_index[n.author_year_index]
315
- next if ref.nil?
316
- cols = {
317
- TaxonNameID: n.id,
318
- SeqNum: 1,
319
- RefID: ref.id,
320
- NomenclatorID: @nomenclator[n.nomenclator_name],
321
- LastUpdate: @time,
322
- ModifiedBy: @authorized_user_id,
323
- CitePages: @empty_quotes, # equates to "" in CSV speak
324
- NewNameStatus: 0,
325
- Note: @empty_quotes,
326
- TypeClarification: 0, # We might derive more data from this
327
- CurrentConcept: 1, # Boolean, right?
328
- ConceptChange: 0, # Unspecified
329
- InfoFlags: 0, #
330
- InfoFlagStatus: 1, # 1 => needs review
331
- PolynomialStatus: 0
332
- }
333
- sql << sql_insert_statement('tblCites', cols)
321
+ @name_collection.citations.keys.each do |name_id|
322
+ seq_num = 1
323
+ @name_collection.citations[name_id].each do |ref_id, nomenclator_index, properties|
324
+ cols = {
325
+ TaxonNameID: name_id,
326
+ SeqNum: seq_num,
327
+ RefID: ref_id,
328
+ NomenclatorID: nomenclator_index,
329
+ LastUpdate: @time,
330
+ ModifiedBy: @authorized_user_id,
331
+ CitePages: (properties[:cite_pages] ? properties[:cite_pages] : @empty_quotes),
332
+ NewNameStatus: 0,
333
+ Note: (properties[:note] ? properties[:note] : @empty_quotes),
334
+ TypeClarification: 0, # We might derive more data from this
335
+ CurrentConcept: (properties[:current_concept] == true ? 1 : 0), # Boolean, right?
336
+ ConceptChange: 0, # Unspecified
337
+ InfoFlags: 0, #
338
+ InfoFlagStatus: 1, # 1 => needs review
339
+ PolynomialStatus: 0
340
+ }
341
+ sql << sql_insert_statement('tblCites', cols)
342
+ seq_num += 1
343
+ end
334
344
  end
335
345
  sql.join("\n")
336
346
  end
337
347
 
348
+ # Generate tblTypeSpecies string.
349
+ def tblTypeSpecies
350
+ @headers = %w{GenusNameID SpeciesNameID Reason AuthorityRefID FirstFamGrpNameID LastUpdate ModifiedBy NewID}
351
+ sql = []
352
+
353
+ names = @name_collection.names_at_rank('genus') + @name_collection.names_at_rank('subgenus')
354
+ names.each do |n|
355
+ if n.properties[:type_species_id]
356
+ ref = get_ref(n)
357
+
358
+ # ref = @by_author_reference_index[n.author_year_index]
359
+ next if ref.nil?
360
+ cols = {
361
+ GenusNameID: n.id ,
362
+ SpeciesNameID: n.properties[:type_species_id],
363
+ Reason: 0 ,
364
+ AuthorityRefID: 0 ,
365
+ FirstFamGrpNameID: 0 ,
366
+ LastUpdate: @time ,
367
+ ModifiedBy: @authorized_user_id ,
368
+ NewID: 0 # What is this?
369
+ }
370
+ sql << sql_insert_statement('tblTypeSpecies', cols)
371
+ end
372
+ end
373
+ sql.join("\n")
374
+ end
375
+
338
376
  def tblGenusNames
339
377
  # TODO: SF tests catch unused names based on some names not being included in Nomenclator data. We could optimize so that the workaround is removed.
340
378
  # I.e., all the names get added here, not all the names get added to Nomenclator/Cites because of citations which are not original combinations
@@ -374,78 +412,42 @@ module Taxonifi::Export
374
412
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
375
413
  sql = []
376
414
  i = 1
377
- @name_collection.collection.each do |n|
378
- gid, sgid = 0,0
379
- sid = @species_names[n.parent_name_at_rank('species')] || 0
380
- ssid = @species_names[n.parent_name_at_rank('subspecies')] || 0
381
415
 
382
- if n.parens == false
383
- gid = @genus_names[n.parent_name_at_rank('genus')] || 0
384
- sgid = @genus_names[n.parent_name_at_rank('subgenus')] || 0
385
- end
416
+ # Ugh, move build from here
417
+ @name_collection.nomenclators.keys.each do |i|
418
+ name = @name_collection.nomenclators[i]
419
+ genus_id = @genus_names[name[0]]
420
+ genus_id ||= 0
421
+ subgenus_id = @genus_names[name[1]]
422
+ subgenus_id ||= 0
423
+ species_id = @species_names[name[2]]
424
+ species_id ||= 0
425
+ subspecies_id = @species_names[name[3]]
426
+ subspecies_id ||= 0
427
+ variety_id = @species_names[name[4]]
428
+ variety_id ||= 0
386
429
 
387
- next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('subtribe')
388
-
389
- ref = get_ref(n)
390
- # debugger
391
- # ref = @by_author_reference_index[n.author_year_index]
392
-
393
- next if ref.nil?
394
430
  cols = {
395
431
  NomenclatorID: i,
396
- GenusNameID: gid,
397
- SubgenusNameID: sgid,
398
- SpeciesNameID: sid,
399
- SubspeciesNameID: ssid,
400
- InfrasubspeciesNameID: 0,
401
- InfrasubKind: 0, # this might be wrong
432
+ GenusNameID: genus_id,
433
+ SubgenusNameID: subgenus_id,
434
+ SpeciesNameID: species_id,
435
+ SubspeciesNameID: subspecies_id,
436
+ InfrasubspeciesNameID: variety_id,
437
+ InfrasubKind: (variety_id == 0 ? 0 : 2),
402
438
  LastUpdate: @time,
403
439
  ModifiedBy: @authorized_user_id,
404
- SuitableForGenus: 0, # Set in SF
405
- SuitableForSpecies: 0 # Set in SF
440
+ SuitableForGenus: 0, # Set in SF w test
441
+ SuitableForSpecies: 0 # Set in SF w test
406
442
  }
407
- @nomenclator.merge!(n.nomenclator_name => i)
408
443
  i += 1
409
-
410
444
  sql << sql_insert_statement('tblNomenclator', cols)
411
445
  end
412
446
 
413
- # TODO: DRY this up with above?!
414
- @name_collection.combinations.each do |c|
415
- gid, sgid = 0,0
416
- sid = (c[2].nil? ? 0 : @species_names[c[2].name])
417
- ssid = (c[3].nil? ? 0 : @species_names[c[3].name])
418
-
419
- if c.compact.last.parens == false
420
- gid = (c[0].nil? ? 0 : @genus_names[c[0].name])
421
- sgid = (c[1].nil? ? 0 : @genus_names[c[1].name])
422
- end
423
-
424
- # ref = @by_author_reference_index[c.compact.last.author_year_index]
425
- ref = @name_collection.ref_collection.object_from_row(c.compact.last.related[:link_to_ref_from_row])
426
-
427
- next if ref.nil?
428
-
429
- cols = {
430
- NomenclatorID: i,
431
- GenusNameID: gid ,
432
- SubgenusNameID: sgid ,
433
- SpeciesNameID: sid ,
434
- SubspeciesNameID: ssid ,
435
- InfrasubspeciesNameID: 0,
436
- InfrasubKind: 0, # this might be wrong
437
- LastUpdate: @time,
438
- ModifiedBy: @authorized_user_id,
439
- SuitableForGenus: 0, # Set in SF
440
- SuitableForSpecies: 0 # Set in SF
441
- }
442
- # check!?
443
- @nomenclator.merge!(c.compact.last.nomenclator_name => i)
444
- sql << sql_insert_statement('tblNomenclator', cols)
445
- i += 1
446
- end
447
447
  sql.join("\n")
448
448
  end
449
449
 
450
+
451
+
450
452
  end # End class
451
453
  end # End module
data/lib/lumper/clump.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
2
2
 
3
- # A Clump is a "C"ollection of lump derivatives and the relatinoships between these derivatives!
3
+ # A Clump is a "C"ollection of lump derivatives and the relationships between these derivatives!
4
4
  # It's used to define relationships among objects derived, for example, between single rows of data
5
5
  module Taxonifi::Lumper:Clumps
6
6
 
data/lib/lumper/lumper.rb CHANGED
@@ -63,7 +63,7 @@ module Taxonifi::Lumper
63
63
  opts = {
64
64
  :csv => [],
65
65
  :initial_id => 0,
66
- :capture_related_fields => true # Stores other column values in (column_header => value) pairs in Name.related
66
+ :capture_related_fields => true # Stores other column values in (column_header => value) pairs in Name#properties
67
67
  }.merge!(options)
68
68
 
69
69
  csv = opts[:csv]
@@ -82,14 +82,14 @@ module Taxonifi::Lumper
82
82
  # :genus => {'Foo' => [0,2]}
83
83
  # This says that "Foo" is instantiated two times in the
84
84
  # name collection, with id 0, and id 2.
85
- name_index = {}
85
+ name_index = {} # Taxonifi::Lumper::NameIndex.new # {}
86
86
 
87
87
  has_ref_fields = ([:citation_basic, :citation_small] & Taxonifi::Lumper.intersecting_lumps(csv.headers)).size > 0
88
88
  unused_fields = csv.headers - Taxonifi::Lumper::LUMPS[:names]
89
89
 
90
-
91
90
  # First pass, create and index names
92
91
  Taxonifi::Assessor::RowAssessor.rank_headers(csv.headers).each do |rank|
92
+ # name_index.new_rank(rank)
93
93
  name_index[rank] = {}
94
94
  csv.each_with_index do |row, i|
95
95
  shares_rank = (rank == Taxonifi::Assessor::RowAssessor.lump_name_rank(row).to_s)
@@ -99,9 +99,8 @@ module Taxonifi::Lumper
99
99
  n = nil # a Name if necessary
100
100
  name_id = nil # index the new or existing Name
101
101
 
102
+ exists = false
102
103
  if name_index[rank][name] # A matching name (String) has been previously added
103
- exists = false
104
-
105
104
  name_index[rank][name].each do |id|
106
105
  # Compare vectors of parent_ids for name presence
107
106
  if nc.parent_id_vector(id) == row_index[i]
@@ -110,15 +109,12 @@ module Taxonifi::Lumper
110
109
  break
111
110
  end
112
111
  end
113
-
114
- if !exists # name (string) exists, but parents are different, create new name
115
- n = Taxonifi::Model::Name.new()
116
- end
117
-
118
- else # no version of the name exists
119
- n = Taxonifi::Model::Name.new()
120
112
  end # end name exists
121
113
 
114
+ n = Taxonifi::Model::Name.new() if !exists
115
+
116
+ unused_data = row.to_hash.select{|f| unused_fields.include?(f)}
117
+ row_identifier = (row['identifier'] ? row['identifier'] : i)
122
118
 
123
119
  # Populate the new name if created. Previously matched names are not affected.
124
120
  if !n.nil?
@@ -134,13 +130,13 @@ module Taxonifi::Lumper
134
130
  if shares_rank
135
131
  if row['author_year']
136
132
  builder = Taxonifi::Splitter::Builder.build_author_year(row['author_year'])
137
- n.author = builder.people
133
+ n.authors = builder.people # was author!?
138
134
  n.year = builder.year
139
- n.parens = !builder.parens
135
+ n.parens = builder.parens
140
136
  end
141
137
 
142
- n.related.merge!(:link_to_ref_from_row => i) if has_ref_fields
143
- n.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
138
+ n.add_property(:link_to_ref_from_row, i) if has_ref_fields # TODO: update this
139
+ n.add_properties(unused_data) if opts[:capture_related_fields]
144
140
  end
145
141
 
146
142
  name_id = nc.add_object(n).id
@@ -150,6 +146,14 @@ module Taxonifi::Lumper
150
146
  $DEBUG && $stderr.puts("added #{nc.collection.size - 1} | #{n.name} | #{n.rank} | #{n.parent ? n.parent.name : '-'} | #{n.parent ? n.parent.id : '-'}")
151
147
  else
152
148
  $DEBUG && $stderr.puts("already present #{rank} | #{name}")
149
+ if shares_rank
150
+ # original::
151
+ nc.add_duplicate_entry_metadata(name_id, row_identifier, unused_data)
152
+
153
+ # hack
154
+ # nc.add_duplicate_entry_metadata(name_id, row_identifier, row.to_hash)
155
+
156
+ end
153
157
  end
154
158
 
155
159
  # build a by row vector of parent child relationships
@@ -166,7 +170,7 @@ module Taxonifi::Lumper
166
170
  opts = {
167
171
  :csv => nil,
168
172
  :inital_id => 1,
169
- :capture_related_fields => true # Stores other column values in (column_header => value) pairs in Ref.related
173
+ :capture_related_fields => true # Stores other column values in (column_header => value) pairs in Ref#related
170
174
  }.merge!(options)
171
175
  csv = opts[:csv]
172
176
 
@@ -219,7 +223,7 @@ module Taxonifi::Lumper
219
223
  end
220
224
  end
221
225
 
222
- r.related.merge!(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
226
+ r.add_properties(row.to_hash.select{|f| unused_fields.include?(f)}) if opts[:capture_related_fields]
223
227
 
224
228
  # Do some indexing.
225
229
  ref_str = r.compact_string
@@ -56,7 +56,7 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
56
56
  n.rank = rank
57
57
  n.name = name
58
58
  n.row_number = i
59
- n.related.merge!(:external_id => external_id)
59
+ n.add_property(:external_id, external_id)
60
60
 
61
61
  if parent = external_index[parent_id]
62
62
  n.parent = parent
@@ -134,7 +134,6 @@ module Taxonifi::Lumper::Lumps::ParentChildNameCollection
134
134
  # validation in general, something to look at, for now, throw up our hands and move on.
135
135
  return last_id if (real_genus.nil? || real_species.nil?)
136
136
 
137
- # debugger if real_genus.id == 399
138
137
  real_subgenus = nil # revisit
139
138
  real_subspecies = nc.object_by_id(nc.name_exists?(tmp_subspecies)) if !tmp_subspecies.nil?
140
139
 
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '../taxonifi'))
2
+
3
+ module Taxonifi::Lumper
4
+ class NameIndex
5
+ attr_accessor :index
6
+
7
+ def initialize
8
+ @index = {}
9
+ end
10
+
11
+ def new_rank(rank)
12
+ @index[rank] = {}
13
+ end
14
+
15
+ def name_exists_at_rank?(name, rank)
16
+ name_index[rank] && name_index[rank][name]
17
+ end
18
+
19
+ end
20
+ end
21
+
@@ -1,4 +1,4 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), "../models/base.rb"))
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "../model/base.rb"))
2
2
 
3
3
  module Taxonifi
4
4
  module Model
@@ -28,7 +28,7 @@ module Taxonifi
28
28
  def compact_index
29
29
  index = [@year]
30
30
  @people.each do |a|
31
- index.push a.compact_string
31
+ index.push(a.compact_string)
32
32
  end
33
33
  index.join("-")
34
34
  end