taxonifi 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +24 -7
  3. data/README.rdoc +5 -6
  4. data/Rakefile +1 -1
  5. data/VERSION +1 -1
  6. data/lib/assessor/row_assessor.rb +25 -18
  7. data/lib/export/format/base.rb +96 -1
  8. data/lib/export/format/obo_nomenclature.rb +71 -0
  9. data/lib/export/format/prolog.rb +59 -0
  10. data/lib/export/format/species_file.rb +303 -193
  11. data/lib/lumper/clump.rb +112 -0
  12. data/lib/lumper/lumper.rb +71 -45
  13. data/lib/lumper/lumps/parent_child_name_collection.rb +79 -15
  14. data/lib/models/author_year.rb +1 -2
  15. data/lib/models/base.rb +56 -51
  16. data/lib/models/collection.rb +16 -1
  17. data/lib/models/name.rb +56 -15
  18. data/lib/models/name_collection.rb +70 -19
  19. data/lib/models/ref.rb +17 -0
  20. data/lib/models/ref_collection.rb +2 -1
  21. data/lib/models/shared_class_methods.rb +29 -0
  22. data/lib/models/species_name.rb +14 -12
  23. data/lib/splitter/parser.rb +1 -2
  24. data/lib/splitter/tokens.rb +1 -1
  25. data/lib/taxonifi.rb +12 -0
  26. data/lib/utils/array.rb +17 -0
  27. data/lib/utils/hash.rb +17 -0
  28. data/taxonifi.gemspec +116 -0
  29. data/test/file_fixtures/Fossil.csv +11 -0
  30. data/test/file_fixtures/Lygaeoidea.csv +1 -1
  31. data/test/file_fixtures/names.csv +1 -0
  32. data/test/helper.rb +14 -0
  33. data/test/test_export_prolog.rb +14 -0
  34. data/test/test_exporter.rb +23 -0
  35. data/test/test_lumper_clump.rb +75 -0
  36. data/test/test_lumper_names.rb +67 -9
  37. data/test/test_lumper_parent_child_name_collection.rb +47 -3
  38. data/test/test_lumper_refs.rb +22 -7
  39. data/test/test_obo_nomenclature.rb +14 -0
  40. data/test/test_parser.rb +4 -2
  41. data/test/test_splitter_tokens.rb +9 -0
  42. data/test/test_taxonifi_accessor.rb +21 -15
  43. data/test/test_taxonifi_base.rb +25 -0
  44. data/test/test_taxonifi_name.rb +41 -4
  45. data/test/test_taxonifi_name_collection.rb +54 -17
  46. data/test/test_taxonifi_species_name.rb +1 -1
  47. metadata +34 -5
@@ -29,7 +29,7 @@ module Taxonifi::Export
29
29
  'superfamily group' => 44,
30
30
  'subinfraordinal group' => 45,
31
31
  'infraorder' => 46,
32
- 'suborder' => 8,
32
+ 'suborder' => 48,
33
33
  'order' => 50,
34
34
  'mirorder' => 51,
35
35
  'superorder' => 52,
@@ -54,288 +54,398 @@ module Taxonifi::Export
54
54
 
55
55
  attr_accessor :name_collection
56
56
  attr_accessor :ref_collection
57
+ attr_accessor :pub_collection
57
58
  attr_accessor :author_index
58
59
  attr_accessor :genus_names, :species_names, :nomenclator
59
60
  attr_accessor :authorized_user_id, :time
60
-
61
- # MANIFEST order is important
62
- MANIFEST = %w{tblTaxa tblRefs tblPeople tblRefAuthors tblGenusNames tblSpeciesNames tblNomenclator tblCites}
61
+ attr_accessor :starting_ref_id
63
62
 
64
63
  def initialize(options = {})
65
64
  opts = {
66
65
  :nc => Taxonifi::Model::NameCollection.new,
67
66
  :export_folder => 'species_file',
68
- :authorized_user_id => nil
67
+ :authorized_user_id => nil,
68
+ :starting_ref_id => 1, # should be configured elsewhere... but
69
+ :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites}
69
70
  }.merge!(options)
70
71
 
72
+ @manifest = opts[:manifest]
73
+
71
74
  super(opts)
72
75
  raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.' if ! opts[:nc].class == Taxonifi::Model::NameCollection
73
76
  raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.' if opts[:authorized_user_id].nil?
74
77
  @name_collection = opts[:nc]
78
+ @pub_collection = {} # title => id
75
79
  @authorized_user_id = opts[:authorized_user_id]
76
80
  @author_index = {}
77
-
78
- #
81
+ @starting_ref_id = opts[:starting_ref_id]
82
+
79
83
  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
80
84
  # because it's all internal. There is a strong potential for key collisions if this pipeline is modified to
81
85
  # include references external to the initialized name_collection. See also export_references.
82
86
  #
83
- @by_author_reference_index = {}
87
+ # @by_author_reference_index = {}
84
88
  @genus_names = {}
85
89
  @species_names = {}
86
90
  @nomenclator = {}
91
+
87
92
  @time = Time.now.strftime("%F %T")
93
+ @empty_quotes = ""
88
94
  end
89
95
 
90
- # Export only the ref_collection. Sidesteps the main name-centric exports
91
- # Note that this still uses the base @name_collection object as a starting reference,
92
- # it just references @name_collection.ref_collection. So you can do:
93
- # nc = Taxonifi::Model::NameCollection.new
94
- # nc.ref_collection = Taxonifi::Model::RefCollection.new
95
- # etc.
96
- def export_references(options = {})
97
- opts = {
98
- :starting_ref_id => 0,
99
- :starting_author_id => 0
100
- }
101
-
102
- configure_folders
103
- build_author_index
104
-
105
- # order matters
106
- ['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
107
- write_file(t, send(t))
108
- end
109
- end
110
-
111
96
  # Assumes names that are the same are the same person.
112
97
  def build_author_index
113
98
  @author_index = @name_collection.ref_collection.unique_authors.inject({}){|hsh, a| hsh.merge!(a.compact_string => a)}
114
99
  end
115
-
100
+
116
101
  def export()
117
102
  super
118
- @name_collection.generate_ref_collection(1)
103
+ # You must have
104
+ # how to create and link the reference IDs.
105
+
106
+ # Reference related approaches:
107
+ #
108
+ # @name_collection.generate_ref_collection(1)
109
+ # Give authors unique ids:
110
+ # @name_collection.ref_collection.uniquify_authors(1)
119
111
 
120
- # Give authors unique ids
121
- @name_collection.ref_collection.uniquify_authors(1)
122
- build_author_index
112
+ if @name_collection.ref_collection
113
+ build_author_index
114
+ end
115
+
116
+ # raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?
123
117
 
124
118
  # See notes in #initialize re potential key collisions!
125
- @by_author_reference_index = @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
126
-
119
+ # @by_author_reference_index = @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
120
+
127
121
  @name_collection.names_at_rank('genus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
128
122
  @name_collection.names_at_rank('subgenus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
129
123
  @name_collection.names_at_rank('species').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
130
124
  @name_collection.names_at_rank('subspecies').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
131
125
 
132
- MANIFEST.each do |f|
133
- write_file(f, send(f))
126
+ str = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
127
+ @manifest.each do |f|
128
+ str << send(f)
129
+ end
130
+ str << ['COMMIT', 'END TRY', 'BEGIN CATCH',
131
+ 'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
132
+ 'ROLLBACK', 'END CATCH']
133
+ write_file('everything.sql', str.join("\n\n"))
134
+ true
135
+ end
136
+
137
+ # Deprecated!
138
+ # Export only the ref_collection. Sidesteps the main name-centric exports
139
+ # Note that this still uses the base @name_collection object as a starting reference,
140
+ # it just references @name_collection.ref_collection. So you can do:
141
+ # nc = Taxonifi::Model::NameCollection.new
142
+ # nc.ref_collection = Taxonifi::Model::RefCollection.new
143
+ # etc.
144
+ def export_references(options = {})
145
+ raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest: to achieve a similar result.'
146
+ #opts = {
147
+ # :starting_ref_id => 0,
148
+ # :starting_author_id => 0
149
+ #}
150
+
151
+ #configure_folders
152
+ #build_author_index
153
+
154
+ ## order matters
155
+ #['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
156
+ # write_file(t, send(t))
157
+ #end
158
+ end
159
+
160
+ # Gets the reference for a name as referenced
161
+ # by .related[:link_to_ref_from_row]
162
+ def get_ref(name)
163
+ if not name.related[:link_to_ref_from_row].nil?
164
+ return @name_collection.ref_collection.object_from_row(name.related[:link_to_ref_from_row])
134
165
  end
166
+ nil
135
167
  end
136
168
 
137
169
  def tblTaxa
138
170
  @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
139
- @csv_string = CSV.generate() do |csv|
140
- csv << @headers
141
- @name_collection.collection.each do |n|
142
- ref = @by_author_reference_index[n.author_year_index]
143
- cols = {
144
- TaxonNameID: n.id,
145
- TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
146
- RankID: SPECIES_FILE_RANKS[n.rank],
147
- Name: n.name,
148
- Parens: (n.parens ? 1 : 0),
149
- AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id), # !! SF folks like to pre-populate with zeros
150
- RefID: (ref ? ref.id : 0),
151
- DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
152
- AccessCode: 0,
153
- NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
154
- StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
155
- OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
156
- LastUpdate: @time,
157
- ModifiedBy: @authorized_user_id,
158
- }
159
- csv << @headers.collect{|h| cols[h.to_sym]}
160
- end
171
+ sql = []
172
+ @name_collection.collection.each do |n|
173
+ $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
174
+
175
+ ref = get_ref(n)
176
+ cols = {
177
+ TaxonNameID: n.id,
178
+ TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
179
+ RankID: SPECIES_FILE_RANKS[n.rank],
180
+ Name: n.name,
181
+ Parens: (n.parens ? 1 : 0),
182
+ AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id), # !! SF folks like to pre-populate with zeros
183
+ RefID: (ref ? ref.id : 0),
184
+ DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
185
+ AccessCode: 0,
186
+ NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
187
+ StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
188
+ OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
189
+ LastUpdate: @time,
190
+ ModifiedBy: @authorized_user_id,
191
+ }
192
+ sql << sql_insert_statement('tblTaxa', cols)
161
193
  end
162
- @csv_string
194
+ sql.join("\n")
163
195
  end
164
196
 
165
197
  # Generate a tblRefs string.
166
198
  def tblRefs
167
- @headers = %w{RefID ActualYear Title PubID Verbatim}
168
- @csv_string = CSV.generate(:col_sep => "\t") do |csv|
169
- csv << @headers
170
- @name_collection.ref_collection.collection.each_with_index do |r,i|
171
- cols = {
172
- RefID: r.id, # i + 1,
173
- Title: (r.title.nil? ? """""" : r.title),
174
- PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
175
- ActualYear: r.year,
176
- Verbatim: r.full_citation
177
- }
178
- csv << @headers.collect{|h| cols[h.to_sym]}
179
- end
199
+ sql = []
200
+ @headers = %w{RefID ActualYear Title PubID Verbatim}
201
+ @name_collection.ref_collection.collection.each_with_index do |r,i|
202
+ # Assumes the 0 "null" pub id is there
203
+ pub_id = @pub_collection[r.publication] ? @pub_collection[r.publication] : 0
204
+
205
+ cols = {
206
+ RefID: r.id,
207
+ ContainingRefID: 0,
208
+ Title: (r.title.nil? ? @empty_quotes : r.title),
209
+ PubID: pub_id,
210
+ Series: @empty_quotes,
211
+ Volume: (r.volume ? r.volume : @empty_quotes),
212
+ Issue: (r.number ? r.number : @empty_quotes),
213
+ RefPages: r.page_string, # always a string
214
+ ActualYear: (r.year ? r.year : @empty_quotes),
215
+ StatedYear: @empty_quotes,
216
+ AccessCode: 0,
217
+ Flags: 0,
218
+ Note: @empty_quotes,
219
+ LastUpdate: @time,
220
+ LinkID: 0,
221
+ ModifiedBy: @authorized_user_id,
222
+ CiteDataStatus: 0,
223
+ Verbatim: (r.full_citation ? r.full_citation : @empty_quotes)
224
+ }
225
+ sql << sql_insert_statement('tblRefs', cols)
180
226
  end
181
- @csv_string
227
+ sql.join("\n")
182
228
  end
183
229
 
184
- # TODO make a standard transaction wrapper
185
- def sqlRefs
186
- sql = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
187
- @headers = %w{RefID ActualYear Title PubID Verbatim}
188
- @name_collection.ref_collection.collection.each_with_index do |r,i|
230
+ # Generate tblPubs SQL
231
+ def tblPubs
232
+ sql = []
233
+ @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}
234
+
235
+ # Hackish should build this elsewhere, but degrades OK
236
+ pubs = @name_collection.ref_collection.collection.collect{|r| r.publication}.compact.uniq
237
+
238
+ pubs.each_with_index do |p, i|
189
239
  cols = {
190
- RefID: r.id, # i + 1,
191
- Title: (r.title.nil? ? """""" : r.title),
192
- PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
193
- ActualYear: r.year,
194
- Verbatim: r.full_citation
240
+ PubID: i + 1,
241
+ PrefID: 0,
242
+ PubType: 1,
243
+ ShortName: "unknown_#{i}", # Unique constraint
244
+ FullName: p,
245
+ Note: @empty_quotes,
246
+ LastUpdate: @time,
247
+ ModifiedBy: @authorized_user_id,
248
+ Publisher: @empty_quotes,
249
+ PlacePublished: @empty_quotes,
250
+ PubRegID: 0,
251
+ Status: 0,
252
+ StartYear: 0,
253
+ EndYear: 0,
254
+ BHL: 0
195
255
  }
196
- sql << "INSERT INTO tblRefs (#{@headers.sort.join(",")}) VALUES (#{@headers.sort.collect{|h| "'#{cols[h.to_sym].to_s.gsub(/'/,"''")}'"}.join(",")});"
256
+ @pub_collection.merge!(p => i + 1)
257
+ sql << sql_insert_statement('tblPubs', cols)
197
258
  end
198
- sql << ['COMMIT', 'END TRY', 'BEGIN CATCH', 'ROLLBACK', 'END CATCH']
199
- sql.join("\n")
259
+ sql.join("\n")
200
260
  end
201
261
 
202
262
  # Generate tblPeople string.
203
263
  def tblPeople
204
264
  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
205
- @csv_string = CSV.generate() do |csv|
206
- csv << @headers
207
- @author_index.keys.each_with_index do |k,i|
208
- a = @author_index[k]
209
- # a.id = i + 1
210
- cols = {
211
- PersonID: a.id,
212
- FamilyName: a.last_name,
213
- GivenName: a.first_name,
214
- GivenInitials: a.initials_string,
215
- Suffix: a.suffix,
216
- Role: 1, # authors
217
- LastUpdate: @time,
218
- ModifiedBy: @authorized_user_id
219
- }
220
- csv << @headers.collect{|h| cols[h.to_sym]}
221
- end
265
+ sql = []
266
+ @author_index.keys.each_with_index do |k,i|
267
+ a = @author_index[k]
268
+ # a.id = i + 1
269
+ cols = {
270
+ PersonID: a.id,
271
+ FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"),
272
+ GivenNames: a.first_name || @empty_quotes,
273
+ GivenInitials: a.initials_string || @empty_quotes,
274
+ Suffix: a.suffix || @empty_quotes,
275
+ Role: 1, # authors
276
+ LastUpdate: @time,
277
+ ModifiedBy: @authorized_user_id
278
+ }
279
+ sql << sql_insert_statement('tblPeople', cols)
222
280
  end
223
- @csv_string
281
+ sql.join("\n")
224
282
  end
225
283
 
226
284
  # Generate tblRefAuthors string.
227
285
  def tblRefAuthors
228
286
  @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy}
229
- @csv_string = CSV.generate() do |csv|
230
- csv << @headers
231
- @name_collection.ref_collection.collection.each do |r|
232
- r.authors.each_with_index do |x, i|
233
- a = @author_index[x.compact_string]
234
- cols = {
235
- RefID: r.id,
236
- PersonID: a.id,
237
- SeqNum: i + 1,
238
- AuthorCount: r.authors.size,
239
- LastUpdate: @time,
240
- ModifiedBy: @authorized_user_id
241
- }
242
- csv << @headers.collect{|h| cols[h.to_sym]}
243
- end
287
+ sql = []
288
+ @name_collection.ref_collection.collection.each do |r|
289
+ r.authors.each_with_index do |x, i|
290
+ a = @author_index[x.compact_string]
291
+ cols = {
292
+ RefID: r.id,
293
+ PersonID: a.id,
294
+ SeqNum: i + 1,
295
+ AuthorCount: r.authors.size,
296
+ LastUpdate: @time,
297
+ ModifiedBy: @authorized_user_id
298
+ }
299
+ sql << sql_insert_statement('tblRefAuthors', cols)
244
300
  end
245
301
  end
246
- @csv_string
302
+ sql.join("\n")
247
303
  end
248
304
 
249
305
  # Generate tblCites string.
250
306
  def tblCites
251
307
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
252
- @csv_string = CSV.generate() do |csv|
253
- csv << @headers
254
- @name_collection.collection.each do |n|
255
- ref = @by_author_reference_index[n.author_year_index]
256
- next if ref.nil?
257
- cols = {
258
- TaxonNameID: n.id,
259
- SeqNum: 1,
260
- RefID: ref.id,
261
- NomenclatorID: @nomenclator[n.nomenclator_name],
262
- LastUpdate: @time,
263
- ModifiedBy: @authorized_user_id,
264
- CitePages: """""", # equates to "" in CSV speak
265
- NewNameStatus: 0,
266
- Note: """""",
267
- TypeClarification: 0, # We might derive more data from this
268
- CurrentConcept: 1, # Boolean, right?
269
- ConceptChange: 0, # Unspecified
270
- InfoFlags: 0, #
271
- InfoFlagStatus: 1, # 1 => needs review
272
- PolynomialStatus: 0
273
- }
274
- csv << @headers.collect{|h| cols[h.to_sym]}
275
- end
308
+ sql = []
309
+
310
+ @name_collection.collection.each do |n|
311
+ next if @nomenclator[n.nomenclator_name].nil? # Only create nomenclator records if they are original citations, otherwise not !! Might need updating in future imports
312
+ ref = get_ref(n)
313
+
314
+ # ref = @by_author_reference_index[n.author_year_index]
315
+ next if ref.nil?
316
+ cols = {
317
+ TaxonNameID: n.id,
318
+ SeqNum: 1,
319
+ RefID: ref.id,
320
+ NomenclatorID: @nomenclator[n.nomenclator_name],
321
+ LastUpdate: @time,
322
+ ModifiedBy: @authorized_user_id,
323
+ CitePages: @empty_quotes, # equates to "" in CSV speak
324
+ NewNameStatus: 0,
325
+ Note: @empty_quotes,
326
+ TypeClarification: 0, # We might derive more data from this
327
+ CurrentConcept: 1, # Boolean, right?
328
+ ConceptChange: 0, # Unspecified
329
+ InfoFlags: 0, #
330
+ InfoFlagStatus: 1, # 1 => needs review
331
+ PolynomialStatus: 0
332
+ }
333
+ sql << sql_insert_statement('tblCites', cols)
276
334
  end
277
- @csv_string
335
+ sql.join("\n")
278
336
  end
279
337
 
280
338
  def tblGenusNames
281
- @csv_string = csv_for_genus_and_species_names_tables('Genus')
282
- @csv_string
339
+ # TODO: SF tests catch unused names based on some names not being included in Nomenclator data. We could optimize so that the workaround is removed.
340
+ # I.e., all the names get added here, but not all the names get added to Nomenclator/Cites because of citations which are not original combinations
341
+ sql = sql_for_genus_and_species_names_tables('Genus')
342
+ sql
283
343
  end
284
344
 
285
345
  def tblSpeciesNames
286
- @csv_string = csv_for_genus_and_species_names_tables('Species')
287
- @csv_string
346
+ # TODO: SF tests catch unused names based on some names not being included in Nomenclator data. We could optimize so that the workaround is removed.
347
+ # I.e., all the names get added here, but not all the names get added to Nomenclator/Cites because of citations which are not original combinations
348
+ sql = sql_for_genus_and_species_names_tables('Species')
349
+ sql
288
350
  end
289
351
 
290
- def csv_for_genus_and_species_names_tables(type)
352
+ def sql_for_genus_and_species_names_tables(type)
353
+ sql = []
291
354
  col = "#{type}NameID"
292
355
  @headers = [col, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
293
- @csv_string = CSV.generate() do |csv|
294
- csv << @headers
295
- var = self.send("#{type.downcase}_names")
296
- var.keys.each_with_index do |n,i|
297
- var[n] = i + 1
298
- cols = {
299
- col.to_sym => i + 1,
300
- Name: n,
301
- LastUpdate: @time,
302
- ModifiedBy: @authorized_user_id,
303
- Italicize: 1 # always true for these data
304
- }
305
- csv << @headers.collect{|h| cols[h.to_sym]}
306
- end
356
+ var = self.send("#{type.downcase}_names")
357
+ var.keys.each_with_index do |n,i|
358
+ var[n] = i + 1
359
+ cols = {
360
+ col.to_sym => i + 1,
361
+ Name: n,
362
+ LastUpdate: @time,
363
+ ModifiedBy: @authorized_user_id,
364
+ Italicize: 1 # always true for these data
365
+ }
366
+ sql << sql_insert_statement("tbl#{type}Names", cols)
307
367
  end
308
- @csv_string
368
+ sql.join("\n")
309
369
  end
310
370
 
311
- # must be called post tblGenusNames and tblSpeciesNames
371
+ # Must be called post tblGenusNames and tblSpeciesNames.
372
+ # Some records are not used but can be cleaned by SF
312
373
  def tblNomenclator
313
374
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
314
- @csv_string = CSV.generate() do |csv|
315
- csv << @headers
316
- i = 1
317
- @name_collection.collection.each do |n|
318
- next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('genus')
319
- cols = {
320
- NomenclatorID: i,
321
- GenusNameID: @genus_names[n.parent_name_at_rank('genus')] || 0,
322
- SubgenusNameID: @genus_names[n.parent_name_at_rank('subgenus')] || 0,
323
- SpeciesNameID: @species_names[n.parent_name_at_rank('species')] || 0,
324
- SubspeciesNameID: @species_names[n.parent_name_at_rank('subspecies')] || 0,
325
- InfrasubspeciesNameID: 0,
326
- InfrasubKind: 0, # this might be wrong
327
- LastUpdate: @time,
328
- ModifiedBy: @authorized_user_id,
329
- SuitableForGenus: 0, # Set in SF
330
- SuitableForSpecies: 0 # Set in SF
331
- }
332
- @nomenclator.merge!(n.nomenclator_name => i)
333
- i += 1
334
- csv << @headers.collect{|h| cols[h.to_sym]}
335
- end
375
+ sql = []
376
+ i = 1
377
+ @name_collection.collection.each do |n|
378
+ gid, sgid = 0,0
379
+ sid = @species_names[n.parent_name_at_rank('species')] || 0
380
+ ssid = @species_names[n.parent_name_at_rank('subspecies')] || 0
381
+
382
+ if n.parens == false
383
+ gid = @genus_names[n.parent_name_at_rank('genus')] || 0
384
+ sgid = @genus_names[n.parent_name_at_rank('subgenus')] || 0
385
+ end
386
+
387
+ next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('subtribe')
388
+
389
+ ref = get_ref(n)
390
+ # debugger
391
+ # ref = @by_author_reference_index[n.author_year_index]
392
+
393
+ next if ref.nil?
394
+ cols = {
395
+ NomenclatorID: i,
396
+ GenusNameID: gid,
397
+ SubgenusNameID: sgid,
398
+ SpeciesNameID: sid,
399
+ SubspeciesNameID: ssid,
400
+ InfrasubspeciesNameID: 0,
401
+ InfrasubKind: 0, # this might be wrong
402
+ LastUpdate: @time,
403
+ ModifiedBy: @authorized_user_id,
404
+ SuitableForGenus: 0, # Set in SF
405
+ SuitableForSpecies: 0 # Set in SF
406
+ }
407
+ @nomenclator.merge!(n.nomenclator_name => i)
408
+ i += 1
409
+
410
+ sql << sql_insert_statement('tblNomenclator', cols)
411
+ end
412
+
413
+ # TODO: DRY this up with above?!
414
+ @name_collection.combinations.each do |c|
415
+ gid, sgid = 0,0
416
+ sid = (c[2].nil? ? 0 : @species_names[c[2].name])
417
+ ssid = (c[3].nil? ? 0 : @species_names[c[3].name])
418
+
419
+ if c.compact.last.parens == false
420
+ gid = (c[0].nil? ? 0 : @genus_names[c[0].name])
421
+ sgid = (c[1].nil? ? 0 : @genus_names[c[1].name])
422
+ end
423
+
424
+ # ref = @by_author_reference_index[c.compact.last.author_year_index]
425
+ ref = @name_collection.ref_collection.object_from_row(c.compact.last.related[:link_to_ref_from_row])
426
+
427
+ next if ref.nil?
428
+
429
+ cols = {
430
+ NomenclatorID: i,
431
+ GenusNameID: gid ,
432
+ SubgenusNameID: sgid ,
433
+ SpeciesNameID: sid ,
434
+ SubspeciesNameID: ssid ,
435
+ InfrasubspeciesNameID: 0,
436
+ InfrasubKind: 0, # this might be wrong
437
+ LastUpdate: @time,
438
+ ModifiedBy: @authorized_user_id,
439
+ SuitableForGenus: 0, # Set in SF
440
+ SuitableForSpecies: 0 # Set in SF
441
+ }
442
+ # check!?
443
+ @nomenclator.merge!(c.compact.last.nomenclator_name => i)
444
+ sql << sql_insert_statement('tblNomenclator', cols)
445
+ i += 1
336
446
  end
337
- @csv_string
447
+ sql.join("\n")
338
448
  end
339
449
 
340
- end
341
- end
450
+ end # End class
451
+ end # End module