taxonifi 0.1.0 → 0.2.0

Files changed (47)
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +24 -7
  3. data/README.rdoc +5 -6
  4. data/Rakefile +1 -1
  5. data/VERSION +1 -1
  6. data/lib/assessor/row_assessor.rb +25 -18
  7. data/lib/export/format/base.rb +96 -1
  8. data/lib/export/format/obo_nomenclature.rb +71 -0
  9. data/lib/export/format/prolog.rb +59 -0
  10. data/lib/export/format/species_file.rb +303 -193
  11. data/lib/lumper/clump.rb +112 -0
  12. data/lib/lumper/lumper.rb +71 -45
  13. data/lib/lumper/lumps/parent_child_name_collection.rb +79 -15
  14. data/lib/models/author_year.rb +1 -2
  15. data/lib/models/base.rb +56 -51
  16. data/lib/models/collection.rb +16 -1
  17. data/lib/models/name.rb +56 -15
  18. data/lib/models/name_collection.rb +70 -19
  19. data/lib/models/ref.rb +17 -0
  20. data/lib/models/ref_collection.rb +2 -1
  21. data/lib/models/shared_class_methods.rb +29 -0
  22. data/lib/models/species_name.rb +14 -12
  23. data/lib/splitter/parser.rb +1 -2
  24. data/lib/splitter/tokens.rb +1 -1
  25. data/lib/taxonifi.rb +12 -0
  26. data/lib/utils/array.rb +17 -0
  27. data/lib/utils/hash.rb +17 -0
  28. data/taxonifi.gemspec +116 -0
  29. data/test/file_fixtures/Fossil.csv +11 -0
  30. data/test/file_fixtures/Lygaeoidea.csv +1 -1
  31. data/test/file_fixtures/names.csv +1 -0
  32. data/test/helper.rb +14 -0
  33. data/test/test_export_prolog.rb +14 -0
  34. data/test/test_exporter.rb +23 -0
  35. data/test/test_lumper_clump.rb +75 -0
  36. data/test/test_lumper_names.rb +67 -9
  37. data/test/test_lumper_parent_child_name_collection.rb +47 -3
  38. data/test/test_lumper_refs.rb +22 -7
  39. data/test/test_obo_nomenclature.rb +14 -0
  40. data/test/test_parser.rb +4 -2
  41. data/test/test_splitter_tokens.rb +9 -0
  42. data/test/test_taxonifi_accessor.rb +21 -15
  43. data/test/test_taxonifi_base.rb +25 -0
  44. data/test/test_taxonifi_name.rb +41 -4
  45. data/test/test_taxonifi_name_collection.rb +54 -17
  46. data/test/test_taxonifi_species_name.rb +1 -1
  47. metadata +34 -5
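
For orientation, here is a minimal sketch of how the reworked exporter is driven, inferred from the initializer options and the new #export method in the species_file.rb diff below. The class name Taxonifi::Export::SpeciesFile and the pre-populated NameCollection are assumptions; building the collection is out of scope here.

  require 'taxonifi'

  # Assume nc is a Taxonifi::Model::NameCollection that has already been
  # populated (e.g. via the lumper) and carries a ref_collection.
  nc = Taxonifi::Model::NameCollection.new

  e = Taxonifi::Export::SpeciesFile.new(
    :nc => nc,
    :authorized_user_id => 1,   # required; initialization raises without it
    :starting_ref_id => 1,
    :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites}
  )

  # Writes a single everything.sql, with the tables emitted in manifest order
  # and wrapped in a BEGIN TRY / BEGIN TRANSACTION ... COMMIT / CATCH block.
  e.export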
@@ -29,7 +29,7 @@ module Taxonifi::Export
  'superfamily group' => 44,
  'subinfraordinal group' => 45,
  'infraorder' => 46,
- 'suborder' => 8,
+ 'suborder' => 48,
  'order' => 50,
  'mirorder' => 51,
  'superorder' => 52,
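
The bulk of the second hunk swaps CSV generation for calls to a sql_insert_statement helper that now lives in export/format/base.rb (+96 -1 above) and is not shown in this diff. A hypothetical sketch of such a helper, modeled on the inline INSERT the removed sqlRefs method used to build (sorted column names, single quotes doubled for T-SQL), might look like:

  # Hypothetical sketch only; the real helper is defined in
  # data/lib/export/format/base.rb, which this hunk does not show.
  def sql_insert_statement(table, cols = {})
    columns = cols.keys.collect(&:to_s).sort
    values  = columns.collect { |c| "'#{cols[c.to_sym].to_s.gsub(/'/, "''")}'" } # escape single quotes
    "INSERT INTO #{table} (#{columns.join(',')}) VALUES (#{values.join(',')});"
  end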
@@ -54,288 +54,398 @@ module Taxonifi::Export
 
  attr_accessor :name_collection
  attr_accessor :ref_collection
+ attr_accessor :pub_collection
  attr_accessor :author_index
  attr_accessor :genus_names, :species_names, :nomenclator
  attr_accessor :authorized_user_id, :time
-
- # MANIFEST order is important
- MANIFEST = %w{tblTaxa tblRefs tblPeople tblRefAuthors tblGenusNames tblSpeciesNames tblNomenclator tblCites}
+ attr_accessor :starting_ref_id
 
  def initialize(options = {})
  opts = {
  :nc => Taxonifi::Model::NameCollection.new,
  :export_folder => 'species_file',
- :authorized_user_id => nil
+ :authorized_user_id => nil,
+ :starting_ref_id => 1, # should be configured elsewhere... but
+ :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites}
  }.merge!(options)
 
+ @manifest = opts[:manifest]
+
  super(opts)
  raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.' if ! opts[:nc].class == Taxonifi::Model::NameCollection
  raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.' if opts[:authorized_user_id].nil?
  @name_collection = opts[:nc]
+ @pub_collection = {} # title => id
  @authorized_user_id = opts[:authorized_user_id]
  @author_index = {}
-
- #
+ @starting_ref_id = opts[:starting_ref_id]
+
  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
  # because it's all internal. There will is a strong potential for key collisions if this pipeline is modified to
  # include references external to the initialized name_collection. See also export_references.
  #
- @by_author_reference_index = {}
+ # @by_author_reference_index = {}
  @genus_names = {}
  @species_names = {}
  @nomenclator = {}
+
  @time = Time.now.strftime("%F %T")
+ @empty_quotes = ""
  end
 
- # Export only the ref_collection. Sidesteps the main name-centric exports
- # Note that this still uses the base @name_collection object as a starting reference,
- # it just references @name_collection.ref_collection. So you can do:
- # nc = Taxonifi::Model::NameCollection.new
- # nc.ref_collection = Taxonifi::Model::RefCollection.new
- # etc.
- def export_references(options = {})
- opts = {
- :starting_ref_id => 0,
- :starting_author_id => 0
- }
-
- configure_folders
- build_author_index
-
- # order matters
- ['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
- write_file(t, send(t))
- end
- end
-
  # Assumes names that are the same are the same person.
  def build_author_index
  @author_index = @name_collection.ref_collection.unique_authors.inject({}){|hsh, a| hsh.merge!(a.compact_string => a)}
  end
-
+
  def export()
  super
- @name_collection.generate_ref_collection(1)
+ # You must have
+ # how to create and link the reference IDs.
+
+ # Reference related approaches:
+ #
+ # @name_collection.generate_ref_collection(1)
+ # Give authors unique ids:
+ # @name_collection.ref_collection.uniquify_authors(1)
 
- # Give authors unique ids
- @name_collection.ref_collection.uniquify_authors(1)
- build_author_index
+ if @name_collection.ref_collection
+ build_author_index
+ end
+
+ # raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?
 
  # See notes in #initalize re potential key collisions!
- @by_author_reference_index = @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
-
+ # @by_author_reference_index = @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}
+
  @name_collection.names_at_rank('genus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
  @name_collection.names_at_rank('subgenus').inject(@genus_names){|hsh, n| hsh.merge!(n.name => nil)}
  @name_collection.names_at_rank('species').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
  @name_collection.names_at_rank('subspecies').inject(@species_names){|hsh, n| hsh.merge!(n.name => nil)}
 
- MANIFEST.each do |f|
- write_file(f, send(f))
+ str = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
+ @manifest.each do |f|
+ str << send(f)
+ end
+ str << ['COMMIT', 'END TRY', 'BEGIN CATCH',
+ 'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
+ 'ROLLBACK', 'END CATCH']
+ write_file('everything.sql', str.join("\n\n"))
+ true
+ end
+
+ # Deprecated!
+ # Export only the ref_collection. Sidesteps the main name-centric exports
+ # Note that this still uses the base @name_collection object as a starting reference,
+ # it just references @name_collection.ref_collection. So you can do:
+ # nc = Taxonifi::Model::NameCollection.new
+ # nc.ref_collection = Taxonifi::Model::RefCollection.new
+ # etc.
+ def export_references(options = {})
+ raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest: to achieve a similar result.'
+ #opts = {
+ # :starting_ref_id => 0,
+ # :starting_author_id => 0
+ #}
+
+ #configure_folders
+ #build_author_index
+
+ ## order matters
+ #['tblPeople', 'tblRefs', 'tblRefAuthors', 'sqlRefs' ].each do |t|
+ # write_file(t, send(t))
+ #end
+ end
+
+ # Get's the reference for a name as referenced
+ # by .related[:link_to_ref_from_row]
+ def get_ref(name)
+ if not name.related[:link_to_ref_from_row].nil?
+ return @name_collection.ref_collection.object_from_row(name.related[:link_to_ref_from_row])
  end
+ nil
  end
 
  def tblTaxa
  @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy}
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- @name_collection.collection.each do |n|
- ref = @by_author_reference_index[n.author_year_index]
- cols = {
- TaxonNameID: n.id,
- TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
- RankID: SPECIES_FILE_RANKS[n.rank],
- Name: n.name,
- Parens: (n.parens ? 1 : 0),
- AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id), # !! SF folks like to pre-populate with zeros
- RefID: (ref ? ref.id : 0),
- DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
- AccessCode: 0,
- NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
- StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
- OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id,
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+ @name_collection.collection.each do |n|
+ $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30
+
+ ref = get_ref(n)
+ cols = {
+ TaxonNameID: n.id,
+ TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1
+ RankID: SPECIES_FILE_RANKS[n.rank],
+ Name: n.name,
+ Parens: (n.parens ? 1 : 0),
+ AboveID: (n.related_name.nil? ? (n.parent ? n.parent.id : 0) : n.related_name.id), # !! SF folks like to pre-populate with zeros
+ RefID: (ref ? ref.id : 0),
+ DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero
+ AccessCode: 0,
+ NameStatus: (n.related_name.nil? ? 0 : 7), # 0 :valid, 7: synonym)
+ StatusFlags: (n.related_name.nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym
+ OriginalGenusID: (!n.parens && n.parent_at_rank('genus') ? n.parent_at_rank('genus').id : 0), # SF must be pre-configured with 0 filler (this restriction needs to go)
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ }
+ sql << sql_insert_statement('tblTaxa', cols)
  end
- @csv_string
+ sql.join("\n")
  end
 
  # Generate a tblRefs string.
  def tblRefs
- @headers = %w{RefID ActualYear Title PubID Verbatim}
- @csv_string = CSV.generate(:col_sep => "\t") do |csv|
- csv << @headers
- @name_collection.ref_collection.collection.each_with_index do |r,i|
- cols = {
- RefID: r.id, # i + 1,
- Title: (r.title.nil? ? """""" : r.title),
- PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
- ActualYear: r.year,
- Verbatim: r.full_citation
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+ @headers = %w{RefID ActualYear Title PubID Verbatim}
+ @name_collection.ref_collection.collection.each_with_index do |r,i|
+ # Assumes the 0 "null" pub id is there
+ pub_id = @pub_collection[r.publication] ? @pub_collection[r.publication] : 0
+
+ cols = {
+ RefID: r.id,
+ ContainingRefID: 0,
+ Title: (r.title.nil? ? @empty_quotes : r.title),
+ PubID: pub_id,
+ Series: @empty_quotes,
+ Volume: (r.volume ? r.volume : @empty_quotes),
+ Issue: (r.number ? r.number : @empty_quotes),
+ RefPages: r.page_string, # always a string
+ ActualYear: (r.year ? r.year : @empty_quotes),
+ StatedYear: @empty_quotes,
+ AccessCode: 0,
+ Flags: 0,
+ Note: @empty_quotes,
+ LastUpdate: @time,
+ LinkID: 0,
+ ModifiedBy: @authorized_user_id,
+ CiteDataStatus: 0,
+ Verbatim: (r.full_citation ? r.full_citation : @empty_quotes)
+ }
+ sql << sql_insert_statement('tblRefs', cols)
  end
- @csv_string
+ sql.join("\n")
  end
 
- # TODO make a standard transaction wrapper
- def sqlRefs
- sql = [ 'BEGIN TRY', 'BEGIN TRANSACTION']
- @headers = %w{RefID ActualYear Title PubID Verbatim}
- @name_collection.ref_collection.collection.each_with_index do |r,i|
+ # Generate tblPubs SQL
+ def tblPubs
+ sql = []
+ @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}
+
+ # Hackish should build this elsewhere, but degrades OK
+ pubs = @name_collection.ref_collection.collection.collect{|r| r.publication}.compact.uniq
+
+ pubs.each_with_index do |p, i|
  cols = {
- RefID: r.id, # i + 1,
- Title: (r.title.nil? ? """""" : r.title),
- PubID: 0, # Careful - assumes you have a pre-generated PubID of Zero in there, PubID table is not included in CSV imports
- ActualYear: r.year,
- Verbatim: r.full_citation
+ PubID: i + 1,
+ PrefID: 0,
+ PubType: 1,
+ ShortName: "unknown_#{i}", # Unique constraint
+ FullName: p,
+ Note: @empty_quotes,
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ Publisher: @empty_quotes,
+ PlacePublished: @empty_quotes,
+ PubRegID: 0,
+ Status: 0,
+ StartYear: 0,
+ EndYear: 0,
+ BHL: 0
  }
- sql << "INSERT INTO tblRefs (#{@headers.sort.join(",")}) VALUES (#{@headers.sort.collect{|h| "'#{cols[h.to_sym].to_s.gsub(/'/,"''")}'"}.join(",")});"
+ @pub_collection.merge!(p => i + 1)
+ sql << sql_insert_statement('tblPubs', cols)
  end
- sql << ['COMMIT', 'END TRY', 'BEGIN CATCH', 'ROLLBACK', 'END CATCH']
- sql.join("\n")
+ sql.join("\n")
  end
 
  # Generate tblPeople string.
  def tblPeople
  @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy}
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- @author_index.keys.each_with_index do |k,i|
- a = @author_index[k]
- # a.id = i + 1
- cols = {
- PersonID: a.id,
- FamilyName: a.last_name,
- GivenName: a.first_name,
- GivenInitials: a.initials_string,
- Suffix: a.suffix,
- Role: 1, # authors
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+ @author_index.keys.each_with_index do |k,i|
+ a = @author_index[k]
+ # a.id = i + 1
+ cols = {
+ PersonID: a.id,
+ FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"),
+ GivenNames: a.first_name || @empty_quotes,
+ GivenInitials: a.initials_string || @empty_quotes,
+ Suffix: a.suffix || @empty_quotes,
+ Role: 1, # authors
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id
+ }
+ sql << sql_insert_statement('tblPeople', cols)
  end
- @csv_string
+ sql.join("\n")
  end
 
  # Generate tblRefAuthors string.
  def tblRefAuthors
  @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy}
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- @name_collection.ref_collection.collection.each do |r|
- r.authors.each_with_index do |x, i|
- a = @author_index[x.compact_string]
- cols = {
- RefID: r.id,
- PersonID: a.id,
- SeqNum: i + 1,
- AuthorCount: r.authors.size,
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+ @name_collection.ref_collection.collection.each do |r|
+ r.authors.each_with_index do |x, i|
+ a = @author_index[x.compact_string]
+ cols = {
+ RefID: r.id,
+ PersonID: a.id,
+ SeqNum: i + 1,
+ AuthorCount: r.authors.size,
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id
+ }
+ sql << sql_insert_statement('tblRefAuthors', cols)
  end
  end
- @csv_string
+ sql.join("\n")
  end
 
  # Generate tblCites string.
  def tblCites
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- @name_collection.collection.each do |n|
- ref = @by_author_reference_index[n.author_year_index]
- next if ref.nil?
- cols = {
- TaxonNameID: n.id,
- SeqNum: 1,
- RefID: ref.id,
- NomenclatorID: @nomenclator[n.nomenclator_name],
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id,
- CitePages: """""", # equates to "" in CSV speak
- NewNameStatus: 0,
- Note: """""",
- TypeClarification: 0, # We might derive more data from this
- CurrentConcept: 1, # Boolean, right?
- ConceptChange: 0, # Unspecified
- InfoFlags: 0, #
- InfoFlagStatus: 1, # 1 => needs review
- PolynomialStatus: 0
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+
+ @name_collection.collection.each do |n|
+ next if @nomenclator[n.nomenclator_name].nil? # Only create nomenclator records if they are original citations, otherwise not !! Might need updating in future imports
+ ref = get_ref(n)
+
+ # ref = @by_author_reference_index[n.author_year_index]
+ next if ref.nil?
+ cols = {
+ TaxonNameID: n.id,
+ SeqNum: 1,
+ RefID: ref.id,
+ NomenclatorID: @nomenclator[n.nomenclator_name],
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ CitePages: @empty_quotes, # equates to "" in CSV speak
+ NewNameStatus: 0,
+ Note: @empty_quotes,
+ TypeClarification: 0, # We might derive more data from this
+ CurrentConcept: 1, # Boolean, right?
+ ConceptChange: 0, # Unspecified
+ InfoFlags: 0, #
+ InfoFlagStatus: 1, # 1 => needs review
+ PolynomialStatus: 0
+ }
+ sql << sql_insert_statement('tblCites', cols)
  end
- @csv_string
+ sql.join("\n")
  end
 
  def tblGenusNames
- @csv_string = csv_for_genus_and_species_names_tables('Genus')
- @csv_string
+ # TODO: SF tests catch unused names based on some names not being included in Nomeclator data. We could optimize so that the work around is removed.
+ # I.e., all the names get added here, not all the names get added to Nomclator/Cites because of citations which are not original combinations
+ sql = sql_for_genus_and_species_names_tables('Genus')
+ sql
  end
 
  def tblSpeciesNames
- @csv_string = csv_for_genus_and_species_names_tables('Species')
- @csv_string
+ # TODO: SF tests catch unused names based on some names not being included in Nomeclator data. We could optimize so that the work around is removed.
+ # I.e., all the names get added here, not all the names get added to Nomclator/Cites because of citations which are not original combinations
+ sql = sql_for_genus_and_species_names_tables('Species')
+ sql
  end
 
- def csv_for_genus_and_species_names_tables(type)
+ def sql_for_genus_and_species_names_tables(type)
+ sql = []
  col = "#{type}NameID"
  @headers = [col, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- var = self.send("#{type.downcase}_names")
- var.keys.each_with_index do |n,i|
- var[n] = i + 1
- cols = {
- col.to_sym => i + 1,
- Name: n,
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id,
- Italicize: 1 # always true for these data
- }
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ var = self.send("#{type.downcase}_names")
+ var.keys.each_with_index do |n,i|
+ var[n] = i + 1
+ cols = {
+ col.to_sym => i + 1,
+ Name: n,
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ Italicize: 1 # always true for these data
+ }
+ sql << sql_insert_statement("tbl#{type}Names", cols)
  end
- @csv_string
+ sql.join("\n")
  end
 
- # must be called post tblGenusNames and tblSpeciesNames
+ # Must be called post tblGenusNames and tblSpeciesNames.
+ # Some records are not used but can be cleaned by SF
  def tblNomenclator
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
- @csv_string = CSV.generate() do |csv|
- csv << @headers
- i = 1
- @name_collection.collection.each do |n|
- next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('genus')
- cols = {
- NomenclatorID: i,
- GenusNameID: @genus_names[n.parent_name_at_rank('genus')] || 0,
- SubgenusNameID: @genus_names[n.parent_name_at_rank('subgenus')] || 0,
- SpeciesNameID: @species_names[n.parent_name_at_rank('species')] || 0,
- SubspeciesNameID: @species_names[n.parent_name_at_rank('subspecies')] || 0,
- InfrasubspeciesNameID: 0,
- InfrasubKind: 0, # this might be wrong
- LastUpdate: @time,
- ModifiedBy: @authorized_user_id,
- SuitableForGenus: 0, # Set in SF
- SuitableForSpecies: 0 # Set in SF
- }
- @nomenclator.merge!(n.nomenclator_name => i)
- i += 1
- csv << @headers.collect{|h| cols[h.to_sym]}
- end
+ sql = []
+ i = 1
+ @name_collection.collection.each do |n|
+ gid, sgid = 0,0
+ sid = @species_names[n.parent_name_at_rank('species')] || 0
+ ssid = @species_names[n.parent_name_at_rank('subspecies')] || 0
+
+ if n.parens == false
+ gid = @genus_names[n.parent_name_at_rank('genus')] || 0
+ sgid = @genus_names[n.parent_name_at_rank('subgenus')] || 0
+ end
+
+ next if Taxonifi::RANKS.index(n.rank) < Taxonifi::RANKS.index('subtribe')
+
+ ref = get_ref(n)
+ # debugger
+ # ref = @by_author_reference_index[n.author_year_index]
+
+ next if ref.nil?
+ cols = {
+ NomenclatorID: i,
+ GenusNameID: gid,
+ SubgenusNameID: sgid,
+ SpeciesNameID: sid,
+ SubspeciesNameID: ssid,
+ InfrasubspeciesNameID: 0,
+ InfrasubKind: 0, # this might be wrong
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ SuitableForGenus: 0, # Set in SF
+ SuitableForSpecies: 0 # Set in SF
+ }
+ @nomenclator.merge!(n.nomenclator_name => i)
+ i += 1
+
+ sql << sql_insert_statement('tblNomenclator', cols)
+ end
+
+ # TODO: DRY this up with above?!
+ @name_collection.combinations.each do |c|
+ gid, sgid = 0,0
+ sid = (c[2].nil? ? 0 : @species_names[c[2].name])
+ ssid = (c[3].nil? ? 0 : @species_names[c[3].name])
+
+ if c.compact.last.parens == false
+ gid = (c[0].nil? ? 0 : @genus_names[c[0].name])
+ sgid = (c[1].nil? ? 0 : @genus_names[c[1].name])
+ end
+
+ # ref = @by_author_reference_index[c.compact.last.author_year_index]
+ ref = @name_collection.ref_collection.object_from_row(c.compact.last.related[:link_to_ref_from_row])
+
+ next if ref.nil?
+
+ cols = {
+ NomenclatorID: i,
+ GenusNameID: gid ,
+ SubgenusNameID: sgid ,
+ SpeciesNameID: sid ,
+ SubspeciesNameID: ssid ,
+ InfrasubspeciesNameID: 0,
+ InfrasubKind: 0, # this might be wrong
+ LastUpdate: @time,
+ ModifiedBy: @authorized_user_id,
+ SuitableForGenus: 0, # Set in SF
+ SuitableForSpecies: 0 # Set in SF
+ }
+ # check!?
+ @nomenclator.merge!(c.compact.last.nomenclator_name => i)
+ sql << sql_insert_statement('tblNomenclator', cols)
+ i += 1
  end
- @csv_string
+ sql.join("\n")
  end
 
- end
- end
+ end # End class
+ end # End module