dwc-archive 0.9.10 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +14 -8
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +119 -107
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/dwc_archive/generator_eml_xml.rb +116 -0
  25. data/lib/dwc_archive/generator_meta_xml.rb +72 -0
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/{file with characters(3).gz → file with characters(3).tar.gz} +0 -0
  33. data/spec/files/generator_eml.xml +47 -0
  34. data/spec/files/generator_meta.xml +19 -0
  35. data/spec/lib/classification_normalizer_spec.rb +96 -105
  36. data/spec/lib/core_spec.rb +43 -41
  37. data/spec/lib/darwin_core_spec.rb +108 -138
  38. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  39. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  40. data/spec/lib/generator_spec.rb +77 -69
  41. data/spec/lib/gnub_taxon_spec.rb +15 -17
  42. data/spec/lib/metadata_spec.rb +50 -41
  43. data/spec/lib/taxon_normalized_spec.rb +62 -65
  44. data/spec/lib/xml_reader_spec.rb +9 -12
  45. data/spec/spec_helper.rb +54 -51
  46. metadata +105 -88
  47. data/.rvmrc +0 -1
  48. data/] +0 -40
  49. data/lib/dwc-archive.rb +0 -107
  50. data/lib/dwc-archive/archive.rb +0 -40
  51. data/lib/dwc-archive/classification_normalizer.rb +0 -428
  52. data/lib/dwc-archive/core.rb +0 -17
  53. data/lib/dwc-archive/expander.rb +0 -84
  54. data/lib/dwc-archive/generator.rb +0 -85
  55. data/lib/dwc-archive/generator_eml_xml.rb +0 -86
  56. data/lib/dwc-archive/generator_meta_xml.rb +0 -58
  57. data/lib/dwc-archive/ingester.rb +0 -101
  58. data/lib/dwc-archive/metadata.rb +0 -48
  59. data/lib/dwc-archive/version.rb +0 -3
  60. data/lib/dwc-archive/xml_reader.rb +0 -64
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use ruby-1.9.3-p392@dwc-archive --create
data/] DELETED
@@ -1,40 +0,0 @@
1
- require_relative '../spec_helper'
2
-
3
- describe DarwinCore::Core do
4
- subject(:dwca) { DarwinCore.new(file_path) }
5
- subject(:core) { DarwinCore::Core.new(dwca) }
6
- let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
- file_name) }
8
- let(:file_name) { 'data.tar.gz' }
9
-
10
-
11
- describe '.new' do
12
- it 'creates new core' do
13
- expect(core).to be_kind_of DarwinCore::Core
14
- end
15
- end
16
-
17
- describe '#id' do
18
-
19
- it 'returns core id' do
20
- expect(core.id[:index]).to eq 0
21
- expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
- end
23
-
24
- context 'no coreid' do
25
- let(:file_name) { 'empty_coreid.tar.gz' }
26
-
27
- it 'does not return coreid' do
28
- expect(core.id[:index]).to eq 0
29
- expect(core.id[:term]).to be_nil
30
- end
31
- end
32
- end
33
-
34
- it 'reads core file from archive' do
35
-
36
- core.read
37
-
38
- end
39
-
40
- end
data/lib/dwc-archive.rb DELETED
@@ -1,107 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- recent_ruby = RUBY_VERSION >= '1.9.1'
4
- raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless recent_ruby
5
-
6
- require 'fileutils'
7
- require 'ostruct'
8
- require 'digest'
9
- require 'csv'
10
- require 'logger'
11
- require_relative 'dwc-archive/xml_reader'
12
- require_relative 'dwc-archive/ingester'
13
- require_relative 'dwc-archive/errors'
14
- require_relative 'dwc-archive/expander'
15
- require_relative 'dwc-archive/archive'
16
- require_relative 'dwc-archive/core'
17
- require_relative 'dwc-archive/extension'
18
- require_relative 'dwc-archive/metadata'
19
- require_relative 'dwc-archive/generator'
20
- require_relative 'dwc-archive/generator_meta_xml'
21
- require_relative 'dwc-archive/generator_eml_xml'
22
- require_relative 'dwc-archive/classification_normalizer'
23
- require_relative 'dwc-archive/version'
24
-
25
- class DarwinCore
26
-
27
- VERSION = DarwinCore::VERSION
28
- DEFAULT_TMP_DIR = "/tmp"
29
-
30
- attr_reader :archive, :core, :metadata, :extensions,
31
- :classification_normalizer
32
- alias :eml :metadata
33
-
34
-
35
- def self.nil_field?(field)
36
- return true if [nil, '', '/N'].include?(field)
37
- false
38
- end
39
-
40
- def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
41
- Dir.entries(tmp_dir).each do |entry|
42
- path = File.join(tmp_dir, entry)
43
- if FileTest.directory?(path) && entry.match(/^dwc_[\d]+$/)
44
- FileUtils.rm_rf(path)
45
- end
46
- end
47
- end
48
-
49
- def self.logger
50
- @@logger ||= Logger.new(nil)
51
- end
52
-
53
- def self.logger=(logger)
54
- @@logger = logger
55
- end
56
-
57
- def self.logger_reset
58
- self.logger = Logger.new(nil)
59
- end
60
-
61
- def self.logger_write(obj_id, message, method = :info)
62
- self.logger.send(method, "|%s|%s|" % [obj_id, message])
63
- end
64
-
65
- def initialize(dwc_path, tmp_dir = DEFAULT_TMP_DIR)
66
- @dwc_path = dwc_path
67
- @archive = DarwinCore::Archive.new(@dwc_path, tmp_dir)
68
- @core = DarwinCore::Core.new(self)
69
- @metadata = DarwinCore::Metadata.new(@archive)
70
- @extensions = get_extensions
71
- end
72
-
73
- def file_name
74
- File.split(@dwc_path).last
75
- end
76
-
77
- def path
78
- File.expand_path(@dwc_path)
79
- end
80
-
81
- # generates a hash from a classification data with path to each node,
82
- # list of synonyms and vernacular names.
83
- def normalize_classification
84
- return nil unless has_parent_id?
85
- @classification_normalizer ||= DarwinCore::ClassificationNormalizer.
86
- new(self)
87
- @classification_normalizer.normalize
88
- end
89
-
90
- def has_parent_id?
91
- !!@core.fields.join('|').downcase.match(/highertaxonid|parentnameusageid/)
92
- end
93
-
94
- def checksum
95
- Digest::SHA1.hexdigest(open(@dwc_path).read)
96
- end
97
-
98
- private
99
- def get_extensions
100
- res = []
101
- root_key = @archive.meta.keys[0]
102
- ext = @archive.meta[root_key][:extension]
103
- return [] unless ext
104
- ext = [ext] if ext.class != Array
105
- ext.map { |e| DarwinCore::Extension.new(self, e) }
106
- end
107
- end
data/lib/dwc-archive/archive.rb DELETED
@@ -1,40 +0,0 @@
1
- require 'nokogiri'
2
- class DarwinCore
3
- class Archive
4
- attr_reader :meta, :eml
5
- def initialize(archive_path, tmp_dir)
6
- @archive_path = archive_path
7
- @tmp_dir = tmp_dir
8
- @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
9
- @expander.unpack
10
- if valid?
11
- @meta = DarwinCore::XmlReader.
12
- from_xml(open(File.join(@expander.path, 'meta.xml')))
13
- @eml = files.include?("eml.xml") ?
14
- DarwinCore::XmlReader.
15
- from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
16
- else
17
- clean
18
- raise InvalidArchiveError
19
- end
20
- end
21
-
22
- def valid?
23
- valid = true
24
- valid = valid && @expander.path && FileTest.exists?(@expander.path)
25
- valid = valid && files && files.include?('meta.xml')
26
- end
27
-
28
- def files
29
- @expander.files
30
- end
31
-
32
- def files_path
33
- @expander.path
34
- end
35
-
36
- def clean
37
- @expander.clean
38
- end
39
- end
40
- end
data/lib/dwc-archive/classification_normalizer.rb DELETED
@@ -1,428 +0,0 @@
1
- # encoding: utf-8
2
- require 'parsley-store'
3
-
4
- class DarwinCore
5
-
6
- class TaxonNormalized
7
- attr_accessor :id, :local_id, :global_id, :source, :parent_id,
8
- :classification_path_id, :classification_path,
9
- :linnean_classification_path, :current_name, :current_name_canonical,
10
- :synonyms, :vernacular_names, :rank, :status
11
-
12
- def initialize
13
- @id = @parent_id = @rank = @status = nil
14
- @current_name = ''
15
- @current_name_canonical = ''
16
- @source = ''
17
- @local_id = ''
18
- @global_id = ''
19
- @classification_path = []
20
- @classification_path_id = []
21
- @synonyms = []
22
- @vernacular_names = []
23
- @linnean_classification_path = []
24
- end
25
-
26
- end
27
-
28
- class GnubTaxon < TaxonNormalized
29
- attr_accessor :uuid, :uuid_path
30
-
31
- def initialize
32
- super
33
- @uuid = nil
34
- @uuid_path = []
35
- end
36
- end
37
-
38
- class SynonymNormalized < Struct.new(:id, :name, :canonical_name,
39
- :status, :source, :local_id,
40
- :global_id);end
41
- class VernacularNormalized < Struct.new(:name, :language, :locality,
42
- :country_code);end
43
-
44
- class ClassificationNormalizer
45
- attr_reader :error_names, :tree, :normalized_data
46
-
47
- def initialize(dwc_instance)
48
- @dwc = dwc_instance
49
- @core_fields = get_fields(@dwc.core)
50
- @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
51
- @normalized_data = {}
52
- @synonyms = {}
53
- @parser = ParsleyStore.new(1,2)
54
- @name_strings = {}
55
- @vernacular_name_strings = {}
56
- @error_names = []
57
- @tree = {}
58
- end
59
-
60
- def darwin_core
61
- @dwc
62
- end
63
-
64
- def add_name_string(name_string)
65
- @name_strings[name_string] = 1 unless @name_strings[name_string]
66
- end
67
-
68
- def add_vernacular_name_string(name_string)
69
- unless @vernacular_name_strings[name_string]
70
- @vernacular_name_strings[name_string] = 1
71
- end
72
- end
73
-
74
- def name_strings(opts = {})
75
- opts = { with_hash: false }.merge(opts)
76
- if !!opts[:with_hash]
77
- @name_strings
78
- else
79
- @name_strings.keys
80
- end
81
- end
82
-
83
- def vernacular_name_strings(opts = {})
84
- opts = { with_hash: false }.merge(opts)
85
- if !!opts[:with_hash]
86
- @vernacular_name_strings
87
- else
88
- @vernacular_name_strings.keys
89
- end
90
- end
91
-
92
- def normalize(opts = {})
93
- opts = { :with_canonical_names => true,
94
- :with_extensions => true }.merge(opts)
95
- @with_canonical_names = !!opts[:with_canonical_names]
96
- DarwinCore.logger_write(@dwc.object_id,
97
- 'Started normalization of the classification')
98
- ingest_core
99
- DarwinCore.logger_write(@dwc.object_id,
100
- 'Calculating the classification parent/child paths')
101
- has_parent_id? ?
102
- calculate_classification_path :
103
- @normalized_data.keys.each { |id| @tree[id] = {} }
104
- DarwinCore.logger_write(@dwc.object_id, 'Ingesting data from extensions')
105
- if !!opts[:with_extensions]
106
- ingest_extensions
107
- end
108
- @normalized_data
109
- end
110
-
111
- private
112
-
113
- def get_canonical_name(a_scientific_name)
114
- if @with_canonical_names
115
- canonical_name = @parser.parse(a_scientific_name,
116
- :canonical_only => true)
117
- canonical_name.to_s.empty? ? a_scientific_name : canonical_name
118
- else
119
- nil
120
- end
121
- end
122
-
123
- def get_fields(element)
124
- data = element.fields.inject({}) do |res, f|
125
- field = f[:term].split('/')[-1]
126
- field = field ? field.downcase.to_sym : ''
127
- res[field] = f[:index].to_i
128
- res
129
- end
130
- data[:id] = element.id[:index] if element.id
131
- data
132
- end
133
-
134
- def status_synonym?(status)
135
- status && !!status.match(/^syn/)
136
- end
137
-
138
- def add_synonym_from_core(taxon_id, row)
139
- @synonyms[row[@core_fields[:id]]] = taxon_id
140
- taxon = @normalized_data[row[taxon_id]] ?
141
- @normalized_data[row[taxon_id]] :
142
- @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
143
- synonym = SynonymNormalized.new(
144
- row[@core_fields[:id]],
145
- row[@core_fields[:scientificname]],
146
- row[@core_fields[:canonicalname]],
147
- @core_fields[:taxonomicstatus] ?
148
- row[@core_fields[:taxonomicstatus]] :
149
- nil,
150
- @core_fields[:source] ? row[@core_fields[:source]] : nil,
151
- @core_fields[:localid] ? row[@core_fields[:localid]] : nil,
152
- @core_fields[:globalid] ? row[@core_fields[:globalid]] : nil,
153
- )
154
- taxon.synonyms << synonym
155
- add_name_string(synonym.name)
156
- add_name_string(synonym.canonical_name)
157
- end
158
-
159
- def set_scientific_name(row, fields)
160
- row[fields[:scientificname]] = 'N/A' unless row[fields[:scientificname]]
161
- canonical_name = nil
162
- scientific_name = row[fields[:scientificname]].strip
163
- if separate_canonical_and_authorship?(row, fields)
164
- if @with_canonical_names
165
- canonical_name = row[fields[:scientificname]].strip
166
- end
167
- scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
168
- else
169
- if @with_canonical_names
170
- canonical_name = get_canonical_name(row[fields[:scientificname]])
171
- end
172
- end
173
- fields[:canonicalname] = row.size
174
- row << canonical_name
175
- row[fields[:scientificname]] = scientific_name
176
- end
177
-
178
- def separate_canonical_and_authorship?(row, fields)
179
- authorship = ''
180
- if fields[:scientificnameauthorship]
181
- authorship = row[fields[:scientificnameauthorship]].to_s.strip
182
- end
183
- !(authorship.empty? || row[fields[:scientificname]].index(authorship))
184
- end
185
-
186
- def ingest_core
187
- @normalized_data = {}
188
- has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
189
- raise DarwinCore::CoreFileError.new('Darwin Core core fields must ' +
190
- 'contain taxon id and scientific name') unless has_name_and_id
191
- @dwc.core.read do |rows|
192
- rows[1].each do |error|
193
- @error_names << { :data => error,
194
- :error => :reading_or_encoding_error }
195
- end
196
- rows[0].each do |r|
197
- set_scientific_name(r, @core_fields)
198
- #core has AcceptedNameUsageId
199
- if @core_fields[:acceptednameusageid] &&
200
- r[@core_fields[:acceptednameusageid]] &&
201
- r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
202
- add_synonym_from_core(@core_fields[:acceptednameusageid], r)
203
- elsif !@core_fields[:acceptednameusageid] &&
204
- @core_fields[:taxonomicstatus] &&
205
- status_synonym?(r[@core_fields[:taxonomicstatus]])
206
- add_synonym_from_core(parent_id, r) if has_parent_id?
207
- else
208
- unless @normalized_data[r[@core_fields[:id]]]
209
- if gnub_archive?
210
- new_taxon = DarwinCore::GnubTaxon.new
211
- else
212
- new_taxon = DarwinCore::TaxonNormalized.new
213
- end
214
- @normalized_data[r[@core_fields[:id]]] = new_taxon
215
- end
216
- taxon = @normalized_data[r[@core_fields[:id]]]
217
- if gnub_archive?
218
- taxon.uuid = r[@core_fields[:originalnameusageid]]
219
- taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
220
- split('|')
221
- end
222
- taxon.id = r[@core_fields[:id]]
223
- taxon.current_name = r[@core_fields[:scientificname]]
224
- taxon.current_name_canonical = r[@core_fields[:canonicalname]]
225
- taxon.parent_id = has_parent_id? ? r[parent_id] : nil
226
- taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
227
- if @core_fields[:taxonomicstatus]
228
- taxon.status = r[@core_fields[:taxonomicstatus]]
229
- end
230
- taxon.source = r[@core_fields[:source]] if @core_fields[:source]
231
- taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
232
- if @core_fields[:globalid]
233
- taxon.global_id = r[@core_fields[:globalid]]
234
- end
235
- taxon.linnean_classification_path =
236
- get_linnean_classification_path(r, taxon)
237
- add_name_string(taxon.current_name)
238
- has_canonical = taxon.current_name_canonical &&
239
- !taxon.current_name_canonical.empty?
240
- add_name_string(taxon.current_name_canonical) if has_canonical
241
- end
242
- end
243
- end
244
- end
245
-
246
- def has_parent_id?
247
- @has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
248
- @core_fields.has_key?(:parentnameusageid)
249
- end
250
-
251
- def parent_id
252
- parent_id_field = @core_fields[:highertaxonid] ||
253
- @core_fields[:parentnameusageid]
254
- end
255
-
256
- def calculate_classification_path
257
- @paths_num = 0
258
- @normalized_data.each do |taxon_id, taxon|
259
- next if !taxon.classification_path_id.empty?
260
- res = get_classification_path(taxon)
261
- next if res == 'error'
262
- end
263
- end
264
-
265
- def get_classification_path(taxon)
266
- return if !taxon.classification_path_id.empty?
267
- @paths_num += 1
268
- if @paths_num % 10000 == 0
269
- DarwinCore.logger_write(@dwc.object_id,
270
- "Calculated %s paths" % @paths_num)
271
- end
272
- current_node = {taxon.id => {}}
273
- if DarwinCore.nil_field?(taxon.parent_id)
274
- if @with_canonical_names
275
- taxon.classification_path << taxon.current_name_canonical
276
- end
277
- taxon.classification_path_id << taxon.id
278
- @tree.merge!(current_node)
279
- else
280
- parent_cp = parent_cpid = nil
281
- if @normalized_data[taxon.parent_id]
282
- if @with_canonical_names
283
- parent_cp = @normalized_data[taxon.parent_id].classification_path
284
- end
285
- parent_cpid = @normalized_data[taxon.parent_id].
286
- classification_path_id
287
- else
288
- current_parent = @normalized_data[@synonyms[taxon.parent_id]]
289
- if current_parent
290
- error = 'WARNING: The parent of the taxon ' +
291
- "\'#{taxon.current_name}\' is deprecated"
292
- @error_names << {:data => taxon,
293
- :error => :deprecated_parent,
294
- :current_parent => current_parent }
295
-
296
- if @with_canonical_names
297
- parent_cp = current_parent.classification_path
298
- end
299
- parent_cpid = current_parent.classification_path_id
300
- else
301
- error = 'WARNING: The parent of the taxon ' +
302
- "\'#{taxon.current_name}\' not found"
303
- @error_names << {:data => taxon,
304
- :error => :deprecated_parent, :current_parent => nil}
305
- end
306
- end
307
- return 'error' unless parent_cpid
308
- if parent_cpid.empty?
309
- res = 'error'
310
- begin
311
- res = get_classification_path(@normalized_data[taxon.parent_id])
312
- rescue SystemStackError
313
- @error_names << {:data => taxon,
314
- :error => :too_deep_hierarchy, :current_parent => nil}
315
- end
316
- return res if res == 'error'
317
- if @with_canonical_names
318
- taxon.classification_path += @normalized_data[taxon.parent_id].
319
- classification_path + [taxon.current_name_canonical]
320
- end
321
- taxon.classification_path_id += @normalized_data[taxon.parent_id].
322
- classification_path_id + [taxon.id]
323
- parent_node = @normalized_data[taxon.parent_id].
324
- classification_path_id.inject(@tree) {|node, id| node[id]}
325
- parent_node.merge!(current_node)
326
- else
327
- taxon.classification_path += parent_cp +
328
- [taxon.current_name_canonical] if @with_canonical_names
329
- taxon.classification_path_id += parent_cpid + [taxon.id]
330
- parent_node = @normalized_data[taxon.parent_id].
331
- classification_path_id.inject(@tree) {|node, id| node[id]}
332
- begin
333
- parent_node.merge!(current_node)
334
- rescue NoMethodError => e
335
- DarwinCore.logger_write(@dwc.object_id,
336
- "Error '%s' taxon %s" % [e.message, taxon.id])
337
- return 'error'
338
- end
339
- end
340
- end
341
- end
342
-
343
- def ingest_extensions
344
- @extensions.each do |e|
345
- ext, fields = *e
346
- ingest_synonyms(e) if (File.split(e[0].file_path).
347
- last.match(/synonym/i) &&
348
- fields.keys.include?(:scientificname))
349
- ingest_vernaculars(e) if fields.keys.include? :vernacularname
350
- end
351
- end
352
-
353
- def ingest_synonyms(extension)
354
- DarwinCore.logger_write(@dwc.object_id, 'Ingesting synonyms extension')
355
- ext, fields = *extension
356
- ext.read do |rows|
357
- rows[0].each do |r|
358
- set_scientific_name(r, fields)
359
- synonym = SynonymNormalized.new(
360
- nil,
361
- r[fields[:scientificname]],
362
- r[fields[:canonicalname]],
363
- fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil,
364
- fields[:source] ? r[fields[:source]] : nil,
365
- fields[:localid] ? r[fields[:localid]] : nil,
366
- fields[:globalid] ? r[fields[:globalid]] : nil,
367
- )
368
- if @normalized_data[r[fields[:id]]]
369
- @normalized_data[r[fields[:id]]].synonyms << synonym
370
- add_name_string(synonym.name)
371
- add_name_string(synonym.canonical_name)
372
- else
373
- @error_names << { :taxon => synonym,
374
- :error => :synonym_of_unknown_taxa }
375
- end
376
- end
377
- end
378
- end
379
-
380
- def ingest_vernaculars(extension)
381
- DarwinCore.logger_write(@dwc.object_id,
382
- 'Ingesting vernacular names extension')
383
- ext, fields = *extension
384
- ext.read do |rows|
385
- rows[0].each do |r|
386
-
387
- language = nil
388
- if fields[:language]
389
- language = r[fields[:language]]
390
- elsif fields[:languagecode]
391
- language = r[fields[:languagecode]]
392
- end
393
-
394
- locality = fields[:locality] ? r[fields[:locality]] : nil
395
-
396
- country_code = fields[:countrycode] ? r[fields[:countrycode]] : nil
397
-
398
- vernacular = VernacularNormalized.new(
399
- r[fields[:vernacularname]],
400
- language,
401
- locality,
402
- country_code)
403
- if @normalized_data[r[fields[:id]]]
404
- @normalized_data[r[fields[:id]]].vernacular_names << vernacular
405
- add_vernacular_name_string(vernacular.name)
406
- else
407
- @error_names << { :vernacular_name => vernacular,
408
- :error => :vernacular_of_unknown_taxa }
409
- end
410
- end
411
- end
412
- end
413
-
414
- #Collect linnean classification path only on species level
415
- def get_linnean_classification_path(row, taxon)
416
- res = []
417
- [:kingdom, :phylum, :class,
418
- :order, :family, :genus, :subgenus].each do |clade|
419
- res << [row[@core_fields[clade]], clade] if @core_fields[clade]
420
- end
421
- res
422
- end
423
-
424
- def gnub_archive?
425
- @core_fields[:originalnameusageidpath]
426
- end
427
- end
428
- end