dwc-archive 0.9.10 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +14 -8
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +119 -107
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/dwc_archive/generator_eml_xml.rb +116 -0
  25. data/lib/dwc_archive/generator_meta_xml.rb +72 -0
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/{file with characters(3).gz → file with characters(3).tar.gz} +0 -0
  33. data/spec/files/generator_eml.xml +47 -0
  34. data/spec/files/generator_meta.xml +19 -0
  35. data/spec/lib/classification_normalizer_spec.rb +96 -105
  36. data/spec/lib/core_spec.rb +43 -41
  37. data/spec/lib/darwin_core_spec.rb +108 -138
  38. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  39. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  40. data/spec/lib/generator_spec.rb +77 -69
  41. data/spec/lib/gnub_taxon_spec.rb +15 -17
  42. data/spec/lib/metadata_spec.rb +50 -41
  43. data/spec/lib/taxon_normalized_spec.rb +62 -65
  44. data/spec/lib/xml_reader_spec.rb +9 -12
  45. data/spec/spec_helper.rb +54 -51
  46. metadata +105 -88
  47. data/.rvmrc +0 -1
  48. data/] +0 -40
  49. data/lib/dwc-archive.rb +0 -107
  50. data/lib/dwc-archive/archive.rb +0 -40
  51. data/lib/dwc-archive/classification_normalizer.rb +0 -428
  52. data/lib/dwc-archive/core.rb +0 -17
  53. data/lib/dwc-archive/expander.rb +0 -84
  54. data/lib/dwc-archive/generator.rb +0 -85
  55. data/lib/dwc-archive/generator_eml_xml.rb +0 -86
  56. data/lib/dwc-archive/generator_meta_xml.rb +0 -58
  57. data/lib/dwc-archive/ingester.rb +0 -101
  58. data/lib/dwc-archive/metadata.rb +0 -48
  59. data/lib/dwc-archive/version.rb +0 -3
  60. data/lib/dwc-archive/xml_reader.rb +0 -64
data/.rvmrc DELETED
@@ -1 +0,0 @@
1
- rvm use ruby-1.9.3-p392@dwc-archive --create
data/] DELETED
@@ -1,40 +0,0 @@
1
- require_relative '../spec_helper'
2
-
3
- describe DarwinCore::Core do
4
- subject(:dwca) { DarwinCore.new(file_path) }
5
- subject(:core) { DarwinCore::Core.new(dwca) }
6
- let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
- file_name) }
8
- let(:file_name) { 'data.tar.gz' }
9
-
10
-
11
- describe '.new' do
12
- it 'creates new core' do
13
- expect(core).to be_kind_of DarwinCore::Core
14
- end
15
- end
16
-
17
- describe '#id' do
18
-
19
- it 'returns core id' do
20
- expect(core.id[:index]).to eq 0
21
- expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
- end
23
-
24
- context 'no coreid' do
25
- let(:file_name) { 'empty_coreid.tar.gz' }
26
-
27
- it 'does not return coreid' do
28
- expect(core.id[:index]).to eq 0
29
- expect(core.id[:term]).to be_nil
30
- end
31
- end
32
- end
33
-
34
- it 'reads core file from archive' do
35
-
36
- core.read
37
-
38
- end
39
-
40
- end
data/lib/dwc-archive.rb DELETED
@@ -1,107 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- recent_ruby = RUBY_VERSION >= '1.9.1'
4
- raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless recent_ruby
5
-
6
- require 'fileutils'
7
- require 'ostruct'
8
- require 'digest'
9
- require 'csv'
10
- require 'logger'
11
- require_relative 'dwc-archive/xml_reader'
12
- require_relative 'dwc-archive/ingester'
13
- require_relative 'dwc-archive/errors'
14
- require_relative 'dwc-archive/expander'
15
- require_relative 'dwc-archive/archive'
16
- require_relative 'dwc-archive/core'
17
- require_relative 'dwc-archive/extension'
18
- require_relative 'dwc-archive/metadata'
19
- require_relative 'dwc-archive/generator'
20
- require_relative 'dwc-archive/generator_meta_xml'
21
- require_relative 'dwc-archive/generator_eml_xml'
22
- require_relative 'dwc-archive/classification_normalizer'
23
- require_relative 'dwc-archive/version'
24
-
25
- class DarwinCore
26
-
27
- VERSION = DarwinCore::VERSION
28
- DEFAULT_TMP_DIR = "/tmp"
29
-
30
- attr_reader :archive, :core, :metadata, :extensions,
31
- :classification_normalizer
32
- alias :eml :metadata
33
-
34
-
35
- def self.nil_field?(field)
36
- return true if [nil, '', '/N'].include?(field)
37
- false
38
- end
39
-
40
- def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
41
- Dir.entries(tmp_dir).each do |entry|
42
- path = File.join(tmp_dir, entry)
43
- if FileTest.directory?(path) && entry.match(/^dwc_[\d]+$/)
44
- FileUtils.rm_rf(path)
45
- end
46
- end
47
- end
48
-
49
- def self.logger
50
- @@logger ||= Logger.new(nil)
51
- end
52
-
53
- def self.logger=(logger)
54
- @@logger = logger
55
- end
56
-
57
- def self.logger_reset
58
- self.logger = Logger.new(nil)
59
- end
60
-
61
- def self.logger_write(obj_id, message, method = :info)
62
- self.logger.send(method, "|%s|%s|" % [obj_id, message])
63
- end
64
-
65
- def initialize(dwc_path, tmp_dir = DEFAULT_TMP_DIR)
66
- @dwc_path = dwc_path
67
- @archive = DarwinCore::Archive.new(@dwc_path, tmp_dir)
68
- @core = DarwinCore::Core.new(self)
69
- @metadata = DarwinCore::Metadata.new(@archive)
70
- @extensions = get_extensions
71
- end
72
-
73
- def file_name
74
- File.split(@dwc_path).last
75
- end
76
-
77
- def path
78
- File.expand_path(@dwc_path)
79
- end
80
-
81
- # generates a hash from a classification data with path to each node,
82
- # list of synonyms and vernacular names.
83
- def normalize_classification
84
- return nil unless has_parent_id?
85
- @classification_normalizer ||= DarwinCore::ClassificationNormalizer.
86
- new(self)
87
- @classification_normalizer.normalize
88
- end
89
-
90
- def has_parent_id?
91
- !!@core.fields.join('|').downcase.match(/highertaxonid|parentnameusageid/)
92
- end
93
-
94
- def checksum
95
- Digest::SHA1.hexdigest(open(@dwc_path).read)
96
- end
97
-
98
- private
99
- def get_extensions
100
- res = []
101
- root_key = @archive.meta.keys[0]
102
- ext = @archive.meta[root_key][:extension]
103
- return [] unless ext
104
- ext = [ext] if ext.class != Array
105
- ext.map { |e| DarwinCore::Extension.new(self, e) }
106
- end
107
- end
data/lib/dwc-archive/archive.rb DELETED
@@ -1,40 +0,0 @@
1
- require 'nokogiri'
2
- class DarwinCore
3
- class Archive
4
- attr_reader :meta, :eml
5
- def initialize(archive_path, tmp_dir)
6
- @archive_path = archive_path
7
- @tmp_dir = tmp_dir
8
- @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
9
- @expander.unpack
10
- if valid?
11
- @meta = DarwinCore::XmlReader.
12
- from_xml(open(File.join(@expander.path, 'meta.xml')))
13
- @eml = files.include?("eml.xml") ?
14
- DarwinCore::XmlReader.
15
- from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
16
- else
17
- clean
18
- raise InvalidArchiveError
19
- end
20
- end
21
-
22
- def valid?
23
- valid = true
24
- valid = valid && @expander.path && FileTest.exists?(@expander.path)
25
- valid = valid && files && files.include?('meta.xml')
26
- end
27
-
28
- def files
29
- @expander.files
30
- end
31
-
32
- def files_path
33
- @expander.path
34
- end
35
-
36
- def clean
37
- @expander.clean
38
- end
39
- end
40
- end
data/lib/dwc-archive/classification_normalizer.rb DELETED
@@ -1,428 +0,0 @@
1
- # encoding: utf-8
2
- require 'parsley-store'
3
-
4
- class DarwinCore
5
-
6
- class TaxonNormalized
7
- attr_accessor :id, :local_id, :global_id, :source, :parent_id,
8
- :classification_path_id, :classification_path,
9
- :linnean_classification_path, :current_name, :current_name_canonical,
10
- :synonyms, :vernacular_names, :rank, :status
11
-
12
- def initialize
13
- @id = @parent_id = @rank = @status = nil
14
- @current_name = ''
15
- @current_name_canonical = ''
16
- @source = ''
17
- @local_id = ''
18
- @global_id = ''
19
- @classification_path = []
20
- @classification_path_id = []
21
- @synonyms = []
22
- @vernacular_names = []
23
- @linnean_classification_path = []
24
- end
25
-
26
- end
27
-
28
- class GnubTaxon < TaxonNormalized
29
- attr_accessor :uuid, :uuid_path
30
-
31
- def initialize
32
- super
33
- @uuid = nil
34
- @uuid_path = []
35
- end
36
- end
37
-
38
- class SynonymNormalized < Struct.new(:id, :name, :canonical_name,
39
- :status, :source, :local_id,
40
- :global_id);end
41
- class VernacularNormalized < Struct.new(:name, :language, :locality,
42
- :country_code);end
43
-
44
- class ClassificationNormalizer
45
- attr_reader :error_names, :tree, :normalized_data
46
-
47
- def initialize(dwc_instance)
48
- @dwc = dwc_instance
49
- @core_fields = get_fields(@dwc.core)
50
- @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
51
- @normalized_data = {}
52
- @synonyms = {}
53
- @parser = ParsleyStore.new(1,2)
54
- @name_strings = {}
55
- @vernacular_name_strings = {}
56
- @error_names = []
57
- @tree = {}
58
- end
59
-
60
- def darwin_core
61
- @dwc
62
- end
63
-
64
- def add_name_string(name_string)
65
- @name_strings[name_string] = 1 unless @name_strings[name_string]
66
- end
67
-
68
- def add_vernacular_name_string(name_string)
69
- unless @vernacular_name_strings[name_string]
70
- @vernacular_name_strings[name_string] = 1
71
- end
72
- end
73
-
74
- def name_strings(opts = {})
75
- opts = { with_hash: false }.merge(opts)
76
- if !!opts[:with_hash]
77
- @name_strings
78
- else
79
- @name_strings.keys
80
- end
81
- end
82
-
83
- def vernacular_name_strings(opts = {})
84
- opts = { with_hash: false }.merge(opts)
85
- if !!opts[:with_hash]
86
- @vernacular_name_strings
87
- else
88
- @vernacular_name_strings.keys
89
- end
90
- end
91
-
92
- def normalize(opts = {})
93
- opts = { :with_canonical_names => true,
94
- :with_extensions => true }.merge(opts)
95
- @with_canonical_names = !!opts[:with_canonical_names]
96
- DarwinCore.logger_write(@dwc.object_id,
97
- 'Started normalization of the classification')
98
- ingest_core
99
- DarwinCore.logger_write(@dwc.object_id,
100
- 'Calculating the classification parent/child paths')
101
- has_parent_id? ?
102
- calculate_classification_path :
103
- @normalized_data.keys.each { |id| @tree[id] = {} }
104
- DarwinCore.logger_write(@dwc.object_id, 'Ingesting data from extensions')
105
- if !!opts[:with_extensions]
106
- ingest_extensions
107
- end
108
- @normalized_data
109
- end
110
-
111
- private
112
-
113
- def get_canonical_name(a_scientific_name)
114
- if @with_canonical_names
115
- canonical_name = @parser.parse(a_scientific_name,
116
- :canonical_only => true)
117
- canonical_name.to_s.empty? ? a_scientific_name : canonical_name
118
- else
119
- nil
120
- end
121
- end
122
-
123
- def get_fields(element)
124
- data = element.fields.inject({}) do |res, f|
125
- field = f[:term].split('/')[-1]
126
- field = field ? field.downcase.to_sym : ''
127
- res[field] = f[:index].to_i
128
- res
129
- end
130
- data[:id] = element.id[:index] if element.id
131
- data
132
- end
133
-
134
- def status_synonym?(status)
135
- status && !!status.match(/^syn/)
136
- end
137
-
138
- def add_synonym_from_core(taxon_id, row)
139
- @synonyms[row[@core_fields[:id]]] = taxon_id
140
- taxon = @normalized_data[row[taxon_id]] ?
141
- @normalized_data[row[taxon_id]] :
142
- @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
143
- synonym = SynonymNormalized.new(
144
- row[@core_fields[:id]],
145
- row[@core_fields[:scientificname]],
146
- row[@core_fields[:canonicalname]],
147
- @core_fields[:taxonomicstatus] ?
148
- row[@core_fields[:taxonomicstatus]] :
149
- nil,
150
- @core_fields[:source] ? row[@core_fields[:source]] : nil,
151
- @core_fields[:localid] ? row[@core_fields[:localid]] : nil,
152
- @core_fields[:globalid] ? row[@core_fields[:globalid]] : nil,
153
- )
154
- taxon.synonyms << synonym
155
- add_name_string(synonym.name)
156
- add_name_string(synonym.canonical_name)
157
- end
158
-
159
- def set_scientific_name(row, fields)
160
- row[fields[:scientificname]] = 'N/A' unless row[fields[:scientificname]]
161
- canonical_name = nil
162
- scientific_name = row[fields[:scientificname]].strip
163
- if separate_canonical_and_authorship?(row, fields)
164
- if @with_canonical_names
165
- canonical_name = row[fields[:scientificname]].strip
166
- end
167
- scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
168
- else
169
- if @with_canonical_names
170
- canonical_name = get_canonical_name(row[fields[:scientificname]])
171
- end
172
- end
173
- fields[:canonicalname] = row.size
174
- row << canonical_name
175
- row[fields[:scientificname]] = scientific_name
176
- end
177
-
178
- def separate_canonical_and_authorship?(row, fields)
179
- authorship = ''
180
- if fields[:scientificnameauthorship]
181
- authorship = row[fields[:scientificnameauthorship]].to_s.strip
182
- end
183
- !(authorship.empty? || row[fields[:scientificname]].index(authorship))
184
- end
185
-
186
- def ingest_core
187
- @normalized_data = {}
188
- has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
189
- raise DarwinCore::CoreFileError.new('Darwin Core core fields must ' +
190
- 'contain taxon id and scientific name') unless has_name_and_id
191
- @dwc.core.read do |rows|
192
- rows[1].each do |error|
193
- @error_names << { :data => error,
194
- :error => :reading_or_encoding_error }
195
- end
196
- rows[0].each do |r|
197
- set_scientific_name(r, @core_fields)
198
- #core has AcceptedNameUsageId
199
- if @core_fields[:acceptednameusageid] &&
200
- r[@core_fields[:acceptednameusageid]] &&
201
- r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
202
- add_synonym_from_core(@core_fields[:acceptednameusageid], r)
203
- elsif !@core_fields[:acceptednameusageid] &&
204
- @core_fields[:taxonomicstatus] &&
205
- status_synonym?(r[@core_fields[:taxonomicstatus]])
206
- add_synonym_from_core(parent_id, r) if has_parent_id?
207
- else
208
- unless @normalized_data[r[@core_fields[:id]]]
209
- if gnub_archive?
210
- new_taxon = DarwinCore::GnubTaxon.new
211
- else
212
- new_taxon = DarwinCore::TaxonNormalized.new
213
- end
214
- @normalized_data[r[@core_fields[:id]]] = new_taxon
215
- end
216
- taxon = @normalized_data[r[@core_fields[:id]]]
217
- if gnub_archive?
218
- taxon.uuid = r[@core_fields[:originalnameusageid]]
219
- taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
220
- split('|')
221
- end
222
- taxon.id = r[@core_fields[:id]]
223
- taxon.current_name = r[@core_fields[:scientificname]]
224
- taxon.current_name_canonical = r[@core_fields[:canonicalname]]
225
- taxon.parent_id = has_parent_id? ? r[parent_id] : nil
226
- taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
227
- if @core_fields[:taxonomicstatus]
228
- taxon.status = r[@core_fields[:taxonomicstatus]]
229
- end
230
- taxon.source = r[@core_fields[:source]] if @core_fields[:source]
231
- taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
232
- if @core_fields[:globalid]
233
- taxon.global_id = r[@core_fields[:globalid]]
234
- end
235
- taxon.linnean_classification_path =
236
- get_linnean_classification_path(r, taxon)
237
- add_name_string(taxon.current_name)
238
- has_canonical = taxon.current_name_canonical &&
239
- !taxon.current_name_canonical.empty?
240
- add_name_string(taxon.current_name_canonical) if has_canonical
241
- end
242
- end
243
- end
244
- end
245
-
246
- def has_parent_id?
247
- @has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
248
- @core_fields.has_key?(:parentnameusageid)
249
- end
250
-
251
- def parent_id
252
- parent_id_field = @core_fields[:highertaxonid] ||
253
- @core_fields[:parentnameusageid]
254
- end
255
-
256
- def calculate_classification_path
257
- @paths_num = 0
258
- @normalized_data.each do |taxon_id, taxon|
259
- next if !taxon.classification_path_id.empty?
260
- res = get_classification_path(taxon)
261
- next if res == 'error'
262
- end
263
- end
264
-
265
- def get_classification_path(taxon)
266
- return if !taxon.classification_path_id.empty?
267
- @paths_num += 1
268
- if @paths_num % 10000 == 0
269
- DarwinCore.logger_write(@dwc.object_id,
270
- "Calculated %s paths" % @paths_num)
271
- end
272
- current_node = {taxon.id => {}}
273
- if DarwinCore.nil_field?(taxon.parent_id)
274
- if @with_canonical_names
275
- taxon.classification_path << taxon.current_name_canonical
276
- end
277
- taxon.classification_path_id << taxon.id
278
- @tree.merge!(current_node)
279
- else
280
- parent_cp = parent_cpid = nil
281
- if @normalized_data[taxon.parent_id]
282
- if @with_canonical_names
283
- parent_cp = @normalized_data[taxon.parent_id].classification_path
284
- end
285
- parent_cpid = @normalized_data[taxon.parent_id].
286
- classification_path_id
287
- else
288
- current_parent = @normalized_data[@synonyms[taxon.parent_id]]
289
- if current_parent
290
- error = 'WARNING: The parent of the taxon ' +
291
- "\'#{taxon.current_name}\' is deprecated"
292
- @error_names << {:data => taxon,
293
- :error => :deprecated_parent,
294
- :current_parent => current_parent }
295
-
296
- if @with_canonical_names
297
- parent_cp = current_parent.classification_path
298
- end
299
- parent_cpid = current_parent.classification_path_id
300
- else
301
- error = 'WARNING: The parent of the taxon ' +
302
- "\'#{taxon.current_name}\' not found"
303
- @error_names << {:data => taxon,
304
- :error => :deprecated_parent, :current_parent => nil}
305
- end
306
- end
307
- return 'error' unless parent_cpid
308
- if parent_cpid.empty?
309
- res = 'error'
310
- begin
311
- res = get_classification_path(@normalized_data[taxon.parent_id])
312
- rescue SystemStackError
313
- @error_names << {:data => taxon,
314
- :error => :too_deep_hierarchy, :current_parent => nil}
315
- end
316
- return res if res == 'error'
317
- if @with_canonical_names
318
- taxon.classification_path += @normalized_data[taxon.parent_id].
319
- classification_path + [taxon.current_name_canonical]
320
- end
321
- taxon.classification_path_id += @normalized_data[taxon.parent_id].
322
- classification_path_id + [taxon.id]
323
- parent_node = @normalized_data[taxon.parent_id].
324
- classification_path_id.inject(@tree) {|node, id| node[id]}
325
- parent_node.merge!(current_node)
326
- else
327
- taxon.classification_path += parent_cp +
328
- [taxon.current_name_canonical] if @with_canonical_names
329
- taxon.classification_path_id += parent_cpid + [taxon.id]
330
- parent_node = @normalized_data[taxon.parent_id].
331
- classification_path_id.inject(@tree) {|node, id| node[id]}
332
- begin
333
- parent_node.merge!(current_node)
334
- rescue NoMethodError => e
335
- DarwinCore.logger_write(@dwc.object_id,
336
- "Error '%s' taxon %s" % [e.message, taxon.id])
337
- return 'error'
338
- end
339
- end
340
- end
341
- end
342
-
343
- def ingest_extensions
344
- @extensions.each do |e|
345
- ext, fields = *e
346
- ingest_synonyms(e) if (File.split(e[0].file_path).
347
- last.match(/synonym/i) &&
348
- fields.keys.include?(:scientificname))
349
- ingest_vernaculars(e) if fields.keys.include? :vernacularname
350
- end
351
- end
352
-
353
- def ingest_synonyms(extension)
354
- DarwinCore.logger_write(@dwc.object_id, 'Ingesting synonyms extension')
355
- ext, fields = *extension
356
- ext.read do |rows|
357
- rows[0].each do |r|
358
- set_scientific_name(r, fields)
359
- synonym = SynonymNormalized.new(
360
- nil,
361
- r[fields[:scientificname]],
362
- r[fields[:canonicalname]],
363
- fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil,
364
- fields[:source] ? r[fields[:source]] : nil,
365
- fields[:localid] ? r[fields[:localid]] : nil,
366
- fields[:globalid] ? r[fields[:globalid]] : nil,
367
- )
368
- if @normalized_data[r[fields[:id]]]
369
- @normalized_data[r[fields[:id]]].synonyms << synonym
370
- add_name_string(synonym.name)
371
- add_name_string(synonym.canonical_name)
372
- else
373
- @error_names << { :taxon => synonym,
374
- :error => :synonym_of_unknown_taxa }
375
- end
376
- end
377
- end
378
- end
379
-
380
- def ingest_vernaculars(extension)
381
- DarwinCore.logger_write(@dwc.object_id,
382
- 'Ingesting vernacular names extension')
383
- ext, fields = *extension
384
- ext.read do |rows|
385
- rows[0].each do |r|
386
-
387
- language = nil
388
- if fields[:language]
389
- language = r[fields[:language]]
390
- elsif fields[:languagecode]
391
- language = r[fields[:languagecode]]
392
- end
393
-
394
- locality = fields[:locality] ? r[fields[:locality]] : nil
395
-
396
- country_code = fields[:countrycode] ? r[fields[:countrycode]] : nil
397
-
398
- vernacular = VernacularNormalized.new(
399
- r[fields[:vernacularname]],
400
- language,
401
- locality,
402
- country_code)
403
- if @normalized_data[r[fields[:id]]]
404
- @normalized_data[r[fields[:id]]].vernacular_names << vernacular
405
- add_vernacular_name_string(vernacular.name)
406
- else
407
- @error_names << { :vernacular_name => vernacular,
408
- :error => :vernacular_of_unknown_taxa }
409
- end
410
- end
411
- end
412
- end
413
-
414
- #Collect linnean classification path only on species level
415
- def get_linnean_classification_path(row, taxon)
416
- res = []
417
- [:kingdom, :phylum, :class,
418
- :order, :family, :genus, :subgenus].each do |clade|
419
- res << [row[@core_fields[clade]], clade] if @core_fields[clade]
420
- end
421
- res
422
- end
423
-
424
- def gnub_archive?
425
- @core_fields[:originalnameusageidpath]
426
- end
427
- end
428
- end