dwc-archive 0.9.11 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +2 -1
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -1
- data/.travis.yml +2 -3
- data/CHANGELOG +2 -0
- data/Gemfile +3 -1
- data/README.md +110 -106
- data/Rakefile +13 -36
- data/dwc-archive.gemspec +24 -19
- data/features/step_definitions/dwc-creator_steps.rb +5 -5
- data/features/step_definitions/dwc-reader_steps.rb +47 -28
- data/features/support/env.rb +1 -1
- data/lib/dwc_archive.rb +121 -0
- data/lib/dwc_archive/archive.rb +59 -0
- data/lib/dwc_archive/classification_normalizer.rb +392 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/{dwc-archive → dwc_archive}/errors.rb +2 -0
- data/lib/dwc_archive/expander.rb +88 -0
- data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
- data/lib/dwc_archive/generator.rb +90 -0
- data/lib/{dwc-archive → dwc_archive}/generator_eml_xml.rb +40 -33
- data/lib/{dwc-archive → dwc_archive}/generator_meta_xml.rb +21 -20
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +106 -0
- data/lib/dwc_archive/metadata.rb +56 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +89 -0
- data/spec/files/generator_eml.xml +1 -1
- data/spec/lib/classification_normalizer_spec.rb +96 -105
- data/spec/lib/core_spec.rb +43 -41
- data/spec/lib/darwin_core_spec.rb +111 -132
- data/spec/lib/generator_eml_xml_spec.rb +12 -11
- data/spec/lib/generator_meta_xml_spec.rb +12 -11
- data/spec/lib/generator_spec.rb +73 -74
- data/spec/lib/gnub_taxon_spec.rb +14 -16
- data/spec/lib/metadata_spec.rb +50 -41
- data/spec/lib/taxon_normalized_spec.rb +62 -65
- data/spec/lib/xml_reader_spec.rb +9 -12
- data/spec/spec_helper.rb +55 -49
- metadata +92 -77
- data/.rvmrc +0 -1
- data/lib/dwc-archive.rb +0 -107
- data/lib/dwc-archive/archive.rb +0 -40
- data/lib/dwc-archive/classification_normalizer.rb +0 -427
- data/lib/dwc-archive/core.rb +0 -19
- data/lib/dwc-archive/expander.rb +0 -85
- data/lib/dwc-archive/generator.rb +0 -86
- data/lib/dwc-archive/ingester.rb +0 -101
- data/lib/dwc-archive/metadata.rb +0 -48
- data/lib/dwc-archive/version.rb +0 -3
- data/lib/dwc-archive/xml_reader.rb +0 -80
data/.rvmrc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
rvm use ruby-1.9.3-p392@dwc-archive --create
|
data/lib/dwc-archive.rb
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
# Refuse to load on interpreters older than 1.9.1. Versions are compared
# segment by segment with Gem::Version; comparing the raw strings is
# lexicographic and would rank '1.10.0' below '1.9.1'.
recent_ruby = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('1.9.1')
raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless recent_ruby
|
5
|
-
|
6
|
-
require 'fileutils'
|
7
|
-
require 'ostruct'
|
8
|
-
require 'digest'
|
9
|
-
require 'csv'
|
10
|
-
require 'logger'
|
11
|
-
require 'nokogiri'
|
12
|
-
require_relative 'dwc-archive/xml_reader'
|
13
|
-
require_relative 'dwc-archive/ingester'
|
14
|
-
require_relative 'dwc-archive/errors'
|
15
|
-
require_relative 'dwc-archive/expander'
|
16
|
-
require_relative 'dwc-archive/archive'
|
17
|
-
require_relative 'dwc-archive/core'
|
18
|
-
require_relative 'dwc-archive/extension'
|
19
|
-
require_relative 'dwc-archive/metadata'
|
20
|
-
require_relative 'dwc-archive/generator'
|
21
|
-
require_relative 'dwc-archive/generator_meta_xml'
|
22
|
-
require_relative 'dwc-archive/generator_eml_xml'
|
23
|
-
require_relative 'dwc-archive/classification_normalizer'
|
24
|
-
require_relative 'dwc-archive/version'
|
25
|
-
|
26
|
-
# Entry point for reading a Darwin Core Archive file. An instance builds the
# archive, core, metadata and extension objects for the file at +dwc_path+;
# class methods provide logging and temporary-directory cleanup.
class DarwinCore
  # Temporary directory used when the caller does not supply one.
  DEFAULT_TMP_DIR = "/tmp"

  attr_reader :archive, :core, :metadata, :extensions,
              :classification_normalizer
  alias eml metadata

  # Returns true when +field+ is nil, an empty string or the '/N' marker.
  def self.nil_field?(field)
    [nil, '', '/N'].include?(field)
  end

  # Deletes every directory directly inside +tmp_dir+ whose name is "dwc_"
  # followed only by digits.
  # NOTE(review): presumably these are the unpack directories created by
  # DarwinCore::Expander -- confirm against expander.rb.
  def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
    Dir.entries(tmp_dir).each do |entry|
      path = File.join(tmp_dir, entry)
      # \A and \z anchor the whole entry name; ^ and $ would also accept a
      # name that merely contains a matching line.
      FileUtils.rm_rf(path) if entry =~ /\Adwc_\d+\z/ && File.directory?(path)
    end
  end

  # Logger shared by the library; a silent Logger.new(nil) until replaced.
  def self.logger
    @@logger ||= Logger.new(nil)
  end

  # Replaces the shared logger.
  def self.logger=(logger)
    @@logger = logger
  end

  # Restores the silent default logger.
  def self.logger_reset
    self.logger = Logger.new(nil)
  end

  # Writes "|obj_id|message|" to the shared logger.
  #
  # obj_id  - identifier of the object the message is about
  # message - text to log
  # method  - logger severity method to call (:info by default)
  def self.logger_write(obj_id, message, method = :info)
    logger.public_send(method, "|%s|%s|" % [obj_id, message])
  end

  # dwc_path - path to the archive file
  # tmp_dir  - directory handed to DarwinCore::Archive for its working files
  def initialize(dwc_path, tmp_dir = DEFAULT_TMP_DIR)
    @dwc_path = dwc_path
    @archive = DarwinCore::Archive.new(@dwc_path, tmp_dir)
    @core = DarwinCore::Core.new(self)
    @metadata = DarwinCore::Metadata.new(@archive)
    @extensions = get_extensions
  end

  # Base name of the archive file.
  def file_name
    File.split(@dwc_path).last
  end

  # Absolute path of the archive file.
  def path
    File.expand_path(@dwc_path)
  end

  # Generates a hash from classification data with the path to each node
  # and lists of synonyms and vernacular names. Returns nil when the core
  # has no parent-id field. The normalizer instance is created once and
  # reused on later calls.
  def normalize_classification
    return nil unless has_parent_id?
    @classification_normalizer ||=
      DarwinCore::ClassificationNormalizer.new(self)
    @classification_normalizer.normalize
  end

  # True when any core field mentions highertaxonid or parentnameusageid
  # (case-insensitive).
  def has_parent_id?
    !!@core.fields.join('|').downcase.match(/highertaxonid|parentnameusageid/)
  end

  # SHA1 hex digest of the archive file. Digest::SHA1.file reads the file in
  # binary chunks and closes it, instead of loading it whole through an
  # unclosed Kernel#open handle.
  def checksum
    Digest::SHA1.file(@dwc_path).hexdigest
  end

  private

  # Builds one DarwinCore::Extension per :extension entry found under the
  # first key of the archive's meta data; [] when there is none.
  def get_extensions
    root_key = @archive.meta.keys[0]
    ext = @archive.meta[root_key][:extension]
    return [] unless ext
    # A single extension arrives as a bare entry. Kernel#Array is avoided on
    # purpose: it would split a Hash entry into key/value pairs.
    ext = [ext] unless ext.is_a?(Array)
    ext.map { |e| DarwinCore::Extension.new(self, e) }
  end
end
|
data/lib/dwc-archive/archive.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class DarwinCore
  # Unpacks an archive through DarwinCore::Expander and reads its meta.xml
  # and, when present, eml.xml.
  class Archive
    # meta - result of XmlReader.from_xml for meta.xml
    # eml  - result of XmlReader.from_xml for eml.xml, or nil when the
    #        archive has no eml.xml
    attr_reader :meta, :eml

    # archive_path - path of the archive to unpack
    # tmp_dir      - directory handed to the expander
    #
    # Raises InvalidArchiveError (after cleaning the unpacked files) when the
    # unpacked archive is not valid.
    def initialize(archive_path, tmp_dir)
      @archive_path = archive_path
      @tmp_dir = tmp_dir
      @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
      @expander.unpack
      unless valid?
        clean
        raise InvalidArchiveError
      end
      @meta = read_xml('meta.xml')
      @eml = files.include?('eml.xml') ? read_xml('eml.xml') : nil
    end

    # True when the expander has an existing path and its file list
    # contains meta.xml. Always returns true or false.
    def valid?
      path = @expander.path
      # File.exist? replaces FileTest.exists?, which newer Rubies no longer
      # provide.
      !!(path && File.exist?(path) && files && files.include?('meta.xml'))
    end

    # File names reported by the expander.
    def files
      @expander.files
    end

    # Directory holding the unpacked files.
    def files_path
      @expander.path
    end

    # Asks the expander to remove the unpacked files.
    def clean
      @expander.clean
    end

    private

    # Parses +file_name+ from the unpacked directory. The block form closes
    # the file handle as soon as parsing returns.
    # NOTE(review): assumes XmlReader.from_xml consumes the IO before
    # returning -- confirm against xml_reader.rb.
    def read_xml(file_name)
      File.open(File.join(@expander.path, file_name)) do |io|
        DarwinCore::XmlReader.from_xml(io)
      end
    end
  end
end
|
data/lib/dwc-archive/classification_normalizer.rb
DELETED
@@ -1,427 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require 'parsley-store'
|
3
|
-
|
4
|
-
class DarwinCore
|
5
|
-
|
6
|
-
# Plain value holder for one taxon of a normalized classification.
class TaxonNormalized
  attr_accessor :id, :local_id, :global_id, :source, :parent_id,
                :classification_path_id, :classification_path,
                :linnean_classification_path, :current_name,
                :current_name_canonical, :synonyms, :vernacular_names,
                :rank, :status

  # Starts with nil identifiers/rank/status, empty strings for names and
  # provenance, and a fresh empty array for every list attribute.
  def initialize
    # list attributes
    @classification_path_id = []
    @classification_path = []
    @linnean_classification_path = []
    @synonyms = []
    @vernacular_names = []
    # text attributes
    @current_name = ''
    @current_name_canonical = ''
    @source = ''
    @local_id = ''
    @global_id = ''
    # attributes without a default value
    @id = nil
    @parent_id = nil
    @rank = nil
    @status = nil
  end
end
|
27
|
-
|
28
|
-
# TaxonNormalized extended with a uuid and the list of uuids forming its path.
class GnubTaxon < TaxonNormalized
  attr_accessor :uuid, :uuid_path

  # Runs the parent initializer, then starts with an empty uuid path and no
  # uuid.
  def initialize
    super
    @uuid_path = []
    @uuid = nil
  end
end
|
37
|
-
|
38
|
-
# Synonym record attached to a normalized taxon. Assigned directly from
# Struct.new so that no extra anonymous superclass sits between the class
# and Struct.
SynonymNormalized = Struct.new(:id, :name, :canonical_name, :status,
                               :source, :local_id, :global_id)

# Vernacular (common) name record attached to a normalized taxon.
VernacularNormalized = Struct.new(:name, :language, :locality,
                                  :country_code)
|
43
|
-
|
44
|
-
# Reads the core file and the extensions of a DarwinCore instance and builds
# a hash of normalized taxa keyed by taxon id, with synonyms, vernacular
# names, classification paths and a nested tree of taxon ids. Problems found
# on the way are collected in #error_names.
class ClassificationNormalizer
  attr_reader :error_names, :tree, :normalized_data

  # Core columns collected, in this order, into the Linnean path.
  LINNEAN_RANKS = [:kingdom, :phylum, :class, :order,
                   :family, :genus, :subgenus].freeze

  # dwc_instance - object exposing #core and #extensions; each of those
  #                must respond to #fields, #id and #read
  def initialize(dwc_instance)
    @dwc = dwc_instance
    @core_fields = get_fields(@dwc.core)
    @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
    @normalized_data = {}
    @synonyms = {}
    @parser = ParsleyStore.new(1, 2)
    @name_strings = {}
    @vernacular_name_strings = {}
    @error_names = []
    @tree = {}
  end

  # The DarwinCore instance being normalized.
  def darwin_core
    @dwc
  end

  # Registers a scientific name string (each string is kept once).
  def add_name_string(name_string)
    @name_strings[name_string] = 1 unless @name_strings[name_string]
  end

  # Registers a vernacular name string (each string is kept once).
  def add_vernacular_name_string(name_string)
    unless @vernacular_name_strings[name_string]
      @vernacular_name_strings[name_string] = 1
    end
  end

  # Collected scientific names: an array, or the backing hash when
  # opts[:with_hash] is truthy.
  def name_strings(opts = {})
    process_strings(@name_strings, opts)
  end

  # Collected vernacular names: an array, or the backing hash when
  # opts[:with_hash] is truthy.
  def vernacular_name_strings(opts = {})
    process_strings(@vernacular_name_strings, opts)
  end

  # Runs the normalization and returns the hash of taxa keyed by taxon id.
  #
  # opts - :with_canonical_names (default true) computes canonical names
  #        and name-based classification paths;
  #        :with_extensions (default true) ingests synonym and vernacular
  #        extensions.
  def normalize(opts = {})
    opts = { :with_canonical_names => true,
             :with_extensions => true }.merge(opts)
    @with_canonical_names = !!opts[:with_canonical_names]
    # Start every run clean; the owning DarwinCore object reuses this
    # instance, and stale state would duplicate the reported errors.
    @synonyms = {}
    @error_names = []
    @tree = {}
    log('Started normalization of the classification')
    ingest_core
    log('Calculating the classification parent/child paths')
    if has_parent_id?
      calculate_classification_path
    else
      @normalized_data.keys.each { |id| @tree[id] = {} }
    end
    log('Ingesting data from extensions')
    ingest_extensions if opts[:with_extensions]
    @normalized_data
  end

  private

  # Sends +message+ to the DarwinCore logger, tagged with the archive object.
  def log(message)
    DarwinCore.logger_write(@dwc.object_id, message)
  end

  def process_strings(strings, opts)
    opts = { with_hash: false }.merge(opts)
    opts[:with_hash] ? strings : strings.keys
  end

  # Canonical form of +a_scientific_name+ from the parser, the name itself
  # when the parser returns nothing, or nil when canonical names are off.
  def get_canonical_name(a_scientific_name)
    return nil unless @with_canonical_names
    canonical_name = @parser.parse(a_scientific_name,
                                   :canonical_only => true)
    canonical_name.to_s.empty? ? a_scientific_name : canonical_name
  end

  # Maps the lower-cased last path segment of every field term to its
  # column index; :id is taken from the element's id entry when present.
  def get_fields(element)
    data = element.fields.each_with_object({}) do |f, res|
      field = f[:term].split('/')[-1]
      field = field ? field.downcase.to_sym : ''
      res[field] = f[:index].to_i
    end
    data[:id] = element.id[:index] if element.id
    data
  end

  def status_synonym?(status)
    status && !!status.match(/^syn/)
  end

  # Value of the optional column +name+ in +row+, or nil when the file has
  # no such column.
  def field_value(row, fields, name)
    fields[name] ? row[fields[name]] : nil
  end

  # Taxon class matching the archive flavour.
  def new_taxon
    gnub_archive? ? DarwinCore::GnubTaxon.new : DarwinCore::TaxonNormalized.new
  end

  # Records a core row as a synonym of the taxon whose id is stored in
  # column +accepted_id_index+ of the same row.
  def add_synonym_from_core(accepted_id_index, row)
    cf = @core_fields
    accepted_id = row[accepted_id_index]
    # Map synonym id => accepted taxon id (not the column index), so that a
    # taxon whose parent is this synonym can be re-attached to the accepted
    # taxon later.
    @synonyms[row[cf[:id]]] = accepted_id
    # The placeholder uses the archive's taxon class so that the later core
    # row of the accepted taxon can fill every attribute.
    taxon = (@normalized_data[accepted_id] ||= new_taxon)
    synonym = DarwinCore::SynonymNormalized.new(
      row[cf[:id]],
      row[cf[:scientificname]],
      row[cf[:canonicalname]],
      field_value(row, cf, :taxonomicstatus),
      field_value(row, cf, :source),
      field_value(row, cf, :localid),
      field_value(row, cf, :globalid)
    )
    taxon.synonyms << synonym
    add_name_string(synonym.name)
    add_name_string(synonym.canonical_name)
  end

  # Normalizes the scientific name stored in +row+ and appends the canonical
  # name as a new last column, remembering its index in
  # fields[:canonicalname]. Mutates both +row+ and +fields+.
  def set_scientific_name(row, fields)
    name_index = fields[:scientificname]
    row[name_index] = 'N/A' unless row[name_index]
    scientific_name = row[name_index].strip
    canonical_name = nil
    if separate_canonical_and_authorship?(row, fields)
      # The name column holds the bare name; authorship lives in its own
      # column and is appended to form the full scientific name.
      canonical_name = scientific_name if @with_canonical_names
      scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
    elsif @with_canonical_names
      canonical_name = get_canonical_name(row[name_index])
    end
    fields[:canonicalname] = row.size
    row << canonical_name
    row[name_index] = scientific_name
  end

  # True when the row has a non-empty authorship column whose text is not
  # already part of the scientific name.
  def separate_canonical_and_authorship?(row, fields)
    authorship_index = fields[:scientificnameauthorship]
    authorship = authorship_index ? row[authorship_index].to_s.strip : ''
    !(authorship.empty? || row[fields[:scientificname]].index(authorship))
  end

  # Reads the core file, collecting reading errors and turning every row
  # into either a taxon or a synonym of a taxon.
  #
  # Raises DarwinCore::CoreFileError when the core has no id or no
  # scientific name column.
  def ingest_core
    @normalized_data = {}
    unless @core_fields[:id] && @core_fields[:scientificname]
      raise DarwinCore::CoreFileError,
            'Darwin Core core fields must contain taxon id and scientific name'
    end
    @dwc.core.read do |rows|
      rows[1].each do |error|
        @error_names << { :data => error,
                          :error => :reading_or_encoding_error }
      end
      rows[0].each { |row| ingest_core_row(row) }
    end
  end

  # Decides whether one core row is a synonym or a taxon and stores it.
  def ingest_core_row(row)
    set_scientific_name(row, @core_fields)
    accepted_index = @core_fields[:acceptednameusageid]
    if accepted_index
      # Core has AcceptedNameUsageId: a row pointing at another id is a
      # synonym of that id.
      accepted_id = row[accepted_index]
      if accepted_id && accepted_id != row[@core_fields[:id]]
        return add_synonym_from_core(accepted_index, row)
      end
    elsif @core_fields[:taxonomicstatus] &&
          status_synonym?(row[@core_fields[:taxonomicstatus]])
      # Without an accepted-id column a synonym is recognised by its status
      # and attached to the taxon named in its parent-id column, if any.
      add_synonym_from_core(parent_id, row) if has_parent_id?
      return
    end
    add_taxon_from_core(row)
  end

  # Creates (or completes a placeholder for) the taxon described by +row+.
  def add_taxon_from_core(row)
    cf = @core_fields
    taxon = (@normalized_data[row[cf[:id]]] ||= new_taxon)
    if gnub_archive?
      taxon.uuid = row[cf[:originalnameusageid]]
      taxon.uuid_path = row[cf[:originalnameusageidpath]].split('|')
    end
    taxon.id = row[cf[:id]]
    taxon.current_name = row[cf[:scientificname]]
    taxon.current_name_canonical = row[cf[:canonicalname]]
    taxon.parent_id = has_parent_id? ? row[parent_id] : nil
    taxon.rank = row[cf[:taxonrank]] if cf[:taxonrank]
    taxon.status = row[cf[:taxonomicstatus]] if cf[:taxonomicstatus]
    taxon.source = row[cf[:source]] if cf[:source]
    taxon.local_id = row[cf[:localid]] if cf[:localid]
    taxon.global_id = row[cf[:globalid]] if cf[:globalid]
    taxon.linnean_classification_path =
      get_linnean_classification_path(row, taxon)
    add_name_string(taxon.current_name)
    canonical = taxon.current_name_canonical
    add_name_string(canonical) if canonical && !canonical.empty?
  end

  def has_parent_id?
    @core_fields.key?(:highertaxonid) ||
      @core_fields.key?(:parentnameusageid)
  end

  # Column index of the parent id (highertaxonid wins over
  # parentnameusageid).
  def parent_id
    @core_fields[:highertaxonid] || @core_fields[:parentnameusageid]
  end

  # Computes the classification path of every taxon that has none yet.
  def calculate_classification_path
    @paths_num = 0
    @normalized_data.each_value do |taxon|
      get_classification_path(taxon) if taxon.classification_path_id.empty?
    end
  end

  # Fills classification_path (names, only with canonical names on) and
  # classification_path_id of +taxon+ and hangs the taxon into @tree,
  # computing the parent's path first when needed.
  #
  # Returns the string 'error' when the path cannot be built; callers only
  # test for that value.
  def get_classification_path(taxon)
    return unless taxon.classification_path_id.empty?
    @paths_num += 1
    log("Calculated %s paths" % @paths_num) if @paths_num % 10_000 == 0
    current_node = { taxon.id => {} }
    if DarwinCore.nil_field?(taxon.parent_id)
      # Root taxon: its path is just itself.
      if @with_canonical_names
        taxon.classification_path << taxon.current_name_canonical
      end
      taxon.classification_path_id << taxon.id
      return @tree.merge!(current_node)
    end
    parent = find_parent(taxon)
    return 'error' unless parent
    if parent.classification_path_id.empty? &&
       calculate_parent_path(taxon, parent) == 'error'
      return 'error'
    end
    if @with_canonical_names
      taxon.classification_path +=
        parent.classification_path + [taxon.current_name_canonical]
    end
    taxon.classification_path_id += parent.classification_path_id + [taxon.id]
    attach_to_tree(parent, taxon, current_node)
  end

  # Taxon to use as the parent of +taxon+: the taxon stored under its
  # parent id, otherwise the accepted taxon of a synonym with that id.
  # The second case, and a parent that cannot be found at all, are both
  # reported as :deprecated_parent (with :current_parent nil when not
  # found). Returns nil when there is no usable parent.
  def find_parent(taxon)
    parent = @normalized_data[taxon.parent_id]
    return parent if parent
    accepted_id = @synonyms[taxon.parent_id]
    current_parent = accepted_id && @normalized_data[accepted_id]
    @error_names << { :data => taxon,
                      :error => :deprecated_parent,
                      :current_parent => current_parent }
    current_parent
  end

  # Computes the path of +parent+ recursively. A hierarchy deep enough to
  # exhaust the stack is reported as :too_deep_hierarchy for +taxon+.
  def calculate_parent_path(taxon, parent)
    get_classification_path(parent)
  rescue SystemStackError
    @error_names << { :data => taxon,
                      :error => :too_deep_hierarchy, :current_parent => nil }
    'error'
  end

  # Adds +node+ under the tree node of +parent+. Logs and returns 'error'
  # when the parent's node is missing from the tree.
  def attach_to_tree(parent, taxon, node)
    parent_node = parent.classification_path_id.inject(@tree) do |branch, id|
      branch && branch[id]
    end
    return parent_node.merge!(node) if parent_node
    log("Error '%s' taxon %s" %
        ['parent node is missing in the tree', taxon.id])
    'error'
  end

  # Ingests every extension that looks like a synonym file (by file name)
  # with scientific names, and every extension with vernacular names.
  def ingest_extensions
    @extensions.each do |extension|
      ext, fields = *extension
      file_name = File.split(ext.file_path).last
      if file_name =~ /synonym/i && fields.key?(:scientificname)
        ingest_synonyms(extension)
      end
      ingest_vernaculars(extension) if fields.key?(:vernacularname)
    end
  end

  # Attaches every row of a synonym extension to the taxon with the row's
  # id; rows for unknown taxa are reported as :synonym_of_unknown_taxa.
  def ingest_synonyms(extension)
    log('Ingesting synonyms extension')
    ext, fields = *extension
    ext.read do |rows|
      rows[0].each do |r|
        set_scientific_name(r, fields)
        synonym = DarwinCore::SynonymNormalized.new(
          nil,
          r[fields[:scientificname]],
          r[fields[:canonicalname]],
          field_value(r, fields, :taxonomicstatus),
          field_value(r, fields, :source),
          field_value(r, fields, :localid),
          field_value(r, fields, :globalid)
        )
        taxon = @normalized_data[r[fields[:id]]]
        if taxon
          taxon.synonyms << synonym
          add_name_string(synonym.name)
          add_name_string(synonym.canonical_name)
        else
          @error_names << { :taxon => synonym,
                            :error => :synonym_of_unknown_taxa }
        end
      end
    end
  end

  # Attaches every row of a vernacular extension to the taxon with the
  # row's id; rows for unknown taxa are reported as
  # :vernacular_of_unknown_taxa.
  def ingest_vernaculars(extension)
    log('Ingesting vernacular names extension')
    ext, fields = *extension
    ext.read do |rows|
      rows[0].each do |r|
        # The language column wins over languagecode when both exist.
        language_index = fields[:language] || fields[:languagecode]
        vernacular = DarwinCore::VernacularNormalized.new(
          r[fields[:vernacularname]],
          language_index ? r[language_index] : nil,
          field_value(r, fields, :locality),
          field_value(r, fields, :countrycode)
        )
        taxon = @normalized_data[r[fields[:id]]]
        if taxon
          taxon.vernacular_names << vernacular
          add_vernacular_name_string(vernacular.name)
        else
          @error_names << { :vernacular_name => vernacular,
                            :error => :vernacular_of_unknown_taxa }
        end
      end
    end
  end

  # Collects [value, rank] pairs for every Linnean rank column present in
  # the core, from kingdom down to subgenus.
  def get_linnean_classification_path(row, taxon)
    LINNEAN_RANKS.each_with_object([]) do |clade, res|
      res << [row[@core_fields[clade]], clade] if @core_fields[clade]
    end
  end

  # Truthy when the core has an originalnameusageidpath column.
  def gnub_archive?
    @core_fields[:originalnameusageidpath]
  end
end
|
427
|
-
end
|