dwc-archive 0.5.8 → 0.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +39 -20
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.8
|
1
|
+
0.5.9
|
data/lib/dwc-archive/classification_normalizer.rb
CHANGED
@@ -29,6 +29,7 @@ class DarwinCore
|
|
29
29
|
@core_fields = get_fields(@dwc.core)
|
30
30
|
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
|
31
31
|
@normalized_data = {}
|
32
|
+
@synonyms = {}
|
32
33
|
@parser = ParsleyStore.new(1,2)
|
33
34
|
@name_strings = {}
|
34
35
|
@error_names = []
|
@@ -61,7 +62,6 @@ class DarwinCore
|
|
61
62
|
a_scientific_name.force_encoding('utf-8')
|
62
63
|
end
|
63
64
|
canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
|
64
|
-
add_name_string(canonical_name) unless canonical_name.to_s.empty?
|
65
65
|
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
|
66
66
|
end
|
67
67
|
|
@@ -76,11 +76,15 @@ class DarwinCore
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def add_synonym_from_core(taxon_id, row)
|
79
|
+
@synonyms[row[@core_fields[:id]]] = taxon_id
|
79
80
|
taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
80
|
-
|
81
|
+
synonym = SynonymNormalized.new(
|
81
82
|
row[@core_fields[:scientificname]],
|
82
83
|
row[@core_fields[:canonicalname]],
|
83
84
|
@core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
|
85
|
+
taxon.synonyms << synonym
|
86
|
+
add_name_string(synonym.name)
|
87
|
+
add_name_string(synonym.canonical_name)
|
84
88
|
end
|
85
89
|
|
86
90
|
def set_scientific_name(row, fields)
|
@@ -95,7 +99,6 @@ class DarwinCore
|
|
95
99
|
raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname])
|
96
100
|
@dwc.core.read do |rows|
|
97
101
|
rows[0].each do |r|
|
98
|
-
add_name_string(r[@core_fields[:scientificname]])
|
99
102
|
set_scientific_name(r, @core_fields)
|
100
103
|
#core has AcceptedNameUsageId
|
101
104
|
if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
|
@@ -110,6 +113,8 @@ class DarwinCore
|
|
110
113
|
taxon.parent_id = r[parent_id]
|
111
114
|
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
|
112
115
|
taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
|
116
|
+
add_name_string(taxon.current_name)
|
117
|
+
add_name_string(taxon.current_name_canonical)
|
113
118
|
end
|
114
119
|
end
|
115
120
|
end
|
@@ -123,11 +128,8 @@ class DarwinCore
|
|
123
128
|
@paths_num = 0
|
124
129
|
@normalized_data.each do |taxon_id, taxon|
|
125
130
|
next if !taxon.classification_path.empty?
|
126
|
-
|
127
|
-
|
128
|
-
rescue DarwinCore::ParentNotCurrentError
|
129
|
-
next
|
130
|
-
end
|
131
|
+
res = get_classification_path(taxon)
|
132
|
+
next if res == 'error'
|
131
133
|
end
|
132
134
|
end
|
133
135
|
|
@@ -141,15 +143,24 @@ class DarwinCore
|
|
141
143
|
taxon.classification_path_id << taxon.id
|
142
144
|
@tree.merge!(current_node)
|
143
145
|
else
|
144
|
-
|
146
|
+
parent_cp = nil
|
147
|
+
if @normalized_data[taxon.parent_id]
|
145
148
|
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
149
|
+
else
|
150
|
+
current_parent = @normalized_data[@synonyms[taxon.parent_id]]
|
151
|
+
if current_parent
|
152
|
+
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' is deprecated"
|
153
|
+
@error_names << {:name => taxon, :error => :deprecated_parent, :current_parent => current_parent }
|
154
|
+
parent_cp = current_parent.classification_path
|
155
|
+
else
|
156
|
+
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' not found"
|
157
|
+
@error_names << {:name => taxon, :error => :deprecated_parent, :current_parent => nil}
|
158
|
+
end
|
150
159
|
end
|
160
|
+
return 'error' unless parent_cp
|
151
161
|
if parent_cp.empty?
|
152
|
-
get_classification_path(@normalized_data[taxon.parent_id])
|
162
|
+
res = get_classification_path(@normalized_data[taxon.parent_id])
|
163
|
+
return res if res == 'error'
|
153
164
|
taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
154
165
|
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
155
166
|
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
@@ -158,7 +169,12 @@ class DarwinCore
|
|
158
169
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
159
170
|
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
160
171
|
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
161
|
-
|
172
|
+
begin
|
173
|
+
parent_node.merge!(current_node)
|
174
|
+
rescue NoMethodError => e
|
175
|
+
DarwinCore.logger_write(@dwc.object_id, "Error '%s' taxon %s" % [e.message, taxon.id])
|
176
|
+
return 'error'
|
177
|
+
end
|
162
178
|
end
|
163
179
|
end
|
164
180
|
end
|
@@ -176,12 +192,14 @@ class DarwinCore
|
|
176
192
|
ext, fields = *extension
|
177
193
|
ext.read do |rows|
|
178
194
|
rows[0].each do |r|
|
179
|
-
add_name_string(r[fields[:scientificname]])
|
180
195
|
set_scientific_name(r, fields)
|
181
|
-
|
196
|
+
synonym = SynonymNormalized.new(
|
182
197
|
r[fields[:scientificname]],
|
183
198
|
r[fields[:canonicalname]],
|
184
199
|
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
|
200
|
+
@normalized_data[r[fields[:id]]].synonyms << synonym
|
201
|
+
add_name_string(synonym.name)
|
202
|
+
add_name_string(synonym.canonical_name)
|
185
203
|
end
|
186
204
|
end
|
187
205
|
end
|
@@ -191,14 +209,15 @@ class DarwinCore
|
|
191
209
|
ext, fields = *extension
|
192
210
|
ext.read do |rows|
|
193
211
|
rows[0].each do |r|
|
194
|
-
|
195
|
-
@normalized_data[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
212
|
+
vernacular = VernacularNormalized.new(
|
196
213
|
r[fields[:vernacularname]],
|
197
214
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
198
|
-
|
215
|
+
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
216
|
+
add_name_string(vernacular.name)
|
199
217
|
end
|
200
218
|
end
|
201
219
|
end
|
202
220
|
|
203
221
|
end
|
204
222
|
end
|
223
|
+
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 8
|
10
|
-
version: 0.5.8
|
9
|
+
- 9
|
10
|
+
version: 0.5.9
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-12-03 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|