dwc-archive 0.5.8 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +39 -20
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.8
|
1
|
+
0.5.9
|
@@ -29,6 +29,7 @@ class DarwinCore
|
|
29
29
|
@core_fields = get_fields(@dwc.core)
|
30
30
|
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
|
31
31
|
@normalized_data = {}
|
32
|
+
@synonyms = {}
|
32
33
|
@parser = ParsleyStore.new(1,2)
|
33
34
|
@name_strings = {}
|
34
35
|
@error_names = []
|
@@ -61,7 +62,6 @@ class DarwinCore
|
|
61
62
|
a_scientific_name.force_encoding('utf-8')
|
62
63
|
end
|
63
64
|
canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
|
64
|
-
add_name_string(canonical_name) unless canonical_name.to_s.empty?
|
65
65
|
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
|
66
66
|
end
|
67
67
|
|
@@ -76,11 +76,15 @@ class DarwinCore
|
|
76
76
|
end
|
77
77
|
|
78
78
|
def add_synonym_from_core(taxon_id, row)
|
79
|
+
@synonyms[row[@core_fields[:id]]] = taxon_id
|
79
80
|
taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
80
|
-
|
81
|
+
synonym = SynonymNormalized.new(
|
81
82
|
row[@core_fields[:scientificname]],
|
82
83
|
row[@core_fields[:canonicalname]],
|
83
84
|
@core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
|
85
|
+
taxon.synonyms << synonym
|
86
|
+
add_name_string(synonym.name)
|
87
|
+
add_name_string(synonym.canonical_name)
|
84
88
|
end
|
85
89
|
|
86
90
|
def set_scientific_name(row, fields)
|
@@ -95,7 +99,6 @@ class DarwinCore
|
|
95
99
|
raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname])
|
96
100
|
@dwc.core.read do |rows|
|
97
101
|
rows[0].each do |r|
|
98
|
-
add_name_string(r[@core_fields[:scientificname]])
|
99
102
|
set_scientific_name(r, @core_fields)
|
100
103
|
#core has AcceptedNameUsageId
|
101
104
|
if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
|
@@ -110,6 +113,8 @@ class DarwinCore
|
|
110
113
|
taxon.parent_id = r[parent_id]
|
111
114
|
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
|
112
115
|
taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
|
116
|
+
add_name_string(taxon.current_name)
|
117
|
+
add_name_string(taxon.current_name_canonical)
|
113
118
|
end
|
114
119
|
end
|
115
120
|
end
|
@@ -123,11 +128,8 @@ class DarwinCore
|
|
123
128
|
@paths_num = 0
|
124
129
|
@normalized_data.each do |taxon_id, taxon|
|
125
130
|
next if !taxon.classification_path.empty?
|
126
|
-
|
127
|
-
|
128
|
-
rescue DarwinCore::ParentNotCurrentError
|
129
|
-
next
|
130
|
-
end
|
131
|
+
res = get_classification_path(taxon)
|
132
|
+
next if res == 'error'
|
131
133
|
end
|
132
134
|
end
|
133
135
|
|
@@ -141,15 +143,24 @@ class DarwinCore
|
|
141
143
|
taxon.classification_path_id << taxon.id
|
142
144
|
@tree.merge!(current_node)
|
143
145
|
else
|
144
|
-
|
146
|
+
parent_cp = nil
|
147
|
+
if @normalized_data[taxon.parent_id]
|
145
148
|
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
149
|
+
else
|
150
|
+
current_parent = @normalized_data[@synonyms[taxon.parent_id]]
|
151
|
+
if current_parent
|
152
|
+
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' is deprecated"
|
153
|
+
@error_names << {:name => taxon, :error => :deprecated_parent, :current_parent => current_parent }
|
154
|
+
parent_cp = current_parent.classification_path
|
155
|
+
else
|
156
|
+
error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' not found"
|
157
|
+
@error_names << {:name => taxon, :error => :deprecated_parent, :current_parent => nil}
|
158
|
+
end
|
150
159
|
end
|
160
|
+
return 'error' unless parent_cp
|
151
161
|
if parent_cp.empty?
|
152
|
-
get_classification_path(@normalized_data[taxon.parent_id])
|
162
|
+
res = get_classification_path(@normalized_data[taxon.parent_id])
|
163
|
+
return res if res == 'error'
|
153
164
|
taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
154
165
|
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
155
166
|
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
@@ -158,7 +169,12 @@ class DarwinCore
|
|
158
169
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
159
170
|
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
160
171
|
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
161
|
-
|
172
|
+
begin
|
173
|
+
parent_node.merge!(current_node)
|
174
|
+
rescue NoMethodError => e
|
175
|
+
DarwinCore.logger_write(@dwc.object_id, "Error '%s' taxon %s" % [e.message, taxon.id])
|
176
|
+
return 'error'
|
177
|
+
end
|
162
178
|
end
|
163
179
|
end
|
164
180
|
end
|
@@ -176,12 +192,14 @@ class DarwinCore
|
|
176
192
|
ext, fields = *extension
|
177
193
|
ext.read do |rows|
|
178
194
|
rows[0].each do |r|
|
179
|
-
add_name_string(r[fields[:scientificname]])
|
180
195
|
set_scientific_name(r, fields)
|
181
|
-
|
196
|
+
synonym = SynonymNormalized.new(
|
182
197
|
r[fields[:scientificname]],
|
183
198
|
r[fields[:canonicalname]],
|
184
199
|
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
|
200
|
+
@normalized_data[r[fields[:id]]].synonyms << synonym
|
201
|
+
add_name_string(synonym.name)
|
202
|
+
add_name_string(synonym.canonical_name)
|
185
203
|
end
|
186
204
|
end
|
187
205
|
end
|
@@ -191,14 +209,15 @@ class DarwinCore
|
|
191
209
|
ext, fields = *extension
|
192
210
|
ext.read do |rows|
|
193
211
|
rows[0].each do |r|
|
194
|
-
|
195
|
-
@normalized_data[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
212
|
+
vernacular = VernacularNormalized.new(
|
196
213
|
r[fields[:vernacularname]],
|
197
214
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
198
|
-
|
215
|
+
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
216
|
+
add_name_string(vernacular.name)
|
199
217
|
end
|
200
218
|
end
|
201
219
|
end
|
202
220
|
|
203
221
|
end
|
204
222
|
end
|
223
|
+
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 8
|
10
|
-
version: 0.5.8
|
9
|
+
- 9
|
10
|
+
version: 0.5.9
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-12-03 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|