dwc-archive 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +17 -17
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.2
|
1
|
+
0.5.3
|
@@ -22,13 +22,13 @@ class DarwinCore
|
|
22
22
|
class VernacularNormalized < Struct.new(:name, :language);end
|
23
23
|
|
24
24
|
class ClassificationNormalizer
|
25
|
-
attr_reader :error_names, :tree
|
25
|
+
attr_reader :error_names, :tree, :normalized_data
|
26
26
|
|
27
27
|
def initialize(dwc_instance)
|
28
28
|
@dwc = dwc_instance
|
29
29
|
@core_fields = get_fields(@dwc.core)
|
30
30
|
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
|
31
|
-
@
|
31
|
+
@normalized_data = {}
|
32
32
|
@parser = ParsleyStore.new(1,2)
|
33
33
|
@name_strings = {}
|
34
34
|
@error_names = []
|
@@ -45,13 +45,13 @@ class DarwinCore
|
|
45
45
|
|
46
46
|
def normalize
|
47
47
|
DarwinCore.logger_write(@dwc.object_id, "Started normalization of the classification")
|
48
|
-
@
|
48
|
+
@normalized_data = {}
|
49
49
|
ingest_core
|
50
50
|
DarwinCore.logger_write(@dwc.object_id, "Calculating the classification parent/child paths")
|
51
51
|
calculate_classification_path
|
52
52
|
DarwinCore.logger_write(@dwc.object_id, "Ingesting data from extensions")
|
53
53
|
ingest_extensions
|
54
|
-
@
|
54
|
+
@normalized_data
|
55
55
|
end
|
56
56
|
|
57
57
|
private
|
@@ -62,7 +62,7 @@ class DarwinCore
|
|
62
62
|
end
|
63
63
|
canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
|
64
64
|
add_name_string(a_scientific_name)
|
65
|
-
add_name_string(canonical_name) unless canonical_name.empty?
|
65
|
+
add_name_string(canonical_name) unless canonical_name.to_s.empty?
|
66
66
|
canonical_name.empty? ? a_scientific_name : canonical_name
|
67
67
|
end
|
68
68
|
|
@@ -77,7 +77,7 @@ class DarwinCore
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def add_synonym_from_core(taxon_id, row)
|
80
|
-
taxon = @
|
80
|
+
taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
81
81
|
taxon.synonyms << SynonymNormalized.new(
|
82
82
|
row[@core_fields[:scientificname]],
|
83
83
|
row[@core_fields[:canonicalname]],
|
@@ -103,7 +103,7 @@ class DarwinCore
|
|
103
103
|
elsif !@core_fields[:acceptednameusageid] && status_synonym?(r[@core_fields[:taxonomicstatus]])
|
104
104
|
add_synonym_from_core(parent_id, r)
|
105
105
|
else
|
106
|
-
taxon = @
|
106
|
+
taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
|
107
107
|
taxon.id = r[@core_fields[:id]]
|
108
108
|
taxon.current_name = r[@core_fields[:scientificname]]
|
109
109
|
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
|
@@ -120,7 +120,7 @@ class DarwinCore
|
|
120
120
|
end
|
121
121
|
|
122
122
|
def calculate_classification_path
|
123
|
-
@
|
123
|
+
@normalized_data.each do |taxon_id, taxon|
|
124
124
|
next if !taxon.classification_path.empty?
|
125
125
|
begin
|
126
126
|
get_classification_path(taxon)
|
@@ -139,22 +139,22 @@ class DarwinCore
|
|
139
139
|
@tree.merge!(current_node)
|
140
140
|
else
|
141
141
|
begin
|
142
|
-
parent_cp = @
|
142
|
+
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
143
143
|
rescue NoMethodError #name has a parent which is not a current name
|
144
144
|
error = "The parent of the taxon \'#{taxon.current_name}\' is deprecated"
|
145
145
|
@error_names << {:name => taxon, :error => error}
|
146
146
|
raise DarwinCore::ParentNotCurrentError, error
|
147
147
|
end
|
148
148
|
if parent_cp.empty?
|
149
|
-
get_classification_path(@
|
150
|
-
taxon.classification_path += @
|
151
|
-
taxon.classification_path_id += @
|
152
|
-
parent_node = @
|
149
|
+
get_classification_path(@normalized_data[taxon.parent_id])
|
150
|
+
taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
151
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
152
|
+
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
153
153
|
parent_node.merge!(current_node)
|
154
154
|
else
|
155
155
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
156
|
-
taxon.classification_path_id += @
|
157
|
-
parent_node = @
|
156
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
157
|
+
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
158
158
|
parent_node.merge!(current_node)
|
159
159
|
end
|
160
160
|
end
|
@@ -174,7 +174,7 @@ class DarwinCore
|
|
174
174
|
ext.read do |rows|
|
175
175
|
rows[0].each do |r|
|
176
176
|
set_scientific_name(r, fields)
|
177
|
-
@
|
177
|
+
@normalized_data[r[fields[:id]]].synonyms << SynonymNormalized.new(
|
178
178
|
r[fields[:scientificname]],
|
179
179
|
r[fields[:canonicalname]],
|
180
180
|
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
|
@@ -187,7 +187,7 @@ class DarwinCore
|
|
187
187
|
ext, fields = *extension
|
188
188
|
ext.read do |rows|
|
189
189
|
rows[0].each do |r|
|
190
|
-
@
|
190
|
+
@normalized_data[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
191
191
|
r[fields[:vernacularname]],
|
192
192
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
193
193
|
add_name_string(r[fields[:vernacularname]])
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 2
|
10
|
-
version: 0.5.2
|
9
|
+
- 3
|
10
|
+
version: 0.5.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|