dwc-archive 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +17 -17
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.2
|
1
|
+
0.5.3
|
data/lib/dwc-archive/classification_normalizer.rb
CHANGED
@@ -22,13 +22,13 @@ class DarwinCore
|
|
22
22
|
class VernacularNormalized < Struct.new(:name, :language);end
|
23
23
|
|
24
24
|
class ClassificationNormalizer
|
25
|
-
attr_reader :error_names, :tree
|
25
|
+
attr_reader :error_names, :tree, :normalized_data
|
26
26
|
|
27
27
|
def initialize(dwc_instance)
|
28
28
|
@dwc = dwc_instance
|
29
29
|
@core_fields = get_fields(@dwc.core)
|
30
30
|
@extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
|
31
|
-
@
|
31
|
+
@normalized_data = {}
|
32
32
|
@parser = ParsleyStore.new(1,2)
|
33
33
|
@name_strings = {}
|
34
34
|
@error_names = []
|
@@ -45,13 +45,13 @@ class DarwinCore
|
|
45
45
|
|
46
46
|
def normalize
|
47
47
|
DarwinCore.logger_write(@dwc.object_id, "Started normalization of the classification")
|
48
|
-
@
|
48
|
+
@normalized_data = {}
|
49
49
|
ingest_core
|
50
50
|
DarwinCore.logger_write(@dwc.object_id, "Calculating the classification parent/child paths")
|
51
51
|
calculate_classification_path
|
52
52
|
DarwinCore.logger_write(@dwc.object_id, "Ingesting data from extensions")
|
53
53
|
ingest_extensions
|
54
|
-
@
|
54
|
+
@normalized_data
|
55
55
|
end
|
56
56
|
|
57
57
|
private
|
@@ -62,7 +62,7 @@ class DarwinCore
|
|
62
62
|
end
|
63
63
|
canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
|
64
64
|
add_name_string(a_scientific_name)
|
65
|
-
add_name_string(canonical_name) unless canonical_name.empty?
|
65
|
+
add_name_string(canonical_name) unless canonical_name.to_s.empty?
|
66
66
|
canonical_name.empty? ? a_scientific_name : canonical_name
|
67
67
|
end
|
68
68
|
|
@@ -77,7 +77,7 @@ class DarwinCore
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def add_synonym_from_core(taxon_id, row)
|
80
|
-
taxon = @
|
80
|
+
taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
81
81
|
taxon.synonyms << SynonymNormalized.new(
|
82
82
|
row[@core_fields[:scientificname]],
|
83
83
|
row[@core_fields[:canonicalname]],
|
@@ -103,7 +103,7 @@ class DarwinCore
|
|
103
103
|
elsif !@core_fields[:acceptednameusageid] && status_synonym?(r[@core_fields[:taxonomicstatus]])
|
104
104
|
add_synonym_from_core(parent_id, r)
|
105
105
|
else
|
106
|
-
taxon = @
|
106
|
+
taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
|
107
107
|
taxon.id = r[@core_fields[:id]]
|
108
108
|
taxon.current_name = r[@core_fields[:scientificname]]
|
109
109
|
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
|
@@ -120,7 +120,7 @@ class DarwinCore
|
|
120
120
|
end
|
121
121
|
|
122
122
|
def calculate_classification_path
|
123
|
-
@
|
123
|
+
@normalized_data.each do |taxon_id, taxon|
|
124
124
|
next if !taxon.classification_path.empty?
|
125
125
|
begin
|
126
126
|
get_classification_path(taxon)
|
@@ -139,22 +139,22 @@ class DarwinCore
|
|
139
139
|
@tree.merge!(current_node)
|
140
140
|
else
|
141
141
|
begin
|
142
|
-
parent_cp = @
|
142
|
+
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
143
143
|
rescue NoMethodError #name has a parent which is not a current name
|
144
144
|
error = "The parent of the taxon \'#{taxon.current_name}\' is deprecated"
|
145
145
|
@error_names << {:name => taxon, :error => error}
|
146
146
|
raise DarwinCore::ParentNotCurrentError, error
|
147
147
|
end
|
148
148
|
if parent_cp.empty?
|
149
|
-
get_classification_path(@
|
150
|
-
taxon.classification_path += @
|
151
|
-
taxon.classification_path_id += @
|
152
|
-
parent_node = @
|
149
|
+
get_classification_path(@normalized_data[taxon.parent_id])
|
150
|
+
taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
151
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
152
|
+
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
153
153
|
parent_node.merge!(current_node)
|
154
154
|
else
|
155
155
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
156
|
-
taxon.classification_path_id += @
|
157
|
-
parent_node = @
|
156
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
|
157
|
+
parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
158
158
|
parent_node.merge!(current_node)
|
159
159
|
end
|
160
160
|
end
|
@@ -174,7 +174,7 @@ class DarwinCore
|
|
174
174
|
ext.read do |rows|
|
175
175
|
rows[0].each do |r|
|
176
176
|
set_scientific_name(r, fields)
|
177
|
-
@
|
177
|
+
@normalized_data[r[fields[:id]]].synonyms << SynonymNormalized.new(
|
178
178
|
r[fields[:scientificname]],
|
179
179
|
r[fields[:canonicalname]],
|
180
180
|
fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
|
@@ -187,7 +187,7 @@ class DarwinCore
|
|
187
187
|
ext, fields = *extension
|
188
188
|
ext.read do |rows|
|
189
189
|
rows[0].each do |r|
|
190
|
-
@
|
190
|
+
@normalized_data[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
191
191
|
r[fields[:vernacularname]],
|
192
192
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
193
193
|
add_name_string(r[fields[:vernacularname]])
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 13
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 2
|
10
|
-
version: 0.5.2
|
9
|
+
- 3
|
10
|
+
version: 0.5.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|