dwc-archive 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.11
|
@@ -61,3 +61,5 @@ Feature: Reading of a Darwing Core Archive
|
|
61
61
|
Then I am able to use DarwinCore#normalize_classification method
|
62
62
|
And get normalized classification in expected format
|
63
63
|
And there are paths, synonyms and vernacular names in normalized classification
|
64
|
+
And names used in classification can be accessed by "name_strings" method
|
65
|
+
And nodes_ids organized in trees can be accessed by "tree" method
|
@@ -162,7 +162,8 @@ Then /^I can read extensions content using block$/ do
|
|
162
162
|
end
|
163
163
|
|
164
164
|
Then /^I am able to use DarwinCore\#normalize_classification method$/ do
|
165
|
-
@
|
165
|
+
@cn = DarwinCore::ClassificationNormalizer.new(@dwc)
|
166
|
+
@normalized_classification = @cn.normalize
|
166
167
|
end
|
167
168
|
|
168
169
|
Then /^get normalized classification in expected format$/ do
|
@@ -191,3 +192,27 @@ Then /^there are paths, synonyms and vernacular names in normalized classificati
|
|
191
192
|
@vernaculars_are_generated.should be_true
|
192
193
|
@synonyms_are_generated.should be_true
|
193
194
|
end
|
195
|
+
|
196
|
+
Then /^names used in classification can be accessed by "([^"]*)" method$/ do |name_strings|
|
197
|
+
names = @cn.send(name_strings.to_sym)
|
198
|
+
names.size.should > @normalized_classification.size
|
199
|
+
end
|
200
|
+
|
201
|
+
|
202
|
+
Then /^nodes_ids organized in trees can be accessed by "([^"]*)" method$/ do |tree|
|
203
|
+
def flatten_tree(data, keys)
|
204
|
+
data.each do |k, v|
|
205
|
+
keys << k
|
206
|
+
if v != {}
|
207
|
+
debugger if v.class != Hash
|
208
|
+
flatten_tree(v, keys)
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
tree = @cn.send(tree.to_sym)
|
213
|
+
tree.class.should == Hash
|
214
|
+
keys = []
|
215
|
+
flatten_tree(tree, keys)
|
216
|
+
@normalized_classification.size.should == keys.size
|
217
|
+
end
|
218
|
+
|
@@ -4,13 +4,14 @@ require 'parsley-store'
|
|
4
4
|
class DarwinCore
|
5
5
|
|
6
6
|
class TaxonNormalized
|
7
|
-
attr_accessor :id, :parent_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
|
7
|
+
attr_accessor :id, :parent_id, :classification_path_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
|
8
8
|
|
9
9
|
def initialize
|
10
10
|
@id = @parent_id = @rank = @status = nil
|
11
11
|
@current_name = ''
|
12
12
|
@current_name_canonical = ''
|
13
13
|
@classification_path = []
|
14
|
+
@classification_path_id = []
|
14
15
|
@synonyms = []
|
15
16
|
@vernacular_names = []
|
16
17
|
end
|
@@ -22,7 +23,7 @@ class DarwinCore
|
|
22
23
|
|
23
24
|
class ClassificationNormalizer
|
24
25
|
attr_accessor :verbose
|
25
|
-
attr_reader :error_names
|
26
|
+
attr_reader :error_names, :tree
|
26
27
|
|
27
28
|
def initialize(dwc_instance, verbose = false)
|
28
29
|
@dwc = dwc_instance
|
@@ -32,7 +33,17 @@ class DarwinCore
|
|
32
33
|
@parser = ParsleyStore.new(1,2)
|
33
34
|
@verbose = verbose
|
34
35
|
@verbose_count = 10000
|
36
|
+
@name_strings = {}
|
35
37
|
@error_names = []
|
38
|
+
@tree = {}
|
39
|
+
end
|
40
|
+
|
41
|
+
def add_name_string(name_string)
|
42
|
+
@name_strings[name_string] = 1 unless @name_strings[name_string]
|
43
|
+
end
|
44
|
+
|
45
|
+
def name_strings
|
46
|
+
@name_strings.keys
|
36
47
|
end
|
37
48
|
|
38
49
|
def normalize
|
@@ -55,6 +66,8 @@ class DarwinCore
|
|
55
66
|
@parser = ParsleyStore.new(1,2)
|
56
67
|
parsed_name = @parser.parse(a_scientific_name)[:scientificName]
|
57
68
|
end
|
69
|
+
add_name_string(a_scientific_name)
|
70
|
+
add_name_string(parsed_name[:canonical]) if parsed_name[:parsed]
|
58
71
|
parsed_name[:parsed] ? parsed_name[:canonical] : a_scientific_name
|
59
72
|
end
|
60
73
|
|
@@ -118,8 +131,11 @@ class DarwinCore
|
|
118
131
|
|
119
132
|
def get_classification_path(taxon)
|
120
133
|
return if !taxon.classification_path.empty?
|
134
|
+
current_node = {taxon.id => {}}
|
121
135
|
if DarwinCore.nil_field?(taxon.parent_id)
|
122
136
|
taxon.classification_path << taxon.current_name_canonical
|
137
|
+
taxon.classification_path_id << taxon.id
|
138
|
+
@tree.merge!(current_node)
|
123
139
|
else
|
124
140
|
begin
|
125
141
|
parent_cp = @res[taxon.parent_id].classification_path
|
@@ -131,8 +147,14 @@ class DarwinCore
|
|
131
147
|
if parent_cp.empty?
|
132
148
|
get_classification_path(@res[taxon.parent_id])
|
133
149
|
taxon.classification_path += @res[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
150
|
+
taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
|
151
|
+
parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
152
|
+
parent_node.merge!(current_node)
|
134
153
|
else
|
135
154
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
155
|
+
taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
|
156
|
+
parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
157
|
+
parent_node.merge!(current_node)
|
136
158
|
end
|
137
159
|
end
|
138
160
|
end
|
@@ -167,6 +189,7 @@ class DarwinCore
|
|
167
189
|
@res[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
168
190
|
r[fields[:vernacularname]],
|
169
191
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
192
|
+
add_name_string(r[fields[:vernacularname]])
|
170
193
|
end
|
171
194
|
end
|
172
195
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 4
|
9
|
-
- 10
|
10
|
-
version: 0.4.10
|
9
|
+
- 11
|
10
|
+
version: 0.4.11
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-21 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|