dwc-archive 0.4.10 → 0.4.11
Sign up to get free protection for your applications and to get access to all the features.
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.11
|
@@ -61,3 +61,5 @@ Feature: Reading of a Darwing Core Archive
|
|
61
61
|
Then I am able to use DarwinCore#normalize_classification method
|
62
62
|
And get normalized classification in expected format
|
63
63
|
And there are paths, synonyms and vernacular names in normalized classification
|
64
|
+
And names used in classification can be accessed by "name_strings" method
|
65
|
+
And nodes_ids organized in trees can be accessed by "tree" method
|
@@ -162,7 +162,8 @@ Then /^I can read extensions content using block$/ do
|
|
162
162
|
end
|
163
163
|
|
164
164
|
Then /^I am able to use DarwinCore\#normalize_classification method$/ do
|
165
|
-
@
|
165
|
+
@cn = DarwinCore::ClassificationNormalizer.new(@dwc)
|
166
|
+
@normalized_classification = @cn.normalize
|
166
167
|
end
|
167
168
|
|
168
169
|
Then /^get normalized classification in expected format$/ do
|
@@ -191,3 +192,27 @@ Then /^there are paths, synonyms and vernacular names in normalized classificati
|
|
191
192
|
@vernaculars_are_generated.should be_true
|
192
193
|
@synonyms_are_generated.should be_true
|
193
194
|
end
|
195
|
+
|
196
|
+
# Calls the normalizer reader named in the step text (e.g. "name_strings")
# and expects it to return more entries than there are records in
# @normalized_classification.
Then /^names used in classification can be accessed by "([^"]*)" method$/ do |name_strings|
  name_count = @cn.send(name_strings.to_sym).size
  name_count.should > @normalized_classification.size
end
|
200
|
+
|
201
|
+
|
202
|
+
# Calls the normalizer reader named in the step text (e.g. "tree"), expects a
# Hash of nested Hashes, and checks that the number of keys found at every
# depth equals the number of records in @normalized_classification.
Then /^nodes_ids organized in trees can be accessed by "([^"]*)" method$/ do |tree|
  # A recursive lambda keeps the helper local to this step; a nested `def`
  # would (re)define a real method every time the step runs.
  collect_keys = lambda do |node, collected|
    node.each do |k, v|
      collected << k
      next if v == {}
      # A non-Hash child means the tree is malformed. Fail the step instead
      # of calling `debugger`, which stalls unattended runs or raises
      # NameError when no debugger library is loaded.
      v.class.should == Hash
      collect_keys.call(v, collected)
    end
  end
  tree = @cn.send(tree.to_sym)
  tree.class.should == Hash
  keys = []
  collect_keys.call(tree, keys)
  @normalized_classification.size.should == keys.size
end
|
218
|
+
|
@@ -4,13 +4,14 @@ require 'parsley-store'
|
|
4
4
|
class DarwinCore
|
5
5
|
|
6
6
|
class TaxonNormalized
|
7
|
-
attr_accessor :id, :parent_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
|
7
|
+
attr_accessor :id, :parent_id, :classification_path_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
|
8
8
|
|
9
9
|
# Builds an empty normalized taxon: scalar fields start as nil, name fields
# as empty strings, and every collection field as its own empty Array.
def initialize
  @id = nil
  @parent_id = nil
  @rank = nil
  @status = nil

  @current_name = ''
  @current_name_canonical = ''

  # Each collection gets a separate Array so they never alias one another.
  @classification_path = []
  @classification_path_id = []
  @synonyms = []
  @vernacular_names = []
end
|
@@ -22,7 +23,7 @@ class DarwinCore
|
|
22
23
|
|
23
24
|
class ClassificationNormalizer
|
24
25
|
attr_accessor :verbose
|
25
|
-
attr_reader :error_names
|
26
|
+
attr_reader :error_names, :tree
|
26
27
|
|
27
28
|
def initialize(dwc_instance, verbose = false)
|
28
29
|
@dwc = dwc_instance
|
@@ -32,7 +33,17 @@ class DarwinCore
|
|
32
33
|
@parser = ParsleyStore.new(1,2)
|
33
34
|
@verbose = verbose
|
34
35
|
@verbose_count = 10000
|
36
|
+
@name_strings = {}
|
35
37
|
@error_names = []
|
38
|
+
@tree = {}
|
39
|
+
end
|
40
|
+
|
41
|
+
# Records +name_string+ as a key of @name_strings (value 1) unless it is
# already present. Returns 1 when the key is newly stored and nil when it
# was already there.
def add_name_string(name_string)
  return if @name_strings[name_string]

  @name_strings[name_string] = 1
end
|
44
|
+
|
45
|
+
# Returns an Array of the keys currently held in @name_strings, i.e. every
# distinct string passed to add_name_string so far.
def name_strings
  registry = @name_strings
  registry.keys
end
|
37
48
|
|
38
49
|
def normalize
|
@@ -55,6 +66,8 @@ class DarwinCore
|
|
55
66
|
@parser = ParsleyStore.new(1,2)
|
56
67
|
parsed_name = @parser.parse(a_scientific_name)[:scientificName]
|
57
68
|
end
|
69
|
+
add_name_string(a_scientific_name)
|
70
|
+
add_name_string(parsed_name[:canonical]) if parsed_name[:parsed]
|
58
71
|
parsed_name[:parsed] ? parsed_name[:canonical] : a_scientific_name
|
59
72
|
end
|
60
73
|
|
@@ -118,8 +131,11 @@ class DarwinCore
|
|
118
131
|
|
119
132
|
def get_classification_path(taxon)
|
120
133
|
return if !taxon.classification_path.empty?
|
134
|
+
current_node = {taxon.id => {}}
|
121
135
|
if DarwinCore.nil_field?(taxon.parent_id)
|
122
136
|
taxon.classification_path << taxon.current_name_canonical
|
137
|
+
taxon.classification_path_id << taxon.id
|
138
|
+
@tree.merge!(current_node)
|
123
139
|
else
|
124
140
|
begin
|
125
141
|
parent_cp = @res[taxon.parent_id].classification_path
|
@@ -131,8 +147,14 @@ class DarwinCore
|
|
131
147
|
if parent_cp.empty?
|
132
148
|
get_classification_path(@res[taxon.parent_id])
|
133
149
|
taxon.classification_path += @res[taxon.parent_id].classification_path + [taxon.current_name_canonical]
|
150
|
+
taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
|
151
|
+
parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
152
|
+
parent_node.merge!(current_node)
|
134
153
|
else
|
135
154
|
taxon.classification_path += parent_cp + [taxon.current_name_canonical]
|
155
|
+
taxon.classification_path_id += @res[taxon.parent_id].classification_path_id + [taxon.id]
|
156
|
+
parent_node = @res[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
|
157
|
+
parent_node.merge!(current_node)
|
136
158
|
end
|
137
159
|
end
|
138
160
|
end
|
@@ -167,6 +189,7 @@ class DarwinCore
|
|
167
189
|
@res[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
|
168
190
|
r[fields[:vernacularname]],
|
169
191
|
fields[:languagecode] ? r[fields[:languagecode]] : nil)
|
192
|
+
add_name_string(r[fields[:vernacularname]])
|
170
193
|
end
|
171
194
|
end
|
172
195
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 4
|
9
|
-
- 10
|
10
|
-
version: 0.4.10
|
9
|
+
- 11
|
10
|
+
version: 0.4.11
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-21 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|