dwc-archive 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/LICENSE +1 -1
- data/README.md +9 -0
- data/Rakefile +19 -14
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +144 -52
- data/spec/files/gnub.tar.gz +0 -0
- data/spec/lib/dwc-archive_spec.rb +17 -3
- data/spec/spec_helper.rb +0 -1
- metadata +4 -3
data/CHANGELOG
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -143,6 +143,15 @@ Creating a DarwinCore Archive file
|
|
143
143
|
gen.add_eml_xml(eml)
|
144
144
|
gen.pack
|
145
145
|
|
146
|
+
Logging
|
147
|
+
-------
|
148
|
+
|
149
|
+
The gem has the ability to show logs of its events:
|
150
|
+
|
151
|
+
require 'dwc-archive'
|
152
|
+
DarwinCore.logger = Logger.new($stdout)
|
153
|
+
|
154
|
+
|
146
155
|
Note on Patches/Pull Requests
|
147
156
|
-----------------------------
|
148
157
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,10 @@ begin
|
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "dwc-archive"
|
8
8
|
gem.summary = %Q{Handler of Darwin Core Archive files}
|
9
|
-
gem.description =
|
9
|
+
gem.description = 'Darwin Core Archive is the current standard exchange ' +
|
10
|
+
'format for GLobal Names Architecture modules. ' +
|
11
|
+
'This gem makes it easy to incorporate files in ' +
|
12
|
+
'Darwin Core Archive format into a ruby project.'
|
10
13
|
gem.email = "dmozzherin at gmail dot com"
|
11
14
|
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
12
15
|
gem.authors = ["Dmitry Mozzherin"]
|
@@ -14,11 +17,11 @@ begin
|
|
14
17
|
gem.add_dependency 'parsley-store', ">= 0.3.0"
|
15
18
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
16
19
|
gem.add_development_dependency "cucumber", ">= 0"
|
17
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
20
|
end
|
19
21
|
Jeweler::GemcutterTasks.new
|
20
22
|
rescue LoadError
|
21
|
-
puts
|
23
|
+
puts 'Jeweler (or a dependency) not available. ' +
|
24
|
+
'Install it with: gem install jeweler'
|
22
25
|
end
|
23
26
|
|
24
27
|
require 'rspec/core/rake_task'
|
@@ -40,18 +43,20 @@ begin
|
|
40
43
|
task :features => :check_dependencies
|
41
44
|
rescue LoadError
|
42
45
|
task :features do
|
43
|
-
abort
|
46
|
+
abort 'Cucumber is not available. In order to run features, ' +
|
47
|
+
'you must: sudo gem install cucumber'
|
44
48
|
end
|
45
49
|
end
|
46
50
|
|
47
|
-
|
51
|
+
desc 'Run an IRB session with CSL loaded'
|
52
|
+
task :irb, [:script] do |t, args|
|
53
|
+
ARGV.clear
|
54
|
+
|
55
|
+
require 'irb'
|
56
|
+
require_relative 'lib/dwc-archive'
|
48
57
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
# rdoc.title = "dwc-archive #{version}"
|
55
|
-
# rdoc.rdoc_files.include('README*')
|
56
|
-
# rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
-
# end
|
58
|
+
IRB.conf[:SCRIPT] = args.script
|
59
|
+
IRB.start
|
60
|
+
end
|
61
|
+
|
62
|
+
task :default => :spec
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.6
|
@@ -4,7 +4,10 @@ require 'parsley-store'
|
|
4
4
|
class DarwinCore
|
5
5
|
|
6
6
|
class TaxonNormalized
|
7
|
-
attr_accessor :id, :local_id, :global_id, :source, :parent_id,
|
7
|
+
attr_accessor :id, :local_id, :global_id, :source, :parent_id,
|
8
|
+
:classification_path_id, :classification_path,
|
9
|
+
:linnean_classification_path, :current_name, :current_name_canonical,
|
10
|
+
:synonyms, :vernacular_names, :rank, :status
|
8
11
|
|
9
12
|
def initialize
|
10
13
|
@id = @parent_id = @rank = @status = nil
|
@@ -22,8 +25,21 @@ class DarwinCore
|
|
22
25
|
|
23
26
|
end
|
24
27
|
|
25
|
-
class
|
26
|
-
|
28
|
+
class GnubTaxon < TaxonNormalized
|
29
|
+
attr_accessor :uuid, :uuid_path
|
30
|
+
|
31
|
+
def initialize
|
32
|
+
super
|
33
|
+
@uuid = nil
|
34
|
+
@uuid_path = []
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class SynonymNormalized < Struct.new(:id, :name, :canonical_name,
|
39
|
+
:status, :source, :local_id,
|
40
|
+
:global_id);end
|
41
|
+
class VernacularNormalized < Struct.new(:name, :language, :locality,
|
42
|
+
:country_code);end
|
27
43
|
|
28
44
|
class ClassificationNormalizer
|
29
45
|
attr_reader :error_names, :tree, :normalized_data
|
@@ -46,7 +62,9 @@ class DarwinCore
|
|
46
62
|
end
|
47
63
|
|
48
64
|
def add_vernacular_name_string(name_string)
|
49
|
-
|
65
|
+
unless @vernacular_name_strings[name_string]
|
66
|
+
@vernacular_name_strings[name_string] = 1
|
67
|
+
end
|
50
68
|
end
|
51
69
|
|
52
70
|
def name_strings(opts = {})
|
@@ -68,13 +86,18 @@ class DarwinCore
|
|
68
86
|
end
|
69
87
|
|
70
88
|
def normalize(opts = {})
|
71
|
-
opts = { :with_canonical_names => true,
|
89
|
+
opts = { :with_canonical_names => true,
|
90
|
+
:with_extensions => true }.merge(opts)
|
72
91
|
@with_canonical_names = !!opts[:with_canonical_names]
|
73
|
-
DarwinCore.logger_write(@dwc.object_id,
|
92
|
+
DarwinCore.logger_write(@dwc.object_id,
|
93
|
+
'Started normalization of the classification')
|
74
94
|
ingest_core
|
75
|
-
DarwinCore.logger_write(@dwc.object_id,
|
76
|
-
|
77
|
-
|
95
|
+
DarwinCore.logger_write(@dwc.object_id,
|
96
|
+
'Calculating the classification parent/child paths')
|
97
|
+
has_parent_id? ?
|
98
|
+
calculate_classification_path :
|
99
|
+
@normalized_data.keys.each { |id| @tree[id] = {} }
|
100
|
+
DarwinCore.logger_write(@dwc.object_id, 'Ingesting data from extensions')
|
78
101
|
if !!opts[:with_extensions]
|
79
102
|
ingest_extensions
|
80
103
|
end
|
@@ -85,7 +108,8 @@ class DarwinCore
|
|
85
108
|
|
86
109
|
def get_canonical_name(a_scientific_name)
|
87
110
|
if @with_canonical_names
|
88
|
-
canonical_name = @parser.parse(a_scientific_name,
|
111
|
+
canonical_name = @parser.parse(a_scientific_name,
|
112
|
+
:canonical_only => true)
|
89
113
|
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
|
90
114
|
else
|
91
115
|
nil
|
@@ -93,9 +117,9 @@ class DarwinCore
|
|
93
117
|
end
|
94
118
|
|
95
119
|
def get_fields(element)
|
96
|
-
data = element.fields.inject({}) do |res, f|
|
120
|
+
data = element.fields.inject({}) do |res, f|
|
97
121
|
field = f[:term].split('/')[-1]
|
98
|
-
field = field ? field.downcase.to_sym : ''
|
122
|
+
field = field ? field.downcase.to_sym : ''
|
99
123
|
res[field] = f[:index].to_i
|
100
124
|
res
|
101
125
|
end
|
@@ -109,12 +133,16 @@ class DarwinCore
|
|
109
133
|
|
110
134
|
def add_synonym_from_core(taxon_id, row)
|
111
135
|
@synonyms[row[@core_fields[:id]]] = taxon_id
|
112
|
-
taxon = @normalized_data[row[taxon_id]] ?
|
136
|
+
taxon = @normalized_data[row[taxon_id]] ?
|
137
|
+
@normalized_data[row[taxon_id]] :
|
138
|
+
@normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
113
139
|
synonym = SynonymNormalized.new(
|
114
140
|
row[@core_fields[:id]],
|
115
141
|
row[@core_fields[:scientificname]],
|
116
142
|
row[@core_fields[:canonicalname]],
|
117
|
-
@core_fields[:taxonomicstatus] ?
|
143
|
+
@core_fields[:taxonomicstatus] ?
|
144
|
+
row[@core_fields[:taxonomicstatus]] :
|
145
|
+
nil,
|
118
146
|
@core_fields[:source] ? row[@core_fields[:source]] : nil,
|
119
147
|
@core_fields[:localid] ? row[@core_fields[:localid]] : nil,
|
120
148
|
@core_fields[:globalid] ? row[@core_fields[:globalid]] : nil,
|
@@ -129,10 +157,14 @@ class DarwinCore
|
|
129
157
|
canonical_name = nil
|
130
158
|
scientific_name = row[fields[:scientificname]].strip
|
131
159
|
if separate_canonical_and_authorship?(row, fields)
|
132
|
-
|
160
|
+
if @with_canonical_names
|
161
|
+
canonical_name = row[fields[:scientificname]].strip
|
162
|
+
end
|
133
163
|
scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
|
134
164
|
else
|
135
|
-
|
165
|
+
if @with_canonical_names
|
166
|
+
canonical_name = get_canonical_name(row[fields[:scientificname]])
|
167
|
+
end
|
136
168
|
end
|
137
169
|
fields[:canonicalname] = row.size
|
138
170
|
row << canonical_name
|
@@ -149,43 +181,72 @@ class DarwinCore
|
|
149
181
|
|
150
182
|
def ingest_core
|
151
183
|
@normalized_data = {}
|
152
|
-
|
184
|
+
has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
|
185
|
+
raise DarwinCore::CoreFileError.new('Darwin Core core fields must ' +
|
186
|
+
'contain taxon id and scientific name') unless has_name_and_id
|
153
187
|
@dwc.core.read do |rows|
|
154
188
|
rows[1].each do |error|
|
155
|
-
@error_names << { :data => error,
|
189
|
+
@error_names << { :data => error,
|
190
|
+
:error => :reading_or_encoding_error }
|
156
191
|
end
|
157
192
|
rows[0].each do |r|
|
158
193
|
set_scientific_name(r, @core_fields)
|
159
194
|
#core has AcceptedNameUsageId
|
160
|
-
if @core_fields[:acceptednameusageid] &&
|
195
|
+
if @core_fields[:acceptednameusageid] &&
|
196
|
+
r[@core_fields[:acceptednameusageid]] &&
|
197
|
+
r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
|
161
198
|
add_synonym_from_core(@core_fields[:acceptednameusageid], r)
|
162
|
-
elsif !@core_fields[:acceptednameusageid] &&
|
199
|
+
elsif !@core_fields[:acceptednameusageid] &&
|
200
|
+
@core_fields[:taxonomicstatus] &&
|
201
|
+
status_synonym?(r[@core_fields[:taxonomicstatus]])
|
163
202
|
add_synonym_from_core(parent_id, r) if has_parent_id?
|
164
203
|
else
|
165
|
-
|
204
|
+
unless @normalized_data[r[@core_fields[:id]]]
|
205
|
+
if gnub_archive?
|
206
|
+
new_taxon = DarwinCore::GnubTaxon.new
|
207
|
+
else
|
208
|
+
new_taxon = DarwinCore::TaxonNormalized.new
|
209
|
+
end
|
210
|
+
@normalized_data[r[@core_fields[:id]]] = new_taxon
|
211
|
+
end
|
212
|
+
taxon = @normalized_data[r[@core_fields[:id]]]
|
213
|
+
if gnub_archive?
|
214
|
+
taxon.uuid = r[@core_fields[:originalnameusageid]]
|
215
|
+
taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
|
216
|
+
split('|')
|
217
|
+
end
|
166
218
|
taxon.id = r[@core_fields[:id]]
|
167
219
|
taxon.current_name = r[@core_fields[:scientificname]]
|
168
220
|
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
|
169
221
|
taxon.parent_id = has_parent_id? ? r[parent_id] : nil
|
170
222
|
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
|
171
|
-
|
223
|
+
if @core_fields[:taxonomicstatus]
|
224
|
+
taxon.status = r[@core_fields[:taxonomicstatus]]
|
225
|
+
end
|
172
226
|
taxon.source = r[@core_fields[:source]] if @core_fields[:source]
|
173
227
|
taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
|
174
|
-
|
175
|
-
|
228
|
+
if @core_fields[:globalid]
|
229
|
+
taxon.global_id = r[@core_fields[:globalid]]
|
230
|
+
end
|
231
|
+
taxon.linnean_classification_path =
|
232
|
+
get_linnean_classification_path(r, taxon)
|
176
233
|
add_name_string(taxon.current_name)
|
177
|
-
|
234
|
+
has_canonical = taxon.current_name_canonical &&
|
235
|
+
!taxon.current_name_canonical.empty?
|
236
|
+
add_name_string(taxon.current_name_canonical) if has_canonical
|
178
237
|
end
|
179
238
|
end
|
180
239
|
end
|
181
240
|
end
|
182
241
|
|
183
242
|
def has_parent_id?
|
184
|
-
@has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
|
243
|
+
@has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
|
244
|
+
@core_fields.has_key?(:parentnameusageid)
|
185
245
|
end
|
186
246
|
|
187
247
|
def parent_id
|
188
|
-
parent_id_field = @core_fields[:highertaxonid] ||
|
248
|
+
parent_id_field = @core_fields[:highertaxonid] ||
|
249
|
+
@core_fields[:parentnameusageid]
|
189
250
|
end
|
190
251
|
|
191
252
|
def calculate_classification_path
|
@@ -200,28 +261,43 @@ class DarwinCore
|
|
200
261
|
def get_classification_path(taxon)
|
201
262
|
return if !taxon.classification_path_id.empty?
|
202
263
|
@paths_num += 1
|
203
|
-
|
264
|
+
if @paths_num % 10000 == 0
|
265
|
+
DarwinCore.logger_write(@dwc.object_id,
|
266
|
+
"Calculated %s paths" % @paths_num)
|
267
|
+
end
|
204
268
|
current_node = {taxon.id => {}}
|
205
269
|
if DarwinCore.nil_field?(taxon.parent_id)
|
206
|
-
|
270
|
+
if @with_canonical_names
|
271
|
+
taxon.classification_path << taxon.current_name_canonical
|
272
|
+
end
|
207
273
|
taxon.classification_path_id << taxon.id
|
208
274
|
@tree.merge!(current_node)
|
209
275
|
else
|
210
276
|
parent_cp = parent_cpid = nil
|
211
277
|
if @normalized_data[taxon.parent_id]
|
212
|
-
|
213
|
-
|
278
|
+
if @with_canonical_names
|
279
|
+
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
280
|
+
end
|
281
|
+
parent_cpid = @normalized_data[taxon.parent_id].
|
282
|
+
classification_path_id
|
214
283
|
else
|
215
284
|
current_parent = @normalized_data[@synonyms[taxon.parent_id]]
|
216
285
|
if current_parent
|
217
|
-
error =
|
218
|
-
|
219
|
-
|
220
|
-
|
286
|
+
error = 'WARNING: The parent of the taxon ' +
|
287
|
+
"\'#{taxon.current_name}\' is deprecated"
|
288
|
+
@error_names << {:data => taxon,
|
289
|
+
:error => :deprecated_parent,
|
290
|
+
:current_parent => current_parent }
|
291
|
+
|
292
|
+
if @with_canonical_names
|
293
|
+
parent_cp = current_parent.classification_path
|
294
|
+
end
|
221
295
|
parent_cpid = current_parent.classification_path_id
|
222
296
|
else
|
223
|
-
error =
|
224
|
-
|
297
|
+
error = 'WARNING: The parent of the taxon ' +
|
298
|
+
"\'#{taxon.current_name}\' not found"
|
299
|
+
@error_names << {:data => taxon,
|
300
|
+
:error => :deprecated_parent, :current_parent => nil}
|
225
301
|
end
|
226
302
|
end
|
227
303
|
return 'error' unless parent_cpid
|
@@ -230,23 +306,30 @@ class DarwinCore
|
|
230
306
|
begin
|
231
307
|
res = get_classification_path(@normalized_data[taxon.parent_id])
|
232
308
|
rescue SystemStackError
|
233
|
-
@error_names << {:data => taxon,
|
309
|
+
@error_names << {:data => taxon,
|
310
|
+
:error => :too_deep_hierarchy, :current_parent => nil}
|
234
311
|
end
|
235
312
|
return res if res == 'error'
|
236
313
|
if @with_canonical_names
|
237
|
-
taxon.classification_path += @normalized_data[taxon.parent_id].
|
314
|
+
taxon.classification_path += @normalized_data[taxon.parent_id].
|
315
|
+
classification_path + [taxon.current_name_canonical]
|
238
316
|
end
|
239
|
-
taxon.classification_path_id += @normalized_data[taxon.parent_id].
|
240
|
-
|
317
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].
|
318
|
+
classification_path_id + [taxon.id]
|
319
|
+
parent_node = @normalized_data[taxon.parent_id].
|
320
|
+
classification_path_id.inject(@tree) {|node, id| node[id]}
|
241
321
|
parent_node.merge!(current_node)
|
242
322
|
else
|
243
|
-
taxon.classification_path += parent_cp +
|
323
|
+
taxon.classification_path += parent_cp +
|
324
|
+
[taxon.current_name_canonical] if @with_canonical_names
|
244
325
|
taxon.classification_path_id += parent_cpid + [taxon.id]
|
245
|
-
parent_node = @normalized_data[taxon.parent_id].
|
326
|
+
parent_node = @normalized_data[taxon.parent_id].
|
327
|
+
classification_path_id.inject(@tree) {|node, id| node[id]}
|
246
328
|
begin
|
247
329
|
parent_node.merge!(current_node)
|
248
330
|
rescue NoMethodError => e
|
249
|
-
DarwinCore.logger_write(@dwc.object_id,
|
331
|
+
DarwinCore.logger_write(@dwc.object_id,
|
332
|
+
"Error '%s' taxon %s" % [e.message, taxon.id])
|
250
333
|
return 'error'
|
251
334
|
end
|
252
335
|
end
|
@@ -256,13 +339,15 @@ class DarwinCore
|
|
256
339
|
def ingest_extensions
|
257
340
|
@extensions.each do |e|
|
258
341
|
ext, fields = *e
|
259
|
-
ingest_synonyms(e) if (File.split(e[0].file_path).
|
342
|
+
ingest_synonyms(e) if (File.split(e[0].file_path).
|
343
|
+
last.match(/synonym/i) &&
|
344
|
+
fields.keys.include?(:scientificname))
|
260
345
|
ingest_vernaculars(e) if fields.keys.include? :vernacularname
|
261
346
|
end
|
262
347
|
end
|
263
348
|
|
264
349
|
def ingest_synonyms(extension)
|
265
|
-
DarwinCore.logger_write(@dwc.object_id,
|
350
|
+
DarwinCore.logger_write(@dwc.object_id, 'Ingesting synonyms extension')
|
266
351
|
ext, fields = *extension
|
267
352
|
ext.read do |rows|
|
268
353
|
rows[0].each do |r|
|
@@ -281,14 +366,16 @@ class DarwinCore
|
|
281
366
|
add_name_string(synonym.name)
|
282
367
|
add_name_string(synonym.canonical_name)
|
283
368
|
else
|
284
|
-
@error_names << { :taxon => synonym,
|
369
|
+
@error_names << { :taxon => synonym,
|
370
|
+
:error => :synonym_of_unknown_taxa }
|
285
371
|
end
|
286
372
|
end
|
287
373
|
end
|
288
374
|
end
|
289
375
|
|
290
376
|
def ingest_vernaculars(extension)
|
291
|
-
DarwinCore.logger_write(@dwc.object_id,
|
377
|
+
DarwinCore.logger_write(@dwc.object_id,
|
378
|
+
'Ingesting vernacular names extension')
|
292
379
|
ext, fields = *extension
|
293
380
|
ext.read do |rows|
|
294
381
|
rows[0].each do |r|
|
@@ -313,20 +400,25 @@ class DarwinCore
|
|
313
400
|
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
314
401
|
add_vernacular_name_string(vernacular.name)
|
315
402
|
else
|
316
|
-
@error_names << { :vernacular_name => vernacular,
|
403
|
+
@error_names << { :vernacular_name => vernacular,
|
404
|
+
:error => :vernacular_of_unknown_taxa }
|
317
405
|
end
|
318
406
|
end
|
319
407
|
end
|
320
408
|
end
|
321
|
-
|
409
|
+
|
322
410
|
#Collect linnean classification path only on species level
|
323
411
|
def get_linnean_classification_path(row, taxon)
|
324
412
|
res = []
|
325
|
-
[:kingdom, :phylum, :class,
|
326
|
-
|
413
|
+
[:kingdom, :phylum, :class,
|
414
|
+
:order, :family, :genus, :subgenus].each do |clade|
|
415
|
+
res << [row[@core_fields[clade]], clade] if @core_fields[clade]
|
327
416
|
end
|
328
417
|
res
|
329
418
|
end
|
330
419
|
|
420
|
+
def gnub_archive?
|
421
|
+
@core_fields[:originalnameusageidpath]
|
422
|
+
end
|
331
423
|
end
|
332
424
|
end
|
Binary file
|
@@ -49,7 +49,7 @@ describe DarwinCore do
|
|
49
49
|
file = File.join(@file_dir, 'invalid.tar.gz')
|
50
50
|
lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::InvalidArchiveError)
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
it "should raise an error if archive is not in utf-8" do
|
54
54
|
file = File.join(@file_dir, 'latin1.tar.gz')
|
55
55
|
lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::EncodingError)
|
@@ -219,9 +219,9 @@ describe DarwinCore do
|
|
219
219
|
dwc = DarwinCore.new(file)
|
220
220
|
norm = dwc.normalize_classification
|
221
221
|
taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
|
222
|
-
taxa.size.should == 2
|
222
|
+
taxa.size.should == 2
|
223
223
|
end
|
224
|
-
|
224
|
+
|
225
225
|
it "should be able to get language and locality fields for vernacular names" do
|
226
226
|
file = File.join(@file_dir, 'language_locality.tar.gz')
|
227
227
|
dwc = DarwinCore.new(file)
|
@@ -231,6 +231,20 @@ describe DarwinCore do
|
|
231
231
|
vn.language.should == 'en'
|
232
232
|
vn.locality.should == 'New England'
|
233
233
|
end
|
234
|
+
|
235
|
+
it 'should be able to get uuids from gnub dataset' do
|
236
|
+
file = File.join(@file_dir, 'gnub.tar.gz')
|
237
|
+
dwc = DarwinCore.new(file)
|
238
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
239
|
+
cn.normalize
|
240
|
+
vn = cn.normalized_data['9c399f90-cfb8-5a7f-9a21-18285a473488']
|
241
|
+
vn.class.should == DarwinCore::GnubTaxon
|
242
|
+
vn.uuid.should == '8faa91f6-663f-4cfe-b785-0ab4e9415a51'
|
243
|
+
vn.uuid_path.should == [
|
244
|
+
"9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3",
|
245
|
+
"bf4c91c0-3d1f-44c7-9d3b-249382182a26",
|
246
|
+
"8faa91f6-663f-4cfe-b785-0ab4e9415a51"]
|
247
|
+
end
|
234
248
|
end
|
235
249
|
|
236
250
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -171,6 +171,7 @@ files:
|
|
171
171
|
- spec/files/empty_coreid.tar.gz
|
172
172
|
- spec/files/file with characters(3).gz
|
173
173
|
- spec/files/flat_list.tar.gz
|
174
|
+
- spec/files/gnub.tar.gz
|
174
175
|
- spec/files/invalid.tar.gz
|
175
176
|
- spec/files/junk_dir_inside.zip
|
176
177
|
- spec/files/language_locality.tar.gz
|
@@ -202,7 +203,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
202
203
|
version: '0'
|
203
204
|
segments:
|
204
205
|
- 0
|
205
|
-
hash:
|
206
|
+
hash: 2372622078447411988
|
206
207
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
207
208
|
none: false
|
208
209
|
requirements:
|