dwc-archive 0.9.5 → 0.9.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/LICENSE +1 -1
- data/README.md +9 -0
- data/Rakefile +19 -14
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +144 -52
- data/spec/files/gnub.tar.gz +0 -0
- data/spec/lib/dwc-archive_spec.rb +17 -3
- data/spec/spec_helper.rb +0 -1
- metadata +4 -3
data/CHANGELOG
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -143,6 +143,15 @@ Creating a DarwinCore Archive file
|
|
143
143
|
gen.add_eml_xml(eml)
|
144
144
|
gen.pack
|
145
145
|
|
146
|
+
Logging
|
147
|
+
-------
|
148
|
+
|
149
|
+
The gem has the ability to log its events:
|
150
|
+
|
151
|
+
require 'dwc-archive'
|
152
|
+
DarwinCore.logger = Logger.new($stdout)
|
153
|
+
|
154
|
+
|
146
155
|
Note on Patches/Pull Requests
|
147
156
|
-----------------------------
|
148
157
|
|
data/Rakefile
CHANGED
@@ -6,7 +6,10 @@ begin
|
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
7
|
gem.name = "dwc-archive"
|
8
8
|
gem.summary = %Q{Handler of Darwin Core Archive files}
|
9
|
-
gem.description =
|
9
|
+
gem.description = 'Darwin Core Archive is the current standard exchange ' +
|
10
|
+
'format for GLobal Names Architecture modules. ' +
|
11
|
+
'This gem makes it easy to incorporate files in ' +
|
12
|
+
'Darwin Core Archive format into a ruby project.'
|
10
13
|
gem.email = "dmozzherin at gmail dot com"
|
11
14
|
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
12
15
|
gem.authors = ["Dmitry Mozzherin"]
|
@@ -14,11 +17,11 @@ begin
|
|
14
17
|
gem.add_dependency 'parsley-store', ">= 0.3.0"
|
15
18
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
16
19
|
gem.add_development_dependency "cucumber", ">= 0"
|
17
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
20
|
end
|
19
21
|
Jeweler::GemcutterTasks.new
|
20
22
|
rescue LoadError
|
21
|
-
puts
|
23
|
+
puts 'Jeweler (or a dependency) not available. ' +
|
24
|
+
'Install it with: gem install jeweler'
|
22
25
|
end
|
23
26
|
|
24
27
|
require 'rspec/core/rake_task'
|
@@ -40,18 +43,20 @@ begin
|
|
40
43
|
task :features => :check_dependencies
|
41
44
|
rescue LoadError
|
42
45
|
task :features do
|
43
|
-
abort
|
46
|
+
abort 'Cucumber is not available. In order to run features, ' +
|
47
|
+
'you must: sudo gem install cucumber'
|
44
48
|
end
|
45
49
|
end
|
46
50
|
|
47
|
-
|
51
|
+
desc 'Run an IRB session with CSL loaded'
|
52
|
+
task :irb, [:script] do |t, args|
|
53
|
+
ARGV.clear
|
54
|
+
|
55
|
+
require 'irb'
|
56
|
+
require_relative 'lib/dwc-archive'
|
48
57
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
# rdoc.title = "dwc-archive #{version}"
|
55
|
-
# rdoc.rdoc_files.include('README*')
|
56
|
-
# rdoc.rdoc_files.include('lib/**/*.rb')
|
57
|
-
# end
|
58
|
+
IRB.conf[:SCRIPT] = args.script
|
59
|
+
IRB.start
|
60
|
+
end
|
61
|
+
|
62
|
+
task :default => :spec
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.6
|
@@ -4,7 +4,10 @@ require 'parsley-store'
|
|
4
4
|
class DarwinCore
|
5
5
|
|
6
6
|
class TaxonNormalized
|
7
|
-
attr_accessor :id, :local_id, :global_id, :source, :parent_id,
|
7
|
+
attr_accessor :id, :local_id, :global_id, :source, :parent_id,
|
8
|
+
:classification_path_id, :classification_path,
|
9
|
+
:linnean_classification_path, :current_name, :current_name_canonical,
|
10
|
+
:synonyms, :vernacular_names, :rank, :status
|
8
11
|
|
9
12
|
def initialize
|
10
13
|
@id = @parent_id = @rank = @status = nil
|
@@ -22,8 +25,21 @@ class DarwinCore
|
|
22
25
|
|
23
26
|
end
|
24
27
|
|
25
|
-
class
|
26
|
-
|
28
|
+
class GnubTaxon < TaxonNormalized
|
29
|
+
attr_accessor :uuid, :uuid_path
|
30
|
+
|
31
|
+
def initialize
|
32
|
+
super
|
33
|
+
@uuid = nil
|
34
|
+
@uuid_path = []
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class SynonymNormalized < Struct.new(:id, :name, :canonical_name,
|
39
|
+
:status, :source, :local_id,
|
40
|
+
:global_id);end
|
41
|
+
class VernacularNormalized < Struct.new(:name, :language, :locality,
|
42
|
+
:country_code);end
|
27
43
|
|
28
44
|
class ClassificationNormalizer
|
29
45
|
attr_reader :error_names, :tree, :normalized_data
|
@@ -46,7 +62,9 @@ class DarwinCore
|
|
46
62
|
end
|
47
63
|
|
48
64
|
def add_vernacular_name_string(name_string)
|
49
|
-
|
65
|
+
unless @vernacular_name_strings[name_string]
|
66
|
+
@vernacular_name_strings[name_string] = 1
|
67
|
+
end
|
50
68
|
end
|
51
69
|
|
52
70
|
def name_strings(opts = {})
|
@@ -68,13 +86,18 @@ class DarwinCore
|
|
68
86
|
end
|
69
87
|
|
70
88
|
def normalize(opts = {})
|
71
|
-
opts = { :with_canonical_names => true,
|
89
|
+
opts = { :with_canonical_names => true,
|
90
|
+
:with_extensions => true }.merge(opts)
|
72
91
|
@with_canonical_names = !!opts[:with_canonical_names]
|
73
|
-
DarwinCore.logger_write(@dwc.object_id,
|
92
|
+
DarwinCore.logger_write(@dwc.object_id,
|
93
|
+
'Started normalization of the classification')
|
74
94
|
ingest_core
|
75
|
-
DarwinCore.logger_write(@dwc.object_id,
|
76
|
-
|
77
|
-
|
95
|
+
DarwinCore.logger_write(@dwc.object_id,
|
96
|
+
'Calculating the classification parent/child paths')
|
97
|
+
has_parent_id? ?
|
98
|
+
calculate_classification_path :
|
99
|
+
@normalized_data.keys.each { |id| @tree[id] = {} }
|
100
|
+
DarwinCore.logger_write(@dwc.object_id, 'Ingesting data from extensions')
|
78
101
|
if !!opts[:with_extensions]
|
79
102
|
ingest_extensions
|
80
103
|
end
|
@@ -85,7 +108,8 @@ class DarwinCore
|
|
85
108
|
|
86
109
|
def get_canonical_name(a_scientific_name)
|
87
110
|
if @with_canonical_names
|
88
|
-
canonical_name = @parser.parse(a_scientific_name,
|
111
|
+
canonical_name = @parser.parse(a_scientific_name,
|
112
|
+
:canonical_only => true)
|
89
113
|
canonical_name.to_s.empty? ? a_scientific_name : canonical_name
|
90
114
|
else
|
91
115
|
nil
|
@@ -93,9 +117,9 @@ class DarwinCore
|
|
93
117
|
end
|
94
118
|
|
95
119
|
def get_fields(element)
|
96
|
-
data = element.fields.inject({}) do |res, f|
|
120
|
+
data = element.fields.inject({}) do |res, f|
|
97
121
|
field = f[:term].split('/')[-1]
|
98
|
-
field = field ? field.downcase.to_sym : ''
|
122
|
+
field = field ? field.downcase.to_sym : ''
|
99
123
|
res[field] = f[:index].to_i
|
100
124
|
res
|
101
125
|
end
|
@@ -109,12 +133,16 @@ class DarwinCore
|
|
109
133
|
|
110
134
|
def add_synonym_from_core(taxon_id, row)
|
111
135
|
@synonyms[row[@core_fields[:id]]] = taxon_id
|
112
|
-
taxon = @normalized_data[row[taxon_id]] ?
|
136
|
+
taxon = @normalized_data[row[taxon_id]] ?
|
137
|
+
@normalized_data[row[taxon_id]] :
|
138
|
+
@normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
|
113
139
|
synonym = SynonymNormalized.new(
|
114
140
|
row[@core_fields[:id]],
|
115
141
|
row[@core_fields[:scientificname]],
|
116
142
|
row[@core_fields[:canonicalname]],
|
117
|
-
@core_fields[:taxonomicstatus] ?
|
143
|
+
@core_fields[:taxonomicstatus] ?
|
144
|
+
row[@core_fields[:taxonomicstatus]] :
|
145
|
+
nil,
|
118
146
|
@core_fields[:source] ? row[@core_fields[:source]] : nil,
|
119
147
|
@core_fields[:localid] ? row[@core_fields[:localid]] : nil,
|
120
148
|
@core_fields[:globalid] ? row[@core_fields[:globalid]] : nil,
|
@@ -129,10 +157,14 @@ class DarwinCore
|
|
129
157
|
canonical_name = nil
|
130
158
|
scientific_name = row[fields[:scientificname]].strip
|
131
159
|
if separate_canonical_and_authorship?(row, fields)
|
132
|
-
|
160
|
+
if @with_canonical_names
|
161
|
+
canonical_name = row[fields[:scientificname]].strip
|
162
|
+
end
|
133
163
|
scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
|
134
164
|
else
|
135
|
-
|
165
|
+
if @with_canonical_names
|
166
|
+
canonical_name = get_canonical_name(row[fields[:scientificname]])
|
167
|
+
end
|
136
168
|
end
|
137
169
|
fields[:canonicalname] = row.size
|
138
170
|
row << canonical_name
|
@@ -149,43 +181,72 @@ class DarwinCore
|
|
149
181
|
|
150
182
|
def ingest_core
|
151
183
|
@normalized_data = {}
|
152
|
-
|
184
|
+
has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
|
185
|
+
raise DarwinCore::CoreFileError.new('Darwin Core core fields must ' +
|
186
|
+
'contain taxon id and scientific name') unless has_name_and_id
|
153
187
|
@dwc.core.read do |rows|
|
154
188
|
rows[1].each do |error|
|
155
|
-
@error_names << { :data => error,
|
189
|
+
@error_names << { :data => error,
|
190
|
+
:error => :reading_or_encoding_error }
|
156
191
|
end
|
157
192
|
rows[0].each do |r|
|
158
193
|
set_scientific_name(r, @core_fields)
|
159
194
|
#core has AcceptedNameUsageId
|
160
|
-
if @core_fields[:acceptednameusageid] &&
|
195
|
+
if @core_fields[:acceptednameusageid] &&
|
196
|
+
r[@core_fields[:acceptednameusageid]] &&
|
197
|
+
r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
|
161
198
|
add_synonym_from_core(@core_fields[:acceptednameusageid], r)
|
162
|
-
elsif !@core_fields[:acceptednameusageid] &&
|
199
|
+
elsif !@core_fields[:acceptednameusageid] &&
|
200
|
+
@core_fields[:taxonomicstatus] &&
|
201
|
+
status_synonym?(r[@core_fields[:taxonomicstatus]])
|
163
202
|
add_synonym_from_core(parent_id, r) if has_parent_id?
|
164
203
|
else
|
165
|
-
|
204
|
+
unless @normalized_data[r[@core_fields[:id]]]
|
205
|
+
if gnub_archive?
|
206
|
+
new_taxon = DarwinCore::GnubTaxon.new
|
207
|
+
else
|
208
|
+
new_taxon = DarwinCore::TaxonNormalized.new
|
209
|
+
end
|
210
|
+
@normalized_data[r[@core_fields[:id]]] = new_taxon
|
211
|
+
end
|
212
|
+
taxon = @normalized_data[r[@core_fields[:id]]]
|
213
|
+
if gnub_archive?
|
214
|
+
taxon.uuid = r[@core_fields[:originalnameusageid]]
|
215
|
+
taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
|
216
|
+
split('|')
|
217
|
+
end
|
166
218
|
taxon.id = r[@core_fields[:id]]
|
167
219
|
taxon.current_name = r[@core_fields[:scientificname]]
|
168
220
|
taxon.current_name_canonical = r[@core_fields[:canonicalname]]
|
169
221
|
taxon.parent_id = has_parent_id? ? r[parent_id] : nil
|
170
222
|
taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
|
171
|
-
|
223
|
+
if @core_fields[:taxonomicstatus]
|
224
|
+
taxon.status = r[@core_fields[:taxonomicstatus]]
|
225
|
+
end
|
172
226
|
taxon.source = r[@core_fields[:source]] if @core_fields[:source]
|
173
227
|
taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
|
174
|
-
|
175
|
-
|
228
|
+
if @core_fields[:globalid]
|
229
|
+
taxon.global_id = r[@core_fields[:globalid]]
|
230
|
+
end
|
231
|
+
taxon.linnean_classification_path =
|
232
|
+
get_linnean_classification_path(r, taxon)
|
176
233
|
add_name_string(taxon.current_name)
|
177
|
-
|
234
|
+
has_canonical = taxon.current_name_canonical &&
|
235
|
+
!taxon.current_name_canonical.empty?
|
236
|
+
add_name_string(taxon.current_name_canonical) if has_canonical
|
178
237
|
end
|
179
238
|
end
|
180
239
|
end
|
181
240
|
end
|
182
241
|
|
183
242
|
def has_parent_id?
|
184
|
-
@has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
|
243
|
+
@has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
|
244
|
+
@core_fields.has_key?(:parentnameusageid)
|
185
245
|
end
|
186
246
|
|
187
247
|
def parent_id
|
188
|
-
parent_id_field = @core_fields[:highertaxonid] ||
|
248
|
+
parent_id_field = @core_fields[:highertaxonid] ||
|
249
|
+
@core_fields[:parentnameusageid]
|
189
250
|
end
|
190
251
|
|
191
252
|
def calculate_classification_path
|
@@ -200,28 +261,43 @@ class DarwinCore
|
|
200
261
|
def get_classification_path(taxon)
|
201
262
|
return if !taxon.classification_path_id.empty?
|
202
263
|
@paths_num += 1
|
203
|
-
|
264
|
+
if @paths_num % 10000 == 0
|
265
|
+
DarwinCore.logger_write(@dwc.object_id,
|
266
|
+
"Calculated %s paths" % @paths_num)
|
267
|
+
end
|
204
268
|
current_node = {taxon.id => {}}
|
205
269
|
if DarwinCore.nil_field?(taxon.parent_id)
|
206
|
-
|
270
|
+
if @with_canonical_names
|
271
|
+
taxon.classification_path << taxon.current_name_canonical
|
272
|
+
end
|
207
273
|
taxon.classification_path_id << taxon.id
|
208
274
|
@tree.merge!(current_node)
|
209
275
|
else
|
210
276
|
parent_cp = parent_cpid = nil
|
211
277
|
if @normalized_data[taxon.parent_id]
|
212
|
-
|
213
|
-
|
278
|
+
if @with_canonical_names
|
279
|
+
parent_cp = @normalized_data[taxon.parent_id].classification_path
|
280
|
+
end
|
281
|
+
parent_cpid = @normalized_data[taxon.parent_id].
|
282
|
+
classification_path_id
|
214
283
|
else
|
215
284
|
current_parent = @normalized_data[@synonyms[taxon.parent_id]]
|
216
285
|
if current_parent
|
217
|
-
error =
|
218
|
-
|
219
|
-
|
220
|
-
|
286
|
+
error = 'WARNING: The parent of the taxon ' +
|
287
|
+
"\'#{taxon.current_name}\' is deprecated"
|
288
|
+
@error_names << {:data => taxon,
|
289
|
+
:error => :deprecated_parent,
|
290
|
+
:current_parent => current_parent }
|
291
|
+
|
292
|
+
if @with_canonical_names
|
293
|
+
parent_cp = current_parent.classification_path
|
294
|
+
end
|
221
295
|
parent_cpid = current_parent.classification_path_id
|
222
296
|
else
|
223
|
-
error =
|
224
|
-
|
297
|
+
error = 'WARNING: The parent of the taxon ' +
|
298
|
+
"\'#{taxon.current_name}\' not found"
|
299
|
+
@error_names << {:data => taxon,
|
300
|
+
:error => :deprecated_parent, :current_parent => nil}
|
225
301
|
end
|
226
302
|
end
|
227
303
|
return 'error' unless parent_cpid
|
@@ -230,23 +306,30 @@ class DarwinCore
|
|
230
306
|
begin
|
231
307
|
res = get_classification_path(@normalized_data[taxon.parent_id])
|
232
308
|
rescue SystemStackError
|
233
|
-
@error_names << {:data => taxon,
|
309
|
+
@error_names << {:data => taxon,
|
310
|
+
:error => :too_deep_hierarchy, :current_parent => nil}
|
234
311
|
end
|
235
312
|
return res if res == 'error'
|
236
313
|
if @with_canonical_names
|
237
|
-
taxon.classification_path += @normalized_data[taxon.parent_id].
|
314
|
+
taxon.classification_path += @normalized_data[taxon.parent_id].
|
315
|
+
classification_path + [taxon.current_name_canonical]
|
238
316
|
end
|
239
|
-
taxon.classification_path_id += @normalized_data[taxon.parent_id].
|
240
|
-
|
317
|
+
taxon.classification_path_id += @normalized_data[taxon.parent_id].
|
318
|
+
classification_path_id + [taxon.id]
|
319
|
+
parent_node = @normalized_data[taxon.parent_id].
|
320
|
+
classification_path_id.inject(@tree) {|node, id| node[id]}
|
241
321
|
parent_node.merge!(current_node)
|
242
322
|
else
|
243
|
-
taxon.classification_path += parent_cp +
|
323
|
+
taxon.classification_path += parent_cp +
|
324
|
+
[taxon.current_name_canonical] if @with_canonical_names
|
244
325
|
taxon.classification_path_id += parent_cpid + [taxon.id]
|
245
|
-
parent_node = @normalized_data[taxon.parent_id].
|
326
|
+
parent_node = @normalized_data[taxon.parent_id].
|
327
|
+
classification_path_id.inject(@tree) {|node, id| node[id]}
|
246
328
|
begin
|
247
329
|
parent_node.merge!(current_node)
|
248
330
|
rescue NoMethodError => e
|
249
|
-
DarwinCore.logger_write(@dwc.object_id,
|
331
|
+
DarwinCore.logger_write(@dwc.object_id,
|
332
|
+
"Error '%s' taxon %s" % [e.message, taxon.id])
|
250
333
|
return 'error'
|
251
334
|
end
|
252
335
|
end
|
@@ -256,13 +339,15 @@ class DarwinCore
|
|
256
339
|
def ingest_extensions
|
257
340
|
@extensions.each do |e|
|
258
341
|
ext, fields = *e
|
259
|
-
ingest_synonyms(e) if (File.split(e[0].file_path).
|
342
|
+
ingest_synonyms(e) if (File.split(e[0].file_path).
|
343
|
+
last.match(/synonym/i) &&
|
344
|
+
fields.keys.include?(:scientificname))
|
260
345
|
ingest_vernaculars(e) if fields.keys.include? :vernacularname
|
261
346
|
end
|
262
347
|
end
|
263
348
|
|
264
349
|
def ingest_synonyms(extension)
|
265
|
-
DarwinCore.logger_write(@dwc.object_id,
|
350
|
+
DarwinCore.logger_write(@dwc.object_id, 'Ingesting synonyms extension')
|
266
351
|
ext, fields = *extension
|
267
352
|
ext.read do |rows|
|
268
353
|
rows[0].each do |r|
|
@@ -281,14 +366,16 @@ class DarwinCore
|
|
281
366
|
add_name_string(synonym.name)
|
282
367
|
add_name_string(synonym.canonical_name)
|
283
368
|
else
|
284
|
-
@error_names << { :taxon => synonym,
|
369
|
+
@error_names << { :taxon => synonym,
|
370
|
+
:error => :synonym_of_unknown_taxa }
|
285
371
|
end
|
286
372
|
end
|
287
373
|
end
|
288
374
|
end
|
289
375
|
|
290
376
|
def ingest_vernaculars(extension)
|
291
|
-
DarwinCore.logger_write(@dwc.object_id,
|
377
|
+
DarwinCore.logger_write(@dwc.object_id,
|
378
|
+
'Ingesting vernacular names extension')
|
292
379
|
ext, fields = *extension
|
293
380
|
ext.read do |rows|
|
294
381
|
rows[0].each do |r|
|
@@ -313,20 +400,25 @@ class DarwinCore
|
|
313
400
|
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
314
401
|
add_vernacular_name_string(vernacular.name)
|
315
402
|
else
|
316
|
-
@error_names << { :vernacular_name => vernacular,
|
403
|
+
@error_names << { :vernacular_name => vernacular,
|
404
|
+
:error => :vernacular_of_unknown_taxa }
|
317
405
|
end
|
318
406
|
end
|
319
407
|
end
|
320
408
|
end
|
321
|
-
|
409
|
+
|
322
410
|
#Collect linnean classification path only on species level
|
323
411
|
def get_linnean_classification_path(row, taxon)
|
324
412
|
res = []
|
325
|
-
[:kingdom, :phylum, :class,
|
326
|
-
|
413
|
+
[:kingdom, :phylum, :class,
|
414
|
+
:order, :family, :genus, :subgenus].each do |clade|
|
415
|
+
res << [row[@core_fields[clade]], clade] if @core_fields[clade]
|
327
416
|
end
|
328
417
|
res
|
329
418
|
end
|
330
419
|
|
420
|
+
def gnub_archive?
|
421
|
+
@core_fields[:originalnameusageidpath]
|
422
|
+
end
|
331
423
|
end
|
332
424
|
end
|
Binary file
|
@@ -49,7 +49,7 @@ describe DarwinCore do
|
|
49
49
|
file = File.join(@file_dir, 'invalid.tar.gz')
|
50
50
|
lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::InvalidArchiveError)
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
it "should raise an error if archive is not in utf-8" do
|
54
54
|
file = File.join(@file_dir, 'latin1.tar.gz')
|
55
55
|
lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::EncodingError)
|
@@ -219,9 +219,9 @@ describe DarwinCore do
|
|
219
219
|
dwc = DarwinCore.new(file)
|
220
220
|
norm = dwc.normalize_classification
|
221
221
|
taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
|
222
|
-
taxa.size.should == 2
|
222
|
+
taxa.size.should == 2
|
223
223
|
end
|
224
|
-
|
224
|
+
|
225
225
|
it "should be able to get language and locality fields for vernacular names" do
|
226
226
|
file = File.join(@file_dir, 'language_locality.tar.gz')
|
227
227
|
dwc = DarwinCore.new(file)
|
@@ -231,6 +231,20 @@ describe DarwinCore do
|
|
231
231
|
vn.language.should == 'en'
|
232
232
|
vn.locality.should == 'New England'
|
233
233
|
end
|
234
|
+
|
235
|
+
it 'should be able to get uuids from gnub dataset' do
|
236
|
+
file = File.join(@file_dir, 'gnub.tar.gz')
|
237
|
+
dwc = DarwinCore.new(file)
|
238
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
239
|
+
cn.normalize
|
240
|
+
vn = cn.normalized_data['9c399f90-cfb8-5a7f-9a21-18285a473488']
|
241
|
+
vn.class.should == DarwinCore::GnubTaxon
|
242
|
+
vn.uuid.should == '8faa91f6-663f-4cfe-b785-0ab4e9415a51'
|
243
|
+
vn.uuid_path.should == [
|
244
|
+
"9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3",
|
245
|
+
"bf4c91c0-3d1f-44c7-9d3b-249382182a26",
|
246
|
+
"8faa91f6-663f-4cfe-b785-0ab4e9415a51"]
|
247
|
+
end
|
234
248
|
end
|
235
249
|
|
236
250
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -171,6 +171,7 @@ files:
|
|
171
171
|
- spec/files/empty_coreid.tar.gz
|
172
172
|
- spec/files/file with characters(3).gz
|
173
173
|
- spec/files/flat_list.tar.gz
|
174
|
+
- spec/files/gnub.tar.gz
|
174
175
|
- spec/files/invalid.tar.gz
|
175
176
|
- spec/files/junk_dir_inside.zip
|
176
177
|
- spec/files/language_locality.tar.gz
|
@@ -202,7 +203,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
202
203
|
version: '0'
|
203
204
|
segments:
|
204
205
|
- 0
|
205
|
-
hash:
|
206
|
+
hash: 2372622078447411988
|
206
207
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
207
208
|
none: false
|
208
209
|
requirements:
|