dwc-archive 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ 0.9.6 Added support for GNUB DwCA files
2
+
3
+ 0.9.4 Gem dependencies updated, added travis support
4
+
1
5
  0.9.0 Migrated code to ruby 1.9.3
2
6
 
3
7
  0.8.3 Updated outdated exception raising
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratoryj
1
+ Copyright (c) 2010-2012 Marine Biological Laboratory
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -143,6 +143,15 @@ Creating a DarwinCore Archive file
143
143
  gen.add_eml_xml(eml)
144
144
  gen.pack
145
145
 
146
+ Logging
147
+ -------
148
+
149
+ The gem has the ability to show logs of its events:
150
+
151
+ require 'dwc-archive'
152
+ DarwinCore.logger = Logger.new($stdout)
153
+
154
+
146
155
  Note on Patches/Pull Requests
147
156
  -----------------------------
148
157
 
data/Rakefile CHANGED
@@ -6,7 +6,10 @@ begin
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "dwc-archive"
8
8
  gem.summary = %Q{Handler of Darwin Core Archive files}
9
- gem.description = %q{Darwin Core Archive is the current standard exchange format for GLobal Names Architecture modules. This gem makes it easy to incorporate files in Darwin Core Archive format into a ruby project.}
9
+ gem.description = 'Darwin Core Archive is the current standard exchange ' +
10
+ 'format for GLobal Names Architecture modules. ' +
11
+ 'This gem makes it easy to incorporate files in ' +
12
+ 'Darwin Core Archive format into a ruby project.'
10
13
  gem.email = "dmozzherin at gmail dot com"
11
14
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
12
15
  gem.authors = ["Dmitry Mozzherin"]
@@ -14,11 +17,11 @@ begin
14
17
  gem.add_dependency 'parsley-store', ">= 0.3.0"
15
18
  gem.add_development_dependency "rspec", ">= 1.2.9"
16
19
  gem.add_development_dependency "cucumber", ">= 0"
17
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
20
  end
19
21
  Jeweler::GemcutterTasks.new
20
22
  rescue LoadError
21
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
23
+ puts 'Jeweler (or a dependency) not available. ' +
24
+ 'Install it with: gem install jeweler'
22
25
  end
23
26
 
24
27
  require 'rspec/core/rake_task'
@@ -40,18 +43,20 @@ begin
40
43
  task :features => :check_dependencies
41
44
  rescue LoadError
42
45
  task :features do
43
- abort "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
46
+ abort 'Cucumber is not available. In order to run features, ' +
47
+ 'you must: sudo gem install cucumber'
44
48
  end
45
49
  end
46
50
 
47
- task :default => :spec
51
+ desc 'Run an IRB session with CSL loaded'
52
+ task :irb, [:script] do |t, args|
53
+ ARGV.clear
54
+
55
+ require 'irb'
56
+ require_relative 'lib/dwc-archive'
48
57
 
49
- # require 'rdoc/task'
50
- # Rake::RDocTask.new do |rdoc|
51
- # version = File.exist?('VERSION') ? File.read('VERSION') : ""
52
- #
53
- # rdoc.rdoc_dir = 'rdoc'
54
- # rdoc.title = "dwc-archive #{version}"
55
- # rdoc.rdoc_files.include('README*')
56
- # rdoc.rdoc_files.include('lib/**/*.rb')
57
- # end
58
+ IRB.conf[:SCRIPT] = args.script
59
+ IRB.start
60
+ end
61
+
62
+ task :default => :spec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.9.5
1
+ 0.9.6
@@ -4,7 +4,10 @@ require 'parsley-store'
4
4
  class DarwinCore
5
5
 
6
6
  class TaxonNormalized
7
- attr_accessor :id, :local_id, :global_id, :source, :parent_id, :classification_path_id, :classification_path, :linnean_classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
7
+ attr_accessor :id, :local_id, :global_id, :source, :parent_id,
8
+ :classification_path_id, :classification_path,
9
+ :linnean_classification_path, :current_name, :current_name_canonical,
10
+ :synonyms, :vernacular_names, :rank, :status
8
11
 
9
12
  def initialize
10
13
  @id = @parent_id = @rank = @status = nil
@@ -22,8 +25,21 @@ class DarwinCore
22
25
 
23
26
  end
24
27
 
25
- class SynonymNormalized < Struct.new(:id, :name, :canonical_name, :status, :source, :local_id, :global_id);end
26
- class VernacularNormalized < Struct.new(:name, :language, :locality, :country_code);end
28
+ class GnubTaxon < TaxonNormalized
29
+ attr_accessor :uuid, :uuid_path
30
+
31
+ def initialize
32
+ super
33
+ @uuid = nil
34
+ @uuid_path = []
35
+ end
36
+ end
37
+
38
+ class SynonymNormalized < Struct.new(:id, :name, :canonical_name,
39
+ :status, :source, :local_id,
40
+ :global_id);end
41
+ class VernacularNormalized < Struct.new(:name, :language, :locality,
42
+ :country_code);end
27
43
 
28
44
  class ClassificationNormalizer
29
45
  attr_reader :error_names, :tree, :normalized_data
@@ -46,7 +62,9 @@ class DarwinCore
46
62
  end
47
63
 
48
64
  def add_vernacular_name_string(name_string)
49
- @vernacular_name_strings[name_string] = 1 unless @vernacular_name_strings[name_string]
65
+ unless @vernacular_name_strings[name_string]
66
+ @vernacular_name_strings[name_string] = 1
67
+ end
50
68
  end
51
69
 
52
70
  def name_strings(opts = {})
@@ -68,13 +86,18 @@ class DarwinCore
68
86
  end
69
87
 
70
88
  def normalize(opts = {})
71
- opts = { :with_canonical_names => true, :with_extensions => true }.merge(opts)
89
+ opts = { :with_canonical_names => true,
90
+ :with_extensions => true }.merge(opts)
72
91
  @with_canonical_names = !!opts[:with_canonical_names]
73
- DarwinCore.logger_write(@dwc.object_id, "Started normalization of the classification")
92
+ DarwinCore.logger_write(@dwc.object_id,
93
+ 'Started normalization of the classification')
74
94
  ingest_core
75
- DarwinCore.logger_write(@dwc.object_id, "Calculating the classification parent/child paths")
76
- has_parent_id? ? calculate_classification_path : @normalized_data.keys.each { |id| @tree[id] = {} }
77
- DarwinCore.logger_write(@dwc.object_id, "Ingesting data from extensions")
95
+ DarwinCore.logger_write(@dwc.object_id,
96
+ 'Calculating the classification parent/child paths')
97
+ has_parent_id? ?
98
+ calculate_classification_path :
99
+ @normalized_data.keys.each { |id| @tree[id] = {} }
100
+ DarwinCore.logger_write(@dwc.object_id, 'Ingesting data from extensions')
78
101
  if !!opts[:with_extensions]
79
102
  ingest_extensions
80
103
  end
@@ -85,7 +108,8 @@ class DarwinCore
85
108
 
86
109
  def get_canonical_name(a_scientific_name)
87
110
  if @with_canonical_names
88
- canonical_name = @parser.parse(a_scientific_name, :canonical_only => true)
111
+ canonical_name = @parser.parse(a_scientific_name,
112
+ :canonical_only => true)
89
113
  canonical_name.to_s.empty? ? a_scientific_name : canonical_name
90
114
  else
91
115
  nil
@@ -93,9 +117,9 @@ class DarwinCore
93
117
  end
94
118
 
95
119
  def get_fields(element)
96
- data = element.fields.inject({}) do |res, f|
120
+ data = element.fields.inject({}) do |res, f|
97
121
  field = f[:term].split('/')[-1]
98
- field = field ? field.downcase.to_sym : ''
122
+ field = field ? field.downcase.to_sym : ''
99
123
  res[field] = f[:index].to_i
100
124
  res
101
125
  end
@@ -109,12 +133,16 @@ class DarwinCore
109
133
 
110
134
  def add_synonym_from_core(taxon_id, row)
111
135
  @synonyms[row[@core_fields[:id]]] = taxon_id
112
- taxon = @normalized_data[row[taxon_id]] ? @normalized_data[row[taxon_id]] : @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
136
+ taxon = @normalized_data[row[taxon_id]] ?
137
+ @normalized_data[row[taxon_id]] :
138
+ @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
113
139
  synonym = SynonymNormalized.new(
114
140
  row[@core_fields[:id]],
115
141
  row[@core_fields[:scientificname]],
116
142
  row[@core_fields[:canonicalname]],
117
- @core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil,
143
+ @core_fields[:taxonomicstatus] ?
144
+ row[@core_fields[:taxonomicstatus]] :
145
+ nil,
118
146
  @core_fields[:source] ? row[@core_fields[:source]] : nil,
119
147
  @core_fields[:localid] ? row[@core_fields[:localid]] : nil,
120
148
  @core_fields[:globalid] ? row[@core_fields[:globalid]] : nil,
@@ -129,10 +157,14 @@ class DarwinCore
129
157
  canonical_name = nil
130
158
  scientific_name = row[fields[:scientificname]].strip
131
159
  if separate_canonical_and_authorship?(row, fields)
132
- canonical_name = row[fields[:scientificname]].strip if @with_canonical_names
160
+ if @with_canonical_names
161
+ canonical_name = row[fields[:scientificname]].strip
162
+ end
133
163
  scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
134
164
  else
135
- canonical_name = get_canonical_name(row[fields[:scientificname]]) if @with_canonical_names
165
+ if @with_canonical_names
166
+ canonical_name = get_canonical_name(row[fields[:scientificname]])
167
+ end
136
168
  end
137
169
  fields[:canonicalname] = row.size
138
170
  row << canonical_name
@@ -149,43 +181,72 @@ class DarwinCore
149
181
 
150
182
  def ingest_core
151
183
  @normalized_data = {}
152
- raise DarwinCore::CoreFileError.new("Darwin Core core fields must contain taxon id and scientific name") unless (@core_fields[:id] && @core_fields[:scientificname])
184
+ has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
185
+ raise DarwinCore::CoreFileError.new('Darwin Core core fields must ' +
186
+ 'contain taxon id and scientific name') unless has_name_and_id
153
187
  @dwc.core.read do |rows|
154
188
  rows[1].each do |error|
155
- @error_names << { :data => error, :error => :reading_or_encoding_error }
189
+ @error_names << { :data => error,
190
+ :error => :reading_or_encoding_error }
156
191
  end
157
192
  rows[0].each do |r|
158
193
  set_scientific_name(r, @core_fields)
159
194
  #core has AcceptedNameUsageId
160
- if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
195
+ if @core_fields[:acceptednameusageid] &&
196
+ r[@core_fields[:acceptednameusageid]] &&
197
+ r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
161
198
  add_synonym_from_core(@core_fields[:acceptednameusageid], r)
162
- elsif !@core_fields[:acceptednameusageid] && @core_fields[:taxonomicstatus] && status_synonym?(r[@core_fields[:taxonomicstatus]])
199
+ elsif !@core_fields[:acceptednameusageid] &&
200
+ @core_fields[:taxonomicstatus] &&
201
+ status_synonym?(r[@core_fields[:taxonomicstatus]])
163
202
  add_synonym_from_core(parent_id, r) if has_parent_id?
164
203
  else
165
- taxon = @normalized_data[r[@core_fields[:id]]] ? @normalized_data[r[@core_fields[:id]]] : @normalized_data[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
204
+ unless @normalized_data[r[@core_fields[:id]]]
205
+ if gnub_archive?
206
+ new_taxon = DarwinCore::GnubTaxon.new
207
+ else
208
+ new_taxon = DarwinCore::TaxonNormalized.new
209
+ end
210
+ @normalized_data[r[@core_fields[:id]]] = new_taxon
211
+ end
212
+ taxon = @normalized_data[r[@core_fields[:id]]]
213
+ if gnub_archive?
214
+ taxon.uuid = r[@core_fields[:originalnameusageid]]
215
+ taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
216
+ split('|')
217
+ end
166
218
  taxon.id = r[@core_fields[:id]]
167
219
  taxon.current_name = r[@core_fields[:scientificname]]
168
220
  taxon.current_name_canonical = r[@core_fields[:canonicalname]]
169
221
  taxon.parent_id = has_parent_id? ? r[parent_id] : nil
170
222
  taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
171
- taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
223
+ if @core_fields[:taxonomicstatus]
224
+ taxon.status = r[@core_fields[:taxonomicstatus]]
225
+ end
172
226
  taxon.source = r[@core_fields[:source]] if @core_fields[:source]
173
227
  taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
174
- taxon.global_id = r[@core_fields[:globalid]] if @core_fields[:globalid]
175
- taxon.linnean_classification_path = get_linnean_classification_path(r, taxon)
228
+ if @core_fields[:globalid]
229
+ taxon.global_id = r[@core_fields[:globalid]]
230
+ end
231
+ taxon.linnean_classification_path =
232
+ get_linnean_classification_path(r, taxon)
176
233
  add_name_string(taxon.current_name)
177
- add_name_string(taxon.current_name_canonical) if taxon.current_name_canonical && !taxon.current_name_canonical.empty?
234
+ has_canonical = taxon.current_name_canonical &&
235
+ !taxon.current_name_canonical.empty?
236
+ add_name_string(taxon.current_name_canonical) if has_canonical
178
237
  end
179
238
  end
180
239
  end
181
240
  end
182
241
 
183
242
  def has_parent_id?
184
- @has_parent_id ||= @core_fields.has_key?(:highertaxonid) || @core_fields.has_key?(:parentnameusageid)
243
+ @has_parent_id ||= @core_fields.has_key?(:highertaxonid) ||
244
+ @core_fields.has_key?(:parentnameusageid)
185
245
  end
186
246
 
187
247
  def parent_id
188
- parent_id_field = @core_fields[:highertaxonid] || @core_fields[:parentnameusageid]
248
+ parent_id_field = @core_fields[:highertaxonid] ||
249
+ @core_fields[:parentnameusageid]
189
250
  end
190
251
 
191
252
  def calculate_classification_path
@@ -200,28 +261,43 @@ class DarwinCore
200
261
  def get_classification_path(taxon)
201
262
  return if !taxon.classification_path_id.empty?
202
263
  @paths_num += 1
203
- DarwinCore.logger_write(@dwc.object_id, "Calculated %s paths" % @paths_num) if @paths_num % 10000 == 0
264
+ if @paths_num % 10000 == 0
265
+ DarwinCore.logger_write(@dwc.object_id,
266
+ "Calculated %s paths" % @paths_num)
267
+ end
204
268
  current_node = {taxon.id => {}}
205
269
  if DarwinCore.nil_field?(taxon.parent_id)
206
- taxon.classification_path << taxon.current_name_canonical if @with_canonical_names
270
+ if @with_canonical_names
271
+ taxon.classification_path << taxon.current_name_canonical
272
+ end
207
273
  taxon.classification_path_id << taxon.id
208
274
  @tree.merge!(current_node)
209
275
  else
210
276
  parent_cp = parent_cpid = nil
211
277
  if @normalized_data[taxon.parent_id]
212
- parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names
213
- parent_cpid = @normalized_data[taxon.parent_id].classification_path_id
278
+ if @with_canonical_names
279
+ parent_cp = @normalized_data[taxon.parent_id].classification_path
280
+ end
281
+ parent_cpid = @normalized_data[taxon.parent_id].
282
+ classification_path_id
214
283
  else
215
284
  current_parent = @normalized_data[@synonyms[taxon.parent_id]]
216
285
  if current_parent
217
- error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' is deprecated"
218
- @error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => current_parent }
219
-
220
- parent_cp = current_parent.classification_path if @with_canonical_names
286
+ error = 'WARNING: The parent of the taxon ' +
287
+ "\'#{taxon.current_name}\' is deprecated"
288
+ @error_names << {:data => taxon,
289
+ :error => :deprecated_parent,
290
+ :current_parent => current_parent }
291
+
292
+ if @with_canonical_names
293
+ parent_cp = current_parent.classification_path
294
+ end
221
295
  parent_cpid = current_parent.classification_path_id
222
296
  else
223
- error = "WARNING: The parent of the taxon \'#{taxon.current_name}\' not found"
224
- @error_names << {:data => taxon, :error => :deprecated_parent, :current_parent => nil}
297
+ error = 'WARNING: The parent of the taxon ' +
298
+ "\'#{taxon.current_name}\' not found"
299
+ @error_names << {:data => taxon,
300
+ :error => :deprecated_parent, :current_parent => nil}
225
301
  end
226
302
  end
227
303
  return 'error' unless parent_cpid
@@ -230,23 +306,30 @@ class DarwinCore
230
306
  begin
231
307
  res = get_classification_path(@normalized_data[taxon.parent_id])
232
308
  rescue SystemStackError
233
- @error_names << {:data => taxon, :error => :too_deep_hierarchy, :current_parent => nil}
309
+ @error_names << {:data => taxon,
310
+ :error => :too_deep_hierarchy, :current_parent => nil}
234
311
  end
235
312
  return res if res == 'error'
236
313
  if @with_canonical_names
237
- taxon.classification_path += @normalized_data[taxon.parent_id].classification_path + [taxon.current_name_canonical]
314
+ taxon.classification_path += @normalized_data[taxon.parent_id].
315
+ classification_path + [taxon.current_name_canonical]
238
316
  end
239
- taxon.classification_path_id += @normalized_data[taxon.parent_id].classification_path_id + [taxon.id]
240
- parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
317
+ taxon.classification_path_id += @normalized_data[taxon.parent_id].
318
+ classification_path_id + [taxon.id]
319
+ parent_node = @normalized_data[taxon.parent_id].
320
+ classification_path_id.inject(@tree) {|node, id| node[id]}
241
321
  parent_node.merge!(current_node)
242
322
  else
243
- taxon.classification_path += parent_cp + [taxon.current_name_canonical] if @with_canonical_names
323
+ taxon.classification_path += parent_cp +
324
+ [taxon.current_name_canonical] if @with_canonical_names
244
325
  taxon.classification_path_id += parent_cpid + [taxon.id]
245
- parent_node = @normalized_data[taxon.parent_id].classification_path_id.inject(@tree) {|node, id| node[id]}
326
+ parent_node = @normalized_data[taxon.parent_id].
327
+ classification_path_id.inject(@tree) {|node, id| node[id]}
246
328
  begin
247
329
  parent_node.merge!(current_node)
248
330
  rescue NoMethodError => e
249
- DarwinCore.logger_write(@dwc.object_id, "Error '%s' taxon %s" % [e.message, taxon.id])
331
+ DarwinCore.logger_write(@dwc.object_id,
332
+ "Error '%s' taxon %s" % [e.message, taxon.id])
250
333
  return 'error'
251
334
  end
252
335
  end
@@ -256,13 +339,15 @@ class DarwinCore
256
339
  def ingest_extensions
257
340
  @extensions.each do |e|
258
341
  ext, fields = *e
259
- ingest_synonyms(e) if (File.split(e[0].file_path).last.match(/synonym/i) && fields.keys.include?(:scientificname))
342
+ ingest_synonyms(e) if (File.split(e[0].file_path).
343
+ last.match(/synonym/i) &&
344
+ fields.keys.include?(:scientificname))
260
345
  ingest_vernaculars(e) if fields.keys.include? :vernacularname
261
346
  end
262
347
  end
263
348
 
264
349
  def ingest_synonyms(extension)
265
- DarwinCore.logger_write(@dwc.object_id, "Ingesting synonyms extension")
350
+ DarwinCore.logger_write(@dwc.object_id, 'Ingesting synonyms extension')
266
351
  ext, fields = *extension
267
352
  ext.read do |rows|
268
353
  rows[0].each do |r|
@@ -281,14 +366,16 @@ class DarwinCore
281
366
  add_name_string(synonym.name)
282
367
  add_name_string(synonym.canonical_name)
283
368
  else
284
- @error_names << { :taxon => synonym, :error => :synonym_of_unknown_taxa }
369
+ @error_names << { :taxon => synonym,
370
+ :error => :synonym_of_unknown_taxa }
285
371
  end
286
372
  end
287
373
  end
288
374
  end
289
375
 
290
376
  def ingest_vernaculars(extension)
291
- DarwinCore.logger_write(@dwc.object_id, "Ingesting vernacular names extension")
377
+ DarwinCore.logger_write(@dwc.object_id,
378
+ 'Ingesting vernacular names extension')
292
379
  ext, fields = *extension
293
380
  ext.read do |rows|
294
381
  rows[0].each do |r|
@@ -313,20 +400,25 @@ class DarwinCore
313
400
  @normalized_data[r[fields[:id]]].vernacular_names << vernacular
314
401
  add_vernacular_name_string(vernacular.name)
315
402
  else
316
- @error_names << { :vernacular_name => vernacular, :error => :vernacular_of_unknown_taxa }
403
+ @error_names << { :vernacular_name => vernacular,
404
+ :error => :vernacular_of_unknown_taxa }
317
405
  end
318
406
  end
319
407
  end
320
408
  end
321
-
409
+
322
410
  #Collect linnean classification path only on species level
323
411
  def get_linnean_classification_path(row, taxon)
324
412
  res = []
325
- [:kingdom, :phylum, :class, :order, :family, :genus, :subgenus].each do |clade|
326
- res << [row[@core_fields[clade]], clade] if @core_fields[clade]
413
+ [:kingdom, :phylum, :class,
414
+ :order, :family, :genus, :subgenus].each do |clade|
415
+ res << [row[@core_fields[clade]], clade] if @core_fields[clade]
327
416
  end
328
417
  res
329
418
  end
330
419
 
420
+ def gnub_archive?
421
+ @core_fields[:originalnameusageidpath]
422
+ end
331
423
  end
332
424
  end
Binary file
@@ -49,7 +49,7 @@ describe DarwinCore do
49
49
  file = File.join(@file_dir, 'invalid.tar.gz')
50
50
  lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::InvalidArchiveError)
51
51
  end
52
-
52
+
53
53
  it "should raise an error if archive is not in utf-8" do
54
54
  file = File.join(@file_dir, 'latin1.tar.gz')
55
55
  lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::EncodingError)
@@ -219,9 +219,9 @@ describe DarwinCore do
219
219
  dwc = DarwinCore.new(file)
220
220
  norm = dwc.normalize_classification
221
221
  taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
222
- taxa.size.should == 2
222
+ taxa.size.should == 2
223
223
  end
224
-
224
+
225
225
  it "should be able to get language and locality fields for vernacular names" do
226
226
  file = File.join(@file_dir, 'language_locality.tar.gz')
227
227
  dwc = DarwinCore.new(file)
@@ -231,6 +231,20 @@ describe DarwinCore do
231
231
  vn.language.should == 'en'
232
232
  vn.locality.should == 'New England'
233
233
  end
234
+
235
+ it 'should be able to get uuids from gnub dataset' do
236
+ file = File.join(@file_dir, 'gnub.tar.gz')
237
+ dwc = DarwinCore.new(file)
238
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
239
+ cn.normalize
240
+ vn = cn.normalized_data['9c399f90-cfb8-5a7f-9a21-18285a473488']
241
+ vn.class.should == DarwinCore::GnubTaxon
242
+ vn.uuid.should == '8faa91f6-663f-4cfe-b785-0ab4e9415a51'
243
+ vn.uuid_path.should == [
244
+ "9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3",
245
+ "bf4c91c0-3d1f-44c7-9d3b-249382182a26",
246
+ "8faa91f6-663f-4cfe-b785-0ab4e9415a51"]
247
+ end
234
248
  end
235
249
 
236
250
  end
@@ -1,6 +1,5 @@
1
1
  $LOAD_PATH.unshift(File.dirname(__FILE__))
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
- require 'rubygems'
4
3
  require 'dwc-archive'
5
4
  require 'rspec'
6
5
  require 'rspec/autorun'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.5
4
+ version: 0.9.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-06-17 00:00:00.000000000 Z
12
+ date: 2013-06-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -171,6 +171,7 @@ files:
171
171
  - spec/files/empty_coreid.tar.gz
172
172
  - spec/files/file with characters(3).gz
173
173
  - spec/files/flat_list.tar.gz
174
+ - spec/files/gnub.tar.gz
174
175
  - spec/files/invalid.tar.gz
175
176
  - spec/files/junk_dir_inside.zip
176
177
  - spec/files/language_locality.tar.gz
@@ -202,7 +203,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
202
203
  version: '0'
203
204
  segments:
204
205
  - 0
205
- hash: -2036702231114584390
206
+ hash: 2372622078447411988
206
207
  required_rubygems_version: !ruby/object:Gem::Requirement
207
208
  none: false
208
209
  requirements: