dwc-archive 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.4.3
data/lib/dwc-archive.rb CHANGED
@@ -25,6 +25,9 @@ require 'dwc-archive/generator_eml_xml'
25
25
  require 'dwc-archive/classification_normalizer'
26
26
 
27
27
  class DarwinCore
28
+
29
+ VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
30
+
28
31
  attr_reader :archive, :core, :metadata, :extensions
29
32
  alias :eml :metadata
30
33
 
@@ -52,9 +55,9 @@ class DarwinCore
52
55
  end
53
56
 
54
57
  # Generates a hash from classification data, with the path to each node and lists of synonyms and vernacular names.
55
- def normalize_classification
58
+ def normalize_classification(verbose = false)
56
59
  return nil unless has_parent_id?
57
- DarwinCore::ClassificationNormalizer.new(self).normalize
60
+ DarwinCore::ClassificationNormalizer.new(self, verbose).normalize
58
61
  end
59
62
 
60
63
  def has_parent_id?
@@ -1,8 +1,8 @@
1
1
  # encoding: utf-8
2
- require 'biodiversity'
2
+ require 'parsley-store'
3
3
 
4
4
  class DarwinCore
5
-
5
+
6
6
  class TaxonNormalized
7
7
  attr_accessor :id, :parent_id, :classification_path, :current_name, :current_name_canonical, :synonyms, :vernacular_names, :rank, :status
8
8
 
@@ -18,12 +18,15 @@ class DarwinCore
18
18
  class VernacularNormalized < Struct.new(:name, :language);end
19
19
 
20
20
  class ClassificationNormalizer
21
- def initialize(dwc_instance)
21
+
22
+ def initialize(dwc_instance, verbose = false)
22
23
  @dwc = dwc_instance
23
24
  @core = get_fields(@dwc.core)
24
25
  @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
25
26
  @res = {}
26
- @parser = ScientificNameParser.new
27
+ @parser = ParsleyStore.new(1,2)
28
+ @verbose = verbose
29
+ @verbose_count = 1000
27
30
  end
28
31
 
29
32
  def normalize
@@ -42,7 +45,7 @@ class DarwinCore
42
45
  begin
43
46
  parsed_name = @parser.parse(a_scientific_name)[:scientificName]
44
47
  rescue
45
- @parser = ScientificNameParser.new
48
+ @parser = ParsleyStore.new(1,2)
46
49
  parsed_name = @parser.parse(a_scientific_name)[:scientificName]
47
50
  end
48
51
  parsed_name[:parsed] ? parsed_name[:canonical] : a_scientific_name
@@ -63,12 +66,17 @@ class DarwinCore
63
66
  taxon.synonyms << SynonymNormalized.new(
64
67
  row[@core[:scientificname]],
65
68
  canonical_name(row[@core[:scientificname]]),
66
- row[@core[:taxonomicstatus]])
69
+ @core[:taxonomicstatus] ? row[@core[:taxonomicstatus]] : nil)
67
70
  end
68
71
 
69
72
  def injest_core
70
73
  raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core[:id] && @core[:scientificname])
71
- @dwc.core.read[0].each do |r|
74
+ puts "Reading core information" if @verbose
75
+ rows = @dwc.core.read[0]
76
+ puts "Injesting information from the core" if @verbose
77
+ rows.each_with_index do |r, i|
78
+ count = i + 1
79
+ puts "Injesting %s'th record" % count if @verbose and count % @verbose_count == 0
72
80
  #core has AcceptedNameUsageId
73
81
  if @core[:acceptednameusageid] && r[@core[:acceptednameusageid]] && r[@core[:acceptednameusageid]] != r[@core[:id]]
74
82
  add_synonym_from_core(@core[:acceptednameusageid], r)
@@ -102,7 +110,7 @@ class DarwinCore
102
110
  if DarwinCore.nil_field?(taxon.parent_id)
103
111
  taxon.classification_path = [taxon.current_name_canonical]
104
112
  else
105
- parent_cp = @res[taxon.parent_id].classification_path
113
+ parent_cp = @res[taxon.parent_id].classification_path
106
114
  if parent_cp
107
115
  taxon.classification_path = parent_cp + [taxon.current_name_canonical]
108
116
  else
@@ -121,8 +129,11 @@ class DarwinCore
121
129
  end
122
130
 
123
131
  def injest_synonyms(extension)
132
+ puts "Injesting synonyms extension" if @verbose
124
133
  ext, fields = *extension
125
- ext.read[0].each do |r|
134
+ ext.read[0].each_with_index do |r, i|
135
+ count = i + 1
136
+ puts "Injesting %s'th record" % count if @verbose && count % @verbose_count == 0
126
137
  @res[r[fields[:id]]].synonyms << SynonymNormalized.new(
127
138
  r[fields[:scientificname]],
128
139
  canonical_name(r[fields[:scientificname]]),
@@ -131,8 +142,11 @@ class DarwinCore
131
142
  end
132
143
 
133
144
  def injest_vernaculars(extension)
145
+ puts "Injesting vernacular names" if @verbose
134
146
  ext, fields = *extension
135
- ext.read[0].each do |r|
147
+ ext.read[0].each_with_index do |r, i|
148
+ count = i + 1
149
+ puts "Injesting %s'th record" % count if @verbose && count % @verbose_count == 0
136
150
  @res[r[fields[:id]]].vernacular_names << VernacularNormalized.new(
137
151
  r[fields[:vernacularname]],
138
152
  fields[:languagecode] ? r[fields[:languagecode]] : nil)
@@ -141,3 +155,4 @@ class DarwinCore
141
155
 
142
156
  end
143
157
  end
158
+
@@ -5,6 +5,12 @@ describe DarwinCore do
5
5
  @file_dir = File.join(File.dirname(__FILE__), '..', 'files')
6
6
  end
7
7
 
8
+ describe "VERSION" do
9
+ it "should return VERSION number" do
10
+ DarwinCore::VERSION.split('.').join('').to_i.should > 41
11
+ end
12
+ end
13
+
8
14
  describe "::nil_field?" do
9
15
  it "should return true for entries which normally mean nil" do
10
16
  [nil, '/N', ''].each do |i|
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 9
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 4
8
- - 2
9
- version: 0.4.2
9
+ - 3
10
+ version: 0.4.3
10
11
  platform: ruby
11
12
  authors:
12
13
  - Dmitry Mozzherin
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-09-10 00:00:00 -04:00
18
+ date: 2010-09-13 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -25,6 +26,7 @@ dependencies:
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 13
28
30
  segments:
29
31
  - 1
30
32
  - 2
@@ -40,6 +42,7 @@ dependencies:
40
42
  requirements:
41
43
  - - ">="
42
44
  - !ruby/object:Gem::Version
45
+ hash: 3
43
46
  segments:
44
47
  - 0
45
48
  version: "0"
@@ -112,6 +115,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
112
115
  requirements:
113
116
  - - ">="
114
117
  - !ruby/object:Gem::Version
118
+ hash: 3
115
119
  segments:
116
120
  - 0
117
121
  version: "0"
@@ -120,6 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
120
124
  requirements:
121
125
  - - ">="
122
126
  - !ruby/object:Gem::Version
127
+ hash: 3
123
128
  segments:
124
129
  - 0
125
130
  version: "0"