dwc-archive 0.4.13 → 0.4.14

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.13
1
+ 0.4.14
@@ -27,7 +27,7 @@ class DarwinCore
27
27
 
28
28
  def initialize(dwc_instance, verbose = false)
29
29
  @dwc = dwc_instance
30
- @core = get_fields(@dwc.core)
30
+ @core_fields = get_fields(@dwc.core)
31
31
  @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
32
32
  @res = {}
33
33
  @parser = ParsleyStore.new(1,2)
@@ -56,7 +56,7 @@ class DarwinCore
56
56
 
57
57
  private
58
58
 
59
- def canonical_name(a_scientific_name)
59
+ def get_canonical_name(a_scientific_name)
60
60
  if R19
61
61
  a_scientific_name.force_encoding('utf-8')
62
62
  end
@@ -84,38 +84,47 @@ class DarwinCore
84
84
  def add_synonym_from_core(taxon_id, row)
85
85
  taxon = @res[row[taxon_id]] ? @res[row[taxon_id]] : @res[row[taxon_id]] = DarwinCore::TaxonNormalized.new
86
86
  taxon.synonyms << SynonymNormalized.new(
87
- row[@core[:scientificname]],
88
- canonical_name(row[@core[:scientificname]]),
89
- @core[:taxonomicstatus] ? row[@core[:taxonomicstatus]] : nil)
87
+ row[@core_fields[:scientificname]],
88
+ row[@core_fields[:canonicalname]],
89
+ @core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
90
+ end
91
+
92
+ def set_scientific_name(row, fields)
93
+ canonical_name = fields[:scientificnameauthorship] ? row[fields[:scientificname]] : get_canonical_name(row[fields[:scientificname]])
94
+ fields[:canonicalname] = row.size
95
+ row << canonical_name
96
+ scientific_name = (fields[:scientificnameauthorship] && row[fields[:scientificnameauthorship]].to_s.strip != '') ? row[fields[:scientificname]].strip + ' ' + row[fields[:scientificnameauthorship]].strip : row[fields[:scientificname]].strip
97
+ row[fields[:scientificname]] = scientific_name
90
98
  end
91
99
 
92
100
  def ingest_core
93
- raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core[:id] && @core[:scientificname])
101
+ raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname])
94
102
  puts "Reading core information" if @verbose
95
103
  rows = @dwc.core.read[0]
96
104
  puts "Ingesting information from the core" if @verbose
97
105
  rows.each_with_index do |r, i|
98
106
  count = i + 1
107
+ set_scientific_name(r, @core_fields)
99
108
  puts "Ingesting %s'th record" % count if @verbose and count % @verbose_count == 0
100
109
  #core has AcceptedNameUsageId
101
- if @core[:acceptednameusageid] && r[@core[:acceptednameusageid]] && r[@core[:acceptednameusageid]] != r[@core[:id]]
102
- add_synonym_from_core(@core[:acceptednameusageid], r)
103
- elsif !@core[:acceptednameusageid] && status_synonym?(r[@core[:taxonomicstatus]])
110
+ if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
111
+ add_synonym_from_core(@core_fields[:acceptednameusageid], r)
112
+ elsif !@core_fields[:acceptednameusageid] && status_synonym?(r[@core_fields[:taxonomicstatus]])
104
113
  add_synonym_from_core(parent_id, r)
105
114
  else
106
- taxon = @res[r[@core[:id]]] ? @res[r[@core[:id]]] : @res[r[@core[:id]]] = DarwinCore::TaxonNormalized.new
107
- taxon.id = r[@core[:id]]
108
- taxon.current_name = r[@core[:scientificname]]
109
- taxon.current_name_canonical = canonical_name(r[@core[:scientificname]])
115
+ taxon = @res[r[@core_fields[:id]]] ? @res[r[@core_fields[:id]]] : @res[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
116
+ taxon.id = r[@core_fields[:id]]
117
+ taxon.current_name = r[@core_fields[:scientificname]]
118
+ taxon.current_name_canonical = r[@core_fields[:canonicalname]]
110
119
  taxon.parent_id = r[parent_id]
111
- taxon.rank = r[@core[:taxonrank]] if @core[:taxonrank]
112
- taxon.status = r[@core[:taxonomicstatus]] if @core[:taxonomicstatus]
120
+ taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
121
+ taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
113
122
  end
114
123
  end
115
124
  end
116
125
 
117
126
  def parent_id
118
- parent_id_field = @core[:highertaxonid] || @core[:parentnameusageid]
127
+ parent_id_field = @core_fields[:highertaxonid] || @core_fields[:parentnameusageid]
119
128
  end
120
129
 
121
130
  def calculate_classification_path
@@ -172,10 +181,11 @@ class DarwinCore
172
181
  ext, fields = *extension
173
182
  ext.read[0].each_with_index do |r, i|
174
183
  count = i + 1
184
+ set_scientific_name(r, fields)
175
185
  puts "Ingesting %s'th record" % count if @verbose && count % @verbose_count == 0
176
186
  @res[r[fields[:id]]].synonyms << SynonymNormalized.new(
177
187
  r[fields[:scientificname]],
178
- canonical_name(r[fields[:scientificname]]),
188
+ r[fields[:canonicalname]],
179
189
  fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
180
190
  end
181
191
  end
@@ -98,6 +98,17 @@ describe DarwinCore do
98
98
  norm = dwc.normalize_classification
99
99
  norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should > 0
100
100
  end
101
+
102
+ it "should be able work with files which have scientificNameAuthorship" do
103
+ file = File.join(@file_dir, 'sci_name_authorship.tar.gz')
104
+ dwc = DarwinCore.new(file)
105
+ $lala = 1
106
+ norm = dwc.normalize_classification
107
+ taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
108
+ taxa.size.should == 507
109
+ syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
110
+ syn.size.should == 50
111
+ end
101
112
  end
102
113
 
103
114
  end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 19
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 4
8
- - 13
9
- version: 0.4.13
9
+ - 14
10
+ version: 0.4.14
10
11
  platform: ruby
11
12
  authors:
12
13
  - Dmitry Mozzherin
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-10-07 00:00:00 -04:00
18
+ date: 2010-10-08 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -25,6 +26,7 @@ dependencies:
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
@@ -38,6 +40,7 @@ dependencies:
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 13
41
44
  segments:
42
45
  - 1
43
46
  - 2
@@ -53,6 +56,7 @@ dependencies:
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
59
+ hash: 3
56
60
  segments:
57
61
  - 0
58
62
  version: "0"
@@ -104,6 +108,7 @@ files:
104
108
  - spec/files/junk_dir_inside.zip
105
109
  - spec/files/meta.xml
106
110
  - spec/files/minimal.tar.gz
111
+ - spec/files/sci_name_authorship.tar.gz
107
112
  - spec/files/synonyms_in_core_accepted_name_field.tar.gz
108
113
  - spec/files/synonyms_in_extension.tar.gz
109
114
  - spec/files/uncompressed
@@ -125,6 +130,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
125
130
  requirements:
126
131
  - - ">="
127
132
  - !ruby/object:Gem::Version
133
+ hash: 3
128
134
  segments:
129
135
  - 0
130
136
  version: "0"
@@ -133,6 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
139
  requirements:
134
140
  - - ">="
135
141
  - !ruby/object:Gem::Version
142
+ hash: 3
136
143
  segments:
137
144
  - 0
138
145
  version: "0"