dwc-archive 0.4.13 → 0.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.13
1
+ 0.4.14
@@ -27,7 +27,7 @@ class DarwinCore
27
27
 
28
28
  def initialize(dwc_instance, verbose = false)
29
29
  @dwc = dwc_instance
30
- @core = get_fields(@dwc.core)
30
+ @core_fields = get_fields(@dwc.core)
31
31
  @extensions = @dwc.extensions.map { |e| [e, get_fields(e)] }
32
32
  @res = {}
33
33
  @parser = ParsleyStore.new(1,2)
@@ -56,7 +56,7 @@ class DarwinCore
56
56
 
57
57
  private
58
58
 
59
- def canonical_name(a_scientific_name)
59
+ def get_canonical_name(a_scientific_name)
60
60
  if R19
61
61
  a_scientific_name.force_encoding('utf-8')
62
62
  end
@@ -84,38 +84,47 @@ class DarwinCore
84
84
  def add_synonym_from_core(taxon_id, row)
85
85
  taxon = @res[row[taxon_id]] ? @res[row[taxon_id]] : @res[row[taxon_id]] = DarwinCore::TaxonNormalized.new
86
86
  taxon.synonyms << SynonymNormalized.new(
87
- row[@core[:scientificname]],
88
- canonical_name(row[@core[:scientificname]]),
89
- @core[:taxonomicstatus] ? row[@core[:taxonomicstatus]] : nil)
87
+ row[@core_fields[:scientificname]],
88
+ row[@core_fields[:canonicalname]],
89
+ @core_fields[:taxonomicstatus] ? row[@core_fields[:taxonomicstatus]] : nil)
90
+ end
91
+
92
+ def set_scientific_name(row, fields)
93
+ canonical_name = fields[:scientificnameauthorship] ? row[fields[:scientificname]] : get_canonical_name(row[fields[:scientificname]])
94
+ fields[:canonicalname] = row.size
95
+ row << canonical_name
96
+ scientific_name = (fields[:scientificnameauthorship] && row[fields[:scientificnameauthorship]].to_s.strip != '') ? row[fields[:scientificname]].strip + ' ' + row[fields[:scientificnameauthorship]].strip : row[fields[:scientificname]].strip
97
+ row[fields[:scientificname]] = scientific_name
90
98
  end
91
99
 
92
100
  def ingest_core
93
- raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core[:id] && @core[:scientificname])
101
+ raise RuntimeError, "Darwin Core core fields must contain taxon id and scientific name" unless (@core_fields[:id] && @core_fields[:scientificname])
94
102
  puts "Reading core information" if @verbose
95
103
  rows = @dwc.core.read[0]
96
104
  puts "Ingesting information from the core" if @verbose
97
105
  rows.each_with_index do |r, i|
98
106
  count = i + 1
107
+ set_scientific_name(r, @core_fields)
99
108
  puts "Ingesting %s'th record" % count if @verbose and count % @verbose_count == 0
100
109
  #core has AcceptedNameUsageId
101
- if @core[:acceptednameusageid] && r[@core[:acceptednameusageid]] && r[@core[:acceptednameusageid]] != r[@core[:id]]
102
- add_synonym_from_core(@core[:acceptednameusageid], r)
103
- elsif !@core[:acceptednameusageid] && status_synonym?(r[@core[:taxonomicstatus]])
110
+ if @core_fields[:acceptednameusageid] && r[@core_fields[:acceptednameusageid]] && r[@core_fields[:acceptednameusageid]] != r[@core_fields[:id]]
111
+ add_synonym_from_core(@core_fields[:acceptednameusageid], r)
112
+ elsif !@core_fields[:acceptednameusageid] && status_synonym?(r[@core_fields[:taxonomicstatus]])
104
113
  add_synonym_from_core(parent_id, r)
105
114
  else
106
- taxon = @res[r[@core[:id]]] ? @res[r[@core[:id]]] : @res[r[@core[:id]]] = DarwinCore::TaxonNormalized.new
107
- taxon.id = r[@core[:id]]
108
- taxon.current_name = r[@core[:scientificname]]
109
- taxon.current_name_canonical = canonical_name(r[@core[:scientificname]])
115
+ taxon = @res[r[@core_fields[:id]]] ? @res[r[@core_fields[:id]]] : @res[r[@core_fields[:id]]] = DarwinCore::TaxonNormalized.new
116
+ taxon.id = r[@core_fields[:id]]
117
+ taxon.current_name = r[@core_fields[:scientificname]]
118
+ taxon.current_name_canonical = r[@core_fields[:canonicalname]]
110
119
  taxon.parent_id = r[parent_id]
111
- taxon.rank = r[@core[:taxonrank]] if @core[:taxonrank]
112
- taxon.status = r[@core[:taxonomicstatus]] if @core[:taxonomicstatus]
120
+ taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
121
+ taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
113
122
  end
114
123
  end
115
124
  end
116
125
 
117
126
  def parent_id
118
- parent_id_field = @core[:highertaxonid] || @core[:parentnameusageid]
127
+ parent_id_field = @core_fields[:highertaxonid] || @core_fields[:parentnameusageid]
119
128
  end
120
129
 
121
130
  def calculate_classification_path
@@ -172,10 +181,11 @@ class DarwinCore
172
181
  ext, fields = *extension
173
182
  ext.read[0].each_with_index do |r, i|
174
183
  count = i + 1
184
+ set_scientific_name(r, fields)
175
185
  puts "Ingesting %s'th record" % count if @verbose && count % @verbose_count == 0
176
186
  @res[r[fields[:id]]].synonyms << SynonymNormalized.new(
177
187
  r[fields[:scientificname]],
178
- canonical_name(r[fields[:scientificname]]),
188
+ r[fields[:canonicalname]],
179
189
  fields[:taxonomicstatus] ? r[fields[:taxonomicstatus]] : nil)
180
190
  end
181
191
  end
@@ -98,6 +98,17 @@ describe DarwinCore do
98
98
  norm = dwc.normalize_classification
99
99
  norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should > 0
100
100
  end
101
+
102
+ it "should be able work with files which have scientificNameAuthorship" do
103
+ file = File.join(@file_dir, 'sci_name_authorship.tar.gz')
104
+ dwc = DarwinCore.new(file)
105
+ $lala = 1
106
+ norm = dwc.normalize_classification
107
+ taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
108
+ taxa.size.should == 507
109
+ syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
110
+ syn.size.should == 50
111
+ end
101
112
  end
102
113
 
103
114
  end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 19
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 4
8
- - 13
9
- version: 0.4.13
9
+ - 14
10
+ version: 0.4.14
10
11
  platform: ruby
11
12
  authors:
12
13
  - Dmitry Mozzherin
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-10-07 00:00:00 -04:00
18
+ date: 2010-10-08 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -25,6 +26,7 @@ dependencies:
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
@@ -38,6 +40,7 @@ dependencies:
38
40
  requirements:
39
41
  - - ">="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 13
41
44
  segments:
42
45
  - 1
43
46
  - 2
@@ -53,6 +56,7 @@ dependencies:
53
56
  requirements:
54
57
  - - ">="
55
58
  - !ruby/object:Gem::Version
59
+ hash: 3
56
60
  segments:
57
61
  - 0
58
62
  version: "0"
@@ -104,6 +108,7 @@ files:
104
108
  - spec/files/junk_dir_inside.zip
105
109
  - spec/files/meta.xml
106
110
  - spec/files/minimal.tar.gz
111
+ - spec/files/sci_name_authorship.tar.gz
107
112
  - spec/files/synonyms_in_core_accepted_name_field.tar.gz
108
113
  - spec/files/synonyms_in_extension.tar.gz
109
114
  - spec/files/uncompressed
@@ -125,6 +130,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
125
130
  requirements:
126
131
  - - ">="
127
132
  - !ruby/object:Gem::Version
133
+ hash: 3
128
134
  segments:
129
135
  - 0
130
136
  version: "0"
@@ -133,6 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
139
  requirements:
134
140
  - - ">="
135
141
  - !ruby/object:Gem::Version
142
+ hash: 3
136
143
  segments:
137
144
  - 0
138
145
  version: "0"