dwc-archive 0.5.14 → 0.5.15

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.14
1
+ 0.5.15
@@ -19,7 +19,7 @@ class DarwinCore
19
19
  end
20
20
 
21
21
  class SynonymNormalized < Struct.new(:name, :canonical_name, :status);end
22
- class VernacularNormalized < Struct.new(:name, :language);end
22
+ class VernacularNormalized < Struct.new(:name, :language, :locality);end
23
23
 
24
24
  class ClassificationNormalizer
25
25
  attr_reader :error_names, :tree, :normalized_data
@@ -210,9 +210,20 @@ class DarwinCore
210
210
  ext, fields = *extension
211
211
  ext.read do |rows|
212
212
  rows[0].each do |r|
213
+
214
+ language = nil
215
+ if fields[:language]
216
+ language = r[fields[:language]]
217
+ elsif fields[:languagecode]
218
+ language = r[fields[:languagecode]]
219
+ end
220
+
221
+ locality = fields[:locality] ? r[fields[:locality]] : nil
222
+
213
223
  vernacular = VernacularNormalized.new(
214
224
  r[fields[:vernacularname]],
215
- fields[:languagecode] ? r[fields[:languagecode]] : nil)
225
+ language,
226
+ locality)
216
227
  @normalized_data[r[fields[:id]]].vernacular_names << vernacular
217
228
  add_name_string(vernacular.name)
218
229
  end
Binary file
data/spec/files/meta.xml CHANGED
@@ -1,22 +1,22 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <archive xmlns="http://rs.tdwg.org/dwc/text/">
3
- <core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/DarwinCore">
4
- <files>
5
- <location>DarwinCore.txt</location>
6
- </files>
7
- <id index="0" term="http://rs.tdwg.org/dwc/terms/TaxonID"/>
8
- <field index="1" term="http://purl.org/dc/terms/source"/>
9
- <field index="2" term="http://rs.tdwg.org/dwc/terms/ScientificName"/>
10
- <field index="3" term="http://rs.tdwg.org/dwc/terms/HigherTaxonID"/>
11
- <field index="4" term="http://rs.tdwg.org/dwc/terms/TaxonRank"/>
12
- <field index="5" term="http://rs.tdwg.org/dwc/terms/TaxonomicStatus"/>
13
- </core>
14
- <extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/ipt/terms/1.0/VernacularName">
15
- <files>
16
- <location>VernacularName.txt</location>
17
- </files>
18
- <coreid index="0"/>
19
- <field index="1" term="http://rs.gbif.org/ecat/terms/vernacularName"/>
20
- <field index="2" term="http://rs.gbif.org/thesaurus/languageCode"/>
21
- </extension>
3
+ <core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/DarwinCore">
4
+ <files>
5
+ <location>DarwinCore.txt</location>
6
+ </files>
7
+ <id index="0" term="http://rs.tdwg.org/dwc/terms/TaxonID"></id>
8
+ <field index="1" term="http://purl.org/dc/terms/source"></field>
9
+ <field index="2" term="http://rs.tdwg.org/dwc/terms/ScientificName"></field>
10
+ <field index="3" term="http://rs.tdwg.org/dwc/terms/HigherTaxonID"></field>
11
+ <field index="4" term="http://rs.tdwg.org/dwc/terms/TaxonRank"></field>
12
+ <field index="5" term="http://rs.tdwg.org/dwc/terms/TaxonomicStatus"></field>
13
+ </core>
14
+ <extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/ipt/terms/1.0/VernacularName">
15
+ <files>
16
+ <location>VernacularName.txt</location>
17
+ </files>
18
+ <coreid index="0"></coreid>
19
+ <field index="1" term="http://rs.gbif.org/ecat/terms/vernacularName"></field>
20
+ <field index="2" term="http://rs.gbif.org/thesaurus/languageCode"></field>
21
+ </extension>
22
22
  </archive>
@@ -96,7 +96,9 @@ describe DarwinCore do
96
96
  file = File.join(@file_dir, 'data.tar.gz')
97
97
  dwc = DarwinCore.new(file)
98
98
  norm = dwc.normalize_classification
99
- norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should > 0
99
+ nodes_with_syn = norm.select { |k,v| !v.synonyms.empty? }
100
+ nodes_with_syn.map { |k,v| v.synonyms }.size.should > 0
101
+ nodes_with_syn.first[1].synonyms.first.status.should == 'synonym'
100
102
  end
101
103
 
102
104
  it "should be able work with files which have scientificNameAuthorship" do
@@ -109,6 +111,16 @@ describe DarwinCore do
109
111
  syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
110
112
  syn.size.should == 50
111
113
  end
114
+
115
+ it "should be able to get language and locality fields for vernacular names" do
116
+ file = File.join(@file_dir, 'language_locality.tar.gz')
117
+ dwc = DarwinCore.new(file)
118
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
119
+ cn.normalize
120
+ vn = cn.normalized_data['leptogastrinae:tid:42'].vernacular_names.first
121
+ vn.language.should == 'en'
122
+ vn.locality.should == 'New England'
123
+ end
112
124
  end
113
125
 
114
126
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 5
9
- - 14
10
- version: 0.5.14
9
+ - 15
10
+ version: 0.5.15
11
11
  platform: ruby
12
12
  authors:
13
13
  - Dmitry Mozzherin
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-25 00:00:00 -04:00
18
+ date: 2011-05-26 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -107,6 +107,7 @@ files:
107
107
  - spec/files/flat_list.tar.gz
108
108
  - spec/files/invalid.tar.gz
109
109
  - spec/files/junk_dir_inside.zip
110
+ - spec/files/language_locality.tar.gz
110
111
  - spec/files/meta.xml
111
112
  - spec/files/minimal.tar.gz
112
113
  - spec/files/sci_name_authorship.tar.gz