dwc-archive 0.5.14 → 0.5.15
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +13 -2
- data/spec/files/language_locality.tar.gz +0 -0
- data/spec/files/meta.xml +19 -19
- data/spec/lib/dwc-archive_spec.rb +13 -1
- metadata +5 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.14
|
1
|
+
0.5.15
|
@@ -19,7 +19,7 @@ class DarwinCore
|
|
19
19
|
end
|
20
20
|
|
21
21
|
class SynonymNormalized < Struct.new(:name, :canonical_name, :status);end
|
22
|
-
class VernacularNormalized < Struct.new(:name, :language);end
|
22
|
+
class VernacularNormalized < Struct.new(:name, :language, :locality);end
|
23
23
|
|
24
24
|
class ClassificationNormalizer
|
25
25
|
attr_reader :error_names, :tree, :normalized_data
|
@@ -210,9 +210,20 @@ class DarwinCore
|
|
210
210
|
ext, fields = *extension
|
211
211
|
ext.read do |rows|
|
212
212
|
rows[0].each do |r|
|
213
|
+
|
214
|
+
language = nil
|
215
|
+
if fields[:language]
|
216
|
+
language = r[fields[:language]]
|
217
|
+
elsif fields[:languagecode]
|
218
|
+
language = r[fields[:languagecode]]
|
219
|
+
end
|
220
|
+
|
221
|
+
locality = fields[:locality] ? r[fields[:locality]] : nil
|
222
|
+
|
213
223
|
vernacular = VernacularNormalized.new(
|
214
224
|
r[fields[:vernacularname]],
|
215
|
-
|
225
|
+
language,
|
226
|
+
locality)
|
216
227
|
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
217
228
|
add_name_string(vernacular.name)
|
218
229
|
end
|
Binary file
|
data/spec/files/meta.xml
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
2
|
<archive xmlns="http://rs.tdwg.org/dwc/text/">
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
3
|
+
<core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/DarwinCore">
|
4
|
+
<files>
|
5
|
+
<location>DarwinCore.txt</location>
|
6
|
+
</files>
|
7
|
+
<id index="0" term="http://rs.tdwg.org/dwc/terms/TaxonID"></id>
|
8
|
+
<field index="1" term="http://purl.org/dc/terms/source"></field>
|
9
|
+
<field index="2" term="http://rs.tdwg.org/dwc/terms/ScientificName"></field>
|
10
|
+
<field index="3" term="http://rs.tdwg.org/dwc/terms/HigherTaxonID"></field>
|
11
|
+
<field index="4" term="http://rs.tdwg.org/dwc/terms/TaxonRank"></field>
|
12
|
+
<field index="5" term="http://rs.tdwg.org/dwc/terms/TaxonomicStatus"></field>
|
13
|
+
</core>
|
14
|
+
<extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/ipt/terms/1.0/VernacularName">
|
15
|
+
<files>
|
16
|
+
<location>VernacularName.txt</location>
|
17
|
+
</files>
|
18
|
+
<coreid index="0"></coreid>
|
19
|
+
<field index="1" term="http://rs.gbif.org/ecat/terms/vernacularName"></field>
|
20
|
+
<field index="2" term="http://rs.gbif.org/thesaurus/languageCode"></field>
|
21
|
+
</extension>
|
22
22
|
</archive>
|
@@ -96,7 +96,9 @@ describe DarwinCore do
|
|
96
96
|
file = File.join(@file_dir, 'data.tar.gz')
|
97
97
|
dwc = DarwinCore.new(file)
|
98
98
|
norm = dwc.normalize_classification
|
99
|
-
norm.select { |k,v| !v.synonyms.empty? }
|
99
|
+
nodes_with_syn = norm.select { |k,v| !v.synonyms.empty? }
|
100
|
+
nodes_with_syn.map { |k,v| v.synonyms }.size.should > 0
|
101
|
+
nodes_with_syn.first[1].synonyms.first.status.should == 'synonym'
|
100
102
|
end
|
101
103
|
|
102
104
|
it "should be able work with files which have scientificNameAuthorship" do
|
@@ -109,6 +111,16 @@ describe DarwinCore do
|
|
109
111
|
syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
|
110
112
|
syn.size.should == 50
|
111
113
|
end
|
114
|
+
|
115
|
+
it "should be able to get language and locality fields for vernacular names" do
|
116
|
+
file = File.join(@file_dir, 'language_locality.tar.gz')
|
117
|
+
dwc = DarwinCore.new(file)
|
118
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
119
|
+
cn.normalize
|
120
|
+
vn = cn.normalized_data['leptogastrinae:tid:42'].vernacular_names.first
|
121
|
+
vn.language.should == 'en'
|
122
|
+
vn.locality.should == 'New England'
|
123
|
+
end
|
112
124
|
end
|
113
125
|
|
114
126
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 14
|
10
|
-
version: 0.5.14
|
9
|
+
- 15
|
10
|
+
version: 0.5.15
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-05-
|
18
|
+
date: 2011-05-26 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- spec/files/flat_list.tar.gz
|
108
108
|
- spec/files/invalid.tar.gz
|
109
109
|
- spec/files/junk_dir_inside.zip
|
110
|
+
- spec/files/language_locality.tar.gz
|
110
111
|
- spec/files/meta.xml
|
111
112
|
- spec/files/minimal.tar.gz
|
112
113
|
- spec/files/sci_name_authorship.tar.gz
|