dwc-archive 0.5.14 → 0.5.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +13 -2
- data/spec/files/language_locality.tar.gz +0 -0
- data/spec/files/meta.xml +19 -19
- data/spec/lib/dwc-archive_spec.rb +13 -1
- metadata +5 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.14
|
1
|
+
0.5.15
|
@@ -19,7 +19,7 @@ class DarwinCore
|
|
19
19
|
end
|
20
20
|
|
21
21
|
class SynonymNormalized < Struct.new(:name, :canonical_name, :status);end
|
22
|
-
class VernacularNormalized < Struct.new(:name, :language);end
|
22
|
+
class VernacularNormalized < Struct.new(:name, :language, :locality);end
|
23
23
|
|
24
24
|
class ClassificationNormalizer
|
25
25
|
attr_reader :error_names, :tree, :normalized_data
|
@@ -210,9 +210,20 @@ class DarwinCore
|
|
210
210
|
ext, fields = *extension
|
211
211
|
ext.read do |rows|
|
212
212
|
rows[0].each do |r|
|
213
|
+
|
214
|
+
language = nil
|
215
|
+
if fields[:language]
|
216
|
+
language = r[fields[:language]]
|
217
|
+
elsif fields[:languagecode]
|
218
|
+
language = r[fields[:languagecode]]
|
219
|
+
end
|
220
|
+
|
221
|
+
locality = fields[:locality] ? r[fields[:locality]] : nil
|
222
|
+
|
213
223
|
vernacular = VernacularNormalized.new(
|
214
224
|
r[fields[:vernacularname]],
|
215
|
-
|
225
|
+
language,
|
226
|
+
locality)
|
216
227
|
@normalized_data[r[fields[:id]]].vernacular_names << vernacular
|
217
228
|
add_name_string(vernacular.name)
|
218
229
|
end
|
Binary file
|
data/spec/files/meta.xml
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
2
|
<archive xmlns="http://rs.tdwg.org/dwc/text/">
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
3
|
+
<core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/DarwinCore">
|
4
|
+
<files>
|
5
|
+
<location>DarwinCore.txt</location>
|
6
|
+
</files>
|
7
|
+
<id index="0" term="http://rs.tdwg.org/dwc/terms/TaxonID"></id>
|
8
|
+
<field index="1" term="http://purl.org/dc/terms/source"></field>
|
9
|
+
<field index="2" term="http://rs.tdwg.org/dwc/terms/ScientificName"></field>
|
10
|
+
<field index="3" term="http://rs.tdwg.org/dwc/terms/HigherTaxonID"></field>
|
11
|
+
<field index="4" term="http://rs.tdwg.org/dwc/terms/TaxonRank"></field>
|
12
|
+
<field index="5" term="http://rs.tdwg.org/dwc/terms/TaxonomicStatus"></field>
|
13
|
+
</core>
|
14
|
+
<extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/ipt/terms/1.0/VernacularName">
|
15
|
+
<files>
|
16
|
+
<location>VernacularName.txt</location>
|
17
|
+
</files>
|
18
|
+
<coreid index="0"></coreid>
|
19
|
+
<field index="1" term="http://rs.gbif.org/ecat/terms/vernacularName"></field>
|
20
|
+
<field index="2" term="http://rs.gbif.org/thesaurus/languageCode"></field>
|
21
|
+
</extension>
|
22
22
|
</archive>
|
@@ -96,7 +96,9 @@ describe DarwinCore do
|
|
96
96
|
file = File.join(@file_dir, 'data.tar.gz')
|
97
97
|
dwc = DarwinCore.new(file)
|
98
98
|
norm = dwc.normalize_classification
|
99
|
-
norm.select { |k,v| !v.synonyms.empty? }
|
99
|
+
nodes_with_syn = norm.select { |k,v| !v.synonyms.empty? }
|
100
|
+
nodes_with_syn.map { |k,v| v.synonyms }.size.should > 0
|
101
|
+
nodes_with_syn.first[1].synonyms.first.status.should == 'synonym'
|
100
102
|
end
|
101
103
|
|
102
104
|
it "should be able work with files which have scientificNameAuthorship" do
|
@@ -109,6 +111,16 @@ describe DarwinCore do
|
|
109
111
|
syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
|
110
112
|
syn.size.should == 50
|
111
113
|
end
|
114
|
+
|
115
|
+
it "should be able to get language and locality fields for vernacular names" do
|
116
|
+
file = File.join(@file_dir, 'language_locality.tar.gz')
|
117
|
+
dwc = DarwinCore.new(file)
|
118
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
119
|
+
cn.normalize
|
120
|
+
vn = cn.normalized_data['leptogastrinae:tid:42'].vernacular_names.first
|
121
|
+
vn.language.should == 'en'
|
122
|
+
vn.locality.should == 'New England'
|
123
|
+
end
|
112
124
|
end
|
113
125
|
|
114
126
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 14
|
10
|
-
version: 0.5.14
|
9
|
+
- 15
|
10
|
+
version: 0.5.15
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-05-
|
18
|
+
date: 2011-05-26 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -107,6 +107,7 @@ files:
|
|
107
107
|
- spec/files/flat_list.tar.gz
|
108
108
|
- spec/files/invalid.tar.gz
|
109
109
|
- spec/files/junk_dir_inside.zip
|
110
|
+
- spec/files/language_locality.tar.gz
|
110
111
|
- spec/files/meta.xml
|
111
112
|
- spec/files/minimal.tar.gz
|
112
113
|
- spec/files/sci_name_authorship.tar.gz
|