dwc-archive 0.5.13 → 0.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +22 -6
- data/VERSION +1 -1
- data/lib/dwc-archive/classification_normalizer.rb +1 -0
- metadata +8 -10
data/README.rdoc
CHANGED
@@ -25,7 +25,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
25
25
|
# read content of the core data file into memory or used with a block
|
26
26
|
# it returns array of arrays of data
|
27
27
|
# rows that had a wrong encoding will be collected into errors array
|
28
|
-
data, errors = dwc.core.read
|
28
|
+
data, errors = dwc.core.read
|
29
29
|
|
30
30
|
# read content using a block, getting results back in sets of 100 rows each
|
31
31
|
results = []
|
@@ -45,18 +45,34 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
45
45
|
results << [tail_data, tail_errors]
|
46
46
|
|
47
47
|
# normalize names in classification collecting together synonyms, canonical names,
|
48
|
-
# vernacular names and associating paths to taxons in a classification
|
48
|
+
# vernacular names and associating paths to taxons in a classification
|
49
49
|
# distributed as DwCA file
|
50
50
|
# NOTE: this functionality requires biodiversity gem for ruby 1.8.x and
|
51
51
|
# biodiversity19 gem for ruby 1.9.x
|
52
52
|
|
53
53
|
result = dwc.normalize_classification
|
54
54
|
|
55
|
+
# for finer control over normalization:
|
56
|
+
|
57
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
58
|
+
cn.normalize
|
59
|
+
|
60
|
+
# to get a flat hash of nodes with attached vernacular names and synonyms
|
61
|
+
normalized_data = cn.normalized_data
|
62
|
+
|
63
|
+
# to get a representation of tree organization as a hash
|
64
|
+
classification_tree = cn.tree
|
65
|
+
|
66
|
+
# to get a list of all name strings used as scientific or vernacular names
|
67
|
+
all_name_strings = cn.name_strings
|
68
|
+
|
69
|
+
# to get a list of errors generated during the normalization
|
70
|
+
errors = cn.error_names
|
55
71
|
|
56
72
|
DarwinCore.clean_all # remove all expanded archives
|
57
73
|
|
58
74
|
== Creating a DarwinCore Archive file
|
59
|
-
|
75
|
+
|
60
76
|
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
61
77
|
|
62
78
|
core = [
|
@@ -77,7 +93,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
77
93
|
]
|
78
94
|
|
79
95
|
synonyms = [
|
80
|
-
["http://rs.tdwg.org/dwc/terms/TaxonID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"],
|
96
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID", "http://rs.tdwg.org/dwc/terms/scientificName", "http://rs.tdwg.org/dwc/terms/taxonomicStatus"],
|
81
97
|
[1, "Betila Linnaeus, 1753", 'misspelling']
|
82
98
|
]
|
83
99
|
|
@@ -89,7 +105,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
89
105
|
:last_name => 'Doe',
|
90
106
|
:email => 'jdoe@example.com' },
|
91
107
|
{ :first_name => 'Jane',
|
92
|
-
:last_name => 'Doe',
|
108
|
+
:last_name => 'Doe',
|
93
109
|
:email => 'jane@example.com' }
|
94
110
|
],
|
95
111
|
:abstract => 'test classification',
|
@@ -106,7 +122,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
106
122
|
|
107
123
|
|
108
124
|
== Note on Patches/Pull Requests
|
109
|
-
|
125
|
+
|
110
126
|
* Fork the project.
|
111
127
|
* Make your feature addition or bug fix.
|
112
128
|
* Add tests for it. This is important so I don't break it in a
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.13
|
1
|
+
0.5.14
|
data/lib/dwc-archive/classification_normalizer.rb
CHANGED
@@ -88,6 +88,7 @@ class DarwinCore
|
|
88
88
|
end
|
89
89
|
|
90
90
|
def set_scientific_name(row, fields)
|
91
|
+
row[fields[:scientificname]] = 'N/A' unless row[fields[:scientificname]]
|
91
92
|
canonical_name = fields[:scientificnameauthorship] ? row[fields[:scientificname]] : get_canonical_name(row[fields[:scientificname]])
|
92
93
|
fields[:canonicalname] = row.size
|
93
94
|
row << canonical_name
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 5
|
9
|
-
- 13
|
10
|
-
version: 0.5.13
|
9
|
+
- 14
|
10
|
+
version: 0.5.14
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-05-25 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -147,11 +147,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
147
|
requirements: []
|
148
148
|
|
149
149
|
rubyforge_project:
|
150
|
-
rubygems_version: 1.
|
150
|
+
rubygems_version: 1.5.2
|
151
151
|
signing_key:
|
152
152
|
specification_version: 3
|
153
153
|
summary: Handler of Darwin Core Archive files
|
154
|
-
test_files:
|
155
|
-
|
156
|
-
- spec/lib/xml_reader_soec.rb
|
157
|
-
- spec/spec_helper.rb
|
154
|
+
test_files: []
|
155
|
+
|