dwc-archive 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +1 -1
- data/.travis.yml +14 -0
- data/Gemfile +13 -16
- data/Gemfile.lock +122 -122
- data/{README.rdoc → README.md} +48 -26
- data/VERSION +1 -1
- data/lib/dwc-archive/core.rb +3 -3
- data/lib/dwc-archive/expander.rb +4 -2
- data/lib/dwc-archive/ingester.rb +26 -8
- metadata +17 -96
data/.rvmrc
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
rvm use ruby-1.9.3-
|
|
1
|
+
rvm use ruby-1.9.3-p392@dwc-archive --create
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
|
@@ -1,20 +1,17 @@
|
|
|
1
|
-
source
|
|
2
|
-
# Add dependencies required to use your gem here.
|
|
3
|
-
# Example:
|
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
|
1
|
+
source 'https://rubygems.org'
|
|
5
2
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
gem "nokogiri", "~> 1.5.0"
|
|
10
|
-
gem "parsley-store", "~> 0.3.0"
|
|
11
|
-
gem "archive-tar-minitar"
|
|
3
|
+
gem 'nokogiri', '~> 1.5'
|
|
4
|
+
gem 'parsley-store', '~> 0.3.1'
|
|
5
|
+
gem 'archive-tar-minitar', '~> 0.5'
|
|
12
6
|
|
|
13
7
|
group :development do
|
|
14
|
-
gem
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
gem
|
|
19
|
-
gem
|
|
8
|
+
gem 'debugger', '~> 1.3'
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
group :test do
|
|
12
|
+
gem 'rspec', '~> 2.13'
|
|
13
|
+
gem 'cucumber', '~> 1.3'
|
|
14
|
+
gem 'bundler', '~> 1.3'
|
|
15
|
+
gem 'jeweler', '~> 1.8'
|
|
16
|
+
gem 'jazz_hands', '~> 0.5'
|
|
20
17
|
end
|
data/Gemfile.lock
CHANGED
|
@@ -1,155 +1,155 @@
|
|
|
1
1
|
GEM
|
|
2
|
-
remote:
|
|
2
|
+
remote: https://rubygems.org/
|
|
3
3
|
specs:
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
rack
|
|
12
|
-
rack-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
i18n (~> 0.
|
|
19
|
-
|
|
4
|
+
abstract (1.0.0)
|
|
5
|
+
actionpack (3.0.8)
|
|
6
|
+
activemodel (= 3.0.8)
|
|
7
|
+
activesupport (= 3.0.8)
|
|
8
|
+
builder (~> 2.1.2)
|
|
9
|
+
erubis (~> 2.6.6)
|
|
10
|
+
i18n (~> 0.5.0)
|
|
11
|
+
rack (~> 1.2.1)
|
|
12
|
+
rack-mount (~> 0.6.14)
|
|
13
|
+
rack-test (~> 0.5.7)
|
|
14
|
+
tzinfo (~> 0.3.23)
|
|
15
|
+
activemodel (3.0.8)
|
|
16
|
+
activesupport (= 3.0.8)
|
|
17
|
+
builder (~> 2.1.2)
|
|
18
|
+
i18n (~> 0.5.0)
|
|
19
|
+
activesupport (3.0.8)
|
|
20
20
|
archive-tar-minitar (0.5.2)
|
|
21
|
-
awesome_print (1.0
|
|
22
|
-
binding_of_caller (0.
|
|
23
|
-
|
|
21
|
+
awesome_print (1.1.0)
|
|
22
|
+
binding_of_caller (0.7.1)
|
|
23
|
+
debug_inspector (>= 0.0.1)
|
|
24
|
+
biodiversity (3.0.1)
|
|
24
25
|
parallel
|
|
25
|
-
parallel
|
|
26
|
-
|
|
26
|
+
parallel (~> 0.6)
|
|
27
|
+
rake (~> 10.0)
|
|
27
28
|
treetop
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
treetop (~> 1.4)
|
|
30
|
+
unicode_utils (~> 1.4)
|
|
31
|
+
builder (2.1.2)
|
|
32
|
+
coderay (1.0.9)
|
|
30
33
|
columnize (0.3.6)
|
|
31
|
-
coolline (0.
|
|
32
|
-
cucumber (1.1
|
|
34
|
+
coolline (0.4.2)
|
|
35
|
+
cucumber (1.3.1)
|
|
33
36
|
builder (>= 2.1.2)
|
|
34
|
-
diff-lcs (>= 1.1.
|
|
35
|
-
gherkin (~> 2.
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
debugger (1.
|
|
37
|
+
diff-lcs (>= 1.1.3)
|
|
38
|
+
gherkin (~> 2.12.0)
|
|
39
|
+
multi_json (~> 1.3)
|
|
40
|
+
debug_inspector (0.0.2)
|
|
41
|
+
debugger (1.5.0)
|
|
39
42
|
columnize (>= 0.3.1)
|
|
40
|
-
debugger-linecache (~> 1.
|
|
41
|
-
debugger-ruby_core_source (~> 1.
|
|
42
|
-
debugger-linecache (1.
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
gherkin (2.
|
|
49
|
-
|
|
43
|
+
debugger-linecache (~> 1.2.0)
|
|
44
|
+
debugger-ruby_core_source (~> 1.2.0)
|
|
45
|
+
debugger-linecache (1.2.0)
|
|
46
|
+
debugger-ruby_core_source (1.2.0)
|
|
47
|
+
diff-lcs (1.2.4)
|
|
48
|
+
diffy (2.1.4)
|
|
49
|
+
erubis (2.6.6)
|
|
50
|
+
abstract (>= 1.0.0)
|
|
51
|
+
gherkin (2.12.0)
|
|
52
|
+
multi_json (~> 1.3)
|
|
50
53
|
git (1.2.5)
|
|
51
54
|
grit (2.5.0)
|
|
52
55
|
diff-lcs (~> 1.1)
|
|
53
56
|
mime-types (~> 1.15)
|
|
54
57
|
posix-spawn (~> 0.3.6)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
pry (~> 0.
|
|
64
|
-
pry-doc (~> 0.4.
|
|
65
|
-
pry-git (~> 0.2.
|
|
66
|
-
pry-
|
|
67
|
-
pry-remote (>= 0.1.
|
|
68
|
-
pry-stack_explorer (~> 0.4.
|
|
69
|
-
railties (
|
|
70
|
-
jeweler (1.
|
|
58
|
+
hirb (0.7.1)
|
|
59
|
+
i18n (0.5.0)
|
|
60
|
+
jazz_hands (0.5.0)
|
|
61
|
+
awesome_print (~> 1.1.0)
|
|
62
|
+
coderay (~> 1.0.9)
|
|
63
|
+
coolline (>= 0.4.0)
|
|
64
|
+
hirb (~> 0.7.1)
|
|
65
|
+
pry (~> 0.9.12)
|
|
66
|
+
pry-debugger (~> 0.2.2)
|
|
67
|
+
pry-doc (~> 0.4.4)
|
|
68
|
+
pry-git (~> 0.2.3)
|
|
69
|
+
pry-rails (~> 0.2.2)
|
|
70
|
+
pry-remote (>= 0.1.7)
|
|
71
|
+
pry-stack_explorer (~> 0.4.9)
|
|
72
|
+
railties (>= 3.0, < 5.0)
|
|
73
|
+
jeweler (1.8.4)
|
|
71
74
|
bundler (~> 1.0)
|
|
72
75
|
git (>= 1.2.5)
|
|
73
76
|
rake
|
|
74
|
-
|
|
75
|
-
json (1.7.
|
|
76
|
-
method_source (0.
|
|
77
|
-
mime-types (1.
|
|
78
|
-
multi_json (1.3
|
|
79
|
-
nokogiri (1.5.
|
|
80
|
-
parallel (0.
|
|
81
|
-
parsley-store (0.3.
|
|
82
|
-
|
|
83
|
-
|
|
77
|
+
rdoc
|
|
78
|
+
json (1.7.7)
|
|
79
|
+
method_source (0.8.1)
|
|
80
|
+
mime-types (1.23)
|
|
81
|
+
multi_json (1.7.3)
|
|
82
|
+
nokogiri (1.5.9)
|
|
83
|
+
parallel (0.6.4)
|
|
84
|
+
parsley-store (0.3.1)
|
|
85
|
+
biodiversity (~> 3.0.1)
|
|
86
|
+
jeweler (~> 1.8)
|
|
87
|
+
redis (~> 3.0)
|
|
84
88
|
polyglot (0.3.3)
|
|
85
89
|
posix-spawn (0.3.6)
|
|
86
|
-
pry (0.9.
|
|
90
|
+
pry (0.9.12.1)
|
|
87
91
|
coderay (~> 1.0.5)
|
|
88
|
-
method_source (~> 0.
|
|
89
|
-
slop (
|
|
90
|
-
pry-
|
|
91
|
-
|
|
92
|
-
|
|
92
|
+
method_source (~> 0.8)
|
|
93
|
+
slop (~> 3.4)
|
|
94
|
+
pry-debugger (0.2.2)
|
|
95
|
+
debugger (~> 1.3)
|
|
96
|
+
pry (~> 0.9.10)
|
|
97
|
+
pry-doc (0.4.5)
|
|
98
|
+
pry (>= 0.9)
|
|
99
|
+
yard (>= 0.8)
|
|
93
100
|
pry-git (0.2.3)
|
|
94
101
|
diffy
|
|
95
102
|
grit
|
|
96
103
|
pry (>= 0.9.8)
|
|
97
|
-
pry-
|
|
98
|
-
pry (
|
|
99
|
-
pry-remote (0.1.
|
|
100
|
-
pry (~> 0.9
|
|
101
|
-
slop (~>
|
|
102
|
-
pry-stack_explorer (0.4.
|
|
103
|
-
binding_of_caller (
|
|
104
|
-
|
|
105
|
-
rack
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
rack-test (0.6.1)
|
|
104
|
+
pry-rails (0.2.2)
|
|
105
|
+
pry (>= 0.9.10)
|
|
106
|
+
pry-remote (0.1.7)
|
|
107
|
+
pry (~> 0.9)
|
|
108
|
+
slop (~> 3.0)
|
|
109
|
+
pry-stack_explorer (0.4.9)
|
|
110
|
+
binding_of_caller (>= 0.7)
|
|
111
|
+
pry (~> 0.9.11)
|
|
112
|
+
rack (1.2.8)
|
|
113
|
+
rack-mount (0.6.14)
|
|
114
|
+
rack (>= 1.0.0)
|
|
115
|
+
rack-test (0.5.7)
|
|
110
116
|
rack (>= 1.0)
|
|
111
|
-
railties (3.
|
|
112
|
-
actionpack (= 3.
|
|
113
|
-
activesupport (= 3.
|
|
114
|
-
rack-ssl (~> 1.3.2)
|
|
117
|
+
railties (3.0.8)
|
|
118
|
+
actionpack (= 3.0.8)
|
|
119
|
+
activesupport (= 3.0.8)
|
|
115
120
|
rake (>= 0.8.7)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
rdoc (3.12)
|
|
121
|
+
thor (~> 0.14.4)
|
|
122
|
+
rake (10.0.4)
|
|
123
|
+
rdoc (4.0.1)
|
|
120
124
|
json (~> 1.4)
|
|
121
|
-
redis (3.0.
|
|
122
|
-
rspec (2.
|
|
123
|
-
rspec-core (~> 2.
|
|
124
|
-
rspec-expectations (~> 2.
|
|
125
|
-
rspec-mocks (~> 2.
|
|
126
|
-
rspec-core (2.
|
|
127
|
-
rspec-expectations (2.
|
|
128
|
-
diff-lcs (
|
|
129
|
-
rspec-mocks (2.
|
|
130
|
-
slop (
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
rack (~> 1.0)
|
|
134
|
-
tilt (~> 1.1, != 1.3.0)
|
|
135
|
-
term-ansicolor (1.0.7)
|
|
136
|
-
thor (0.15.4)
|
|
137
|
-
tilt (1.3.3)
|
|
138
|
-
treetop (1.4.10)
|
|
125
|
+
redis (3.0.4)
|
|
126
|
+
rspec (2.13.0)
|
|
127
|
+
rspec-core (~> 2.13.0)
|
|
128
|
+
rspec-expectations (~> 2.13.0)
|
|
129
|
+
rspec-mocks (~> 2.13.0)
|
|
130
|
+
rspec-core (2.13.1)
|
|
131
|
+
rspec-expectations (2.13.0)
|
|
132
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
|
133
|
+
rspec-mocks (2.13.1)
|
|
134
|
+
slop (3.4.4)
|
|
135
|
+
thor (0.14.6)
|
|
136
|
+
treetop (1.4.12)
|
|
139
137
|
polyglot
|
|
140
138
|
polyglot (>= 0.3.1)
|
|
141
|
-
|
|
139
|
+
tzinfo (0.3.37)
|
|
140
|
+
unicode_utils (1.4.0)
|
|
141
|
+
yard (0.8.6.1)
|
|
142
142
|
|
|
143
143
|
PLATFORMS
|
|
144
144
|
ruby
|
|
145
145
|
|
|
146
146
|
DEPENDENCIES
|
|
147
|
-
archive-tar-minitar
|
|
148
|
-
bundler (~> 1.
|
|
149
|
-
cucumber (~> 1.
|
|
150
|
-
debugger
|
|
151
|
-
jazz_hands
|
|
152
|
-
jeweler (~> 1.
|
|
153
|
-
nokogiri (~> 1.5
|
|
154
|
-
parsley-store (~> 0.3.
|
|
155
|
-
rspec (~> 2.
|
|
147
|
+
archive-tar-minitar (~> 0.5)
|
|
148
|
+
bundler (~> 1.3)
|
|
149
|
+
cucumber (~> 1.3)
|
|
150
|
+
debugger (~> 1.3)
|
|
151
|
+
jazz_hands (~> 0.5)
|
|
152
|
+
jeweler (~> 1.8)
|
|
153
|
+
nokogiri (~> 1.5)
|
|
154
|
+
parsley-store (~> 0.3.1)
|
|
155
|
+
rspec (~> 2.13)
|
data/{README.rdoc → README.md}
RENAMED
|
@@ -1,17 +1,22 @@
|
|
|
1
|
-
|
|
1
|
+
Darwin Core Archive
|
|
2
|
+
===================
|
|
2
3
|
|
|
3
|
-
|
|
4
|
+
[![Gem Version][1]][2]
|
|
5
|
+
[![Continuous Integration Status][3]][4]
|
|
6
|
+
[![Dependency Status][5]][6]
|
|
4
7
|
|
|
8
|
+
Darwin Core Archive format is a current standard for information exchange
|
|
9
|
+
between Global Names Architecture modules. This gem allows you to work with
|
|
10
|
+
Darwin Core Archive data compressed to either zip or tar.gz files.
|
|
11
|
+
More information about Darwin Core Archive can be found on a [GBIF page][7].
|
|
5
12
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
== Installation
|
|
9
|
-
|
|
10
|
-
Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
13
|
+
Installation
|
|
14
|
+
------------
|
|
11
15
|
|
|
12
16
|
sudo gem install dwc-archive
|
|
13
17
|
|
|
14
|
-
|
|
18
|
+
Usage
|
|
19
|
+
-----
|
|
15
20
|
|
|
16
21
|
require 'rubygems'
|
|
17
22
|
require 'dwc-archive'
|
|
@@ -30,7 +35,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
|
30
35
|
# rows that had a wrong encoding will be collected into errors array
|
|
31
36
|
data, errors = dwc.core.read
|
|
32
37
|
|
|
33
|
-
# read content using a block
|
|
38
|
+
# read content using a block, getting back results in sets of 100 rows each
|
|
34
39
|
results = []
|
|
35
40
|
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
|
36
41
|
results << [data, errors]
|
|
@@ -47,11 +52,9 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
|
47
52
|
end
|
|
48
53
|
results << [tail_data, tail_errors]
|
|
49
54
|
|
|
50
|
-
# normalize names in classification collecting together synonyms,
|
|
51
|
-
# vernacular names and associating paths to taxons
|
|
52
|
-
# distributed as DwCA file
|
|
53
|
-
# NOTE: this functionality requires biodiversity gem for ruby 1.8.x and
|
|
54
|
-
# biodiversity19 gem for ruby 1.9.x
|
|
55
|
+
# normalize names in classification collecting together synonyms,
|
|
56
|
+
# canonical names, vernacular names and associating paths to taxons
|
|
57
|
+
# in a classification distributed as DwCA file
|
|
55
58
|
|
|
56
59
|
result = dwc.normalize_classification
|
|
57
60
|
|
|
@@ -59,10 +62,11 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
|
59
62
|
|
|
60
63
|
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
|
61
64
|
cn.normalize
|
|
62
|
-
# if you don't want to generate path consisting of canonical forms
|
|
65
|
+
# if you don't want to generate path consisting of canonical forms
|
|
66
|
+
# of ancestors to a taxon
|
|
63
67
|
cn.normalize(:with_canonical_names => false)
|
|
64
68
|
|
|
65
|
-
# if you don't want to ingest information from extensions
|
|
69
|
+
# if you don't want to ingest information from extensions
|
|
66
70
|
cn.normalize(:with_extensions => false)
|
|
67
71
|
|
|
68
72
|
# to get a flat hash of nodes with attached vernacular names and synonyms
|
|
@@ -79,19 +83,24 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
|
79
83
|
|
|
80
84
|
DarwinCore.clean_all # remove all expanded archives
|
|
81
85
|
|
|
82
|
-
|
|
86
|
+
Creating a DarwinCore Archive file
|
|
87
|
+
----------------------------------
|
|
83
88
|
|
|
84
89
|
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
|
85
90
|
|
|
86
91
|
core = [
|
|
87
|
-
["http://rs.tdwg.org/dwc/terms/taxonID",
|
|
92
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
|
93
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
|
94
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
|
95
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
|
88
96
|
[1, 0, "Plantae", "kingdom"],
|
|
89
97
|
[2, 1, "Betula", "genus"],
|
|
90
98
|
[3, 2, "Betula verucosa", "species"]
|
|
91
99
|
]
|
|
92
100
|
|
|
93
101
|
vernacular_names = [
|
|
94
|
-
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
|
102
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
|
103
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
|
95
104
|
[1, "Plants"],
|
|
96
105
|
[1, "Растения"],
|
|
97
106
|
[2, "Birch"],
|
|
@@ -121,28 +130,41 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
|
121
130
|
:email => 'jimdoe@example.com',
|
|
122
131
|
:url => 'http://aggregator.example.org' }],
|
|
123
132
|
:abstract => 'test classification',
|
|
124
|
-
:citation =>
|
|
133
|
+
:citation =>
|
|
134
|
+
'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
|
|
125
135
|
:url => 'http://example.com'
|
|
126
136
|
}
|
|
127
137
|
|
|
128
138
|
gen.add_core(core, 'core.txt')
|
|
129
|
-
gen.add_extension(vernacular_names,
|
|
139
|
+
gen.add_extension(vernacular_names,
|
|
140
|
+
'vernacular_names.txt',
|
|
141
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
|
130
142
|
gen.add_meta_xml
|
|
131
143
|
gen.add_eml_xml(eml)
|
|
132
144
|
gen.pack
|
|
133
145
|
|
|
134
|
-
|
|
135
|
-
|
|
146
|
+
Note on Patches/Pull Requests
|
|
147
|
+
-----------------------------
|
|
136
148
|
|
|
137
149
|
* Fork the project.
|
|
138
150
|
* Make your feature addition or bug fix.
|
|
139
151
|
* Add tests for it. This is important so I don't break it in a
|
|
140
152
|
future version unintentionally.
|
|
141
153
|
* Commit, do not mess with rakefile, version, or history.
|
|
142
|
-
(if you want to have your own version, that is fine but bump
|
|
154
|
+
(if you want to have your own version, that is fine but bump
|
|
155
|
+
version in a commit by itself I can ignore when I pull)
|
|
143
156
|
* Send me a pull request. Bonus points for topic branches.
|
|
144
157
|
|
|
145
158
|
|
|
146
|
-
|
|
159
|
+
Copyright
|
|
160
|
+
---------
|
|
161
|
+
|
|
162
|
+
Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
|
|
147
163
|
|
|
148
|
-
|
|
164
|
+
[1]: https://badge.fury.io/rb/dwc-archive.png
|
|
165
|
+
[2]: http://badge.fury.io/rb/dwc-archive
|
|
166
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
|
|
167
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
|
|
168
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
|
|
169
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
|
170
|
+
[7]: http://bit.ly/2IxcBA
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.9.
|
|
1
|
+
0.9.4
|
data/lib/dwc-archive/core.rb
CHANGED
|
@@ -8,9 +8,9 @@ class DarwinCore
|
|
|
8
8
|
@path = @archive.files_path
|
|
9
9
|
root_key = @archive.meta.keys[0]
|
|
10
10
|
@data = @archive.meta[root_key][:core]
|
|
11
|
-
raise DarwinCore::CoreFileError.new("Cannot
|
|
12
|
-
@id = @data[:id][:attributes]
|
|
13
|
-
raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
|
|
11
|
+
raise DarwinCore::CoreFileError.new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
|
|
12
|
+
@id = @data[:id][:attributes]
|
|
13
|
+
# raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
|
|
14
14
|
get_attributes(DarwinCore::CoreFileError)
|
|
15
15
|
end
|
|
16
16
|
end
|
data/lib/dwc-archive/expander.rb
CHANGED
|
@@ -3,7 +3,7 @@ class DarwinCore
|
|
|
3
3
|
def initialize(archive_path, tmp_dir)
|
|
4
4
|
@archive_path = archive_path
|
|
5
5
|
@tmp_dir = tmp_dir
|
|
6
|
-
@path = File.join(tmp_dir, 'dwc_' + rand(
|
|
6
|
+
@path = File.join(tmp_dir, 'dwc_' + rand(10_000_000_000).to_s)
|
|
7
7
|
@unpacker = get_unpacker
|
|
8
8
|
end
|
|
9
9
|
|
|
@@ -11,7 +11,9 @@ class DarwinCore
|
|
|
11
11
|
clean
|
|
12
12
|
raise DarwinCore::FileNotFoundError unless File.exists?(@archive_path)
|
|
13
13
|
success = @unpacker.call(@path, @archive_path) if @unpacker
|
|
14
|
-
(@unpacker && success && $?.exitstatus == 0) ?
|
|
14
|
+
(@unpacker && success && $?.exitstatus == 0) ?
|
|
15
|
+
success :
|
|
16
|
+
(clean; raise DarwinCore::UnpackingError)
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def path
|
data/lib/dwc-archive/ingester.rb
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
class DarwinCore
|
|
3
3
|
module Ingester
|
|
4
4
|
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
|
5
|
-
attr_reader :file_path, :fields, :line_separator,
|
|
5
|
+
attr_reader :file_path, :fields, :line_separator,
|
|
6
|
+
:quote_character, :ignore_headers
|
|
6
7
|
|
|
7
8
|
def size
|
|
8
9
|
@size ||= get_size
|
|
@@ -22,7 +23,9 @@ class DarwinCore
|
|
|
22
23
|
index_fix = 0; next if @ignore_headers && i == 0
|
|
23
24
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
|
24
25
|
if (i + index_fix) % batch_size == 0
|
|
25
|
-
DarwinCore.logger_write(@dwc.object_id,
|
|
26
|
+
DarwinCore.logger_write(@dwc.object_id,
|
|
27
|
+
"Ingested %s records from %s" %
|
|
28
|
+
[(i + index_fix), name])
|
|
26
29
|
if block_given?
|
|
27
30
|
yield [res, errors]
|
|
28
31
|
res = []
|
|
@@ -42,25 +45,40 @@ class DarwinCore
|
|
|
42
45
|
def process_csv_row(result, errors, row)
|
|
43
46
|
str = row.join('')
|
|
44
47
|
str = str.force_encoding('utf-8')
|
|
45
|
-
str.encoding.name ==
|
|
48
|
+
if str.encoding.name == 'UTF-8' && str.valid_encoding?
|
|
49
|
+
result << row.map { |f| f.nil? ? nil : f.force_encoding('utf-8') }
|
|
50
|
+
else
|
|
51
|
+
errors << row
|
|
52
|
+
end
|
|
46
53
|
end
|
|
47
54
|
|
|
48
55
|
def get_attributes(exception)
|
|
49
56
|
@properties = @data[:attributes]
|
|
50
57
|
@encoding = @properties[:encoding] || 'UTF-8'
|
|
51
|
-
|
|
58
|
+
err_msg = 'No support for encodings other ' +
|
|
59
|
+
'than utf-8 or utf-16 at the moment'
|
|
60
|
+
encodings = ['utf-8', 'utf8', 'utf-16', 'utf16']
|
|
61
|
+
unless encodings.include? @encoding.downcase
|
|
62
|
+
raise DarwinCore::EncodingError.new(err_msg)
|
|
63
|
+
end
|
|
52
64
|
@field_separator = get_field_separator
|
|
53
65
|
@quote_character = @properties[:fieldsEnclosedBy] || ""
|
|
54
|
-
@line_separator = @properties[:linesTerminatedBy] ||
|
|
55
|
-
@ignore_headers = @properties[:ignoreHeaderLines] ?
|
|
66
|
+
@line_separator = @properties[:linesTerminatedBy] || '\n'
|
|
67
|
+
@ignore_headers = @properties[:ignoreHeaderLines] ?
|
|
68
|
+
[1, true].include?(@properties[:ignoreHeaderLines]) :
|
|
69
|
+
false
|
|
56
70
|
@file_path = get_file_path
|
|
57
71
|
raise DarwinCore::FileNotFoundError.new("No file data") unless @file_path
|
|
58
72
|
@fields = get_fields
|
|
59
|
-
|
|
73
|
+
if @fields.empty?
|
|
74
|
+
raise DarwinCore::InvalidArchiveError.new("No data fields are found")
|
|
75
|
+
end
|
|
60
76
|
end
|
|
61
77
|
|
|
62
78
|
def get_file_path
|
|
63
|
-
file = @data[:location] ||
|
|
79
|
+
file = @data[:location] ||
|
|
80
|
+
@data[:attributes][:location] ||
|
|
81
|
+
@data[:files][:location]
|
|
64
82
|
File.join(@path, file)
|
|
65
83
|
end
|
|
66
84
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dwc-archive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.
|
|
4
|
+
version: 0.9.4
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2013-05-09 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: nokogiri
|
|
@@ -18,7 +18,7 @@ dependencies:
|
|
|
18
18
|
requirements:
|
|
19
19
|
- - ~>
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: 1.5
|
|
21
|
+
version: '1.5'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -26,7 +26,7 @@ dependencies:
|
|
|
26
26
|
requirements:
|
|
27
27
|
- - ~>
|
|
28
28
|
- !ruby/object:Gem::Version
|
|
29
|
-
version: 1.5
|
|
29
|
+
version: '1.5'
|
|
30
30
|
- !ruby/object:Gem::Dependency
|
|
31
31
|
name: parsley-store
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -34,7 +34,7 @@ dependencies:
|
|
|
34
34
|
requirements:
|
|
35
35
|
- - ~>
|
|
36
36
|
- !ruby/object:Gem::Version
|
|
37
|
-
version: 0.3.
|
|
37
|
+
version: 0.3.1
|
|
38
38
|
type: :runtime
|
|
39
39
|
prerelease: false
|
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -42,79 +42,31 @@ dependencies:
|
|
|
42
42
|
requirements:
|
|
43
43
|
- - ~>
|
|
44
44
|
- !ruby/object:Gem::Version
|
|
45
|
-
version: 0.3.
|
|
45
|
+
version: 0.3.1
|
|
46
46
|
- !ruby/object:Gem::Dependency
|
|
47
47
|
name: archive-tar-minitar
|
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
|
49
|
-
none: false
|
|
50
|
-
requirements:
|
|
51
|
-
- - ! '>='
|
|
52
|
-
- !ruby/object:Gem::Version
|
|
53
|
-
version: '0'
|
|
54
|
-
type: :runtime
|
|
55
|
-
prerelease: false
|
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
-
none: false
|
|
58
|
-
requirements:
|
|
59
|
-
- - ! '>='
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0'
|
|
62
|
-
- !ruby/object:Gem::Dependency
|
|
63
|
-
name: rspec
|
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
|
65
|
-
none: false
|
|
66
|
-
requirements:
|
|
67
|
-
- - ~>
|
|
68
|
-
- !ruby/object:Gem::Version
|
|
69
|
-
version: 2.7.0
|
|
70
|
-
type: :development
|
|
71
|
-
prerelease: false
|
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
-
none: false
|
|
74
|
-
requirements:
|
|
75
|
-
- - ~>
|
|
76
|
-
- !ruby/object:Gem::Version
|
|
77
|
-
version: 2.7.0
|
|
78
|
-
- !ruby/object:Gem::Dependency
|
|
79
|
-
name: cucumber
|
|
80
|
-
requirement: !ruby/object:Gem::Requirement
|
|
81
|
-
none: false
|
|
82
|
-
requirements:
|
|
83
|
-
- - ~>
|
|
84
|
-
- !ruby/object:Gem::Version
|
|
85
|
-
version: 1.1.3
|
|
86
|
-
type: :development
|
|
87
|
-
prerelease: false
|
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
89
49
|
none: false
|
|
90
50
|
requirements:
|
|
91
51
|
- - ~>
|
|
92
52
|
- !ruby/object:Gem::Version
|
|
93
|
-
version:
|
|
94
|
-
|
|
95
|
-
name: bundler
|
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
|
97
|
-
none: false
|
|
98
|
-
requirements:
|
|
99
|
-
- - ~>
|
|
100
|
-
- !ruby/object:Gem::Version
|
|
101
|
-
version: '1.0'
|
|
102
|
-
type: :development
|
|
53
|
+
version: '0.5'
|
|
54
|
+
type: :runtime
|
|
103
55
|
prerelease: false
|
|
104
56
|
version_requirements: !ruby/object:Gem::Requirement
|
|
105
57
|
none: false
|
|
106
58
|
requirements:
|
|
107
59
|
- - ~>
|
|
108
60
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '
|
|
61
|
+
version: '0.5'
|
|
110
62
|
- !ruby/object:Gem::Dependency
|
|
111
|
-
name:
|
|
63
|
+
name: debugger
|
|
112
64
|
requirement: !ruby/object:Gem::Requirement
|
|
113
65
|
none: false
|
|
114
66
|
requirements:
|
|
115
67
|
- - ~>
|
|
116
68
|
- !ruby/object:Gem::Version
|
|
117
|
-
version: 1.
|
|
69
|
+
version: '1.3'
|
|
118
70
|
type: :development
|
|
119
71
|
prerelease: false
|
|
120
72
|
version_requirements: !ruby/object:Gem::Requirement
|
|
@@ -122,39 +74,7 @@ dependencies:
|
|
|
122
74
|
requirements:
|
|
123
75
|
- - ~>
|
|
124
76
|
- !ruby/object:Gem::Version
|
|
125
|
-
version: 1.
|
|
126
|
-
- !ruby/object:Gem::Dependency
|
|
127
|
-
name: debugger
|
|
128
|
-
requirement: !ruby/object:Gem::Requirement
|
|
129
|
-
none: false
|
|
130
|
-
requirements:
|
|
131
|
-
- - ! '>='
|
|
132
|
-
- !ruby/object:Gem::Version
|
|
133
|
-
version: '0'
|
|
134
|
-
type: :development
|
|
135
|
-
prerelease: false
|
|
136
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
137
|
-
none: false
|
|
138
|
-
requirements:
|
|
139
|
-
- - ! '>='
|
|
140
|
-
- !ruby/object:Gem::Version
|
|
141
|
-
version: '0'
|
|
142
|
-
- !ruby/object:Gem::Dependency
|
|
143
|
-
name: jazz_hands
|
|
144
|
-
requirement: !ruby/object:Gem::Requirement
|
|
145
|
-
none: false
|
|
146
|
-
requirements:
|
|
147
|
-
- - ! '>='
|
|
148
|
-
- !ruby/object:Gem::Version
|
|
149
|
-
version: '0'
|
|
150
|
-
type: :development
|
|
151
|
-
prerelease: false
|
|
152
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
153
|
-
none: false
|
|
154
|
-
requirements:
|
|
155
|
-
- - ! '>='
|
|
156
|
-
- !ruby/object:Gem::Version
|
|
157
|
-
version: '0'
|
|
77
|
+
version: '1.3'
|
|
158
78
|
- !ruby/object:Gem::Dependency
|
|
159
79
|
name: parsley-store
|
|
160
80
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -211,15 +131,16 @@ executables: []
|
|
|
211
131
|
extensions: []
|
|
212
132
|
extra_rdoc_files:
|
|
213
133
|
- LICENSE
|
|
214
|
-
- README.
|
|
134
|
+
- README.md
|
|
215
135
|
files:
|
|
216
136
|
- .document
|
|
217
137
|
- .rvmrc
|
|
138
|
+
- .travis.yml
|
|
218
139
|
- CHANGELOG
|
|
219
140
|
- Gemfile
|
|
220
141
|
- Gemfile.lock
|
|
221
142
|
- LICENSE
|
|
222
|
-
- README.
|
|
143
|
+
- README.md
|
|
223
144
|
- Rakefile
|
|
224
145
|
- VERSION
|
|
225
146
|
- features/dwca-creator.feature
|
|
@@ -281,7 +202,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
281
202
|
version: '0'
|
|
282
203
|
segments:
|
|
283
204
|
- 0
|
|
284
|
-
hash:
|
|
205
|
+
hash: -2809542454291987056
|
|
285
206
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
286
207
|
none: false
|
|
287
208
|
requirements:
|
|
@@ -290,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
290
211
|
version: '0'
|
|
291
212
|
requirements: []
|
|
292
213
|
rubyforge_project:
|
|
293
|
-
rubygems_version: 1.8.
|
|
214
|
+
rubygems_version: 1.8.25
|
|
294
215
|
signing_key:
|
|
295
216
|
specification_version: 3
|
|
296
217
|
summary: Handler of Darwin Core Archive files
|