dwc-archive 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +1 -1
- data/.travis.yml +14 -0
- data/Gemfile +13 -16
- data/Gemfile.lock +122 -122
- data/{README.rdoc → README.md} +48 -26
- data/VERSION +1 -1
- data/lib/dwc-archive/core.rb +3 -3
- data/lib/dwc-archive/expander.rb +4 -2
- data/lib/dwc-archive/ingester.rb +26 -8
- metadata +17 -96
data/.rvmrc
CHANGED
@@ -1 +1 @@
|
|
1
|
-
rvm use ruby-1.9.3-
|
1
|
+
rvm use ruby-1.9.3-p392@dwc-archive --create
|
data/.travis.yml
ADDED
data/Gemfile
CHANGED
@@ -1,20 +1,17 @@
|
|
1
|
-
source
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
1
|
+
source 'https://rubygems.org'
|
5
2
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
gem "nokogiri", "~> 1.5.0"
|
10
|
-
gem "parsley-store", "~> 0.3.0"
|
11
|
-
gem "archive-tar-minitar"
|
3
|
+
gem 'nokogiri', '~> 1.5'
|
4
|
+
gem 'parsley-store', '~> 0.3.1'
|
5
|
+
gem 'archive-tar-minitar', '~> 0.5'
|
12
6
|
|
13
7
|
group :development do
|
14
|
-
gem
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
gem
|
19
|
-
gem
|
8
|
+
gem 'debugger', '~> 1.3'
|
9
|
+
end
|
10
|
+
|
11
|
+
group :test do
|
12
|
+
gem 'rspec', '~> 2.13'
|
13
|
+
gem 'cucumber', '~> 1.3'
|
14
|
+
gem 'bundler', '~> 1.3'
|
15
|
+
gem 'jeweler', '~> 1.8'
|
16
|
+
gem 'jazz_hands', '~> 0.5'
|
20
17
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,155 +1,155 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
rack
|
12
|
-
rack-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
i18n (~> 0.
|
19
|
-
|
4
|
+
abstract (1.0.0)
|
5
|
+
actionpack (3.0.8)
|
6
|
+
activemodel (= 3.0.8)
|
7
|
+
activesupport (= 3.0.8)
|
8
|
+
builder (~> 2.1.2)
|
9
|
+
erubis (~> 2.6.6)
|
10
|
+
i18n (~> 0.5.0)
|
11
|
+
rack (~> 1.2.1)
|
12
|
+
rack-mount (~> 0.6.14)
|
13
|
+
rack-test (~> 0.5.7)
|
14
|
+
tzinfo (~> 0.3.23)
|
15
|
+
activemodel (3.0.8)
|
16
|
+
activesupport (= 3.0.8)
|
17
|
+
builder (~> 2.1.2)
|
18
|
+
i18n (~> 0.5.0)
|
19
|
+
activesupport (3.0.8)
|
20
20
|
archive-tar-minitar (0.5.2)
|
21
|
-
awesome_print (1.0
|
22
|
-
binding_of_caller (0.
|
23
|
-
|
21
|
+
awesome_print (1.1.0)
|
22
|
+
binding_of_caller (0.7.1)
|
23
|
+
debug_inspector (>= 0.0.1)
|
24
|
+
biodiversity (3.0.1)
|
24
25
|
parallel
|
25
|
-
parallel
|
26
|
-
|
26
|
+
parallel (~> 0.6)
|
27
|
+
rake (~> 10.0)
|
27
28
|
treetop
|
28
|
-
|
29
|
-
|
29
|
+
treetop (~> 1.4)
|
30
|
+
unicode_utils (~> 1.4)
|
31
|
+
builder (2.1.2)
|
32
|
+
coderay (1.0.9)
|
30
33
|
columnize (0.3.6)
|
31
|
-
coolline (0.
|
32
|
-
cucumber (1.1
|
34
|
+
coolline (0.4.2)
|
35
|
+
cucumber (1.3.1)
|
33
36
|
builder (>= 2.1.2)
|
34
|
-
diff-lcs (>= 1.1.
|
35
|
-
gherkin (~> 2.
|
36
|
-
|
37
|
-
|
38
|
-
debugger (1.
|
37
|
+
diff-lcs (>= 1.1.3)
|
38
|
+
gherkin (~> 2.12.0)
|
39
|
+
multi_json (~> 1.3)
|
40
|
+
debug_inspector (0.0.2)
|
41
|
+
debugger (1.5.0)
|
39
42
|
columnize (>= 0.3.1)
|
40
|
-
debugger-linecache (~> 1.
|
41
|
-
debugger-ruby_core_source (~> 1.
|
42
|
-
debugger-linecache (1.
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
gherkin (2.
|
49
|
-
|
43
|
+
debugger-linecache (~> 1.2.0)
|
44
|
+
debugger-ruby_core_source (~> 1.2.0)
|
45
|
+
debugger-linecache (1.2.0)
|
46
|
+
debugger-ruby_core_source (1.2.0)
|
47
|
+
diff-lcs (1.2.4)
|
48
|
+
diffy (2.1.4)
|
49
|
+
erubis (2.6.6)
|
50
|
+
abstract (>= 1.0.0)
|
51
|
+
gherkin (2.12.0)
|
52
|
+
multi_json (~> 1.3)
|
50
53
|
git (1.2.5)
|
51
54
|
grit (2.5.0)
|
52
55
|
diff-lcs (~> 1.1)
|
53
56
|
mime-types (~> 1.15)
|
54
57
|
posix-spawn (~> 0.3.6)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
pry (~> 0.
|
64
|
-
pry-doc (~> 0.4.
|
65
|
-
pry-git (~> 0.2.
|
66
|
-
pry-
|
67
|
-
pry-remote (>= 0.1.
|
68
|
-
pry-stack_explorer (~> 0.4.
|
69
|
-
railties (
|
70
|
-
jeweler (1.
|
58
|
+
hirb (0.7.1)
|
59
|
+
i18n (0.5.0)
|
60
|
+
jazz_hands (0.5.0)
|
61
|
+
awesome_print (~> 1.1.0)
|
62
|
+
coderay (~> 1.0.9)
|
63
|
+
coolline (>= 0.4.0)
|
64
|
+
hirb (~> 0.7.1)
|
65
|
+
pry (~> 0.9.12)
|
66
|
+
pry-debugger (~> 0.2.2)
|
67
|
+
pry-doc (~> 0.4.4)
|
68
|
+
pry-git (~> 0.2.3)
|
69
|
+
pry-rails (~> 0.2.2)
|
70
|
+
pry-remote (>= 0.1.7)
|
71
|
+
pry-stack_explorer (~> 0.4.9)
|
72
|
+
railties (>= 3.0, < 5.0)
|
73
|
+
jeweler (1.8.4)
|
71
74
|
bundler (~> 1.0)
|
72
75
|
git (>= 1.2.5)
|
73
76
|
rake
|
74
|
-
|
75
|
-
json (1.7.
|
76
|
-
method_source (0.
|
77
|
-
mime-types (1.
|
78
|
-
multi_json (1.3
|
79
|
-
nokogiri (1.5.
|
80
|
-
parallel (0.
|
81
|
-
parsley-store (0.3.
|
82
|
-
|
83
|
-
|
77
|
+
rdoc
|
78
|
+
json (1.7.7)
|
79
|
+
method_source (0.8.1)
|
80
|
+
mime-types (1.23)
|
81
|
+
multi_json (1.7.3)
|
82
|
+
nokogiri (1.5.9)
|
83
|
+
parallel (0.6.4)
|
84
|
+
parsley-store (0.3.1)
|
85
|
+
biodiversity (~> 3.0.1)
|
86
|
+
jeweler (~> 1.8)
|
87
|
+
redis (~> 3.0)
|
84
88
|
polyglot (0.3.3)
|
85
89
|
posix-spawn (0.3.6)
|
86
|
-
pry (0.9.
|
90
|
+
pry (0.9.12.1)
|
87
91
|
coderay (~> 1.0.5)
|
88
|
-
method_source (~> 0.
|
89
|
-
slop (
|
90
|
-
pry-
|
91
|
-
|
92
|
-
|
92
|
+
method_source (~> 0.8)
|
93
|
+
slop (~> 3.4)
|
94
|
+
pry-debugger (0.2.2)
|
95
|
+
debugger (~> 1.3)
|
96
|
+
pry (~> 0.9.10)
|
97
|
+
pry-doc (0.4.5)
|
98
|
+
pry (>= 0.9)
|
99
|
+
yard (>= 0.8)
|
93
100
|
pry-git (0.2.3)
|
94
101
|
diffy
|
95
102
|
grit
|
96
103
|
pry (>= 0.9.8)
|
97
|
-
pry-
|
98
|
-
pry (
|
99
|
-
pry-remote (0.1.
|
100
|
-
pry (~> 0.9
|
101
|
-
slop (~>
|
102
|
-
pry-stack_explorer (0.4.
|
103
|
-
binding_of_caller (
|
104
|
-
|
105
|
-
rack
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
rack-test (0.6.1)
|
104
|
+
pry-rails (0.2.2)
|
105
|
+
pry (>= 0.9.10)
|
106
|
+
pry-remote (0.1.7)
|
107
|
+
pry (~> 0.9)
|
108
|
+
slop (~> 3.0)
|
109
|
+
pry-stack_explorer (0.4.9)
|
110
|
+
binding_of_caller (>= 0.7)
|
111
|
+
pry (~> 0.9.11)
|
112
|
+
rack (1.2.8)
|
113
|
+
rack-mount (0.6.14)
|
114
|
+
rack (>= 1.0.0)
|
115
|
+
rack-test (0.5.7)
|
110
116
|
rack (>= 1.0)
|
111
|
-
railties (3.
|
112
|
-
actionpack (= 3.
|
113
|
-
activesupport (= 3.
|
114
|
-
rack-ssl (~> 1.3.2)
|
117
|
+
railties (3.0.8)
|
118
|
+
actionpack (= 3.0.8)
|
119
|
+
activesupport (= 3.0.8)
|
115
120
|
rake (>= 0.8.7)
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
rdoc (3.12)
|
121
|
+
thor (~> 0.14.4)
|
122
|
+
rake (10.0.4)
|
123
|
+
rdoc (4.0.1)
|
120
124
|
json (~> 1.4)
|
121
|
-
redis (3.0.
|
122
|
-
rspec (2.
|
123
|
-
rspec-core (~> 2.
|
124
|
-
rspec-expectations (~> 2.
|
125
|
-
rspec-mocks (~> 2.
|
126
|
-
rspec-core (2.
|
127
|
-
rspec-expectations (2.
|
128
|
-
diff-lcs (
|
129
|
-
rspec-mocks (2.
|
130
|
-
slop (
|
131
|
-
|
132
|
-
|
133
|
-
rack (~> 1.0)
|
134
|
-
tilt (~> 1.1, != 1.3.0)
|
135
|
-
term-ansicolor (1.0.7)
|
136
|
-
thor (0.15.4)
|
137
|
-
tilt (1.3.3)
|
138
|
-
treetop (1.4.10)
|
125
|
+
redis (3.0.4)
|
126
|
+
rspec (2.13.0)
|
127
|
+
rspec-core (~> 2.13.0)
|
128
|
+
rspec-expectations (~> 2.13.0)
|
129
|
+
rspec-mocks (~> 2.13.0)
|
130
|
+
rspec-core (2.13.1)
|
131
|
+
rspec-expectations (2.13.0)
|
132
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
133
|
+
rspec-mocks (2.13.1)
|
134
|
+
slop (3.4.4)
|
135
|
+
thor (0.14.6)
|
136
|
+
treetop (1.4.12)
|
139
137
|
polyglot
|
140
138
|
polyglot (>= 0.3.1)
|
141
|
-
|
139
|
+
tzinfo (0.3.37)
|
140
|
+
unicode_utils (1.4.0)
|
141
|
+
yard (0.8.6.1)
|
142
142
|
|
143
143
|
PLATFORMS
|
144
144
|
ruby
|
145
145
|
|
146
146
|
DEPENDENCIES
|
147
|
-
archive-tar-minitar
|
148
|
-
bundler (~> 1.
|
149
|
-
cucumber (~> 1.
|
150
|
-
debugger
|
151
|
-
jazz_hands
|
152
|
-
jeweler (~> 1.
|
153
|
-
nokogiri (~> 1.5
|
154
|
-
parsley-store (~> 0.3.
|
155
|
-
rspec (~> 2.
|
147
|
+
archive-tar-minitar (~> 0.5)
|
148
|
+
bundler (~> 1.3)
|
149
|
+
cucumber (~> 1.3)
|
150
|
+
debugger (~> 1.3)
|
151
|
+
jazz_hands (~> 0.5)
|
152
|
+
jeweler (~> 1.8)
|
153
|
+
nokogiri (~> 1.5)
|
154
|
+
parsley-store (~> 0.3.1)
|
155
|
+
rspec (~> 2.13)
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,17 +1,22 @@
|
|
1
|
-
|
1
|
+
Darwin Core Archive
|
2
|
+
===================
|
2
3
|
|
3
|
-
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
4
7
|
|
8
|
+
Darwin Core Archive format is a current standard for information exchange
|
9
|
+
between Global Names Architecture modules. This gem makes it possible to work with
|
10
|
+
Darwin Core Archive data compressed to either zip or tar.gz files.
|
11
|
+
More information about Darwin Core Archive can be found on a [GBIF page:][7]
|
5
12
|
|
6
|
-
|
7
|
-
|
8
|
-
== Installation
|
9
|
-
|
10
|
-
Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
13
|
+
Installation
|
14
|
+
------------
|
11
15
|
|
12
16
|
sudo gem install dwc-archive
|
13
17
|
|
14
|
-
|
18
|
+
Usage
|
19
|
+
-----
|
15
20
|
|
16
21
|
require 'rubygems'
|
17
22
|
require 'dwc-archive'
|
@@ -30,7 +35,7 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
30
35
|
# rows that had a wrong encoding will be collected into errors array
|
31
36
|
data, errors = dwc.core.read
|
32
37
|
|
33
|
-
# read content using a block
|
38
|
+
# read content using a block, getting back results in sets of 100 rows each
|
34
39
|
results = []
|
35
40
|
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
36
41
|
results << [data, errors]
|
@@ -47,11 +52,9 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
47
52
|
end
|
48
53
|
results << [tail_data, tail_errors]
|
49
54
|
|
50
|
-
# normalize names in classification collecting together synonyms,
|
51
|
-
# vernacular names and associating paths to taxons
|
52
|
-
# distributed as DwCA file
|
53
|
-
# NOTE: this functionality requires biodiversity gem for ruby 1.8.x and
|
54
|
-
# biodiversity19 gem for ruby 1.9.x
|
55
|
+
# normalize names in classification collecting together synonyms,
|
56
|
+
# canonical names, vernacular names and associating paths to taxons
|
57
|
+
# in a classification distributed as DwCA file
|
55
58
|
|
56
59
|
result = dwc.normalize_classification
|
57
60
|
|
@@ -59,10 +62,11 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
59
62
|
|
60
63
|
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
61
64
|
cn.normalize
|
62
|
-
# if you don't want to generate path consisting of canonical forms
|
65
|
+
# if you don't want to generate path consisting of canonical forms
|
66
|
+
# of ancestors to a taxon
|
63
67
|
cn.normalize(:with_canonical_names => false)
|
64
68
|
|
65
|
-
# if you don't want to ingest information from extensions
|
69
|
+
# if you don't want to ingest information from extensions
|
66
70
|
cn.normalize(:with_extensions => false)
|
67
71
|
|
68
72
|
# to get a flat hash of nodes with attached vernacular names and synonyms
|
@@ -79,19 +83,24 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
79
83
|
|
80
84
|
DarwinCore.clean_all # remove all expanded archives
|
81
85
|
|
82
|
-
|
86
|
+
Creating a DarwinCore Archive file
|
87
|
+
----------------------------------
|
83
88
|
|
84
89
|
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
85
90
|
|
86
91
|
core = [
|
87
|
-
["http://rs.tdwg.org/dwc/terms/taxonID",
|
92
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
93
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
94
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
95
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
88
96
|
[1, 0, "Plantae", "kingdom"],
|
89
97
|
[2, 1, "Betula", "genus"],
|
90
98
|
[3, 2, "Betula verucosa", "species"]
|
91
99
|
]
|
92
100
|
|
93
101
|
vernacular_names = [
|
94
|
-
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
102
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
103
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
95
104
|
[1, "Plants"],
|
96
105
|
[1, "Растения"],
|
97
106
|
[2, "Birch"],
|
@@ -121,28 +130,41 @@ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
|
|
121
130
|
:email => 'jimdoe@example.com',
|
122
131
|
:url => 'http://aggregator.example.org' }],
|
123
132
|
:abstract => 'test classification',
|
124
|
-
:citation =>
|
133
|
+
:citation =>
|
134
|
+
'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
|
125
135
|
:url => 'http://example.com'
|
126
136
|
}
|
127
137
|
|
128
138
|
gen.add_core(core, 'core.txt')
|
129
|
-
gen.add_extension(vernacular_names,
|
139
|
+
gen.add_extension(vernacular_names,
|
140
|
+
'vernacular_names.txt',
|
141
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
130
142
|
gen.add_meta_xml
|
131
143
|
gen.add_eml_xml(eml)
|
132
144
|
gen.pack
|
133
145
|
|
134
|
-
|
135
|
-
|
146
|
+
Note on Patches/Pull Requests
|
147
|
+
-----------------------------
|
136
148
|
|
137
149
|
* Fork the project.
|
138
150
|
* Make your feature addition or bug fix.
|
139
151
|
* Add tests for it. This is important so I don't break it in a
|
140
152
|
future version unintentionally.
|
141
153
|
* Commit, do not mess with rakefile, version, or history.
|
142
|
-
(if you want to have your own version, that is fine but bump
|
154
|
+
(if you want to have your own version, that is fine but bump
|
155
|
+
version in a commit by itself I can ignore when I pull)
|
143
156
|
* Send me a pull request. Bonus points for topic branches.
|
144
157
|
|
145
158
|
|
146
|
-
|
159
|
+
Copyright
|
160
|
+
---------
|
161
|
+
|
162
|
+
Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
|
147
163
|
|
148
|
-
|
164
|
+
[1]: https://badge.fury.io/rb/dwc-archive.png
|
165
|
+
[2]: http://badge.fury.io/rb/dwc-archive
|
166
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
|
167
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
|
168
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
|
169
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
170
|
+
[7]: http://bit.ly/2IxcBA
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.9.
|
1
|
+
0.9.4
|
data/lib/dwc-archive/core.rb
CHANGED
@@ -8,9 +8,9 @@ class DarwinCore
|
|
8
8
|
@path = @archive.files_path
|
9
9
|
root_key = @archive.meta.keys[0]
|
10
10
|
@data = @archive.meta[root_key][:core]
|
11
|
-
raise DarwinCore::CoreFileError.new("Cannot
|
12
|
-
@id = @data[:id][:attributes]
|
13
|
-
raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
|
11
|
+
raise DarwinCore::CoreFileError.new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
|
12
|
+
@id = @data[:id][:attributes]
|
13
|
+
# raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
|
14
14
|
get_attributes(DarwinCore::CoreFileError)
|
15
15
|
end
|
16
16
|
end
|
data/lib/dwc-archive/expander.rb
CHANGED
@@ -3,7 +3,7 @@ class DarwinCore
|
|
3
3
|
def initialize(archive_path, tmp_dir)
|
4
4
|
@archive_path = archive_path
|
5
5
|
@tmp_dir = tmp_dir
|
6
|
-
@path = File.join(tmp_dir, 'dwc_' + rand(
|
6
|
+
@path = File.join(tmp_dir, 'dwc_' + rand(10_000_000_000).to_s)
|
7
7
|
@unpacker = get_unpacker
|
8
8
|
end
|
9
9
|
|
@@ -11,7 +11,9 @@ class DarwinCore
|
|
11
11
|
clean
|
12
12
|
raise DarwinCore::FileNotFoundError unless File.exists?(@archive_path)
|
13
13
|
success = @unpacker.call(@path, @archive_path) if @unpacker
|
14
|
-
(@unpacker && success && $?.exitstatus == 0) ?
|
14
|
+
(@unpacker && success && $?.exitstatus == 0) ?
|
15
|
+
success :
|
16
|
+
(clean; raise DarwinCore::UnpackingError)
|
15
17
|
end
|
16
18
|
|
17
19
|
def path
|
data/lib/dwc-archive/ingester.rb
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
class DarwinCore
|
3
3
|
module Ingester
|
4
4
|
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
5
|
-
attr_reader :file_path, :fields, :line_separator,
|
5
|
+
attr_reader :file_path, :fields, :line_separator,
|
6
|
+
:quote_character, :ignore_headers
|
6
7
|
|
7
8
|
def size
|
8
9
|
@size ||= get_size
|
@@ -22,7 +23,9 @@ class DarwinCore
|
|
22
23
|
index_fix = 0; next if @ignore_headers && i == 0
|
23
24
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
24
25
|
if (i + index_fix) % batch_size == 0
|
25
|
-
DarwinCore.logger_write(@dwc.object_id,
|
26
|
+
DarwinCore.logger_write(@dwc.object_id,
|
27
|
+
"Ingested %s records from %s" %
|
28
|
+
[(i + index_fix), name])
|
26
29
|
if block_given?
|
27
30
|
yield [res, errors]
|
28
31
|
res = []
|
@@ -42,25 +45,40 @@ class DarwinCore
|
|
42
45
|
def process_csv_row(result, errors, row)
|
43
46
|
str = row.join('')
|
44
47
|
str = str.force_encoding('utf-8')
|
45
|
-
str.encoding.name ==
|
48
|
+
if str.encoding.name == 'UTF-8' && str.valid_encoding?
|
49
|
+
result << row.map { |f| f.nil? ? nil : f.force_encoding('utf-8') }
|
50
|
+
else
|
51
|
+
errors << row
|
52
|
+
end
|
46
53
|
end
|
47
54
|
|
48
55
|
def get_attributes(exception)
|
49
56
|
@properties = @data[:attributes]
|
50
57
|
@encoding = @properties[:encoding] || 'UTF-8'
|
51
|
-
|
58
|
+
err_msg = 'No support for encodings other ' +
|
59
|
+
'than utf-8 or utf-16 at the moment'
|
60
|
+
encodings = ['utf-8', 'utf8', 'utf-16', 'utf16']
|
61
|
+
unless encodings.include? @encoding.downcase
|
62
|
+
raise DarwinCore::EncodingError.new(err_msg)
|
63
|
+
end
|
52
64
|
@field_separator = get_field_separator
|
53
65
|
@quote_character = @properties[:fieldsEnclosedBy] || ""
|
54
|
-
@line_separator = @properties[:linesTerminatedBy] ||
|
55
|
-
@ignore_headers = @properties[:ignoreHeaderLines] ?
|
66
|
+
@line_separator = @properties[:linesTerminatedBy] || '\n'
|
67
|
+
@ignore_headers = @properties[:ignoreHeaderLines] ?
|
68
|
+
[1, true].include?(@properties[:ignoreHeaderLines]) :
|
69
|
+
false
|
56
70
|
@file_path = get_file_path
|
57
71
|
raise DarwinCore::FileNotFoundError.new("No file data") unless @file_path
|
58
72
|
@fields = get_fields
|
59
|
-
|
73
|
+
if @fields.empty?
|
74
|
+
raise DarwinCore::InvalidArchiveError.new("No data fields are found")
|
75
|
+
end
|
60
76
|
end
|
61
77
|
|
62
78
|
def get_file_path
|
63
|
-
file = @data[:location] ||
|
79
|
+
file = @data[:location] ||
|
80
|
+
@data[:attributes][:location] ||
|
81
|
+
@data[:files][:location]
|
64
82
|
File.join(@path, file)
|
65
83
|
end
|
66
84
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-05-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.5
|
21
|
+
version: '1.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 1.5
|
29
|
+
version: '1.5'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
31
|
name: parsley-store
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -34,7 +34,7 @@ dependencies:
|
|
34
34
|
requirements:
|
35
35
|
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 0.3.
|
37
|
+
version: 0.3.1
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,79 +42,31 @@ dependencies:
|
|
42
42
|
requirements:
|
43
43
|
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0.3.
|
45
|
+
version: 0.3.1
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: archive-tar-minitar
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - ! '>='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '0'
|
54
|
-
type: :runtime
|
55
|
-
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
|
-
requirements:
|
59
|
-
- - ! '>='
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: rspec
|
64
|
-
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
|
-
requirements:
|
67
|
-
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: 2.7.0
|
70
|
-
type: :development
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
|
-
requirements:
|
75
|
-
- - ~>
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
version: 2.7.0
|
78
|
-
- !ruby/object:Gem::Dependency
|
79
|
-
name: cucumber
|
80
|
-
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
|
-
requirements:
|
83
|
-
- - ~>
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: 1.1.3
|
86
|
-
type: :development
|
87
|
-
prerelease: false
|
88
|
-
version_requirements: !ruby/object:Gem::Requirement
|
89
49
|
none: false
|
90
50
|
requirements:
|
91
51
|
- - ~>
|
92
52
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
94
|
-
|
95
|
-
name: bundler
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - ~>
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version: '1.0'
|
102
|
-
type: :development
|
53
|
+
version: '0.5'
|
54
|
+
type: :runtime
|
103
55
|
prerelease: false
|
104
56
|
version_requirements: !ruby/object:Gem::Requirement
|
105
57
|
none: false
|
106
58
|
requirements:
|
107
59
|
- - ~>
|
108
60
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
61
|
+
version: '0.5'
|
110
62
|
- !ruby/object:Gem::Dependency
|
111
|
-
name:
|
63
|
+
name: debugger
|
112
64
|
requirement: !ruby/object:Gem::Requirement
|
113
65
|
none: false
|
114
66
|
requirements:
|
115
67
|
- - ~>
|
116
68
|
- !ruby/object:Gem::Version
|
117
|
-
version: 1.
|
69
|
+
version: '1.3'
|
118
70
|
type: :development
|
119
71
|
prerelease: false
|
120
72
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -122,39 +74,7 @@ dependencies:
|
|
122
74
|
requirements:
|
123
75
|
- - ~>
|
124
76
|
- !ruby/object:Gem::Version
|
125
|
-
version: 1.
|
126
|
-
- !ruby/object:Gem::Dependency
|
127
|
-
name: debugger
|
128
|
-
requirement: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
|
-
requirements:
|
131
|
-
- - ! '>='
|
132
|
-
- !ruby/object:Gem::Version
|
133
|
-
version: '0'
|
134
|
-
type: :development
|
135
|
-
prerelease: false
|
136
|
-
version_requirements: !ruby/object:Gem::Requirement
|
137
|
-
none: false
|
138
|
-
requirements:
|
139
|
-
- - ! '>='
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: '0'
|
142
|
-
- !ruby/object:Gem::Dependency
|
143
|
-
name: jazz_hands
|
144
|
-
requirement: !ruby/object:Gem::Requirement
|
145
|
-
none: false
|
146
|
-
requirements:
|
147
|
-
- - ! '>='
|
148
|
-
- !ruby/object:Gem::Version
|
149
|
-
version: '0'
|
150
|
-
type: :development
|
151
|
-
prerelease: false
|
152
|
-
version_requirements: !ruby/object:Gem::Requirement
|
153
|
-
none: false
|
154
|
-
requirements:
|
155
|
-
- - ! '>='
|
156
|
-
- !ruby/object:Gem::Version
|
157
|
-
version: '0'
|
77
|
+
version: '1.3'
|
158
78
|
- !ruby/object:Gem::Dependency
|
159
79
|
name: parsley-store
|
160
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -211,15 +131,16 @@ executables: []
|
|
211
131
|
extensions: []
|
212
132
|
extra_rdoc_files:
|
213
133
|
- LICENSE
|
214
|
-
- README.
|
134
|
+
- README.md
|
215
135
|
files:
|
216
136
|
- .document
|
217
137
|
- .rvmrc
|
138
|
+
- .travis.yml
|
218
139
|
- CHANGELOG
|
219
140
|
- Gemfile
|
220
141
|
- Gemfile.lock
|
221
142
|
- LICENSE
|
222
|
-
- README.
|
143
|
+
- README.md
|
223
144
|
- Rakefile
|
224
145
|
- VERSION
|
225
146
|
- features/dwca-creator.feature
|
@@ -281,7 +202,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
281
202
|
version: '0'
|
282
203
|
segments:
|
283
204
|
- 0
|
284
|
-
hash:
|
205
|
+
hash: -2809542454291987056
|
285
206
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
286
207
|
none: false
|
287
208
|
requirements:
|
@@ -290,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
290
211
|
version: '0'
|
291
212
|
requirements: []
|
292
213
|
rubyforge_project:
|
293
|
-
rubygems_version: 1.8.
|
214
|
+
rubygems_version: 1.8.25
|
294
215
|
signing_key:
|
295
216
|
specification_version: 3
|
296
217
|
summary: Handler of Darwin Core Archive files
|