dwc-archive 0.9.6 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore do
4
+ subject { DarwinCore }
5
+ let(:file_dir) { File.expand_path('../../files', __FILE__) }
6
+
7
+ it 'breaks for ruby 1.8 and older' do
8
+ stub_const('RUBY_VERSION', '1.8.7')
9
+ expect{load File.expand_path('../../../lib/dwc-archive.rb', __FILE__)}.
10
+ to raise_error
11
+ end
12
+
13
+ it 'continues for ruby 1.9.1 and higher' do
14
+ stub_const('RUBY_VERSION', '1.9.2')
15
+ expect{load File.expand_path('../../../lib/dwc-archive.rb', __FILE__)}.
16
+ to_not raise_error
17
+ end
18
+
19
+ describe 'redis connection' do
20
+ it 'redis is running' do
21
+ expect do
22
+ socket = TCPSocket.open('localhost', 6379)
23
+ socket.close
24
+ end.to_not raise_error
25
+ end
26
+ end
27
+
28
+ it 'has version' do
29
+ expect(DarwinCore::VERSION =~ /\d+\.\d+\.\d/).to be_true
30
+ end
31
+
32
+ describe '.nil_field?' do
33
+ it 'is true for nil fields' do
34
+ [nil, '/N', ''].each do |i|
35
+ expect(DarwinCore.nil_field?(i)).to be_true
36
+ end
37
+ end
38
+
39
+ it 'is false for non-nil fields' do
40
+ [0, '0', '123', 123, 'dsdfs434343/N'].each do |i|
41
+ expect(subject.nil_field?(i)).to be_false
42
+ end
43
+ end
44
+ end
45
+
46
+ describe '.clean_all' do
47
+ let(:tmp_dir) { DarwinCore::DEFAULT_TMP_DIR }
48
+
49
+ it 'cleans dwca directories' do
50
+ Dir.chdir(tmp_dir)
51
+ FileUtils.mkdir('dwc_123') unless File.exists?('dwc_123')
52
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
53
+ expect(dwca_dirs.size).to be > 0
54
+ subject.clean_all
55
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
56
+ expect(dwca_dirs.size).to be 0
57
+ end
58
+
59
+ context 'no dwc files exist' do
60
+ it 'does nothing' do
61
+ subject.clean_all
62
+ subject.clean_all
63
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
64
+ expect(dwca_dirs.size).to be 0
65
+ end
66
+ end
67
+ end
68
+
69
+ describe '.logger' do
70
+ it { expect(subject.logger).to be_kind_of Logger }
71
+ end
72
+
73
+ describe '.logger=' do
74
+ it 'sets logger' do
75
+ expect(subject.logger = 'fake logger').to eq 'fake logger'
76
+ expect(subject.logger).to eq 'fake logger'
77
+ end
78
+ end
79
+
80
+ describe '.logger_reset' do
81
+ it 'resets logger' do
82
+ subject.logger = 'fake logger'
83
+ expect(subject.logger).to eq 'fake logger'
84
+ subject.logger_reset
85
+ expect(subject.logger).to be_kind_of Logger
86
+ end
87
+ end
88
+
89
+ describe '.new' do # builds a DarwinCore from fixture files under file_dir
90
+ subject(:dwca) { DarwinCore.new(file_path) }
91
+
92
+ context 'tar.gz and zip files supplied' do
93
+ files = %w(data.zip data.tar.gz minimal.tar.gz junk_dir_inside.zip)
94
+ files.each do |file|
95
+ context "with #{file}" do # one group per file: repeating let in a single group keeps only the last definition
96
+ let(:file_path) { File.join(file_dir, file) }
97
+ it "creates archive from %s" % file do
98
+ expect(dwca.archive.valid?).to be_true
99
+ end
100
+ end
101
+ end
102
+ end
103
+
104
+ context 'when file does not exist' do
105
+ let(:file_path) { File.join(file_dir, 'no_file.gz') }
106
+
107
+ it 'raises not found' do
108
+ expect { dwca }.to raise_error DarwinCore::FileNotFoundError
109
+ end
110
+ end
111
+
112
+ context 'archive cannot unpack' do
113
+
114
+ let(:file_path) { File.join(file_dir, 'broken.tar.gz') }
115
+
116
+ it 'raises unpacking error' do
117
+ expect { dwca }.to raise_error DarwinCore::UnpackingError
118
+ end
119
+ end
120
+
121
+ context 'archive is broken' do
122
+
123
+ let(:file_path) { File.join(file_dir, 'invalid.tar.gz') }
124
+
125
+ it 'raises error of invalid archive' do
126
+ expect { dwca }.to raise_error DarwinCore::InvalidArchiveError
127
+ end
128
+
129
+ end
130
+
131
+ context 'archive is not in utf-8 encoding' do
132
+
133
+ let(:file_path) { File.join(file_dir, 'latin1.tar.gz') }
134
+
135
+ it 'raises wrong encoding error' do
136
+ expect { dwca }.to raise_error DarwinCore::EncodingError
137
+ end
138
+
139
+ end
140
+
141
+ context 'filename with spaces and non-alphanumeric chars' do
142
+
143
+ let(:file_path) { File.join(file_dir, 'file with characters(3).gz') }
144
+
145
+ it 'creates archive' do
146
+ expect(dwca.archive.valid?).to be_true
147
+ end
148
+
149
+ end
150
+ end
151
+
152
+ describe 'file_name' do
153
+ subject(:dwca) { DarwinCore.new(file_path) }
154
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
155
+
156
+ it 'returns file name' do
157
+ expect(dwca.file_name).to eq 'data.tar.gz'
158
+ end
159
+ end
160
+
161
+ describe 'path' do
162
+ subject(:dwca) { DarwinCore.new(file_path) }
163
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
164
+
165
+ it 'returns path of the archive' do
166
+ expect(dwca.path).to match %r|spec.files.data\.tar\.gz|
167
+ end
168
+ end
169
+
170
+ describe '#archive' do
171
+ subject(:dwca) { DarwinCore.new(file_path) }
172
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
173
+
174
+ it 'returns archive' do
175
+ expect(dwca.archive).to be_kind_of DarwinCore::Archive
176
+ end
177
+ end
178
+
179
+ describe '#core' do
180
+ subject(:dwca) { DarwinCore.new(file_path) }
181
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
182
+
183
+ it 'returns core' do
184
+ expect(dwca.core).to be_kind_of DarwinCore::Core
185
+ end
186
+ end
187
+
188
+ describe '#metadata' do
189
+ subject(:dwca) { DarwinCore.new(file_path) }
190
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
191
+
192
+ it 'returns eml' do
193
+ expect(dwca.eml).to be_kind_of DarwinCore::Metadata
194
+ expect(dwca.metadata).to be_kind_of DarwinCore::Metadata
195
+ end
196
+ end
197
+
198
+ describe '#extensions' do
199
+ subject(:dwca) { DarwinCore.new(file_path) }
200
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
201
+
202
+ it 'returns extensions' do
203
+ extensions = dwca.extensions
204
+ expect(extensions).to be_kind_of Array
205
+ expect(extensions[0]).to be_kind_of DarwinCore::Extension
206
+ end
207
+ end
208
+
209
+ describe '#checksum' do
210
+ subject(:dwca) { DarwinCore.new(file_path) }
211
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
212
+
213
+ it 'creates checksum hash' do
214
+ expect(dwca.checksum).to eq '7d94fc28ffaf434b66fbc790aa5ef00d834057bf'
215
+ end
216
+ end
217
+
218
+ describe '#has_parent_id' do
219
+ subject(:dwca) { DarwinCore.new(file_path) }
220
+
221
+ context 'has classification' do
222
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
223
+ it 'returns true' do
224
+ expect(dwca.has_parent_id?).to be_true
225
+ end
226
+ end
227
+
228
+ context 'does not have classification' do
229
+ let(:file_path) { File.join(file_dir, 'gnub.tar.gz') }
230
+ it 'returns false' do
231
+ expect(dwca.has_parent_id?).to be_false
232
+ end
233
+ end
234
+ end
235
+
236
+ describe '#classification_normalizer' do
237
+ subject(:dwca) { DarwinCore.new(file_path) }
238
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
239
+
240
+ context 'not initialized' do
241
+ it 'is nil' do
242
+ expect(dwca.classification_normalizer).to be_nil
243
+ end
244
+ end
245
+
246
+ context 'initialized' do
247
+ it 'is DarwinCore::ClassificationNormalizer' do
248
+ dwca.normalize_classification
249
+ expect(dwca.classification_normalizer).
250
+ to be_kind_of DarwinCore::ClassificationNormalizer
251
+ end
252
+ end
253
+ end
254
+
255
+ describe '#normalize_classification' do
256
+ subject(:dwca) { DarwinCore.new(file_path) }
257
+ let(:file_path) { File.join(file_dir, 'data.tar.gz') }
258
+ let(:normalized) { dwca.normalize_classification }
259
+
260
+ it 'returns hash' do
261
+ expect(normalized).to be_kind_of Hash
262
+ end
263
+
264
+ it 'uses utf-8 encoding for classification paths' do
265
+ encodings = []
266
+ normalized.each do |taxon_id, taxon|
267
+ taxon.classification_path.each { |p| encodings << p.encoding }
268
+ end
269
+ expect(encodings.uniq!.map { |e| e.to_s }).to eq ['UTF-8']
270
+ end
271
+
272
+ it 'has elements of DarwinCore::TaxonNormalized type' do
273
+ expect(normalized['leptogastrinae:tid:2857']).
274
+ to be_kind_of DarwinCore::TaxonNormalized
275
+ end
276
+ end
277
+
278
+ end
279
+
@@ -0,0 +1,21 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore::Generator::EmlXml do
4
+ subject(:eml) { DarwinCore::Generator::EmlXml.new(data, path) }
5
+ let(:data) { EML_DATA }
6
+ let(:path) { DarwinCore::DEFAULT_TMP_DIR }
7
+
8
+ describe '.new' do
9
+ it 'initializes generator' do
10
+ expect(eml).to be_kind_of DarwinCore::Generator::EmlXml
11
+ end
12
+ end
13
+
14
+ describe '#create' do
15
+ it 'should create eml xml' do
16
+ eml.create
17
+ eml_xml = File.read(File.join(path, 'eml.xml'))
18
+ expect(eml_xml).to match /Test Classification/
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,21 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore::Generator::MetaXml do
4
+ subject(:meta) { DarwinCore::Generator::MetaXml.new(data, path) }
5
+ let(:data) { META_DATA }
6
+ let(:path) { DarwinCore::DEFAULT_TMP_DIR }
7
+
8
+ describe '.new' do
9
+ it 'initializes' do
10
+ expect(meta).to be_kind_of DarwinCore::Generator::MetaXml
11
+ end
12
+ end
13
+
14
+ describe '#create' do
15
+ it 'creates metadata file' do
16
+ meta.create
17
+ meta = File.read(File.join(path, 'meta.xml'))
18
+ expect(meta).to match %r|<location>core.csv</location>|
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,116 @@
1
+ # encoding: utf-8
2
+ require_relative '../spec_helper'
3
+
4
+ describe DarwinCore::Generator do
5
+ subject(:gen) { DarwinCore::Generator.new(dwc_path, tmp_dir) }
6
+ let(:tmp_dir) { DarwinCore::DEFAULT_TMP_DIR }
7
+ let(:dwc_path) { File.join(tmp_dir, 'spec_dwca.tar.gz') }
8
+
9
+ describe '.new' do
10
+ it 'initializes empty DwCA' do
11
+ expect(gen).to be_kind_of DarwinCore::Generator
12
+ end
13
+ end
14
+
15
+ describe '#add_core' do
16
+ it 'adds core to DwCA instance' do # reads core.csv back from gen.path
17
+ gen.add_core(CORE_DATA.dup, 'core.csv', true)
18
+ core_csv = File.read(File.join(gen.path, 'core.csv'))
19
+ expect(core_csv).to match(/taxonID,parentNameUsageID,scientificName/)
20
+ end
21
+
22
+ context 'urls are not given in header' do
23
+ it 'raises error' do
24
+ rows = CORE_DATA.dup
25
+ rows[0] = rows[0].map { |term| term.split('/').last } # keep only the text after the last slash of each header field
26
+ expect { gen.add_core(rows, 'core.csv', true) }.
27
+ to raise_error(DarwinCore::GeneratorError)
28
+ end
29
+ end
30
+ end
31
+
32
+ describe '#add_extension' do
33
+ it 'adds extension to DwCA instance' do
34
+ gen.add_extension(EXTENSION_DATA.dup,
35
+ 'vern.csv',
36
+ true,
37
+ 'http://rs.gbif.org/terms/1.0/VernacularName')
38
+ extension = File.read(File.join(gen.path, 'vern.csv'))
39
+
40
+ expect(extension).to match /Береза/
41
+ end
42
+ end
43
+
44
+ describe '#add_meta_xml' do
45
+ it 'creates metadata for DwCA' do
46
+ gen.add_core(CORE_DATA.dup, 'core.csv', true)
47
+ gen.add_extension(EXTENSION_DATA.dup,
48
+ 'vern.csv',
49
+ true,
50
+ 'http://rs.gbif.org/terms/1.0/VernacularName')
51
+
52
+ gen.add_meta_xml
53
+ meta = File.read(File.join(gen.path, 'meta.xml'))
54
+ expect(meta).to match %r|<location>core.csv</location>|
55
+ end
56
+ end
57
+
58
+ describe '#add_eml_data' do
59
+ it 'adds eml data' do
60
+ gen.add_eml_xml(EML_DATA)
61
+ eml = File.read(File.join(gen.path, 'eml.xml'))
62
+ expect(eml).to match /jdoe@example.com/
63
+ end
64
+ end
65
+
66
+ describe '#path' do
67
+ it 'returns temporary path for assembling DwCA' do
68
+ expect(gen.path).to match /dwc_\d+$/
69
+ end
70
+ end
71
+
72
+ describe '#files' do
73
+ it 'returns created files' do
74
+ gen.add_core(CORE_DATA.dup, 'core.csv', true)
75
+ gen.add_extension(EXTENSION_DATA.dup,
76
+ 'vern.csv',
77
+ true,
78
+ 'http://rs.gbif.org/terms/1.0/VernacularName')
79
+
80
+ gen.add_meta_xml
81
+ expect(gen.files).to match_array ['core.csv', 'meta.xml', 'vern.csv']
82
+ end
83
+ end
84
+
85
+ describe '#pack' do
86
+ it 'creates final DwCA file' do
87
+ FileUtils.rm dwc_path if File.exists?(dwc_path)
88
+ gen.add_core(CORE_DATA.dup, 'core.csv', true)
89
+ gen.add_extension(EXTENSION_DATA.dup,
90
+ 'vern.csv',
91
+ true,
92
+ 'http://rs.gbif.org/terms/1.0/VernacularName')
93
+
94
+ gen.add_meta_xml
95
+ gen.add_eml_xml(EML_DATA)
96
+ gen.pack
97
+ expect(File.exists?(dwc_path)).to be_true
98
+ end
99
+ end
100
+
101
+ describe '#clean' do
102
+ it 'removes temporary directory for DwCA' do
103
+ gen.add_eml_xml(EML_DATA)
104
+ expect(File.exists?(gen.path)).to be true
105
+ gen.clean
106
+ expect(File.exists?(gen.path)).to be false
107
+ end
108
+ end
109
+
110
+ describe '#eml_xml_data' do
111
+ it 'returns current eml data' do
112
+ expect(gen.eml_xml_data).to be_kind_of Hash
113
+ end
114
+ end
115
+
116
+ end
@@ -0,0 +1,34 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore::GnubTaxon do
4
+ subject(:dwca) { DarwinCore.new(file_path) }
5
+ subject(:normalizer) { DarwinCore::ClassificationNormalizer.new(dwca) }
6
+ let(:file_dir) { File.expand_path('../../files', __FILE__) }
7
+ let(:file_path) { File.join(file_dir, file_name) }
8
+ let(:file_name) { 'gnub.tar.gz' }
9
+
10
+ it 'should get uuids from GNUB' do
11
+ normalizer.normalize
12
+ tn = normalizer.normalized_data['9c399f90-cfb8-5a7f-9a21-18285a473488']
13
+ expect(tn).to be_kind_of DarwinCore::GnubTaxon
14
+ expect(tn).to be_kind_of DarwinCore::TaxonNormalized
15
+ expect(tn.uuid).to eq '8faa91f6-663f-4cfe-b785-0ab4e9415a51'
16
+ expect(tn.uuid_path).to eq [
17
+ '9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3',
18
+ 'bf4c91c0-3d1f-44c7-9d3b-249382182a26',
19
+ '8faa91f6-663f-4cfe-b785-0ab4e9415a51']
20
+
21
+ end
22
+
23
+
24
+ context 'not a gnub data' do
25
+ let(:file_name) { 'data.tar.gz' }
26
+ it 'should not be of GnubTaxon type' do
27
+ normalizer.normalize
28
+ tn = normalizer.normalized_data['leptogastrinae:tid:42']
29
+ expect(tn).to be_kind_of DarwinCore::TaxonNormalized
30
+ expect(tn).not_to be_kind_of DarwinCore::GnubTaxon
31
+ end
32
+ end
33
+
34
+ end