dwc-archive 0.9.6 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock DELETED
@@ -1,155 +0,0 @@
1
- GEM
2
- remote: https://rubygems.org/
3
- specs:
4
- abstract (1.0.0)
5
- actionpack (3.0.8)
6
- activemodel (= 3.0.8)
7
- activesupport (= 3.0.8)
8
- builder (~> 2.1.2)
9
- erubis (~> 2.6.6)
10
- i18n (~> 0.5.0)
11
- rack (~> 1.2.1)
12
- rack-mount (~> 0.6.14)
13
- rack-test (~> 0.5.7)
14
- tzinfo (~> 0.3.23)
15
- activemodel (3.0.8)
16
- activesupport (= 3.0.8)
17
- builder (~> 2.1.2)
18
- i18n (~> 0.5.0)
19
- activesupport (3.0.8)
20
- archive-tar-minitar (0.5.2)
21
- awesome_print (1.1.0)
22
- binding_of_caller (0.7.1)
23
- debug_inspector (>= 0.0.1)
24
- biodiversity (3.1.0)
25
- parallel
26
- parallel (~> 0.6)
27
- rake (~> 10.0)
28
- treetop
29
- treetop (~> 1.4)
30
- unicode_utils (~> 1.4)
31
- builder (2.1.2)
32
- coderay (1.0.9)
33
- columnize (0.3.6)
34
- coolline (0.4.2)
35
- cucumber (1.3.1)
36
- builder (>= 2.1.2)
37
- diff-lcs (>= 1.1.3)
38
- gherkin (~> 2.12.0)
39
- multi_json (~> 1.3)
40
- debug_inspector (0.0.2)
41
- debugger (1.5.0)
42
- columnize (>= 0.3.1)
43
- debugger-linecache (~> 1.2.0)
44
- debugger-ruby_core_source (~> 1.2.0)
45
- debugger-linecache (1.2.0)
46
- debugger-ruby_core_source (1.2.0)
47
- diff-lcs (1.2.4)
48
- diffy (2.1.4)
49
- erubis (2.6.6)
50
- abstract (>= 1.0.0)
51
- gherkin (2.12.0)
52
- multi_json (~> 1.3)
53
- git (1.2.5)
54
- grit (2.5.0)
55
- diff-lcs (~> 1.1)
56
- mime-types (~> 1.15)
57
- posix-spawn (~> 0.3.6)
58
- hirb (0.7.1)
59
- i18n (0.5.0)
60
- jazz_hands (0.5.0)
61
- awesome_print (~> 1.1.0)
62
- coderay (~> 1.0.9)
63
- coolline (>= 0.4.0)
64
- hirb (~> 0.7.1)
65
- pry (~> 0.9.12)
66
- pry-debugger (~> 0.2.2)
67
- pry-doc (~> 0.4.4)
68
- pry-git (~> 0.2.3)
69
- pry-rails (~> 0.2.2)
70
- pry-remote (>= 0.1.7)
71
- pry-stack_explorer (~> 0.4.9)
72
- railties (>= 3.0, < 5.0)
73
- jeweler (1.8.4)
74
- bundler (~> 1.0)
75
- git (>= 1.2.5)
76
- rake
77
- rdoc
78
- json (1.7.7)
79
- method_source (0.8.1)
80
- mime-types (1.23)
81
- multi_json (1.7.3)
82
- nokogiri (1.5.9)
83
- parallel (0.7.0)
84
- parsley-store (0.3.2)
85
- biodiversity (~> 3.1.0)
86
- jeweler (~> 1.8)
87
- redis (~> 3.0)
88
- polyglot (0.3.3)
89
- posix-spawn (0.3.6)
90
- pry (0.9.12.1)
91
- coderay (~> 1.0.5)
92
- method_source (~> 0.8)
93
- slop (~> 3.4)
94
- pry-debugger (0.2.2)
95
- debugger (~> 1.3)
96
- pry (~> 0.9.10)
97
- pry-doc (0.4.5)
98
- pry (>= 0.9)
99
- yard (>= 0.8)
100
- pry-git (0.2.3)
101
- diffy
102
- grit
103
- pry (>= 0.9.8)
104
- pry-rails (0.2.2)
105
- pry (>= 0.9.10)
106
- pry-remote (0.1.7)
107
- pry (~> 0.9)
108
- slop (~> 3.0)
109
- pry-stack_explorer (0.4.9)
110
- binding_of_caller (>= 0.7)
111
- pry (~> 0.9.11)
112
- rack (1.2.8)
113
- rack-mount (0.6.14)
114
- rack (>= 1.0.0)
115
- rack-test (0.5.7)
116
- rack (>= 1.0)
117
- railties (3.0.8)
118
- actionpack (= 3.0.8)
119
- activesupport (= 3.0.8)
120
- rake (>= 0.8.7)
121
- thor (~> 0.14.4)
122
- rake (10.0.4)
123
- rdoc (4.0.1)
124
- json (~> 1.4)
125
- redis (3.0.4)
126
- rspec (2.13.0)
127
- rspec-core (~> 2.13.0)
128
- rspec-expectations (~> 2.13.0)
129
- rspec-mocks (~> 2.13.0)
130
- rspec-core (2.13.1)
131
- rspec-expectations (2.13.0)
132
- diff-lcs (>= 1.1.3, < 2.0)
133
- rspec-mocks (2.13.1)
134
- slop (3.4.4)
135
- thor (0.14.6)
136
- treetop (1.4.14)
137
- polyglot
138
- polyglot (>= 0.3.1)
139
- tzinfo (0.3.37)
140
- unicode_utils (1.4.0)
141
- yard (0.8.6.1)
142
-
143
- PLATFORMS
144
- ruby
145
-
146
- DEPENDENCIES
147
- archive-tar-minitar (~> 0.5)
148
- bundler (~> 1.3)
149
- cucumber (~> 1.3)
150
- debugger (~> 1.3)
151
- jazz_hands (~> 0.5)
152
- jeweler (~> 1.8)
153
- nokogiri (~> 1.5)
154
- parsley-store (~> 0.3.2)
155
- rspec (~> 2.13)
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.9.6
Binary file
@@ -1,10 +0,0 @@
1
- UTF8RGX = /\A(
2
- [\x09\x0A\x0D\x20-\x7E] # ASCII
3
- | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
4
- | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
5
- | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
6
- | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
7
- | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
8
- | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
9
- | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
10
- )*\z/x
@@ -1,250 +0,0 @@
1
- # encoding: utf-8
2
- require File.expand_path(File.dirname(__FILE__) + "/../spec_helper")
3
-
4
- describe DarwinCore do
5
- before(:all) do
6
- @file_dir = File.join(File.dirname(__FILE__), '..', 'files')
7
- end
8
-
9
- describe "VERSION" do
10
- it "should return VERSION number" do
11
- DarwinCore::VERSION.split('.').join('').to_i.should > 41
12
- end
13
- end
14
-
15
- describe "::nil_field?" do
16
- it "should return true for entries which normally mean nil" do
17
- [nil, '/N', ''].each do |i|
18
- DarwinCore.nil_field?(i).should be_true
19
- end
20
- end
21
-
22
- it "should return false for fields that are not nil" do
23
- [0, '0', '123', 123, 'dsdfs434343/N'].each do |i|
24
- DarwinCore.nil_field?(i).should be_false
25
- end
26
- end
27
- end
28
-
29
- describe ".new" do
30
- it "should create DarwinCore instance out of archive file" do
31
- ['data.zip', 'data.tar.gz', 'minimal.tar.gz', 'junk_dir_inside.zip'].each do |file|
32
- file = File.join(@file_dir, file)
33
- dwc = DarwinCore.new(file)
34
- dwc.archive.valid?.should be_true
35
- end
36
- end
37
-
38
- it "should raise an error if archive file does not exist" do
39
- file = 'not_a_file'
40
- lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::FileNotFoundError)
41
- end
42
-
43
- it "should raise an error if archive is broken" do
44
- file = File.join(@file_dir, 'broken.tar.gz')
45
- lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::UnpackingError)
46
- end
47
-
48
- it "should raise an error if archive is invalid" do
49
- file = File.join(@file_dir, 'invalid.tar.gz')
50
- lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::InvalidArchiveError)
51
- end
52
-
53
- it "should raise an error if archive is not in utf-8" do
54
- file = File.join(@file_dir, 'latin1.tar.gz')
55
- lambda { DarwinCore.new(file) }.should raise_error(DarwinCore::EncodingError)
56
- end
57
-
58
- it "should work with files that have non-alfanumeric characters and spaces" do
59
- file = File.join(@file_dir, 'file with characters(3).gz')
60
- dwc = DarwinCore.new(file)
61
- dwc.archive.valid?.should be_true
62
- end
63
- end
64
-
65
- describe ".normalize_classification" do
66
- it "should return flat list if file has no parent id information" do
67
- file = File.join(@file_dir, 'flat_list.tar.gz')
68
- dwc = DarwinCore.new(file)
69
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
70
- cn.normalize
71
- cn.normalized_data.should_not be_nil
72
- cn.normalized_data.size.should > 0
73
- end
74
-
75
- it "should return array or hash of name_strings back" do
76
- file = File.join(@file_dir, 'data.tar.gz')
77
- dwc = DarwinCore.new(file)
78
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
79
- cn.normalize
80
- name_strings = cn.name_strings
81
- name_strings.is_a?(Array).should be_true
82
- name_strings.size.should > 1
83
- name_strings = cn.name_strings(with_hash: true)
84
- name_strings.size.should > 1
85
- name_strings.is_a?(Hash).should be_true
86
- name_strings.is_a?(Hash).should be_true
87
- name_strings.values.uniq.should == [1]
88
- vernacular_name_strings = cn.vernacular_name_strings
89
- vernacular_name_strings.is_a?(Array).should be_true
90
- vernacular_name_strings.size.should > 0
91
- vernacular_name_strings = cn.vernacular_name_strings(with_hash: true)
92
- vernacular_name_strings.size.should > 0
93
- vernacular_name_strings.is_a?(Hash).should be_true
94
- vernacular_name_strings.values.uniq.should == [1]
95
- end
96
-
97
- it "should traverse DarwinCore files and assemble data for every node in memory" do
98
- file = File.join(@file_dir, 'data.tar.gz')
99
- dwc = DarwinCore.new(file)
100
- norm = dwc.normalize_classification
101
- norm.class.should == Hash
102
- path_encodings = []
103
- norm.each do |taxon_id, taxon|
104
- taxon.classification_path.each {|p| path_encodings << p.encoding}
105
- end
106
- path_encodings.uniq!
107
- path_encodings.size.should == 1
108
- path_encodings[0].to_s.should == "UTF-8"
109
- norm['leptogastrinae:tid:2857'].class.should == DarwinCore::TaxonNormalized
110
- norm['leptogastrinae:tid:2857'].source.should == 'http://leptogastrinae.lifedesks.org/pages/2857'
111
- end
112
-
113
- it "should assemble synonyms from core" do
114
- file = File.join(@file_dir, 'data.tar.gz')
115
- dwc = DarwinCore.new(file)
116
- norm = dwc.normalize_classification
117
- syn = norm.values.select {|n| n.synonyms.size > 0}[0].synonyms[0]
118
- syn.id.should == 'leptogastrinae:tid:127'
119
- syn.name.should == "Leptogastridae"
120
- syn.source.should == 'http://leptogastrinae.lifedesks.org/pages/127'
121
- end
122
-
123
- it "should be able to assemble vernacular names from an extension" do
124
- file = File.join(@file_dir, 'data.tar.gz')
125
- dwc = DarwinCore.new(file)
126
- norm = dwc.normalize_classification
127
- norm.select { |k,v| !v.vernacular_names.empty? }.map { |k,v| v.vernacular_names }.size.should > 0
128
- end
129
-
130
- it "should be able to assemble synonyms from extension" do
131
- file = File.join(@file_dir, 'synonyms_in_extension.tar.gz')
132
- dwc = DarwinCore.new(file)
133
- norm = dwc.normalize_classification
134
- norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should > 0
135
- end
136
-
137
- it "should not assemble synonyms from extension with scientificName, and file name not matching 'synonym'" do
138
- file = File.join(@file_dir, 'not_synonym_in_extension.tar.gz')
139
- dwc = DarwinCore.new(file)
140
- norm = dwc.normalize_classification
141
- norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should == 0
142
- end
143
-
144
- it "should not attempt to assemble extensions with with_extensions opts set to false" do
145
- file = File.join(@file_dir, 'data.tar.gz')
146
- dwc = DarwinCore.new(file)
147
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
148
- norm = cn.normalize(:with_extensions => false)
149
- norm.select { |k,v| !v.vernacular_names.empty? }.size.should == 0
150
- norm = cn.normalize()
151
- norm.select { |k,v| !v.vernacular_names.empty? }.size.should > 0
152
- file = File.join(@file_dir, 'synonyms_in_extension.tar.gz')
153
- dwc = DarwinCore.new(file)
154
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
155
- norm = cn.normalize(:with_extensions => false)
156
- norm.select { |k,v| !v.synonyms.empty? }.size.should == 0
157
- norm = cn.normalize()
158
- norm.select { |k,v| !v.synonyms.empty? }.size.should > 0
159
- end
160
-
161
- it "should assemble linnean classification if terms for it exists" do
162
- file = File.join(@file_dir, 'linnean.tar.gz')
163
- dwc = DarwinCore.new(file)
164
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
165
- norm = cn.normalize
166
- cn.normalized_data.first.last.linnean_classification_path.should == [["Animalia", :kingdom], ["Arthropoda", :phylum], ["Insecta", :class], ["Diptera", :order], ["Cecidomyiidae", :family], ["Resseliella", :genus]]
167
- end
168
-
169
- it "should keep linnean classification empty if terms are not there" do
170
- file = File.join(@file_dir, 'data.tar.gz')
171
- dwc = DarwinCore.new(file)
172
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
173
- norm = cn.normalize
174
- cn.normalized_data.first.last.linnean_classification_path.should == []
175
- end
176
-
177
- it "should be able to assemble synonyms from core" do
178
- file = File.join(@file_dir, 'synonyms_in_core_accepted_name_field.tar.gz')
179
- dwc = DarwinCore.new(file)
180
- norm = dwc.normalize_classification
181
- norm.select { |k,v| !v.synonyms.empty? }.map { |k,v| v.synonyms }.size.should > 0
182
- end
183
-
184
- it "should be able to assemble synonyms from extension" do
185
- file = File.join(@file_dir, 'data.tar.gz')
186
- dwc = DarwinCore.new(file)
187
- norm = dwc.normalize_classification
188
- nodes_with_syn = norm.select { |k,v| !v.synonyms.empty? }
189
- nodes_with_syn.map { |k,v| v.synonyms }.size.should > 0
190
- nodes_with_syn.first[1].synonyms.first.status.should == 'synonym'
191
- end
192
-
193
- it "should be able work with files which have scientificNameAuthorship" do
194
- file = File.join(@file_dir, 'sci_name_authorship.tar.gz')
195
- dwc = DarwinCore.new(file)
196
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
197
- norm = cn.normalize
198
- path_encodings = norm.map {|taxon_id, taxon| taxon.classification_path}.flatten.map { |name| name.encoding.to_s }.uniq
199
- path_encodings.size.should == 1
200
- path_encodings[0].should == "UTF-8"
201
- taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
202
- taxa.size.should == 507
203
- syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
204
- syn.size.should == 50
205
- end
206
-
207
- it "should be able work with files which repeat scientificNameAuthorship value in scientificName field" do
208
- file = File.join(@file_dir, 'sci_name_authorship_dup.tar.gz')
209
- dwc = DarwinCore.new(file)
210
- norm = dwc.normalize_classification
211
- taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
212
- taxa.size.should == 507
213
- syn = norm.select{|k,v| v.synonyms.size > 0}.map {|k,v| v.synonyms}.flatten.select {|s| s.name.split(" ").size > s.canonical_name.split(" ").size}
214
- syn.size.should == 50
215
- end
216
-
217
- it "should be able open files where coreid is empty" do
218
- file = File.join(@file_dir, 'empty_coreid.tar.gz')
219
- dwc = DarwinCore.new(file)
220
- norm = dwc.normalize_classification
221
- taxa = norm.select{|k,v| v.current_name_canonical.match " "}.select{|k,v| [v.current_name.split(" ").size > v.current_name_canonical.split(" ").size]}
222
- taxa.size.should == 2
223
- end
224
-
225
- it "should be able to get language and locality fields for vernacular names" do
226
- file = File.join(@file_dir, 'language_locality.tar.gz')
227
- dwc = DarwinCore.new(file)
228
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
229
- cn.normalize
230
- vn = cn.normalized_data['leptogastrinae:tid:42'].vernacular_names.first
231
- vn.language.should == 'en'
232
- vn.locality.should == 'New England'
233
- end
234
-
235
- it 'should be able to get uuids from gnub dataset' do
236
- file = File.join(@file_dir, 'gnub.tar.gz')
237
- dwc = DarwinCore.new(file)
238
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
239
- cn.normalize
240
- vn = cn.normalized_data['9c399f90-cfb8-5a7f-9a21-18285a473488']
241
- vn.class.should == DarwinCore::GnubTaxon
242
- vn.uuid.should == '8faa91f6-663f-4cfe-b785-0ab4e9415a51'
243
- vn.uuid_path.should == [
244
- "9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3",
245
- "bf4c91c0-3d1f-44c7-9d3b-249382182a26",
246
- "8faa91f6-663f-4cfe-b785-0ab4e9415a51"]
247
- end
248
- end
249
-
250
- end
data/spec/spec.opts DELETED
@@ -1 +0,0 @@
1
- --color