dwc-archive 0.5.12 → 0.5.13
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +4 -6
- data/VERSION +1 -1
- data/features/dwca-reader.feature +7 -6
- data/features/step_definitions/dwc-reader_steps.rb +6 -2
- data/features/support/env.rb +1 -1
- data/lib/dwc-archive.rb +11 -6
- data/lib/dwc-archive/ingester.rb +5 -5
- data/spec/spec_helper.rb +4 -4
- metadata +11 -6
- data/.gitignore +0 -24
data/Rakefile
CHANGED
@@ -21,14 +21,12 @@ rescue LoadError
|
|
21
21
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
22
22
|
end
|
23
23
|
|
24
|
-
require '
|
25
|
-
|
26
|
-
spec.
|
27
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
24
|
+
require 'rspec/core/rake_task'
|
25
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
26
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
28
27
|
end
|
29
28
|
|
30
|
-
|
31
|
-
spec.libs << 'lib' << 'spec'
|
29
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
32
30
|
spec.pattern = 'spec/**/*_spec.rb'
|
33
31
|
spec.rcov = true
|
34
32
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.12
|
1
|
+
0.5.13
|
@@ -6,8 +6,8 @@ Feature: Reading of a Darwing Core Archive
|
|
6
6
|
Scenario: Creating Darwin Core Archive object
|
7
7
|
Given path to a dwc file "data.tar.gz"
|
8
8
|
When I create a new DarwinCore::Archive instance
|
9
|
-
Then I should find that the archive is valid
|
10
|
-
|
9
|
+
Then I should find that the archive is valid
|
10
|
+
And I should see what files the archive has
|
11
11
|
|
12
12
|
When I delete expanded files
|
13
13
|
Then they should disappear
|
@@ -19,9 +19,10 @@ Feature: Reading of a Darwing Core Archive
|
|
19
19
|
|
20
20
|
Scenario: Instantiating DarwinCore with tar.gz file
|
21
21
|
Given path to a dwc file "data.tar.gz"
|
22
|
-
When I create a new DarwinCore instance
|
22
|
+
When I create a new DarwinCore instance
|
23
23
|
Then instance should have a valid archive
|
24
24
|
And instance should have a core
|
25
|
+
And I should see checksum
|
25
26
|
When I check core data
|
26
27
|
Then I should find core.properties
|
27
28
|
And core.file_path
|
@@ -30,13 +31,13 @@ Feature: Reading of a Darwing Core Archive
|
|
30
31
|
And core.size
|
31
32
|
Then DarwinCore instance should have an extensions array
|
32
33
|
And every extension in array should be an instance of DarwinCore::Extension
|
33
|
-
And extension should have properties, data, file_path, coreid, fields
|
34
|
+
And extension should have properties, data, file_path, coreid, fields
|
34
35
|
Then DarwinCore instance should have dwc.metadata object
|
35
36
|
And I should find id, title, creators, metadata provider
|
36
37
|
|
37
38
|
Scenario: Instantiating DawinCore with zip file
|
38
39
|
Given path to a dwc file "data.zip"
|
39
|
-
When I create a new DarwinCore instance
|
40
|
+
When I create a new DarwinCore instance
|
40
41
|
Then instance should have a valid archive
|
41
42
|
|
42
43
|
Scenario: Cleaning temporary directory from expanded archives
|
@@ -49,7 +50,7 @@ Feature: Reading of a Darwing Core Archive
|
|
49
50
|
When I create a new DarwinCore instance
|
50
51
|
Then I can read its content into memory
|
51
52
|
Then I can read extensions content into memory
|
52
|
-
|
53
|
+
|
53
54
|
Scenario: Importing data using block
|
54
55
|
Given path to a dwc file "data.tar.gz"
|
55
56
|
When I create a new DarwinCore instance
|
@@ -42,6 +42,10 @@ Then /^instance should have a core$/ do
|
|
42
42
|
@dwc.core.class.should == DarwinCore::Core
|
43
43
|
end
|
44
44
|
|
45
|
+
Then /^I should see checksum$/ do
|
46
|
+
@dwc.checksum.should == '880775bd100f7b00c49ceefd2d7317daada99b26'
|
47
|
+
end
|
48
|
+
|
45
49
|
When /^I check core data$/ do
|
46
50
|
@core = @dwc.core
|
47
51
|
end
|
@@ -198,7 +202,7 @@ Then /^there are paths, synonyms and vernacular names in normalized classificati
|
|
198
202
|
end
|
199
203
|
|
200
204
|
Then /^names used in classification can be accessed by "([^"]*)" method$/ do |name_strings|
|
201
|
-
names = @cn.send(name_strings.to_sym)
|
205
|
+
names = @cn.send(name_strings.to_sym)
|
202
206
|
names.size.should > @normalized_classification.size
|
203
207
|
end
|
204
208
|
|
@@ -216,7 +220,7 @@ Then /^nodes_ids organized in trees can be accessed by "([^"]*)" method$/ do |tr
|
|
216
220
|
tree = @cn.send(tree.to_sym)
|
217
221
|
tree.class.should == Hash
|
218
222
|
keys = []
|
219
|
-
flatten_tree(tree, keys)
|
223
|
+
flatten_tree(tree, keys)
|
220
224
|
@normalized_classification.size.should == keys.size
|
221
225
|
end
|
222
226
|
|
data/features/support/env.rb
CHANGED
data/lib/dwc-archive.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
$:.unshift(File.dirname(__FILE__)) unless
|
3
|
-
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
4
4
|
R19 = RUBY_VERSION.split('.')[0..1].join('').to_i > 18
|
5
5
|
require 'fileutils'
|
6
6
|
require 'ostruct'
|
@@ -30,19 +30,19 @@ require 'dwc-archive/generator_eml_xml'
|
|
30
30
|
require 'dwc-archive/classification_normalizer'
|
31
31
|
|
32
32
|
class DarwinCore
|
33
|
-
|
33
|
+
|
34
34
|
VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
|
35
35
|
|
36
36
|
attr_reader :archive, :core, :metadata, :extensions, :classification_normalizer
|
37
37
|
alias :eml :metadata
|
38
|
-
|
38
|
+
|
39
39
|
DEFAULT_TMP_DIR = "/tmp"
|
40
|
-
|
40
|
+
|
41
41
|
def self.nil_field?(field)
|
42
42
|
return true if [nil, '', '/N'].include?(field)
|
43
43
|
false
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
|
47
47
|
Dir.entries(tmp_dir).each do |entry|
|
48
48
|
path = File.join(tmp_dir, entry)
|
@@ -69,7 +69,8 @@ class DarwinCore
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def initialize(dwc_path, tmp_dir = DEFAULT_TMP_DIR)
|
72
|
-
@
|
72
|
+
@dwc_path = dwc_path
|
73
|
+
@archive = DarwinCore::Archive.new(@dwc_path, tmp_dir)
|
73
74
|
@core = DarwinCore::Core.new(self)
|
74
75
|
@metadata = DarwinCore::Metadata.new(@archive)
|
75
76
|
@extensions = get_extensions
|
@@ -86,6 +87,10 @@ class DarwinCore
|
|
86
87
|
!!@core.fields.join('|').downcase.match(/highertaxonid|parentnameusageid/)
|
87
88
|
end
|
88
89
|
|
90
|
+
def checksum
|
91
|
+
Digest::SHA1.hexdigest(open(@dwc_path).read)
|
92
|
+
end
|
93
|
+
|
89
94
|
private
|
90
95
|
def get_extensions
|
91
96
|
res = []
|
data/lib/dwc-archive/ingester.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class DarwinCore
|
1
|
+
class DarwinCore
|
2
2
|
module Ingester
|
3
3
|
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
4
4
|
attr_reader :file_path, :fields, :line_separator, :quote_character, :ignore_headers
|
@@ -20,7 +20,7 @@ class DarwinCore
|
|
20
20
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
21
21
|
if (i + index_fix) % batch_size == 0
|
22
22
|
DarwinCore.logger_write(@dwc.object_id, "Ingested %s records from %s" % [(i + index_fix), name])
|
23
|
-
if block_given?
|
23
|
+
if block_given?
|
24
24
|
yield [res, errors]
|
25
25
|
res = []
|
26
26
|
errors = []
|
@@ -30,7 +30,7 @@ class DarwinCore
|
|
30
30
|
yield [res, errors] if block_given?
|
31
31
|
[res, errors]
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
private
|
35
35
|
def name
|
36
36
|
self.class.to_s.split('::')[-1].downcase
|
@@ -60,7 +60,7 @@ class DarwinCore
|
|
60
60
|
@fields = get_fields
|
61
61
|
raise exception("No data fields are found") if @fields.empty?
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
def get_file_path
|
65
65
|
file = @data[:location] || @data[:attributes][:location] || @data[:files][:location]
|
66
66
|
File.join(@path, file)
|
@@ -78,7 +78,7 @@ class DarwinCore
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def get_size
|
81
|
-
`wc -l #{@file_path}`.match(
|
81
|
+
`wc -l #{@file_path}`.match(/^\s*([\d]+)\s/)[1].to_i
|
82
82
|
end
|
83
83
|
end
|
84
84
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,10 +2,10 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
2
2
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
3
|
require 'rubygems'
|
4
4
|
require 'dwc-archive'
|
5
|
-
require '
|
6
|
-
require '
|
5
|
+
require 'rspec'
|
6
|
+
require 'rspec/autorun'
|
7
7
|
require 'ruby-debug'
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
11
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 17
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 5
|
8
|
-
- 12
|
9
|
-
version: 0.5.12
|
9
|
+
- 13
|
10
|
+
version: 0.5.13
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Dmitry Mozzherin
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-
|
18
|
+
date: 2011-04-07 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -25,6 +26,7 @@ dependencies:
|
|
25
26
|
requirements:
|
26
27
|
- - ">="
|
27
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
28
30
|
segments:
|
29
31
|
- 0
|
30
32
|
- 2
|
@@ -40,6 +42,7 @@ dependencies:
|
|
40
42
|
requirements:
|
41
43
|
- - ">="
|
42
44
|
- !ruby/object:Gem::Version
|
45
|
+
hash: 13
|
43
46
|
segments:
|
44
47
|
- 1
|
45
48
|
- 2
|
@@ -55,6 +58,7 @@ dependencies:
|
|
55
58
|
requirements:
|
56
59
|
- - ">="
|
57
60
|
- !ruby/object:Gem::Version
|
61
|
+
hash: 3
|
58
62
|
segments:
|
59
63
|
- 0
|
60
64
|
version: "0"
|
@@ -71,7 +75,6 @@ extra_rdoc_files:
|
|
71
75
|
- README.rdoc
|
72
76
|
files:
|
73
77
|
- .document
|
74
|
-
- .gitignore
|
75
78
|
- LICENSE
|
76
79
|
- README.rdoc
|
77
80
|
- Rakefile
|
@@ -119,8 +122,8 @@ homepage: http://github.com/GlobalNamesArchitecture/dwc-archive
|
|
119
122
|
licenses: []
|
120
123
|
|
121
124
|
post_install_message:
|
122
|
-
rdoc_options:
|
123
|
-
|
125
|
+
rdoc_options: []
|
126
|
+
|
124
127
|
require_paths:
|
125
128
|
- lib
|
126
129
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -128,6 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
128
131
|
requirements:
|
129
132
|
- - ">="
|
130
133
|
- !ruby/object:Gem::Version
|
134
|
+
hash: 3
|
131
135
|
segments:
|
132
136
|
- 0
|
133
137
|
version: "0"
|
@@ -136,6 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
136
140
|
requirements:
|
137
141
|
- - ">="
|
138
142
|
- !ruby/object:Gem::Version
|
143
|
+
hash: 3
|
139
144
|
segments:
|
140
145
|
- 0
|
141
146
|
version: "0"
|
data/.gitignore
DELETED