dwc-archive 0.5.12 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +4 -6
- data/VERSION +1 -1
- data/features/dwca-reader.feature +7 -6
- data/features/step_definitions/dwc-reader_steps.rb +6 -2
- data/features/support/env.rb +1 -1
- data/lib/dwc-archive.rb +11 -6
- data/lib/dwc-archive/ingester.rb +5 -5
- data/spec/spec_helper.rb +4 -4
- metadata +11 -6
- data/.gitignore +0 -24
data/Rakefile
CHANGED
@@ -21,14 +21,12 @@ rescue LoadError
|
|
21
21
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
22
22
|
end
|
23
23
|
|
24
|
-
require '
|
25
|
-
|
26
|
-
spec.
|
27
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
24
|
+
require 'rspec/core/rake_task'
|
25
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
26
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
28
27
|
end
|
29
28
|
|
30
|
-
|
31
|
-
spec.libs << 'lib' << 'spec'
|
29
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
32
30
|
spec.pattern = 'spec/**/*_spec.rb'
|
33
31
|
spec.rcov = true
|
34
32
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.12
|
1
|
+
0.5.13
|
@@ -6,8 +6,8 @@ Feature: Reading of a Darwing Core Archive
|
|
6
6
|
Scenario: Creating Darwin Core Archive object
|
7
7
|
Given path to a dwc file "data.tar.gz"
|
8
8
|
When I create a new DarwinCore::Archive instance
|
9
|
-
Then I should find that the archive is valid
|
10
|
-
|
9
|
+
Then I should find that the archive is valid
|
10
|
+
And I should see what files the archive has
|
11
11
|
|
12
12
|
When I delete expanded files
|
13
13
|
Then they should disappear
|
@@ -19,9 +19,10 @@ Feature: Reading of a Darwing Core Archive
|
|
19
19
|
|
20
20
|
Scenario: Instantiating DarwinCore with tar.gz file
|
21
21
|
Given path to a dwc file "data.tar.gz"
|
22
|
-
When I create a new DarwinCore instance
|
22
|
+
When I create a new DarwinCore instance
|
23
23
|
Then instance should have a valid archive
|
24
24
|
And instance should have a core
|
25
|
+
And I should see checksum
|
25
26
|
When I check core data
|
26
27
|
Then I should find core.properties
|
27
28
|
And core.file_path
|
@@ -30,13 +31,13 @@ Feature: Reading of a Darwing Core Archive
|
|
30
31
|
And core.size
|
31
32
|
Then DarwinCore instance should have an extensions array
|
32
33
|
And every extension in array should be an instance of DarwinCore::Extension
|
33
|
-
And extension should have properties, data, file_path, coreid, fields
|
34
|
+
And extension should have properties, data, file_path, coreid, fields
|
34
35
|
Then DarwinCore instance should have dwc.metadata object
|
35
36
|
And I should find id, title, creators, metadata provider
|
36
37
|
|
37
38
|
Scenario: Instantiating DawinCore with zip file
|
38
39
|
Given path to a dwc file "data.zip"
|
39
|
-
When I create a new DarwinCore instance
|
40
|
+
When I create a new DarwinCore instance
|
40
41
|
Then instance should have a valid archive
|
41
42
|
|
42
43
|
Scenario: Cleaning temporary directory from expanded archives
|
@@ -49,7 +50,7 @@ Feature: Reading of a Darwing Core Archive
|
|
49
50
|
When I create a new DarwinCore instance
|
50
51
|
Then I can read its content into memory
|
51
52
|
Then I can read extensions content into memory
|
52
|
-
|
53
|
+
|
53
54
|
Scenario: Importing data using block
|
54
55
|
Given path to a dwc file "data.tar.gz"
|
55
56
|
When I create a new DarwinCore instance
|
@@ -42,6 +42,10 @@ Then /^instance should have a core$/ do
|
|
42
42
|
@dwc.core.class.should == DarwinCore::Core
|
43
43
|
end
|
44
44
|
|
45
|
+
Then /^I should see checksum$/ do
|
46
|
+
@dwc.checksum.should == '880775bd100f7b00c49ceefd2d7317daada99b26'
|
47
|
+
end
|
48
|
+
|
45
49
|
When /^I check core data$/ do
|
46
50
|
@core = @dwc.core
|
47
51
|
end
|
@@ -198,7 +202,7 @@ Then /^there are paths, synonyms and vernacular names in normalized classificati
|
|
198
202
|
end
|
199
203
|
|
200
204
|
Then /^names used in classification can be accessed by "([^"]*)" method$/ do |name_strings|
|
201
|
-
names = @cn.send(name_strings.to_sym)
|
205
|
+
names = @cn.send(name_strings.to_sym)
|
202
206
|
names.size.should > @normalized_classification.size
|
203
207
|
end
|
204
208
|
|
@@ -216,7 +220,7 @@ Then /^nodes_ids organized in trees can be accessed by "([^"]*)" method$/ do |tr
|
|
216
220
|
tree = @cn.send(tree.to_sym)
|
217
221
|
tree.class.should == Hash
|
218
222
|
keys = []
|
219
|
-
flatten_tree(tree, keys)
|
223
|
+
flatten_tree(tree, keys)
|
220
224
|
@normalized_classification.size.should == keys.size
|
221
225
|
end
|
222
226
|
|
data/features/support/env.rb
CHANGED
data/lib/dwc-archive.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
$:.unshift(File.dirname(__FILE__)) unless
|
3
|
-
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
4
4
|
R19 = RUBY_VERSION.split('.')[0..1].join('').to_i > 18
|
5
5
|
require 'fileutils'
|
6
6
|
require 'ostruct'
|
@@ -30,19 +30,19 @@ require 'dwc-archive/generator_eml_xml'
|
|
30
30
|
require 'dwc-archive/classification_normalizer'
|
31
31
|
|
32
32
|
class DarwinCore
|
33
|
-
|
33
|
+
|
34
34
|
VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
|
35
35
|
|
36
36
|
attr_reader :archive, :core, :metadata, :extensions, :classification_normalizer
|
37
37
|
alias :eml :metadata
|
38
|
-
|
38
|
+
|
39
39
|
DEFAULT_TMP_DIR = "/tmp"
|
40
|
-
|
40
|
+
|
41
41
|
def self.nil_field?(field)
|
42
42
|
return true if [nil, '', '/N'].include?(field)
|
43
43
|
false
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
46
|
def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
|
47
47
|
Dir.entries(tmp_dir).each do |entry|
|
48
48
|
path = File.join(tmp_dir, entry)
|
@@ -69,7 +69,8 @@ class DarwinCore
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def initialize(dwc_path, tmp_dir = DEFAULT_TMP_DIR)
|
72
|
-
@
|
72
|
+
@dwc_path = dwc_path
|
73
|
+
@archive = DarwinCore::Archive.new(@dwc_path, tmp_dir)
|
73
74
|
@core = DarwinCore::Core.new(self)
|
74
75
|
@metadata = DarwinCore::Metadata.new(@archive)
|
75
76
|
@extensions = get_extensions
|
@@ -86,6 +87,10 @@ class DarwinCore
|
|
86
87
|
!!@core.fields.join('|').downcase.match(/highertaxonid|parentnameusageid/)
|
87
88
|
end
|
88
89
|
|
90
|
+
def checksum
|
91
|
+
Digest::SHA1.hexdigest(open(@dwc_path).read)
|
92
|
+
end
|
93
|
+
|
89
94
|
private
|
90
95
|
def get_extensions
|
91
96
|
res = []
|
data/lib/dwc-archive/ingester.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class DarwinCore
|
1
|
+
class DarwinCore
|
2
2
|
module Ingester
|
3
3
|
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
4
4
|
attr_reader :file_path, :fields, :line_separator, :quote_character, :ignore_headers
|
@@ -20,7 +20,7 @@ class DarwinCore
|
|
20
20
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
21
21
|
if (i + index_fix) % batch_size == 0
|
22
22
|
DarwinCore.logger_write(@dwc.object_id, "Ingested %s records from %s" % [(i + index_fix), name])
|
23
|
-
if block_given?
|
23
|
+
if block_given?
|
24
24
|
yield [res, errors]
|
25
25
|
res = []
|
26
26
|
errors = []
|
@@ -30,7 +30,7 @@ class DarwinCore
|
|
30
30
|
yield [res, errors] if block_given?
|
31
31
|
[res, errors]
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
private
|
35
35
|
def name
|
36
36
|
self.class.to_s.split('::')[-1].downcase
|
@@ -60,7 +60,7 @@ class DarwinCore
|
|
60
60
|
@fields = get_fields
|
61
61
|
raise exception("No data fields are found") if @fields.empty?
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
def get_file_path
|
65
65
|
file = @data[:location] || @data[:attributes][:location] || @data[:files][:location]
|
66
66
|
File.join(@path, file)
|
@@ -78,7 +78,7 @@ class DarwinCore
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def get_size
|
81
|
-
`wc -l #{@file_path}`.match(
|
81
|
+
`wc -l #{@file_path}`.match(/^\s*([\d]+)\s/)[1].to_i
|
82
82
|
end
|
83
83
|
end
|
84
84
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -2,10 +2,10 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
2
2
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
3
|
require 'rubygems'
|
4
4
|
require 'dwc-archive'
|
5
|
-
require '
|
6
|
-
require '
|
5
|
+
require 'rspec'
|
6
|
+
require 'rspec/autorun'
|
7
7
|
require 'ruby-debug'
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
11
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 17
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 5
|
8
|
-
- 12
|
9
|
-
version: 0.5.12
|
9
|
+
- 13
|
10
|
+
version: 0.5.13
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Dmitry Mozzherin
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-
|
18
|
+
date: 2011-04-07 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -25,6 +26,7 @@ dependencies:
|
|
25
26
|
requirements:
|
26
27
|
- - ">="
|
27
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
28
30
|
segments:
|
29
31
|
- 0
|
30
32
|
- 2
|
@@ -40,6 +42,7 @@ dependencies:
|
|
40
42
|
requirements:
|
41
43
|
- - ">="
|
42
44
|
- !ruby/object:Gem::Version
|
45
|
+
hash: 13
|
43
46
|
segments:
|
44
47
|
- 1
|
45
48
|
- 2
|
@@ -55,6 +58,7 @@ dependencies:
|
|
55
58
|
requirements:
|
56
59
|
- - ">="
|
57
60
|
- !ruby/object:Gem::Version
|
61
|
+
hash: 3
|
58
62
|
segments:
|
59
63
|
- 0
|
60
64
|
version: "0"
|
@@ -71,7 +75,6 @@ extra_rdoc_files:
|
|
71
75
|
- README.rdoc
|
72
76
|
files:
|
73
77
|
- .document
|
74
|
-
- .gitignore
|
75
78
|
- LICENSE
|
76
79
|
- README.rdoc
|
77
80
|
- Rakefile
|
@@ -119,8 +122,8 @@ homepage: http://github.com/GlobalNamesArchitecture/dwc-archive
|
|
119
122
|
licenses: []
|
120
123
|
|
121
124
|
post_install_message:
|
122
|
-
rdoc_options:
|
123
|
-
|
125
|
+
rdoc_options: []
|
126
|
+
|
124
127
|
require_paths:
|
125
128
|
- lib
|
126
129
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -128,6 +131,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
128
131
|
requirements:
|
129
132
|
- - ">="
|
130
133
|
- !ruby/object:Gem::Version
|
134
|
+
hash: 3
|
131
135
|
segments:
|
132
136
|
- 0
|
133
137
|
version: "0"
|
@@ -136,6 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
136
140
|
requirements:
|
137
141
|
- - ">="
|
138
142
|
- !ruby/object:Gem::Version
|
143
|
+
hash: 3
|
139
144
|
segments:
|
140
145
|
- 0
|
141
146
|
version: "0"
|
data/.gitignore
DELETED