dwc-archive 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/features/step_definitions/dwc-archive_steps.rb +2 -2
- data/lib/dwc-archive/core.rb +2 -9
- data/lib/dwc-archive/errors.rb +1 -0
- data/lib/dwc-archive/extension.rb +3 -9
- data/lib/dwc-archive/ingester.rb +27 -9
- data/spec/files/data.tar.gz +0 -0
- metadata +22 -8
data/Rakefile
CHANGED
@@ -10,6 +10,7 @@ begin
|
|
10
10
|
gem.email = "dmozzherin at gmail dot com"
|
11
11
|
gem.homepage = "http://github.com/dimus/dwc-archive"
|
12
12
|
gem.authors = ["Dmitry Mozzherin"]
|
13
|
+
gem.add_dependency "fastercsv" if RUBY_VERSION.match /^1.8/
|
13
14
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
15
|
gem.add_development_dependency "cucumber", ">= 0"
|
15
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.1
|
1
|
+
0.2.2
|
data/features/step_definitions/dwc-archive_steps.rb
CHANGED
@@ -128,7 +128,7 @@ Then /^I can read its content into memory$/ do
|
|
128
128
|
core_data, core_errors = @dwc.core.read
|
129
129
|
core_data.class.should == Array
|
130
130
|
core_data.size.should == 584
|
131
|
-
core_errors.size.should ==
|
131
|
+
core_errors.size.should == 3
|
132
132
|
end
|
133
133
|
|
134
134
|
Then /^I can read extensions content into memory$/ do
|
@@ -147,7 +147,7 @@ Then /^I can read its core content using block$/ do
|
|
147
147
|
res << [r.size, err.size]
|
148
148
|
end
|
149
149
|
res << [tail_data.size, tail_errors.size]
|
150
|
-
res.should == [[
|
150
|
+
res.should == [[198,2],[200,0],[186,1]]
|
151
151
|
end
|
152
152
|
|
153
153
|
Then /^I can read extensions content using block$/ do
|
data/lib/dwc-archive/core.rb
CHANGED
@@ -8,16 +8,9 @@ class DarwinCore
|
|
8
8
|
root_key = @archive.meta.keys[0]
|
9
9
|
@data = @archive.meta[root_key][:core]
|
10
10
|
raise CoreFileError("Cannot found core in meta.xml, is meta.xml valid?") unless @data
|
11
|
-
@properties = @data[:attributes]
|
12
|
-
@encoding = @properties[:encoding] || 'UTF-8'
|
13
|
-
raise CoreFileError("No support for encodings other than utf-8 or utf-16 at the moment") unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
|
14
11
|
@id = @data[:id][:attributes]
|
15
|
-
|
16
|
-
|
17
|
-
@line_separator = @properties[:linesTerminatedBy] || "\n"
|
18
|
-
@ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
|
19
|
-
@file_path = get_file_path
|
20
|
-
@fields = get_fields
|
12
|
+
raise CoreFileError("Cannot find core identifier") unless @id
|
13
|
+
get_attributes(CoreFileError)
|
21
14
|
end
|
22
15
|
end
|
23
16
|
end
|
data/lib/dwc-archive/errors.rb
CHANGED
data/lib/dwc-archive/extension.rb
CHANGED
@@ -7,15 +7,9 @@ class DarwinCore
|
|
7
7
|
@archive = archive
|
8
8
|
@path = @archive.files_path
|
9
9
|
@data = data
|
10
|
-
@
|
11
|
-
|
12
|
-
|
13
|
-
@quote_character = @properties[:fieldsEnclosedBy] || ""
|
14
|
-
@line_separator = @properties[:linesTerminatedBy] || "\n"
|
15
|
-
@ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
|
16
|
-
@field_separator = get_field_separator
|
17
|
-
@file_path = get_file_path
|
18
|
-
@fields = get_fields
|
10
|
+
@coreid = @data[:coreid][:attributes]
|
11
|
+
raise ExtensionFileError("Extension has no coreid information") unless @coreid
|
12
|
+
get_attributes(ExtensionFileError)
|
19
13
|
end
|
20
14
|
|
21
15
|
end
|
data/lib/dwc-archive/ingester.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class DarwinCore
|
1
|
+
class DarwinCore
|
2
2
|
module Ingester
|
3
3
|
attr_reader :data, :properties, :encoding, :fields_separator
|
4
4
|
attr_reader :file_path, :fields, :line_separator, :quote_character, :ignore_headers
|
@@ -10,14 +10,7 @@ class DarwinCore
|
|
10
10
|
args.merge!({:quote_char => @quote_character}) if @quote_character != ''
|
11
11
|
CSV.open(@file_path, args).each_with_index do |r, i|
|
12
12
|
index_fix = 0; next if @ignore_headers && i == 0
|
13
|
-
|
14
|
-
if defined? FasterCSV
|
15
|
-
require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
|
16
|
-
UTF8RGX === str ? res << r : errors << r
|
17
|
-
else
|
18
|
-
str = str.force_encoding('utf-8')
|
19
|
-
str.encoding.name == "UTF-8" && str.valid_encoding? ? res << r : errors << r
|
20
|
-
end
|
13
|
+
@fields.size > (r.size - 1) ? errors << r : process_csv_row(res, errors, r)
|
21
14
|
if block_given? && (i + index_fix) % batch_size == 0
|
22
15
|
yield [res, errors]
|
23
16
|
res = []
|
@@ -28,6 +21,31 @@ class DarwinCore
|
|
28
21
|
end
|
29
22
|
|
30
23
|
private
|
24
|
+
def process_csv_row(result, errors, row)
|
25
|
+
str = row.join('')
|
26
|
+
if defined? FasterCSV
|
27
|
+
require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
|
28
|
+
UTF8RGX === str ? result << row : errors << row
|
29
|
+
else
|
30
|
+
str = str.force_encoding('utf-8')
|
31
|
+
str.encoding.name == "UTF-8" && str.valid_encoding? ? result << row : errors << row
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def get_attributes(exception)
|
36
|
+
@properties = @data[:attributes]
|
37
|
+
@encoding = @properties[:encoding] || 'UTF-8'
|
38
|
+
raise exception("No support for encodings other than utf-8 or utf-16 at the moment") unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
|
39
|
+
@field_separator = get_field_separator
|
40
|
+
@quote_character = @properties[:fieldsEnclosedBy] || ""
|
41
|
+
@line_separator = @properties[:linesTerminatedBy] || "\n"
|
42
|
+
@ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
|
43
|
+
@file_path = get_file_path
|
44
|
+
raise exception("No file data") unless @file_path
|
45
|
+
@fields = get_fields
|
46
|
+
raise exception("No data fields are found") if @fields.empty?
|
47
|
+
end
|
48
|
+
|
31
49
|
def get_file_path
|
32
50
|
file = @data[:location] || @data[:attributes][:location] || @data[:files][:location]
|
33
51
|
File.join(@path, file)
|
data/spec/files/data.tar.gz
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 1
|
10
|
-
version: 0.2.1
|
9
|
+
- 2
|
10
|
+
version: 0.2.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,13 +15,27 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-21 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name: rspec
|
22
|
+
name: fastercsv
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: rspec
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
25
39
|
none: false
|
26
40
|
requirements:
|
27
41
|
- - ">="
|
@@ -33,11 +47,11 @@ dependencies:
|
|
33
47
|
- 9
|
34
48
|
version: 1.2.9
|
35
49
|
type: :development
|
36
|
-
version_requirements: *id001
|
50
|
+
version_requirements: *id002
|
37
51
|
- !ruby/object:Gem::Dependency
|
38
52
|
name: cucumber
|
39
53
|
prerelease: false
|
40
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
54
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
55
|
none: false
|
42
56
|
requirements:
|
43
57
|
- - ">="
|
@@ -47,7 +61,7 @@ dependencies:
|
|
47
61
|
- 0
|
48
62
|
version: "0"
|
49
63
|
type: :development
|
50
|
-
version_requirements: *id002
|
64
|
+
version_requirements: *id003
|
51
65
|
description: Darwin Core Archive is the current standard exchange format for GLobal Names Architecture modules. This gem makes it easy to incorporate files in Darwin Core Archive format into a ruby project.
|
52
66
|
email: dmozzherin at gmail dot com
|
53
67
|
executables: []
|