dwc-archive 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -10,6 +10,7 @@ begin
10
10
  gem.email = "dmozzherin at gmail dot com"
11
11
  gem.homepage = "http://github.com/dimus/dwc-archive"
12
12
  gem.authors = ["Dmitry Mozzherin"]
13
+ gem.add_dependency "fastercsv" if RUBY_VERSION.match /^1.8/
13
14
  gem.add_development_dependency "rspec", ">= 1.2.9"
14
15
  gem.add_development_dependency "cucumber", ">= 0"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -128,7 +128,7 @@ Then /^I can read its content into memory$/ do
128
128
  core_data, core_errors = @dwc.core.read
129
129
  core_data.class.should == Array
130
130
  core_data.size.should == 584
131
- core_errors.size.should == 1
131
+ core_errors.size.should == 3
132
132
  end
133
133
 
134
134
  Then /^I can read extensions content into memory$/ do
@@ -147,7 +147,7 @@ Then /^I can read its core content using block$/ do
147
147
  res << [r.size, err.size]
148
148
  end
149
149
  res << [tail_data.size, tail_errors.size]
150
- res.should == [[200,0],[200,0],[184,1]]
150
+ res.should == [[198,2],[200,0],[186,1]]
151
151
  end
152
152
 
153
153
  Then /^I can read extensions content using block$/ do
@@ -8,16 +8,9 @@ class DarwinCore
8
8
  root_key = @archive.meta.keys[0]
9
9
  @data = @archive.meta[root_key][:core]
10
10
  raise CoreFileError("Cannot found core in meta.xml, is meta.xml valid?") unless @data
11
- @properties = @data[:attributes]
12
- @encoding = @properties[:encoding] || 'UTF-8'
13
- raise CoreFileError("No support for encodings other than utf-8 or utf-16 at the moment") unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
14
11
  @id = @data[:id][:attributes]
15
- @field_separator = get_field_separator
16
- @quote_character = @properties[:fieldsEnclosedBy] || ""
17
- @line_separator = @properties[:linesTerminatedBy] || "\n"
18
- @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
19
- @file_path = get_file_path
20
- @fields = get_fields
12
+ raise CoreFileError("Cannot find core identifier") unless @id
13
+ get_attributes(CoreFileError)
21
14
  end
22
15
  end
23
16
  end
@@ -4,4 +4,5 @@ class DarwinCore
4
4
  class UnpackingError < Error; end
5
5
  class InvalidArchiveError < Error; end
6
6
  class CoreFileError < Error; end
7
+ class ExtensionFileError < Error; end
7
8
  end
@@ -7,15 +7,9 @@ class DarwinCore
7
7
  @archive = archive
8
8
  @path = @archive.files_path
9
9
  @data = data
10
- @properties = @data[:attributes]
11
- @coreid = @data[:coreid][:attributes]
12
- @encoding = @properties[:encoding] || 'UTF-8'
13
- @quote_character = @properties[:fieldsEnclosedBy] || ""
14
- @line_separator = @properties[:linesTerminatedBy] || "\n"
15
- @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
16
- @field_separator = get_field_separator
17
- @file_path = get_file_path
18
- @fields = get_fields
10
+ @coreid = @data[:coreid][:attributes]
11
+ raise ExtensionFileError("Extension has no coreid information") unless @coreid
12
+ get_attributes(ExtensionFileError)
19
13
  end
20
14
 
21
15
  end
@@ -1,4 +1,4 @@
1
- class DarwinCore
1
+ class DarwinCore
2
2
  module Ingester
3
3
  attr_reader :data, :properties, :encoding, :fields_separator
4
4
  attr_reader :file_path, :fields, :line_separator, :quote_character, :ignore_headers
@@ -10,14 +10,7 @@ class DarwinCore
10
10
  args.merge!({:quote_char => @quote_character}) if @quote_character != ''
11
11
  CSV.open(@file_path, args).each_with_index do |r, i|
12
12
  index_fix = 0; next if @ignore_headers && i == 0
13
- str = r.join('')
14
- if defined? FasterCSV
15
- require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
16
- UTF8RGX === str ? res << r : errors << r
17
- else
18
- str = str.force_encoding('utf-8')
19
- str.encoding.name == "UTF-8" && str.valid_encoding? ? res << r : errors << r
20
- end
13
+ @fields.size > (r.size - 1) ? errors << r : process_csv_row(res, errors, r)
21
14
  if block_given? && (i + index_fix) % batch_size == 0
22
15
  yield [res, errors]
23
16
  res = []
@@ -28,6 +21,31 @@ class DarwinCore
28
21
  end
29
22
 
30
23
  private
24
+ def process_csv_row(result, errors, row)
25
+ str = row.join('')
26
+ if defined? FasterCSV
27
+ require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
28
+ UTF8RGX === str ? result << row : errors << row
29
+ else
30
+ str = str.force_encoding('utf-8')
31
+ str.encoding.name == "UTF-8" && str.valid_encoding? ? result << row : errors << row
32
+ end
33
+ end
34
+
35
+ def get_attributes(exception)
36
+ @properties = @data[:attributes]
37
+ @encoding = @properties[:encoding] || 'UTF-8'
38
+ raise exception("No support for encodings other than utf-8 or utf-16 at the moment") unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
39
+ @field_separator = get_field_separator
40
+ @quote_character = @properties[:fieldsEnclosedBy] || ""
41
+ @line_separator = @properties[:linesTerminatedBy] || "\n"
42
+ @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
43
+ @file_path = get_file_path
44
+ raise exception("No file data") unless @file_path
45
+ @fields = get_fields
46
+ raise exception("No data fields are found") if @fields.empty?
47
+ end
48
+
31
49
  def get_file_path
32
50
  file = @data[:location] || @data[:attributes][:location] || @data[:files][:location]
33
51
  File.join(@path, file)
Binary file
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Dmitry Mozzherin
@@ -15,13 +15,27 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-19 00:00:00 -04:00
18
+ date: 2010-05-21 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: rspec
22
+ name: fastercsv
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
25
39
  none: false
26
40
  requirements:
27
41
  - - ">="
@@ -33,11 +47,11 @@ dependencies:
33
47
  - 9
34
48
  version: 1.2.9
35
49
  type: :development
36
- version_requirements: *id001
50
+ version_requirements: *id002
37
51
  - !ruby/object:Gem::Dependency
38
52
  name: cucumber
39
53
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
54
+ requirement: &id003 !ruby/object:Gem::Requirement
41
55
  none: false
42
56
  requirements:
43
57
  - - ">="
@@ -47,7 +61,7 @@ dependencies:
47
61
  - 0
48
62
  version: "0"
49
63
  type: :development
50
- version_requirements: *id002
64
+ version_requirements: *id003
51
65
  description: Darwin Core Archive is the current standard exchange format for GLobal Names Architecture modules. This gem makes it easy to incorporate files in Darwin Core Archive format into a ruby project.
52
66
  email: dmozzherin at gmail dot com
53
67
  executables: []