dwc-archive 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -10,6 +10,7 @@ begin
10
10
  gem.email = "dmozzherin at gmail dot com"
11
11
  gem.homepage = "http://github.com/dimus/dwc-archive"
12
12
  gem.authors = ["Dmitry Mozzherin"]
13
+ gem.add_dependency "fastercsv" if RUBY_VERSION.match /^1.8/
13
14
  gem.add_development_dependency "rspec", ">= 1.2.9"
14
15
  gem.add_development_dependency "cucumber", ">= 0"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -128,7 +128,7 @@ Then /^I can read its content into memory$/ do
128
128
  core_data, core_errors = @dwc.core.read
129
129
  core_data.class.should == Array
130
130
  core_data.size.should == 584
131
- core_errors.size.should == 1
131
+ core_errors.size.should == 3
132
132
  end
133
133
 
134
134
  Then /^I can read extensions content into memory$/ do
@@ -147,7 +147,7 @@ Then /^I can read its core content using block$/ do
147
147
  res << [r.size, err.size]
148
148
  end
149
149
  res << [tail_data.size, tail_errors.size]
150
- res.should == [[200,0],[200,0],[184,1]]
150
+ res.should == [[198,2],[200,0],[186,1]]
151
151
  end
152
152
 
153
153
  Then /^I can read extensions content using block$/ do
@@ -8,16 +8,9 @@ class DarwinCore
8
8
  root_key = @archive.meta.keys[0]
9
9
  @data = @archive.meta[root_key][:core]
10
10
  raise CoreFileError("Cannot found core in meta.xml, is meta.xml valid?") unless @data
11
- @properties = @data[:attributes]
12
- @encoding = @properties[:encoding] || 'UTF-8'
13
- raise CoreFileError("No support for encodings other than utf-8 or utf-16 at the moment") unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
14
11
  @id = @data[:id][:attributes]
15
- @field_separator = get_field_separator
16
- @quote_character = @properties[:fieldsEnclosedBy] || ""
17
- @line_separator = @properties[:linesTerminatedBy] || "\n"
18
- @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
19
- @file_path = get_file_path
20
- @fields = get_fields
12
+ raise CoreFileError("Cannot find core identifier") unless @id
13
+ get_attributes(CoreFileError)
21
14
  end
22
15
  end
23
16
  end
@@ -4,4 +4,5 @@ class DarwinCore
4
4
  class UnpackingError < Error; end
5
5
  class InvalidArchiveError < Error; end
6
6
  class CoreFileError < Error; end
7
+ class ExtensionFileError < Error; end
7
8
  end
@@ -7,15 +7,9 @@ class DarwinCore
7
7
  @archive = archive
8
8
  @path = @archive.files_path
9
9
  @data = data
10
- @properties = @data[:attributes]
11
- @coreid = @data[:coreid][:attributes]
12
- @encoding = @properties[:encoding] || 'UTF-8'
13
- @quote_character = @properties[:fieldsEnclosedBy] || ""
14
- @line_separator = @properties[:linesTerminatedBy] || "\n"
15
- @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
16
- @field_separator = get_field_separator
17
- @file_path = get_file_path
18
- @fields = get_fields
10
+ @coreid = @data[:coreid][:attributes]
11
+ raise ExtensionFileError("Extension has no coreid information") unless @coreid
12
+ get_attributes(ExtensionFileError)
19
13
  end
20
14
 
21
15
  end
@@ -1,4 +1,4 @@
1
- class DarwinCore
1
+ class DarwinCore
2
2
  module Ingester
3
3
  attr_reader :data, :properties, :encoding, :fields_separator
4
4
  attr_reader :file_path, :fields, :line_separator, :quote_character, :ignore_headers
@@ -10,14 +10,7 @@ class DarwinCore
10
10
  args.merge!({:quote_char => @quote_character}) if @quote_character != ''
11
11
  CSV.open(@file_path, args).each_with_index do |r, i|
12
12
  index_fix = 0; next if @ignore_headers && i == 0
13
- str = r.join('')
14
- if defined? FasterCSV
15
- require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
16
- UTF8RGX === str ? res << r : errors << r
17
- else
18
- str = str.force_encoding('utf-8')
19
- str.encoding.name == "UTF-8" && str.valid_encoding? ? res << r : errors << r
20
- end
13
+ @fields.size > (r.size - 1) ? errors << r : process_csv_row(res, errors, r)
21
14
  if block_given? && (i + index_fix) % batch_size == 0
22
15
  yield [res, errors]
23
16
  res = []
@@ -28,6 +21,31 @@ class DarwinCore
28
21
  end
29
22
 
30
23
  private
24
# Sorts one parsed CSV row into +result+ or +errors+ depending on whether
# the concatenation of its fields passes the UTF-8 check.
#
# result - Array collecting rows that passed the check.
# errors - Array collecting rows that failed the check.
# row    - Array of field values for a single CSV line.
#
# Returns the array the row was appended to.
def process_csv_row(result, errors, row)
  joined = row.join('')
  valid =
    if defined? FasterCSV
      # When FasterCSV is loaded the row is matched against UTF8RGX, which is
      # expected to come from the sibling utf_regex_ruby18 file.
      require File.join(File.dirname(__FILE__), 'utf_regex_ruby18')
      UTF8RGX === joined
    else
      # Otherwise tag the bytes as UTF-8 and ask Ruby whether they are valid.
      joined = joined.force_encoding('utf-8')
      joined.encoding.name == "UTF-8" && joined.valid_encoding?
    end
  (valid ? result : errors) << row
end
34
+
35
# Reads the parsing settings for this data file from @data[:attributes] and
# stores them in instance variables (@properties, @encoding,
# @field_separator, @quote_character, @line_separator, @ignore_headers,
# @file_path, @fields).
#
# exception - error class to raise when the metadata is unusable
#             (callers in this file pass CoreFileError / ExtensionFileError).
#
# Raises +exception+ when the encoding is not UTF-8/UTF-16, when no file
# path is found, or when no fields are described.
def get_attributes(exception)
  @properties = @data[:attributes]
  @encoding = @properties[:encoding] || 'UTF-8'
  # `raise exception("msg")` would call a *method* named `exception`, which
  # fails with NoMethodError; the class and message must be passed separately.
  unless ["utf-8", "utf8", "utf-16", "utf16"].include? @encoding.downcase
    raise exception, "No support for encodings other than utf-8 or utf-16 at the moment"
  end
  @field_separator = get_field_separator
  @quote_character = @properties[:fieldsEnclosedBy] || ""
  @line_separator = @properties[:linesTerminatedBy] || "\n"
  # NOTE(review): only Integer 1 or true switch this on; a String "1" would
  # not -- confirm the value type produced by the meta.xml parser.
  @ignore_headers = @properties[:ignoreHeaderLines] ? [1, true].include?(@properties[:ignoreHeaderLines]) : false
  @file_path = get_file_path
  raise exception, "No file data" unless @file_path
  @fields = get_fields
  raise exception, "No data fields are found" if @fields.empty?
end
48
+
31
49
  def get_file_path
32
50
  file = @data[:location] || @data[:attributes][:location] || @data[:files][:location]
33
51
  File.join(@path, file)
Binary file
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Dmitry Mozzherin
@@ -15,13 +15,27 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-19 00:00:00 -04:00
18
+ date: 2010-05-21 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
- name: rspec
22
+ name: fastercsv
23
23
  prerelease: false
24
24
  requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
25
39
  none: false
26
40
  requirements:
27
41
  - - ">="
@@ -33,11 +47,11 @@ dependencies:
33
47
  - 9
34
48
  version: 1.2.9
35
49
  type: :development
36
- version_requirements: *id001
50
+ version_requirements: *id002
37
51
  - !ruby/object:Gem::Dependency
38
52
  name: cucumber
39
53
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
54
+ requirement: &id003 !ruby/object:Gem::Requirement
41
55
  none: false
42
56
  requirements:
43
57
  - - ">="
@@ -47,7 +61,7 @@ dependencies:
47
61
  - 0
48
62
  version: "0"
49
63
  type: :development
50
- version_requirements: *id002
64
+ version_requirements: *id003
51
65
  description: Darwin Core Archive is the current standard exchange format for GLobal Names Architecture modules. This gem makes it easy to incorporate files in Darwin Core Archive format into a ruby project.
52
66
  email: dmozzherin at gmail dot com
53
67
  executables: []