dwc-archive 0.9.6 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8e8c929203d1b652f8ba345b0c4c39cfc87a0369
4
+ data.tar.gz: 739a064221bf52523990bfea38749f11b4d986c0
5
+ SHA512:
6
+ metadata.gz: adfd46bea84e301ceca6de355f189b884bc6a9a22eb8d98e3976f17e3407d2de7f15963ec0d6f29958de057de9d579c81d8007d72c38fc1b9f9ada9295381152
7
+ data.tar.gz: 2e188f828d0bbe28baf5f1d8aa7aab12ca330000bf3f5ee145501d78961f9ca5e696be1171998a433952cf84546a8d5fd61c6cf45958576a7e0800fbf4542dae
data/.gitignore ADDED
@@ -0,0 +1,30 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ install
21
+ Gemfile.lock
22
+
23
+ ## PROJECT::SPECIFIC
24
+ tags
25
+ bin
26
+ .bundle
27
+ bundle_bin
28
+ Gemfile.lock
29
+
30
+
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format nested
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p353
data/.travis.yml CHANGED
@@ -1,10 +1,12 @@
1
1
  rvm:
2
- - 1.9.3
3
- - 2.0.0
2
+ - 1.9.3-p448
3
+ - 2.0.0-p353
4
4
  before_install:
5
5
  - sudo apt-get update
6
- - sudo apt-get install redis-server
7
- bundler_args: --without development
6
+ - gem install debugger
7
+ # bundler_args: --without development
8
+ services:
9
+ - redis-server
8
10
  script:
9
11
  - bundle exec cucumber
10
12
  - bundle exec rake
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 0.9.7 Refactoring and test improvements
2
+
1
3
  0.9.6 Added support for GNUB DwCA files
2
4
 
3
5
  0.9.4 Gem dependencies updated, added travis support
data/Gemfile CHANGED
@@ -1,17 +1,3 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gem 'nokogiri', '~> 1.5'
4
- gem 'parsley-store', '~> 0.3.2'
5
- gem 'archive-tar-minitar', '~> 0.5'
6
-
7
- group :development do
8
- gem 'debugger', '~> 1.3'
9
- end
10
-
11
- group :test do
12
- gem 'rspec', '~> 2.13'
13
- gem 'cucumber', '~> 1.3'
14
- gem 'bundler', '~> 1.3'
15
- gem 'jeweler', '~> 1.8'
16
- gem 'jazz_hands', '~> 0.5'
17
- end
3
+ gemspec
data/README.md CHANGED
@@ -3,18 +3,25 @@ Darwin Core Archive
3
3
 
4
4
  [![Gem Version][1]][2]
5
5
  [![Continuous Integration Status][3]][4]
6
- [![Dependency Status][5]][6]
6
+ [![Coverage Status][5]][6]
7
+ [![CodePolice][7]][8]
8
+ [![Dependency Status][9]][10]
7
9
 
8
10
  Darwin Core Archive format is a current standard for information exchange
9
11
  between Global Names Architecture modules. This gem lets you work with
10
12
  Darwin Core Archive data compressed to either zip or tar.gz files.
11
- More information about Darwing Core Archive can be found on a [GBIF page:][7]
13
+ More information about Darwin Core Archive can be found on a [GBIF page:][11]
12
14
 
13
15
  Installation
14
16
  ------------
15
17
 
16
18
  sudo gem install dwc-archive
17
19
 
20
+ ### System Requirements
21
+
22
+ You need [Redis Server][12] and the unzip utility installed.
23
+
24
+
18
25
  Usage
19
26
  -----
20
27
 
@@ -174,6 +181,11 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
174
181
  [2]: http://badge.fury.io/rb/dwc-archive
175
182
  [3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
176
183
  [4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
177
- [5]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
178
- [6]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
179
- [7]: http://bit.ly/2IxcBA
184
+ [5]: https://coveralls.io/repos/GlobalNamesArchitecture/dwc-archive/badge.png
185
+ [6]: https://coveralls.io/r/GlobalNamesArchitecture/dwc-archive
186
+ [7]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive.png
187
+ [8]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive
188
+ [9]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
189
+ [10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
190
+ [11]: http://bit.ly/2IxcBA
191
+ [12]: http://redis.io/topics/quickstart
data/Rakefile CHANGED
@@ -1,28 +1,10 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ require "bundler/gem_tasks"
3
2
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "dwc-archive"
8
- gem.summary = %Q{Handler of Darwin Core Archive files}
9
- gem.description = 'Darwin Core Archive is the current standard exchange ' +
10
- 'format for GLobal Names Architecture modules. ' +
11
- 'This gem makes it easy to incorporate files in ' +
12
- 'Darwin Core Archive format into a ruby project.'
13
- gem.email = "dmozzherin at gmail dot com"
14
- gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
15
- gem.authors = ["Dmitry Mozzherin"]
16
- #gem.add_dependency "fastercsv" if RUBY_VERSION.match /^1.8/
17
- gem.add_dependency 'parsley-store', ">= 0.3.0"
18
- gem.add_development_dependency "rspec", ">= 1.2.9"
19
- gem.add_development_dependency "cucumber", ">= 0"
20
- end
21
- Jeweler::GemcutterTasks.new
22
- rescue LoadError
23
- puts 'Jeweler (or a dependency) not available. ' +
24
- 'Install it with: gem install jeweler'
25
- end
3
+ # Bundler::GemHelper.install_tasks
4
+ # require 'bundler/gem_tasks'
5
+ # require 'rake/testtasks'
6
+ # require 'rubygems'
7
+ # require 'rake'
26
8
 
27
9
  require 'rspec/core/rake_task'
28
10
  RSpec::Core::RakeTask.new(:spec) do |spec|
data/] ADDED
@@ -0,0 +1,40 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore::Core do
4
+ subject(:dwca) { DarwinCore.new(file_path) }
5
+ subject(:core) { DarwinCore::Core.new(dwca) }
6
+ let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
+ file_name) }
8
+ let(:file_name) { 'data.tar.gz' }
9
+
10
+
11
+ describe '.new' do
12
+ it 'creates new core' do
13
+ expect(core).to be_kind_of DarwinCore::Core
14
+ end
15
+ end
16
+
17
+ describe '#id' do
18
+
19
+ it 'returns core id' do
20
+ expect(core.id[:index]).to eq 0
21
+ expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
+ end
23
+
24
+ context 'no coreid' do
25
+ let(:file_name) { 'empty_coreid.tar.gz' }
26
+
27
+ it 'does not return coreid' do
28
+ expect(core.id[:index]).to eq 0
29
+ expect(core.id[:term]).to be_nil
30
+ end
31
+ end
32
+ end
33
+
34
+ it 'reads core file from archive' do
35
+
36
+ core.read
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,33 @@
1
+ require File.expand_path('../lib/dwc-archive/version', __FILE__)
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = "dwc-archive"
5
+ gem.version = DarwinCore::VERSION
6
+ gem.authors = ["Dmitry Mozzherin"]
7
+ gem.email = ["dmozzherin at gmail dot com"]
8
+ gem.description = %q{Darwin Core Archive is the current standard exchange
9
+ format for GLobal Names Architecture modules.
10
+ This gem makes it easy to incorporate files in
11
+ Darwin Core Archive format into a ruby project.}
12
+ gem.summary = %q{Handler of Darwin Core Archive files}
13
+ gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
14
+ gem.license = "MIT"
15
+
16
+ gem.files = `git ls-files`.split($/)
17
+ gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
+ gem.require_paths = ["lib"]
20
+
21
+ gem.add_runtime_dependency 'nokogiri', '~> 1.6'
22
+ gem.add_runtime_dependency 'parsley-store', '~> 0.3'
23
+ gem.add_runtime_dependency 'archive-tar-minitar', '~> 0.5'
24
+
25
+ gem.add_development_dependency 'rake', '~> 10.1'
26
+ gem.add_development_dependency 'bundler', '~> 1.3'
27
+ gem.add_development_dependency 'rspec', '~> 2.14'
28
+ gem.add_development_dependency 'cucumber', '~> 1.3'
29
+ gem.add_development_dependency 'coveralls', '~> 0.7'
30
+ gem.add_development_dependency 'debugger', '~> 1.6'
31
+ gem.add_development_dependency 'git', '~> 1.2'
32
+ end
33
+
data/lib/dwc-archive.rb CHANGED
@@ -1,34 +1,36 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__)) unless
3
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
4
- R19 = RUBY_VERSION.split('.')[0..1].join('').to_i > 18
5
- raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless R19
2
+
3
+ recent_ruby = RUBY_VERSION >= '1.9.1'
4
+ raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless recent_ruby
5
+
6
6
  require 'fileutils'
7
7
  require 'ostruct'
8
8
  require 'digest'
9
9
  require 'csv'
10
10
  require 'logger'
11
- require 'dwc-archive/xml_reader'
12
- require 'dwc-archive/ingester'
13
- require 'dwc-archive/errors'
14
- require 'dwc-archive/expander'
15
- require 'dwc-archive/archive'
16
- require 'dwc-archive/core'
17
- require 'dwc-archive/extension'
18
- require 'dwc-archive/metadata'
19
- require 'dwc-archive/generator'
20
- require 'dwc-archive/generator_meta_xml'
21
- require 'dwc-archive/generator_eml_xml'
22
- require 'dwc-archive/classification_normalizer'
11
+ require_relative 'dwc-archive/xml_reader'
12
+ require_relative 'dwc-archive/ingester'
13
+ require_relative 'dwc-archive/errors'
14
+ require_relative 'dwc-archive/expander'
15
+ require_relative 'dwc-archive/archive'
16
+ require_relative 'dwc-archive/core'
17
+ require_relative 'dwc-archive/extension'
18
+ require_relative 'dwc-archive/metadata'
19
+ require_relative 'dwc-archive/generator'
20
+ require_relative 'dwc-archive/generator_meta_xml'
21
+ require_relative 'dwc-archive/generator_eml_xml'
22
+ require_relative 'dwc-archive/classification_normalizer'
23
+ require_relative 'dwc-archive/version'
23
24
 
24
25
  class DarwinCore
25
26
 
26
- VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
27
+ VERSION = DarwinCore::VERSION
28
+ DEFAULT_TMP_DIR = "/tmp"
27
29
 
28
- attr_reader :archive, :core, :metadata, :extensions, :classification_normalizer
30
+ attr_reader :archive, :core, :metadata, :extensions,
31
+ :classification_normalizer
29
32
  alias :eml :metadata
30
33
 
31
- DEFAULT_TMP_DIR = "/tmp"
32
34
 
33
35
  def self.nil_field?(field)
34
36
  return true if [nil, '', '/N'].include?(field)
@@ -68,10 +70,20 @@ class DarwinCore
68
70
  @extensions = get_extensions
69
71
  end
70
72
 
71
- # generates a hash from a classification data with path to each node, list of synonyms and vernacular names.
73
+ def file_name
74
+ File.split(@dwc_path).last
75
+ end
76
+
77
+ def path
78
+ File.expand_path(@dwc_path)
79
+ end
80
+
81
+ # generates a hash from classification data with a path to each node,
82
+ # list of synonyms and vernacular names.
72
83
  def normalize_classification
73
84
  return nil unless has_parent_id?
74
- @classification_normalizer ||= DarwinCore::ClassificationNormalizer.new(self)
85
+ @classification_normalizer ||= DarwinCore::ClassificationNormalizer.
86
+ new(self)
75
87
  @classification_normalizer.normalize
76
88
  end
77
89
 
@@ -8,8 +8,11 @@ class DarwinCore
8
8
  @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
9
9
  @expander.unpack
10
10
  if valid?
11
- @meta = DarwinCore::XmlReader.from_xml(open(File.join(@expander.path, 'meta.xml')))
12
- @eml = files.include?("eml.xml") ? DarwinCore::XmlReader.from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
11
+ @meta = DarwinCore::XmlReader.
12
+ from_xml(open(File.join(@expander.path, 'meta.xml')))
13
+ @eml = files.include?("eml.xml") ?
14
+ DarwinCore::XmlReader.
15
+ from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
13
16
  else
14
17
  clean
15
18
  raise InvalidArchiveError
@@ -57,6 +57,10 @@ class DarwinCore
57
57
  @tree = {}
58
58
  end
59
59
 
60
+ def darwin_core
61
+ @dwc
62
+ end
63
+
60
64
  def add_name_string(name_string)
61
65
  @name_strings[name_string] = 1 unless @name_strings[name_string]
62
66
  end
@@ -8,9 +8,9 @@ class DarwinCore
8
8
  @path = @archive.files_path
9
9
  root_key = @archive.meta.keys[0]
10
10
  @data = @archive.meta[root_key][:core]
11
- raise DarwinCore::CoreFileError.new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
11
+ raise DarwinCore::CoreFileError.
12
+ new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
12
13
  @id = @data[:id][:attributes]
13
- # raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
14
14
  get_attributes(DarwinCore::CoreFileError)
15
15
  end
16
16
  end
@@ -43,12 +43,16 @@ class DarwinCore
43
43
  if file_type.match(/tar.*gzip/i)
44
44
  return proc do |tmp_path, archive_path|
45
45
  FileUtils.mkdir tmp_path
46
- system("tar -zxf #{esc(archive_path)} -C #{tmp_path} > /dev/null 2>&1")
46
+ path = esc(archive_path)
47
+ system("tar -zxf #{path} -C #{tmp_path} > /dev/null 2>&1")
47
48
  end
48
49
  end
49
50
 
50
51
  if file_type.match(/Zip/)
51
- return proc { |tmp_path, archive_path| system("unzip -qq -d #{tmp_path} #{esc(archive_path)} > /dev/null 2>&1") }
52
+ return proc do |tmp_path, archive_path|
53
+ path = esc(archive_path)
54
+ system("unzip -qq -d #{tmp_path} #{path} > /dev/null 2>&1")
55
+ end
52
56
  end
53
57
 
54
58
  return nil
@@ -8,7 +8,8 @@ class DarwinCore
8
8
  @path = File.join(tmp_dir, 'dwc_' + rand(10000000000).to_s)
9
9
  FileUtils.mkdir(@path)
10
10
  @meta_xml_data = {:extensions => []}
11
- @eml_xml_data = {:id => nil, :title => nil, :authors => [], :abstract => nil, :citation => nil, :url => nil}
11
+ @eml_xml_data = {:id => nil, :title => nil,
12
+ :authors => [], :abstract => nil, :citation => nil, :url => nil}
12
13
  @write = 'w:utf-8'
13
14
  end
14
15
 
@@ -22,27 +23,36 @@ class DarwinCore
22
23
  header = data.shift
23
24
  fields = header.map do |f|
24
25
  f.strip!
25
- raise DarwinCore::GeneratorError.new("No header in core data, or header fields are not urls") unless f.match(/^http:\/\//)
26
- f.split("/")[-1]
26
+ err = 'No header in core data, or header fields are not urls'
27
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
28
+ f.split('/')[-1]
27
29
  end
28
30
  data.unshift(fields) if keep_headers
29
31
  ignore_header_lines = keep_headers ? 1 : 0
30
- @meta_xml_data[:core] = {:fields => header, :ignoreHeaderLines => ignore_header_lines, :location => file_name}
32
+ @meta_xml_data[:core] = { fields: header,
33
+ ignoreHeaderLines: ignore_header_lines,
34
+ location:file_name }
31
35
  data.each {|d| c << d}
32
36
  c.close
33
37
  end
34
38
 
35
- def add_extension(data, file_name, keep_headers = true, row_type = "http://rs.tdwg.org/dwc/terms/Taxon")
39
+ def add_extension(data, file_name,
40
+ keep_headers = true,
41
+ row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
36
42
  c = CSV.open(File.join(@path,file_name), @write)
37
43
  header = data.shift
38
44
  fields = header.map do |f|
39
45
  f.strip!
40
- raise DarwinCore::GeneratorError.new("No header in core data, or header fields are not urls") unless f.match(/^http:\/\//)
41
- f.split("/")[-1]
46
+ err = 'No header in core data, or header fields are not urls'
47
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
48
+ f.split('/')[-1]
42
49
  end
43
50
  data.unshift(fields) if keep_headers
44
51
  ignore_header_lines = keep_headers ? 1 : 0
45
- @meta_xml_data[:extensions] << { :fields => header, :ignoreHeaderLines => ignore_header_lines, :location => file_name, :rowType => row_type }
52
+ @meta_xml_data[:extensions] << { fields: header,
53
+ ignoreHeaderLines: ignore_header_lines,
54
+ location: file_name,
55
+ rowType: row_type }
46
56
  data.each { |d| c << d }
47
57
  c.close
48
58
  end
@@ -9,26 +9,28 @@ class DarwinCore
9
9
  end
10
10
 
11
11
  def create
12
+ eml_uri = 'eml://ecoinformatics.org/eml-2.1.1' +
13
+ ' http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd'
12
14
  builder = Nokogiri::XML::Builder.new do |xml|
13
- xml.eml(:packageId => "%s/%s" % [@data[:id], timestamp],
14
- :system => @data[:system] || "http://globalnames.org",
15
- :'xml:lang' => "en",
16
- :'xmlns:eml' => "eml://ecoinformatics.org/eml-2.1.1",
17
- :'xmlns:md' => "eml://ecoinformatics.org/methods-2.1.1",
18
- :'xmlns:proj' => "eml://ecoinformatics.org/project-2.1.1",
19
- :'xmlns:d' => "eml://ecoinformatics.org/dataset-2.1.1",
20
- :'xmlns:res' => "eml://ecoinformatics.org/resource-2.1.1",
21
- :'xmlns:dc' => "http://purl.org/dc/terms/",
22
- :'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
23
- :'xsi:schemaLocation' => "eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd") do
24
- xml.dataset(:id => @data[:id]) do
15
+ xml.eml(packageId: "%s/%s" % [@data[:id], timestamp],
16
+ system: @data[:system] || 'http://globalnames.org',
17
+ :'xml:lang' => 'en',
18
+ :'xmlns:eml' => 'eml://ecoinformatics.org/eml-2.1.1',
19
+ :'xmlns:md' => 'eml://ecoinformatics.org/methods-2.1.1',
20
+ :'xmlns:proj' => 'eml://ecoinformatics.org/project-2.1.1',
21
+ :'xmlns:d' => 'eml://ecoinformatics.org/dataset-2.1.1',
22
+ :'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
23
+ :'xmlns:dc' => 'http://purl.org/dc/terms/',
24
+ :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
25
+ :'xsi:schemaLocation' => 'eml_uri') do
26
+ xml.dataset(id: @data[:id]) do
25
27
  xml.title(@data[:title])
26
28
  xml.license(@data[:license])
27
29
  contacts = []
28
30
  @data[:authors].each_with_index do |a, i|
29
31
  creator_id = i + 1
30
32
  contacts << creator_id
31
- xml.creator(:id => creator_id, :scope => 'document') do
33
+ xml.creator(id: creator_id, scope: 'document') do
32
34
  xml.individualName do
33
35
  xml.givenName(a[:first_name])
34
36
  xml.surName(a[:last_name])
@@ -77,7 +79,7 @@ class DarwinCore
77
79
  private
78
80
  def timestamp
79
81
  t = Time.now.getutc.to_a[0..5].reverse
80
- t[0..2].join('-') + "::" + t[-3..-1].join(':')
82
+ t[0..2].join('-') + '::' + t[-3..-1].join(':')
81
83
  end
82
84
  end
83
85
  end