dwc-archive 0.9.6 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8e8c929203d1b652f8ba345b0c4c39cfc87a0369
4
+ data.tar.gz: 739a064221bf52523990bfea38749f11b4d986c0
5
+ SHA512:
6
+ metadata.gz: adfd46bea84e301ceca6de355f189b884bc6a9a22eb8d98e3976f17e3407d2de7f15963ec0d6f29958de057de9d579c81d8007d72c38fc1b9f9ada9295381152
7
+ data.tar.gz: 2e188f828d0bbe28baf5f1d8aa7aab12ca330000bf3f5ee145501d78961f9ca5e696be1171998a433952cf84546a8d5fd61c6cf45958576a7e0800fbf4542dae
data/.gitignore ADDED
@@ -0,0 +1,30 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ install
21
+ Gemfile.lock
22
+
23
+ ## PROJECT::SPECIFIC
24
+ tags
25
+ bin
26
+ .bundle
27
+ bundle_bin
28
+ Gemfile.lock
29
+
30
+
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format nested
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.0.0-p353
data/.travis.yml CHANGED
@@ -1,10 +1,12 @@
1
1
  rvm:
2
- - 1.9.3
3
- - 2.0.0
2
+ - 1.9.3-p448
3
+ - 2.0.0-p353
4
4
  before_install:
5
5
  - sudo apt-get update
6
- - sudo apt-get install redis-server
7
- bundler_args: --without development
6
+ - gem install debugger
7
+ # bundler_args: --without development
8
+ services:
9
+ - redis-server
8
10
  script:
9
11
  - bundle exec cucumber
10
12
  - bundle exec rake
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 0.9.7 Refactoring and test improvements
2
+
1
3
  0.9.6 Added support for GNUB DwCA files
2
4
 
3
5
  0.9.4 Gem dependencies updated, added travis support
data/Gemfile CHANGED
@@ -1,17 +1,3 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- gem 'nokogiri', '~> 1.5'
4
- gem 'parsley-store', '~> 0.3.2'
5
- gem 'archive-tar-minitar', '~> 0.5'
6
-
7
- group :development do
8
- gem 'debugger', '~> 1.3'
9
- end
10
-
11
- group :test do
12
- gem 'rspec', '~> 2.13'
13
- gem 'cucumber', '~> 1.3'
14
- gem 'bundler', '~> 1.3'
15
- gem 'jeweler', '~> 1.8'
16
- gem 'jazz_hands', '~> 0.5'
17
- end
3
+ gemspec
data/README.md CHANGED
@@ -3,18 +3,25 @@ Darwin Core Archive
3
3
 
4
4
  [![Gem Version][1]][2]
5
5
  [![Continuous Integration Status][3]][4]
6
- [![Dependency Status][5]][6]
6
+ [![Coverage Status][5]][6]
7
+ [![CodePolice][7]][8]
8
+ [![Dependency Status][9]][10]
7
9
 
8
10
  Darwin Core Archive format is a current standard for information exchange
9
11
  between Global Names Architecture modules. This gem allows you to work with
10
12
  Darwin Core Archive data compressed to either zip or tar.gz files.
11
- More information about Darwing Core Archive can be found on a [GBIF page:][7]
13
+ More information about Darwin Core Archive can be found on a [GBIF page:][11]
12
14
 
13
15
  Installation
14
16
  ------------
15
17
 
16
18
  sudo gem install dwc-archive
17
19
 
20
+ ### System Requirements
21
+
22
+ You need [Redis Server][12] and the `unzip` command-line utility installed.
23
+
24
+
18
25
  Usage
19
26
  -----
20
27
 
@@ -174,6 +181,11 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
174
181
  [2]: http://badge.fury.io/rb/dwc-archive
175
182
  [3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
176
183
  [4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
177
- [5]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
178
- [6]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
179
- [7]: http://bit.ly/2IxcBA
184
+ [5]: https://coveralls.io/repos/GlobalNamesArchitecture/dwc-archive/badge.png
185
+ [6]: https://coveralls.io/r/GlobalNamesArchitecture/dwc-archive
186
+ [7]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive.png
187
+ [8]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive
188
+ [9]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
189
+ [10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
190
+ [11]: http://bit.ly/2IxcBA
191
+ [12]: http://redis.io/topics/quickstart
data/Rakefile CHANGED
@@ -1,28 +1,10 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ require "bundler/gem_tasks"
3
2
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "dwc-archive"
8
- gem.summary = %Q{Handler of Darwin Core Archive files}
9
- gem.description = 'Darwin Core Archive is the current standard exchange ' +
10
- 'format for GLobal Names Architecture modules. ' +
11
- 'This gem makes it easy to incorporate files in ' +
12
- 'Darwin Core Archive format into a ruby project.'
13
- gem.email = "dmozzherin at gmail dot com"
14
- gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
15
- gem.authors = ["Dmitry Mozzherin"]
16
- #gem.add_dependency "fastercsv" if RUBY_VERSION.match /^1.8/
17
- gem.add_dependency 'parsley-store', ">= 0.3.0"
18
- gem.add_development_dependency "rspec", ">= 1.2.9"
19
- gem.add_development_dependency "cucumber", ">= 0"
20
- end
21
- Jeweler::GemcutterTasks.new
22
- rescue LoadError
23
- puts 'Jeweler (or a dependency) not available. ' +
24
- 'Install it with: gem install jeweler'
25
- end
3
+ # Bundler::GemHelper.install_tasks
4
+ # require 'bundler/gem_tasks'
5
+ # require 'rake/testtasks'
6
+ # require 'rubygems'
7
+ # require 'rake'
26
8
 
27
9
  require 'rspec/core/rake_task'
28
10
  RSpec::Core::RakeTask.new(:spec) do |spec|
data/] ADDED
@@ -0,0 +1,40 @@
1
+ require_relative '../spec_helper'
2
+
3
+ describe DarwinCore::Core do
4
+ subject(:dwca) { DarwinCore.new(file_path) }
5
+ subject(:core) { DarwinCore::Core.new(dwca) }
6
+ let(:file_path) { File.join(File.expand_path('../../files', __FILE__),
7
+ file_name) }
8
+ let(:file_name) { 'data.tar.gz' }
9
+
10
+
11
+ describe '.new' do
12
+ it 'creates new core' do
13
+ expect(core).to be_kind_of DarwinCore::Core
14
+ end
15
+ end
16
+
17
+ describe '#id' do
18
+
19
+ it 'returns core id' do
20
+ expect(core.id[:index]).to eq 0
21
+ expect(core.id[:term]).to eq 'http://rs.tdwg.org/dwc/terms/TaxonID'
22
+ end
23
+
24
+ context 'no coreid' do
25
+ let(:file_name) { 'empty_coreid.tar.gz' }
26
+
27
+ it 'does not return coreid' do
28
+ expect(core.id[:index]).to eq 0
29
+ expect(core.id[:term]).to be_nil
30
+ end
31
+ end
32
+ end
33
+
34
+ it 'reads core file from archive' do
35
+
36
+ core.read
37
+
38
+ end
39
+
40
+ end
@@ -0,0 +1,33 @@
1
+ require File.expand_path('../lib/dwc-archive/version', __FILE__)
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = "dwc-archive"
5
+ gem.version = DarwinCore::VERSION
6
+ gem.authors = ["Dmitry Mozzherin"]
7
+ gem.email = ["dmozzherin at gmail dot com"]
8
+ gem.description = %q{Darwin Core Archive is the current standard exchange
9
+ format for GLobal Names Architecture modules.
10
+ This gem makes it easy to incorporate files in
11
+ Darwin Core Archive format into a ruby project.}
12
+ gem.summary = %q{Handler of Darwin Core Archive files}
13
+ gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
14
+ gem.license = "MIT"
15
+
16
+ gem.files = `git ls-files`.split($/)
17
+ gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
+ gem.require_paths = ["lib"]
20
+
21
+ gem.add_runtime_dependency 'nokogiri', '~> 1.6'
22
+ gem.add_runtime_dependency 'parsley-store', '~> 0.3'
23
+ gem.add_runtime_dependency 'archive-tar-minitar', '~> 0.5'
24
+
25
+ gem.add_development_dependency 'rake', '~> 10.1'
26
+ gem.add_development_dependency 'bundler', '~> 1.3'
27
+ gem.add_development_dependency 'rspec', '~> 2.14'
28
+ gem.add_development_dependency 'cucumber', '~> 1.3'
29
+ gem.add_development_dependency 'coveralls', '~> 0.7'
30
+ gem.add_development_dependency 'debugger', '~> 1.6'
31
+ gem.add_development_dependency 'git', '~> 1.2'
32
+ end
33
+
data/lib/dwc-archive.rb CHANGED
@@ -1,34 +1,36 @@
1
1
  # encoding: UTF-8
2
- $:.unshift(File.dirname(__FILE__)) unless
3
- $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
4
- R19 = RUBY_VERSION.split('.')[0..1].join('').to_i > 18
5
- raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless R19
2
+
3
+ recent_ruby = RUBY_VERSION >= '1.9.1'
4
+ raise "IMPORTANT: dwc-archive gem requires ruby >= 1.9.1" unless recent_ruby
5
+
6
6
  require 'fileutils'
7
7
  require 'ostruct'
8
8
  require 'digest'
9
9
  require 'csv'
10
10
  require 'logger'
11
- require 'dwc-archive/xml_reader'
12
- require 'dwc-archive/ingester'
13
- require 'dwc-archive/errors'
14
- require 'dwc-archive/expander'
15
- require 'dwc-archive/archive'
16
- require 'dwc-archive/core'
17
- require 'dwc-archive/extension'
18
- require 'dwc-archive/metadata'
19
- require 'dwc-archive/generator'
20
- require 'dwc-archive/generator_meta_xml'
21
- require 'dwc-archive/generator_eml_xml'
22
- require 'dwc-archive/classification_normalizer'
11
+ require_relative 'dwc-archive/xml_reader'
12
+ require_relative 'dwc-archive/ingester'
13
+ require_relative 'dwc-archive/errors'
14
+ require_relative 'dwc-archive/expander'
15
+ require_relative 'dwc-archive/archive'
16
+ require_relative 'dwc-archive/core'
17
+ require_relative 'dwc-archive/extension'
18
+ require_relative 'dwc-archive/metadata'
19
+ require_relative 'dwc-archive/generator'
20
+ require_relative 'dwc-archive/generator_meta_xml'
21
+ require_relative 'dwc-archive/generator_eml_xml'
22
+ require_relative 'dwc-archive/classification_normalizer'
23
+ require_relative 'dwc-archive/version'
23
24
 
24
25
  class DarwinCore
25
26
 
26
- VERSION = open(File.join(File.dirname(__FILE__), '..', 'VERSION')).readline.strip
27
+ VERSION = DarwinCore::VERSION
28
+ DEFAULT_TMP_DIR = "/tmp"
27
29
 
28
- attr_reader :archive, :core, :metadata, :extensions, :classification_normalizer
30
+ attr_reader :archive, :core, :metadata, :extensions,
31
+ :classification_normalizer
29
32
  alias :eml :metadata
30
33
 
31
- DEFAULT_TMP_DIR = "/tmp"
32
34
 
33
35
  def self.nil_field?(field)
34
36
  return true if [nil, '', '/N'].include?(field)
@@ -68,10 +70,20 @@ class DarwinCore
68
70
  @extensions = get_extensions
69
71
  end
70
72
 
71
- # generates a hash from a classification data with path to each node, list of synonyms and vernacular names.
73
+ def file_name
74
+ File.split(@dwc_path).last
75
+ end
76
+
77
+ def path
78
+ File.expand_path(@dwc_path)
79
+ end
80
+
81
+ # generates a hash from classification data with path to each node,
82
+ # list of synonyms and vernacular names.
72
83
  def normalize_classification
73
84
  return nil unless has_parent_id?
74
- @classification_normalizer ||= DarwinCore::ClassificationNormalizer.new(self)
85
+ @classification_normalizer ||= DarwinCore::ClassificationNormalizer.
86
+ new(self)
75
87
  @classification_normalizer.normalize
76
88
  end
77
89
 
@@ -8,8 +8,11 @@ class DarwinCore
8
8
  @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
9
9
  @expander.unpack
10
10
  if valid?
11
- @meta = DarwinCore::XmlReader.from_xml(open(File.join(@expander.path, 'meta.xml')))
12
- @eml = files.include?("eml.xml") ? DarwinCore::XmlReader.from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
11
+ @meta = DarwinCore::XmlReader.
12
+ from_xml(open(File.join(@expander.path, 'meta.xml')))
13
+ @eml = files.include?("eml.xml") ?
14
+ DarwinCore::XmlReader.
15
+ from_xml(open(File.join(@expander.path, 'eml.xml'))) : nil
13
16
  else
14
17
  clean
15
18
  raise InvalidArchiveError
@@ -57,6 +57,10 @@ class DarwinCore
57
57
  @tree = {}
58
58
  end
59
59
 
60
+ def darwin_core
61
+ @dwc
62
+ end
63
+
60
64
  def add_name_string(name_string)
61
65
  @name_strings[name_string] = 1 unless @name_strings[name_string]
62
66
  end
@@ -8,9 +8,9 @@ class DarwinCore
8
8
  @path = @archive.files_path
9
9
  root_key = @archive.meta.keys[0]
10
10
  @data = @archive.meta[root_key][:core]
11
- raise DarwinCore::CoreFileError.new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
11
+ raise DarwinCore::CoreFileError.
12
+ new("Cannot find core in meta.xml, is meta.xml valid?") unless @data
12
13
  @id = @data[:id][:attributes]
13
- # raise DarwinCore::CoreFileError.new("Cannot find core identifier") unless @id
14
14
  get_attributes(DarwinCore::CoreFileError)
15
15
  end
16
16
  end
@@ -43,12 +43,16 @@ class DarwinCore
43
43
  if file_type.match(/tar.*gzip/i)
44
44
  return proc do |tmp_path, archive_path|
45
45
  FileUtils.mkdir tmp_path
46
- system("tar -zxf #{esc(archive_path)} -C #{tmp_path} > /dev/null 2>&1")
46
+ path = esc(archive_path)
47
+ system("tar -zxf #{path} -C #{tmp_path} > /dev/null 2>&1")
47
48
  end
48
49
  end
49
50
 
50
51
  if file_type.match(/Zip/)
51
- return proc { |tmp_path, archive_path| system("unzip -qq -d #{tmp_path} #{esc(archive_path)} > /dev/null 2>&1") }
52
+ return proc do |tmp_path, archive_path|
53
+ path = esc(archive_path)
54
+ system("unzip -qq -d #{tmp_path} #{path} > /dev/null 2>&1")
55
+ end
52
56
  end
53
57
 
54
58
  return nil
@@ -8,7 +8,8 @@ class DarwinCore
8
8
  @path = File.join(tmp_dir, 'dwc_' + rand(10000000000).to_s)
9
9
  FileUtils.mkdir(@path)
10
10
  @meta_xml_data = {:extensions => []}
11
- @eml_xml_data = {:id => nil, :title => nil, :authors => [], :abstract => nil, :citation => nil, :url => nil}
11
+ @eml_xml_data = {:id => nil, :title => nil,
12
+ :authors => [], :abstract => nil, :citation => nil, :url => nil}
12
13
  @write = 'w:utf-8'
13
14
  end
14
15
 
@@ -22,27 +23,36 @@ class DarwinCore
22
23
  header = data.shift
23
24
  fields = header.map do |f|
24
25
  f.strip!
25
- raise DarwinCore::GeneratorError.new("No header in core data, or header fields are not urls") unless f.match(/^http:\/\//)
26
- f.split("/")[-1]
26
+ err = 'No header in core data, or header fields are not urls'
27
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
28
+ f.split('/')[-1]
27
29
  end
28
30
  data.unshift(fields) if keep_headers
29
31
  ignore_header_lines = keep_headers ? 1 : 0
30
- @meta_xml_data[:core] = {:fields => header, :ignoreHeaderLines => ignore_header_lines, :location => file_name}
32
+ @meta_xml_data[:core] = { fields: header,
33
+ ignoreHeaderLines: ignore_header_lines,
34
+ location:file_name }
31
35
  data.each {|d| c << d}
32
36
  c.close
33
37
  end
34
38
 
35
- def add_extension(data, file_name, keep_headers = true, row_type = "http://rs.tdwg.org/dwc/terms/Taxon")
39
+ def add_extension(data, file_name,
40
+ keep_headers = true,
41
+ row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
36
42
  c = CSV.open(File.join(@path,file_name), @write)
37
43
  header = data.shift
38
44
  fields = header.map do |f|
39
45
  f.strip!
40
- raise DarwinCore::GeneratorError.new("No header in core data, or header fields are not urls") unless f.match(/^http:\/\//)
41
- f.split("/")[-1]
46
+ err = 'No header in core data, or header fields are not urls'
47
+ raise DarwinCore::GeneratorError.new(err) unless f.match(/^http:\/\//)
48
+ f.split('/')[-1]
42
49
  end
43
50
  data.unshift(fields) if keep_headers
44
51
  ignore_header_lines = keep_headers ? 1 : 0
45
- @meta_xml_data[:extensions] << { :fields => header, :ignoreHeaderLines => ignore_header_lines, :location => file_name, :rowType => row_type }
52
+ @meta_xml_data[:extensions] << { fields: header,
53
+ ignoreHeaderLines: ignore_header_lines,
54
+ location: file_name,
55
+ rowType: row_type }
46
56
  data.each { |d| c << d }
47
57
  c.close
48
58
  end
@@ -9,26 +9,28 @@ class DarwinCore
9
9
  end
10
10
 
11
11
  def create
12
+ eml_uri = 'eml://ecoinformatics.org/eml-2.1.1' +
13
+ ' http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd'
12
14
  builder = Nokogiri::XML::Builder.new do |xml|
13
- xml.eml(:packageId => "%s/%s" % [@data[:id], timestamp],
14
- :system => @data[:system] || "http://globalnames.org",
15
- :'xml:lang' => "en",
16
- :'xmlns:eml' => "eml://ecoinformatics.org/eml-2.1.1",
17
- :'xmlns:md' => "eml://ecoinformatics.org/methods-2.1.1",
18
- :'xmlns:proj' => "eml://ecoinformatics.org/project-2.1.1",
19
- :'xmlns:d' => "eml://ecoinformatics.org/dataset-2.1.1",
20
- :'xmlns:res' => "eml://ecoinformatics.org/resource-2.1.1",
21
- :'xmlns:dc' => "http://purl.org/dc/terms/",
22
- :'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
23
- :'xsi:schemaLocation' => "eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd") do
24
- xml.dataset(:id => @data[:id]) do
15
+ xml.eml(packageId: "%s/%s" % [@data[:id], timestamp],
16
+ system: @data[:system] || 'http://globalnames.org',
17
+ :'xml:lang' => 'en',
18
+ :'xmlns:eml' => 'eml://ecoinformatics.org/eml-2.1.1',
19
+ :'xmlns:md' => 'eml://ecoinformatics.org/methods-2.1.1',
20
+ :'xmlns:proj' => 'eml://ecoinformatics.org/project-2.1.1',
21
+ :'xmlns:d' => 'eml://ecoinformatics.org/dataset-2.1.1',
22
+ :'xmlns:res' => 'eml://ecoinformatics.org/resource-2.1.1',
23
+ :'xmlns:dc' => 'http://purl.org/dc/terms/',
24
+ :'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
25
+ :'xsi:schemaLocation' => 'eml_uri') do
26
+ xml.dataset(id: @data[:id]) do
25
27
  xml.title(@data[:title])
26
28
  xml.license(@data[:license])
27
29
  contacts = []
28
30
  @data[:authors].each_with_index do |a, i|
29
31
  creator_id = i + 1
30
32
  contacts << creator_id
31
- xml.creator(:id => creator_id, :scope => 'document') do
33
+ xml.creator(id: creator_id, scope: 'document') do
32
34
  xml.individualName do
33
35
  xml.givenName(a[:first_name])
34
36
  xml.surName(a[:last_name])
@@ -77,7 +79,7 @@ class DarwinCore
77
79
  private
78
80
  def timestamp
79
81
  t = Time.now.getutc.to_a[0..5].reverse
80
- t[0..2].join('-') + "::" + t[-3..-1].join(':')
82
+ t[0..2].join('-') + '::' + t[-3..-1].join(':')
81
83
  end
82
84
  end
83
85
  end