dwc-archive 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Dmitry Mozzherin
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,38 @@
1
+ = Darwin Core Archive
2
+
3
+ Darwin Core Archive format is a current standard for information exchange between Global Names Architecture modules. This gem allows you to work with Darwin Core Archive data compressed to either zip or tar.gz files. More information about Darwin Core Archive can be found on a GBIF page[http://www.gbif.org/informatics/standards-and-tools/publishing-data/data-standards/darwin-core-archives/].
4
+
5
+ == Installation
6
+
7
+ Update to latest rubygems (v >= 1.3.6) which adds gemcutter sources by default.
8
+
9
+ sudo gem install dwc-archive
10
+
11
+ == Usage
12
+
13
+ require 'rubygems'
14
+ require 'dwc-archive'
15
+
16
+ dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
17
+ dwc.archive.files # the archive file list
18
+ dwc.metadata.data # summary of metadata from eml.xml if it exists
19
+ dwc.metadata.authors # authors of the archive
20
+ dwc.core.data # summary of DarwinCore main file
21
+ dwc.core.file_path # path to the DarwinCore main file
22
+ dwc.extensions # array of DarwinCore Star extensions
23
+ dwc.extensions[0].data # summary for an extension
24
+
25
+ == Note on Patches/Pull Requests
26
+
27
+ * Fork the project.
28
+ * Make your feature addition or bug fix.
29
+ * Add tests for it. This is important so I don't break it in a
30
+ future version unintentionally.
31
+ * Commit, do not mess with rakefile, version, or history.
32
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
33
+ * Send me a pull request. Bonus points for topic branches.
34
+
35
+
36
+ == Copyright
37
+
38
+ Copyright (c) 2010 Dmitry Mozzherin. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "dwc-archive"
8
+ gem.summary = %Q{Handler of Darwin Core Archive files}
9
+ gem.description = %Q{Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.}
10
+ gem.email = "dmozzherin at gmail dot com"
11
+ gem.homepage = "http://github.com/dimus/dwc-archive"
12
+ gem.authors = ["Dmitry Mozzherin"]
13
+ gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ gem.add_development_dependency "cucumber", ">= 0"
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+
22
+ require 'spec/rake/spectask'
23
+ Spec::Rake::SpecTask.new(:spec) do |spec|
24
+ spec.libs << 'lib' << 'spec'
25
+ spec.spec_files = FileList['spec/**/*_spec.rb']
26
+ end
27
+
28
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
29
+ spec.libs << 'lib' << 'spec'
30
+ spec.pattern = 'spec/**/*_spec.rb'
31
+ spec.rcov = true
32
+ end
33
+
34
+ task :spec => :check_dependencies
35
+
36
+ begin
37
+ require 'cucumber/rake/task'
38
+ Cucumber::Rake::Task.new(:features)
39
+
40
+ task :features => :check_dependencies
41
+ rescue LoadError
42
+ task :features do
43
+ abort "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
44
+ end
45
+ end
46
+
47
+ task :default => :spec
48
+
49
+ require 'rake/rdoctask'
50
+ Rake::RDocTask.new do |rdoc|
51
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
52
+
53
+ rdoc.rdoc_dir = 'rdoc'
54
+ rdoc.title = "dwc-archive #{version}"
55
+ rdoc.rdoc_files.include('README*')
56
+ rdoc.rdoc_files.include('lib/**/*.rb')
57
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.2
@@ -0,0 +1,74 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{dwc-archive}
8
+ s.version = "0.1.2"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Dmitry Mozzherin"]
12
+ s.date = %q{2010-03-18}
13
+ s.description = %q{Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.}
14
+ s.email = %q{dmozzherin at gmail dot com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "dwc-archive.gemspec",
27
+ "features/dwc-archive.feature",
28
+ "features/step_definitions/dwc-archive_steps.rb",
29
+ "features/support/env.rb",
30
+ "lib/dwc-archive.rb",
31
+ "lib/dwc-archive/.expander.rb.swo",
32
+ "lib/dwc-archive/archive.rb",
33
+ "lib/dwc-archive/core.rb",
34
+ "lib/dwc-archive/expander.rb",
35
+ "lib/dwc-archive/extension.rb",
36
+ "lib/dwc-archive/metadata.rb",
37
+ "lib/ruby_extensions.rb",
38
+ "spec/dwc-archive_spec.rb",
39
+ "spec/files/data.tar.gz",
40
+ "spec/files/data.zip",
41
+ "spec/files/eml.xml",
42
+ "spec/files/meta.xml",
43
+ "spec/lib/ruby_extenstions_spec.rb",
44
+ "spec/spec.opts",
45
+ "spec/spec_helper.rb"
46
+ ]
47
+ s.homepage = %q{http://github.com/dimus/dwc-archive}
48
+ s.rdoc_options = ["--charset=UTF-8"]
49
+ s.require_paths = ["lib"]
50
+ s.rubygems_version = %q{1.3.6}
51
+ s.summary = %q{Handler of Darwin Core Archive files}
52
+ s.test_files = [
53
+ "spec/dwc-archive_spec.rb",
54
+ "spec/lib/ruby_extenstions_spec.rb",
55
+ "spec/spec_helper.rb"
56
+ ]
57
+
58
+ if s.respond_to? :specification_version then
59
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
60
+ s.specification_version = 3
61
+
62
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
63
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
64
+ s.add_development_dependency(%q<cucumber>, [">= 0"])
65
+ else
66
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
67
+ s.add_dependency(%q<cucumber>, [">= 0"])
68
+ end
69
+ else
70
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
71
+ s.add_dependency(%q<cucumber>, [">= 0"])
72
+ end
73
+ end
74
+
@@ -0,0 +1,34 @@
1
+ Feature: Creation of a Darwin Core Archive
2
+ In order to start working with Darwin Core Archive file
3
+ A user should be able to initiate a dwc object from a file
4
+ So I want to implement handling of dwc object creation
5
+
6
+ Scenario: Creating Darwin Core Archive object
7
+ Given path to a dwc file "data.tar.gz"
8
+ When I create a new DarwinCore::Archive instance
9
+ Then I should find that the archive is valid
10
+ Then I should see what files the archive has
11
+
12
+ When I delete expanded files
13
+ Then they should disappear
14
+
15
+ Scenario: Instantiating DarwinCore with tar.gz file
16
+ Given path to a dwc file "data.tar.gz"
17
+ When I create a new DarwinCore instance
18
+ Then instance should have a valid archive
19
+ And instance should have a core
20
+ When I check core data
21
+ Then I should find core.properties
22
+ And core.file_path
23
+ And core.id
24
+ And core.fields
25
+ Then DarwinCore instance should have an extensions array
26
+ And every extension in array should be an instance of DarwinCore::Extension
27
+ And extension should have properties, data, file_path, coreid, fields
28
+ Then DarwinCore instance should have dwc.metadata object
29
+ And I should find id, title, creators, metadata provider
30
+
31
+ Scenario: Instantiating DarwinCore with zip file
32
+ Given path to a dwc file "data.zip"
33
+ When I create a new DarwinCore instance
34
+ Then instance should have a valid archive
@@ -0,0 +1,95 @@
1
+ require 'ruby-debug'
2
+
3
+ Given /^path to a dwc file "([^\"]*)"$/ do |arg1|
4
+ @dwca_file = File.expand_path(File.dirname(__FILE__) + "../../../spec/files/" + arg1)
5
+ @tmp_dir = "/tmp"
6
+ end
7
+
8
+ When /^I create a new DarwinCore::Archive instance$/ do
9
+ @dwca = DarwinCore::Archive.new(@dwca_file, @tmp_dir)
10
+ end
11
+
12
+ Then /^I should find that the archive is valid$/ do
13
+ @dwca.valid?.should be_true
14
+ end
15
+
16
+ Then /^I should see what files the archive has$/ do
17
+ @dwca.files.should == ["DarwinCore.txt", "VernacularName.txt", "eml.xml", "leptogastrinae.xlsx", "meta.xml", "metadata.txt"]
18
+ end
19
+
20
+ When /^I delete expanded files$/ do
21
+ @dwca.clean
22
+ end
23
+
24
+ Then /^they should disappear$/ do
25
+ @dwca.files.should be_nil
26
+ end
27
+
28
+ When /^I create a new DarwinCore instance$/ do
29
+ @dwc = DarwinCore.new(@dwca_file)
30
+ end
31
+
32
+ Then /^instance should have a valid archive$/ do
33
+ @dwc.archive.valid?.should be_true
34
+ end
35
+
36
+ Then /^instance should have a core$/ do
37
+ @dwc.core.class.should == DarwinCore::Core
38
+ end
39
+
40
+ When /^I check core data$/ do
41
+ @core = @dwc.core
42
+ end
43
+
44
+ Then /^I should find core.properties$/ do
45
+ @core.properties.class.should == Hash
46
+ @core.properties[:encoding].should == "UTF-8"
47
+ @core.properties[:fieldsTerminatedBy].should == "\\t"
48
+ @core.properties[:linesTerminatedBy].should == "\\n"
49
+ end
50
+
51
+ And /^core\.file_path$/ do
52
+ @core.file_path.should match(/\/tmp\/dwc_[\d]+\/DarwinCore.txt/)
53
+ end
54
+
55
+ And /^core\.id$/ do
56
+ @core.id.should == {:index => 0, :term => 'http://rs.tdwg.org/dwc/terms/TaxonID'}
57
+ end
58
+
59
+ And /^core\.fields$/ do
60
+ @core.fields.size.should == 5
61
+ end
62
+ Then /^DarwinCore instance should have dwc\.metadata object$/ do
63
+ @dwc.metadata.class.should == DarwinCore::Metadata
64
+ end
65
+
66
+ And /^I should find id, title, creators, metadata provider$/ do
67
+ @dwc.metadata.id.should == 'leptogastrinae:version:2.5'
68
+ @dwc.metadata.title.should == 'Leptogastrinae (Diptera: Asilidae) Classification'
69
+ @dwc.metadata.authors.should == [
70
+ {:last_name=>"Bayless", :email=>"keith.bayless@gmail.com", :first_name=>"Keith"},
71
+ {:last_name=>"Dikow", :email=>"dshorthouse@eol.org", :first_name=>"Torsten"}]
72
+ @dwc.metadata.abstract.should == 'These are all the names in the Leptogastrinae classification.'
73
+ @dwc.metadata.citation.should == 'Dikow, Torsten. 2010. The Leptogastrinae classification.'
74
+ @dwc.metadata.url.should == 'http://leptogastrinae.lifedesks.org/files/leptogastrinae/classification_export/shared/leptogastrinae.tar.gz'
75
+ end
76
+
77
+ Then /^DarwinCore instance should have an extensions array$/ do
78
+ @dwc.extensions.class.should == Array
79
+ @dwc.extensions.size.should == 1
80
+ end
81
+
82
+ And /^every extension in array should be an instance of DarwinCore::Extension$/ do
83
+ classes = @dwc.extensions.map {|e| e.class}.uniq
84
+ classes.size.should == 1
85
+ classes[0].should == DarwinCore::Extension
86
+ end
87
+
88
+ Then /^extension should have properties, data, file_path, coreid, fields$/ do
89
+ ext = @dwc.extensions[0]
90
+ ext.properties.should == {:ignoreHeaderLines=>1, :encoding=>"UTF-8", :rowType=>"http://rs.gbif.org/ipt/terms/1.0/VernacularName", :fieldsEnclosedBy=>"", :fieldsTerminatedBy=>"\\t", :linesTerminatedBy=>"\\n"}
91
+ ext.data.class.should == Hash
92
+ ext.file_path.should match(/\/tmp\/dwc_[\d]+\/VernacularName.txt/)
93
+ ext.coreid.should == {:index=>0}
94
+ ext.fields.should == [{:term=>"http://rs.gbif.org/ecat/terms/vernacularName", :index=>1}, {:term=>"http://rs.gbif.org/thesaurus/languageCode", :index=>2}]
95
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
2
+ require 'dwc-archive'
3
+
4
+ require 'spec/expectations'
@@ -0,0 +1,29 @@
1
+ # encoding: UTF-8
2
+ $:.unshift(File.dirname(__FILE__)) unless
3
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
4
+ require 'ruby_extensions'
5
+ require 'dwc-archive/expander'
6
+ require 'dwc-archive/archive'
7
+ require 'dwc-archive/core'
8
+ require 'dwc-archive/extension'
9
+ require 'dwc-archive/metadata'
10
+
11
# Entry point for reading a Darwin Core Archive.
#
# Building an instance unpacks the archive (via DarwinCore::Archive) and
# exposes four readers:
#
# archive    - the DarwinCore::Archive wrapping the unpacked files
# core       - the DarwinCore::Core built from the archive
# metadata   - the DarwinCore::Metadata built from the archive (alias: eml)
# extensions - Array of DarwinCore::Extension, one per <extension> entry
#              found in the archive's parsed meta.xml (empty when none)
class DarwinCore
  attr_reader :archive, :core, :metadata, :extensions
  alias :eml :metadata

  # dwc_path - path to the archive file.
  # tmp_dir  - directory handed to DarwinCore::Archive for unpacking
  #            (defaults to "/tmp").
  def initialize(dwc_path, tmp_dir = "/tmp")
    @archive = DarwinCore::Archive.new(dwc_path, tmp_dir)
    @core = DarwinCore::Core.new(@archive)
    @metadata = DarwinCore::Metadata.new(@archive)
    @extensions = get_extensions
  end

  private

  # Builds one DarwinCore::Extension per :extension entry under the first
  # (root) key of the parsed meta.xml. A single entry is parsed as a lone
  # Hash rather than an Array, so it is wrapped before mapping.
  def get_extensions
    root_key = @archive.meta.keys[0]
    ext = @archive.meta[root_key][:extension]
    return [] unless ext
    ext = [ext] unless ext.is_a?(Array)
    ext.map { |e| DarwinCore::Extension.new(@archive, e) }
  end
end
Binary file
@@ -0,0 +1,37 @@
1
+ require 'nokogiri'
2
class DarwinCore
  # An unpacked Darwin Core Archive.
  #
  # On construction the archive file is expanded through DarwinCore::Expander
  # and, when the result contains a meta.xml, the XML descriptors are parsed
  # with Hash.from_xml:
  #
  # meta - parsed meta.xml
  # eml  - parsed eml.xml, or nil when the archive has no eml.xml
  class Archive
    attr_reader :meta, :eml

    # archive_path - path to the archive file.
    # tmp_dir      - directory passed to the expander for unpacking.
    #
    # Raises RuntimeError ('not a valid Darwin Core Archive File') after
    # removing the expanded files when the archive is not valid.
    def initialize(archive_path, tmp_dir)
      @archive_path = archive_path
      @tmp_dir = tmp_dir
      @expander = DarwinCore::Expander.new(@archive_path, @tmp_dir)
      @expander.unpack
      unless valid?
        clean
        raise 'not a valid Darwin Core Archive File'
      end
      @meta = read_xml('meta.xml')
      @eml = files.include?('eml.xml') ? read_xml('eml.xml') : nil
    end

    # True when the archive file exists and its expanded content includes a
    # meta.xml; false otherwise.
    def valid?
      # File.exist? instead of FileTest.exists?, which was removed in Ruby 3.2.
      return false unless File.exist?(@archive_path)
      entries = files
      entries ? entries.include?('meta.xml') : false
    end

    # File names reported by the expander (nil when nothing is expanded).
    def files
      @expander.files
    end

    # Directory reported by the expander as holding the expanded files.
    def files_path
      @expander.path
    end

    # Removes the expanded files.
    def clean
      @expander.clean
    end

    private

    # Parses one XML file from the expanded directory. The block form of
    # File.open closes the handle once parsing is done.
    def read_xml(file_name)
      File.open(File.join(@expander.path, file_name)) { |io| Hash.from_xml(io) }
    end
  end
end
@@ -0,0 +1,34 @@
1
class DarwinCore
  # Description of the core data file of an archive, taken from the :core
  # entry under the first (root) key of the archive's parsed meta.xml.
  class Core
    # archive - object responding to files_path and meta.
    #
    # Raises RuntimeError when the parsed meta.xml has no :core entry.
    def initialize(archive)
      @archive = archive
      @path = @archive.files_path
      root_key = @archive.meta.keys[0]
      @core = @archive.meta[root_key][:core]
      raise "Cannot found core in meta.xml, is meta.xml valid?" unless @core
    end

    # The raw parsed :core entry.
    def data
      @core
    end

    # The :attributes of the core entry.
    def properties
      @core[:attributes]
    end

    # Path of the core data file inside the expanded archive directory.
    # Uses core[:files][:location], falling back to core[:location]; the
    # fallback is also reached when the core entry has no :files at all.
    def file_path
      files = @core[:files]
      file = (files.is_a?(Hash) && files[:location]) || @core[:location]
      File.join(@path, file)
    end

    # The :attributes of the core :id entry.
    def id
      @core[:id][:attributes]
    end

    # Array with the :attributes of every :field entry; empty when the core
    # entry declares no fields. A lone field is parsed as a Hash, so it is
    # normalised in place to a one-element Array first.
    def fields
      return [] unless @core[:field]
      @core[:field] = [@core[:field]] unless @core[:field].is_a?(Array)
      @core[:field].map { |f| f[:attributes] }
    end
  end
end
@@ -0,0 +1,71 @@
1
require 'fileutils'
require 'shellwords'

class DarwinCore
  # Expands an archive file (tar.gz or zip) into a randomly named "dwc_<n>"
  # directory under tmp_dir by shelling out to `file`, `tar` and `unzip`.
  class Expander
    # archive_path - path to the archive file.
    # tmp_dir      - directory under which the scratch directory is created.
    def initialize(archive_path, tmp_dir)
      @archive_path = archive_path
      @tmp_dir = tmp_dir
      @path = File.join(tmp_dir, 'dwc_' + rand(10000000000).to_s)
      @unpacker = get_unpacker
    end

    # Removes any previous expansion, then unpacks the archive. Does nothing
    # beyond the cleanup when the file type was not recognised.
    def unpack
      clean
      @unpacker.call(@path, @archive_path) if @unpacker
    end

    # Directory that contains meta.xml: either the scratch directory itself
    # or one of its immediate subdirectories. nil when none is found.
    # A non-nil result is memoised.
    def path
      @files_path ||= files_path
    end

    # Deletes the scratch directory if it exists.
    def clean
      FileUtils.rm_rf(@path) if File.exist?(@path)
    end

    # Sorted names of the entries in #path, or nil when there is no such
    # directory (nothing unpacked, or already cleaned).
    def files
      return nil unless path && File.exist?(path)
      path_entries(path)
    end

    private

    # Picks the unpack command from the output of `file -b -z`.
    # Returns a proc taking (tmp_path, archive_path), or nil for an
    # unrecognised file type.
    def get_unpacker
      # -b keeps the file name out of the output so a path containing "Zip"
      # or "tar...gzip" cannot be mistaken for the detected type; the path is
      # shell-escaped and the pipe is closed by the block form.
      file_type = IO.popen("file -b -z " + Shellwords.escape(@archive_path)) { |io| io.read }

      if file_type.match(/tar.*gzip/i)
        return proc do |tmp_path, archive_path|
          FileUtils.mkdir tmp_path
          # Argument-list form bypasses the shell, so paths with spaces or
          # metacharacters are passed through untouched.
          system('tar', '-zxvf', archive_path, '-C', tmp_path)
        end
      end

      if file_type.match(/Zip/)
        return proc { |tmp_path, archive_path| system('unzip', '-qq', '-d', tmp_path, archive_path) }
      end

      nil
    end

    # Sorted directory entries without the "." and ".." pseudo entries.
    def path_entries(dir)
      Dir.entries(dir).reject { |e| e == '.' || e == '..' }.sort
    end

    # Locates the directory holding meta.xml (see #path). Returns nil rather
    # than raising when the scratch directory does not exist.
    def files_path
      return nil unless File.directory?(@path)
      entries = path_entries(@path)
      return @path if entries.include?('meta.xml')
      entries.map { |e| File.join(@path, e) }.find do |check_path|
        File.directory?(check_path) && path_entries(check_path).include?('meta.xml')
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
class DarwinCore
  # Description of one extension data file of an archive, wrapping a single
  # parsed <extension> entry of meta.xml.
  class Extension
    # archive   - object responding to files_path.
    # extension - the parsed extension entry (a Hash).
    def initialize(archive, extension)
      @archive = archive
      @path = @archive.files_path
      @extension = extension
    end

    # The raw parsed extension entry.
    def data
      @extension
    end

    # The :attributes of the extension entry.
    def properties
      @extension[:attributes]
    end

    # Path of the extension data file inside the expanded archive directory,
    # built from extension[:files][:location].
    def file_path
      file = @extension[:files][:location]
      File.join(@path, file)
    end

    # The :attributes of the extension :coreid entry.
    def coreid
      @extension[:coreid][:attributes]
    end

    # Array with the :attributes of every :field entry; empty when the
    # extension declares no fields. A lone field is parsed as a Hash, so it
    # is normalised in place to a one-element Array first.
    def fields
      return [] unless @extension[:field]
      @extension[:field] = [@extension[:field]] unless @extension[:field].is_a?(Array)
      @extension[:field].map { |f| f[:attributes] }
    end
  end
end
@@ -0,0 +1,38 @@
1
class DarwinCore
  # Read access to the parsed eml.xml of an archive (archive.eml), which is
  # nil when the archive carries no eml.xml. Every accessor returns nil when
  # the value it looks for is not present.
  class Metadata
    # archive - object responding to eml.
    def initialize(archive)
      @archive = archive
      @metadata = @archive.eml
    end

    # The raw parsed eml.xml (or nil).
    def data
      @metadata
    end

    # The id attribute of the dataset element.
    def id
      lookup(@metadata, :eml, :dataset, :attributes, :id)
    end

    # The dataset title.
    def title
      lookup(@metadata, :eml, :dataset, :title)
    end

    # Array of {:first_name, :last_name, :email} hashes, one per dataset
    # creator; nil when the dataset has no creator. A lone creator is parsed
    # as a Hash, so it is normalised in place to a one-element Array first.
    # Name parts or e-mail missing from a creator come back as nil.
    def authors
      dataset = lookup(@metadata, :eml, :dataset)
      return nil unless dataset.is_a?(Hash) && dataset[:creator]
      dataset[:creator] = [dataset[:creator]] unless dataset[:creator].is_a?(Array)
      dataset[:creator].map do |c|
        {
          :first_name => lookup(c, :individualName, :givenName),
          :last_name => lookup(c, :individualName, :surName),
          :email => lookup(c, :electronicMailAddress)
        }
      end
    end

    # The dataset abstract.
    def abstract
      lookup(@metadata, :eml, :dataset, :abstract)
    end

    # The citation found under additionalMetadata/metadata.
    def citation
      lookup(@metadata, :eml, :additionalMetadata, :metadata, :citation)
    end

    # The online distribution URL of the dataset.
    def url
      lookup(@metadata, :eml, :dataset, :distribution, :online, :url)
    end

    private

    # Walks nested Hashes key by key, yielding nil as soon as a level is not
    # a Hash, so absent branches are reported as nil without rescuing
    # unrelated errors.
    def lookup(node, *keys)
      keys.inject(node) { |current, key| current.is_a?(Hash) ? current[key] : nil }
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # USAGE: Hash.from_xml(YOUR_XML_STRING)
2
+ require 'nokogiri'
3
+ # modified from http://stackoverflow.com/questions/1230741/convert-a-nokogiri-document-to-a-ruby-hash/1231297#1231297
4
+
5
# Core extensions that turn an XML document into nested Hashes.
class Hash
  class << self
    # Parses XML (anything Nokogiri::XML accepts) and returns a Hash of the
    # form { :root_element_name => converted_content }.
    #
    # Returns nil when the input cannot be converted (for example when the
    # parsed document has no root element). Only StandardError is caught, so
    # interrupts and exit requests still propagate.
    def from_xml(xml_io)
      result = Nokogiri::XML(xml_io)
      { result.root.name.to_sym => xml_node_to_hash(result.root) }
    rescue StandardError
      nil
    end

    # Converts one node recursively.
    #
    # * A non-element node becomes its content (see #prepare).
    # * An element whose only child is a text node becomes that text; any
    #   attributes of such an element are not included in the result.
    # * Any other element becomes a Hash: attributes under :attributes,
    #   child elements under their name as a Symbol. Children that share a
    #   name are collected into an Array. Text nodes that have siblings are
    #   skipped.
    def xml_node_to_hash(node)
      return prepare(node.content.to_s) unless node.element?

      result_hash = {}
      if node.attributes != {}
        result_hash[:attributes] = {}
        node.attributes.each_value do |attribute|
          result_hash[:attributes][attribute.name.to_sym] = prepare(attribute.value)
        end
      end

      node.children.each do |child|
        result = xml_node_to_hash(child)

        if child.name == "text"
          return prepare(result) unless child.next_sibling || child.previous_sibling
          next
        end

        key = child.name.to_sym
        if !result_hash[key]
          result_hash[key] = prepare(result)
        elsif result_hash[key].is_a?(Array)
          result_hash[key] << prepare(result)
        else
          result_hash[key] = [result_hash[key]] << prepare(result)
        end
      end

      result_hash
    end

    # Turns a String holding a canonical integer ("12", "-3") into an
    # Integer; every other value, including strings such as "012" or "1.5",
    # is returned unchanged.
    def prepare(data)
      (data.class == String && data.to_i.to_s == data) ? data.to_i : data
    end
  end

  # Builds a Struct named struct_name with this hash's keys as members and
  # its values as the member values.
  def to_struct(struct_name)
    Struct.new(struct_name, *keys).new(*values)
  end
end
63
+
64
+
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "DwcArchive" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
Binary file
Binary file
@@ -0,0 +1,46 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <eml:eml
3
+ packageId="eml.1.1" system="knb"
4
+ xmlns:eml="eml://ecoinformatics.org/eml-2.1.0"
5
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
6
+ xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.0 eml.xsd">
7
+ <dataset id="leptogastrinae:version:2.5">
8
+ <title>Leptogastrinae (Diptera: Asilidae) Classification</title>
9
+ <creator id="10" scope="document">
10
+ <individualName>
11
+ <givenName>Keith</givenName>
12
+ <surName>Bayless</surName>
13
+ </individualName>
14
+ <electronicMailAddress>keith.bayless@gmail.com</electronicMailAddress>
15
+ </creator>
16
+ <creator id="5" scope="document">
17
+ <individualName>
18
+ <givenName>Torsten</givenName>
19
+ <surName>Dikow</surName>
20
+ </individualName>
21
+ <electronicMailAddress>dshorthouse@eol.org</electronicMailAddress>
22
+ </creator>
23
+ <metadataProvider>
24
+ <organizationName>Encyclopedia of Life: LifeDesks (http://www.lifedesks.org)</organizationName>
25
+ </metadataProvider>
26
+ <pubDate>2010-02-03T01:09:41-05:00</pubDate>
27
+ <abstract>These are all the names in the Leptogastrinae classification.</abstract>
28
+ <intellectualRights>Creative Commons: publicdomain</intellectualRights>
29
+ <distribution>
30
+ <online>
31
+ <url function="download">http://leptogastrinae.lifedesks.org/files/leptogastrinae/classification_export/shared/leptogastrinae.tar.gz</url>
32
+ </online>
33
+ </distribution>
34
+ <contact>
35
+ <references>5</references>
36
+ </contact>
37
+ <publisher>
38
+ <organizationName>The Marine Biological Laboratory</organizationName>
39
+ </publisher>
40
+ </dataset>
41
+ <additionalMetadata>
42
+ <metadata>
43
+ <citation>Dikow, Torsten. 2010. The Leptogastrinae classification.</citation>
44
+ </metadata>
45
+ </additionalMetadata>
46
+ </eml:eml>
@@ -0,0 +1,22 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <archive xmlns="http://rs.tdwg.org/dwc/text/">
3
+ <core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/DarwinCore">
4
+ <files>
5
+ <location>DarwinCore.txt</location>
6
+ </files>
7
+ <id index="0" term="http://rs.tdwg.org/dwc/terms/TaxonID"/>
8
+ <field index="1" term="http://purl.org/dc/terms/source"/>
9
+ <field index="2" term="http://rs.tdwg.org/dwc/terms/ScientificName"/>
10
+ <field index="3" term="http://rs.tdwg.org/dwc/terms/HigherTaxonID"/>
11
+ <field index="4" term="http://rs.tdwg.org/dwc/terms/TaxonRank"/>
12
+ <field index="5" term="http://rs.tdwg.org/dwc/terms/TaxonomicStatus"/>
13
+ </core>
14
+ <extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/ipt/terms/1.0/VernacularName">
15
+ <files>
16
+ <location>VernacularName.txt</location>
17
+ </files>
18
+ <coreid index="0"/>
19
+ <field index="1" term="http://rs.gbif.org/ecat/terms/vernacularName"/>
20
+ <field index="2" term="http://rs.gbif.org/thesaurus/languageCode"/>
21
+ </extension>
22
+ </archive>
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + "/../spec_helper"
2
+
3
+ describe "Hash" do
4
+ it "should parse xml to hash" do
5
+ Hash.public_methods.include?("from_xml").should be_true
6
+ end
7
+
8
+ it "should parse xml" do
9
+ xml_string = open(File.dirname(__FILE__) + "/../files/meta.xml").read
10
+ meta = Hash.from_xml(xml_string)
11
+ meta[:archive].keys.map {|k| k.to_s}.sort.should == %w(core extension)
12
+ meta[:archive][:core].keys.map{|k| k.to_s}.sort.should == ["attributes", "field", "files", "id"]
13
+ end
14
+
15
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'dwc-archive'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dwc-archive
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 2
9
+ version: 0.1.2
10
+ platform: ruby
11
+ authors:
12
+ - Dmitry Mozzherin
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-03-18 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 9
31
+ version: 1.2.9
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: cucumber
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :development
45
+ version_requirements: *id002
46
+ description: Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.
47
+ email: dmozzherin at gmail dot com
48
+ executables: []
49
+
50
+ extensions: []
51
+
52
+ extra_rdoc_files:
53
+ - LICENSE
54
+ - README.rdoc
55
+ files:
56
+ - .document
57
+ - .gitignore
58
+ - LICENSE
59
+ - README.rdoc
60
+ - Rakefile
61
+ - VERSION
62
+ - dwc-archive.gemspec
63
+ - features/dwc-archive.feature
64
+ - features/step_definitions/dwc-archive_steps.rb
65
+ - features/support/env.rb
66
+ - lib/dwc-archive.rb
67
+ - lib/dwc-archive/.expander.rb.swo
68
+ - lib/dwc-archive/archive.rb
69
+ - lib/dwc-archive/core.rb
70
+ - lib/dwc-archive/expander.rb
71
+ - lib/dwc-archive/extension.rb
72
+ - lib/dwc-archive/metadata.rb
73
+ - lib/ruby_extensions.rb
74
+ - spec/dwc-archive_spec.rb
75
+ - spec/files/data.tar.gz
76
+ - spec/files/data.zip
77
+ - spec/files/eml.xml
78
+ - spec/files/meta.xml
79
+ - spec/lib/ruby_extenstions_spec.rb
80
+ - spec/spec.opts
81
+ - spec/spec_helper.rb
82
+ has_rdoc: true
83
+ homepage: http://github.com/dimus/dwc-archive
84
+ licenses: []
85
+
86
+ post_install_message:
87
+ rdoc_options:
88
+ - --charset=UTF-8
89
+ require_paths:
90
+ - lib
91
+ required_ruby_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ segments:
96
+ - 0
97
+ version: "0"
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ requirements: []
106
+
107
+ rubyforge_project:
108
+ rubygems_version: 1.3.6
109
+ signing_key:
110
+ specification_version: 3
111
+ summary: Handler of Darwin Core Archive files
112
+ test_files:
113
+ - spec/dwc-archive_spec.rb
114
+ - spec/lib/ruby_extenstions_spec.rb
115
+ - spec/spec_helper.rb