dwc-archive 0.9.11 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +2 -1
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -1
- data/.travis.yml +2 -3
- data/CHANGELOG +2 -0
- data/Gemfile +3 -1
- data/README.md +110 -106
- data/Rakefile +13 -36
- data/dwc-archive.gemspec +24 -19
- data/features/step_definitions/dwc-creator_steps.rb +5 -5
- data/features/step_definitions/dwc-reader_steps.rb +47 -28
- data/features/support/env.rb +1 -1
- data/lib/dwc_archive.rb +121 -0
- data/lib/dwc_archive/archive.rb +59 -0
- data/lib/dwc_archive/classification_normalizer.rb +392 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/{dwc-archive → dwc_archive}/errors.rb +2 -0
- data/lib/dwc_archive/expander.rb +88 -0
- data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
- data/lib/dwc_archive/generator.rb +90 -0
- data/lib/{dwc-archive → dwc_archive}/generator_eml_xml.rb +40 -33
- data/lib/{dwc-archive → dwc_archive}/generator_meta_xml.rb +21 -20
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +106 -0
- data/lib/dwc_archive/metadata.rb +56 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +89 -0
- data/spec/files/generator_eml.xml +1 -1
- data/spec/lib/classification_normalizer_spec.rb +96 -105
- data/spec/lib/core_spec.rb +43 -41
- data/spec/lib/darwin_core_spec.rb +111 -132
- data/spec/lib/generator_eml_xml_spec.rb +12 -11
- data/spec/lib/generator_meta_xml_spec.rb +12 -11
- data/spec/lib/generator_spec.rb +73 -74
- data/spec/lib/gnub_taxon_spec.rb +14 -16
- data/spec/lib/metadata_spec.rb +50 -41
- data/spec/lib/taxon_normalized_spec.rb +62 -65
- data/spec/lib/xml_reader_spec.rb +9 -12
- data/spec/spec_helper.rb +55 -49
- metadata +92 -77
- data/.rvmrc +0 -1
- data/lib/dwc-archive.rb +0 -107
- data/lib/dwc-archive/archive.rb +0 -40
- data/lib/dwc-archive/classification_normalizer.rb +0 -427
- data/lib/dwc-archive/core.rb +0 -19
- data/lib/dwc-archive/expander.rb +0 -85
- data/lib/dwc-archive/generator.rb +0 -86
- data/lib/dwc-archive/ingester.rb +0 -101
- data/lib/dwc-archive/metadata.rb +0 -48
- data/lib/dwc-archive/version.rb +0 -3
- data/lib/dwc-archive/xml_reader.rb +0 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8e61e692fe24c4b6bd56e38a7b19a357ba0acdd3180999e606bd6fc8e55dae29
|
4
|
+
data.tar.gz: 1bf6ee067e24d6cb75d415eaa1f10bcf37a96d7daee7a1935248a6a58a0cb5ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a162b05026795e2e4ea8e92683682fab820fb0cfcea68a8c166eb9dc012246ba01a16af4e17af39a2ae28bf2b6a3b85f877ac53b71e27915f8529cb4d973d67a
|
7
|
+
data.tar.gz: 10a59bee9c60103c91fd5403c4c765903521e640dab3c6ef7ff78eaceb04938aa0bd554e9941bdd90260a2f0a2e4927be3d0ada830b2a4310be405f2ead1184f
|
data/.rspec
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.4
|
3
|
+
Exclude:
|
4
|
+
- features/**/*
|
5
|
+
- .bundle/**/*
|
6
|
+
- bundle_bin/**/*
|
7
|
+
- lib/dwc_archive/ingester.rb
|
8
|
+
- lib/dwc_archive/generator_meta_xml.rb
|
9
|
+
- lib/dwc_archive/generator_eml_xml.rb
|
10
|
+
- lib/dwc_archive/classification_normalizer.rb
|
11
|
+
|
12
|
+
Style/StringLiterals:
|
13
|
+
EnforcedStyle: double_quotes
|
14
|
+
|
15
|
+
Layout/DotPosition:
|
16
|
+
EnforcedStyle: trailing
|
17
|
+
|
18
|
+
Metrics/BlockLength:
|
19
|
+
Exclude:
|
20
|
+
- 'Rakefile'
|
21
|
+
- '*.gemspec'
|
22
|
+
- '**/*.rake'
|
23
|
+
- 'spec/**/*spec.rb'
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.5.1
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -19,136 +19,140 @@ Installation
|
|
19
19
|
|
20
20
|
### System Requirements
|
21
21
|
|
22
|
-
You need [Redis Server][12] and unzip library installed
|
22
|
+
You need [Redis Server][12] and unzip library installed
|
23
23
|
|
24
24
|
|
25
25
|
Usage
|
26
26
|
-----
|
27
27
|
|
28
|
-
|
29
|
-
|
28
|
+
```ruby
|
29
|
+
require 'rubygems'
|
30
|
+
require 'dwc-archive'
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
|
33
|
+
dwc.archive.files # the archive file list
|
34
|
+
dwc.metadata.data # summary of metadata from eml.xml if it exists
|
35
|
+
dwc.metadata.authors # authors of the archive
|
36
|
+
dwc.core.data # summary of DarwinCore main file
|
37
|
+
dwc.core.file_path # path to the DarwinCore main file
|
38
|
+
dwc.extensions # array of DarwinCore Star extensions
|
39
|
+
dwc.extensions[0].data # summary for an extension
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
# read content of the core data file into memory, or use it with a block
|
42
|
+
# it returns an array of arrays of data
|
43
|
+
# rows that had a wrong encoding will be collected into the errors array
|
44
|
+
data, errors = dwc.core.read
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# read content using a block, getting back results in sets of 100 rows each
|
47
|
+
results = []
|
48
|
+
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
49
|
+
results << [data, errors]
|
50
|
+
end
|
51
|
+
results << [tail_data, tail_errors]
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
# read content of an extension data file into memory
|
54
|
+
data, errors = dwc.core.extensions[0].read
|
54
55
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
# read content of an extension data file using a block
|
57
|
+
results = []
|
58
|
+
tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
|
59
|
+
results << [data, errors]
|
60
|
+
end
|
61
|
+
results << [tail_data, tail_errors]
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
# normalize names in classification collecting together synonyms,
|
64
|
+
# canonical names, vernacular names and associating paths to taxons
|
65
|
+
# in a classification distributed as DwCA file
|
65
66
|
|
66
|
-
|
67
|
+
result = dwc.normalize_classification
|
67
68
|
|
68
|
-
|
69
|
+
# for a finer control over normalization:
|
69
70
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
71
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
72
|
+
cn.normalize
|
73
|
+
# if you don't want to generate path consisting of canonical forms
|
74
|
+
# of ancestors to a taxon
|
75
|
+
cn.normalize(:with_canonical_names => false)
|
75
76
|
|
76
|
-
|
77
|
-
|
77
|
+
# if you don't want to ingest information from extensions
|
78
|
+
cn.normalize(:with_extensions => false)
|
78
79
|
|
79
|
-
|
80
|
-
|
80
|
+
# to get a flat hash of nodes with attached vernacular names and synonyms
|
81
|
+
normalized_data = cn.normalized_data
|
81
82
|
|
82
|
-
|
83
|
-
|
83
|
+
# to get a representation of tree organization as a hash
|
84
|
+
classification_tree = cn.tree
|
84
85
|
|
85
|
-
|
86
|
-
|
86
|
+
# to get list of all name strings used as scientific or vernacular names
|
87
|
+
all_name_strings = cn.name_strings
|
87
88
|
|
88
|
-
|
89
|
-
|
89
|
+
# to get list of errors generated during the normalization
|
90
|
+
errors = cn.error_names
|
90
91
|
|
91
|
-
|
92
|
+
DarwinCore.clean_all # remove all expanded archives
|
93
|
+
```
|
92
94
|
|
93
95
|
Creating a DarwinCore Archive file
|
94
96
|
----------------------------------
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
:
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
98
|
+
```ruby
|
99
|
+
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
100
|
+
|
101
|
+
core = [
|
102
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
103
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
104
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
105
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
106
|
+
[1, 0, "Plantae", "kingdom"],
|
107
|
+
[2, 1, "Betula", "genus"],
|
108
|
+
[3, 2, "Betula verucosa", "species"]
|
109
|
+
]
|
110
|
+
|
111
|
+
vernacular_names = [
|
112
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
113
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
114
|
+
[1, "Plants"],
|
115
|
+
[1, "Растения"],
|
116
|
+
[2, "Birch"],
|
117
|
+
[2, "Береза"],
|
118
|
+
[3, "Weeping Birch"],
|
119
|
+
[3, "Береза плакучая"]
|
120
|
+
]
|
121
|
+
|
122
|
+
eml = {
|
123
|
+
:id => '1234',
|
124
|
+
:license => 'http://creativecommons.org/licenses/by-sa/3.0/',
|
125
|
+
:title => 'Test Classification',
|
126
|
+
:authors => [
|
127
|
+
{ :first_name => 'John',
|
128
|
+
:last_name => 'Doe',
|
129
|
+
:email => 'jdoe@example.com',
|
130
|
+
:organization => 'Example',
|
131
|
+
:position => 'Assistant Professor',
|
132
|
+
:url => 'http://example.org' },
|
133
|
+
{ :first_name => 'Jane',
|
134
|
+
:last_name => 'Doe',
|
135
|
+
:email => 'jane@example.com' }
|
136
|
+
],
|
137
|
+
:metadata_providers => [
|
138
|
+
{ :first_name => 'Jim',
|
139
|
+
:last_name => 'Doe',
|
140
|
+
:email => 'jimdoe@example.com',
|
141
|
+
:url => 'http://aggregator.example.org' }],
|
142
|
+
:abstract => 'test classification',
|
143
|
+
:citation =>
|
144
|
+
'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
|
145
|
+
:url => 'http://example.com'
|
146
|
+
}
|
147
|
+
|
148
|
+
gen.add_core(core, 'core.txt')
|
149
|
+
gen.add_extension(vernacular_names,
|
150
|
+
'vernacular_names.txt',
|
151
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
152
|
+
gen.add_meta_xml
|
153
|
+
gen.add_eml_xml(eml)
|
154
|
+
gen.pack
|
155
|
+
```
|
152
156
|
|
153
157
|
Logging
|
154
158
|
-------
|
data/Rakefile
CHANGED
@@ -1,44 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# Bundler::GemHelper.install_tasks
|
4
|
-
# require 'bundler/gem_tasks'
|
5
|
-
# require 'rake/testtasks'
|
6
|
-
# require 'rubygems'
|
7
|
-
# require 'rake'
|
8
|
-
|
9
|
-
require 'rspec/core/rake_task'
|
10
|
-
RSpec::Core::RakeTask.new(:spec) do |spec|
|
11
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
12
|
-
end
|
1
|
+
# frozen_string_literal: true
|
13
2
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
# task :spec => :check_dependencies
|
20
|
-
|
21
|
-
begin
|
22
|
-
require 'cucumber/rake/task'
|
23
|
-
Cucumber::Rake::Task.new(:features)
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
require "cucumber/rake/task"
|
24
6
|
|
25
|
-
|
26
|
-
|
27
|
-
task :features do
|
28
|
-
abort 'Cucumber is not available. In order to run features, ' +
|
29
|
-
'you must: sudo gem install cucumber'
|
30
|
-
end
|
7
|
+
RSpec::Core::RakeTask.new(:rspec) do |rspec|
|
8
|
+
rspec.pattern = "spec/**/*_spec.rb"
|
31
9
|
end
|
32
10
|
|
33
|
-
|
34
|
-
task :irb, [:script] do |t, args|
|
35
|
-
ARGV.clear
|
11
|
+
Cucumber::Rake::Task.new(:features)
|
36
12
|
|
37
|
-
|
38
|
-
|
13
|
+
# task rspec: :check_dependencies
|
14
|
+
task features: :check_dependencies
|
39
15
|
|
40
|
-
|
41
|
-
|
16
|
+
desc "open an irb session preloaded with this library"
|
17
|
+
task :console do
|
18
|
+
sh "irb -I lib -I extra -r dwc_archive.rb"
|
42
19
|
end
|
43
20
|
|
44
|
-
task :
|
21
|
+
task default: :rspec
|
data/dwc-archive.gemspec
CHANGED
@@ -1,33 +1,38 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require File.expand_path("lib/dwc_archive/version", __dir__)
|
2
4
|
|
3
5
|
Gem::Specification.new do |gem|
|
4
6
|
gem.name = "dwc-archive"
|
5
7
|
gem.version = DarwinCore::VERSION
|
6
8
|
gem.authors = ["Dmitry Mozzherin"]
|
7
9
|
gem.email = ["dmozzherin at gmail dot com"]
|
8
|
-
gem.description =
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
gem.summary =
|
10
|
+
gem.description = "Darwin Core Archive is the current standard exchange " \
|
11
|
+
"format for GLobal Names Architecture modules. " \
|
12
|
+
"This gem makes it easy to incorporate files in " \
|
13
|
+
"Darwin Core Archive format into a ruby project."
|
14
|
+
gem.summary = "Handler of Darwin Core Archive files"
|
13
15
|
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
14
16
|
gem.license = "MIT"
|
15
17
|
|
16
|
-
gem.
|
18
|
+
gem.required_ruby_version = ">= 2.4.1"
|
19
|
+
gem.files = `git ls-files`.split("\n").map(&:strip)
|
17
20
|
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
21
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
19
22
|
gem.require_paths = ["lib"]
|
20
23
|
|
21
|
-
gem.add_runtime_dependency
|
22
|
-
gem.add_runtime_dependency
|
23
|
-
gem.add_runtime_dependency
|
24
|
-
|
25
|
-
gem.add_development_dependency 'rake', '~> 10.1'
|
26
|
-
gem.add_development_dependency 'bundler', '~> 1.3'
|
27
|
-
gem.add_development_dependency 'rspec', '~> 2.14'
|
28
|
-
gem.add_development_dependency 'cucumber', '~> 1.3'
|
29
|
-
gem.add_development_dependency 'coveralls', '~> 0.7'
|
30
|
-
gem.add_development_dependency 'debugger', '~> 1.6'
|
31
|
-
gem.add_development_dependency 'git', '~> 1.2'
|
32
|
-
end
|
24
|
+
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
25
|
+
gem.add_runtime_dependency "nokogiri", "~> 1.8"
|
26
|
+
gem.add_runtime_dependency "parsley-store", "~> 0.3"
|
33
27
|
|
28
|
+
# gem.add_development_dependency "byebug", "~> 3.4"
|
29
|
+
gem.add_development_dependency "bundler", "~> 1.16"
|
30
|
+
gem.add_development_dependency "coveralls", "~> 0.8"
|
31
|
+
gem.add_development_dependency "cucumber", "~> 3.1"
|
32
|
+
gem.add_development_dependency "git", "~> 1.4"
|
33
|
+
gem.add_development_dependency "rake", "~> 12.3"
|
34
|
+
gem.add_development_dependency "rspec", "~> 3.7"
|
35
|
+
gem.add_development_dependency "rubocop", "~> 0.58"
|
36
|
+
gem.add_development_dependency "solargraph", "~> 0.23"
|
37
|
+
gem.add_development_dependency "travis", "~> 1.8"
|
38
|
+
end
|
@@ -19,7 +19,7 @@ end
|
|
19
19
|
|
20
20
|
Then /^these data should be saved as "([^\"]*)" file$/ do |file_name|
|
21
21
|
file = File.join(@gen.path, file_name)
|
22
|
-
@gen.files.include?(file_name).should
|
22
|
+
@gen.files.include?(file_name).should be true
|
23
23
|
csv = CSV.open(file).count.should == 4
|
24
24
|
end
|
25
25
|
|
@@ -51,7 +51,7 @@ end
|
|
51
51
|
Then /^data are saved as "([^\"]*)" and "([^\"]*)"$/ do |file_name_1, file_name_2|
|
52
52
|
[file_name_1, file_name_2].each do |file_name|
|
53
53
|
file = File.join(@gen.path, file_name)
|
54
|
-
@gen.files.include?(file_name).should
|
54
|
+
@gen.files.include?(file_name).should be true
|
55
55
|
csv = CSV.open(file).count.should > 1
|
56
56
|
end
|
57
57
|
end
|
@@ -86,7 +86,7 @@ end
|
|
86
86
|
|
87
87
|
Then /^there should be "([^\"]*)" file with core and extensions informations$/ do |file_name|
|
88
88
|
meta = File.join(@gen.path, file_name)
|
89
|
-
@gen.files.include?(file_name).should
|
89
|
+
@gen.files.include?(file_name).should be true
|
90
90
|
dom = Nokogiri::XML(open(File.join(@gen.path, file_name)))
|
91
91
|
dom.xpath('//xmlns:core//xmlns:location').text.should == 'darwin_core.txt'
|
92
92
|
dom.xpath('//xmlns:extension[1]//xmlns:location').text.should == 'vernacular.txt'
|
@@ -94,7 +94,7 @@ end
|
|
94
94
|
|
95
95
|
Then /^there should be "([^\"]*)" file with authoriship information$/ do |file_name|
|
96
96
|
eml = File.join(@gen.path, file_name)
|
97
|
-
@gen.files.include?(file_name).should
|
97
|
+
@gen.files.include?(file_name).should be true
|
98
98
|
end
|
99
99
|
|
100
100
|
Given /^a path to a new file \- "([^\"]*)"$/ do |file_name|
|
@@ -107,6 +107,6 @@ end
|
|
107
107
|
|
108
108
|
Then /^there should be a valid new archive file$/ do
|
109
109
|
dwc = DarwinCore.new('/tmp/dwc.tar.gz')
|
110
|
-
dwc.archive.valid?.should
|
110
|
+
dwc.archive.valid?.should be true
|
111
111
|
end
|
112
112
|
|