dwc-archive 0.9.11 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rspec +2 -1
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -1
- data/.travis.yml +4 -7
- data/CHANGELOG +4 -0
- data/Gemfile +3 -1
- data/LICENSE +1 -1
- data/README.md +114 -109
- data/Rakefile +13 -36
- data/dwc-archive.gemspec +23 -19
- data/features/step_definitions/dwc-creator_steps.rb +5 -5
- data/features/step_definitions/dwc-reader_steps.rb +47 -28
- data/features/support/env.rb +1 -1
- data/lib/dwc_archive.rb +124 -0
- data/lib/dwc_archive/archive.rb +60 -0
- data/lib/dwc_archive/classification_normalizer.rb +382 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
- data/lib/dwc_archive/expander.rb +88 -0
- data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
- data/lib/dwc_archive/generator.rb +91 -0
- data/lib/{dwc-archive → dwc_archive}/generator_eml_xml.rb +40 -33
- data/lib/{dwc-archive → dwc_archive}/generator_meta_xml.rb +21 -20
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +106 -0
- data/lib/dwc_archive/metadata.rb +57 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +90 -0
- data/spec/files/file with characters(3).gz b/data/spec/files/file with → characters(3).tar.gz +0 -0
- data/spec/files/generator_eml.xml +1 -1
- data/spec/lib/classification_normalizer_spec.rb +96 -105
- data/spec/lib/core_spec.rb +43 -41
- data/spec/lib/darwin_core_spec.rb +108 -138
- data/spec/lib/generator_eml_xml_spec.rb +12 -11
- data/spec/lib/generator_meta_xml_spec.rb +12 -11
- data/spec/lib/generator_spec.rb +73 -74
- data/spec/lib/gnub_taxon_spec.rb +15 -17
- data/spec/lib/metadata_spec.rb +50 -41
- data/spec/lib/taxon_normalized_spec.rb +62 -65
- data/spec/lib/xml_reader_spec.rb +9 -12
- data/spec/spec_helper.rb +54 -51
- metadata +101 -87
- data/.rvmrc +0 -1
- data/lib/dwc-archive.rb +0 -107
- data/lib/dwc-archive/archive.rb +0 -40
- data/lib/dwc-archive/classification_normalizer.rb +0 -427
- data/lib/dwc-archive/core.rb +0 -19
- data/lib/dwc-archive/expander.rb +0 -85
- data/lib/dwc-archive/generator.rb +0 -86
- data/lib/dwc-archive/ingester.rb +0 -101
- data/lib/dwc-archive/metadata.rb +0 -48
- data/lib/dwc-archive/version.rb +0 -3
- data/lib/dwc-archive/xml_reader.rb +0 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fb7ca119f1b5dd9f1657772d0fb8e80df64909f0739113a00366441045865e46
|
4
|
+
data.tar.gz: aa8fab9a533682f6ea4907b52dcae3b63d320331860c8936819815f84a26f9e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b122be118cd7804cc299ea465ced05aba037df1e9265b8940fd7db369f73a72483fa9e11b1c6571e85c7ff68282c9512b25b73ae2095da0d96d00b3c18739da4
|
7
|
+
data.tar.gz: 4f787964ca02539f02ab97d7e0d3106fd6ae67331718a92a4313337c219bdf054921702cfed2d116147ff7b5d79405e63b483f59639371f3ca7e83b68b51b52f
|
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.6
|
3
|
+
Exclude:
|
4
|
+
- features/**/*
|
5
|
+
- .bundle/**/*
|
6
|
+
- bundle_bin/**/*
|
7
|
+
- lib/dwc_archive/ingester.rb
|
8
|
+
- lib/dwc_archive/generator_meta_xml.rb
|
9
|
+
- lib/dwc_archive/generator_eml_xml.rb
|
10
|
+
- lib/dwc_archive/classification_normalizer.rb
|
11
|
+
|
12
|
+
Style/StringLiterals:
|
13
|
+
EnforcedStyle: double_quotes
|
14
|
+
|
15
|
+
Layout/DotPosition:
|
16
|
+
EnforcedStyle: trailing
|
17
|
+
|
18
|
+
Metrics/BlockLength:
|
19
|
+
Exclude:
|
20
|
+
- 'Rakefile'
|
21
|
+
- '*.gemspec'
|
22
|
+
- '**/*.rake'
|
23
|
+
- 'spec/**/*spec.rb'
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.7.1
|
data/.travis.yml
CHANGED
@@ -1,16 +1,13 @@
|
|
1
1
|
rvm:
|
2
|
-
-
|
3
|
-
- 2.
|
2
|
+
- 2.5
|
3
|
+
- 2.6
|
4
|
+
- 2.7
|
4
5
|
before_install:
|
5
6
|
- sudo apt-get update
|
6
|
-
- gem install
|
7
|
-
# bundler_args: --without development
|
8
|
-
services:
|
9
|
-
- redis-server
|
7
|
+
- gem install bundler
|
10
8
|
script:
|
11
9
|
- bundle exec cucumber
|
12
10
|
- bundle exec rake
|
13
11
|
branches:
|
14
12
|
only:
|
15
13
|
- master
|
16
|
-
|
data/CHANGELOG
CHANGED
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -19,136 +19,140 @@ Installation
|
|
19
19
|
|
20
20
|
### System Requirements
|
21
21
|
|
22
|
-
You need [Redis Server][12] and unzip library installed
|
22
|
+
You need [Redis Server][12] and unzip library installed
|
23
23
|
|
24
24
|
|
25
25
|
Usage
|
26
26
|
-----
|
27
27
|
|
28
|
-
|
29
|
-
|
28
|
+
```ruby
|
29
|
+
require 'rubygems'
|
30
|
+
require 'dwc_archive'
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
|
33
|
+
dwc.archive.files # the archive file list
|
34
|
+
dwc.metadata.data # summary of metadata from eml.xml if it exists
|
35
|
+
dwc.metadata.authors # authors of the archive
|
36
|
+
dwc.core.data # summary of DarwinCore main file
|
37
|
+
dwc.core.file_path # path to the DarwinCore main file
|
38
|
+
dwc.extensions # array of DarwinCore Star extensions
|
39
|
+
dwc.extensions[0].data # summary for an extension
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
# read content of the core data file into memory, or process it with a block
|
42
|
+
# it returns array of arrays of data
|
43
|
+
# rows that had a wrong encoding will be collected into errors array
|
44
|
+
data, errors = dwc.core.read
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# read content using a block, getting back results in sets of 100 rows each
|
47
|
+
results = []
|
48
|
+
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
49
|
+
results << [data, errors]
|
50
|
+
end
|
51
|
+
results << [tail_data, tail_errors]
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
# read content of an extension data file into memory
|
54
|
+
data, errors = dwc.core.extensions[0].read
|
54
55
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
# read content of an extension data using block
|
57
|
+
results = []
|
58
|
+
tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
|
59
|
+
results << [data, errors]
|
60
|
+
end
|
61
|
+
results << [tail_data, tail_errors]
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
# normalize names in classification collecting together synonyms,
|
64
|
+
# canonical names, vernacular names and associating paths to taxons
|
65
|
+
# in a classification distributed as DwCA file
|
65
66
|
|
66
|
-
|
67
|
+
result = dwc.normalize_classification
|
67
68
|
|
68
|
-
|
69
|
+
# for a finer control over normalization:
|
69
70
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
71
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
72
|
+
cn.normalize
|
73
|
+
# if you don't want to generate path consisting of canonical forms
|
74
|
+
# of ancestors to a taxon
|
75
|
+
cn.normalize(:with_canonical_names => false)
|
75
76
|
|
76
|
-
|
77
|
-
|
77
|
+
# if you don't want to ingest information from extensions
|
78
|
+
cn.normalize(:with_extensions => false)
|
78
79
|
|
79
|
-
|
80
|
-
|
80
|
+
# to get a flat hash of nodes with attached vernacular names and synonyms
|
81
|
+
normalized_data = cn.normalized_data
|
81
82
|
|
82
|
-
|
83
|
-
|
83
|
+
# to get a representation of tree organization as a hash
|
84
|
+
classification_tree = cn.tree
|
84
85
|
|
85
|
-
|
86
|
-
|
86
|
+
# to get list of all name strings used as scientific or vernacular names
|
87
|
+
all_name_strings = cn.name_strings
|
87
88
|
|
88
|
-
|
89
|
-
|
89
|
+
# to get list of errors generated during the normalization
|
90
|
+
errors = cn.error_names
|
90
91
|
|
91
|
-
|
92
|
+
DarwinCore.clean_all # remove all expanded archives
|
93
|
+
```
|
92
94
|
|
93
95
|
Creating a DarwinCore Archive file
|
94
96
|
----------------------------------
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
:
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
98
|
+
```ruby
|
99
|
+
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
100
|
+
|
101
|
+
core = [
|
102
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
103
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
104
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
105
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
106
|
+
[1, 0, "Plantae", "kingdom"],
|
107
|
+
[2, 1, "Betula", "genus"],
|
108
|
+
[3, 2, "Betula verucosa", "species"]
|
109
|
+
]
|
110
|
+
|
111
|
+
vernacular_names = [
|
112
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
113
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
114
|
+
[1, "Plants"],
|
115
|
+
[1, "Растения"],
|
116
|
+
[2, "Birch"],
|
117
|
+
[2, "Береза"],
|
118
|
+
[3, "Weeping Birch"],
|
119
|
+
[3, "Береза плакучая"]
|
120
|
+
]
|
121
|
+
|
122
|
+
eml = {
|
123
|
+
:id => '1234',
|
124
|
+
:license => 'http://creativecommons.org/licenses/by-sa/3.0/',
|
125
|
+
:title => 'Test Classification',
|
126
|
+
:authors => [
|
127
|
+
{ :first_name => 'John',
|
128
|
+
:last_name => 'Doe',
|
129
|
+
:email => 'jdoe@example.com',
|
130
|
+
:organization => 'Example',
|
131
|
+
:position => 'Assistant Professor',
|
132
|
+
:url => 'http://example.org' },
|
133
|
+
{ :first_name => 'Jane',
|
134
|
+
:last_name => 'Doe',
|
135
|
+
:email => 'jane@example.com' }
|
136
|
+
],
|
137
|
+
:metadata_providers => [
|
138
|
+
{ :first_name => 'Jim',
|
139
|
+
:last_name => 'Doe',
|
140
|
+
:email => 'jimdoe@example.com',
|
141
|
+
:url => 'http://aggregator.example.org' }],
|
142
|
+
:abstract => 'test classification',
|
143
|
+
:citation =>
|
144
|
+
'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
|
145
|
+
:url => 'http://example.com'
|
146
|
+
}
|
147
|
+
|
148
|
+
gen.add_core(core, 'core.txt')
|
149
|
+
gen.add_extension(vernacular_names,
|
150
|
+
'vernacular_names.txt',
|
151
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
152
|
+
gen.add_meta_xml
|
153
|
+
gen.add_eml_xml(eml)
|
154
|
+
gen.pack
|
155
|
+
```
|
152
156
|
|
153
157
|
Logging
|
154
158
|
-------
|
@@ -175,11 +179,11 @@ Note on Patches/Pull Requests
|
|
175
179
|
Copyright
|
176
180
|
---------
|
177
181
|
|
178
|
-
Author -- [
|
182
|
+
Author -- [@dimus][13]
|
179
183
|
|
180
|
-
Contributors -- [
|
184
|
+
Contributors -- [@mjy][14], [@LocoDelAssembly][16]
|
181
185
|
|
182
|
-
Copyright (c) 2010-
|
186
|
+
Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
|
183
187
|
|
184
188
|
[1]: https://badge.fury.io/rb/dwc-archive.png
|
185
189
|
[2]: http://badge.fury.io/rb/dwc-archive
|
@@ -196,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
|
|
196
200
|
[13]: https://github.com/dimus
|
197
201
|
[14]: https://github.com/mjy
|
198
202
|
[15]: http://mbl.edu
|
203
|
+
[16]: https://github.com/LocoDelAssembly
|
data/Rakefile
CHANGED
@@ -1,44 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# Bundler::GemHelper.install_tasks
|
4
|
-
# require 'bundler/gem_tasks'
|
5
|
-
# require 'rake/testtasks'
|
6
|
-
# require 'rubygems'
|
7
|
-
# require 'rake'
|
8
|
-
|
9
|
-
require 'rspec/core/rake_task'
|
10
|
-
RSpec::Core::RakeTask.new(:spec) do |spec|
|
11
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
12
|
-
end
|
1
|
+
# frozen_string_literal: true
|
13
2
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
# task :spec => :check_dependencies
|
20
|
-
|
21
|
-
begin
|
22
|
-
require 'cucumber/rake/task'
|
23
|
-
Cucumber::Rake::Task.new(:features)
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
require "cucumber/rake/task"
|
24
6
|
|
25
|
-
|
26
|
-
|
27
|
-
task :features do
|
28
|
-
abort 'Cucumber is not available. In order to run features, ' +
|
29
|
-
'you must: sudo gem install cucumber'
|
30
|
-
end
|
7
|
+
RSpec::Core::RakeTask.new(:rspec) do |rspec|
|
8
|
+
rspec.pattern = "spec/**/*_spec.rb"
|
31
9
|
end
|
32
10
|
|
33
|
-
|
34
|
-
task :irb, [:script] do |t, args|
|
35
|
-
ARGV.clear
|
11
|
+
Cucumber::Rake::Task.new(:features)
|
36
12
|
|
37
|
-
|
38
|
-
|
13
|
+
# task rspec: :check_dependencies
|
14
|
+
task features: :check_dependencies
|
39
15
|
|
40
|
-
|
41
|
-
|
16
|
+
desc "open an irb session preloaded with this library"
|
17
|
+
task :console do
|
18
|
+
sh "irb -I lib -I extra -r dwc_archive.rb"
|
42
19
|
end
|
43
20
|
|
44
|
-
task :
|
21
|
+
task default: :rspec
|
data/dwc-archive.gemspec
CHANGED
@@ -1,33 +1,37 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require File.expand_path("lib/dwc_archive/version", __dir__)
|
2
4
|
|
3
5
|
Gem::Specification.new do |gem|
|
4
6
|
gem.name = "dwc-archive"
|
5
7
|
gem.version = DarwinCore::VERSION
|
6
8
|
gem.authors = ["Dmitry Mozzherin"]
|
7
9
|
gem.email = ["dmozzherin at gmail dot com"]
|
8
|
-
gem.description =
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
gem.summary =
|
10
|
+
gem.description = "Darwin Core Archive is the current standard exchange " \
|
11
|
+
"format for Global Names Architecture modules. " \
|
12
|
+
"This gem makes it easy to incorporate files in " \
|
13
|
+
"Darwin Core Archive format into a ruby project."
|
14
|
+
gem.summary = "Handler of Darwin Core Archive files"
|
13
15
|
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
14
16
|
gem.license = "MIT"
|
15
17
|
|
16
|
-
gem.
|
18
|
+
gem.required_ruby_version = ">= 2.6.0"
|
19
|
+
gem.files = `git ls-files`.split("\n").map(&:strip)
|
17
20
|
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
21
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
19
22
|
gem.require_paths = ["lib"]
|
20
23
|
|
21
|
-
gem.add_runtime_dependency
|
22
|
-
gem.add_runtime_dependency
|
23
|
-
gem.add_runtime_dependency
|
24
|
-
|
25
|
-
gem.add_development_dependency 'rake', '~> 10.1'
|
26
|
-
gem.add_development_dependency 'bundler', '~> 1.3'
|
27
|
-
gem.add_development_dependency 'rspec', '~> 2.14'
|
28
|
-
gem.add_development_dependency 'cucumber', '~> 1.3'
|
29
|
-
gem.add_development_dependency 'coveralls', '~> 0.7'
|
30
|
-
gem.add_development_dependency 'debugger', '~> 1.6'
|
31
|
-
gem.add_development_dependency 'git', '~> 1.2'
|
32
|
-
end
|
24
|
+
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
25
|
+
gem.add_runtime_dependency "biodiversity", "~> 5.1.2"
|
26
|
+
gem.add_runtime_dependency "nokogiri", "~> 1.11"
|
33
27
|
|
28
|
+
gem.add_development_dependency "bundler", "~> 2.2"
|
29
|
+
gem.add_development_dependency "byebug", "~> 11.1"
|
30
|
+
gem.add_development_dependency "cucumber", "~> 5"
|
31
|
+
gem.add_development_dependency "git", "~> 1.8"
|
32
|
+
gem.add_development_dependency "rake", "~> 13"
|
33
|
+
gem.add_development_dependency "rspec", "~> 3.10"
|
34
|
+
gem.add_development_dependency "rubocop", "~> 1.8"
|
35
|
+
gem.add_development_dependency "solargraph", "~> 0.40"
|
36
|
+
gem.add_development_dependency "travis", "~> 1.10"
|
37
|
+
end
|