dwc-archive 0.9.10 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rspec +2 -1
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -1
- data/.travis.yml +4 -7
- data/CHANGELOG +14 -8
- data/Gemfile +3 -1
- data/LICENSE +1 -1
- data/README.md +119 -107
- data/Rakefile +13 -36
- data/dwc-archive.gemspec +23 -19
- data/features/step_definitions/dwc-creator_steps.rb +5 -5
- data/features/step_definitions/dwc-reader_steps.rb +47 -28
- data/features/support/env.rb +1 -1
- data/lib/dwc_archive.rb +124 -0
- data/lib/dwc_archive/archive.rb +60 -0
- data/lib/dwc_archive/classification_normalizer.rb +382 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
- data/lib/dwc_archive/expander.rb +88 -0
- data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
- data/lib/dwc_archive/generator.rb +91 -0
- data/lib/dwc_archive/generator_eml_xml.rb +116 -0
- data/lib/dwc_archive/generator_meta_xml.rb +72 -0
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +106 -0
- data/lib/dwc_archive/metadata.rb +57 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +90 -0
- data/spec/files/file with characters(3).gz b/data/spec/files/file with → characters(3).tar.gz +0 -0
- data/spec/files/generator_eml.xml +47 -0
- data/spec/files/generator_meta.xml +19 -0
- data/spec/lib/classification_normalizer_spec.rb +96 -105
- data/spec/lib/core_spec.rb +43 -41
- data/spec/lib/darwin_core_spec.rb +108 -138
- data/spec/lib/generator_eml_xml_spec.rb +12 -11
- data/spec/lib/generator_meta_xml_spec.rb +12 -11
- data/spec/lib/generator_spec.rb +77 -69
- data/spec/lib/gnub_taxon_spec.rb +15 -17
- data/spec/lib/metadata_spec.rb +50 -41
- data/spec/lib/taxon_normalized_spec.rb +62 -65
- data/spec/lib/xml_reader_spec.rb +9 -12
- data/spec/spec_helper.rb +54 -51
- metadata +105 -88
- data/.rvmrc +0 -1
- data/] +0 -40
- data/lib/dwc-archive.rb +0 -107
- data/lib/dwc-archive/archive.rb +0 -40
- data/lib/dwc-archive/classification_normalizer.rb +0 -428
- data/lib/dwc-archive/core.rb +0 -17
- data/lib/dwc-archive/expander.rb +0 -84
- data/lib/dwc-archive/generator.rb +0 -85
- data/lib/dwc-archive/generator_eml_xml.rb +0 -86
- data/lib/dwc-archive/generator_meta_xml.rb +0 -58
- data/lib/dwc-archive/ingester.rb +0 -101
- data/lib/dwc-archive/metadata.rb +0 -48
- data/lib/dwc-archive/version.rb +0 -3
- data/lib/dwc-archive/xml_reader.rb +0 -64
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f91a9aa06150210f94bd69bb408dc21340feb21294c958f1fe0b03eeda4dab57
|
4
|
+
data.tar.gz: eb6c0d368729f09aa91ddc7a131545b1fa1cdfcab5c09a9326e05bcd6e55cdea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99a1903bd90f161180c59934a5533c55c3c838aeecd0a37476b7b240004c798ea9149a1511eac8ac3a434df1c2e5d5940bb0b6f125f0f7949decf62bcd78900a
|
7
|
+
data.tar.gz: 5922fca1a5e0a26ede79e9b3ee4d4a3255c9f1403b7ec71a9f5ce1b929f1dfc71abde50c82541a76a09899be9710e557f1850003f5ec04aa6c81e53f55933fd1
|
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.6
|
3
|
+
Exclude:
|
4
|
+
- features/**/*
|
5
|
+
- .bundle/**/*
|
6
|
+
- bundle_bin/**/*
|
7
|
+
- lib/dwc_archive/ingester.rb
|
8
|
+
- lib/dwc_archive/generator_meta_xml.rb
|
9
|
+
- lib/dwc_archive/generator_eml_xml.rb
|
10
|
+
- lib/dwc_archive/classification_normalizer.rb
|
11
|
+
|
12
|
+
Style/StringLiterals:
|
13
|
+
EnforcedStyle: double_quotes
|
14
|
+
|
15
|
+
Layout/DotPosition:
|
16
|
+
EnforcedStyle: trailing
|
17
|
+
|
18
|
+
Metrics/BlockLength:
|
19
|
+
Exclude:
|
20
|
+
- 'Rakefile'
|
21
|
+
- '*.gemspec'
|
22
|
+
- '**/*.rake'
|
23
|
+
- 'spec/**/*spec.rb'
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.7.1
|
data/.travis.yml
CHANGED
@@ -1,16 +1,13 @@
|
|
1
1
|
rvm:
|
2
|
-
-
|
3
|
-
- 2.
|
2
|
+
- 2.5
|
3
|
+
- 2.6
|
4
|
+
- 2.7
|
4
5
|
before_install:
|
5
6
|
- sudo apt-get update
|
6
|
-
- gem install
|
7
|
-
# bundler_args: --without development
|
8
|
-
services:
|
9
|
-
- redis-server
|
7
|
+
- gem install bundler
|
10
8
|
script:
|
11
9
|
- bundle exec cucumber
|
12
10
|
- bundle exec rake
|
13
11
|
branches:
|
14
12
|
only:
|
15
13
|
- master
|
16
|
-
|
data/CHANGELOG
CHANGED
@@ -1,15 +1,21 @@
|
|
1
|
-
|
1
|
+
1.1.0 Update name parser to go-based biodiversity
|
2
2
|
|
3
|
-
0.
|
3
|
+
1.0.1 Cleanup and gems update
|
4
4
|
|
5
|
-
0.9.
|
5
|
+
0.9.11 Removed VERSION duplicate
|
6
6
|
|
7
|
-
0.9.
|
7
|
+
0.9.7 Refactoring and tests improvements
|
8
8
|
|
9
|
-
0.
|
9
|
+
0.9.6 Added support for GNUB DwCA files
|
10
10
|
|
11
|
-
0.
|
11
|
+
0.9.4 Gem dependencies updated, added travis support
|
12
12
|
|
13
|
-
0.
|
13
|
+
0.9.0 Migrated code to ruby 1.9.3
|
14
14
|
|
15
|
-
0.8.
|
15
|
+
0.8.3 Updated outdated exception raising
|
16
|
+
|
17
|
+
0.8.2 Removed species info from linnean classification path
|
18
|
+
|
19
|
+
0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
|
20
|
+
|
21
|
+
0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -19,136 +19,140 @@ Installation
|
|
19
19
|
|
20
20
|
### System Requirements
|
21
21
|
|
22
|
-
You need [Redis Server][12] and unzip library installed
|
22
|
+
You need [Redis Server][12] and unzip library installed
|
23
23
|
|
24
24
|
|
25
25
|
Usage
|
26
26
|
-----
|
27
27
|
|
28
|
-
|
29
|
-
|
28
|
+
```ruby
|
29
|
+
require 'rubygems'
|
30
|
+
require 'dwc_archive'
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
32
|
+
dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
|
33
|
+
dwc.archive.files # the archive file list
|
34
|
+
dwc.metadata.data # summary of metadata from eml.xml if it exists
|
35
|
+
dwc.metadata.authors # authors of the archive
|
36
|
+
dwc.core.data # summary of DarwinCore main file
|
37
|
+
dwc.core.file_path # path to the DarwinCore main file
|
38
|
+
dwc.extensions # array of DarwinCore Star extensions
|
39
|
+
dwc.extensions[0].data # summary for an extension
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
# read content of the core data file into memory, or use it with a block
|
42
|
+
# it returns array of arrays of data
|
43
|
+
# rows that had a wrong encoding will be collected into errors array
|
44
|
+
data, errors = dwc.core.read
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# read content using a block, getting back results in sets of 100 rows each
|
47
|
+
results = []
|
48
|
+
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
49
|
+
results << [data, errors]
|
50
|
+
end
|
51
|
+
results << [tail_data, tail_errors]
|
51
52
|
|
52
|
-
|
53
|
-
|
53
|
+
# read content of an extension data file into memory
|
54
|
+
data, errors = dwc.core.extensions[0].read
|
54
55
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
56
|
+
# read content of an extension data using block
|
57
|
+
results = []
|
58
|
+
tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
|
59
|
+
results << [data, errors]
|
60
|
+
end
|
61
|
+
results << [tail_data, tail_errors]
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
# normalize names in classification collecting together synonyms,
|
64
|
+
# canonical names, vernacular names and associating paths to taxons
|
65
|
+
# in a classification distributed as DwCA file
|
65
66
|
|
66
|
-
|
67
|
+
result = dwc.normalize_classification
|
67
68
|
|
68
|
-
|
69
|
+
# for a finer control over normalization:
|
69
70
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
71
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
72
|
+
cn.normalize
|
73
|
+
# if you don't want to generate path consisting of canonical forms
|
74
|
+
# of ancestors to a taxon
|
75
|
+
cn.normalize(:with_canonical_names => false)
|
75
76
|
|
76
|
-
|
77
|
-
|
77
|
+
# if you don't want to ingest information from extensions
|
78
|
+
cn.normalize(:with_extensions => false)
|
78
79
|
|
79
|
-
|
80
|
-
|
80
|
+
# to get a flat hash of nodes with attached vernacular names and synonyms
|
81
|
+
normalized_data = cn.normalized_data
|
81
82
|
|
82
|
-
|
83
|
-
|
83
|
+
# to get a representation of tree organization as a hash
|
84
|
+
classification_tree = cn.tree
|
84
85
|
|
85
|
-
|
86
|
-
|
86
|
+
# to get list of all name strings used as scientific or vernacular names
|
87
|
+
all_name_strings = cn.name_strings
|
87
88
|
|
88
|
-
|
89
|
-
|
89
|
+
# to get list of errors generated during the normalization
|
90
|
+
errors = cn.error_names
|
90
91
|
|
91
|
-
|
92
|
+
DarwinCore.clean_all # remove all expanded archives
|
93
|
+
```
|
92
94
|
|
93
95
|
Creating a DarwinCore Archive file
|
94
96
|
----------------------------------
|
95
97
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
:
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
98
|
+
```ruby
|
99
|
+
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
100
|
+
|
101
|
+
core = [
|
102
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
103
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
104
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
105
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
106
|
+
[1, 0, "Plantae", "kingdom"],
|
107
|
+
[2, 1, "Betula", "genus"],
|
108
|
+
[3, 2, "Betula verrucosa", "species"]
|
109
|
+
]
|
110
|
+
|
111
|
+
vernacular_names = [
|
112
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
113
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
114
|
+
[1, "Plants"],
|
115
|
+
[1, "Растения"],
|
116
|
+
[2, "Birch"],
|
117
|
+
[2, "Береза"],
|
118
|
+
[3, "Weeping Birch"],
|
119
|
+
[3, "Береза плакучая"]
|
120
|
+
]
|
121
|
+
|
122
|
+
eml = {
|
123
|
+
:id => '1234',
|
124
|
+
:license => 'http://creativecommons.org/licenses/by-sa/3.0/',
|
125
|
+
:title => 'Test Classification',
|
126
|
+
:authors => [
|
127
|
+
{ :first_name => 'John',
|
128
|
+
:last_name => 'Doe',
|
129
|
+
:email => 'jdoe@example.com',
|
130
|
+
:organization => 'Example',
|
131
|
+
:position => 'Assistant Professor',
|
132
|
+
:url => 'http://example.org' },
|
133
|
+
{ :first_name => 'Jane',
|
134
|
+
:last_name => 'Doe',
|
135
|
+
:email => 'jane@example.com' }
|
136
|
+
],
|
137
|
+
:metadata_providers => [
|
138
|
+
{ :first_name => 'Jim',
|
139
|
+
:last_name => 'Doe',
|
140
|
+
:email => 'jimdoe@example.com',
|
141
|
+
:url => 'http://aggregator.example.org' }],
|
142
|
+
:abstract => 'test classification',
|
143
|
+
:citation =>
|
144
|
+
'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
|
145
|
+
:url => 'http://example.com'
|
146
|
+
}
|
147
|
+
|
148
|
+
gen.add_core(core, 'core.txt')
|
149
|
+
gen.add_extension(vernacular_names,
|
150
|
+
'vernacular_names.txt',
|
151
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
152
|
+
gen.add_meta_xml
|
153
|
+
gen.add_eml_xml(eml)
|
154
|
+
gen.pack
|
155
|
+
```
|
152
156
|
|
153
157
|
Logging
|
154
158
|
-------
|
@@ -175,7 +179,11 @@ Note on Patches/Pull Requests
|
|
175
179
|
Copyright
|
176
180
|
---------
|
177
181
|
|
178
|
-
|
182
|
+
Author -- [@dimus][13]
|
183
|
+
|
184
|
+
Contributors -- [@mjy][14], [@LocoDelAssembly][16]
|
185
|
+
|
186
|
+
Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
|
179
187
|
|
180
188
|
[1]: https://badge.fury.io/rb/dwc-archive.png
|
181
189
|
[2]: http://badge.fury.io/rb/dwc-archive
|
@@ -189,3 +197,7 @@ Copyright (c) 2010-2013 Marine Biological Laboratory. See LICENSE for details.
|
|
189
197
|
[10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
190
198
|
[11]: http://bit.ly/2IxcBA
|
191
199
|
[12]: http://redis.io/topics/quickstart
|
200
|
+
[13]: https://github.com/dimus
|
201
|
+
[14]: https://github.com/mjy
|
202
|
+
[15]: http://mbl.edu
|
203
|
+
[16]: https://github.com/LocoDelAssembly
|
data/Rakefile
CHANGED
@@ -1,44 +1,21 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# Bundler::GemHelper.install_tasks
|
4
|
-
# require 'bundler/gem_tasks'
|
5
|
-
# require 'rake/testtasks'
|
6
|
-
# require 'rubygems'
|
7
|
-
# require 'rake'
|
8
|
-
|
9
|
-
require 'rspec/core/rake_task'
|
10
|
-
RSpec::Core::RakeTask.new(:spec) do |spec|
|
11
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
12
|
-
end
|
1
|
+
# frozen_string_literal: true
|
13
2
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
18
|
-
|
19
|
-
# task :spec => :check_dependencies
|
20
|
-
|
21
|
-
begin
|
22
|
-
require 'cucumber/rake/task'
|
23
|
-
Cucumber::Rake::Task.new(:features)
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
require "cucumber/rake/task"
|
24
6
|
|
25
|
-
|
26
|
-
|
27
|
-
task :features do
|
28
|
-
abort 'Cucumber is not available. In order to run features, ' +
|
29
|
-
'you must: sudo gem install cucumber'
|
30
|
-
end
|
7
|
+
RSpec::Core::RakeTask.new(:rspec) do |rspec|
|
8
|
+
rspec.pattern = "spec/**/*_spec.rb"
|
31
9
|
end
|
32
10
|
|
33
|
-
|
34
|
-
task :irb, [:script] do |t, args|
|
35
|
-
ARGV.clear
|
11
|
+
Cucumber::Rake::Task.new(:features)
|
36
12
|
|
37
|
-
|
38
|
-
|
13
|
+
# task rspec: :check_dependencies
|
14
|
+
task features: :check_dependencies
|
39
15
|
|
40
|
-
|
41
|
-
|
16
|
+
desc "open an irb session preloaded with this library"
|
17
|
+
task :console do
|
18
|
+
sh "irb -I lib -I extra -r dwc_archive.rb"
|
42
19
|
end
|
43
20
|
|
44
|
-
task :
|
21
|
+
task default: :rspec
|
data/dwc-archive.gemspec
CHANGED
@@ -1,33 +1,37 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require File.expand_path("lib/dwc_archive/version", __dir__)
|
2
4
|
|
3
5
|
Gem::Specification.new do |gem|
|
4
6
|
gem.name = "dwc-archive"
|
5
7
|
gem.version = DarwinCore::VERSION
|
6
8
|
gem.authors = ["Dmitry Mozzherin"]
|
7
9
|
gem.email = ["dmozzherin at gmail dot com"]
|
8
|
-
gem.description =
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
gem.summary =
|
10
|
+
gem.description = "Darwin Core Archive is the current standard exchange " \
|
11
|
+
"format for GLobal Names Architecture modules. " \
|
12
|
+
"This gem makes it easy to incorporate files in " \
|
13
|
+
"Darwin Core Archive format into a ruby project."
|
14
|
+
gem.summary = "Handler of Darwin Core Archive files"
|
13
15
|
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
14
16
|
gem.license = "MIT"
|
15
17
|
|
16
|
-
gem.
|
18
|
+
gem.required_ruby_version = ">= 2.6.0"
|
19
|
+
gem.files = `git ls-files`.split("\n").map(&:strip)
|
17
20
|
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
21
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
19
22
|
gem.require_paths = ["lib"]
|
20
23
|
|
21
|
-
gem.add_runtime_dependency
|
22
|
-
gem.add_runtime_dependency
|
23
|
-
gem.add_runtime_dependency
|
24
|
-
|
25
|
-
gem.add_development_dependency 'rake', '~> 10.1'
|
26
|
-
gem.add_development_dependency 'bundler', '~> 1.3'
|
27
|
-
gem.add_development_dependency 'rspec', '~> 2.14'
|
28
|
-
gem.add_development_dependency 'cucumber', '~> 1.3'
|
29
|
-
gem.add_development_dependency 'coveralls', '~> 0.7'
|
30
|
-
gem.add_development_dependency 'debugger', '~> 1.6'
|
31
|
-
gem.add_development_dependency 'git', '~> 1.2'
|
32
|
-
end
|
24
|
+
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
25
|
+
gem.add_runtime_dependency "biodiversity", "~> 5"
|
26
|
+
gem.add_runtime_dependency "nokogiri", "~> 1.11"
|
33
27
|
|
28
|
+
gem.add_development_dependency "bundler", "~> 2.2"
|
29
|
+
gem.add_development_dependency "byebug", "~> 11.1"
|
30
|
+
gem.add_development_dependency "cucumber", "~> 5"
|
31
|
+
gem.add_development_dependency "git", "~> 1.8"
|
32
|
+
gem.add_development_dependency "rake", "~> 13"
|
33
|
+
gem.add_development_dependency "rspec", "~> 3.10"
|
34
|
+
gem.add_development_dependency "rubocop", "~> 1.8"
|
35
|
+
gem.add_development_dependency "solargraph", "~> 0.40"
|
36
|
+
gem.add_development_dependency "travis", "~> 1.10"
|
37
|
+
end
|