dwc-archive 0.1.3 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +35 -0
- data/.gitignore +10 -0
- data/.rspec +3 -0
- data/.rubocop.yml +23 -0
- data/.ruby-version +1 -0
- data/CHANGELOG +21 -0
- data/Gemfile +5 -0
- data/LICENSE +1 -1
- data/README.md +203 -0
- data/Rakefile +13 -49
- data/dwc-archive.gemspec +31 -69
- data/features/dwca-creator.feature +40 -0
- data/features/dwca-reader.feature +88 -0
- data/features/step_definitions/dwc-creator_steps.rb +112 -0
- data/features/step_definitions/dwc-reader_steps.rb +294 -0
- data/features/support/env.rb +2 -2
- data/lib/dwc_archive/archive.rb +60 -0
- data/lib/dwc_archive/classification_normalizer.rb +382 -0
- data/lib/dwc_archive/core.rb +25 -0
- data/lib/dwc_archive/errors.rb +21 -0
- data/lib/dwc_archive/expander.rb +88 -0
- data/lib/dwc_archive/extension.rb +19 -0
- data/lib/dwc_archive/generator.rb +91 -0
- data/lib/dwc_archive/generator_eml_xml.rb +116 -0
- data/lib/dwc_archive/generator_meta_xml.rb +72 -0
- data/lib/dwc_archive/gnub_taxon.rb +14 -0
- data/lib/dwc_archive/ingester.rb +111 -0
- data/lib/dwc_archive/metadata.rb +57 -0
- data/lib/dwc_archive/taxon_normalized.rb +23 -0
- data/lib/dwc_archive/version.rb +6 -0
- data/lib/dwc_archive/xml_reader.rb +90 -0
- data/lib/dwc_archive.rb +124 -0
- data/spec/files/broken.tar.gz +0 -0
- data/spec/files/data.tar.gz +0 -0
- data/spec/files/data_with_quotes.tar.gz +0 -0
- data/spec/files/eml.xml +36 -36
- data/spec/files/empty_coreid.tar.gz +0 -0
- data/spec/files/file with characters(3).tar.gz +0 -0
- data/spec/files/flat_list.tar.gz +0 -0
- data/spec/files/generator_eml.xml +47 -0
- data/spec/files/generator_meta.xml +19 -0
- data/spec/files/gnub.tar.gz +0 -0
- data/spec/files/invalid.tar.gz +0 -0
- data/spec/files/junk_dir_inside.zip +0 -0
- data/spec/files/language_locality.tar.gz +0 -0
- data/spec/files/latin1.tar.gz +0 -0
- data/spec/files/linnean.tar.gz +0 -0
- data/spec/files/meta.xml +19 -19
- data/spec/files/minimal.tar.gz +0 -0
- data/spec/files/not_synonym_in_extension.tar.gz +0 -0
- data/spec/files/sci_name_authorship.tar.gz +0 -0
- data/spec/files/sci_name_authorship_dup.tar.gz +0 -0
- data/spec/files/synonyms_in_core_accepted_name_field.tar.gz +0 -0
- data/spec/files/synonyms_in_extension.tar.gz +0 -0
- data/spec/files/uncompressed +1 -0
- data/spec/lib/classification_normalizer_spec.rb +214 -0
- data/spec/lib/core_spec.rb +100 -0
- data/spec/lib/darwin_core_spec.rb +252 -0
- data/spec/lib/generator_eml_xml_spec.rb +22 -0
- data/spec/lib/generator_meta_xml_spec.rb +22 -0
- data/spec/lib/generator_spec.rb +124 -0
- data/spec/lib/gnub_taxon_spec.rb +32 -0
- data/spec/lib/metadata_spec.rb +89 -0
- data/spec/lib/taxon_normalized_spec.rb +142 -0
- data/spec/lib/xml_reader_spec.rb +15 -0
- data/spec/spec_helper.rb +79 -7
- metadata +266 -88
- data/README.rdoc +0 -38
- data/VERSION +0 -1
- data/features/dwc-archive.feature +0 -34
- data/features/step_definitions/dwc-archive_steps.rb +0 -95
- data/lib/dwc-archive/.expander.rb.swo +0 -0
- data/lib/dwc-archive/archive.rb +0 -37
- data/lib/dwc-archive/core.rb +0 -34
- data/lib/dwc-archive/expander.rb +0 -71
- data/lib/dwc-archive/extension.rb +0 -31
- data/lib/dwc-archive/metadata.rb +0 -38
- data/lib/dwc-archive.rb +0 -29
- data/lib/ruby_extensions.rb +0 -64
- data/spec/dwc-archive_spec.rb +0 -7
- data/spec/lib/ruby_extenstions_spec.rb +0 -15
- data/spec/spec.opts +0 -1
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b35d0c900fbb815f4955e59306c6cc5a134f4c5094adfc98576ac72249e82566
|
|
4
|
+
data.tar.gz: 8f8f58062273f5d5e6afb1084aed358d7585e8efac107e7fdf723cfa93e0d0bc
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 39cd3818632795b6d49ea9c7affbc565c8d8a7c9aa48b1acf2e00b6c8743b80ed58f5d6a0b7e26ea5375f1e3ba5e2dc91b59fd9820cda0918d254ae4739d930e
|
|
7
|
+
data.tar.gz: 82f055dd0ed152a516eade9df4efe12cc0aaded52c0c1054f25340d07f0a2cd9f11fd2ca6873e3cf9adab7b3404f892c8c07c5953ada133c2a53d071fc8fe858
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
|
2
|
+
# They are provided by a third-party and are governed by
|
|
3
|
+
# separate terms of service, privacy policy, and support
|
|
4
|
+
# documentation.
|
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
|
7
|
+
|
|
8
|
+
name: build
|
|
9
|
+
|
|
10
|
+
on:
|
|
11
|
+
push:
|
|
12
|
+
branches: [ master ]
|
|
13
|
+
pull_request:
|
|
14
|
+
branches: [ master ]
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
test:
|
|
18
|
+
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
strategy:
|
|
21
|
+
matrix:
|
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
|
23
|
+
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v2
|
|
26
|
+
- name: Set up Ruby
|
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
|
29
|
+
# uses: ruby/setup-ruby@v1
|
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
|
31
|
+
with:
|
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
|
34
|
+
- name: Run tests
|
|
35
|
+
run: bundle exec rake
|
data/.gitignore
CHANGED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
TargetRubyVersion: 2.6
|
|
3
|
+
Exclude:
|
|
4
|
+
- features/**/*
|
|
5
|
+
- .bundle/**/*
|
|
6
|
+
- bundle_bin/**/*
|
|
7
|
+
- lib/dwc_archive/ingester.rb
|
|
8
|
+
- lib/dwc_archive/generator_meta_xml.rb
|
|
9
|
+
- lib/dwc_archive/generator_eml_xml.rb
|
|
10
|
+
- lib/dwc_archive/classification_normalizer.rb
|
|
11
|
+
|
|
12
|
+
Style/StringLiterals:
|
|
13
|
+
EnforcedStyle: double_quotes
|
|
14
|
+
|
|
15
|
+
Layout/DotPosition:
|
|
16
|
+
EnforcedStyle: trailing
|
|
17
|
+
|
|
18
|
+
Metrics/BlockLength:
|
|
19
|
+
Exclude:
|
|
20
|
+
- 'Rakefile'
|
|
21
|
+
- '*.gemspec'
|
|
22
|
+
- '**/*.rake'
|
|
23
|
+
- 'spec/**/*spec.rb'
|
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.3.5
|
data/CHANGELOG
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
1.1.0 Update name parser to go-based biodiversity
|
|
2
|
+
|
|
3
|
+
1.0.1 Cleanup and gems update
|
|
4
|
+
|
|
5
|
+
0.9.11 Removed VERSION duplicate
|
|
6
|
+
|
|
7
|
+
0.9.7 Refactoring and tests improvements
|
|
8
|
+
|
|
9
|
+
0.9.6 Added support for GNUB DwCA files
|
|
10
|
+
|
|
11
|
+
0.9.4 Gem dependencies updated, added travis support
|
|
12
|
+
|
|
13
|
+
0.9.0 Migrated code to ruby 1.9.3
|
|
14
|
+
|
|
15
|
+
0.8.3 Updated outdated exception raising
|
|
16
|
+
|
|
17
|
+
0.8.2 Removed species info from linnean classification path
|
|
18
|
+
|
|
19
|
+
0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
|
|
20
|
+
|
|
21
|
+
0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
|
data/Gemfile
ADDED
data/LICENSE
CHANGED
data/README.md
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Darwin Core Archive
|
|
2
|
+
===================
|
|
3
|
+
|
|
4
|
+
[![Gem Version][1]][2]
|
|
5
|
+
[![Continuous Integration Status][3]][4]
|
|
6
|
+
[![Coverage Status][5]][6]
|
|
7
|
+
[![CodePolice][7]][8]
|
|
8
|
+
[![Dependency Status][9]][10]
|
|
9
|
+
|
|
10
|
+
Darwin Core Archive format is a current standard for information exchange
|
|
11
|
+
between Global Names Architecture modules. This gem allows you to work with
|
|
12
|
+
Darwin Core Archive data compressed to either zip or tar.gz files.
|
|
13
|
+
More information about Darwin Core Archive can be found on a [GBIF page][11].
|
|
14
|
+
|
|
15
|
+
Installation
|
|
16
|
+
------------
|
|
17
|
+
|
|
18
|
+
sudo gem install dwc-archive
|
|
19
|
+
|
|
20
|
+
### System Requirements
|
|
21
|
+
|
|
22
|
+
You need [Redis Server][12] and unzip library installed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
Usage
|
|
26
|
+
-----
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
require 'rubygems'
|
|
30
|
+
require 'dwc_archive'
|
|
31
|
+
|
|
32
|
+
dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
|
|
33
|
+
dwc.archive.files # the archive file list
|
|
34
|
+
dwc.metadata.data # summary of metadata from eml.xml if it exists
|
|
35
|
+
dwc.metadata.authors # authors of the archive
|
|
36
|
+
dwc.core.data # summary of DarwinCore main file
|
|
37
|
+
dwc.core.file_path # path to the DarwinCore main file
|
|
38
|
+
dwc.extensions # array of DarwinCore Star extensions
|
|
39
|
+
dwc.extensions[0].data # summary for an extension
|
|
40
|
+
|
|
41
|
+
# read content of the core data file into memory, or use it with a block
|
|
42
|
+
# it returns array of arrays of data
|
|
43
|
+
# rows that had a wrong encoding will be collected into errors array
|
|
44
|
+
data, errors = dwc.core.read
|
|
45
|
+
|
|
46
|
+
# read content using a block, getting back results in sets of 100 rows each
|
|
47
|
+
results = []
|
|
48
|
+
tail_data, tail_errors = dwc.core.read(100) do |data, errors|
|
|
49
|
+
results << [data, errors]
|
|
50
|
+
end
|
|
51
|
+
results << [tail_data, tail_errors]
|
|
52
|
+
|
|
53
|
+
# read content of an extension data file into memory
|
|
54
|
+
data, errors = dwc.core.extensions[0].read
|
|
55
|
+
|
|
56
|
+
# read content of an extension data using block
|
|
57
|
+
results = []
|
|
58
|
+
tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
|
|
59
|
+
results << [data, errors]
|
|
60
|
+
end
|
|
61
|
+
results << [tail_data, tail_errors]
|
|
62
|
+
|
|
63
|
+
# normalize names in classification collecting together synonyms,
|
|
64
|
+
# canonical names, vernacular names and associating paths to taxons
|
|
65
|
+
# in a classification distributed as DwCA file
|
|
66
|
+
|
|
67
|
+
result = dwc.normalize_classification
|
|
68
|
+
|
|
69
|
+
# for a finer control over normalization:
|
|
70
|
+
|
|
71
|
+
cn = DarwinCore::ClassificationNormalizer.new(dwc)
|
|
72
|
+
cn.normalize
|
|
73
|
+
# if you don't want to generate path consisting of canonical forms
|
|
74
|
+
# of ancestors to a taxon
|
|
75
|
+
cn.normalize(:with_canonical_names => false)
|
|
76
|
+
|
|
77
|
+
# if you don't want to ingest information from extensions
|
|
78
|
+
cn.normalize(:with_extensions => false)
|
|
79
|
+
|
|
80
|
+
# to get a flat hash of nodes with attached vernacular names and synonyms
|
|
81
|
+
normalized_data = cn.normalized_data
|
|
82
|
+
|
|
83
|
+
# to get a representation of tree organization as a hash
|
|
84
|
+
classification_tree = cn.tree
|
|
85
|
+
|
|
86
|
+
# to get list of all name strings used as scientific or vernacular names
|
|
87
|
+
all_name_strings = cn.name_strings
|
|
88
|
+
|
|
89
|
+
# to get list of errors generated during the normalization
|
|
90
|
+
errors = cn.error_names
|
|
91
|
+
|
|
92
|
+
DarwinCore.clean_all # remove all expanded archives
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Creating a DarwinCore Archive file
|
|
96
|
+
----------------------------------
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
|
|
100
|
+
|
|
101
|
+
core = [
|
|
102
|
+
["http://rs.tdwg.org/dwc/terms/taxonID",
|
|
103
|
+
"http://rs.tdwg.org/dwc/terms/parentNameUsageID",
|
|
104
|
+
"http://rs.tdwg.org/dwc/terms/scientificName",
|
|
105
|
+
"http://rs.tdwg.org/dwc/terms/taxonRank"],
|
|
106
|
+
[1, 0, "Plantae", "kingdom"],
|
|
107
|
+
[2, 1, "Betula", "genus"],
|
|
108
|
+
[3, 2, "Betula verucosa", "species"]
|
|
109
|
+
]
|
|
110
|
+
|
|
111
|
+
vernacular_names = [
|
|
112
|
+
["http://rs.tdwg.org/dwc/terms/TaxonID",
|
|
113
|
+
"http://rs.tdwg.org/dwc/terms/vernacularName"],
|
|
114
|
+
[1, "Plants"],
|
|
115
|
+
[1, "Растения"],
|
|
116
|
+
[2, "Birch"],
|
|
117
|
+
[2, "Береза"],
|
|
118
|
+
[3, "Wheeping Birch"],
|
|
119
|
+
[3, "Береза плакучая"]
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
eml = {
|
|
123
|
+
:id => '1234',
|
|
124
|
+
:license => 'http://creativecommons.org/licenses/by-sa/3.0/',
|
|
125
|
+
:title => 'Test Classification',
|
|
126
|
+
:authors => [
|
|
127
|
+
{ :first_name => 'John',
|
|
128
|
+
:last_name => 'Doe',
|
|
129
|
+
:email => 'jdoe@example.com',
|
|
130
|
+
:organization => 'Example',
|
|
131
|
+
:position => 'Assistant Professor',
|
|
132
|
+
:url => 'http://example.org' },
|
|
133
|
+
{ :first_name => 'Jane',
|
|
134
|
+
:last_name => 'Doe',
|
|
135
|
+
:email => 'jane@example.com' }
|
|
136
|
+
],
|
|
137
|
+
:metadata_providers => [
|
|
138
|
+
{ :first_name => 'Jim',
|
|
139
|
+
:last_name => 'Doe',
|
|
140
|
+
:email => 'jimdoe@example.com',
|
|
141
|
+
:url => 'http://aggregator.example.org' }],
|
|
142
|
+
:abstract => 'test classification',
|
|
143
|
+
:citation =>
|
|
144
|
+
'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
|
|
145
|
+
:url => 'http://example.com'
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
gen.add_core(core, 'core.txt')
|
|
149
|
+
gen.add_extension(vernacular_names,
|
|
150
|
+
'vernacular_names.txt',
|
|
151
|
+
true, 'http://rs.gbif.org/terms/1.0/VernacularName')
|
|
152
|
+
gen.add_meta_xml
|
|
153
|
+
gen.add_eml_xml(eml)
|
|
154
|
+
gen.pack
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Logging
|
|
158
|
+
-------
|
|
159
|
+
|
|
160
|
+
The gem has the ability to show logs of its events:
|
|
161
|
+
|
|
162
|
+
require 'dwc_archive'
|
|
163
|
+
DarwinCore.logger = Logger.new($stdout)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
Note on Patches/Pull Requests
|
|
167
|
+
-----------------------------
|
|
168
|
+
|
|
169
|
+
* Fork the project.
|
|
170
|
+
* Make your feature addition or bug fix.
|
|
171
|
+
* Add tests for it. This is important so I don't break it in a
|
|
172
|
+
future version unintentionally.
|
|
173
|
+
* Commit, do not mess with rakefile, version, or history.
|
|
174
|
+
(if you want to have your own version, that is fine but bump
|
|
175
|
+
version in a commit by itself I can ignore when I pull)
|
|
176
|
+
* Send me a pull request. Bonus points for topic branches.
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
Copyright
|
|
180
|
+
---------
|
|
181
|
+
|
|
182
|
+
Author -- [@dimus][13]
|
|
183
|
+
|
|
184
|
+
Contributors -- [@mjy][14], [@LocoDelAssembly][16]
|
|
185
|
+
|
|
186
|
+
Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
|
|
187
|
+
|
|
188
|
+
[1]: https://badge.fury.io/rb/dwc-archive.png
|
|
189
|
+
[2]: http://badge.fury.io/rb/dwc-archive
|
|
190
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
|
|
191
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
|
|
192
|
+
[5]: https://coveralls.io/repos/GlobalNamesArchitecture/dwc-archive/badge.png
|
|
193
|
+
[6]: https://coveralls.io/r/GlobalNamesArchitecture/dwc-archive
|
|
194
|
+
[7]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive.png
|
|
195
|
+
[8]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive
|
|
196
|
+
[9]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
|
|
197
|
+
[10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
|
|
198
|
+
[11]: http://bit.ly/2IxcBA
|
|
199
|
+
[12]: http://redis.io/topics/quickstart
|
|
200
|
+
[13]: https://github.com/dimus
|
|
201
|
+
[14]: https://github.com/mjy
|
|
202
|
+
[15]: http://mbl.edu
|
|
203
|
+
[16]: https://github.com/LocoDelAssembly
|
data/Rakefile
CHANGED
|
@@ -1,57 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
require 'rake'
|
|
1
|
+
# frozen_string_literal: true
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
gem.name = "dwc-archive"
|
|
8
|
-
gem.summary = %Q{Handler of Darwin Core Archive files}
|
|
9
|
-
gem.description = %Q{Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.}
|
|
10
|
-
gem.email = "dmozzherin at gmail dot com"
|
|
11
|
-
gem.homepage = "http://github.com/dimus/dwc-archive"
|
|
12
|
-
gem.authors = ["Dmitry Mozzherin"]
|
|
13
|
-
gem.add_development_dependency "rspec", ">= 1.2.9"
|
|
14
|
-
gem.add_development_dependency "cucumber", ">= 0"
|
|
15
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
|
16
|
-
end
|
|
17
|
-
Jeweler::GemcutterTasks.new
|
|
18
|
-
rescue LoadError
|
|
19
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
require 'spec/rake/spectask'
|
|
23
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
|
24
|
-
spec.libs << 'lib' << 'spec'
|
|
25
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
|
26
|
-
end
|
|
3
|
+
require "bundler/gem_tasks"
|
|
4
|
+
require "rspec/core/rake_task"
|
|
5
|
+
require "cucumber/rake/task"
|
|
27
6
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
|
31
|
-
spec.rcov = true
|
|
7
|
+
RSpec::Core::RakeTask.new(:rspec) do |rspec|
|
|
8
|
+
rspec.pattern = "spec/**/*_spec.rb"
|
|
32
9
|
end
|
|
33
10
|
|
|
34
|
-
|
|
11
|
+
Cucumber::Rake::Task.new(:features)
|
|
35
12
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
Cucumber::Rake::Task.new(:features)
|
|
13
|
+
# task rspec: :check_dependencies
|
|
14
|
+
task features: :check_dependencies
|
|
39
15
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
abort "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
|
|
44
|
-
end
|
|
16
|
+
desc "open an irb session preloaded with this library"
|
|
17
|
+
task :console do
|
|
18
|
+
sh "irb -I lib -I extra -r dwc_archive.rb"
|
|
45
19
|
end
|
|
46
20
|
|
|
47
|
-
task :
|
|
48
|
-
|
|
49
|
-
require 'rake/rdoctask'
|
|
50
|
-
Rake::RDocTask.new do |rdoc|
|
|
51
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
52
|
-
|
|
53
|
-
rdoc.rdoc_dir = 'rdoc'
|
|
54
|
-
rdoc.title = "dwc-archive #{version}"
|
|
55
|
-
rdoc.rdoc_files.include('README*')
|
|
56
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
57
|
-
end
|
|
21
|
+
task default: :rspec
|
data/dwc-archive.gemspec
CHANGED
|
@@ -1,74 +1,36 @@
|
|
|
1
|
-
#
|
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
|
4
|
-
# -*- encoding: utf-8 -*-
|
|
1
|
+
# frozen_string_literal: true
|
|
5
2
|
|
|
6
|
-
|
|
7
|
-
s.name = %q{dwc-archive}
|
|
8
|
-
s.version = "0.1.3"
|
|
3
|
+
require File.expand_path("lib/dwc_archive/version", __dir__)
|
|
9
4
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
"LICENSE",
|
|
23
|
-
"README.rdoc",
|
|
24
|
-
"Rakefile",
|
|
25
|
-
"VERSION",
|
|
26
|
-
"dwc-archive.gemspec",
|
|
27
|
-
"features/dwc-archive.feature",
|
|
28
|
-
"features/step_definitions/dwc-archive_steps.rb",
|
|
29
|
-
"features/support/env.rb",
|
|
30
|
-
"lib/dwc-archive.rb",
|
|
31
|
-
"lib/dwc-archive/.expander.rb.swo",
|
|
32
|
-
"lib/dwc-archive/archive.rb",
|
|
33
|
-
"lib/dwc-archive/core.rb",
|
|
34
|
-
"lib/dwc-archive/expander.rb",
|
|
35
|
-
"lib/dwc-archive/extension.rb",
|
|
36
|
-
"lib/dwc-archive/metadata.rb",
|
|
37
|
-
"lib/ruby_extensions.rb",
|
|
38
|
-
"spec/dwc-archive_spec.rb",
|
|
39
|
-
"spec/files/data.tar.gz",
|
|
40
|
-
"spec/files/data.zip",
|
|
41
|
-
"spec/files/eml.xml",
|
|
42
|
-
"spec/files/meta.xml",
|
|
43
|
-
"spec/lib/ruby_extenstions_spec.rb",
|
|
44
|
-
"spec/spec.opts",
|
|
45
|
-
"spec/spec_helper.rb"
|
|
46
|
-
]
|
|
47
|
-
s.homepage = %q{http://github.com/dimus/dwc-archive}
|
|
48
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
|
49
|
-
s.require_paths = ["lib"]
|
|
50
|
-
s.rubygems_version = %q{1.3.6}
|
|
51
|
-
s.summary = %q{Handler of Darwin Core Archive files}
|
|
52
|
-
s.test_files = [
|
|
53
|
-
"spec/dwc-archive_spec.rb",
|
|
54
|
-
"spec/lib/ruby_extenstions_spec.rb",
|
|
55
|
-
"spec/spec_helper.rb"
|
|
56
|
-
]
|
|
5
|
+
Gem::Specification.new do |gem|
|
|
6
|
+
gem.name = "dwc-archive"
|
|
7
|
+
gem.version = DarwinCore::VERSION
|
|
8
|
+
gem.authors = ["Dmitry Mozzherin"]
|
|
9
|
+
gem.email = ["dmozzherin at gmail dot com"]
|
|
10
|
+
gem.description = "Darwin Core Archive is the current standard exchange " \
|
|
11
|
+
"format for GLobal Names Architecture modules. " \
|
|
12
|
+
"This gem makes it easy to incorporate files in " \
|
|
13
|
+
"Darwin Core Archive format into a ruby project."
|
|
14
|
+
gem.summary = "Handler of Darwin Core Archive files"
|
|
15
|
+
gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
|
|
16
|
+
gem.license = "MIT"
|
|
57
17
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
18
|
+
gem.required_ruby_version = ">= 2.6.0"
|
|
19
|
+
gem.files = `git ls-files`.split("\n").map(&:strip)
|
|
20
|
+
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
21
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
|
22
|
+
gem.require_paths = ["lib"]
|
|
61
23
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
else
|
|
66
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
|
67
|
-
s.add_dependency(%q<cucumber>, [">= 0"])
|
|
68
|
-
end
|
|
69
|
-
else
|
|
70
|
-
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
|
71
|
-
s.add_dependency(%q<cucumber>, [">= 0"])
|
|
72
|
-
end
|
|
73
|
-
end
|
|
24
|
+
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
|
25
|
+
gem.add_runtime_dependency "biodiversity", "~> 6.0"
|
|
26
|
+
gem.add_runtime_dependency "nokogiri", "~> 1.16"
|
|
74
27
|
|
|
28
|
+
gem.add_development_dependency "bundler", "~> 2.5"
|
|
29
|
+
gem.add_development_dependency "byebug", "~> 11.1"
|
|
30
|
+
gem.add_development_dependency "cucumber", "~> 9"
|
|
31
|
+
gem.add_development_dependency "git", "~> 2.3"
|
|
32
|
+
gem.add_development_dependency "rake", "~> 13.2"
|
|
33
|
+
gem.add_development_dependency "rspec", "~> 3.13"
|
|
34
|
+
gem.add_development_dependency "rubocop", "~> 1.66"
|
|
35
|
+
gem.add_development_dependency "ruby-lsp", "~> 0.17"
|
|
36
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
Feature: Creating and writing a Darwin Core Archive
|
|
2
|
+
In order to communicate with DwCA compatible programs
|
|
3
|
+
A User should be able to
|
|
4
|
+
Save data from ruby objects into Darwin Core Archive file
|
|
5
|
+
|
|
6
|
+
Scenario: Creating Core File
|
|
7
|
+
Given an array of urls for Darwin Core or other terms
|
|
8
|
+
And arrays of data in the order correpsonding to order of terms
|
|
9
|
+
When User creates generator
|
|
10
|
+
And User sends this data to core generator
|
|
11
|
+
Then these data should be saved as "darwin_core.txt" file
|
|
12
|
+
|
|
13
|
+
Scenario: Creating Extensions
|
|
14
|
+
Given 2 sets of data with terms as urls in the header
|
|
15
|
+
When User creates generator
|
|
16
|
+
And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
|
|
17
|
+
Then data are saved as "vernacular.txt" and "synonyms.txt"
|
|
18
|
+
|
|
19
|
+
Scenario: Creating metadata.xml and eml.xml
|
|
20
|
+
Given an array of urls for Darwin Core or other terms
|
|
21
|
+
And arrays of data in the order correpsonding to order of terms
|
|
22
|
+
And 2 sets of data with terms as urls in the header
|
|
23
|
+
When User creates generator
|
|
24
|
+
And User sends this data to core generator
|
|
25
|
+
And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
|
|
26
|
+
And User generates meta.xml and eml.xml
|
|
27
|
+
Then there should be "meta.xml" file with core and extensions informations
|
|
28
|
+
And there should be "eml.xml" file with authoriship information
|
|
29
|
+
|
|
30
|
+
Scenario: Making DarwinCore Archive file
|
|
31
|
+
Given an array of urls for Darwin Core or other terms
|
|
32
|
+
And arrays of data in the order correpsonding to order of terms
|
|
33
|
+
And 2 sets of data with terms as urls in the header
|
|
34
|
+
When User creates generator
|
|
35
|
+
And User sends this data to core generator
|
|
36
|
+
And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
|
|
37
|
+
And User generates meta.xml and eml.xml
|
|
38
|
+
And generates archive
|
|
39
|
+
Then there should be a valid new archive file
|
|
40
|
+
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Feature: Reading of a Darwin Core Archive
|
|
2
|
+
In order to start working with Darwin Core Archive file
|
|
3
|
+
A user should be able to initiate a dwc object from a file
|
|
4
|
+
So I want to implement handling of dwc object creation
|
|
5
|
+
|
|
6
|
+
Scenario: Creating Darwin Core Archive object
|
|
7
|
+
Given path to a dwc file "data.tar.gz"
|
|
8
|
+
When I create a new DarwinCore::Archive instance
|
|
9
|
+
Then I should find that the archive is valid
|
|
10
|
+
And I should see what files the archive has
|
|
11
|
+
|
|
12
|
+
When I delete expanded files
|
|
13
|
+
Then they should disappear
|
|
14
|
+
|
|
15
|
+
Scenario: Instantiating DarwinCore with a file without "eml.xml"
|
|
16
|
+
Given path to a dwc file "minimal.tar.gz"
|
|
17
|
+
When I create a new DarwinCore instance
|
|
18
|
+
Then "DarwinCore_instance.metadata.data" should send instance of "NilClass" back
|
|
19
|
+
|
|
20
|
+
Scenario: Instantiating DarwinCore with tar.gz file
|
|
21
|
+
Given path to a dwc file "data.tar.gz"
|
|
22
|
+
When I create a new DarwinCore instance
|
|
23
|
+
Then instance should have a valid archive
|
|
24
|
+
And instance should have a core
|
|
25
|
+
And I should see checksum
|
|
26
|
+
When I check core data
|
|
27
|
+
Then I should find core.properties
|
|
28
|
+
And core.file_path
|
|
29
|
+
And core.id
|
|
30
|
+
And core.fields
|
|
31
|
+
And core.size
|
|
32
|
+
Then DarwinCore instance should have an extensions array
|
|
33
|
+
And every extension in array should be an instance of DarwinCore::Extension
|
|
34
|
+
And extension should have properties, data, file_path, coreid, fields
|
|
35
|
+
Then DarwinCore instance should have dwc.metadata object
|
|
36
|
+
And I should find id, title, creators, metadata provider
|
|
37
|
+
|
|
38
|
+
Scenario: Instantiating DarwinCore with zip file
|
|
39
|
+
Given path to a dwc file "data.zip"
|
|
40
|
+
When I create a new DarwinCore instance
|
|
41
|
+
Then instance should have a valid archive
|
|
42
|
+
|
|
43
|
+
Scenario: Cleaning temporary directory from expanded archives
|
|
44
|
+
Given acces to DarwinCore gem
|
|
45
|
+
When I use DarwinCore.clean_all method
|
|
46
|
+
Then all temporary directories created by DarwinCore are deleted
|
|
47
|
+
|
|
48
|
+
Scenario: Importing data into memory
|
|
49
|
+
Given path to a dwc file "data.tar.gz"
|
|
50
|
+
When I create a new DarwinCore instance
|
|
51
|
+
Then I can read its content into memory
|
|
52
|
+
Then I can read extensions content into memory
|
|
53
|
+
|
|
54
|
+
Scenario: Importing data with quotes
|
|
55
|
+
Given path to a dwc file "data_with_quotes.tar.gz"
|
|
56
|
+
When I create a new DarwinCore instance
|
|
57
|
+
Then I can read its content into memory
|
|
58
|
+
|
|
59
|
+
Scenario: Importing data using block
|
|
60
|
+
Given path to a dwc file "data.tar.gz"
|
|
61
|
+
When I create a new DarwinCore instance
|
|
62
|
+
Then I can read its core content using block
|
|
63
|
+
Then I can read extensions content using block
|
|
64
|
+
|
|
65
|
+
Scenario: Normalizing classification
|
|
66
|
+
Given path to a dwc file "data.tar.gz"
|
|
67
|
+
When I create a new DarwinCore instance
|
|
68
|
+
Then I am able to use DarwinCore#normalize_classification method
|
|
69
|
+
And get normalized classification in expected format
|
|
70
|
+
And there are paths, synonyms and vernacular names in normalized classification
|
|
71
|
+
And there are local_id and global_id methods in taxons and synonyms
|
|
72
|
+
And names used in classification can be accessed by "name_strings" method
|
|
73
|
+
And vernacular names used in classification can be accessed by "vernacular_name_strings" method
|
|
74
|
+
And nodes_ids organized in trees can be accessed by "tree" method
|
|
75
|
+
|
|
76
|
+
Scenario: Normalizing classification without canonical names
|
|
77
|
+
Given path to a dwc file "data.tar.gz"
|
|
78
|
+
When I create a new DarwinCore instance
|
|
79
|
+
Then I am able to use DarwinCore#normalize_classification method
|
|
80
|
+
Then I am able to use normalize method without canonical names path
|
|
81
|
+
And get normalized classification in expected format
|
|
82
|
+
And there are id paths, no canonical names paths in normalized classification
|
|
83
|
+
|
|
84
|
+
Scenario: Normalizing classification skipping extensions data
|
|
85
|
+
Given path to a dwc file "synonyms_in_extension.tar.gz"
|
|
86
|
+
When I create a new DarwinCore instance
|
|
87
|
+
Then I am able to use normalize method without ingesting extensions
|
|
88
|
+
And extension information is not ingested
|