dwc-archive 0.1.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +35 -0
  3. data/.gitignore +10 -0
  4. data/.rspec +3 -0
  5. data/.rubocop.yml +23 -0
  6. data/.ruby-version +1 -0
  7. data/CHANGELOG +21 -0
  8. data/Gemfile +5 -0
  9. data/LICENSE +1 -1
  10. data/README.md +203 -0
  11. data/Rakefile +13 -49
  12. data/dwc-archive.gemspec +31 -69
  13. data/features/dwca-creator.feature +40 -0
  14. data/features/dwca-reader.feature +88 -0
  15. data/features/step_definitions/dwc-creator_steps.rb +112 -0
  16. data/features/step_definitions/dwc-reader_steps.rb +294 -0
  17. data/features/support/env.rb +2 -2
  18. data/lib/dwc_archive/archive.rb +60 -0
  19. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  20. data/lib/dwc_archive/core.rb +25 -0
  21. data/lib/dwc_archive/errors.rb +21 -0
  22. data/lib/dwc_archive/expander.rb +88 -0
  23. data/lib/dwc_archive/extension.rb +19 -0
  24. data/lib/dwc_archive/generator.rb +91 -0
  25. data/lib/dwc_archive/generator_eml_xml.rb +116 -0
  26. data/lib/dwc_archive/generator_meta_xml.rb +72 -0
  27. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  28. data/lib/dwc_archive/ingester.rb +111 -0
  29. data/lib/dwc_archive/metadata.rb +57 -0
  30. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  31. data/lib/dwc_archive/version.rb +6 -0
  32. data/lib/dwc_archive/xml_reader.rb +90 -0
  33. data/lib/dwc_archive.rb +124 -0
  34. data/spec/files/broken.tar.gz +0 -0
  35. data/spec/files/data.tar.gz +0 -0
  36. data/spec/files/data_with_quotes.tar.gz +0 -0
  37. data/spec/files/eml.xml +36 -36
  38. data/spec/files/empty_coreid.tar.gz +0 -0
  39. data/spec/files/file with characters(3).tar.gz +0 -0
  40. data/spec/files/flat_list.tar.gz +0 -0
  41. data/spec/files/generator_eml.xml +47 -0
  42. data/spec/files/generator_meta.xml +19 -0
  43. data/spec/files/gnub.tar.gz +0 -0
  44. data/spec/files/invalid.tar.gz +0 -0
  45. data/spec/files/junk_dir_inside.zip +0 -0
  46. data/spec/files/language_locality.tar.gz +0 -0
  47. data/spec/files/latin1.tar.gz +0 -0
  48. data/spec/files/linnean.tar.gz +0 -0
  49. data/spec/files/meta.xml +19 -19
  50. data/spec/files/minimal.tar.gz +0 -0
  51. data/spec/files/not_synonym_in_extension.tar.gz +0 -0
  52. data/spec/files/sci_name_authorship.tar.gz +0 -0
  53. data/spec/files/sci_name_authorship_dup.tar.gz +0 -0
  54. data/spec/files/synonyms_in_core_accepted_name_field.tar.gz +0 -0
  55. data/spec/files/synonyms_in_extension.tar.gz +0 -0
  56. data/spec/files/uncompressed +1 -0
  57. data/spec/lib/classification_normalizer_spec.rb +214 -0
  58. data/spec/lib/core_spec.rb +100 -0
  59. data/spec/lib/darwin_core_spec.rb +252 -0
  60. data/spec/lib/generator_eml_xml_spec.rb +22 -0
  61. data/spec/lib/generator_meta_xml_spec.rb +22 -0
  62. data/spec/lib/generator_spec.rb +124 -0
  63. data/spec/lib/gnub_taxon_spec.rb +32 -0
  64. data/spec/lib/metadata_spec.rb +89 -0
  65. data/spec/lib/taxon_normalized_spec.rb +142 -0
  66. data/spec/lib/xml_reader_spec.rb +15 -0
  67. data/spec/spec_helper.rb +79 -7
  68. metadata +266 -88
  69. data/README.rdoc +0 -38
  70. data/VERSION +0 -1
  71. data/features/dwc-archive.feature +0 -34
  72. data/features/step_definitions/dwc-archive_steps.rb +0 -95
  73. data/lib/dwc-archive/.expander.rb.swo +0 -0
  74. data/lib/dwc-archive/archive.rb +0 -37
  75. data/lib/dwc-archive/core.rb +0 -34
  76. data/lib/dwc-archive/expander.rb +0 -71
  77. data/lib/dwc-archive/extension.rb +0 -31
  78. data/lib/dwc-archive/metadata.rb +0 -38
  79. data/lib/dwc-archive.rb +0 -29
  80. data/lib/ruby_extensions.rb +0 -64
  81. data/spec/dwc-archive_spec.rb +0 -7
  82. data/spec/lib/ruby_extenstions_spec.rb +0 -15
  83. data/spec/spec.opts +0 -1
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b35d0c900fbb815f4955e59306c6cc5a134f4c5094adfc98576ac72249e82566
4
+ data.tar.gz: 8f8f58062273f5d5e6afb1084aed358d7585e8efac107e7fdf723cfa93e0d0bc
5
+ SHA512:
6
+ metadata.gz: 39cd3818632795b6d49ea9c7affbc565c8d8a7c9aa48b1acf2e00b6c8743b80ed58f5d6a0b7e26ea5375f1e3ba5e2dc91b59fd9820cda0918d254ae4739d930e
7
+ data.tar.gz: 82f055dd0ed152a516eade9df4efe12cc0aaded52c0c1054f25340d07f0a2cd9f11fd2ca6873e3cf9adab7b3404f892c8c07c5953ada133c2a53d071fc8fe858
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: build
9
+
10
+ on:
11
+ push:
12
+ branches: [ master ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.6', '2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -17,5 +17,15 @@ tmtags
17
17
  coverage
18
18
  rdoc
19
19
  pkg
20
+ install
21
+ Gemfile.lock
20
22
 
21
23
  ## PROJECT::SPECIFIC
24
+ tags
25
+ bin
26
+ .bundle
27
+ bundle_bin
28
+ Gemfile.lock
29
+ .byebug_history
30
+
31
+
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format p
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,23 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+ Exclude:
4
+ - features/**/*
5
+ - .bundle/**/*
6
+ - bundle_bin/**/*
7
+ - lib/dwc_archive/ingester.rb
8
+ - lib/dwc_archive/generator_meta_xml.rb
9
+ - lib/dwc_archive/generator_eml_xml.rb
10
+ - lib/dwc_archive/classification_normalizer.rb
11
+
12
+ Style/StringLiterals:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Layout/DotPosition:
16
+ EnforcedStyle: trailing
17
+
18
+ Metrics/BlockLength:
19
+ Exclude:
20
+ - 'Rakefile'
21
+ - '*.gemspec'
22
+ - '**/*.rake'
23
+ - 'spec/**/*spec.rb'
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.3.5
data/CHANGELOG ADDED
@@ -0,0 +1,21 @@
1
+ 1.1.0 Update name parser to go-based biodiversity
2
+
3
+ 1.0.1 Cleanup and gems update
4
+
5
+ 0.9.11 Removed VERSION duplicate
6
+
7
+ 0.9.7 Refactoring and tests improvements
8
+
9
+ 0.9.6 Added support for GNUB DwCA files
10
+
11
+ 0.9.4 Gem dependencies updated, added travis support
12
+
13
+ 0.9.0 Migrated code to ruby 1.9.3
14
+
15
+ 0.8.3 Updated outdated exception raising
16
+
17
+ 0.8.2 Removed species info from linnean classification path
18
+
19
+ 0.8.1 Linnean classification path is now only for species and infraspecies with canonical forms. It ends with a canonical form of the taxon
20
+
21
+ 0.8.0 Added linnean classification path to normalized data from DwCA. It consists of data associated with clades like 'kingdom', 'order' etc.
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010 Dmitry Mozzherin
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md ADDED
@@ -0,0 +1,203 @@
1
+ Darwin Core Archive
2
+ ===================
3
+
4
+ [![Gem Version][1]][2]
5
+ [![Continuous Integration Status][3]][4]
6
+ [![Coverage Status][5]][6]
7
+ [![CodePolice][7]][8]
8
+ [![Dependency Status][9]][10]
9
+
10
+ Darwin Core Archive format is a current standard for information exchange
11
+ between Global Names Architecture modules. This gem allows you to work with
12
+ Darwin Core Archive data compressed to either zip or tar.gz files.
13
+ More information about Darwin Core Archive can be found on a [GBIF page:][11]
14
+
15
+ Installation
16
+ ------------
17
+
18
+ sudo gem install dwc-archive
19
+
20
+ ### System Requirements
21
+
22
+ You need [Redis Server][12] and unzip library installed
23
+
24
+
25
+ Usage
26
+ -----
27
+
28
+ ```ruby
29
+ require 'rubygems'
30
+ require 'dwc_archive'
31
+
32
+ dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
33
+ dwc.archive.files # the archive file list
34
+ dwc.metadata.data # summary of metadata from eml.xml if it exists
35
+ dwc.metadata.authors # authors of the archive
36
+ dwc.core.data # summary of DarwinCore main file
37
+ dwc.core.file_path # path to the DarwinCore main file
38
+ dwc.extensions # array of DarwinCore Star extensions
39
+ dwc.extensions[0].data # summary for an extension
40
+
41
+ # read content of the core data file into memory, or use it with a block
42
+ # it returns array of arrays of data
43
+ # rows that had a wrong encoding will be collected into errors array
44
+ data, errors = dwc.core.read
45
+
46
+ # read content using a block, getting back results in sets of 100 rows each
47
+ results = []
48
+ tail_data, tail_errors = dwc.core.read(100) do |data, errors|
49
+ results << [data, errors]
50
+ end
51
+ results << [tail_data, tail_errors]
52
+
53
+ # read content of an extension data file into memory
54
+ data, errors = dwc.core.extensions[0].read
55
+
56
+ # read content of an extension data using block
57
+ results = []
58
+ tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
59
+ results << [data, errors]
60
+ end
61
+ results << [tail_data, tail_errors]
62
+
63
+ # normalize names in classification collecting together synonyms,
64
+ # canonical names, vernacular names and associating paths to taxons
65
+ # in a classification distributed as DwCA file
66
+
67
+ result = dwc.normalize_classification
68
+
69
+ # for a finer control over normalization:
70
+
71
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
72
+ cn.normalize
73
+ # if you don't want to generate path consisting of canonical forms
74
+ # of ancestors to a taxon
75
+ cn.normalize(:with_canonical_names => false)
76
+
77
+ # if you don't want to ingest information from extensions
78
+ cn.normalize(:with_extensions => false)
79
+
80
+ # to get a flat hash of nodes with attached vernacular names and synonyms
81
+ normalized_data = cn.normalized_data
82
+
83
+ # to get a representation of tree organization as a hash
84
+ classification_tree = cn.tree
85
+
86
+ # to get list of all name strings used as scientific or vernacular names
87
+ all_name_strings = cn.name_strings
88
+
89
+ # to get list of errors generated during the normalization
90
+ errors = cn.error_names
91
+
92
+ DarwinCore.clean_all # remove all expanded archives
93
+ ```
94
+
95
+ Creating a DarwinCore Archive file
96
+ ----------------------------------
97
+
98
+ ```ruby
99
+ gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
100
+
101
+ core = [
102
+ ["http://rs.tdwg.org/dwc/terms/taxonID",
103
+ "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
104
+ "http://rs.tdwg.org/dwc/terms/scientificName",
105
+ "http://rs.tdwg.org/dwc/terms/taxonRank"],
106
+ [1, 0, "Plantae", "kingdom"],
107
+ [2, 1, "Betula", "genus"],
108
+ [3, 2, "Betula verucosa", "species"]
109
+ ]
110
+
111
+ vernacular_names = [
112
+ ["http://rs.tdwg.org/dwc/terms/TaxonID",
113
+ "http://rs.tdwg.org/dwc/terms/vernacularName"],
114
+ [1, "Plants"],
115
+ [1, "Растения"],
116
+ [2, "Birch"],
117
+ [2, "Береза"],
118
+ [3, "Wheeping Birch"],
119
+ [3, "Береза плакучая"]
120
+ ]
121
+
122
+ eml = {
123
+ :id => '1234',
124
+ :license => 'http://creativecommons.org/licenses/by-sa/3.0/',
125
+ :title => 'Test Classification',
126
+ :authors => [
127
+ { :first_name => 'John',
128
+ :last_name => 'Doe',
129
+ :email => 'jdoe@example.com',
130
+ :organization => 'Example',
131
+ :position => 'Assistant Professor',
132
+ :url => 'http://example.org' },
133
+ { :first_name => 'Jane',
134
+ :last_name => 'Doe',
135
+ :email => 'jane@example.com' }
136
+ ],
137
+ :metadata_providers => [
138
+ { :first_name => 'Jim',
139
+ :last_name => 'Doe',
140
+ :email => 'jimdoe@example.com',
141
+ :url => 'http://aggregator.example.org' }],
142
+ :abstract => 'test classification',
143
+ :citation =>
144
+ 'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
145
+ :url => 'http://example.com'
146
+ }
147
+
148
+ gen.add_core(core, 'core.txt')
149
+ gen.add_extension(vernacular_names,
150
+ 'vernacular_names.txt',
151
+ true, 'http://rs.gbif.org/terms/1.0/VernacularName')
152
+ gen.add_meta_xml
153
+ gen.add_eml_xml(eml)
154
+ gen.pack
155
+ ```
156
+
157
+ Logging
158
+ -------
159
+
160
+ The gem has the ability to show logs of its events:
161
+
162
+ require 'dwc_archive'
163
+ DarwinCore.logger = Logger.new($stdout)
164
+
165
+
166
+ Note on Patches/Pull Requests
167
+ -----------------------------
168
+
169
+ * Fork the project.
170
+ * Make your feature addition or bug fix.
171
+ * Add tests for it. This is important so I don't break it in a
172
+ future version unintentionally.
173
+ * Commit, do not mess with rakefile, version, or history.
174
+ (if you want to have your own version, that is fine but bump
175
+ version in a commit by itself I can ignore when I pull)
176
+ * Send me a pull request. Bonus points for topic branches.
177
+
178
+
179
+ Copyright
180
+ ---------
181
+
182
+ Author -- [@dimus][13]
183
+
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
185
+
186
+ Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
187
+
188
+ [1]: https://badge.fury.io/rb/dwc-archive.png
189
+ [2]: http://badge.fury.io/rb/dwc-archive
190
+ [3]: https://secure.travis-ci.org/GlobalNamesArchitecture/dwc-archive.png
191
+ [4]: http://travis-ci.org/GlobalNamesArchitecture/dwc-archive
192
+ [5]: https://coveralls.io/repos/GlobalNamesArchitecture/dwc-archive/badge.png
193
+ [6]: https://coveralls.io/r/GlobalNamesArchitecture/dwc-archive
194
+ [7]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive.png
195
+ [8]: https://codeclimate.com/github/GlobalNamesArchitecture/dwc-archive
196
+ [9]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive.png
197
+ [10]: https://gemnasium.com/GlobalNamesArchitecture/dwc-archive
198
+ [11]: http://bit.ly/2IxcBA
199
+ [12]: http://redis.io/topics/quickstart
200
+ [13]: https://github.com/dimus
201
+ [14]: https://github.com/mjy
202
+ [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
data/Rakefile CHANGED
@@ -1,57 +1,21 @@
1
- require 'rubygems'
2
- require 'rake'
1
+ # frozen_string_literal: true
3
2
 
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "dwc-archive"
8
- gem.summary = %Q{Handler of Darwin Core Archive files}
9
- gem.description = %Q{Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.}
10
- gem.email = "dmozzherin at gmail dot com"
11
- gem.homepage = "http://github.com/dimus/dwc-archive"
12
- gem.authors = ["Dmitry Mozzherin"]
13
- gem.add_development_dependency "rspec", ">= 1.2.9"
14
- gem.add_development_dependency "cucumber", ">= 0"
15
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
- end
17
- Jeweler::GemcutterTasks.new
18
- rescue LoadError
19
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
- end
21
-
22
- require 'spec/rake/spectask'
23
- Spec::Rake::SpecTask.new(:spec) do |spec|
24
- spec.libs << 'lib' << 'spec'
25
- spec.spec_files = FileList['spec/**/*_spec.rb']
26
- end
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "cucumber/rake/task"
27
6
 
28
- Spec::Rake::SpecTask.new(:rcov) do |spec|
29
- spec.libs << 'lib' << 'spec'
30
- spec.pattern = 'spec/**/*_spec.rb'
31
- spec.rcov = true
7
+ RSpec::Core::RakeTask.new(:rspec) do |rspec|
8
+ rspec.pattern = "spec/**/*_spec.rb"
32
9
  end
33
10
 
34
- task :spec => :check_dependencies
11
+ Cucumber::Rake::Task.new(:features)
35
12
 
36
- begin
37
- require 'cucumber/rake/task'
38
- Cucumber::Rake::Task.new(:features)
13
+ # task rspec: :check_dependencies
14
+ task features: :check_dependencies
39
15
 
40
- task :features => :check_dependencies
41
- rescue LoadError
42
- task :features do
43
- abort "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
44
- end
16
+ desc "open an irb session preloaded with this library"
17
+ task :console do
18
+ sh "irb -I lib -I extra -r dwc_archive.rb"
45
19
  end
46
20
 
47
- task :default => :spec
48
-
49
- require 'rake/rdoctask'
50
- Rake::RDocTask.new do |rdoc|
51
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
52
-
53
- rdoc.rdoc_dir = 'rdoc'
54
- rdoc.title = "dwc-archive #{version}"
55
- rdoc.rdoc_files.include('README*')
56
- rdoc.rdoc_files.include('lib/**/*.rb')
57
- end
21
+ task default: :rspec
data/dwc-archive.gemspec CHANGED
@@ -1,74 +1,36 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
- # -*- encoding: utf-8 -*-
1
+ # frozen_string_literal: true
5
2
 
6
- Gem::Specification.new do |s|
7
- s.name = %q{dwc-archive}
8
- s.version = "0.1.3"
3
+ require File.expand_path("lib/dwc_archive/version", __dir__)
9
4
 
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Dmitry Mozzherin"]
12
- s.date = %q{2010-03-18}
13
- s.description = %q{Darwin Core Archive Files are current standard exchange format for GLobal Names Architecture modules. This project creates ways to work with such files.}
14
- s.email = %q{dmozzherin at gmail dot com}
15
- s.extra_rdoc_files = [
16
- "LICENSE",
17
- "README.rdoc"
18
- ]
19
- s.files = [
20
- ".document",
21
- ".gitignore",
22
- "LICENSE",
23
- "README.rdoc",
24
- "Rakefile",
25
- "VERSION",
26
- "dwc-archive.gemspec",
27
- "features/dwc-archive.feature",
28
- "features/step_definitions/dwc-archive_steps.rb",
29
- "features/support/env.rb",
30
- "lib/dwc-archive.rb",
31
- "lib/dwc-archive/.expander.rb.swo",
32
- "lib/dwc-archive/archive.rb",
33
- "lib/dwc-archive/core.rb",
34
- "lib/dwc-archive/expander.rb",
35
- "lib/dwc-archive/extension.rb",
36
- "lib/dwc-archive/metadata.rb",
37
- "lib/ruby_extensions.rb",
38
- "spec/dwc-archive_spec.rb",
39
- "spec/files/data.tar.gz",
40
- "spec/files/data.zip",
41
- "spec/files/eml.xml",
42
- "spec/files/meta.xml",
43
- "spec/lib/ruby_extenstions_spec.rb",
44
- "spec/spec.opts",
45
- "spec/spec_helper.rb"
46
- ]
47
- s.homepage = %q{http://github.com/dimus/dwc-archive}
48
- s.rdoc_options = ["--charset=UTF-8"]
49
- s.require_paths = ["lib"]
50
- s.rubygems_version = %q{1.3.6}
51
- s.summary = %q{Handler of Darwin Core Archive files}
52
- s.test_files = [
53
- "spec/dwc-archive_spec.rb",
54
- "spec/lib/ruby_extenstions_spec.rb",
55
- "spec/spec_helper.rb"
56
- ]
5
+ Gem::Specification.new do |gem|
6
+ gem.name = "dwc-archive"
7
+ gem.version = DarwinCore::VERSION
8
+ gem.authors = ["Dmitry Mozzherin"]
9
+ gem.email = ["dmozzherin at gmail dot com"]
10
+ gem.description = "Darwin Core Archive is the current standard exchange " \
11
+ "format for GLobal Names Architecture modules. " \
12
+ "This gem makes it easy to incorporate files in " \
13
+ "Darwin Core Archive format into a ruby project."
14
+ gem.summary = "Handler of Darwin Core Archive files"
15
+ gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
16
+ gem.license = "MIT"
57
17
 
58
- if s.respond_to? :specification_version then
59
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
60
- s.specification_version = 3
18
+ gem.required_ruby_version = ">= 2.6.0"
19
+ gem.files = `git ls-files`.split("\n").map(&:strip)
20
+ gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
22
+ gem.require_paths = ["lib"]
61
23
 
62
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
63
- s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
64
- s.add_development_dependency(%q<cucumber>, [">= 0"])
65
- else
66
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
67
- s.add_dependency(%q<cucumber>, [">= 0"])
68
- end
69
- else
70
- s.add_dependency(%q<rspec>, [">= 1.2.9"])
71
- s.add_dependency(%q<cucumber>, [">= 0"])
72
- end
73
- end
24
+ # gem.add_runtime_dependency "minitar", "~> 0.6"
25
+ gem.add_runtime_dependency "biodiversity", "~> 6.0"
26
+ gem.add_runtime_dependency "nokogiri", "~> 1.16"
74
27
 
28
+ gem.add_development_dependency "bundler", "~> 2.5"
29
+ gem.add_development_dependency "byebug", "~> 11.1"
30
+ gem.add_development_dependency "cucumber", "~> 9"
31
+ gem.add_development_dependency "git", "~> 2.3"
32
+ gem.add_development_dependency "rake", "~> 13.2"
33
+ gem.add_development_dependency "rspec", "~> 3.13"
34
+ gem.add_development_dependency "rubocop", "~> 1.66"
35
+ gem.add_development_dependency "ruby-lsp", "~> 0.17"
36
+ end
@@ -0,0 +1,40 @@
1
+ Feature: Creating and writing a Darwin Core Archive
2
+ In order to communicate with DwCA compatible programs
3
+ A User should be able to
4
+ Save data from ruby objects into Darwin Core Archive file
5
+
6
+ Scenario: Creating Core File
7
+ Given an array of urls for Darwin Core or other terms
8
+ And arrays of data in the order correpsonding to order of terms
9
+ When User creates generator
10
+ And User sends this data to core generator
11
+ Then these data should be saved as "darwin_core.txt" file
12
+
13
+ Scenario: Creating Extensions
14
+ Given 2 sets of data with terms as urls in the header
15
+ When User creates generator
16
+ And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
17
+ Then data are saved as "vernacular.txt" and "synonyms.txt"
18
+
19
+ Scenario: Creating metadata.xml and eml.xml
20
+ Given an array of urls for Darwin Core or other terms
21
+ And arrays of data in the order correpsonding to order of terms
22
+ And 2 sets of data with terms as urls in the header
23
+ When User creates generator
24
+ And User sends this data to core generator
25
+ And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
26
+ And User generates meta.xml and eml.xml
27
+ Then there should be "meta.xml" file with core and extensions informations
28
+ And there should be "eml.xml" file with authoriship information
29
+
30
+ Scenario: Making DarwinCore Archive file
31
+ Given an array of urls for Darwin Core or other terms
32
+ And arrays of data in the order correpsonding to order of terms
33
+ And 2 sets of data with terms as urls in the header
34
+ When User creates generator
35
+ And User sends this data to core generator
36
+ And User adds extensions with file names "vernacular.txt" and "synonyms.txt"
37
+ And User generates meta.xml and eml.xml
38
+ And generates archive
39
+ Then there should be a valid new archive file
40
+
@@ -0,0 +1,88 @@
1
+ Feature: Reading of a Darwin Core Archive
2
+ In order to start working with Darwin Core Archive file
3
+ A user should be able to initiate dwc object from a file
4
+ So I want to implement handling of dwc object creation
5
+
6
+ Scenario: Creating Darwin Core Archive object
7
+ Given path to a dwc file "data.tar.gz"
8
+ When I create a new DarwinCore::Archive instance
9
+ Then I should find that the archive is valid
10
+ And I should see what files the archive has
11
+
12
+ When I delete expanded files
13
+ Then they should disappear
14
+
15
+ Scenario: Instantiating DarwinCore with a file without "eml.xml"
16
+ Given path to a dwc file "minimal.tar.gz"
17
+ When I create a new DarwinCore instance
18
+ Then "DarwinCore_instance.metadata.data" should send instance of "NilClass" back
19
+
20
+ Scenario: Instantiating DarwinCore with tar.gz file
21
+ Given path to a dwc file "data.tar.gz"
22
+ When I create a new DarwinCore instance
23
+ Then instance should have a valid archive
24
+ And instance should have a core
25
+ And I should see checksum
26
+ When I check core data
27
+ Then I should find core.properties
28
+ And core.file_path
29
+ And core.id
30
+ And core.fields
31
+ And core.size
32
+ Then DarwinCore instance should have an extensions array
33
+ And every extension in array should be an instance of DarwinCore::Extension
34
+ And extension should have properties, data, file_path, coreid, fields
35
+ Then DarwinCore instance should have dwc.metadata object
36
+ And I should find id, title, creators, metadata provider
37
+
38
+ Scenario: Instantiating DarwinCore with zip file
39
+ Given path to a dwc file "data.zip"
40
+ When I create a new DarwinCore instance
41
+ Then instance should have a valid archive
42
+
43
+ Scenario: Cleaning temporary directory from expanded archives
44
+ Given acces to DarwinCore gem
45
+ When I use DarwinCore.clean_all method
46
+ Then all temporary directories created by DarwinCore are deleted
47
+
48
+ Scenario: Importing data into memory
49
+ Given path to a dwc file "data.tar.gz"
50
+ When I create a new DarwinCore instance
51
+ Then I can read its content into memory
52
+ Then I can read extensions content into memory
53
+
54
+ Scenario: Importing data with quotes
55
+ Given path to a dwc file "data_with_quotes.tar.gz"
56
+ When I create a new DarwinCore instance
57
+ Then I can read its content into memory
58
+
59
+ Scenario: Importing data using block
60
+ Given path to a dwc file "data.tar.gz"
61
+ When I create a new DarwinCore instance
62
+ Then I can read its core content using block
63
+ Then I can read extensions content using block
64
+
65
+ Scenario: Normalizing classification
66
+ Given path to a dwc file "data.tar.gz"
67
+ When I create a new DarwinCore instance
68
+ Then I am able to use DarwinCore#normalize_classification method
69
+ And get normalized classification in expected format
70
+ And there are paths, synonyms and vernacular names in normalized classification
71
+ And there are local_id and global_id methods in taxons and synonyms
72
+ And names used in classification can be accessed by "name_strings" method
73
+ And vernacular names used in classification can be accessed by "vernacular_name_strings" method
74
+ And nodes_ids organized in trees can be accessed by "tree" method
75
+
76
+ Scenario: Normalizing classification without canonical names
77
+ Given path to a dwc file "data.tar.gz"
78
+ When I create a new DarwinCore instance
79
+ Then I am able to use DarwinCore#normalize_classification method
80
+ Then I am able to use normalize method without canonical names path
81
+ And get normalized classification in expected format
82
+ And there are id paths, no canonical names paths in normalized classification
83
+
84
+ Scenario: Normalizing classification skipping extensions data
85
+ Given path to a dwc file "synonyms_in_extension.tar.gz"
86
+ When I create a new DarwinCore instance
87
+ Then I am able to use normalize method without ingesting extensions
88
+ And extension information is not ingested