dwc-archive 0.9.11 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +4 -0
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +114 -109
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/{dwc-archive → dwc_archive}/generator_eml_xml.rb +40 -33
  25. data/lib/{dwc-archive → dwc_archive}/generator_meta_xml.rb +21 -20
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/{file with characters(3).gz → file with characters(3).tar.gz} +0 -0
  33. data/spec/files/generator_eml.xml +1 -1
  34. data/spec/lib/classification_normalizer_spec.rb +96 -105
  35. data/spec/lib/core_spec.rb +43 -41
  36. data/spec/lib/darwin_core_spec.rb +108 -138
  37. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  38. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  39. data/spec/lib/generator_spec.rb +73 -74
  40. data/spec/lib/gnub_taxon_spec.rb +15 -17
  41. data/spec/lib/metadata_spec.rb +50 -41
  42. data/spec/lib/taxon_normalized_spec.rb +62 -65
  43. data/spec/lib/xml_reader_spec.rb +9 -12
  44. data/spec/spec_helper.rb +54 -51
  45. metadata +101 -87
  46. data/.rvmrc +0 -1
  47. data/lib/dwc-archive.rb +0 -107
  48. data/lib/dwc-archive/archive.rb +0 -40
  49. data/lib/dwc-archive/classification_normalizer.rb +0 -427
  50. data/lib/dwc-archive/core.rb +0 -19
  51. data/lib/dwc-archive/expander.rb +0 -85
  52. data/lib/dwc-archive/generator.rb +0 -86
  53. data/lib/dwc-archive/ingester.rb +0 -101
  54. data/lib/dwc-archive/metadata.rb +0 -48
  55. data/lib/dwc-archive/version.rb +0 -3
  56. data/lib/dwc-archive/xml_reader.rb +0 -80
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
4
- data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
2
+ SHA256:
3
+ metadata.gz: fb7ca119f1b5dd9f1657772d0fb8e80df64909f0739113a00366441045865e46
4
+ data.tar.gz: aa8fab9a533682f6ea4907b52dcae3b63d320331860c8936819815f84a26f9e1
5
5
  SHA512:
6
- metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
7
- data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
6
+ metadata.gz: b122be118cd7804cc299ea465ced05aba037df1e9265b8940fd7db369f73a72483fa9e11b1c6571e85c7ff68282c9512b25b73ae2095da0d96d00b3c18739da4
7
+ data.tar.gz: 4f787964ca02539f02ab97d7e0d3106fd6ae67331718a92a4313337c219bdf054921702cfed2d116147ff7b5d79405e63b483f59639371f3ca7e83b68b51b52f
data/.gitignore CHANGED
@@ -26,5 +26,6 @@ bin
26
26
  .bundle
27
27
  bundle_bin
28
28
  Gemfile.lock
29
+ .byebug_history
29
30
 
30
31
 
data/.rspec CHANGED
@@ -1,2 +1,3 @@
1
- --format nested
1
+ --format p
2
2
  --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,23 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+ Exclude:
4
+ - features/**/*
5
+ - .bundle/**/*
6
+ - bundle_bin/**/*
7
+ - lib/dwc_archive/ingester.rb
8
+ - lib/dwc_archive/generator_meta_xml.rb
9
+ - lib/dwc_archive/generator_eml_xml.rb
10
+ - lib/dwc_archive/classification_normalizer.rb
11
+
12
+ Style/StringLiterals:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Layout/DotPosition:
16
+ EnforcedStyle: trailing
17
+
18
+ Metrics/BlockLength:
19
+ Exclude:
20
+ - 'Rakefile'
21
+ - '*.gemspec'
22
+ - '**/*.rake'
23
+ - 'spec/**/*spec.rb'
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.0.0-p353
1
+ 2.7.1
data/.travis.yml CHANGED
@@ -1,16 +1,13 @@
1
1
  rvm:
2
- - 1.9.3-p484
3
- - 2.0.0-p353
2
+ - 2.5
3
+ - 2.6
4
+ - 2.7
4
5
  before_install:
5
6
  - sudo apt-get update
6
- - gem install debugger
7
- # bundler_args: --without development
8
- services:
9
- - redis-server
7
+ - gem install bundler
10
8
  script:
11
9
  - bundle exec cucumber
12
10
  - bundle exec rake
13
11
  branches:
14
12
  only:
15
13
  - master
16
-
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ 1.1.0 Update name parser to go-based biodiversity
2
+
3
+ 1.0.1 Cleanup and gems update
4
+
1
5
  0.9.11 Removed VERSION duplicate
2
6
 
3
7
  0.9.7 Refactoring and tests improvements
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
- source 'https://rubygems.org'
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
2
4
 
3
5
  gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratory
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -19,136 +19,140 @@ Installation
19
19
 
20
20
  ### System Requirements
21
21
 
22
- You need [Redis Server][12] and unzip library installed
22
+ You need [Redis Server][12] and unzip library installed
23
23
 
24
24
 
25
25
  Usage
26
26
  -----
27
27
 
28
- require 'rubygems'
29
- require 'dwc-archive'
28
+ ```ruby
29
+ require 'rubygems'
30
+ require 'dwc_archive'
30
31
 
31
- dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
32
- dwc.archive.files # the archive file list
33
- dwc.metadata.data # summary of metadata from eml.xml if it exists
34
- dwc.metadata.authors # authors of the archive
35
- dwc.core.data # summary of DarwinCore main file
36
- dwc.core.file_path # path to the DarwinCore main file
37
- dwc.extensions # array of DarwinCore Star extensions
38
- dwc.extensions[0].data # summary for an extension
32
+ dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
33
+ dwc.archive.files # the archive file list
34
+ dwc.metadata.data # summary of metadata from eml.xml if it exists
35
+ dwc.metadata.authors # authors of the archive
36
+ dwc.core.data # summary of DarwinCore main file
37
+ dwc.core.file_path # path to the DarwinCore main file
38
+ dwc.extensions # array of DarwinCore Star extensions
39
+ dwc.extensions[0].data # summary for an extension
39
40
 
40
- # read content of the core data file into memory or used with a block
41
- # it returns array of arrays of data
42
- # rows that had a wrong encoding will be collected into errors array
43
- data, errors = dwc.core.read
41
+ # read content of the core data file into memory or used with a block
42
+ # it returns array of arrays of data
43
+ # rows that had a wrong encoding will be collected into errors array
44
+ data, errors = dwc.core.read
44
45
 
45
- # read content using a block, getting back results in sets of 100 rows each
46
- results = []
47
- tail_data, tail_errors = dwc.core.read(100) do |data, errors|
48
- results << [data, errors]
49
- end
50
- results << [tail_data, tail_errors]
46
+ # read content using a block, getting back results in sets of 100 rows each
47
+ results = []
48
+ tail_data, tail_errors = dwc.core.read(100) do |data, errors|
49
+ results << [data, errors]
50
+ end
51
+ results << [tail_data, tail_errors]
51
52
 
52
- # read content of an extension data file into memory
53
- data, errors = dwc.core.extensions[0].read
53
+ # read content of an extension data file into memory
54
+ data, errors = dwc.core.extensions[0].read
54
55
 
55
- # read content of an extension data using block
56
- results = []
57
- tail_data, tail_errors = dwc.core.extensions[0](100) do |data, errors|
58
- results << [data, errors]
59
- end
60
- results << [tail_data, tail_errors]
56
+ # read content of an extension data using block
57
+ results = []
58
+ tail_data, tail_errors = dwc.core.extensions[0](100) do |data, errors|
59
+ results << [data, errors]
60
+ end
61
+ results << [tail_data, tail_errors]
61
62
 
62
- # normalize names in classification collecting together synonyms,
63
- # canonical names, vernacular names and associating paths to taxons
64
- # in a classification distributed as DwCA file
63
+ # normalize names in classification collecting together synonyms,
64
+ # canonical names, vernacular names and associating paths to taxons
65
+ # in a classification distributed as DwCA file
65
66
 
66
- result = dwc.normalize_classification
67
+ result = dwc.normalize_classification
67
68
 
68
- # for a finer control over normalization:
69
+ # for a finer control over normalization:
69
70
 
70
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
71
- cn.normalize
72
- # if you don't want to generate path consisting of canonical forms
73
- # of ancestors to a taxon
74
- cn.normalize(:with_canonical_names => false)
71
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
72
+ cn.normalize
73
+ # if you don't want to generate path consisting of canonical forms
74
+ # of ancestors to a taxon
75
+ cn.normalize(:with_canonical_names => false)
75
76
 
76
- # if you don't want to ingest information from extensions
77
- cn.normalize(:with_extensions => false)
77
+ # if you don't want to ingest information from extensions
78
+ cn.normalize(:with_extensions => false)
78
79
 
79
- # to get a flat hash of nodes with attached vernacular names and synonyms
80
- normalized_data = cn.normalized_data
80
+ # to get a flat hash of nodes with attached vernacular names and synonyms
81
+ normalized_data = cn.normalized_data
81
82
 
82
- # to get a representation of tree organization as a hash
83
- classification_tree = cn.tree
83
+ # to get a representation of tree organization as a hash
84
+ classification_tree = cn.tree
84
85
 
85
- # to get list of all name strings used as scientific or vernacular names
86
- all_name_strings = cn.name_strings
86
+ # to get list of all name strings used as scientific or vernacular names
87
+ all_name_strings = cn.name_strings
87
88
 
88
- # to get list of errors generated during the normalization
89
- errors = cn.error_names
89
+ # to get list of errors generated during the normalization
90
+ errors = cn.error_names
90
91
 
91
- DarwinCore.clean_all # remove all expanded archives
92
+ DarwinCore.clean_all # remove all expanded archives
93
+ ```
92
94
 
93
95
  Creating a DarwinCore Archive file
94
96
  ----------------------------------
95
97
 
96
- gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
97
-
98
- core = [
99
- ["http://rs.tdwg.org/dwc/terms/taxonID",
100
- "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
101
- "http://rs.tdwg.org/dwc/terms/scientificName",
102
- "http://rs.tdwg.org/dwc/terms/taxonRank"],
103
- [1, 0, "Plantae", "kingdom"],
104
- [2, 1, "Betula", "genus"],
105
- [3, 2, "Betula verucosa", "species"]
106
- ]
107
-
108
- vernacular_names = [
109
- ["http://rs.tdwg.org/dwc/terms/TaxonID",
110
- "http://rs.tdwg.org/dwc/terms/vernacularName"],
111
- [1, "Plants"],
112
- [1, "Растения"],
113
- [2, "Birch"],
114
- [2, "Береза"],
115
- [3, "Wheeping Birch"],
116
- [3, "Береза плакучая"]
117
- ]
118
-
119
- eml = {
120
- :id => '1234',
121
- :license => 'http://creativecommons.org/licenses/by-sa/3.0/',
122
- :title => 'Test Classification',
123
- :authors => [
124
- { :first_name => 'John',
125
- :last_name => 'Doe',
126
- :email => 'jdoe@example.com',
127
- :organization => 'Example',
128
- :position => 'Assistant Professor',
129
- :url => 'http://example.org' },
130
- { :first_name => 'Jane',
131
- :last_name => 'Doe',
132
- :email => 'jane@example.com' }
133
- ],
134
- :metadata_providers => [
135
- { :first_name => 'Jim',
136
- :last_name => 'Doe',
137
- :email => 'jimdoe@example.com',
138
- :url => 'http://aggregator.example.org' }],
139
- :abstract => 'test classification',
140
- :citation =>
141
- 'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
142
- :url => 'http://example.com'
143
- }
144
-
145
- gen.add_core(core, 'core.txt')
146
- gen.add_extension(vernacular_names,
147
- 'vernacular_names.txt',
148
- true, 'http://rs.gbif.org/terms/1.0/VernacularName')
149
- gen.add_meta_xml
150
- gen.add_eml_xml(eml)
151
- gen.pack
98
+ ```ruby
99
+ gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
100
+
101
+ core = [
102
+ ["http://rs.tdwg.org/dwc/terms/taxonID",
103
+ "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
104
+ "http://rs.tdwg.org/dwc/terms/scientificName",
105
+ "http://rs.tdwg.org/dwc/terms/taxonRank"],
106
+ [1, 0, "Plantae", "kingdom"],
107
+ [2, 1, "Betula", "genus"],
108
+ [3, 2, "Betula verucosa", "species"]
109
+ ]
110
+
111
+ vernacular_names = [
112
+ ["http://rs.tdwg.org/dwc/terms/TaxonID",
113
+ "http://rs.tdwg.org/dwc/terms/vernacularName"],
114
+ [1, "Plants"],
115
+ [1, "Растения"],
116
+ [2, "Birch"],
117
+ [2, "Береза"],
118
+ [3, "Wheeping Birch"],
119
+ [3, "Береза плакучая"]
120
+ ]
121
+
122
+ eml = {
123
+ :id => '1234',
124
+ :license => 'http://creativecommons.org/licenses/by-sa/3.0/',
125
+ :title => 'Test Classification',
126
+ :authors => [
127
+ { :first_name => 'John',
128
+ :last_name => 'Doe',
129
+ :email => 'jdoe@example.com',
130
+ :organization => 'Example',
131
+ :position => 'Assistant Professor',
132
+ :url => 'http://example.org' },
133
+ { :first_name => 'Jane',
134
+ :last_name => 'Doe',
135
+ :email => 'jane@example.com' }
136
+ ],
137
+ :metadata_providers => [
138
+ { :first_name => 'Jim',
139
+ :last_name => 'Doe',
140
+ :email => 'jimdoe@example.com',
141
+ :url => 'http://aggregator.example.org' }],
142
+ :abstract => 'test classification',
143
+ :citation =>
144
+ 'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
145
+ :url => 'http://example.com'
146
+ }
147
+
148
+ gen.add_core(core, 'core.txt')
149
+ gen.add_extension(vernacular_names,
150
+ 'vernacular_names.txt',
151
+ true, 'http://rs.gbif.org/terms/1.0/VernacularName')
152
+ gen.add_meta_xml
153
+ gen.add_eml_xml(eml)
154
+ gen.pack
155
+ ```
152
156
 
153
157
  Logging
154
158
  -------
@@ -175,11 +179,11 @@ Note on Patches/Pull Requests
175
179
  Copyright
176
180
  ---------
177
181
 
178
- Author -- [Dmitry Mozzherin][13]
182
+ Author -- [@dimus][13]
179
183
 
180
- Contributors -- [Matt Yoder][14]
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
181
185
 
182
- Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
186
+ Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
183
187
 
184
188
  [1]: https://badge.fury.io/rb/dwc-archive.png
185
189
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -196,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
196
200
  [13]: https://github.com/dimus
197
201
  [14]: https://github.com/mjy
198
202
  [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
data/Rakefile CHANGED
@@ -1,44 +1,21 @@
1
- require "bundler/gem_tasks"
2
-
3
- # Bundler::GemHelper.install_tasks
4
- # require 'bundler/gem_tasks'
5
- # require 'rake/testtasks'
6
- # require 'rubygems'
7
- # require 'rake'
8
-
9
- require 'rspec/core/rake_task'
10
- RSpec::Core::RakeTask.new(:spec) do |spec|
11
- spec.pattern = 'spec/**/*_spec.rb'
12
- end
1
+ # frozen_string_literal: true
13
2
 
14
- RSpec::Core::RakeTask.new(:rcov) do |spec|
15
- spec.pattern = 'spec/**/*_spec.rb'
16
- spec.rcov = true
17
- end
18
-
19
- # task :spec => :check_dependencies
20
-
21
- begin
22
- require 'cucumber/rake/task'
23
- Cucumber::Rake::Task.new(:features)
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "cucumber/rake/task"
24
6
 
25
- task :features => :check_dependencies
26
- rescue LoadError
27
- task :features do
28
- abort 'Cucumber is not available. In order to run features, ' +
29
- 'you must: sudo gem install cucumber'
30
- end
7
+ RSpec::Core::RakeTask.new(:rspec) do |rspec|
8
+ rspec.pattern = "spec/**/*_spec.rb"
31
9
  end
32
10
 
33
- desc 'Run an IRB session with CSL loaded'
34
- task :irb, [:script] do |t, args|
35
- ARGV.clear
11
+ Cucumber::Rake::Task.new(:features)
36
12
 
37
- require 'irb'
38
- require_relative 'lib/dwc-archive'
13
+ # task rspec: :check_dependencies
14
+ task features: :check_dependencies
39
15
 
40
- IRB.conf[:SCRIPT] = args.script
41
- IRB.start
16
+ desc "open an irb session preloaded with this library"
17
+ task :console do
18
+ sh "irb -I lib -I extra -r dwc_archive.rb"
42
19
  end
43
20
 
44
- task :default => :spec
21
+ task default: :rspec
data/dwc-archive.gemspec CHANGED
@@ -1,33 +1,37 @@
1
- require File.expand_path('../lib/dwc-archive/version', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path("lib/dwc_archive/version", __dir__)
2
4
 
3
5
  Gem::Specification.new do |gem|
4
6
  gem.name = "dwc-archive"
5
7
  gem.version = DarwinCore::VERSION
6
8
  gem.authors = ["Dmitry Mozzherin"]
7
9
  gem.email = ["dmozzherin at gmail dot com"]
8
- gem.description = %q{Darwin Core Archive is the current standard exchange
9
- format for GLobal Names Architecture modules.
10
- This gem makes it easy to incorporate files in
11
- Darwin Core Archive format into a ruby project.}
12
- gem.summary = %q{Handler of Darwin Core Archive files}
10
+ gem.description = "Darwin Core Archive is the current standard exchange " \
11
+ "format for GLobal Names Architecture modules. " \
12
+ "This gem makes it easy to incorporate files in " \
13
+ "Darwin Core Archive format into a ruby project."
14
+ gem.summary = "Handler of Darwin Core Archive files"
13
15
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
14
16
  gem.license = "MIT"
15
17
 
16
- gem.files = `git ls-files`.split($/)
18
+ gem.required_ruby_version = ">= 2.6.0"
19
+ gem.files = `git ls-files`.split("\n").map(&:strip)
17
20
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
21
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
22
  gem.require_paths = ["lib"]
20
23
 
21
- gem.add_runtime_dependency 'nokogiri', '~> 1.6'
22
- gem.add_runtime_dependency 'parsley-store', '~> 0.3'
23
- gem.add_runtime_dependency 'archive-tar-minitar', '~> 0.5'
24
-
25
- gem.add_development_dependency 'rake', '~> 10.1'
26
- gem.add_development_dependency 'bundler', '~> 1.3'
27
- gem.add_development_dependency 'rspec', '~> 2.14'
28
- gem.add_development_dependency 'cucumber', '~> 1.3'
29
- gem.add_development_dependency 'coveralls', '~> 0.7'
30
- gem.add_development_dependency 'debugger', '~> 1.6'
31
- gem.add_development_dependency 'git', '~> 1.2'
32
- end
24
+ # gem.add_runtime_dependency "minitar", "~> 0.6"
25
+ gem.add_runtime_dependency "biodiversity", "~> 5.1.2"
26
+ gem.add_runtime_dependency "nokogiri", "~> 1.11"
33
27
 
28
+ gem.add_development_dependency "bundler", "~> 2.2"
29
+ gem.add_development_dependency "byebug", "~> 11.1"
30
+ gem.add_development_dependency "cucumber", "~> 5"
31
+ gem.add_development_dependency "git", "~> 1.8"
32
+ gem.add_development_dependency "rake", "~> 13"
33
+ gem.add_development_dependency "rspec", "~> 3.10"
34
+ gem.add_development_dependency "rubocop", "~> 1.8"
35
+ gem.add_development_dependency "solargraph", "~> 0.40"
36
+ gem.add_development_dependency "travis", "~> 1.10"
37
+ end