dwc-archive 0.9.11 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rspec +2 -1
  4. data/.rubocop.yml +23 -0
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +4 -7
  7. data/CHANGELOG +4 -0
  8. data/Gemfile +3 -1
  9. data/LICENSE +1 -1
  10. data/README.md +114 -109
  11. data/Rakefile +13 -36
  12. data/dwc-archive.gemspec +23 -19
  13. data/features/step_definitions/dwc-creator_steps.rb +5 -5
  14. data/features/step_definitions/dwc-reader_steps.rb +47 -28
  15. data/features/support/env.rb +1 -1
  16. data/lib/dwc_archive.rb +124 -0
  17. data/lib/dwc_archive/archive.rb +60 -0
  18. data/lib/dwc_archive/classification_normalizer.rb +382 -0
  19. data/lib/dwc_archive/core.rb +25 -0
  20. data/lib/{dwc-archive → dwc_archive}/errors.rb +10 -0
  21. data/lib/dwc_archive/expander.rb +88 -0
  22. data/lib/{dwc-archive → dwc_archive}/extension.rb +5 -3
  23. data/lib/dwc_archive/generator.rb +91 -0
  24. data/lib/{dwc-archive → dwc_archive}/generator_eml_xml.rb +40 -33
  25. data/lib/{dwc-archive → dwc_archive}/generator_meta_xml.rb +21 -20
  26. data/lib/dwc_archive/gnub_taxon.rb +14 -0
  27. data/lib/dwc_archive/ingester.rb +106 -0
  28. data/lib/dwc_archive/metadata.rb +57 -0
  29. data/lib/dwc_archive/taxon_normalized.rb +23 -0
  30. data/lib/dwc_archive/version.rb +6 -0
  31. data/lib/dwc_archive/xml_reader.rb +90 -0
  32. data/spec/files/file with characters(3).gz b/data/spec/files/file with → characters(3).tar.gz +0 -0
  33. data/spec/files/generator_eml.xml +1 -1
  34. data/spec/lib/classification_normalizer_spec.rb +96 -105
  35. data/spec/lib/core_spec.rb +43 -41
  36. data/spec/lib/darwin_core_spec.rb +108 -138
  37. data/spec/lib/generator_eml_xml_spec.rb +12 -11
  38. data/spec/lib/generator_meta_xml_spec.rb +12 -11
  39. data/spec/lib/generator_spec.rb +73 -74
  40. data/spec/lib/gnub_taxon_spec.rb +15 -17
  41. data/spec/lib/metadata_spec.rb +50 -41
  42. data/spec/lib/taxon_normalized_spec.rb +62 -65
  43. data/spec/lib/xml_reader_spec.rb +9 -12
  44. data/spec/spec_helper.rb +54 -51
  45. metadata +101 -87
  46. data/.rvmrc +0 -1
  47. data/lib/dwc-archive.rb +0 -107
  48. data/lib/dwc-archive/archive.rb +0 -40
  49. data/lib/dwc-archive/classification_normalizer.rb +0 -427
  50. data/lib/dwc-archive/core.rb +0 -19
  51. data/lib/dwc-archive/expander.rb +0 -85
  52. data/lib/dwc-archive/generator.rb +0 -86
  53. data/lib/dwc-archive/ingester.rb +0 -101
  54. data/lib/dwc-archive/metadata.rb +0 -48
  55. data/lib/dwc-archive/version.rb +0 -3
  56. data/lib/dwc-archive/xml_reader.rb +0 -80
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5c6c5b2a4de324abded5b3adb81d6bcd9603965f
4
- data.tar.gz: ac00e15f95766838ff42a9bdc3f1a682c4a8e9ff
2
+ SHA256:
3
+ metadata.gz: fb7ca119f1b5dd9f1657772d0fb8e80df64909f0739113a00366441045865e46
4
+ data.tar.gz: aa8fab9a533682f6ea4907b52dcae3b63d320331860c8936819815f84a26f9e1
5
5
  SHA512:
6
- metadata.gz: d84f2974ed7bcbabc62d2d3b5c5b15dd8d6d2aa38836a955c12896ada2391e978ca2d420d86bf59db2ab61cc801c9c43036be13699ba7b3902d8daa4f366d45b
7
- data.tar.gz: a07a91365cba60fa418b85d7b516781c9209ea607dbe5ec45da86febd14bfa104cc16e4b55619a157f5bbb487dff2fd7e318399d815a63372197a9732b3820fb
6
+ metadata.gz: b122be118cd7804cc299ea465ced05aba037df1e9265b8940fd7db369f73a72483fa9e11b1c6571e85c7ff68282c9512b25b73ae2095da0d96d00b3c18739da4
7
+ data.tar.gz: 4f787964ca02539f02ab97d7e0d3106fd6ae67331718a92a4313337c219bdf054921702cfed2d116147ff7b5d79405e63b483f59639371f3ca7e83b68b51b52f
data/.gitignore CHANGED
@@ -26,5 +26,6 @@ bin
26
26
  .bundle
27
27
  bundle_bin
28
28
  Gemfile.lock
29
+ .byebug_history
29
30
 
30
31
 
data/.rspec CHANGED
@@ -1,2 +1,3 @@
1
- --format nested
1
+ --format p
2
2
  --color
3
+ --require spec_helper
@@ -0,0 +1,23 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+ Exclude:
4
+ - features/**/*
5
+ - .bundle/**/*
6
+ - bundle_bin/**/*
7
+ - lib/dwc_archive/ingester.rb
8
+ - lib/dwc_archive/generator_meta_xml.rb
9
+ - lib/dwc_archive/generator_eml_xml.rb
10
+ - lib/dwc_archive/classification_normalizer.rb
11
+
12
+ Style/StringLiterals:
13
+ EnforcedStyle: double_quotes
14
+
15
+ Layout/DotPosition:
16
+ EnforcedStyle: trailing
17
+
18
+ Metrics/BlockLength:
19
+ Exclude:
20
+ - 'Rakefile'
21
+ - '*.gemspec'
22
+ - '**/*.rake'
23
+ - 'spec/**/*spec.rb'
@@ -1 +1 @@
1
- 2.0.0-p353
1
+ 2.7.1
@@ -1,16 +1,13 @@
1
1
  rvm:
2
- - 1.9.3-p484
3
- - 2.0.0-p353
2
+ - 2.5
3
+ - 2.6
4
+ - 2.7
4
5
  before_install:
5
6
  - sudo apt-get update
6
- - gem install debugger
7
- # bundler_args: --without development
8
- services:
9
- - redis-server
7
+ - gem install bundler
10
8
  script:
11
9
  - bundle exec cucumber
12
10
  - bundle exec rake
13
11
  branches:
14
12
  only:
15
13
  - master
16
-
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ 1.1.0 Update name parser to go-based biodiversity
2
+
3
+ 1.0.1 Cleanup and gems update
4
+
1
5
  0.9.11 Removed VERSION duplicate
2
6
 
3
7
  0.9.7 Refactoring and tests improvements
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
- source 'https://rubygems.org'
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
2
4
 
3
5
  gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratory
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -19,136 +19,140 @@ Installation
19
19
 
20
20
  ### System Requirements
21
21
 
22
- You need [Redis Server][12] and unzip library installed
22
+ You need [Redis Server][12] and unzip library installed
23
23
 
24
24
 
25
25
  Usage
26
26
  -----
27
27
 
28
- require 'rubygems'
29
- require 'dwc-archive'
28
+ ```ruby
29
+ require 'rubygems'
30
+ require 'dwc_archive'
30
31
 
31
- dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
32
- dwc.archive.files # the archive file list
33
- dwc.metadata.data # summary of metadata from eml.xml if it exists
34
- dwc.metadata.authors # authors of the archive
35
- dwc.core.data # summary of DarwinCore main file
36
- dwc.core.file_path # path to the DarwinCore main file
37
- dwc.extensions # array of DarwinCore Star extensions
38
- dwc.extensions[0].data # summary for an extension
32
+ dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
33
+ dwc.archive.files # the archive file list
34
+ dwc.metadata.data # summary of metadata from eml.xml if it exists
35
+ dwc.metadata.authors # authors of the archive
36
+ dwc.core.data # summary of DarwinCore main file
37
+ dwc.core.file_path # path to the DarwinCore main file
38
+ dwc.extensions # array of DarwinCore Star extensions
39
+ dwc.extensions[0].data # summary for an extension
39
40
 
40
- # read content of the core data file into memory or used with a block
41
- # it returns array of arrays of data
42
- # rows that had a wrong encoding will be collected into errors array
43
- data, errors = dwc.core.read
41
+ # read content of the core data file into memory or used with a block
42
+ # it returns array of arrays of data
43
+ # rows that had a wrong encoding will be collected into errors array
44
+ data, errors = dwc.core.read
44
45
 
45
- # read content using a block, getting back results in sets of 100 rows each
46
- results = []
47
- tail_data, tail_errors = dwc.core.read(100) do |data, errors|
48
- results << [data, errors]
49
- end
50
- results << [tail_data, tail_errors]
46
+ # read content using a block, getting back results in sets of 100 rows each
47
+ results = []
48
+ tail_data, tail_errors = dwc.core.read(100) do |data, errors|
49
+ results << [data, errors]
50
+ end
51
+ results << [tail_data, tail_errors]
51
52
 
52
- # read content of an extension data file into memory
53
- data, errors = dwc.core.extensions[0].read
53
+ # read content of an extension data file into memory
54
+ data, errors = dwc.core.extensions[0].read
54
55
 
55
- # read content of an extension data using block
56
- results = []
57
- tail_data, tail_errors = dwc.core.extensions[0](100) do |data, errors|
58
- results << [data, errors]
59
- end
60
- results << [tail_data, tail_errors]
56
+ # read content of an extension data using block
57
+ results = []
58
+ tail_data, tail_errors = dwc.core.extensions[0].read(100) do |data, errors|
59
+ results << [data, errors]
60
+ end
61
+ results << [tail_data, tail_errors]
61
62
 
62
- # normalize names in classification collecting together synonyms,
63
- # canonical names, vernacular names and associating paths to taxons
64
- # in a classification distributed as DwCA file
63
+ # normalize names in classification collecting together synonyms,
64
+ # canonical names, vernacular names and associating paths to taxons
65
+ # in a classification distributed as DwCA file
65
66
 
66
- result = dwc.normalize_classification
67
+ result = dwc.normalize_classification
67
68
 
68
- # for a finer control over normalization:
69
+ # for a finer control over normalization:
69
70
 
70
- cn = DarwinCore::ClassificationNormalizer.new(dwc)
71
- cn.normalize
72
- # if you don't want to generate path consisting of canonical forms
73
- # of ancestors to a taxon
74
- cn.normalize(:with_canonical_names => false)
71
+ cn = DarwinCore::ClassificationNormalizer.new(dwc)
72
+ cn.normalize
73
+ # if you don't want to generate path consisting of canonical forms
74
+ # of ancestors to a taxon
75
+ cn.normalize(:with_canonical_names => false)
75
76
 
76
- # if you don't want to ingest information from extensions
77
- cn.normalize(:with_extensions => false)
77
+ # if you don't want to ingest information from extensions
78
+ cn.normalize(:with_extensions => false)
78
79
 
79
- # to get a flat hash of nodes with attached vernacular names and synonyms
80
- normalized_data = cn.normalized_data
80
+ # to get a flat hash of nodes with attached vernacular names and synonyms
81
+ normalized_data = cn.normalized_data
81
82
 
82
- # to get a representation of tree organization as a hash
83
- classification_tree = cn.tree
83
+ # to get a representation of tree organization as a hash
84
+ classification_tree = cn.tree
84
85
 
85
- # to get list of all name strings used as scientific or vernacular names
86
- all_name_strings = cn.name_strings
86
+ # to get list of all name strings used as scientific or vernacular names
87
+ all_name_strings = cn.name_strings
87
88
 
88
- # to get list of errors generated during the normalization
89
- errors = cn.error_names
89
+ # to get list of errors generated during the normalization
90
+ errors = cn.error_names
90
91
 
91
- DarwinCore.clean_all # remove all expanded archives
92
+ DarwinCore.clean_all # remove all expanded archives
93
+ ```
92
94
 
93
95
  Creating a DarwinCore Archive file
94
96
  ----------------------------------
95
97
 
96
- gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
97
-
98
- core = [
99
- ["http://rs.tdwg.org/dwc/terms/taxonID",
100
- "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
101
- "http://rs.tdwg.org/dwc/terms/scientificName",
102
- "http://rs.tdwg.org/dwc/terms/taxonRank"],
103
- [1, 0, "Plantae", "kingdom"],
104
- [2, 1, "Betula", "genus"],
105
- [3, 2, "Betula verucosa", "species"]
106
- ]
107
-
108
- vernacular_names = [
109
- ["http://rs.tdwg.org/dwc/terms/TaxonID",
110
- "http://rs.tdwg.org/dwc/terms/vernacularName"],
111
- [1, "Plants"],
112
- [1, "Растения"],
113
- [2, "Birch"],
114
- [2, "Береза"],
115
- [3, "Wheeping Birch"],
116
- [3, "Береза плакучая"]
117
- ]
118
-
119
- eml = {
120
- :id => '1234',
121
- :license => 'http://creativecommons.org/licenses/by-sa/3.0/',
122
- :title => 'Test Classification',
123
- :authors => [
124
- { :first_name => 'John',
125
- :last_name => 'Doe',
126
- :email => 'jdoe@example.com',
127
- :organization => 'Example',
128
- :position => 'Assistant Professor',
129
- :url => 'http://example.org' },
130
- { :first_name => 'Jane',
131
- :last_name => 'Doe',
132
- :email => 'jane@example.com' }
133
- ],
134
- :metadata_providers => [
135
- { :first_name => 'Jim',
136
- :last_name => 'Doe',
137
- :email => 'jimdoe@example.com',
138
- :url => 'http://aggregator.example.org' }],
139
- :abstract => 'test classification',
140
- :citation =>
141
- 'Test classification: Doe John, Doe Jane, Taxnonmy, 10, 1, 2010',
142
- :url => 'http://example.com'
143
- }
144
-
145
- gen.add_core(core, 'core.txt')
146
- gen.add_extension(vernacular_names,
147
- 'vernacular_names.txt',
148
- true, 'http://rs.gbif.org/terms/1.0/VernacularName')
149
- gen.add_meta_xml
150
- gen.add_eml_xml(eml)
151
- gen.pack
98
+ ```ruby
99
+ gen = DarwinCore::Generator.new('/tmp/dwc_birches.tar.gz')
100
+
101
+ core = [
102
+ ["http://rs.tdwg.org/dwc/terms/taxonID",
103
+ "http://rs.tdwg.org/dwc/terms/parentNameUsageID",
104
+ "http://rs.tdwg.org/dwc/terms/scientificName",
105
+ "http://rs.tdwg.org/dwc/terms/taxonRank"],
106
+ [1, 0, "Plantae", "kingdom"],
107
+ [2, 1, "Betula", "genus"],
108
+ [3, 2, "Betula verucosa", "species"]
109
+ ]
110
+
111
+ vernacular_names = [
112
+ ["http://rs.tdwg.org/dwc/terms/TaxonID",
113
+ "http://rs.tdwg.org/dwc/terms/vernacularName"],
114
+ [1, "Plants"],
115
+ [1, "Растения"],
116
+ [2, "Birch"],
117
+ [2, "Береза"],
118
+ [3, "Weeping Birch"],
119
+ [3, "Береза плакучая"]
120
+ ]
121
+
122
+ eml = {
123
+ :id => '1234',
124
+ :license => 'http://creativecommons.org/licenses/by-sa/3.0/',
125
+ :title => 'Test Classification',
126
+ :authors => [
127
+ { :first_name => 'John',
128
+ :last_name => 'Doe',
129
+ :email => 'jdoe@example.com',
130
+ :organization => 'Example',
131
+ :position => 'Assistant Professor',
132
+ :url => 'http://example.org' },
133
+ { :first_name => 'Jane',
134
+ :last_name => 'Doe',
135
+ :email => 'jane@example.com' }
136
+ ],
137
+ :metadata_providers => [
138
+ { :first_name => 'Jim',
139
+ :last_name => 'Doe',
140
+ :email => 'jimdoe@example.com',
141
+ :url => 'http://aggregator.example.org' }],
142
+ :abstract => 'test classification',
143
+ :citation =>
144
+ 'Test classification: Doe John, Doe Jane, Taxonomy, 10, 1, 2010',
145
+ :url => 'http://example.com'
146
+ }
147
+
148
+ gen.add_core(core, 'core.txt')
149
+ gen.add_extension(vernacular_names,
150
+ 'vernacular_names.txt',
151
+ true, 'http://rs.gbif.org/terms/1.0/VernacularName')
152
+ gen.add_meta_xml
153
+ gen.add_eml_xml(eml)
154
+ gen.pack
155
+ ```
152
156
 
153
157
  Logging
154
158
  -------
@@ -175,11 +179,11 @@ Note on Patches/Pull Requests
175
179
  Copyright
176
180
  ---------
177
181
 
178
- Author -- [Dmitry Mozzherin][13]
182
+ Author -- [@dimus][13]
179
183
 
180
- Contributors -- [Matt Yoder][14]
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
181
185
 
182
- Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
186
+ Copyright (c) 2010-2020 [@dimus][13]. See LICENSE for details.
183
187
 
184
188
  [1]: https://badge.fury.io/rb/dwc-archive.png
185
189
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -196,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
196
200
  [13]: https://github.com/dimus
197
201
  [14]: https://github.com/mjy
198
202
  [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
data/Rakefile CHANGED
@@ -1,44 +1,21 @@
1
- require "bundler/gem_tasks"
2
-
3
- # Bundler::GemHelper.install_tasks
4
- # require 'bundler/gem_tasks'
5
- # require 'rake/testtasks'
6
- # require 'rubygems'
7
- # require 'rake'
8
-
9
- require 'rspec/core/rake_task'
10
- RSpec::Core::RakeTask.new(:spec) do |spec|
11
- spec.pattern = 'spec/**/*_spec.rb'
12
- end
1
+ # frozen_string_literal: true
13
2
 
14
- RSpec::Core::RakeTask.new(:rcov) do |spec|
15
- spec.pattern = 'spec/**/*_spec.rb'
16
- spec.rcov = true
17
- end
18
-
19
- # task :spec => :check_dependencies
20
-
21
- begin
22
- require 'cucumber/rake/task'
23
- Cucumber::Rake::Task.new(:features)
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+ require "cucumber/rake/task"
24
6
 
25
- task :features => :check_dependencies
26
- rescue LoadError
27
- task :features do
28
- abort 'Cucumber is not available. In order to run features, ' +
29
- 'you must: sudo gem install cucumber'
30
- end
7
+ RSpec::Core::RakeTask.new(:rspec) do |rspec|
8
+ rspec.pattern = "spec/**/*_spec.rb"
31
9
  end
32
10
 
33
- desc 'Run an IRB session with CSL loaded'
34
- task :irb, [:script] do |t, args|
35
- ARGV.clear
11
+ Cucumber::Rake::Task.new(:features)
36
12
 
37
- require 'irb'
38
- require_relative 'lib/dwc-archive'
13
+ # task rspec: :check_dependencies
14
+ task features: :check_dependencies
39
15
 
40
- IRB.conf[:SCRIPT] = args.script
41
- IRB.start
16
+ desc "open an irb session preloaded with this library"
17
+ task :console do
18
+ sh "irb -I lib -I extra -r dwc_archive.rb"
42
19
  end
43
20
 
44
- task :default => :spec
21
+ task default: :rspec
@@ -1,33 +1,37 @@
1
- require File.expand_path('../lib/dwc-archive/version', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ require File.expand_path("lib/dwc_archive/version", __dir__)
2
4
 
3
5
  Gem::Specification.new do |gem|
4
6
  gem.name = "dwc-archive"
5
7
  gem.version = DarwinCore::VERSION
6
8
  gem.authors = ["Dmitry Mozzherin"]
7
9
  gem.email = ["dmozzherin at gmail dot com"]
8
- gem.description = %q{Darwin Core Archive is the current standard exchange
9
- format for GLobal Names Architecture modules.
10
- This gem makes it easy to incorporate files in
11
- Darwin Core Archive format into a ruby project.}
12
- gem.summary = %q{Handler of Darwin Core Archive files}
10
+ gem.description = "Darwin Core Archive is the current standard exchange " \
11
+ "format for GLobal Names Architecture modules. " \
12
+ "This gem makes it easy to incorporate files in " \
13
+ "Darwin Core Archive format into a ruby project."
14
+ gem.summary = "Handler of Darwin Core Archive files"
13
15
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
14
16
  gem.license = "MIT"
15
17
 
16
- gem.files = `git ls-files`.split($/)
18
+ gem.required_ruby_version = ">= 2.6.0"
19
+ gem.files = `git ls-files`.split("\n").map(&:strip)
17
20
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
21
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
22
  gem.require_paths = ["lib"]
20
23
 
21
- gem.add_runtime_dependency 'nokogiri', '~> 1.6'
22
- gem.add_runtime_dependency 'parsley-store', '~> 0.3'
23
- gem.add_runtime_dependency 'archive-tar-minitar', '~> 0.5'
24
-
25
- gem.add_development_dependency 'rake', '~> 10.1'
26
- gem.add_development_dependency 'bundler', '~> 1.3'
27
- gem.add_development_dependency 'rspec', '~> 2.14'
28
- gem.add_development_dependency 'cucumber', '~> 1.3'
29
- gem.add_development_dependency 'coveralls', '~> 0.7'
30
- gem.add_development_dependency 'debugger', '~> 1.6'
31
- gem.add_development_dependency 'git', '~> 1.2'
32
- end
24
+ # gem.add_runtime_dependency "minitar", "~> 0.6"
25
+ gem.add_runtime_dependency "biodiversity", "~> 5.1.2"
26
+ gem.add_runtime_dependency "nokogiri", "~> 1.11"
33
27
 
28
+ gem.add_development_dependency "bundler", "~> 2.2"
29
+ gem.add_development_dependency "byebug", "~> 11.1"
30
+ gem.add_development_dependency "cucumber", "~> 5"
31
+ gem.add_development_dependency "git", "~> 1.8"
32
+ gem.add_development_dependency "rake", "~> 13"
33
+ gem.add_development_dependency "rspec", "~> 3.10"
34
+ gem.add_development_dependency "rubocop", "~> 1.8"
35
+ gem.add_development_dependency "solargraph", "~> 0.40"
36
+ gem.add_development_dependency "travis", "~> 1.10"
37
+ end