dwc-archive 1.0.1 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e61e692fe24c4b6bd56e38a7b19a357ba0acdd3180999e606bd6fc8e55dae29
4
- data.tar.gz: 1bf6ee067e24d6cb75d415eaa1f10bcf37a96d7daee7a1935248a6a58a0cb5ed
3
+ metadata.gz: 6e4de59525e7e4a3f212dac828b97ff489f5ec40f764758a5b81b7fef09cc166
4
+ data.tar.gz: 2f408d2a04bac402c395928a6d048d2d28daed3235119c149684026b233e2126
5
5
  SHA512:
6
- metadata.gz: a162b05026795e2e4ea8e92683682fab820fb0cfcea68a8c166eb9dc012246ba01a16af4e17af39a2ae28bf2b6a3b85f877ac53b71e27915f8529cb4d973d67a
7
- data.tar.gz: 10a59bee9c60103c91fd5403c4c765903521e640dab3c6ef7ff78eaceb04938aa0bd554e9941bdd90260a2f0a2e4927be3d0ada830b2a4310be405f2ead1184f
6
+ metadata.gz: 145cd2d7d2f9e1c87c76ddca220fd56f1e44cca70ceb6995edfeba73d985e166709cf4254b6b2e19f143ffd02d879700587d61c6b9da239bdaff4288fde17cf9
7
+ data.tar.gz: e92697139838d7d720bfd9aca87ddf4537a8e272b6fabb04a05f92bfde047c9a45867e46e9b27b39d738c822ba6a843a73b3670b45da16ad9c2aca5df0523913
@@ -0,0 +1,35 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: build
9
+
10
+ on:
11
+ push:
12
+ branches: [ master ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.6', '2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Run tests
35
+ run: bundle exec rake
data/.gitignore CHANGED
@@ -26,5 +26,6 @@ bin
26
26
  .bundle
27
27
  bundle_bin
28
28
  Gemfile.lock
29
+ .byebug_history
29
30
 
30
31
 
data/.rubocop.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ TargetRubyVersion: 2.6
3
3
  Exclude:
4
4
  - features/**/*
5
5
  - .bundle/**/*
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.5.1
1
+ 3.0.0
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 1.1.0 Update name parser to Go-based biodiversity
2
+
1
3
  1.0.1 Cleanup and gems update
2
4
 
3
5
  0.9.11 Removed VERSION duplicate
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratory
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -27,7 +27,7 @@ Usage
27
27
 
28
28
  ```ruby
29
29
  require 'rubygems'
30
- require 'dwc-archive'
30
+ require 'dwc_archive'
31
31
 
32
32
  dwc = DarwinCore.new('/path_to_file/archive_file.tar.gz')
33
33
  dwc.archive.files # the archive file list
@@ -179,11 +179,11 @@ Note on Patches/Pull Requests
179
179
  Copyright
180
180
  ---------
181
181
 
182
- Author -- [Dmitry Mozzherin][13]
182
+ Author -- [@dimus][13]
183
183
 
184
- Contributors -- [Matt Yoder][14]
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
185
185
 
186
- Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
186
+ Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
187
187
 
188
188
  [1]: https://badge.fury.io/rb/dwc-archive.png
189
189
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -200,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
200
200
  [13]: https://github.com/dimus
201
201
  [14]: https://github.com/mjy
202
202
  [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
data/dwc-archive.gemspec CHANGED
@@ -15,24 +15,23 @@ Gem::Specification.new do |gem|
15
15
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
16
16
  gem.license = "MIT"
17
17
 
18
- gem.required_ruby_version = ">= 2.4.1"
18
+ gem.required_ruby_version = ">= 2.6.0"
19
19
  gem.files = `git ls-files`.split("\n").map(&:strip)
20
20
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
21
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
22
22
  gem.require_paths = ["lib"]
23
23
 
24
24
  # gem.add_runtime_dependency "minitar", "~> 0.6"
25
- gem.add_runtime_dependency "nokogiri", "~> 1.8"
26
- gem.add_runtime_dependency "parsley-store", "~> 0.3"
25
+ gem.add_runtime_dependency "biodiversity", "~> 5.2.0"
26
+ gem.add_runtime_dependency "nokogiri", "~> 1.11"
27
27
 
28
- # gem.add_development_dependency "byebug", "~> 3.4"
29
- gem.add_development_dependency "bundler", "~> 1.16"
30
- gem.add_development_dependency "coveralls", "~> 0.8"
31
- gem.add_development_dependency "cucumber", "~> 3.1"
32
- gem.add_development_dependency "git", "~> 1.4"
33
- gem.add_development_dependency "rake", "~> 12.3"
34
- gem.add_development_dependency "rspec", "~> 3.7"
35
- gem.add_development_dependency "rubocop", "~> 0.58"
36
- gem.add_development_dependency "solargraph", "~> 0.23"
37
- gem.add_development_dependency "travis", "~> 1.8"
28
+ gem.add_development_dependency "bundler", "~> 2.2"
29
+ gem.add_development_dependency "byebug", "~> 11.1"
30
+ gem.add_development_dependency "cucumber", "~> 5"
31
+ gem.add_development_dependency "git", "~> 1.8"
32
+ gem.add_development_dependency "rake", "~> 13"
33
+ gem.add_development_dependency "rspec", "~> 3.10"
34
+ gem.add_development_dependency "rubocop", "~> 1.8"
35
+ gem.add_development_dependency "solargraph", "~> 0.40"
36
+ gem.add_development_dependency "travis", "~> 1.10"
38
37
  end
data/lib/dwc_archive.rb CHANGED
@@ -6,7 +6,7 @@ require "digest"
6
6
  require "csv"
7
7
  require "logger"
8
8
  require "nokogiri"
9
- require "parsley-store"
9
+ require "biodiversity"
10
10
  require_relative "dwc_archive/xml_reader"
11
11
  require_relative "dwc_archive/ingester"
12
12
  require_relative "dwc_archive/errors"
@@ -39,11 +39,12 @@ class DarwinCore
39
39
 
40
40
  def files(path)
41
41
  return nil unless path && FileTest.exists?(path)
42
- Dir.entries(path).reject { |e| e.match(/[\.]{1,2}$/) }.sort
42
+
43
+ Dir.entries(path).reject { |e| e.match(/[.]{1,2}$/) }.sort
43
44
  end
44
45
 
45
46
  def random_path(tmp_dir)
46
- File.join(tmp_dir, "dwc_" + rand(10_000_000_000).to_s)
47
+ File.join(tmp_dir, "dwc_#{rand(10_000_000_000)}")
47
48
  end
48
49
  end
49
50
 
@@ -52,15 +53,14 @@ class DarwinCore
52
53
 
53
54
  def self.nil_field?(field)
54
55
  return true if [nil, "", "/N"].include?(field)
56
+
55
57
  false
56
58
  end
57
59
 
58
60
  def self.clean_all(tmp_dir = DEFAULT_TMP_DIR)
59
61
  Dir.entries(tmp_dir).each do |entry|
60
62
  path = File.join(tmp_dir, entry)
61
- if FileTest.directory?(path) && entry.match(/^dwc_[\d]+$/)
62
- FileUtils.rm_rf(path)
63
- end
63
+ FileUtils.rm_rf(path) if FileTest.directory?(path) && entry.match(/^dwc_\d+$/)
64
64
  end
65
65
  end
66
66
 
@@ -96,6 +96,7 @@ class DarwinCore
96
96
  # list of synonyms and vernacular names.
97
97
  def normalize_classification
98
98
  return nil unless parent_id?
99
+
99
100
  @classification_normalizer ||=
100
101
  DarwinCore::ClassificationNormalizer.new(self)
101
102
  @classification_normalizer.normalize
@@ -112,9 +113,11 @@ class DarwinCore
112
113
 
113
114
  def extensions
114
115
  return @extensions if @extensions
116
+
115
117
  root_key = @archive.meta.keys[0]
116
118
  ext = @archive.meta[root_key][:extension]
117
119
  return @extensions = [] unless ext
120
+
118
121
  ext = [ext] if ext.class != Array
119
122
  @extensions = ext.map { |e| DarwinCore::Extension.new(self, e) }
120
123
  end
@@ -52,6 +52,7 @@ class DarwinCore
52
52
  def prepare_eml_file
53
53
  @eml = nil
54
54
  return unless files.include?("eml.xml")
55
+
55
56
  eml_file = File.open(File.join(@expander.path, "eml.xml"))
56
57
  @eml = DarwinCore::XmlReader.from_xml(eml_file)
57
58
  end
@@ -1,10 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  class DarwinCore
3
4
  # Returns tree representation of Darwin Core file with vernacular and
4
5
  # synonyms attached to the taxon nodes
5
6
  class ClassificationNormalizer
6
7
  attr_reader :error_names, :tree, :normalized_data, :dwc
7
- alias_method :darwin_core, :dwc
8
+ alias darwin_core dwc
8
9
 
9
10
  def initialize(dwc_instance)
10
11
  @dwc = dwc_instance
@@ -12,7 +13,6 @@ class DarwinCore
12
13
  @extensions = @dwc.extensions.map { |e| [e, find_fields(e)] }
13
14
  @normalized_data = {}
14
15
  @synonyms = {}
15
- @parser = ParsleyStore.new(1, 2)
16
16
  @name_strings = {}
17
17
  @vernacular_name_strings = {}
18
18
  @error_names = []
@@ -25,6 +25,7 @@ class DarwinCore
25
25
 
26
26
  def add_vernacular_name_string(name_string)
27
27
  return if @vernacular_name_strings[name_string]
28
+
28
29
  @vernacular_name_strings[name_string] = 1
29
30
  end
30
31
 
@@ -70,7 +71,9 @@ class DarwinCore
70
71
 
71
72
  def get_canonical_name(a_scientific_name)
72
73
  return nil unless @with_canonical_names
73
- canonical_name = @parser.parse(a_scientific_name, canonical_only: true)
74
+
75
+ canonical_name = Biodiversity::Parser.parse(a_scientific_name).
76
+ dig(:canonical, :simple)
74
77
  canonical_name.to_s.empty? ? a_scientific_name : canonical_name
75
78
  end
76
79
 
@@ -85,15 +88,13 @@ class DarwinCore
85
88
  end
86
89
 
87
90
  def status_synonym?(status)
88
- status && status.match(/^syn/)
91
+ status&.match(/^syn/)
89
92
  end
90
93
 
91
94
  def add_synonym_from_core(taxon_id, row)
92
95
  cf = @core_fields
93
96
  @synonyms[row[cf[:id]]] = taxon_id
94
- unless @normalized_data[row[taxon_id]]
95
- @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
96
- end
97
+ @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new unless @normalized_data[row[taxon_id]]
97
98
 
98
99
  taxon = @normalized_data[row[taxon_id]]
99
100
  synonym = SynonymNormalized.new(
@@ -105,7 +106,7 @@ class DarwinCore
105
106
  cf[:localid] ? row[cf[:localid]] : nil,
106
107
  cf[:globalid] ? row[cf[:globalid]] : nil
107
108
  )
108
- taxon.synonyms << synonym
109
+ taxon.synonyms << synonym
109
110
  add_name_string(synonym.name)
110
111
  add_name_string(synonym.canonical_name)
111
112
  end
@@ -115,14 +116,10 @@ class DarwinCore
115
116
  canonical_name = nil
116
117
  scientific_name = row[fields[:scientificname]].strip
117
118
  if separate_canonical_and_authorship?(row, fields)
118
- if @with_canonical_names
119
- canonical_name = row[fields[:scientificname]].strip
120
- end
119
+ canonical_name = row[fields[:scientificname]].strip if @with_canonical_names
121
120
  scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
122
121
  else
123
- if @with_canonical_names
124
- canonical_name = get_canonical_name(row[fields[:scientificname]])
125
- end
122
+ canonical_name = get_canonical_name(row[fields[:scientificname]]) if @with_canonical_names
126
123
  end
127
124
  fields[:canonicalname] = row.size
128
125
  row << canonical_name
@@ -131,18 +128,17 @@ class DarwinCore
131
128
 
132
129
  def separate_canonical_and_authorship?(row, fields)
133
130
  authorship = ""
134
- if fields[:scientificnameauthorship]
135
- authorship = row[fields[:scientificnameauthorship]].to_s.strip
136
- end
131
+ authorship = row[fields[:scientificnameauthorship]].to_s.strip if fields[:scientificnameauthorship]
137
132
  !(authorship.empty? || row[fields[:scientificname]].index(authorship))
138
133
  end
139
134
 
140
135
  def ingest_core
141
136
  @normalized_data = {}
142
137
  has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
143
- fail(DarwinCore::CoreFileError,
144
- "Darwin Core core fields must contain taxon id and scientific name"
145
- ) unless has_name_and_id
138
+ unless has_name_and_id
139
+ raise(DarwinCore::CoreFileError,
140
+ "Darwin Core core fields must contain taxon id and scientific name")
141
+ end
146
142
  @dwc.core.read do |rows|
147
143
  rows[1].each do |error|
148
144
  @error_names << { data: error,
@@ -161,32 +157,28 @@ class DarwinCore
161
157
  add_synonym_from_core(parent_id, r) if parent_id?
162
158
  else
163
159
  unless @normalized_data[r[@core_fields[:id]]]
164
- if gnub_archive?
165
- new_taxon = DarwinCore::GnubTaxon.new
166
- else
167
- new_taxon = DarwinCore::TaxonNormalized.new
168
- end
160
+ new_taxon = if gnub_archive?
161
+ DarwinCore::GnubTaxon.new
162
+ else
163
+ DarwinCore::TaxonNormalized.new
164
+ end
169
165
  @normalized_data[r[@core_fields[:id]]] = new_taxon
170
166
  end
171
167
  taxon = @normalized_data[r[@core_fields[:id]]]
172
168
  if gnub_archive?
173
169
  taxon.uuid = r[@core_fields[:originalnameusageid]]
174
170
  taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
175
- split("|")
171
+ split("|")
176
172
  end
177
173
  taxon.id = r[@core_fields[:id]]
178
174
  taxon.current_name = r[@core_fields[:scientificname]]
179
175
  taxon.current_name_canonical = r[@core_fields[:canonicalname]]
180
176
  taxon.parent_id = parent_id? ? r[parent_id] : nil
181
177
  taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
182
- if @core_fields[:taxonomicstatus]
183
- taxon.status = r[@core_fields[:taxonomicstatus]]
184
- end
178
+ taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
185
179
  taxon.source = r[@core_fields[:source]] if @core_fields[:source]
186
180
  taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
187
- if @core_fields[:globalid]
188
- taxon.global_id = r[@core_fields[:globalid]]
189
- end
181
+ taxon.global_id = r[@core_fields[:globalid]] if @core_fields[:globalid]
190
182
  taxon.linnean_classification_path =
191
183
  get_linnean_classification_path(r, taxon)
192
184
  add_name_string(taxon.current_name)
@@ -211,6 +203,7 @@ class DarwinCore
211
203
  @paths_num = 0
212
204
  @normalized_data.each do |_taxon_id, taxon|
213
205
  next unless taxon.classification_path_id.empty?
206
+
214
207
  res = get_classification_path(taxon)
215
208
  next if res == "error"
216
209
  end
@@ -218,6 +211,7 @@ class DarwinCore
218
211
 
219
212
  def get_classification_path(taxon)
220
213
  return unless taxon.classification_path_id.empty?
214
+
221
215
  @paths_num += 1
222
216
  if @paths_num % 10_000 == 0
223
217
  DarwinCore.logger_write(@dwc.object_id,
@@ -225,17 +219,13 @@ class DarwinCore
225
219
  end
226
220
  current_node = { taxon.id => {} }
227
221
  if DarwinCore.nil_field?(taxon.parent_id)
228
- if @with_canonical_names
229
- taxon.classification_path << taxon.current_name_canonical
230
- end
222
+ taxon.classification_path << taxon.current_name_canonical if @with_canonical_names
231
223
  taxon.classification_path_id << taxon.id
232
224
  @tree.merge!(current_node)
233
225
  else
234
226
  parent_cp = parent_cpid = nil
235
227
  if @normalized_data[taxon.parent_id]
236
- if @with_canonical_names
237
- parent_cp = @normalized_data[taxon.parent_id].classification_path
238
- end
228
+ parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names
239
229
  parent_cpid = @normalized_data[taxon.parent_id].
240
230
  classification_path_id
241
231
  else
@@ -245,9 +235,7 @@ class DarwinCore
245
235
  error: :deprecated_parent,
246
236
  current_parent: current_parent }
247
237
 
248
- if @with_canonical_names
249
- parent_cp = current_parent.classification_path
250
- end
238
+ parent_cp = current_parent.classification_path if @with_canonical_names
251
239
  parent_cpid = current_parent.classification_path_id
252
240
  else
253
241
  @error_names << { data: taxon,
@@ -256,6 +244,7 @@ class DarwinCore
256
244
  end
257
245
  end
258
246
  return "error" unless parent_cpid
247
+
259
248
  if parent_cpid.empty?
260
249
  res = "error"
261
250
  begin
@@ -266,6 +255,7 @@ class DarwinCore
266
255
  current_parent: nil }
267
256
  end
268
257
  return res if res == "error"
258
+
269
259
  if @with_canonical_names
270
260
  taxon.classification_path += @normalized_data[taxon.parent_id].
271
261
  classification_path +
@@ -293,7 +283,7 @@ class DarwinCore
293
283
  rescue NoMethodError => e
294
284
  DarwinCore.logger_write(@dwc.object_id,
295
285
  "Error '#{e.message}' taxon #{taxon.id}")
296
- return "error"
286
+ "error"
297
287
  end
298
288
  end
299
289
  end
@@ -379,8 +369,8 @@ class DarwinCore
379
369
 
380
370
  # Collect linnean classification path only on species level
381
371
  def get_linnean_classification_path(row, _taxon)
382
- [:kingdom, :phylum, :class, :order, :family, :genus,
383
- :subgenus].each_with_object([]) do |clade, res|
372
+ %i[kingdom phylum class order family genus
373
+ subgenus].each_with_object([]) do |clade, res|
384
374
  res << [row[@core_fields[clade]], clade] if @core_fields[clade]
385
375
  end
386
376
  end
@@ -2,12 +2,20 @@
2
2
 
3
3
  class DarwinCore
4
4
  class Error < RuntimeError; end
5
+
5
6
  class FileNotFoundError < Error; end
7
+
6
8
  class UnpackingError < Error; end
9
+
7
10
  class InvalidArchiveError < Error; end
11
+
8
12
  class CoreFileError < Error; end
13
+
9
14
  class ExtensionFileError < Error; end
15
+
10
16
  class GeneratorError < Error; end
17
+
11
18
  class ParentNotCurrentError < Error; end
19
+
12
20
  class EncodingError < Error; end
13
21
  end
@@ -13,6 +13,7 @@ class DarwinCore
13
13
  def unpack
14
14
  clean
15
15
  raise DarwinCore::FileNotFoundError unless File.exist?(@archive_path)
16
+
16
17
  success = @unpacker.call(@dir_path, @archive_path) if @unpacker
17
18
  if @unpacker && success && $CHILD_STATUS.exitstatus.zero?
18
19
  success
@@ -37,11 +38,9 @@ class DarwinCore
37
38
  private
38
39
 
39
40
  def init_unpacker
40
- file_command = IO.popen("file -z " + esc(@archive_path))
41
- file_type = file_command.read
42
- file_command.close
43
- return tar_unpacker if file_type =~ /tar.*gzip/i
44
- return zip_unpacker if file_type =~ /Zip/
41
+ return tar_unpacker if @archive_path =~ /tar.gz$/i
42
+ return zip_unpacker if @archive_path =~ /zip$/i
43
+
45
44
  nil
46
45
  end
47
46
 
@@ -61,11 +60,11 @@ class DarwinCore
61
60
  end
62
61
 
63
62
  def esc(a_str)
64
- "'" + a_str.gsub(92.chr, '\\\\\\').gsub("'", "\\\\'") + "'"
63
+ "'#{a_str.gsub(92.chr, '\\\\\\').gsub("'", "\\\\'")}'"
65
64
  end
66
65
 
67
66
  def path_entries(dir)
68
- Dir.entries(dir).reject { |e| e.match(/[\.]{1,2}$/) }.sort
67
+ Dir.entries(dir).reject { |e| e.match(/\.{1,2}$/) }.sort
69
68
  end
70
69
 
71
70
  def files_path
@@ -79,6 +78,7 @@ class DarwinCore
79
78
  check_path = File.join(@dir_path, e)
80
79
  next unless FileTest.directory?(check_path) &&
81
80
  path_entries(check_path).include?("meta.xml")
81
+
82
82
  res = check_path
83
83
  break
84
84
  end
@@ -12,7 +12,7 @@ class DarwinCore
12
12
  @meta_xml_data = { extensions: [] }
13
13
  @eml_xml_data = { id: nil, title: nil, authors: [], abstrac: nil,
14
14
  citation: nil, url: nil }
15
- @write = "w:utf-8"
15
+ @write = 'w:utf-8'
16
16
  end
17
17
 
18
18
  def clean
@@ -20,14 +20,14 @@ class DarwinCore
20
20
  end
21
21
 
22
22
  def add_core(data, file_name, keep_headers = true)
23
- opts = { type: "core", data: data, file_name: file_name,
23
+ opts = { type: 'core', data: data, file_name: file_name,
24
24
  keep_headers: keep_headers }
25
25
  prepare_csv_file(opts)
26
26
  end
27
27
 
28
28
  def add_extension(data, file_name, keep_headers = true,
29
- row_type = "http://rs.tdwg.org/dwc/terms/Taxon")
30
- opts = { type: "extension", data: data, file_name: file_name,
29
+ row_type = 'http://rs.tdwg.org/dwc/terms/Taxon')
30
+ opts = { type: 'extension', data: data, file_name: file_name,
31
31
  keep_headers: keep_headers, row_type: row_type }
32
32
  prepare_csv_file(opts)
33
33
  end
@@ -57,7 +57,7 @@ class DarwinCore
57
57
  def prepare_csv_file(opts)
58
58
  c = CSV.open(File.join(@path, opts[:file_name]), @write)
59
59
  attributes = prepare_attributes(opts)
60
- if opts[:type] == "core"
60
+ if opts[:type] == 'core'
61
61
  @meta_xml_data[:core] = attributes
62
62
  else
63
63
  @meta_xml_data[:extensions] << attributes
@@ -83,7 +83,8 @@ class DarwinCore
83
83
  f = f.strip
84
84
  err = "No header in #{file_type} data, or header fields are not urls"
85
85
  raise DarwinCore::GeneratorError, err unless f =~ %r{^http://}
86
- f.split("/")[-1]
86
+
87
+ f.split('/')[-1]
87
88
  end
88
89
  end
89
90
  end
@@ -1,11 +1,9 @@
1
- # encoding: utf-8
2
1
  class DarwinCore
3
2
  # This module abstracts information for reading csv file to be used
4
3
  # in several classes which need such functionality
5
4
  module Ingester
6
- attr_reader :data, :properties, :encoding, :fields_separator, :size
7
- attr_reader :file_path, :fields, :line_separator, :quote_character,
8
- :ignore_headers
5
+ attr_reader :data, :properties, :encoding, :fields_separator, :size, :file_path, :fields, :line_separator,
6
+ :quote_character, :ignore_headers
9
7
 
10
8
  def size
11
9
  @size ||= init_size
@@ -16,16 +14,19 @@ class DarwinCore
16
14
  res = []
17
15
  errors = []
18
16
  args = define_csv_args
19
- min_size = @fields.map { |f| f[:index].to_i || 0 }.sort[-1] + 1
20
- csv = CSV.new(open(@file_path), args)
17
+ min_size = @fields.map { |f| f[:index].to_i || 0 }.max + 1
18
+ csv = CSV.new(open(@file_path), **args)
21
19
  csv.each_with_index do |r, i|
22
20
  next if @ignore_headers && i == 0
21
+
23
22
  min_size > r.size ? errors << r : process_csv_row(res, errors, r)
24
23
  next if i == 0 || i % batch_size != 0
24
+
25
25
  DarwinCore.logger_write(@dwc.object_id,
26
26
  format("Ingested %s records from %s",
27
27
  i, name))
28
28
  next unless block_given?
29
+
29
30
  yield [res, errors]
30
31
  res = []
31
32
  errors = []
@@ -70,12 +71,14 @@ class DarwinCore
70
71
 
71
72
  def init_encoding
72
73
  @encoding = @properties[:encoding] || "UTF-8"
73
- accepted_encoding = ["utf-8", "utf8", "utf-16", "utf16"].
74
+ accepted_encoding = %w[utf-8 utf8 utf-16 utf16].
74
75
  include?(@encoding.downcase)
75
- fail(
76
- DarwinCore::EncodingError,
77
- "No support for encodings other than utf-8 or utf-16 at the moment"
78
- ) unless accepted_encoding
76
+ unless accepted_encoding
77
+ raise(
78
+ DarwinCore::EncodingError,
79
+ "No support for encodings other than utf-8 or utf-16 at the moment"
80
+ )
81
+ end
79
82
  end
80
83
 
81
84
  def init_file_path
@@ -83,14 +86,16 @@ class DarwinCore
83
86
  @data[:attributes][:location] ||
84
87
  @data[:files][:location]
85
88
  @file_path = File.join(@path, file)
86
- fail DarwinCore::FileNotFoundError, "No file data" unless @file_path
89
+ raise DarwinCore::FileNotFoundError, "No file data" unless @file_path
87
90
  end
88
91
 
89
92
  def init_fields
90
93
  @data[:field] = [data[:field]] if data[:field].class != Array
91
94
  @fields = @data[:field].map { |f| f[:attributes] }
92
- fail DarwinCore::InvalidArchiveError,
93
- "No data fields are found" if @fields.empty?
95
+ if @fields.empty?
96
+ raise DarwinCore::InvalidArchiveError,
97
+ "No data fields are found"
98
+ end
94
99
  end
95
100
 
96
101
  def init_field_separator
@@ -100,7 +105,7 @@ class DarwinCore
100
105
  end
101
106
 
102
107
  def init_size
103
- `wc -l #{@file_path}`.match(/^\s*([\d]+)\s/)[1].to_i
108
+ `wc -l #{@file_path}`.match(/^\s*(\d+)\s/)[1].to_i
104
109
  end
105
110
  end
106
111
  end
@@ -25,6 +25,7 @@ class DarwinCore
25
25
 
26
26
  def authors
27
27
  return nil unless defined?(@metadata[:eml][:dataset][:creator])
28
+
28
29
  authors = [@metadata[:eml][:dataset][:creator]].flatten
29
30
  authors.map do |au|
30
31
  { first_name: au[:individualName][:givenName],
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Version constant of the class
4
4
  class DarwinCore
5
- VERSION = "1.0.1"
5
+ VERSION = "1.1.4"
6
6
  end
@@ -35,13 +35,14 @@ class DarwinCore
35
35
  end
36
36
 
37
37
  def prepare(data)
38
- data.class == String && data.to_i.to_s == data ? data.to_i : data
38
+ data.instance_of?(String) && data.to_i.to_s == data ? data.to_i : data
39
39
  end
40
40
 
41
41
  def add_attributes
42
42
  return if @node.attributes.empty?
43
+
43
44
  @val[:attributes] = {}
44
- @node.attributes.keys.each do |key|
45
+ @node.attributes.each_key do |key|
45
46
  add_attribute(@val[:attributes], @node.attributes[key])
46
47
  end
47
48
  end
@@ -1,18 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
4
+
3
5
  describe DarwinCore do
4
6
  subject { DarwinCore }
5
7
  let(:file_dir) { File.expand_path("../files", __dir__) }
6
8
 
7
- describe "redis connection" do
8
- it "redis is running" do
9
- expect do
10
- socket = TCPSocket.open("localhost", 6379)
11
- socket.close
12
- end.to_not raise_error
13
- end
14
- end
15
-
16
9
  it "has version" do
17
10
  expect(DarwinCore::VERSION).to match(/\d+\.\d+\.\d/)
18
11
  end
@@ -37,10 +30,10 @@ describe DarwinCore do
37
30
  it "cleans dwca directories" do
38
31
  Dir.chdir(tmp_dir)
39
32
  FileUtils.mkdir("dwc_123") unless File.exist?("dwc_123")
40
- dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
33
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_\d+$/) }
41
34
  expect(dwca_dirs.size).to be > 0
42
35
  subject.clean_all
43
- dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
36
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_\d+$/) }
44
37
  expect(dwca_dirs.size).to be 0
45
38
  end
46
39
 
@@ -48,7 +41,7 @@ describe DarwinCore do
48
41
  it "does nothing" do
49
42
  subject.clean_all
50
43
  subject.clean_all
51
- dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_[\d]+$/) }
44
+ dwca_dirs = Dir.entries(tmp_dir).select { |d| d.match(/^dwc_\d+$/) }
52
45
  expect(dwca_dirs.size).to be 0
53
46
  end
54
47
  end
@@ -121,7 +114,7 @@ describe DarwinCore do
121
114
  end
122
115
 
123
116
  context "filename with spaces and non-alphanumeric chars" do
124
- let(:file_path) { File.join(file_dir, "file with characters(3).gz") }
117
+ let(:file_path) { File.join(file_dir, "file with characters(3).tar.gz") }
125
118
 
126
119
  it "creates archive" do
127
120
  expect(dwca.archive.valid?).to be true
@@ -237,7 +230,8 @@ describe DarwinCore do
237
230
  let(:file_path) { File.join(file_dir, "data.tar.gz") }
238
231
  let(:normalized) { dwca.normalize_classification }
239
232
  let(:encodings) do
240
- normalized.each_with_object(Set.new) do |taxon, e|
233
+ set = Set.new
234
+ normalized.each_with_object(set) do |taxon, e|
241
235
  taxon[1].classification_path.each { |p| e << p.encoding }
242
236
  end
243
237
  end
@@ -13,10 +13,10 @@ describe DarwinCore::GnubTaxon do
13
13
  expect(tn).to be_kind_of DarwinCore::GnubTaxon
14
14
  expect(tn).to be_kind_of DarwinCore::TaxonNormalized
15
15
  expect(tn.uuid).to eq "8faa91f6-663f-4cfe-b785-0ab4e9415a51"
16
- expect(tn.uuid_path).to eq [
17
- "9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3",
18
- "bf4c91c0-3d1f-44c7-9d3b-249382182a26",
19
- "8faa91f6-663f-4cfe-b785-0ab4e9415a51"
16
+ expect(tn.uuid_path).to eq %w[
17
+ 9a9f9eeb-d5f9-4ff6-b6cb-a5ad345e33c3
18
+ bf4c91c0-3d1f-44c7-9d3b-249382182a26
19
+ 8faa91f6-663f-4cfe-b785-0ab4e9415a51
20
20
  ]
21
21
  end
22
22
 
data/spec/spec_helper.rb CHANGED
@@ -1,8 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "coveralls"
4
- Coveralls.wear!
5
-
6
3
  require "dwc_archive"
7
4
  require "rspec"
8
5
  require "rspec/mocks"
metadata CHANGED
@@ -1,169 +1,169 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-17 00:00:00.000000000 Z
11
+ date: 2021-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: nokogiri
14
+ name: biodiversity
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.8'
19
+ version: 5.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.8'
26
+ version: 5.2.0
27
27
  - !ruby/object:Gem::Dependency
28
- name: parsley-store
28
+ name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.3'
33
+ version: '1.11'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.3'
40
+ version: '1.11'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.16'
47
+ version: '2.2'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.16'
54
+ version: '2.2'
55
55
  - !ruby/object:Gem::Dependency
56
- name: coveralls
56
+ name: byebug
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0.8'
61
+ version: '11.1'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0.8'
68
+ version: '11.1'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: cucumber
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '3.1'
75
+ version: '5'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '3.1'
82
+ version: '5'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: git
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '1.4'
89
+ version: '1.8'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '1.4'
96
+ version: '1.8'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rake
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '12.3'
103
+ version: '13'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '12.3'
110
+ version: '13'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: rspec
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '3.7'
117
+ version: '3.10'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '3.7'
124
+ version: '3.10'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: rubocop
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.58'
131
+ version: '1.8'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.58'
138
+ version: '1.8'
139
139
  - !ruby/object:Gem::Dependency
140
140
  name: solargraph
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.23'
145
+ version: '0.40'
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.23'
152
+ version: '0.40'
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: travis
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - "~>"
158
158
  - !ruby/object:Gem::Version
159
- version: '1.8'
159
+ version: '1.10'
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
- version: '1.8'
166
+ version: '1.10'
167
167
  description: Darwin Core Archive is the current standard exchange format for Global
168
168
  Names Architecture modules. This gem makes it easy to incorporate files in Darwin
169
169
  Core Archive format into a ruby project.
@@ -174,11 +174,11 @@ extensions: []
174
174
  extra_rdoc_files: []
175
175
  files:
176
176
  - ".document"
177
+ - ".github/workflows/build.yml"
177
178
  - ".gitignore"
178
179
  - ".rspec"
179
180
  - ".rubocop.yml"
180
181
  - ".ruby-version"
181
- - ".travis.yml"
182
182
  - CHANGELOG
183
183
  - Gemfile
184
184
  - LICENSE
@@ -212,7 +212,7 @@ files:
212
212
  - spec/files/data_with_quotes.tar.gz
213
213
  - spec/files/eml.xml
214
214
  - spec/files/empty_coreid.tar.gz
215
- - spec/files/file with characters(3).gz
215
+ - spec/files/file with characters(3).tar.gz
216
216
  - spec/files/flat_list.tar.gz
217
217
  - spec/files/generator_eml.xml
218
218
  - spec/files/generator_meta.xml
@@ -245,7 +245,7 @@ homepage: http://github.com/GlobalNamesArchitecture/dwc-archive
245
245
  licenses:
246
246
  - MIT
247
247
  metadata: {}
248
- post_install_message:
248
+ post_install_message:
249
249
  rdoc_options: []
250
250
  require_paths:
251
251
  - lib
@@ -253,16 +253,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
253
253
  requirements:
254
254
  - - ">="
255
255
  - !ruby/object:Gem::Version
256
- version: 2.4.1
256
+ version: 2.6.0
257
257
  required_rubygems_version: !ruby/object:Gem::Requirement
258
258
  requirements:
259
259
  - - ">="
260
260
  - !ruby/object:Gem::Version
261
261
  version: '0'
262
262
  requirements: []
263
- rubyforge_project:
264
- rubygems_version: 2.7.6
265
- signing_key:
263
+ rubygems_version: 3.2.15
264
+ signing_key:
266
265
  specification_version: 4
267
266
  summary: Handler of Darwin Core Archive files
268
267
  test_files:
@@ -277,7 +276,7 @@ test_files:
277
276
  - spec/files/data_with_quotes.tar.gz
278
277
  - spec/files/eml.xml
279
278
  - spec/files/empty_coreid.tar.gz
280
- - spec/files/file with characters(3).gz
279
+ - spec/files/file with characters(3).tar.gz
281
280
  - spec/files/flat_list.tar.gz
282
281
  - spec/files/generator_eml.xml
283
282
  - spec/files/generator_meta.xml
data/.travis.yml DELETED
@@ -1,15 +0,0 @@
1
- rvm:
2
- - 2.4
3
- - 2.5
4
- before_install:
5
- - sudo apt-get update
6
- # bundler_args: --without development
7
- services:
8
- - redis-server
9
- script:
10
- - bundle exec cucumber
11
- - bundle exec rake
12
- branches:
13
- only:
14
- - master
15
-