dwc-archive 1.1.2 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +35 -0
- data/.ruby-version +1 -1
- data/dwc-archive.gemspec +1 -1
- data/lib/dwc_archive/ingester.rb +20 -15
- data/lib/dwc_archive/version.rb +1 -1
- data/spec/lib/darwin_core_spec.rb +4 -1
- metadata +6 -6
- data/.travis.yml +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 024cdcbe976462942f15dee141a48d4dc0595ccfb1b4013ace4b88fd742ec6a6
|
4
|
+
data.tar.gz: 29aff6c8699d82f74513874a90cbfe796b2db8f1c94134500cbc154bf3dd61d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 794cb8f3092f0005970421f68fba890553f825f9ef77902f3460fe16240a5438ec8fd277aa95eccae9335072dad5c42a25df87a326d5abd7f53f6271b411271c
|
7
|
+
data.tar.gz: fe2491aabacc15866880c54767f7b874b0c7c01c894947a671c3f0a7b10a1785a806731067100c426b39f80ec1d300e7298c7d0a4b8d30e7d2cab73847859622
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: build
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.2
|
data/dwc-archive.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.require_paths = ["lib"]
|
23
23
|
|
24
24
|
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
25
|
-
gem.add_runtime_dependency "biodiversity", "~> 5"
|
25
|
+
gem.add_runtime_dependency "biodiversity", "~> 5.5.2"
|
26
26
|
gem.add_runtime_dependency "nokogiri", "~> 1.11"
|
27
27
|
|
28
28
|
gem.add_development_dependency "bundler", "~> 2.2"
|
data/lib/dwc_archive/ingester.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
class DarwinCore
|
3
2
|
# This module abstracts information for reading csv file to be used
|
4
3
|
# in several classes which need such functionality
|
5
4
|
module Ingester
|
6
|
-
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
7
|
-
|
8
|
-
:ignore_headers
|
5
|
+
attr_reader :data, :properties, :encoding, :fields_separator, :size, :file_path, :fields, :line_separator,
|
6
|
+
:quote_character, :ignore_headers
|
9
7
|
|
10
8
|
def size
|
11
9
|
@size ||= init_size
|
@@ -16,16 +14,19 @@ class DarwinCore
|
|
16
14
|
res = []
|
17
15
|
errors = []
|
18
16
|
args = define_csv_args
|
19
|
-
min_size = @fields.map { |f| f[:index].to_i || 0 }.
|
20
|
-
csv = CSV.new(open(@file_path), args)
|
17
|
+
min_size = @fields.map { |f| f[:index].to_i || 0 }.max + 1
|
18
|
+
csv = CSV.new(open(@file_path), **args)
|
21
19
|
csv.each_with_index do |r, i|
|
22
20
|
next if @ignore_headers && i == 0
|
21
|
+
|
23
22
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
24
23
|
next if i == 0 || i % batch_size != 0
|
24
|
+
|
25
25
|
DarwinCore.logger_write(@dwc.object_id,
|
26
26
|
format("Ingested %s records from %s",
|
27
27
|
i, name))
|
28
28
|
next unless block_given?
|
29
|
+
|
29
30
|
yield [res, errors]
|
30
31
|
res = []
|
31
32
|
errors = []
|
@@ -70,12 +71,14 @@ class DarwinCore
|
|
70
71
|
|
71
72
|
def init_encoding
|
72
73
|
@encoding = @properties[:encoding] || "UTF-8"
|
73
|
-
accepted_encoding = [
|
74
|
+
accepted_encoding = %w[utf-8 utf8 utf-16 utf16].
|
74
75
|
include?(@encoding.downcase)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
76
|
+
unless accepted_encoding
|
77
|
+
raise(
|
78
|
+
DarwinCore::EncodingError,
|
79
|
+
"No support for encodings other than utf-8 or utf-16 at the moment"
|
80
|
+
)
|
81
|
+
end
|
79
82
|
end
|
80
83
|
|
81
84
|
def init_file_path
|
@@ -83,14 +86,16 @@ class DarwinCore
|
|
83
86
|
@data[:attributes][:location] ||
|
84
87
|
@data[:files][:location]
|
85
88
|
@file_path = File.join(@path, file)
|
86
|
-
|
89
|
+
raise DarwinCore::FileNotFoundError, "No file data" unless @file_path
|
87
90
|
end
|
88
91
|
|
89
92
|
def init_fields
|
90
93
|
@data[:field] = [data[:field]] if data[:field].class != Array
|
91
94
|
@fields = @data[:field].map { |f| f[:attributes] }
|
92
|
-
|
93
|
-
|
95
|
+
if @fields.empty?
|
96
|
+
raise DarwinCore::InvalidArchiveError,
|
97
|
+
"No data fields are found"
|
98
|
+
end
|
94
99
|
end
|
95
100
|
|
96
101
|
def init_field_separator
|
@@ -100,7 +105,7 @@ class DarwinCore
|
|
100
105
|
end
|
101
106
|
|
102
107
|
def init_size
|
103
|
-
`wc -l #{@file_path}`.match(/^\s*(
|
108
|
+
`wc -l #{@file_path}`.match(/^\s*(\d+)\s/)[1].to_i
|
104
109
|
end
|
105
110
|
end
|
106
111
|
end
|
data/lib/dwc_archive/version.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "set"
|
4
|
+
|
3
5
|
describe DarwinCore do
|
4
6
|
subject { DarwinCore }
|
5
7
|
let(:file_dir) { File.expand_path("../files", __dir__) }
|
@@ -228,7 +230,8 @@ describe DarwinCore do
|
|
228
230
|
let(:file_path) { File.join(file_dir, "data.tar.gz") }
|
229
231
|
let(:normalized) { dwca.normalize_classification }
|
230
232
|
let(:encodings) do
|
231
|
-
|
233
|
+
set = Set.new
|
234
|
+
normalized.each_with_object(set) do |taxon, e|
|
232
235
|
taxon[1].classification_path.each { |p| e << p.encoding }
|
233
236
|
end
|
234
237
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 5.5.2
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 5.5.2
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -174,11 +174,11 @@ extensions: []
|
|
174
174
|
extra_rdoc_files: []
|
175
175
|
files:
|
176
176
|
- ".document"
|
177
|
+
- ".github/workflows/build.yml"
|
177
178
|
- ".gitignore"
|
178
179
|
- ".rspec"
|
179
180
|
- ".rubocop.yml"
|
180
181
|
- ".ruby-version"
|
181
|
-
- ".travis.yml"
|
182
182
|
- CHANGELOG
|
183
183
|
- Gemfile
|
184
184
|
- LICENSE
|
@@ -260,7 +260,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
260
260
|
- !ruby/object:Gem::Version
|
261
261
|
version: '0'
|
262
262
|
requirements: []
|
263
|
-
rubygems_version: 3.2.
|
263
|
+
rubygems_version: 3.2.22
|
264
264
|
signing_key:
|
265
265
|
specification_version: 4
|
266
266
|
summary: Handler of Darwin Core Archive files
|