dwc-archive 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +35 -0
- data/.ruby-version +1 -1
- data/dwc-archive.gemspec +1 -1
- data/lib/dwc_archive/ingester.rb +20 -15
- data/lib/dwc_archive/version.rb +1 -1
- data/spec/lib/darwin_core_spec.rb +4 -1
- metadata +6 -6
- data/.travis.yml +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e4de59525e7e4a3f212dac828b97ff489f5ec40f764758a5b81b7fef09cc166
|
4
|
+
data.tar.gz: 2f408d2a04bac402c395928a6d048d2d28daed3235119c149684026b233e2126
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 145cd2d7d2f9e1c87c76ddca220fd56f1e44cca70ceb6995edfeba73d985e166709cf4254b6b2e19f143ffd02d879700587d61c6b9da239bdaff4288fde17cf9
|
7
|
+
data.tar.gz: e92697139838d7d720bfd9aca87ddf4537a8e272b6fabb04a05f92bfde047c9a45867e46e9b27b39d738c822ba6a843a73b3670b45da16ad9c2aca5df0523913
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: build
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ master ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ master ]
|
15
|
+
|
16
|
+
jobs:
|
17
|
+
test:
|
18
|
+
|
19
|
+
runs-on: ubuntu-latest
|
20
|
+
strategy:
|
21
|
+
matrix:
|
22
|
+
ruby-version: ['2.6', '2.7', '3.0']
|
23
|
+
|
24
|
+
steps:
|
25
|
+
- uses: actions/checkout@v2
|
26
|
+
- name: Set up Ruby
|
27
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
28
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
29
|
+
# uses: ruby/setup-ruby@v1
|
30
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
31
|
+
with:
|
32
|
+
ruby-version: ${{ matrix.ruby-version }}
|
33
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
34
|
+
- name: Run tests
|
35
|
+
run: bundle exec rake
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.0
|
data/dwc-archive.gemspec
CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |gem|
|
|
22
22
|
gem.require_paths = ["lib"]
|
23
23
|
|
24
24
|
# gem.add_runtime_dependency "minitar", "~> 0.6"
|
25
|
-
gem.add_runtime_dependency "biodiversity", "~> 5.
|
25
|
+
gem.add_runtime_dependency "biodiversity", "~> 5.2.0"
|
26
26
|
gem.add_runtime_dependency "nokogiri", "~> 1.11"
|
27
27
|
|
28
28
|
gem.add_development_dependency "bundler", "~> 2.2"
|
data/lib/dwc_archive/ingester.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
class DarwinCore
|
3
2
|
# This module abstracts information for reading csv file to be used
|
4
3
|
# in several classes which need such functionality
|
5
4
|
module Ingester
|
6
|
-
attr_reader :data, :properties, :encoding, :fields_separator, :size
|
7
|
-
|
8
|
-
:ignore_headers
|
5
|
+
attr_reader :data, :properties, :encoding, :fields_separator, :size, :file_path, :fields, :line_separator,
|
6
|
+
:quote_character, :ignore_headers
|
9
7
|
|
10
8
|
def size
|
11
9
|
@size ||= init_size
|
@@ -16,16 +14,19 @@ class DarwinCore
|
|
16
14
|
res = []
|
17
15
|
errors = []
|
18
16
|
args = define_csv_args
|
19
|
-
min_size = @fields.map { |f| f[:index].to_i || 0 }.
|
20
|
-
csv = CSV.new(open(@file_path), args)
|
17
|
+
min_size = @fields.map { |f| f[:index].to_i || 0 }.max + 1
|
18
|
+
csv = CSV.new(open(@file_path), **args)
|
21
19
|
csv.each_with_index do |r, i|
|
22
20
|
next if @ignore_headers && i == 0
|
21
|
+
|
23
22
|
min_size > r.size ? errors << r : process_csv_row(res, errors, r)
|
24
23
|
next if i == 0 || i % batch_size != 0
|
24
|
+
|
25
25
|
DarwinCore.logger_write(@dwc.object_id,
|
26
26
|
format("Ingested %s records from %s",
|
27
27
|
i, name))
|
28
28
|
next unless block_given?
|
29
|
+
|
29
30
|
yield [res, errors]
|
30
31
|
res = []
|
31
32
|
errors = []
|
@@ -70,12 +71,14 @@ class DarwinCore
|
|
70
71
|
|
71
72
|
def init_encoding
|
72
73
|
@encoding = @properties[:encoding] || "UTF-8"
|
73
|
-
accepted_encoding = [
|
74
|
+
accepted_encoding = %w[utf-8 utf8 utf-16 utf16].
|
74
75
|
include?(@encoding.downcase)
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
76
|
+
unless accepted_encoding
|
77
|
+
raise(
|
78
|
+
DarwinCore::EncodingError,
|
79
|
+
"No support for encodings other than utf-8 or utf-16 at the moment"
|
80
|
+
)
|
81
|
+
end
|
79
82
|
end
|
80
83
|
|
81
84
|
def init_file_path
|
@@ -83,14 +86,16 @@ class DarwinCore
|
|
83
86
|
@data[:attributes][:location] ||
|
84
87
|
@data[:files][:location]
|
85
88
|
@file_path = File.join(@path, file)
|
86
|
-
|
89
|
+
raise DarwinCore::FileNotFoundError, "No file data" unless @file_path
|
87
90
|
end
|
88
91
|
|
89
92
|
def init_fields
|
90
93
|
@data[:field] = [data[:field]] if data[:field].class != Array
|
91
94
|
@fields = @data[:field].map { |f| f[:attributes] }
|
92
|
-
|
93
|
-
|
95
|
+
if @fields.empty?
|
96
|
+
raise DarwinCore::InvalidArchiveError,
|
97
|
+
"No data fields are found"
|
98
|
+
end
|
94
99
|
end
|
95
100
|
|
96
101
|
def init_field_separator
|
@@ -100,7 +105,7 @@ class DarwinCore
|
|
100
105
|
end
|
101
106
|
|
102
107
|
def init_size
|
103
|
-
`wc -l #{@file_path}`.match(/^\s*(
|
108
|
+
`wc -l #{@file_path}`.match(/^\s*(\d+)\s/)[1].to_i
|
104
109
|
end
|
105
110
|
end
|
106
111
|
end
|
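data/lib/dwc_archive/version.rb
CHANGED
(the +1 -1 hunk for this file is missing from the extracted page)
data/spec/lib/darwin_core_spec.rb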
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "set"
|
4
|
+
|
3
5
|
describe DarwinCore do
|
4
6
|
subject { DarwinCore }
|
5
7
|
let(:file_dir) { File.expand_path("../files", __dir__) }
|
@@ -228,7 +230,8 @@ describe DarwinCore do
|
|
228
230
|
let(:file_path) { File.join(file_dir, "data.tar.gz") }
|
229
231
|
let(:normalized) { dwca.normalize_classification }
|
230
232
|
let(:encodings) do
|
231
|
-
|
233
|
+
set = Set.new
|
234
|
+
normalized.each_with_object(set) do |taxon, e|
|
232
235
|
taxon[1].classification_path.each { |p| e << p.encoding }
|
233
236
|
end
|
234
237
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dwc-archive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 5.
|
19
|
+
version: 5.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 5.
|
26
|
+
version: 5.2.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -174,11 +174,11 @@ extensions: []
|
|
174
174
|
extra_rdoc_files: []
|
175
175
|
files:
|
176
176
|
- ".document"
|
177
|
+
- ".github/workflows/build.yml"
|
177
178
|
- ".gitignore"
|
178
179
|
- ".rspec"
|
179
180
|
- ".rubocop.yml"
|
180
181
|
- ".ruby-version"
|
181
|
-
- ".travis.yml"
|
182
182
|
- CHANGELOG
|
183
183
|
- Gemfile
|
184
184
|
- LICENSE
|
@@ -260,7 +260,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
260
260
|
- !ruby/object:Gem::Version
|
261
261
|
version: '0'
|
262
262
|
requirements: []
|
263
|
-
rubygems_version: 3.2.
|
263
|
+
rubygems_version: 3.2.15
|
264
264
|
signing_key:
|
265
265
|
specification_version: 4
|
266
266
|
summary: Handler of Darwin Core Archive files
|