dwc-archive 1.1.0 → 1.1.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0fd098cb550fe49fe0b1e4234dee9d42a99fdea5a6ce41393a71c000802b3887
4
- data.tar.gz: b637574ad6eda25ad8f1cb53538443529cd42ae0948530a09e3c080f16c2cb7b
3
+ metadata.gz: a5d4e174cb4ec6df2328fe99abb52b13587c74f22f01efb2a9017db4d9f1d7ba
4
+ data.tar.gz: e341038db2a23282173cf2e4671750739464dca6076f7560694cc40251bdd036
5
5
  SHA512:
6
- metadata.gz: 7e8b88410e88f7ad3cabe6d02934db35143f88408d08164890e970c36af68cfc164c88b9bd644e723a2575e0f781e4ef7aa7b2726331a8a70d9f48498cc46a0d
7
- data.tar.gz: 1d989666190f9e7333dae5eddff4c95b4f58797ebdffda3c5198625d4c869439f786beeb2d474e3a6260385320960b987d4329a072a5c01db30ccb040ef9e5ca
6
+ metadata.gz: 8b51b82724e21acab76e1763dc2375658080a16dc440c9913fd632aff195a45c34e7bf9446cf5d4afc9fc397c8ee562763ab36532ab5992421e5ed3f2df64273
7
+ data.tar.gz: 04cbcc92c8b565b2c5f8e8bf082d284b65c55520d91f19c16bc63789519ea374f5ed0466c176ce69907a83566caa8efd2f834399f7b57a26801603455cf2b118
@@ -1 +1 @@
1
- 2.6.6
1
+ 2.7.1
@@ -1,15 +1,13 @@
1
1
  rvm:
2
- - 2.4
3
2
  - 2.5
3
+ - 2.6
4
+ - 2.7
4
5
  before_install:
5
6
  - sudo apt-get update
6
- # bundler_args: --without development
7
- services:
8
- - redis-server
7
+ - gem install bundler
9
8
  script:
10
9
  - bundle exec cucumber
11
10
  - bundle exec rake
12
11
  branches:
13
12
  only:
14
13
  - master
15
-
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratory
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -179,11 +179,11 @@ Note on Patches/Pull Requests
179
179
  Copyright
180
180
  ---------
181
181
 
182
- Author -- [Dmitry Mozzherin][13]
182
+ Author -- [@dimus][13]
183
183
 
184
- Contributors -- [Matt Yoder][14]
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
185
185
 
186
- Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
186
+ Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
187
187
 
188
188
  [1]: https://badge.fury.io/rb/dwc-archive.png
189
189
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -200,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
200
200
  [13]: https://github.com/dimus
201
201
  [14]: https://github.com/mjy
202
202
  [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
16
16
  gem.license = "MIT"
17
17
 
18
- gem.required_ruby_version = ">= 2.6.6"
18
+ gem.required_ruby_version = ">= 2.5.0"
19
19
  gem.files = `git ls-files`.split("\n").map(&:strip)
20
20
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
21
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
@@ -1,10 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  class DarwinCore
3
4
  # Returns tree representation of Darwin Core file with vernacular and
4
5
  # synonyms attached to the taxon nodes
5
6
  class ClassificationNormalizer
6
7
  attr_reader :error_names, :tree, :normalized_data, :dwc
7
- alias_method :darwin_core, :dwc
8
+ alias darwin_core dwc
8
9
 
9
10
  def initialize(dwc_instance)
10
11
  @dwc = dwc_instance
@@ -12,7 +13,6 @@ class DarwinCore
12
13
  @extensions = @dwc.extensions.map { |e| [e, find_fields(e)] }
13
14
  @normalized_data = {}
14
15
  @synonyms = {}
15
- @parser = ::Biodiversity::Parser
16
16
  @name_strings = {}
17
17
  @vernacular_name_strings = {}
18
18
  @error_names = []
@@ -25,6 +25,7 @@ class DarwinCore
25
25
 
26
26
  def add_vernacular_name_string(name_string)
27
27
  return if @vernacular_name_strings[name_string]
28
+
28
29
  @vernacular_name_strings[name_string] = 1
29
30
  end
30
31
 
@@ -70,9 +71,9 @@ class DarwinCore
70
71
 
71
72
  def get_canonical_name(a_scientific_name)
72
73
  return nil unless @with_canonical_names
73
- canonical_name = nil
74
- parsed = @parser.parse(a_scientific_name)
75
- canonical_name = parsed[:canonicalName][:simple] if parsed[:parsed]
74
+
75
+ canonical_name = Biodiversity::Parser.parse(a_scientific_name).
76
+ dig(:canonicalName, :simple)
76
77
  canonical_name.to_s.empty? ? a_scientific_name : canonical_name
77
78
  end
78
79
 
@@ -87,15 +88,13 @@ class DarwinCore
87
88
  end
88
89
 
89
90
  def status_synonym?(status)
90
- status && status.match(/^syn/)
91
+ status&.match(/^syn/)
91
92
  end
92
93
 
93
94
  def add_synonym_from_core(taxon_id, row)
94
95
  cf = @core_fields
95
96
  @synonyms[row[cf[:id]]] = taxon_id
96
- unless @normalized_data[row[taxon_id]]
97
- @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
98
- end
97
+ @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new unless @normalized_data[row[taxon_id]]
99
98
 
100
99
  taxon = @normalized_data[row[taxon_id]]
101
100
  synonym = SynonymNormalized.new(
@@ -107,7 +106,7 @@ class DarwinCore
107
106
  cf[:localid] ? row[cf[:localid]] : nil,
108
107
  cf[:globalid] ? row[cf[:globalid]] : nil
109
108
  )
110
- taxon.synonyms << synonym
109
+ taxon.synonyms << synonym
111
110
  add_name_string(synonym.name)
112
111
  add_name_string(synonym.canonical_name)
113
112
  end
@@ -117,14 +116,10 @@ class DarwinCore
117
116
  canonical_name = nil
118
117
  scientific_name = row[fields[:scientificname]].strip
119
118
  if separate_canonical_and_authorship?(row, fields)
120
- if @with_canonical_names
121
- canonical_name = row[fields[:scientificname]].strip
122
- end
119
+ canonical_name = row[fields[:scientificname]].strip if @with_canonical_names
123
120
  scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
124
121
  else
125
- if @with_canonical_names
126
- canonical_name = get_canonical_name(row[fields[:scientificname]])
127
- end
122
+ canonical_name = get_canonical_name(row[fields[:scientificname]]) if @with_canonical_names
128
123
  end
129
124
  fields[:canonicalname] = row.size
130
125
  row << canonical_name
@@ -133,18 +128,17 @@ class DarwinCore
133
128
 
134
129
  def separate_canonical_and_authorship?(row, fields)
135
130
  authorship = ""
136
- if fields[:scientificnameauthorship]
137
- authorship = row[fields[:scientificnameauthorship]].to_s.strip
138
- end
131
+ authorship = row[fields[:scientificnameauthorship]].to_s.strip if fields[:scientificnameauthorship]
139
132
  !(authorship.empty? || row[fields[:scientificname]].index(authorship))
140
133
  end
141
134
 
142
135
  def ingest_core
143
136
  @normalized_data = {}
144
137
  has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
145
- fail(DarwinCore::CoreFileError,
146
- "Darwin Core core fields must contain taxon id and scientific name"
147
- ) unless has_name_and_id
138
+ unless has_name_and_id
139
+ raise(DarwinCore::CoreFileError,
140
+ "Darwin Core core fields must contain taxon id and scientific name")
141
+ end
148
142
  @dwc.core.read do |rows|
149
143
  rows[1].each do |error|
150
144
  @error_names << { data: error,
@@ -163,32 +157,28 @@ class DarwinCore
163
157
  add_synonym_from_core(parent_id, r) if parent_id?
164
158
  else
165
159
  unless @normalized_data[r[@core_fields[:id]]]
166
- if gnub_archive?
167
- new_taxon = DarwinCore::GnubTaxon.new
168
- else
169
- new_taxon = DarwinCore::TaxonNormalized.new
170
- end
160
+ new_taxon = if gnub_archive?
161
+ DarwinCore::GnubTaxon.new
162
+ else
163
+ DarwinCore::TaxonNormalized.new
164
+ end
171
165
  @normalized_data[r[@core_fields[:id]]] = new_taxon
172
166
  end
173
167
  taxon = @normalized_data[r[@core_fields[:id]]]
174
168
  if gnub_archive?
175
169
  taxon.uuid = r[@core_fields[:originalnameusageid]]
176
170
  taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
177
- split("|")
171
+ split("|")
178
172
  end
179
173
  taxon.id = r[@core_fields[:id]]
180
174
  taxon.current_name = r[@core_fields[:scientificname]]
181
175
  taxon.current_name_canonical = r[@core_fields[:canonicalname]]
182
176
  taxon.parent_id = parent_id? ? r[parent_id] : nil
183
177
  taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
184
- if @core_fields[:taxonomicstatus]
185
- taxon.status = r[@core_fields[:taxonomicstatus]]
186
- end
178
+ taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
187
179
  taxon.source = r[@core_fields[:source]] if @core_fields[:source]
188
180
  taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
189
- if @core_fields[:globalid]
190
- taxon.global_id = r[@core_fields[:globalid]]
191
- end
181
+ taxon.global_id = r[@core_fields[:globalid]] if @core_fields[:globalid]
192
182
  taxon.linnean_classification_path =
193
183
  get_linnean_classification_path(r, taxon)
194
184
  add_name_string(taxon.current_name)
@@ -213,6 +203,7 @@ class DarwinCore
213
203
  @paths_num = 0
214
204
  @normalized_data.each do |_taxon_id, taxon|
215
205
  next unless taxon.classification_path_id.empty?
206
+
216
207
  res = get_classification_path(taxon)
217
208
  next if res == "error"
218
209
  end
@@ -220,6 +211,7 @@ class DarwinCore
220
211
 
221
212
  def get_classification_path(taxon)
222
213
  return unless taxon.classification_path_id.empty?
214
+
223
215
  @paths_num += 1
224
216
  if @paths_num % 10_000 == 0
225
217
  DarwinCore.logger_write(@dwc.object_id,
@@ -227,17 +219,13 @@ class DarwinCore
227
219
  end
228
220
  current_node = { taxon.id => {} }
229
221
  if DarwinCore.nil_field?(taxon.parent_id)
230
- if @with_canonical_names
231
- taxon.classification_path << taxon.current_name_canonical
232
- end
222
+ taxon.classification_path << taxon.current_name_canonical if @with_canonical_names
233
223
  taxon.classification_path_id << taxon.id
234
224
  @tree.merge!(current_node)
235
225
  else
236
226
  parent_cp = parent_cpid = nil
237
227
  if @normalized_data[taxon.parent_id]
238
- if @with_canonical_names
239
- parent_cp = @normalized_data[taxon.parent_id].classification_path
240
- end
228
+ parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names
241
229
  parent_cpid = @normalized_data[taxon.parent_id].
242
230
  classification_path_id
243
231
  else
@@ -247,9 +235,7 @@ class DarwinCore
247
235
  error: :deprecated_parent,
248
236
  current_parent: current_parent }
249
237
 
250
- if @with_canonical_names
251
- parent_cp = current_parent.classification_path
252
- end
238
+ parent_cp = current_parent.classification_path if @with_canonical_names
253
239
  parent_cpid = current_parent.classification_path_id
254
240
  else
255
241
  @error_names << { data: taxon,
@@ -258,6 +244,7 @@ class DarwinCore
258
244
  end
259
245
  end
260
246
  return "error" unless parent_cpid
247
+
261
248
  if parent_cpid.empty?
262
249
  res = "error"
263
250
  begin
@@ -268,6 +255,7 @@ class DarwinCore
268
255
  current_parent: nil }
269
256
  end
270
257
  return res if res == "error"
258
+
271
259
  if @with_canonical_names
272
260
  taxon.classification_path += @normalized_data[taxon.parent_id].
273
261
  classification_path +
@@ -295,7 +283,7 @@ class DarwinCore
295
283
  rescue NoMethodError => e
296
284
  DarwinCore.logger_write(@dwc.object_id,
297
285
  "Error '#{e.message}' taxon #{taxon.id}")
298
- return "error"
286
+ "error"
299
287
  end
300
288
  end
301
289
  end
@@ -381,8 +369,8 @@ class DarwinCore
381
369
 
382
370
  # Collect linnean classification path only on species level
383
371
  def get_linnean_classification_path(row, _taxon)
384
- [:kingdom, :phylum, :class, :order, :family, :genus,
385
- :subgenus].each_with_object([]) do |clade, res|
372
+ %i[kingdom phylum class order family genus
373
+ subgenus].each_with_object([]) do |clade, res|
386
374
  res << [row[@core_fields[clade]], clade] if @core_fields[clade]
387
375
  end
388
376
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Version constant of the class
4
4
  class DarwinCore
5
- VERSION = "1.1.0"
5
+ VERSION = "1.1.1"
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-03 00:00:00.000000000 Z
11
+ date: 2020-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -253,14 +253,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
253
253
  requirements:
254
254
  - - ">="
255
255
  - !ruby/object:Gem::Version
256
- version: 2.6.6
256
+ version: 2.5.0
257
257
  required_rubygems_version: !ruby/object:Gem::Requirement
258
258
  requirements:
259
259
  - - ">="
260
260
  - !ruby/object:Gem::Version
261
261
  version: '0'
262
262
  requirements: []
263
- rubygems_version: 3.0.3
263
+ rubygems_version: 3.1.2
264
264
  signing_key:
265
265
  specification_version: 4
266
266
  summary: Handler of Darwin Core Archive files