dwc-archive 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0fd098cb550fe49fe0b1e4234dee9d42a99fdea5a6ce41393a71c000802b3887
4
- data.tar.gz: b637574ad6eda25ad8f1cb53538443529cd42ae0948530a09e3c080f16c2cb7b
3
+ metadata.gz: a5d4e174cb4ec6df2328fe99abb52b13587c74f22f01efb2a9017db4d9f1d7ba
4
+ data.tar.gz: e341038db2a23282173cf2e4671750739464dca6076f7560694cc40251bdd036
5
5
  SHA512:
6
- metadata.gz: 7e8b88410e88f7ad3cabe6d02934db35143f88408d08164890e970c36af68cfc164c88b9bd644e723a2575e0f781e4ef7aa7b2726331a8a70d9f48498cc46a0d
7
- data.tar.gz: 1d989666190f9e7333dae5eddff4c95b4f58797ebdffda3c5198625d4c869439f786beeb2d474e3a6260385320960b987d4329a072a5c01db30ccb040ef9e5ca
6
+ metadata.gz: 8b51b82724e21acab76e1763dc2375658080a16dc440c9913fd632aff195a45c34e7bf9446cf5d4afc9fc397c8ee562763ab36532ab5992421e5ed3f2df64273
7
+ data.tar.gz: 04cbcc92c8b565b2c5f8e8bf082d284b65c55520d91f19c16bc63789519ea374f5ed0466c176ce69907a83566caa8efd2f834399f7b57a26801603455cf2b118
@@ -1 +1 @@
1
- 2.6.6
1
+ 2.7.1
@@ -1,15 +1,13 @@
1
1
  rvm:
2
- - 2.4
3
2
  - 2.5
3
+ - 2.6
4
+ - 2.7
4
5
  before_install:
5
6
  - sudo apt-get update
6
- # bundler_args: --without development
7
- services:
8
- - redis-server
7
+ - gem install bundler
9
8
  script:
10
9
  - bundle exec cucumber
11
10
  - bundle exec rake
12
11
  branches:
13
12
  only:
14
13
  - master
15
-
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2012 Marine Biological Laboratory
1
+ Copyright (c) 2010-2020 Dmitry Mozzherin
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -179,11 +179,11 @@ Note on Patches/Pull Requests
179
179
  Copyright
180
180
  ---------
181
181
 
182
- Author -- [Dmitry Mozzherin][13]
182
+ Author -- [@dimus][13]
183
183
 
184
- Contributors -- [Matt Yoder][14]
184
+ Contributors -- [@mjy][14], [@LocoDelAssembly][16]
185
185
 
186
- Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for details.
186
+ Copyright (c) 2010-2020 [@dimus][15]. See LICENSE for details.
187
187
 
188
188
  [1]: https://badge.fury.io/rb/dwc-archive.png
189
189
  [2]: http://badge.fury.io/rb/dwc-archive
@@ -200,3 +200,4 @@ Copyright (c) 2010-2014 [Marine Biological Laboratory][15]. See LICENSE for deta
200
200
  [13]: https://github.com/dimus
201
201
  [14]: https://github.com/mjy
202
202
  [15]: http://mbl.edu
203
+ [16]: https://github.com/LocoDelAssembly
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.homepage = "http://github.com/GlobalNamesArchitecture/dwc-archive"
16
16
  gem.license = "MIT"
17
17
 
18
- gem.required_ruby_version = ">= 2.6.6"
18
+ gem.required_ruby_version = ">= 2.5.0"
19
19
  gem.files = `git ls-files`.split("\n").map(&:strip)
20
20
  gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
21
21
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
@@ -1,10 +1,11 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  class DarwinCore
3
4
  # Returns tree representation of Darwin Core file with vernacular and
4
5
  # and synonyms attached to the taxon nodes
5
6
  class ClassificationNormalizer
6
7
  attr_reader :error_names, :tree, :normalized_data, :dwc
7
- alias_method :darwin_core, :dwc
8
+ alias darwin_core dwc
8
9
 
9
10
  def initialize(dwc_instance)
10
11
  @dwc = dwc_instance
@@ -12,7 +13,6 @@ class DarwinCore
12
13
  @extensions = @dwc.extensions.map { |e| [e, find_fields(e)] }
13
14
  @normalized_data = {}
14
15
  @synonyms = {}
15
- @parser = ::Biodiversity::Parser
16
16
  @name_strings = {}
17
17
  @vernacular_name_strings = {}
18
18
  @error_names = []
@@ -25,6 +25,7 @@ class DarwinCore
25
25
 
26
26
  def add_vernacular_name_string(name_string)
27
27
  return if @vernacular_name_strings[name_string]
28
+
28
29
  @vernacular_name_strings[name_string] = 1
29
30
  end
30
31
 
@@ -70,9 +71,9 @@ class DarwinCore
70
71
 
71
72
  def get_canonical_name(a_scientific_name)
72
73
  return nil unless @with_canonical_names
73
- canonical_name = nil
74
- parsed = @parser.parse(a_scientific_name)
75
- canonical_name = parsed[:canonicalName][:simple] if parsed[:parsed]
74
+
75
+ canonical_name = Biodiversity::Parser.parse(a_scientific_name).
76
+ dig(:canonicalName, :simple)
76
77
  canonical_name.to_s.empty? ? a_scientific_name : canonical_name
77
78
  end
78
79
 
@@ -87,15 +88,13 @@ class DarwinCore
87
88
  end
88
89
 
89
90
  def status_synonym?(status)
90
- status && status.match(/^syn/)
91
+ status&.match(/^syn/)
91
92
  end
92
93
 
93
94
  def add_synonym_from_core(taxon_id, row)
94
95
  cf = @core_fields
95
96
  @synonyms[row[cf[:id]]] = taxon_id
96
- unless @normalized_data[row[taxon_id]]
97
- @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new
98
- end
97
+ @normalized_data[row[taxon_id]] = DarwinCore::TaxonNormalized.new unless @normalized_data[row[taxon_id]]
99
98
 
100
99
  taxon = @normalized_data[row[taxon_id]]
101
100
  synonym = SynonymNormalized.new(
@@ -107,7 +106,7 @@ class DarwinCore
107
106
  cf[:localid] ? row[cf[:localid]] : nil,
108
107
  cf[:globalid] ? row[cf[:globalid]] : nil
109
108
  )
110
- taxon.synonyms << synonym
109
+ taxon.synonyms << synonym
111
110
  add_name_string(synonym.name)
112
111
  add_name_string(synonym.canonical_name)
113
112
  end
@@ -117,14 +116,10 @@ class DarwinCore
117
116
  canonical_name = nil
118
117
  scientific_name = row[fields[:scientificname]].strip
119
118
  if separate_canonical_and_authorship?(row, fields)
120
- if @with_canonical_names
121
- canonical_name = row[fields[:scientificname]].strip
122
- end
119
+ canonical_name = row[fields[:scientificname]].strip if @with_canonical_names
123
120
  scientific_name += " #{row[fields[:scientificnameauthorship]].strip}"
124
121
  else
125
- if @with_canonical_names
126
- canonical_name = get_canonical_name(row[fields[:scientificname]])
127
- end
122
+ canonical_name = get_canonical_name(row[fields[:scientificname]]) if @with_canonical_names
128
123
  end
129
124
  fields[:canonicalname] = row.size
130
125
  row << canonical_name
@@ -133,18 +128,17 @@ class DarwinCore
133
128
 
134
129
  def separate_canonical_and_authorship?(row, fields)
135
130
  authorship = ""
136
- if fields[:scientificnameauthorship]
137
- authorship = row[fields[:scientificnameauthorship]].to_s.strip
138
- end
131
+ authorship = row[fields[:scientificnameauthorship]].to_s.strip if fields[:scientificnameauthorship]
139
132
  !(authorship.empty? || row[fields[:scientificname]].index(authorship))
140
133
  end
141
134
 
142
135
  def ingest_core
143
136
  @normalized_data = {}
144
137
  has_name_and_id = @core_fields[:id] && @core_fields[:scientificname]
145
- fail(DarwinCore::CoreFileError,
146
- "Darwin Core core fields must contain taxon id and scientific name"
147
- ) unless has_name_and_id
138
+ unless has_name_and_id
139
+ raise(DarwinCore::CoreFileError,
140
+ "Darwin Core core fields must contain taxon id and scientific name")
141
+ end
148
142
  @dwc.core.read do |rows|
149
143
  rows[1].each do |error|
150
144
  @error_names << { data: error,
@@ -163,32 +157,28 @@ class DarwinCore
163
157
  add_synonym_from_core(parent_id, r) if parent_id?
164
158
  else
165
159
  unless @normalized_data[r[@core_fields[:id]]]
166
- if gnub_archive?
167
- new_taxon = DarwinCore::GnubTaxon.new
168
- else
169
- new_taxon = DarwinCore::TaxonNormalized.new
170
- end
160
+ new_taxon = if gnub_archive?
161
+ DarwinCore::GnubTaxon.new
162
+ else
163
+ DarwinCore::TaxonNormalized.new
164
+ end
171
165
  @normalized_data[r[@core_fields[:id]]] = new_taxon
172
166
  end
173
167
  taxon = @normalized_data[r[@core_fields[:id]]]
174
168
  if gnub_archive?
175
169
  taxon.uuid = r[@core_fields[:originalnameusageid]]
176
170
  taxon.uuid_path = r[@core_fields[:originalnameusageidpath]].
177
- split("|")
171
+ split("|")
178
172
  end
179
173
  taxon.id = r[@core_fields[:id]]
180
174
  taxon.current_name = r[@core_fields[:scientificname]]
181
175
  taxon.current_name_canonical = r[@core_fields[:canonicalname]]
182
176
  taxon.parent_id = parent_id? ? r[parent_id] : nil
183
177
  taxon.rank = r[@core_fields[:taxonrank]] if @core_fields[:taxonrank]
184
- if @core_fields[:taxonomicstatus]
185
- taxon.status = r[@core_fields[:taxonomicstatus]]
186
- end
178
+ taxon.status = r[@core_fields[:taxonomicstatus]] if @core_fields[:taxonomicstatus]
187
179
  taxon.source = r[@core_fields[:source]] if @core_fields[:source]
188
180
  taxon.local_id = r[@core_fields[:localid]] if @core_fields[:localid]
189
- if @core_fields[:globalid]
190
- taxon.global_id = r[@core_fields[:globalid]]
191
- end
181
+ taxon.global_id = r[@core_fields[:globalid]] if @core_fields[:globalid]
192
182
  taxon.linnean_classification_path =
193
183
  get_linnean_classification_path(r, taxon)
194
184
  add_name_string(taxon.current_name)
@@ -213,6 +203,7 @@ class DarwinCore
213
203
  @paths_num = 0
214
204
  @normalized_data.each do |_taxon_id, taxon|
215
205
  next unless taxon.classification_path_id.empty?
206
+
216
207
  res = get_classification_path(taxon)
217
208
  next if res == "error"
218
209
  end
@@ -220,6 +211,7 @@ class DarwinCore
220
211
 
221
212
  def get_classification_path(taxon)
222
213
  return unless taxon.classification_path_id.empty?
214
+
223
215
  @paths_num += 1
224
216
  if @paths_num % 10_000 == 0
225
217
  DarwinCore.logger_write(@dwc.object_id,
@@ -227,17 +219,13 @@ class DarwinCore
227
219
  end
228
220
  current_node = { taxon.id => {} }
229
221
  if DarwinCore.nil_field?(taxon.parent_id)
230
- if @with_canonical_names
231
- taxon.classification_path << taxon.current_name_canonical
232
- end
222
+ taxon.classification_path << taxon.current_name_canonical if @with_canonical_names
233
223
  taxon.classification_path_id << taxon.id
234
224
  @tree.merge!(current_node)
235
225
  else
236
226
  parent_cp = parent_cpid = nil
237
227
  if @normalized_data[taxon.parent_id]
238
- if @with_canonical_names
239
- parent_cp = @normalized_data[taxon.parent_id].classification_path
240
- end
228
+ parent_cp = @normalized_data[taxon.parent_id].classification_path if @with_canonical_names
241
229
  parent_cpid = @normalized_data[taxon.parent_id].
242
230
  classification_path_id
243
231
  else
@@ -247,9 +235,7 @@ class DarwinCore
247
235
  error: :deprecated_parent,
248
236
  current_parent: current_parent }
249
237
 
250
- if @with_canonical_names
251
- parent_cp = current_parent.classification_path
252
- end
238
+ parent_cp = current_parent.classification_path if @with_canonical_names
253
239
  parent_cpid = current_parent.classification_path_id
254
240
  else
255
241
  @error_names << { data: taxon,
@@ -258,6 +244,7 @@ class DarwinCore
258
244
  end
259
245
  end
260
246
  return "error" unless parent_cpid
247
+
261
248
  if parent_cpid.empty?
262
249
  res = "error"
263
250
  begin
@@ -268,6 +255,7 @@ class DarwinCore
268
255
  current_parent: nil }
269
256
  end
270
257
  return res if res == "error"
258
+
271
259
  if @with_canonical_names
272
260
  taxon.classification_path += @normalized_data[taxon.parent_id].
273
261
  classification_path +
@@ -295,7 +283,7 @@ class DarwinCore
295
283
  rescue NoMethodError => e
296
284
  DarwinCore.logger_write(@dwc.object_id,
297
285
  "Error '#{e.message}' taxon #{taxon.id}")
298
- return "error"
286
+ "error"
299
287
  end
300
288
  end
301
289
  end
@@ -381,8 +369,8 @@ class DarwinCore
381
369
 
382
370
  # Collect linnean classification path only on species level
383
371
  def get_linnean_classification_path(row, _taxon)
384
- [:kingdom, :phylum, :class, :order, :family, :genus,
385
- :subgenus].each_with_object([]) do |clade, res|
372
+ %i[kingdom phylum class order family genus
373
+ subgenus].each_with_object([]) do |clade, res|
386
374
  res << [row[@core_fields[clade]], clade] if @core_fields[clade]
387
375
  end
388
376
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Version constant of the class
4
4
  class DarwinCore
5
- VERSION = "1.1.0"
5
+ VERSION = "1.1.1"
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dwc-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-03 00:00:00.000000000 Z
11
+ date: 2020-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -253,14 +253,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
253
253
  requirements:
254
254
  - - ">="
255
255
  - !ruby/object:Gem::Version
256
- version: 2.6.6
256
+ version: 2.5.0
257
257
  required_rubygems_version: !ruby/object:Gem::Requirement
258
258
  requirements:
259
259
  - - ">="
260
260
  - !ruby/object:Gem::Version
261
261
  version: '0'
262
262
  requirements: []
263
- rubygems_version: 3.0.3
263
+ rubygems_version: 3.1.2
264
264
  signing_key:
265
265
  specification_version: 4
266
266
  summary: Handler of Darwin Core Archive files