gn_crossmap 2.3.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 017146ff7e76cd3dedfbf6af0960c043f05ea9d4
4
- data.tar.gz: e580a9493d729eb78bbf0d05598aa6c470b70053
3
+ metadata.gz: 143661fa671afbd77e8f666ef883df2e127356fa
4
+ data.tar.gz: a809d9b2472bb8577fffc9df78b72557f7ff3200
5
5
  SHA512:
6
- metadata.gz: b3ceff8590a56778deeeb226faef127b5ae32dc4728c818f2d575d1863d4a527e07278d3de6b6dd5cf59b75ad7d017a1e8032a3f7b7404923d62bc86f6e04866
7
- data.tar.gz: f8feb027e0c8333187e83912acedb44e4efc535f1841c612810365e593e7517df89bcd88a1054401dd2ea77489736c4e4300e891fe9cb7686fc1a7a648b8a522
6
+ metadata.gz: 6dbc394ee6ae8e673f455dba4f789bc0ccded1b5c68ca51e19d1549233effeac417b26f7faa6fcac8a7b7db06f4a4d26e7f0944b5f9553673f82a752452e9048
7
+ data.tar.gz: 6e4be322cbd94e014ee70c9f009fe28c2da4f1b9012cf5cdfa2c0c8b62772dd69ed41ae1c42e917d2648ba3852a3c819add1f2813f85800156dff93f0d7111c0
data/.rubocop.yml CHANGED
@@ -5,6 +5,8 @@ AllCops:
5
5
  - spec/**/*
6
6
  Include:
7
7
  - exe/crossmap
8
+ - "**/Gemfile"
9
+ - "**/Rakefile"
8
10
  Style/StringLiterals:
9
11
  EnforcedStyle: double_quotes
10
12
  Style/DotPosition:
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 3.0.0
4
+
5
+ * @dimus - allow lists without taxonID
6
+
3
7
  ## 2.3.1
4
8
 
5
9
  * @dimus - show resolver url in log
data/README.md CHANGED
@@ -56,7 +56,8 @@ crossmap -i my_list.csv -o my_list_if.csv -d 5
56
56
  # to use standard input and/or output
57
57
  cat my_list.csv | crossmap -i - -o - > output
58
58
 
59
- # to keep only taxonID from original input
59
+ # to keep only taxonID (if given) from original input
60
+ # no original fields will be kept without taxonID
60
61
  cat my_list.csv | crossmap -i my_list.csv -s
61
62
  ```
62
63
 
@@ -85,8 +86,9 @@ designates `STDIN`
85
86
  : (integer) id of a data source from [GN resolver][resolver]
86
87
 
87
88
  ``skip_original``
88
- : (boolean) if true only `taxonID` is preserved from original data. Otherwise
89
- all original data is preserved
89
+ : (boolean) if true only `taxonID` (if given) is preserved
90
+ from original data. Otherwise all original data is preserved. If there is no
91
+ ``taxonID``, no original data will be preserved.
90
92
 
91
93
  ``alt_headers``
92
94
  : (array) empty array by default. If `alt_headers` are not empty they are used
@@ -184,7 +186,14 @@ Match types dictionary can be accessed with `GnCrossmap::MATCH_TYPES` constant
184
186
  `subspecies` `variety` `form` `scientificNameAuthorship` `scientificName`
185
187
  `taxonRank`
186
188
 
187
- #### Simple Example
189
+ #### Simplest Example -- only scientificName
190
+
191
+ | scientificName |
192
+ |---------------------------------------------------------|
193
+ | Animalia |
194
+ | Macrobiotus echinogenitus subsp. areolatus Murray, 1907 |
195
+
196
+ #### taxonID and scientificName Example
188
197
 
189
198
  taxonID;scientificName
190
199
  1;Macrobiotus echinogenitus subsp. areolatus Murray, 1907
data/Rakefile CHANGED
@@ -8,4 +8,4 @@ end
8
8
 
9
9
  RuboCop::RakeTask.new
10
10
 
11
- task default: [:rubocop, :rspec]
11
+ task default: %i(rubocop rspec)
data/gn_crossmap.gemspec CHANGED
@@ -1,8 +1,12 @@
1
1
  # coding: utf-8
2
+
2
3
  lib = File.expand_path("../lib", __FILE__)
4
+
3
5
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
6
  require "gn_crossmap/version"
5
7
 
8
+ # rubocop:disable Metrics/BlockLength:
9
+
6
10
  Gem::Specification.new do |gem|
7
11
  gem.required_ruby_version = ">= 2.1"
8
12
  gem.name = "gn_crossmap"
@@ -12,7 +16,7 @@ Gem::Specification.new do |gem|
12
16
  gem.email = ["dmozzherin@gmail.com"]
13
17
 
14
18
  gem.summary = "Crossmaps a list of scientific names to names from " \
15
- "a data source in GN Index"
19
+ "a data source in GN Index"
16
20
  gem.description = "Gem uses a checklist in a comma-separated format as " \
17
21
  "an input, and returns back a new comma-separated " \
18
22
  "list crossmapping the scientific names to one of the " \
@@ -30,11 +34,13 @@ Gem::Specification.new do |gem|
30
34
  gem.add_dependency "biodiversity", "~> 3.1"
31
35
  gem.add_dependency "rest-client", "~> 2.0"
32
36
  gem.add_dependency "logger-colors", "~> 1.0"
37
+ gem.add_dependency "gn_uuid", "~> 0.5"
33
38
 
34
39
  gem.add_development_dependency "bundler", "~> 1.7"
35
40
  gem.add_development_dependency "rake", "~> 11.0"
36
41
  gem.add_development_dependency "rspec", "~> 3.2"
37
42
  gem.add_development_dependency "rubocop", "~> 0.31"
38
43
  gem.add_development_dependency "coveralls", "~> 0.8"
39
- gem.add_development_dependency "gn_uuid", "~> 0.5"
40
44
  end
45
+
46
+ # rubocop:enable Metrics/BlockLength:
data/lib/gn_crossmap.rb CHANGED
@@ -5,6 +5,7 @@ require "tempfile"
5
5
  require "logger"
6
6
  require "logger/colors"
7
7
  require "biodiversity"
8
+ require "gn_uuid"
8
9
  require "gn_crossmap/errors"
9
10
  require "gn_crossmap/version"
10
11
  require "gn_crossmap/reader"
@@ -57,6 +58,10 @@ module GnCrossmap
57
58
  logger.info(message)
58
59
  end
59
60
 
61
+ def find_id(row, name)
62
+ row.key?(:taxonid) ? row[:taxonid].strip : GnUUID.uuid(name)
63
+ end
64
+
60
65
  private
61
66
 
62
67
  def create_resolver(writer, opts)
@@ -21,13 +21,6 @@ module GnCrossmap
21
21
  def init_fields_collector
22
22
  @fields = @row.map { |f| prepare_field(f) }
23
23
  @collector = collector_factory
24
- err = "taxonID must be present in the csv header"
25
- raise GnCrossmapError, err unless taxon_id?
26
- end
27
-
28
- def taxon_id?
29
- @taxon_id_index = @fields.index(:taxonid)
30
- !@taxon_id_index.nil?
31
24
  end
32
25
 
33
26
  def prepare_field(field)
@@ -16,12 +16,12 @@ module GnCrossmap
16
16
 
17
17
  def id_name_rank(row)
18
18
  @row = row
19
- id = @row[:taxonid]
20
- return nil if id.to_s.strip == ""
21
19
  rank = find_rank
22
20
  return nil unless rank
23
21
  name = assemble_name(rank)
24
22
  return nil unless name
23
+ id = GnCrossmap.find_id(@row, name)
24
+ return nil if id.strip.to_s == ""
25
25
  { id: id, name: name, rank: rank.to_s }
26
26
  end
27
27
 
@@ -8,8 +8,8 @@ module GnCrossmap
8
8
 
9
9
  def id_name_rank(row)
10
10
  @row = row
11
- id = @row[:taxonid]
12
11
  name = find_name
12
+ id = GnCrossmap.find_id(@row, name)
13
13
  rank = @row[:taxonrank]
14
14
  rank = parse_rank if rank.nil?
15
15
  id && name ? { id: id, name: name, rank: rank } : nil
@@ -1,6 +1,6 @@
1
1
  # Namespace module for crossmapping checklists to GN sources
2
2
  module GnCrossmap
3
- VERSION = "2.3.1".freeze
3
+ VERSION = "3.0.0".freeze
4
4
 
5
5
  def self.version
6
6
  VERSION
@@ -22,10 +22,10 @@ module GnCrossmap
22
22
  private
23
23
 
24
24
  def output_fields(original_fields)
25
- original_fields + [:matchedType, :inputName, :matchedName,
26
- :matchedCanonicalForm, :inputRank, :matchedRank,
27
- :synonymStatus, :acceptedName, :matchedEditDistance,
28
- :matchedScore, :matchTaxonID]
25
+ original_fields + %i(matchedType inputName matchedName
26
+ matchedCanonicalForm inputRank matchedRank
27
+ synonymStatus acceptedName matchedEditDistance
28
+ matchedScore matchTaxonID)
29
29
  end
30
30
  end
31
31
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.1
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-03-07 00:00:00.000000000 Z
11
+ date: 2017-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trollop
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: gn_uuid
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.5'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.5'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: bundler
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -136,20 +150,6 @@ dependencies:
136
150
  - - "~>"
137
151
  - !ruby/object:Gem::Version
138
152
  version: '0.8'
139
- - !ruby/object:Gem::Dependency
140
- name: gn_uuid
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - "~>"
144
- - !ruby/object:Gem::Version
145
- version: '0.5'
146
- type: :development
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - "~>"
151
- - !ruby/object:Gem::Version
152
- version: '0.5'
153
153
  description: Gem uses a checklist in a comma-separated format as an input, and returns
154
154
  back a new comma-separated list crossmapping the scientific names to one of the
155
155
  data sources from http://resolver.globalnames.org