gn_crossmap 3.0.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe8f372dc41915a9e7b577a5ec01453be24b5edb
4
- data.tar.gz: a260cec5476a37645aaa68443204aa262ab4f3f4
3
+ metadata.gz: 540a1ea4dd47918c8d98c6de7e38f0f6cc116798
4
+ data.tar.gz: eca75610cb8974aced0dbed71a51c38c865c4a27
5
5
  SHA512:
6
- metadata.gz: a86e7b8931b712bcb01d85c50e0ef41512e447bfec3884ce21b9193a23cbfa4c002032f29aafe6839f2f2c306e88f8f51a29458317629eec2b090f18f71876df
7
- data.tar.gz: 4067fade993942853c1a80c58411f250798fe8aad17d299edf8a25c301cd4d794ff56e845cac74c25facf8928ade73f68bfa61fd73cd19537bb9991012b995bb
6
+ metadata.gz: 4f008fac59680bac5c8160f8a5e3f8caf22b801b67be088145b0ef1d8ed3657fa50f72d36960efac54410f11f2853cdd6354a1bd2bc7535d87e30d84558ad45e
7
+ data.tar.gz: ba229e5d5f0d954e9884a7bb31ace418eeb3c5131e77ebc70df945706d66990a042a8e73865ca925da5c16eb4dfd1ca021aba82a9e2ebed35447ce159990ff00
@@ -3,6 +3,7 @@ AllCops:
3
3
  - db/**/*
4
4
  - bundle_bin/**/*
5
5
  - spec/**/*
6
+ - bin/**/*
6
7
  Include:
7
8
  - exe/crossmap
8
9
  - "**/Gemfile"
@@ -0,0 +1 @@
1
+ 2.4.1
@@ -1,7 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1
4
- - 2.2
3
+ - 2.4
5
4
  script:
6
5
  - bundle exec rake
7
6
  branches:
@@ -1,5 +1,11 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 3.1.0
4
+
5
+ * @dimus - Fixes #34 add canonical form input
6
+
7
+ * @dimus - Fixes #35 optionally returns classification path
8
+
3
9
  ## 3.0.3
4
10
 
5
11
  * @dimus - Fixes #33 infraspecies rank is given for all 'unknown' infra-specific
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  # Specify your gem's dependencies in gn_crossmap.gemspec
data/README.md CHANGED
@@ -59,6 +59,9 @@ cat my_list.csv | crossmap -i - -o - > output
59
59
  # to keep only taxonID (if given) from original input
60
60
  # no original fields will be kept without taxonID
61
61
  cat my_list.csv | crossmap -i my_list.csv -s
62
+
63
+ # to show classification from the source
64
+ cat my_list.csv | crossmap -i my_list.csv -w
62
65
  ```
63
66
 
64
67
  ### Usage as Ruby Library (API description)
@@ -97,6 +100,9 @@ instead of the headers supplied with the file
97
100
  ``resolver_url``
98
101
  : URL to globalnames' resolver. Default is ``http://resolver.globalnames.org``
99
102
 
103
+ ``with_classification``
104
+ : (boolean) if true, adds classification path to the output
105
+
100
106
  #### `GnCrossmap.logger=`
101
107
 
102
108
  Allows setting the logger to a custom logger (default is `STDERR`)
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rspec/core/rake_task"
3
5
  require "rubocop/rake_task"
@@ -1,11 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require "trollop"
3
5
  require "gn_crossmap"
4
6
 
5
- puts "This program requires Ruby >= v. 2.1.0" if RUBY_VERSION < "2.1.0"
7
+ puts "This program requires Ruby >= v. 2.4.1" if RUBY_VERSION < "2.4.1"
6
8
 
7
9
  CATALOGUE_OF_LIFE = 1
8
- OUTPUT = "output.csv".freeze
10
+ OUTPUT = "output.csv"
9
11
  opts = Trollop.options do
10
12
  banner "Compares a list of scientific names to scientific names from a " \
11
13
  "data source from Global Names Resolver\n\n " \
@@ -18,6 +20,8 @@ opts = Trollop.options do
18
20
  default: CATALOGUE_OF_LIFE)
19
21
  opt(:skip_original, "If given, only 'taxonID' is shown " \
20
22
  "from the original input", type: :boolean)
23
+ opt(:with_classification, "If given, returns classification path of " \
24
+ "matched names", type: :boolean)
21
25
  end
22
26
 
23
27
  Trollop.die :input, "must be set" if opts[:input].nil?
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  lib = File.expand_path("../lib", __FILE__)
4
4
 
@@ -30,18 +30,18 @@ Gem::Specification.new do |gem|
30
30
  gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
31
  gem.require_paths = ["lib"]
32
32
 
33
- gem.add_dependency "trollop", "~> 2.1"
34
33
  gem.add_dependency "biodiversity", "~> 3.1"
35
- gem.add_dependency "rest-client", "~> 2.0"
36
- gem.add_dependency "logger-colors", "~> 1.0"
37
34
  gem.add_dependency "gn_uuid", "~> 0.5"
35
+ gem.add_dependency "logger-colors", "~> 1.0"
36
+ gem.add_dependency "rest-client", "~> 2.0"
37
+ gem.add_dependency "trollop", "~> 2.1"
38
38
 
39
39
  gem.add_development_dependency "bundler", "~> 1.7"
40
- gem.add_development_dependency "rake", "~> 11.0"
41
- gem.add_development_dependency "rspec", "~> 3.2"
42
- gem.add_development_dependency "rubocop", "~> 0.31"
43
- gem.add_development_dependency "coveralls", "~> 0.8"
44
40
  gem.add_development_dependency "byebug", "~> 9.0"
41
+ gem.add_development_dependency "coveralls", "~> 0.8"
42
+ gem.add_development_dependency "rake", "~> 12.0"
43
+ gem.add_development_dependency "rspec", "~> 3.2"
44
+ gem.add_development_dependency "rubocop", "~> 0.49"
45
45
  end
46
46
 
47
47
  # rubocop:enable Metrics/BlockLength:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "csv"
2
4
  require "ostruct"
3
5
  require "rest_client"
@@ -19,8 +21,8 @@ require "gn_crossmap/stats"
19
21
 
20
22
  # Namespace module for crossmapping checklists with GN sources
21
23
  module GnCrossmap
22
- INPUT_MODE = "r:utf-8".freeze
23
- OUTPUT_MODE = "w:utf-8".freeze
24
+ INPUT_MODE = "r:utf-8"
25
+ OUTPUT_MODE = "w:utf-8"
24
26
  MATCH_TYPES = {
25
27
  0 => "No match",
26
28
  1 => "Exact string match",
@@ -65,11 +67,13 @@ module GnCrossmap
65
67
  private
66
68
 
67
69
  def create_resolver(writer, opts)
68
- Resolver.new(writer, opts.data_source_id, opts.resolver_url, opts.stats)
70
+ Resolver.new(writer, opts.data_source_id, opts.resolver_url,
71
+ opts.stats, opts.with_classification)
69
72
  end
70
73
 
71
74
  def create_writer(reader, output_io, opts)
72
- Writer.new(output_io, reader.original_fields, output_name(opts.output))
75
+ Writer.new(output_io, reader.original_fields,
76
+ output_name(opts.output), opts.with_classification)
73
77
  end
74
78
 
75
79
  def create_reader(input_io, opts)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by checking column fields
3
5
  class Collector
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by checking column fields
3
5
  class ColumnCollector
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Error to raise in case of problems
2
4
  class GnCrossmapError < RuntimeError; end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Reads supplied csv file and creates ruby structure to compare
3
5
  # with a Global Names Resolver source
@@ -99,7 +101,7 @@ module GnCrossmap
99
101
 
100
102
  def taxon_id_header(hdrs)
101
103
  hdrs.each do |h|
102
- return [h] if h =~ /taxonid\s*$/i
104
+ return [h] if h && h.match?(/taxonid\s*$/i)
103
105
  end
104
106
  []
105
107
  end
@@ -1,12 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Sends data to GN Resolver and collects results
3
5
  class Resolver
4
6
  attr_reader :stats
5
7
 
6
- def initialize(writer, data_source_id, resolver_url, stats)
8
+ def initialize(writer, data_source_id,
9
+ resolver_url, stats, with_classification = false)
7
10
  @stats = stats
8
11
  @resolver_url = resolver_url
9
- @processor = GnCrossmap::ResultProcessor.new(writer, @stats)
12
+ @processor = GnCrossmap::ResultProcessor.
13
+ new(writer, @stats, with_classification)
10
14
  @ds_id = data_source_id
11
15
  @count = 0
12
16
  @current_data = {}
@@ -57,12 +61,12 @@ module GnCrossmap
57
61
 
58
62
  def collect_names(slice)
59
63
  @current_data = {}
60
- slice.each_with_object("") do |row, str|
64
+ slice.each_with_object([]) do |row, str|
61
65
  id = row[:id].strip
62
66
  @current_data[id] = row[:original]
63
67
  @processor.input[id] = { rank: row[:rank] }
64
- str << "#{id}|#{row[:name]}\n"
65
- end
68
+ str << "#{id}|#{row[:name]}"
69
+ end.join("\n")
66
70
  end
67
71
 
68
72
  def remote_resolve(names)
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Processes data received from the GN Resolver
3
5
  class ResultProcessor
4
6
  attr_reader :input, :writer
5
7
 
6
- def initialize(writer, stats)
8
+ def initialize(writer, stats, with_classification = false)
9
+ @with_classification = with_classification
10
+ @parser = ScientificNameParser.new
7
11
  @stats = stats
8
12
  @writer = writer
9
13
  @input = {}
@@ -28,8 +32,8 @@ module GnCrossmap
28
32
  @stats.stats[:resolved_records] += 1
29
33
  res = @original_data[datum[:supplied_id]]
30
34
  res += [GnCrossmap::MATCH_TYPES[0], datum[:supplied_name_string], nil,
31
- nil, @input[datum[:supplied_id]][:rank], nil,
32
- nil, nil, nil]
35
+ datum[:supplied_canonical_form], nil,
36
+ @input[datum[:supplied_id]][:rank], nil, nil, nil, nil]
33
37
  @writer.write(res)
34
38
  end
35
39
 
@@ -50,13 +54,28 @@ module GnCrossmap
50
54
  @original_data[datum[:supplied_id]] + new_data(datum, result)
51
55
  end
52
56
 
57
+ # rubocop:disable Metrics/AbcSize
58
+
53
59
  def new_data(datum, result)
54
60
  synonym = result[:current_name_string] ? "synonym" : nil
55
- [matched_type(result), datum[:supplied_name_string],
56
- result[:name_string], result[:canonical_form],
57
- @input[datum[:supplied_id]][:rank], matched_rank(result),
58
- synonym, result[:current_name_string] || result[:name_string],
59
- result[:edit_distance], result[:score], result[:taxon_id]]
61
+ res = [matched_type(result), datum[:supplied_name_string],
62
+ result[:name_string], canonical(datum[:supplied_name_string]),
63
+ result[:canonical_form], @input[datum[:supplied_id]][:rank],
64
+ matched_rank(result), synonym,
65
+ result[:current_name_string] || result[:name_string],
66
+ result[:edit_distance], result[:score], result[:taxon_id]]
67
+ res << classification(result) if @with_classification
68
+ res
69
+ end
70
+
71
+ # rubocop:enable all
72
+
73
+ def canonical(name_string)
74
+ parsed = @parser.parse(name_string)[:scientificName]
75
+ parsed[:canonical].nil? || parsed[:hybrid] ? nil : parsed[:canonical]
76
+ rescue StandardError
77
+ @parser = ScientificNameParser.new
78
+ nil
60
79
  end
61
80
 
62
81
  def matched_rank(record)
@@ -66,5 +85,19 @@ module GnCrossmap
66
85
  def matched_type(record)
67
86
  GnCrossmap::MATCH_TYPES[record[:match_type]]
68
87
  end
88
+
89
+ # rubocop:disable Metrics/AbcSize
90
+
91
+ def classification(result)
92
+ return nil if result[:classification_path].to_s.strip == ""
93
+ path = result[:classification_path].split("|")
94
+ ranks = result[:classification_path_ranks].split("|")
95
+ if path.size == ranks.size
96
+ path = path.zip(ranks).map { |e| "#{e[0]}(#{e[1]})" }
97
+ end
98
+ path.join(", ")
99
+ end
100
+
101
+ # rubocop:enable all
69
102
  end
70
103
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by parsing scientificName field
3
5
  class SciNameCollector
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Namespace module for crossmapping checklists to GN sources
2
4
  module GnCrossmap
3
- VERSION = "3.0.3".freeze
5
+ VERSION = "3.1.0"
4
6
 
5
7
  def self.version
6
8
  VERSION
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Saves output from GN Resolver to disk
3
5
  class Writer
4
- def initialize(output_io, original_fields, output_name)
6
+ def initialize(output_io, original_fields, output_name,
7
+ with_classification = false)
5
8
  @output_io = output_io
6
9
  @output_fields = output_fields(original_fields)
10
+ @output_fields << :classification if with_classification
7
11
  @output = CSV.new(@output_io, col_sep: "\t")
8
12
  @output << @output_fields
9
13
  @output_name = output_name
@@ -22,7 +26,7 @@ module GnCrossmap
22
26
  private
23
27
 
24
28
  def output_fields(original_fields)
25
- original_fields + %i[matchedType inputName matchedName
29
+ original_fields + %i[matchedType inputName matchedName inputCanonicalForm
26
30
  matchedCanonicalForm inputRank matchedRank
27
31
  synonymStatus acceptedName matchedEditDistance
28
32
  matchedScore matchTaxonID]
metadata CHANGED
@@ -1,85 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.3
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-26 00:00:00.000000000 Z
11
+ date: 2017-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: trollop
14
+ name: biodiversity
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: '3.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: '3.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: biodiversity
28
+ name: gn_uuid
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '3.1'
33
+ version: '0.5'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '3.1'
40
+ version: '0.5'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rest-client
42
+ name: logger-colors
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '2.0'
47
+ version: '1.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '2.0'
54
+ version: '1.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: logger-colors
56
+ name: rest-client
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.0'
61
+ version: '2.0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.0'
68
+ version: '2.0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: gn_uuid
70
+ name: trollop
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0.5'
75
+ version: '2.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0.5'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: bundler
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -95,75 +95,75 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.7'
97
97
  - !ruby/object:Gem::Dependency
98
- name: rake
98
+ name: byebug
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '11.0'
103
+ version: '9.0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '11.0'
110
+ version: '9.0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: rspec
112
+ name: coveralls
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '3.2'
117
+ version: '0.8'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '3.2'
124
+ version: '0.8'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rubocop
126
+ name: rake
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.31'
131
+ version: '12.0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.31'
138
+ version: '12.0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: coveralls
140
+ name: rspec
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.8'
145
+ version: '3.2'
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.8'
152
+ version: '3.2'
153
153
  - !ruby/object:Gem::Dependency
154
- name: byebug
154
+ name: rubocop
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - "~>"
158
158
  - !ruby/object:Gem::Version
159
- version: '9.0'
159
+ version: '0.49'
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
- version: '9.0'
166
+ version: '0.49'
167
167
  description: Gem uses a checklist in a comma-separated format as an input, and returns
168
168
  back a new comma-separated list crossmapping the scientific names to one of the
169
169
  data sources from http://resolver.globalnames.org
@@ -177,6 +177,7 @@ files:
177
177
  - ".gitignore"
178
178
  - ".rspec"
179
179
  - ".rubocop.yml"
180
+ - ".ruby-version"
180
181
  - ".travis.yml"
181
182
  - CHANGELOG.md
182
183
  - Gemfile