gn_crossmap 3.0.3 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe8f372dc41915a9e7b577a5ec01453be24b5edb
4
- data.tar.gz: a260cec5476a37645aaa68443204aa262ab4f3f4
3
+ metadata.gz: 540a1ea4dd47918c8d98c6de7e38f0f6cc116798
4
+ data.tar.gz: eca75610cb8974aced0dbed71a51c38c865c4a27
5
5
  SHA512:
6
- metadata.gz: a86e7b8931b712bcb01d85c50e0ef41512e447bfec3884ce21b9193a23cbfa4c002032f29aafe6839f2f2c306e88f8f51a29458317629eec2b090f18f71876df
7
- data.tar.gz: 4067fade993942853c1a80c58411f250798fe8aad17d299edf8a25c301cd4d794ff56e845cac74c25facf8928ade73f68bfa61fd73cd19537bb9991012b995bb
6
+ metadata.gz: 4f008fac59680bac5c8160f8a5e3f8caf22b801b67be088145b0ef1d8ed3657fa50f72d36960efac54410f11f2853cdd6354a1bd2bc7535d87e30d84558ad45e
7
+ data.tar.gz: ba229e5d5f0d954e9884a7bb31ace418eeb3c5131e77ebc70df945706d66990a042a8e73865ca925da5c16eb4dfd1ca021aba82a9e2ebed35447ce159990ff00
@@ -3,6 +3,7 @@ AllCops:
3
3
  - db/**/*
4
4
  - bundle_bin/**/*
5
5
  - spec/**/*
6
+ - bin/**/*
6
7
  Include:
7
8
  - exe/crossmap
8
9
  - "**/Gemfile"
@@ -0,0 +1 @@
1
+ 2.4.1
@@ -1,7 +1,6 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.1
4
- - 2.2
3
+ - 2.4
5
4
  script:
6
5
  - bundle exec rake
7
6
  branches:
@@ -1,5 +1,11 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 3.1.0
4
+
5
+ * @dimus - Fixes #34 add canonical form input
6
+
7
+ * @dimus - Fixes #35 optionally returns classification path
8
+
3
9
  ## 3.0.3
4
10
 
5
11
  * @dimus - Fixes #33 infraspecies rank is given for all 'unknown' infra-specific
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source "https://rubygems.org"
2
4
 
3
5
  # Specify your gem's dependencies in gn_crossmap.gemspec
data/README.md CHANGED
@@ -59,6 +59,9 @@ cat my_list.csv | crossmap -i - -o - > output
59
59
  # to keep only taxonID (if given) from original input
60
60
  # no original fields will be kept without taxonID
61
61
  cat my_list.csv | crossmap -i my_list.csv -s
62
+
63
+ # to show classification from the source
64
+ cat my_list.csv | crossmap -i my_list.csv -w
62
65
  ```
63
66
 
64
67
  ### Usage as Ruby Library (API description)
@@ -97,6 +100,9 @@ instead of the headers supplied with the file
97
100
  ``resolver_url``
98
101
  : URL to globalnames' resolver. Default is ``http://resolver.globalnames.org``
99
102
 
103
+ ``with_classification``
104
+ : (boolean) if true, adds classification path to the output
105
+
100
106
  #### `GnCrossmap.logger=`
101
107
 
102
108
  Allows setting the logger to a custom logger (default is `STDERR`)
data/Rakefile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rspec/core/rake_task"
3
5
  require "rubocop/rake_task"
@@ -1,11 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  require "trollop"
3
5
  require "gn_crossmap"
4
6
 
5
- puts "This program requires Ruby >= v. 2.1.0" if RUBY_VERSION < "2.1.0"
7
+ puts "This program requires Ruby >= v. 2.4.1" if RUBY_VERSION < "2.4.1"
6
8
 
7
9
  CATALOGUE_OF_LIFE = 1
8
- OUTPUT = "output.csv".freeze
10
+ OUTPUT = "output.csv"
9
11
  opts = Trollop.options do
10
12
  banner "Compares a list of scientific names to scientific names from a " \
11
13
  "data source from Global Names Resolver\n\n " \
@@ -18,6 +20,8 @@ opts = Trollop.options do
18
20
  default: CATALOGUE_OF_LIFE)
19
21
  opt(:skip_original, "If given, only 'taxonID' is shown " \
20
22
  "from the original input", type: :boolean)
23
+ opt(:with_classification, "If given, returns classification path of " \
24
+ "matched names", type: :boolean)
21
25
  end
22
26
 
23
27
  Trollop.die :input, "must be set" if opts[:input].nil?
@@ -1,4 +1,4 @@
1
- # coding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  lib = File.expand_path("../lib", __FILE__)
4
4
 
@@ -30,18 +30,18 @@ Gem::Specification.new do |gem|
30
30
  gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
31
31
  gem.require_paths = ["lib"]
32
32
 
33
- gem.add_dependency "trollop", "~> 2.1"
34
33
  gem.add_dependency "biodiversity", "~> 3.1"
35
- gem.add_dependency "rest-client", "~> 2.0"
36
- gem.add_dependency "logger-colors", "~> 1.0"
37
34
  gem.add_dependency "gn_uuid", "~> 0.5"
35
+ gem.add_dependency "logger-colors", "~> 1.0"
36
+ gem.add_dependency "rest-client", "~> 2.0"
37
+ gem.add_dependency "trollop", "~> 2.1"
38
38
 
39
39
  gem.add_development_dependency "bundler", "~> 1.7"
40
- gem.add_development_dependency "rake", "~> 11.0"
41
- gem.add_development_dependency "rspec", "~> 3.2"
42
- gem.add_development_dependency "rubocop", "~> 0.31"
43
- gem.add_development_dependency "coveralls", "~> 0.8"
44
40
  gem.add_development_dependency "byebug", "~> 9.0"
41
+ gem.add_development_dependency "coveralls", "~> 0.8"
42
+ gem.add_development_dependency "rake", "~> 12.0"
43
+ gem.add_development_dependency "rspec", "~> 3.2"
44
+ gem.add_development_dependency "rubocop", "~> 0.49"
45
45
  end
46
46
 
47
47
  # rubocop:enable Metrics/BlockLength:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "csv"
2
4
  require "ostruct"
3
5
  require "rest_client"
@@ -19,8 +21,8 @@ require "gn_crossmap/stats"
19
21
 
20
22
  # Namespace module for crossmapping checklists with GN sources
21
23
  module GnCrossmap
22
- INPUT_MODE = "r:utf-8".freeze
23
- OUTPUT_MODE = "w:utf-8".freeze
24
+ INPUT_MODE = "r:utf-8"
25
+ OUTPUT_MODE = "w:utf-8"
24
26
  MATCH_TYPES = {
25
27
  0 => "No match",
26
28
  1 => "Exact string match",
@@ -65,11 +67,13 @@ module GnCrossmap
65
67
  private
66
68
 
67
69
  def create_resolver(writer, opts)
68
- Resolver.new(writer, opts.data_source_id, opts.resolver_url, opts.stats)
70
+ Resolver.new(writer, opts.data_source_id, opts.resolver_url,
71
+ opts.stats, opts.with_classification)
69
72
  end
70
73
 
71
74
  def create_writer(reader, output_io, opts)
72
- Writer.new(output_io, reader.original_fields, output_name(opts.output))
75
+ Writer.new(output_io, reader.original_fields,
76
+ output_name(opts.output), opts.with_classification)
73
77
  end
74
78
 
75
79
  def create_reader(input_io, opts)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by checking column fields
3
5
  class Collector
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by checking column fields
3
5
  class ColumnCollector
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Error to raise in case of problems
2
4
  class GnCrossmapError < RuntimeError; end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Reads supplied csv file and creates ruby structure to compare
3
5
  # with a Global Names Resolver source
@@ -99,7 +101,7 @@ module GnCrossmap
99
101
 
100
102
  def taxon_id_header(hdrs)
101
103
  hdrs.each do |h|
102
- return [h] if h =~ /taxonid\s*$/i
104
+ return [h] if h && h.match?(/taxonid\s*$/i)
103
105
  end
104
106
  []
105
107
  end
@@ -1,12 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Sends data to GN Resolver and collects results
3
5
  class Resolver
4
6
  attr_reader :stats
5
7
 
6
- def initialize(writer, data_source_id, resolver_url, stats)
8
+ def initialize(writer, data_source_id,
9
+ resolver_url, stats, with_classification = false)
7
10
  @stats = stats
8
11
  @resolver_url = resolver_url
9
- @processor = GnCrossmap::ResultProcessor.new(writer, @stats)
12
+ @processor = GnCrossmap::ResultProcessor.
13
+ new(writer, @stats, with_classification)
10
14
  @ds_id = data_source_id
11
15
  @count = 0
12
16
  @current_data = {}
@@ -57,12 +61,12 @@ module GnCrossmap
57
61
 
58
62
  def collect_names(slice)
59
63
  @current_data = {}
60
- slice.each_with_object("") do |row, str|
64
+ slice.each_with_object([]) do |row, str|
61
65
  id = row[:id].strip
62
66
  @current_data[id] = row[:original]
63
67
  @processor.input[id] = { rank: row[:rank] }
64
- str << "#{id}|#{row[:name]}\n"
65
- end
68
+ str << "#{id}|#{row[:name]}"
69
+ end.join("\n")
66
70
  end
67
71
 
68
72
  def remote_resolve(names)
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Processes data received from the GN Resolver
3
5
  class ResultProcessor
4
6
  attr_reader :input, :writer
5
7
 
6
- def initialize(writer, stats)
8
+ def initialize(writer, stats, with_classification = false)
9
+ @with_classification = with_classification
10
+ @parser = ScientificNameParser.new
7
11
  @stats = stats
8
12
  @writer = writer
9
13
  @input = {}
@@ -28,8 +32,8 @@ module GnCrossmap
28
32
  @stats.stats[:resolved_records] += 1
29
33
  res = @original_data[datum[:supplied_id]]
30
34
  res += [GnCrossmap::MATCH_TYPES[0], datum[:supplied_name_string], nil,
31
- nil, @input[datum[:supplied_id]][:rank], nil,
32
- nil, nil, nil]
35
+ datum[:supplied_canonical_form], nil,
36
+ @input[datum[:supplied_id]][:rank], nil, nil, nil, nil]
33
37
  @writer.write(res)
34
38
  end
35
39
 
@@ -50,13 +54,28 @@ module GnCrossmap
50
54
  @original_data[datum[:supplied_id]] + new_data(datum, result)
51
55
  end
52
56
 
57
+ # rubocop:disable Metrics/AbcSize
58
+
53
59
  def new_data(datum, result)
54
60
  synonym = result[:current_name_string] ? "synonym" : nil
55
- [matched_type(result), datum[:supplied_name_string],
56
- result[:name_string], result[:canonical_form],
57
- @input[datum[:supplied_id]][:rank], matched_rank(result),
58
- synonym, result[:current_name_string] || result[:name_string],
59
- result[:edit_distance], result[:score], result[:taxon_id]]
61
+ res = [matched_type(result), datum[:supplied_name_string],
62
+ result[:name_string], canonical(datum[:supplied_name_string]),
63
+ result[:canonical_form], @input[datum[:supplied_id]][:rank],
64
+ matched_rank(result), synonym,
65
+ result[:current_name_string] || result[:name_string],
66
+ result[:edit_distance], result[:score], result[:taxon_id]]
67
+ res << classification(result) if @with_classification
68
+ res
69
+ end
70
+
71
+ # rubocop:enable all
72
+
73
+ def canonical(name_string)
74
+ parsed = @parser.parse(name_string)[:scientificName]
75
+ parsed[:canonical].nil? || parsed[:hybrid] ? nil : parsed[:canonical]
76
+ rescue StandardError
77
+ @parser = ScientificNameParser.new
78
+ nil
60
79
  end
61
80
 
62
81
  def matched_rank(record)
@@ -66,5 +85,19 @@ module GnCrossmap
66
85
  def matched_type(record)
67
86
  GnCrossmap::MATCH_TYPES[record[:match_type]]
68
87
  end
88
+
89
+ # rubocop:disable Metrics/AbcSize
90
+
91
+ def classification(result)
92
+ return nil if result[:classification_path].to_s.strip == ""
93
+ path = result[:classification_path].split("|")
94
+ ranks = result[:classification_path_ranks].split("|")
95
+ if path.size == ranks.size
96
+ path = path.zip(ranks).map { |e| "#{e[0]}(#{e[1]})" }
97
+ end
98
+ path.join(", ")
99
+ end
100
+
101
+ # rubocop:enable all
69
102
  end
70
103
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Assemble data from CSV reader by parsing scientificName field
3
5
  class SciNameCollector
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Namespace module for crossmapping checklists to GN sources
2
4
  module GnCrossmap
3
- VERSION = "3.0.3".freeze
5
+ VERSION = "3.1.0"
4
6
 
5
7
  def self.version
6
8
  VERSION
@@ -1,9 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module GnCrossmap
2
4
  # Saves output from GN Resolver to disk
3
5
  class Writer
4
- def initialize(output_io, original_fields, output_name)
6
+ def initialize(output_io, original_fields, output_name,
7
+ with_classification = false)
5
8
  @output_io = output_io
6
9
  @output_fields = output_fields(original_fields)
10
+ @output_fields << :classification if with_classification
7
11
  @output = CSV.new(@output_io, col_sep: "\t")
8
12
  @output << @output_fields
9
13
  @output_name = output_name
@@ -22,7 +26,7 @@ module GnCrossmap
22
26
  private
23
27
 
24
28
  def output_fields(original_fields)
25
- original_fields + %i[matchedType inputName matchedName
29
+ original_fields + %i[matchedType inputName matchedName inputCanonicalForm
26
30
  matchedCanonicalForm inputRank matchedRank
27
31
  synonymStatus acceptedName matchedEditDistance
28
32
  matchedScore matchTaxonID]
metadata CHANGED
@@ -1,85 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.3
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-26 00:00:00.000000000 Z
11
+ date: 2017-08-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: trollop
14
+ name: biodiversity
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: '3.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: '3.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: biodiversity
28
+ name: gn_uuid
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '3.1'
33
+ version: '0.5'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '3.1'
40
+ version: '0.5'
41
41
  - !ruby/object:Gem::Dependency
42
- name: rest-client
42
+ name: logger-colors
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '2.0'
47
+ version: '1.0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '2.0'
54
+ version: '1.0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: logger-colors
56
+ name: rest-client
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.0'
61
+ version: '2.0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.0'
68
+ version: '2.0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: gn_uuid
70
+ name: trollop
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0.5'
75
+ version: '2.1'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0.5'
82
+ version: '2.1'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: bundler
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -95,75 +95,75 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.7'
97
97
  - !ruby/object:Gem::Dependency
98
- name: rake
98
+ name: byebug
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '11.0'
103
+ version: '9.0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '11.0'
110
+ version: '9.0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: rspec
112
+ name: coveralls
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '3.2'
117
+ version: '0.8'
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '3.2'
124
+ version: '0.8'
125
125
  - !ruby/object:Gem::Dependency
126
- name: rubocop
126
+ name: rake
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.31'
131
+ version: '12.0'
132
132
  type: :development
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.31'
138
+ version: '12.0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: coveralls
140
+ name: rspec
141
141
  requirement: !ruby/object:Gem::Requirement
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: '0.8'
145
+ version: '3.2'
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: '0.8'
152
+ version: '3.2'
153
153
  - !ruby/object:Gem::Dependency
154
- name: byebug
154
+ name: rubocop
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
157
  - - "~>"
158
158
  - !ruby/object:Gem::Version
159
- version: '9.0'
159
+ version: '0.49'
160
160
  type: :development
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
- version: '9.0'
166
+ version: '0.49'
167
167
  description: Gem uses a checklist in a comma-separated format as an input, and returns
168
168
  back a new comma-separated list crossmapping the scientific names to one of the
169
169
  data sources from http://resolver.globalnames.org
@@ -177,6 +177,7 @@ files:
177
177
  - ".gitignore"
178
178
  - ".rspec"
179
179
  - ".rubocop.yml"
180
+ - ".ruby-version"
180
181
  - ".travis.yml"
181
182
  - CHANGELOG.md
182
183
  - Gemfile