gn_crossmap 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ac2bef76184049dac604ad4be78998d50fc8cc54
4
+ data.tar.gz: 4ebf72afdfe65e37148d19dd2234d771cb3db951
5
+ SHA512:
6
+ metadata.gz: 06b2f2a33ad5d73344e73afa6b76790ab195b5529b98984949a40a3d1e64eb79f5449d1045a0a0c1d60af6b65a5076f63deb3339d1e1b6edbf734790fc5b4cff
7
+ data.tar.gz: f26df79b31645a2228be814aa47e9efd4f4976d9753df3ca4a0ebeafc1b5b7f1d91cd55d8e61555a655f1c39d0051d36e34554a983f07fec507aa4e4aaf5ed75
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ output.csv
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ AllCops:
2
+ Exclude:
3
+ - db/**/*
4
+ - bundle_bin/**/*
5
+ Include:
6
+ - exe/crossmap
7
+ Style/StringLiterals:
8
+ EnforcedStyle: double_quotes
9
+ Style/DotPosition:
10
+ EnforcedStyle: trailing
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ - 2.2
5
+ script:
6
+ - bundle exec rake
7
+ branches:
8
+ only:
9
+ - master
@@ -0,0 +1,13 @@
1
+ gn_crossmap CHANGELOG
2
+ =====================
3
+
4
+ 0.1.1
5
+ -----
6
+ - [Dmitry Mozzherin][dimus] - first official release -- works for full names
7
+ and names entered in rank fields
8
+
9
+ 0.1.0
10
+ -----
11
+ - [Dmitry Mozzherin][dimus] - initial version
12
+
13
+ [dimus]: https://github.com/dimus
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in gn_crossmap.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Marine Biological Laboratory
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,137 @@
1
+ # GnCrossmap
2
+ [![Gem Version][gem_badge]][gem_link]
3
+ [![Continuous Integration Status][ci_badge]][ci_link]
4
+ [![Coverage Status][cov_badge]][cov_link]
5
+ [![CodeClimate][code_badge]][code_link]
6
+ [![Dependency Status][dep_badge]][dep_link]
7
+
8
+ This gem crossmaps a checklist of scientific names to names from a data source
9
+ in [GN Resolver][resolver].
10
+
11
+ Checklist has to be in a CSV format.
12
+
13
+ Compatibility
14
+ -------------
15
+
16
+ This gem is compatible with Ruby versions 2.1.0 and higher.
17
+
18
+ Installation
19
+ ------------
20
+
21
+ Add this line to your application's Gemfile:
22
+
23
+ ```ruby
24
+ gem 'gn_crossmap'
25
+ ```
26
+
27
+ And then execute:
28
+
29
+ $ bundle
30
+
31
+ Or install it yourself as:
32
+
33
+ $ gem install gn_crossmap
34
+
35
+ Usage
36
+ -----
37
+
38
+ ### Input file format
39
+
40
+ - Comma Separated File with names of fields in first row.
41
+ - Columns can be separated by tab, comma or semicolon
42
+ - At least some columns should have recognizable field names, such as:
43
+
44
+ taxonID kingdom phylum class order family genus species
45
+ subspecies variety form scientificNameAuthorship scientificName
46
+ taxonRank
47
+
48
+ #### Simple Example
49
+
50
+ taxonID;scientificName
51
+ 1;Macrobiotus echinogenitus subsp. areolatus Murray, 1907
52
+ ...
53
+
54
+ #### Rank Example
55
+
56
+ taxonID;scientificName;taxonRank
57
+ 1;Macrobiotus echinogenitus f. areolatus Murray, 1907;form
58
+ ...
59
+
60
+ #### Family and Authorship Example
61
+
62
+ taxonID;family;scientificName;scientificNameAuthorship
63
+ 1;Macrobiotidae;Macrobiotus echinogenitus subsp. areolatus;Murray, 1907
64
+ ...
65
+
66
+ #### Fine-grained Example
67
+
68
+ TaxonId;kingdom;subkingdom;phylum;subphylum;superclass;class;subclass;cohort;superorder;order;suborder;infraorder;superfamily;family;subfamily;tribe;subtribe;genus;subgenus;section;species;subspecies;variety;form;ScientificNameAuthorship
69
+ 1;Animalia;;Tardigrada;;;Eutardigrada;;;;Parachela;;;Macrobiotoidea;Macrobiotidae;;;;Macrobiotus;;;harmsworthi;obscurus;;;Dastych, 1985
70
+
71
+ ### Usage from command line
72
+
73
+ # to see help
74
+ $ crossmap --help
75
+
76
+ # to compare with default source (Catalogue of Life)
77
+ $ crossmap -i my_list.csv -o my_list_col.csv
78
+
79
+ # to compare with other source (Index Fungorum in this example)
80
+ $ crossmap -i my_list.csv -o my_list_if.csv -d 5
81
+
82
+ ### Usage as Ruby Library
83
+
84
+ ```ruby
85
+ require "gn_crossmap"
86
+
87
+ # If you want to change logger -- default Logging is to standard output
88
+ GnCrossmap.logger = MyCustomLogger.new
89
+
90
+ GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5)
91
+ ```
92
+
93
+ Development
94
+ -----------
95
+
96
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
97
+ `bin/console` for an interactive prompt that will allow you to experiment.
98
+
99
+ To install this gem onto your local machine, run `bundle exec rake install`. To
100
+ release a new version, update the version number in `version.rb`, and then run
101
+ `bundle exec rake release` to create a git tag for the version, push git
102
+ commits and tags, and push the `.gem` file to
103
+ [rubygems.org][rubygems]
104
+
105
+ Contributing
106
+ ------------
107
+
108
+ 1. Fork it ( https://github.com/[my-github-username]/gn_crossmap/fork )
109
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
110
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
111
+ 4. Push to the branch (`git push origin my-new-feature`)
112
+ 5. Create a new Pull Request
113
+
114
+ Copyright
115
+ ---------
116
+
117
+ Author -- [Dmitry Mozzherin][dimus]
118
+
119
+ Copyright (c) 2015 [Marine Biological Laboratory][mbl].
120
+ See [LICENSE][license] for details.
121
+
122
+ [gem_badge]: https://badge.fury.io/rb/gn_crossmap.png
123
+ [gem_link]: http://badge.fury.io/rb/gn_crossmap
124
+ [ci_badge]: https://secure.travis-ci.org/GlobalNamesArchitecture/gn_crossmap.png
125
+ [ci_link]: http://travis-ci.org/GlobalNamesArchitecture/gn_crossmap
126
+ [cov_badge]: https://coveralls.io/repos/GlobalNamesArchitecture/gn_crossmap/badge.png?branch=master
127
+ [cov_link]: https://coveralls.io/r/GlobalNamesArchitecture/gn_crossmap?branch=master
128
+ [code_badge]: https://codeclimate.com/github/GlobalNamesArchitecture/gn_crossmap.png
129
+ [code_link]: https://codeclimate.com/github/GlobalNamesArchitecture/gn_crossmap
130
+ [dep_badge]: https://gemnasium.com/GlobalNamesArchitecture/gn_crossmap.png
131
+ [dep_link]: https://gemnasium.com/GlobalNamesArchitecture/gn_crossmap
132
+ [resolver]: http://resolver.globalnames.org
133
+ [rubygems]: https://rubygems.org
134
+ [dimus]: https://github.com/dimus
135
+ [mbl]: http://mbl.edu
136
+ [license]: https://github.com/GlobalNamesArchitecture/gn_crossmap/blob/master/LICENSE
137
+ [terms]: http://rs.tdwg.org/dwc/terms
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require "rubocop/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new(:rspec) do |rspec|
6
+ rspec.pattern = "spec/**/*_spec.rb"
7
+ end
8
+
9
+ RuboCop::RakeTask.new
10
+
11
+ task default: [:rubocop, :rspec]
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "gn_crossmap"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby
# Command-line entry point: parses options with Trollop and hands the input
# path, output path and data source id to GnCrossmap.run.
require "trollop"
require "gn_crossmap"

# Compare versions as versions, not as strings: a lexical String comparison
# misorders multi-digit components (for example "10.0.0" < "2.1.0").
if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.1.0")
  puts "This program requires Ruby >= v. 2.1.0"
end

# Data source id used when -d/--data-source-id is not given.
CATALOGUE_OF_LIFE = 1
# Output path used when -o/--output is not given.
OUTPUT = "output.csv"

opts = Trollop.options do
  banner "Compares a list of scientific names to scientific names from a " \
         "data source from Global Names Resolver\n\n " \
         "Usage:\n crossmap [options]\n\noptions:"

  opt(:input, "Path to input file", type: :string)
  opt(:output, "Path to output file", default: OUTPUT)
  opt(:data_source_id, "Data source id from GN Resolver",
      default: CATALOGUE_OF_LIFE)
end

# --input has no default, so it must be supplied and must point to a file.
Trollop.die :input, "must be set" if opts[:input].nil?
Trollop.die :input, "file must exist" unless File.exist?(opts[:input])

GnCrossmap.run(opts[:input], opts[:output], opts[:data_source_id])
@@ -0,0 +1,39 @@
1
# coding: utf-8
# Gem specification for gn_crossmap.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "gn_crossmap/version"

Gem::Specification.new do |gem|
  gem.name          = "gn_crossmap"
  gem.version       = GnCrossmap::VERSION
  gem.authors       = ["Dmitry Mozzherin"]
  gem.email         = ["dmozzherin@gmail.com"]
  # The bundled LICENSE file carries the MIT license text.
  gem.license       = "MIT"

  gem.summary       = "Crossmaps a list of scientific names to names from " \
                      "a data source in GN Index"
  gem.description   = "User supplies a comma-separated file which " \
                      "contains in one row a hierarchy path of known ranks, " \
                      "scientific name which can be split into its semantic " \
                      "elements and include authorship and taxon concept " \
                      "reference. User also supplies an id of a data source " \
                      "from global names resolver/index. User gets back a " \
                      "new comma-separated file where scientific names from " \
                      "her list match data from the given data source."
  gem.homepage      = "https://github.com/GlobalNamesArchitecture/gn_crossmap"

  # Ship everything tracked by git except test code.
  gem.files         = `git ls-files -z`.split("\x0").
                      reject { |f| f.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
  gem.require_paths = ["lib"]

  # The README and the executable both state Ruby >= 2.1.0.
  gem.required_ruby_version = ">= 2.1.0"

  gem.add_dependency "trollop", "~> 2.1"
  gem.add_dependency "biodiversity", "~> 3.1"
  # lib/gn_crossmap.rb requires "rest_client"; without this declaration a
  # clean `gem install gn_crossmap` fails at require time.
  # NOTE(review): the version constraint is an assumption -- confirm it
  # against the rest-client release the gem is developed with.
  gem.add_dependency "rest-client", "~> 1.8"

  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.2"
  gem.add_development_dependency "rubocop", "~> 0.31"
  gem.add_development_dependency "coveralls", "~> 0.8"
  gem.add_development_dependency "byebug"
end
@@ -0,0 +1,34 @@
1
+ require "csv"
2
+ require "rest_client"
3
+ require "logger"
4
+ require "biodiversity"
5
+ require "gn_crossmap/version"
6
+ require "gn_crossmap/reader"
7
+ require "gn_crossmap/writer"
8
+ require "gn_crossmap/collector"
9
+ require "gn_crossmap/column_collector"
10
+ require "gn_crossmap/sci_name_collector"
11
+ require "gn_crossmap/resolver"
12
+ require "gn_crossmap/result_processor"
13
+
14
+ # Namespace module for crossmapping checklists with GN sources
15
+ module GnCrossmap
16
+ class << self
17
+ attr_writer :logger
18
+
19
+ def run(input, output, data_source_id)
20
+ data = Reader.new(input).read
21
+ writer = Writer.new(output)
22
+ Resolver.new(writer, data_source_id).resolve(data)
23
+ output
24
+ end
25
+
26
+ def logger
27
+ @logger ||= Logger.new($stdout)
28
+ end
29
+
30
+ def log(message)
31
+ logger.info(message)
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,44 @@
1
module GnCrossmap
  # Assembles data from CSV rows. The first row passed to #process_row is
  # treated as the header; every later row is turned into a Hash keyed by the
  # normalised header names and handed to a field-specific collector.
  class Collector
    # Not referenced inside this class; kept as public constants so existing
    # references to Collector::RANKS / Collector::SPECIES_RANKS keep working.
    RANKS = %i(kingdom subkingdom phylum subphylum superclass class
               subclass cohort superorder order suborder infraorder superfamily
               family subfamily tribe subtribe genus subgenus section species
               subspecies variety form)
    SPECIES_RANKS = %i(genus species subspecies variety form)

    # Array of records accepted so far (whatever the chosen collector's
    # #id_name_rank returned, nil results excluded).
    attr_reader :data

    def initialize
      @data = []
      @fields = nil
      @collector = nil
    end

    # Consumes one parsed CSV row (an Array of cell values).
    def process_row(row)
      @row = row
      @fields ? collect_data : init_fields_collector
    end

    private

    # Normalises header cells to lower-case symbols. `to_s` keeps an empty
    # (nil) header cell from raising, and `strip` lets headers written with
    # padding (" taxonID") still be recognised.
    def init_fields_collector
      @fields = @row.map { |f| f.to_s.strip.downcase.to_sym }
      @collector = collector_factory
    end

    # Pairs the row's cells with the header names and keeps the record only
    # when the collector returns one.
    def collect_data
      @row = @fields.zip(@row).to_h
      data = @collector.id_name_rank(@row)
      @data << data if data
    end

    # A scientificName column selects SciNameCollector; otherwise the name is
    # assembled from rank columns by ColumnCollector.
    def collector_factory
      if @fields.include?(:scientificname)
        SciNameCollector.new(@fields)
      else
        ColumnCollector.new(@fields)
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module GnCrossmap
  # Assembles a name from separate rank columns (genus, species, ...) when
  # the input has no scientificName column.
  class ColumnCollector
    # Rank columns ordered from the highest to the lowest rank.
    RANKS = %i(kingdom subkingdom phylum subphylum superclass class
               subclass cohort superorder order suborder infraorder superfamily
               family subfamily tribe subtribe genus subgenus section species
               subspecies variety form)
    # Columns that take part in a species-level (or lower) name, in order.
    SPECIES_RANKS = %i(genus species subspecies variety form)

    # Never assigned by this class; kept so the public interface is unchanged.
    attr_reader :data

    def initialize(fields)
      @fields = fields
    end

    # Builds a record from one row (a Hash keyed by lower-case field symbols).
    #
    # Returns { id:, name:, rank: } or nil when the row has no taxonid or no
    # filled rank column.
    def id_name_rank(row)
      @row = row
      id = @row[:taxonid]
      return nil if blank?(id)
      rank = find_rank
      return nil unless rank
      name = assemble_name(rank)
      return nil unless name
      { id: id, name: name, rank: rank.to_s }
    end

    private

    # True for nil, "" and whitespace-only cells.
    def blank?(value)
      value.to_s.strip.empty?
    end

    # The lowest rank whose column is filled in, or nil if none is.
    def find_rank
      RANKS.reverse_each.find { |rank| !blank?(@row[rank]) }
    end

    # Ranks below genus need the full combination; higher ranks (and genus
    # itself) are the bare cell value.
    def assemble_name(name_rank)
      name = @row[name_rank]
      if SPECIES_RANKS[1..-1].include?(name_rank)
        assemble_species_name(name, name_rank)
      else
        # Strip so uninomials are cleaned the same way as assembled names.
        name.to_s.strip
      end
    end

    # Joins the preceding species-level parts, the epithet at name_rank and
    # the authorship, collapsing repeated whitespace.
    def assemble_species_name(name, name_rank)
      parts = name_start(SPECIES_RANKS[0...SPECIES_RANKS.index(name_rank)])
      parts << add_infrarank(name, name_rank)
      parts << @row[:scientificnameauthorship]
      parts.join(" ").strip.gsub(/\s+/, " ")
    end

    # Name parts for the given ranks. Blank cells are skipped; checking only
    # for nil would emit a dangling marker such as "subsp." for an empty
    # subspecies cell sitting between species and variety.
    def name_start(ranks)
      ranks.each_with_object([]) do |rank, ary|
        next if blank?(@row[rank])
        ary << add_infrarank(@row[rank], rank)
      end
    end

    # Prefixes infraspecific epithets with their rank marker.
    def add_infrarank(name, rank)
      case rank
      when :subspecies
        "subsp. #{name}"
      when :variety
        "var. #{name}"
      when :form
        "f. #{name}"
      else
        name
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module GnCrossmap
  # Reads supplied csv file and creates ruby structure to compare
  # with a Global Names Resolver source
  class Reader
    # csv_path - path to the input file. The column separator is detected
    #            from the file's first line at construction time.
    def initialize(csv_path)
      @csv_file = csv_path
      @col_sep = col_sep
    end

    # Logs the file name and returns the records gathered by Collector.
    def read
      GnCrossmap.log("Read input file '#{File.basename(@csv_file)}'")
      parse_input
    end

    private

    # Picks whichever of ";", "," and tab occurs most often in the first
    # line. Ties are resolved by Array comparison of [count, separator], so a
    # line with none of them yields ";".
    #
    # File.open is used rather than Kernel#open, which would run a path that
    # starts with "|" as a command. `gets` returns nil on an empty file where
    # `readline` would raise EOFError.
    def col_sep
      line = File.open(@csv_file, &:gets).to_s
      [";", ",", "\t"].map { |s| [line.count(s), s] }.max.last
    end

    # Feeds every CSV row to a Collector. CSV.foreach closes the file when
    # iteration ends, including when a row raises.
    def parse_input
      dc = Collector.new
      CSV.foreach(@csv_file, col_sep: @col_sep) { |row| dc.process_row(row) }
      dc.data
    end
  end
end
@@ -0,0 +1,60 @@
1
module GnCrossmap
  # Sends data to GN Resolver and collects results
  class Resolver
    URL = "http://resolver.globalnames.org/name_resolvers.json"

    # writer         - object passed on to ResultProcessor; it is closed at
    #                  the end of #resolve.
    # data_source_id - sent to the resolver as data_source_ids.
    def initialize(writer, data_source_id)
      @processor = GnCrossmap::ResultProcessor.new(writer)
      @ds_id = data_source_id
      @count = 0
      @batch = 200
    end

    # Resolves the records in slices of @batch and hands every response to
    # the result processor. The writer is closed in `ensure`, so the output
    # file is not left open when a slice raises.
    #
    # data - Array of Hashes with :id, :name and :rank keys.
    #
    # Returns nil.
    def resolve(data)
      data_size = data.size
      data.each_slice(@batch) do |slice|
        with_log(data_size) do
          remote_resolve(collect_names(slice))
        end
      end
      nil
    ensure
      @processor.writer.close
    end

    private

    # Logs the 1-based range of records covered by the current slice, then
    # yields.
    def with_log(size)
      s = @count + 1
      @count += @batch
      e = [@count, size].min
      GnCrossmap.log("Resolve #{s}-#{e} out of #{size} records")
      yield
    end

    # Builds the request payload, one "id|name" line per record, and records
    # each record's rank in the processor's input so it can be written next
    # to the result.
    def collect_names(slice)
      slice.each_with_object("") do |row, str|
        @processor.input[row[:id]] = { rank: row[:rank] }
        str << "#{row[:id]}|#{row[:name]}\n"
      end
    end

    # Posts a whole batch; if the request fails, retries the batch one name
    # at a time.
    def remote_resolve(names)
      res = RestClient.post(URL, data: names, data_source_ids: @ds_id)
      @processor.process(res)
    rescue RestClient::Exception
      single_remote_resolve(names)
    end

    # Posts names individually. A name that still fails is logged and
    # skipped, so it produces no output row.
    def single_remote_resolve(names)
      names.split("\n").each do |name|
        begin
          res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
          @processor.process(res)
        rescue RestClient::Exception => e
          GnCrossmap.log("Resolver broke on '#{name}': #{e}")
          next
        end
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
# JSON.parse is used below; require it here instead of relying on another
# library having loaded it.
require "json"

module GnCrossmap
  # Processes data received from the GN Resolver
  class ResultProcessor
    # Text written to the matchType column for each numeric match_type.
    MATCH_TYPES = {
      1 => "Exact match",
      2 => "Canonical form exact match",
      3 => "Canonical form fuzzy match",
      4 => "Partial canonical form match",
      5 => "Partial canonical form fuzzy match",
      6 => "Genus part match"
    }

    # input  - Hash filled by the caller: supplied id => { rank: ... }.
    # writer - object responding to #write(Array).
    attr_reader :input, :writer

    def initialize(writer)
      @writer = writer
      @input = {}
    end

    # Parses a JSON response and writes one row per match, or a single row
    # with empty match columns when a name has no results.
    #
    # result - JSON text (or an object JSON.parse accepts) with a top-level
    #          "data" array.
    def process(result)
      res = rubyfy(result)
      # Array() turns a missing "data" key into an empty list.
      Array(res[:data]).each do |d|
        d[:results].nil? ? write_empty_result(d) : write_result(d)
      end
    end

    private

    def rubyfy(result)
      JSON.parse(result, symbolize_names: true)
    end

    # Rank recorded for the supplied id; nil when the id is not in @input.
    def supplied_rank(datum)
      entry = @input[datum[:supplied_id]]
      entry && entry[:rank]
    end

    def write_empty_result(datum)
      res = [datum[:supplied_id], datum[:supplied_name_string], nil, nil,
             supplied_rank(datum), nil, nil, nil, nil]
      @writer.write(res)
    end

    def write_result(datum)
      datum[:results].each do |r|
        res = [datum[:supplied_id], datum[:supplied_name_string],
               r[:name_string], r[:canonical_form],
               supplied_rank(datum),
               matched_rank(r), matched_type(r),
               r[:edit_distance], r[:score]]
        @writer.write(res)
      end
    end

    # Last element of the "|"-separated classification_path_ranks. `to_s`
    # makes a record without that field yield nil instead of raising.
    def matched_rank(record)
      record[:classification_path_ranks].to_s.split("|").last
    end

    def matched_type(record)
      MATCH_TYPES[record[:match_type]]
    end
  end
end
@@ -0,0 +1,61 @@
1
module GnCrossmap
  # Assemble data from CSV reader by parsing scientificName field
  class SciNameCollector
    def initialize(fields)
      @fields = fields
      @parser = ScientificNameParser.new
    end

    # Builds a record from one row (a Hash keyed by lower-case field symbols).
    #
    # The rank comes from the taxonRank column when it is filled in and is
    # otherwise inferred by parsing the name.
    #
    # Returns { id:, name:, rank: } or nil when the row has no taxonid or no
    # scientificName.
    def id_name_rank(row)
      @row = row
      id = @row[:taxonid]
      name = find_name
      return nil if id.to_s.strip.empty? || name.nil?
      # Row keys are lower-cased header names, so the taxonRank column is
      # stored under :taxonrank.
      rank = @row[:taxonrank]
      rank = parse_rank if rank.to_s.strip.empty?
      { id: id, name: name, rank: rank }
    end

    private

    # scientificName with scientificNameAuthorship appended when present;
    # nil when the name cell is missing or blank.
    def find_name
      name = @row[:scientificname].to_s.strip
      return nil if name.empty?
      authorship = @row[:scientificnameauthorship].to_s.strip
      authorship.empty? ? name : "#{name} #{authorship}"
    end

    # Infers the rank from the parsed name. Returns nil for names without a
    # canonical form and for hybrids. On a RuntimeError the parser instance
    # is replaced and nil is returned.
    def parse_rank
      @parsed_name = @parser.parse(@row[:scientificname])[:scientificName]
      return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
      words_num = @parsed_name[:canonical].split(" ").size
      infer_rank(words_num)
    rescue RuntimeError
      @parser = ScientificNameParser.new
      nil
    end

    # One word gives no rank, two words give "species"; longer names take
    # the rank of the last infraspecies element, or nil when the parsed
    # details hold no infraspecies entry.
    def infer_rank(words_in_canonical_form)
      case words_in_canonical_form
      when 1
        nil
      when 2
        "species"
      else
        details = @parsed_name[:details]
        infraspecies = details && details[0] && details[0][:infraspecies]
        last = infraspecies && infraspecies[-1]
        last ? normalize_rank(last[:rank]) : nil
      end
    end

    # Maps rank markers ("f.", "var.", "subsp.", ...) to full rank names;
    # anything else is returned unchanged.
    def normalize_rank(rank)
      case rank
      when /^f/
        "form"
      when /^var/
        "variety"
      when /^sub/
        "subspecies"
      else
        rank
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # Namespace module for crossmapping checklists to GN sources
2
+ module GnCrossmap
3
+ VERSION = "0.1.1"
4
+
5
+ def self.version
6
+ VERSION
7
+ end
8
+ end
@@ -0,0 +1,22 @@
1
+ module GnCrossmap
2
+ # Saves output from GN Resolver to disk
3
+ class Writer
4
+ def initialize(output_path)
5
+ @path = output_path
6
+ @output = CSV.open(@path, "w:utf-8")
7
+ @output << [:taxonID, :scientificName, :matchedScientificName,
8
+ :matchedCanonicalForm, :rank, :matchedRank, :matchType,
9
+ :editDistance, :score]
10
+ GnCrossmap.log("Open output file '#{@path}'")
11
+ end
12
+
13
+ def write(record)
14
+ @output << record
15
+ end
16
+
17
+ def close
18
+ GnCrossmap.log("Close output file '#{@path}'")
19
+ @output.close
20
+ end
21
+ end
22
+ end
metadata ADDED
@@ -0,0 +1,182 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gn_crossmap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Dmitry Mozzherin
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-05-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: trollop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: biodiversity
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.31'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.31'
97
+ - !ruby/object:Gem::Dependency
98
+ name: coveralls
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.8'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.8'
111
+ - !ruby/object:Gem::Dependency
112
+ name: byebug
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: User supplies a comma-separated file which breaks contains in one row
126
+ a hierarchy path of known ranks, scientific name which can be split into its semantic
127
+ elements and include authorship and taxon concept reference. User also supplies
128
+ an id of a data source from global names resolver/index. User gets back a new comma-separated
129
+ file where scientific names from her list match data from the given data source.
130
+ email:
131
+ - dmozzherin@gmail.com
132
+ executables:
133
+ - crossmap
134
+ extensions: []
135
+ extra_rdoc_files: []
136
+ files:
137
+ - ".gitignore"
138
+ - ".rspec"
139
+ - ".rubocop.yml"
140
+ - ".travis.yml"
141
+ - CHANGELOG.md
142
+ - Gemfile
143
+ - LICENSE
144
+ - README.md
145
+ - Rakefile
146
+ - bin/console
147
+ - bin/setup
148
+ - exe/crossmap
149
+ - gn_crossmap.gemspec
150
+ - lib/gn_crossmap.rb
151
+ - lib/gn_crossmap/collector.rb
152
+ - lib/gn_crossmap/column_collector.rb
153
+ - lib/gn_crossmap/reader.rb
154
+ - lib/gn_crossmap/resolver.rb
155
+ - lib/gn_crossmap/result_processor.rb
156
+ - lib/gn_crossmap/sci_name_collector.rb
157
+ - lib/gn_crossmap/version.rb
158
+ - lib/gn_crossmap/writer.rb
159
+ homepage: https://github.com/GlobalNamesArchitecture/gn_crossmap
160
+ licenses: []
161
+ metadata: {}
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - ">="
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ required_rubygems_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ requirements: []
177
+ rubyforge_project:
178
+ rubygems_version: 2.2.3
179
+ signing_key:
180
+ specification_version: 4
181
+ summary: Crossmaps a list of scientific names to names from a data source in GN Index
182
+ test_files: []