gn_crossmap 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ac2bef76184049dac604ad4be78998d50fc8cc54
4
+ data.tar.gz: 4ebf72afdfe65e37148d19dd2234d771cb3db951
5
+ SHA512:
6
+ metadata.gz: 06b2f2a33ad5d73344e73afa6b76790ab195b5529b98984949a40a3d1e64eb79f5449d1045a0a0c1d60af6b65a5076f63deb3339d1e1b6edbf734790fc5b4cff
7
+ data.tar.gz: f26df79b31645a2228be814aa47e9efd4f4976d9753df3ca4a0ebeafc1b5b7f1d91cd55d8e61555a655f1c39d0051d36e34554a983f07fec507aa4e4aaf5ed75
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ output.csv
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ AllCops:
2
+ Exclude:
3
+ - db/**/*
4
+ - bundle_bin/**/*
5
+ Include:
6
+ - exe/crossmap
7
+ Style/StringLiterals:
8
+ EnforcedStyle: double_quotes
9
+ Style/DotPosition:
10
+ EnforcedStyle: trailing
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1
4
+ - 2.2
5
+ script:
6
+ - bundle exec rake
7
+ branches:
8
+ only:
9
+ - master
@@ -0,0 +1,13 @@
1
+ gn_crossmap CHANGELOG
2
+ =====================
3
+
4
+ 0.1.1
5
+ -----
6
+ - [Dmitry Mozzherin][dimus] - first official release -- works for full names
7
+ and names entered in rank fields
8
+
9
+ 0.1.0
10
+ -----
11
+ - [Dmitry Mozzherin][dimus] - initial version
12
+
13
+ [dimus]: https://github.com/dimus
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in gn_crossmap.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Marine Biological Laboratory
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,137 @@
1
+ # GnCrossmap
2
+ [![Gem Version][gem_badge]][gem_link]
3
+ [![Continuous Integration Status][ci_badge]][ci_link]
4
+ [![Coverage Status][cov_badge]][cov_link]
5
+ [![CodeClimate][code_badge]][code_link]
6
+ [![Dependency Status][dep_badge]][dep_link]
7
+
8
+ This gem crossmaps a checklist of scientific names to names from a data source
9
+ in [GN Resolver][resolver].
10
+
11
+ Checklist has to be in a CSV format.
12
+
13
+ Compatibility
14
+ -------------
15
+
16
+ This gem is compatible with Ruby versions higher than or equal to 2.1.0
17
+
18
+ Installation
19
+ ------------
20
+
21
+ Add this line to your application's Gemfile:
22
+
23
+ ```ruby
24
+ gem 'gn_crossmap'
25
+ ```
26
+
27
+ And then execute:
28
+
29
+ $ bundle
30
+
31
+ Or install it yourself as:
32
+
33
+ $ gem install gn_crossmap
34
+
35
+ Usage
36
+ -----
37
+
38
+ ### Input file format
39
+
40
+ - Comma Separated File with names of fields in first row.
41
+ - Columns can be separated by tab, comma or semicolon
42
+ - At least some columns should have recognizable fields
43
+
44
+ taxonID kingdom phylum class order family genus species
45
+ subspecies variety form scientificNameAuthorship scientificName
46
+ taxonRank
47
+
48
+ #### Simple Example
49
+
50
+ taxonID;scientificName
51
+ 1;Macrobiotus echinogenitus subsp. areolatus Murray, 1907
52
+ ...
53
+
54
+ #### Rank Example
55
+
56
+ taxonID;scientificName;taxonRank
57
+ 1;Macrobiotus echinogenitus f. areolatus Murray, 1907;form
58
+ ...
59
+
60
+ #### Family and Authorship Example
61
+
62
+ taxonID;family;scientificName;scientificNameAuthorship
63
+ 1;Macrobiotidae;Macrobiotus echinogenitus subsp. areolatus;Murray, 1907
64
+ ...
65
+
66
+ #### Fine-grained Example
67
+
68
+ TaxonId;kingdom;subkingdom;phylum;subphylum;superclass;class;subclass;cohort;superorder;order;suborder;infraorder;superfamily;family;subfamily;tribe;subtribe;genus;subgenus;section;species;subspecies;variety;form;ScientificNameAuthorship
69
+ 1;Animalia;;Tardigrada;;;Eutardigrada;;;;Parachela;;;Macrobiotoidea;Macrobiotidae;;;;Macrobiotus;;;harmsworthi;obscurus;;;Dastych, 1985
70
+
71
+ ### Usage from command line
72
+
73
+ # to see help
74
+ $ crossmap --help
75
+
76
+ # to compare with default source (Catalogue of Life)
77
+ $ crossmap -i my_list.csv -o my_list_col.csv
78
+
79
+ # to compare with other source (Index Fungorum in this example)
80
+ $ crossmap -i my_list.csv -o my_list_if.csv -d 5
81
+
82
+ ### Usage as Ruby Library
83
+
84
+ ```ruby
85
+ require "gn_crossmap"
86
+
87
+ # If you want to change logger -- default Logging is to standard output
88
+ GnCrossmap.logger = MyCustomLogger.new
89
+
90
+ GnCrossmap.run("path/to/input.csv", "path/to/output.csv", 5)
91
+ ```
92
+
93
+ Development
94
+ -----------
95
+
96
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
97
+ `bin/console` for an interactive prompt that will allow you to experiment.
98
+
99
+ To install this gem onto your local machine, run `bundle exec rake install`. To
100
+ release a new version, update the version number in `version.rb`, and then run
101
+ `bundle exec rake release` to create a git tag for the version, push git
102
+ commits and tags, and push the `.gem` file to
103
+ [rubygems.org][rubygems]
104
+
105
+ Contributing
106
+ ------------
107
+
108
+ 1. Fork it ( https://github.com/[my-github-username]/gn_crossmap/fork )
109
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
110
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
111
+ 4. Push to the branch (`git push origin my-new-feature`)
112
+ 5. Create a new Pull Request
113
+
114
+ Copyright
115
+ ---------
116
+
117
+ Author -- [Dmitry Mozzherin][dimus]
118
+
119
+ Copyright (c) 2015 [Marine Biological Laboratory][mbl].
120
+ See [LICENSE][license] for details.
121
+
122
+ [gem_badge]: https://badge.fury.io/rb/gn_crossmap.png
123
+ [gem_link]: http://badge.fury.io/rb/gn_crossmap
124
+ [ci_badge]: https://secure.travis-ci.org/GlobalNamesArchitecture/gn_crossmap.png
125
+ [ci_link]: http://travis-ci.org/GlobalNamesArchitecture/gn_crossmap
126
+ [cov_badge]: https://coveralls.io/repos/GlobalNamesArchitecture/gn_crossmap/badge.png?branch=master
127
+ [cov_link]: https://coveralls.io/r/GlobalNamesArchitecture/gn_crossmap?branch=master
128
+ [code_badge]: https://codeclimate.com/github/GlobalNamesArchitecture/gn_crossmap.png
129
+ [code_link]: https://codeclimate.com/github/GlobalNamesArchitecture/gn_crossmap
130
+ [dep_badge]: https://gemnasium.com/GlobalNamesArchitecture/gn_crossmap.png
131
+ [dep_link]: https://gemnasium.com/GlobalNamesArchitecture/gn_crossmap
132
+ [resolver]: http://resolver.globalnames.org
133
+ [rubygems]: https://rubygems.org
134
+ [dimus]: https://github.com/dimus
135
+ [mbl]: http://mbl.edu
136
+ [license]: https://github.com/GlobalNamesArchitecture/gn_crossmap/blob/master/LICENSE
137
+ [terms]: http://rs.tdwg.org/dwc/terms
@@ -0,0 +1,11 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+ require "rubocop/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new(:rspec) do |rspec|
6
+ rspec.pattern = "spec/**/*_spec.rb"
7
+ end
8
+
9
+ RuboCop::RakeTask.new
10
+
11
+ task default: [:rubocop, :rspec]
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "gn_crossmap"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ require "trollop"
3
+ require "gn_crossmap"
4
+
5
+ puts "This program requires Ruby >= v. 2.1.0" if RUBY_VERSION < "2.1.0"
6
+
7
+ CATALOGUE_OF_LIFE = 1
8
+ OUTPUT = "output.csv"
9
+ opts = Trollop.options do
10
+ banner "Compares a list of scientific names to scientific names from a " \
11
+ "data source from Global Names Resolver\n\n " \
12
+ "Usage:\n crossmap [options]\n\noptions:"
13
+
14
+ opt(:input, "Path to intput file", type: :string)
15
+ opt(:output, "Path to output file", default: OUTPUT)
16
+ opt(:data_source_id, "Data source id from GN Resolver",
17
+ default: CATALOGUE_OF_LIFE)
18
+ end
19
+
20
+ Trollop.die :input, "must be set" if opts[:input].nil?
21
+ Trollop.die :input, "file must exist" unless File.exist?(opts[:input])
22
+
23
+ GnCrossmap.run(opts[:input], opts[:output], opts[:data_source_id])
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "gn_crossmap/version"
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "gn_crossmap"
8
+ gem.version = GnCrossmap::VERSION
9
+ gem.authors = ["Dmitry Mozzherin"]
10
+ gem.email = ["dmozzherin@gmail.com"]
11
+
12
+ gem.summary = "Crossmaps a list of scientific names to names from " \
13
+ "a data source in GN Index"
14
+ gem.description = "User supplies a comma-separated file which breaks " \
15
+ "contains in one row a hierarchy path of known ranks, " \
16
+ "scientific name which can be split into its semantic " \
17
+ "elements and include authorship and taxon concept " \
18
+ "reference. User also supplies an id of a data source "\
19
+ "from global names resolver/index. User gets back a " \
20
+ "new comma-separated file where scientific names from " \
21
+ "her list match data from the given data source."
22
+ gem.homepage = "https://github.com/GlobalNamesArchitecture/gn_crossmap"
23
+
24
+ gem.files = `git ls-files -z`.split("\x0").
25
+ reject { |f| f.match(%r{^(test|spec|features)/}) }
26
+ gem.bindir = "exe"
27
+ gem.executables = gem.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ gem.require_paths = ["lib"]
29
+
30
+ gem.add_dependency "trollop", "~> 2.1"
31
+ gem.add_dependency "biodiversity", "~> 3.1"
32
+
33
+ gem.add_development_dependency "bundler", "~> 1.7"
34
+ gem.add_development_dependency "rake", "~> 10.0"
35
+ gem.add_development_dependency "rspec", "~> 3.2"
36
+ gem.add_development_dependency "rubocop", "~> 0.31"
37
+ gem.add_development_dependency "coveralls", "~> 0.8"
38
+ gem.add_development_dependency "byebug"
39
+ end
@@ -0,0 +1,34 @@
1
+ require "csv"
2
+ require "rest_client"
3
+ require "logger"
4
+ require "biodiversity"
5
+ require "gn_crossmap/version"
6
+ require "gn_crossmap/reader"
7
+ require "gn_crossmap/writer"
8
+ require "gn_crossmap/collector"
9
+ require "gn_crossmap/column_collector"
10
+ require "gn_crossmap/sci_name_collector"
11
+ require "gn_crossmap/resolver"
12
+ require "gn_crossmap/result_processor"
13
+
14
+ # Namespace module for crossmapping checklists with GN sources
15
+ module GnCrossmap
16
+ class << self
17
+ attr_writer :logger
18
+
19
+ def run(input, output, data_source_id)
20
+ data = Reader.new(input).read
21
+ writer = Writer.new(output)
22
+ Resolver.new(writer, data_source_id).resolve(data)
23
+ output
24
+ end
25
+
26
+ def logger
27
+ @logger ||= Logger.new($stdout)
28
+ end
29
+
30
+ def log(message)
31
+ logger.info(message)
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,44 @@
1
+ module GnCrossmap
2
+ # Assemble data from CSV reader by checking column fields
3
+ class Collector
4
+ RANKS = %i(kingdom subkingdom phylum subphylum superclass class
5
+ subclass cohort superorder order suborder infraorder superfamily
6
+ family subfamily tribe subtribe genus subgenus section species
7
+ subspecies variety form)
8
+ SPECIES_RANKS = %i(genus species subspecies variety form)
9
+
10
+ attr_reader :data
11
+
12
+ def initialize
13
+ @data = []
14
+ @fields = nil
15
+ @collector = nil
16
+ end
17
+
18
+ def process_row(row)
19
+ @row = row
20
+ @fields ? collect_data : init_fields_collector
21
+ end
22
+
23
+ private
24
+
25
+ def init_fields_collector
26
+ @fields = @row.map { |f| f.downcase.to_sym }
27
+ @collector = collector_factory
28
+ end
29
+
30
+ def collect_data
31
+ @row = @fields.zip(@row).to_h
32
+ data = @collector.id_name_rank(@row)
33
+ @data << data if data
34
+ end
35
+
36
+ def collector_factory
37
+ if @fields.include?(:scientificname)
38
+ SciNameCollector.new(@fields)
39
+ else
40
+ ColumnCollector.new(@fields)
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,74 @@
1
+ module GnCrossmap
2
+ # Assemble data from CSV reader by checking column fields
3
+ class ColumnCollector
4
+ RANKS = %i(kingdom subkingdom phylum subphylum superclass class
5
+ subclass cohort superorder order suborder infraorder superfamily
6
+ family subfamily tribe subtribe genus subgenus section species
7
+ subspecies variety form)
8
+ SPECIES_RANKS = %i(genus species subspecies variety form)
9
+
10
+ attr_reader :data
11
+
12
+ def initialize(fields)
13
+ @fields = fields
14
+ end
15
+
16
+ def id_name_rank(row)
17
+ @row = row
18
+ id = @row[:taxonid]
19
+ return nil if id.to_s.strip == ""
20
+ rank = find_rank
21
+ return nil unless rank
22
+ name = assemble_name(rank)
23
+ return nil unless name
24
+ { id: id, name: name, rank: rank.to_s }
25
+ end
26
+
27
+ private
28
+
29
+ def find_rank
30
+ name_rank = nil
31
+ RANKS.reverse_each do |rank|
32
+ next if @row[rank].to_s.strip == ""
33
+ name_rank = rank
34
+ break
35
+ end
36
+ name_rank
37
+ end
38
+
39
+ def assemble_name(name_rank)
40
+ name = @row[name_rank]
41
+ if SPECIES_RANKS[1..-1].include?(name_rank)
42
+ name = assemble_species_name(name, name_rank)
43
+ end
44
+ name
45
+ end
46
+
47
+ def assemble_species_name(name, name_rank)
48
+ ending = [add_infrarank(name, name_rank), @row[:scientificnameauthorship]]
49
+ ranks = SPECIES_RANKS[0...SPECIES_RANKS.index(name_rank)]
50
+ starting = name_start(ranks)
51
+ (starting + ending).flatten.join(" ").strip.gsub(/\s+/, " ")
52
+ end
53
+
54
+ def name_start(ranks)
55
+ ranks.each_with_object([]) do |rank, ary|
56
+ next unless @row[rank]
57
+ ary << add_infrarank(@row[rank], rank)
58
+ end
59
+ end
60
+
61
+ def add_infrarank(name, rank)
62
+ case rank
63
+ when :subspecies
64
+ "subsp. #{name}"
65
+ when :variety
66
+ "var. #{name}"
67
+ when :form
68
+ "f. #{name}"
69
+ else
70
+ name
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,30 @@
1
+ module GnCrossmap
2
+ # Reads supplied csv file and creates ruby structure to compare
3
+ # with a Global Names Resolver source
4
+ class Reader
5
+ def initialize(csv_path)
6
+ @csv_file = csv_path
7
+ @col_sep = col_sep
8
+ end
9
+
10
+ def read
11
+ GnCrossmap.log("Read input file '#{File.basename(@csv_file)}'")
12
+ parse_input
13
+ end
14
+
15
+ private
16
+
17
+ def col_sep
18
+ line = open(@csv_file, &:readline)
19
+ [";", ",", "\t"].map { |s| [line.count(s), s] }.sort.last.last
20
+ end
21
+
22
+ def parse_input
23
+ dc = Collector.new
24
+ CSV.open(@csv_file, col_sep: @col_sep).each do |row|
25
+ dc.process_row(row)
26
+ end
27
+ dc.data
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,60 @@
1
+ module GnCrossmap
2
+ # Sends data to GN Resolver and collects results
3
+ class Resolver
4
+ URL = "http://resolver.globalnames.org/name_resolvers.json"
5
+
6
+ def initialize(writer, data_source_id)
7
+ @processor = GnCrossmap::ResultProcessor.new(writer)
8
+ @ds_id = data_source_id
9
+ @count = 0
10
+ @batch = 200
11
+ end
12
+
13
+ def resolve(data)
14
+ data_size = data.size
15
+ data.each_slice(@batch) do |slice|
16
+ with_log(data_size) do
17
+ names = collect_names(slice)
18
+ remote_resolve(names)
19
+ end
20
+ end
21
+ @processor.writer.close
22
+ end
23
+
24
+ private
25
+
26
+ def with_log(size)
27
+ s = @count + 1
28
+ @count += @batch
29
+ e = [@count, size].min
30
+ GnCrossmap.log("Resolve #{s}-#{e} out of #{size} records")
31
+ yield
32
+ end
33
+
34
+ def collect_names(slice)
35
+ slice.each_with_object("") do |row, str|
36
+ @processor.input[row[:id]] = { rank: row[:rank] }
37
+ str << "#{row[:id]}|#{row[:name]}\n"
38
+ end
39
+ end
40
+
41
+ def remote_resolve(names)
42
+ res = RestClient.post(URL, data: names, data_source_ids: @ds_id)
43
+ @processor.process(res)
44
+ rescue RestClient::Exception
45
+ single_remote_resolve(names)
46
+ end
47
+
48
+ def single_remote_resolve(names)
49
+ names.split("\n").each do |name|
50
+ begin
51
+ res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
52
+ @processor.process(res)
53
+ rescue RestClient::Exception => e
54
+ GnCrossmap.log("Resolver broke on '#{name}': #{e}")
55
+ next
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,58 @@
1
+ module GnCrossmap
2
+ # Processes data received from the GN Resolver
3
+ class ResultProcessor
4
+ MATCH_TYPES = {
5
+ 1 => "Exact match",
6
+ 2 => "Canonical form exact match",
7
+ 3 => "Canonical form fuzzy match",
8
+ 4 => "Partial canonical form match",
9
+ 5 => "Partial canonical form fuzzy match",
10
+ 6 => "Genus part match"
11
+ }
12
+
13
+ attr_reader :input, :writer
14
+
15
+ def initialize(writer)
16
+ @writer = writer
17
+ @input = {}
18
+ end
19
+
20
+ def process(result)
21
+ res = rubyfy(result)
22
+ res[:data].each do |d|
23
+ d[:results].nil? ? write_empty_result(d) : write_result(d)
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def rubyfy(result)
30
+ JSON.parse(result, symbolize_names: true)
31
+ end
32
+
33
+ def write_empty_result(datum)
34
+ res = [datum[:supplied_id], datum[:supplied_name_string], nil, nil,
35
+ @input[datum[:supplied_id]][:rank], nil, nil, nil, nil]
36
+ @writer.write(res)
37
+ end
38
+
39
+ def write_result(datum)
40
+ datum[:results].each do |r|
41
+ res = [datum[:supplied_id], datum[:supplied_name_string],
42
+ r[:name_string], r[:canonical_form],
43
+ @input[datum[:supplied_id]][:rank],
44
+ matched_rank(r), matched_type(r),
45
+ r[:edit_distance], r[:score]]
46
+ @writer.write(res)
47
+ end
48
+ end
49
+
50
+ def matched_rank(record)
51
+ record[:classification_path_ranks].split("|").last
52
+ end
53
+
54
+ def matched_type(record)
55
+ MATCH_TYPES[record[:match_type]]
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,61 @@
1
+ module GnCrossmap
2
+ # Assemble data from CSV reader by parsing scientificName field
3
+ class SciNameCollector
4
+ def initialize(fields)
5
+ @fields = fields
6
+ @parser = ScientificNameParser.new
7
+ end
8
+
9
+ def id_name_rank(row)
10
+ @row = row
11
+ id = @row[:taxonid]
12
+ name = find_name
13
+ rank = @row[:taxonRank]
14
+ rank = parse_rank if rank.nil?
15
+ (id && name) ? { id: id, name: name, rank: rank } : nil
16
+ end
17
+
18
+ private
19
+
20
+ def find_name
21
+ name = @row[:scientificname].strip
22
+ authorship = @row[:scientificnameauthorship].to_s.strip
23
+ name = "#{name} #{authorship}" if authorship != ""
24
+ name
25
+ end
26
+
27
+ def parse_rank
28
+ @parsed_name = @parser.parse(@row[:scientificname])[:scientificName]
29
+ return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
30
+ words_num = @parsed_name[:canonical].split(" ").size
31
+ infer_rank(words_num)
32
+ rescue RuntimeError
33
+ @parser = ScientificNameParser.new
34
+ nil
35
+ end
36
+
37
+ def infer_rank(words_in_canonical_form)
38
+ case words_in_canonical_form
39
+ when 1
40
+ nil
41
+ when 2
42
+ "species"
43
+ else
44
+ normalize_rank(@parsed_name[:details][0][:infraspecies][-1][:rank])
45
+ end
46
+ end
47
+
48
+ def normalize_rank(rank)
49
+ case rank
50
+ when /^f/
51
+ "form"
52
+ when /^var/
53
+ "variety"
54
+ when /^sub/
55
+ "subspicies"
56
+ else
57
+ rank
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,8 @@
1
+ # Namespace module for crossmapping checklists to GN sources
2
+ module GnCrossmap
3
+ VERSION = "0.1.1"
4
+
5
+ def self.version
6
+ VERSION
7
+ end
8
+ end
@@ -0,0 +1,22 @@
1
+ module GnCrossmap
2
+ # Saves output from GN Resolver to disk
3
+ class Writer
4
+ def initialize(output_path)
5
+ @path = output_path
6
+ @output = CSV.open(@path, "w:utf-8")
7
+ @output << [:taxonID, :scientificName, :matchedScientificName,
8
+ :matchedCanonicalForm, :rank, :matchedRank, :matchType,
9
+ :editDistance, :score]
10
+ GnCrossmap.log("Open output file '#{@path}'")
11
+ end
12
+
13
+ def write(record)
14
+ @output << record
15
+ end
16
+
17
+ def close
18
+ GnCrossmap.log("Close output file '#{@path}'")
19
+ @output.close
20
+ end
21
+ end
22
+ end
metadata ADDED
@@ -0,0 +1,182 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gn_crossmap
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Dmitry Mozzherin
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-05-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: trollop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: biodiversity
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.31'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.31'
97
+ - !ruby/object:Gem::Dependency
98
+ name: coveralls
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.8'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.8'
111
+ - !ruby/object:Gem::Dependency
112
+ name: byebug
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ description: User supplies a comma-separated file which breaks contains in one row
126
+ a hierarchy path of known ranks, scientific name which can be split into its semantic
127
+ elements and include authorship and taxon concept reference. User also supplies
128
+ an id of a data source from global names resolver/index. User gets back a new comma-separated
129
+ file where scientific names from her list match data from the given data source.
130
+ email:
131
+ - dmozzherin@gmail.com
132
+ executables:
133
+ - crossmap
134
+ extensions: []
135
+ extra_rdoc_files: []
136
+ files:
137
+ - ".gitignore"
138
+ - ".rspec"
139
+ - ".rubocop.yml"
140
+ - ".travis.yml"
141
+ - CHANGELOG.md
142
+ - Gemfile
143
+ - LICENSE
144
+ - README.md
145
+ - Rakefile
146
+ - bin/console
147
+ - bin/setup
148
+ - exe/crossmap
149
+ - gn_crossmap.gemspec
150
+ - lib/gn_crossmap.rb
151
+ - lib/gn_crossmap/collector.rb
152
+ - lib/gn_crossmap/column_collector.rb
153
+ - lib/gn_crossmap/reader.rb
154
+ - lib/gn_crossmap/resolver.rb
155
+ - lib/gn_crossmap/result_processor.rb
156
+ - lib/gn_crossmap/sci_name_collector.rb
157
+ - lib/gn_crossmap/version.rb
158
+ - lib/gn_crossmap/writer.rb
159
+ homepage: https://github.com/GlobalNamesArchitecture/gn_crossmap
160
+ licenses: []
161
+ metadata: {}
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - ">="
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ required_rubygems_version: !ruby/object:Gem::Requirement
172
+ requirements:
173
+ - - ">="
174
+ - !ruby/object:Gem::Version
175
+ version: '0'
176
+ requirements: []
177
+ rubyforge_project:
178
+ rubygems_version: 2.2.3
179
+ signing_key:
180
+ specification_version: 4
181
+ summary: Crossmaps a list of scientific names to names from a data source in GN Index
182
+ test_files: []