gn_crossmap 2.3.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +4 -0
- data/README.md +13 -4
- data/Rakefile +1 -1
- data/gn_crossmap.gemspec +8 -2
- data/lib/gn_crossmap.rb +5 -0
- data/lib/gn_crossmap/collector.rb +0 -7
- data/lib/gn_crossmap/column_collector.rb +2 -2
- data/lib/gn_crossmap/sci_name_collector.rb +1 -1
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap/writer.rb +4 -4
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 143661fa671afbd77e8f666ef883df2e127356fa
|
4
|
+
data.tar.gz: a809d9b2472bb8577fffc9df78b72557f7ff3200
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dbc394ee6ae8e673f455dba4f789bc0ccded1b5c68ca51e19d1549233effeac417b26f7faa6fcac8a7b7db06f4a4d26e7f0944b5f9553673f82a752452e9048
|
7
|
+
data.tar.gz: 6e4be322cbd94e014ee70c9f009fe28c2da4f1b9012cf5cdfa2c0c8b62772dd69ed41ae1c42e917d2648ba3852a3c819add1f2813f85800156dff93f0d7111c0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -56,7 +56,8 @@ crossmap -i my_list.csv -o my_list_if.csv -d 5
|
|
56
56
|
# to use standard input and/or output
|
57
57
|
cat my_list.csv | crossmap -i - -o - > output
|
58
58
|
|
59
|
-
# to keep only taxonID from original input
|
59
|
+
# to keep only taxonID (if given) from original input
|
60
|
+
# no original fields will be kept without taxonID
|
60
61
|
cat my_list.csv | crossmap -i my_list.csv -s
|
61
62
|
```
|
62
63
|
|
@@ -85,8 +86,9 @@ designates `STDIN`
|
|
85
86
|
: (integer) id of a data source from [GN resolver][resolver]
|
86
87
|
|
87
88
|
``skip_original``
|
88
|
-
: (boolean) if true only `taxonID` is preserved
|
89
|
-
all original data is preserved
|
89
|
+
: (boolean) if true only `taxonID` (if given) is preserved
|
90
|
+
from original data. Otherwise all original data is preserved. If there is no
|
91
|
+
``taxonID``, no original data will be preserved.
|
90
92
|
|
91
93
|
``alt_headers``
|
92
94
|
: (array) empty array by default. If `alt_headers` are not empty they are used
|
@@ -184,7 +186,14 @@ Match types dictionary can be accessed with `GnCrossmap::MATCH_TYPES` constant
|
|
184
186
|
`subspecies` `variety` `form` `scientificNameAuthorship` `scientificName`
|
185
187
|
`taxonRank`
|
186
188
|
|
187
|
-
####
|
189
|
+
#### Simplest Example -- only scientificName
|
190
|
+
|
191
|
+
| scientificName |
|
192
|
+
|---------------------------------------------------------|
|
193
|
+
| Animalia |
|
194
|
+
| Macrobiotus echinogenitus subsp. areolatus Murray, 1907 |
|
195
|
+
|
196
|
+
#### taxonID and scientificName Example
|
188
197
|
|
189
198
|
taxonID;scientificName
|
190
199
|
1;Macrobiotus echinogenitus subsp. areolatus Murray, 1907
|
data/Rakefile
CHANGED
data/gn_crossmap.gemspec
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
lib = File.expand_path("../lib", __FILE__)
|
4
|
+
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require "gn_crossmap/version"
|
5
7
|
|
8
|
+
# rubocop:disable Metrics/BlockLength:
|
9
|
+
|
6
10
|
Gem::Specification.new do |gem|
|
7
11
|
gem.required_ruby_version = ">= 2.1"
|
8
12
|
gem.name = "gn_crossmap"
|
@@ -12,7 +16,7 @@ Gem::Specification.new do |gem|
|
|
12
16
|
gem.email = ["dmozzherin@gmail.com"]
|
13
17
|
|
14
18
|
gem.summary = "Crossmaps a list of scientific names to names from " \
|
15
|
-
|
19
|
+
"a data source in GN Index"
|
16
20
|
gem.description = "Gem uses a checklist in a comma-separated format as " \
|
17
21
|
"an input, and returns back a new comma-separated " \
|
18
22
|
"list crossmapping the scientific names to one of the " \
|
@@ -30,11 +34,13 @@ Gem::Specification.new do |gem|
|
|
30
34
|
gem.add_dependency "biodiversity", "~> 3.1"
|
31
35
|
gem.add_dependency "rest-client", "~> 2.0"
|
32
36
|
gem.add_dependency "logger-colors", "~> 1.0"
|
37
|
+
gem.add_dependency "gn_uuid", "~> 0.5"
|
33
38
|
|
34
39
|
gem.add_development_dependency "bundler", "~> 1.7"
|
35
40
|
gem.add_development_dependency "rake", "~> 11.0"
|
36
41
|
gem.add_development_dependency "rspec", "~> 3.2"
|
37
42
|
gem.add_development_dependency "rubocop", "~> 0.31"
|
38
43
|
gem.add_development_dependency "coveralls", "~> 0.8"
|
39
|
-
gem.add_development_dependency "gn_uuid", "~> 0.5"
|
40
44
|
end
|
45
|
+
|
46
|
+
# rubocop:enable Metrics/BlockLength:
|
data/lib/gn_crossmap.rb
CHANGED
@@ -5,6 +5,7 @@ require "tempfile"
|
|
5
5
|
require "logger"
|
6
6
|
require "logger/colors"
|
7
7
|
require "biodiversity"
|
8
|
+
require "gn_uuid"
|
8
9
|
require "gn_crossmap/errors"
|
9
10
|
require "gn_crossmap/version"
|
10
11
|
require "gn_crossmap/reader"
|
@@ -57,6 +58,10 @@ module GnCrossmap
|
|
57
58
|
logger.info(message)
|
58
59
|
end
|
59
60
|
|
61
|
+
def find_id(row, name)
|
62
|
+
row.key?(:taxonid) ? row[:taxonid].strip : GnUUID.uuid(name)
|
63
|
+
end
|
64
|
+
|
60
65
|
private
|
61
66
|
|
62
67
|
def create_resolver(writer, opts)
|
@@ -21,13 +21,6 @@ module GnCrossmap
|
|
21
21
|
def init_fields_collector
|
22
22
|
@fields = @row.map { |f| prepare_field(f) }
|
23
23
|
@collector = collector_factory
|
24
|
-
err = "taxonID must be present in the csv header"
|
25
|
-
raise GnCrossmapError, err unless taxon_id?
|
26
|
-
end
|
27
|
-
|
28
|
-
def taxon_id?
|
29
|
-
@taxon_id_index = @fields.index(:taxonid)
|
30
|
-
!@taxon_id_index.nil?
|
31
24
|
end
|
32
25
|
|
33
26
|
def prepare_field(field)
|
@@ -16,12 +16,12 @@ module GnCrossmap
|
|
16
16
|
|
17
17
|
def id_name_rank(row)
|
18
18
|
@row = row
|
19
|
-
id = @row[:taxonid]
|
20
|
-
return nil if id.to_s.strip == ""
|
21
19
|
rank = find_rank
|
22
20
|
return nil unless rank
|
23
21
|
name = assemble_name(rank)
|
24
22
|
return nil unless name
|
23
|
+
id = GnCrossmap.find_id(@row, name)
|
24
|
+
return nil if id.strip.to_s == ""
|
25
25
|
{ id: id, name: name, rank: rank.to_s }
|
26
26
|
end
|
27
27
|
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap/writer.rb
CHANGED
@@ -22,10 +22,10 @@ module GnCrossmap
|
|
22
22
|
private
|
23
23
|
|
24
24
|
def output_fields(original_fields)
|
25
|
-
original_fields +
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
original_fields + %i(matchedType inputName matchedName
|
26
|
+
matchedCanonicalForm inputRank matchedRank
|
27
|
+
synonymStatus acceptedName matchedEditDistance
|
28
|
+
matchedScore matchTaxonID)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gn_uuid
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.5'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.5'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: bundler
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,20 +150,6 @@ dependencies:
|
|
136
150
|
- - "~>"
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: '0.8'
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: gn_uuid
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '0.5'
|
146
|
-
type: :development
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - "~>"
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: '0.5'
|
153
153
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
154
154
|
back a new comma-separated list crossmapping the scientific names to one of the
|
155
155
|
data sources from http://resolver.globalnames.org
|