gn_crossmap 2.3.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +4 -0
- data/README.md +13 -4
- data/Rakefile +1 -1
- data/gn_crossmap.gemspec +8 -2
- data/lib/gn_crossmap.rb +5 -0
- data/lib/gn_crossmap/collector.rb +0 -7
- data/lib/gn_crossmap/column_collector.rb +2 -2
- data/lib/gn_crossmap/sci_name_collector.rb +1 -1
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap/writer.rb +4 -4
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 143661fa671afbd77e8f666ef883df2e127356fa
|
4
|
+
data.tar.gz: a809d9b2472bb8577fffc9df78b72557f7ff3200
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dbc394ee6ae8e673f455dba4f789bc0ccded1b5c68ca51e19d1549233effeac417b26f7faa6fcac8a7b7db06f4a4d26e7f0944b5f9553673f82a752452e9048
|
7
|
+
data.tar.gz: 6e4be322cbd94e014ee70c9f009fe28c2da4f1b9012cf5cdfa2c0c8b62772dd69ed41ae1c42e917d2648ba3852a3c819add1f2813f85800156dff93f0d7111c0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -56,7 +56,8 @@ crossmap -i my_list.csv -o my_list_if.csv -d 5
|
|
56
56
|
# to use standard input and/or output
|
57
57
|
cat my_list.csv | crossmap -i - -o - > output
|
58
58
|
|
59
|
-
# to keep only taxonID from original input
|
59
|
+
# to keep only taxonID (if given) from original input
|
60
|
+
# no original fields will be kept without taxonID
|
60
61
|
cat my_list.csv | crossmap -i my_list.csv -s
|
61
62
|
```
|
62
63
|
|
@@ -85,8 +86,9 @@ designates `STDIN`
|
|
85
86
|
: (integer) id of a data source from [GN resolver][resolver]
|
86
87
|
|
87
88
|
``skip_original``
|
88
|
-
: (boolean) if true only `taxonID` is preserved
|
89
|
-
all original data is preserved
|
89
|
+
: (boolean) if true only `taxonID` (if given) is preserved
|
90
|
+
from original data. Otherwise all original data is preserved. If there is no
|
91
|
+
``taxonID``, no original data will be preserved.
|
90
92
|
|
91
93
|
``alt_headers``
|
92
94
|
: (array) empty array by default. If `alt_headers` are not empty they are used
|
@@ -184,7 +186,14 @@ Match types dictionary can be accessed with `GnCrossmap::MATCH_TYPES` constant
|
|
184
186
|
`subspecies` `variety` `form scientificNameAuthorship` `scientificName`
|
185
187
|
`taxonRank`
|
186
188
|
|
187
|
-
####
|
189
|
+
#### simplest Example -- only scientificName
|
190
|
+
|
191
|
+
| scientificName |
|
192
|
+
|---------------------------------------------------------|
|
193
|
+
| Animalia |
|
194
|
+
| Macrobiotus echinogenitus subsp. areolatus Murray, 1907 |
|
195
|
+
|
196
|
+
#### taxonID and scientificName Example
|
188
197
|
|
189
198
|
taxonID;scientificName
|
190
199
|
1;Macrobiotus echinogenitus subsp. areolatus Murray, 1907
|
data/Rakefile
CHANGED
data/gn_crossmap.gemspec
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
|
2
3
|
lib = File.expand_path("../lib", __FILE__)
|
4
|
+
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require "gn_crossmap/version"
|
5
7
|
|
8
|
+
# rubocop:disable Metrics/BlockLength:
|
9
|
+
|
6
10
|
Gem::Specification.new do |gem|
|
7
11
|
gem.required_ruby_version = ">= 2.1"
|
8
12
|
gem.name = "gn_crossmap"
|
@@ -12,7 +16,7 @@ Gem::Specification.new do |gem|
|
|
12
16
|
gem.email = ["dmozzherin@gmail.com"]
|
13
17
|
|
14
18
|
gem.summary = "Crossmaps a list of scientific names to names from " \
|
15
|
-
|
19
|
+
"a data source in GN Index"
|
16
20
|
gem.description = "Gem uses a checklist in a comma-separated format as " \
|
17
21
|
"an input, and returns back a new comma-separated " \
|
18
22
|
"list crossmapping the scientific names to one of the " \
|
@@ -30,11 +34,13 @@ Gem::Specification.new do |gem|
|
|
30
34
|
gem.add_dependency "biodiversity", "~> 3.1"
|
31
35
|
gem.add_dependency "rest-client", "~> 2.0"
|
32
36
|
gem.add_dependency "logger-colors", "~> 1.0"
|
37
|
+
gem.add_dependency "gn_uuid", "~> 0.5"
|
33
38
|
|
34
39
|
gem.add_development_dependency "bundler", "~> 1.7"
|
35
40
|
gem.add_development_dependency "rake", "~> 11.0"
|
36
41
|
gem.add_development_dependency "rspec", "~> 3.2"
|
37
42
|
gem.add_development_dependency "rubocop", "~> 0.31"
|
38
43
|
gem.add_development_dependency "coveralls", "~> 0.8"
|
39
|
-
gem.add_development_dependency "gn_uuid", "~> 0.5"
|
40
44
|
end
|
45
|
+
|
46
|
+
# rubocop:enable Metrics/BlockLength:
|
data/lib/gn_crossmap.rb
CHANGED
@@ -5,6 +5,7 @@ require "tempfile"
|
|
5
5
|
require "logger"
|
6
6
|
require "logger/colors"
|
7
7
|
require "biodiversity"
|
8
|
+
require "gn_uuid"
|
8
9
|
require "gn_crossmap/errors"
|
9
10
|
require "gn_crossmap/version"
|
10
11
|
require "gn_crossmap/reader"
|
@@ -57,6 +58,10 @@ module GnCrossmap
|
|
57
58
|
logger.info(message)
|
58
59
|
end
|
59
60
|
|
61
|
+
def find_id(row, name)
|
62
|
+
row.key?(:taxonid) ? row[:taxonid].strip : GnUUID.uuid(name)
|
63
|
+
end
|
64
|
+
|
60
65
|
private
|
61
66
|
|
62
67
|
def create_resolver(writer, opts)
|
@@ -21,13 +21,6 @@ module GnCrossmap
|
|
21
21
|
def init_fields_collector
|
22
22
|
@fields = @row.map { |f| prepare_field(f) }
|
23
23
|
@collector = collector_factory
|
24
|
-
err = "taxonID must be present in the csv header"
|
25
|
-
raise GnCrossmapError, err unless taxon_id?
|
26
|
-
end
|
27
|
-
|
28
|
-
def taxon_id?
|
29
|
-
@taxon_id_index = @fields.index(:taxonid)
|
30
|
-
!@taxon_id_index.nil?
|
31
24
|
end
|
32
25
|
|
33
26
|
def prepare_field(field)
|
@@ -16,12 +16,12 @@ module GnCrossmap
|
|
16
16
|
|
17
17
|
def id_name_rank(row)
|
18
18
|
@row = row
|
19
|
-
id = @row[:taxonid]
|
20
|
-
return nil if id.to_s.strip == ""
|
21
19
|
rank = find_rank
|
22
20
|
return nil unless rank
|
23
21
|
name = assemble_name(rank)
|
24
22
|
return nil unless name
|
23
|
+
id = GnCrossmap.find_id(@row, name)
|
24
|
+
return nil if id.strip.to_s == ""
|
25
25
|
{ id: id, name: name, rank: rank.to_s }
|
26
26
|
end
|
27
27
|
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap/writer.rb
CHANGED
@@ -22,10 +22,10 @@ module GnCrossmap
|
|
22
22
|
private
|
23
23
|
|
24
24
|
def output_fields(original_fields)
|
25
|
-
original_fields +
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
original_fields + %i(matchedType inputName matchedName
|
26
|
+
matchedCanonicalForm inputRank matchedRank
|
27
|
+
synonymStatus acceptedName matchedEditDistance
|
28
|
+
matchedScore matchTaxonID)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: gn_uuid
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.5'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.5'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: bundler
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,20 +150,6 @@ dependencies:
|
|
136
150
|
- - "~>"
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: '0.8'
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: gn_uuid
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - "~>"
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '0.5'
|
146
|
-
type: :development
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - "~>"
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: '0.5'
|
153
153
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
154
154
|
back a new comma-separated list crossmapping the scientific names to one of the
|
155
155
|
data sources from http://resolver.globalnames.org
|