gn_crossmap 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/gn_crossmap.gemspec +1 -0
- data/lib/gn_crossmap/collector.rb +6 -1
- data/lib/gn_crossmap/resolver.rb +1 -1
- data/lib/gn_crossmap/result_processor.rb +12 -7
- data/lib/gn_crossmap/sci_name_collector.rb +3 -5
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap/writer.rb +3 -3
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f528c813176d1683a162444e5a70917e9efd2bb
|
4
|
+
data.tar.gz: bd398879ac5ffd7cf104d89b0dc4d6060e2cdd64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 873f228e5f5e52c565df39771e788978d13f5f020cc739b37d5ef982df3d1e3477a1e535e95cd19bc6c92bd23983b799eddb52701005e8a59b3f5518c0b2da80
|
7
|
+
data.tar.gz: d10af8cf20f2e00f2508f155edd768eaee53faa1ff0046c3d740067d784d1d442067c1c1ba7e04cf3a07f6455725ade882d47a66d318e0895a286eb1b9053835
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
gn_crossmap CHANGELOG
|
2
2
|
=====================
|
3
|
+
0.1.7
|
4
|
+
-----
|
5
|
+
|
6
|
+
* @dimus - #13 - make it possible to ingest field names like dwc:scientificName
|
7
|
+
or http://example.org/term/someTerm
|
8
|
+
|
9
|
+
* @dimus - #12 - fix a bug which prevented salvaging most of the names from a
|
10
|
+
failing batch (if a batch of names has one name that breaks
|
11
|
+
resolution on GN-resolver end)
|
12
|
+
|
13
|
+
* @dimus - #11 - add taxonID from resolved data to results
|
14
|
+
|
15
|
+
* @dimus - #10 - in resulting csv moved "match_type" field to be the first one
|
16
|
+
to make it easier to see what matched and what did not
|
17
|
+
|
18
|
+
* @dimus - #9 - fixed another problem with rank inferring
|
3
19
|
|
4
20
|
0.1.6
|
5
21
|
-----
|
data/gn_crossmap.gemspec
CHANGED
@@ -17,12 +17,17 @@ module GnCrossmap
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def init_fields_collector
|
20
|
-
@fields = @row.map { |f| f
|
20
|
+
@fields = @row.map { |f| prepare_field(f) }
|
21
21
|
@collector = collector_factory
|
22
22
|
err = "taxonID must be present in the csv header"
|
23
23
|
fail GnCrossmapError, err unless @fields.include?(:taxonid)
|
24
24
|
end
|
25
25
|
|
26
|
+
def prepare_field(field)
|
27
|
+
field = field.to_s.gsub(":", "/")
|
28
|
+
field.split("/")[-1].strip.downcase.to_sym
|
29
|
+
end
|
30
|
+
|
26
31
|
def collect_data
|
27
32
|
@row = @fields.zip(@row).to_h
|
28
33
|
data = @collector.id_name_rank(@row)
|
data/lib/gn_crossmap/resolver.rb
CHANGED
@@ -52,7 +52,7 @@ module GnCrossmap
|
|
52
52
|
names.split("\n").each do |name|
|
53
53
|
begin
|
54
54
|
res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
|
55
|
-
@processor.process(res)
|
55
|
+
@processor.process(res, @current_data)
|
56
56
|
rescue RestClient::Exception => e
|
57
57
|
GnCrossmap.logger.error("Resolver broke on '#{name}': #{e.message}")
|
58
58
|
next
|
@@ -2,6 +2,7 @@ module GnCrossmap
|
|
2
2
|
# Processes data received from the GN Resolver
|
3
3
|
class ResultProcessor
|
4
4
|
MATCH_TYPES = {
|
5
|
+
0 => "No match",
|
5
6
|
1 => "Exact match",
|
6
7
|
2 => "Canonical form exact match",
|
7
8
|
3 => "Canonical form fuzzy match",
|
@@ -33,8 +34,9 @@ module GnCrossmap
|
|
33
34
|
|
34
35
|
def write_empty_result(datum)
|
35
36
|
res = @original_data[datum[:supplied_id]]
|
36
|
-
res += [datum[:supplied_name_string], nil,
|
37
|
-
@input[datum[:supplied_id]][:rank], nil,
|
37
|
+
res += [MATCH_TYPES[0], datum[:supplied_name_string], nil,
|
38
|
+
nil, @input[datum[:supplied_id]][:rank], nil,
|
39
|
+
nil, nil, nil]
|
38
40
|
@writer.write(res)
|
39
41
|
end
|
40
42
|
|
@@ -45,11 +47,14 @@ module GnCrossmap
|
|
45
47
|
end
|
46
48
|
|
47
49
|
def compile_result(datum, result)
|
48
|
-
@original_data[datum[:supplied_id]] +
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
@original_data[datum[:supplied_id]] + new_data(datum, result)
|
51
|
+
end
|
52
|
+
|
53
|
+
def new_data(datum, result)
|
54
|
+
[matched_type(result), datum[:supplied_name_string],
|
55
|
+
result[:name_string], result[:canonical_form],
|
56
|
+
@input[datum[:supplied_id]][:rank], matched_rank(result),
|
57
|
+
result[:edit_distance], result[:score], result[:taxon_id]]
|
53
58
|
end
|
54
59
|
|
55
60
|
def matched_rank(record)
|
@@ -18,10 +18,10 @@ module GnCrossmap
|
|
18
18
|
private
|
19
19
|
|
20
20
|
def find_name
|
21
|
-
name = @row[:scientificname].strip
|
21
|
+
name = @row[:scientificname].to_s.strip
|
22
22
|
authorship = @row[:scientificnameauthorship].to_s.strip
|
23
23
|
name = "#{name} #{authorship}" if authorship != ""
|
24
|
-
name
|
24
|
+
name.strip == "" ? nil : name.strip
|
25
25
|
end
|
26
26
|
|
27
27
|
def parse_rank
|
@@ -29,11 +29,9 @@ module GnCrossmap
|
|
29
29
|
return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
|
30
30
|
words_num = @parsed_name[:canonical].split(" ").size
|
31
31
|
infer_rank(words_num)
|
32
|
-
rescue
|
32
|
+
rescue StandardError
|
33
33
|
@parser = ScientificNameParser.new
|
34
34
|
nil
|
35
|
-
rescue NoMethodError
|
36
|
-
nil
|
37
35
|
end
|
38
36
|
|
39
37
|
def infer_rank(words_in_canonical_form)
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap/writer.rb
CHANGED
@@ -21,9 +21,9 @@ module GnCrossmap
|
|
21
21
|
private
|
22
22
|
|
23
23
|
def output_fields(original_fields)
|
24
|
-
original_fields + [:
|
25
|
-
:
|
26
|
-
:matchedEditDistance, :marchedScore]
|
24
|
+
original_fields + [:matchedType, :inputName, :matchedName,
|
25
|
+
:matchedCanonicalForm, :inputRank, :matchedRank,
|
26
|
+
:matchedEditDistance, :marchedScore, :matchTaxonID]
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05
|
11
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0.8'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: gn_uuid
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0.5'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0.5'
|
139
153
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
140
154
|
back a new comma-separated list crossmapping the scientific names to one of the
|
141
155
|
data sources from http://resolver.globalnames.org
|