gn_crossmap 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/gn_crossmap.gemspec +1 -0
- data/lib/gn_crossmap/collector.rb +6 -1
- data/lib/gn_crossmap/resolver.rb +1 -1
- data/lib/gn_crossmap/result_processor.rb +12 -7
- data/lib/gn_crossmap/sci_name_collector.rb +3 -5
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap/writer.rb +3 -3
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f528c813176d1683a162444e5a70917e9efd2bb
|
4
|
+
data.tar.gz: bd398879ac5ffd7cf104d89b0dc4d6060e2cdd64
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 873f228e5f5e52c565df39771e788978d13f5f020cc739b37d5ef982df3d1e3477a1e535e95cd19bc6c92bd23983b799eddb52701005e8a59b3f5518c0b2da80
|
7
|
+
data.tar.gz: d10af8cf20f2e00f2508f155edd768eaee53faa1ff0046c3d740067d784d1d442067c1c1ba7e04cf3a07f6455725ade882d47a66d318e0895a286eb1b9053835
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,21 @@
|
|
1
1
|
gn_crossmap CHANGELOG
|
2
2
|
=====================
|
3
|
+
0.1.7
|
4
|
+
-----
|
5
|
+
|
6
|
+
* @dimus - #13 - make it possible to ingest field names like dwc:scientificName
|
7
|
+
or http://example.org/term/someTerm
|
8
|
+
|
9
|
+
* @dimus - #12 - fix a bug which prevents salvaging most of the names from a
|
10
|
+
failing batch (if a batch of names has one name that breaks
|
11
|
+
resolution on GN-resolver end)
|
12
|
+
|
13
|
+
* @dimus - #11 - add taxonID from resolved data to results
|
14
|
+
|
15
|
+
* @dimus - #10 - in resulting csv moved "match_type" field to be the first one
|
16
|
+
to make it easier to see what matched and what did not
|
17
|
+
|
18
|
+
* @dimus - #9 - fixed another problem with rank inferring
|
3
19
|
|
4
20
|
0.1.6
|
5
21
|
-----
|
data/gn_crossmap.gemspec
CHANGED
@@ -17,12 +17,17 @@ module GnCrossmap
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def init_fields_collector
|
20
|
-
@fields = @row.map { |f| f
|
20
|
+
@fields = @row.map { |f| prepare_field(f) }
|
21
21
|
@collector = collector_factory
|
22
22
|
err = "taxonID must be present in the csv header"
|
23
23
|
fail GnCrossmapError, err unless @fields.include?(:taxonid)
|
24
24
|
end
|
25
25
|
|
26
|
+
def prepare_field(field)
|
27
|
+
field = field.to_s.gsub(":", "/")
|
28
|
+
field.split("/")[-1].strip.downcase.to_sym
|
29
|
+
end
|
30
|
+
|
26
31
|
def collect_data
|
27
32
|
@row = @fields.zip(@row).to_h
|
28
33
|
data = @collector.id_name_rank(@row)
|
data/lib/gn_crossmap/resolver.rb
CHANGED
@@ -52,7 +52,7 @@ module GnCrossmap
|
|
52
52
|
names.split("\n").each do |name|
|
53
53
|
begin
|
54
54
|
res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
|
55
|
-
@processor.process(res)
|
55
|
+
@processor.process(res, @current_data)
|
56
56
|
rescue RestClient::Exception => e
|
57
57
|
GnCrossmap.logger.error("Resolver broke on '#{name}': #{e.message}")
|
58
58
|
next
|
@@ -2,6 +2,7 @@ module GnCrossmap
|
|
2
2
|
# Processes data received from the GN Resolver
|
3
3
|
class ResultProcessor
|
4
4
|
MATCH_TYPES = {
|
5
|
+
0 => "No match",
|
5
6
|
1 => "Exact match",
|
6
7
|
2 => "Canonical form exact match",
|
7
8
|
3 => "Canonical form fuzzy match",
|
@@ -33,8 +34,9 @@ module GnCrossmap
|
|
33
34
|
|
34
35
|
def write_empty_result(datum)
|
35
36
|
res = @original_data[datum[:supplied_id]]
|
36
|
-
res += [datum[:supplied_name_string], nil,
|
37
|
-
@input[datum[:supplied_id]][:rank], nil,
|
37
|
+
res += [MATCH_TYPES[0], datum[:supplied_name_string], nil,
|
38
|
+
nil, @input[datum[:supplied_id]][:rank], nil,
|
39
|
+
nil, nil, nil]
|
38
40
|
@writer.write(res)
|
39
41
|
end
|
40
42
|
|
@@ -45,11 +47,14 @@ module GnCrossmap
|
|
45
47
|
end
|
46
48
|
|
47
49
|
def compile_result(datum, result)
|
48
|
-
@original_data[datum[:supplied_id]] +
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
@original_data[datum[:supplied_id]] + new_data(datum, result)
|
51
|
+
end
|
52
|
+
|
53
|
+
def new_data(datum, result)
|
54
|
+
[matched_type(result), datum[:supplied_name_string],
|
55
|
+
result[:name_string], result[:canonical_form],
|
56
|
+
@input[datum[:supplied_id]][:rank], matched_rank(result),
|
57
|
+
result[:edit_distance], result[:score], result[:taxon_id]]
|
53
58
|
end
|
54
59
|
|
55
60
|
def matched_rank(record)
|
@@ -18,10 +18,10 @@ module GnCrossmap
|
|
18
18
|
private
|
19
19
|
|
20
20
|
def find_name
|
21
|
-
name = @row[:scientificname].strip
|
21
|
+
name = @row[:scientificname].to_s.strip
|
22
22
|
authorship = @row[:scientificnameauthorship].to_s.strip
|
23
23
|
name = "#{name} #{authorship}" if authorship != ""
|
24
|
-
name
|
24
|
+
name.strip == "" ? nil : name.strip
|
25
25
|
end
|
26
26
|
|
27
27
|
def parse_rank
|
@@ -29,11 +29,9 @@ module GnCrossmap
|
|
29
29
|
return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
|
30
30
|
words_num = @parsed_name[:canonical].split(" ").size
|
31
31
|
infer_rank(words_num)
|
32
|
-
rescue
|
32
|
+
rescue StandardError
|
33
33
|
@parser = ScientificNameParser.new
|
34
34
|
nil
|
35
|
-
rescue NoMethodError
|
36
|
-
nil
|
37
35
|
end
|
38
36
|
|
39
37
|
def infer_rank(words_in_canonical_form)
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap/writer.rb
CHANGED
@@ -21,9 +21,9 @@ module GnCrossmap
|
|
21
21
|
private
|
22
22
|
|
23
23
|
def output_fields(original_fields)
|
24
|
-
original_fields + [:
|
25
|
-
:
|
26
|
-
:matchedEditDistance, :marchedScore]
|
24
|
+
original_fields + [:matchedType, :inputName, :matchedName,
|
25
|
+
:matchedCanonicalForm, :inputRank, :matchedRank,
|
26
|
+
:matchedEditDistance, :marchedScore, :matchTaxonID]
|
27
27
|
end
|
28
28
|
end
|
29
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05
|
11
|
+
date: 2015-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trollop
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0.8'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: gn_uuid
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0.5'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0.5'
|
139
153
|
description: Gem uses a checklist in a comma-separated format as an input, and returns
|
140
154
|
back a new comma-separated list crossmapping the scientific names to one of the
|
141
155
|
data sources from http://resolver.globalnames.org
|