gn_crossmap 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 216f6c9075440bcd561f162c87444ff276062423
4
- data.tar.gz: 4a615a04bc5fcc97530edddf62e62c66fee97761
3
+ metadata.gz: 9f528c813176d1683a162444e5a70917e9efd2bb
4
+ data.tar.gz: bd398879ac5ffd7cf104d89b0dc4d6060e2cdd64
5
5
  SHA512:
6
- metadata.gz: c9284eefdad5f12f171eecc67dce4c3b6684a0a2e6b52a8ec0eac01afe03ee862625dabd32f4f3985381fe9ede398a6ceb8ade60a4752d5ea15bd89b9e96afb8
7
- data.tar.gz: 0dd26bd181ee96669f2fa52714ea358a4b92fb0df6123daf3c5c3ac139cb032481be2c47333ee31f76c8bfe147bd81aa474b76da365c026834a159efcc82a0cb
6
+ metadata.gz: 873f228e5f5e52c565df39771e788978d13f5f020cc739b37d5ef982df3d1e3477a1e535e95cd19bc6c92bd23983b799eddb52701005e8a59b3f5518c0b2da80
7
+ data.tar.gz: d10af8cf20f2e00f2508f155edd768eaee53faa1ff0046c3d740067d784d1d442067c1c1ba7e04cf3a07f6455725ade882d47a66d318e0895a286eb1b9053835
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  gn_crossmap CHANGELOG
2
2
  =====================
3
+ 0.1.7
4
+ -----
5
+
6
+ * @dimus - #13 - make it possible to ingest field names like dwc:scientificName
7
+ or http://example.org/term/sommeTerm
8
+
9
+ * @dimus - #12 - fix a bug which prevents salvaging most of the names from a
10
+ failing batch (if a batch of names has one name that breaks
11
+ resolution on GN-resolver end)
12
+
13
+ * @dimus - #11 - add taxonID from resolved data to results
14
+
15
+ * @dimus - #10 - in resulting csv moved "match_type" field to be the first one
16
+ to make it easier to see what matched and what did not
17
+
18
+ * @dimus - #9 - fixed another problem with rank inferring
3
19
 
4
20
  0.1.6
5
21
  -----
data/gn_crossmap.gemspec CHANGED
@@ -35,4 +35,5 @@ Gem::Specification.new do |gem|
35
35
  gem.add_development_dependency "rspec", "~> 3.2"
36
36
  gem.add_development_dependency "rubocop", "~> 0.31"
37
37
  gem.add_development_dependency "coveralls", "~> 0.8"
38
+ gem.add_development_dependency "gn_uuid", "~> 0.5"
38
39
  end
@@ -17,12 +17,17 @@ module GnCrossmap
17
17
  private
18
18
 
19
19
  def init_fields_collector
20
- @fields = @row.map { |f| f.to_s.strip.downcase.to_sym }
20
+ @fields = @row.map { |f| prepare_field(f) }
21
21
  @collector = collector_factory
22
22
  err = "taxonID must be present in the csv header"
23
23
  fail GnCrossmapError, err unless @fields.include?(:taxonid)
24
24
  end
25
25
 
26
+ def prepare_field(field)
27
+ field = field.to_s.gsub(":", "/")
28
+ field.split("/")[-1].strip.downcase.to_sym
29
+ end
30
+
26
31
  def collect_data
27
32
  @row = @fields.zip(@row).to_h
28
33
  data = @collector.id_name_rank(@row)
@@ -52,7 +52,7 @@ module GnCrossmap
52
52
  names.split("\n").each do |name|
53
53
  begin
54
54
  res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
55
- @processor.process(res)
55
+ @processor.process(res, @current_data)
56
56
  rescue RestClient::Exception => e
57
57
  GnCrossmap.logger.error("Resolver broke on '#{name}': #{e.message}")
58
58
  next
@@ -2,6 +2,7 @@ module GnCrossmap
2
2
  # Processes data received from the GN Resolver
3
3
  class ResultProcessor
4
4
  MATCH_TYPES = {
5
+ 0 => "No match",
5
6
  1 => "Exact match",
6
7
  2 => "Canonical form exact match",
7
8
  3 => "Canonical form fuzzy match",
@@ -33,8 +34,9 @@ module GnCrossmap
33
34
 
34
35
  def write_empty_result(datum)
35
36
  res = @original_data[datum[:supplied_id]]
36
- res += [datum[:supplied_name_string], nil, nil,
37
- @input[datum[:supplied_id]][:rank], nil, nil, nil, nil]
37
+ res += [MATCH_TYPES[0], datum[:supplied_name_string], nil,
38
+ nil, @input[datum[:supplied_id]][:rank], nil,
39
+ nil, nil, nil]
38
40
  @writer.write(res)
39
41
  end
40
42
 
@@ -45,11 +47,14 @@ module GnCrossmap
45
47
  end
46
48
 
47
49
  def compile_result(datum, result)
48
- @original_data[datum[:supplied_id]] +
49
- [datum[:supplied_name_string], result[:name_string],
50
- result[:canonical_form], @input[datum[:supplied_id]][:rank],
51
- matched_rank(result), matched_type(result),
52
- result[:edit_distance], result[:score]]
50
+ @original_data[datum[:supplied_id]] + new_data(datum, result)
51
+ end
52
+
53
+ def new_data(datum, result)
54
+ [matched_type(result), datum[:supplied_name_string],
55
+ result[:name_string], result[:canonical_form],
56
+ @input[datum[:supplied_id]][:rank], matched_rank(result),
57
+ result[:edit_distance], result[:score], result[:taxon_id]]
53
58
  end
54
59
 
55
60
  def matched_rank(record)
@@ -18,10 +18,10 @@ module GnCrossmap
18
18
  private
19
19
 
20
20
  def find_name
21
- name = @row[:scientificname].strip
21
+ name = @row[:scientificname].to_s.strip
22
22
  authorship = @row[:scientificnameauthorship].to_s.strip
23
23
  name = "#{name} #{authorship}" if authorship != ""
24
- name
24
+ name.strip == "" ? nil : name.strip
25
25
  end
26
26
 
27
27
  def parse_rank
@@ -29,11 +29,9 @@ module GnCrossmap
29
29
  return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
30
30
  words_num = @parsed_name[:canonical].split(" ").size
31
31
  infer_rank(words_num)
32
- rescue RuntimeError
32
+ rescue StandardError
33
33
  @parser = ScientificNameParser.new
34
34
  nil
35
- rescue NoMethodError
36
- nil
37
35
  end
38
36
 
39
37
  def infer_rank(words_in_canonical_form)
@@ -1,6 +1,6 @@
1
1
  # Namespace module for crossmapping checklists to GN sources
2
2
  module GnCrossmap
3
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
4
4
 
5
5
  def self.version
6
6
  VERSION
@@ -21,9 +21,9 @@ module GnCrossmap
21
21
  private
22
22
 
23
23
  def output_fields(original_fields)
24
- original_fields + [:inputName, :matchedName, :matchedCanonicalForm,
25
- :inputRank, :matchedRank, :matchedType,
26
- :matchedEditDistance, :marchedScore]
24
+ original_fields + [:matchedType, :inputName, :matchedName,
25
+ :matchedCanonicalForm, :inputRank, :matchedRank,
26
+ :matchedEditDistance, :marchedScore, :matchTaxonID]
27
27
  end
28
28
  end
29
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-05-31 00:00:00.000000000 Z
11
+ date: 2015-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trollop
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0.8'
139
+ - !ruby/object:Gem::Dependency
140
+ name: gn_uuid
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.5'
139
153
  description: Gem uses a checklist in a comma-separated format as an input, and returns
140
154
  back a new comma-separated list crossmapping the scientific names to one of the
141
155
  data sources from http://resolver.globalnames.org