gn_crossmap 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 216f6c9075440bcd561f162c87444ff276062423
4
- data.tar.gz: 4a615a04bc5fcc97530edddf62e62c66fee97761
3
+ metadata.gz: 9f528c813176d1683a162444e5a70917e9efd2bb
4
+ data.tar.gz: bd398879ac5ffd7cf104d89b0dc4d6060e2cdd64
5
5
  SHA512:
6
- metadata.gz: c9284eefdad5f12f171eecc67dce4c3b6684a0a2e6b52a8ec0eac01afe03ee862625dabd32f4f3985381fe9ede398a6ceb8ade60a4752d5ea15bd89b9e96afb8
7
- data.tar.gz: 0dd26bd181ee96669f2fa52714ea358a4b92fb0df6123daf3c5c3ac139cb032481be2c47333ee31f76c8bfe147bd81aa474b76da365c026834a159efcc82a0cb
6
+ metadata.gz: 873f228e5f5e52c565df39771e788978d13f5f020cc739b37d5ef982df3d1e3477a1e535e95cd19bc6c92bd23983b799eddb52701005e8a59b3f5518c0b2da80
7
+ data.tar.gz: d10af8cf20f2e00f2508f155edd768eaee53faa1ff0046c3d740067d784d1d442067c1c1ba7e04cf3a07f6455725ade882d47a66d318e0895a286eb1b9053835
data/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  gn_crossmap CHANGELOG
2
2
  =====================
3
+ 0.1.7
4
+ -----
5
+
6
+ * @dimus - #13 - make it possible to ingest field names like dwc:scientificName
7
+ or http://example.org/term/someTerm
8
+
9
+ * @dimus - #12 - fix a bug which prevented salvaging most of the names from a
10
+ failing batch (if a batch of names has one name that breaks
11
+ resolution on GN-resolver end)
12
+
13
+ * @dimus - #11 - add taxonID from resolved data to results
14
+
15
+ * @dimus - #10 - in resulting csv moved "match_type" field to be the first one
16
+ to make it easier to see what matched and what did not
17
+
18
+ * @dimus - #9 - fixed another problem with rank inferring
3
19
 
4
20
  0.1.6
5
21
  -----
data/gn_crossmap.gemspec CHANGED
@@ -35,4 +35,5 @@ Gem::Specification.new do |gem|
35
35
  gem.add_development_dependency "rspec", "~> 3.2"
36
36
  gem.add_development_dependency "rubocop", "~> 0.31"
37
37
  gem.add_development_dependency "coveralls", "~> 0.8"
38
+ gem.add_development_dependency "gn_uuid", "~> 0.5"
38
39
  end
@@ -17,12 +17,17 @@ module GnCrossmap
17
17
  private
18
18
 
19
19
  def init_fields_collector
20
- @fields = @row.map { |f| f.to_s.strip.downcase.to_sym }
20
+ @fields = @row.map { |f| prepare_field(f) }
21
21
  @collector = collector_factory
22
22
  err = "taxonID must be present in the csv header"
23
23
  fail GnCrossmapError, err unless @fields.include?(:taxonid)
24
24
  end
25
25
 
26
+ def prepare_field(field)
27
+ field = field.to_s.gsub(":", "/")
28
+ field.split("/")[-1].strip.downcase.to_sym
29
+ end
30
+
26
31
  def collect_data
27
32
  @row = @fields.zip(@row).to_h
28
33
  data = @collector.id_name_rank(@row)
@@ -52,7 +52,7 @@ module GnCrossmap
52
52
  names.split("\n").each do |name|
53
53
  begin
54
54
  res = RestClient.post(URL, data: name, data_source_ids: @ds_id)
55
- @processor.process(res)
55
+ @processor.process(res, @current_data)
56
56
  rescue RestClient::Exception => e
57
57
  GnCrossmap.logger.error("Resolver broke on '#{name}': #{e.message}")
58
58
  next
@@ -2,6 +2,7 @@ module GnCrossmap
2
2
  # Processes data received from the GN Resolver
3
3
  class ResultProcessor
4
4
  MATCH_TYPES = {
5
+ 0 => "No match",
5
6
  1 => "Exact match",
6
7
  2 => "Canonical form exact match",
7
8
  3 => "Canonical form fuzzy match",
@@ -33,8 +34,9 @@ module GnCrossmap
33
34
 
34
35
  def write_empty_result(datum)
35
36
  res = @original_data[datum[:supplied_id]]
36
- res += [datum[:supplied_name_string], nil, nil,
37
- @input[datum[:supplied_id]][:rank], nil, nil, nil, nil]
37
+ res += [MATCH_TYPES[0], datum[:supplied_name_string], nil,
38
+ nil, @input[datum[:supplied_id]][:rank], nil,
39
+ nil, nil, nil]
38
40
  @writer.write(res)
39
41
  end
40
42
 
@@ -45,11 +47,14 @@ module GnCrossmap
45
47
  end
46
48
 
47
49
  def compile_result(datum, result)
48
- @original_data[datum[:supplied_id]] +
49
- [datum[:supplied_name_string], result[:name_string],
50
- result[:canonical_form], @input[datum[:supplied_id]][:rank],
51
- matched_rank(result), matched_type(result),
52
- result[:edit_distance], result[:score]]
50
+ @original_data[datum[:supplied_id]] + new_data(datum, result)
51
+ end
52
+
53
+ def new_data(datum, result)
54
+ [matched_type(result), datum[:supplied_name_string],
55
+ result[:name_string], result[:canonical_form],
56
+ @input[datum[:supplied_id]][:rank], matched_rank(result),
57
+ result[:edit_distance], result[:score], result[:taxon_id]]
53
58
  end
54
59
 
55
60
  def matched_rank(record)
@@ -18,10 +18,10 @@ module GnCrossmap
18
18
  private
19
19
 
20
20
  def find_name
21
- name = @row[:scientificname].strip
21
+ name = @row[:scientificname].to_s.strip
22
22
  authorship = @row[:scientificnameauthorship].to_s.strip
23
23
  name = "#{name} #{authorship}" if authorship != ""
24
- name
24
+ name.strip == "" ? nil : name.strip
25
25
  end
26
26
 
27
27
  def parse_rank
@@ -29,11 +29,9 @@ module GnCrossmap
29
29
  return nil if !@parsed_name[:canonical] || @parsed_name[:hybrid]
30
30
  words_num = @parsed_name[:canonical].split(" ").size
31
31
  infer_rank(words_num)
32
- rescue RuntimeError
32
+ rescue StandardError
33
33
  @parser = ScientificNameParser.new
34
34
  nil
35
- rescue NoMethodError
36
- nil
37
35
  end
38
36
 
39
37
  def infer_rank(words_in_canonical_form)
@@ -1,6 +1,6 @@
1
1
  # Namespace module for crossmapping checklists to GN sources
2
2
  module GnCrossmap
3
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
4
4
 
5
5
  def self.version
6
6
  VERSION
@@ -21,9 +21,9 @@ module GnCrossmap
21
21
  private
22
22
 
23
23
  def output_fields(original_fields)
24
- original_fields + [:inputName, :matchedName, :matchedCanonicalForm,
25
- :inputRank, :matchedRank, :matchedType,
26
- :matchedEditDistance, :marchedScore]
24
+ original_fields + [:matchedType, :inputName, :matchedName,
25
+ :matchedCanonicalForm, :inputRank, :matchedRank,
26
+ :matchedEditDistance, :marchedScore, :matchTaxonID]
27
27
  end
28
28
  end
29
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-05-31 00:00:00.000000000 Z
11
+ date: 2015-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trollop
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0.8'
139
+ - !ruby/object:Gem::Dependency
140
+ name: gn_uuid
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.5'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.5'
139
153
  description: Gem uses a checklist in a comma-separated format as an input, and returns
140
154
  back a new comma-separated list crossmapping the scientific names to one of the
141
155
  data sources from http://resolver.globalnames.org