gn_crossmap 3.1.5 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2c789845b44b7024403fad224e24dffce133d23
4
- data.tar.gz: 9257b08c532e69c141dadec5aa1f9d185dd50968
3
+ metadata.gz: bfd0f87bbaf73d66c33e130c124e087104994879
4
+ data.tar.gz: 11020365c22465b4d804faaf2cf13c6ee168b051
5
5
  SHA512:
6
- metadata.gz: 2ca3260ab4a2985692b44bac76274a65a9079aef9b21254475ebb558d770f9e9cc4f05a35f28c4b802575c1799cc8785390192222cbe4400b0cfa669181e2043
7
- data.tar.gz: 5339a56423169e8bc84dc72d83437d47409ba4c752bef77e7de00bbf91c3ef983389c99a0ab302b10bd6358510b2f85f759ef316f7baa3e1c3275ac1e1d34e1b
6
+ metadata.gz: 5ca351156f9007cc6aaa879b3d86852b2a1aeb2584182351ccd3d228997cbccb47604bf97654bd583e58bc1a7280d0f0acb9f6903095db783e4bf0e614cebddf
7
+ data.tar.gz: 5580be6d8d50f4385e05b798b37a987338e54270d132560df1465f7d2a8cd101b22394a867481577861f03fea28efdd412542092f7f8b5928bf9d8654c434321
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 3.2.0
4
+
5
+ * @dimus - Add a column `matchSize` with number of matches for each name
6
+
3
7
  ## 3.1.5
4
8
 
5
9
  * @dimus - Fix number of columns in the output when there is no taxon_id given
data/README.md CHANGED
@@ -251,15 +251,17 @@ More examples can be found in [spec/files][files] directory
251
251
 
252
252
  Field | Description
253
253
  ---------------------|-----------------------------------------------------------
254
- taxonID | original ID attached to a name in the checklist
255
- scientificName | name from the checklist
256
- matchedScientificName| name matched from the GN Resolver data source
254
+ classification | classification path of the data source (if available)
255
+ editDistance | for fuzzy-matching -- how many characters differ between checklist and data source name
256
+ matchSize | number of returned matches for a name
257
+ matchType | what kind of match it is
257
258
  matchedCanonicalForm | canonical form of the matched name
258
- rank | rank from the source (if it was given/inferred)
259
259
  matchedRank | corresponding rank from the data source
260
- matchType | what kind of match it is
261
- editDistance | for fuzzy-matching -- how many characters differ between checklist and data source name
260
+ matchedScientificName| name matched from the GN Resolver data source
261
+ rank | rank from the source (if it was given/inferred)
262
+ scientificName | name from the checklist
262
263
  score | heuristic score from 0 to 1, where 1 is a good match and a score of 0.5 means the match requires further human investigation
264
+ taxonID | original ID attached to a name in the checklist
263
265
 
264
266
  #### Types of Matches
265
267
 
data/lib/gn_crossmap.rb CHANGED
@@ -61,7 +61,11 @@ module GnCrossmap
61
61
  end
62
62
 
63
63
  def find_id(row, name)
64
- row.key?(:taxonid) ? row[:taxonid].strip : GnUUID.uuid(name)
64
+ if row.key?(:taxonid) && row[:taxonid]
65
+ row[:taxonid].to_s.strip
66
+ else
67
+ GnUUID.uuid(name.to_s)
68
+ end
65
69
  end
66
70
 
67
71
  private
@@ -36,7 +36,7 @@ module GnCrossmap
36
36
 
37
37
  def compile_empty_result(datum)
38
38
  res = @original_data[datum[:supplied_id]]
39
- res += [GnCrossmap::MATCH_TYPES[0], datum[:supplied_name_string],
39
+ res += [GnCrossmap::MATCH_TYPES[0], 0, datum[:supplied_name_string],
40
40
  nil, nil, nil, nil,
41
41
  @input[datum[:supplied_id]][:rank], nil, nil, nil, nil, nil]
42
42
  res << nil if @with_classification
@@ -45,8 +45,9 @@ module GnCrossmap
45
45
 
46
46
  def write_result(datum)
47
47
  collect_stats(datum)
48
+ match_size = datum[:results].size
48
49
  datum[:results].each do |result|
49
- @writer.write(compile_result(datum, result))
50
+ @writer.write(compile_result(datum, result, match_size))
50
51
  end
51
52
  end
52
53
 
@@ -56,15 +57,16 @@ module GnCrossmap
56
57
  @stats.stats[:resolved_records] += 1
57
58
  end
58
59
 
59
- def compile_result(datum, result)
60
- @original_data[datum[:supplied_id]] + new_data(datum, result)
60
+ def compile_result(datum, result, match_size)
61
+ @original_data[datum[:supplied_id]] + new_data(datum,
62
+ result, match_size)
61
63
  end
62
64
 
63
65
  # rubocop:disable Metrics/AbcSize
64
66
 
65
- def new_data(datum, result)
67
+ def new_data(datum, result, match_size)
66
68
  synonym = result[:current_name_string] ? "synonym" : nil
67
- res = [matched_type(result), datum[:supplied_name_string],
69
+ res = [matched_type(result), match_size, datum[:supplied_name_string],
68
70
  result[:name_string], canonical(datum[:supplied_name_string]),
69
71
  result[:canonical_form], result[:edit_distance],
70
72
  @input[datum[:supplied_id]][:rank], matched_rank(result), synonym,
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnCrossmap
5
- VERSION = "3.1.5"
5
+ VERSION = "3.2.0"
6
6
 
7
7
  def self.version
8
8
  VERSION
@@ -26,8 +26,9 @@ module GnCrossmap
26
26
  private
27
27
 
28
28
  def output_fields(original_fields)
29
- original_fields + %i[matchedType inputName matchedName inputCanonicalForm
30
- matchedCanonicalForm matchedEditDistance inputRank
29
+ original_fields + %i[matchedType matchSize inputName matchedName
30
+ inputCanonicalForm matchedCanonicalForm
31
+ matchedEditDistance inputRank
31
32
  matchedRank synonymStatus acceptedName
32
33
  matchedScore matchTaxonID]
33
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.5
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-10 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity