gn_crossmap 3.1.5 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2c789845b44b7024403fad224e24dffce133d23
4
- data.tar.gz: 9257b08c532e69c141dadec5aa1f9d185dd50968
3
+ metadata.gz: bfd0f87bbaf73d66c33e130c124e087104994879
4
+ data.tar.gz: 11020365c22465b4d804faaf2cf13c6ee168b051
5
5
  SHA512:
6
- metadata.gz: 2ca3260ab4a2985692b44bac76274a65a9079aef9b21254475ebb558d770f9e9cc4f05a35f28c4b802575c1799cc8785390192222cbe4400b0cfa669181e2043
7
- data.tar.gz: 5339a56423169e8bc84dc72d83437d47409ba4c752bef77e7de00bbf91c3ef983389c99a0ab302b10bd6358510b2f85f759ef316f7baa3e1c3275ac1e1d34e1b
6
+ metadata.gz: 5ca351156f9007cc6aaa879b3d86852b2a1aeb2584182351ccd3d228997cbccb47604bf97654bd583e58bc1a7280d0f0acb9f6903095db783e4bf0e614cebddf
7
+ data.tar.gz: 5580be6d8d50f4385e05b798b37a987338e54270d132560df1465f7d2a8cd101b22394a867481577861f03fea28efdd412542092f7f8b5928bf9d8654c434321
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # ``gn_crossmap`` CHANGELOG
2
2
 
3
+ ## 3.2.0
4
+
5
+ * @dimus - Add a column `matchSize` with number of matches for each name
6
+
3
7
  ## 3.1.5
4
8
 
5
9
  * @dimus - Fix number of columns in the output when there is no taxon_id given
data/README.md CHANGED
@@ -251,15 +251,17 @@ More examples can be found in [spec/files][files] directory
251
251
 
252
252
  Field | Description
253
253
  ---------------------|-----------------------------------------------------------
254
- taxonID | original ID attached to a name in the checklist
255
- scientificName | name from the checklist
256
- matchedScientificName| name matched from the GN Resolver data source
254
+ classification | classification path of the data source (if available)
255
+ editDistance | for fuzzy-matching -- how many characters differ between checklist and data source name
256
+ matchSize | number of returned matches for a name
257
+ matchType | what kind of match it is
257
258
  matchedCanonicalForm | canonical form of the matched name
258
- rank | rank from the source (if it was given/inferred)
259
259
  matchedRank | corresponding rank from the data source
260
- matchType | what kind of match it is
261
- editDistance | for fuzzy-matching -- how many characters differ between checklist and data source name
260
+ matchedScientificName| name matched from the GN Resolver data source
261
+ rank | rank from the source (if it was given/inferred)
262
+ scientificName | name from the checklist
262
263
  score | heuristic score from 0 to 1, where 1 is a good match and a 0.5 match requires further human investigation
264
+ taxonID | original ID attached to a name in the checklist
263
265
 
264
266
  #### Types of Matches
265
267
 
data/lib/gn_crossmap.rb CHANGED
@@ -61,7 +61,11 @@ module GnCrossmap
61
61
  end
62
62
 
63
63
  def find_id(row, name)
64
- row.key?(:taxonid) ? row[:taxonid].strip : GnUUID.uuid(name)
64
+ if row.key?(:taxonid) && row[:taxonid]
65
+ row[:taxonid].to_s.strip
66
+ else
67
+ GnUUID.uuid(name.to_s)
68
+ end
65
69
  end
66
70
 
67
71
  private
@@ -36,7 +36,7 @@ module GnCrossmap
36
36
 
37
37
  def compile_empty_result(datum)
38
38
  res = @original_data[datum[:supplied_id]]
39
- res += [GnCrossmap::MATCH_TYPES[0], datum[:supplied_name_string],
39
+ res += [GnCrossmap::MATCH_TYPES[0], 0, datum[:supplied_name_string],
40
40
  nil, nil, nil, nil,
41
41
  @input[datum[:supplied_id]][:rank], nil, nil, nil, nil, nil]
42
42
  res << nil if @with_classification
@@ -45,8 +45,9 @@ module GnCrossmap
45
45
 
46
46
  def write_result(datum)
47
47
  collect_stats(datum)
48
+ match_size = datum[:results].size
48
49
  datum[:results].each do |result|
49
- @writer.write(compile_result(datum, result))
50
+ @writer.write(compile_result(datum, result, match_size))
50
51
  end
51
52
  end
52
53
 
@@ -56,15 +57,16 @@ module GnCrossmap
56
57
  @stats.stats[:resolved_records] += 1
57
58
  end
58
59
 
59
- def compile_result(datum, result)
60
- @original_data[datum[:supplied_id]] + new_data(datum, result)
60
+ def compile_result(datum, result, match_size)
61
+ @original_data[datum[:supplied_id]] + new_data(datum,
62
+ result, match_size)
61
63
  end
62
64
 
63
65
  # rubocop:disable Metrics/AbcSize
64
66
 
65
- def new_data(datum, result)
67
+ def new_data(datum, result, match_size)
66
68
  synonym = result[:current_name_string] ? "synonym" : nil
67
- res = [matched_type(result), datum[:supplied_name_string],
69
+ res = [matched_type(result), match_size, datum[:supplied_name_string],
68
70
  result[:name_string], canonical(datum[:supplied_name_string]),
69
71
  result[:canonical_form], result[:edit_distance],
70
72
  @input[datum[:supplied_id]][:rank], matched_rank(result), synonym,
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Namespace module for crossmapping checklists to GN sources
4
4
  module GnCrossmap
5
- VERSION = "3.1.5"
5
+ VERSION = "3.2.0"
6
6
 
7
7
  def self.version
8
8
  VERSION
@@ -26,8 +26,9 @@ module GnCrossmap
26
26
  private
27
27
 
28
28
  def output_fields(original_fields)
29
- original_fields + %i[matchedType inputName matchedName inputCanonicalForm
30
- matchedCanonicalForm matchedEditDistance inputRank
29
+ original_fields + %i[matchedType matchSize inputName matchedName
30
+ inputCanonicalForm matchedCanonicalForm
31
+ matchedEditDistance inputRank
31
32
  matchedRank synonymStatus acceptedName
32
33
  matchedScore matchTaxonID]
33
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gn_crossmap
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.5
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-08-10 00:00:00.000000000 Z
11
+ date: 2017-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: biodiversity