gn_crossmap 3.1.5 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +8 -6
- data/lib/gn_crossmap.rb +5 -1
- data/lib/gn_crossmap/result_processor.rb +8 -6
- data/lib/gn_crossmap/version.rb +1 -1
- data/lib/gn_crossmap/writer.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bfd0f87bbaf73d66c33e130c124e087104994879
|
4
|
+
data.tar.gz: 11020365c22465b4d804faaf2cf13c6ee168b051
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ca351156f9007cc6aaa879b3d86852b2a1aeb2584182351ccd3d228997cbccb47604bf97654bd583e58bc1a7280d0f0acb9f6903095db783e4bf0e614cebddf
|
7
|
+
data.tar.gz: 5580be6d8d50f4385e05b798b37a987338e54270d132560df1465f7d2a8cd101b22394a867481577861f03fea28efdd412542092f7f8b5928bf9d8654c434321
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -251,15 +251,17 @@ More examples can be found in [spec/files][files] directory
|
|
251
251
|
|
252
252
|
Field | Description
|
253
253
|
---------------------|-----------------------------------------------------------
|
254
|
-
|
255
|
-
|
256
|
-
|
254
|
+
classification | classification path of the data source (if available)
|
255
|
+
editDistance | for fuzzy-matching -- how many characters differ between checklist and data source name
|
256
|
+
matchSize | number of returned matches for a name
|
257
|
+
matchType | what kind of match it is
|
257
258
|
matchedCanonicalForm | canonical form of the matched name
|
258
|
-
rank | rank from the source (if it was given/inferred)
|
259
259
|
matchedRank | corresponding rank from the data source
|
260
|
-
|
261
|
-
|
260
|
+
matchedScientificName| name matched from the GN Resolver data source
|
261
|
+
rank | rank from the source (if it was given/inferred)
|
262
|
+
scientificName | name from the checklist
|
262
263
|
score | heuristic score from 0 to 1, where 1 is a good match and a score of 0.5 means the match requires further human investigation
|
264
|
+
taxonID | original ID attached to a name in the checklist
|
263
265
|
|
264
266
|
#### Types of Matches
|
265
267
|
|
data/lib/gn_crossmap.rb
CHANGED
@@ -36,7 +36,7 @@ module GnCrossmap
|
|
36
36
|
|
37
37
|
def compile_empty_result(datum)
|
38
38
|
res = @original_data[datum[:supplied_id]]
|
39
|
-
res += [GnCrossmap::MATCH_TYPES[0], datum[:supplied_name_string],
|
39
|
+
res += [GnCrossmap::MATCH_TYPES[0], 0, datum[:supplied_name_string],
|
40
40
|
nil, nil, nil, nil,
|
41
41
|
@input[datum[:supplied_id]][:rank], nil, nil, nil, nil, nil]
|
42
42
|
res << nil if @with_classification
|
@@ -45,8 +45,9 @@ module GnCrossmap
|
|
45
45
|
|
46
46
|
def write_result(datum)
|
47
47
|
collect_stats(datum)
|
48
|
+
match_size = datum[:results].size
|
48
49
|
datum[:results].each do |result|
|
49
|
-
@writer.write(compile_result(datum, result))
|
50
|
+
@writer.write(compile_result(datum, result, match_size))
|
50
51
|
end
|
51
52
|
end
|
52
53
|
|
@@ -56,15 +57,16 @@ module GnCrossmap
|
|
56
57
|
@stats.stats[:resolved_records] += 1
|
57
58
|
end
|
58
59
|
|
59
|
-
def compile_result(datum, result)
|
60
|
-
@original_data[datum[:supplied_id]] + new_data(datum,
|
60
|
+
def compile_result(datum, result, match_size)
|
61
|
+
@original_data[datum[:supplied_id]] + new_data(datum,
|
62
|
+
result, match_size)
|
61
63
|
end
|
62
64
|
|
63
65
|
# rubocop:disable Metrics/AbcSize
|
64
66
|
|
65
|
-
def new_data(datum, result)
|
67
|
+
def new_data(datum, result, match_size)
|
66
68
|
synonym = result[:current_name_string] ? "synonym" : nil
|
67
|
-
res = [matched_type(result), datum[:supplied_name_string],
|
69
|
+
res = [matched_type(result), match_size, datum[:supplied_name_string],
|
68
70
|
result[:name_string], canonical(datum[:supplied_name_string]),
|
69
71
|
result[:canonical_form], result[:edit_distance],
|
70
72
|
@input[datum[:supplied_id]][:rank], matched_rank(result), synonym,
|
data/lib/gn_crossmap/version.rb
CHANGED
data/lib/gn_crossmap/writer.rb
CHANGED
@@ -26,8 +26,9 @@ module GnCrossmap
|
|
26
26
|
private
|
27
27
|
|
28
28
|
def output_fields(original_fields)
|
29
|
-
original_fields + %i[matchedType inputName matchedName
|
30
|
-
matchedCanonicalForm
|
29
|
+
original_fields + %i[matchedType matchSize inputName matchedName
|
30
|
+
inputCanonicalForm matchedCanonicalForm
|
31
|
+
matchedEditDistance inputRank
|
31
32
|
matchedRank synonymStatus acceptedName
|
32
33
|
matchedScore matchTaxonID]
|
33
34
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gn_crossmap
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.5
|
4
|
+
version: 3.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-08-
|
11
|
+
date: 2017-08-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: biodiversity
|