taxamatch_rb 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f9cbd9334dff96ed1723f1487bc3bb89805c4f4
4
- data.tar.gz: 5928a559f917d9908d251cb873285e0242d60476
3
+ metadata.gz: 83617a85524edc8d1a9615a12b955a4a01ce8a0a
4
+ data.tar.gz: 480ac61418d818621282531ce74680bb99bdce19
5
5
  SHA512:
6
- metadata.gz: d598f616d3f34f1cfc051b1e6ee049075eb3a87d5714d3712075434eead6f88e3667bde46edfd0a5cd7a8c868b3139d4ca9802f21205f38e918b4eba07e92324
7
- data.tar.gz: d429b8e677be3fe170f01c66dc07c002fa05e77f14fc89e78dce4f6ded814c3a52477e44ebc0fb8688bf40cba686da45e5ede275d54cebdf6d3daa7f518637e5
6
+ metadata.gz: 15373025eec71b3dfe679406f3a12e74aac836536c616d0d37196b6f253760ffb91124ffb4d0c86dc7c37d406a6ed038cd459137024ac52f1ca7b10491890096
7
+ data.tar.gz: 38f1a2cd62d2271151b698529a17d1eeac07317baf0bca91523569e465c383e1d6644aca42d8ba0cc41ee6603a3c7e94c468c51c8fec32773a998b9f3328e2d4
data/CHANGELOG CHANGED
@@ -1,3 +1,5 @@
1
+ 1.1.1 - revert changes which changed API
2
+
1
3
  1.1.0 - create gem with bundle instead of jeweler, refactoring
2
4
 
3
5
  1.0.0 - fixed a parsing problem with infraspecies without string,
@@ -1,31 +1,37 @@
1
1
  module Taxamatch
2
- # Matches name strings of scientific names
2
+
3
3
  class Base
4
+
4
5
  def initialize
5
6
  @parser = Taxamatch::Atomizer.new
6
7
  @dlm = DamerauLevenshtein
7
8
  end
8
9
 
10
+
11
+ # takes two scientific names and returns true
12
+ # if names match and false if they don't
9
13
  def taxamatch(str1, str2, return_boolean = true)
10
14
  preparsed_1 = @parser.parse(str1)
11
15
  preparsed_2 = @parser.parse(str2)
12
- match = taxamatch_preparsed(preparsed_1, preparsed_2)
13
- return_boolean ? (!!match && match["match"]) : match
16
+ match = taxamatch_preparsed(preparsed_1, preparsed_2) rescue nil
17
+ return_boolean ? (!!match && match['match']) : match
14
18
  end
15
19
 
20
+ # takes two hashes of parsed scientific names, analyses them and
21
+ # returns the match result. This function is useful when species strings are preparsed.
16
22
  def taxamatch_preparsed(preparsed_1, preparsed_2)
17
23
  result = nil
18
24
  if preparsed_1[:uninomial] && preparsed_2[:uninomial]
19
25
  result = match_uninomial(preparsed_1, preparsed_2)
20
- elsif preparsed_1[:genus] && preparsed_2[:genus]
26
+ end
27
+ if preparsed_1[:genus] && preparsed_2[:genus]
21
28
  result = match_multinomial(preparsed_1, preparsed_2)
22
29
  end
23
- if result && result["match"]
24
- result["match"] = match_authors(preparsed_1, preparsed_2)
30
+ if result && result['match']
31
+ result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ?
32
+ false : true
25
33
  end
26
- result
27
- rescue StandardError
28
- nil
34
+ return result
29
35
  end
30
36
 
31
37
  def match_uninomial(preparsed_1, preparsed_2)
@@ -47,17 +53,17 @@ module Taxamatch
47
53
  match_hash = match_matches(gen_match, sp_match, infrasp_match)
48
54
  elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) ||
49
55
  (!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
50
- match_hash = { "match" => false,
51
- "edit_distance" => 5,
52
- "phonetic_match" => false }
56
+ match_hash = { 'match' => false,
57
+ 'edit_distance' => 5,
58
+ 'phonetic_match' => false }
53
59
  total_length += preparsed_1[:infraspecies] ?
54
60
  preparsed_1[:infraspecies][0][:string].size :
55
61
  preparsed_2[:infraspecies][0][:string].size
56
62
  else
57
63
  match_hash = match_matches(gen_match, sp_match)
58
64
  end
59
- match_hash.merge({ "score" =>
60
- (1 - match_hash["edit_distance"]/(total_length/2)) })
65
+ match_hash.merge({ 'score' =>
66
+ (1 - match_hash['edit_distance']/(total_length/2)) })
61
67
  match_hash
62
68
  end
63
69
 
@@ -67,22 +73,22 @@ module Taxamatch
67
73
  opts = { with_phonetic_match: true }.merge(opts)
68
74
  min_length = [genus1_length, genus2_length].min
69
75
  unless opts[:with_phonetic_match]
70
- genus1[:phonetized] = "A"
71
- genus2[:phonetized] = "B"
76
+ genus1[:phonetized] = 'A'
77
+ genus2[:phonetized] = 'B'
72
78
  end
73
79
  match = false
74
80
  ed = @dlm.distance(genus1[:normalized],
75
81
  genus2[:normalized], 1, 3) #TODO put block = 2
76
- return { "edit_distance" => ed,
77
- "phonetic_match" => false,
78
- "match" => false } if ed/min_length.to_f > 0.2
79
- return { "edit_distance" => ed,
80
- "phonetic_match" => true,
81
- "match" => true } if genus1[:phonetized] == genus2[:phonetized]
82
+ return { 'edit_distance' => ed,
83
+ 'phonetic_match' => false,
84
+ 'match' => false } if ed/min_length.to_f > 0.2
85
+ return { 'edit_distance' => ed,
86
+ 'phonetic_match' => true,
87
+ 'match' => true } if genus1[:phonetized] == genus2[:phonetized]
82
88
 
83
89
  match = true if ed <= 3 && (min_length > ed * 2) &&
84
90
  (ed < 2 || genus1[0] == genus2[0])
85
- { "edit_distance" => ed, "match" => match, "phonetic_match" => false }
91
+ { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
86
92
  end
87
93
 
88
94
  def match_species(sp1, sp2, opts = {})
@@ -91,26 +97,26 @@ module Taxamatch
91
97
  opts = { with_phonetic_match: true }.merge(opts)
92
98
  min_length = [sp1_length, sp2_length].min
93
99
  unless opts[:with_phonetic_match]
94
- sp1[:phonetized] = "A"
95
- sp2[:phonetized] = "B"
100
+ sp1[:phonetized] = 'A'
101
+ sp2[:phonetized] = 'B'
96
102
  end
97
103
  sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
98
104
  sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
99
105
  match = false
100
106
  ed = @dlm.distance(sp1[:normalized],
101
107
  sp2[:normalized], 1, 4) #TODO put block 4
102
- return { "edit_distance" => ed,
103
- "phonetic_match" => false,
104
- "match" => false } if ed/min_length.to_f > 0.3334
105
- return {"edit_distance" => ed,
106
- "phonetic_match" => true,
107
- "match" => true} if sp1[:phonetized] == sp2[:phonetized]
108
+ return { 'edit_distance' => ed,
109
+ 'phonetic_match' => false,
110
+ 'match' => false } if ed/min_length.to_f > 0.3334
111
+ return {'edit_distance' => ed,
112
+ 'phonetic_match' => true,
113
+ 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
108
114
 
109
115
  match = true if ed <= 4 &&
110
116
  (min_length >= ed * 2) &&
111
117
  (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) &&
112
118
  (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
113
- { "edit_distance" => ed, "match" => match, "phonetic_match" => false }
119
+ { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
114
120
  end
115
121
 
116
122
  def match_authors(preparsed_1, preparsed_2)
@@ -130,25 +136,27 @@ module Taxamatch
130
136
  au2 = p2[:normalized_authors]
131
137
  yr1 = p1[:years]
132
138
  yr2 = p2[:years]
133
- return true if au1.empty? || au2.empty?
139
+ return 0 if au1.empty? || au2.empty?
134
140
  score = Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
135
- score == 0 ? false : true
141
+ score == 0 ? -1 : 1
136
142
  end
137
143
 
138
144
  def match_matches(genus_match, species_match, infraspecies_match = nil)
139
145
  match = species_match
140
146
  if infraspecies_match
141
- match["edit_distance"] += infraspecies_match["edit_distance"]
142
- match["match"] &&= infraspecies_match["match"]
143
- match["phonetic_match"] &&= infraspecies_match["phonetic_match"]
147
+ match['edit_distance'] += infraspecies_match['edit_distance']
148
+ match['match'] &&= infraspecies_match['match']
149
+ match['phonetic_match'] &&= infraspecies_match['phonetic_match']
144
150
  end
145
- match["edit_distance"] += genus_match["edit_distance"]
146
- if match["edit_distance"] > (infraspecies_match ? 6 : 4)
147
- match["match"] = false
151
+ match['edit_distance'] += genus_match['edit_distance']
152
+ if match['edit_distance'] > (infraspecies_match ? 6 : 4)
153
+ match['match'] = false
148
154
  end
149
- match["match"] &&= genus_match["match"]
150
- match["phonetic_match"] &&= genus_match["phonetic_match"]
155
+ match['match'] &&= genus_match['match']
156
+ match['phonetic_match'] &&= genus_match['phonetic_match']
151
157
  match
152
158
  end
159
+
153
160
  end
154
161
  end
162
+
@@ -1,5 +1,5 @@
1
1
  module Taxamatch
2
- VERSION = "1.1.0"
2
+ VERSION = "1.1.1"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxamatch_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin