emiler 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b8779ed5ed7a15142398296cbd9c4338c768a7f
4
- data.tar.gz: e8290f544838222b96e34bbcccd4b58af982ba28
3
+ metadata.gz: 88c02b12935d6916f9499df2a434905469268109
4
+ data.tar.gz: 60f9eb13edbbaf3fd9dec3726840fad647a6dd38
5
5
  SHA512:
6
- metadata.gz: 645a20b05c46357a5b91e22aae4e6ae93c8ea6d1ffed6d4a922ccfb958720ba8919a554a630028003be97fbe9517fb44c4407003b220cc0c39399bf4ed1949d3
7
- data.tar.gz: 3c69442320ddb491a659e384c09e2218ca6ce4980ec47234bcb374027cb2f195dc62e3ec92448f726a4c00e440218709eedde135e0d2fc2ff10738e3afdf0212
6
+ metadata.gz: f382fcf45028650029f23230dd8f1f9445d21394fcdb8f49e252ed067cbf55876ec78f386002cad7acd0e7000fbbc36b59cdde84b5b9c05eb7470a2ebe0aa86e
7
+ data.tar.gz: c23d478e3b23404e8dfb408f62df2a5db66c65a2c9b003de02d879940ee2a259c75e4035ee27781a35cf4138c555be2aaf44865e53c83f2f54575ce48eae2757
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ['lib']
26
26
 
27
- spec.add_development_dependency 'bundler', '~> 1.11'
27
+ spec.add_development_dependency 'bundler', '~> 1'
28
28
  spec.add_development_dependency 'rake', '~> 10.0'
29
29
  spec.add_development_dependency 'rspec', '~> 3.0'
30
30
  spec.add_development_dependency 'pry', '~> 0.10'
@@ -1,24 +1,21 @@
1
1
  require 'emiler/version'
2
- require 'fuzzystringmatch'
2
+ require 'emiler/jarowinkler'
3
3
 
4
4
  module Emiler
5
- USE_PURE = ENV['USE_PURE_JW']
6
5
  INEXACT_MATCH_COEFFICIENT = ENV['INEXACT_MATCH_COEFFICIENT'] || 0.8
7
6
  RAISE_ON_MALFORMED_EMAIL = ENV['RAISE_ON_MALFORMED_EMAIL']
8
7
 
9
8
  class JW
10
9
  attr_reader :jw
11
- def initialize matcher = :native
12
- @jw = FuzzyStringMatch::JaroWinkler.create(matcher)
10
+ def initialize
11
+ @jw = FuzzyStringMatch::JaroWinklerPure.new
13
12
  end
14
13
 
15
14
  def distance s1, s2
16
15
  @jw.getDistance s1, s2
17
16
  end
18
17
 
19
- NATIVE = JW.new
20
- PURE = USE_PURE ? JW.new(:pure) : nil
21
- MATCHER = PURE || NATIVE
18
+ MATCHER = JW.new
22
19
  DUMMY = { jw: 0, full: 0, name: 0, domain: 0, result: false }.freeze
23
20
 
24
21
  private :initialize
@@ -32,7 +29,7 @@ module Emiler
32
29
 
33
30
  # rubocop:disable Metrics/AbcSize
34
31
  def similarity e1, e2
35
- e1, e2 = [e1, e2].map(&:downcase)
32
+ e1, e2 = [e1, e2].map(&:to_s).map(&:downcase)
36
33
  em1, em2 = [e1, e2].map { |e| e.split '@' }
37
34
 
38
35
  if em1.size != 2 || em2.size != 2
@@ -0,0 +1,101 @@
1
+ #
2
+ # Fuzzy String Match
3
+ #
4
+ # Copyright 2010-2011 Kiyoka Nishiyama
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ module FuzzyStringMatch
19
+ class JaroWinklerPure
20
+ THRESHOLD = 0.7
21
+
22
+ def pure?
23
+ true
24
+ end
25
+
26
+ def getDistance( s1, s2 )
27
+ a1 = s1.split( // )
28
+ a2 = s2.split( // )
29
+
30
+ if s1.size > s2.size
31
+ (max,min) = a1,a2
32
+ else
33
+ (max,min) = a2,a1
34
+ end
35
+
36
+ range = [ (max.size / 2 - 1), 0 ].max
37
+ indexes = Array.new( min.size, -1 )
38
+ flags = Array.new( max.size, false )
39
+
40
+ matches = 0;
41
+ (0 ... min.size).each { |mi|
42
+ c1 = min[mi]
43
+ xi = [mi - range, 0].max
44
+ xn = [mi + range + 1, max.size].min
45
+
46
+ (xi ... xn).each { |i|
47
+ if (not flags[i]) && ( c1 == max[i] )
48
+ indexes[mi] = i
49
+ flags[i] = true
50
+ matches += 1
51
+ break
52
+ end
53
+ }
54
+ }
55
+
56
+ ms1 = Array.new( matches, nil )
57
+ ms2 = Array.new( matches, nil )
58
+
59
+ si = 0
60
+ (0 ... min.size).each { |i|
61
+ if (indexes[i] != -1)
62
+ ms1[si] = min[i]
63
+ si += 1
64
+ end
65
+ }
66
+
67
+ si = 0
68
+ (0 ... max.size).each { |i|
69
+ if flags[i]
70
+ ms2[si] = max[i]
71
+ si += 1
72
+ end
73
+ }
74
+
75
+ transpositions = 0
76
+ (0 ... ms1.size).each { |mi|
77
+ if ms1[mi] != ms2[mi]
78
+ transpositions += 1
79
+ end
80
+ }
81
+
82
+ prefix = 0
83
+ (0 ... min.size).each { |mi|
84
+ if s1[mi] == s2[mi]
85
+ prefix += 1
86
+ else
87
+ break
88
+ end
89
+ }
90
+
91
+ if 0 == matches
92
+ 0.0
93
+ else
94
+ m = matches.to_f
95
+ t = (transpositions/ 2)
96
+ j = ((m / s1.size) + (m / s2.size) + ((m - t) / m)) / 3.0;
97
+ return j < THRESHOLD ? j : j + [0.1, 1.0 / max.size].min * prefix * (1 - j)
98
+ end
99
+ end
100
+ end
101
+ end
@@ -1,3 +1,3 @@
1
1
  module Emiler
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: emiler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksei Matiushkin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-03 00:00:00.000000000 Z
11
+ date: 2016-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '1'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -103,6 +103,7 @@ files:
103
103
  - bin/setup
104
104
  - emiler.gemspec
105
105
  - lib/emiler.rb
106
+ - lib/emiler/jarowinkler.rb
106
107
  - lib/emiler/version.rb
107
108
  homepage: https://kantox.com
108
109
  licenses: