emiler 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/emiler.gemspec +1 -1
- data/lib/emiler.rb +5 -8
- data/lib/emiler/jarowinkler.rb +101 -0
- data/lib/emiler/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88c02b12935d6916f9499df2a434905469268109
|
4
|
+
data.tar.gz: 60f9eb13edbbaf3fd9dec3726840fad647a6dd38
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f382fcf45028650029f23230dd8f1f9445d21394fcdb8f49e252ed067cbf55876ec78f386002cad7acd0e7000fbbc36b59cdde84b5b9c05eb7470a2ebe0aa86e
|
7
|
+
data.tar.gz: c23d478e3b23404e8dfb408f62df2a5db66c65a2c9b003de02d879940ee2a259c75e4035ee27781a35cf4138c555be2aaf44865e53c83f2f54575ce48eae2757
|
data/emiler.gemspec
CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
25
25
|
spec.require_paths = ['lib']
|
26
26
|
|
27
|
-
spec.add_development_dependency 'bundler', '~> 1
|
27
|
+
spec.add_development_dependency 'bundler', '~> 1'
|
28
28
|
spec.add_development_dependency 'rake', '~> 10.0'
|
29
29
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
30
30
|
spec.add_development_dependency 'pry', '~> 0.10'
|
data/lib/emiler.rb
CHANGED
@@ -1,24 +1,21 @@
|
|
1
1
|
require 'emiler/version'
|
2
|
-
require '
|
2
|
+
require 'emiler/jarowinkler'
|
3
3
|
|
4
4
|
module Emiler
|
5
|
-
USE_PURE = ENV['USE_PURE_JW']
|
6
5
|
INEXACT_MATCH_COEFFICIENT = ENV['INEXACT_MATCH_COEFFICIENT'] || 0.8
|
7
6
|
RAISE_ON_MALFORMED_EMAIL = ENV['RAISE_ON_MALFORMED_EMAIL']
|
8
7
|
|
9
8
|
class JW
|
10
9
|
attr_reader :jw
|
11
|
-
def initialize
|
12
|
-
@jw = FuzzyStringMatch::
|
10
|
+
def initialize
|
11
|
+
@jw = FuzzyStringMatch::JaroWinklerPure.new
|
13
12
|
end
|
14
13
|
|
15
14
|
def distance s1, s2
|
16
15
|
@jw.getDistance s1, s2
|
17
16
|
end
|
18
17
|
|
19
|
-
|
20
|
-
PURE = USE_PURE ? JW.new(:pure) : nil
|
21
|
-
MATCHER = PURE || NATIVE
|
18
|
+
MATCHER = JW.new
|
22
19
|
DUMMY = { jw: 0, full: 0, name: 0, domain: 0, result: false }.freeze
|
23
20
|
|
24
21
|
private :initialize
|
@@ -32,7 +29,7 @@ module Emiler
|
|
32
29
|
|
33
30
|
# rubocop:disable Metrics/AbcSize
|
34
31
|
def similarity e1, e2
|
35
|
-
e1, e2 = [e1, e2].map(&:downcase)
|
32
|
+
e1, e2 = [e1, e2].map(&:to_s).map(&:downcase)
|
36
33
|
em1, em2 = [e1, e2].map { |e| e.split '@' }
|
37
34
|
|
38
35
|
if em1.size != 2 || em2.size != 2
|
@@ -0,0 +1,101 @@
|
|
1
|
+
#
|
2
|
+
# Fuzzy String Match
|
3
|
+
#
|
4
|
+
# Copyright 2010-2011 Kiyoka Nishiyama
|
5
|
+
#
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License.
|
8
|
+
# You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
#
|
18
|
+
module FuzzyStringMatch
  # Pure-Ruby Jaro-Winkler similarity between two strings.
  class JaroWinklerPure
    # Jaro scores below this value are returned as-is, without the
    # common-prefix bonus.
    THRESHOLD = 0.7

    # Reports that this implementation is written in plain Ruby.
    #
    # @return [true]
    def pure?
      true
    end

    # Scores how similar two strings are.
    #
    # The base (Jaro) score averages three ratios: matched characters over
    # the length of +s1+, matched characters over the length of +s2+, and the
    # share of matched characters that are not transposed. When that score
    # reaches THRESHOLD, a bonus is added for every leading position at which
    # both strings agree. The prefix length is not capped; instead each
    # position is weighted by min(0.1, 1 / longer length), which keeps the
    # result from exceeding 1.0.
    #
    # @param s1 [String] first string
    # @param s2 [String] second string
    # @return [Float] 0.0 when no characters match (including two empty
    #   strings), 1.0 for identical non-empty strings
    def getDistance(s1, s2)
      a1 = s1.split(//)
      a2 = s2.split(//)
      # On a length tie the second string plays the role of the longer one.
      max, min = s1.size > s2.size ? [a1, a2] : [a2, a1]

      min_hits, max_hits = match_flags(min, max)
      ms1 = pick(min, min_hits)
      return 0.0 if ms1.empty?

      ms2 = pick(max, max_hits)
      # Matched characters that appear in a different order in each string.
      transpositions = ms1.zip(ms2).count { |c1, c2| c1 != c2 }
      # Number of leading positions at which both strings agree.
      prefix = min.zip(max).take_while { |c1, c2| c1 == c2 }.size

      m = ms1.size.to_f
      # Integer division: an odd leftover mismatch is dropped.
      t = transpositions / 2
      j = ((m / s1.size) + (m / s2.size) + ((m - t) / m)) / 3.0
      j < THRESHOLD ? j : j + [0.1, 1.0 / max.size].min * prefix * (1 - j)
    end

    private

    # Pairs each character of the shorter array with the first still-unused
    # equal character of the longer array lying within the search window.
    #
    # @param min [Array<String>] characters of the shorter string
    # @param max [Array<String>] characters of the longer string
    # @return [Array(Array<Boolean>, Array<Boolean>)] per-position "matched"
    #   flags for +min+ and for +max+
    def match_flags(min, max)
      # How far to either side of its own index a character may be matched.
      range = [max.size / 2 - 1, 0].max
      min_hits = Array.new(min.size, false)
      max_hits = Array.new(max.size, false)

      min.each_with_index do |char, mi|
        from = [mi - range, 0].max
        to = [mi + range + 1, max.size].min
        hit = (from...to).find { |i| !max_hits[i] && char == max[i] }
        next unless hit

        min_hits[mi] = true
        max_hits[hit] = true
      end

      [min_hits, max_hits]
    end

    # Returns, in their original order, the characters whose flag is set.
    #
    # @param chars [Array<String>]
    # @param hits [Array<Boolean>] flags parallel to +chars+
    # @return [Array<String>]
    def pick(chars, hits)
      chars.each_index.select { |i| hits[i] }.map { |i| chars[i] }
    end
  end
end
|
data/lib/emiler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emiler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksei Matiushkin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '1'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- bin/setup
|
104
104
|
- emiler.gemspec
|
105
105
|
- lib/emiler.rb
|
106
|
+
- lib/emiler/jarowinkler.rb
|
106
107
|
- lib/emiler/version.rb
|
107
108
|
homepage: https://kantox.com
|
108
109
|
licenses:
|