emiler 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b8779ed5ed7a15142398296cbd9c4338c768a7f
4
- data.tar.gz: e8290f544838222b96e34bbcccd4b58af982ba28
3
+ metadata.gz: 88c02b12935d6916f9499df2a434905469268109
4
+ data.tar.gz: 60f9eb13edbbaf3fd9dec3726840fad647a6dd38
5
5
  SHA512:
6
- metadata.gz: 645a20b05c46357a5b91e22aae4e6ae93c8ea6d1ffed6d4a922ccfb958720ba8919a554a630028003be97fbe9517fb44c4407003b220cc0c39399bf4ed1949d3
7
- data.tar.gz: 3c69442320ddb491a659e384c09e2218ca6ce4980ec47234bcb374027cb2f195dc62e3ec92448f726a4c00e440218709eedde135e0d2fc2ff10738e3afdf0212
6
+ metadata.gz: f382fcf45028650029f23230dd8f1f9445d21394fcdb8f49e252ed067cbf55876ec78f386002cad7acd0e7000fbbc36b59cdde84b5b9c05eb7470a2ebe0aa86e
7
+ data.tar.gz: c23d478e3b23404e8dfb408f62df2a5db66c65a2c9b003de02d879940ee2a259c75e4035ee27781a35cf4138c555be2aaf44865e53c83f2f54575ce48eae2757
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ['lib']
26
26
 
27
- spec.add_development_dependency 'bundler', '~> 1.11'
27
+ spec.add_development_dependency 'bundler', '~> 1'
28
28
  spec.add_development_dependency 'rake', '~> 10.0'
29
29
  spec.add_development_dependency 'rspec', '~> 3.0'
30
30
  spec.add_development_dependency 'pry', '~> 0.10'
@@ -1,24 +1,21 @@
1
1
  require 'emiler/version'
2
- require 'fuzzystringmatch'
2
+ require 'emiler/jarowinkler'
3
3
 
4
4
  module Emiler
5
- USE_PURE = ENV['USE_PURE_JW']
6
5
  INEXACT_MATCH_COEFFICIENT = ENV['INEXACT_MATCH_COEFFICIENT'] || 0.8
7
6
  RAISE_ON_MALFORMED_EMAIL = ENV['RAISE_ON_MALFORMED_EMAIL']
8
7
 
9
8
  class JW
10
9
  attr_reader :jw
11
- def initialize matcher = :native
12
- @jw = FuzzyStringMatch::JaroWinkler.create(matcher)
10
+ def initialize
11
+ @jw = FuzzyStringMatch::JaroWinklerPure.new
13
12
  end
14
13
 
15
14
  def distance s1, s2
16
15
  @jw.getDistance s1, s2
17
16
  end
18
17
 
19
- NATIVE = JW.new
20
- PURE = USE_PURE ? JW.new(:pure) : nil
21
- MATCHER = PURE || NATIVE
18
+ MATCHER = JW.new
22
19
  DUMMY = { jw: 0, full: 0, name: 0, domain: 0, result: false }.freeze
23
20
 
24
21
  private :initialize
@@ -32,7 +29,7 @@ module Emiler
32
29
 
33
30
  # rubocop:disable Metrics/AbcSize
34
31
  def similarity e1, e2
35
- e1, e2 = [e1, e2].map(&:downcase)
32
+ e1, e2 = [e1, e2].map(&:to_s).map(&:downcase)
36
33
  em1, em2 = [e1, e2].map { |e| e.split '@' }
37
34
 
38
35
  if em1.size != 2 || em2.size != 2
@@ -0,0 +1,101 @@
1
+ #
2
+ # Fuzzy String Match
3
+ #
4
+ # Copyright 2010-2011 Kiyoka Nishiyama
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ module FuzzyStringMatch
19
+ class JaroWinklerPure
20
+ THRESHOLD = 0.7
21
+
22
+ def pure?
23
+ true
24
+ end
25
+
26
+ def getDistance( s1, s2 )
27
+ a1 = s1.split( // )
28
+ a2 = s2.split( // )
29
+
30
+ if s1.size > s2.size
31
+ (max,min) = a1,a2
32
+ else
33
+ (max,min) = a2,a1
34
+ end
35
+
36
+ range = [ (max.size / 2 - 1), 0 ].max
37
+ indexes = Array.new( min.size, -1 )
38
+ flags = Array.new( max.size, false )
39
+
40
+ matches = 0;
41
+ (0 ... min.size).each { |mi|
42
+ c1 = min[mi]
43
+ xi = [mi - range, 0].max
44
+ xn = [mi + range + 1, max.size].min
45
+
46
+ (xi ... xn).each { |i|
47
+ if (not flags[i]) && ( c1 == max[i] )
48
+ indexes[mi] = i
49
+ flags[i] = true
50
+ matches += 1
51
+ break
52
+ end
53
+ }
54
+ }
55
+
56
+ ms1 = Array.new( matches, nil )
57
+ ms2 = Array.new( matches, nil )
58
+
59
+ si = 0
60
+ (0 ... min.size).each { |i|
61
+ if (indexes[i] != -1)
62
+ ms1[si] = min[i]
63
+ si += 1
64
+ end
65
+ }
66
+
67
+ si = 0
68
+ (0 ... max.size).each { |i|
69
+ if flags[i]
70
+ ms2[si] = max[i]
71
+ si += 1
72
+ end
73
+ }
74
+
75
+ transpositions = 0
76
+ (0 ... ms1.size).each { |mi|
77
+ if ms1[mi] != ms2[mi]
78
+ transpositions += 1
79
+ end
80
+ }
81
+
82
+ prefix = 0
83
+ (0 ... min.size).each { |mi|
84
+ if s1[mi] == s2[mi]
85
+ prefix += 1
86
+ else
87
+ break
88
+ end
89
+ }
90
+
91
+ if 0 == matches
92
+ 0.0
93
+ else
94
+ m = matches.to_f
95
+ t = (transpositions/ 2)
96
+ j = ((m / s1.size) + (m / s2.size) + ((m - t) / m)) / 3.0;
97
+ return j < THRESHOLD ? j : j + [0.1, 1.0 / max.size].min * prefix * (1 - j)
98
+ end
99
+ end
100
+ end
101
+ end
@@ -1,3 +1,3 @@
1
1
  module Emiler
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: emiler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksei Matiushkin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-03 00:00:00.000000000 Z
11
+ date: 2016-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '1'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -103,6 +103,7 @@ files:
103
103
  - bin/setup
104
104
  - emiler.gemspec
105
105
  - lib/emiler.rb
106
+ - lib/emiler/jarowinkler.rb
106
107
  - lib/emiler/version.rb
107
108
  homepage: https://kantox.com
108
109
  licenses: