emiler 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +26 -0
- data/.travis.yml +4 -0
- data/Gemfile +2 -0
- data/README.md +6 -1
- data/lib/emiler/jarowinkler.rb +34 -43
- data/lib/emiler/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0361dccbd493b95c973033250393401c2888c094
|
4
|
+
data.tar.gz: a03b7c228c9e3c8da609a5d44b20a1ab9c4450d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 68cc06b5d23967e4ca4921640d5c856a087a258fc977cd27e4b268d58c512e2b509ca4693b18044258079e44e8ab799a5f7a791a4f2aee6eb2d3a5afd3c2f613
|
7
|
+
data.tar.gz: 45567581427124b3718141db0bd043989ddc393cc3a4edae6b0b132380c6e487ce217c421e46bba62cfb349194d0ad309504ceab79fa01c66ef972762704831c
|
data/.codeclimate.yml
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
---
|
2
|
+
engines:
|
3
|
+
duplication:
|
4
|
+
enabled: true
|
5
|
+
config:
|
6
|
+
languages:
|
7
|
+
- ruby
|
8
|
+
- javascript
|
9
|
+
- python
|
10
|
+
- php
|
11
|
+
fixme:
|
12
|
+
enabled: true
|
13
|
+
rubocop:
|
14
|
+
enabled: true
|
15
|
+
ratings:
|
16
|
+
paths:
|
17
|
+
- "**.inc"
|
18
|
+
- "**.js"
|
19
|
+
- "**.jsx"
|
20
|
+
- "**.module"
|
21
|
+
- "**.php"
|
22
|
+
- "**.py"
|
23
|
+
- "**.rb"
|
24
|
+
exclude_paths:
|
25
|
+
- features/
|
26
|
+
- spec/
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
# Emiler
|
2
2
|
|
3
3
|
Naïve distance calculation for emails. Returns “similar” based on an artificial
|
4
|
-
name and domain comparison.
|
4
|
+
name and domain comparison.
|
5
|
+
|
6
|
+
[![Build Status](https://travis-ci.org/am-kantox/emiler.svg?branch=master)](https://travis-ci.org/am-kantox/emiler)
|
7
|
+
[![Code Climate](https://codeclimate.com/github/am-kantox/emiler/badges/gpa.svg)](https://codeclimate.com/github/am-kantox/emiler)
|
8
|
+
[![Test Coverage](https://codeclimate.com/github/am-kantox/emiler/badges/coverage.svg)](https://codeclimate.com/github/am-kantox/emiler/coverage)
|
9
|
+
[![Issue Count](https://codeclimate.com/github/am-kantox/emiler/badges/issue_count.svg)](https://codeclimate.com/github/am-kantox/emiler)
|
5
10
|
|
6
11
|
## Installation
|
7
12
|
|
data/lib/emiler/jarowinkler.rb
CHANGED
@@ -23,77 +23,68 @@ module FuzzyStringMatch
|
|
23
23
|
true
|
24
24
|
end
|
25
25
|
|
26
|
-
def getDistance(
|
27
|
-
a1 = s1.split(
|
28
|
-
a2 = s2.split(
|
26
|
+
def getDistance(s1, s2)
|
27
|
+
a1 = s1.split(//)
|
28
|
+
a2 = s2.split(//)
|
29
29
|
|
30
|
-
|
31
|
-
(max,min) = a1,a2
|
32
|
-
else
|
33
|
-
(max,min) = a2,a1
|
34
|
-
end
|
30
|
+
max, min = s1.size > s2.size ? [a1, a2] : [a2, a1]
|
35
31
|
|
36
|
-
range = [
|
37
|
-
indexes = Array.new(
|
38
|
-
flags = Array.new(
|
32
|
+
range = [(max.size / 2 - 1), 0].max
|
33
|
+
indexes = Array.new(min.size, -1)
|
34
|
+
flags = Array.new(max.size, false)
|
39
35
|
|
40
|
-
matches = 0
|
41
|
-
(0
|
36
|
+
matches = 0
|
37
|
+
(0...min.size).each do |mi|
|
42
38
|
c1 = min[mi]
|
43
39
|
xi = [mi - range, 0].max
|
44
40
|
xn = [mi + range + 1, max.size].min
|
45
41
|
|
46
|
-
(xi
|
47
|
-
|
48
|
-
indexes[mi] = i
|
49
|
-
flags[i] = true
|
50
|
-
matches += 1
|
51
|
-
break
|
52
|
-
end
|
53
|
-
}
|
54
|
-
}
|
42
|
+
(xi...xn).each do |i|
|
43
|
+
next unless !flags[i] && c1 == max[i]
|
55
44
|
|
56
|
-
|
57
|
-
|
45
|
+
indexes[mi] = i
|
46
|
+
flags[i] = true
|
47
|
+
matches += 1
|
48
|
+
break
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ms1 = Array.new(matches, nil)
|
53
|
+
ms2 = Array.new(matches, nil)
|
58
54
|
|
59
55
|
si = 0
|
60
|
-
(0
|
61
|
-
if
|
56
|
+
(0...min.size).each do |i|
|
57
|
+
if indexes[i] != -1
|
62
58
|
ms1[si] = min[i]
|
63
59
|
si += 1
|
64
60
|
end
|
65
|
-
|
61
|
+
end
|
66
62
|
|
67
63
|
si = 0
|
68
|
-
(0
|
64
|
+
(0...max.size).each do |i|
|
69
65
|
if flags[i]
|
70
66
|
ms2[si] = max[i]
|
71
67
|
si += 1
|
72
68
|
end
|
73
|
-
|
69
|
+
end
|
74
70
|
|
75
71
|
transpositions = 0
|
76
|
-
(0
|
77
|
-
if ms1[mi] != ms2[mi]
|
78
|
-
|
79
|
-
end
|
80
|
-
}
|
72
|
+
(0...ms1.size).each do |mi|
|
73
|
+
transpositions += 1 if ms1[mi] != ms2[mi]
|
74
|
+
end
|
81
75
|
|
82
76
|
prefix = 0
|
83
|
-
(0
|
84
|
-
if s1[mi] == s2[mi]
|
85
|
-
|
86
|
-
|
87
|
-
break
|
88
|
-
end
|
89
|
-
}
|
77
|
+
(0...min.size).each do |mi|
|
78
|
+
prefix += 1 if s1[mi] == s2[mi]
|
79
|
+
break unless s1[mi] == s2[mi]
|
80
|
+
end
|
90
81
|
|
91
82
|
if 0 == matches
|
92
83
|
0.0
|
93
84
|
else
|
94
85
|
m = matches.to_f
|
95
|
-
t = (transpositions/ 2)
|
96
|
-
j = ((m / s1.size) + (m / s2.size) + ((m - t) / m)) / 3.0
|
86
|
+
t = (transpositions / 2)
|
87
|
+
j = ((m / s1.size) + (m / s2.size) + ((m - t) / m)) / 3.0
|
97
88
|
return j < THRESHOLD ? j : j + [0.1, 1.0 / max.size].min * prefix * (1 - j)
|
98
89
|
end
|
99
90
|
end
|
data/lib/emiler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: emiler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksei Matiushkin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -75,6 +75,7 @@ executables:
|
|
75
75
|
extensions: []
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
|
+
- ".codeclimate.yml"
|
78
79
|
- ".gitignore"
|
79
80
|
- ".rspec"
|
80
81
|
- ".rubocop.yml"
|