jaro_winkler 1.3.1 → 1.3.2.beta
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -16
- data/benchmark/pure.rb +1 -1
- data/benchmark/pure.txt +5 -5
- data/ext/jaro_winkler/adj_matrix.c +62 -0
- data/ext/jaro_winkler/adj_matrix.h +21 -0
- data/ext/jaro_winkler/distance.c +16 -38
- data/ext/jaro_winkler/distance.h +2 -2
- data/ext/jaro_winkler/jaro_winkler.c +2 -3
- data/ext/jaro_winkler/murmur_hash2.c +64 -0
- data/lib/jaro_winkler.rb +2 -2
- data/lib/jaro_winkler/version.rb +1 -1
- data/spec/jaro_winkler_spec.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a90ee9e013479d3c47c7cd632646921f87af4fc
|
4
|
+
data.tar.gz: 30d065fc0728d3fda6db84af71584f7a60b038cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d29510e81e2ab5510a85360321e77444363531a2786c5f5dd6213514c0f5d97232ac5d19b25eb0fbd3bc9972dc255e326b7f600c2254cdb2fe6ed7be20cd76e9
|
7
|
+
data.tar.gz: 5cbb9e3167a42f86ecd6b93dfd9fe21aca7e599e7faf492e9c8f59b109958b157faff0601f1b8078ee6b3a0e92165eda9cab36d6d8eb9d7c5935f3828da18d74
|
data/README.md
CHANGED
@@ -46,7 +46,7 @@ adj_table | boolean | false | The option is used to give partial credit for
|
|
46
46
|
|
47
47
|
origin formula:
|
48
48
|
|
49
|
-
![origin](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%
|
49
|
+
![origin](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%7B%5Ctext%7Bif%20%7Dm%3D0%7D%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
|
50
50
|
|
51
51
|
where
|
52
52
|
|
@@ -55,7 +55,7 @@ where
|
|
55
55
|
|
56
56
|
with adjusting table:
|
57
57
|
|
58
|
-
![adj](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%
|
58
|
+
![adj](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%5Ctext%7Bif%20%7Dm%3D0%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
|
59
59
|
|
60
60
|
where
|
61
61
|
|
@@ -76,7 +76,7 @@ Windows Support | **Yes** | | No | **Yes**
|
|
76
76
|
Adjusting Table | **Yes** | No | No | No
|
77
77
|
Native | **Yes** | **Yes** | **Yes** | **Yes**
|
78
78
|
Pure Ruby | **Yes** | **Yes** | No | No
|
79
|
-
Speed | Medium | Fast
|
79
|
+
Speed | Medium | **Fast** | Medium | Slow
|
80
80
|
Bug Found | **Not Yet** | Yes | **Not Yet** | Yes
|
81
81
|
|
82
82
|
For `Bug Found`, I made a rake task to build the table below, the source code is in `Rakefile`:
|
@@ -99,30 +99,28 @@ str_1 | str_2 | origin | jaro_winkler | fuzzystringmatch | hotwater |
|
|
99
99
|
|
100
100
|
### Pure Ruby
|
101
101
|
|
102
|
-
| user
|
103
|
-
---------------- |
|
104
|
-
jaro_winkler |
|
105
|
-
fuzzystringmatch |
|
102
|
+
| user | system | total | real
|
103
|
+
---------------- | -------- | -------- | -------- | ------------
|
104
|
+
jaro_winkler | 1.300000 | 0.000000 | 1.300000 | ( 1.299802)
|
105
|
+
fuzzystringmatch | 1.510000 | 0.000000 | 1.510000 | ( 1.510136)
|
106
106
|
|
107
|
-
- jaro_winkler (1.
|
107
|
+
- jaro_winkler (1.3.1)
|
108
108
|
- fuzzy-string-match (0.9.6)
|
109
109
|
|
110
110
|
### Native
|
111
111
|
|
112
112
|
| user | system | total | real
|
113
113
|
---------------- | -------- | -------- | -------- | ------------
|
114
|
-
jaro_winkler | 0.
|
115
|
-
fuzzystringmatch | 0.
|
116
|
-
hotwater | 0.
|
117
|
-
amatch | 0.960000 | 0.
|
114
|
+
jaro_winkler | 0.350000 | 0.010000 | 0.360000 | ( 0.345293)
|
115
|
+
fuzzystringmatch | 0.140000 | 0.000000 | 0.140000 | ( 0.138711)
|
116
|
+
hotwater | 0.310000 | 0.000000 | 0.310000 | ( 0.306498)
|
117
|
+
amatch | 0.960000 | 0.000000 | 0.960000 | ( 0.961509)
|
118
118
|
|
119
|
-
- jaro_winkler (1.
|
119
|
+
- jaro_winkler (1.3.1)
|
120
120
|
- fuzzy-string-match (0.9.6)
|
121
121
|
- hotwater (0.1.2)
|
122
122
|
- amatch (0.3.0)
|
123
123
|
|
124
124
|
# Todo
|
125
125
|
|
126
|
-
- Custom adjusting word table.
|
127
|
-
- If the adjusting table is ASCII encoded, use dense matrix instread of sparse matrix to speed up.
|
128
|
-
- Call by reference instead of call by value to enhance performance.
|
126
|
+
- Custom adjusting word table.
|
data/benchmark/pure.rb
CHANGED
@@ -3,7 +3,7 @@ require 'jaro_winkler'
|
|
3
3
|
require 'fuzzystringmatch'
|
4
4
|
ary = [['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten']]
|
5
5
|
|
6
|
-
n =
|
6
|
+
n = 10000
|
7
7
|
Benchmark.bmbm do |x|
|
8
8
|
x.report 'jaro_winkler' do
|
9
9
|
n.times{ ary.each{ |str1, str2| JaroWinkler.r_distance(str1, str2) } }
|
data/benchmark/pure.txt
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
Rehearsal ----------------------------------------------------
|
2
|
-
jaro_winkler
|
3
|
-
fuzzystringmatch
|
4
|
-
|
2
|
+
jaro_winkler 1.300000 0.000000 1.300000 ( 1.300723)
|
3
|
+
fuzzystringmatch 1.500000 0.010000 1.510000 ( 1.497842)
|
4
|
+
------------------------------------------- total: 2.810000sec
|
5
5
|
|
6
6
|
user system total real
|
7
|
-
jaro_winkler
|
8
|
-
fuzzystringmatch
|
7
|
+
jaro_winkler 1.300000 0.000000 1.300000 ( 1.299802)
|
8
|
+
fuzzystringmatch 1.510000 0.000000 1.510000 ( 1.510136)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include "adj_matrix.h"
|
3
|
+
|
4
|
+
extern unsigned int MurmurHash2(const void * key, int len, unsigned int seed);
|
5
|
+
static void node_free(Node *head);
|
6
|
+
|
7
|
+
AdjMatrix* adj_matrix_new(unsigned int length){
|
8
|
+
AdjMatrix *matrix = malloc(sizeof(AdjMatrix));
|
9
|
+
matrix->length = length == 0 ? ADJ_MATRIX_DEFAULT_LENGTH : length;
|
10
|
+
matrix->table = malloc(matrix->length * sizeof(Node**));
|
11
|
+
for(int i = 0; i < matrix->length; i++){
|
12
|
+
matrix->table[i] = malloc(matrix->length * sizeof(Node*));
|
13
|
+
for (int j = 0; j < matrix->length; j++)
|
14
|
+
matrix->table[i][j] = NULL;
|
15
|
+
}
|
16
|
+
return matrix;
|
17
|
+
}
|
18
|
+
|
19
|
+
void adj_matrix_add(AdjMatrix *matrix, unsigned long long x, unsigned long long y){
|
20
|
+
unsigned int h1 = MurmurHash2(&x, sizeof(long long), ADJ_MATRIX_SEED) % ADJ_MATRIX_DEFAULT_LENGTH,
|
21
|
+
h2 = MurmurHash2(&y, sizeof(long long), ADJ_MATRIX_SEED) % ADJ_MATRIX_DEFAULT_LENGTH;
|
22
|
+
Node *new_node = malloc(sizeof(Node)); new_node->x = h1; new_node->y = h2; new_node->next = NULL;
|
23
|
+
if(matrix->table[h1][h2] == NULL){
|
24
|
+
matrix->table[h1][h2] = matrix->table[h2][h1] = new_node;
|
25
|
+
}
|
26
|
+
else{
|
27
|
+
Node *previous = NULL;
|
28
|
+
for(Node *i = matrix->table[h1][h2]; i != NULL; i = i->next) previous = i;
|
29
|
+
previous->next = new_node;
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
char adj_matrix_find(AdjMatrix *matrix, unsigned long long x, unsigned long long y){
|
34
|
+
unsigned int h1 = MurmurHash2(&x, sizeof(long long), ADJ_MATRIX_SEED) % ADJ_MATRIX_DEFAULT_LENGTH,
|
35
|
+
h2 = MurmurHash2(&y, sizeof(long long), ADJ_MATRIX_SEED) % ADJ_MATRIX_DEFAULT_LENGTH;
|
36
|
+
Node *node = matrix->table[h1][h2];
|
37
|
+
if(node == NULL) return 0;
|
38
|
+
else{
|
39
|
+
for(Node *i = node; i != NULL; i = i->next)
|
40
|
+
if((i->x == h1 && i->y == h2) || (i->x == h2 && i->y == h1)) return 1;
|
41
|
+
return 0;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
static void node_free(Node *head){
|
46
|
+
if(head == NULL) return;
|
47
|
+
node_free(head->next);
|
48
|
+
free(head);
|
49
|
+
}
|
50
|
+
|
51
|
+
void adj_matrix_free(AdjMatrix *matrix){
|
52
|
+
for(int i = 0; i < matrix->length; i++){
|
53
|
+
for(int j = 0; j < matrix->length; j++)
|
54
|
+
if(matrix->table[i][j] != NULL){
|
55
|
+
node_free(matrix->table[i][j]);
|
56
|
+
matrix->table[i][j] = matrix->table[j][i] = NULL;
|
57
|
+
}
|
58
|
+
free(matrix->table[i]);
|
59
|
+
}
|
60
|
+
free(matrix->table);
|
61
|
+
free(matrix);
|
62
|
+
}
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#ifndef ADJ_MATRIX_H
|
2
|
+
#define ADJ_MATRIX_H 1
|
3
|
+
#define ADJ_MATRIX_DEFAULT_LENGTH 958
|
4
|
+
#define ADJ_MATRIX_SEED 9527
|
5
|
+
|
6
|
+
typedef struct _node{
|
7
|
+
struct _node *next;
|
8
|
+
unsigned long long x, y;
|
9
|
+
} Node;
|
10
|
+
|
11
|
+
typedef struct{
|
12
|
+
Node ***table;
|
13
|
+
unsigned int length;
|
14
|
+
} AdjMatrix;
|
15
|
+
|
16
|
+
AdjMatrix* adj_matrix_new(unsigned int length);
|
17
|
+
void adj_matrix_add(AdjMatrix *matrix, unsigned long long x, unsigned long long y);
|
18
|
+
char adj_matrix_find(AdjMatrix *matrix, unsigned long long x, unsigned long long y);
|
19
|
+
void adj_matrix_free(AdjMatrix *matrix);
|
20
|
+
|
21
|
+
#endif /* ADJ_MATRIX_H */
|
data/ext/jaro_winkler/distance.c
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#include <stdlib.h>
|
3
3
|
#include <ctype.h>
|
4
4
|
#include "distance.h"
|
5
|
+
#include "adj_matrix.h"
|
5
6
|
|
6
7
|
typedef struct{
|
7
8
|
unsigned long long code;
|
@@ -13,27 +14,16 @@ typedef struct{
|
|
13
14
|
int length;
|
14
15
|
} Codepoints;
|
15
16
|
|
16
|
-
|
17
|
-
unsigned long long x, y;
|
18
|
-
} Coord;
|
19
|
-
|
20
|
-
typedef struct{
|
21
|
-
Coord *coords;
|
22
|
-
int length;
|
23
|
-
} Matrix;
|
24
|
-
|
25
|
-
static const char *DEFAULT_ADJ_TABLE[] = {
|
17
|
+
const char *DEFAULT_ADJ_TABLE[] = {
|
26
18
|
"A","E", "A","I", "A","O", "A","U", "B","V", "E","I", "E","O", "E","U", "I","O", "I","U", "O","U",
|
27
19
|
"I","Y", "E","Y", "C","G", "E","F", "W","U", "W","V", "X","K", "S","Z", "X","S", "Q","C", "U","V",
|
28
20
|
"M","N", "L","I", "Q","O", "P","R", "I","J", "2","Z", "5","S", "8","B", "1","I", "1","L", "0","O",
|
29
21
|
"0","Q", "C","K", "G","J", "E"," ", "Y"," ", "S"," "
|
30
22
|
};
|
31
|
-
static Matrix DEFAULT_MATRIX;
|
32
23
|
|
33
24
|
static UnicodeHash unicode_hash_new(const char *str);
|
34
25
|
static Codepoints codepoints_new(const char *str, int byte_len);
|
35
|
-
static
|
36
|
-
static char matrix_find(Matrix matrix, unsigned long long code_1, unsigned long long code_2);
|
26
|
+
static AdjMatrix* adj_matrix_default();
|
37
27
|
|
38
28
|
Option option_new(){
|
39
29
|
Option opt;
|
@@ -61,13 +51,6 @@ double c_distance(char *s1, int s1_byte_len, char *s2, int s2_byte_len, Option o
|
|
61
51
|
int tmp2 = code_ary_1.length; code_ary_1.length = code_ary_2.length; code_ary_2.length = tmp2;
|
62
52
|
}
|
63
53
|
|
64
|
-
// Adjusting table
|
65
|
-
static char first_time = 1;
|
66
|
-
if(opt.adj_table){
|
67
|
-
if(first_time) DEFAULT_MATRIX = matrix_new(DEFAULT_ADJ_TABLE, sizeof(DEFAULT_ADJ_TABLE) / 8);
|
68
|
-
first_time = 0;
|
69
|
-
}
|
70
|
-
|
71
54
|
// Compute jaro distance
|
72
55
|
int window_size = code_ary_2.length / 2 - 1;
|
73
56
|
if(window_size < 0) window_size = 0;
|
@@ -91,7 +74,7 @@ double c_distance(char *s1, int s1_byte_len, char *s2, int s2_byte_len, Option o
|
|
91
74
|
previous_index = j;
|
92
75
|
found = 1;
|
93
76
|
}
|
94
|
-
}else if(opt.adj_table &&
|
77
|
+
}else if(opt.adj_table && adj_matrix_find(adj_matrix_default(), code_ary_1.ary[i], code_ary_2.ary[j])) sim_matched = 1;
|
95
78
|
} // for(int j = left; j <= right; j++){
|
96
79
|
if(matched){
|
97
80
|
matches++;
|
@@ -130,7 +113,7 @@ static UnicodeHash unicode_hash_new(const char *str){
|
|
130
113
|
}
|
131
114
|
|
132
115
|
static Codepoints codepoints_new(const char *str, int byte_len){
|
133
|
-
Codepoints ret = {
|
116
|
+
Codepoints ret = {};
|
134
117
|
ret.ary = calloc(byte_len, sizeof(long long));
|
135
118
|
int count = 0;
|
136
119
|
for(int i = 0; i < byte_len;){
|
@@ -143,21 +126,16 @@ static Codepoints codepoints_new(const char *str, int byte_len){
|
|
143
126
|
return ret;
|
144
127
|
}
|
145
128
|
|
146
|
-
static
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
129
|
+
static AdjMatrix* adj_matrix_default(){
|
130
|
+
static char first_time = 1;
|
131
|
+
static AdjMatrix *ret_matrix;
|
132
|
+
if(first_time){
|
133
|
+
ret_matrix = adj_matrix_new(ADJ_MATRIX_DEFAULT_LENGTH);
|
134
|
+
for(int i = 0; i < 78; i += 2){
|
135
|
+
UnicodeHash h1 = unicode_hash_new(DEFAULT_ADJ_TABLE[i]), h2 = unicode_hash_new(DEFAULT_ADJ_TABLE[i + 1]);
|
136
|
+
adj_matrix_add(ret_matrix, h1.code, h2.code);
|
137
|
+
}
|
138
|
+
first_time = 0;
|
155
139
|
}
|
156
|
-
return
|
157
|
-
}
|
158
|
-
|
159
|
-
static char matrix_find(Matrix matrix, unsigned long long code_1, unsigned long long code_2){
|
160
|
-
for (int i = 0; i < matrix.length; i++)
|
161
|
-
if(matrix.coords[i].x == code_1 && matrix.coords[i].y == code_2) return 1;
|
162
|
-
return 0;
|
140
|
+
return ret_matrix;
|
163
141
|
}
|
data/ext/jaro_winkler/distance.h
CHANGED
@@ -6,7 +6,7 @@ typedef struct{
|
|
6
6
|
char ignore_case, adj_table;
|
7
7
|
} Option;
|
8
8
|
|
9
|
-
double
|
10
|
-
Option
|
9
|
+
double c_distance(char *s1, int s1_byte_len, char *s2, int s2_byte_len, Option opt);
|
10
|
+
Option option_new();
|
11
11
|
|
12
12
|
#endif /* DISTANCE_H */
|
@@ -21,8 +21,7 @@ VALUE rb_distance(int argc, VALUE *argv, VALUE self){
|
|
21
21
|
if(c_opt.weight > 0.25) rb_raise(rb_eRuntimeError, "Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1.");
|
22
22
|
if(!NIL_P(threshold)) c_opt.threshold = NUM2DBL(threshold);
|
23
23
|
if(!NIL_P(ignore_case)) c_opt.ignore_case = (TYPE(ignore_case) == T_FALSE || NIL_P(ignore_case)) ? 0 : 1;
|
24
|
-
if(!NIL_P(adj_table)) c_opt.adj_table = (TYPE(adj_table) == T_FALSE || NIL_P(adj_table))
|
24
|
+
if(!NIL_P(adj_table)) c_opt.adj_table = (TYPE(adj_table) == T_FALSE || NIL_P(adj_table)) ? 0 : 1;
|
25
25
|
}
|
26
|
-
|
27
|
-
return ret;
|
26
|
+
return rb_float_new(c_distance(StringValuePtr(s1), RSTRING_LEN(s1), StringValuePtr(s2), RSTRING_LEN(s2), c_opt));
|
28
27
|
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
//-----------------------------------------------------------------------------
|
2
|
+
// MurmurHash2, by Austin Appleby
|
3
|
+
|
4
|
+
// Note - This code makes a few assumptions about how your machine behaves -
|
5
|
+
|
6
|
+
// 1. We can read a 4-byte value from any address without crashing
|
7
|
+
// 2. sizeof(int) == 4
|
8
|
+
|
9
|
+
// And it has a few limitations -
|
10
|
+
|
11
|
+
// 1. It will not work incrementally.
|
12
|
+
// 2. It will not produce the same results on little-endian and big-endian
|
13
|
+
// machines.
|
14
|
+
|
15
|
+
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
|
16
|
+
{
|
17
|
+
// 'm' and 'r' are mixing constants generated offline.
|
18
|
+
// They're not really 'magic', they just happen to work well.
|
19
|
+
|
20
|
+
const unsigned int m = 0x5bd1e995;
|
21
|
+
const int r = 24;
|
22
|
+
|
23
|
+
// Initialize the hash to a 'random' value
|
24
|
+
|
25
|
+
unsigned int h = seed ^ len;
|
26
|
+
|
27
|
+
// Mix 4 bytes at a time into the hash
|
28
|
+
|
29
|
+
const unsigned char * data = (const unsigned char *)key;
|
30
|
+
|
31
|
+
while(len >= 4)
|
32
|
+
{
|
33
|
+
unsigned int k = *(unsigned int *)data;
|
34
|
+
|
35
|
+
k *= m;
|
36
|
+
k ^= k >> r;
|
37
|
+
k *= m;
|
38
|
+
|
39
|
+
h *= m;
|
40
|
+
h ^= k;
|
41
|
+
|
42
|
+
data += 4;
|
43
|
+
len -= 4;
|
44
|
+
}
|
45
|
+
|
46
|
+
// Handle the last few bytes of the input array
|
47
|
+
|
48
|
+
switch(len)
|
49
|
+
{
|
50
|
+
case 3: h ^= data[2] << 16;
|
51
|
+
case 2: h ^= data[1] << 8;
|
52
|
+
case 1: h ^= data[0];
|
53
|
+
h *= m;
|
54
|
+
};
|
55
|
+
|
56
|
+
// Do a few final mixes of the hash to ensure the last few
|
57
|
+
// bytes are well-incorporated.
|
58
|
+
|
59
|
+
h ^= h >> 13;
|
60
|
+
h *= m;
|
61
|
+
h ^= h >> 15;
|
62
|
+
|
63
|
+
return h;
|
64
|
+
}
|
data/lib/jaro_winkler.rb
CHANGED
@@ -4,7 +4,7 @@ require 'jaro_winkler/jaro_winkler.so' unless JaroWinkler.fallback?
|
|
4
4
|
module JaroWinkler
|
5
5
|
module_function
|
6
6
|
def jaro_distance s1, s2, options = {}
|
7
|
-
options
|
7
|
+
options[:adj_table]
|
8
8
|
length1, length2 = s1.length, s2.length
|
9
9
|
# Guarantee the length order
|
10
10
|
if s1.length > s2.length
|
@@ -52,7 +52,7 @@ module JaroWinkler
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def r_distance s1, s2, options = {}
|
55
|
-
options = {weight: 0.1, threshold: 0.7, ignore_case: false}.merge options
|
55
|
+
options = {weight: 0.1, threshold: 0.7, ignore_case: false, adj_table: false}.merge options
|
56
56
|
weight, threshold, ignore_case = options[:weight], options[:threshold], options[:ignore_case]
|
57
57
|
raise 'Scaling factor should not exceed 0.25, otherwise the distance can become larger than 1' if weight > 0.25
|
58
58
|
s1, s2 = s1.upcase, s2.upcase if ignore_case
|
data/lib/jaro_winkler/version.rb
CHANGED
data/spec/jaro_winkler_spec.rb
CHANGED
@@ -55,7 +55,7 @@ shared_examples 'common' do |strategy|
|
|
55
55
|
it 'works with adjusting table' do
|
56
56
|
ary = [
|
57
57
|
['HENKA' , 'HENKAN' , 0.9667] , # m=5, t=0, s=0
|
58
|
-
['AL' , 'AL' , 1.0 ],
|
58
|
+
['AL' , 'AL' , 1.0 ] , # m=2, t=0, s=0
|
59
59
|
['MARTHA' , 'MARHTA' , 0.9611] , # m=6, t=1, s=0
|
60
60
|
['JONES' , 'JOHNSON' , 0.8598] , # m=4, t=0, s=3
|
61
61
|
['ABCVWXYZ' , 'CABVWXYZ' , 0.9583] , # m=8, t=1, s=0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jaro_winkler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.2.beta
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jian Weihang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,11 +72,14 @@ files:
|
|
72
72
|
- benchmark/native.txt
|
73
73
|
- benchmark/pure.rb
|
74
74
|
- benchmark/pure.txt
|
75
|
+
- ext/jaro_winkler/adj_matrix.c
|
76
|
+
- ext/jaro_winkler/adj_matrix.h
|
75
77
|
- ext/jaro_winkler/distance.c
|
76
78
|
- ext/jaro_winkler/distance.h
|
77
79
|
- ext/jaro_winkler/extconf.rb
|
78
80
|
- ext/jaro_winkler/jaro_winkler.c
|
79
81
|
- ext/jaro_winkler/jaro_winkler.h
|
82
|
+
- ext/jaro_winkler/murmur_hash2.c
|
80
83
|
- jaro_winkler.gemspec
|
81
84
|
- lib/jaro_winkler.rb
|
82
85
|
- lib/jaro_winkler/adjusting_table.rb
|
@@ -99,9 +102,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
99
102
|
version: '0'
|
100
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
104
|
requirements:
|
102
|
-
- - "
|
105
|
+
- - ">"
|
103
106
|
- !ruby/object:Gem::Version
|
104
|
-
version:
|
107
|
+
version: 1.3.1
|
105
108
|
requirements: []
|
106
109
|
rubyforge_project:
|
107
110
|
rubygems_version: 2.4.1
|