jaro_winkler 1.5.1-universal-java-10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2f783ac8d8355443adfe51a5d3f32cf7b8ddef5f
4
+ data.tar.gz: 96f863de9c8e879104fdb78ded7834691ad6ca66
5
+ SHA512:
6
+ metadata.gz: 354d4337b57d40de31960a6fd050d56ca7dd852031f5ad0525e7f2147ef8345ab6c4b4d53ff93a0e6011b55dc342b2ab05147db277c392f31988109757219685
7
+ data.tar.gz: 8db99ee1730ac2d84d95ec509fd9d2395ae812d6169bf1ce9f60c44495bafa541b4fbe39a3688c521b5f38707d635164c7ef5b8b7b062390ff2be6a017e6ce8c
@@ -0,0 +1,97 @@
1
+ #include "adj_matrix.h"
2
+ #include "codepoints.h"
3
+ #include "ruby.h"
4
+
5
+ const char *DEFAULT_ADJ_TABLE[] = {
6
+ "A", "E", "A", "I", "A", "O", "A", "U", "B", "V", "E", "I", "E",
7
+ "O", "E", "U", "I", "O", "I", "U", "O", "U", "I", "Y", "E", "Y",
8
+ "C", "G", "E", "F", "W", "U", "W", "V", "X", "K", "S", "Z", "X",
9
+ "S", "Q", "C", "U", "V", "M", "N", "L", "I", "Q", "O", "P", "R",
10
+ "I", "J", "2", "Z", "5", "S", "8", "B", "1", "I", "1", "L", "0",
11
+ "O", "0", "Q", "C", "K", "G", "J", "E", " ", "Y", " ", "S", " "};
12
+
13
+ void node_free(Node *head);
14
+
15
+ AdjMatrix *adj_matrix_new(uint32_t length) {
16
+ AdjMatrix *matrix = malloc(sizeof(AdjMatrix));
17
+ matrix->length = length == 0 ? ADJ_MATRIX_DEFAULT_LENGTH : length;
18
+ matrix->table = malloc(matrix->length * sizeof(Node **));
19
+ for (size_t i = 0; i < matrix->length; i++) {
20
+ matrix->table[i] = malloc(matrix->length * sizeof(Node *));
21
+ for (size_t j = 0; j < matrix->length; j++)
22
+ matrix->table[i][j] = NULL;
23
+ }
24
+ return matrix;
25
+ }
26
+
27
+ void adj_matrix_add(AdjMatrix *matrix, uint64_t x, uint64_t y) {
28
+ uint32_t h1 = st_hash(&x, sizeof(x), ADJ_MATRIX_SEED) %
29
+ ADJ_MATRIX_DEFAULT_LENGTH,
30
+ h2 = st_hash(&y, sizeof(y), ADJ_MATRIX_SEED) %
31
+ ADJ_MATRIX_DEFAULT_LENGTH;
32
+ Node *new_node = malloc(sizeof(Node));
33
+ new_node->x = h1;
34
+ new_node->y = h2;
35
+ new_node->next = NULL;
36
+ if (matrix->table[h1][h2] == NULL) {
37
+ matrix->table[h1][h2] = matrix->table[h2][h1] = new_node;
38
+ } else {
39
+ Node *previous = NULL;
40
+ for (Node *i = matrix->table[h1][h2]; i != NULL; i = i->next)
41
+ previous = i;
42
+ previous->next = new_node;
43
+ }
44
+ }
45
+
46
+ char adj_matrix_find(AdjMatrix *matrix, uint64_t x, uint64_t y) {
47
+ uint32_t h1 = st_hash(&x, sizeof(x), ADJ_MATRIX_SEED) %
48
+ ADJ_MATRIX_DEFAULT_LENGTH,
49
+ h2 = st_hash(&y, sizeof(y), ADJ_MATRIX_SEED) %
50
+ ADJ_MATRIX_DEFAULT_LENGTH;
51
+ Node *node = matrix->table[h1][h2];
52
+ if (node == NULL)
53
+ return 0;
54
+ else {
55
+ for (Node *i = node; i != NULL; i = i->next)
56
+ if ((i->x == h1 && i->y == h2) || (i->x == h2 && i->y == h1))
57
+ return 1;
58
+ return 0;
59
+ }
60
+ }
61
+
62
+ void node_free(Node *head) {
63
+ if (head == NULL)
64
+ return;
65
+ node_free(head->next);
66
+ free(head);
67
+ }
68
+
69
+ void adj_matrix_free(AdjMatrix *matrix) {
70
+ for (size_t i = 0; i < matrix->length; i++) {
71
+ for (size_t j = 0; j < matrix->length; j++)
72
+ if (matrix->table[i][j] != NULL) {
73
+ node_free(matrix->table[i][j]);
74
+ matrix->table[i][j] = matrix->table[j][i] = NULL;
75
+ }
76
+ free(matrix->table[i]);
77
+ }
78
+ free(matrix->table);
79
+ free(matrix);
80
+ }
81
+
82
+ AdjMatrix *adj_matrix_default() {
83
+ static char first_time = 1;
84
+ static AdjMatrix *ret_matrix;
85
+ if (first_time) {
86
+ ret_matrix = adj_matrix_new(ADJ_MATRIX_DEFAULT_LENGTH);
87
+ size_t length = sizeof(DEFAULT_ADJ_TABLE) / sizeof(char *);
88
+ for (size_t i = 0; i < length; i += 2) {
89
+ uint64_t code_1, code_2;
90
+ code_1 = *DEFAULT_ADJ_TABLE[i] & 0xff;
91
+ code_2 = *DEFAULT_ADJ_TABLE[i + 1] & 0xff;
92
+ adj_matrix_add(ret_matrix, code_1, code_2);
93
+ }
94
+ first_time = 0;
95
+ }
96
+ return ret_matrix;
97
+ }
@@ -0,0 +1,22 @@
1
+ #pragma once
2
+
3
+ #include "stdint.h"
4
+
5
+ #define ADJ_MATRIX_DEFAULT_LENGTH 958
6
+ #define ADJ_MATRIX_SEED 9527
7
+
8
+ typedef struct _node {
9
+ struct _node *next;
10
+ uint64_t x, y;
11
+ } Node;
12
+
13
+ typedef struct {
14
+ Node ***table;
15
+ uint32_t length;
16
+ } AdjMatrix;
17
+
18
+ AdjMatrix *adj_matrix_new(uint32_t length);
19
+ void adj_matrix_add(AdjMatrix *matrix, uint64_t x, uint64_t y);
20
+ char adj_matrix_find(AdjMatrix *matrix, uint64_t x, uint64_t y);
21
+ void adj_matrix_free(AdjMatrix *matrix);
22
+ AdjMatrix *adj_matrix_default();
@@ -0,0 +1,61 @@
1
+ #include "codepoints.h"
2
+ #include "ruby.h"
3
+ #include "ruby/encoding.h"
4
+ #include <stdint.h>
5
+ #include <stdlib.h>
6
+ #include <string.h>
7
+
8
+ // this function is copied from string.c
9
+ static inline int single_byte_optimizable(VALUE str) {
10
+ rb_encoding *enc;
11
+
12
+ /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
13
+ if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
14
+ return 1;
15
+
16
+ enc = rb_enc_get(str);
17
+ if (rb_enc_mbmaxlen(enc) == 1)
18
+ return 1;
19
+
20
+ /* Conservative. Possibly single byte.
21
+ * "\xa1" in Shift_JIS for example. */
22
+ return 0;
23
+ }
24
+
25
+ void codepoints_init(CodePoints *codepoints, VALUE str) {
26
+ size_t i, length;
27
+ int32_t n;
28
+ uint32_t c;
29
+ const char *ptr, *end;
30
+ rb_encoding *enc;
31
+
32
+ if (single_byte_optimizable(str)) {
33
+ length = RSTRING_LEN(str);
34
+ ptr = RSTRING_PTR(str);
35
+ codepoints->data = malloc(length * sizeof(*codepoints->data));
36
+ for (i = 0, codepoints->length = 0; i < length; i++, codepoints->length++)
37
+ codepoints->data[i] = ptr[i] & 0xff;
38
+ } else {
39
+ codepoints->length = 0;
40
+ codepoints->size = 32;
41
+ codepoints->data = malloc(codepoints->size * sizeof(*codepoints->data));
42
+ str = rb_str_new_frozen(str);
43
+ ptr = RSTRING_PTR(str);
44
+ end = RSTRING_END(str);
45
+ enc = rb_enc_get(str);
46
+
47
+ while (ptr < end) {
48
+ c = rb_enc_codepoint_len(ptr, end, &n, enc);
49
+ if (codepoints->length == codepoints->size) {
50
+ codepoints->size *= 2;
51
+ codepoints->data = realloc(codepoints->data, sizeof(*codepoints->data) *
52
+ codepoints->size);
53
+ }
54
+ codepoints->data[codepoints->length++] = c;
55
+ ptr += n;
56
+ }
57
+ RB_GC_GUARD(str);
58
+ }
59
+ }
60
+
61
+ void codepoints_free(CodePoints *codepoints) { free(codepoints->data); }
@@ -0,0 +1,13 @@
1
+ #pragma once
2
+ #include "ruby.h"
3
+ #include <stddef.h>
4
+ #include <stdint.h>
5
+
6
+ typedef struct {
7
+ uint32_t *data;
8
+ size_t length;
9
+ size_t size;
10
+ } CodePoints;
11
+
12
+ void codepoints_init(CodePoints *, VALUE str);
13
+ void codepoints_free(CodePoints *);
@@ -0,0 +1,121 @@
1
+ #include "jaro.h"
2
+ #include "adj_matrix.h"
3
+ #include "codepoints.h"
4
+
5
+ #include <ctype.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ #define DEFAULT_WEIGHT 0.1
10
+ #define DEFAULT_THRESHOLD 0.7
11
+ #define SWAP(x, y) \
12
+ do { \
13
+ __typeof__(x) SWAP = x; \
14
+ x = y; \
15
+ y = SWAP; \
16
+ } while (0)
17
+
18
+ const Options DEFAULT_OPTIONS = {.weight = DEFAULT_WEIGHT,
19
+ .threshold = DEFAULT_THRESHOLD,
20
+ .ignore_case = 0,
21
+ .adj_table = 0};
22
+
23
+ double jaro_distance_from_codes(uint32_t *codepoints1, size_t len1,
24
+ uint32_t *codepoints2, size_t len2,
25
+ Options *opt) {
26
+ if (!len1 || !len2)
27
+ return 0.0;
28
+
29
+ if (len1 > len2) {
30
+ SWAP(codepoints1, codepoints2);
31
+ SWAP(len1, len2);
32
+ }
33
+
34
+ if (opt->ignore_case) {
35
+ for (size_t i = 0; i < len1; i++)
36
+ codepoints1[i] = tolower(codepoints1[i]);
37
+ for (size_t i = 0; i < len2; i++)
38
+ codepoints2[i] = tolower(codepoints2[i]);
39
+ }
40
+
41
+ int32_t window_size = (int32_t)len2 / 2 - 1;
42
+ if (window_size < 0)
43
+ window_size = 0;
44
+
45
+ char short_codes_flag[len1];
46
+ char long_codes_flag[len2];
47
+ memset(short_codes_flag, 0, len1);
48
+ memset(long_codes_flag, 0, len2);
49
+
50
+ // count number of matching characters
51
+ size_t match_count = 0;
52
+ for (size_t i = 0; i < len1; i++) {
53
+ size_t left = (i >= (size_t)window_size) ? i - window_size : 0;
54
+ size_t right =
55
+ (i + window_size <= len2 - 1) ? (i + window_size) : (len2 - 1);
56
+ if (right > len2 - 1)
57
+ right = len2 - 1;
58
+ for (size_t j = left; j <= right; j++) {
59
+ if (!long_codes_flag[j] && codepoints1[i] == codepoints2[j]) {
60
+ short_codes_flag[i] = long_codes_flag[j] = 1;
61
+ match_count++;
62
+ break;
63
+ }
64
+ }
65
+ }
66
+
67
+ if (!match_count)
68
+ return 0.0;
69
+
70
+ // count number of transpositions
71
+ size_t transposition_count = 0, j = 0, k = 0;
72
+ for (size_t i = 0; i < len1; i++) {
73
+ if (short_codes_flag[i]) {
74
+ for (j = k; j < len2; j++) {
75
+ if (long_codes_flag[j]) {
76
+ k = j + 1;
77
+ break;
78
+ }
79
+ }
80
+ if (codepoints1[i] != codepoints2[j])
81
+ transposition_count++;
82
+ }
83
+ }
84
+
85
+ // count similarities in nonmatched characters
86
+ size_t similar_count = 0;
87
+ if (opt->adj_table && len1 > match_count)
88
+ for (size_t i = 0; i < len1; i++)
89
+ if (!short_codes_flag[i])
90
+ for (size_t j = 0; j < len2; j++)
91
+ if (!long_codes_flag[j])
92
+ if (adj_matrix_find(adj_matrix_default(), codepoints1[i],
93
+ codepoints2[j])) {
94
+ similar_count += 3;
95
+ break;
96
+ }
97
+
98
+ double m = (double)match_count;
99
+ double t = (double)(transposition_count / 2);
100
+ if (opt->adj_table)
101
+ m = similar_count / 10.0 + m;
102
+ return (m / len1 + m / len2 + (m - t) / m) / 3;
103
+ }
104
+
105
+ double jaro_winkler_distance_from_codes(uint32_t *codepoints1, size_t len1,
106
+ uint32_t *codepoints2, size_t len2,
107
+ Options *opt) {
108
+ double jaro_distance =
109
+ jaro_distance_from_codes(codepoints1, len1, codepoints2, len2, opt);
110
+
111
+ if (jaro_distance < opt->threshold)
112
+ return jaro_distance;
113
+ else {
114
+ size_t prefix = 0;
115
+ size_t max_4 = len1 > 4 ? 4 : len1;
116
+ for (prefix = 0;
117
+ prefix < max_4 && codepoints1[prefix] == codepoints2[prefix]; prefix++)
118
+ ;
119
+ return jaro_distance + prefix * opt->weight * (1 - jaro_distance);
120
+ }
121
+ }
@@ -0,0 +1,17 @@
1
+ #pragma once
2
+
3
+ #include <stddef.h>
4
+ #include <stdint.h>
5
+
6
+ typedef struct {
7
+ double weight, threshold;
8
+ char ignore_case, adj_table;
9
+ } Options;
10
+
11
+ extern const Options DEFAULT_OPTIONS;
12
+
13
+ double jaro_distance_from_codes(uint32_t *codepoints1, size_t len1,
14
+ uint32_t *codepoints2, size_t len2, Options *);
15
+ double jaro_winkler_distance_from_codes(uint32_t *codepoints1, size_t len1,
16
+ uint32_t *codepoints2, size_t len2,
17
+ Options *);
@@ -0,0 +1,70 @@
1
+ #include "codepoints.h"
2
+ #include "jaro.h"
3
+ #include "ruby.h"
4
+
5
+ VALUE rb_mJaroWinkler, rb_eError, rb_eInvalidWeightError;
6
+
7
+ VALUE rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self);
8
+ VALUE rb_jaro_distance(size_t argc, VALUE *argv, VALUE self);
9
+ VALUE distance(size_t argc, VALUE *argv, VALUE self,
10
+ double (*distance_fn)(uint32_t *codepoints1, size_t len1,
11
+ uint32_t *codepoints2, size_t len2,
12
+ Options *));
13
+
14
+ void Init_jaro_winkler_ext(void) {
15
+ rb_mJaroWinkler = rb_define_module("JaroWinkler");
16
+ rb_eError = rb_define_class_under(rb_mJaroWinkler, "Error", rb_eRuntimeError);
17
+ rb_eInvalidWeightError =
18
+ rb_define_class_under(rb_mJaroWinkler, "InvalidWeightError", rb_eError);
19
+ rb_define_singleton_method(rb_mJaroWinkler, "distance",
20
+ rb_jaro_winkler_distance, -1);
21
+ rb_define_singleton_method(rb_mJaroWinkler, "jaro_distance", rb_jaro_distance,
22
+ -1);
23
+ }
24
+
25
+ VALUE distance(size_t argc, VALUE *argv, VALUE self,
26
+ double (*distance_fn)(uint32_t *codepoints1, size_t len1,
27
+ uint32_t *codepoints2, size_t len2,
28
+ Options *)) {
29
+ VALUE s1, s2, opt;
30
+ CodePoints cp1, cp2;
31
+
32
+ rb_scan_args((int32_t)argc, argv, "2:", &s1, &s2, &opt);
33
+ codepoints_init(&cp1, s1);
34
+ codepoints_init(&cp2, s2);
35
+
36
+ Options c_opt = DEFAULT_OPTIONS;
37
+ if (TYPE(opt) == T_HASH) {
38
+ VALUE weight = rb_hash_aref(opt, ID2SYM(rb_intern("weight"))),
39
+ threshold = rb_hash_aref(opt, ID2SYM(rb_intern("threshold"))),
40
+ ignore_case = rb_hash_aref(opt, ID2SYM(rb_intern("ignore_case"))),
41
+ adj_table = rb_hash_aref(opt, ID2SYM(rb_intern("adj_table")));
42
+ if (!NIL_P(weight))
43
+ c_opt.weight = NUM2DBL(weight);
44
+ if (c_opt.weight > 0.25)
45
+ rb_raise(rb_eInvalidWeightError, "Scaling factor should not exceed 0.25, "
46
+ "otherwise the distance can become "
47
+ "larger than 1.");
48
+ if (!NIL_P(threshold))
49
+ c_opt.threshold = NUM2DBL(threshold);
50
+ if (!NIL_P(ignore_case))
51
+ c_opt.ignore_case =
52
+ (TYPE(ignore_case) == T_FALSE || NIL_P(ignore_case)) ? 0 : 1;
53
+ if (!NIL_P(adj_table))
54
+ c_opt.adj_table =
55
+ (TYPE(adj_table) == T_FALSE || NIL_P(adj_table)) ? 0 : 1;
56
+ }
57
+ VALUE ret = rb_float_new(
58
+ (*distance_fn)(cp1.data, cp1.length, cp2.data, cp2.length, &c_opt));
59
+ codepoints_free(&cp1);
60
+ codepoints_free(&cp2);
61
+ return ret;
62
+ }
63
+
64
+ VALUE rb_jaro_distance(size_t argc, VALUE *argv, VALUE self) {
65
+ return distance(argc, argv, self, jaro_distance_from_codes);
66
+ }
67
+
68
+ VALUE rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self) {
69
+ return distance(argc, argv, self, jaro_winkler_distance_from_codes);
70
+ }
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'jaro_winkler/version'
4
+
5
# Load the C extension when RUBY_ENGINE is 'ruby'; every other engine gets
# the pure-Ruby implementation.
case RUBY_ENGINE
when 'ruby' then require 'jaro_winkler/jaro_winkler_ext'
else require 'jaro_winkler/jaro_winkler_pure'
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module JaroWinkler
  # Symmetric table of character pairs that are treated as "similar" when the
  # +adj_table+ option is enabled.
  #
  # <tt>DEFAULT_ADJ_TABLE[a][b]</tt> is +true+ when the pair was listed (in
  # either order) and +nil+ otherwise. Looking up an unlisted character never
  # adds an entry to the table.
  DEFAULT_ADJ_TABLE = [
    %w[A E], %w[A I], %w[A O], %w[A U], %w[B V], %w[E I], %w[E O], %w[E U], %w[I O],
    %w[I U], %w[O U], %w[I Y], %w[E Y], %w[C G], %w[E F], %w[W U], %w[W V], %w[X K],
    %w[S Z], %w[X S], %w[Q C], %w[U V], %w[M N], %w[L I], %w[Q O], %w[P R], %w[I J],
    %w[2 Z], %w[5 S], %w[8 B], %w[1 I], %w[1 L], %w[0 O], %w[0 Q], %w[C K], %w[G J],
    ['E', ' '], ['Y', ' '], ['S', ' ']
  ].each_with_object(Hash.new { |table, char| table[char] = {} }) do |(s1, s2), table|
    table[s1][s2] = table[s2][s1] = true
  end

  # Rows were auto-created only while building. From now on an unlisted
  # character maps to a shared, frozen, empty row, so a lookup returns nil
  # instead of a freshly created (truthy) Hash and leaves the table unchanged.
  DEFAULT_ADJ_TABLE.default = {}.freeze
end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'jaro_winkler/adjusting_table'
4
module JaroWinkler
  class Error < RuntimeError; end
  class InvalidWeightError < Error; end

  DEFAULT_WEIGHT = 0.1
  DEFAULT_THRESHOLD = 0.7
  DEFAULT_OPTIONS = {
    jaro: { adj_table: false, ignore_case: false },
    jaro_winkler: { weight: DEFAULT_WEIGHT, threshold: DEFAULT_THRESHOLD }
  }.freeze

  class << self
    # Jaro-Winkler similarity of two strings.
    #
    # Options: +:weight+ (prefix scaling factor, at most 0.25), +:threshold+
    # (minimum Jaro similarity for the prefix boost), +:ignore_case+ and
    # +:adj_table+.
    #
    # Raises InvalidWeightError when +:weight+ exceeds 0.25.
    def distance(str1, str2, options = {})
      _distance str1.codepoints.to_a, str2.codepoints.to_a, options
    end

    # Jaro similarity of two strings. Options: +:ignore_case+, +:adj_table+.
    def jaro_distance(str1, str2, options = {})
      _jaro_distance str1.codepoints.to_a, str2.codepoints.to_a, options
    end

    private

    def _distance(codes1, codes2, options = {})
      options = DEFAULT_OPTIONS[:jaro_winkler].merge options
      if options[:weight] > 0.25
        raise InvalidWeightError, 'Scaling factor should not exceed 0.25, ' \
                                  'otherwise the distance can become ' \
                                  'larger than 1.'
      end
      jaro_distance = _jaro_distance(codes1, codes2, options)
      return jaro_distance if jaro_distance < options[:threshold]

      # The common prefix can be no longer than the shorter sequence.
      codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
      max_4 = [codes1.length, 4].min
      prefix = 0
      prefix += 1 while prefix < max_4 && codes1[prefix] == codes2[prefix]
      jaro_distance + prefix * options[:weight] * (1 - jaro_distance)
    end

    def _jaro_distance(codes1, codes2, options = {})
      options = DEFAULT_OPTIONS[:jaro].merge options

      # codes1 is the shorter (or equally long) sequence from here on.
      codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
      len1 = codes1.length
      len2 = codes2.length
      return 0.0 if len1 == 0 || len2 == 0

      if options[:ignore_case]
        # Fold ASCII a-z to upper case, in place.
        codes1.map! { |c| c >= 97 && c <= 122 ? c - 32 : c }
        codes2.map! { |c| c >= 97 && c <= 122 ? c - 32 : c }
      end

      window = len2 / 2 - 1
      window = 0 if window < 0
      matched1 = Array.new(len1, false)
      matched2 = Array.new(len2, false)

      # count number of matching characters
      match_count = 0
      len1.times do |i|
        left = i >= window ? i - window : 0
        right = i + window <= len2 - 1 ? i + window : len2 - 1
        j = left
        while j <= right
          if !matched2[j] && codes1[i] == codes2[j]
            matched1[i] = matched2[j] = true
            match_count += 1
            break
          end
          j += 1
        end
      end

      return 0.0 if match_count == 0

      # count number of transpositions
      transposition_count = j = k = 0
      len1.times do |i|
        next unless matched1[i]

        # advance to the next matched position of the longer sequence
        j = k
        while j < len2
          if matched2[j]
            k = j + 1
            break
          end
          j += 1
        end
        transposition_count += 1 if codes1[i] != codes2[j]
      end

      # count similarities in nonmatched characters
      similar_count = 0
      if options[:adj_table] && len1 > match_count
        len1.times do |i|
          next if matched1[i]

          j = 0
          while j < len2
            if !matched2[j] && _adjacent?(codes1[i], codes2[j])
              similar_count += 3
              break
            end
            j += 1
          end
        end
      end

      m = match_count.to_f
      t = transposition_count / 2
      m = similar_count / 10.0 + m if options[:adj_table]
      (m / len1 + m / len2 + (m - t) / m) / 3
    end

    # True when the two code points are listed as a pair in DEFAULT_ADJ_TABLE.
    #
    # Every entry of the table is an ASCII character, so anything else cannot
    # match and is rejected before Integer#chr is called (which raises
    # RangeError for values that are not valid UTF-8 code points). Hash#fetch
    # is used so that the table's default is never invoked: a lookup neither
    # modifies the table nor yields a truthy placeholder.
    def _adjacent?(code1, code2)
      return false unless code1 < 128 && code2 < 128

      row = DEFAULT_ADJ_TABLE.fetch(code1.chr(Encoding::UTF_8), nil)
      !row.nil? && row.fetch(code2.chr(Encoding::UTF_8), nil) == true
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JaroWinkler
4
+ VERSION = '1.5.1'
5
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jaro_winkler
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.1
5
+ platform: universal-java-10
6
+ authors:
7
+ - Jian Weihang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-06-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '1.7'
19
+ name: bundler
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '12.0'
33
+ name: rake
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ name: rake-compiler
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ name: minitest
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: |-
70
+ jaro_winkler is an implementation of the Jaro-Winkler
71
+ distance algorithm, written as a C extension, that falls back to a pure
72
+ Ruby version on platforms other than MRI/KRI, such as JRuby or Rubinius. Both
73
+ the C and Ruby implementations support any kind of string encoding, such as
74
+ UTF-8, EUC-JP, Big5, etc.
75
+ email: tonytonyjan@gmail.com
76
+ executables: []
77
+ extensions: []
78
+ extra_rdoc_files: []
79
+ files:
80
+ - ext/jaro_winkler/adj_matrix.c
81
+ - ext/jaro_winkler/adj_matrix.h
82
+ - ext/jaro_winkler/codepoints.c
83
+ - ext/jaro_winkler/codepoints.h
84
+ - ext/jaro_winkler/jaro.c
85
+ - ext/jaro_winkler/jaro.h
86
+ - ext/jaro_winkler/jaro_winkler.c
87
+ - lib/jaro_winkler.rb
88
+ - lib/jaro_winkler/adjusting_table.rb
89
+ - lib/jaro_winkler/jaro_winkler_pure.rb
90
+ - lib/jaro_winkler/version.rb
91
+ homepage: https://github.com/tonytonyjan/jaro_winkler
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.14.1
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: An implementation of the Jaro-Winkler distance algorithm, written as a C extension,
115
+ which supports any kind of string encoding.
116
+ test_files: []