jaro_winkler 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d6d2ce4d12911ca5891bbf2bdc8c6fdc8e81415
4
- data.tar.gz: 9cdfa4369a6131b82c76b4fe8072c9395fa8e8ff
3
+ metadata.gz: 77f956f70c8b182eae7e78f336c377bc1b4bfa8f
4
+ data.tar.gz: 17bead21ccd72a4d1c6137cf4f612b1d1c036e26
5
5
  SHA512:
6
- metadata.gz: 192c9452f5159a26fc152a33e77a0f5de20b7f072eb4d3f93ec8f743bb1bd80b6a7d32852efd61eb00c2f1af617f5e8a423e61cf4d64edcf565628bb81b8ed9e
7
- data.tar.gz: 5f36c58fa716b3f4ba442d0bd551606d5fc25844a25c26888cf236e799ba7720aa305eced293e70cfdc7dba73bf070436945774d49ea5d99a324bbc8d81182bb
6
+ metadata.gz: a4c0e7786621635addc80d7c0c18276ef720c1e9690c61f74b506cf5627368cb5862073e96447cef6739ab044d18aa159db492e3bdc182a483809bd273979fd3
7
+ data.tar.gz: 83800c9061918d37f3be5a5b4f3137346425328a33f013dbd37dd58f3c818dd4e411605f372aabd0c6a5501a694ddf5928246e96d5e931af5e5468cc2d699777
data/README.md CHANGED
@@ -42,6 +42,25 @@ adj_table | boolean | false | The option is used to give partial credit for
42
42
  ['1', 'I'], ['1', 'L'], ['0', 'O'], ['0', 'Q'], ['C', 'K'], ['G', 'J'], ['E', ' '], ['Y', ' '], ['S', ' ']
43
43
  ```
44
44
 
45
+ ## How the Adjusting Table Works
46
+
47
+ Original formula:
48
+
49
+ ![origin](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%7D%200%20%26%20m%3D0%20%5C%5C%20%5Cfrac%20%7B%201%20%7D%7B%203%20%7D%20(%5Cfrac%20%7B%20m%20%7D%7B%20%5Cleft%7C%20s1%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m%20%7D%7B%20%5Cleft%7C%20s2%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m-t%20%7D%7B%20m%20%7D%20)%20%26%20others%20%5Cend%7Bcases%7D)
50
+
51
+ where
52
+
53
+ - `m` is the number of matching characters.
54
+ - `t` is half the number of transpositions.
55
+
56
+ with adjusting table:
57
+
58
+ ![adj](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%7D%200%20%26%20m%3D0%20%5C%5C%20%5Cfrac%20%7B%201%20%7D%7B%203%20%7D%20(%5Cfrac%20%7B%20%5Cfrac%20%7B%20s%20%7D%7B%2010%20%7D%20%2Bm%20%7D%7B%20%5Cleft%7C%20s1%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20%5Cfrac%20%7B%20s%20%7D%7B%2010%20%7D%20%2Bm%20%7D%7B%20%5Cleft%7C%20s2%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m-t%20%7D%7B%20m%20%7D%20)%20%26%20others%20%5Cend%7Bcases%7D)
59
+
60
+ where
61
+
62
+ - `s` is the number of nonmatching but similar characters.
63
+
45
64
  # Why This?
46
65
 
47
66
  There is also another similar gem named [fuzzy-string-match](https://github.com/kiyoka/fuzzy-string-match), which provides both C and Ruby versions as well.
@@ -106,3 +125,4 @@ amatch | 0.960000 | 0.010000 | 0.970000 | ( 0.964803)
106
125
 
107
126
  - Custom adjusting word table.
108
127
  - If the adjusting table is ASCII encoded, use a dense matrix instead of a sparse matrix to speed up.
128
+ - Call by reference instead of call by value to enhance performance.
data/benchmark/native.txt CHANGED
@@ -1,12 +1,12 @@
1
1
  Rehearsal ----------------------------------------------------
2
- jaro_winkler 0.370000 0.000000 0.370000 ( 0.367923)
3
- fuzzystringmatch 0.340000 0.030000 0.370000 ( 0.372721)
4
- hotwater 0.310000 0.000000 0.310000 ( 0.313405)
5
- amatch 0.970000 0.000000 0.970000 ( 0.968318)
6
- ------------------------------------------- total: 2.020000sec
2
+ jaro_winkler 0.350000 0.000000 0.350000 ( 0.358591)
3
+ fuzzystringmatch 0.360000 0.020000 0.380000 ( 0.381666)
4
+ hotwater 0.340000 0.000000 0.340000 ( 0.337789)
5
+ amatch 1.010000 0.000000 1.010000 ( 1.010946)
6
+ ------------------------------------------- total: 2.080000sec
7
7
 
8
8
  user system total real
9
- jaro_winkler 0.390000 0.000000 0.390000 ( 0.392408)
10
- fuzzystringmatch 0.150000 0.000000 0.150000 ( 0.151552)
11
- hotwater 0.320000 0.000000 0.320000 ( 0.317740)
12
- amatch 0.960000 0.010000 0.970000 ( 0.964803)
9
+ jaro_winkler 0.350000 0.010000 0.360000 ( 0.345293)
10
+ fuzzystringmatch 0.140000 0.000000 0.140000 ( 0.138711)
11
+ hotwater 0.310000 0.000000 0.310000 ( 0.306498)
12
+ amatch 0.960000 0.000000 0.960000 ( 0.961509)
@@ -37,7 +37,7 @@ static char matrix_find(Matrix matrix, unsigned long long code_1, unsigned long
37
37
 
38
38
  Option option_new(){
39
39
  Option opt;
40
- opt.ignore_case = 0;
40
+ opt.ignore_case = opt.adj_table = 0;
41
41
  opt.weight = 0.1;
42
42
  opt.threshold = 0.7;
43
43
  return opt;
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.3.0"
2
+ VERSION = "1.3.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang