jaro_winkler 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d6d2ce4d12911ca5891bbf2bdc8c6fdc8e81415
4
- data.tar.gz: 9cdfa4369a6131b82c76b4fe8072c9395fa8e8ff
3
+ metadata.gz: 77f956f70c8b182eae7e78f336c377bc1b4bfa8f
4
+ data.tar.gz: 17bead21ccd72a4d1c6137cf4f612b1d1c036e26
5
5
  SHA512:
6
- metadata.gz: 192c9452f5159a26fc152a33e77a0f5de20b7f072eb4d3f93ec8f743bb1bd80b6a7d32852efd61eb00c2f1af617f5e8a423e61cf4d64edcf565628bb81b8ed9e
7
- data.tar.gz: 5f36c58fa716b3f4ba442d0bd551606d5fc25844a25c26888cf236e799ba7720aa305eced293e70cfdc7dba73bf070436945774d49ea5d99a324bbc8d81182bb
6
+ metadata.gz: a4c0e7786621635addc80d7c0c18276ef720c1e9690c61f74b506cf5627368cb5862073e96447cef6739ab044d18aa159db492e3bdc182a483809bd273979fd3
7
+ data.tar.gz: 83800c9061918d37f3be5a5b4f3137346425328a33f013dbd37dd58f3c818dd4e411605f372aabd0c6a5501a694ddf5928246e96d5e931af5e5468cc2d699777
data/README.md CHANGED
@@ -42,6 +42,25 @@ adj_table | boolean | false | The option is used to give partial credit for
42
42
  ['1', 'I'], ['1', 'L'], ['0', 'O'], ['0', 'Q'], ['C', 'K'], ['G', 'J'], ['E', ' '], ['Y', ' '], ['S', ' ']
43
43
  ```
44
44
 
45
+ ## How the Adjusting Table Works
46
+
47
+ Original formula:
48
+
49
+ ![origin](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%7D%200%20%26%20m%3D0%20%5C%5C%20%5Cfrac%20%7B%201%20%7D%7B%203%20%7D%20(%5Cfrac%20%7B%20m%20%7D%7B%20%5Cleft%7C%20s1%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m%20%7D%7B%20%5Cleft%7C%20s2%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m-t%20%7D%7B%20m%20%7D%20)%20%26%20others%20%5Cend%7Bcases%7D)
50
+
51
+ where
52
+
53
+ - `m` is the number of matching characters.
54
+ - `t` is half the number of transpositions.
55
+
56
+ with adjusting table:
57
+
58
+ ![adj](https://chart.googleapis.com/chart?cht=tx&chl=%5Cbegin%7Bcases%7D%200%20%26%20m%3D0%20%5C%5C%20%5Cfrac%20%7B%201%20%7D%7B%203%20%7D%20(%5Cfrac%20%7B%20%5Cfrac%20%7B%20s%20%7D%7B%2010%20%7D%20%2Bm%20%7D%7B%20%5Cleft%7C%20s1%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20%5Cfrac%20%7B%20s%20%7D%7B%2010%20%7D%20%2Bm%20%7D%7B%20%5Cleft%7C%20s2%20%5Cright%7C%20%20%7D%20%2B%5Cfrac%20%7B%20m-t%20%7D%7B%20m%20%7D%20)%20%26%20others%20%5Cend%7Bcases%7D)
59
+
60
+ where
61
+
62
+ - `s` is the number of nonmatching but similar characters.
63
+
45
64
  # Why This?
46
65
 
47
66
  There is also another similar gem named [fuzzy-string-match](https://github.com/kiyoka/fuzzy-string-match) which provides both C and Ruby versions as well.
@@ -106,3 +125,4 @@ amatch | 0.960000 | 0.010000 | 0.970000 | ( 0.964803)
106
125
 
107
126
  - Custom adjusting word table.
108
127
  - If the adjusting table is ASCII encoded, use a dense matrix instead of a sparse matrix to speed up.
128
+ - Call by reference instead of call by value to enhance performance.
data/benchmark/native.txt CHANGED
@@ -1,12 +1,12 @@
1
1
  Rehearsal ----------------------------------------------------
2
- jaro_winkler 0.370000 0.000000 0.370000 ( 0.367923)
3
- fuzzystringmatch 0.340000 0.030000 0.370000 ( 0.372721)
4
- hotwater 0.310000 0.000000 0.310000 ( 0.313405)
5
- amatch 0.970000 0.000000 0.970000 ( 0.968318)
6
- ------------------------------------------- total: 2.020000sec
2
+ jaro_winkler 0.350000 0.000000 0.350000 ( 0.358591)
3
+ fuzzystringmatch 0.360000 0.020000 0.380000 ( 0.381666)
4
+ hotwater 0.340000 0.000000 0.340000 ( 0.337789)
5
+ amatch 1.010000 0.000000 1.010000 ( 1.010946)
6
+ ------------------------------------------- total: 2.080000sec
7
7
 
8
8
  user system total real
9
- jaro_winkler 0.390000 0.000000 0.390000 ( 0.392408)
10
- fuzzystringmatch 0.150000 0.000000 0.150000 ( 0.151552)
11
- hotwater 0.320000 0.000000 0.320000 ( 0.317740)
12
- amatch 0.960000 0.010000 0.970000 ( 0.964803)
9
+ jaro_winkler 0.350000 0.010000 0.360000 ( 0.345293)
10
+ fuzzystringmatch 0.140000 0.000000 0.140000 ( 0.138711)
11
+ hotwater 0.310000 0.000000 0.310000 ( 0.306498)
12
+ amatch 0.960000 0.000000 0.960000 ( 0.961509)
@@ -37,7 +37,7 @@ static char matrix_find(Matrix matrix, unsigned long long code_1, unsigned long
37
37
 
38
38
  Option option_new(){
39
39
  Option opt;
40
- opt.ignore_case = 0;
40
+ opt.ignore_case = opt.adj_table = 0;
41
41
  opt.weight = 0.1;
42
42
  opt.threshold = 0.7;
43
43
  return opt;
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.3.0"
2
+ VERSION = "1.3.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang