jaro_winkler 1.3.6 → 1.3.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d48fca750c919028bdf13a2145bf431919a2c924
4
- data.tar.gz: 134da623e4f99a708cf83f7e2e961a8882ecffb8
3
+ metadata.gz: 7687918cbfcaa8ffb589f1d1383ac2e6611e235f
4
+ data.tar.gz: e8129522a42193023a77593dddfe3917eafe3f68
5
5
  SHA512:
6
- metadata.gz: 7476d0dcd726f7d6b2405c861b139f99b22c71e21e48607beab76c1a29a11b6e9398c33943e8bfcff042b18fdea2d66f858283df17bbfd4e351a3ca93ee8f05a
7
- data.tar.gz: 1c0a9c5b3521e761c752bc19a09a24052da3a46357195b9a86c86815418fc55bbe090b44db738bf086a44aabb1056f660a1584f1c8c7d01d124faf20623041b4
6
+ metadata.gz: be7befc2a7e5c5a2866ba7e4995bf99a0f9d75bfab3e01c806a1d39b10b0a27975f795f2330248e96e5673a8ce81d0ef01fa12a6d18f3f2cf73ff9de28764a60
7
+ data.tar.gz: 3183fb3c534e1c1820ee16cd6a81bfcd35899ffc586e5ac064f5e4deace965d5507b83ca1d4a1fe3d8d30cc894da72eb998f1119ec3223a61c50c072d7e2029d
@@ -27,10 +27,10 @@ double jaro_winkler_distance(char* short_str, int short_str_len, char* long_str,
27
27
  int window_size = long_codes_len/2 - 1;
28
28
  if(window_size < 0) window_size = 0;
29
29
 
30
- char short_codes_flag[MAX_WORD_LENGTH];
31
- char long_codes_flag[MAX_WORD_LENGTH];
32
- memset(short_codes_flag, 0, MAX_WORD_LENGTH);
33
- memset(long_codes_flag, 0, MAX_WORD_LENGTH);
30
+ char short_codes_flag[short_str_len];
31
+ char long_codes_flag[long_str_len];
32
+ memset(short_codes_flag, 0, short_str_len);
33
+ memset(long_codes_flag, 0, long_str_len);
34
34
 
35
35
  // count number of matching characters
36
36
  int match_count = 0;
@@ -2,7 +2,6 @@
2
2
  #define LIBJARO_JARO_H
3
3
 
4
4
  #define SWAP(x, y) do{ __typeof__(x) SWAP = x; x = y; y = SWAP; }while(0)
5
- #define MAX_WORD_LENGTH 64
6
5
  #define DEFAULT_WEIGHT 0.1
7
6
  #define DEFAULT_THRESHOLD 0.7
8
7
 
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.3.6"
2
+ VERSION = "1.3.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 1.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -111,24 +111,12 @@ dependencies:
111
111
  description: It's a implementation of Jaro-Winkler distance algorithm, it uses C extension
112
112
  and will fallback to pure Ruby version in JRuby. Both implementation supports UTF-8
113
113
  string.
114
- email:
115
- - tonytonyjan@gmail.com
114
+ email: tonytonyjan@gmail.com
116
115
  executables: []
117
116
  extensions:
118
117
  - ext/jaro_winkler/extconf.rb
119
118
  extra_rdoc_files: []
120
119
  files:
121
- - ".gitignore"
122
- - ".rspec"
123
- - ".travis.yml"
124
- - Gemfile
125
- - LICENSE.txt
126
- - README.md
127
- - Rakefile
128
- - benchmark/native.rb
129
- - benchmark/native.txt
130
- - benchmark/pure.rb
131
- - benchmark/pure.txt
132
120
  - ext/jaro_winkler/adj_matrix.c
133
121
  - ext/jaro_winkler/adj_matrix.h
134
122
  - ext/jaro_winkler/code.c
@@ -138,14 +126,10 @@ files:
138
126
  - ext/jaro_winkler/jaro.h
139
127
  - ext/jaro_winkler/jaro_winkler.c
140
128
  - ext/jaro_winkler/murmur_hash2.c
141
- - jaro_winkler.gemspec
142
129
  - lib/jaro_winkler.rb
143
130
  - lib/jaro_winkler/adjusting_table.rb
144
131
  - lib/jaro_winkler/fallback.rb
145
132
  - lib/jaro_winkler/version.rb
146
- - spec/adjusting_table_spec.rb
147
- - spec/jaro_winkler_spec.rb
148
- - spec/spec_helper.rb
149
133
  homepage: https://github.com/tonytonyjan/jaro_winkler
150
134
  licenses:
151
135
  - MIT
@@ -166,12 +150,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
150
  version: '0'
167
151
  requirements: []
168
152
  rubyforge_project:
169
- rubygems_version: 2.4.6
153
+ rubygems_version: 2.4.5.1
170
154
  signing_key:
171
155
  specification_version: 4
172
156
  summary: Ruby & C implementation of Jaro-Winkler distance algorithm which both support
173
157
  UTF-8 string.
174
- test_files:
175
- - spec/adjusting_table_spec.rb
176
- - spec/jaro_winkler_spec.rb
177
- - spec/spec_helper.rb
158
+ test_files: []
data/.gitignore DELETED
@@ -1,14 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --color
2
- --require spec_helper
@@ -1,6 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.2.0
4
- - 2.1.5
5
- - 2.0.0
6
- - 1.9.3
data/Gemfile DELETED
@@ -1,2 +0,0 @@
1
- source 'https://rubygems.org'
2
- gemspec
@@ -1,22 +0,0 @@
1
- Copyright (c) 2014 Jian Weihang
2
-
3
- MIT License
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md DELETED
@@ -1,129 +0,0 @@
1
- [![Build Status](https://travis-ci.org/tonytonyjan/jaro_winkler.svg?branch=master)](https://travis-ci.org/tonytonyjan/jaro_winkler)
2
-
3
- It's an implementation of the [Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) algorithm. It uses a C extension and falls back to a pure Ruby version on JRuby. Both versions support UTF-8 strings.
4
-
5
- # Installation
6
-
7
- ```
8
- gem install jaro_winkler
9
- ```
10
-
11
- # Usage
12
-
13
- ```ruby
14
- require 'jaro_winkler'
15
- JaroWinkler.distance "MARTHA", "MARHTA"
16
- # => 0.9611
17
- JaroWinkler.distance "MARTHA", "marhta", ignore_case: true
18
- # => 0.9611
19
- JaroWinkler.distance "MARTHA", "MARHTA", weight: 0.2
20
- # => 0.9778
21
-
22
- # Force the strategy
23
- JaroWinkler.c_distance "MARTHA", "MARHTA" # C extension
24
- JaroWinkler.r_distance "MARTHA", "MARHTA" # Pure Ruby
25
- ```
26
-
27
- ## Options
28
-
29
- Name | Type | Default | Note
30
- ----------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------
31
- ignore_case | boolean | false | All lower case characters are converted to upper case prior to the comparison.
32
- weight | number | 0.1 | A constant scaling factor for how much the score is adjusted upwards for having common prefixes.
33
- threshold | number | 0.7 | The prefix bonus is only added when the compared strings have a Jaro distance above the threshold.
34
- adj_table | boolean | false | The option is used to give partial credit for characters that may be errors due to known phonetic or character recognition errors. A typical example is to match the letter "O" with the number "0".
35
-
36
- # Adjusting Table
37
-
38
- ## Default Table
39
-
40
- ```
41
- ['A', 'E'], ['A', 'I'], ['A', 'O'], ['A', 'U'], ['B', 'V'], ['E', 'I'], ['E', 'O'], ['E', 'U'], ['I', 'O'], ['I', 'U'],
42
- ['O', 'U'], ['I', 'Y'], ['E', 'Y'], ['C', 'G'], ['E', 'F'], ['W', 'U'], ['W', 'V'], ['X', 'K'], ['S', 'Z'], ['X', 'S'],
43
- ['Q', 'C'], ['U', 'V'], ['M', 'N'], ['L', 'I'], ['Q', 'O'], ['P', 'R'], ['I', 'J'], ['2', 'Z'], ['5', 'S'], ['8', 'B'],
44
- ['1', 'I'], ['1', 'L'], ['0', 'O'], ['0', 'Q'], ['C', 'K'], ['G', 'J'], ['E', ' '], ['Y', ' '], ['S', ' ']
45
- ```
46
-
47
- ## How it works?
48
-
49
- Original Formula:
50
-
51
- ![origin](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%7B%5Ctext%7Bif%20%7Dm%3D0%7D%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
52
-
53
- where
54
-
55
- - `m` is the number of matching characters.
56
- - `t` is half the number of transpositions.
57
-
58
- With Adjusting Table:
59
-
60
- ![adj](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%5Ctext%7Bif%20%7Dm%3D0%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
61
-
62
- where
63
-
64
- - `s` is the number of nonmatching but similar characters.
65
-
66
- # Why This?
67
-
68
- There is another similar gem named [fuzzy-string-match](https://github.com/kiyoka/fuzzy-string-match), which also provides both C and Ruby versions.
69
-
70
- I reinvented this wheel because the naming in `fuzzy-string-match`, such as `getDistance`, breaks convention; it contains some weird code like `a1 = s1.split( // )` (`s1.chars` could be better); and, furthermore, it's bugged (see the tables below).
71
-
72
- # Compare with other gems
73
-
74
- | jaro_winkler | fuzzystringmatch | hotwater | amatch
75
- --------------- | ------------ | ---------------- | -------- | ------
76
- UTF-8 Support | **Yes** | Pure Ruby only | No | No
77
- Windows Support | **Yes** | | No | **Yes**
78
- Adjusting Table | **Yes** | No | No | No
79
- Native | **Yes** | **Yes** | **Yes** | **Yes**
80
- Pure Ruby | **Yes** | **Yes** | No | No
81
- Speed | Medium | **Fast** | Medium | Slow
82
- Bug Found | **Not Yet** | Yes | **Not Yet** | Yes
83
-
84
- For `Bug Found`, I made a rake task to build the table below; the source code is in `Rakefile`:
85
-
86
- str_1 | str_2 | origin | jaro_winkler | fuzzystringmatch | hotwater | amatch
87
- --- | --- | --- | --- | --- | --- | ---
88
- "henka" | "henkan" | 0.9667 | 0.9667 | **0.9722** | 0.9667 | **0.9444**
89
- "al" | "al" | 1.0 | 1.0 | 1.0 | 1.0 | 1.0
90
- "martha" | "marhta" | 0.9611 | 0.9611 | 0.9611 | 0.9611 | **0.9444**
91
- "jones" | "johnson" | 0.8324 | 0.8324 | 0.8324 | 0.8324 | **0.7905**
92
- "abcvwxyz" | "cabvwxyz" | 0.9583 | 0.9583 | 0.9583 | 0.9583 | 0.9583
93
- "dwayne" | "duane" | 0.84 | 0.84 | 0.84 | 0.84 | **0.8222**
94
- "dixon" | "dicksonx" | 0.8133 | 0.8133 | 0.8133 | 0.8133 | **0.7667**
95
- "fvie" | "ten" | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
96
-
97
- - The origin result is from the [original C implementation by the author of the algorithm](http://web.archive.org/web/20100227020019/http://www.census.gov/geo/msb/stand/strcmp.c).
98
- - Test data are borrowed from [fuzzy-string-match's rspec file](https://github.com/kiyoka/fuzzy-string-match/blob/master/test/basic_pure_spec.rb).
99
-
100
- # Benchmark
101
-
102
- ## Pure Ruby
103
-
104
- | user | system | total | real
105
- ---------------- | -------- | -------- | -------- | ------------
106
- jaro_winkler | 1.300000 | 0.000000 | 1.300000 | ( 1.299802)
107
- fuzzystringmatch | 1.510000 | 0.000000 | 1.510000 | ( 1.510136)
108
-
109
- - jaro_winkler (1.3.1)
110
- - fuzzy-string-match (0.9.6)
111
-
112
- ## Native
113
-
114
- | user | system | total | real
115
- ---------------- | -------- | -------- | -------- | ------------
116
- jaro_winkler | 0.350000 | 0.010000 | 0.360000 | ( 0.345293)
117
- fuzzystringmatch | 0.140000 | 0.000000 | 0.140000 | ( 0.138711)
118
- hotwater | 0.310000 | 0.000000 | 0.310000 | ( 0.306498)
119
- amatch | 0.960000 | 0.000000 | 0.960000 | ( 0.961509)
120
-
121
- - jaro_winkler (1.3.1)
122
- - fuzzy-string-match (0.9.6)
123
- - hotwater (0.1.2)
124
- - amatch (0.3.0)
125
-
126
- # Todo
127
-
128
- - Custom adjusting word table.
129
- - The algorithms used by the C and Ruby versions are different.
data/Rakefile DELETED
@@ -1,51 +0,0 @@
1
- require "bundler/gem_tasks"
2
- require "rake/extensiontask"
3
- require 'rspec/core/rake_task'
4
-
5
- RSpec::Core::RakeTask.new(:spec)
6
- Rake::ExtensionTask.new("jaro_winkler") do |ext|
7
- ext.lib_dir = "lib/jaro_winkler"
8
- end
9
-
10
- task default: [:compile, :spec]
11
-
12
- desc 'type can be "native" or "pure"'
13
- task :benchmark, :type do |t, args|
14
- args.with_defaults(type: :all)
15
- ROOT_PATH = File.expand_path('..', __FILE__)
16
- LIB_PATH = File.join(ROOT_PATH, 'lib')
17
- BENCHMARK_PATH = File.join(ROOT_PATH, 'benchmark')
18
-
19
- files = File.join(BENCHMARK_PATH, args[:type] == :all ? '*.rb' : "#{args[:type]}.rb")
20
- Dir[files].each do |path|
21
- output_path = File.join(BENCHMARK_PATH, File.basename(path, '*.rb').sub('.rb', '.txt'))
22
- cmd = "RUBYLIB=#{LIB_PATH} ruby #{path}"
23
- puts cmd
24
- output = `#{cmd}`
25
- File.write(output_path, output)
26
- end
27
- end
28
-
29
- task :compare do
30
- require 'jaro_winkler'
31
- require 'fuzzystringmatch'
32
- require 'hotwater'
33
- require 'amatch'
34
- @ary = [['henka', 'henkan'], ['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten'], ['San Francisco', 'Santa Monica']]
35
- table = []
36
- table << %w[str_1 str_2 jaro_winkler fuzzystringmatch hotwater amatch]
37
- table << %w[--- --- --- --- --- ---]
38
- jarow = FuzzyStringMatch::JaroWinkler.create(:native)
39
- @ary.each do |str_1, str_2|
40
- table << ["\"#{str_1}\"", "\"#{str_2}\"", JaroWinkler.distance(str_1, str_2).round(4), jarow.getDistance(str_1, str_2).round(4), Hotwater.jaro_winkler_distance(str_1, str_2).round(4), Amatch::Jaro.new(str_1).match(str_2).round(4)]
41
- end
42
- col_len = []
43
- table.first.length.times{ |i| col_len << table.map{ |row| row[i].to_s.length }.max }
44
- table.first.each_with_index{ |title, i| "%-#{col_len[i]}s" % title }
45
- table.each_with_index do |row|
46
- row.each_with_index do |col, i|
47
- row[i] = "%-#{col_len[i]}s" % col.to_s
48
- end
49
- end
50
- table.each{|row| puts row.join(' | ')}
51
- end
@@ -1,26 +0,0 @@
1
- require 'benchmark'
2
- require 'jaro_winkler'
3
- require 'fuzzystringmatch'
4
- require 'hotwater'
5
- require 'amatch'
6
- ary = [['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten']]
7
-
8
- n = 100000
9
- Benchmark.bmbm do |x|
10
- x.report 'jaro_winkler' do
11
- n.times{ ary.each{ |str1, str2| JaroWinkler.c_distance(str1, str2) } }
12
- end
13
-
14
- x.report 'fuzzystringmatch' do
15
- jarow = FuzzyStringMatch::JaroWinkler.create(:native)
16
- n.times{ ary.each{ |str1, str2| jarow.getDistance(str1, str2) } }
17
- end
18
-
19
- x.report 'hotwater' do
20
- n.times{ ary.each{ |str1, str2| Hotwater.jaro_winkler_distance(str1, str2) } }
21
- end
22
-
23
- x.report 'amatch' do
24
- n.times{ ary.each{ |str1, str2| Amatch::Jaro.new(str1).match(str2) } }
25
- end
26
- end
@@ -1,12 +0,0 @@
1
- Rehearsal ----------------------------------------------------
2
- jaro_winkler 0.350000 0.000000 0.350000 ( 0.348383)
3
- fuzzystringmatch 0.330000 0.020000 0.350000 ( 0.354850)
4
- hotwater 0.280000 0.000000 0.280000 ( 0.278819)
5
- amatch 0.980000 0.000000 0.980000 ( 0.983325)
6
- ------------------------------------------- total: 1.960000sec
7
-
8
- user system total real
9
- jaro_winkler 0.330000 0.000000 0.330000 ( 0.331923)
10
- fuzzystringmatch 0.140000 0.000000 0.140000 ( 0.135655)
11
- hotwater 0.280000 0.000000 0.280000 ( 0.276728)
12
- amatch 0.930000 0.010000 0.940000 ( 0.932943)
@@ -1,16 +0,0 @@
1
- require 'benchmark'
2
- require 'jaro_winkler'
3
- require 'fuzzystringmatch'
4
- ary = [['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten']]
5
-
6
- n = 10000
7
- Benchmark.bmbm do |x|
8
- x.report 'jaro_winkler' do
9
- n.times{ ary.each{ |str1, str2| JaroWinkler.r_distance(str1, str2) } }
10
- end
11
-
12
- x.report 'fuzzystringmatch' do
13
- jarow = FuzzyStringMatch::JaroWinkler.create(:pure)
14
- n.times{ ary.each{ |str1, str2| jarow.getDistance(str1, str2) } }
15
- end
16
- end
@@ -1,8 +0,0 @@
1
- Rehearsal ----------------------------------------------------
2
- jaro_winkler 1.300000 0.000000 1.300000 ( 1.300723)
3
- fuzzystringmatch 1.500000 0.010000 1.510000 ( 1.497842)
4
- ------------------------------------------- total: 2.810000sec
5
-
6
- user system total real
7
- jaro_winkler 1.300000 0.000000 1.300000 ( 1.299802)
8
- fuzzystringmatch 1.510000 0.000000 1.510000 ( 1.510136)
@@ -1,30 +0,0 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'jaro_winkler/fallback'
5
- require 'jaro_winkler/version'
6
-
7
- Gem::Specification.new do |spec|
8
- spec.name = "jaro_winkler"
9
- spec.version = JaroWinkler::VERSION
10
- spec.authors = ["Jian Weihang"]
11
- spec.email = ["tonytonyjan@gmail.com"]
12
- spec.extensions = ["ext/jaro_winkler/extconf.rb"]
13
- spec.summary = %q{Ruby & C implementation of Jaro-Winkler distance algorithm which both support UTF-8 string.}
14
- spec.description = %q{It's a implementation of Jaro-Winkler distance algorithm, it uses C extension and will fallback to pure Ruby version in JRuby. Both implementation supports UTF-8 string.}
15
- spec.homepage = "https://github.com/tonytonyjan/jaro_winkler"
16
- spec.license = "MIT"
17
-
18
- spec.files = `git ls-files -z`.split("\x0")
19
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
21
- spec.require_paths = ["lib"]
22
-
23
- spec.add_development_dependency "bundler", "~> 1.7"
24
- spec.add_development_dependency "rake", "~> 10.0"
25
- spec.add_development_dependency "rake-compiler"
26
- spec.add_development_dependency "rspec"
27
- spec.add_development_dependency "fuzzy-string-match"
28
- spec.add_development_dependency "hotwater"
29
- spec.add_development_dependency "amatch"
30
- end
@@ -1,8 +0,0 @@
1
- # spec/adjusting_table_spec.rb
2
- require 'jaro_winkler'
3
-
4
- describe JaroWinkler::DEFAULT_ADJ_TABLE do
5
- it 'should not be empty' do
6
- expect(JaroWinkler::DEFAULT_ADJ_TABLE).not_to be_empty
7
- end
8
- end
@@ -1,69 +0,0 @@
1
- # encoding: utf-8
2
- require 'jaro_winkler'
3
- include JaroWinkler
4
-
5
- shared_examples 'common' do |strategy|
6
- it 'works' do
7
- expect(send(strategy, 'henka','henkan')).to be_within(0.0001).of(0.9667)
8
- expect(send(strategy, 'al','al')).to be_within(0.0001).of(1.0)
9
- expect(send(strategy, 'martha','marhta')).to be_within(0.0001).of(0.9611)
10
- expect(send(strategy, 'jones','johnson')).to be_within(0.0001).of(0.8323)
11
- expect(send(strategy, 'abcvwxyz','cabvwxyz')).to be_within(0.0001).of(0.9583)
12
- expect(send(strategy, 'dwayne','duane')).to be_within(0.0001).of(0.8400)
13
- expect(send(strategy, 'dixon','dicksonx')).to be_within(0.0001).of(0.8133)
14
- expect(send(strategy, 'fvie','ten')).to be_within(0.0001).of(0.0)
15
- expect(send(strategy, 'tony','tony')).to be_within(0.0001).of(1.0)
16
- expect(send(strategy, 'tonytonyjan','tonytonyjan')).to be_within(0.0001).of(1.0)
17
- expect(send(strategy, 'x','x')).to be_within(0.0001).of(1.0)
18
- expect(send(strategy, '','')).to be_within(0.0001).of(0.0)
19
- expect(send(strategy, 'tony','')).to be_within(0.0001).of(0.0)
20
- expect(send(strategy, '','tony')).to be_within(0.0001).of(0.0)
21
- expect(send(strategy, 'tonytonyjan','tony')).to be_within(0.0001).of(0.8727)
22
- expect(send(strategy, 'tony','tonytonyjan')).to be_within(0.0001).of(0.8727)
23
- end
24
-
25
- it 'works with UTF-8' do
26
- expect(send(strategy, '變形金剛4:絕跡重生','變形金剛4: 絕跡重生')).to be_within(0.0001).of(0.9818)
27
- expect(send(strategy, '連勝文','連勝丼')).to be_within(0.0001).of(0.8222)
28
- expect(send(strategy, '馬英九','馬英丸')).to be_within(0.0001).of(0.8222)
29
- expect(send(strategy, '良い','いい')).to be_within(0.0001).of(0.6666)
30
- end
31
-
32
- it 'sets ignore_case' do
33
- expect(send(strategy, 'MARTHA', 'marhta', ignore_case: true)).to be_within(0.0001).of(0.9611)
34
- end
35
-
36
- it 'sets weight' do
37
- expect(send(strategy, 'MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
38
- end
39
-
40
- it 'sets threshold' do
41
- expect(send(strategy, 'MARTHA', 'MARHTA', threshold: 0.99)).to be_within(0.0001).of(0.9445)
42
- end
43
-
44
-
45
- it 'works with adjusting table' do
46
- expect(send(strategy, 'HENKA', 'HENKAN', adj_table: true)).to be_within(0.0001).of(0.9667) # m=5, t=0, s=0
47
- expect(send(strategy, 'AL', 'AL', adj_table: true)).to be_within(0.0001).of(1.0) # m=2, t=0, s=0
48
- expect(send(strategy, 'MARTHA', 'MARHTA', adj_table: true)).to be_within(0.0001).of(0.9611) # m=6, t=1, s=0
49
- expect(send(strategy, 'JONES', 'JOHNSON', adj_table: true)).to be_within(0.0001).of(0.8598) # m=4, t=0, s=3
50
- expect(send(strategy, 'ABCVWXYZ', 'CABVWXYZ', adj_table: true)).to be_within(0.0001).of(0.9583) # m=8, t=1, s=0
51
- expect(send(strategy, 'DWAYNE', 'DUANE', adj_table: true)).to be_within(0.0001).of(0.8730) # m=4, t=0, s=3
52
- expect(send(strategy, 'DIXON', 'DICKSONX', adj_table: true)).to be_within(0.0001).of(0.8393) # m=4, t=0, s=3
53
- expect(send(strategy, 'FVIE', 'TEN', adj_table: true)).to be_within(0.0001).of(0.0)
54
- end
55
-
56
- context 'with weight exceeding 0.25' do
57
- it 'throws exception' do
58
- expect{ send(strategy, 'MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
59
- end
60
- end
61
- end
62
-
63
- describe 'Pure Ruby' do
64
- include_examples 'common', :r_distance
65
- end
66
-
67
- describe 'C extention' do
68
- include_examples 'common', :c_distance
69
- end
@@ -1,89 +0,0 @@
1
- # This file was generated by the `rspec --init` command. Conventionally, all
2
- # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
- # The generated `.rspec` file contains `--require spec_helper` which will cause this
4
- # file to always be loaded, without a need to explicitly require it in any files.
5
- #
6
- # Given that it is always loaded, you are encouraged to keep this file as
7
- # light-weight as possible. Requiring heavyweight dependencies from this file
8
- # will add to the boot time of your test suite on EVERY test run, even for an
9
- # individual file that may not need all of that loaded. Instead, consider making
10
- # a separate helper file that requires the additional dependencies and performs
11
- # the additional setup, and require it from the spec files that actually need it.
12
- #
13
- # The `.rspec` file also contains a few flags that are not defaults but that
14
- # users commonly want.
15
- #
16
- # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
17
- RSpec.configure do |config|
18
- # rspec-expectations config goes here. You can use an alternate
19
- # assertion/expectation library such as wrong or the stdlib/minitest
20
- # assertions if you prefer.
21
- config.expect_with :rspec do |expectations|
22
- # This option will default to `true` in RSpec 4. It makes the `description`
23
- # and `failure_message` of custom matchers include text for helper methods
24
- # defined using `chain`, e.g.:
25
- # be_bigger_than(2).and_smaller_than(4).description
26
- # # => "be bigger than 2 and smaller than 4"
27
- # ...rather than:
28
- # # => "be bigger than 2"
29
- expectations.include_chain_clauses_in_custom_matcher_descriptions = true
30
- end
31
-
32
- # rspec-mocks config goes here. You can use an alternate test double
33
- # library (such as bogus or mocha) by changing the `mock_with` option here.
34
- config.mock_with :rspec do |mocks|
35
- # Prevents you from mocking or stubbing a method that does not exist on
36
- # a real object. This is generally recommended, and will default to
37
- # `true` in RSpec 4.
38
- mocks.verify_partial_doubles = true
39
- end
40
-
41
- # The settings below are suggested to provide a good initial experience
42
- # with RSpec, but feel free to customize to your heart's content.
43
- =begin
44
- # These two settings work together to allow you to limit a spec run
45
- # to individual examples or groups you care about by tagging them with
46
- # `:focus` metadata. When nothing is tagged with `:focus`, all examples
47
- # get run.
48
- config.filter_run :focus
49
- config.run_all_when_everything_filtered = true
50
-
51
- # Limits the available syntax to the non-monkey patched syntax that is recommended.
52
- # For more details, see:
53
- # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
54
- # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
55
- # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
56
- config.disable_monkey_patching!
57
-
58
- # This setting enables warnings. It's recommended, but in some cases may
59
- # be too noisy due to issues in dependencies.
60
- config.warnings = true
61
-
62
- # Many RSpec users commonly either run the entire suite or an individual
63
- # file, and it's useful to allow more verbose output when running an
64
- # individual spec file.
65
- if config.files_to_run.one?
66
- # Use the documentation formatter for detailed output,
67
- # unless a formatter has already been configured
68
- # (e.g. via a command-line flag).
69
- config.default_formatter = 'doc'
70
- end
71
-
72
- # Print the 10 slowest examples and example groups at the
73
- # end of the spec run, to help surface which specs are running
74
- # particularly slow.
75
- config.profile_examples = 10
76
-
77
- # Run specs in random order to surface order dependencies. If you find an
78
- # order dependency and want to debug it, you can fix the order by providing
79
- # the seed, which is printed after each run.
80
- # --seed 1234
81
- config.order = :random
82
-
83
- # Seed global randomization in this process using the `--seed` CLI option.
84
- # Setting this allows you to use `--seed` to deterministically reproduce
85
- # test failures related to randomization by passing the same `--seed` value
86
- # as the one that triggered the failure.
87
- Kernel.srand config.seed
88
- =end
89
- end