jaro_winkler 1.3.6 → 1.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d48fca750c919028bdf13a2145bf431919a2c924
4
- data.tar.gz: 134da623e4f99a708cf83f7e2e961a8882ecffb8
3
+ metadata.gz: 7687918cbfcaa8ffb589f1d1383ac2e6611e235f
4
+ data.tar.gz: e8129522a42193023a77593dddfe3917eafe3f68
5
5
  SHA512:
6
- metadata.gz: 7476d0dcd726f7d6b2405c861b139f99b22c71e21e48607beab76c1a29a11b6e9398c33943e8bfcff042b18fdea2d66f858283df17bbfd4e351a3ca93ee8f05a
7
- data.tar.gz: 1c0a9c5b3521e761c752bc19a09a24052da3a46357195b9a86c86815418fc55bbe090b44db738bf086a44aabb1056f660a1584f1c8c7d01d124faf20623041b4
6
+ metadata.gz: be7befc2a7e5c5a2866ba7e4995bf99a0f9d75bfab3e01c806a1d39b10b0a27975f795f2330248e96e5673a8ce81d0ef01fa12a6d18f3f2cf73ff9de28764a60
7
+ data.tar.gz: 3183fb3c534e1c1820ee16cd6a81bfcd35899ffc586e5ac064f5e4deace965d5507b83ca1d4a1fe3d8d30cc894da72eb998f1119ec3223a61c50c072d7e2029d
@@ -27,10 +27,10 @@ double jaro_winkler_distance(char* short_str, int short_str_len, char* long_str,
27
27
  int window_size = long_codes_len/2 - 1;
28
28
  if(window_size < 0) window_size = 0;
29
29
 
30
- char short_codes_flag[MAX_WORD_LENGTH];
31
- char long_codes_flag[MAX_WORD_LENGTH];
32
- memset(short_codes_flag, 0, MAX_WORD_LENGTH);
33
- memset(long_codes_flag, 0, MAX_WORD_LENGTH);
30
+ char short_codes_flag[short_str_len];
31
+ char long_codes_flag[long_str_len];
32
+ memset(short_codes_flag, 0, short_str_len);
33
+ memset(long_codes_flag, 0, long_str_len);
34
34
 
35
35
  // count number of matching characters
36
36
  int match_count = 0;
@@ -2,7 +2,6 @@
2
2
  #define LIBJARO_JARO_H
3
3
 
4
4
  #define SWAP(x, y) do{ __typeof__(x) SWAP = x; x = y; y = SWAP; }while(0)
5
- #define MAX_WORD_LENGTH 64
6
5
  #define DEFAULT_WEIGHT 0.1
7
6
  #define DEFAULT_THRESHOLD 0.7
8
7
 
@@ -1,3 +1,3 @@
1
1
  module JaroWinkler
2
- VERSION = "1.3.6"
2
+ VERSION = "1.3.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 1.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jian Weihang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-22 00:00:00.000000000 Z
11
+ date: 2015-09-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -111,24 +111,12 @@ dependencies:
111
111
  description: It's a implementation of Jaro-Winkler distance algorithm, it uses C extension
112
112
  and will fallback to pure Ruby version in JRuby. Both implementation supports UTF-8
113
113
  string.
114
- email:
115
- - tonytonyjan@gmail.com
114
+ email: tonytonyjan@gmail.com
116
115
  executables: []
117
116
  extensions:
118
117
  - ext/jaro_winkler/extconf.rb
119
118
  extra_rdoc_files: []
120
119
  files:
121
- - ".gitignore"
122
- - ".rspec"
123
- - ".travis.yml"
124
- - Gemfile
125
- - LICENSE.txt
126
- - README.md
127
- - Rakefile
128
- - benchmark/native.rb
129
- - benchmark/native.txt
130
- - benchmark/pure.rb
131
- - benchmark/pure.txt
132
120
  - ext/jaro_winkler/adj_matrix.c
133
121
  - ext/jaro_winkler/adj_matrix.h
134
122
  - ext/jaro_winkler/code.c
@@ -138,14 +126,10 @@ files:
138
126
  - ext/jaro_winkler/jaro.h
139
127
  - ext/jaro_winkler/jaro_winkler.c
140
128
  - ext/jaro_winkler/murmur_hash2.c
141
- - jaro_winkler.gemspec
142
129
  - lib/jaro_winkler.rb
143
130
  - lib/jaro_winkler/adjusting_table.rb
144
131
  - lib/jaro_winkler/fallback.rb
145
132
  - lib/jaro_winkler/version.rb
146
- - spec/adjusting_table_spec.rb
147
- - spec/jaro_winkler_spec.rb
148
- - spec/spec_helper.rb
149
133
  homepage: https://github.com/tonytonyjan/jaro_winkler
150
134
  licenses:
151
135
  - MIT
@@ -166,12 +150,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
150
  version: '0'
167
151
  requirements: []
168
152
  rubyforge_project:
169
- rubygems_version: 2.4.6
153
+ rubygems_version: 2.4.5.1
170
154
  signing_key:
171
155
  specification_version: 4
172
156
  summary: Ruby & C implementation of Jaro-Winkler distance algorithm which both support
173
157
  UTF-8 string.
174
- test_files:
175
- - spec/adjusting_table_spec.rb
176
- - spec/jaro_winkler_spec.rb
177
- - spec/spec_helper.rb
158
+ test_files: []
data/.gitignore DELETED
@@ -1,14 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /Gemfile.lock
4
- /_yardoc/
5
- /coverage/
6
- /doc/
7
- /pkg/
8
- /spec/reports/
9
- /tmp/
10
- *.bundle
11
- *.so
12
- *.o
13
- *.a
14
- mkmf.log
data/.rspec DELETED
@@ -1,2 +0,0 @@
1
- --color
2
- --require spec_helper
@@ -1,6 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.2.0
4
- - 2.1.5
5
- - 2.0.0
6
- - 1.9.3
data/Gemfile DELETED
@@ -1,2 +0,0 @@
1
- source 'https://rubygems.org'
2
- gemspec
@@ -1,22 +0,0 @@
1
- Copyright (c) 2014 Jian Weihang
2
-
3
- MIT License
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md DELETED
@@ -1,129 +0,0 @@
1
- [![Build Status](https://travis-ci.org/tonytonyjan/jaro_winkler.svg?branch=master)](https://travis-ci.org/tonytonyjan/jaro_winkler)
2
-
3
- It's an implementation of the [Jaro-Winkler distance](http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) algorithm. It uses a C extension and falls back to a pure Ruby version on JRuby. Both of them support UTF-8 strings.
4
-
5
- # Installation
6
-
7
- ```
8
- gem install jaro_winkler
9
- ```
10
-
11
- # Usage
12
-
13
- ```ruby
14
- require 'jaro_winkler'
15
- JaroWinkler.distance "MARTHA", "MARHTA"
16
- # => 0.9611
17
- JaroWinkler.distance "MARTHA", "marhta", ignore_case: true
18
- # => 0.9611
19
- JaroWinkler.distance "MARTHA", "MARHTA", weight: 0.2
20
- # => 0.9778
21
-
22
- # Force the strategy
23
- JaroWinkler.c_distance "MARTHA", "MARHTA" # C extension
24
- JaroWinkler.r_distance "MARTHA", "MARHTA" # Pure Ruby
25
- ```
26
-
27
- ## Options
28
-
29
- Name | Type | Default | Note
30
- ----------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------
31
- ignore_case | boolean | false | All lower case characters are converted to upper case prior to the comparison.
32
- weight | number | 0.1 | A constant scaling factor for how much the score is adjusted upwards for having common prefixes.
33
- threshold | number | 0.7 | The prefix bonus is only added when the compared strings have a Jaro distance above the threshold.
34
- adj_table | boolean | false | The option is used to give partial credit for characters that may be errors due to known phonetic or character recognition errors. A typical example is to match the letter "O" with the number "0".
35
-
36
- # Adjusting Table
37
-
38
- ## Default Table
39
-
40
- ```
41
- ['A', 'E'], ['A', 'I'], ['A', 'O'], ['A', 'U'], ['B', 'V'], ['E', 'I'], ['E', 'O'], ['E', 'U'], ['I', 'O'], ['I', 'U'],
42
- ['O', 'U'], ['I', 'Y'], ['E', 'Y'], ['C', 'G'], ['E', 'F'], ['W', 'U'], ['W', 'V'], ['X', 'K'], ['S', 'Z'], ['X', 'S'],
43
- ['Q', 'C'], ['U', 'V'], ['M', 'N'], ['L', 'I'], ['Q', 'O'], ['P', 'R'], ['I', 'J'], ['2', 'Z'], ['5', 'S'], ['8', 'B'],
44
- ['1', 'I'], ['1', 'L'], ['0', 'O'], ['0', 'Q'], ['C', 'K'], ['G', 'J'], ['E', ' '], ['Y', ' '], ['S', ' ']
45
- ```
46
-
47
- ## How it works?
48
-
49
- Original Formula:
50
-
51
- ![origin](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%7B%5Ctext%7Bif%20%7Dm%3D0%7D%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
52
-
53
- where
54
-
55
- - `m` is the number of matching characters.
56
- - `t` is half the number of transpositions.
57
-
58
- With Adjusting Table:
59
-
60
- ![adj](https://chart.googleapis.com/chart?cht=tx&chs&chl=%5Cbegin%7Bcases%7D0%26%5Ctext%7Bif%20%7Dm%3D0%5C%5C%5Cfrac%7B1%7D%7B3%7D(%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs1%5Cright%7C%7D%2B%5Cfrac%7B%5Cfrac%7Bs%7D%7B10%7D%2Bm%7D%7B%5Cleft%7Cs2%5Cright%7C%7D%2B%5Cfrac%7Bm-t%7D%7Bm%7D)%26%5Ctext%7Bothers%7D%5Cend%7Bcases%7D)
61
-
62
- where
63
-
64
- - `s` is the number of nonmatching but similar characters.
65
-
66
- # Why This?
67
-
68
- There is another similar gem named [fuzzy-string-match](https://github.com/kiyoka/fuzzy-string-match), which also provides both C and Ruby versions.
69
-
70
- I reinvented this wheel because the naming in `fuzzy-string-match`, such as `getDistance`, breaks convention, and it contains some weird code like `a1 = s1.split( // )` (`s1.chars` would be better). Furthermore, it has bugs (see the tables below).
71
-
72
- # Compare with other gems
73
-
74
- | jaro_winkler | fuzzystringmatch | hotwater | amatch
75
- --------------- | ------------ | ---------------- | -------- | ------
76
- UTF-8 Support | **Yes** | Pure Ruby only | No | No
77
- Windows Support | **Yes** | | No | **Yes**
78
- Adjusting Table | **Yes** | No | No | No
79
- Native | **Yes** | **Yes** | **Yes** | **Yes**
80
- Pure Ruby | **Yes** | **Yes** | No | No
81
- Speed | Medium | **Fast** | Medium | Slow
82
- Bug Found | **Not Yet** | Yes | **Not Yet** | Yes
83
-
84
- For `Bug Found`, I made a rake task to build the table below; the source code is in the `Rakefile`:
85
-
86
- str_1 | str_2 | origin | jaro_winkler | fuzzystringmatch | hotwater | amatch
87
- --- | --- | --- | --- | --- | --- | ---
88
- "henka" | "henkan" | 0.9667 | 0.9667 | **0.9722** | 0.9667 | **0.9444**
89
- "al" | "al" | 1.0 | 1.0 | 1.0 | 1.0 | 1.0
90
- "martha" | "marhta" | 0.9611 | 0.9611 | 0.9611 | 0.9611 | **0.9444**
91
- "jones" | "johnson" | 0.8324 | 0.8324 | 0.8324 | 0.8324 | **0.7905**
92
- "abcvwxyz" | "cabvwxyz" | 0.9583 | 0.9583 | 0.9583 | 0.9583 | 0.9583
93
- "dwayne" | "duane" | 0.84 | 0.84 | 0.84 | 0.84 | **0.8222**
94
- "dixon" | "dicksonx" | 0.8133 | 0.8133 | 0.8133 | 0.8133 | **0.7667**
95
- "fvie" | "ten" | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
96
-
97
- - The origin result is from the [original C implementation by the author of the algorithm](http://web.archive.org/web/20100227020019/http://www.census.gov/geo/msb/stand/strcmp.c).
98
- - Test data are borrowed from [fuzzy-string-match's rspec file](https://github.com/kiyoka/fuzzy-string-match/blob/master/test/basic_pure_spec.rb).
99
-
100
- # Benchmark
101
-
102
- ## Pure Ruby
103
-
104
- | user | system | total | real
105
- ---------------- | -------- | -------- | -------- | ------------
106
- jaro_winkler | 1.300000 | 0.000000 | 1.300000 | ( 1.299802)
107
- fuzzystringmatch | 1.510000 | 0.000000 | 1.510000 | ( 1.510136)
108
-
109
- - jaro_winkler (1.3.1)
110
- - fuzzy-string-match (0.9.6)
111
-
112
- ## Native
113
-
114
- | user | system | total | real
115
- ---------------- | -------- | -------- | -------- | ------------
116
- jaro_winkler | 0.350000 | 0.010000 | 0.360000 | ( 0.345293)
117
- fuzzystringmatch | 0.140000 | 0.000000 | 0.140000 | ( 0.138711)
118
- hotwater | 0.310000 | 0.000000 | 0.310000 | ( 0.306498)
119
- amatch | 0.960000 | 0.000000 | 0.960000 | ( 0.961509)
120
-
121
- - jaro_winkler (1.3.1)
122
- - fuzzy-string-match (0.9.6)
123
- - hotwater (0.1.2)
124
- - amatch (0.3.0)
125
-
126
- # Todo
127
-
128
- - Custom adjusting word table.
129
- - The algorithms used by the C and Ruby implementations are different.
data/Rakefile DELETED
@@ -1,51 +0,0 @@
1
- require "bundler/gem_tasks"
2
- require "rake/extensiontask"
3
- require 'rspec/core/rake_task'
4
-
5
- RSpec::Core::RakeTask.new(:spec)
6
- Rake::ExtensionTask.new("jaro_winkler") do |ext|
7
- ext.lib_dir = "lib/jaro_winkler"
8
- end
9
-
10
- task default: [:compile, :spec]
11
-
12
- desc 'type can be "native" or "pure"'
13
- task :benchmark, :type do |t, args|
14
- args.with_defaults(type: :all)
15
- ROOT_PATH = File.expand_path('..', __FILE__)
16
- LIB_PATH = File.join(ROOT_PATH, 'lib')
17
- BENCHMARK_PATH = File.join(ROOT_PATH, 'benchmark')
18
-
19
- files = File.join(BENCHMARK_PATH, args[:type] == :all ? '*.rb' : "#{args[:type]}.rb")
20
- Dir[files].each do |path|
21
- output_path = File.join(BENCHMARK_PATH, File.basename(path, '*.rb').sub('.rb', '.txt'))
22
- cmd = "RUBYLIB=#{LIB_PATH} ruby #{path}"
23
- puts cmd
24
- output = `#{cmd}`
25
- File.write(output_path, output)
26
- end
27
- end
28
-
29
- task :compare do
30
- require 'jaro_winkler'
31
- require 'fuzzystringmatch'
32
- require 'hotwater'
33
- require 'amatch'
34
- @ary = [['henka', 'henkan'], ['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten'], ['San Francisco', 'Santa Monica']]
35
- table = []
36
- table << %w[str_1 str_2 jaro_winkler fuzzystringmatch hotwater amatch]
37
- table << %w[--- --- --- --- --- ---]
38
- jarow = FuzzyStringMatch::JaroWinkler.create(:native)
39
- @ary.each do |str_1, str_2|
40
- table << ["\"#{str_1}\"", "\"#{str_2}\"", JaroWinkler.distance(str_1, str_2).round(4), jarow.getDistance(str_1, str_2).round(4), Hotwater.jaro_winkler_distance(str_1, str_2).round(4), Amatch::Jaro.new(str_1).match(str_2).round(4)]
41
- end
42
- col_len = []
43
- table.first.length.times{ |i| col_len << table.map{ |row| row[i].to_s.length }.max }
44
- table.first.each_with_index{ |title, i| "%-#{col_len[i]}s" % title }
45
- table.each_with_index do |row|
46
- row.each_with_index do |col, i|
47
- row[i] = "%-#{col_len[i]}s" % col.to_s
48
- end
49
- end
50
- table.each{|row| puts row.join(' | ')}
51
- end
@@ -1,26 +0,0 @@
1
- require 'benchmark'
2
- require 'jaro_winkler'
3
- require 'fuzzystringmatch'
4
- require 'hotwater'
5
- require 'amatch'
6
- ary = [['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten']]
7
-
8
- n = 100000
9
- Benchmark.bmbm do |x|
10
- x.report 'jaro_winkler' do
11
- n.times{ ary.each{ |str1, str2| JaroWinkler.c_distance(str1, str2) } }
12
- end
13
-
14
- x.report 'fuzzystringmatch' do
15
- jarow = FuzzyStringMatch::JaroWinkler.create(:native)
16
- n.times{ ary.each{ |str1, str2| jarow.getDistance(str1, str2) } }
17
- end
18
-
19
- x.report 'hotwater' do
20
- n.times{ ary.each{ |str1, str2| Hotwater.jaro_winkler_distance(str1, str2) } }
21
- end
22
-
23
- x.report 'amatch' do
24
- n.times{ ary.each{ |str1, str2| Amatch::Jaro.new(str1).match(str2) } }
25
- end
26
- end
@@ -1,12 +0,0 @@
1
- Rehearsal ----------------------------------------------------
2
- jaro_winkler 0.350000 0.000000 0.350000 ( 0.348383)
3
- fuzzystringmatch 0.330000 0.020000 0.350000 ( 0.354850)
4
- hotwater 0.280000 0.000000 0.280000 ( 0.278819)
5
- amatch 0.980000 0.000000 0.980000 ( 0.983325)
6
- ------------------------------------------- total: 1.960000sec
7
-
8
- user system total real
9
- jaro_winkler 0.330000 0.000000 0.330000 ( 0.331923)
10
- fuzzystringmatch 0.140000 0.000000 0.140000 ( 0.135655)
11
- hotwater 0.280000 0.000000 0.280000 ( 0.276728)
12
- amatch 0.930000 0.010000 0.940000 ( 0.932943)
@@ -1,16 +0,0 @@
1
- require 'benchmark'
2
- require 'jaro_winkler'
3
- require 'fuzzystringmatch'
4
- ary = [['al', 'al'], ['martha', 'marhta'], ['jones', 'johnson'], ['abcvwxyz', 'cabvwxyz'], ['dwayne', 'duane'], ['dixon', 'dicksonx'], ['fvie', 'ten']]
5
-
6
- n = 10000
7
- Benchmark.bmbm do |x|
8
- x.report 'jaro_winkler' do
9
- n.times{ ary.each{ |str1, str2| JaroWinkler.r_distance(str1, str2) } }
10
- end
11
-
12
- x.report 'fuzzystringmatch' do
13
- jarow = FuzzyStringMatch::JaroWinkler.create(:pure)
14
- n.times{ ary.each{ |str1, str2| jarow.getDistance(str1, str2) } }
15
- end
16
- end
@@ -1,8 +0,0 @@
1
- Rehearsal ----------------------------------------------------
2
- jaro_winkler 1.300000 0.000000 1.300000 ( 1.300723)
3
- fuzzystringmatch 1.500000 0.010000 1.510000 ( 1.497842)
4
- ------------------------------------------- total: 2.810000sec
5
-
6
- user system total real
7
- jaro_winkler 1.300000 0.000000 1.300000 ( 1.299802)
8
- fuzzystringmatch 1.510000 0.000000 1.510000 ( 1.510136)
@@ -1,30 +0,0 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'jaro_winkler/fallback'
5
- require 'jaro_winkler/version'
6
-
7
- Gem::Specification.new do |spec|
8
- spec.name = "jaro_winkler"
9
- spec.version = JaroWinkler::VERSION
10
- spec.authors = ["Jian Weihang"]
11
- spec.email = ["tonytonyjan@gmail.com"]
12
- spec.extensions = ["ext/jaro_winkler/extconf.rb"]
13
- spec.summary = %q{Ruby & C implementation of Jaro-Winkler distance algorithm which both support UTF-8 string.}
14
- spec.description = %q{It's a implementation of Jaro-Winkler distance algorithm, it uses C extension and will fallback to pure Ruby version in JRuby. Both implementation supports UTF-8 string.}
15
- spec.homepage = "https://github.com/tonytonyjan/jaro_winkler"
16
- spec.license = "MIT"
17
-
18
- spec.files = `git ls-files -z`.split("\x0")
19
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
21
- spec.require_paths = ["lib"]
22
-
23
- spec.add_development_dependency "bundler", "~> 1.7"
24
- spec.add_development_dependency "rake", "~> 10.0"
25
- spec.add_development_dependency "rake-compiler"
26
- spec.add_development_dependency "rspec"
27
- spec.add_development_dependency "fuzzy-string-match"
28
- spec.add_development_dependency "hotwater"
29
- spec.add_development_dependency "amatch"
30
- end
@@ -1,8 +0,0 @@
1
- # spec/adjusting_table_spec.rb
2
- require 'jaro_winkler'
3
-
4
- describe JaroWinkler::DEFAULT_ADJ_TABLE do
5
- it 'should not be empty' do
6
- expect(JaroWinkler::DEFAULT_ADJ_TABLE).not_to be_empty
7
- end
8
- end
@@ -1,69 +0,0 @@
1
- # encoding: utf-8
2
- require 'jaro_winkler'
3
- include JaroWinkler
4
-
5
- shared_examples 'common' do |strategy|
6
- it 'works' do
7
- expect(send(strategy, 'henka','henkan')).to be_within(0.0001).of(0.9667)
8
- expect(send(strategy, 'al','al')).to be_within(0.0001).of(1.0)
9
- expect(send(strategy, 'martha','marhta')).to be_within(0.0001).of(0.9611)
10
- expect(send(strategy, 'jones','johnson')).to be_within(0.0001).of(0.8323)
11
- expect(send(strategy, 'abcvwxyz','cabvwxyz')).to be_within(0.0001).of(0.9583)
12
- expect(send(strategy, 'dwayne','duane')).to be_within(0.0001).of(0.8400)
13
- expect(send(strategy, 'dixon','dicksonx')).to be_within(0.0001).of(0.8133)
14
- expect(send(strategy, 'fvie','ten')).to be_within(0.0001).of(0.0)
15
- expect(send(strategy, 'tony','tony')).to be_within(0.0001).of(1.0)
16
- expect(send(strategy, 'tonytonyjan','tonytonyjan')).to be_within(0.0001).of(1.0)
17
- expect(send(strategy, 'x','x')).to be_within(0.0001).of(1.0)
18
- expect(send(strategy, '','')).to be_within(0.0001).of(0.0)
19
- expect(send(strategy, 'tony','')).to be_within(0.0001).of(0.0)
20
- expect(send(strategy, '','tony')).to be_within(0.0001).of(0.0)
21
- expect(send(strategy, 'tonytonyjan','tony')).to be_within(0.0001).of(0.8727)
22
- expect(send(strategy, 'tony','tonytonyjan')).to be_within(0.0001).of(0.8727)
23
- end
24
-
25
- it 'works with UTF-8' do
26
- expect(send(strategy, '變形金剛4:絕跡重生','變形金剛4: 絕跡重生')).to be_within(0.0001).of(0.9818)
27
- expect(send(strategy, '連勝文','連勝丼')).to be_within(0.0001).of(0.8222)
28
- expect(send(strategy, '馬英九','馬英丸')).to be_within(0.0001).of(0.8222)
29
- expect(send(strategy, '良い','いい')).to be_within(0.0001).of(0.6666)
30
- end
31
-
32
- it 'sets ignore_case' do
33
- expect(send(strategy, 'MARTHA', 'marhta', ignore_case: true)).to be_within(0.0001).of(0.9611)
34
- end
35
-
36
- it 'sets weight' do
37
- expect(send(strategy, 'MARTHA', 'MARHTA', weight: 0.2)).to be_within(0.0001).of(0.9778)
38
- end
39
-
40
- it 'sets threshold' do
41
- expect(send(strategy, 'MARTHA', 'MARHTA', threshold: 0.99)).to be_within(0.0001).of(0.9445)
42
- end
43
-
44
-
45
- it 'works with adjusting table' do
46
- expect(send(strategy, 'HENKA', 'HENKAN', adj_table: true)).to be_within(0.0001).of(0.9667) # m=5, t=0, s=0
47
- expect(send(strategy, 'AL', 'AL', adj_table: true)).to be_within(0.0001).of(1.0) # m=2, t=0, s=0
48
- expect(send(strategy, 'MARTHA', 'MARHTA', adj_table: true)).to be_within(0.0001).of(0.9611) # m=6, t=1, s=0
49
- expect(send(strategy, 'JONES', 'JOHNSON', adj_table: true)).to be_within(0.0001).of(0.8598) # m=4, t=0, s=3
50
- expect(send(strategy, 'ABCVWXYZ', 'CABVWXYZ', adj_table: true)).to be_within(0.0001).of(0.9583) # m=8, t=1, s=0
51
- expect(send(strategy, 'DWAYNE', 'DUANE', adj_table: true)).to be_within(0.0001).of(0.8730) # m=4, t=0, s=3
52
- expect(send(strategy, 'DIXON', 'DICKSONX', adj_table: true)).to be_within(0.0001).of(0.8393) # m=4, t=0, s=3
53
- expect(send(strategy, 'FVIE', 'TEN', adj_table: true)).to be_within(0.0001).of(0.0)
54
- end
55
-
56
- context 'with weight exceeding 0.25' do
57
- it 'throws exception' do
58
- expect{ send(strategy, 'MARTHA', 'MARHTA', weight: 0.26) }.to raise_error
59
- end
60
- end
61
- end
62
-
63
- describe 'Pure Ruby' do
64
- include_examples 'common', :r_distance
65
- end
66
-
67
- describe 'C extention' do
68
- include_examples 'common', :c_distance
69
- end
@@ -1,89 +0,0 @@
1
- # This file was generated by the `rspec --init` command. Conventionally, all
2
- # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
- # The generated `.rspec` file contains `--require spec_helper` which will cause this
4
- # file to always be loaded, without a need to explicitly require it in any files.
5
- #
6
- # Given that it is always loaded, you are encouraged to keep this file as
7
- # light-weight as possible. Requiring heavyweight dependencies from this file
8
- # will add to the boot time of your test suite on EVERY test run, even for an
9
- # individual file that may not need all of that loaded. Instead, consider making
10
- # a separate helper file that requires the additional dependencies and performs
11
- # the additional setup, and require it from the spec files that actually need it.
12
- #
13
- # The `.rspec` file also contains a few flags that are not defaults but that
14
- # users commonly want.
15
- #
16
- # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
17
- RSpec.configure do |config|
18
- # rspec-expectations config goes here. You can use an alternate
19
- # assertion/expectation library such as wrong or the stdlib/minitest
20
- # assertions if you prefer.
21
- config.expect_with :rspec do |expectations|
22
- # This option will default to `true` in RSpec 4. It makes the `description`
23
- # and `failure_message` of custom matchers include text for helper methods
24
- # defined using `chain`, e.g.:
25
- # be_bigger_than(2).and_smaller_than(4).description
26
- # # => "be bigger than 2 and smaller than 4"
27
- # ...rather than:
28
- # # => "be bigger than 2"
29
- expectations.include_chain_clauses_in_custom_matcher_descriptions = true
30
- end
31
-
32
- # rspec-mocks config goes here. You can use an alternate test double
33
- # library (such as bogus or mocha) by changing the `mock_with` option here.
34
- config.mock_with :rspec do |mocks|
35
- # Prevents you from mocking or stubbing a method that does not exist on
36
- # a real object. This is generally recommended, and will default to
37
- # `true` in RSpec 4.
38
- mocks.verify_partial_doubles = true
39
- end
40
-
41
- # The settings below are suggested to provide a good initial experience
42
- # with RSpec, but feel free to customize to your heart's content.
43
- =begin
44
- # These two settings work together to allow you to limit a spec run
45
- # to individual examples or groups you care about by tagging them with
46
- # `:focus` metadata. When nothing is tagged with `:focus`, all examples
47
- # get run.
48
- config.filter_run :focus
49
- config.run_all_when_everything_filtered = true
50
-
51
- # Limits the available syntax to the non-monkey patched syntax that is recommended.
52
- # For more details, see:
53
- # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
54
- # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
55
- # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
56
- config.disable_monkey_patching!
57
-
58
- # This setting enables warnings. It's recommended, but in some cases may
59
- # be too noisy due to issues in dependencies.
60
- config.warnings = true
61
-
62
- # Many RSpec users commonly either run the entire suite or an individual
63
- # file, and it's useful to allow more verbose output when running an
64
- # individual spec file.
65
- if config.files_to_run.one?
66
- # Use the documentation formatter for detailed output,
67
- # unless a formatter has already been configured
68
- # (e.g. via a command-line flag).
69
- config.default_formatter = 'doc'
70
- end
71
-
72
- # Print the 10 slowest examples and example groups at the
73
- # end of the spec run, to help surface which specs are running
74
- # particularly slow.
75
- config.profile_examples = 10
76
-
77
- # Run specs in random order to surface order dependencies. If you find an
78
- # order dependency and want to debug it, you can fix the order by providing
79
- # the seed, which is printed after each run.
80
- # --seed 1234
81
- config.order = :random
82
-
83
- # Seed global randomization in this process using the `--seed` CLI option.
84
- # Setting this allows you to use `--seed` to deterministically reproduce
85
- # test failures related to randomization by passing the same `--seed` value
86
- # as the one that triggered the failure.
87
- Kernel.srand config.seed
88
- =end
89
- end