redis-asm 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +114 -0
- data/Rakefile +7 -0
- data/lib/redis-asm.rb +2 -0
- data/lib/redis/asm.rb +26 -0
- data/lib/redis/asm/version.rb +5 -0
- data/lib/redis_asm.lua +239 -0
- data/redis-asm.gemspec +26 -0
- data/spec/redis/asm/asm_spec.rb +129 -0
- data/spec/redis/asm/test_data.txt +131 -0
- data/spec/spec_helper.rb +3 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0614a530aee374d91c9b3fdbea83c6ac4020c588
|
4
|
+
data.tar.gz: c8e1a86c8f7dcf5b4a6ef455f47d50ada66af54b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 70a2cc486e140531cc11310965f0313870af99e512cc2f0d825d401202a1d95992a931eb86440016070b6eed76ba9ad238d195c812946a8679c72e2ba0eaebeb
|
7
|
+
data.tar.gz: 6467734e8eb1d868b35b80826afaa88bd46971f98451f135dba7faf7ed7588bb66a44cf0901b1618c9b3dca5538a7142cd17af73482964fd24185473abdbbaba
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Masato Yamaguchi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# Redis::Asm
|
2
|
+
|
3
|
+
##### Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
|
4
|
+
|
5
|
+
## Description
|
6
|
+
Fast ASM (Approximate String Matching) that calculates edit distance within Redis collections such as ZSET, HASH, LIST and SET, using a Lua script.
|
7
|
+
Redis::Asm lets you search multi-byte characters correctly, because it recognizes the lead bytes of UTF-8 strings.
|
8
|
+
|
9
|
+
## Prerequisites
|
10
|
+
This library requires a Redis server with Lua scripting support (EVAL and EVALSHA commands). This support was added in Redis 2.6.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'redis-asm'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install redis-asm
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
To initialize `Redis::Asm` with host and port:
|
31
|
+
```ruby
|
32
|
+
redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
|
33
|
+
asm = Redis::Asm.new(redis)
|
34
|
+
```
|
35
|
+
To execute fuzzy search from Redis collections:
|
36
|
+
```ruby
|
37
|
+
require 'json'
|
38
|
+
|
39
|
+
# asm.search(KEY, NEEDLE, MAX_RESULTS=10)
|
40
|
+
|
41
|
+
# To search from SET or LIST
|
42
|
+
|
43
|
+
result = asm.search(SET_OR_LIST_KEY, 'example')
|
44
|
+
puts JSON.parse(result).to_yaml
|
45
|
+
# ---
|
46
|
+
# - haystack: example
|
47
|
+
# match: 1
|
48
|
+
# - haystack: samples
|
49
|
+
# match: 0.5
|
50
|
+
# - haystack: abampere
|
51
|
+
# match: 0.42857142857143
|
52
|
+
.
|
53
|
+
.
|
54
|
+
|
55
|
+
# To search from HASH
|
56
|
+
|
57
|
+
# Redis::Asm matches HASH values
|
58
|
+
# each item has 'field' property
|
59
|
+
|
60
|
+
result = asm.search(HASH_KEY, '東京都')
|
61
|
+
puts JSON.parse(result).to_yaml
|
62
|
+
# ---
|
63
|
+
# - haystack: "東京都"
|
64
|
+
# field: '126'
|
65
|
+
# match: 1
|
66
|
+
# - haystack: "京都府"
|
67
|
+
# field: '125'
|
68
|
+
# match: 0.33333333333333
|
69
|
+
|
70
|
+
# To search from ZSET
|
71
|
+
# each item has 'score' property
|
72
|
+
|
73
|
+
result = asm.search(ZSET_KEY, '東京都')
|
74
|
+
puts JSON.parse(result).to_yaml
|
75
|
+
# ---
|
76
|
+
# - haystack: "東京都"
|
77
|
+
# score: '126'
|
78
|
+
# match: 1
|
79
|
+
# - haystack: "京都府"
|
80
|
+
# score: '125'
|
81
|
+
# match: 0.33333333333333
|
82
|
+
```
|
83
|
+
## Performance
|
84
|
+
|
85
|
+
- PC: MBP 2.6 GHz Intel Core i5 16GB DDR3 RAM
|
86
|
+
- OS: Mac OSX 10.9.5
|
87
|
+
- ruby 2.1.5p273 [x86_64-darwin13.0]
|
88
|
+
- Redis server v=2.6.17 bits=64
|
89
|
+
|
90
|
+
```bash
|
91
|
+
# search from 10,000 items of SETS
|
92
|
+
# each item contains UTF-8 characters, and consists of between 1 and 30 chars.
|
93
|
+
% ruby search_bench.rb stone
|
94
|
+
user system total real
|
95
|
+
0.000000 0.000000 0.000000 ( 0.038567)
|
96
|
+
% ruby search_bench.rb 東京都
|
97
|
+
user system total real
|
98
|
+
0.000000 0.000000 0.000000 ( 0.022540)
|
99
|
+
|
100
|
+
% ruby search_bench.rb 弊社といたしましては
|
101
|
+
user system total real
|
102
|
+
0.000000 0.000000 0.000000 ( 0.063109)
|
103
|
+
|
104
|
+
```
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
## Contributing
|
109
|
+
|
110
|
+
1. Fork it ( https://github.com/krt/redis-asm/fork )
|
111
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
112
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
113
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
114
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/redis-asm.rb
ADDED
data/lib/redis/asm.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'redis'
|
2
|
+
require "redis/asm/version"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
class Redis
  # Client-side wrapper around the bundled +redis_asm.lua+ script, which runs
  # an approximate (fuzzy) string search inside the Redis server.
  class Asm

    # Directory that holds the Lua script (two levels above this file).
    SCRIPT_DIR = File.expand_path('../../', __FILE__)
    # Full source of the Lua script, read once when the class is loaded.
    SCRIPT = File.read File.join(SCRIPT_DIR, "redis_asm.lua")
    # SHA1 digest of the script source, used to address it through EVALSHA.
    SHA1 = Digest::SHA1.hexdigest SCRIPT

    # @param redis [Redis] client used to run the script
    def initialize(redis)
      @redis = redis
    end

    # Runs the fuzzy search script against +key+.
    #
    # The script is first addressed by its SHA1 via EVALSHA. If the server
    # replies with a NOSCRIPT error, the full script source is sent with EVAL
    # instead.
    #
    # @param key [String] name of the ZSET, SET, HASH or LIST to search
    # @param needle [String] search word
    # @param max_results [Integer] maximum number of results, defaults to 10
    # @return [String, nil] whatever the script returns: a JSON string of
    #   matches, or nil when the script bails out (e.g. unsupported key type)
    # @raise [Redis::CommandError] any server error other than NOSCRIPT
    def search(key, needle, max_results=10)
      @redis.evalsha(SHA1, :keys => [key], :argv => [needle, max_results])
    rescue Redis::CommandError => e
      # Only a missing script is recoverable; everything else propagates.
      raise unless e.message =~ /NOSCRIPT/

      @redis.eval(SCRIPT, :keys => [key], :argv => [needle, max_results])
    end
  end
end
|
data/lib/redis_asm.lua
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
--[[
|
2
|
+
|
3
|
+
redis_asm.lua
|
4
|
+
approximate string matching for redis
|
5
|
+
|
6
|
+
Copyright (c) 2015 Masato Yamaguchi
|
7
|
+
|
8
|
+
This software is released under the MIT License.
|
9
|
+
|
10
|
+
http://opensource.org/licenses/mit-license.php
|
11
|
+
|
12
|
+
|
13
|
+
USAGE:
|
14
|
+
> eval "(content of this script)" 1 KEY NEEDLE MAX_RESULTS
|
15
|
+
|
16
|
+
@param {string} KEY Name of key. Accepts ZSET, SET, HASH and LIST.
|
17
|
+
@param {string} NEEDLE Search word.
|
18
|
+
@param {number} MAX_RESULTS Max size of results, defaults to 10.
|
19
|
+
@return {string} Result as json string.
|
20
|
+
]]
|
21
|
+
|
22
|
+
local i
local haystacks = {}
local opt_data = {} -- score for ZSET, or field for HASH.

-- The TYPE reply is indexed by "ok" to obtain the type name.
local key_type = redis.call('TYPE', KEYS[1])["ok"]
if not key_type then return nil end

if key_type == 'zset' then
  -- Flat reply that alternates member, score, member, score, ...
  local members = redis.call('ZRANGE', KEYS[1], 0, -1, 'WITHSCORES')
  for pos = 1, #members, 2 do
    haystacks[#haystacks + 1] = members[pos]
    opt_data[#opt_data + 1] = members[pos + 1]
  end
elseif key_type == 'list' then
  haystacks = redis.call('LRANGE', KEYS[1], 0, -1)
elseif key_type == 'set' then
  haystacks = redis.call('SMEMBERS', KEYS[1])
elseif key_type == 'hash' then
  -- Flat reply that alternates field, value, field, value, ...
  -- The values are searched; the fields are kept as extra data.
  local pairs_flat = redis.call('HGETALL', KEYS[1])
  for pos = 1, #pairs_flat, 2 do
    opt_data[#opt_data + 1] = pairs_flat[pos]
    haystacks[#haystacks + 1] = pairs_flat[pos + 1]
  end
else
  -- Any other key type is not searchable.
  return nil
end

local needle = ARGV[1]
if not needle then return nil end

-- Falls back to 10 when ARGV[2] is missing or not numeric.
local max_results = tonumber(ARGV[2]) or 10

-- Local aliases for the library functions used below.
local cjson = cjson
local s_byte, s_sub, s_find = string.byte, string.sub, string.find
local m_min, m_max = math.min, math.max
local m_floor, m_ceil = math.floor, math.ceil
local t_sort = table.sort
|
67
|
+
|
68
|
+
|
69
|
+
-- mapping utf-8 leading-byte to byte offset
-- (how many further bytes belong to the character that starts with it).
-- Indexed by byte value 1..247:
--   1..191   -> 0
--   192..223 -> 1
--   224..239 -> 2
--   240..247 -> 3
-- Byte values outside 1..247 have no entry.
local byte_offsets = {}
for lead = 1, 247 do
  local extra
  if lead >= 240 then
    extra = 3
  elseif lead >= 224 then
    extra = 2
  elseif lead >= 192 then
    extra = 1
  else
    extra = 0
  end
  byte_offsets[lead] = extra
end
|
84
|
+
|
85
|
+
--[[
 * Cut a utf-8 string into one chunk per character, using the leading byte
 * of each character to decide how many bytes the chunk spans.
 * NOTE(review): scanning stops as soon as the cursor reaches the last byte,
 * so a character that starts on the final byte (e.g. a trailing single-byte
 * character) is not emitted. Existing expected scores depend on this.
 * @param {string}
 * @return {Array.<string>} Array of multi-byte strings.
--]]
local function split_into_utf8_bytes(str)
  local chunks = {}
  local total = #str
  local pos = 1

  while pos < total do
    -- Unknown lead bytes are treated as single-byte characters.
    local extra = byte_offsets[s_byte(str, pos)] or 0
    chunks[#chunks + 1] = s_sub(str, pos, pos + extra)
    pos = pos + extra + 1
  end

  return chunks
end
|
111
|
+
|
112
|
+
--[[
 * Check if haystack includes any character in needle.
 * Each needle character is searched as plain text, so characters that are
 * special in Lua patterns (".", "%", "(", "[", ...) match only themselves.
 * @param {string} haystack
 * @param {Array.<string>} utf_needle needle split into characters
 * @return {boolean} true if haystack includes at least one needle character
--]]
local function haystack_includes_needle_char(haystack, utf_needle)
  for i = 1, #utf_needle do
    -- 4th argument `true` turns off pattern matching.
    if s_find(haystack, utf_needle[i], 1, true) then return true end
  end
  return false
end
|
124
|
+
|
125
|
+
-- Single row of the edit-distance matrix, reused between calls.
local cache = {}

--[[
 * Calculate match score using levenshtein distance.
 * The score is 1 - distance / (length of the longer input).
 * @param {Array.<string>} str haystack split into characters
 * @param {Array.<string>} needle needle split into characters
 * @param {boolean} should_cutoff if true, stop calculating
     when the result is certain to be lower than lowest_score
 * @param {number|nil} lowest_score
 * @return {number|nil} match score(0..1), or nil when cut off
--]]
local function levenshtein_score(str, needle, should_cutoff, lowest_score)
  local length = #str
  local length_needle = #needle
  local longer_length = m_max(length, length_needle)

  -- Two empty inputs are identical; this also avoids dividing by zero.
  if longer_length == 0 then return 1 end
  -- Exactly one empty input: the distance is the other input's length.
  if length == 0 or length_needle == 0 then return 0 end

  local cutoff_distance
  if should_cutoff and lowest_score then
    cutoff_distance = m_ceil((1 - lowest_score) * longer_length) + 1
  end

  -- Row 0: the distance between an empty needle prefix and the first
  -- `index` characters of str is `index`.
  for index = 1, length do
    cache[index] = index
  end

  local result
  for index_needle = 1, length_needle do
    local code = needle[index_needle]
    -- `distance` starts as the diagonal neighbour for column 1.
    local distance = index_needle - 1
    -- `result` is the left neighbour. Starting one below the real first
    -- column value is safe because the diagonal candidate never exceeds it.
    result = index_needle - 1
    local row_min

    for index = 1, length do
      local distance_needle = (code == str[index]) and distance or distance + 1
      distance = cache[index]
      -- Minimum of left + 1, up + 1 and the diagonal candidate.
      if distance > result then
        result = (distance_needle > result) and result + 1 or distance_needle
      else
        result = (distance_needle > distance) and distance + 1 or distance_needle
      end
      cache[index] = result

      if not row_min or result < row_min then row_min = result end
    end

    -- The final distance can never drop below the smallest value of a row,
    -- so once a whole row exceeds the cutoff the score is below lowest_score.
    if cutoff_distance and row_min > cutoff_distance then
      return nil
    end
  end

  return 1 - (result / longer_length)
end
|
172
|
+
|
173
|
+
local scores = {}
local utf_needle = split_into_utf8_bytes(needle)
local lowest_score, utf_word, longer_length, score
local should_cutoff = false

-- Orders score entries from the best match to the worst.
local function by_score_desc(a, b)
  return a.score > b.score
end

-- main loop.
for i = 1, #haystacks do
  -- Cheap pre-filter: skip haystacks that share no character with the needle.
  if haystack_includes_needle_char(haystacks[i], utf_needle) then
    utf_word = split_into_utf8_bytes(haystacks[i])

    -- Only haystacks at least as long as the needle are scored.
    if #utf_word >= #utf_needle then
      longer_length = #utf_word

      -- Plain-text substring test (4th argument `true`), so characters that
      -- are special in Lua patterns are matched literally.
      if s_find(haystacks[i], needle, 1, true) then
        score = #utf_needle * (1 / longer_length)
      else
        score = levenshtein_score(utf_word, utf_needle, should_cutoff, lowest_score)
      end

      if score and not(score == 0) then
        if #scores > max_results then
          -- Enough candidates collected: from now on only keep entries that
          -- beat the current max_results-th best score.
          should_cutoff = true
          t_sort(scores, by_score_desc)
          lowest_score = scores[max_results].score
          if score > lowest_score then
            scores[#scores + 1] = {score = score, idx = i}
          end
        else
          scores[#scores + 1] = {score = score, idx = i}
        end
      end
    end
  end

end

t_sort(scores, by_score_desc)

local result = {}
local output_length = m_min(#scores, max_results)

for i = 1, output_length do
  local item = {}
  item['match'] = scores[i].score
  item['haystack'] = haystacks[scores[i].idx]
  -- opt_data holds the score (ZSET) or field (HASH) of the same entry.
  if key_type == 'zset' then
    item['score'] = opt_data[scores[i].idx]
  elseif key_type == 'hash' then
    item['field'] = opt_data[scores[i].idx]
  end
  result[#result + 1] = item
end

local text = cjson.encode(result)

return(text)
|
239
|
+
|
data/redis-asm.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'redis/asm/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata for redis-asm.
Gem::Specification.new do |spec|
  spec.name          = "redis-asm"
  spec.version       = Redis::Asm::VERSION
  spec.authors       = ["Masato Yamaguchi"]
  spec.email         = ["karateka2000@gmail.com"]
  spec.summary       = "Fast fuzzy string search on Redis using Lua. UTF-8 Ready."
  spec.description   = "Fast ASM(Approximate String Matching) by calculating edit distance within the collections such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides you to search multi-byte characters correctly, because it recognizes lead-byte of UTF-8 strings."
  spec.homepage      = "http://github.com/krt/redis-asm"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  # digest/sha1 is a require path in Ruby's standard library, not a gem name,
  # so it is deliberately not declared as a dependency here.
  spec.add_dependency 'redis', '~> 3.0'
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'json'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
REDIS_PORT = ENV['REDIS_PORT'] || 6379
|
6
|
+
REDIS_HOST = ENV['REDIS_HOST'] || 'localhost'
|
7
|
+
|
8
|
+
redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
|
9
|
+
asm = Redis::Asm.new(redis)
|
10
|
+
|
11
|
+
SKEY = 'redis:asm:testing:set'
|
12
|
+
ZKEY = 'redis:asm:testing:zset'
|
13
|
+
HKEY = 'redis:asm:testing:hash'
|
14
|
+
LKEY = 'redis:asm:testing:list'
|
15
|
+
|
16
|
+
describe Redis::Asm do
|
17
|
+
|
18
|
+
# Loads the fixture words and stores them under all four key types.
# ZSET scores and HASH fields are the 1-based line numbers of the fixture.
before :all do
  data_path = File.expand_path('../test_data.txt', __FILE__)
  test_data = File.read(data_path).split("\n")
  numbered = test_data.each_with_index.map { |item, idx| [idx + 1, item] }
  zdata = numbered
  hdata = Hash[numbered]

  # Issue the commands on the yielded pipeline object rather than on the
  # outer client.
  redis.pipelined do |r|
    r.sadd SKEY, test_data
    r.zadd ZKEY, zdata
    r.mapped_hmset HKEY, hdata
    test_data.each { |item| r.rpush LKEY, item }
  end
end
|
33
|
+
|
34
|
+
after :all do
|
35
|
+
redis.del ZKEY
|
36
|
+
redis.del HKEY
|
37
|
+
redis.del SKEY
|
38
|
+
redis.del LKEY
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'has a version number' do
|
42
|
+
expect(Redis::Asm::VERSION).not_to be nil
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'responds to search method' do
|
46
|
+
expect(asm.respond_to?(:search)).to eq(true)
|
47
|
+
end
|
48
|
+
|
49
|
+
context 'execute fuzzy searching on Redis SET or LIST' do
|
50
|
+
let(:result_set) {JSON.parse(asm.search(SKEY, 'example'))}
|
51
|
+
let(:result_list) {JSON.parse(asm.search(LKEY, 'example'))}
|
52
|
+
|
53
|
+
it "result has exactly matched string" do
|
54
|
+
expect(result_set.first).to eq({"haystack"=>"example", "match"=>1})
|
55
|
+
expect(result_list.first).to eq({"haystack"=>"example", "match"=>1})
|
56
|
+
end
|
57
|
+
|
58
|
+
it "result has fuzzy matched string" do
|
59
|
+
expect(result_set[1]).to eq({"haystack"=>"samples", "match"=>0.5})
|
60
|
+
expect(result_list[1]).to eq({"haystack"=>"samples", "match"=>0.5})
|
61
|
+
end
|
62
|
+
|
63
|
+
it "result size must be default limit(10)" do
|
64
|
+
expect(result_set.size).to eq 10
|
65
|
+
expect(result_list.size).to eq 10
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
context 'execute fuzzy searching on Redis SET or LIST using multi-byte string' do
|
70
|
+
let(:result_set) {JSON.parse(asm.search(SKEY, '東京都'))}
|
71
|
+
let(:result_list) {JSON.parse(asm.search(LKEY, '東京都'))}
|
72
|
+
|
73
|
+
it "result has exactly matched string" do
|
74
|
+
expect(result_set.first).to eq({"haystack"=>"東京都", "match"=>1})
|
75
|
+
expect(result_list.first).to eq({"haystack"=>"東京都", "match"=>1})
|
76
|
+
end
|
77
|
+
|
78
|
+
it "result has fuzzy matched string" do
|
79
|
+
expect(result_set[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
|
80
|
+
expect(result_list[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
|
81
|
+
end
|
82
|
+
|
83
|
+
it "result size must be matched item count" do
|
84
|
+
expect(result_set.size).to eq 2
|
85
|
+
expect(result_list.size).to eq 2
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
context 'execute fuzzy searching on Redis ZSET or HASH' do
|
90
|
+
let(:result_zset) {JSON.parse(asm.search(ZKEY, 'example'))}
|
91
|
+
let(:result_hash) {JSON.parse(asm.search(HKEY, 'example'))}
|
92
|
+
|
93
|
+
it "result has exactly matched string, zset has 'score' and hash has 'field'" do
|
94
|
+
expect(result_zset.first).to eq({"haystack"=>"example", "score"=>"114", "match"=>1})
|
95
|
+
expect(result_hash.first).to eq({"haystack"=>"example", "field"=>"114", "match"=>1})
|
96
|
+
end
|
97
|
+
|
98
|
+
it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
|
99
|
+
expect(result_zset[1]).to eq({"haystack"=>"samples", "score"=>"119", "match"=>0.5})
|
100
|
+
expect(result_hash[1]).to eq({"haystack"=>"samples", "field"=>"119", "match"=>0.5})
|
101
|
+
end
|
102
|
+
|
103
|
+
it "result size must be default limit(10)" do
|
104
|
+
expect(result_zset.size).to eq 10
|
105
|
+
expect(result_hash.size).to eq 10
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
context 'execute fuzzy searching on Redis ZSET or HASH using multi-byte string' do
|
110
|
+
let(:result_zset) {JSON.parse(asm.search(ZKEY, '東京都'))}
|
111
|
+
let(:result_hash) {JSON.parse(asm.search(HKEY, '東京都'))}
|
112
|
+
|
113
|
+
it "result has exactly matched string, zset has 'score' and hash has 'field'" do
|
114
|
+
expect(result_zset.first).to eq({"haystack"=>"東京都", "score"=>"126", "match"=>1})
|
115
|
+
expect(result_hash.first).to eq({"haystack"=>"東京都", "field"=>"126", "match"=>1})
|
116
|
+
end
|
117
|
+
|
118
|
+
it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
|
119
|
+
expect(result_zset[1]).to eq({"haystack"=>"京都府", "score"=>"125", "match"=>0.33333333333333})
|
120
|
+
expect(result_hash[1]).to eq({"haystack"=>"京都府", "field"=>"125", "match"=>0.33333333333333})
|
121
|
+
end
|
122
|
+
|
123
|
+
it "result size must be matched item count" do
|
124
|
+
expect(result_zset.size).to eq 2
|
125
|
+
expect(result_hash.size).to eq 2
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
1ab2cd34ef5g6
|
2
|
+
a
|
3
|
+
aa
|
4
|
+
aah
|
5
|
+
aahed
|
6
|
+
aahing
|
7
|
+
aahs
|
8
|
+
aal
|
9
|
+
aalii
|
10
|
+
aaliis
|
11
|
+
aals
|
12
|
+
aardvark
|
13
|
+
aardvarks
|
14
|
+
aardwolf
|
15
|
+
aardwolves
|
16
|
+
aargh
|
17
|
+
aarrgh
|
18
|
+
aarrghh
|
19
|
+
aarti
|
20
|
+
aartis
|
21
|
+
aas
|
22
|
+
aasvogel
|
23
|
+
aasvogels
|
24
|
+
ab
|
25
|
+
aba
|
26
|
+
abac
|
27
|
+
abaca
|
28
|
+
abacas
|
29
|
+
abaci
|
30
|
+
aback
|
31
|
+
abacs
|
32
|
+
abacterial
|
33
|
+
abactinal
|
34
|
+
abactinally
|
35
|
+
abactor
|
36
|
+
abactors
|
37
|
+
abacus
|
38
|
+
abacuses
|
39
|
+
abaft
|
40
|
+
abaka
|
41
|
+
abakas
|
42
|
+
abalone
|
43
|
+
abalones
|
44
|
+
abamp
|
45
|
+
abampere
|
46
|
+
abamperes
|
47
|
+
abamps
|
48
|
+
aband
|
49
|
+
abanded
|
50
|
+
abanding
|
51
|
+
abandon
|
52
|
+
abandoned
|
53
|
+
abandonedly
|
54
|
+
abandonee
|
55
|
+
abandonees
|
56
|
+
abandoner
|
57
|
+
abandoners
|
58
|
+
abandoning
|
59
|
+
abandonment
|
60
|
+
abandonments
|
61
|
+
abandons
|
62
|
+
abandonware
|
63
|
+
abandonwares
|
64
|
+
abands
|
65
|
+
abapical
|
66
|
+
abas
|
67
|
+
abase
|
68
|
+
abased
|
69
|
+
abasedly
|
70
|
+
abasement
|
71
|
+
abasements
|
72
|
+
abaser
|
73
|
+
abasers
|
74
|
+
abases
|
75
|
+
abash
|
76
|
+
abashed
|
77
|
+
abashedly
|
78
|
+
abashes
|
79
|
+
abashing
|
80
|
+
abashless
|
81
|
+
abashment
|
82
|
+
abashments
|
83
|
+
abasia
|
84
|
+
abasias
|
85
|
+
abasing
|
86
|
+
abask
|
87
|
+
abatable
|
88
|
+
abate
|
89
|
+
abated
|
90
|
+
abatement
|
91
|
+
abatements
|
92
|
+
abater
|
93
|
+
abaters
|
94
|
+
abates
|
95
|
+
abating
|
96
|
+
abatis
|
97
|
+
abatises
|
98
|
+
abator
|
99
|
+
abators
|
100
|
+
abattis
|
101
|
+
abattises
|
102
|
+
abattoir
|
103
|
+
abattoirs
|
104
|
+
abc
|
105
|
+
abcdefg
|
106
|
+
ac
|
107
|
+
axc
|
108
|
+
b
|
109
|
+
bc
|
110
|
+
cat
|
111
|
+
cow
|
112
|
+
difference
|
113
|
+
distance
|
114
|
+
example
|
115
|
+
frankenstein
|
116
|
+
javawasneat
|
117
|
+
kitten
|
118
|
+
levenshtein
|
119
|
+
samples
|
120
|
+
scalaisgreat
|
121
|
+
sitting
|
122
|
+
sturgeon
|
123
|
+
urgently
|
124
|
+
xabxcdxxefxgx
|
125
|
+
京都府
|
126
|
+
東京都
|
127
|
+
弊社佐藤
|
128
|
+
弊社と致しましては
|
129
|
+
貴社におかれましては
|
130
|
+
因為我是中國人所以我會說中文
|
131
|
+
因為我是英國人所以我會說英文
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redis-asm
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Masato Yamaguchi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: redis
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: digest/sha1
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Fast ASM(Approximate String Matching) by calucuating edit distance within
|
84
|
+
the collecitons such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides
|
85
|
+
you to search multi-byte characters correctly, because it recognizes lead-byte of
|
86
|
+
UTF-8 strings.
|
87
|
+
email:
|
88
|
+
- karateka2000@gmail.com
|
89
|
+
executables: []
|
90
|
+
extensions: []
|
91
|
+
extra_rdoc_files: []
|
92
|
+
files:
|
93
|
+
- ".gitignore"
|
94
|
+
- ".rspec"
|
95
|
+
- ".travis.yml"
|
96
|
+
- Gemfile
|
97
|
+
- LICENSE.txt
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- lib/redis-asm.rb
|
101
|
+
- lib/redis/asm.rb
|
102
|
+
- lib/redis/asm/version.rb
|
103
|
+
- lib/redis_asm.lua
|
104
|
+
- redis-asm.gemspec
|
105
|
+
- spec/redis/asm/asm_spec.rb
|
106
|
+
- spec/redis/asm/test_data.txt
|
107
|
+
- spec/spec_helper.rb
|
108
|
+
homepage: http://github.com/krt/redis-asm
|
109
|
+
licenses:
|
110
|
+
- MIT
|
111
|
+
metadata: {}
|
112
|
+
post_install_message:
|
113
|
+
rdoc_options: []
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
requirements: []
|
127
|
+
rubyforge_project:
|
128
|
+
rubygems_version: 2.2.2
|
129
|
+
signing_key:
|
130
|
+
specification_version: 4
|
131
|
+
summary: Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
|
132
|
+
test_files:
|
133
|
+
- spec/redis/asm/asm_spec.rb
|
134
|
+
- spec/redis/asm/test_data.txt
|
135
|
+
- spec/spec_helper.rb
|