redis-asm 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +114 -0
- data/Rakefile +7 -0
- data/lib/redis-asm.rb +2 -0
- data/lib/redis/asm.rb +26 -0
- data/lib/redis/asm/version.rb +5 -0
- data/lib/redis_asm.lua +239 -0
- data/redis-asm.gemspec +26 -0
- data/spec/redis/asm/asm_spec.rb +129 -0
- data/spec/redis/asm/test_data.txt +131 -0
- data/spec/spec_helper.rb +3 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0614a530aee374d91c9b3fdbea83c6ac4020c588
|
4
|
+
data.tar.gz: c8e1a86c8f7dcf5b4a6ef455f47d50ada66af54b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 70a2cc486e140531cc11310965f0313870af99e512cc2f0d825d401202a1d95992a931eb86440016070b6eed76ba9ad238d195c812946a8679c72e2ba0eaebeb
|
7
|
+
data.tar.gz: 6467734e8eb1d868b35b80826afaa88bd46971f98451f135dba7faf7ed7588bb66a44cf0901b1618c9b3dca5538a7142cd17af73482964fd24185473abdbbaba
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Masato Yamaguchi
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# Redis::Asm
|
2
|
+
|
3
|
+
##### Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
|
4
|
+
|
5
|
+
## Description
|
6
|
+
Fast ASM(Approximate String Matching) by calculating edit distance within the collections such as ZSET, HASH, LIST, SET on Redis using Lua script.
|
7
|
+
Redis::Asm lets you search multi-byte characters correctly, because it recognizes the lead byte of each UTF-8 character.
|
8
|
+
|
9
|
+
## Prerequisites
|
10
|
+
This library requires a Redis server with Lua scripting support (EVAL and EVALSHA commands). This support was added in Redis 2.6.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'redis-asm'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install redis-asm
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
To initialize `Redis::Asm` with host and port:
|
31
|
+
```ruby
|
32
|
+
redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
|
33
|
+
asm = Redis::Asm.new(redis)
|
34
|
+
```
|
35
|
+
To execute fuzzy search from Redis collections:
|
36
|
+
```ruby
|
37
|
+
require 'json'
|
38
|
+
|
39
|
+
# asm.search(KEY, NEEDLE, MAX_RESULTS=10)
|
40
|
+
|
41
|
+
# To search from SET or LIST
|
42
|
+
|
43
|
+
result = asm.search(SET_OR_LIST_KEY, 'example')
|
44
|
+
puts JSON.parse(result).to_yaml
|
45
|
+
# ---
|
46
|
+
# - haystack: example
|
47
|
+
# match: 1
|
48
|
+
# - haystack: samples
|
49
|
+
# match: 0.5
|
50
|
+
# - haystack: abampere
|
51
|
+
# match: 0.42857142857143
|
52
|
+
.
|
53
|
+
.
|
54
|
+
|
55
|
+
# To search from HASH
|
56
|
+
|
57
|
+
# Redis::Asm matches HASH values
|
58
|
+
# each item has 'field' property
|
59
|
+
|
60
|
+
result = asm.search(HASH_KEY, '東京都')
|
61
|
+
puts JSON.parse(result).to_yaml
|
62
|
+
# ---
|
63
|
+
# - haystack: "東京都"
|
64
|
+
# field: '126'
|
65
|
+
# match: 1
|
66
|
+
# - haystack: "京都府"
|
67
|
+
# field: '125'
|
68
|
+
# match: 0.33333333333333
|
69
|
+
|
70
|
+
# To search from ZSET
|
71
|
+
# each item has 'score' property
|
72
|
+
|
73
|
+
result = asm.search(ZSET_KEY, '東京都')
|
74
|
+
puts JSON.parse(result).to_yaml
|
75
|
+
# ---
|
76
|
+
# - haystack: "東京都"
|
77
|
+
# score: '126'
|
78
|
+
# match: 1
|
79
|
+
# - haystack: "京都府"
|
80
|
+
# score: '125'
|
81
|
+
# match: 0.33333333333333
|
82
|
+
```
|
83
|
+
## Performance
|
84
|
+
|
85
|
+
- PC: MBP 2.6 GHz Intel Core i5 16GB DDR3 RAM
|
86
|
+
- OS: Mac OSX 10.9.5
|
87
|
+
- ruby 2.1.5p273 [x86_64-darwin13.0]
|
88
|
+
- Redis server v=2.6.17 bits=64
|
89
|
+
|
90
|
+
```bash
|
91
|
+
# search from 10,000 items of SETS
|
92
|
+
# each item contains UTF-8 characters, and consists of between 1 and 30 chars.
|
93
|
+
% ruby search_bench.rb stone
|
94
|
+
user system total real
|
95
|
+
0.000000 0.000000 0.000000 ( 0.038567)
|
96
|
+
% ruby search_bench.rb 東京都
|
97
|
+
user system total real
|
98
|
+
0.000000 0.000000 0.000000 ( 0.022540)
|
99
|
+
|
100
|
+
% ruby search_bench.rb 弊社といたしましては
|
101
|
+
user system total real
|
102
|
+
0.000000 0.000000 0.000000 ( 0.063109)
|
103
|
+
|
104
|
+
```
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
## Contributing
|
109
|
+
|
110
|
+
1. Fork it ( https://github.com/krt/redis-asm/fork )
|
111
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
112
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
113
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
114
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/redis-asm.rb
ADDED
data/lib/redis/asm.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'redis'
|
2
|
+
require "redis/asm/version"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
class Redis
  # Fuzzy (approximate) string search over a Redis collection, executed on
  # the server by the bundled Lua script.
  class Asm

    # Directory two levels above this file, where the Lua script is looked up.
    SCRIPT_DIR = File.expand_path('../../', __FILE__)
    # Source of the Lua script, read once when this file is loaded.
    SCRIPT     = File.read File.join(SCRIPT_DIR, "redis_asm.lua")
    # SHA1 digest of the script source, used to invoke it through EVALSHA.
    SHA1       = Digest::SHA1.hexdigest SCRIPT

    # @param redis [Object] client object that responds to +evalsha+ and +eval+
    def initialize(redis)
      @redis = redis
    end

    # Runs the search script against +key+.
    #
    # EVALSHA is tried first; when the server answers with a NOSCRIPT error
    # the full script source is sent with EVAL instead.
    #
    # @param key [String] name of the key to search in
    # @param needle [String] search word
    # @param max_results [Integer] maximum number of results requested
    # @return whatever the client returns for the script call
    # @raise re-raises any error whose message does not mention NOSCRIPT
    def search(key, needle, max_results=10)
      @redis.evalsha(SHA1, :keys => [key], :argv => [needle, max_results])
    rescue StandardError => e
      # Only a missing-script error is recoverable; everything else propagates
      # unchanged (bare `raise` keeps the original exception and backtrace).
      raise unless e.message =~ /NOSCRIPT/
      @redis.eval(SCRIPT, :keys => [key], :argv => [needle, max_results])
    end
  end
end
|
data/lib/redis_asm.lua
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
--[[
|
2
|
+
|
3
|
+
redis_asm.lua
|
4
|
+
approximate string matching for redis
|
5
|
+
|
6
|
+
Copyright (c) 2015 Masato Yamaguchi
|
7
|
+
|
8
|
+
This software is released under the MIT License.
|
9
|
+
|
10
|
+
http://opensource.org/licenses/mit-license.php
|
11
|
+
|
12
|
+
|
13
|
+
USAGE:
|
14
|
+
> eval "(content of this script)" 1 KEY NEEDLE MAX_RESULTS
|
15
|
+
|
16
|
+
@param {string} KEY Name of key. Accepts ZSET, SET, HASH and LIST.
|
17
|
+
@param {string} NEEDLE Search word.
|
18
|
+
@param {number} MAX_RESULTS Max size of results, defaults to 10.
|
19
|
+
@return {string} Result as json string.
|
20
|
+
]]
|
21
|
+
|
22
|
+
local i
local haystacks = {}
local opt_data = {} -- score for ZSET, or field for HASH.

-- TYPE answers with a status reply; its text is read from the "ok" field.
local key_type = redis.call('TYPE', KEYS[1])["ok"]

if not key_type then return nil end

if key_type == 'zset' then
  -- The reply is consumed in pairs: odd entries become haystacks,
  -- even entries become the matching opt_data (scores).
  local flat = redis.call('ZRANGE', KEYS[1], 0, -1, 'WITHSCORES')
  for pos = 1, #flat, 2 do
    haystacks[#haystacks + 1] = flat[pos]
    opt_data[#opt_data + 1] = flat[pos + 1]
  end
elseif key_type == 'hash' then
  -- Same pairing, reversed: odd entries are fields (opt_data),
  -- even entries are the values that get searched.
  local flat = redis.call('HGETALL', KEYS[1])
  for pos = 1, #flat, 2 do
    opt_data[#opt_data + 1] = flat[pos]
    haystacks[#haystacks + 1] = flat[pos + 1]
  end
elseif key_type == 'list' then
  haystacks = redis.call('LRANGE', KEYS[1], 0, -1)
elseif key_type == 'set' then
  haystacks = redis.call('SMEMBERS', KEYS[1])
else
  -- any other key type is not searchable
  return nil
end
|
52
|
+
|
53
|
+
-- Search word; without it there is nothing to do.
local needle = ARGV[1]
if not needle then return nil end

-- Result limit; falls back to 10 when ARGV[2] is missing or not numeric.
local max_results = tonumber(ARGV[2]) or 10

-- Local aliases for the library functions used by the rest of the script.
local cjson = cjson
local s_byte, s_sub, s_find = string.byte, string.sub, string.find
local m_min, m_max, m_floor, m_ceil = math.min, math.max, math.floor, math.ceil
local t_sort = table.sort
|
67
|
+
|
68
|
+
|
69
|
+
-- mapping utf-8 leading-byte to byte offset
|
70
|
+
-- Maps a byte value (1..247) to the number of bytes that follow it within
-- the same character: 0 below 192, 1 for 192..223, 2 for 224..239 and
-- 3 for 240..247. Values outside 1..247 have no entry.
local byte_offsets = {}
for lead = 1, 247 do
  if lead >= 240 then
    byte_offsets[lead] = 3
  elseif lead >= 224 then
    byte_offsets[lead] = 2
  elseif lead >= 192 then
    byte_offsets[lead] = 1
  else
    byte_offsets[lead] = 0
  end
end
|
84
|
+
|
85
|
+
--[[
 * Split a UTF-8 string into its characters, using the leading byte of each
 * character to decide how many bytes belong to it.
 * Every byte position up to and including the last one is consumed, so a
 * trailing single-byte character is kept (stopping one position early would
 * turn "abc" into {"a", "b"} and a one-letter needle into an empty list).
 * @param {string} str
 * @return {Array.<string>} Array of multi-byte strings, empty for "".
--]]
local function split_into_utf8_bytes(str)
  local codes = {}
  local total = #str
  local pos = 1

  while pos <= total do
    -- bytes following the lead byte; 0 when the table has no entry
    local extra = byte_offsets[s_byte(str, pos)] or 0

    -- s_sub clamps at the end of the string, so a truncated sequence
    -- simply yields a shorter final chunk
    codes[#codes + 1] = s_sub(str, pos, pos + extra)
    pos = pos + extra + 1
  end
  return codes
end
|
111
|
+
|
112
|
+
--[[
 * Check if haystack includes any character in needle.
 * The lookup is a plain substring search (4th argument of string.find), so
 * needle characters such as ".", "%" or "(" are matched literally instead of
 * being interpreted as Lua patterns.
 * @param {string} haystack
 * @param {Array.<string>} utf_needle needle split into characters
 * @return {boolean} true if haystack contains at least one needle character
--]]
local function haystack_includes_needle_char(haystack, utf_needle)
  for i = 1, #utf_needle do
    if s_find(haystack, utf_needle[i], 1, true) then return true end
  end
  return false
end
|
124
|
+
|
125
|
+
-- Row buffer shared between calls; the first #str slots are rewritten on
-- every call before they are read.
local cache = {}

--[[
 * Calculate match score using levenshtein distance.
 * score = 1 - distance / max(#str, #needle)
 * @param {Array.<string>} str haystack split into characters
 * @param {Array.<string>} needle needle split into characters
 * @param {boolean} should_cutoff if true, stop calculating
     when the result might be lower than lowest_score
 * @param {number|nil} lowest_score
 * @return {number|nil} match score(0..1), nil when the cutoff was hit
--]]
local function levenshtein_score(str, needle, should_cutoff, lowest_score)
  local length = #str
  local length_needle = #needle
  local longer_length = m_max(length, length_needle)
  local cutoff_distance, result

  -- Empty inputs: the distance equals the length of the other side.
  if longer_length == 0 then return 1 end
  if length == 0 or length_needle == 0 then return 0 end

  if should_cutoff and lowest_score then
    cutoff_distance = m_ceil((1 - lowest_score) * longer_length) + 1
  end

  -- Row 0 of the distance matrix: turning the empty needle prefix into the
  -- first `index` haystack characters costs `index` edits. (With 1-based
  -- indices this is `index`, not `index + 1`.)
  for index = 1, length do
    cache[index] = index
  end

  for index_needle = 1, length_needle do
    local code = needle[index_needle]
    -- `diagonal` is the cell up-left of the current one, `result` the cell
    -- to its left; both start from the previous row's column 0.
    local diagonal = index_needle - 1
    result = index_needle - 1

    for index = 1, length do
      local substitution = (code == str[index]) and diagonal or diagonal + 1
      local above = cache[index]
      diagonal = above

      -- minimum of substitution, insertion (left + 1), deletion (above + 1)
      local best = substitution
      if result + 1 < best then best = result + 1 end
      if above + 1 < best then best = above + 1 end
      result = best
      cache[index] = result

      -- NOTE(review): the cutoff looks at every intermediate cell, not at
      -- the row minimum, so it can give up on pairs whose final distance
      -- would still be within the limit -- confirm this pruning is intended.
      if cutoff_distance and result > cutoff_distance then
        return nil
      end
    end
  end
  return 1 - (result / longer_length)
end
|
172
|
+
|
173
|
+
-- Nothing can be returned for a non-positive limit; answering here also
-- avoids indexing scores[max_results] with an invalid position below.
if max_results < 1 then return '[]' end

local scores = {}
local utf_needle = split_into_utf8_bytes(needle)
local lowest_score, utf_word, longer_length, score
local should_cutoff = false

-- orders score entries from best to worst match
local function by_score_desc(a, b)
  return a.score > b.score
end

-- main loop.
for i = 1, #haystacks do
  -- cheap pre-filter: skip haystacks sharing no character with the needle
  if haystack_includes_needle_char(haystacks[i], utf_needle) then
    utf_word = split_into_utf8_bytes(haystacks[i])

    -- haystacks shorter than the needle are never scored
    if #utf_word >= #utf_needle then
      longer_length = #utf_word

      -- Plain (non-pattern) search, so "." or "%" in the needle are literal.
      if s_find(haystacks[i], needle, 1, true) then
        -- needle is contained verbatim: score is the covered fraction
        score = #utf_needle * (1 / longer_length)
      else
        score = levenshtein_score(utf_word, utf_needle, should_cutoff, lowest_score)
      end

      if score and not(score == 0) then
        if #scores > max_results then
          -- enough candidates collected: from now on only keep entries
          -- that beat the current max_results-th best score
          should_cutoff = true
          t_sort(scores, by_score_desc)
          lowest_score = scores[max_results].score
          if score > lowest_score then
            scores[#scores + 1] = {score = score, idx = i}
          end
        else
          scores[#scores + 1] = {score = score, idx = i}
        end
      end
    end
  end
end

t_sort(scores, by_score_desc)

local result = {}
local output_length = m_min(#scores, max_results)

for i = 1, output_length do
  local item = {}
  item['match'] = scores[i].score
  item['haystack'] = haystacks[scores[i].idx]
  if key_type == 'zset' then
    item['score'] = opt_data[scores[i].idx]
  elseif key_type == 'hash' then
    item['field'] = opt_data[scores[i].idx]
  end
  result[#result + 1] = item
end

-- cjson serialises an empty Lua table as an object ("{}"); callers expect
-- a JSON array, so the no-match case is spelled out explicitly.
if #result == 0 then return '[]' end

return cjson.encode(result)
|
239
|
+
|
data/redis-asm.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'redis/asm/version'
|
5
|
+
|
6
|
+
# Gem metadata for redis-asm.
Gem::Specification.new do |spec|
  spec.name          = "redis-asm"
  spec.version       = Redis::Asm::VERSION
  spec.authors       = ["Masato Yamaguchi"]
  spec.email         = ["karateka2000@gmail.com"]
  spec.summary       = "Fast fuzzy string search on Redis using Lua. UTF-8 Ready."
  spec.description   = "Fast ASM(Approximate String Matching) by calculating edit distance within the collections such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides you to search multi-byte characters correctly, because it recognizes lead-byte of UTF-8 strings."
  spec.homepage      = "http://github.com/krt/redis-asm"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"

  # digest/sha1 is loaded with `require` from Ruby's standard library; it is
  # not a gem name, so it must not be declared as a gem dependency.
  spec.add_dependency 'redis', '~> 3.0'
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'json'
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
# Connection settings; both can be overridden through the environment.
REDIS_PORT = ENV['REDIS_PORT'] || 6379
REDIS_HOST = ENV['REDIS_HOST'] || 'localhost'

redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
asm = Redis::Asm.new(redis)

# Keys holding the same test words as SET, ZSET, HASH and LIST.
SKEY = 'redis:asm:testing:set'
ZKEY = 'redis:asm:testing:zset'
HKEY = 'redis:asm:testing:hash'
LKEY = 'redis:asm:testing:list'

describe Redis::Asm do

  before :all do
    test_data = File.read(File.expand_path('../test_data.txt', __FILE__))
                    .split("\n")
    # 1-based line number paired with each word: used as ZSET score and
    # as HASH field.
    zdata = test_data.each_with_index.map { |item, idx| [idx + 1, item] }
    hdata = Hash[zdata]

    # Start from a clean slate so leftovers of an aborted run cannot
    # duplicate entries (RPUSH appends to an existing list).
    redis.del SKEY, ZKEY, HKEY, LKEY

    # Queue the commands on the object yielded by `pipelined`.
    redis.pipelined do |r|
      r.sadd SKEY, test_data
      r.zadd ZKEY, zdata
      r.mapped_hmset HKEY, hdata
      test_data.each { |item| r.rpush LKEY, item }
    end
  end

  after :all do
    redis.del ZKEY
    redis.del HKEY
    redis.del SKEY
    redis.del LKEY
  end

  it 'has a version number' do
    expect(Redis::Asm::VERSION).not_to be nil
  end

  it 'responds to search method' do
    expect(asm.respond_to?(:search)).to eq(true)
  end

  context 'execute fuzzy searching on Redis SET or LIST' do
    let(:result_set)  {JSON.parse(asm.search(SKEY, 'example'))}
    let(:result_list) {JSON.parse(asm.search(LKEY, 'example'))}

    it "result has exactly matched string" do
      expect(result_set.first).to eq({"haystack"=>"example", "match"=>1})
      expect(result_list.first).to eq({"haystack"=>"example", "match"=>1})
    end

    it "result has fuzzy matched string" do
      expect(result_set[1]).to eq({"haystack"=>"samples", "match"=>0.5})
      expect(result_list[1]).to eq({"haystack"=>"samples", "match"=>0.5})
    end

    it "result size must be default limit(10)" do
      expect(result_set.size).to eq 10
      expect(result_list.size).to eq 10
    end
  end

  context 'execute fuzzy searching on Redis SET or LIST using multi-byte string' do
    let(:result_set)  {JSON.parse(asm.search(SKEY, '東京都'))}
    let(:result_list) {JSON.parse(asm.search(LKEY, '東京都'))}

    it "result has exactly matched string" do
      expect(result_set.first).to eq({"haystack"=>"東京都", "match"=>1})
      expect(result_list.first).to eq({"haystack"=>"東京都", "match"=>1})
    end

    it "result has fuzzy matched string" do
      expect(result_set[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
      expect(result_list[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
    end

    it "result size must be matched item count" do
      expect(result_set.size).to eq 2
      expect(result_list.size).to eq 2
    end
  end

  context 'execute fuzzy searching on Redis ZSET or HASH' do
    let(:result_zset) {JSON.parse(asm.search(ZKEY, 'example'))}
    let(:result_hash) {JSON.parse(asm.search(HKEY, 'example'))}

    it "result has exactly matched string, zset has 'score' and hash has 'field'" do
      expect(result_zset.first).to eq({"haystack"=>"example", "score"=>"114", "match"=>1})
      expect(result_hash.first).to eq({"haystack"=>"example", "field"=>"114", "match"=>1})
    end

    it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
      expect(result_zset[1]).to eq({"haystack"=>"samples", "score"=>"119", "match"=>0.5})
      expect(result_hash[1]).to eq({"haystack"=>"samples", "field"=>"119", "match"=>0.5})
    end

    it "result size must be default limit(10)" do
      expect(result_zset.size).to eq 10
      expect(result_hash.size).to eq 10
    end
  end

  context 'execute fuzzy searching on Redis ZSET or HASH using multi-byte string' do
    let(:result_zset) {JSON.parse(asm.search(ZKEY, '東京都'))}
    let(:result_hash) {JSON.parse(asm.search(HKEY, '東京都'))}

    it "result has exactly matched string, zset has 'score' and hash has 'field'" do
      expect(result_zset.first).to eq({"haystack"=>"東京都", "score"=>"126", "match"=>1})
      expect(result_hash.first).to eq({"haystack"=>"東京都", "field"=>"126", "match"=>1})
    end

    it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
      expect(result_zset[1]).to eq({"haystack"=>"京都府", "score"=>"125", "match"=>0.33333333333333})
      expect(result_hash[1]).to eq({"haystack"=>"京都府", "field"=>"125", "match"=>0.33333333333333})
    end

    it "result size must be matched item count" do
      expect(result_zset.size).to eq 2
      expect(result_hash.size).to eq 2
    end
  end

end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
1ab2cd34ef5g6
|
2
|
+
a
|
3
|
+
aa
|
4
|
+
aah
|
5
|
+
aahed
|
6
|
+
aahing
|
7
|
+
aahs
|
8
|
+
aal
|
9
|
+
aalii
|
10
|
+
aaliis
|
11
|
+
aals
|
12
|
+
aardvark
|
13
|
+
aardvarks
|
14
|
+
aardwolf
|
15
|
+
aardwolves
|
16
|
+
aargh
|
17
|
+
aarrgh
|
18
|
+
aarrghh
|
19
|
+
aarti
|
20
|
+
aartis
|
21
|
+
aas
|
22
|
+
aasvogel
|
23
|
+
aasvogels
|
24
|
+
ab
|
25
|
+
aba
|
26
|
+
abac
|
27
|
+
abaca
|
28
|
+
abacas
|
29
|
+
abaci
|
30
|
+
aback
|
31
|
+
abacs
|
32
|
+
abacterial
|
33
|
+
abactinal
|
34
|
+
abactinally
|
35
|
+
abactor
|
36
|
+
abactors
|
37
|
+
abacus
|
38
|
+
abacuses
|
39
|
+
abaft
|
40
|
+
abaka
|
41
|
+
abakas
|
42
|
+
abalone
|
43
|
+
abalones
|
44
|
+
abamp
|
45
|
+
abampere
|
46
|
+
abamperes
|
47
|
+
abamps
|
48
|
+
aband
|
49
|
+
abanded
|
50
|
+
abanding
|
51
|
+
abandon
|
52
|
+
abandoned
|
53
|
+
abandonedly
|
54
|
+
abandonee
|
55
|
+
abandonees
|
56
|
+
abandoner
|
57
|
+
abandoners
|
58
|
+
abandoning
|
59
|
+
abandonment
|
60
|
+
abandonments
|
61
|
+
abandons
|
62
|
+
abandonware
|
63
|
+
abandonwares
|
64
|
+
abands
|
65
|
+
abapical
|
66
|
+
abas
|
67
|
+
abase
|
68
|
+
abased
|
69
|
+
abasedly
|
70
|
+
abasement
|
71
|
+
abasements
|
72
|
+
abaser
|
73
|
+
abasers
|
74
|
+
abases
|
75
|
+
abash
|
76
|
+
abashed
|
77
|
+
abashedly
|
78
|
+
abashes
|
79
|
+
abashing
|
80
|
+
abashless
|
81
|
+
abashment
|
82
|
+
abashments
|
83
|
+
abasia
|
84
|
+
abasias
|
85
|
+
abasing
|
86
|
+
abask
|
87
|
+
abatable
|
88
|
+
abate
|
89
|
+
abated
|
90
|
+
abatement
|
91
|
+
abatements
|
92
|
+
abater
|
93
|
+
abaters
|
94
|
+
abates
|
95
|
+
abating
|
96
|
+
abatis
|
97
|
+
abatises
|
98
|
+
abator
|
99
|
+
abators
|
100
|
+
abattis
|
101
|
+
abattises
|
102
|
+
abattoir
|
103
|
+
abattoirs
|
104
|
+
abc
|
105
|
+
abcdefg
|
106
|
+
ac
|
107
|
+
axc
|
108
|
+
b
|
109
|
+
bc
|
110
|
+
cat
|
111
|
+
cow
|
112
|
+
difference
|
113
|
+
distance
|
114
|
+
example
|
115
|
+
frankenstein
|
116
|
+
javawasneat
|
117
|
+
kitten
|
118
|
+
levenshtein
|
119
|
+
samples
|
120
|
+
scalaisgreat
|
121
|
+
sitting
|
122
|
+
sturgeon
|
123
|
+
urgently
|
124
|
+
xabxcdxxefxgx
|
125
|
+
京都府
|
126
|
+
東京都
|
127
|
+
弊社佐藤
|
128
|
+
弊社と致しましては
|
129
|
+
貴社におかれましては
|
130
|
+
因為我是中國人所以我會說中文
|
131
|
+
因為我是英國人所以我會說英文
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redis-asm
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Masato Yamaguchi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: redis
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: digest/sha1
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: Fast ASM(Approximate String Matching) by calculating edit distance within
|
84
|
+
the collections such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides
|
85
|
+
you to search multi-byte characters correctly, because it recognizes lead-byte of
|
86
|
+
UTF-8 strings.
|
87
|
+
email:
|
88
|
+
- karateka2000@gmail.com
|
89
|
+
executables: []
|
90
|
+
extensions: []
|
91
|
+
extra_rdoc_files: []
|
92
|
+
files:
|
93
|
+
- ".gitignore"
|
94
|
+
- ".rspec"
|
95
|
+
- ".travis.yml"
|
96
|
+
- Gemfile
|
97
|
+
- LICENSE.txt
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- lib/redis-asm.rb
|
101
|
+
- lib/redis/asm.rb
|
102
|
+
- lib/redis/asm/version.rb
|
103
|
+
- lib/redis_asm.lua
|
104
|
+
- redis-asm.gemspec
|
105
|
+
- spec/redis/asm/asm_spec.rb
|
106
|
+
- spec/redis/asm/test_data.txt
|
107
|
+
- spec/spec_helper.rb
|
108
|
+
homepage: http://github.com/krt/redis-asm
|
109
|
+
licenses:
|
110
|
+
- MIT
|
111
|
+
metadata: {}
|
112
|
+
post_install_message:
|
113
|
+
rdoc_options: []
|
114
|
+
require_paths:
|
115
|
+
- lib
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
requirements: []
|
127
|
+
rubyforge_project:
|
128
|
+
rubygems_version: 2.2.2
|
129
|
+
signing_key:
|
130
|
+
specification_version: 4
|
131
|
+
summary: Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
|
132
|
+
test_files:
|
133
|
+
- spec/redis/asm/asm_spec.rb
|
134
|
+
- spec/redis/asm/test_data.txt
|
135
|
+
- spec/spec_helper.rb
|