redis-asm 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0614a530aee374d91c9b3fdbea83c6ac4020c588
4
+ data.tar.gz: c8e1a86c8f7dcf5b4a6ef455f47d50ada66af54b
5
+ SHA512:
6
+ metadata.gz: 70a2cc486e140531cc11310965f0313870af99e512cc2f0d825d401202a1d95992a931eb86440016070b6eed76ba9ad238d195c812946a8679c72e2ba0eaebeb
7
+ data.tar.gz: 6467734e8eb1d868b35b80826afaa88bd46971f98451f135dba7faf7ed7588bb66a44cf0901b1618c9b3dca5538a7142cd17af73482964fd24185473abdbbaba
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in redis-asm.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Masato Yamaguchi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,114 @@
1
+ # Redis::Asm
2
+
3
+ ##### Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
4
+
5
+ ## Description
6
+ Fast ASM (Approximate String Matching) that calculates edit distance over Redis collections such as ZSET, HASH, LIST, and SET using a Lua script.
7
+ Redis::Asm lets you search multi-byte characters correctly, because it recognizes the lead bytes of UTF-8 strings.
8
+
9
+ ## Prerequisites
10
+ This library requires a Redis server with Lua scripting support (EVAL and EVALSHA commands). This support was added in Redis 2.6.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'redis-asm'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ Or install it yourself as:
25
+
26
+ $ gem install redis-asm
27
+
28
+ ## Usage
29
+
30
+ To initialize `Redis::Asm` with host and port:
31
+ ```ruby
32
+ redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
33
+ asm = Redis::Asm.new(redis)
34
+ ```
35
+ To execute fuzzy search from Redis collections:
36
+ ```ruby
37
+ require 'json'
38
+
39
+ # asm.search(KEY, NEEDLE, MAX_RESULTS=10)
40
+
41
+ # To search from SET or LIST
42
+
43
+ result = asm.search(SET_OR_LIST_KEY, 'example')
44
+ puts JSON.parse(result).to_yaml
45
+ # ---
46
+ # - haystack: example
47
+ # match: 1
48
+ # - haystack: samples
49
+ # match: 0.5
50
+ # - haystack: abampere
51
+ # match: 0.42857142857143
52
+ .
53
+ .
54
+
55
+ # To search from HASH
56
+
57
+ # Redis::Asm matches HASH values
58
+ # each item has 'field' property
59
+
60
+ result = asm.search(HASH_KEY, '東京都')
61
+ puts JSON.parse(result).to_yaml
62
+ # ---
63
+ # - haystack: "東京都"
64
+ # field: '126'
65
+ # match: 1
66
+ # - haystack: "京都府"
67
+ # field: '125'
68
+ # match: 0.33333333333333
69
+
70
+ # To search from ZSET
71
+ # each item has 'score' property
72
+
73
+ result = asm.search(ZSET_KEY, '東京都')
74
+ puts JSON.parse(result).to_yaml
75
+ # ---
76
+ # - haystack: "東京都"
77
+ # score: '126'
78
+ # match: 1
79
+ # - haystack: "京都府"
80
+ # score: '125'
81
+ # match: 0.33333333333333
82
+ ```
83
+ ## Performance
84
+
85
+ - PC: MBP 2.6 GHz Intel Core i5 16GB DDR3 RAM
86
+ - OS: Mac OSX 10.9.5
87
+ - ruby 2.1.5p273 [x86_64-darwin13.0]
88
+ - Redis server v=2.6.17 bits=64
89
+
90
+ ```bash
91
+ # search from 10,000 items of SETS
92
+ # each item contains UTF-8 characters, and consists of between 1 and 30 chars.
93
+ % ruby search_bench.rb stone
94
+ user system total real
95
+ 0.000000 0.000000 0.000000 ( 0.038567)
96
+ % ruby search_bench.rb 東京都
97
+ user system total real
98
+ 0.000000 0.000000 0.000000 ( 0.022540)
99
+
100
+ % ruby search_bench.rb 弊社といたしましては
101
+ user system total real
102
+ 0.000000 0.000000 0.000000 ( 0.063109)
103
+
104
+ ```
105
+
106
+
107
+
108
+ ## Contributing
109
+
110
+ 1. Fork it ( https://github.com/krt/redis-asm/fork )
111
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
112
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
113
+ 4. Push to the branch (`git push origin my-new-feature`)
114
+ 5. Create a new Pull Request
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec) # registers a rake task named :spec
5
+ # The default task depends on :spec, so a bare `rake` invokes it.
6
+ task :default => :spec
7
+
@@ -0,0 +1,2 @@
1
+ require 'redis'
2
+ require 'redis/asm'
@@ -0,0 +1,26 @@
1
+ require 'redis'
2
+ require "redis/asm/version"
3
+ require "digest/sha1"
4
+
5
+ class Redis
6
+ class Asm
7
+
8
+ SCRIPT_DIR = File.expand_path('../../', __FILE__)
9
+ SCRIPT = File.read File.join(SCRIPT_DIR, "redis_asm.lua")
10
+ SHA1 = Digest::SHA1.hexdigest SCRIPT
11
+
12
+ def initialize(redis)
13
+ @redis = redis
14
+ end
15
+
16
+ def search(key, needle, max_results=10)
17
+ @redis.evalsha(SHA1, :keys => [key], :argv => [needle, max_results])
18
+ rescue Exception => e
19
+ if e.message =~ /NOSCRIPT/
20
+ @redis.eval script, :keys => [key], :argv => [needle, max_results]
21
+ else
22
+ raise e
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,5 @@
1
+ class Redis
2
+ class Asm
3
+ VERSION = "0.1" # gem version string; redis-asm.gemspec reads it for spec.version
4
+ end
5
+ end
@@ -0,0 +1,239 @@
1
+ --[[
2
+
3
+ redis_asm.lua
4
+ approximate string matching for redis
5
+
6
+ Copyright (c) 2015 Masato Yamaguchi
7
+
8
+ This software is released under the MIT License.
9
+
10
+ http://opensource.org/licenses/mit-license.php
11
+
12
+
13
+ USAGE:
14
+ > eval "(content of this script)" 1 KEY NEEDLE MAX_RESULTS
15
+
16
+ @param {string} KEY Name of key. Accepts ZSET, SET, HASH and LIST.
17
+ @param {string} NEEDLE Search word.
18
+ @param {number} MAX_RESULTS Max size of results, defaults to 10.
19
+ @return {string} Result as json string.
20
+ ]]
21
+
22
+ local i -- Setup: load the collection at KEYS[1] into haystacks (plus opt_data), then read ARGV.
23
+ local haystacks = {}
24
+ local opt_data = {} -- score for ZSET, or field for HASH.
25
+ -- Pick the read command from the type of KEYS[1]; any type not handled below returns nil.
26
+ local key_type = redis.call('TYPE', KEYS[1])["ok"]
27
+
28
+ if not key_type then return nil end
29
+ if key_type == 'zset' then
30
+ local zset = redis.call('ZRANGE', KEYS[1], 0, -1, 'WITHSCORES') -- entries are consumed alternately as member, score
31
+ local is_value = true
32
+ for i = 1, #zset do
33
+ if is_value then haystacks[#haystacks + 1] = zset[i] end
34
+ if not is_value then opt_data[#opt_data + 1] = zset[i] end
35
+ is_value = not is_value
36
+ end
37
+ elseif key_type == 'list' then
38
+ haystacks = redis.call('LRANGE', KEYS[1], 0, -1)
39
+ elseif key_type == 'set' then
40
+ haystacks = redis.call('SMEMBERS', KEYS[1])
41
+ elseif key_type == 'hash' then
42
+ local hash = redis.call('HGETALL', KEYS[1]) -- entries are consumed alternately as field, value; values are the haystacks
43
+ local is_field = true
44
+ for i = 1, #hash do
45
+ if is_field then opt_data[#opt_data + 1] = hash[i] end
46
+ if not is_field then haystacks[#haystacks + 1] = hash[i] end
47
+ is_field = not is_field
48
+ end
49
+ else
50
+ return nil
51
+ end
52
+ -- ARGV[1] is the search word; the script returns nil when it is absent.
53
+ local needle = ARGV[1]
54
+ if not needle then return nil end
55
+ -- Result limit: 10 when ARGV[2] is missing or cannot be converted to a number.
56
+ local max_results = tonumber(ARGV[2]) or 10
57
+ -- Local aliases for the library functions used by the rest of the script.
58
+ local cjson = cjson
59
+ local s_byte = string.byte
60
+ local s_sub = string.sub
61
+ local s_find = string.find
62
+ local m_min = math.min
63
+ local m_max = math.max
64
+ local m_floor = math.floor
65
+ local m_ceil = math.ceil
66
+ local t_sort = table.sort
67
+
68
+
69
+ -- Maps a UTF-8 lead byte value (used directly as the table index) to the number of bytes that follow it in the same character: 0 for indices 1-191, 1 for 192-223, 2 for 224-239, 3 for 240-247.
70
+ local byte_offsets = {
71
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
79
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
80
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82
+ 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
83
+ 3, 3, 3, 3, 3, 3, 3}
84
+
85
+ --[[
86
+ * Split a UTF-8 string into an array of characters, using each lead byte to decide how many bytes the character spans.
87
+ * @param {string} str String to split.
88
+ * @return {Array.<string>} Array of multi-byte strings.
89
+ --]]
90
+ local function split_into_utf8_bytes(str)
91
+ local codes = {}
92
+ local i
93
+ local offset = 0
94
+ -- offset accumulates the extra bytes consumed so far, so i + offset is where the next character starts.
95
+ local mb_str, byte, offset_pos
96
+
97
+ for i = 1, #str do
98
+ offset_pos = i + offset
99
+ if offset_pos >= #str then -- NOTE(review): also stops when a character starts on the final byte, so a trailing single-byte character is not emitted; confirm this is intended
100
+ break
101
+ end
102
+ -- Number of bytes following the lead byte; 0 when the byte value has no table entry.
103
+ byte = byte_offsets[s_byte(str, offset_pos, offset_pos)] or 0
104
+
105
+ mb_str = s_sub(str, offset_pos, offset_pos + byte)
106
+ codes[#codes + 1] = mb_str
107
+ offset = offset + byte
108
+ end
109
+ return codes
110
+ end
111
+
112
+ --[[
113
+ * Check if haystack contains at least one character of the needle.
114
+ * @param {string} haystack Raw haystack string.
115
+ * @param {Array.<string>} utf_needle Needle split into characters.
116
+ * @return {boolean} true if any element of utf_needle occurs in haystack
117
+ --]]
118
+ local function haystack_includes_needle_char(haystack, utf_needle)
119
+   for i = 1, #utf_needle do
120
+     if s_find(haystack, utf_needle[i], 1, true) then return true end -- plain=true: match the character literally, not as a Lua pattern
121
+   end
122
+   return false
123
+ end
124
+
125
+ local cache = {} -- scratch row reused by every levenshtein_score call
126
+
127
+ --[[
128
+ * Calculate a match score from an edit-distance computation over two character arrays.
129
+ * @param {Array.<string>} str Haystack split into characters.
130
+ * @param {Array.<string>} needle Needle split into characters.
131
+ * @param {boolean} should_cutoff If true and lowest_score is set, return nil as soon as
132
+ an intermediate distance exceeds the cutoff derived from lowest_score.
133
+ * @param {number|nil} lowest_score Score used to derive the cutoff distance.
134
+ * @return {number|nil} 1 - distance / max(#str, #needle), or nil when cut off.
135
+ --]]
136
+ local function levenshtein_score(str, needle, should_cutoff, lowest_score)
137
+ local length, length_needle, code, result, should_break -- should_break is never assigned or read
138
+ local distance, distance_needle, index, index_needle, cutoff_distance
139
+ local longer_length = m_max(#str, #needle)
140
+ -- The cutoff stays nil (disabled) unless both should_cutoff and lowest_score are set.
141
+ if should_cutoff and lowest_score then
142
+ cutoff_distance = m_ceil((1 - lowest_score) * longer_length) + 1
143
+ end
144
+
145
+ length = #str
146
+ length_needle = #needle
147
+ for index = 1, length do
148
+ cache[index] = index + 1 -- NOTE(review): row is seeded with index + 1 using 1-based indexes; confirm this is not an off-by-one from a 0-based port
149
+ end
150
+ -- One pass per needle character; cache holds the previous row and is overwritten in place.
151
+ for index_needle = 1, length_needle do
152
+ code = needle[index_needle]
153
+ result = index_needle - 1
154
+ distance = index_needle - 1
155
+
156
+ for index = 1, length do
157
+ distance_needle = (code == str[index]) and distance or distance + 1
158
+ distance = cache[index]
159
+ result = (distance > result) and
160
+ ((distance_needle > result) and result + 1 or distance_needle)
161
+ or
162
+ ((distance_needle > distance) and distance + 1 or distance_needle)
163
+ cache[index] = result
164
+
165
+ if cutoff_distance and result > cutoff_distance then
166
+ return nil
167
+ end
168
+ end
169
+ end
170
+ return 1 - (result / longer_length)
171
+ end
172
+
173
+ local scores = {}
174
+ local utf_needle = split_into_utf8_bytes(needle)
175
+ local lowest_score, utf_word, longer_length, score
176
+ local should_cutoff = false
177
+
178
+ -- main loop: score every haystack that shares at least one character with the needle.
179
+ for i = 1, #haystacks do
180
+   if haystack_includes_needle_char(haystacks[i], utf_needle) then
181
+     utf_word = split_into_utf8_bytes(haystacks[i])
182
+     -- Haystacks with fewer characters than the needle are skipped.
183
+     if #utf_word >= #utf_needle then
184
+       longer_length = #utf_word
185
+       -- Literal substring hit: plain find, so pattern characters in the needle are not interpreted.
186
+       if s_find(haystacks[i], needle, 1, true) then
187
+         score = #utf_needle * (1 / longer_length)
188
+       else
189
+         score = levenshtein_score(utf_word, utf_needle, should_cutoff, lowest_score)
190
+       end
191
+       -- Keep non-zero scores; once more than max_results are held, only scores above the cutoff are added.
192
+       if score and not(score == 0) then
193
+         if #scores > max_results then
194
+           should_cutoff = true
195
+           t_sort(
196
+             scores,
197
+             function(a,b)
198
+               return a.score > b.score
199
+             end
200
+           )
201
+           lowest_score = scores[max_results].score
202
+           if score > lowest_score then
203
+             scores[#scores + 1] = {score = score, idx = i}
204
+           end
205
+         else
206
+           scores[#scores + 1] = {score = score, idx = i}
207
+         end
208
+       end
209
+     end
210
+   end
211
+
212
+ end
213
+
214
+ t_sort( -- final ordering: highest score first
215
+ scores,
216
+ function(a,b)
217
+ return a.score > b.score
218
+ end
219
+ )
220
+ -- Emit at most max_results entries, taken from the front of the sorted list.
221
+ local result = {}
222
+ local output_length = m_min(#scores, max_results)
223
+
224
+ for i = 1, output_length do
225
+ local item = {}
226
+ item['match'] = scores[i].score
227
+ item['haystack'] = haystacks[scores[i].idx]
228
+ if key_type == 'zset' then
229
+ item['score'] = opt_data[scores[i].idx] -- ZSET score stored alongside this member
230
+ elseif key_type == 'hash' then
231
+ item['field'] = opt_data[scores[i].idx] -- HASH field whose value is this haystack
232
+ end
233
+ result[#result + 1] = item
234
+ end
235
+
236
+ local text = cjson.encode(result)
237
+
238
+ return(text)
239
+
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'redis/asm/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "redis-asm"
8
+ spec.version = Redis::Asm::VERSION
9
+ spec.authors = ["Masato Yamaguchi"]
10
+ spec.email = ["karateka2000@gmail.com"]
11
+ spec.summary = "Fast fuzzy string search on Redis using Lua. UTF-8 Ready."
12
+ spec.description = "Fast ASM(Approximate String Matching) by calucuating edit distance within the collecitons such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides you to search multi-byte characters correctly, because it recognizes lead-byte of UTF-8 strings."
13
+ spec.homepage = "http://github.com/krt/redis-asm"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec"
24
+ spec.add_dependency 'redis', '~> 3.0'
25
+ spec.add_dependency 'digest/sha1'
26
+ end
@@ -0,0 +1,129 @@
1
+ require 'spec_helper'
2
+ require 'json'
3
+ require 'yaml'
4
+
5
+ REDIS_PORT = ENV['REDIS_PORT'] || 6379 # connection settings; environment variables take precedence
6
+ REDIS_HOST = ENV['REDIS_HOST'] || 'localhost'
7
+ # Client and searcher shared by every example in this file.
8
+ redis = Redis.new(:host => REDIS_HOST, :port => REDIS_PORT)
9
+ asm = Redis::Asm.new(redis)
10
+ # Keys of the fixture collections: one each for SET, ZSET, HASH and LIST.
11
+ SKEY = 'redis:asm:testing:set'
12
+ ZKEY = 'redis:asm:testing:zset'
13
+ HKEY = 'redis:asm:testing:hash'
14
+ LKEY = 'redis:asm:testing:list'
15
+
16
+ describe Redis::Asm do
17
+
18
+ before :all do
19
+ test_data = File.read(File.expand_path('../test_data.txt', __FILE__))
20
+ .split("\n")
21
+ i = 0
22
+ zdata = test_data.map{|item| i += 1; [i, item]}
23
+ i = 0
24
+ hdata = test_data.inject({}){|ha, k| i += 1; ha.merge(i=>k)}
25
+
26
+ redis.pipelined do |r|
27
+ redis.sadd SKEY, test_data
28
+ redis.zadd ZKEY, zdata
29
+ redis.mapped_hmset HKEY, hdata
30
+ test_data.each{|item| redis.rpush LKEY,item}
31
+ end
32
+ end
33
+
34
+ after :all do
35
+ redis.del ZKEY
36
+ redis.del HKEY
37
+ redis.del SKEY
38
+ redis.del LKEY
39
+ end
40
+
41
+ it 'has a version number' do
42
+ expect(Redis::Asm::VERSION).not_to be nil
43
+ end
44
+
45
+ it 'responds to search method' do
46
+ expect(asm.respond_to?(:search)).to eq(true)
47
+ end
48
+
49
+ context 'execute fuzzy searching on Redis SET or LIST' do
50
+ let(:result_set) {JSON.parse(asm.search(SKEY, 'example'))}
51
+ let(:result_list) {JSON.parse(asm.search(LKEY, 'example'))}
52
+
53
+ it "result has exactly matched string" do
54
+ expect(result_set.first).to eq({"haystack"=>"example", "match"=>1})
55
+ expect(result_list.first).to eq({"haystack"=>"example", "match"=>1})
56
+ end
57
+
58
+ it "result has fuzzy matched string" do
59
+ expect(result_set[1]).to eq({"haystack"=>"samples", "match"=>0.5})
60
+ expect(result_list[1]).to eq({"haystack"=>"samples", "match"=>0.5})
61
+ end
62
+
63
+ it "result size must be default limit(10)" do
64
+ expect(result_set.size).to eq 10
65
+ expect(result_list.size).to eq 10
66
+ end
67
+ end
68
+
69
+ context 'execute fuzzy searching on Redis SET or LIST using multi-byte string' do
70
+ let(:result_set) {JSON.parse(asm.search(SKEY, '東京都'))}
71
+ let(:result_list) {JSON.parse(asm.search(LKEY, '東京都'))}
72
+
73
+ it "result has exactly matched string" do
74
+ expect(result_set.first).to eq({"haystack"=>"東京都", "match"=>1})
75
+ expect(result_list.first).to eq({"haystack"=>"東京都", "match"=>1})
76
+ end
77
+
78
+ it "result has fuzzy matched string" do
79
+ expect(result_set[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
80
+ expect(result_list[1]).to eq({"haystack"=>"京都府", "match"=>0.33333333333333})
81
+ end
82
+
83
+ it "result size must be matched item count" do
84
+ expect(result_set.size).to eq 2
85
+ expect(result_list.size).to eq 2
86
+ end
87
+ end
88
+
89
+ context 'execute fuzzy searching on Redis ZSET or HASH' do
90
+ let(:result_zset) {JSON.parse(asm.search(ZKEY, 'example'))}
91
+ let(:result_hash) {JSON.parse(asm.search(HKEY, 'example'))}
92
+
93
+ it "result has exactly matched string, zset has 'score' and hash has 'field'" do
94
+ expect(result_zset.first).to eq({"haystack"=>"example", "score"=>"114", "match"=>1})
95
+ expect(result_hash.first).to eq({"haystack"=>"example", "field"=>"114", "match"=>1})
96
+ end
97
+
98
+ it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
99
+ expect(result_zset[1]).to eq({"haystack"=>"samples", "score"=>"119", "match"=>0.5})
100
+ expect(result_hash[1]).to eq({"haystack"=>"samples", "field"=>"119", "match"=>0.5})
101
+ end
102
+
103
+ it "result size must be default limit(10)" do
104
+ expect(result_zset.size).to eq 10
105
+ expect(result_hash.size).to eq 10
106
+ end
107
+ end
108
+
109
+ context 'execute fuzzy searching on Redis ZSET or HASH using multi-byte string' do
110
+ let(:result_zset) {JSON.parse(asm.search(ZKEY, '東京都'))}
111
+ let(:result_hash) {JSON.parse(asm.search(HKEY, '東京都'))}
112
+
113
+ it "result has exactly matched string, zset has 'score' and hash has 'field'" do
114
+ expect(result_zset.first).to eq({"haystack"=>"東京都", "score"=>"126", "match"=>1})
115
+ expect(result_hash.first).to eq({"haystack"=>"東京都", "field"=>"126", "match"=>1})
116
+ end
117
+
118
+ it "result has fuzzy matched string, zset has 'score' and hash has 'field'" do
119
+ expect(result_zset[1]).to eq({"haystack"=>"京都府", "score"=>"125", "match"=>0.33333333333333})
120
+ expect(result_hash[1]).to eq({"haystack"=>"京都府", "field"=>"125", "match"=>0.33333333333333})
121
+ end
122
+
123
+ it "result size must be matched item count" do
124
+ expect(result_zset.size).to eq 2
125
+ expect(result_hash.size).to eq 2
126
+ end
127
+ end
128
+
129
+ end
@@ -0,0 +1,131 @@
1
+ 1ab2cd34ef5g6
2
+ a
3
+ aa
4
+ aah
5
+ aahed
6
+ aahing
7
+ aahs
8
+ aal
9
+ aalii
10
+ aaliis
11
+ aals
12
+ aardvark
13
+ aardvarks
14
+ aardwolf
15
+ aardwolves
16
+ aargh
17
+ aarrgh
18
+ aarrghh
19
+ aarti
20
+ aartis
21
+ aas
22
+ aasvogel
23
+ aasvogels
24
+ ab
25
+ aba
26
+ abac
27
+ abaca
28
+ abacas
29
+ abaci
30
+ aback
31
+ abacs
32
+ abacterial
33
+ abactinal
34
+ abactinally
35
+ abactor
36
+ abactors
37
+ abacus
38
+ abacuses
39
+ abaft
40
+ abaka
41
+ abakas
42
+ abalone
43
+ abalones
44
+ abamp
45
+ abampere
46
+ abamperes
47
+ abamps
48
+ aband
49
+ abanded
50
+ abanding
51
+ abandon
52
+ abandoned
53
+ abandonedly
54
+ abandonee
55
+ abandonees
56
+ abandoner
57
+ abandoners
58
+ abandoning
59
+ abandonment
60
+ abandonments
61
+ abandons
62
+ abandonware
63
+ abandonwares
64
+ abands
65
+ abapical
66
+ abas
67
+ abase
68
+ abased
69
+ abasedly
70
+ abasement
71
+ abasements
72
+ abaser
73
+ abasers
74
+ abases
75
+ abash
76
+ abashed
77
+ abashedly
78
+ abashes
79
+ abashing
80
+ abashless
81
+ abashment
82
+ abashments
83
+ abasia
84
+ abasias
85
+ abasing
86
+ abask
87
+ abatable
88
+ abate
89
+ abated
90
+ abatement
91
+ abatements
92
+ abater
93
+ abaters
94
+ abates
95
+ abating
96
+ abatis
97
+ abatises
98
+ abator
99
+ abators
100
+ abattis
101
+ abattises
102
+ abattoir
103
+ abattoirs
104
+ abc
105
+ abcdefg
106
+ ac
107
+ axc
108
+ b
109
+ bc
110
+ cat
111
+ cow
112
+ difference
113
+ distance
114
+ example
115
+ frankenstein
116
+ javawasneat
117
+ kitten
118
+ levenshtein
119
+ samples
120
+ scalaisgreat
121
+ sitting
122
+ sturgeon
123
+ urgently
124
+ xabxcdxxefxgx
125
+ 京都府
126
+ 東京都
127
+ 弊社佐藤
128
+ 弊社と致しましては
129
+ 貴社におかれましては
130
+ 因為我是中國人所以我會說中文
131
+ 因為我是英國人所以我會說英文
@@ -0,0 +1,3 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'redis'
3
+ require 'redis/asm'
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redis-asm
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Masato Yamaguchi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: redis
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: digest/sha1
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Fast ASM(Approximate String Matching) by calucuating edit distance within
84
+ the collecitons such as ZSET, HASH, LIST, SET on Redis using Lua script. It provides
85
+ you to search multi-byte characters correctly, because it recognizes lead-byte of
86
+ UTF-8 strings.
87
+ email:
88
+ - karateka2000@gmail.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - LICENSE.txt
98
+ - README.md
99
+ - Rakefile
100
+ - lib/redis-asm.rb
101
+ - lib/redis/asm.rb
102
+ - lib/redis/asm/version.rb
103
+ - lib/redis_asm.lua
104
+ - redis-asm.gemspec
105
+ - spec/redis/asm/asm_spec.rb
106
+ - spec/redis/asm/test_data.txt
107
+ - spec/spec_helper.rb
108
+ homepage: http://github.com/krt/redis-asm
109
+ licenses:
110
+ - MIT
111
+ metadata: {}
112
+ post_install_message:
113
+ rdoc_options: []
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ requirements:
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
121
+ required_rubygems_version: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ requirements: []
127
+ rubyforge_project:
128
+ rubygems_version: 2.2.2
129
+ signing_key:
130
+ specification_version: 4
131
+ summary: Fast fuzzy string search on Redis using Lua. UTF-8 Ready.
132
+ test_files:
133
+ - spec/redis/asm/asm_spec.rb
134
+ - spec/redis/asm/test_data.txt
135
+ - spec/spec_helper.rb