redis-search 0.5.2 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +6 -3
- data/lib/redis/search/config.rb +7 -1
- data/lib/redis/search/finder.rb +77 -39
- data/lib/redis/search/index.rb +35 -13
- metadata +18 -7
data/README.markdown
CHANGED
@@ -9,6 +9,7 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
|
|
9
9
|
* Segment words search and prefix match search
|
10
10
|
* Support ActiveRecord and Mongoid
|
11
11
|
* Sort results by one field
|
12
|
+
* Homophone search, pinyin search
|
12
13
|
|
13
14
|
## Requirements
|
14
15
|
|
@@ -19,8 +20,9 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
|
|
19
20
|
in Rails application Gemfile
|
20
21
|
|
21
22
|
gem 'redis','>= 2.1.1'
|
22
|
-
gem
|
23
|
-
gem '
|
23
|
+
gem 'chinese_pinyin', '0.3.0'
|
24
|
+
gem 'rmmseg-cpp-huacnlee', '0.2.8'
|
25
|
+
gem 'redis-search', '0.6'
|
24
26
|
|
25
27
|
install bundlers
|
26
28
|
|
@@ -37,6 +39,7 @@ create file in: config/initializers/redis_search.rb
|
|
37
39
|
Redis::Search.configure do |config|
|
38
40
|
config.redis = redis
|
39
41
|
config.complete_max_length = 100
|
42
|
+
config.pinyin_match = true
|
40
43
|
end
|
41
44
|
|
42
45
|
## Usage
|
@@ -106,4 +109,4 @@ see [Rdoc.info redis-search](http://rubydoc.info/gems/redis-search)
|
|
106
109
|
|
107
110
|
## Demo
|
108
111
|
|
109
|
-
You can try the search feature in [`zheye.org`](http://zheye.org)
|
112
|
+
You can try the search feature in [`zheye.org`](http://zheye.org) | [`shu.im`](http://shu.im)
|
data/lib/redis/search/config.rb
CHANGED
@@ -12,18 +12,24 @@ class Redis
|
|
12
12
|
class Config
|
13
13
|
# Redis
|
14
14
|
attr_accessor :redis
|
15
|
-
# Debug toggle
|
15
|
+
# Debug toggle, default false
|
16
16
|
attr_accessor :debug
|
17
17
|
# Max length of content indexed for the Redis::Search.complete method, default 100
|
18
18
|
# Please change this with your real data length, short is fast
|
19
19
|
# For example: if you use complete search on your User model's name field, and "name" has a max length of 15 chars, you can set this to 15
|
20
20
|
# Warning! Long content cannot be found if the configured length is less than the real content length.
|
21
21
|
attr_accessor :complete_max_length
|
22
|
+
# Pinyin search/index (true|false) - default = false
|
23
|
+
# If this is set to true, the indexer will convert Chinese to Pinyin and index it
|
24
|
+
# When you search "de" will -> 得|的|德...
|
25
|
+
# When you search "得" will -> "de" -> 得|的|德...
|
26
|
+
attr_accessor :pinyin_match
|
22
27
|
|
23
28
|
def initialize
|
24
29
|
self.debug = false
|
25
30
|
self.redis = nil
|
26
31
|
self.complete_max_length = 100
|
32
|
+
self.pinyin_match = false
|
27
33
|
end
|
28
34
|
end
|
29
35
|
end
|
data/lib/redis/search/finder.rb
CHANGED
@@ -1,34 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require "rmmseg"
|
3
|
+
require 'chinese_pinyin'
|
4
|
+
|
3
5
|
class Redis
|
4
6
|
module Search
|
7
|
+
# use rmmseg to split words
|
5
8
|
def self.split(text)
|
6
|
-
|
7
|
-
|
8
|
-
loop do
|
9
|
-
tok = algor.next_token
|
10
|
-
break if tok.nil?
|
11
|
-
words << tok.text
|
12
|
-
end
|
13
|
-
words
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.warn(msg)
|
17
|
-
puts "[Redis::Search][warn]: #{msg}"
|
18
|
-
end
|
19
|
-
|
20
|
-
# 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
|
21
|
-
def self.mk_sets_key(type, key)
|
22
|
-
"#{type}:#{key.downcase}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.mk_score_key(type, id)
|
26
|
-
"#{type}:_score_:#{id}"
|
27
|
-
end
|
28
|
-
|
29
|
-
def self.mk_complete_key(type)
|
30
|
-
"Compl#{type}"
|
31
|
-
end
|
9
|
+
_split(text)
|
10
|
+
end
|
32
11
|
|
33
12
|
# Used for short title search; this method searches by chars, for example Tag, User, Category ...
|
34
13
|
#
|
@@ -104,39 +83,98 @@ class Redis
|
|
104
83
|
# h3. usage:
|
105
84
|
# * Redis::Search.query("Tag","Ruby vs Python")
|
106
85
|
def self.query(type, text,options = {})
|
86
|
+
tm = Time.now
|
107
87
|
result = []
|
108
88
|
return result if text.strip.blank?
|
109
89
|
|
110
|
-
words = Search.split(text)
|
111
90
|
limit = options[:limit] || 10
|
112
91
|
sort_field = options[:sort_field] || "id"
|
92
|
+
|
93
|
+
words = Search.split(text)
|
113
94
|
words = words.collect { |w| Search.mk_sets_key(type,w) }
|
114
|
-
|
95
|
+
|
115
96
|
return result if words.blank?
|
97
|
+
|
116
98
|
temp_store_key = "tmpinterstore:#{words.join("+")}"
|
99
|
+
|
117
100
|
if words.length > 1
|
118
101
|
if !Redis::Search.config.redis.exists(temp_store_key)
|
119
102
|
# 将多个词语组合对比,得到交集,并存入临时区域
|
120
103
|
Redis::Search.config.redis.sinterstore(temp_store_key,*words)
|
121
104
|
# 将临时搜索设为1天后自动清除
|
122
105
|
Redis::Search.config.redis.expire(temp_store_key,86400)
|
106
|
+
|
107
|
+
# 拼音搜索
|
108
|
+
if Search.config.pinyin_match
|
109
|
+
pinyin_words = Search.split_pinyin(text)
|
110
|
+
pinyin_words = pinyin_words.collect { |w| Search.mk_sets_key(type,w) }
|
111
|
+
temp_sunion_key = "tmpsunionstore:#{words.join("+")}"
|
112
|
+
if Search.config.pinyin_match
|
113
|
+
temp_pinyin_store_key = "tmpinterstore:#{pinyin_words.join("+")}"
|
114
|
+
end
|
115
|
+
# 找出拼音的
|
116
|
+
Redis::Search.config.redis.sinterstore(temp_pinyin_store_key,*pinyin_words)
|
117
|
+
# 合并中文和拼音的搜索结果
|
118
|
+
Redis::Search.config.redis.sunionstore(temp_sunion_key,*[temp_store_key,temp_pinyin_store_key])
|
119
|
+
# 将临时搜索设为1天后自动清除
|
120
|
+
Redis::Search.config.redis.expire(temp_pinyin_store_key,86400)
|
121
|
+
Redis::Search.config.redis.expire(temp_sunion_key,86400)
|
122
|
+
temp_store_key = temp_sunion_key
|
123
|
+
end
|
123
124
|
end
|
124
|
-
# 根据需要的数量取出 ids
|
125
|
-
ids = Redis::Search.config.redis.sort(temp_store_key,
|
126
|
-
:limit => [0,limit],
|
127
|
-
:by => Search.mk_score_key(type,"*"),
|
128
|
-
:order => "desc")
|
129
125
|
else
|
130
|
-
|
131
|
-
ids = Redis::Search.config.redis.sort(words.first,
|
132
|
-
:limit => [0,limit],
|
133
|
-
:by => Search.mk_score_key(type,"*"),
|
134
|
-
:order => "desc")
|
126
|
+
temp_store_key = words.first
|
135
127
|
end
|
136
|
-
|
128
|
+
|
129
|
+
# 根据需要的数量取出 ids
|
130
|
+
ids = Redis::Search.config.redis.sort(temp_store_key,
|
131
|
+
:limit => [0,limit],
|
132
|
+
:by => Search.mk_score_key(type,"*"),
|
133
|
+
:order => "desc")
|
134
|
+
result = hmget(type,ids, :sort_field => sort_field)
|
135
|
+
Search.info("{#{type} : \"#{text}\"} | Time spend: #{Time.now - tm}s")
|
136
|
+
result
|
137
137
|
end
|
138
138
|
|
139
|
+
protected
|
140
|
+
def self.split_pinyin(text)
|
141
|
+
# For pinyin search: convert the text to pinyin, then split it into words
|
142
|
+
_split(Pinyin.t(text))
|
143
|
+
end
|
144
|
+
|
139
145
|
private
|
146
|
+
def self._split(text)
|
147
|
+
algor = RMMSeg::Algorithm.new(text)
|
148
|
+
words = []
|
149
|
+
loop do
|
150
|
+
tok = algor.next_token
|
151
|
+
break if tok.nil?
|
152
|
+
words << tok.text
|
153
|
+
end
|
154
|
+
words
|
155
|
+
end
|
156
|
+
|
157
|
+
def self.warn(msg)
|
158
|
+
::Rails.logger.warn("\e[33m[Redis::Search] #{msg}\e[0m")
|
159
|
+
end
|
160
|
+
|
161
|
+
def self.info(msg)
|
162
|
+
::Rails.logger.debug("\e[32m[Redis::Search] #{msg}\e[0m")
|
163
|
+
end
|
164
|
+
|
165
|
+
# 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
|
166
|
+
def self.mk_sets_key(type, key)
|
167
|
+
"#{type}:#{key.downcase}"
|
168
|
+
end
|
169
|
+
|
170
|
+
def self.mk_score_key(type, id)
|
171
|
+
"#{type}:_score_:#{id}"
|
172
|
+
end
|
173
|
+
|
174
|
+
def self.mk_complete_key(type)
|
175
|
+
"Compl#{type}"
|
176
|
+
end
|
177
|
+
|
140
178
|
def self.hmget(type, ids, options = {})
|
141
179
|
result = []
|
142
180
|
sort_field = options[:sort_field] || "id"
|
data/lib/redis/search/index.rb
CHANGED
@@ -19,11 +19,13 @@ class Redis
|
|
19
19
|
# 将原始数据存入 hashes
|
20
20
|
res = Redis::Search.config.redis.hset(self.type, self.id, data.to_json)
|
21
21
|
# 保存 sets 索引,以分词的单词为key,用于后面搜索,里面存储 ids
|
22
|
-
words = Search.
|
22
|
+
words = Search::Index.split_words_for_index(self.title)
|
23
23
|
return if words.blank?
|
24
24
|
words.each do |word|
|
25
25
|
key = Search.mk_sets_key(self.type,word)
|
26
|
+
# word index for item id
|
26
27
|
Redis::Search.config.redis.sadd(key, self.id)
|
28
|
+
# score for search sort
|
27
29
|
Redis::Search.config.redis.set(Search.mk_score_key(self.type,self.id),self.score)
|
28
30
|
end
|
29
31
|
|
@@ -32,28 +34,48 @@ class Redis
|
|
32
34
|
save_prefix_index
|
33
35
|
end
|
34
36
|
end
|
35
|
-
|
36
|
-
def save_prefix_index
|
37
|
-
word = self.title.downcase
|
38
|
-
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
|
39
|
-
key = Search.mk_complete_key(self.type)
|
40
|
-
(1..(word.length)).each do |l|
|
41
|
-
prefix = word[0...l]
|
42
|
-
Redis::Search.config.redis.zadd(key, 0, prefix)
|
43
|
-
end
|
44
|
-
Redis::Search.config.redis.zadd(key, 0, word + "*")
|
45
|
-
end
|
46
37
|
|
47
38
|
def self.remove(options = {})
|
48
39
|
type = options[:type]
|
49
40
|
Redis::Search.config.redis.hdel(type,options[:id])
|
50
|
-
words = Search.
|
41
|
+
words = Search::Index.split_words_for_index(options[:title])
|
51
42
|
words.each do |word|
|
52
43
|
key = Search.mk_sets_key(type,word)
|
53
44
|
Redis::Search.config.redis.srem(key, options[:id])
|
54
45
|
Redis::Search.config.redis.del(Search.mk_score_key(type,options[:id]))
|
55
46
|
end
|
56
47
|
end
|
48
|
+
|
49
|
+
private
|
50
|
+
def self.split_words_for_index(title)
|
51
|
+
words = Search.split(title)
|
52
|
+
if Search.config.pinyin_match
|
53
|
+
# convert Chinese to pinyin to use as an index
|
54
|
+
words += Search.split_pinyin(title)
|
55
|
+
end
|
56
|
+
words.uniq
|
57
|
+
end
|
58
|
+
|
59
|
+
def save_prefix_index
|
60
|
+
words = []
|
61
|
+
words << self.title.downcase
|
62
|
+
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
|
63
|
+
if Search.config.pinyin_match
|
64
|
+
pinyin = Pinyin.t(self.title.downcase,'')
|
65
|
+
words << pinyin
|
66
|
+
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,pinyin), self.id)
|
67
|
+
end
|
68
|
+
|
69
|
+
words.each do |word|
|
70
|
+
|
71
|
+
key = Search.mk_complete_key(self.type)
|
72
|
+
(1..(word.length)).each do |l|
|
73
|
+
prefix = word[0...l]
|
74
|
+
Redis::Search.config.redis.zadd(key, 0, prefix)
|
75
|
+
end
|
76
|
+
Redis::Search.config.redis.zadd(key, 0, word + "*")
|
77
|
+
end
|
78
|
+
end
|
57
79
|
end
|
58
80
|
end
|
59
81
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,22 +9,33 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-30 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: chinese_pinyin
|
16
|
+
requirement: &2157459160 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.3.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2157459160
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: rmmseg-cpp-huacnlee
|
16
|
-
requirement: &
|
27
|
+
requirement: &2157458680 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
|
-
- -
|
30
|
+
- - ! '>='
|
20
31
|
- !ruby/object:Gem::Version
|
21
32
|
version: 0.2.8
|
22
33
|
type: :runtime
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157458680
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: redis
|
27
|
-
requirement: &
|
38
|
+
requirement: &2157458200 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,7 +43,7 @@ dependencies:
|
|
32
43
|
version: 2.1.1
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157458200
|
36
47
|
description: High performance real-time search (Support Chinese), index in Redis for
|
37
48
|
Rails application.
|
38
49
|
email:
|