redis-search 0.5.2 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +6 -3
- data/lib/redis/search/config.rb +7 -1
- data/lib/redis/search/finder.rb +77 -39
- data/lib/redis/search/index.rb +35 -13
- metadata +18 -7
data/README.markdown
CHANGED
@@ -9,6 +9,7 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
|
|
9
9
|
* Segment words search and prefix match search
|
10
10
|
* Support ActiveRecord and Mongoid
|
11
11
|
* Sort results by one field
|
12
|
+
* Homophone search, pinyin search
|
12
13
|
|
13
14
|
## Requirements
|
14
15
|
|
@@ -19,8 +20,9 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
|
|
19
20
|
in Rails application Gemfile
|
20
21
|
|
21
22
|
gem 'redis','>= 2.1.1'
|
22
|
-
gem
|
23
|
-
gem '
|
23
|
+
gem 'chinese_pinyin', '0.3.0'
|
24
|
+
gem 'rmmseg-cpp-huacnlee', '0.2.8'
|
25
|
+
gem 'redis-search', '0.6'
|
24
26
|
|
25
27
|
install bundlers
|
26
28
|
|
@@ -37,6 +39,7 @@ create file in: config/initializers/redis_search.rb
|
|
37
39
|
Redis::Search.configure do |config|
|
38
40
|
config.redis = redis
|
39
41
|
config.complete_max_length = 100
|
42
|
+
config.pinyin_match = true
|
40
43
|
end
|
41
44
|
|
42
45
|
## Usage
|
@@ -106,4 +109,4 @@ see [Rdoc.info redis-search](http://rubydoc.info/gems/redis-search)
|
|
106
109
|
|
107
110
|
## Demo
|
108
111
|
|
109
|
-
You can try the search feature in [`zheye.org`](http://zheye.org)
|
112
|
+
You can try the search feature in [`zheye.org`](http://zheye.org) | [`shu.im`](http://shu.im)
|
data/lib/redis/search/config.rb
CHANGED
@@ -12,18 +12,24 @@ class Redis
|
|
12
12
|
class Config
|
13
13
|
# Redis
|
14
14
|
attr_accessor :redis
|
15
|
-
# Debug toggle
|
15
|
+
# Debug toggle, default false
|
16
16
|
attr_accessor :debug
|
17
17
|
# config for max length of content with Redis::Search.complete method,default 100
|
18
18
|
# Please change this with your real data length, short is fast
|
19
19
|
# For example: if you use complete search on your User model's name field and "name" is at most 15 chars long, you can set this to 15
|
20
20
|
# Warning! Content longer than the configured length cannot be found.
|
21
21
|
attr_accessor :complete_max_length
|
22
|
+
# Pinyin search/index (true|false) - default = false
|
23
|
+
# If this is set to true, the indexer will convert Chinese to Pinyin and index it as well
|
24
|
+
# When you search "de" will -> 得|的|德...
|
25
|
+
# When you search "得" will -> "de" -> 得|的|德...
|
26
|
+
attr_accessor :pinyin_match
|
22
27
|
|
23
28
|
def initialize
|
24
29
|
self.debug = false
|
25
30
|
self.redis = nil
|
26
31
|
self.complete_max_length = 100
|
32
|
+
self.pinyin_match = false
|
27
33
|
end
|
28
34
|
end
|
29
35
|
end
|
data/lib/redis/search/finder.rb
CHANGED
@@ -1,34 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require "rmmseg"
|
3
|
+
require 'chinese_pinyin'
|
4
|
+
|
3
5
|
class Redis
|
4
6
|
module Search
|
7
|
+
# use rmmseg to split words
|
5
8
|
def self.split(text)
|
6
|
-
|
7
|
-
|
8
|
-
loop do
|
9
|
-
tok = algor.next_token
|
10
|
-
break if tok.nil?
|
11
|
-
words << tok.text
|
12
|
-
end
|
13
|
-
words
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.warn(msg)
|
17
|
-
puts "[Redis::Search][warn]: #{msg}"
|
18
|
-
end
|
19
|
-
|
20
|
-
# 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
|
21
|
-
def self.mk_sets_key(type, key)
|
22
|
-
"#{type}:#{key.downcase}"
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.mk_score_key(type, id)
|
26
|
-
"#{type}:_score_:#{id}"
|
27
|
-
end
|
28
|
-
|
29
|
-
def self.mk_complete_key(type)
|
30
|
-
"Compl#{type}"
|
31
|
-
end
|
9
|
+
_split(text)
|
10
|
+
end
|
32
11
|
|
33
12
|
# Use for short title search, this method is search by chars, for example Tag, User, Category ...
|
34
13
|
#
|
@@ -104,39 +83,98 @@ class Redis
|
|
104
83
|
# h3. usage:
|
105
84
|
# * Redis::Search.query("Tag","Ruby vs Python")
|
106
85
|
def self.query(type, text,options = {})
|
86
|
+
tm = Time.now
|
107
87
|
result = []
|
108
88
|
return result if text.strip.blank?
|
109
89
|
|
110
|
-
words = Search.split(text)
|
111
90
|
limit = options[:limit] || 10
|
112
91
|
sort_field = options[:sort_field] || "id"
|
92
|
+
|
93
|
+
words = Search.split(text)
|
113
94
|
words = words.collect { |w| Search.mk_sets_key(type,w) }
|
114
|
-
|
95
|
+
|
115
96
|
return result if words.blank?
|
97
|
+
|
116
98
|
temp_store_key = "tmpinterstore:#{words.join("+")}"
|
99
|
+
|
117
100
|
if words.length > 1
|
118
101
|
if !Redis::Search.config.redis.exists(temp_store_key)
|
119
102
|
# 将多个词语组合对比,得到交集,并存入临时区域
|
120
103
|
Redis::Search.config.redis.sinterstore(temp_store_key,*words)
|
121
104
|
# 将临时搜索设为1天后自动清除
|
122
105
|
Redis::Search.config.redis.expire(temp_store_key,86400)
|
106
|
+
|
107
|
+
# 拼音搜索
|
108
|
+
if Search.config.pinyin_match
|
109
|
+
pinyin_words = Search.split_pinyin(text)
|
110
|
+
pinyin_words = pinyin_words.collect { |w| Search.mk_sets_key(type,w) }
|
111
|
+
temp_sunion_key = "tmpsunionstore:#{words.join("+")}"
|
112
|
+
if Search.config.pinyin_match
|
113
|
+
temp_pinyin_store_key = "tmpinterstore:#{pinyin_words.join("+")}"
|
114
|
+
end
|
115
|
+
# 找出拼音的
|
116
|
+
Redis::Search.config.redis.sinterstore(temp_pinyin_store_key,*pinyin_words)
|
117
|
+
# 合并中文和拼音的搜索结果
|
118
|
+
Redis::Search.config.redis.sunionstore(temp_sunion_key,*[temp_store_key,temp_pinyin_store_key])
|
119
|
+
# 将临时搜索设为1天后自动清除
|
120
|
+
Redis::Search.config.redis.expire(temp_pinyin_store_key,86400)
|
121
|
+
Redis::Search.config.redis.expire(temp_sunion_key,86400)
|
122
|
+
temp_store_key = temp_sunion_key
|
123
|
+
end
|
123
124
|
end
|
124
|
-
# 根据需要的数量取出 ids
|
125
|
-
ids = Redis::Search.config.redis.sort(temp_store_key,
|
126
|
-
:limit => [0,limit],
|
127
|
-
:by => Search.mk_score_key(type,"*"),
|
128
|
-
:order => "desc")
|
129
125
|
else
|
130
|
-
|
131
|
-
ids = Redis::Search.config.redis.sort(words.first,
|
132
|
-
:limit => [0,limit],
|
133
|
-
:by => Search.mk_score_key(type,"*"),
|
134
|
-
:order => "desc")
|
126
|
+
temp_store_key = words.first
|
135
127
|
end
|
136
|
-
|
128
|
+
|
129
|
+
# 根据需要的数量取出 ids
|
130
|
+
ids = Redis::Search.config.redis.sort(temp_store_key,
|
131
|
+
:limit => [0,limit],
|
132
|
+
:by => Search.mk_score_key(type,"*"),
|
133
|
+
:order => "desc")
|
134
|
+
result = hmget(type,ids, :sort_field => sort_field)
|
135
|
+
Search.info("{#{type} : \"#{text}\"} | Time spend: #{Time.now - tm}s")
|
136
|
+
result
|
137
137
|
end
|
138
138
|
|
139
|
+
protected
|
140
|
+
def self.split_pinyin(text)
|
141
|
+
# Pinyin search split as pinyin again
|
142
|
+
_split(Pinyin.t(text))
|
143
|
+
end
|
144
|
+
|
139
145
|
private
|
146
|
+
def self._split(text)
|
147
|
+
algor = RMMSeg::Algorithm.new(text)
|
148
|
+
words = []
|
149
|
+
loop do
|
150
|
+
tok = algor.next_token
|
151
|
+
break if tok.nil?
|
152
|
+
words << tok.text
|
153
|
+
end
|
154
|
+
words
|
155
|
+
end
|
156
|
+
|
157
|
+
def self.warn(msg)
|
158
|
+
::Rails.logger.warn("\e[33m[Redis::Search] #{msg}\e[0m")
|
159
|
+
end
|
160
|
+
|
161
|
+
def self.info(msg)
|
162
|
+
::Rails.logger.debug("\e[32m[Redis::Search] #{msg}\e[0m")
|
163
|
+
end
|
164
|
+
|
165
|
+
# 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
|
166
|
+
def self.mk_sets_key(type, key)
|
167
|
+
"#{type}:#{key.downcase}"
|
168
|
+
end
|
169
|
+
|
170
|
+
def self.mk_score_key(type, id)
|
171
|
+
"#{type}:_score_:#{id}"
|
172
|
+
end
|
173
|
+
|
174
|
+
def self.mk_complete_key(type)
|
175
|
+
"Compl#{type}"
|
176
|
+
end
|
177
|
+
|
140
178
|
def self.hmget(type, ids, options = {})
|
141
179
|
result = []
|
142
180
|
sort_field = options[:sort_field] || "id"
|
data/lib/redis/search/index.rb
CHANGED
@@ -19,11 +19,13 @@ class Redis
|
|
19
19
|
# 将原始数据存入 hashes
|
20
20
|
res = Redis::Search.config.redis.hset(self.type, self.id, data.to_json)
|
21
21
|
# 保存 sets 索引,以分词的单词为key,用于后面搜索,里面存储 ids
|
22
|
-
words = Search.
|
22
|
+
words = Search::Index.split_words_for_index(self.title)
|
23
23
|
return if words.blank?
|
24
24
|
words.each do |word|
|
25
25
|
key = Search.mk_sets_key(self.type,word)
|
26
|
+
# word index for item id
|
26
27
|
Redis::Search.config.redis.sadd(key, self.id)
|
28
|
+
# score for search sort
|
27
29
|
Redis::Search.config.redis.set(Search.mk_score_key(self.type,self.id),self.score)
|
28
30
|
end
|
29
31
|
|
@@ -32,28 +34,48 @@ class Redis
|
|
32
34
|
save_prefix_index
|
33
35
|
end
|
34
36
|
end
|
35
|
-
|
36
|
-
def save_prefix_index
|
37
|
-
word = self.title.downcase
|
38
|
-
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
|
39
|
-
key = Search.mk_complete_key(self.type)
|
40
|
-
(1..(word.length)).each do |l|
|
41
|
-
prefix = word[0...l]
|
42
|
-
Redis::Search.config.redis.zadd(key, 0, prefix)
|
43
|
-
end
|
44
|
-
Redis::Search.config.redis.zadd(key, 0, word + "*")
|
45
|
-
end
|
46
37
|
|
47
38
|
def self.remove(options = {})
|
48
39
|
type = options[:type]
|
49
40
|
Redis::Search.config.redis.hdel(type,options[:id])
|
50
|
-
words = Search.
|
41
|
+
words = Search::Index.split_words_for_index(options[:title])
|
51
42
|
words.each do |word|
|
52
43
|
key = Search.mk_sets_key(type,word)
|
53
44
|
Redis::Search.config.redis.srem(key, options[:id])
|
54
45
|
Redis::Search.config.redis.del(Search.mk_score_key(type,options[:id]))
|
55
46
|
end
|
56
47
|
end
|
48
|
+
|
49
|
+
private
|
50
|
+
def self.split_words_for_index(title)
|
51
|
+
words = Search.split(title)
|
52
|
+
if Search.config.pinyin_match
|
53
|
+
# convert Chinese to pinyin and use it as an additional index
|
54
|
+
words += Search.split_pinyin(title)
|
55
|
+
end
|
56
|
+
words.uniq
|
57
|
+
end
|
58
|
+
|
59
|
+
def save_prefix_index
|
60
|
+
words = []
|
61
|
+
words << self.title.downcase
|
62
|
+
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
|
63
|
+
if Search.config.pinyin_match
|
64
|
+
pinyin = Pinyin.t(self.title.downcase,'')
|
65
|
+
words << pinyin
|
66
|
+
Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,pinyin), self.id)
|
67
|
+
end
|
68
|
+
|
69
|
+
words.each do |word|
|
70
|
+
|
71
|
+
key = Search.mk_complete_key(self.type)
|
72
|
+
(1..(word.length)).each do |l|
|
73
|
+
prefix = word[0...l]
|
74
|
+
Redis::Search.config.redis.zadd(key, 0, prefix)
|
75
|
+
end
|
76
|
+
Redis::Search.config.redis.zadd(key, 0, word + "*")
|
77
|
+
end
|
78
|
+
end
|
57
79
|
end
|
58
80
|
end
|
59
81
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,22 +9,33 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-30 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: chinese_pinyin
|
16
|
+
requirement: &2157459160 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.3.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2157459160
|
14
25
|
- !ruby/object:Gem::Dependency
|
15
26
|
name: rmmseg-cpp-huacnlee
|
16
|
-
requirement: &
|
27
|
+
requirement: &2157458680 !ruby/object:Gem::Requirement
|
17
28
|
none: false
|
18
29
|
requirements:
|
19
|
-
- -
|
30
|
+
- - ! '>='
|
20
31
|
- !ruby/object:Gem::Version
|
21
32
|
version: 0.2.8
|
22
33
|
type: :runtime
|
23
34
|
prerelease: false
|
24
|
-
version_requirements: *
|
35
|
+
version_requirements: *2157458680
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: redis
|
27
|
-
requirement: &
|
38
|
+
requirement: &2157458200 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,7 +43,7 @@ dependencies:
|
|
32
43
|
version: 2.1.1
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *2157458200
|
36
47
|
description: High performance real-time search (Support Chinese), index in Redis for
|
37
48
|
Rails application.
|
38
49
|
email:
|