redis-search 0.5.2 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -9,6 +9,7 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
9
9
  * Segment words search and prefix match search
10
10
  * Support ActiveRecord and Mongoid
11
11
  * Sort results by one field
12
+ * Homophone search, pinyin search
12
13
 
13
14
  ## Requirements
14
15
 
@@ -19,8 +20,9 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
19
20
  in Rails application Gemfile
20
21
 
21
22
  gem 'redis','>= 2.1.1'
22
- gem "rmmseg-cpp-huacnlee", "0.2.8"
23
- gem 'redis-search', '0.5'
23
+ gem 'chinese_pinyin', '0.3.0'
24
+ gem 'rmmseg-cpp-huacnlee', '0.2.8'
25
+ gem 'redis-search', '0.6'
24
26
 
25
27
  install bundlers
26
28
 
@@ -37,6 +39,7 @@ create file in: config/initializers/redis_search.rb
37
39
  Redis::Search.configure do |config|
38
40
  config.redis = redis
39
41
  config.complete_max_length = 100
42
+ config.pinyin_match = true
40
43
  end
41
44
 
42
45
  ## Usage
@@ -106,4 +109,4 @@ see [Rdoc.info redis-search](http://rubydoc.info/gems/redis-search)
106
109
 
107
110
  ## Demo
108
111
 
109
- You can try the search feature in [`zheye.org`](http://zheye.org)
112
+ You can try the search feature in [`zheye.org`](http://zheye.org) | [`shu.im`](http://shu.im)
@@ -12,18 +12,24 @@ class Redis
12
12
  class Config
13
13
  # Redis
14
14
  attr_accessor :redis
15
- # Debug toggle
15
+ # Debug toggle, default false
16
16
  attr_accessor :debug
17
17
  # config for max length of content with Redis::Search.complete method,default 100
18
18
  # Please set this to match your real data length; shorter is faster
19
19
  # For example: if you use complete search on your User model's "name" field and names are at most 15 chars long, you can set this to 15
20
20
  # Warning! Content longer than the configured length cannot be found.
21
21
  attr_accessor :complete_max_length
22
+ # Pinyin search/index (true|false) - default = false
23
+ # If this is set to true, the indexer will convert Chinese to Pinyin and index it as well
24
+ # When you search "de" will -> 得|的|德...
25
+ # When you search "得" will -> "de" -> 得|的|德...
26
+ attr_accessor :pinyin_match
22
27
 
23
28
  def initialize
24
29
  self.debug = false
25
30
  self.redis = nil
26
31
  self.complete_max_length = 100
32
+ self.pinyin_match = false
27
33
  end
28
34
  end
29
35
  end
@@ -1,34 +1,13 @@
1
1
  # coding: utf-8
2
2
  require "rmmseg"
3
+ require 'chinese_pinyin'
4
+
3
5
  class Redis
4
6
  module Search
7
+ # use rmmseg to split words
5
8
  def self.split(text)
6
- algor = RMMSeg::Algorithm.new(text)
7
- words = []
8
- loop do
9
- tok = algor.next_token
10
- break if tok.nil?
11
- words << tok.text
12
- end
13
- words
14
- end
15
-
16
- def self.warn(msg)
17
- puts "[Redis::Search][warn]: #{msg}"
18
- end
19
-
20
- # 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
21
- def self.mk_sets_key(type, key)
22
- "#{type}:#{key.downcase}"
23
- end
24
-
25
- def self.mk_score_key(type, id)
26
- "#{type}:_score_:#{id}"
27
- end
28
-
29
- def self.mk_complete_key(type)
30
- "Compl#{type}"
31
- end
9
+ _split(text)
10
+ end
32
11
 
33
12
  # Used for short title search; this method searches by characters, for example for Tag, User, Category ...
34
13
  #
@@ -104,39 +83,98 @@ class Redis
104
83
  # h3. usage:
105
84
  # * Redis::Search.query("Tag","Ruby vs Python")
106
85
  def self.query(type, text,options = {})
86
+ tm = Time.now
107
87
  result = []
108
88
  return result if text.strip.blank?
109
89
 
110
- words = Search.split(text)
111
90
  limit = options[:limit] || 10
112
91
  sort_field = options[:sort_field] || "id"
92
+
93
+ words = Search.split(text)
113
94
  words = words.collect { |w| Search.mk_sets_key(type,w) }
114
- word_score = words.collect { |w| "#{w}:*" }
95
+
115
96
  return result if words.blank?
97
+
116
98
  temp_store_key = "tmpinterstore:#{words.join("+")}"
99
+
117
100
  if words.length > 1
118
101
  if !Redis::Search.config.redis.exists(temp_store_key)
119
102
  # 将多个词语组合对比,得到交集,并存入临时区域
120
103
  Redis::Search.config.redis.sinterstore(temp_store_key,*words)
121
104
  # 将临时搜索设为1天后自动清除
122
105
  Redis::Search.config.redis.expire(temp_store_key,86400)
106
+
107
+ # 拼音搜索
108
+ if Search.config.pinyin_match
109
+ pinyin_words = Search.split_pinyin(text)
110
+ pinyin_words = pinyin_words.collect { |w| Search.mk_sets_key(type,w) }
111
+ temp_sunion_key = "tmpsunionstore:#{words.join("+")}"
112
+ if Search.config.pinyin_match
113
+ temp_pinyin_store_key = "tmpinterstore:#{pinyin_words.join("+")}"
114
+ end
115
+ # 找出拼音的
116
+ Redis::Search.config.redis.sinterstore(temp_pinyin_store_key,*pinyin_words)
117
+ # 合并中文和拼音的搜索结果
118
+ Redis::Search.config.redis.sunionstore(temp_sunion_key,*[temp_store_key,temp_pinyin_store_key])
119
+ # 将临时搜索设为1天后自动清除
120
+ Redis::Search.config.redis.expire(temp_pinyin_store_key,86400)
121
+ Redis::Search.config.redis.expire(temp_sunion_key,86400)
122
+ temp_store_key = temp_sunion_key
123
+ end
123
124
  end
124
- # 根据需要的数量取出 ids
125
- ids = Redis::Search.config.redis.sort(temp_store_key,
126
- :limit => [0,limit],
127
- :by => Search.mk_score_key(type,"*"),
128
- :order => "desc")
129
125
  else
130
- # 根据需要的数量取出 ids
131
- ids = Redis::Search.config.redis.sort(words.first,
132
- :limit => [0,limit],
133
- :by => Search.mk_score_key(type,"*"),
134
- :order => "desc")
126
+ temp_store_key = words.first
135
127
  end
136
- hmget(type,ids, :sort_field => sort_field)
128
+
129
+ # 根据需要的数量取出 ids
130
+ ids = Redis::Search.config.redis.sort(temp_store_key,
131
+ :limit => [0,limit],
132
+ :by => Search.mk_score_key(type,"*"),
133
+ :order => "desc")
134
+ result = hmget(type,ids, :sort_field => sort_field)
135
+ Search.info("{#{type} : \"#{text}\"} | Time spend: #{Time.now - tm}s")
136
+ result
137
137
  end
138
138
 
139
+ protected
140
+ def self.split_pinyin(text)
141
+ # Pinyin search split as pinyin again
142
+ _split(Pinyin.t(text))
143
+ end
144
+
139
145
  private
146
+ def self._split(text)
147
+ algor = RMMSeg::Algorithm.new(text)
148
+ words = []
149
+ loop do
150
+ tok = algor.next_token
151
+ break if tok.nil?
152
+ words << tok.text
153
+ end
154
+ words
155
+ end
156
+
157
+ def self.warn(msg)
158
+ ::Rails.logger.warn("\e[33m[Redis::Search] #{msg}\e[0m")
159
+ end
160
+
161
+ def self.info(msg)
162
+ ::Rails.logger.debug("\e[32m[Redis::Search] #{msg}\e[0m")
163
+ end
164
+
165
+ # 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
166
+ def self.mk_sets_key(type, key)
167
+ "#{type}:#{key.downcase}"
168
+ end
169
+
170
+ def self.mk_score_key(type, id)
171
+ "#{type}:_score_:#{id}"
172
+ end
173
+
174
+ def self.mk_complete_key(type)
175
+ "Compl#{type}"
176
+ end
177
+
140
178
  def self.hmget(type, ids, options = {})
141
179
  result = []
142
180
  sort_field = options[:sort_field] || "id"
@@ -19,11 +19,13 @@ class Redis
19
19
  # 将原始数据存入 hashes
20
20
  res = Redis::Search.config.redis.hset(self.type, self.id, data.to_json)
21
21
  # 保存 sets 索引,以分词的单词为key,用于后面搜索,里面存储 ids
22
- words = Search.split(self.title)
22
+ words = Search::Index.split_words_for_index(self.title)
23
23
  return if words.blank?
24
24
  words.each do |word|
25
25
  key = Search.mk_sets_key(self.type,word)
26
+ # word index for item id
26
27
  Redis::Search.config.redis.sadd(key, self.id)
28
+ # score for search sort
27
29
  Redis::Search.config.redis.set(Search.mk_score_key(self.type,self.id),self.score)
28
30
  end
29
31
 
@@ -32,28 +34,48 @@ class Redis
32
34
  save_prefix_index
33
35
  end
34
36
  end
35
-
36
- def save_prefix_index
37
- word = self.title.downcase
38
- Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
39
- key = Search.mk_complete_key(self.type)
40
- (1..(word.length)).each do |l|
41
- prefix = word[0...l]
42
- Redis::Search.config.redis.zadd(key, 0, prefix)
43
- end
44
- Redis::Search.config.redis.zadd(key, 0, word + "*")
45
- end
46
37
 
47
38
  def self.remove(options = {})
48
39
  type = options[:type]
49
40
  Redis::Search.config.redis.hdel(type,options[:id])
50
- words = Search.split(options[:title])
41
+ words = Search::Index.split_words_for_index(options[:title])
51
42
  words.each do |word|
52
43
  key = Search.mk_sets_key(type,word)
53
44
  Redis::Search.config.redis.srem(key, options[:id])
54
45
  Redis::Search.config.redis.del(Search.mk_score_key(type,options[:id]))
55
46
  end
56
47
  end
48
+
49
+ private
50
+ def self.split_words_for_index(title)
51
+ words = Search.split(title)
52
+ if Search.config.pinyin_match
53
+ # convert Chinese to pinyin to use as an index
54
+ words += Search.split_pinyin(title)
55
+ end
56
+ words.uniq
57
+ end
58
+
59
+ def save_prefix_index
60
+ words = []
61
+ words << self.title.downcase
62
+ Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
63
+ if Search.config.pinyin_match
64
+ pinyin = Pinyin.t(self.title.downcase,'')
65
+ words << pinyin
66
+ Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,pinyin), self.id)
67
+ end
68
+
69
+ words.each do |word|
70
+
71
+ key = Search.mk_complete_key(self.type)
72
+ (1..(word.length)).each do |l|
73
+ prefix = word[0...l]
74
+ Redis::Search.config.redis.zadd(key, 0, prefix)
75
+ end
76
+ Redis::Search.config.redis.zadd(key, 0, word + "*")
77
+ end
78
+ end
57
79
  end
58
80
  end
59
81
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redis-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,33 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-25 00:00:00.000000000Z
12
+ date: 2011-09-30 00:00:00.000000000Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: chinese_pinyin
16
+ requirement: &2157459160 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.3.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2157459160
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rmmseg-cpp-huacnlee
16
- requirement: &2151875920 !ruby/object:Gem::Requirement
27
+ requirement: &2157458680 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
- - - ~>
30
+ - - ! '>='
20
31
  - !ruby/object:Gem::Version
21
32
  version: 0.2.8
22
33
  type: :runtime
23
34
  prerelease: false
24
- version_requirements: *2151875920
35
+ version_requirements: *2157458680
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: redis
27
- requirement: &2151873980 !ruby/object:Gem::Requirement
38
+ requirement: &2157458200 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ! '>='
@@ -32,7 +43,7 @@ dependencies:
32
43
  version: 2.1.1
33
44
  type: :runtime
34
45
  prerelease: false
35
- version_requirements: *2151873980
46
+ version_requirements: *2157458200
36
47
  description: High performance real-time search (Support Chinese), index in Redis for
37
48
  Rails application.
38
49
  email: