redis-search 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -9,6 +9,7 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
9
9
  * Segment words search and prefix match search
10
10
  * Support ActiveRecord and Mongoid
11
11
  * Sort results by one field
12
+ * Homophone search, pinyin search
12
13
 
13
14
  ## Requirements
14
15
 
@@ -19,8 +20,9 @@ High performance real-time search (Support Chinese), index in Redis for Rails ap
19
20
  in Rails application Gemfile
20
21
 
21
22
  gem 'redis','>= 2.1.1'
22
- gem "rmmseg-cpp-huacnlee", "0.2.8"
23
- gem 'redis-search', '0.5'
23
+ gem 'chinese_pinyin', '0.3.0'
24
+ gem 'rmmseg-cpp-huacnlee', '0.2.8'
25
+ gem 'redis-search', '0.6'
24
26
 
25
27
  install bundlers
26
28
 
@@ -37,6 +39,7 @@ create file in: config/initializers/redis_search.rb
37
39
  Redis::Search.configure do |config|
38
40
  config.redis = redis
39
41
  config.complete_max_length = 100
42
+ config.pinyin_match = true
40
43
  end
41
44
 
42
45
  ## Usage
@@ -106,4 +109,4 @@ see [Rdoc.info redis-search](http://rubydoc.info/gems/redis-search)
106
109
 
107
110
  ## Demo
108
111
 
109
- You can try the search feature in [`zheye.org`](http://zheye.org)
112
+ You can try the search feature in [`zheye.org`](http://zheye.org) | [`shu.im`](http://shu.im)
@@ -12,18 +12,24 @@ class Redis
12
12
  class Config
13
13
  # Redis
14
14
  attr_accessor :redis
15
- # Debug toggle
15
+ # Debug toggle, default false
16
16
  attr_accessor :debug
17
17
  # config for max length of content with Redis::Search.complete method,default 100
18
18
  # Set this to the real maximum length of your data; shorter values are faster
19
19
  # For example: if you use complete search on your User model's name field and names are at most 15 chars long, you can set this to 15
20
20
  # Warning! Content longer than the configured length cannot be found.
21
21
  attr_accessor :complete_max_length
22
+ # Pinyin search/index (true|false) - default = false
23
+ # If this is set to true, the indexer will also convert Chinese to Pinyin and index it
24
+ # Searching "de" will match -> 得|的|德...
25
+ # Searching "得" will be converted -> "de" -> 得|的|德...
26
+ attr_accessor :pinyin_match
22
27
 
23
28
  def initialize
24
29
  self.debug = false
25
30
  self.redis = nil
26
31
  self.complete_max_length = 100
32
+ self.pinyin_match = false
27
33
  end
28
34
  end
29
35
  end
@@ -1,34 +1,13 @@
1
1
  # coding: utf-8
2
2
  require "rmmseg"
3
+ require 'chinese_pinyin'
4
+
3
5
  class Redis
4
6
  module Search
7
+ # use rmmseg to split words
5
8
  def self.split(text)
6
- algor = RMMSeg::Algorithm.new(text)
7
- words = []
8
- loop do
9
- tok = algor.next_token
10
- break if tok.nil?
11
- words << tok.text
12
- end
13
- words
14
- end
15
-
16
- def self.warn(msg)
17
- puts "[Redis::Search][warn]: #{msg}"
18
- end
19
-
20
- # 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
21
- def self.mk_sets_key(type, key)
22
- "#{type}:#{key.downcase}"
23
- end
24
-
25
- def self.mk_score_key(type, id)
26
- "#{type}:_score_:#{id}"
27
- end
28
-
29
- def self.mk_complete_key(type)
30
- "Compl#{type}"
31
- end
9
+ _split(text)
10
+ end
32
11
 
33
12
  # Used for short title search; this method searches by characters, for example Tag, User, Category ...
34
13
  #
@@ -104,39 +83,98 @@ class Redis
104
83
  # h3. usage:
105
84
  # * Redis::Search.query("Tag","Ruby vs Python")
106
85
  def self.query(type, text,options = {})
86
+ tm = Time.now
107
87
  result = []
108
88
  return result if text.strip.blank?
109
89
 
110
- words = Search.split(text)
111
90
  limit = options[:limit] || 10
112
91
  sort_field = options[:sort_field] || "id"
92
+
93
+ words = Search.split(text)
113
94
  words = words.collect { |w| Search.mk_sets_key(type,w) }
114
- word_score = words.collect { |w| "#{w}:*" }
95
+
115
96
  return result if words.blank?
97
+
116
98
  temp_store_key = "tmpinterstore:#{words.join("+")}"
99
+
117
100
  if words.length > 1
118
101
  if !Redis::Search.config.redis.exists(temp_store_key)
119
102
  # 将多个词语组合对比,得到交集,并存入临时区域
120
103
  Redis::Search.config.redis.sinterstore(temp_store_key,*words)
121
104
  # 将临时搜索设为1天后自动清除
122
105
  Redis::Search.config.redis.expire(temp_store_key,86400)
106
+
107
+ # 拼音搜索
108
+ if Search.config.pinyin_match
109
+ pinyin_words = Search.split_pinyin(text)
110
+ pinyin_words = pinyin_words.collect { |w| Search.mk_sets_key(type,w) }
111
+ temp_sunion_key = "tmpsunionstore:#{words.join("+")}"
112
+ if Search.config.pinyin_match
113
+ temp_pinyin_store_key = "tmpinterstore:#{pinyin_words.join("+")}"
114
+ end
115
+ # 找出拼音的
116
+ Redis::Search.config.redis.sinterstore(temp_pinyin_store_key,*pinyin_words)
117
+ # 合并中文和拼音的搜索结果
118
+ Redis::Search.config.redis.sunionstore(temp_sunion_key,*[temp_store_key,temp_pinyin_store_key])
119
+ # 将临时搜索设为1天后自动清除
120
+ Redis::Search.config.redis.expire(temp_pinyin_store_key,86400)
121
+ Redis::Search.config.redis.expire(temp_sunion_key,86400)
122
+ temp_store_key = temp_sunion_key
123
+ end
123
124
  end
124
- # 根据需要的数量取出 ids
125
- ids = Redis::Search.config.redis.sort(temp_store_key,
126
- :limit => [0,limit],
127
- :by => Search.mk_score_key(type,"*"),
128
- :order => "desc")
129
125
  else
130
- # 根据需要的数量取出 ids
131
- ids = Redis::Search.config.redis.sort(words.first,
132
- :limit => [0,limit],
133
- :by => Search.mk_score_key(type,"*"),
134
- :order => "desc")
126
+ temp_store_key = words.first
135
127
  end
136
- hmget(type,ids, :sort_field => sort_field)
128
+
129
+ # 根据需要的数量取出 ids
130
+ ids = Redis::Search.config.redis.sort(temp_store_key,
131
+ :limit => [0,limit],
132
+ :by => Search.mk_score_key(type,"*"),
133
+ :order => "desc")
134
+ result = hmget(type,ids, :sort_field => sort_field)
135
+ Search.info("{#{type} : \"#{text}\"} | Time spend: #{Time.now - tm}s")
136
+ result
137
137
  end
138
138
 
139
+ protected
140
+ def self.split_pinyin(text)
141
+ # Convert the text to pinyin, then split the pinyin into words
142
+ _split(Pinyin.t(text))
143
+ end
144
+
139
145
  private
146
+ def self._split(text)
147
+ algor = RMMSeg::Algorithm.new(text)
148
+ words = []
149
+ loop do
150
+ tok = algor.next_token
151
+ break if tok.nil?
152
+ words << tok.text
153
+ end
154
+ words
155
+ end
156
+
157
+ def self.warn(msg)
158
+ ::Rails.logger.warn("\e[33m[Redis::Search] #{msg}\e[0m")
159
+ end
160
+
161
+ def self.info(msg)
162
+ ::Rails.logger.debug("\e[32m[Redis::Search] #{msg}\e[0m")
163
+ end
164
+
165
+ # 生成 uuid,用于作为 hashes 的 field, sets 关键词的值
166
+ def self.mk_sets_key(type, key)
167
+ "#{type}:#{key.downcase}"
168
+ end
169
+
170
+ def self.mk_score_key(type, id)
171
+ "#{type}:_score_:#{id}"
172
+ end
173
+
174
+ def self.mk_complete_key(type)
175
+ "Compl#{type}"
176
+ end
177
+
140
178
  def self.hmget(type, ids, options = {})
141
179
  result = []
142
180
  sort_field = options[:sort_field] || "id"
@@ -19,11 +19,13 @@ class Redis
19
19
  # 将原始数据存入 hashes
20
20
  res = Redis::Search.config.redis.hset(self.type, self.id, data.to_json)
21
21
  # 保存 sets 索引,以分词的单词为key,用于后面搜索,里面存储 ids
22
- words = Search.split(self.title)
22
+ words = Search::Index.split_words_for_index(self.title)
23
23
  return if words.blank?
24
24
  words.each do |word|
25
25
  key = Search.mk_sets_key(self.type,word)
26
+ # word index for item id
26
27
  Redis::Search.config.redis.sadd(key, self.id)
28
+ # score for search sort
27
29
  Redis::Search.config.redis.set(Search.mk_score_key(self.type,self.id),self.score)
28
30
  end
29
31
 
@@ -32,28 +34,48 @@ class Redis
32
34
  save_prefix_index
33
35
  end
34
36
  end
35
-
36
- def save_prefix_index
37
- word = self.title.downcase
38
- Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
39
- key = Search.mk_complete_key(self.type)
40
- (1..(word.length)).each do |l|
41
- prefix = word[0...l]
42
- Redis::Search.config.redis.zadd(key, 0, prefix)
43
- end
44
- Redis::Search.config.redis.zadd(key, 0, word + "*")
45
- end
46
37
 
47
38
  def self.remove(options = {})
48
39
  type = options[:type]
49
40
  Redis::Search.config.redis.hdel(type,options[:id])
50
- words = Search.split(options[:title])
41
+ words = Search::Index.split_words_for_index(options[:title])
51
42
  words.each do |word|
52
43
  key = Search.mk_sets_key(type,word)
53
44
  Redis::Search.config.redis.srem(key, options[:id])
54
45
  Redis::Search.config.redis.del(Search.mk_score_key(type,options[:id]))
55
46
  end
56
47
  end
48
+
49
+ private
50
+ def self.split_words_for_index(title)
51
+ words = Search.split(title)
52
+ if Search.config.pinyin_match
53
+ # convert Chinese to pinyin and index it as well
54
+ words += Search.split_pinyin(title)
55
+ end
56
+ words.uniq
57
+ end
58
+
59
+ def save_prefix_index
60
+ words = []
61
+ words << self.title.downcase
62
+ Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,self.title), self.id)
63
+ if Search.config.pinyin_match
64
+ pinyin = Pinyin.t(self.title.downcase,'')
65
+ words << pinyin
66
+ Redis::Search.config.redis.sadd(Search.mk_sets_key(self.type,pinyin), self.id)
67
+ end
68
+
69
+ words.each do |word|
70
+
71
+ key = Search.mk_complete_key(self.type)
72
+ (1..(word.length)).each do |l|
73
+ prefix = word[0...l]
74
+ Redis::Search.config.redis.zadd(key, 0, prefix)
75
+ end
76
+ Redis::Search.config.redis.zadd(key, 0, word + "*")
77
+ end
78
+ end
57
79
  end
58
80
  end
59
81
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redis-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,33 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-25 00:00:00.000000000Z
12
+ date: 2011-09-30 00:00:00.000000000Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: chinese_pinyin
16
+ requirement: &2157459160 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.3.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2157459160
14
25
  - !ruby/object:Gem::Dependency
15
26
  name: rmmseg-cpp-huacnlee
16
- requirement: &2151875920 !ruby/object:Gem::Requirement
27
+ requirement: &2157458680 !ruby/object:Gem::Requirement
17
28
  none: false
18
29
  requirements:
19
- - - ~>
30
+ - - ! '>='
20
31
  - !ruby/object:Gem::Version
21
32
  version: 0.2.8
22
33
  type: :runtime
23
34
  prerelease: false
24
- version_requirements: *2151875920
35
+ version_requirements: *2157458680
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: redis
27
- requirement: &2151873980 !ruby/object:Gem::Requirement
38
+ requirement: &2157458200 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ! '>='
@@ -32,7 +43,7 @@ dependencies:
32
43
  version: 2.1.1
33
44
  type: :runtime
34
45
  prerelease: false
35
- version_requirements: *2151873980
46
+ version_requirements: *2157458200
36
47
  description: High performance real-time search (Support Chinese), index in Redis for
37
48
  Rails application.
38
49
  email: