mongoid-haystack 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +249 -0
- data/lib/mongoid-haystack/index.rb +10 -11
- data/lib/mongoid-haystack/search.rb +97 -6
- data/lib/mongoid-haystack/stemming.rb +63 -60
- data/lib/mongoid-haystack/token.rb +8 -21
- data/lib/mongoid-haystack/util.rb +17 -3
- data/lib/mongoid-haystack.rb +13 -4
- data/mongoid-haystack.gemspec +7 -2
- data/test/helper.rb +13 -0
- data/test/mongoid-haystack_test.rb +156 -12
- metadata +34 -1
data/README.md
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
NAME
|
2
|
+
----
|
3
|
+
|
4
|
+
mongoid-haystack.rb
|
5
|
+
|
6
|
+
DESCRIPTION
|
7
|
+
-----------
|
8
|
+
|
9
|
+
mongoid-haystack provides a zero-config, POLS, pure mongo, fulltext search
|
10
|
+
solution for your mongoid models.
|
11
|
+
|
12
|
+
SYNOPSIS
|
13
|
+
--------
|
14
|
+
|
15
|
+
````ruby
|
16
|
+
|
17
|
+
# simple usage is simple
|
18
|
+
#
|
19
|
+
class Article
|
20
|
+
include Mongoid::Document
|
21
|
+
include Mongoid::Haystack
|
22
|
+
|
23
|
+
field(:content, :type => String)
|
24
|
+
end
|
25
|
+
|
26
|
+
Article.create!(:content => 'teh cats')
|
27
|
+
|
28
|
+
results = Article.search('cat')
|
29
|
+
|
30
|
+
article = results.first.model
|
31
|
+
|
32
|
+
|
33
|
+
# haystack stems the search terms and does score based sorting all using a
|
34
|
+
# fast b-tree
|
35
|
+
#
|
36
|
+
a = Article.create!(:content => 'cats are awesome')
|
37
|
+
b = Article.create!(:content => 'dogs eat cats')
|
38
|
+
c = Article.create!(:content => 'dogs dogs dogs')
|
39
|
+
|
40
|
+
results = Article.search('dogs cats').models
|
41
|
+
results == [b, a, c] #=> true
|
42
|
+
|
43
|
+
results = Article.search('awesome').models
|
44
|
+
results == [a] #=> true
|
45
|
+
|
46
|
+
|
47
|
+
# cross models searching is supported out of the box, and models can
|
48
|
+
# customise how they are indexed:
|
49
|
+
#
|
50
|
+
# - a global score lets some models appear higher in the global results
|
51
|
+
# - keywords count more than fulltext
|
52
|
+
#
|
53
|
+
class Article
|
54
|
+
include Mongoid::Document
|
55
|
+
include Mongoid::Haystack
|
56
|
+
|
57
|
+
field(:title, :type => String)
|
58
|
+
field(:content, :type => String)
|
59
|
+
|
60
|
+
def to_haystack
|
61
|
+
{ :score => 11, :keywords => title, :fulltext => content }
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Comment
|
66
|
+
include Mongoid::Document
|
67
|
+
include Mongoid::Haystack
|
68
|
+
|
69
|
+
field(:content, :type => String)
|
70
|
+
|
71
|
+
def to_haystack
|
72
|
+
{ :score => -11, :fulltext => content }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
a1 = Article.create!(:title => 'hot pants', :content => 'teh b 52s rock')
|
77
|
+
a2 = Article.create!(:title => 'boring title', :content => 'but hot content that rocks')
|
78
|
+
|
79
|
+
c = Comment.create!(:content => 'those guys rock')
|
80
|
+
|
81
|
+
results = Mongoid::Haystack.search('rock')
|
82
|
+
results.count #=> 3
|
83
|
+
|
84
|
+
models = results.models
|
85
|
+
models == [a1, a2, c] #=> true. articles first because we generally score them higher
|
86
|
+
|
87
|
+
results = Mongoid::Haystack.search('hot')
|
88
|
+
models = results.models
|
89
|
+
models == [a1, a2] #=> true. because keywords score higher than general fulltext
|
90
|
+
|
91
|
+
|
92
|
+
# by default searching returns Mongoid::Haystack::Index objects. you'll want
|
93
|
+
# to expand these results to the models they reference in your views, but
|
94
|
+
# avoid doing an N+1 query. to do this simply call #models on the result set
|
95
|
+
# and the models will be eager loaded using only as many queries as there are
|
96
|
+
# model types in your result set
|
97
|
+
#
|
98
|
+
|
99
|
+
@results = Mongoid::Haystack.search('needle').page(params[:page]).per(10)
|
100
|
+
@models = @results.models
|
101
|
+
|
102
|
+
|
103
|
+
# you can decorate your search items with arbitrary meta data and filter
|
104
|
+
# searches by it later. this too uses a b-tree index.
|
105
|
+
#
|
106
|
+
class Article
|
107
|
+
include Mongoid::Document
|
108
|
+
include Mongoid::Haystack
|
109
|
+
|
110
|
+
belongs_to :author, :class_name => '::User'
|
111
|
+
|
112
|
+
field(:title, :type => String)
|
113
|
+
field(:content, :type => String)
|
114
|
+
|
115
|
+
def to_haystack
|
116
|
+
{
|
117
|
+
:score => author.popularity,
|
118
|
+
:keywords => title,
|
119
|
+
:fulltext => content,
|
120
|
+
:facets => {:author_id => author.id}
|
121
|
+
}
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
a =
|
126
|
+
author.articles.create!(
|
127
|
+
:title => 'iggy and keith',
|
128
|
+
:content => 'seen the needles and the damage done...'
|
129
|
+
)
|
130
|
+
|
131
|
+
author_articles = Article.search('needle', :facets => {:author_id => author.id})
|
132
|
+
|
133
|
+
|
134
|
+
````
|
135
|
+
|
136
|
+
DESCRIPTION
|
137
|
+
-----------
|
138
|
+
|
139
|
+
there are two main pathways to understand in the code. shit going into the
|
140
|
+
index, and shit coming out.
|
141
|
+
|
142
|
+
shit going in entails:
|
143
|
+
|
144
|
+
- stem and stopword the search terms.
|
145
|
+
- create or update a new token for each
|
146
|
+
- create an index item referencing all the tokens with precomputed scores
|
147
|
+
|
148
|
+
for example the terms 'dog dogs cat' might result in these tokens
|
149
|
+
|
150
|
+
````javascript
|
151
|
+
|
152
|
+
[
|
153
|
+
{
|
154
|
+
'_id' : '0x1',
|
155
|
+
'value' : 'dog',
|
156
|
+
'count' : 2
|
157
|
+
},
|
158
|
+
|
159
|
+
|
160
|
+
{
|
161
|
+
'_id' : '0x2',
|
162
|
+
'value' : 'cat',
|
163
|
+
'count' : 1
|
164
|
+
}
|
165
|
+
]
|
166
|
+
|
167
|
+
````
|
168
|
+
|
169
|
+
and this index item
|
170
|
+
|
171
|
+
|
172
|
+
````javascript
|
173
|
+
|
174
|
+
{
|
175
|
+
'_id' : '50c11759a04745961e000001',
|
176
|
+
|
177
|
+
'model_type' : 'Article',
|
178
|
+
'model_id' : '50c11775a04745461f000001',
|
179
|
+
|
180
|
+
'tokens' : ['0x1', '0x2'],
|
181
|
+
|
182
|
+
'score' : 10,
|
183
|
+
|
184
|
+
'keyword_scores' : {
|
185
|
+
'0x1' : 2,
|
186
|
+
'0x2' : 1
|
187
|
+
},
|
188
|
+
|
189
|
+
'fulltext_scores' : {
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
|
194
|
+
````
|
195
|
+
|
196
|
+
being built
|
197
|
+
|
198
|
+
in addition, some other information is tracked such as the total number of
|
199
|
+
search tokens ever discovered in the corpus
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
a few things to notice:
|
204
|
+
|
205
|
+
- the tokens are counted and auto-id'd using hex notation and a sequence
|
206
|
+
generator. the reason for this is so that their ids are legit hash keys
|
207
|
+
in the keyword and fulltext score hashes.
|
208
|
+
|
209
|
+
- the data structure above allows both filtering for index items that have
|
210
|
+
certain tokens, but also ordering them based on global, keyword, and
|
211
|
+
fulltext score without resorting to map-reduce: a b-tree index can be
|
212
|
+
used.
|
213
|
+
|
214
|
+
- all tokens have their text/stem stored exactly once. aka: we do not store
|
215
|
+
'hugewords' all over the place but store it once and count occurrences of
|
216
|
+
it to keep the total index much smaller
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
pulling objects back out in a search involves these logical steps:
|
222
|
+
|
223
|
+
- filter the search terms through the same tokenizer as when indexed
|
224
|
+
|
225
|
+
- lookup tokens for each of the tokens in the search string
|
226
|
+
|
227
|
+
- using the count for each token, plus the global token count that has been
|
228
|
+
tracked we can decide to order the results by relatively rare words first
|
229
|
+
and, all else being equal (same rarity bin: 0.10, 0.20, 0.30, etc.), the
|
230
|
+
order in which the user typed the words
|
231
|
+
|
232
|
+
- this approach applies and is valid whether we are doing a union (or) or
|
233
|
+
intersection (all) search and regardless of whether facets are included in
|
234
|
+
the search. facets, however, never affect the order unless done so by the
|
235
|
+
user manually. eg
|
236
|
+
|
237
|
+
````ruby
|
238
|
+
|
239
|
+
results =
|
240
|
+
Mongoid::Haystack.
|
241
|
+
search('foo bar', :facets => {:hotness.gte => 11}).
|
242
|
+
order_by('facets.hotness' => :desc)
|
243
|
+
|
244
|
+
````
|
245
|
+
|
246
|
+
|
247
|
+
SEE ALSO
|
248
|
+
--------
|
249
|
+
tests: <a href='https://github.com/ahoward/mongoid-haystack/blob/master/test/mongoid-haystack_test.rb'>./test/mongoid-haystack_test.rb</a>
|
@@ -73,12 +73,13 @@ module Mongoid
|
|
73
73
|
|
74
74
|
keyword_scores = Hash.new{|h,k| h[k] = 0}
|
75
75
|
fulltext_scores = Hash.new{|h,k| h[k] = 0}
|
76
|
+
token_ids = []
|
76
77
|
|
77
78
|
Token.values_for(keywords).each do |value|
|
78
79
|
token = Token.add(value)
|
79
80
|
id = token.id
|
80
81
|
|
81
|
-
|
82
|
+
token_ids.push(id)
|
82
83
|
keyword_scores[id] += 1
|
83
84
|
end
|
84
85
|
|
@@ -86,7 +87,7 @@ module Mongoid
|
|
86
87
|
token = Token.add(value)
|
87
88
|
id = token.id
|
88
89
|
|
89
|
-
|
90
|
+
token_ids.push(id)
|
90
91
|
fulltext_scores[id] += 1
|
91
92
|
end
|
92
93
|
|
@@ -96,7 +97,7 @@ module Mongoid
|
|
96
97
|
index.score = score if score
|
97
98
|
index.facets = facets if facets
|
98
99
|
|
99
|
-
index.
|
100
|
+
index.token_ids = token_ids
|
100
101
|
|
101
102
|
index.save!
|
102
103
|
end
|
@@ -105,16 +106,12 @@ module Mongoid
|
|
105
106
|
def remove(*args)
|
106
107
|
models_for(*args) do |model|
|
107
108
|
index = where(:model_type => model.class.name, :model_id => model.id).first
|
108
|
-
|
109
|
-
if index
|
110
|
-
subtract(index)
|
111
|
-
index.destroy
|
112
|
-
end
|
109
|
+
index.destroy if index
|
113
110
|
end
|
114
111
|
end
|
115
112
|
|
116
113
|
def subtract(index)
|
117
|
-
tokens =
|
114
|
+
tokens = index.tokens
|
118
115
|
|
119
116
|
n = 0
|
120
117
|
|
@@ -145,9 +142,11 @@ module Mongoid
|
|
145
142
|
end
|
146
143
|
end
|
147
144
|
|
145
|
+
before_destroy{|index| Index.subtract(index)}
|
146
|
+
|
148
147
|
belongs_to(:model, :polymorphic => true)
|
149
148
|
|
150
|
-
|
149
|
+
has_and_belongs_to_many(:tokens, :class_name => '::Mongoid::Haystack::Token', :inverse_of => nil)
|
151
150
|
field(:score, :type => Integer, :default => 0)
|
152
151
|
field(:keyword_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
|
153
152
|
field(:fulltext_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
|
@@ -156,7 +155,7 @@ module Mongoid
|
|
156
155
|
index({:model_type => 1})
|
157
156
|
index({:model_id => 1})
|
158
157
|
|
159
|
-
index({:
|
158
|
+
index({:token_ids => 1})
|
160
159
|
index({:score => 1})
|
161
160
|
index({:keyword_scores => 1})
|
162
161
|
index({:fulltext_scores => 1})
|
@@ -5,15 +5,34 @@ module Mongoid
|
|
5
5
|
options = Map.options_for!(args)
|
6
6
|
search = args.join(' ')
|
7
7
|
|
8
|
+
conditions = {}
|
9
|
+
order = []
|
10
|
+
|
11
|
+
op = :token_ids.in
|
12
|
+
|
13
|
+
#
|
14
|
+
case
|
15
|
+
when options[:all]
|
16
|
+
op = :token_ids.all
|
17
|
+
search += Coerce.string(options[:all])
|
18
|
+
|
19
|
+
when options[:any]
|
20
|
+
op = :token_ids.in
|
21
|
+
search += Coerce.string(options[:any])
|
22
|
+
|
23
|
+
when options[:in]
|
24
|
+
op = :token_ids.in
|
25
|
+
search += Coerce.string(options[:in])
|
26
|
+
end
|
27
|
+
|
8
28
|
#
|
9
29
|
tokens = search_tokens_for(search)
|
30
|
+
token_ids = tokens.map{|token| token.id}
|
10
31
|
|
11
32
|
#
|
12
|
-
conditions =
|
13
|
-
conditions[:tokens.in] = tokens.map{|token| token.id}
|
33
|
+
conditions[op] = token_ids
|
14
34
|
|
15
35
|
#
|
16
|
-
order = []
|
17
36
|
order.push(["score", :desc])
|
18
37
|
|
19
38
|
tokens.each do |token|
|
@@ -26,7 +45,7 @@ module Mongoid
|
|
26
45
|
|
27
46
|
#
|
28
47
|
if options[:facets]
|
29
|
-
conditions[:facets] = options[:facets]
|
48
|
+
conditions[:facets] = {'$elemMatch' => options[:facets]}
|
30
49
|
end
|
31
50
|
|
32
51
|
#
|
@@ -36,7 +55,9 @@ module Mongoid
|
|
36
55
|
end
|
37
56
|
|
38
57
|
#
|
39
|
-
Index.where(conditions).order_by(order)
|
58
|
+
Index.where(conditions).order_by(order).tap do |results|
|
59
|
+
results.extend(Denormalize)
|
60
|
+
end
|
40
61
|
end
|
41
62
|
|
42
63
|
def search_tokens_for(search)
|
@@ -62,7 +83,7 @@ module Mongoid
|
|
62
83
|
options[:types] = Array(options[:types]).flatten.compact
|
63
84
|
options[:types].push(self)
|
64
85
|
args.push(options)
|
65
|
-
Haystack.search(*args, &block)
|
86
|
+
results = Haystack.search(*args, &block)
|
66
87
|
end
|
67
88
|
|
68
89
|
after_save do |doc|
|
@@ -80,6 +101,8 @@ module Mongoid
|
|
80
101
|
nil
|
81
102
|
end
|
82
103
|
end
|
104
|
+
|
105
|
+
has_one(:haystack_index, :as => :model, :class_name => '::Mongoid::Haystack::Index')
|
83
106
|
end
|
84
107
|
|
85
108
|
InstanceMethods = proc do
|
@@ -92,5 +115,73 @@ module Mongoid
|
|
92
115
|
other.class_eval(&InstanceMethods)
|
93
116
|
end
|
94
117
|
end
|
118
|
+
|
119
|
+
module Denormalize
|
120
|
+
def denormalize
|
121
|
+
::Mongoid::Haystack.denormalize(self)
|
122
|
+
self
|
123
|
+
end
|
124
|
+
|
125
|
+
def models
|
126
|
+
denormalize
|
127
|
+
map(&:model)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def Haystack.denormalize(results)
|
132
|
+
queries = Hash.new{|h,k| h[k] = []}
|
133
|
+
|
134
|
+
results = results.to_a.flatten.compact
|
135
|
+
|
136
|
+
results.each do |result|
|
137
|
+
model_type = result[:model_type]
|
138
|
+
model_id = result[:model_id]
|
139
|
+
model_class = model_type.constantize
|
140
|
+
queries[model_class].push(model_id)
|
141
|
+
end
|
142
|
+
|
143
|
+
index = Hash.new{|h,k| h[k] = {}}
|
144
|
+
|
145
|
+
queries.each do |model_class, model_ids|
|
146
|
+
models =
|
147
|
+
begin
|
148
|
+
model_class.find(model_ids)
|
149
|
+
rescue Mongoid::Errors::DocumentNotFound
|
150
|
+
model_ids.map do |model_id|
|
151
|
+
begin
|
152
|
+
model_class.find(model_id)
|
153
|
+
rescue Mongoid::Errors::DocumentNotFound
|
154
|
+
nil
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
models.each do |model|
|
160
|
+
index[model.class.name] ||= Hash.new
|
161
|
+
next unless model
|
162
|
+
index[model.class.name][model.id.to_s] = model
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
to_ignore = []
|
167
|
+
|
168
|
+
results.each_with_index do |result, i|
|
169
|
+
model = index[result['model_type']][result['model_id'].to_s]
|
170
|
+
|
171
|
+
if model.nil?
|
172
|
+
to_ignore.push(i)
|
173
|
+
next
|
174
|
+
else
|
175
|
+
result.model = model
|
176
|
+
end
|
177
|
+
|
178
|
+
result.model.freeze
|
179
|
+
result.freeze
|
180
|
+
end
|
181
|
+
|
182
|
+
to_ignore.reverse.each{|i| results.delete_at(i)}
|
183
|
+
|
184
|
+
results.to_a
|
185
|
+
end
|
95
186
|
end
|
96
187
|
end
|
@@ -1,77 +1,80 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
module
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
3
|
+
module Mongoid
|
4
|
+
module Haystack
|
5
|
+
module Stemming
|
6
|
+
def stem(*args)
|
7
|
+
string = args.join(' ')
|
8
|
+
words = Util.words_for(*args)
|
9
|
+
stems = []
|
10
|
+
words.each do |word|
|
11
|
+
stem = word.stem.downcase
|
12
|
+
next if Stopwords.stopword?(word)
|
13
|
+
next if Stopwords.stopword?(stem)
|
14
|
+
stems.push(stem)
|
15
|
+
end
|
16
|
+
stems
|
17
|
+
end
|
17
18
|
|
18
|
-
|
19
|
+
alias_method('for', 'stem')
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
module Stopwords
|
22
|
+
dirname = __FILE__.sub(/\.rb\Z/, '')
|
23
|
+
glob = File.join(dirname, 'stopwords', '*.txt')
|
23
24
|
|
24
|
-
|
25
|
+
List = {}
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
Dir.glob(glob).each do |wordlist|
|
28
|
+
basename = File.basename(wordlist)
|
29
|
+
name = basename.split(/\./).first
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
open(wordlist) do |fd|
|
32
|
+
lines = fd.readlines
|
33
|
+
words = lines.map{|line| line.strip}
|
34
|
+
words.delete_if{|word| word.empty?}
|
35
|
+
words.push('')
|
36
|
+
List[name] = words
|
37
|
+
end
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
40
|
+
unless defined?(All)
|
41
|
+
All = []
|
42
|
+
All.concat(List['english'])
|
43
|
+
All.concat(List['full_english'])
|
44
|
+
All.concat(List['extended_english'])
|
45
|
+
#All.concat(List['full_french'])
|
46
|
+
#All.concat(List['full_spanish'])
|
47
|
+
#All.concat(List['full_portuguese'])
|
48
|
+
#All.concat(List['full_italian'])
|
49
|
+
#All.concat(List['full_german'])
|
50
|
+
#All.concat(List['full_dutch'])
|
51
|
+
#All.concat(List['full_norwegian'])
|
52
|
+
#All.concat(List['full_danish'])
|
53
|
+
#All.concat(List['full_russian'])
|
54
|
+
#All.concat(List['full_russian_koi8_r'])
|
55
|
+
#All.concat(List['full_finnish'])
|
56
|
+
All.sort!
|
57
|
+
All.uniq!
|
58
|
+
end
|
58
59
|
|
59
|
-
|
60
|
-
|
60
|
+
unless defined?(Index)
|
61
|
+
Index = {}
|
61
62
|
|
62
|
-
|
63
|
-
|
63
|
+
All.each do |word|
|
64
|
+
Index[word] = word
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def stopword?(word)
|
69
|
+
!!Index[word]
|
70
|
+
end
|
71
|
+
|
72
|
+
extend(Stopwords)
|
64
73
|
end
|
65
|
-
end
|
66
74
|
|
67
|
-
|
68
|
-
!!Index[word]
|
75
|
+
extend(Stemming)
|
69
76
|
end
|
70
|
-
|
71
|
-
extend(Stopwords)
|
72
77
|
end
|
73
|
-
|
74
|
-
extend(Stemming)
|
75
78
|
end
|
76
79
|
|
77
80
|
if $0 == __FILE__
|
@@ -4,33 +4,20 @@ module Mongoid
|
|
4
4
|
include Mongoid::Document
|
5
5
|
|
6
6
|
class << Token
|
7
|
-
def values_for(*args
|
8
|
-
|
9
|
-
values = string.scan(/[^\s]+/)
|
10
|
-
Stemming.stem(*values)
|
7
|
+
def values_for(*args)
|
8
|
+
Haystack.stems_for(*args)
|
11
9
|
end
|
12
10
|
|
13
11
|
def add(value)
|
14
|
-
token =
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
token = where(:value => value).first
|
20
|
-
created = false if token
|
21
|
-
token
|
22
|
-
end,
|
23
|
-
|
24
|
-
proc do
|
25
|
-
token = create!(:value => value)
|
26
|
-
created = true if token
|
27
|
-
token
|
28
|
-
end
|
29
|
-
)
|
12
|
+
token =
|
13
|
+
Haystack.find_or_create(
|
14
|
+
->{ where(:value => value).first },
|
15
|
+
->{ create!(:value => value) }
|
16
|
+
)
|
30
17
|
|
31
18
|
token.inc(:count, 1)
|
32
19
|
|
33
|
-
Count[:tokens].inc(1)
|
20
|
+
Count[:tokens].inc(1)
|
34
21
|
|
35
22
|
token
|
36
23
|
end
|
@@ -33,9 +33,6 @@ module Mongoid
|
|
33
33
|
models.map{|model| model.destroy_all}
|
34
34
|
end
|
35
35
|
|
36
|
-
def stem(*args, &block)
|
37
|
-
Stemming.stem(*args, &block)
|
38
|
-
end
|
39
36
|
|
40
37
|
def find_or_create(finder, creator)
|
41
38
|
doc = finder.call()
|
@@ -59,6 +56,23 @@ module Mongoid
|
|
59
56
|
end
|
60
57
|
end
|
61
58
|
|
59
|
+
def words_for(*args)
|
60
|
+
string = args.flatten.compact.join(' ').scan(/\w+/).join(' ')
|
61
|
+
words = []
|
62
|
+
UnicodeUtils.each_word(string) do |word|
|
63
|
+
word = UnicodeUtils.nfkd(word.strip)
|
64
|
+
word.gsub!(/\A(?:[^\w]|_|\s)+/, '') # leading punctuation/spaces
|
65
|
+
word.gsub!(/(?:[^\w]|_|\s+)+\Z/, '') # trailing punctuation/spaces
|
66
|
+
next if word.empty?
|
67
|
+
words.push(word)
|
68
|
+
end
|
69
|
+
words
|
70
|
+
end
|
71
|
+
|
72
|
+
def stems_for(*args, &block)
|
73
|
+
Stemming.stem(*args, &block)
|
74
|
+
end
|
75
|
+
|
62
76
|
extend Util
|
63
77
|
end
|
64
78
|
|
data/lib/mongoid-haystack.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
module Mongoid
|
4
4
|
module Haystack
|
5
|
-
const_set :Version, '1.
|
5
|
+
const_set :Version, '1.1.0'
|
6
6
|
|
7
7
|
class << Haystack
|
8
8
|
def version
|
@@ -11,9 +11,11 @@
|
|
11
11
|
|
12
12
|
def dependencies
|
13
13
|
{
|
14
|
-
'mongoid'
|
15
|
-
'map'
|
16
|
-
'fattr'
|
14
|
+
'mongoid' => [ 'mongoid' , '~> 3.0' ] ,
|
15
|
+
'map' => [ 'map' , '~> 6.2' ] ,
|
16
|
+
'fattr' => [ 'fattr' , '~> 2.2' ] ,
|
17
|
+
'coerce' => [ 'coerce' , '~> 0.0.3' ] ,
|
18
|
+
'unicode_utils' => [ 'unicode_utils' , '~> 1.4.0' ] ,
|
17
19
|
}
|
18
20
|
end
|
19
21
|
|
@@ -66,6 +68,9 @@
|
|
66
68
|
end
|
67
69
|
end
|
68
70
|
|
71
|
+
require 'unicode_utils/u'
|
72
|
+
require 'unicode_utils/each_word'
|
73
|
+
|
69
74
|
load Haystack.libdir('stemming.rb')
|
70
75
|
load Haystack.libdir('util.rb')
|
71
76
|
load Haystack.libdir('count.rb')
|
@@ -74,6 +79,10 @@
|
|
74
79
|
load Haystack.libdir('index.rb')
|
75
80
|
load Haystack.libdir('search.rb')
|
76
81
|
|
82
|
+
def Haystack.included(other)
|
83
|
+
other.send(:include, Search)
|
84
|
+
end
|
85
|
+
|
77
86
|
extend Haystack
|
78
87
|
end
|
79
88
|
end
|
data/mongoid-haystack.gemspec
CHANGED
@@ -3,13 +3,14 @@
|
|
3
3
|
|
4
4
|
Gem::Specification::new do |spec|
|
5
5
|
spec.name = "mongoid-haystack"
|
6
|
-
spec.version = "1.
|
6
|
+
spec.version = "1.1.0"
|
7
7
|
spec.platform = Gem::Platform::RUBY
|
8
8
|
spec.summary = "mongoid-haystack"
|
9
9
|
spec.description = "a mongoid 3 zero-config, zero-integration, POLS pure mongo fulltext solution"
|
10
10
|
|
11
11
|
spec.files =
|
12
|
-
["
|
12
|
+
["README.md",
|
13
|
+
"Rakefile",
|
13
14
|
"lib",
|
14
15
|
"lib/app",
|
15
16
|
"lib/app/models",
|
@@ -63,6 +64,10 @@ Gem::Specification::new do |spec|
|
|
63
64
|
|
64
65
|
spec.add_dependency(*["fattr", "~> 2.2"])
|
65
66
|
|
67
|
+
spec.add_dependency(*["coerce", "~> 0.0.3"])
|
68
|
+
|
69
|
+
spec.add_dependency(*["unicode_utils", "~> 1.4.0"])
|
70
|
+
|
66
71
|
|
67
72
|
spec.extensions.push(*[])
|
68
73
|
|
data/test/helper.rb
CHANGED
@@ -7,22 +7,35 @@ require_relative 'testing'
|
|
7
7
|
require_relative '../lib/mongoid-haystack.rb'
|
8
8
|
|
9
9
|
Mongoid::Haystack.connect!
|
10
|
+
Mongoid::Haystack.reset!
|
10
11
|
|
11
12
|
class A
|
12
13
|
include Mongoid::Document
|
13
14
|
field(:content, :type => String)
|
14
15
|
def to_s; content; end
|
16
|
+
|
17
|
+
field(:a)
|
18
|
+
field(:b)
|
19
|
+
field(:c)
|
15
20
|
end
|
16
21
|
|
17
22
|
class B
|
18
23
|
include Mongoid::Document
|
19
24
|
field(:content, :type => String)
|
20
25
|
def to_s; content; end
|
26
|
+
|
27
|
+
field(:a)
|
28
|
+
field(:b)
|
29
|
+
field(:c)
|
21
30
|
end
|
22
31
|
|
23
32
|
class C
|
24
33
|
include Mongoid::Document
|
25
34
|
field(:content, :type => String)
|
26
35
|
def to_s; content; end
|
36
|
+
|
37
|
+
field(:a)
|
38
|
+
field(:b)
|
39
|
+
field(:c)
|
27
40
|
end
|
28
41
|
|
@@ -1,15 +1,6 @@
|
|
1
1
|
require_relative 'helper'
|
2
2
|
|
3
3
|
Testing Mongoid::Haystack do
|
4
|
-
##
|
5
|
-
#
|
6
|
-
Mongoid::Haystack.reset!
|
7
|
-
|
8
|
-
setup do
|
9
|
-
[A, B, C].map{|m| m.destroy_all}
|
10
|
-
Mongoid::Haystack.destroy_all
|
11
|
-
end
|
12
|
-
|
13
4
|
##
|
14
5
|
#
|
15
6
|
testing 'that models can, at minimum, be indexed and searched' do
|
@@ -49,7 +40,7 @@ Testing Mongoid::Haystack do
|
|
49
40
|
##
|
50
41
|
#
|
51
42
|
testing 'that basic stemming can be performed' do
|
52
|
-
assert{ Mongoid::Haystack.
|
43
|
+
assert{ Mongoid::Haystack.stems_for('dogs cats fishes') == %w[ dog cat fish ] }
|
53
44
|
end
|
54
45
|
|
55
46
|
testing 'that words are stemmed when they are indexed' do
|
@@ -80,14 +71,12 @@ Testing Mongoid::Haystack do
|
|
80
71
|
end
|
81
72
|
|
82
73
|
testing 'that removing a model from the index decrements counts appropriately' do
|
83
|
-
#
|
84
74
|
a = A.create!(:content => 'dog')
|
85
75
|
b = A.create!(:content => 'cat')
|
86
76
|
c = A.create!(:content => 'cats dogs')
|
87
77
|
|
88
78
|
assert{ Mongoid::Haystack.index(A) }
|
89
79
|
|
90
|
-
#
|
91
80
|
assert{ Mongoid::Haystack.search('cat').first }
|
92
81
|
|
93
82
|
assert{ Mongoid::Haystack::Token.where(:value => 'cat').first.count == 2 }
|
@@ -116,4 +105,159 @@ Testing Mongoid::Haystack do
|
|
116
105
|
assert{ Mongoid::Haystack::Token.where(:value => 'cat').first.count == 0 }
|
117
106
|
assert{ Mongoid::Haystack::Token.where(:value => 'dog').first.count == 0 }
|
118
107
|
end
|
108
|
+
|
109
|
+
##
|
110
|
+
#
|
111
|
+
testing 'that search uses a b-tree index' do
|
112
|
+
a = A.create!(:content => 'dog')
|
113
|
+
|
114
|
+
assert{ Mongoid::Haystack.index(A) }
|
115
|
+
assert{ Mongoid::Haystack.search('dog').explain['cursor'] =~ /BtreeCursor/i }
|
116
|
+
end
|
117
|
+
|
118
|
+
##
|
119
|
+
#
|
120
|
+
testing 'that classes can export a custom [score|keywords|fulltext] for the search index' do
|
121
|
+
k = new_klass do
|
122
|
+
def to_haystack
|
123
|
+
colors.push(color = colors.shift)
|
124
|
+
|
125
|
+
{
|
126
|
+
:score => score,
|
127
|
+
|
128
|
+
:keywords => "cats #{ color }",
|
129
|
+
|
130
|
+
:fulltext => 'now is the time for all good men...'
|
131
|
+
}
|
132
|
+
end
|
133
|
+
|
134
|
+
def self.score
|
135
|
+
@score ||= 0
|
136
|
+
ensure
|
137
|
+
@score += 1
|
138
|
+
end
|
139
|
+
|
140
|
+
def score
|
141
|
+
self.class.score
|
142
|
+
end
|
143
|
+
|
144
|
+
def self.colors
|
145
|
+
@colors ||= %w( black white )
|
146
|
+
end
|
147
|
+
|
148
|
+
def colors
|
149
|
+
self.class.colors
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
a = k.create!(:content => 'dog')
|
154
|
+
b = k.create!(:content => 'dogs too')
|
155
|
+
|
156
|
+
assert{ a.haystack_index.score == 0 }
|
157
|
+
assert{ b.haystack_index.score == 1 }
|
158
|
+
|
159
|
+
assert do
|
160
|
+
a.haystack_index.tokens.map(&:value).sort ==
|
161
|
+
["black", "cat", "good", "men", "time"]
|
162
|
+
end
|
163
|
+
assert do
|
164
|
+
b.haystack_index.tokens.map(&:value).sort ==
|
165
|
+
["cat", "good", "men", "time", "white"]
|
166
|
+
end
|
167
|
+
|
168
|
+
assert{ Mongoid::Haystack.search('cat').count == 2 }
|
169
|
+
assert{ Mongoid::Haystack.search('black').count == 1 }
|
170
|
+
assert{ Mongoid::Haystack.search('white').count == 1 }
|
171
|
+
assert{ Mongoid::Haystack.search('good men').count == 2 }
|
172
|
+
end
|
173
|
+
|
174
|
+
##
|
175
|
+
#
|
176
|
+
testing 'that set intersection and union are supported via search' do
|
177
|
+
a = A.create!(:content => 'dog')
|
178
|
+
b = A.create!(:content => 'dog cat')
|
179
|
+
c = A.create!(:content => 'dog cat fish')
|
180
|
+
|
181
|
+
assert{ Mongoid::Haystack.index(A) }
|
182
|
+
|
183
|
+
assert{ Mongoid::Haystack.search(:any => 'dog').count == 3 }
|
184
|
+
assert{ Mongoid::Haystack.search(:any => 'dog cat').count == 3 }
|
185
|
+
assert{ Mongoid::Haystack.search(:any => 'dog cat fish').count == 3 }
|
186
|
+
|
187
|
+
assert{ Mongoid::Haystack.search(:all => 'dog').count == 3 }
|
188
|
+
assert{ Mongoid::Haystack.search(:all => 'dog cat').count == 2 }
|
189
|
+
assert{ Mongoid::Haystack.search(:all => 'dog cat fish').count == 1 }
|
190
|
+
end
|
191
|
+
|
192
|
+
##
|
193
|
+
#
|
194
|
+
testing 'that classes can export custom facets and then search them, again using a b-tree index' do
|
195
|
+
k = new_klass do
|
196
|
+
field(:to_haystack, :type => Hash, :default => proc{ Hash.new })
|
197
|
+
end
|
198
|
+
|
199
|
+
a = k.create!(:content => 'hello kitty', :to_haystack => { :keywords => 'cat', :facets => {:x => 42.0}})
|
200
|
+
b = k.create!(:content => 'hello kitty', :to_haystack => { :keywords => 'cat', :facets => {:x => 4.20}})
|
201
|
+
|
202
|
+
assert{ Mongoid::Haystack.search('cat').where(:facets => {'x' => 42.0}).first.model == a }
|
203
|
+
assert{ Mongoid::Haystack.search('cat').where(:facets => {'x' => 4.20}).first.model == b }
|
204
|
+
|
205
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 42.0).first.model == a }
|
206
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 4.20).first.model == b }
|
207
|
+
|
208
|
+
assert{ Mongoid::Haystack.search('cat').where('facets' => {'x' => 42.0}).explain['cursor'] =~ /BtreeCursor/ }
|
209
|
+
assert{ Mongoid::Haystack.search('cat').where('facets' => {'x' => 4.20}).explain['cursor'] =~ /BtreeCursor/ }
|
210
|
+
|
211
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 42.0).explain['cursor'] =~ /BtreeCursor/ }
|
212
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 4.20).explain['cursor'] =~ /BtreeCursor/ }
|
213
|
+
end
|
214
|
+
|
215
|
+
##
|
216
|
+
#
|
217
|
+
testing 'that keywords are considered more highly than fulltext' do
|
218
|
+
k = new_klass do
|
219
|
+
field(:title)
|
220
|
+
field(:body)
|
221
|
+
|
222
|
+
def to_haystack
|
223
|
+
{ :keywords => title, :fulltext => body }
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
a = k.create!(:title => 'the cats', :body => 'like to meow')
|
228
|
+
b = k.create!(:title => 'the dogs', :body => 'do not like to meow, they bark at cats')
|
229
|
+
|
230
|
+
assert{ Mongoid::Haystack.search('cat').count == 2 }
|
231
|
+
assert{ Mongoid::Haystack.search('cat').first.model == a }
|
232
|
+
|
233
|
+
assert{ Mongoid::Haystack.search('meow').count == 2 }
|
234
|
+
assert{ Mongoid::Haystack.search('bark').count == 1 }
|
235
|
+
assert{ Mongoid::Haystack.search('dog').first.model == b }
|
236
|
+
end
|
237
|
+
|
238
|
+
protected
|
239
|
+
|
240
|
+
def new_klass(&block)
|
241
|
+
Object.send(:remove_const, :K) if Object.send(:const_defined?, :K)
|
242
|
+
|
243
|
+
k = Class.new(A) do
|
244
|
+
self.default_collection_name = :ks
|
245
|
+
def self.name() 'K' end
|
246
|
+
include ::Mongoid::Haystack::Search
|
247
|
+
class_eval(&block) if block
|
248
|
+
end
|
249
|
+
|
250
|
+
Object.const_set(:K, k)
|
251
|
+
|
252
|
+
k
|
253
|
+
end
|
254
|
+
|
255
|
+
H = Mongoid::Haystack
|
256
|
+
T = Mongoid::Haystack::Token
|
257
|
+
I = Mongoid::Haystack::Index
|
258
|
+
|
259
|
+
setup do
|
260
|
+
[A, B, C].map{|m| m.destroy_all}
|
261
|
+
Mongoid::Haystack.destroy_all
|
262
|
+
end
|
119
263
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid-haystack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -59,12 +59,45 @@ dependencies:
|
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '2.2'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: coerce
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.0.3
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.0.3
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: unicode_utils
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 1.4.0
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 1.4.0
|
62
94
|
description: a mongoid 3 zero-config, zero-integration, POLS pure mongo fulltext solution
|
63
95
|
email: ara.t.howard@gmail.com
|
64
96
|
executables: []
|
65
97
|
extensions: []
|
66
98
|
extra_rdoc_files: []
|
67
99
|
files:
|
100
|
+
- README.md
|
68
101
|
- Rakefile
|
69
102
|
- lib/app/models/mongoid/haystack/count.rb
|
70
103
|
- lib/app/models/mongoid/haystack/index.rb
|