mongoid-haystack 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +249 -0
- data/lib/mongoid-haystack/index.rb +10 -11
- data/lib/mongoid-haystack/search.rb +97 -6
- data/lib/mongoid-haystack/stemming.rb +63 -60
- data/lib/mongoid-haystack/token.rb +8 -21
- data/lib/mongoid-haystack/util.rb +17 -3
- data/lib/mongoid-haystack.rb +13 -4
- data/mongoid-haystack.gemspec +7 -2
- data/test/helper.rb +13 -0
- data/test/mongoid-haystack_test.rb +156 -12
- metadata +34 -1
data/README.md
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
NAME
|
2
|
+
----
|
3
|
+
|
4
|
+
mongoid-haystack.rb
|
5
|
+
|
6
|
+
DESCRIPTION
|
7
|
+
-----------
|
8
|
+
|
9
|
+
mongoid-haystack provides a zero-config, POLS, pure mongo, fulltext search
|
10
|
+
solution for your mongoid models.
|
11
|
+
|
12
|
+
SYNOPSIS
|
13
|
+
--------
|
14
|
+
|
15
|
+
````ruby
|
16
|
+
|
17
|
+
# simple usage is simple
|
18
|
+
#
|
19
|
+
class Article
|
20
|
+
include Mongoid::Document
|
21
|
+
include Mongoid::Haystack
|
22
|
+
|
23
|
+
field(:content, :type => String)
|
24
|
+
end
|
25
|
+
|
26
|
+
Article.create!(:content => 'teh cats')
|
27
|
+
|
28
|
+
results = Article.search('cat')
|
29
|
+
|
30
|
+
article = results.first.model
|
31
|
+
|
32
|
+
|
33
|
+
# haystack stems the search terms and does score based sorting all using a
|
34
|
+
# fast b-tree
|
35
|
+
#
|
36
|
+
a = Article.create!(:content => 'cats are awesome')
|
37
|
+
b = Article.create!(:content => 'dogs eat cats')
|
38
|
+
c = Article.create!(:content => 'dogs dogs dogs')
|
39
|
+
|
40
|
+
results = Article.search('dogs cats').models
|
41
|
+
results == [b, a, c] #=> true
|
42
|
+
|
43
|
+
results = Article.search('awesome').models
|
44
|
+
results == [a] #=> true
|
45
|
+
|
46
|
+
|
47
|
+
# cross models searching is supported out of the box, and models can
|
48
|
+
# customise how they are indexed:
|
49
|
+
#
|
50
|
+
# - a global score lets some models appear higher in the global results
|
51
|
+
# - keywords count more than fulltext
|
52
|
+
#
|
53
|
+
class Article
|
54
|
+
include Mongoid::Document
|
55
|
+
include Mongoid::Haystack
|
56
|
+
|
57
|
+
field(:title, :type => String)
|
58
|
+
field(:content, :type => String)
|
59
|
+
|
60
|
+
def to_haystack
|
61
|
+
{ :score => 11, :keywords => title, :fulltext => content }
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Comment
|
66
|
+
include Mongoid::Document
|
67
|
+
include Mongoid::Haystack
|
68
|
+
|
69
|
+
field(:content, :type => String)
|
70
|
+
|
71
|
+
def to_haystack
|
72
|
+
{ :score => -11, :fulltext => content }
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
a1 = Article.create!(:title => 'hot pants', :content => 'teh b 52s rock')
|
77
|
+
a2 = Article.create!(:title => 'boring title', :content => 'but hot content that rocks')
|
78
|
+
|
79
|
+
c = Comment.create!(:content => 'those guys rock')
|
80
|
+
|
81
|
+
results = Mongoid::Haystack.search('rock')
|
82
|
+
results.count #=> 3
|
83
|
+
|
84
|
+
models = results.models
|
85
|
+
models == [a1, a2, c] #=> true. articles first because we generally score them higher
|
86
|
+
|
87
|
+
results = Mongoid::Haystack.search('hot')
|
88
|
+
models = results.models
|
89
|
+
models == [a1, a2] #=> true. because keywords score higher than general fulltext
|
90
|
+
|
91
|
+
|
92
|
+
# by default searching returns Mongoid::Haystack::Index objects. you'll want
|
93
|
+
# to expand these results to the models they reference in your views, but
|
94
|
+
# avoid doing an N+1 query. to do this simply call #models on the result set
|
95
|
+
# and the models will be eager loaded using only as many queries as there are
|
96
|
+
# model types in your result set
|
97
|
+
#
|
98
|
+
|
99
|
+
@results = Mongoid::Haystack.search('needle').page(params[:page]).per(10)
|
100
|
+
@models = @results.models
|
101
|
+
|
102
|
+
|
103
|
+
# you can decorate your search items with arbitrary meta data and filter
|
104
|
+
# searches by it later. this too uses a b-tree index.
|
105
|
+
#
|
106
|
+
class Article
|
107
|
+
include Mongoid::Document
|
108
|
+
include Mongoid::Haystack
|
109
|
+
|
110
|
+
belongs_to :author, :class_name => '::User'
|
111
|
+
|
112
|
+
field(:title, :type => String)
|
113
|
+
field(:content, :type => String)
|
114
|
+
|
115
|
+
def to_haystack
|
116
|
+
{
|
117
|
+
:score => author.popularity,
|
118
|
+
:keywords => title,
|
119
|
+
:fulltext => content,
|
120
|
+
:facets => {:author_id => author.id}
|
121
|
+
}
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
a =
|
126
|
+
author.articles.create!(
|
127
|
+
:title => 'iggy and keith',
|
128
|
+
:content => 'seen the needles and the damage done...'
|
129
|
+
)
|
130
|
+
|
131
|
+
author_articles = Article.search('needle', :facets => {:author_id => author.id})
|
132
|
+
|
133
|
+
|
134
|
+
````
|
135
|
+
|
136
|
+
DESCRIPTION
|
137
|
+
-----------
|
138
|
+
|
139
|
+
there are two main pathways to understand in the code. shit going into the
|
140
|
+
index, and shit coming out.
|
141
|
+
|
142
|
+
shit going in entails:
|
143
|
+
|
144
|
+
- stem and stopword the search terms.
|
145
|
+
- create or update a new token for each
|
146
|
+
- create an index item referencing all the tokens with precomputed scores
|
147
|
+
|
148
|
+
for example the terms 'dog dogs cat' might result in these tokens
|
149
|
+
|
150
|
+
````javascript
|
151
|
+
|
152
|
+
[
|
153
|
+
{
|
154
|
+
'_id' : '0x1',
|
155
|
+
'value' : 'dog',
|
156
|
+
'count' : 2
|
157
|
+
},
|
158
|
+
|
159
|
+
|
160
|
+
{
|
161
|
+
'_id' : '0x2',
|
162
|
+
'value' : 'cat',
|
163
|
+
'count' : 1
|
164
|
+
}
|
165
|
+
]
|
166
|
+
|
167
|
+
````
|
168
|
+
|
169
|
+
and this index item
|
170
|
+
|
171
|
+
|
172
|
+
````javascript
|
173
|
+
|
174
|
+
{
|
175
|
+
'_id' : '50c11759a04745961e000001',
|
176
|
+
|
177
|
+
'model_type' : 'Article',
|
178
|
+
'model_id' : '50c11775a04745461f000001',
|
179
|
+
|
180
|
+
'tokens' : ['0x1', '0x2'],
|
181
|
+
|
182
|
+
'score' : 10,
|
183
|
+
|
184
|
+
'keyword_scores' : {
|
185
|
+
'0x1' : 2,
|
186
|
+
'0x2' : 1
|
187
|
+
},
|
188
|
+
|
189
|
+
'fulltext_scores' : {
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
|
194
|
+
````
|
195
|
+
|
196
|
+
being built
|
197
|
+
|
198
|
+
in addition, some other information is tracked such as the total number of
|
199
|
+
search tokens ever discovered in the corpus
|
200
|
+
|
201
|
+
|
202
|
+
|
203
|
+
a few things to notice:
|
204
|
+
|
205
|
+
- the tokens are counted and auto-id'd using hex notation and a sequence
|
206
|
+
generator. the reason for this is so that their ids are legit hash keys
|
207
|
+
in the keyword and fulltext score hashes.
|
208
|
+
|
209
|
+
- the data structure above allows both filtering for index items that have
|
210
|
+
certain tokens, but also ordering them based on global, keyword, and
|
211
|
+
fulltext score without resorting to map-reduce: a b-tree index can be
|
212
|
+
used.
|
213
|
+
|
214
|
+
- all tokens have their text/stem stored exactly once. aka: we do not store
|
215
|
+
'hugewords' all over the place but store it once and count occurrences of
|
216
|
+
it to keep the total index much smaller
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
|
221
|
+
pulling objects back out in a search involves these logical steps:
|
222
|
+
|
223
|
+
- filter the search terms through the same tokenizer as when indexed
|
224
|
+
|
225
|
+
- lookup tokens for each of the tokens in the search string
|
226
|
+
|
227
|
+
- using the count for each token, plus the global token count that has been
|
228
|
+
tracked we can decide to order the results by relatively rare words first
|
229
|
+
and, all else being equal (same rarity bin: 0.10, 0.20, 0.30, etc.), the
|
230
|
+
order in which the user typed the words
|
231
|
+
|
232
|
+
- this approach applies and is valid whether we are doing a union (or) or
|
233
|
+
intersection (all) search and regardless of whether facets are included in
|
234
|
+
the search. facets, however, never affect the order unless done so by the
|
235
|
+
user manually. eg
|
236
|
+
|
237
|
+
````ruby
|
238
|
+
|
239
|
+
results =
|
240
|
+
Mongoid::Haystack.
|
241
|
+
search('foo bar', :facets => {:hotness.gte => 11}).
|
242
|
+
order_by('facets.hotness' => :desc)
|
243
|
+
|
244
|
+
````
|
245
|
+
|
246
|
+
|
247
|
+
SEE ALSO
|
248
|
+
--------
|
249
|
+
tests: <a href='https://github.com/ahoward/mongoid-haystack/blob/master/test/mongoid-haystack_test.rb'>./test/mongoid-haystack_test.rb</a>
|
@@ -73,12 +73,13 @@ module Mongoid
|
|
73
73
|
|
74
74
|
keyword_scores = Hash.new{|h,k| h[k] = 0}
|
75
75
|
fulltext_scores = Hash.new{|h,k| h[k] = 0}
|
76
|
+
token_ids = []
|
76
77
|
|
77
78
|
Token.values_for(keywords).each do |value|
|
78
79
|
token = Token.add(value)
|
79
80
|
id = token.id
|
80
81
|
|
81
|
-
|
82
|
+
token_ids.push(id)
|
82
83
|
keyword_scores[id] += 1
|
83
84
|
end
|
84
85
|
|
@@ -86,7 +87,7 @@ module Mongoid
|
|
86
87
|
token = Token.add(value)
|
87
88
|
id = token.id
|
88
89
|
|
89
|
-
|
90
|
+
token_ids.push(id)
|
90
91
|
fulltext_scores[id] += 1
|
91
92
|
end
|
92
93
|
|
@@ -96,7 +97,7 @@ module Mongoid
|
|
96
97
|
index.score = score if score
|
97
98
|
index.facets = facets if facets
|
98
99
|
|
99
|
-
index.
|
100
|
+
index.token_ids = token_ids
|
100
101
|
|
101
102
|
index.save!
|
102
103
|
end
|
@@ -105,16 +106,12 @@ module Mongoid
|
|
105
106
|
def remove(*args)
|
106
107
|
models_for(*args) do |model|
|
107
108
|
index = where(:model_type => model.class.name, :model_id => model.id).first
|
108
|
-
|
109
|
-
if index
|
110
|
-
subtract(index)
|
111
|
-
index.destroy
|
112
|
-
end
|
109
|
+
index.destroy if index
|
113
110
|
end
|
114
111
|
end
|
115
112
|
|
116
113
|
def subtract(index)
|
117
|
-
tokens =
|
114
|
+
tokens = index.tokens
|
118
115
|
|
119
116
|
n = 0
|
120
117
|
|
@@ -145,9 +142,11 @@ module Mongoid
|
|
145
142
|
end
|
146
143
|
end
|
147
144
|
|
145
|
+
before_destroy{|index| Index.subtract(index)}
|
146
|
+
|
148
147
|
belongs_to(:model, :polymorphic => true)
|
149
148
|
|
150
|
-
|
149
|
+
has_and_belongs_to_many(:tokens, :class_name => '::Mongoid::Haystack::Token', :inverse_of => nil)
|
151
150
|
field(:score, :type => Integer, :default => 0)
|
152
151
|
field(:keyword_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
|
153
152
|
field(:fulltext_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
|
@@ -156,7 +155,7 @@ module Mongoid
|
|
156
155
|
index({:model_type => 1})
|
157
156
|
index({:model_id => 1})
|
158
157
|
|
159
|
-
index({:
|
158
|
+
index({:token_ids => 1})
|
160
159
|
index({:score => 1})
|
161
160
|
index({:keyword_scores => 1})
|
162
161
|
index({:fulltext_scores => 1})
|
@@ -5,15 +5,34 @@ module Mongoid
|
|
5
5
|
options = Map.options_for!(args)
|
6
6
|
search = args.join(' ')
|
7
7
|
|
8
|
+
conditions = {}
|
9
|
+
order = []
|
10
|
+
|
11
|
+
op = :token_ids.in
|
12
|
+
|
13
|
+
#
|
14
|
+
case
|
15
|
+
when options[:all]
|
16
|
+
op = :token_ids.all
|
17
|
+
search += Coerce.string(options[:all])
|
18
|
+
|
19
|
+
when options[:any]
|
20
|
+
op = :token_ids.in
|
21
|
+
search += Coerce.string(options[:any])
|
22
|
+
|
23
|
+
when options[:in]
|
24
|
+
op = :token_ids.in
|
25
|
+
search += Coerce.string(options[:in])
|
26
|
+
end
|
27
|
+
|
8
28
|
#
|
9
29
|
tokens = search_tokens_for(search)
|
30
|
+
token_ids = tokens.map{|token| token.id}
|
10
31
|
|
11
32
|
#
|
12
|
-
conditions =
|
13
|
-
conditions[:tokens.in] = tokens.map{|token| token.id}
|
33
|
+
conditions[op] = token_ids
|
14
34
|
|
15
35
|
#
|
16
|
-
order = []
|
17
36
|
order.push(["score", :desc])
|
18
37
|
|
19
38
|
tokens.each do |token|
|
@@ -26,7 +45,7 @@ module Mongoid
|
|
26
45
|
|
27
46
|
#
|
28
47
|
if options[:facets]
|
29
|
-
conditions[:facets] = options[:facets]
|
48
|
+
conditions[:facets] = {'$elemMatch' => options[:facets]}
|
30
49
|
end
|
31
50
|
|
32
51
|
#
|
@@ -36,7 +55,9 @@ module Mongoid
|
|
36
55
|
end
|
37
56
|
|
38
57
|
#
|
39
|
-
Index.where(conditions).order_by(order)
|
58
|
+
Index.where(conditions).order_by(order).tap do |results|
|
59
|
+
results.extend(Denormalize)
|
60
|
+
end
|
40
61
|
end
|
41
62
|
|
42
63
|
def search_tokens_for(search)
|
@@ -62,7 +83,7 @@ module Mongoid
|
|
62
83
|
options[:types] = Array(options[:types]).flatten.compact
|
63
84
|
options[:types].push(self)
|
64
85
|
args.push(options)
|
65
|
-
Haystack.search(*args, &block)
|
86
|
+
results = Haystack.search(*args, &block)
|
66
87
|
end
|
67
88
|
|
68
89
|
after_save do |doc|
|
@@ -80,6 +101,8 @@ module Mongoid
|
|
80
101
|
nil
|
81
102
|
end
|
82
103
|
end
|
104
|
+
|
105
|
+
has_one(:haystack_index, :as => :model, :class_name => '::Mongoid::Haystack::Index')
|
83
106
|
end
|
84
107
|
|
85
108
|
InstanceMethods = proc do
|
@@ -92,5 +115,73 @@ module Mongoid
|
|
92
115
|
other.class_eval(&InstanceMethods)
|
93
116
|
end
|
94
117
|
end
|
118
|
+
|
119
|
+
module Denormalize
|
120
|
+
def denormalize
|
121
|
+
::Mongoid::Haystack.denormalize(self)
|
122
|
+
self
|
123
|
+
end
|
124
|
+
|
125
|
+
def models
|
126
|
+
denormalize
|
127
|
+
map(&:model)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
def Haystack.denormalize(results)
|
132
|
+
queries = Hash.new{|h,k| h[k] = []}
|
133
|
+
|
134
|
+
results = results.to_a.flatten.compact
|
135
|
+
|
136
|
+
results.each do |result|
|
137
|
+
model_type = result[:model_type]
|
138
|
+
model_id = result[:model_id]
|
139
|
+
model_class = model_type.constantize
|
140
|
+
queries[model_class].push(model_id)
|
141
|
+
end
|
142
|
+
|
143
|
+
index = Hash.new{|h,k| h[k] = {}}
|
144
|
+
|
145
|
+
queries.each do |model_class, model_ids|
|
146
|
+
models =
|
147
|
+
begin
|
148
|
+
model_class.find(model_ids)
|
149
|
+
rescue Mongoid::Errors::DocumentNotFound
|
150
|
+
model_ids.map do |model_id|
|
151
|
+
begin
|
152
|
+
model_class.find(model_id)
|
153
|
+
rescue Mongoid::Errors::DocumentNotFound
|
154
|
+
nil
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
models.each do |model|
|
160
|
+
index[model.class.name] ||= Hash.new
|
161
|
+
next unless model
|
162
|
+
index[model.class.name][model.id.to_s] = model
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
to_ignore = []
|
167
|
+
|
168
|
+
results.each_with_index do |result, i|
|
169
|
+
model = index[result['model_type']][result['model_id'].to_s]
|
170
|
+
|
171
|
+
if model.nil?
|
172
|
+
to_ignore.push(i)
|
173
|
+
next
|
174
|
+
else
|
175
|
+
result.model = model
|
176
|
+
end
|
177
|
+
|
178
|
+
result.model.freeze
|
179
|
+
result.freeze
|
180
|
+
end
|
181
|
+
|
182
|
+
to_ignore.reverse.each{|i| results.delete_at(i)}
|
183
|
+
|
184
|
+
results.to_a
|
185
|
+
end
|
95
186
|
end
|
96
187
|
end
|
@@ -1,77 +1,80 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
module
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
3
|
+
module Mongoid
|
4
|
+
module Haystack
|
5
|
+
module Stemming
|
6
|
+
def stem(*args)
|
7
|
+
string = args.join(' ')
|
8
|
+
words = Util.words_for(*args)
|
9
|
+
stems = []
|
10
|
+
words.each do |word|
|
11
|
+
stem = word.stem.downcase
|
12
|
+
next if Stopwords.stopword?(word)
|
13
|
+
next if Stopwords.stopword?(stem)
|
14
|
+
stems.push(stem)
|
15
|
+
end
|
16
|
+
stems
|
17
|
+
end
|
17
18
|
|
18
|
-
|
19
|
+
alias_method('for', 'stem')
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
module Stopwords
|
22
|
+
dirname = __FILE__.sub(/\.rb\Z/, '')
|
23
|
+
glob = File.join(dirname, 'stopwords', '*.txt')
|
23
24
|
|
24
|
-
|
25
|
+
List = {}
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
Dir.glob(glob).each do |wordlist|
|
28
|
+
basename = File.basename(wordlist)
|
29
|
+
name = basename.split(/\./).first
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
open(wordlist) do |fd|
|
32
|
+
lines = fd.readlines
|
33
|
+
words = lines.map{|line| line.strip}
|
34
|
+
words.delete_if{|word| word.empty?}
|
35
|
+
words.push('')
|
36
|
+
List[name] = words
|
37
|
+
end
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
40
|
+
unless defined?(All)
|
41
|
+
All = []
|
42
|
+
All.concat(List['english'])
|
43
|
+
All.concat(List['full_english'])
|
44
|
+
All.concat(List['extended_english'])
|
45
|
+
#All.concat(List['full_french'])
|
46
|
+
#All.concat(List['full_spanish'])
|
47
|
+
#All.concat(List['full_portuguese'])
|
48
|
+
#All.concat(List['full_italian'])
|
49
|
+
#All.concat(List['full_german'])
|
50
|
+
#All.concat(List['full_dutch'])
|
51
|
+
#All.concat(List['full_norwegian'])
|
52
|
+
#All.concat(List['full_danish'])
|
53
|
+
#All.concat(List['full_russian'])
|
54
|
+
#All.concat(List['full_russian_koi8_r'])
|
55
|
+
#All.concat(List['full_finnish'])
|
56
|
+
All.sort!
|
57
|
+
All.uniq!
|
58
|
+
end
|
58
59
|
|
59
|
-
|
60
|
-
|
60
|
+
unless defined?(Index)
|
61
|
+
Index = {}
|
61
62
|
|
62
|
-
|
63
|
-
|
63
|
+
All.each do |word|
|
64
|
+
Index[word] = word
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def stopword?(word)
|
69
|
+
!!Index[word]
|
70
|
+
end
|
71
|
+
|
72
|
+
extend(Stopwords)
|
64
73
|
end
|
65
|
-
end
|
66
74
|
|
67
|
-
|
68
|
-
!!Index[word]
|
75
|
+
extend(Stemming)
|
69
76
|
end
|
70
|
-
|
71
|
-
extend(Stopwords)
|
72
77
|
end
|
73
|
-
|
74
|
-
extend(Stemming)
|
75
78
|
end
|
76
79
|
|
77
80
|
if $0 == __FILE__
|
@@ -4,33 +4,20 @@ module Mongoid
|
|
4
4
|
include Mongoid::Document
|
5
5
|
|
6
6
|
class << Token
|
7
|
-
def values_for(*args
|
8
|
-
|
9
|
-
values = string.scan(/[^\s]+/)
|
10
|
-
Stemming.stem(*values)
|
7
|
+
def values_for(*args)
|
8
|
+
Haystack.stems_for(*args)
|
11
9
|
end
|
12
10
|
|
13
11
|
def add(value)
|
14
|
-
token =
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
token = where(:value => value).first
|
20
|
-
created = false if token
|
21
|
-
token
|
22
|
-
end,
|
23
|
-
|
24
|
-
proc do
|
25
|
-
token = create!(:value => value)
|
26
|
-
created = true if token
|
27
|
-
token
|
28
|
-
end
|
29
|
-
)
|
12
|
+
token =
|
13
|
+
Haystack.find_or_create(
|
14
|
+
->{ where(:value => value).first },
|
15
|
+
->{ create!(:value => value) }
|
16
|
+
)
|
30
17
|
|
31
18
|
token.inc(:count, 1)
|
32
19
|
|
33
|
-
Count[:tokens].inc(1)
|
20
|
+
Count[:tokens].inc(1)
|
34
21
|
|
35
22
|
token
|
36
23
|
end
|
@@ -33,9 +33,6 @@ module Mongoid
|
|
33
33
|
models.map{|model| model.destroy_all}
|
34
34
|
end
|
35
35
|
|
36
|
-
def stem(*args, &block)
|
37
|
-
Stemming.stem(*args, &block)
|
38
|
-
end
|
39
36
|
|
40
37
|
def find_or_create(finder, creator)
|
41
38
|
doc = finder.call()
|
@@ -59,6 +56,23 @@ module Mongoid
|
|
59
56
|
end
|
60
57
|
end
|
61
58
|
|
59
|
+
def words_for(*args)
|
60
|
+
string = args.flatten.compact.join(' ').scan(/\w+/).join(' ')
|
61
|
+
words = []
|
62
|
+
UnicodeUtils.each_word(string) do |word|
|
63
|
+
word = UnicodeUtils.nfkd(word.strip)
|
64
|
+
word.gsub!(/\A(?:[^\w]|_|\s)+/, '') # leading punctuation/spaces
|
65
|
+
word.gsub!(/(?:[^\w]|_|\s+)+\Z/, '') # trailing punctuation/spaces
|
66
|
+
next if word.empty?
|
67
|
+
words.push(word)
|
68
|
+
end
|
69
|
+
words
|
70
|
+
end
|
71
|
+
|
72
|
+
def stems_for(*args, &block)
|
73
|
+
Stemming.stem(*args, &block)
|
74
|
+
end
|
75
|
+
|
62
76
|
extend Util
|
63
77
|
end
|
64
78
|
|
data/lib/mongoid-haystack.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#
|
3
3
|
module Mongoid
|
4
4
|
module Haystack
|
5
|
-
const_set :Version, '1.
|
5
|
+
const_set :Version, '1.1.0'
|
6
6
|
|
7
7
|
class << Haystack
|
8
8
|
def version
|
@@ -11,9 +11,11 @@
|
|
11
11
|
|
12
12
|
def dependencies
|
13
13
|
{
|
14
|
-
'mongoid'
|
15
|
-
'map'
|
16
|
-
'fattr'
|
14
|
+
'mongoid' => [ 'mongoid' , '~> 3.0' ] ,
|
15
|
+
'map' => [ 'map' , '~> 6.2' ] ,
|
16
|
+
'fattr' => [ 'fattr' , '~> 2.2' ] ,
|
17
|
+
'coerce' => [ 'coerce' , '~> 0.0.3' ] ,
|
18
|
+
'unicode_utils' => [ 'unicode_utils' , '~> 1.4.0' ] ,
|
17
19
|
}
|
18
20
|
end
|
19
21
|
|
@@ -66,6 +68,9 @@
|
|
66
68
|
end
|
67
69
|
end
|
68
70
|
|
71
|
+
require 'unicode_utils/u'
|
72
|
+
require 'unicode_utils/each_word'
|
73
|
+
|
69
74
|
load Haystack.libdir('stemming.rb')
|
70
75
|
load Haystack.libdir('util.rb')
|
71
76
|
load Haystack.libdir('count.rb')
|
@@ -74,6 +79,10 @@
|
|
74
79
|
load Haystack.libdir('index.rb')
|
75
80
|
load Haystack.libdir('search.rb')
|
76
81
|
|
82
|
+
def Haystack.included(other)
|
83
|
+
other.send(:include, Search)
|
84
|
+
end
|
85
|
+
|
77
86
|
extend Haystack
|
78
87
|
end
|
79
88
|
end
|
data/mongoid-haystack.gemspec
CHANGED
@@ -3,13 +3,14 @@
|
|
3
3
|
|
4
4
|
Gem::Specification::new do |spec|
|
5
5
|
spec.name = "mongoid-haystack"
|
6
|
-
spec.version = "1.
|
6
|
+
spec.version = "1.1.0"
|
7
7
|
spec.platform = Gem::Platform::RUBY
|
8
8
|
spec.summary = "mongoid-haystack"
|
9
9
|
spec.description = "a mongoid 3 zero-config, zero-integration, POLS pure mongo fulltext solution"
|
10
10
|
|
11
11
|
spec.files =
|
12
|
-
["
|
12
|
+
["README.md",
|
13
|
+
"Rakefile",
|
13
14
|
"lib",
|
14
15
|
"lib/app",
|
15
16
|
"lib/app/models",
|
@@ -63,6 +64,10 @@ Gem::Specification::new do |spec|
|
|
63
64
|
|
64
65
|
spec.add_dependency(*["fattr", "~> 2.2"])
|
65
66
|
|
67
|
+
spec.add_dependency(*["coerce", "~> 0.0.3"])
|
68
|
+
|
69
|
+
spec.add_dependency(*["unicode_utils", "~> 1.4.0"])
|
70
|
+
|
66
71
|
|
67
72
|
spec.extensions.push(*[])
|
68
73
|
|
data/test/helper.rb
CHANGED
@@ -7,22 +7,35 @@ require_relative 'testing'
|
|
7
7
|
require_relative '../lib/mongoid-haystack.rb'
|
8
8
|
|
9
9
|
Mongoid::Haystack.connect!
|
10
|
+
Mongoid::Haystack.reset!
|
10
11
|
|
11
12
|
class A
|
12
13
|
include Mongoid::Document
|
13
14
|
field(:content, :type => String)
|
14
15
|
def to_s; content; end
|
16
|
+
|
17
|
+
field(:a)
|
18
|
+
field(:b)
|
19
|
+
field(:c)
|
15
20
|
end
|
16
21
|
|
17
22
|
class B
|
18
23
|
include Mongoid::Document
|
19
24
|
field(:content, :type => String)
|
20
25
|
def to_s; content; end
|
26
|
+
|
27
|
+
field(:a)
|
28
|
+
field(:b)
|
29
|
+
field(:c)
|
21
30
|
end
|
22
31
|
|
23
32
|
class C
|
24
33
|
include Mongoid::Document
|
25
34
|
field(:content, :type => String)
|
26
35
|
def to_s; content; end
|
36
|
+
|
37
|
+
field(:a)
|
38
|
+
field(:b)
|
39
|
+
field(:c)
|
27
40
|
end
|
28
41
|
|
@@ -1,15 +1,6 @@
|
|
1
1
|
require_relative 'helper'
|
2
2
|
|
3
3
|
Testing Mongoid::Haystack do
|
4
|
-
##
|
5
|
-
#
|
6
|
-
Mongoid::Haystack.reset!
|
7
|
-
|
8
|
-
setup do
|
9
|
-
[A, B, C].map{|m| m.destroy_all}
|
10
|
-
Mongoid::Haystack.destroy_all
|
11
|
-
end
|
12
|
-
|
13
4
|
##
|
14
5
|
#
|
15
6
|
testing 'that models can, at minimum, be indexed and searched' do
|
@@ -49,7 +40,7 @@ Testing Mongoid::Haystack do
|
|
49
40
|
##
|
50
41
|
#
|
51
42
|
testing 'that basic stemming can be performed' do
|
52
|
-
assert{ Mongoid::Haystack.
|
43
|
+
assert{ Mongoid::Haystack.stems_for('dogs cats fishes') == %w[ dog cat fish ] }
|
53
44
|
end
|
54
45
|
|
55
46
|
testing 'that words are stemmed when they are indexed' do
|
@@ -80,14 +71,12 @@ Testing Mongoid::Haystack do
|
|
80
71
|
end
|
81
72
|
|
82
73
|
testing 'that removing a model from the index decrements counts appropriately' do
|
83
|
-
#
|
84
74
|
a = A.create!(:content => 'dog')
|
85
75
|
b = A.create!(:content => 'cat')
|
86
76
|
c = A.create!(:content => 'cats dogs')
|
87
77
|
|
88
78
|
assert{ Mongoid::Haystack.index(A) }
|
89
79
|
|
90
|
-
#
|
91
80
|
assert{ Mongoid::Haystack.search('cat').first }
|
92
81
|
|
93
82
|
assert{ Mongoid::Haystack::Token.where(:value => 'cat').first.count == 2 }
|
@@ -116,4 +105,159 @@ Testing Mongoid::Haystack do
|
|
116
105
|
assert{ Mongoid::Haystack::Token.where(:value => 'cat').first.count == 0 }
|
117
106
|
assert{ Mongoid::Haystack::Token.where(:value => 'dog').first.count == 0 }
|
118
107
|
end
|
108
|
+
|
109
|
+
##
|
110
|
+
#
|
111
|
+
testing 'that search uses a b-tree index' do
|
112
|
+
a = A.create!(:content => 'dog')
|
113
|
+
|
114
|
+
assert{ Mongoid::Haystack.index(A) }
|
115
|
+
assert{ Mongoid::Haystack.search('dog').explain['cursor'] =~ /BtreeCursor/i }
|
116
|
+
end
|
117
|
+
|
118
|
+
##
|
119
|
+
#
|
120
|
+
testing 'that classes can export a custom [score|keywords|fulltext] for the search index' do
|
121
|
+
k = new_klass do
|
122
|
+
def to_haystack
|
123
|
+
colors.push(color = colors.shift)
|
124
|
+
|
125
|
+
{
|
126
|
+
:score => score,
|
127
|
+
|
128
|
+
:keywords => "cats #{ color }",
|
129
|
+
|
130
|
+
:fulltext => 'now is the time for all good men...'
|
131
|
+
}
|
132
|
+
end
|
133
|
+
|
134
|
+
def self.score
|
135
|
+
@score ||= 0
|
136
|
+
ensure
|
137
|
+
@score += 1
|
138
|
+
end
|
139
|
+
|
140
|
+
def score
|
141
|
+
self.class.score
|
142
|
+
end
|
143
|
+
|
144
|
+
def self.colors
|
145
|
+
@colors ||= %w( black white )
|
146
|
+
end
|
147
|
+
|
148
|
+
def colors
|
149
|
+
self.class.colors
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
a = k.create!(:content => 'dog')
|
154
|
+
b = k.create!(:content => 'dogs too')
|
155
|
+
|
156
|
+
assert{ a.haystack_index.score == 0 }
|
157
|
+
assert{ b.haystack_index.score == 1 }
|
158
|
+
|
159
|
+
assert do
|
160
|
+
a.haystack_index.tokens.map(&:value).sort ==
|
161
|
+
["black", "cat", "good", "men", "time"]
|
162
|
+
end
|
163
|
+
assert do
|
164
|
+
b.haystack_index.tokens.map(&:value).sort ==
|
165
|
+
["cat", "good", "men", "time", "white"]
|
166
|
+
end
|
167
|
+
|
168
|
+
assert{ Mongoid::Haystack.search('cat').count == 2 }
|
169
|
+
assert{ Mongoid::Haystack.search('black').count == 1 }
|
170
|
+
assert{ Mongoid::Haystack.search('white').count == 1 }
|
171
|
+
assert{ Mongoid::Haystack.search('good men').count == 2 }
|
172
|
+
end
|
173
|
+
|
174
|
+
##
|
175
|
+
#
|
176
|
+
testing 'that set intersection and union are supported via search' do
|
177
|
+
a = A.create!(:content => 'dog')
|
178
|
+
b = A.create!(:content => 'dog cat')
|
179
|
+
c = A.create!(:content => 'dog cat fish')
|
180
|
+
|
181
|
+
assert{ Mongoid::Haystack.index(A) }
|
182
|
+
|
183
|
+
assert{ Mongoid::Haystack.search(:any => 'dog').count == 3 }
|
184
|
+
assert{ Mongoid::Haystack.search(:any => 'dog cat').count == 3 }
|
185
|
+
assert{ Mongoid::Haystack.search(:any => 'dog cat fish').count == 3 }
|
186
|
+
|
187
|
+
assert{ Mongoid::Haystack.search(:all => 'dog').count == 3 }
|
188
|
+
assert{ Mongoid::Haystack.search(:all => 'dog cat').count == 2 }
|
189
|
+
assert{ Mongoid::Haystack.search(:all => 'dog cat fish').count == 1 }
|
190
|
+
end
|
191
|
+
|
192
|
+
##
|
193
|
+
#
|
194
|
+
testing 'that classes can export custom facets and then search them, again using a b-tree index' do
|
195
|
+
k = new_klass do
|
196
|
+
field(:to_haystack, :type => Hash, :default => proc{ Hash.new })
|
197
|
+
end
|
198
|
+
|
199
|
+
a = k.create!(:content => 'hello kitty', :to_haystack => { :keywords => 'cat', :facets => {:x => 42.0}})
|
200
|
+
b = k.create!(:content => 'hello kitty', :to_haystack => { :keywords => 'cat', :facets => {:x => 4.20}})
|
201
|
+
|
202
|
+
assert{ Mongoid::Haystack.search('cat').where(:facets => {'x' => 42.0}).first.model == a }
|
203
|
+
assert{ Mongoid::Haystack.search('cat').where(:facets => {'x' => 4.20}).first.model == b }
|
204
|
+
|
205
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 42.0).first.model == a }
|
206
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 4.20).first.model == b }
|
207
|
+
|
208
|
+
assert{ Mongoid::Haystack.search('cat').where('facets' => {'x' => 42.0}).explain['cursor'] =~ /BtreeCursor/ }
|
209
|
+
assert{ Mongoid::Haystack.search('cat').where('facets' => {'x' => 4.20}).explain['cursor'] =~ /BtreeCursor/ }
|
210
|
+
|
211
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 42.0).explain['cursor'] =~ /BtreeCursor/ }
|
212
|
+
assert{ Mongoid::Haystack.search('cat').where('facets.x' => 4.20).explain['cursor'] =~ /BtreeCursor/ }
|
213
|
+
end
|
214
|
+
|
215
|
+
##
|
216
|
+
#
|
217
|
+
testing 'that keywords are considered more highly than fulltext' do
|
218
|
+
k = new_klass do
|
219
|
+
field(:title)
|
220
|
+
field(:body)
|
221
|
+
|
222
|
+
def to_haystack
|
223
|
+
{ :keywords => title, :fulltext => body }
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
a = k.create!(:title => 'the cats', :body => 'like to meow')
|
228
|
+
b = k.create!(:title => 'the dogs', :body => 'do not like to meow, they bark at cats')
|
229
|
+
|
230
|
+
assert{ Mongoid::Haystack.search('cat').count == 2 }
|
231
|
+
assert{ Mongoid::Haystack.search('cat').first.model == a }
|
232
|
+
|
233
|
+
assert{ Mongoid::Haystack.search('meow').count == 2 }
|
234
|
+
assert{ Mongoid::Haystack.search('bark').count == 1 }
|
235
|
+
assert{ Mongoid::Haystack.search('dog').first.model == b }
|
236
|
+
end
|
237
|
+
|
238
|
+
protected
|
239
|
+
|
240
|
+
def new_klass(&block)
|
241
|
+
Object.send(:remove_const, :K) if Object.send(:const_defined?, :K)
|
242
|
+
|
243
|
+
k = Class.new(A) do
|
244
|
+
self.default_collection_name = :ks
|
245
|
+
def self.name() 'K' end
|
246
|
+
include ::Mongoid::Haystack::Search
|
247
|
+
class_eval(&block) if block
|
248
|
+
end
|
249
|
+
|
250
|
+
Object.const_set(:K, k)
|
251
|
+
|
252
|
+
k
|
253
|
+
end
|
254
|
+
|
255
|
+
H = Mongoid::Haystack
|
256
|
+
T = Mongoid::Haystack::Token
|
257
|
+
I = Mongoid::Haystack::Index
|
258
|
+
|
259
|
+
setup do
|
260
|
+
[A, B, C].map{|m| m.destroy_all}
|
261
|
+
Mongoid::Haystack.destroy_all
|
262
|
+
end
|
119
263
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mongoid-haystack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -59,12 +59,45 @@ dependencies:
|
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '2.2'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: coerce
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.0.3
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.0.3
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: unicode_utils
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 1.4.0
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 1.4.0
|
62
94
|
description: a mongoid 3 zero-config, zero-integration, POLS pure mongo fulltext solution
|
63
95
|
email: ara.t.howard@gmail.com
|
64
96
|
executables: []
|
65
97
|
extensions: []
|
66
98
|
extra_rdoc_files: []
|
67
99
|
files:
|
100
|
+
- README.md
|
68
101
|
- Rakefile
|
69
102
|
- lib/app/models/mongoid/haystack/count.rb
|
70
103
|
- lib/app/models/mongoid/haystack/index.rb
|