mongoid-haystack 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +446 -0
- data/lib/app/models/mongoid/haystack/count.rb +1 -0
- data/lib/app/models/mongoid/haystack/index.rb +1 -0
- data/lib/app/models/mongoid/haystack/sequence.rb +1 -0
- data/lib/app/models/mongoid/haystack/token.rb +1 -0
- data/lib/mongoid-haystack.rb +79 -0
- data/lib/mongoid-haystack/count.rb +28 -0
- data/lib/mongoid-haystack/index.rb +165 -0
- data/lib/mongoid-haystack/search.rb +96 -0
- data/lib/mongoid-haystack/sequence.rb +55 -0
- data/lib/mongoid-haystack/stemming.rb +79 -0
- data/lib/mongoid-haystack/stemming/stopwords/english.txt +32 -0
- data/lib/mongoid-haystack/stemming/stopwords/extended_english.txt +216 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_danish.txt +94 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_dutch.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_english.txt +174 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_finnish.txt +0 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_french.txt +155 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_german.txt +231 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_italian.txt +279 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_norwegian.txt +176 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_portuguese.txt +203 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_russian.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_russiankoi8_r.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_spanish.txt +313 -0
- data/lib/mongoid-haystack/token.rb +71 -0
- data/lib/mongoid-haystack/util.rb +67 -0
- data/mongoid-haystack.gemspec +73 -0
- data/test/helper.rb +28 -0
- data/test/mongoid-haystack_test.rb +119 -0
- data/test/testing.rb +196 -0
- metadata +123 -0
@@ -0,0 +1 @@
|
|
1
|
+
# This file's only statement is a reference to the Sequence constant.
# NOTE(review): presumably this lets path-based autoloaders (app/models/...)
# resolve the constant defined by the library itself -- confirm.
Mongoid::Haystack::Sequence
|
@@ -0,0 +1 @@
|
|
1
|
+
# This file's only statement is a reference to the Token constant.
# NOTE(review): presumably this lets path-based autoloaders (app/models/...)
# resolve the constant defined by the library itself -- confirm.
Mongoid::Haystack::Token
|
@@ -0,0 +1,79 @@
|
|
1
|
+
##
|
2
|
+
#
|
3
|
+
module Mongoid
  # Entry point of the library: records the version, activates and requires
  # the gem dependencies, requires a stemmer implementation, and loads the
  # component files that live in the directory named after this file.
  module Haystack
    const_set :Version, '1.0.0'

    class << Haystack
      # Returns the library version string.
      def version
        const_get :Version
      end

      # Maps each name handed to +require+ onto the argument list handed to
      # +gem+ (gem name followed by its version requirement).
      def dependencies
        {
          'mongoid' => [ 'mongoid' , '~> 3.0' ] ,
          'map'     => [ 'map'     , '~> 6.2' ] ,
          'fattr'   => [ 'fattr'   , '~> 2.2' ] ,
        }
      end

      # Returns the library directory (this file's absolute path without the
      # ".rb" suffix), or that directory joined with +args+ when any are given.
      #
      # When a block is given it is called with the library directory
      # temporarily prepended to $LOAD_PATH; the first $LOAD_PATH entry is
      # shifted off again afterwards, even if the block raises. Because the
      # block runs from the +ensure+ clause, the method still returns the
      # path, not the block's value.
      def libdir(*args, &block)
        @libdir ||= File.expand_path(__FILE__).sub(/\.rb$/,'')
        args.empty? ? @libdir : File.join(@libdir, *args)
      ensure
        if block
          begin
            $LOAD_PATH.unshift(@libdir)
            block.call()
          ensure
            $LOAD_PATH.shift()
          end
        end
      end

      # Loads each named library with the library directory on $LOAD_PATH.
      #
      # The arguments are joined and re-split on runs of whitespace and '+'
      # characters, so several names may be passed in a single string. For
      # the same reason this method must not be given paths that may contain
      # spaces or '+'.
      def load(*libs)
        libs = libs.join(' ').scan(/[^\s+]+/)
        libdir{ libs.each{|lib| Kernel.load(lib) } }
      end
    end

    # rubygems is optional: without it the dependencies are not activated.
    begin
      require 'rubygems'
    rescue LoadError
      nil
    end

    if defined?(gem)
      dependencies.each do |lib, dependency|
        gem(*dependency)
        require(lib)
      end
    end

    # pry is optional.
    begin
      require 'pry'
    rescue LoadError
      nil
    end

    # One of the two stemmer libraries is mandatory.
    begin
      require 'fast_stemmer'
    rescue LoadError
      begin
        require 'stemmer'
      rescue LoadError
        abort("mongoid-haystack requires either the 'fast-stemmer' or 'ruby-stemmer' gems")
      end
    end

    # Load the component files in dependency order.
    #
    # Kernel.load is called explicitly: inside this module body a bare +load+
    # resolves to Haystack.load above, which splits its arguments on
    # whitespace and '+' and would therefore mangle an absolute path that
    # contains either character. Each file is still loaded inside a libdir
    # block so $LOAD_PATH looks the same to the loaded file as it did before.
    %w( stemming util count sequence token index search ).each do |lib|
      path = Haystack.libdir("#{ lib }.rb")
      Haystack.libdir{ Kernel.load(path) }
    end

    extend Haystack
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Mongoid
  module Haystack
    # A named integer counter stored as a Mongoid document.
    class Count
      include Mongoid::Document

      field :name,  :type => String
      field :value, :type => Integer, :default => 0

      index({:name => 1}, {:unique => true})
      index({:value => 1})

      class << Count
        # Finds the counter called +name+ (converted with to_s), creating it
        # when it does not exist yet.
        def for(name)
          finder  = lambda { where(:name => name.to_s).first }
          creator = lambda { create!(:name => name.to_s) }

          Haystack.find_or_create(finder, creator)
        end

        # Shorthand for Count.for(name).
        def [](name)
          Count.for(name)
        end
      end

      # Increments the +value+ field by +n+ (1 by default).
      def inc(n = 1)
        super(:value, n)
      end
    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Mongoid
  module Haystack
    # Adds the given models (or every document of the given classes) to the
    # search index. Delegates to Index.add.
    def Haystack.index(*args, &block)
      Index.add(*args, &block)
    end

    # Removes the given models from the search index. Delegates to
    # Index.remove.
    def Haystack.unindex(*args, &block)
      Index.remove(*args, &block)
    end

    # Re-indexes the model behind every existing index entry. An entry whose
    # model cannot be loaded is destroyed and skipped.
    #
    # Only StandardError is rescued: rescuing every exception would also trap
    # Interrupt/SystemExit and destroy the entry being processed when the
    # user merely aborts the run.
    #
    # NOTE(review): an entry whose +model+ comes back nil is neither destroyed
    # nor re-indexed (Index.add ignores nil) -- confirm whether such orphans
    # should be removed here as well.
    def Haystack.reindex!(*args, &block)
      Index.all.each do |entry|
        model =
          begin
            entry.model
          rescue StandardError
            entry.destroy
            next
          end

        Haystack.index(model)
      end
    end

    # One document per indexed model. Stores the ids of the tokens found in
    # the model's text, how often each token occurs in its keywords and in
    # its fulltext, an optional score and optional facets.
    class Index
      include Mongoid::Document

      class << Index
        # Creates or refreshes the index entry of every model in +args+.
        #
        # +args+ may contain model instances and/or classes (anything that
        # responds to +all+); see models_for.
        def add(*args)
          models_for(*args) do |model|
            config = haystack_config_for(model)

            keywords = Array(config[:keywords]).join(' ')
            fulltext = Array(config[:fulltext]).join(' ')
            facets   = Map.for(config[:facets] || {})
            score    = config[:score]

            # Look the entry up through the stored foreign key fields, the
            # same way +remove+ does and the way the collection is indexed.
            index =
              Haystack.find_or_create(
                ->{ where(:model_type => model.class.name, :model_id => model.id).first },
                ->{ new(:model => model) }
              )

            # An existing entry already contributed to the token counters;
            # take that contribution back before counting again.
            if index.persisted?
              Index.subtract(index)
            end

            keyword_scores  = Hash.new{|h,k| h[k] = 0}
            fulltext_scores = Hash.new{|h,k| h[k] = 0}

            # Rebuild the token list from scratch so that tokens which no
            # longer occur in the model's text stop matching searches.
            tokens = []

            Token.values_for(keywords).each do |value|
              id = Token.add(value).id

              tokens.push(id)
              keyword_scores[id] += 1
            end

            Token.values_for(fulltext).each do |value|
              id = Token.add(value).id

              tokens.push(id)
              fulltext_scores[id] += 1
            end

            index.keyword_scores  = keyword_scores
            index.fulltext_scores = fulltext_scores

            index.score  = score if score
            index.facets = facets if facets

            index.tokens = tokens.uniq

            index.save!
          end
        end

        # Removes the index entry of every model in +args+, giving back its
        # contribution to the token counters first.
        def remove(*args)
          models_for(*args) do |model|
            index = where(:model_type => model.class.name, :model_id => model.id).first

            if index
              subtract(index)
              index.destroy
            end
          end
        end

        # Decrements each token's +count+ by the number of occurrences that
        # +index+ recorded for it, and the global :tokens counter by the sum.
        #
        # NOTE(review): the score hashes are read with token.id as the key;
        # confirm the keys still have that type after a round trip through
        # the database.
        def subtract(index)
          tokens = Token.where(:id.in => index.tokens)

          n = 0

          tokens.each do |token|
            keyword_score  = index.keyword_scores[token.id].to_i
            fulltext_score = index.fulltext_scores[token.id].to_i

            i = keyword_score + fulltext_score
            token.inc(:count, -i)

            n += i
          end

          Count[:tokens].inc(-n)
        end

        # Yields every model described by +args+ (flattened, nils dropped):
        # an argument that responds to +persisted?+ is yielded as is, any
        # other argument is asked for +all+ and each result is yielded.
        def models_for(*args, &block)
          args.flatten.compact.each do |arg|
            if arg.respond_to?(:persisted?)
              block.call(arg)
            else
              arg.all.each do |model|
                block.call(model)
              end
            end
          end
        end

        # Returns the indexing configuration of +model+ as a Map.
        #
        # A model that responds to +to_haystack+ supplies it directly.
        # Otherwise keywords come from the first of keywords/title the model
        # responds to, and fulltext from the first of
        # fulltext/text/content/body/description/to_s.
        def haystack_config_for(model)
          if model.respond_to?(:to_haystack)
            return Map.for(model.to_haystack)
          end

          Map.for(
            :keywords => first_values_for(model, %w( keywords title )),
            :fulltext => first_values_for(model, %w( fulltext text content body description to_s ))
          )
        end

        # Returns, as an array, the value of the first attribute in +attrs+
        # that +model+ responds to; an empty array when it responds to none.
        def first_values_for(model, attrs)
          attr = attrs.detect{|name| model.respond_to?(name)}
          attr ? [*model.send(attr)] : []
        end
      end

      belongs_to(:model, :polymorphic => true)

      field(:tokens, :type => Array, :default => [])
      field(:score, :type => Integer, :default => 0)
      field(:keyword_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
      field(:fulltext_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
      field(:facets, :type => Hash, :default => {})

      index({:model_type => 1})
      index({:model_id => 1})

      index({:tokens => 1})
      index({:score => 1})
      index({:keyword_scores => 1})
      index({:fulltext_scores => 1})
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Mongoid
  module Haystack
    # Builds the index query for a search phrase.
    #
    # The non-option arguments are joined with spaces to form the phrase.
    # Options (taken from +args+ by Map.options_for!):
    #   :facets - value the index entry's +facets+ must equal
    #   :types  - class or classes whose names restrict +model_type+
    #
    # Returns Index.where(conditions).order_by(order): entries containing any
    # of the phrase's known tokens, ordered by score, then by the keyword
    # score of each token, then by the fulltext score of each token.
    def search(*args, &block)
      options = Map.options_for!(args)
      search = args.join(' ')

      tokens = search_tokens_for(search)

      conditions = {}
      conditions[:tokens.in] = tokens.map{|token| token.id}

      order = []
      order.push(["score", :desc])

      tokens.each do |token|
        order.push(["keyword_scores.#{ token.id }", :desc])
      end

      tokens.each do |token|
        order.push(["fulltext_scores.#{ token.id }", :desc])
      end

      if options[:facets]
        conditions[:facets] = options[:facets]
      end

      if options[:types]
        model_types = Array(options[:types]).map{|type| type.name}
        conditions[:model_type.in] = model_types
      end

      Index.where(conditions).order_by(order)
    end

    # Returns the stored tokens whose value occurs in +search+, sorted by
    # descending rarity bin and, within a bin, by descending position in the
    # query result.
    def search_tokens_for(search)
      values = Token.values_for(search.to_s)
      tokens = Token.where(:value.in => values).to_a

      # 1-based position of each token in the order the query returned them.
      positions = {}
      tokens.each_with_index{|token, index| positions[token] = index + 1}

      t = Count[:tokens].value.to_f

      tokens.sort! do |a,b|
        [b.rarity_bin(t), positions[b]] <=> [a.rarity_bin(t), positions[a]]
      end

      tokens
    end

    # Mixin for models: adds a class-level +search+ restricted to the
    # including class and keeps the index in sync through callbacks.
    module Search
      ClassMethods = proc do
        # Same as Haystack.search with the receiver appended to :types.
        def search(*args, &block)
          options = Map.options_for!(args)
          options[:types] = Array(options[:types]).flatten.compact
          options[:types].push(self)
          args.push(options)
          Haystack.search(*args, &block)
        end

        # Indexing is best effort: an ordinary failure must not break the
        # save. Only StandardError is swallowed so that Interrupt, SystemExit
        # and similar still propagate.
        after_save do |doc|
          begin
            Mongoid::Haystack::Index.add(doc) if doc.persisted?
          rescue StandardError
            nil
          end
        end

        # Un-indexing is best effort as well; see after_save.
        after_destroy do |doc|
          begin
            Mongoid::Haystack::Index.remove(doc)
          rescue StandardError
            nil
          end
        end
      end

      InstanceMethods = proc do
      end

      # Evaluates ClassMethods against the including class itself and
      # InstanceMethods inside its class body.
      def Search.included(other)
        super
      ensure
        other.instance_eval(&ClassMethods)
        other.class_eval(&InstanceMethods)
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Mongoid
  module Haystack
    # A named, persisted integer sequence.
    class Sequence
      include Mongoid::Document

      field :name,  :type => String
      field :value, :default => 0, :type => Integer

      validates_presence_of :name
      validates_uniqueness_of :name
      validates_presence_of :value

      index({:name => 1}, {:unique => true})

      # Sequence documents already looked up, keyed by name.
      Cache = Hash.new

      class << self
        # Returns the sequence called +name+ (converted with to_s), creating
        # it when missing. Results are memoized in Cache.
        def for(name)
          name = name.to_s

          cached = Cache[name]
          return cached if cached

          finder  = lambda { where(:name => name).first }
          creator = lambda { create!(:name => name) }

          Cache[name] = Haystack.find_or_create(finder, creator)
        end

        alias_method('[]', 'for')

        # Builds a sequence name from a class and a field name: the class
        # name with "::" turned into "." and lowercased, a dash, the field.
        def sequence_name_for(klass, fieldname)
          prefix = klass.name.gsub(/::/, '.').downcase
          "#{ prefix }-#{ fieldname }"
        end
      end

      # Forget a destroyed sequence so that it is looked up again next time.
      after_destroy do |sequence|
        Cache.delete(sequence.name)
      end

      # Increments +value+ by one.
      def next
        inc(:value, 1)
      end

      # Reloads the document and returns its +value+.
      def current_value
        reload.value
      end

      # Sets +value+ back to 0.
      def reset!
        update_attributes!(:value => 0)
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Turns free text into a list of stemmed, lowercased words with stopwords
# removed. Relies on String#stem, which this file does not define.
module Stemming
  # Joins +args+ with spaces, extracts the words (runs of word characters,
  # '.', '_' and '-'), and returns the lowercased stem of every word for
  # which neither the word nor its stem is a stopword. Order and duplicates
  # are preserved.
  def stem(*args)
    args.join(' ').scan(/[\w._-]+/).each_with_object([]) do |raw, stems|
      word = raw.downcase
      next if Stopwords.stopword?(word)

      stem = word.stem.downcase
      next if Stopwords.stopword?(stem)

      stems.push(stem)
    end
  end

  alias_method('for', 'stem')

  # Stopword tables built from the "*.txt" files in the "stopwords"
  # directory that sits beside this file (one word per line).
  module Stopwords
    dirname = __FILE__.sub(/\.rb\Z/, '')
    glob = File.join(dirname, 'stopwords', '*.txt')

    # This file may be loaded more than once, so each constant is created
    # only when this module does not own it yet. const_defined?(name, false)
    # is used rather than defined?(Name) because the latter is also true when
    # an unrelated top-level constant with the same name exists (e.g. an
    # application's Index class), which would skip building the table.
    List = {} unless const_defined?(:List, false)

    # Word list name (file basename up to the first '.') => array of words.
    # Every list also contains the empty string.
    Dir.glob(glob).each do |wordlist|
      name = File.basename(wordlist).split(/\./).first

      words = File.readlines(wordlist).map{|line| line.strip}
      words.delete_if{|word| word.empty?}
      words.push('')

      List[name] = words
    end

    unless const_defined?(:All, false)
      All = []

      # Only the English lists are active. fetch raises a KeyError naming
      # the list when its file is missing.
      %w( english full_english extended_english ).each do |name|
        All.concat(List.fetch(name))
      end

      # Other lists shipped with the library; add their names above to
      # enable them:
      #   full_french full_spanish full_portuguese full_italian full_german
      #   full_dutch full_norwegian full_danish full_russian
      #   full_russiankoi8_r full_finnish

      All.sort!
      All.uniq!
    end

    unless const_defined?(:Index, false)
      # Lookup table over All (word => word).
      Index = {}

      All.each do |word|
        Index[word] = word
      end
    end

    # True when +word+ is one of the active stopwords.
    def stopword?(word)
      Index.key?(word)
    end

    extend(Stopwords)
  end

  extend(Stemming)
end
|
76
|
+
|
77
|
+
# When this file is executed directly, print the stems of a sample phrase.
if __FILE__ == $0
  sample = "the foobars foo-bars foos bars cat and mountains"
  p(Stemming.stem(sample))
end
|