mongoid-haystack 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +446 -0
- data/lib/app/models/mongoid/haystack/count.rb +1 -0
- data/lib/app/models/mongoid/haystack/index.rb +1 -0
- data/lib/app/models/mongoid/haystack/sequence.rb +1 -0
- data/lib/app/models/mongoid/haystack/token.rb +1 -0
- data/lib/mongoid-haystack.rb +79 -0
- data/lib/mongoid-haystack/count.rb +28 -0
- data/lib/mongoid-haystack/index.rb +165 -0
- data/lib/mongoid-haystack/search.rb +96 -0
- data/lib/mongoid-haystack/sequence.rb +55 -0
- data/lib/mongoid-haystack/stemming.rb +79 -0
- data/lib/mongoid-haystack/stemming/stopwords/english.txt +32 -0
- data/lib/mongoid-haystack/stemming/stopwords/extended_english.txt +216 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_danish.txt +94 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_dutch.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_english.txt +174 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_finnish.txt +0 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_french.txt +155 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_german.txt +231 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_italian.txt +279 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_norwegian.txt +176 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_portuguese.txt +203 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_russian.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_russiankoi8_r.txt +101 -0
- data/lib/mongoid-haystack/stemming/stopwords/full_spanish.txt +313 -0
- data/lib/mongoid-haystack/token.rb +71 -0
- data/lib/mongoid-haystack/util.rb +67 -0
- data/mongoid-haystack.gemspec +73 -0
- data/test/helper.rb +28 -0
- data/test/mongoid-haystack_test.rb +119 -0
- data/test/testing.rb +196 -0
- metadata +123 -0
@@ -0,0 +1 @@
|
|
1
|
+
# Bare reference to the Sequence constant; this is the file's only statement.
# NOTE(review): presumably exists so an autoloader looking for this path finds
# the class defined under lib/mongoid-haystack/ - confirm against the loader.
Mongoid::Haystack::Sequence
|
@@ -0,0 +1 @@
|
|
1
|
+
# Bare reference to the Token constant; this is the file's only statement.
# NOTE(review): presumably exists so an autoloader looking for this path finds
# the class defined under lib/mongoid-haystack/ - confirm against the loader.
Mongoid::Haystack::Token
|
@@ -0,0 +1,79 @@
|
|
1
|
+
##
|
2
|
+
#
|
3
|
+
module Mongoid
|
4
|
+
module Haystack
|
5
|
+
const_set :Version, '1.0.0'
|
6
|
+
|
7
|
+
class << Haystack
|
8
|
+
# Returns the value stored in the Version constant of the receiver.
def version
  const_get(:Version)
end
|
11
|
+
|
12
|
+
# Runtime gem dependencies, keyed by the library name to require.
#
# Each value is a [gem_name, version_constraint] pair that can be splatted
# straight into Kernel#gem.
def dependencies
  constraints = {
    'mongoid' => '~> 3.0',
    'map'     => '~> 6.2',
    'fattr'   => '~> 2.2'
  }

  constraints.each_with_object({}) do |(lib, constraint), deps|
    deps[lib] = [lib, constraint]
  end
end
|
19
|
+
|
20
|
+
# Returns the library directory (this file's path minus its ".rb" suffix),
# or a path inside it when +args+ are given.
#
# When a block is given it is called with the library directory temporarily
# prepended to $LOAD_PATH. The return value is always the path, never the
# block's result.
#
# The directory is removed from $LOAD_PATH by value afterwards rather than
# with a blind shift, so a block that itself prepends entries to the load
# path does not cause the wrong entry to be dropped.
def libdir(*args, &block)
  @libdir ||= File.expand_path(__FILE__).sub(/\.rb$/, '')
  path = args.empty? ? @libdir : File.join(@libdir, *args)

  if block
    $LOAD_PATH.unshift(@libdir)
    begin
      block.call
    ensure
      # remove the entry we added (the first occurrence is ours or an equal one)
      position = $LOAD_PATH.index(@libdir)
      $LOAD_PATH.delete_at(position) if position
    end
  end

  path
end
|
33
|
+
|
34
|
+
# Loads one or more library files with the library directory on $LOAD_PATH.
#
# +libs+ may be given as separate arguments and/or as whitespace-separated
# names inside a single string. Names are split on whitespace only; the
# previous pattern /[^\s+]+/ also treated a literal "+" as a separator, which
# broke any path containing that character.
def load(*libs)
  libs = libs.join(' ').scan(/\S+/)
  libdir { libs.each { |lib| Kernel.load(lib) } }
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Requires a library if it is available; a LoadError is ignored.
optional_require = lambda do |lib|
  begin
    require lib
  rescue LoadError
    nil
  end
end

optional_require.call('rubygems')

# Activate and require the declared dependencies when rubygems is present.
if defined?(gem)
  dependencies.each do |lib, dependency|
    gem(*dependency)
    require(lib)
  end
end

optional_require.call('pry')

# A stemmer is mandatory: try each candidate in order, stop at the first one
# that loads, and abort the process when none is available.
stemmer_loaded = %w( fast_stemmer stemmer ).any? do |lib|
  begin
    require lib
    true
  rescue LoadError
    false
  end
end

abort("mongoid-haystack requires either the 'fast-stemmer' or 'ruby-stemmer' gems") unless stemmer_loaded

# Load the library's own files, in dependency order.
%w( stemming util count sequence token index search ).each do |lib|
  load Haystack.libdir("#{ lib }.rb")
end

extend Haystack
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Mongoid
|
2
|
+
module Haystack
|
3
|
+
# A named integer counter stored as a Mongoid document.
class Count
  include Mongoid::Document

  field(:name, :type => String)
  field(:value, :type => Integer, :default => 0)

  index({:name => 1}, {:unique => true})
  index({:value => 1})

  # Returns the counter called +name+ (converted with to_s).
  #
  # Delegates to Haystack.find_or_create (defined elsewhere), handing it one
  # lambda that looks the counter up by name and one that creates it.
  def Count.for(name)
    finder  = ->{ where(:name => name.to_s).first }
    creator = ->{ create!(:name => name.to_s) }

    Haystack.find_or_create(finder, creator)
  end

  # Shorthand for Count.for, e.g. Count[:tokens].
  def Count.[](name)
    Count.for(name)
  end

  # Increments the +value+ field by +n+ (default 1) through Mongoid's inc.
  def inc(n = 1)
    super(:value, n)
  end
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Mongoid
|
2
|
+
module Haystack
|
3
|
+
# Adds the given models to the search index; forwards everything to Index.add.
def self.index(*args, &block)
  Index.add(*args, &block)
end
|
6
|
+
|
7
|
+
# Removes the given models from the search index; forwards everything to
# Index.remove.
def self.unindex(*args, &block)
  Index.remove(*args, &block)
end
|
10
|
+
|
11
|
+
# Rebuilds every existing index entry from its model.
#
# An entry whose model cannot be loaded (the lookup raises) is destroyed and
# skipped. Only StandardError is treated that way: the previous
# `rescue Object` also caught Interrupt, SystemExit and similar, so an
# interrupted run would delete index entries instead of stopping.
#
# +args+ and +block+ are accepted for interface compatibility and ignored.
def self.reindex!(*args, &block)
  Index.all.each do |entry|
    model =
      begin
        entry.model
      rescue StandardError
        entry.destroy
        next
      end

    # method call on self (the indexing entry point), not a local variable
    index(model)
  end
end
|
24
|
+
|
25
|
+
class Index
|
26
|
+
include Mongoid::Document
|
27
|
+
|
28
|
+
class << Index
|
29
|
+
# Indexes (or re-indexes) every model designated by +args+ (see models_for).
#
# The text to index comes from model.to_haystack when the model responds to
# it (expected keys: :keywords, :fulltext, :facets, :score). Otherwise the
# first attribute the model responds to among keywords/title supplies the
# keywords and the first among fulltext/text/content/body/description/to_s
# supplies the fulltext.
#
# For an existing index entry the previous token counts are subtracted and
# the token list is rebuilt from scratch. Previously new tokens were appended
# to the old list, so tokens no longer present in the model kept matching
# searches after a re-index.
def add(*args)
  models_for(*args) do |model|
    config =
      if model.respond_to?(:to_haystack)
        Map.for(model.to_haystack)
      else
        keyword_attr = %w( keywords title ).find { |attr| model.respond_to?(attr) }
        fulltext_attr = %w( fulltext text content body description to_s ).find { |attr| model.respond_to?(attr) }

        Map.for(
          :keywords => (keyword_attr ? [*model.send(keyword_attr)] : []),
          :fulltext => (fulltext_attr ? [*model.send(fulltext_attr)] : [])
        )
      end

    keywords = Array(config[:keywords]).join(' ')
    fulltext = Array(config[:fulltext]).join(' ')
    facets = Map.for(config[:facets] || {})
    score = config[:score]

    # Look the entry up by type and id, the same keys `remove` uses.
    index =
      Haystack.find_or_create(
        ->{ where(:model_type => model.class.name, :model_id => model.id).first },
        ->{ new(:model => model) }
      )

    if index.persisted?
      Index.subtract(index)
      # drop the stale token list only after subtract has read it
      index.tokens = []
    end

    keyword_scores = Hash.new{|h,k| h[k] = 0}
    fulltext_scores = Hash.new{|h,k| h[k] = 0}

    Token.values_for(keywords).each do |value|
      id = Token.add(value).id

      index.tokens.push(id)
      keyword_scores[id] += 1
    end

    Token.values_for(fulltext).each do |value|
      id = Token.add(value).id

      index.tokens.push(id)
      fulltext_scores[id] += 1
    end

    index.keyword_scores = keyword_scores
    index.fulltext_scores = fulltext_scores

    index.score = score if score
    index.facets = facets

    index.tokens = index.tokens.uniq

    index.save!
  end
end
|
104
|
+
|
105
|
+
# Removes the index entry of every model designated by +args+ (see
# models_for). Models that have no entry are skipped.
def remove(*args)
  models_for(*args) do |model|
    criteria = { :model_type => model.class.name, :model_id => model.id }
    existing = where(criteria).first
    next unless existing

    # give the token counts back before the entry disappears
    subtract(existing)
    existing.destroy
  end
end
|
115
|
+
|
116
|
+
# Takes the contribution of +index+ back out of the token statistics.
#
# Each token listed in index.tokens has its :count decremented by that
# token's keyword score plus fulltext score in this entry, and the
# Count[:tokens] counter is decremented by the sum of those amounts.
def subtract(index)
  removed = Token.where(:id.in => index.tokens).inject(0) do |total, token|
    weight = index.keyword_scores[token.id].to_i + index.fulltext_scores[token.id].to_i
    token.inc(:count, -weight)
    total + weight
  end

  Count[:tokens].inc(-removed)
end
|
133
|
+
|
134
|
+
# Yields every model designated by +args+ to the block.
#
# Arguments are flattened and nils dropped. An argument that responds to
# persisted? is yielded as a single model; anything else is asked for `all`
# and each of its members is yielded.
def models_for(*args, &block)
  args.flatten.compact.each do |arg|
    models = arg.respond_to?(:persisted?) ? [arg] : arg.all
    models.each { |model| block.call(model) }
  end
end
|
146
|
+
end
|
147
|
+
|
148
|
+
# The indexed document; polymorphic, so any model class can be referenced.
belongs_to(:model, :polymorphic => true)

# Builds a fresh hash whose missing keys default to (and are stored as) 0.
zero_counting_hash = proc{ Hash.new{|h,k| h[k] = 0} }

field(:tokens, :type => Array, :default => [])
field(:score, :type => Integer, :default => 0)
field(:keyword_scores, :type => Hash, :default => zero_counting_hash)
field(:fulltext_scores, :type => Hash, :default => zero_counting_hash)
field(:facets, :type => Hash, :default => {})

# One ascending single-field index per queried attribute.
[:model_type, :model_id, :tokens, :score, :keyword_scores, :fulltext_scores].each do |attribute|
  index({attribute => 1})
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module Mongoid
|
2
|
+
module Haystack
|
3
|
+
# Searches the index for the given terms and returns the Index criteria.
#
# The non-option arguments are joined into one search string. Options
# (extracted from +args+ with Map.options_for!):
#   :facets - value the entry's facets field must match
#   :types  - model class(es) the results are restricted to
#
# Results match any of the search tokens and are ordered by score, then by
# each token's keyword score, then by each token's fulltext score, all
# descending.
def search(*args, &block)
  options = Map.options_for!(args)
  query = args.join(' ')

  tokens = search_tokens_for(query)
  token_ids = tokens.map{|token| token.id}

  conditions = { :tokens.in => token_ids }
  conditions[:facets] = options[:facets] if options[:facets]

  if options[:types]
    conditions[:model_type.in] = Array(options[:types]).map{|type| type.name}
  end

  order = [["score", :desc]]
  %w( keyword_scores fulltext_scores ).each do |scores|
    token_ids.each{|id| order.push(["#{ scores }.#{ id }", :desc])}
  end

  Index.where(conditions).order_by(order)
end
|
41
|
+
|
42
|
+
# Returns the stored tokens that match the search string, sorted in place.
#
# The sort is descending on [rarity_bin, position], where position is the
# token's 1-based place in the query result and rarity_bin is computed by
# the token from the total token count (Count[:tokens]).
def search_tokens_for(search)
  values = Token.values_for(search.to_s)
  tokens = Token.where(:value.in => values).to_a

  positions = {}
  tokens.each.with_index(1){|token, position| positions[token] = position}

  total = Count[:tokens].value.to_f
  sort_key = ->(token){ [token.rarity_bin(total), positions[token]] }

  tokens.sort!{|a, b| sort_key.call(b) <=> sort_key.call(a)}
  tokens
end
|
57
|
+
|
58
|
+
# Mixin that makes a model searchable and keeps its index entry in sync.
#
# Including it gives the model class a `search` method scoped to that class
# and registers after_save / after_destroy callbacks that add the document
# to, or remove it from, the index.
module Search
  # Evaluated against the including class (see Search.included).
  ClassMethods = proc do
    # Same as Haystack.search, with the receiving class added to :types.
    def search(*args, &block)
      options = Map.options_for!(args)
      options[:types] = Array(options[:types]).flatten.compact
      options[:types].push(self)
      args.push(options)
      Haystack.search(*args, &block)
    end

    # Indexing is best-effort: an ordinary failure must not break the save.
    # Only StandardError is swallowed so that Interrupt, SystemExit and
    # other non-standard exceptions still propagate (the previous
    # `rescue Object` hid those as well).
    after_save do |doc|
      begin
        Mongoid::Haystack::Index.add(doc) if doc.persisted?
      rescue StandardError
        nil
      end
    end

    # Same best-effort policy when the document is destroyed.
    after_destroy do |doc|
      begin
        Mongoid::Haystack::Index.remove(doc)
      rescue StandardError
        nil
      end
    end
  end

  # Placeholder for instance-level additions; currently empty.
  InstanceMethods = proc do
  end

  # Hook run when the module is included: installs the class-level methods
  # and callbacks, then the (empty) instance-level additions.
  def Search.included(other)
    super
    other.instance_eval(&ClassMethods)
    other.class_eval(&InstanceMethods)
  end
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Mongoid
|
2
|
+
module Haystack
|
3
|
+
# A named integer sequence stored as a Mongoid document.
class Sequence
  include Mongoid::Document

  field(:name, :type => String)

  field(:value, :default => 0, :type => Integer)

  validates_presence_of(:name)
  validates_uniqueness_of(:name)

  validates_presence_of(:value)

  index({:name => 1}, {:unique => true})

  # Sequence documents already looked up, keyed by name.
  Cache = Hash.new

  class << self
    # Returns the sequence called +name+ (converted with to_s), from the
    # cache when present. Otherwise Haystack.find_or_create (defined
    # elsewhere) is handed a lookup lambda and a creation lambda, and its
    # result is cached.
    def for(name)
      name = name.to_s

      cached = Cache[name]
      return cached if cached

      Cache[name] =
        Haystack.find_or_create(
          ->{ where(:name => name).first },
          ->{ create!(:name => name) }
        )
    end

    alias_method('[]', 'for')

    # Builds "<class name, '::' replaced by '.', downcased>-<fieldname>".
    def sequence_name_for(klass, fieldname)
      prefix = klass.name.gsub(/::/, '.').downcase
      "#{ prefix }-#{ fieldname }"
    end
  end

  # Evict the destroyed sequence from the cache.
  after_destroy do |sequence|
    Cache.delete(sequence.name)
  end

  # Increments the value field by one through Mongoid's inc.
  def next
    inc(:value, 1)
  end

  # Reloads the document and returns its value.
  def current_value
    reload.value
  end

  # Sets the value back to 0.
  def reset!
    update_attributes!(:value => 0)
  end
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Word stemming with stopword filtering.
#
# Relies on String#stem, which this file does not define; it has to be
# provided by a stemmer library loaded beforehand.
module Stemming
  # Returns the stems of the words found in +args+.
  #
  # The arguments are joined with spaces and split into runs of word
  # characters, dots, underscores and dashes. Each word is downcased and
  # dropped when it is a stopword; otherwise it is stemmed, and the stem is
  # dropped when that is a stopword. Order and duplicates are preserved.
  def stem(*args)
    string = args.join(' ')
    words = string.scan(/[\w._-]+/)

    words.each_with_object([]) do |raw, stems|
      word = raw.downcase
      next if Stopwords.stopword?(word)

      # stem only after the cheap stopword check on the word itself
      stemmed = word.stem.downcase
      next if Stopwords.stopword?(stemmed)

      stems.push(stemmed)
    end
  end

  alias_method('for', 'stem')

  # Stopword lists read from the "stopwords/*.txt" files in the directory
  # named after this file.
  module Stopwords
    dirname = __FILE__.sub(/\.rb\Z/, '')
    glob = File.join(dirname, 'stopwords', '*.txt')

    # The guards below ask this module itself (inherit = false). A plain
    # defined?(All) / defined?(Index) is also satisfied by an unrelated
    # top-level constant of the same name, which would leave these constants
    # unbuilt and make stopword? consult the foreign object.
    List = {} unless const_defined?(:List, false)

    # List name (file basename up to the first dot) => array of words. The
    # empty string is appended to every list so that an empty word counts as
    # a stopword.
    Dir.glob(glob).each do |wordlist|
      name = File.basename(wordlist).split(/\./).first

      words = File.readlines(wordlist).map{|line| line.strip}
      words.delete_if{|word| word.empty?}
      words.push('')

      List[name] = words
    end

    unless const_defined?(:All, false)
      All = []

      # Only the English lists are enabled. The other shipped lists
      # (full_french, full_spanish, full_portuguese, full_italian,
      # full_german, full_dutch, full_norwegian, full_danish, full_russian,
      # full_russiankoi8_r, full_finnish) are read into List but not used.
      %w( english full_english extended_english ).each do |name|
        words = List.fetch(name) do
          # a missing list is reported and skipped rather than failing the load
          warn("mongoid-haystack: stopword list #{ name.inspect } not found in #{ File.join(dirname, 'stopwords') }")
          []
        end

        All.concat(words)
      end

      All.sort!
      All.uniq!
    end

    # Hash form of All, for constant-time membership tests.
    unless const_defined?(:Index, false)
      Index = {}

      All.each do |word|
        Index[word] = word
      end
    end

    # True when +word+ is one of the enabled stopwords.
    def stopword?(word)
      !!Index[word]
    end

    extend(Stopwords)
  end

  extend(Stemming)
end
|
76
|
+
|
77
|
+
# Smoke test: print the stems of a sample sentence when this file is run
# directly rather than loaded.
p Stemming.stem("the foobars foo-bars foos bars cat and mountains") if __FILE__ == $0
|