mongoid-haystack 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. data/Rakefile +446 -0
  2. data/lib/app/models/mongoid/haystack/count.rb +1 -0
  3. data/lib/app/models/mongoid/haystack/index.rb +1 -0
  4. data/lib/app/models/mongoid/haystack/sequence.rb +1 -0
  5. data/lib/app/models/mongoid/haystack/token.rb +1 -0
  6. data/lib/mongoid-haystack.rb +79 -0
  7. data/lib/mongoid-haystack/count.rb +28 -0
  8. data/lib/mongoid-haystack/index.rb +165 -0
  9. data/lib/mongoid-haystack/search.rb +96 -0
  10. data/lib/mongoid-haystack/sequence.rb +55 -0
  11. data/lib/mongoid-haystack/stemming.rb +79 -0
  12. data/lib/mongoid-haystack/stemming/stopwords/english.txt +32 -0
  13. data/lib/mongoid-haystack/stemming/stopwords/extended_english.txt +216 -0
  14. data/lib/mongoid-haystack/stemming/stopwords/full_danish.txt +94 -0
  15. data/lib/mongoid-haystack/stemming/stopwords/full_dutch.txt +101 -0
  16. data/lib/mongoid-haystack/stemming/stopwords/full_english.txt +174 -0
  17. data/lib/mongoid-haystack/stemming/stopwords/full_finnish.txt +0 -0
  18. data/lib/mongoid-haystack/stemming/stopwords/full_french.txt +155 -0
  19. data/lib/mongoid-haystack/stemming/stopwords/full_german.txt +231 -0
  20. data/lib/mongoid-haystack/stemming/stopwords/full_italian.txt +279 -0
  21. data/lib/mongoid-haystack/stemming/stopwords/full_norwegian.txt +176 -0
  22. data/lib/mongoid-haystack/stemming/stopwords/full_portuguese.txt +203 -0
  23. data/lib/mongoid-haystack/stemming/stopwords/full_russian.txt +101 -0
  24. data/lib/mongoid-haystack/stemming/stopwords/full_russiankoi8_r.txt +101 -0
  25. data/lib/mongoid-haystack/stemming/stopwords/full_spanish.txt +313 -0
  26. data/lib/mongoid-haystack/token.rb +71 -0
  27. data/lib/mongoid-haystack/util.rb +67 -0
  28. data/mongoid-haystack.gemspec +73 -0
  29. data/test/helper.rb +28 -0
  30. data/test/mongoid-haystack_test.rb +119 -0
  31. data/test/testing.rb +196 -0
  32. metadata +123 -0
@@ -0,0 +1 @@
1
+ Mongoid::Haystack::Sequence
@@ -0,0 +1 @@
1
+ Mongoid::Haystack::Token
@@ -0,0 +1,79 @@
1
+ ##
2
+ #
3
+ module Mongoid
4
+ module Haystack
5
+ const_set :Version, '1.0.0'
6
+
7
+ class << Haystack
8
# Returns the library version held in the Version constant.
def version
  self::Version
end
11
+
12
# Runtime gem dependencies.
#
# Returns a Hash keyed by the library name to require; each value is the
# [gem name, version requirement] pair for that library.
def dependencies
  requirements = [
    ['mongoid', '~> 3.0'],
    ['map',     '~> 6.2'],
    ['fattr',   '~> 2.2']
  ]

  requirements.each_with_object({}) do |(name, requirement), table|
    table[name] = [name, requirement]
  end
end
19
+
20
# Resolves paths inside this library's directory, i.e. this file's expanded
# path with the trailing ".rb" removed.
#
# args  - optional path segments to join onto the library directory.
# block - optional; when given it is called with the library directory
#         placed at the front of $LOAD_PATH.
#
# Returns the library directory (no args) or the joined path. The block's
# own result is discarded; an exception raised by the block propagates.
def libdir(*args, &block)
  @libdir ||= File.expand_path(__FILE__).sub(/\.rb\z/, '')
  path = args.empty? ? @libdir : File.join(@libdir, *args)

  if block
    $LOAD_PATH.unshift(@libdir)
    begin
      block.call
    ensure
      # Remove the entry we added instead of blindly shifting: the block may
      # itself have pushed something onto the front of $LOAD_PATH, in which
      # case a plain shift would drop that entry and leave ours behind.
      position = $LOAD_PATH.index(@libdir)
      $LOAD_PATH.delete_at(position) if position
    end
  end

  path
end
33
+
34
# Loads library files via Kernel.load while the library directory is on
# $LOAD_PATH (see libdir).
#
# libs - file names; each argument may be one file, a nested list of files,
#        or several whitespace-separated file names in one string.
#
# An argument that names an existing file is loaded as-is, so paths that
# contain spaces or "+" are no longer chopped into pieces. (The previous
# pattern /[^\s+]+/ treated "+" as a separator as well as whitespace.)
#
# Returns whatever libdir returns.
def load(*libs)
  files = libs.flatten.flat_map do |lib|
    name = lib.to_s
    File.exist?(name) ? [name] : name.split
  end

  libdir { files.each { |file| Kernel.load(file) } }
end
38
+ end
39
+
40
# RubyGems is optional: carry on when it cannot be required.
begin
  require 'rubygems'
rescue LoadError
  nil
end

# When `gem` is available, activate each declared dependency and require it.
if defined?(gem)
  dependencies.each_pair do |lib, dependency|
    gem(*dependency)
    require(lib)
  end
end

# pry is loaded when installed; its absence is ignored.
begin
  require 'pry'
rescue LoadError
  nil
end

# A stemmer is mandatory: 'fast_stemmer' is tried first, then 'stemmer'.
stemmer_loaded =
  %w[fast_stemmer stemmer].any? do |lib|
    begin
      require lib
      true
    rescue LoadError
      false
    end
  end

unless stemmer_loaded
  abort("mongoid-haystack requires either the 'fast-stemmer' or 'ruby-stemmer' gems")
end

# Load the library's parts in this fixed order.
%w[
  stemming.rb
  util.rb
  count.rb
  sequence.rb
  token.rb
  index.rb
  search.rb
].each do |file|
  load Haystack.libdir(file)
end

# Expose the module's instance methods on the module itself.
extend Haystack
78
+ end
79
+ end
@@ -0,0 +1,28 @@
1
+ module Mongoid
2
+ module Haystack
3
# A named integer counter stored as a Mongoid document.
class Count
  include Mongoid::Document

  field :name,  :type => String
  field :value, :type => Integer, :default => 0

  index({:name => 1}, {:unique => true})
  index({:value => 1})

  class << self
    # Finds the counter called +name+ (converted with to_s), creating it
    # through Haystack.find_or_create when no such document is found.
    def for(name)
      key = name.to_s
      finder  = lambda { where(:name => key).first }
      creator = lambda { create!(:name => key) }

      Haystack.find_or_create(finder, creator)
    end

    # Shorthand for Count.for(name).
    def [](name)
      Count.for(name)
    end
  end

  # Increments the :value field by +n+ (default 1) via the inherited inc.
  def inc(n = 1)
    super(:value, n)
  end
end
27
+ end
28
+ end
@@ -0,0 +1,165 @@
1
+ module Mongoid
2
+ module Haystack
3
# Forwards all arguments and the block to Index.add.
def self.index(*args, &block)
  Index.add(*args, &block)
end
6
+
7
# Forwards all arguments and the block to Index.remove.
def self.unindex(*args, &block)
  Index.remove(*args, &block)
end
10
+
11
# Re-indexes the model behind every existing Index document.
#
# An entry whose model lookup raises is destroyed and skipped; every other
# entry's model is passed to index. The arguments and block are accepted
# but not used.
#
# Only StandardError is rescued, so Interrupt, SystemExit and similar
# process-level exceptions abort the run instead of being mistaken for a
# missing model (the previous `rescue Object` caught those as well).
def self.reindex!(*args, &block)
  Index.all.each do |entry|
    model =
      begin
        entry.model
      rescue StandardError
        entry.destroy
        next
      end

    index(model)
  end
end
24
+
25
+ class Index
26
+ include Mongoid::Document
27
+
28
+ class << Index
29
# Builds (or rebuilds) the Index document for each given model.
#
# args - models and/or model classes, expanded by models_for.
#
# For every model: the indexing config is determined, the Index document
# is found or built, token statistics from a previous indexing are backed
# out, the keywords and fulltext are tokenized and tallied, and the Index
# document is saved.
def add(*args)
  models_for(*args) do |model|
    config = haystack_config_for(model)

    keywords = Array(config[:keywords]).join(' ')
    fulltext = Array(config[:fulltext]).join(' ')
    facets = Map.for(config[:facets] || {})
    score = config[:score]

    index =
      Haystack.find_or_create(
        ->{ where(:model => model).first },
        ->{ new(:model => model) }
      )

    if index.persisted?
      Index.subtract(index)
      # Start from a clean token list. Otherwise ids from the previous
      # indexing stay in index.tokens and the document keeps matching
      # searches for words it no longer contains.
      index.tokens = []
    end

    index.keyword_scores = haystack_tally(index, keywords)
    index.fulltext_scores = haystack_tally(index, fulltext)

    index.score = score if score
    index.facets = facets if facets

    index.tokens = index.tokens.uniq

    index.save!
  end
end

# Returns the indexing config for +model+: its own to_haystack when it
# responds to that, otherwise keywords/fulltext guessed from the first
# conventional attribute the model responds to.
def haystack_config_for(model)
  return Map.for(model.to_haystack) if model.respond_to?(:to_haystack)

  Map.for(
    :keywords => haystack_values_from(model, %w( keywords title )),
    :fulltext => haystack_values_from(model, %w( fulltext text content body description to_s ))
  )
end

# Returns, as an array, the value of the first attribute in +attrs+ that
# +model+ responds to (empty when it responds to none).
def haystack_values_from(model, attrs)
  values = []
  attr = attrs.find{|name| model.respond_to?(name)}
  values.push(*model.send(attr)) if attr
  values
end

# Registers every token value of +text+ with Token.add, appends the token
# ids to index.tokens and returns a Hash of token id => occurrence count.
def haystack_tally(index, text)
  scores = Hash.new{|h,k| h[k] = 0}

  Token.values_for(text).each do |value|
    id = Token.add(value).id

    index.tokens.push(id)
    scores[id] += 1
  end

  scores
end

private :haystack_config_for, :haystack_values_from, :haystack_tally
104
+
105
# Deletes the Index document of each given model, if there is one.
#
# The entry is looked up by the model's class name and id; before it is
# destroyed its token statistics are backed out with subtract.
def remove(*args)
  models_for(*args) do |doc|
    entry = where(:model_type => doc.class.name, :model_id => doc.id).first
    next unless entry

    subtract(entry)
    entry.destroy
  end
end
115
+
116
# Backs an Index document's contribution out of the token statistics.
#
# For every Token whose id is listed in index.tokens, the token's :count is
# decremented by the sum of the index's keyword and fulltext scores for that
# token id; Count[:tokens] is then decremented by the grand total.
#
# Returns the result of the final Count#inc call.
def subtract(index)
  total = 0

  Token.where(:id.in => index.tokens).each do |token|
    weight = [index.keyword_scores, index.fulltext_scores].inject(0) do |sum, scores|
      sum + scores[token.id].to_i
    end

    token.inc(:count, -weight)
    total += weight
  end

  Count[:tokens].inc(-total)
end
133
+
134
# Calls the block once per model described by +args+.
#
# args are flattened and nils dropped. An argument that responds to
# persisted? is passed to the block directly; anything else is asked for
# +all+ and each resulting element is passed to the block.
#
# Returns the flattened, compacted argument list.
def models_for(*args, &block)
  args.flatten.compact.each do |candidate|
    docs = candidate.respond_to?(:persisted?) ? [candidate] : candidate.all
    docs.each{|doc| block.call(doc)}
  end
end
146
+ end
147
+
148
# The indexed document; declared polymorphic.
belongs_to :model, :polymorphic => true

# Default for the per-token score hashes: a Hash whose missing keys read as 0.
zeroed_scores = proc{ Hash.new{|h,k| h[k] = 0} }

field :tokens,          :type => Array,   :default => []
field :score,           :type => Integer, :default => 0
field :keyword_scores,  :type => Hash,    :default => zeroed_scores
field :fulltext_scores, :type => Hash,    :default => zeroed_scores
field :facets,          :type => Hash,    :default => {}

# One ascending single-key index per listed key, declared in this order.
[
  :model_type,
  :model_id,
  :tokens,
  :score,
  :keyword_scores,
  :fulltext_scores
].each do |key|
  index({key => 1})
end
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,96 @@
1
+ module Mongoid
2
+ module Haystack
3
# Builds the Index query for a search.
#
# args - search terms (joined with spaces), optionally followed by an
#        options hash that Map.options_for! extracts:
#        :facets - value the index's facets must equal
#        :types  - model class(es) to restrict results to, by class name
#
# Entries match when their tokens include any token found for the search.
# Results are ordered by score, then by each token's keyword score, then by
# each token's fulltext score, all descending.
#
# Returns whatever Index.where(...).order_by(...) returns.
def search(*args, &block)
  options = Map.options_for!(args)
  query = args.join(' ')

  token_ids = search_tokens_for(query).map{|token| token.id}

  # match conditions
  conditions = {}
  conditions[:tokens.in] = token_ids
  conditions[:facets] = options[:facets] if options[:facets]
  if options[:types]
    conditions[:model_type.in] = Array(options[:types]).map{|type| type.name}
  end

  # sort order
  order = [["score", :desc]]
  order.concat(token_ids.map{|id| ["keyword_scores.#{ id }", :desc]})
  order.concat(token_ids.map{|id| ["fulltext_scores.#{ id }", :desc]})

  Index.where(conditions).order_by(order)
end
41
+
42
# Finds the stored tokens for a search string and orders them.
#
# The string is split into token values with Token.values_for and the
# matching Token documents are fetched. They are sorted descending by
# [rarity_bin, position], where position is the token's 1-based place in
# the fetched list and rarity_bin is given the current Count[:tokens]
# value as a Float.
#
# Returns the sorted Array of tokens.
def search_tokens_for(search)
  wanted = Token.values_for(search.to_s)
  found = Token.where(:value.in => wanted).to_a

  rank = Hash[found.each_with_index.map{|token, i| [token, i + 1]}]

  total = Count[:tokens].value.to_f

  sort_key = lambda{|token| [token.rarity_bin(total), rank[token]]}

  found.sort!{|a, b| sort_key[b] <=> sort_key[a]}
end
57
+
58
# Mixin that wires a model class into the search index.
#
# Including it gives the class a +search+ class method scoped to that
# class, and registers after_save / after_destroy callbacks that keep the
# model's Index document up to date.
module Search
  # Evaluated against the including class (instance_eval), so +def+ creates
  # class-level methods and after_save / after_destroy are called on it.
  ClassMethods = proc do
    # Same as Haystack.search, with the receiving class appended to the
    # :types option.
    def search(*args, &block)
      options = Map.options_for!(args)
      options[:types] = Array(options[:types]).flatten.compact
      options[:types].push(self)
      args.push(options)
      Haystack.search(*args, &block)
    end

    # Indexing is best-effort: a failure must not break saving the model.
    # Only StandardError is swallowed, so Interrupt, SystemExit and other
    # process-level exceptions still propagate (the previous
    # `rescue Object` swallowed those as well).
    after_save do |doc|
      begin
        Mongoid::Haystack::Index.add(doc) if doc.persisted?
      rescue StandardError
        nil
      end
    end

    # Un-indexing is best-effort as well; see the note above.
    after_destroy do |doc|
      begin
        Mongoid::Haystack::Index.remove(doc)
      rescue StandardError
        nil
      end
    end
  end

  # Evaluated against the including class with class_eval; currently empty.
  InstanceMethods = proc do
  end

  # Include hook: applies ClassMethods and InstanceMethods to +other+.
  # The ensure keeps them applied even if the inherited hook raises.
  def Search.included(other)
    super
  ensure
    other.instance_eval(&ClassMethods)
    other.class_eval(&InstanceMethods)
  end
end
95
+ end
96
+ end
@@ -0,0 +1,55 @@
1
+ module Mongoid
2
+ module Haystack
3
# A named integer sequence stored as a Mongoid document.
class Sequence
  include Mongoid::Document

  field :name,  :type => String
  field :value, :type => Integer, :default => 0

  validates_presence_of :name
  validates_uniqueness_of :name

  validates_presence_of :value

  index({:name => 1}, {:unique => true})

  # Per-process cache of Sequence documents, keyed by name.
  Cache = {}

  class << self
    # Returns the sequence called +name+ (converted with to_s). A cached
    # document is reused; otherwise it is fetched or created through
    # Haystack.find_or_create and stored in Cache.
    def for(name)
      key = name.to_s

      Cache[key] ||= Haystack.find_or_create(
        lambda{ where(:name => key).first },
        lambda{ create!(:name => key) }
      )
    end

    alias_method :[], :for

    # Builds a sequence name from a class and a field name: the class name
    # with "::" replaced by "." and downcased, a dash, then the field name.
    def sequence_name_for(klass, fieldname)
      prefix = klass.name.gsub('::', '.').downcase
      "#{ prefix }-#{ fieldname }"
    end
  end

  # Drop the cached document when the sequence is destroyed.
  after_destroy{|sequence| Cache.delete(sequence.name)}

  # Increments :value by one via inc.
  def next
    inc(:value, 1)
  end

  # Reloads the document and returns its value.
  def current_value
    reload.value
  end

  # Sets the value back to 0.
  def reset!
    update_attributes!(:value => 0)
  end
end
54
+ end
55
+ end
@@ -0,0 +1,79 @@
1
+ # encoding: utf-8
2
+
3
# Splits text into downcased word stems with stopwords removed.
#
# Relies on String#stem, which this file does not define; it must be
# provided by a stemmer library loaded beforehand.
module Stemming
  # Returns the stems of every word in +args+ (joined with spaces).
  #
  # Words are runs of word characters, ".", "_" and "-". A word is dropped
  # when it, or its stem, is a stopword. Stopwords are rejected before
  # stemming so they are never stemmed.
  def stem(*args)
    words = args.join(' ').scan(/[\w._-]+/)

    words.each_with_object([]) do |word, stems|
      word = word.downcase
      next if Stopwords.stopword?(word)

      stemmed = word.stem.downcase
      next if Stopwords.stopword?(stemmed)

      stems.push(stemmed)
    end
  end

  alias_method('for', 'stem')

  # Stopword lists read from the "stopwords/*.txt" files that sit in the
  # directory named after this file (its path without ".rb").
  module Stopwords
    dirname = File.expand_path(__FILE__).sub(/\.rb\Z/, '')
    glob = File.join(dirname, 'stopwords', '*.txt')

    # const_defined?(name, false) looks only at this module. A bare
    # defined?(All) / defined?(Index) is also true when an unrelated
    # top-level constant of that name exists, which would silently skip
    # building the tables below.
    List = {} unless const_defined?(:List, false)

    # List maps each file's base name to its non-empty, stripped lines plus
    # a trailing '' entry.
    Dir.glob(glob).each do |wordlist|
      name = File.basename(wordlist).split(/\./).first

      words = File.readlines(wordlist).map{|line| line.strip}
      words.delete_if{|word| word.empty?}
      words.push('')

      List[name] = words
    end

    # All is the sorted, de-duplicated union of the enabled lists. A list
    # whose file is absent contributes nothing instead of raising.
    #
    # Lists shipped but not enabled: full_french, full_spanish,
    # full_portuguese, full_italian, full_german, full_dutch,
    # full_norwegian, full_danish, full_russian, full_russiankoi8_r,
    # full_finnish.
    unless const_defined?(:All, false)
      All = []

      %w( english full_english extended_english ).each do |name|
        All.concat(List.fetch(name, []))
      end

      All.sort!
      All.uniq!
    end

    # Index maps every stopword to itself for constant-time lookup.
    unless const_defined?(:Index, false)
      Index = {}

      All.each do |word|
        Index[word] = word
      end
    end

    # True when +word+ is a key of Index.
    def stopword?(word)
      !!Index[word]
    end

    extend(Stopwords)
  end

  extend(Stemming)
end
76
+
77
# When this file is run directly, print the stems of a sample sentence.
p Stemming.stem("the foobars foo-bars foos bars cat and mountains") if $0 == __FILE__