mongoid-haystack 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. data/Rakefile +446 -0
  2. data/lib/app/models/mongoid/haystack/count.rb +1 -0
  3. data/lib/app/models/mongoid/haystack/index.rb +1 -0
  4. data/lib/app/models/mongoid/haystack/sequence.rb +1 -0
  5. data/lib/app/models/mongoid/haystack/token.rb +1 -0
  6. data/lib/mongoid-haystack.rb +79 -0
  7. data/lib/mongoid-haystack/count.rb +28 -0
  8. data/lib/mongoid-haystack/index.rb +165 -0
  9. data/lib/mongoid-haystack/search.rb +96 -0
  10. data/lib/mongoid-haystack/sequence.rb +55 -0
  11. data/lib/mongoid-haystack/stemming.rb +79 -0
  12. data/lib/mongoid-haystack/stemming/stopwords/english.txt +32 -0
  13. data/lib/mongoid-haystack/stemming/stopwords/extended_english.txt +216 -0
  14. data/lib/mongoid-haystack/stemming/stopwords/full_danish.txt +94 -0
  15. data/lib/mongoid-haystack/stemming/stopwords/full_dutch.txt +101 -0
  16. data/lib/mongoid-haystack/stemming/stopwords/full_english.txt +174 -0
  17. data/lib/mongoid-haystack/stemming/stopwords/full_finnish.txt +0 -0
  18. data/lib/mongoid-haystack/stemming/stopwords/full_french.txt +155 -0
  19. data/lib/mongoid-haystack/stemming/stopwords/full_german.txt +231 -0
  20. data/lib/mongoid-haystack/stemming/stopwords/full_italian.txt +279 -0
  21. data/lib/mongoid-haystack/stemming/stopwords/full_norwegian.txt +176 -0
  22. data/lib/mongoid-haystack/stemming/stopwords/full_portuguese.txt +203 -0
  23. data/lib/mongoid-haystack/stemming/stopwords/full_russian.txt +101 -0
  24. data/lib/mongoid-haystack/stemming/stopwords/full_russiankoi8_r.txt +101 -0
  25. data/lib/mongoid-haystack/stemming/stopwords/full_spanish.txt +313 -0
  26. data/lib/mongoid-haystack/token.rb +71 -0
  27. data/lib/mongoid-haystack/util.rb +67 -0
  28. data/mongoid-haystack.gemspec +73 -0
  29. data/test/helper.rb +28 -0
  30. data/test/mongoid-haystack_test.rb +119 -0
  31. data/test/testing.rb +196 -0
  32. metadata +123 -0
@@ -0,0 +1 @@
1
+ Mongoid::Haystack::Sequence
@@ -0,0 +1 @@
1
+ Mongoid::Haystack::Token
@@ -0,0 +1,79 @@
1
+ ##
2
+ #
3
# Entry point of the mongoid-haystack gem: defines version/dependency
# metadata and path helpers on Mongoid::Haystack, activates and requires the
# gem's dependencies, requires a stemmer implementation, and then loads the
# rest of the library from the directory named after this file.
module Mongoid
  module Haystack
    const_set :Version, '1.0.0'

    class << Haystack
      # Returns the gem version string stored in the Version constant.
      def version
        const_get :Version
      end

      # Runtime dependencies keyed by the name passed to `require`; each value
      # is the argument list handed to `gem` (gem name, version constraint).
      def dependencies
        {
          'mongoid' => ['mongoid', '~> 3.0'],
          'map'     => ['map', '~> 6.2'],
          'fattr'   => ['fattr', '~> 2.2'],
        }
      end

      # With no arguments, returns the library directory (this file's absolute
      # path minus the ".rb" suffix); with arguments, returns them joined onto
      # that directory.
      #
      # When a block is given it is called with the library directory
      # temporarily prepended to $LOAD_PATH. The block runs from the `ensure`
      # clause, so the method's return value is still the path, not the
      # block's result.
      def libdir(*args, &block)
        @libdir ||= File.expand_path(__FILE__).sub(/\.rb$/, '')
        args.empty? ? @libdir : File.join(@libdir, *args)
      ensure
        if block
          begin
            $LOAD_PATH.unshift(@libdir)
            block.call
          ensure
            $LOAD_PATH.shift
          end
        end
      end

      # Loads each named library with Kernel.load while the library directory
      # is on $LOAD_PATH.
      #
      # An argument that names an existing file is loaded verbatim, so
      # absolute paths containing spaces or '+' characters (for example a gem
      # installed under "/Users/Jane Doe/...") load correctly. Any other
      # argument keeps the historical behaviour of being split into several
      # library names on whitespace and '+'.
      def load(*libs)
        libs = libs.flatten.flat_map do |lib|
          lib = lib.to_s
          File.exist?(lib) ? [lib] : lib.scan(/[^\s+]+/)
        end

        libdir { libs.each { |lib| Kernel.load(lib) } }
      end
    end

    # RubyGems is optional here: a LoadError is deliberately ignored.
    begin
      require 'rubygems'
    rescue LoadError
      nil
    end

    # When RubyGems' `gem` method is available, activate each dependency at
    # its required version and then require it.
    if defined?(gem)
      dependencies.each do |lib, dependency|
        gem(*dependency)
        require(lib)
      end
    end

    # pry is optional; its absence is ignored.
    begin
      require 'pry'
    rescue LoadError
      nil
    end

    # A stemmer is mandatory: prefer fast_stemmer, fall back to stemmer, and
    # abort the process when neither can be required.
    begin
      require 'fast_stemmer'
    rescue LoadError
      begin
        require 'stemmer'
      rescue LoadError
        abort("mongoid-haystack requires either the 'fast-stemmer' or 'ruby-stemmer' gems")
      end
    end

    # `load` here resolves to Haystack.load defined above (self is Haystack).
    load Haystack.libdir('stemming.rb')
    load Haystack.libdir('util.rb')
    load Haystack.libdir('count.rb')
    load Haystack.libdir('sequence.rb')
    load Haystack.libdir('token.rb')
    load Haystack.libdir('index.rb')
    load Haystack.libdir('search.rb')

    # Expose the module's instance methods as Haystack.<method> as well.
    extend Haystack
  end
end
@@ -0,0 +1,28 @@
1
+ module Mongoid
2
+ module Haystack
3
# Named integer counter stored as a Mongoid document (one document per name).
class Count
  include Mongoid::Document

  field :name,  :type => String
  field :value, :type => Integer, :default => 0

  index({ :name => 1 }, { :unique => true })
  index({ :value => 1 })

  class << self
    # Looks up the counter called +name+ (compared as a string), creating it
    # when missing. The lookup and creation lambdas are handed to
    # Haystack.find_or_create, which is defined outside this file.
    def for(name)
      finder  = lambda { where(:name => name.to_s).first }
      builder = lambda { create!(:name => name.to_s) }

      Haystack.find_or_create(finder, builder)
    end

    # Shorthand for Count.for(name), e.g. Count[:tokens].
    def [](name)
      Count.for(name)
    end
  end

  # Increments the :value field by +n+ (default 1) via Mongoid's #inc.
  def inc(n = 1)
    super(:value, n)
  end
end
27
+ end
28
+ end
@@ -0,0 +1,165 @@
1
+ module Mongoid
2
+ module Haystack
3
# Adds the given arguments to the search index by delegating to Index.add.
# All arguments and the block are forwarded unchanged.
def Haystack.index(*args, &block)
  Index.add(*args, &block)
end
6
+
7
# Removes the given arguments from the search index by delegating to
# Index.remove. All arguments and the block are forwarded unchanged.
def Haystack.unindex(*args, &block)
  Index.remove(*args, &block)
end
10
+
11
# Re-indexes the model behind every existing Index document.
#
# When resolving an entry's model raises a StandardError, the entry is
# destroyed and skipped. Only StandardError is rescued, so an Interrupt or
# SystemExit raised during the lookup propagates instead of deleting the
# entry and silently continuing.
#
# The arguments and block are accepted but not used.
def Haystack.reindex!(*args, &block)
  Index.all.each do |entry|
    model =
      begin
        entry.model
      rescue StandardError
        entry.destroy
        next
      end

    # Calls Haystack.index (self is Haystack here), which delegates to Index.add.
    index(model)
  end
end
24
+
25
# Search index entry for one indexed model. Stores the ids of the tokens found
# in the model's keywords and fulltext, per-token occurrence counts for each of
# the two sources, an optional score, and optional facets.
class Index
  include Mongoid::Document

  class << Index
    # Indexes every model resolved from +args+ (see models_for).
    #
    # For each model the indexable content comes from model.to_haystack when
    # the model responds to it, otherwise from conventional attributes (see
    # haystack_config_for). An existing entry first has its contribution to
    # the token counters subtracted; the entry's token list and score hashes
    # are then rebuilt from scratch, so tokens for words that are no longer
    # part of the model do not linger in the index.
    def add(*args)
      models_for(*args) do |model|
        config = haystack_config_for(model)

        keywords = Array(config[:keywords]).join(' ')
        fulltext = Array(config[:fulltext]).join(' ')
        facets   = Map.for(config[:facets] || {})
        score    = config[:score]

        index =
          Haystack.find_or_create(
            ->{ where(:model => model).first },
            ->{ new(:model => model) }
          )

        # Must run before the entry's tokens/scores are replaced below, since
        # subtract reads the previously stored values.
        Index.subtract(index) if index.persisted?

        token_ids       = []
        keyword_scores  = Hash.new{|h,k| h[k] = 0}
        fulltext_scores = Hash.new{|h,k| h[k] = 0}

        Token.values_for(keywords).each do |value|
          id = Token.add(value).id

          token_ids.push(id)
          keyword_scores[id] += 1
        end

        Token.values_for(fulltext).each do |value|
          id = Token.add(value).id

          token_ids.push(id)
          fulltext_scores[id] += 1
        end

        index.keyword_scores = keyword_scores
        index.fulltext_scores = fulltext_scores

        index.score = score if score
        index.facets = facets if facets

        index.tokens = token_ids.uniq

        index.save!
      end
    end

    # Removes the index entry of every model resolved from +args+, first
    # subtracting the entry's contribution to the token counters.
    def remove(*args)
      models_for(*args) do |model|
        index = where(:model_type => model.class.name, :model_id => model.id).first
        next unless index

        subtract(index)
        index.destroy
      end
    end

    # Decrements each referenced token's :count by the number of occurrences
    # this entry recorded for it (keyword + fulltext), then decrements the
    # global Count[:tokens] counter by the total.
    def subtract(index)
      tokens = Token.where(:id.in => index.tokens)

      n = 0

      tokens.each do |token|
        keyword_score = index.keyword_scores[token.id].to_i
        fulltext_score = index.fulltext_scores[token.id].to_i

        i = keyword_score + fulltext_score
        token.inc(:count, -i)

        n += i
      end

      Count[:tokens].inc(-n)
    end

    # Yields each model described by +args+ (flattened, nils dropped):
    # arguments responding to #persisted? are yielded as-is, anything else is
    # expected to respond to #all and has each element of that yielded.
    def models_for(*args, &block)
      args.flatten.compact.each do |arg|
        if arg.respond_to?(:persisted?)
          block.call(arg)
        else
          arg.all.each do |model|
            block.call(model)
          end
        end
      end
    end

    private

    # Builds the indexing config (a Map with :keywords and :fulltext, plus
    # whatever else to_haystack supplies) for +model+.
    #
    # Without to_haystack, keywords come from the first of #keywords/#title
    # the model responds to, and fulltext from the first of
    # #fulltext/#text/#content/#body/#description/#to_s.
    def haystack_config_for(model)
      return Map.for(model.to_haystack) if model.respond_to?(:to_haystack)

      keywords = []
      keyword_attr = %w( keywords title ).find{|attr| model.respond_to?(attr)}
      keywords.push(*model.send(keyword_attr)) if keyword_attr

      fulltext = []
      fulltext_attr =
        %w( fulltext text content body description to_s ).find{|attr| model.respond_to?(attr)}
      fulltext.push(*model.send(fulltext_attr)) if fulltext_attr

      Map.for(
        :keywords => keywords,
        :fulltext => fulltext
      )
    end
  end

  belongs_to(:model, :polymorphic => true)

  field(:tokens, :type => Array, :default => [])
  field(:score, :type => Integer, :default => 0)
  field(:keyword_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
  field(:fulltext_scores, :type => Hash, :default => proc{ Hash.new{|h,k| h[k] = 0} })
  field(:facets, :type => Hash, :default => {})

  index({:model_type => 1})
  index({:model_id => 1})

  index({:tokens => 1})
  index({:score => 1})
  index({:keyword_scores => 1})
  index({:fulltext_scores => 1})
end
164
+ end
165
+ end
@@ -0,0 +1,96 @@
1
+ module Mongoid
2
+ module Haystack
3
# Builds the Index query for a search.
#
# A trailing options hash is extracted from +args+ with Map.options_for!; the
# remaining arguments are joined with spaces into the search string. Supported
# options, as read below:
#   :facets - value matched against the entry's facets field
#   :types  - one or more classes; restricts model_type to their names
#
# Entries must contain at least one of the search tokens. Results are ordered
# by score, then by each token's keyword score, then by each token's fulltext
# score, all descending. Returns the resulting criteria.
def search(*args, &block)
  options = Map.options_for!(args)
  query = args.join(' ')

  tokens = search_tokens_for(query)
  token_ids = tokens.map(&:id)

  conditions = { :tokens.in => token_ids }

  order = [["score", :desc]]
  order.concat(token_ids.map{|id| ["keyword_scores.#{ id }", :desc]})
  order.concat(token_ids.map{|id| ["fulltext_scores.#{ id }", :desc]})

  facets = options[:facets]
  conditions[:facets] = facets if facets

  types = options[:types]
  conditions[:model_type.in] = Array(types).map(&:name) if types

  Index.where(conditions).order_by(order)
end
41
+
42
# Returns the stored Token documents matching the words of +search+.
#
# The search string is turned into token values by Token.values_for, and the
# tokens with those values are fetched. They are sorted descending by
# rarity_bin (computed against the total token count from Count[:tokens]),
# with ties broken by descending 1-based position in the fetched result.
def search_tokens_for(search)
  values = Token.values_for(search.to_s)
  found = Token.where(:value.in => values).to_a

  positions = {}
  found.each.with_index(1){|token, position| positions[token] = position}

  total = Count[:tokens].value.to_f

  found.sort! do |a, b|
    left  = [b.rarity_bin(total), positions[b]]
    right = [a.rarity_bin(total), positions[a]]
    left <=> right
  end

  found
end
57
+
58
# Mixin that makes a model searchable. Including it defines a class-level
# `search` on the including class and registers after_save/after_destroy
# hooks that keep the model's index entry up to date.
module Search
  # Evaluated with instance_eval on the including class (see Search.included).
  ClassMethods = proc do
    # Same as Haystack.search, but with the receiving class appended to the
    # :types option so results are limited to this class (plus any types the
    # caller passed).
    def search(*args, &block)
      options = Map.options_for!(args)
      options[:types] = Array(options[:types]).flatten.compact
      options[:types].push(self)
      args.push(options)
      Haystack.search(*args, &block)
    end

    # Indexing is best-effort: ordinary errors are swallowed so they never
    # fail the save. Only StandardError is rescued, so Interrupt, SystemExit
    # and other non-standard exceptions still propagate.
    after_save do |doc|
      begin
        Mongoid::Haystack::Index.add(doc) if doc.persisted?
      rescue StandardError
        nil
      end
    end

    # Un-indexing is best-effort in the same way as indexing above.
    after_destroy do |doc|
      begin
        Mongoid::Haystack::Index.remove(doc)
      rescue StandardError
        nil
      end
    end
  end

  # Evaluated with class_eval on the including class; currently empty.
  InstanceMethods = proc do
  end

  # Hook run when the module is included: applies ClassMethods and
  # InstanceMethods to the including class. Both run from `ensure`, i.e. after
  # `super` regardless of its outcome.
  def Search.included(other)
    super
  ensure
    other.instance_eval(&ClassMethods)
    other.class_eval(&InstanceMethods)
  end
end
95
+ end
96
+ end
@@ -0,0 +1,55 @@
1
+ module Mongoid
2
+ module Haystack
3
# Named integer sequence stored as a Mongoid document. Looked-up sequences are
# memoized per name in the class-level Cache hash.
class Sequence
  include Mongoid::Document

  field :name, :type => String

  field :value, :default => 0, :type => Integer

  validates_presence_of :name
  validates_uniqueness_of :name

  validates_presence_of :value

  index({ :name => 1 }, { :unique => true })

  # name (String) => Sequence document
  Cache = {}

  class << self
    # Returns the sequence called +name+ (compared as a string), from Cache
    # when present; otherwise it is found or created via
    # Haystack.find_or_create and stored in Cache.
    def for(name)
      key = name.to_s

      cached = Cache[key]
      return cached if cached

      finder  = lambda { where(:name => key).first }
      builder = lambda { create!(:name => key) }

      Cache[key] = Haystack.find_or_create(finder, builder)
    end

    # Sequence[name] is the same as Sequence.for(name).
    alias_method :[], :for

    # Builds a sequence name from a class and a field name: the class name
    # with "::" replaced by "." and downcased, a dash, then the field name.
    def sequence_name_for(klass, fieldname)
      prefix = klass.name.gsub(/::/, '.').downcase
      "#{ prefix }-#{ fieldname }"
    end
  end

  # Drop the memoized document once the sequence is destroyed.
  after_destroy { |sequence| Cache.delete(sequence.name) }

  # Increments :value by one via Mongoid's #inc.
  def next
    inc(:value, 1)
  end

  # Reloads the document and returns its value.
  def current_value
    reload.value
  end

  # Sets the value back to 0 and persists it.
  def reset!
    update_attributes!(:value => 0)
  end
end
54
+ end
55
+ end
@@ -0,0 +1,79 @@
1
+ # encoding: utf-8
2
+
3
# Turns free text into a list of lowercase word stems with stopwords removed.
# Relies on String#stem, which this file does not define (it must be provided
# by whatever stemmer library the caller has loaded).
module Stemming
  # Joins +args+ with spaces, extracts words (runs of word characters, '.',
  # '_' and '-'), and returns the downcased stem of every word for which
  # neither the downcased word nor its stem is a stopword. Order and
  # duplicates are preserved.
  def stem(*args)
    words = args.join(' ').scan(/[\w._-]+/)

    words.each_with_object([]) do |word, stems|
      word = word.downcase
      stem = word.stem.downcase

      next if Stopwords.stopword?(word)
      next if Stopwords.stopword?(stem)

      stems.push(stem)
    end
  end

  alias_method('for', 'stem')

  module Stopwords
    # Word lists live in "<this file minus .rb>/stopwords/*.txt".
    dirname = __FILE__.sub(/\.rb\Z/, '')
    glob = File.join(dirname, 'stopwords', '*.txt')

    # list name (file basename up to the first '.') => array of words
    List = {}

    Dir.glob(glob).each do |wordlist|
      name = File.basename(wordlist).split(/\./).first

      # File.readlines instead of Kernel#open: the path is always a plain file.
      words = File.readlines(wordlist).map{|line| line.strip}
      words.delete_if{|word| word.empty?}
      # Every list ends with the empty string, so '' counts as a stopword
      # whenever an active list was loaded.
      words.push('')

      List[name] = words
    end

    unless defined?(All)
      All = []

      # Only the English lists are active. A list whose file is missing is
      # treated as empty instead of raising at load time. Other languages
      # (full_french, full_spanish, full_portuguese, full_italian,
      # full_german, full_dutch, full_norwegian, full_danish, full_russian,
      # full_russian_koi8_r, full_finnish) were left disabled in the original.
      %w( english full_english extended_english ).each do |name|
        All.concat(List.fetch(name, []))
      end

      All.sort!
      All.uniq!
    end

    unless defined?(Index)
      # word => word lookup table used by stopword?
      Index = {}

      All.each do |word|
        Index[word] = word
      end
    end

    # True when +word+ is in the active stopword set (exact match).
    def stopword?(word)
      !!Index[word]
    end

    extend(Stopwords)
  end

  extend(Stemming)
end
76
+
77
# Smoke test: print the stems of a sample sentence when this file is run
# directly as a script.
p Stemming.stem("the foobars foo-bars foos bars cat and mountains") if $PROGRAM_NAME == __FILE__