smart_search 0.0.4 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/config/database.yml +6 -0
- data/config/routes.rb +3 -2
- data/lib/smart_search/smart_search_engine.rb +10 -0
- data/lib/smart_search.rb +156 -78
- data/lib/smart_search_history.rb +7 -0
- data/lib/smart_search_ignore_word.rb +30 -0
- data/lib/smart_search_tag.rb +16 -0
- data/lib/smart_similarity.rb +133 -0
- data/lib/tasks/smart_search.rake +58 -0
- data/lib/tasks/testing.rake +27 -0
- data/test/test_document_multi_line.txt +6 -0
- data/test/test_document_one_line.txt +1 -0
- data/test/test_helper.rb +77 -0
- data/test/unit/01_smart_search_test.rb +81 -0
- data/test/unit/02_smart_search_similarity_test.rb +32 -0
- data/test/unit/03_smart_search_boost_test.rb +60 -0
- metadata +92 -35
- data/app/controllers/search_controller.rb +0 -46
- data/app/views/search/_results_small.html.erb +0 -25
- data/app/views/search/all.html.erb +0 -15
- data/app/views/search/results/_ansprechpartner.html.erb +0 -8
- data/app/views/search/results/_calendar_entry.html.erb +0 -29
- data/app/views/search/results/_company.html.erb +0 -25
- data/app/views/search/results/_concept.html.erb +0 -26
- data/app/views/search/results/_customer.html.erb +0 -51
- data/app/views/search/results/_family.html.erb +0 -1
- data/app/views/search/results/_firma.html.erb +0 -1
- data/app/views/search/results/_product.html.erb +0 -37
- data/app/views/search/results/_smart_document.html.erb +0 -8
- data/app/views/search/results/_topic.html.erb +0 -8
- data/app/views/search/results/_umsatz.html.erb +0 -1
- data/app/views/search/results/_umsatz_refactor.html.erb +0 -4
- data/app/views/search/results/_umsatz_storno.html.erb +0 -1
- data/app/views/search/results/_user.html.erb +0 -3
- data/app/views/search/results/_wiki_page.html.erb +0 -9
- data/lib/add_search_tags.rb +0 -8
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b4452571bb09d489fdd980a8b151245b891470c2
|
4
|
+
data.tar.gz: d5d8f1b4a301583170703bf6f7c0e322d3b1c59a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 001028ede264fd11fad4bbf5a2a1c106c28fa3d5581ef23f1c5839e432b4b66286efc55152b35738187da969a606277c86bb2e2bce9b9839e0dfefe7c5ef2697
|
7
|
+
data.tar.gz: 770d3e266a57b8e1adf23893c85893c2d3248e840974cbd050fad9f110930bebd8561365b377287688d2570d02173491e7a9b55b472e9969ce097b1cb06fb311
|
data/config/database.yml
ADDED
data/config/routes.rb
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
Rails.application.routes.draw do
|
3
3
|
|
4
4
|
#=== Search Routing
|
5
|
-
get "/
|
6
|
-
post
|
5
|
+
get "/suche/:query(/:only)", :controller => "search", :action => "all"
|
6
|
+
post "/suche/:query(/:only)", :controller => "search", :action => "all"
|
7
|
+
post "/suche", :controller => "search", :action => "all"
|
7
8
|
|
8
9
|
end
|
data/lib/smart_search.rb
CHANGED
@@ -1,6 +1,13 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
require "rails"
|
3
|
+
|
2
4
|
require "smart_search"
|
3
|
-
require "
|
5
|
+
require "smart_search/smart_search_engine"
|
6
|
+
|
7
|
+
require "smart_similarity"
|
8
|
+
require "smart_search_history"
|
9
|
+
require "smart_search_tag"
|
10
|
+
|
4
11
|
|
5
12
|
module SmartSearch
|
6
13
|
|
@@ -8,148 +15,222 @@ module SmartSearch
|
|
8
15
|
base.extend ClassMethods
|
9
16
|
end
|
10
17
|
|
18
|
+
# Class Methods for ActiveRecord
|
11
19
|
module ClassMethods
|
12
|
-
|
20
|
+
# Enable SmartSearch for the current ActiveRecord model.
|
21
|
+
# accepts options:
|
22
|
+
# - :on, define which attributes to add to the search index
|
23
|
+
# - :conditions, define default scope for all queries made
|
24
|
+
# - :group, group by column
|
25
|
+
# - :order, order by column
|
26
|
+
# see readme for details
|
27
|
+
def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
|
13
28
|
if table_exists?
|
14
29
|
# Check if search_tags exists
|
15
|
-
|
16
|
-
|
30
|
+
if !is_smart_search? || options[:force] == true || Rails.env == "test"
|
31
|
+
|
32
|
+
cattr_accessor :condition_default, :group_default, :tags, :order_default, :enable_similarity, :default_template_path
|
17
33
|
send :include, InstanceMethods
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
34
|
+
self.send(:after_save, :create_search_tags)
|
35
|
+
self.send(:before_destroy, :clear_search_tags)
|
36
|
+
self.enable_similarity ||= true
|
37
|
+
|
38
|
+
attr_accessor :query_score
|
39
|
+
|
23
40
|
# assign options
|
24
41
|
if options[:conditions].is_a?(String) && !options[:conditions].blank?
|
25
42
|
self.condition_default = options[:conditions]
|
26
43
|
elsif !options[:conditions].nil?
|
27
44
|
raise ArgumentError, ":conditions must be a valid SQL Query"
|
28
|
-
|
29
|
-
|
30
|
-
if self.column_names.include?("created_at")
|
31
|
-
self.order = options[:order] || "created_at"
|
32
|
-
else
|
33
|
-
self.order = options[:order] || "id"
|
45
|
+
else
|
46
|
+
self.condition_default = nil
|
34
47
|
end
|
35
|
-
|
36
|
-
|
37
|
-
self.group_default = options[:group]
|
38
|
-
elsif !options[:group].nil?
|
39
|
-
raise ArgumentError, ":group must be a valid SQL Query"
|
40
|
-
end
|
48
|
+
|
49
|
+
self.order_default = options[:order]
|
41
50
|
|
42
51
|
self.tags = options[:on] || []
|
43
52
|
end
|
44
53
|
end
|
45
54
|
end
|
46
55
|
|
56
|
+
# Verify if SmartSearch already loaded for this model
|
47
57
|
def is_smart_search?
|
48
58
|
self.included_modules.include?(InstanceMethods)
|
49
59
|
end
|
50
60
|
|
61
|
+
# defines where to look for a partial to load when displaying results for this model
|
51
62
|
def result_template_path
|
52
63
|
"/search/results/#{self.name.split("::").last.underscore}"
|
53
64
|
end
|
54
65
|
|
55
|
-
|
66
|
+
# Search the database for the given search tags
|
67
|
+
def find_by_tags(tags = "", options = {})
|
56
68
|
if self.is_smart_search?
|
57
|
-
|
69
|
+
|
70
|
+
# Save Data for similarity analysis
|
71
|
+
if tags.size > 3
|
72
|
+
self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
|
73
|
+
end
|
74
|
+
|
75
|
+
tags = tags.split(/[\ -]/).select {|t| !t.blank?}
|
58
76
|
|
59
77
|
# Fallback for Empty String
|
60
78
|
tags << "#" if tags.empty?
|
61
79
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
80
|
+
# Similarity
|
81
|
+
if self.enable_similarity == true
|
82
|
+
tags.map! do |t|
|
83
|
+
similars = SmartSimilarity.similars(t, :increment_counter => true).join("|")
|
84
|
+
"search_tags REGEXP '#{similars}'"
|
85
|
+
end
|
86
|
+
|
66
87
|
else
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
88
|
+
tags.map! {|t| "search_tags LIKE '%#{t}%'"}
|
89
|
+
end
|
90
|
+
|
91
|
+
# Load ranking from Search tags
|
92
|
+
result_ids = []
|
93
|
+
result_scores = {}
|
94
|
+
SmartSearchTag.connection.select_all("select entry_id, sum(boost) as score, group_concat(search_tags) as grouped_tags
|
95
|
+
from smart_search_tags where `table_name`= '#{self.table_name}' and
|
96
|
+
|
97
|
+
(#{tags.join(' OR ')}) group by entry_id having (#{tags.join(' AND ').gsub('search_tags', 'grouped_tags')}) order by score DESC").each do |r|
|
98
|
+
result_ids << r["entry_id"].to_i
|
99
|
+
result_scores[r["entry_id"].to_i] = r['score'].to_f
|
100
|
+
end
|
101
|
+
|
102
|
+
# Enable unscoped searching
|
103
|
+
if options[:unscoped] == true
|
104
|
+
results = self.unscoped.where(:id => result_ids)
|
75
105
|
else
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
end
|
84
|
-
if value.is_a?(String) || value.is_a?(Numeric)
|
85
|
-
value = "'#{value}'"
|
86
|
-
condi << " AND #{field.to_s} = #{value}"
|
87
|
-
elsif value.is_a?(Array)
|
88
|
-
condi << " AND #{field.to_s} IN (#{value.join(",")})"
|
89
|
-
end
|
90
|
-
|
91
|
-
end
|
92
|
-
sql << " #{condi}"
|
93
|
-
end
|
94
|
-
if !group_by.nil?
|
95
|
-
sql << " GROUP BY #{group_by}"
|
96
|
-
elsif self.group_default
|
97
|
-
sql << " GROUP BY #{self.group_default}"
|
106
|
+
results = self.where(:id => result_ids)
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
if options[:conditions]
|
112
|
+
results = results.where(options[:conditions])
|
98
113
|
end
|
99
114
|
|
100
|
-
if !self.
|
101
|
-
|
115
|
+
if !self.condition_default.blank?
|
116
|
+
results = results.where(self.condition_default)
|
117
|
+
end
|
118
|
+
|
119
|
+
if options[:group]
|
120
|
+
results = results.group(options[:group])
|
102
121
|
end
|
103
122
|
|
104
|
-
|
105
|
-
|
123
|
+
if options[:order] || self.order_default
|
124
|
+
results = results.order(options[:order] || self.order_default)
|
125
|
+
else
|
126
|
+
ordered_results = []
|
127
|
+
results.each do |r|
|
128
|
+
r.query_score = result_scores[r.id]
|
129
|
+
ordered_results[result_ids.index(r.id)] = r
|
130
|
+
end
|
131
|
+
|
132
|
+
results = ordered_results.compact
|
133
|
+
end
|
134
|
+
|
135
|
+
return results
|
106
136
|
else
|
107
137
|
raise "#{self.inspect} is not a SmartSearch"
|
108
138
|
end
|
109
139
|
end
|
110
140
|
|
141
|
+
# reload search_tags for entire table based on the attributes defined in ':on' option passed to the 'smart_search' method
|
111
142
|
def set_search_index
|
112
143
|
s = self.all.size.to_f
|
113
144
|
self.all.each_with_index do |a, i|
|
114
145
|
a.create_search_tags
|
115
|
-
a.send(:update_without_callbacks)
|
116
146
|
done = ((i+1).to_f/s)*100
|
117
147
|
printf "Set search index for #{self.name}: #{done}%% \r"
|
118
148
|
end
|
119
149
|
end
|
150
|
+
|
151
|
+
# Load all search tags for this table into similarity index
|
152
|
+
def set_similarity_index
|
153
|
+
|
154
|
+
search_tags_list = self.connection.select_all("SELECT search_tags from #{SmartSearchTag.table_name} where `table_name` = #{self.table_name}").map {|r| r["search_tags"]}
|
155
|
+
|
156
|
+
SmartSimilarity.create_from_text(search_tags_list.join(" "))
|
157
|
+
end
|
120
158
|
|
121
159
|
end
|
122
160
|
|
161
|
+
# Instance Methods for ActiveRecord
|
123
162
|
module InstanceMethods
|
124
163
|
|
164
|
+
# Load the result template path for this instance
|
125
165
|
def result_template_path
|
126
166
|
self.class.result_template_path
|
127
167
|
end
|
128
168
|
|
169
|
+
# create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
|
129
170
|
def create_search_tags
|
130
|
-
tags
|
171
|
+
tags = []
|
172
|
+
|
131
173
|
self.class.tags.each do |tag|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
174
|
+
|
175
|
+
if !tag.is_a?(Hash)
|
176
|
+
tag = {:field_name => tag, :boost => 1, :search_tags => ""}
|
177
|
+
else
|
178
|
+
tag[:search_tags] = ""
|
179
|
+
tag[:boost] ||= 1
|
180
|
+
end
|
181
|
+
|
182
|
+
if tag[:field_name].is_a?(Symbol)
|
183
|
+
tag[:search_tags] << self.send(tag[:field_name]).to_s
|
184
|
+
elsif tag[:field_name].is_a?(String)
|
185
|
+
tag_methods = tag[:field_name].split(".")
|
136
186
|
tagx = self.send(tag_methods[0])
|
137
187
|
tag_methods[1..-1].each do |x|
|
138
188
|
tagx = tagx.send(x) rescue ""
|
139
189
|
end
|
140
|
-
|
190
|
+
tag[:search_tags] << tagx.to_s
|
191
|
+
end
|
192
|
+
|
193
|
+
tag[:search_tags] = tag[:search_tags].split(" ").uniq.join(" ").downcase.clear_html
|
194
|
+
tags << tag
|
195
|
+
end
|
196
|
+
|
197
|
+
|
198
|
+
self.clear_search_tags
|
199
|
+
|
200
|
+
# Merge search tags with same boost
|
201
|
+
@merged_tags = {}
|
202
|
+
|
203
|
+
tags.each do |t|
|
204
|
+
boost = t[:boost]
|
205
|
+
|
206
|
+
if @merged_tags[boost]
|
207
|
+
|
208
|
+
@merged_tags[boost][:field_name] << ",#{t[:field_name]}"
|
209
|
+
@merged_tags[boost][:search_tags] << " #{t[:search_tags]}"
|
210
|
+
else
|
211
|
+
@merged_tags[boost] = {:field_name => "#{t[:field_name]}", :search_tags => t[:search_tags], :boost => boost }
|
212
|
+
end
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
@merged_tags.values.each do |t|
|
217
|
+
if !t[:search_tags].blank? && t[:search_tags].size > 1
|
218
|
+
SmartSearchTag.create(t.merge!(:table_name => self.class.table_name, :entry_id => self.id, :search_tags => t[:search_tags].strip.split(" ").uniq.join(" ")))
|
141
219
|
end
|
142
220
|
end
|
143
|
-
searchtags = tags.join(" ").split(" ")
|
144
|
-
searchtags = searchtags.uniq.join(" ")
|
145
|
-
search_tags_min = searchtags.gsub(" ", "").downcase
|
146
221
|
|
147
|
-
|
148
|
-
|
222
|
+
end
|
223
|
+
|
224
|
+
# Remove search data for the instance from the index
|
225
|
+
def clear_search_tags
|
226
|
+
if !self.id.nil?
|
227
|
+
SmartSearchTag.connection.execute("DELETE from #{SmartSearchTag.table_name} where `table_name` = '#{self.class.table_name}' and entry_id = #{self.id}")
|
228
|
+
end
|
229
|
+
end
|
149
230
|
|
150
231
|
end
|
151
232
|
|
152
|
-
|
233
|
+
|
153
234
|
class Config
|
154
235
|
|
155
236
|
cattr_accessor :search_models
|
@@ -169,10 +250,7 @@ module SmartSearch
|
|
169
250
|
end
|
170
251
|
|
171
252
|
|
172
|
-
class SmartSearchEngine < Rails::Engine
|
173
|
-
end
|
174
|
-
|
175
253
|
end
|
176
254
|
|
177
255
|
|
178
|
-
ActiveRecord::Base.send(:include, SmartSearch)
|
256
|
+
ActiveRecord::Base.send(:include, SmartSearch)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# To keep words out of the index, add them to this table
|
2
|
+
# TODO: It's not working yet
|
3
|
+
class SmartSearchIgnoreWord < ActiveRecord::Base
|
4
|
+
|
5
|
+
#= Configuration
|
6
|
+
self.table_name = "smart_search_ignore_words"
|
7
|
+
#== Associations
|
8
|
+
# => Stuff in Here
|
9
|
+
|
10
|
+
#== Plugins and modules
|
11
|
+
#=== PlugIns
|
12
|
+
# => Stuff in Here
|
13
|
+
|
14
|
+
#=== include Modules
|
15
|
+
# => Stuff in Here
|
16
|
+
|
17
|
+
#== Constants
|
18
|
+
# => Stuff in Here
|
19
|
+
|
20
|
+
#== Validation and Callbacks
|
21
|
+
#=== Validation
|
22
|
+
validates_uniqueness_of :word
|
23
|
+
|
24
|
+
#=== Callbacks
|
25
|
+
# => Stuff in Here
|
26
|
+
|
27
|
+
|
28
|
+
# => END
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Represents the search index
|
2
|
+
class SmartSearchTag < ActiveRecord::Base
|
3
|
+
|
4
|
+
|
5
|
+
# Get a list of available search tags
|
6
|
+
def self.tags_list(query, table= nil)
|
7
|
+
if query.size < 3
|
8
|
+
return []
|
9
|
+
else
|
10
|
+
list = sql_query!("select search_tags from #{self.table_name} where search_tags like '%#{query}%' #{"and table_name = '#{table}'" if table} ").map {|r| r['search_tags']}
|
11
|
+
list = list.join(" ").clear_html.split(" ").uniq
|
12
|
+
return list.sort.grep(Regexp.new(query))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# This class is used to build the similarity index
|
2
|
+
class SmartSimilarity < ActiveRecord::Base
|
3
|
+
#= Configuration
|
4
|
+
serialize :similarities, Array
|
5
|
+
self.table_name = "smart_search_similarities"
|
6
|
+
|
7
|
+
#== Associations
|
8
|
+
# => Stuff in Here
|
9
|
+
|
10
|
+
#== Plugins and modules
|
11
|
+
#=== PlugIns
|
12
|
+
# => Stuff in Here
|
13
|
+
|
14
|
+
#=== include Modules
|
15
|
+
# => Stuff in Here
|
16
|
+
|
17
|
+
#== Constants
|
18
|
+
# Defines the minimum score a word similarity check must reach
|
19
|
+
SIMILARITY_FACTOR = 0.77
|
20
|
+
# Defines the first similarity check method
|
21
|
+
SIMILARITY_METHOD_1 = :jarowinkler
|
22
|
+
# Defines the second similarity check method
|
23
|
+
SIMILARITY_METHOD_2 = :levenshtein
|
24
|
+
|
25
|
+
# The average of both results is computed and compared with 'SIMILARITY_FACTOR'
|
26
|
+
|
27
|
+
# Limit the number of similar words kept per phrase
|
28
|
+
SIMILARITY_LIMIT = 8
|
29
|
+
|
30
|
+
# Use this regexp to split texts into words
|
31
|
+
SPLITTING_REGEXP = /\b/
|
32
|
+
|
33
|
+
#== Validation and Callbacks
|
34
|
+
#=== Validation
|
35
|
+
|
36
|
+
#=== Callbacks
|
37
|
+
# => Stuff in Here
|
38
|
+
# => END
|
39
|
+
|
40
|
+
# Create similarity data based on the given text
|
41
|
+
# This method is used to generate data from every source, e.g. file, URL, single words, etc.
|
42
|
+
def self.create_from_text(text)
|
43
|
+
# prepare text
|
44
|
+
prepared_text = text.downcase.split(SPLITTING_REGEXP).uniq
|
45
|
+
prepared_text = prepared_text.select {|w| w.size >= 3 && !w.match(/[0-9\-_<>\.\/(){}&\?"'@*+$!=,:'#;]/)}
|
46
|
+
list = {}
|
47
|
+
prepared_text.each do |word|
|
48
|
+
# Load index from database
|
49
|
+
words_in_db = self.find_by_phrase(word)
|
50
|
+
if words_in_db.nil?
|
51
|
+
self.connection.execute "INSERT INTO `#{self.table_name}` (`phrase`, `ind`) VALUES ('#{word}', '#{word[0..1]}');"
|
52
|
+
current = []
|
53
|
+
else
|
54
|
+
current = words_in_db.similarities
|
55
|
+
end
|
56
|
+
|
57
|
+
# If word is a substring of similarity word, it must not be saved,
|
58
|
+
# cause it will match anyway:
|
59
|
+
# 'how' will match 'show', so 'show' is not needed in index for 'how'
|
60
|
+
# Vice Versa, 'how' should also be found if query is 'show', so it will be kept in the index
|
61
|
+
current += prepared_text.select {|w| w != word && self.match_words(w,word) >= SIMILARITY_FACTOR && !w.match(word)}
|
62
|
+
|
63
|
+
list[word] = current.uniq
|
64
|
+
end
|
65
|
+
|
66
|
+
# Write to Database
|
67
|
+
list.each do |word, sims|
|
68
|
+
sims = sims.sort_by {|s| self.match_words(s,word) }.reverse.first(SIMILARITY_LIMIT)
|
69
|
+
|
70
|
+
self.connection.execute 'UPDATE %s set similarities = "%s" where phrase = "%s"' % [self.table_name, sims.to_yaml, word] rescue nil
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Add a single word to the database and check whether similar words already exist
|
75
|
+
def self.add_word(word)
|
76
|
+
words = [word]
|
77
|
+
phrases = self.connection.select_all("SELECT phrase from smart_search_similarities").map {|r| r["phrase"] }
|
78
|
+
words += phrases.select {|p| self.match_words(p,word) >= SIMILARITY_FACTOR }
|
79
|
+
|
80
|
+
self.create_from_text(words.join(" "))
|
81
|
+
end
|
82
|
+
|
83
|
+
# Load an entire file to the index.
|
84
|
+
# Best used for loading big dictionary files.
|
85
|
+
# Uses 'spawnling' to split the data into 8 stacks and load them simultaneously
|
86
|
+
def self.load_file(path)
|
87
|
+
count = %x{wc -l #{path}}.split[0].to_i.max(1)
|
88
|
+
puts "loading file: #{path}"
|
89
|
+
puts "=> #{count} rows"
|
90
|
+
|
91
|
+
if count == 1
|
92
|
+
File.open(path, "r").read.split(SPLITTING_REGEXP).each {|w| self.add_word(w)}
|
93
|
+
else
|
94
|
+
File.open(path, "r").read.split(SPLITTING_REGEXP).seperate([8,count].min).each_with_index do |stack, si|
|
95
|
+
stack.each_with_index do |l,i|
|
96
|
+
self.add_word(l)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Load words from website and save them to index
|
103
|
+
def self.load_url(url)
|
104
|
+
self.create_from_text(%x(curl #{url}))
|
105
|
+
end
|
106
|
+
|
107
|
+
# Loads your created query history and saves them to the index
|
108
|
+
def self.load_from_query_history
|
109
|
+
queries = ActiveRecord::Base.connection.select_all("SELECT query from `#{::SmartSearchHistory.table_name}`").map {|r| r["query"]}
|
110
|
+
queries.each {|q| self.add_word(q) }
|
111
|
+
|
112
|
+
self.connection.execute("TRUNCATE `#{::SmartSearchHistory.table_name}`")
|
113
|
+
end
|
114
|
+
|
115
|
+
# Get array of similar words including orig word
|
116
|
+
def self.similars(word, options = {})
|
117
|
+
list = self.where(:phrase => word).first
|
118
|
+
if list.nil?
|
119
|
+
return [word]
|
120
|
+
else
|
121
|
+
self.connection.execute("UPDATE `smart_search_similarities` SET `count` = #{list.count+1} where `smart_search_similarities`.`phrase` = '#{list.phrase}'")
|
122
|
+
return [word, list.similarities].flatten
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
# Return the match score for two words based on the two defined similarity methods
|
127
|
+
def self.match_words(word1, word2)
|
128
|
+
x1 = word1.send("#{SIMILARITY_METHOD_1}_similar", word2)
|
129
|
+
x2 = word1.send("#{SIMILARITY_METHOD_2}_similar", word2)
|
130
|
+
return (x1+x2)/2.0
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
namespace :smart_search do
|
2
|
+
desc "Load similarity data from query history"
|
3
|
+
task :similarity_from_query_history => :environment do
|
4
|
+
require File.expand_path("../../smart_similarity", __FILE__)
|
5
|
+
SmartSimilarity.load_from_query_history
|
6
|
+
end
|
7
|
+
|
8
|
+
desc "Load similarity data from file - Use FILE=path/to/file to specify file"
|
9
|
+
task :similarity_from_file => :environment do
|
10
|
+
require File.expand_path("../../smart_similarity", __FILE__)
|
11
|
+
if ENV['FILE_PATH'].nil?
|
12
|
+
raise ArgumentError, "No file specified. "
|
13
|
+
elsif !File.exist?(ENV['FILE_PATH'])
|
14
|
+
raise ArgumentError, "File not found "
|
15
|
+
else
|
16
|
+
SmartSimilarity.load_file(ENV['FILE_PATH'])
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
|
21
|
+
task :similarity_from_url => :environment do
|
22
|
+
require File.expand_path("../../smart_similarity", __FILE__)
|
23
|
+
if ENV['URL'].nil?
|
24
|
+
raise ArgumentError, "No URL specified. "
|
25
|
+
else
|
26
|
+
SmartSimilarity.load_url(ENV['URL'])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
desc "load ignore words list"
|
33
|
+
task :load_ignore_words => :environment do
|
34
|
+
require File.expand_path("../../smart_search_ignore_word", __FILE__)
|
35
|
+
|
36
|
+
dic_path = File.expand_path("../../../dictionaries/*", __FILE__)
|
37
|
+
|
38
|
+
raise dic_path.inspect
|
39
|
+
|
40
|
+
dic_folders = Dir.glob(dic_path).select {|d| File.directory?(d)}
|
41
|
+
|
42
|
+
dic_folders.each do |folder|
|
43
|
+
locale = folder.split("/").last
|
44
|
+
word_file = File.join(folder, "#{locale}.ignore_words.dic")
|
45
|
+
if File.exists?(word_file)
|
46
|
+
File.open(word_file, "r").each_line do |word|
|
47
|
+
SmartSearchIgnoreWord.create(:word => word.strip.downcase, :locale => locale)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "active_record"
|
2
|
+
namespace :db do
|
3
|
+
desc "Create test database. Overwrite dasebase config with USERNAME=, PASSWORD=, DATABASE="
|
4
|
+
task :create_test_db do
|
5
|
+
config = YAML::load(File.open(File.expand_path("config/database.yml")))["test"]
|
6
|
+
|
7
|
+
# Overwrite config
|
8
|
+
config.merge!('database' => ENV['DATABASE']) if ENV['DATABASE']
|
9
|
+
config.merge!('username' => ENV['USERNAME']) if ENV['USERNAME']
|
10
|
+
config.merge!('password' => ENV['PASSWORD']) if ENV['PASSWORD']
|
11
|
+
|
12
|
+
ActiveRecord::Base.establish_connection(config.merge('database' => nil))
|
13
|
+
ActiveRecord::Base.connection.drop_database(config['database']) rescue nil
|
14
|
+
ActiveRecord::Base.connection.create_database(config['database'])
|
15
|
+
ActiveRecord::Base.establish_connection(config)
|
16
|
+
end
|
17
|
+
|
18
|
+
task :migrate do
|
19
|
+
ActiveRecord::Migrator.migrate(ActiveRecord::Migrator.migrations_paths)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
task :test_smart_search do
|
24
|
+
Rake::Task["db:create_test_db"].execute
|
25
|
+
Rake::Task["db:migrate"].execute
|
26
|
+
Rake::Task["test"].execute
|
27
|
+
end
|
@@ -0,0 +1,6 @@
|
|
1
|
+
The quick brown fox jumps over the lazy dog.
|
2
|
+
The five boxing wizards jump quickly.
|
3
|
+
Sphinx of all black quartz judge my vow.
|
4
|
+
A quick movement of the enemy will jeopardize six gunboats.
|
5
|
+
Five quacking Zephyrs jolt my wax bed.
|
6
|
+
Heavy boxes perform waltzes and jigs.
|
@@ -0,0 +1 @@
|
|
1
|
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|