smart_search 0.0.9 → 0.0.55
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/add_search_tags.rb +8 -0
- data/lib/smart_search.rb +83 -193
- data/test/test_helper.rb +6 -7
- data/test/unit/{01_smart_search_test.rb → smart_search_test.rb} +6 -14
- metadata +12 -83
- data/config/database.yml +0 -6
- data/lib/smart_search/engine.rb +0 -12
- data/lib/smart_search_history.rb +0 -7
- data/lib/smart_search_tag.rb +0 -16
- data/lib/smart_similarity.rb +0 -133
- data/lib/tasks/smart_search.rake +0 -20
- data/lib/tasks/testing.rake +0 -27
- data/test/test_document_multi_line.txt +0 -6
- data/test/test_document_one_line.txt +0 -1
- data/test/unit/02_smart_search_similarity_test.rb +0 -32
- data/test/unit/03_smart_search_boost_test.rb +0 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebbed44ee232de1e1b002de0114c895f22669cbe
|
4
|
+
data.tar.gz: 6fe910045758f11f1c1fccc40229ca3b14919361
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92f42ecd8d1f22b42221f93453ff31df341d546771525183c8fef42cd900aa05db1148c3251832e75e28e4f38170916b9aee4bef9862bc8d72b06959f89a17eb
|
7
|
+
data.tar.gz: b1d4e94ec2556002bce8a453d6830eae3eb6808f1af30580e988bf5ccb785ff4eee7014d6ad7951a3eae5b9d0a22e5b64eb72ad9763f6185c7c73fa00ed46495
|
data/lib/smart_search.rb
CHANGED
@@ -1,261 +1,151 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
-
require "rails"
|
3
|
-
|
4
2
|
require "smart_search"
|
5
|
-
require "
|
6
|
-
|
7
|
-
require "smart_similarity"
|
8
|
-
require "smart_search_history"
|
9
|
-
require "smart_search_tag"
|
10
|
-
|
3
|
+
require "add_search_tags"
|
11
4
|
|
12
5
|
module SmartSearch
|
13
|
-
|
6
|
+
|
14
7
|
def self.included(base)
|
15
8
|
base.extend ClassMethods
|
16
|
-
end
|
17
|
-
|
18
|
-
# Class Methods for ActiveRecord
|
9
|
+
end
|
10
|
+
|
19
11
|
module ClassMethods
|
20
|
-
# Enable SmartSearch for the current ActiveRecord model.
|
21
|
-
# accepts options:
|
22
|
-
# - :on, define which attributes to add to the search index
|
23
|
-
# - :conditions, define default scope for all queries made
|
24
|
-
# - :group, group by column
|
25
|
-
# - :order, order by column
|
26
|
-
# see readme for details
|
27
12
|
def smart_search(options = {:on => [], :conditions => nil, :group => nil, :order => "created_at", :force => false})
|
28
13
|
if table_exists?
|
29
14
|
# Check if search_tags exists
|
30
|
-
if !is_smart_search? || options[:force] == true
|
31
|
-
|
32
|
-
cattr_accessor :condition_default, :group_default, :tags, :order_default
|
15
|
+
if !is_smart_search? || options[:force] == true
|
16
|
+
puts "\nAdding SmartSearch to #{self.name}: #{options.inspect}"
|
17
|
+
cattr_accessor :condition_default, :group_default, :tags, :order_default
|
33
18
|
send :include, InstanceMethods
|
34
|
-
|
35
|
-
self.
|
36
|
-
|
37
|
-
|
38
|
-
attr_accessor :query_score, :dont_update_search_tags
|
19
|
+
if self.column_names.index("search_tags").nil?
|
20
|
+
::AddSearchTags.add_to_table(self.table_name)
|
21
|
+
end
|
22
|
+
self.send(:before_save, :create_search_tags)
|
39
23
|
|
40
24
|
# options zuweisen
|
41
25
|
if options[:conditions].is_a?(String) && !options[:conditions].blank?
|
42
26
|
self.condition_default = options[:conditions]
|
43
27
|
elsif !options[:conditions].nil?
|
44
|
-
raise ArgumentError, ":conditions must be a valid SQL Query"
|
28
|
+
raise ArgumentError, ":conditions must be a valid SQL Query"
|
45
29
|
else
|
46
30
|
self.condition_default = nil
|
47
|
-
end
|
48
|
-
|
49
|
-
self.
|
31
|
+
end
|
32
|
+
|
33
|
+
if self.column_names.include?("created_at")
|
34
|
+
self.order_default = options[:order] || "created_at"
|
35
|
+
else
|
36
|
+
self.order_default = options[:order] || "id"
|
37
|
+
end
|
50
38
|
|
51
39
|
self.tags = options[:on] || []
|
52
40
|
end
|
53
|
-
end
|
41
|
+
end
|
54
42
|
end
|
55
|
-
|
56
|
-
# Verify if SmartSearch already loaded for this model
|
43
|
+
|
57
44
|
def is_smart_search?
|
58
45
|
self.included_modules.include?(InstanceMethods)
|
59
46
|
end
|
60
|
-
|
61
|
-
# defines where to look for a partial to load when displaying results for this model
|
47
|
+
|
62
48
|
def result_template_path
|
63
49
|
"/search/results/#{self.name.split("::").last.underscore}"
|
64
|
-
end
|
65
|
-
|
66
|
-
# Serach database for given search tags
|
50
|
+
end
|
51
|
+
|
67
52
|
def find_by_tags(tags = "", options = {})
|
68
53
|
if self.is_smart_search?
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# Save Data for similarity analysis
|
73
|
-
if tags.size > 3
|
74
|
-
self.connection.execute("INSERT INTO `#{::SmartSearchHistory.table_name}` (`query`) VALUES ('#{tags.gsub(/[^a-zA-ZäöüÖÄÜß\ ]/, '')}');")
|
75
|
-
end
|
76
|
-
|
77
|
-
tags = tags.gsub(/[\(\)\[\]\'\"\*\%\|]/, '').split(/[\ -]/).select {|t| !t.blank?}
|
78
|
-
|
54
|
+
tags = tags.split(" ")
|
55
|
+
|
79
56
|
# Fallback for Empty String
|
80
57
|
tags << "#" if tags.empty?
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
else
|
90
|
-
tags.map! {|t| "search_tags LIKE '%#{t}%'"}
|
91
|
-
end
|
92
|
-
|
93
|
-
# Load ranking from Search tags
|
94
|
-
result_ids = []
|
95
|
-
result_scores = {}
|
96
|
-
SmartSearchTag.connection.select_all("select entry_id, sum(boost) as score, group_concat(search_tags) as grouped_tags
|
97
|
-
from smart_search_tags where `table_name`= '#{self.table_name}' and
|
98
|
-
|
99
|
-
(#{tags.join(' OR ')}) group by entry_id having (#{tags.join(' AND ').gsub('search_tags', 'grouped_tags')}) order by score DESC").each do |r|
|
100
|
-
result_ids << r["entry_id"].to_i
|
101
|
-
result_scores[r["entry_id"].to_i] = r['score'].to_f
|
102
|
-
end
|
103
|
-
|
104
|
-
# Enable unscoped searching
|
105
|
-
if options[:unscoped] == true
|
106
|
-
results = self.unscoped.where(:id => result_ids)
|
107
|
-
else
|
108
|
-
results = self.where(:id => result_ids)
|
109
|
-
end
|
110
|
-
|
58
|
+
|
59
|
+
tags.map! {|t| "search_tags LIKE '%#{t.downcase}%'"}
|
60
|
+
|
61
|
+
|
62
|
+
results = self.where("(#{tags.join(' AND ')})")
|
63
|
+
|
111
64
|
if options[:conditions]
|
112
65
|
results = results.where(options[:conditions])
|
113
66
|
end
|
114
|
-
|
67
|
+
|
115
68
|
if !self.condition_default.blank?
|
116
69
|
results = results.where(self.condition_default)
|
117
|
-
end
|
118
|
-
|
70
|
+
end
|
71
|
+
|
119
72
|
if options[:group]
|
120
73
|
results = results.group(options[:group])
|
121
|
-
end
|
122
|
-
|
123
|
-
if options[:order]
|
124
|
-
results = results.order(options[:order]
|
74
|
+
end
|
75
|
+
|
76
|
+
if options[:order]
|
77
|
+
results = results.order(options[:order])
|
125
78
|
else
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
ordered_results[result_ids.index(r.id)] = r
|
130
|
-
end
|
131
|
-
|
132
|
-
results = ordered_results.compact
|
133
|
-
end
|
134
|
-
|
79
|
+
results = results.order(self.order_default)
|
80
|
+
end
|
81
|
+
|
135
82
|
return results
|
136
|
-
else
|
83
|
+
else
|
137
84
|
raise "#{self.inspect} is not a SmartSearch"
|
138
|
-
end
|
85
|
+
end
|
139
86
|
end
|
140
|
-
|
141
|
-
# reload search_tags for entire table based on the attributes defined in ':on' option passed to the 'smart_search' method
|
87
|
+
|
142
88
|
def set_search_index
|
143
89
|
s = self.all.size.to_f
|
144
90
|
self.all.each_with_index do |a, i|
|
145
91
|
a.create_search_tags
|
92
|
+
a.send(:update_without_callbacks)
|
146
93
|
done = ((i+1).to_f/s)*100
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
SmartSimilarity.create_from_text(search_tags_list.join(" "))
|
155
|
-
end
|
156
|
-
|
157
|
-
end
|
158
|
-
|
159
|
-
# Instance Methods for ActiveRecord
|
94
|
+
printf "Set search index for #{self.name}: #{done}%% \r"
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
99
|
+
|
160
100
|
module InstanceMethods
|
161
|
-
|
162
|
-
# Load the result template path for this instance
|
101
|
+
|
163
102
|
def result_template_path
|
164
103
|
self.class.result_template_path
|
165
|
-
end
|
166
|
-
|
167
|
-
def dont_update_search_tags!
|
168
|
-
self.dont_update_search_tags = true
|
169
|
-
end
|
170
|
-
|
171
|
-
def update_search_tags?
|
172
|
-
!self.dont_update_search_tags
|
173
|
-
end
|
174
|
-
|
175
|
-
# create search tags for this very record based on the attributes defined in ':on' option passed to the 'Class.smart_search' method
|
104
|
+
end
|
105
|
+
|
176
106
|
def create_search_tags
|
177
|
-
tags
|
178
|
-
|
107
|
+
tags = []
|
179
108
|
self.class.tags.each do |tag|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
tag[:search_tags] = ""
|
185
|
-
tag[:boost] ||= 1
|
186
|
-
end
|
187
|
-
|
188
|
-
if tag[:field_name].is_a?(Symbol)
|
189
|
-
tag[:search_tags] << self.send(tag[:field_name]).to_s
|
190
|
-
elsif tag[:field_name].is_a?(String)
|
191
|
-
tag_methods = tag[:field_name].split(".")
|
109
|
+
if tag.is_a?(Symbol)
|
110
|
+
tags << self.send(tag)
|
111
|
+
elsif tag.is_a?(String)
|
112
|
+
tag_methods = tag.split(".")
|
192
113
|
tagx = self.send(tag_methods[0])
|
193
114
|
tag_methods[1..-1].each do |x|
|
194
115
|
tagx = tagx.send(x) rescue ""
|
195
116
|
end
|
196
|
-
|
197
|
-
end
|
198
|
-
|
199
|
-
tag[:search_tags] = tag[:search_tags].split(" ").uniq.join(" ").downcase.clear_html
|
200
|
-
tags << tag
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
self.clear_search_tags
|
205
|
-
|
206
|
-
# Merge search tags with same boost
|
207
|
-
@merged_tags = {}
|
208
|
-
|
209
|
-
tags.each do |t|
|
210
|
-
boost = t[:boost]
|
211
|
-
|
212
|
-
if @merged_tags[boost]
|
213
|
-
|
214
|
-
@merged_tags[boost][:field_name] << ",#{t[:field_name]}"
|
215
|
-
@merged_tags[boost][:search_tags] << " #{t[:search_tags]}"
|
216
|
-
else
|
217
|
-
@merged_tags[boost] = {:field_name => "#{t[:field_name]}", :search_tags => t[:search_tags], :boost => boost }
|
218
|
-
end
|
219
|
-
|
220
|
-
end
|
221
|
-
|
222
|
-
@merged_tags.values.each do |t|
|
223
|
-
if !t[:search_tags].blank? && t[:search_tags].size > 1
|
224
|
-
SmartSearchTag.create(t.merge!(:table_name => self.class.table_name, :entry_id => self.id, :search_tags => t[:search_tags].strip.split(" ").uniq.join(" ")))
|
225
|
-
end
|
226
|
-
end
|
227
|
-
|
228
|
-
end
|
229
|
-
|
230
|
-
# Remove search data for the instance from the index
|
231
|
-
def clear_search_tags
|
232
|
-
if !self.id.nil?
|
233
|
-
SmartSearchTag.connection.execute("DELETE from #{SmartSearchTag.table_name} where `table_name` = '#{self.class.table_name}' and entry_id = #{self.id}") rescue nil
|
117
|
+
tags << tagx
|
118
|
+
end
|
234
119
|
end
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
120
|
+
searchtags = tags.join(" ").split(" ")
|
121
|
+
searchtags = searchtags.uniq.join(" ")
|
122
|
+
search_tags_min = searchtags.gsub(" ", "").downcase
|
123
|
+
|
124
|
+
self.search_tags = "#{searchtags.downcase}"
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
|
240
130
|
class Config
|
241
|
-
|
131
|
+
|
242
132
|
cattr_accessor :search_models
|
243
133
|
cattr_accessor :public_models
|
244
|
-
|
134
|
+
|
245
135
|
self.search_models = []
|
246
136
|
self.public_models = []
|
247
|
-
|
137
|
+
|
248
138
|
def self.get_search_models
|
249
|
-
self.search_models.map {|m| m.constantize}
|
139
|
+
self.search_models.map {|m| m.constantize}
|
250
140
|
end
|
251
|
-
|
141
|
+
|
252
142
|
def self.get_public_models
|
253
|
-
self.public_models.map {|m| m.constantize}
|
254
|
-
end
|
255
|
-
|
256
|
-
end
|
257
|
-
|
258
|
-
|
143
|
+
self.public_models.map {|m| m.constantize}
|
144
|
+
end
|
145
|
+
|
146
|
+
end
|
147
|
+
|
148
|
+
|
259
149
|
end
|
260
150
|
|
261
151
|
|
data/test/test_helper.rb
CHANGED
@@ -1,16 +1,14 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require 'rubygems'
|
3
|
-
require 'bundler'
|
4
3
|
require 'test/unit'
|
5
4
|
require 'active_support'
|
6
5
|
require 'active_record'
|
7
6
|
require 'active_model'
|
8
7
|
|
9
8
|
require "smart_search"
|
10
|
-
require "
|
9
|
+
require "add_search_tags"
|
11
10
|
|
12
|
-
|
13
|
-
ActiveRecord::Base.establish_connection(:adapter => "mysql2", :database => "smart_search_test")
|
11
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :database => ":memory:")
|
14
12
|
|
15
13
|
ActiveRecord::Schema.define(:version => 1) do
|
16
14
|
create_table :users do |t|
|
@@ -18,6 +16,7 @@ ActiveRecord::Base.establish_connection(:adapter => "mysql2", :database => "smar
|
|
18
16
|
t.string :last_name
|
19
17
|
t.integer :office_id
|
20
18
|
t.date :birthday
|
19
|
+
t.text :search_tags
|
21
20
|
t.timestamps
|
22
21
|
end
|
23
22
|
|
@@ -26,15 +25,15 @@ ActiveRecord::Base.establish_connection(:adapter => "mysql2", :database => "smar
|
|
26
25
|
t.string :last_name
|
27
26
|
t.integer :user_id
|
28
27
|
t.date :birthday
|
28
|
+
t.text :search_tags
|
29
29
|
t.timestamps
|
30
30
|
end
|
31
31
|
|
32
32
|
create_table :offices do |t|
|
33
33
|
t.string :name
|
34
|
+
t.text :search_tags
|
34
35
|
t.timestamps
|
35
|
-
end
|
36
|
-
|
37
|
-
|
36
|
+
end
|
38
37
|
end
|
39
38
|
#
|
40
39
|
# def drop_db
|
@@ -6,8 +6,6 @@ class SmartSearchTest < Test::Unit::TestCase
|
|
6
6
|
office_name = "Office1"
|
7
7
|
office = Office.create(:name => office_name)
|
8
8
|
|
9
|
-
Office.enable_similarity = false
|
10
|
-
|
11
9
|
assert_equal office, Office.find_by_tags(office_name).first
|
12
10
|
end
|
13
11
|
|
@@ -38,7 +36,6 @@ class SmartSearchTest < Test::Unit::TestCase
|
|
38
36
|
office_id_nok = 5
|
39
37
|
|
40
38
|
User.smart_search :on => [:full_name], :conditions => "office_id <> #{office_id_nok}", :force => true
|
41
|
-
User.enable_similarity = false
|
42
39
|
|
43
40
|
user = User.create(:first_name => "Unknown", :last_name => "User", :office_id => office_id_nok)
|
44
41
|
user = User.create(:first_name => "Public", :last_name => "User", :office_id => office_id_ok)
|
@@ -49,7 +46,6 @@ class SmartSearchTest < Test::Unit::TestCase
|
|
49
46
|
|
50
47
|
def test_should_use_default_order_and_order_should_be_overwriteable
|
51
48
|
User.smart_search :on => [:full_name], :order => :first_name, :force => true
|
52
|
-
User.enable_similarity = false
|
53
49
|
|
54
50
|
user_c = User.create(:first_name => "C", :last_name => "Test1")
|
55
51
|
user_a = User.create(:first_name => "A", :last_name => "Test3")
|
@@ -61,21 +57,17 @@ class SmartSearchTest < Test::Unit::TestCase
|
|
61
57
|
|
62
58
|
assert_equal user_c, User.find_by_tags("test", :order => :last_name).first
|
63
59
|
assert_equal user_a, User.find_by_tags("test", :order => :last_name).last
|
64
|
-
end
|
60
|
+
end
|
65
61
|
|
66
|
-
def
|
67
|
-
User.
|
68
|
-
|
69
|
-
|
62
|
+
def test_result_should_be_redefinable
|
63
|
+
user_c = User.create(:first_name => "C", :last_name => "Next1")
|
64
|
+
user_a = User.create(:first_name => "A", :last_name => "Bah")
|
65
|
+
user_b = User.create(:first_name => "B", :last_name => "Next2")
|
70
66
|
|
67
|
+
assert_equal [], User.find_by_tags("A").where("last_name <> 'Bah' ")
|
71
68
|
end
|
72
69
|
|
73
70
|
|
74
|
-
def test_should_create_search_history
|
75
|
-
User.find_by_tags("XXXYYY")
|
76
|
-
|
77
|
-
assert_not_equal 0, SmartSearchHistory.where(:query => "XXXYYY").size
|
78
|
-
end
|
79
71
|
|
80
72
|
|
81
73
|
end
|
metadata
CHANGED
@@ -1,95 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: smart_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.55
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Eck
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
13
|
-
-
|
14
|
-
|
15
|
-
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 4.0.4
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: 4.0.4
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: amatch
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: friendly_extensions
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: mysql2
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
description: SmartSearch adds full-text search functions to ActiveRecord running with
|
70
|
-
MySQL, including search for similiar words. Its fast, simple, and works with almost
|
71
|
-
zero-config!
|
72
|
-
email: florian.eck@el-digital.de
|
11
|
+
date: 2013-03-11 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Adds easy to use full-text search to ActiveRecord models, based the attributes
|
14
|
+
you want to search.
|
15
|
+
email: it-support@friends-systems.de
|
73
16
|
executables: []
|
74
17
|
extensions: []
|
75
18
|
extra_rdoc_files: []
|
76
19
|
files:
|
77
|
-
- config/database.yml
|
78
20
|
- config/routes.rb
|
21
|
+
- lib/add_search_tags.rb
|
79
22
|
- lib/smart_search.rb
|
80
|
-
- lib/smart_search/engine.rb
|
81
|
-
- lib/smart_search_history.rb
|
82
|
-
- lib/smart_search_tag.rb
|
83
|
-
- lib/smart_similarity.rb
|
84
|
-
- lib/tasks/smart_search.rake
|
85
|
-
- lib/tasks/testing.rake
|
86
|
-
- test/test_document_multi_line.txt
|
87
|
-
- test/test_document_one_line.txt
|
88
23
|
- test/test_helper.rb
|
89
|
-
- test/unit/
|
90
|
-
|
91
|
-
- test/unit/03_smart_search_boost_test.rb
|
92
|
-
homepage: https://github.com/florianeck/smart_search
|
24
|
+
- test/unit/smart_search_test.rb
|
25
|
+
homepage: https://rubygems.org/gems/smart_search
|
93
26
|
licenses: []
|
94
27
|
metadata: {}
|
95
28
|
post_install_message:
|
@@ -108,14 +41,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
41
|
version: '0'
|
109
42
|
requirements: []
|
110
43
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.
|
44
|
+
rubygems_version: 2.2.1
|
112
45
|
signing_key:
|
113
46
|
specification_version: 4
|
114
|
-
summary: Simple, easy to use search
|
47
|
+
summary: Simple, easy to use search.
|
115
48
|
test_files:
|
116
|
-
- test/test_document_multi_line.txt
|
117
|
-
- test/test_document_one_line.txt
|
118
49
|
- test/test_helper.rb
|
119
|
-
- test/unit/
|
120
|
-
- test/unit/02_smart_search_similarity_test.rb
|
121
|
-
- test/unit/03_smart_search_boost_test.rb
|
50
|
+
- test/unit/smart_search_test.rb
|
data/config/database.yml
DELETED
data/lib/smart_search/engine.rb
DELETED
data/lib/smart_search_history.rb
DELETED
data/lib/smart_search_tag.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# Represents the search index
|
2
|
-
class SmartSearchTag < ActiveRecord::Base
|
3
|
-
|
4
|
-
|
5
|
-
# Get a list of available search tags
|
6
|
-
def self.tags_list(query, table= nil)
|
7
|
-
if query.size < 3
|
8
|
-
return []
|
9
|
-
else
|
10
|
-
list = sql_query!("select search_tags from #{self.table_name} where search_tags like '%#{query}%' #{"and table_name = '#{table}'" if table} ").map {|r| r['search_tags']}
|
11
|
-
list = list.join(" ").clear_html.split(" ").uniq
|
12
|
-
return list.sort.grep(Regexp.new(query))
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
end
|
data/lib/smart_similarity.rb
DELETED
@@ -1,133 +0,0 @@
|
|
1
|
-
# This class is used to build similiarity index
|
2
|
-
class SmartSimilarity < ActiveRecord::Base
|
3
|
-
#= Configuration
|
4
|
-
serialize :similarities, Array
|
5
|
-
self.table_name = "smart_search_similarities"
|
6
|
-
|
7
|
-
#== Associations
|
8
|
-
# => Stuff in Here
|
9
|
-
|
10
|
-
#== Plugins and modules
|
11
|
-
#=== PlugIns
|
12
|
-
# => Stuff in Here
|
13
|
-
|
14
|
-
#=== include Modules
|
15
|
-
# => Stuff in Here
|
16
|
-
|
17
|
-
#== Konstanten
|
18
|
-
# Defines the min. result of word simililarity check
|
19
|
-
SIMILARITY_FACTOR = 0.77
|
20
|
-
# Defines first simililarity check method
|
21
|
-
SIMILARITY_METHOD_1 = :jarowinkler
|
22
|
-
# Defines first simililarity check method
|
23
|
-
SIMILARITY_METHOD_2 = :levenshtein
|
24
|
-
|
25
|
-
# An average of both results will generated and compered with 'SIMILARITY_FACTOR'
|
26
|
-
|
27
|
-
# Limit Number of similar words (still unused)
|
28
|
-
SIMILARITY_LIMIT = 8
|
29
|
-
|
30
|
-
# USe this regexp to split texts into words
|
31
|
-
SPLITTING_REGEXP = /\b/
|
32
|
-
|
33
|
-
#== Validation and Callbacks
|
34
|
-
#=== Validation
|
35
|
-
|
36
|
-
#=== Callbacks
|
37
|
-
# => Stuff in Here
|
38
|
-
# => END
|
39
|
-
|
40
|
-
# Create similarity data based on the given text
|
41
|
-
# This method is used to generate date from every source, e.g. file, url, single words etc..
|
42
|
-
def self.create_from_text(text)
|
43
|
-
# prepare text
|
44
|
-
prepared_text = text.downcase.split(SPLITTING_REGEXP).uniq
|
45
|
-
prepared_text = prepared_text.select {|w| w.size >= 3 && !w.match(/[0-9\-_<>\.\/(){}&\?"'@*+$!=,:'#;]/)}
|
46
|
-
list = {}
|
47
|
-
prepared_text.each do |word|
|
48
|
-
# Load index from database
|
49
|
-
words_in_db = self.find_by_phrase(word)
|
50
|
-
if words_in_db.nil?
|
51
|
-
self.connection.execute "INSERT INTO `#{self.table_name}` (`phrase`, `ind`) VALUES ('#{word}', '#{word[0..1]}');"
|
52
|
-
current = []
|
53
|
-
else
|
54
|
-
current = words_in_db.similarities
|
55
|
-
end
|
56
|
-
|
57
|
-
# If word is a substring of similarity word, it must not be saved,
|
58
|
-
# cause it will match anyway:
|
59
|
-
# 'how' will match 'show', so 'show' is not needed in index for 'how'
|
60
|
-
# Vice Versa, 'how' should also be found if query is 'show', so it will be kept in the index
|
61
|
-
current += prepared_text.select {|w| w != word && self.match_words(w,word) >= SIMILARITY_FACTOR && !w.match(word)}
|
62
|
-
|
63
|
-
list[word] = current.uniq
|
64
|
-
end
|
65
|
-
|
66
|
-
# Write to Database
|
67
|
-
list.each do |word, sims|
|
68
|
-
sims = sims.sort_by {|s| self.match_words(s,word) }.reverse.first(SIMILARITY_LIMIT)
|
69
|
-
|
70
|
-
self.connection.execute 'UPDATE %s set similarities = "%s" where phrase = "%s"' % [self.table_name, sims.to_yaml, word] rescue nil
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
# Add one simgle word to database and check if there are already similars
|
75
|
-
def self.add_word(word)
|
76
|
-
words = [word]
|
77
|
-
phrases = self.connection.select_all("SELECT phrase from smart_search_similarities").map {|r| r["phrase"] }
|
78
|
-
words += phrases.select {|p| self.match_words(p,word) >= SIMILARITY_FACTOR }
|
79
|
-
|
80
|
-
self.create_from_text(words.join(" "))
|
81
|
-
end
|
82
|
-
|
83
|
-
# Load an entire file to the index.
|
84
|
-
# Best used for loading big dictionary files.
|
85
|
-
# Uses 'spawnling' to split the data into 8 stacks and load them simultaniously
|
86
|
-
def self.load_file(path)
|
87
|
-
count = %x{wc -l #{path}}.split[0].to_i.max(1)
|
88
|
-
puts "loading file: #{path}"
|
89
|
-
puts "=> #{count} rows"
|
90
|
-
|
91
|
-
if count == 1
|
92
|
-
File.open(path, "r").read.split(SPLITTING_REGEXP).each {|w| self.add_word(w)}
|
93
|
-
else
|
94
|
-
File.open(path, "r").read.split(SPLITTING_REGEXP).seperate([8,count].min).each_with_index do |stack, si|
|
95
|
-
stack.each_with_index do |l,i|
|
96
|
-
self.add_word(l)
|
97
|
-
end
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
# Load words from website and save them to index
|
103
|
-
def self.load_url(url)
|
104
|
-
self.create_from_text(%x(curl #{url}))
|
105
|
-
end
|
106
|
-
|
107
|
-
# Loads your created query history and saves them to the index
|
108
|
-
def self.load_from_query_history
|
109
|
-
queries = ActiveRecord::Base.connection.select_all("SELECT query from `#{::SmartSearchHistory.table_name}`").map {|r| r["query"]}
|
110
|
-
queries.each {|q| self.add_word(q) }
|
111
|
-
|
112
|
-
self.connection.execute("TRUNCATE `#{::SmartSearchHistory.table_name}`")
|
113
|
-
end
|
114
|
-
|
115
|
-
# Get array of similar words including orig word
|
116
|
-
def self.similars(word, options = {})
|
117
|
-
list = self.where(:phrase => word).first
|
118
|
-
if list.nil?
|
119
|
-
return [word]
|
120
|
-
else
|
121
|
-
self.connection.execute("UPDATE `smart_search_similarities` SET `count` = #{list.count+1} where `smart_search_similarities`.`phrase` = '#{list.phrase}'")
|
122
|
-
return [word, list.similarities].flatten
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
# Return match score for two words bases und the two defined similarity methods
|
127
|
-
def self.match_words(word1, word2)
|
128
|
-
x1 = word1.send("#{SIMILARITY_METHOD_1}_similar", word2)
|
129
|
-
x2 = word1.send("#{SIMILARITY_METHOD_2}_similar", word2)
|
130
|
-
return (x1+x2)/2.0
|
131
|
-
end
|
132
|
-
|
133
|
-
end
|
data/lib/tasks/smart_search.rake
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
namespace :smart_search do
|
2
|
-
desc "Load similarity data from query history"
|
3
|
-
task :similarity_from_query_history => :environment do
|
4
|
-
require File.expand_path("../../smart_similarity", __FILE__)
|
5
|
-
SmartSimilarity.load_from_query_history
|
6
|
-
end
|
7
|
-
|
8
|
-
desc "Load similarity data from url - Use URL=http://.../ to specify url - Requires 'curl'"
|
9
|
-
task :similarity_from_url => :environment do
|
10
|
-
require File.expand_path("../../smart_similarity", __FILE__)
|
11
|
-
if ENV['URL'].nil?
|
12
|
-
raise ArgumentError, "No URL specified. "
|
13
|
-
else
|
14
|
-
SmartSimilarity.load_url(ENV['URL'])
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
end
|
19
|
-
|
20
|
-
|
data/lib/tasks/testing.rake
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require "active_record"
|
2
|
-
namespace :db do
|
3
|
-
desc "Create test database. Overwrite dasebase config with USERNAME=, PASSWORD=, DATABASE="
|
4
|
-
task :create_test_db do
|
5
|
-
config = YAML::load(File.open(File.expand_path("config/database.yml")))["test"]
|
6
|
-
|
7
|
-
# Overwrite config
|
8
|
-
config.merge!('database' => ENV['DATABASE']) if ENV['DATABASE']
|
9
|
-
config.merge!('username' => ENV['USERNAME']) if ENV['USERNAME']
|
10
|
-
config.merge!('password' => ENV['PASSWORD']) if ENV['PASSWORD']
|
11
|
-
|
12
|
-
ActiveRecord::Base.establish_connection(config.merge('database' => nil))
|
13
|
-
ActiveRecord::Base.connection.drop_database(config['database']) rescue nil
|
14
|
-
ActiveRecord::Base.connection.create_database(config['database'])
|
15
|
-
ActiveRecord::Base.establish_connection(config)
|
16
|
-
end
|
17
|
-
|
18
|
-
task :migrate do
|
19
|
-
ActiveRecord::Migrator.migrate(ActiveRecord::Migrator.migrations_paths)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
task :test_smart_search do
|
24
|
-
Rake::Task["db:create_test_db"].execute
|
25
|
-
Rake::Task["db:migrate"].execute
|
26
|
-
Rake::Task["test"].execute
|
27
|
-
end
|
@@ -1,6 +0,0 @@
|
|
1
|
-
The quick brown fox jumps over the lazy dog.
|
2
|
-
The five boxing wizards jump quickly.
|
3
|
-
Sphinx of all black quartz judge my vow.
|
4
|
-
A quick movement of the enemy will jeopardize six gunboats.
|
5
|
-
Five quacking Zephyrs jolt my wax bed.
|
6
|
-
Heavy boxes perform waltzes and jigs.
|
@@ -1 +0,0 @@
|
|
1
|
-
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
@@ -1,32 +0,0 @@
|
|
1
|
-
# -*- encoding : utf-8 -*-
|
2
|
-
require "test_helper"
|
3
|
-
class SmartSearchSimilartyTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_similarity_should_load_from_file
|
6
|
-
testfile_1 = File.expand_path("../../test_document_one_line.txt", __FILE__)
|
7
|
-
testfile_2 = File.expand_path("../../test_document_multi_line.txt", __FILE__)
|
8
|
-
SmartSimilarity.connection.execute "Truncate table #{SmartSimilarity.table_name}"
|
9
|
-
|
10
|
-
assert_equal 0, SmartSimilarity.count
|
11
|
-
SmartSimilarity.load_file(testfile_2)
|
12
|
-
new_count = SmartSimilarity.count
|
13
|
-
assert_not_equal 0, new_count
|
14
|
-
|
15
|
-
SmartSimilarity.load_file(testfile_1)
|
16
|
-
assert_not_equal new_count, SmartSimilarity.count
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_similarity_should_load_from_url
|
20
|
-
count = SmartSimilarity.count
|
21
|
-
SmartSimilarity.load_url("https://github.com/florianeck/smart_search")
|
22
|
-
assert_not_equal count, SmartSimilarity.count
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_similarity_should_load_from_history
|
26
|
-
count = SmartSimilarity.count
|
27
|
-
User.find_by_tags("this is history now")
|
28
|
-
SmartSimilarity.load_from_query_history
|
29
|
-
assert_not_equal count, SmartSimilarity.count
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# -*- encoding : utf-8 -*-
|
2
|
-
require "test_helper"
|
3
|
-
class SmartSearchBoostTest < Test::Unit::TestCase
|
4
|
-
|
5
|
-
def test_boost_search_index_cols_should_be_created
|
6
|
-
SmartSearchTag.connection.execute("TRUNCATE #{SmartSearchTag.table_name}")
|
7
|
-
Customer.smart_search :on => [
|
8
|
-
{:field_name => :first_name, :boost => 1},
|
9
|
-
{:field_name => :last_name, :boost => 2},
|
10
|
-
{:field_name => "user.full_name", :boost => 0.5},
|
11
|
-
], :force => true
|
12
|
-
|
13
|
-
user = User.create(:first_name => "Pi", :last_name => "Pa")
|
14
|
-
|
15
|
-
Customer.create(:first_name => "Lorem", :last_name => "Ipsum", :user_id => user.id)
|
16
|
-
|
17
|
-
assert_equal 1, SmartSearchTag.where(:field_name => "first_name", :boost => 1).count
|
18
|
-
assert_equal 1, SmartSearchTag.where(:field_name => "last_name", :boost => 2).count
|
19
|
-
assert_equal 1, SmartSearchTag.where(:field_name => "user.full_name", :boost => 0.5).count
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_boost_search__results_should_order_by_score
|
23
|
-
Customer.smart_search :on => [
|
24
|
-
{:field_name => :first_name, :boost => 1},
|
25
|
-
{:field_name => :last_name, :boost => 2},
|
26
|
-
{:field_name => "user.full_name", :boost => 0.5},
|
27
|
-
], :force => true
|
28
|
-
|
29
|
-
user = User.create(:first_name => "Rudi", :last_name => "Piff")
|
30
|
-
|
31
|
-
c1 = Customer.create(:first_name => "Rudi", :last_name => "Rolle", :user_id => user.id)
|
32
|
-
c2 = Customer.create(:first_name => "Rolle", :last_name => "Rudi", :user_id => user.id)
|
33
|
-
c3 = Customer.create(:first_name => "Jackie", :last_name => "Brown", :user_id => user.id)
|
34
|
-
|
35
|
-
results = Customer.find_by_tags("Rudi")
|
36
|
-
|
37
|
-
assert_equal c1, results[1]
|
38
|
-
assert_equal c2, results[0]
|
39
|
-
assert_equal c3, results[2]
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
def test_same_boost_search_index_cols_should_be_grouped
|
44
|
-
|
45
|
-
Customer.smart_search :on => [
|
46
|
-
{:field_name => :first_name, :boost => 2},
|
47
|
-
{:field_name => :last_name, :boost => 2},
|
48
|
-
{:field_name => "user.full_name", :boost => 0.5},
|
49
|
-
], :force => true
|
50
|
-
|
51
|
-
user = User.create(:first_name => "Pipi", :last_name => "Papa")
|
52
|
-
|
53
|
-
customer = Customer.create(:first_name => "Lorem", :last_name => "Ipsum", :user_id => user.id)
|
54
|
-
|
55
|
-
assert_equal 2, SmartSearchTag.where(:table_name => Customer.table_name, :entry_id => customer.id).count
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|