freelancing-god-thinking-sphinx 0.9.7 → 0.9.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README +11 -1
- data/lib/riddle.rb +10 -6
- data/lib/riddle/client.rb +54 -8
- data/lib/riddle/client/filter.rb +1 -1
- data/lib/thinking_sphinx.rb +5 -3
- data/lib/thinking_sphinx/active_record.rb +14 -9
- data/lib/thinking_sphinx/active_record/delta.rb +2 -5
- data/lib/thinking_sphinx/active_record/search.rb +7 -0
- data/lib/thinking_sphinx/configuration.rb +69 -49
- data/lib/thinking_sphinx/index.rb +44 -8
- data/lib/thinking_sphinx/index/builder.rb +15 -0
- data/lib/thinking_sphinx/search.rb +70 -12
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +11 -13
- data/spec/unit/thinking_sphinx/active_record/search_spec.rb +27 -9
- data/spec/unit/thinking_sphinx/active_record_spec.rb +76 -10
- data/spec/unit/thinking_sphinx/configuration_spec.rb +60 -22
- data/spec/unit/thinking_sphinx/index_spec.rb +78 -1
- data/spec/unit/thinking_sphinx/search_spec.rb +42 -0
- data/spec/unit/thinking_sphinx_spec.rb +2 -1
- metadata +3 -3
data/README
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
First, if you haven't done so already, check out the main usage[http://ts.freelancing-gods.com/usage.html] page. Once you've done that, the next place to look for information is the specific method docs - ThinkingSphinx::Search and ThinkingSphinx::Index::Builder in particular.
|
6
6
|
|
7
|
+
Keep in mind that while Thinking Sphinx works for ActiveRecord with Merb, it doesn't yet support DataMapper (although that is planned).
|
8
|
+
|
7
9
|
== Contributing
|
8
10
|
|
9
11
|
Fork on GitHub and after you've committed tested patches, send a pull request.
|
@@ -44,4 +46,12 @@ Since I first released this library, there's been quite a few people who have su
|
|
44
46
|
- James Healy
|
45
47
|
- Jae-Jun Hwang
|
46
48
|
- Xavier Shay
|
47
|
-
- Jason Rust
|
49
|
+
- Jason Rust
|
50
|
+
- Gopal Patel
|
51
|
+
- Chris Heald
|
52
|
+
- Peter Vandenberk
|
53
|
+
- Josh French
|
54
|
+
- Andrew Bennett
|
55
|
+
- Jordan Fowler
|
56
|
+
- Seth Walker
|
57
|
+
- Joe Noon
|
data/lib/riddle.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'socket'
|
2
|
+
require 'timeout'
|
2
3
|
require 'riddle/client'
|
3
4
|
require 'riddle/client/filter'
|
4
5
|
require 'riddle/client/message'
|
@@ -9,14 +10,17 @@ module Riddle #:nodoc:
|
|
9
10
|
end
|
10
11
|
|
11
12
|
module Version #:nodoc:
|
12
|
-
Major
|
13
|
-
Minor
|
14
|
-
Tiny
|
13
|
+
Major = 0
|
14
|
+
Minor = 9
|
15
|
+
Tiny = 8
|
15
16
|
# Revision number for RubyForge's sake, taken from what Sphinx
|
16
17
|
# outputs to the command line.
|
17
|
-
Rev
|
18
|
+
Rev = 1371
|
19
|
+
# Release number to mark my own fixes, beyond feature parity with
|
20
|
+
# Sphinx itself.
|
21
|
+
Release = 0
|
18
22
|
|
19
|
-
String = [Major, Minor, Tiny].join('.')
|
20
|
-
GemVersion = [Major, Minor, Tiny, Rev].join('.')
|
23
|
+
String = [Major, Minor, Tiny].join('.')
|
24
|
+
GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
|
21
25
|
end
|
22
26
|
end
|
data/lib/riddle/client.rb
CHANGED
@@ -100,7 +100,7 @@ module Riddle
|
|
100
100
|
:match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
|
101
101
|
:group_by, :group_function, :group_clause, :group_distinct, :cut_off,
|
102
102
|
:retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
|
103
|
-
:max_query_time, :field_weights
|
103
|
+
:max_query_time, :field_weights, :timeout
|
104
104
|
attr_reader :queue
|
105
105
|
|
106
106
|
# Can instantiate with a specific server and port - otherwise it assumes
|
@@ -134,10 +134,40 @@ module Riddle
|
|
134
134
|
@max_query_time = 0
|
135
135
|
# string keys are field names, integer values are weightings
|
136
136
|
@field_weights = {}
|
137
|
+
@timeout = 0
|
137
138
|
|
138
139
|
@queue = []
|
139
140
|
end
|
140
141
|
|
142
|
+
# Reset attributes and settings to defaults.
|
143
|
+
def reset
|
144
|
+
# defaults
|
145
|
+
@offset = 0
|
146
|
+
@limit = 20
|
147
|
+
@max_matches = 1000
|
148
|
+
@match_mode = :all
|
149
|
+
@sort_mode = :relevance
|
150
|
+
@sort_by = ''
|
151
|
+
@weights = []
|
152
|
+
@id_range = 0..0
|
153
|
+
@filters = []
|
154
|
+
@group_by = ''
|
155
|
+
@group_function = :day
|
156
|
+
@group_clause = '@group desc'
|
157
|
+
@group_distinct = ''
|
158
|
+
@cut_off = 0
|
159
|
+
@retry_count = 0
|
160
|
+
@retry_delay = 0
|
161
|
+
@anchor = {}
|
162
|
+
# string keys are index names, integer values are weightings
|
163
|
+
@index_weights = {}
|
164
|
+
@rank_mode = :proximity_bm25
|
165
|
+
@max_query_time = 0
|
166
|
+
# string keys are field names, integer values are weightings
|
167
|
+
@field_weights = {}
|
168
|
+
@timeout = 0
|
169
|
+
end
|
170
|
+
|
141
171
|
# Set the geo-anchor point - with the names of the attributes that contain
|
142
172
|
# the latitude and longitude (in radians), and the reference position.
|
143
173
|
# Note that for geocoding to work properly, you must also set
|
@@ -384,6 +414,26 @@ module Riddle
|
|
384
414
|
# Connects to the Sphinx daemon, and yields a socket to use. The socket is
|
385
415
|
# closed at the end of the block.
|
386
416
|
def connect(&block)
|
417
|
+
socket = nil
|
418
|
+
if @timeout == 0
|
419
|
+
socket = initialise_connection
|
420
|
+
else
|
421
|
+
begin
|
422
|
+
Timeout.timeout(@timeout) { socket = initialise_connection }
|
423
|
+
rescue Timeout::Error
|
424
|
+
raise Riddle::ConnectionError,
|
425
|
+
"Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
|
426
|
+
end
|
427
|
+
end
|
428
|
+
|
429
|
+
begin
|
430
|
+
yield socket
|
431
|
+
ensure
|
432
|
+
socket.close
|
433
|
+
end
|
434
|
+
end
|
435
|
+
|
436
|
+
def initialise_connection
|
387
437
|
socket = TCPSocket.new @server, @port
|
388
438
|
|
389
439
|
# Checking version
|
@@ -396,11 +446,7 @@ module Riddle
|
|
396
446
|
# Send version
|
397
447
|
socket.send [1].pack('N'), 0
|
398
448
|
|
399
|
-
|
400
|
-
yield socket
|
401
|
-
ensure
|
402
|
-
socket.close
|
403
|
-
end
|
449
|
+
socket
|
404
450
|
end
|
405
451
|
|
406
452
|
# Send a collection of messages, for a command type (eg, search, excerpts,
|
@@ -507,7 +553,7 @@ module Riddle
|
|
507
553
|
# Per Index Weights
|
508
554
|
message.append_int @index_weights.length
|
509
555
|
@index_weights.each do |key,val|
|
510
|
-
message.append_string key
|
556
|
+
message.append_string key.to_s
|
511
557
|
message.append_int val
|
512
558
|
end
|
513
559
|
|
@@ -517,7 +563,7 @@ module Riddle
|
|
517
563
|
# Per Field Weights
|
518
564
|
message.append_int @field_weights.length
|
519
565
|
@field_weights.each do |key,val|
|
520
|
-
message.append_string key
|
566
|
+
message.append_string key.to_s
|
521
567
|
message.append_int val
|
522
568
|
end
|
523
569
|
|
data/lib/riddle/client/filter.rb
CHANGED
data/lib/thinking_sphinx.rb
CHANGED
@@ -20,7 +20,7 @@ module ThinkingSphinx
|
|
20
20
|
module Version #:nodoc:
|
21
21
|
Major = 0
|
22
22
|
Minor = 9
|
23
|
-
Tiny =
|
23
|
+
Tiny = 8
|
24
24
|
|
25
25
|
String = [Major, Minor, Tiny].join('.')
|
26
26
|
end
|
@@ -52,10 +52,12 @@ module ThinkingSphinx
|
|
52
52
|
@@define_indexes = value
|
53
53
|
end
|
54
54
|
|
55
|
+
@@deltas_enabled = nil
|
56
|
+
|
55
57
|
# Check if delta indexing is enabled.
|
56
58
|
#
|
57
59
|
def self.deltas_enabled?
|
58
|
-
@@deltas_enabled
|
60
|
+
@@deltas_enabled = (ThinkingSphinx::Configuration.environment != 'test') if @@deltas_enabled.nil?
|
59
61
|
@@deltas_enabled == true
|
60
62
|
end
|
61
63
|
|
@@ -79,4 +81,4 @@ module ThinkingSphinx
|
|
79
81
|
"SELECT @@global.sql_mode, @@session.sql_mode;"
|
80
82
|
).all? { |key,value| value.nil? || value[/ONLY_FULL_GROUP_BY/].nil? }
|
81
83
|
end
|
82
|
-
end
|
84
|
+
end
|
@@ -10,9 +10,8 @@ module ThinkingSphinx
|
|
10
10
|
module ActiveRecord
|
11
11
|
def self.included(base)
|
12
12
|
base.class_eval do
|
13
|
+
class_inheritable_array :indexes
|
13
14
|
class << self
|
14
|
-
attr_accessor :indexes
|
15
|
-
|
16
15
|
# Allows creation of indexes for Sphinx. If you don't do this, there
|
17
16
|
# isn't much point trying to search (or using this plugin at all,
|
18
17
|
# really).
|
@@ -65,10 +64,10 @@ module ThinkingSphinx
|
|
65
64
|
def define_index(&block)
|
66
65
|
return unless ThinkingSphinx.define_indexes?
|
67
66
|
|
68
|
-
|
67
|
+
self.indexes ||= []
|
69
68
|
index = Index.new(self, &block)
|
70
69
|
|
71
|
-
|
70
|
+
self.indexes << index
|
72
71
|
unless ThinkingSphinx.indexed_models.include?(self.name)
|
73
72
|
ThinkingSphinx.indexed_models << self.name
|
74
73
|
end
|
@@ -114,21 +113,27 @@ module ThinkingSphinx
|
|
114
113
|
)
|
115
114
|
end
|
116
115
|
|
116
|
+
def in_core_index?
|
117
|
+
@in_core_index ||= self.class.search_for_id(self.id, "#{self.class.name.downcase}_core")
|
118
|
+
end
|
119
|
+
|
117
120
|
def toggle_deleted
|
118
121
|
config = ThinkingSphinx::Configuration.new
|
119
122
|
client = Riddle::Client.new config.address, config.port
|
120
123
|
|
121
124
|
client.update(
|
122
|
-
"#{self.class.name
|
125
|
+
"#{self.class.indexes.first.name}_core",
|
123
126
|
['sphinx_deleted'],
|
124
127
|
{self.id => 1}
|
125
|
-
)
|
128
|
+
) if self.in_core_index?
|
126
129
|
|
127
130
|
client.update(
|
128
|
-
"#{self.class.name
|
131
|
+
"#{self.class.indexes.first.name}_delta",
|
129
132
|
['sphinx_deleted'],
|
130
133
|
{self.id => 1}
|
131
|
-
) if
|
134
|
+
) if ThinkingSphinx.deltas_enabled? &&
|
135
|
+
self.class.indexes.any? { |index| index.delta? } &&
|
136
|
+
self.delta?
|
132
137
|
end
|
133
138
|
end
|
134
|
-
end
|
139
|
+
end
|
@@ -73,13 +73,10 @@ module ThinkingSphinx
|
|
73
73
|
# if running in the test environment.
|
74
74
|
#
|
75
75
|
def index_delta
|
76
|
-
|
77
|
-
!ThinkingSphinx.deltas_enabled?
|
78
|
-
return true
|
79
|
-
end
|
76
|
+
return true unless ThinkingSphinx.deltas_enabled?
|
80
77
|
|
81
78
|
configuration = ThinkingSphinx::Configuration.new
|
82
|
-
system "indexer --config #{configuration.config_file} --rotate #{self.class.name
|
79
|
+
system "indexer --config #{configuration.config_file} --rotate #{self.class.indexes.first.name}_delta"
|
83
80
|
|
84
81
|
true
|
85
82
|
end
|
@@ -28,6 +28,13 @@ module ThinkingSphinx
|
|
28
28
|
args << options
|
29
29
|
ThinkingSphinx::Search.search(*args)
|
30
30
|
end
|
31
|
+
|
32
|
+
def search_for_id(*args)
|
33
|
+
options = args.extract_options!
|
34
|
+
options[:class] = self
|
35
|
+
args << options
|
36
|
+
ThinkingSphinx::Search.search_for_id(*args)
|
37
|
+
end
|
31
38
|
end
|
32
39
|
end
|
33
40
|
end
|
@@ -1,30 +1,38 @@
|
|
1
|
+
require 'erb'
|
2
|
+
|
1
3
|
module ThinkingSphinx
|
2
4
|
# This class both keeps track of the configuration settings for Sphinx and
|
3
5
|
# also generates the resulting file for Sphinx to use.
|
4
6
|
#
|
5
7
|
# Here are the default settings, relative to RAILS_ROOT where relevant:
|
6
8
|
#
|
7
|
-
# config file::
|
8
|
-
# searchd log file::
|
9
|
-
# query log file::
|
10
|
-
# pid file::
|
11
|
-
# searchd files::
|
12
|
-
# address::
|
13
|
-
# port::
|
14
|
-
# allow star::
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
9
|
+
# config file:: config/#{environment}.sphinx.conf
|
10
|
+
# searchd log file:: log/searchd.log
|
11
|
+
# query log file:: log/searchd.query.log
|
12
|
+
# pid file:: log/searchd.#{environment}.pid
|
13
|
+
# searchd files:: db/sphinx/#{environment}/
|
14
|
+
# address:: 127.0.0.1
|
15
|
+
# port:: 3312
|
16
|
+
# allow star:: false
|
17
|
+
# min prefix length:: 1
|
18
|
+
# min infix length:: 1
|
19
|
+
# mem limit:: 64M
|
20
|
+
# max matches:: 1000
|
21
|
+
# morphology:: stem_en
|
22
|
+
# charset type:: utf-8
|
23
|
+
# charset table:: nil
|
24
|
+
# ignore chars:: nil
|
25
|
+
# html strip:: false
|
26
|
+
# html remove elements:: ''
|
21
27
|
#
|
22
28
|
# If you want to change these settings, create a YAML file at
|
23
29
|
# config/sphinx.yml with settings for each environment, in a similar
|
24
30
|
# fashion to database.yml - using the following keys: config_file,
|
25
31
|
# searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
|
26
|
-
# allow_star,
|
27
|
-
# charset_table, ignore_chars
|
32
|
+
# allow_star, min_prefix_len, min_infix_len, mem_limit, max_matches,
|
33
|
+
# morphology, charset_type, charset_table, ignore_chars, html_strip,
|
34
|
+
# html_remove_elements. I think you've got
|
35
|
+
# the idea.
|
28
36
|
#
|
29
37
|
# Each setting in the YAML file is optional - so only put in the ones you
|
30
38
|
# want to change.
|
@@ -35,9 +43,10 @@ module ThinkingSphinx
|
|
35
43
|
#
|
36
44
|
class Configuration
|
37
45
|
attr_accessor :config_file, :searchd_log_file, :query_log_file,
|
38
|
-
:pid_file, :searchd_file_path, :address, :port, :allow_star,
|
39
|
-
:
|
40
|
-
:
|
46
|
+
:pid_file, :searchd_file_path, :address, :port, :allow_star,
|
47
|
+
:min_prefix_len, :min_infix_len, :mem_limit, :max_matches, :morphology,
|
48
|
+
:charset_type, :charset_table, :ignore_chars, :html_strip,
|
49
|
+
:html_remove_elements, :app_root
|
41
50
|
|
42
51
|
attr_reader :environment
|
43
52
|
|
@@ -49,20 +58,24 @@ module ThinkingSphinx
|
|
49
58
|
self.app_root = Merb.root if defined?(Merb)
|
50
59
|
self.app_root ||= app_root
|
51
60
|
|
52
|
-
self.config_file
|
53
|
-
self.searchd_log_file
|
54
|
-
self.query_log_file
|
55
|
-
self.pid_file
|
56
|
-
self.searchd_file_path
|
57
|
-
self.address
|
58
|
-
self.port
|
59
|
-
self.allow_star
|
60
|
-
self.
|
61
|
-
self.
|
62
|
-
self.
|
63
|
-
self.
|
64
|
-
self.
|
65
|
-
self.
|
61
|
+
self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
|
62
|
+
self.searchd_log_file = "#{self.app_root}/log/searchd.log"
|
63
|
+
self.query_log_file = "#{self.app_root}/log/searchd.query.log"
|
64
|
+
self.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
|
65
|
+
self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
|
66
|
+
self.address = "127.0.0.1"
|
67
|
+
self.port = 3312
|
68
|
+
self.allow_star = false
|
69
|
+
self.min_prefix_len = 1
|
70
|
+
self.min_infix_len = 1
|
71
|
+
self.mem_limit = "64M"
|
72
|
+
self.max_matches = 1000
|
73
|
+
self.morphology = "stem_en"
|
74
|
+
self.charset_type = "utf-8"
|
75
|
+
self.charset_table = nil
|
76
|
+
self.ignore_chars = nil
|
77
|
+
self.html_strip = false
|
78
|
+
self.html_remove_elements = ""
|
66
79
|
|
67
80
|
parse_config
|
68
81
|
end
|
@@ -84,7 +97,7 @@ module ThinkingSphinx
|
|
84
97
|
def build(file_path=nil)
|
85
98
|
load_models
|
86
99
|
file_path ||= "#{self.config_file}"
|
87
|
-
database_confs = YAML
|
100
|
+
database_confs = YAML::load(ERB.new(IO.read("#{app_root}/config/database.yml")).result)
|
88
101
|
database_confs.symbolize_keys!
|
89
102
|
database_conf = database_confs[environment.to_sym]
|
90
103
|
database_conf.symbolize_keys!
|
@@ -120,8 +133,8 @@ searchd
|
|
120
133
|
file.write index.to_config(i, database_conf, charset_type)
|
121
134
|
|
122
135
|
create_array_accum if index.adapter == :postgres
|
123
|
-
sources << "#{model.name
|
124
|
-
delta_sources << "#{model.name
|
136
|
+
sources << "#{model.indexes.first.name}_#{i}_core"
|
137
|
+
delta_sources << "#{model.indexes.first.name}_#{i}_delta" if index.delta?
|
125
138
|
end
|
126
139
|
|
127
140
|
source_list = sources.collect { |s| "source = #{s}" }.join("\n")
|
@@ -152,9 +165,10 @@ searchd
|
|
152
165
|
}
|
153
166
|
|
154
167
|
begin
|
155
|
-
model_name.
|
168
|
+
model_name.camelize.constantize
|
156
169
|
rescue LoadError
|
157
|
-
model_name.gsub(/.*[\/\\]/, '')
|
170
|
+
model_name.gsub!(/.*[\/\\]/, '')
|
171
|
+
retry
|
158
172
|
rescue NameError
|
159
173
|
next
|
160
174
|
end
|
@@ -170,7 +184,7 @@ searchd
|
|
170
184
|
path = "#{app_root}/config/sphinx.yml"
|
171
185
|
return unless File.exists?(path)
|
172
186
|
|
173
|
-
conf = YAML
|
187
|
+
conf = YAML::load(ERB.new(IO.read(path)).result)[environment]
|
174
188
|
|
175
189
|
conf.each do |key,value|
|
176
190
|
self.send("#{key}=", value) if self.methods.include?("#{key}=")
|
@@ -180,23 +194,29 @@ searchd
|
|
180
194
|
def core_index_for_model(model, sources)
|
181
195
|
output = <<-INDEX
|
182
196
|
|
183
|
-
index #{model.name
|
197
|
+
index #{model.indexes.first.name}_core
|
184
198
|
{
|
185
199
|
#{sources}
|
186
|
-
path = #{self.searchd_file_path}/#{model.name
|
200
|
+
path = #{self.searchd_file_path}/#{model.indexes.first.name}_core
|
187
201
|
charset_type = #{self.charset_type}
|
188
202
|
INDEX
|
189
203
|
|
190
|
-
|
204
|
+
morphology = model.indexes.inject(self.morphology) { |morph, index|
|
205
|
+
index.options[:morphology] || morph
|
206
|
+
}
|
207
|
+
output += " morphology = #{morphology}\n" unless morphology.blank?
|
191
208
|
output += " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
|
192
209
|
output += " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?
|
193
210
|
|
194
211
|
if self.allow_star
|
195
212
|
output += " enable_star = 1\n"
|
196
|
-
output += " min_prefix_len =
|
197
|
-
output += " min_infix_len =
|
213
|
+
output += " min_prefix_len = #{self.min_prefix_len}\n"
|
214
|
+
output += " min_infix_len = #{self.min_infix_len}\n"
|
198
215
|
end
|
199
216
|
|
217
|
+
output += " html_strip = 1\n" if self.html_strip
|
218
|
+
output += " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?
|
219
|
+
|
200
220
|
unless model.indexes.collect(&:prefix_fields).flatten.empty?
|
201
221
|
output += " prefix_fields = #{model.indexes.collect(&:prefix_fields).flatten.join(', ')}\n"
|
202
222
|
end
|
@@ -210,22 +230,22 @@ INDEX
|
|
210
230
|
|
211
231
|
def delta_index_for_model(model, sources)
|
212
232
|
<<-INDEX
|
213
|
-
index #{model.name
|
233
|
+
index #{model.indexes.first.name}_delta : #{model.indexes.first.name}_core
|
214
234
|
{
|
215
235
|
#{sources}
|
216
|
-
path = #{self.searchd_file_path}/#{model.name
|
236
|
+
path = #{self.searchd_file_path}/#{model.indexes.first.name}_delta
|
217
237
|
}
|
218
238
|
INDEX
|
219
239
|
end
|
220
240
|
|
221
241
|
def distributed_index_for_model(model)
|
222
|
-
sources = ["local = #{model.name
|
242
|
+
sources = ["local = #{model.indexes.first.name}_core"]
|
223
243
|
if model.indexes.any? { |index| index.delta? }
|
224
|
-
sources << "local = #{model.name
|
244
|
+
sources << "local = #{model.indexes.first.name}_delta"
|
225
245
|
end
|
226
246
|
|
227
247
|
<<-INDEX
|
228
|
-
index #{model.name
|
248
|
+
index #{model.indexes.first.name}
|
229
249
|
{
|
230
250
|
type = distributed
|
231
251
|
#{ sources.join("\n ") }
|