freelancing-god-thinking-sphinx 0.9.7 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  First, if you haven't done so already, check out the main usage[http://ts.freelancing-gods.com/usage.html] page. Once you've done that, the next place to look for information is the specific method docs - ThinkingSphinx::Search and ThinkingSphinx::Index::Builder in particular.
6
6
 
7
+ Keep in mind that while Thinking Sphinx works for ActiveRecord with Merb, it doesn't yet support DataMapper (although that is planned).
8
+
7
9
  == Contributing
8
10
 
9
11
  Fork on GitHub and after you've committed tested patches, send a pull request.
@@ -44,4 +46,12 @@ Since I first released this library, there's been quite a few people who have su
44
46
  - James Healy
45
47
  - Jae-Jun Hwang
46
48
  - Xavier Shay
47
- - Jason Rust
49
+ - Jason Rust
50
+ - Gopal Patel
51
+ - Chris Heald
52
+ - Peter Vandenberk
53
+ - Josh French
54
+ - Andrew Bennett
55
+ - Jordan Fowler
56
+ - Seth Walker
57
+ - Joe Noon
data/lib/riddle.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'socket'
2
+ require 'timeout'
2
3
  require 'riddle/client'
3
4
  require 'riddle/client/filter'
4
5
  require 'riddle/client/message'
@@ -9,14 +10,17 @@ module Riddle #:nodoc:
9
10
  end
10
11
 
11
12
  module Version #:nodoc:
12
- Major = 0
13
- Minor = 9
14
- Tiny = 8
13
+ Major = 0
14
+ Minor = 9
15
+ Tiny = 8
15
16
  # Revision number for RubyForge's sake, taken from what Sphinx
16
17
  # outputs to the command line.
17
- Rev = 1198
18
+ Rev = 1371
19
+ # Release number to mark my own fixes, beyond feature parity with
20
+ # Sphinx itself.
21
+ Release = 0
18
22
 
19
- String = [Major, Minor, Tiny].join('.') + "rc1"
20
- GemVersion = [Major, Minor, Tiny, Rev].join('.')
23
+ String = [Major, Minor, Tiny].join('.')
24
+ GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
21
25
  end
22
26
  end
data/lib/riddle/client.rb CHANGED
@@ -100,7 +100,7 @@ module Riddle
100
100
  :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
101
101
  :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
102
102
  :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
103
- :max_query_time, :field_weights
103
+ :max_query_time, :field_weights, :timeout
104
104
  attr_reader :queue
105
105
 
106
106
  # Can instantiate with a specific server and port - otherwise it assumes
@@ -134,10 +134,40 @@ module Riddle
134
134
  @max_query_time = 0
135
135
  # string keys are field names, integer values are weightings
136
136
  @field_weights = {}
137
+ @timeout = 0
137
138
 
138
139
  @queue = []
139
140
  end
140
141
 
142
+ # Reset attributes and settings to defaults.
143
+ def reset
144
+ # defaults
145
+ @offset = 0
146
+ @limit = 20
147
+ @max_matches = 1000
148
+ @match_mode = :all
149
+ @sort_mode = :relevance
150
+ @sort_by = ''
151
+ @weights = []
152
+ @id_range = 0..0
153
+ @filters = []
154
+ @group_by = ''
155
+ @group_function = :day
156
+ @group_clause = '@group desc'
157
+ @group_distinct = ''
158
+ @cut_off = 0
159
+ @retry_count = 0
160
+ @retry_delay = 0
161
+ @anchor = {}
162
+ # string keys are index names, integer values are weightings
163
+ @index_weights = {}
164
+ @rank_mode = :proximity_bm25
165
+ @max_query_time = 0
166
+ # string keys are field names, integer values are weightings
167
+ @field_weights = {}
168
+ @timeout = 0
169
+ end
170
+
141
171
  # Set the geo-anchor point - with the names of the attributes that contain
142
172
  # the latitude and longitude (in radians), and the reference position.
143
173
  # Note that for geocoding to work properly, you must also set
@@ -384,6 +414,26 @@ module Riddle
384
414
  # Connects to the Sphinx daemon, and yields a socket to use. The socket is
385
415
  # closed at the end of the block.
386
416
  def connect(&block)
417
+ socket = nil
418
+ if @timeout == 0
419
+ socket = initialise_connection
420
+ else
421
+ begin
422
+ Timeout.timeout(@timeout) { socket = initialise_connection }
423
+ rescue Timeout::Error
424
+ raise Riddle::ConnectionError,
425
+ "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
426
+ end
427
+ end
428
+
429
+ begin
430
+ yield socket
431
+ ensure
432
+ socket.close
433
+ end
434
+ end
435
+
436
+ def initialise_connection
387
437
  socket = TCPSocket.new @server, @port
388
438
 
389
439
  # Checking version
@@ -396,11 +446,7 @@ module Riddle
396
446
  # Send version
397
447
  socket.send [1].pack('N'), 0
398
448
 
399
- begin
400
- yield socket
401
- ensure
402
- socket.close
403
- end
449
+ socket
404
450
  end
405
451
 
406
452
  # Send a collection of messages, for a command type (eg, search, excerpts,
@@ -507,7 +553,7 @@ module Riddle
507
553
  # Per Index Weights
508
554
  message.append_int @index_weights.length
509
555
  @index_weights.each do |key,val|
510
- message.append_string key
556
+ message.append_string key.to_s
511
557
  message.append_int val
512
558
  end
513
559
 
@@ -517,7 +563,7 @@ module Riddle
517
563
  # Per Field Weights
518
564
  message.append_int @field_weights.length
519
565
  @field_weights.each do |key,val|
520
- message.append_string key
566
+ message.append_string key.to_s
521
567
  message.append_int val
522
568
  end
523
569
 
@@ -18,7 +18,7 @@ module Riddle
18
18
  def query_message
19
19
  message = Message.new
20
20
 
21
- message.append_string self.attribute
21
+ message.append_string self.attribute.to_s
22
22
  case self.values
23
23
  when Range
24
24
  if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
@@ -20,7 +20,7 @@ module ThinkingSphinx
20
20
  module Version #:nodoc:
21
21
  Major = 0
22
22
  Minor = 9
23
- Tiny = 7
23
+ Tiny = 8
24
24
 
25
25
  String = [Major, Minor, Tiny].join('.')
26
26
  end
@@ -52,10 +52,12 @@ module ThinkingSphinx
52
52
  @@define_indexes = value
53
53
  end
54
54
 
55
+ @@deltas_enabled = nil
56
+
55
57
  # Check if delta indexing is enabled.
56
58
  #
57
59
  def self.deltas_enabled?
58
- @@deltas_enabled = true unless defined?(@@deltas_enabled)
60
+ @@deltas_enabled = (ThinkingSphinx::Configuration.environment != 'test') if @@deltas_enabled.nil?
59
61
  @@deltas_enabled == true
60
62
  end
61
63
 
@@ -79,4 +81,4 @@ module ThinkingSphinx
79
81
  "SELECT @@global.sql_mode, @@session.sql_mode;"
80
82
  ).all? { |key,value| value.nil? || value[/ONLY_FULL_GROUP_BY/].nil? }
81
83
  end
82
- end
84
+ end
@@ -10,9 +10,8 @@ module ThinkingSphinx
10
10
  module ActiveRecord
11
11
  def self.included(base)
12
12
  base.class_eval do
13
+ class_inheritable_array :indexes
13
14
  class << self
14
- attr_accessor :indexes
15
-
16
15
  # Allows creation of indexes for Sphinx. If you don't do this, there
17
16
  # isn't much point trying to search (or using this plugin at all,
18
17
  # really).
@@ -65,10 +64,10 @@ module ThinkingSphinx
65
64
  def define_index(&block)
66
65
  return unless ThinkingSphinx.define_indexes?
67
66
 
68
- @indexes ||= []
67
+ self.indexes ||= []
69
68
  index = Index.new(self, &block)
70
69
 
71
- @indexes << index
70
+ self.indexes << index
72
71
  unless ThinkingSphinx.indexed_models.include?(self.name)
73
72
  ThinkingSphinx.indexed_models << self.name
74
73
  end
@@ -114,21 +113,27 @@ module ThinkingSphinx
114
113
  )
115
114
  end
116
115
 
116
+ def in_core_index?
117
+ @in_core_index ||= self.class.search_for_id(self.id, "#{self.class.name.downcase}_core")
118
+ end
119
+
117
120
  def toggle_deleted
118
121
  config = ThinkingSphinx::Configuration.new
119
122
  client = Riddle::Client.new config.address, config.port
120
123
 
121
124
  client.update(
122
- "#{self.class.name.downcase}_core",
125
+ "#{self.class.indexes.first.name}_core",
123
126
  ['sphinx_deleted'],
124
127
  {self.id => 1}
125
- )
128
+ ) if self.in_core_index?
126
129
 
127
130
  client.update(
128
- "#{self.class.name.downcase}_delta",
131
+ "#{self.class.indexes.first.name}_delta",
129
132
  ['sphinx_deleted'],
130
133
  {self.id => 1}
131
- ) if self.class.indexes.any? { |index| index.delta? }
134
+ ) if ThinkingSphinx.deltas_enabled? &&
135
+ self.class.indexes.any? { |index| index.delta? } &&
136
+ self.delta?
132
137
  end
133
138
  end
134
- end
139
+ end
@@ -73,13 +73,10 @@ module ThinkingSphinx
73
73
  # if running in the test environment.
74
74
  #
75
75
  def index_delta
76
- if ThinkingSphinx::Configuration.environment == "test" ||
77
- !ThinkingSphinx.deltas_enabled?
78
- return true
79
- end
76
+ return true unless ThinkingSphinx.deltas_enabled?
80
77
 
81
78
  configuration = ThinkingSphinx::Configuration.new
82
- system "indexer --config #{configuration.config_file} --rotate #{self.class.name.downcase}_delta"
79
+ system "indexer --config #{configuration.config_file} --rotate #{self.class.indexes.first.name}_delta"
83
80
 
84
81
  true
85
82
  end
@@ -28,6 +28,13 @@ module ThinkingSphinx
28
28
  args << options
29
29
  ThinkingSphinx::Search.search(*args)
30
30
  end
31
+
32
+ def search_for_id(*args)
33
+ options = args.extract_options!
34
+ options[:class] = self
35
+ args << options
36
+ ThinkingSphinx::Search.search_for_id(*args)
37
+ end
31
38
  end
32
39
  end
33
40
  end
@@ -1,30 +1,38 @@
1
+ require 'erb'
2
+
1
3
  module ThinkingSphinx
2
4
  # This class both keeps track of the configuration settings for Sphinx and
3
5
  # also generates the resulting file for Sphinx to use.
4
6
  #
5
7
  # Here are the default settings, relative to RAILS_ROOT where relevant:
6
8
  #
7
- # config file:: config/#{environment}.sphinx.conf
8
- # searchd log file:: log/searchd.log
9
- # query log file:: log/searchd.query.log
10
- # pid file:: log/searchd.#{environment}.pid
11
- # searchd files:: db/sphinx/#{environment}/
12
- # address:: 0.0.0.0 (all)
13
- # port:: 3312
14
- # allow star:: false
15
- # mem limit:: 64M
16
- # max matches:: 1000
17
- # morphology:: stem_en
18
- # charset type:: utf-8
19
- # charset table:: nil
20
- # ignore chars:: nil
9
+ # config file:: config/#{environment}.sphinx.conf
10
+ # searchd log file:: log/searchd.log
11
+ # query log file:: log/searchd.query.log
12
+ # pid file:: log/searchd.#{environment}.pid
13
+ # searchd files:: db/sphinx/#{environment}/
14
+ # address:: 127.0.0.1
15
+ # port:: 3312
16
+ # allow star:: false
17
+ # min prefix length:: 1
18
+ # min infix length:: 1
19
+ # mem limit:: 64M
20
+ # max matches:: 1000
21
+ # morphology:: stem_en
22
+ # charset type:: utf-8
23
+ # charset table:: nil
24
+ # ignore chars:: nil
25
+ # html strip:: false
26
+ # html remove elements:: ''
21
27
  #
22
28
  # If you want to change these settings, create a YAML file at
23
29
  # config/sphinx.yml with settings for each environment, in a similar
24
30
  # fashion to database.yml - using the following keys: config_file,
25
31
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
26
- # allow_star, mem_limit, max_matches, morphology, charset_type,
27
- # charset_table, ignore_chars. I think you've got the idea.
32
+ # allow_star, min_prefix_len, min_infix_len, mem_limit, max_matches,
33
+ # morphology, charset_type, charset_table, ignore_chars, html_strip,
34
+ # html_remove_elements. I think you've got
35
+ # the idea.
28
36
  #
29
37
  # Each setting in the YAML file is optional - so only put in the ones you
30
38
  # want to change.
@@ -35,9 +43,10 @@ module ThinkingSphinx
35
43
  #
36
44
  class Configuration
37
45
  attr_accessor :config_file, :searchd_log_file, :query_log_file,
38
- :pid_file, :searchd_file_path, :address, :port, :allow_star, :mem_limit,
39
- :max_matches, :morphology, :charset_type, :charset_table, :ignore_chars,
40
- :app_root
46
+ :pid_file, :searchd_file_path, :address, :port, :allow_star,
47
+ :min_prefix_len, :min_infix_len, :mem_limit, :max_matches, :morphology,
48
+ :charset_type, :charset_table, :ignore_chars, :html_strip,
49
+ :html_remove_elements, :app_root
41
50
 
42
51
  attr_reader :environment
43
52
 
@@ -49,20 +58,24 @@ module ThinkingSphinx
49
58
  self.app_root = Merb.root if defined?(Merb)
50
59
  self.app_root ||= app_root
51
60
 
52
- self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
53
- self.searchd_log_file = "#{self.app_root}/log/searchd.log"
54
- self.query_log_file = "#{self.app_root}/log/searchd.query.log"
55
- self.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
56
- self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
57
- self.address = "0.0.0.0"
58
- self.port = 3312
59
- self.allow_star = false
60
- self.mem_limit = "64M"
61
- self.max_matches = 1000
62
- self.morphology = "stem_en"
63
- self.charset_type = "utf-8"
64
- self.charset_table = nil
65
- self.ignore_chars = nil
61
+ self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
62
+ self.searchd_log_file = "#{self.app_root}/log/searchd.log"
63
+ self.query_log_file = "#{self.app_root}/log/searchd.query.log"
64
+ self.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
65
+ self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
66
+ self.address = "127.0.0.1"
67
+ self.port = 3312
68
+ self.allow_star = false
69
+ self.min_prefix_len = 1
70
+ self.min_infix_len = 1
71
+ self.mem_limit = "64M"
72
+ self.max_matches = 1000
73
+ self.morphology = "stem_en"
74
+ self.charset_type = "utf-8"
75
+ self.charset_table = nil
76
+ self.ignore_chars = nil
77
+ self.html_strip = false
78
+ self.html_remove_elements = ""
66
79
 
67
80
  parse_config
68
81
  end
@@ -84,7 +97,7 @@ module ThinkingSphinx
84
97
  def build(file_path=nil)
85
98
  load_models
86
99
  file_path ||= "#{self.config_file}"
87
- database_confs = YAML.load(File.open("#{app_root}/config/database.yml"))
100
+ database_confs = YAML::load(ERB.new(IO.read("#{app_root}/config/database.yml")).result)
88
101
  database_confs.symbolize_keys!
89
102
  database_conf = database_confs[environment.to_sym]
90
103
  database_conf.symbolize_keys!
@@ -120,8 +133,8 @@ searchd
120
133
  file.write index.to_config(i, database_conf, charset_type)
121
134
 
122
135
  create_array_accum if index.adapter == :postgres
123
- sources << "#{model.name.downcase}_#{i}_core"
124
- delta_sources << "#{model.name.downcase}_#{i}_delta" if index.delta?
136
+ sources << "#{model.indexes.first.name}_#{i}_core"
137
+ delta_sources << "#{model.indexes.first.name}_#{i}_delta" if index.delta?
125
138
  end
126
139
 
127
140
  source_list = sources.collect { |s| "source = #{s}" }.join("\n")
@@ -152,9 +165,10 @@ searchd
152
165
  }
153
166
 
154
167
  begin
155
- model_name.classify.constantize
168
+ model_name.camelize.constantize
156
169
  rescue LoadError
157
- model_name.gsub(/.*[\/\\]/, '').classify.constantize
170
+ model_name.gsub!(/.*[\/\\]/, '')
171
+ retry
158
172
  rescue NameError
159
173
  next
160
174
  end
@@ -170,7 +184,7 @@ searchd
170
184
  path = "#{app_root}/config/sphinx.yml"
171
185
  return unless File.exists?(path)
172
186
 
173
- conf = YAML.load(File.open(path))[environment]
187
+ conf = YAML::load(ERB.new(IO.read(path)).result)[environment]
174
188
 
175
189
  conf.each do |key,value|
176
190
  self.send("#{key}=", value) if self.methods.include?("#{key}=")
@@ -180,23 +194,29 @@ searchd
180
194
  def core_index_for_model(model, sources)
181
195
  output = <<-INDEX
182
196
 
183
- index #{model.name.downcase}_core
197
+ index #{model.indexes.first.name}_core
184
198
  {
185
199
  #{sources}
186
- path = #{self.searchd_file_path}/#{model.name.downcase}_core
200
+ path = #{self.searchd_file_path}/#{model.indexes.first.name}_core
187
201
  charset_type = #{self.charset_type}
188
202
  INDEX
189
203
 
190
- output += " morphology = #{self.morphology}\n" unless self.morphology.blank?
204
+ morphology = model.indexes.inject(self.morphology) { |morph, index|
205
+ index.options[:morphology] || morph
206
+ }
207
+ output += " morphology = #{morphology}\n" unless morphology.blank?
191
208
  output += " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
192
209
  output += " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?
193
210
 
194
211
  if self.allow_star
195
212
  output += " enable_star = 1\n"
196
- output += " min_prefix_len = 1\n"
197
- output += " min_infix_len = 1\n"
213
+ output += " min_prefix_len = #{self.min_prefix_len}\n"
214
+ output += " min_infix_len = #{self.min_infix_len}\n"
198
215
  end
199
216
 
217
+ output += " html_strip = 1\n" if self.html_strip
218
+ output += " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?
219
+
200
220
  unless model.indexes.collect(&:prefix_fields).flatten.empty?
201
221
  output += " prefix_fields = #{model.indexes.collect(&:prefix_fields).flatten.join(', ')}\n"
202
222
  end
@@ -210,22 +230,22 @@ INDEX
210
230
 
211
231
  def delta_index_for_model(model, sources)
212
232
  <<-INDEX
213
- index #{model.name.downcase}_delta : #{model.name.downcase}_core
233
+ index #{model.indexes.first.name}_delta : #{model.indexes.first.name}_core
214
234
  {
215
235
  #{sources}
216
- path = #{self.searchd_file_path}/#{model.name.downcase}_delta
236
+ path = #{self.searchd_file_path}/#{model.indexes.first.name}_delta
217
237
  }
218
238
  INDEX
219
239
  end
220
240
 
221
241
  def distributed_index_for_model(model)
222
- sources = ["local = #{model.name.downcase}_core"]
242
+ sources = ["local = #{model.indexes.first.name}_core"]
223
243
  if model.indexes.any? { |index| index.delta? }
224
- sources << "local = #{model.name.downcase}_delta"
244
+ sources << "local = #{model.indexes.first.name}_delta"
225
245
  end
226
246
 
227
247
  <<-INDEX
228
- index #{model.name.downcase}
248
+ index #{model.indexes.first.name}
229
249
  {
230
250
  type = distributed
231
251
  #{ sources.join("\n ") }