freelancing-god-thinking-sphinx 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  First, if you haven't done so already, check out the main usage[http://ts.freelancing-gods.com/usage.html] page. Once you've done that, the next place to look for information is the specific method docs - ThinkingSphinx::Search and ThinkingSphinx::Index::Builder in particular.
6
6
 
7
+ Keep in mind that while Thinking Sphinx works for ActiveRecord with Merb, it doesn't yet support DataMapper (although that is planned).
8
+
7
9
  == Contributing
8
10
 
9
11
  Fork on GitHub and after you've committed tested patches, send a pull request.
@@ -44,4 +46,12 @@ Since I first released this library, there's been quite a few people who have su
44
46
  - James Healy
45
47
  - Jae-Jun Hwang
46
48
  - Xavier Shay
47
- - Jason Rust
49
+ - Jason Rust
50
+ - Gopal Patel
51
+ - Chris Heald
52
+ - Peter Vandenberk
53
+ - Josh French
54
+ - Andrew Bennett
55
+ - Jordan Fowler
56
+ - Seth Walker
57
+ - Joe Noon
data/lib/riddle.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'socket'
2
+ require 'timeout'
2
3
  require 'riddle/client'
3
4
  require 'riddle/client/filter'
4
5
  require 'riddle/client/message'
@@ -9,14 +10,17 @@ module Riddle #:nodoc:
9
10
  end
10
11
 
11
12
  module Version #:nodoc:
12
- Major = 0
13
- Minor = 9
14
- Tiny = 8
13
+ Major = 0
14
+ Minor = 9
15
+ Tiny = 8
15
16
  # Revision number for RubyForge's sake, taken from what Sphinx
16
17
  # outputs to the command line.
17
- Rev = 1198
18
+ Rev = 1371
19
+ # Release number to mark my own fixes, beyond feature parity with
20
+ # Sphinx itself.
21
+ Release = 0
18
22
 
19
- String = [Major, Minor, Tiny].join('.') + "rc1"
20
- GemVersion = [Major, Minor, Tiny, Rev].join('.')
23
+ String = [Major, Minor, Tiny].join('.')
24
+ GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
21
25
  end
22
26
  end
data/lib/riddle/client.rb CHANGED
@@ -100,7 +100,7 @@ module Riddle
100
100
  :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
101
101
  :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
102
102
  :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
103
- :max_query_time, :field_weights
103
+ :max_query_time, :field_weights, :timeout
104
104
  attr_reader :queue
105
105
 
106
106
  # Can instantiate with a specific server and port - otherwise it assumes
@@ -134,10 +134,40 @@ module Riddle
134
134
  @max_query_time = 0
135
135
  # string keys are field names, integer values are weightings
136
136
  @field_weights = {}
137
+ @timeout = 0
137
138
 
138
139
  @queue = []
139
140
  end
140
141
 
142
+ # Reset attributes and settings to defaults.
143
+ def reset
144
+ # defaults
145
+ @offset = 0
146
+ @limit = 20
147
+ @max_matches = 1000
148
+ @match_mode = :all
149
+ @sort_mode = :relevance
150
+ @sort_by = ''
151
+ @weights = []
152
+ @id_range = 0..0
153
+ @filters = []
154
+ @group_by = ''
155
+ @group_function = :day
156
+ @group_clause = '@group desc'
157
+ @group_distinct = ''
158
+ @cut_off = 0
159
+ @retry_count = 0
160
+ @retry_delay = 0
161
+ @anchor = {}
162
+ # string keys are index names, integer values are weightings
163
+ @index_weights = {}
164
+ @rank_mode = :proximity_bm25
165
+ @max_query_time = 0
166
+ # string keys are field names, integer values are weightings
167
+ @field_weights = {}
168
+ @timeout = 0
169
+ end
170
+
141
171
  # Set the geo-anchor point - with the names of the attributes that contain
142
172
  # the latitude and longitude (in radians), and the reference position.
143
173
  # Note that for geocoding to work properly, you must also set
@@ -384,6 +414,26 @@ module Riddle
384
414
  # Connects to the Sphinx daemon, and yields a socket to use. The socket is
385
415
  # closed at the end of the block.
386
416
  def connect(&block)
417
+ socket = nil
418
+ if @timeout == 0
419
+ socket = initialise_connection
420
+ else
421
+ begin
422
+ Timeout.timeout(@timeout) { socket = initialise_connection }
423
+ rescue Timeout::Error
424
+ raise Riddle::ConnectionError,
425
+ "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
426
+ end
427
+ end
428
+
429
+ begin
430
+ yield socket
431
+ ensure
432
+ socket.close
433
+ end
434
+ end
435
+
436
+ def initialise_connection
387
437
  socket = TCPSocket.new @server, @port
388
438
 
389
439
  # Checking version
@@ -396,11 +446,7 @@ module Riddle
396
446
  # Send version
397
447
  socket.send [1].pack('N'), 0
398
448
 
399
- begin
400
- yield socket
401
- ensure
402
- socket.close
403
- end
449
+ socket
404
450
  end
405
451
 
406
452
  # Send a collection of messages, for a command type (eg, search, excerpts,
@@ -507,7 +553,7 @@ module Riddle
507
553
  # Per Index Weights
508
554
  message.append_int @index_weights.length
509
555
  @index_weights.each do |key,val|
510
- message.append_string key
556
+ message.append_string key.to_s
511
557
  message.append_int val
512
558
  end
513
559
 
@@ -517,7 +563,7 @@ module Riddle
517
563
  # Per Field Weights
518
564
  message.append_int @field_weights.length
519
565
  @field_weights.each do |key,val|
520
- message.append_string key
566
+ message.append_string key.to_s
521
567
  message.append_int val
522
568
  end
523
569
 
@@ -18,7 +18,7 @@ module Riddle
18
18
  def query_message
19
19
  message = Message.new
20
20
 
21
- message.append_string self.attribute
21
+ message.append_string self.attribute.to_s
22
22
  case self.values
23
23
  when Range
24
24
  if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
@@ -20,7 +20,7 @@ module ThinkingSphinx
20
20
  module Version #:nodoc:
21
21
  Major = 0
22
22
  Minor = 9
23
- Tiny = 7
23
+ Tiny = 8
24
24
 
25
25
  String = [Major, Minor, Tiny].join('.')
26
26
  end
@@ -52,10 +52,12 @@ module ThinkingSphinx
52
52
  @@define_indexes = value
53
53
  end
54
54
 
55
+ @@deltas_enabled = nil
56
+
55
57
  # Check if delta indexing is enabled.
56
58
  #
57
59
  def self.deltas_enabled?
58
- @@deltas_enabled = true unless defined?(@@deltas_enabled)
60
+ @@deltas_enabled = (ThinkingSphinx::Configuration.environment != 'test') if @@deltas_enabled.nil?
59
61
  @@deltas_enabled == true
60
62
  end
61
63
 
@@ -79,4 +81,4 @@ module ThinkingSphinx
79
81
  "SELECT @@global.sql_mode, @@session.sql_mode;"
80
82
  ).all? { |key,value| value.nil? || value[/ONLY_FULL_GROUP_BY/].nil? }
81
83
  end
82
- end
84
+ end
@@ -10,9 +10,8 @@ module ThinkingSphinx
10
10
  module ActiveRecord
11
11
  def self.included(base)
12
12
  base.class_eval do
13
+ class_inheritable_array :indexes
13
14
  class << self
14
- attr_accessor :indexes
15
-
16
15
  # Allows creation of indexes for Sphinx. If you don't do this, there
17
16
  # isn't much point trying to search (or using this plugin at all,
18
17
  # really).
@@ -65,10 +64,10 @@ module ThinkingSphinx
65
64
  def define_index(&block)
66
65
  return unless ThinkingSphinx.define_indexes?
67
66
 
68
- @indexes ||= []
67
+ self.indexes ||= []
69
68
  index = Index.new(self, &block)
70
69
 
71
- @indexes << index
70
+ self.indexes << index
72
71
  unless ThinkingSphinx.indexed_models.include?(self.name)
73
72
  ThinkingSphinx.indexed_models << self.name
74
73
  end
@@ -114,21 +113,27 @@ module ThinkingSphinx
114
113
  )
115
114
  end
116
115
 
116
+ def in_core_index?
117
+ @in_core_index ||= self.class.search_for_id(self.id, "#{self.class.name.downcase}_core")
118
+ end
119
+
117
120
  def toggle_deleted
118
121
  config = ThinkingSphinx::Configuration.new
119
122
  client = Riddle::Client.new config.address, config.port
120
123
 
121
124
  client.update(
122
- "#{self.class.name.downcase}_core",
125
+ "#{self.class.indexes.first.name}_core",
123
126
  ['sphinx_deleted'],
124
127
  {self.id => 1}
125
- )
128
+ ) if self.in_core_index?
126
129
 
127
130
  client.update(
128
- "#{self.class.name.downcase}_delta",
131
+ "#{self.class.indexes.first.name}_delta",
129
132
  ['sphinx_deleted'],
130
133
  {self.id => 1}
131
- ) if self.class.indexes.any? { |index| index.delta? }
134
+ ) if ThinkingSphinx.deltas_enabled? &&
135
+ self.class.indexes.any? { |index| index.delta? } &&
136
+ self.delta?
132
137
  end
133
138
  end
134
- end
139
+ end
@@ -73,13 +73,10 @@ module ThinkingSphinx
73
73
  # if running in the test environment.
74
74
  #
75
75
  def index_delta
76
- if ThinkingSphinx::Configuration.environment == "test" ||
77
- !ThinkingSphinx.deltas_enabled?
78
- return true
79
- end
76
+ return true unless ThinkingSphinx.deltas_enabled?
80
77
 
81
78
  configuration = ThinkingSphinx::Configuration.new
82
- system "indexer --config #{configuration.config_file} --rotate #{self.class.name.downcase}_delta"
79
+ system "indexer --config #{configuration.config_file} --rotate #{self.class.indexes.first.name}_delta"
83
80
 
84
81
  true
85
82
  end
@@ -28,6 +28,13 @@ module ThinkingSphinx
28
28
  args << options
29
29
  ThinkingSphinx::Search.search(*args)
30
30
  end
31
+
32
+ def search_for_id(*args)
33
+ options = args.extract_options!
34
+ options[:class] = self
35
+ args << options
36
+ ThinkingSphinx::Search.search_for_id(*args)
37
+ end
31
38
  end
32
39
  end
33
40
  end
@@ -1,30 +1,38 @@
1
+ require 'erb'
2
+
1
3
  module ThinkingSphinx
2
4
  # This class both keeps track of the configuration settings for Sphinx and
3
5
  # also generates the resulting file for Sphinx to use.
4
6
  #
5
7
  # Here are the default settings, relative to RAILS_ROOT where relevant:
6
8
  #
7
- # config file:: config/#{environment}.sphinx.conf
8
- # searchd log file:: log/searchd.log
9
- # query log file:: log/searchd.query.log
10
- # pid file:: log/searchd.#{environment}.pid
11
- # searchd files:: db/sphinx/#{environment}/
12
- # address:: 0.0.0.0 (all)
13
- # port:: 3312
14
- # allow star:: false
15
- # mem limit:: 64M
16
- # max matches:: 1000
17
- # morphology:: stem_en
18
- # charset type:: utf-8
19
- # charset table:: nil
20
- # ignore chars:: nil
9
+ # config file:: config/#{environment}.sphinx.conf
10
+ # searchd log file:: log/searchd.log
11
+ # query log file:: log/searchd.query.log
12
+ # pid file:: log/searchd.#{environment}.pid
13
+ # searchd files:: db/sphinx/#{environment}/
14
+ # address:: 127.0.0.1
15
+ # port:: 3312
16
+ # allow star:: false
17
+ # min prefix length:: 1
18
+ # min infix length:: 1
19
+ # mem limit:: 64M
20
+ # max matches:: 1000
21
+ # morphology:: stem_en
22
+ # charset type:: utf-8
23
+ # charset table:: nil
24
+ # ignore chars:: nil
25
+ # html strip:: false
26
+ # html remove elements:: ''
21
27
  #
22
28
  # If you want to change these settings, create a YAML file at
23
29
  # config/sphinx.yml with settings for each environment, in a similar
24
30
  # fashion to database.yml - using the following keys: config_file,
25
31
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
26
- # allow_star, mem_limit, max_matches, morphology, charset_type,
27
- # charset_table, ignore_chars. I think you've got the idea.
32
+ # allow_star, min_prefix_len, min_infix_len, mem_limit, max_matches,
33
+ # morphology, charset_type, charset_table, ignore_chars, html_strip,
34
+ # html_remove_elements. I think you've got
35
+ # the idea.
28
36
  #
29
37
  # Each setting in the YAML file is optional - so only put in the ones you
30
38
  # want to change.
@@ -35,9 +43,10 @@ module ThinkingSphinx
35
43
  #
36
44
  class Configuration
37
45
  attr_accessor :config_file, :searchd_log_file, :query_log_file,
38
- :pid_file, :searchd_file_path, :address, :port, :allow_star, :mem_limit,
39
- :max_matches, :morphology, :charset_type, :charset_table, :ignore_chars,
40
- :app_root
46
+ :pid_file, :searchd_file_path, :address, :port, :allow_star,
47
+ :min_prefix_len, :min_infix_len, :mem_limit, :max_matches, :morphology,
48
+ :charset_type, :charset_table, :ignore_chars, :html_strip,
49
+ :html_remove_elements, :app_root
41
50
 
42
51
  attr_reader :environment
43
52
 
@@ -49,20 +58,24 @@ module ThinkingSphinx
49
58
  self.app_root = Merb.root if defined?(Merb)
50
59
  self.app_root ||= app_root
51
60
 
52
- self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
53
- self.searchd_log_file = "#{self.app_root}/log/searchd.log"
54
- self.query_log_file = "#{self.app_root}/log/searchd.query.log"
55
- self.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
56
- self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
57
- self.address = "0.0.0.0"
58
- self.port = 3312
59
- self.allow_star = false
60
- self.mem_limit = "64M"
61
- self.max_matches = 1000
62
- self.morphology = "stem_en"
63
- self.charset_type = "utf-8"
64
- self.charset_table = nil
65
- self.ignore_chars = nil
61
+ self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
62
+ self.searchd_log_file = "#{self.app_root}/log/searchd.log"
63
+ self.query_log_file = "#{self.app_root}/log/searchd.query.log"
64
+ self.pid_file = "#{self.app_root}/log/searchd.#{environment}.pid"
65
+ self.searchd_file_path = "#{self.app_root}/db/sphinx/#{environment}"
66
+ self.address = "127.0.0.1"
67
+ self.port = 3312
68
+ self.allow_star = false
69
+ self.min_prefix_len = 1
70
+ self.min_infix_len = 1
71
+ self.mem_limit = "64M"
72
+ self.max_matches = 1000
73
+ self.morphology = "stem_en"
74
+ self.charset_type = "utf-8"
75
+ self.charset_table = nil
76
+ self.ignore_chars = nil
77
+ self.html_strip = false
78
+ self.html_remove_elements = ""
66
79
 
67
80
  parse_config
68
81
  end
@@ -84,7 +97,7 @@ module ThinkingSphinx
84
97
  def build(file_path=nil)
85
98
  load_models
86
99
  file_path ||= "#{self.config_file}"
87
- database_confs = YAML.load(File.open("#{app_root}/config/database.yml"))
100
+ database_confs = YAML::load(ERB.new(IO.read("#{app_root}/config/database.yml")).result)
88
101
  database_confs.symbolize_keys!
89
102
  database_conf = database_confs[environment.to_sym]
90
103
  database_conf.symbolize_keys!
@@ -120,8 +133,8 @@ searchd
120
133
  file.write index.to_config(i, database_conf, charset_type)
121
134
 
122
135
  create_array_accum if index.adapter == :postgres
123
- sources << "#{model.name.downcase}_#{i}_core"
124
- delta_sources << "#{model.name.downcase}_#{i}_delta" if index.delta?
136
+ sources << "#{model.indexes.first.name}_#{i}_core"
137
+ delta_sources << "#{model.indexes.first.name}_#{i}_delta" if index.delta?
125
138
  end
126
139
 
127
140
  source_list = sources.collect { |s| "source = #{s}" }.join("\n")
@@ -152,9 +165,10 @@ searchd
152
165
  }
153
166
 
154
167
  begin
155
- model_name.classify.constantize
168
+ model_name.camelize.constantize
156
169
  rescue LoadError
157
- model_name.gsub(/.*[\/\\]/, '').classify.constantize
170
+ model_name.gsub!(/.*[\/\\]/, '')
171
+ retry
158
172
  rescue NameError
159
173
  next
160
174
  end
@@ -170,7 +184,7 @@ searchd
170
184
  path = "#{app_root}/config/sphinx.yml"
171
185
  return unless File.exists?(path)
172
186
 
173
- conf = YAML.load(File.open(path))[environment]
187
+ conf = YAML::load(ERB.new(IO.read(path)).result)[environment]
174
188
 
175
189
  conf.each do |key,value|
176
190
  self.send("#{key}=", value) if self.methods.include?("#{key}=")
@@ -180,23 +194,29 @@ searchd
180
194
  def core_index_for_model(model, sources)
181
195
  output = <<-INDEX
182
196
 
183
- index #{model.name.downcase}_core
197
+ index #{model.indexes.first.name}_core
184
198
  {
185
199
  #{sources}
186
- path = #{self.searchd_file_path}/#{model.name.downcase}_core
200
+ path = #{self.searchd_file_path}/#{model.indexes.first.name}_core
187
201
  charset_type = #{self.charset_type}
188
202
  INDEX
189
203
 
190
- output += " morphology = #{self.morphology}\n" unless self.morphology.blank?
204
+ morphology = model.indexes.inject(self.morphology) { |morph, index|
205
+ index.options[:morphology] || morph
206
+ }
207
+ output += " morphology = #{morphology}\n" unless morphology.blank?
191
208
  output += " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
192
209
  output += " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?
193
210
 
194
211
  if self.allow_star
195
212
  output += " enable_star = 1\n"
196
- output += " min_prefix_len = 1\n"
197
- output += " min_infix_len = 1\n"
213
+ output += " min_prefix_len = #{self.min_prefix_len}\n"
214
+ output += " min_infix_len = #{self.min_infix_len}\n"
198
215
  end
199
216
 
217
+ output += " html_strip = 1\n" if self.html_strip
218
+ output += " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?
219
+
200
220
  unless model.indexes.collect(&:prefix_fields).flatten.empty?
201
221
  output += " prefix_fields = #{model.indexes.collect(&:prefix_fields).flatten.join(', ')}\n"
202
222
  end
@@ -210,22 +230,22 @@ INDEX
210
230
 
211
231
  def delta_index_for_model(model, sources)
212
232
  <<-INDEX
213
- index #{model.name.downcase}_delta : #{model.name.downcase}_core
233
+ index #{model.indexes.first.name}_delta : #{model.indexes.first.name}_core
214
234
  {
215
235
  #{sources}
216
- path = #{self.searchd_file_path}/#{model.name.downcase}_delta
236
+ path = #{self.searchd_file_path}/#{model.indexes.first.name}_delta
217
237
  }
218
238
  INDEX
219
239
  end
220
240
 
221
241
  def distributed_index_for_model(model)
222
- sources = ["local = #{model.name.downcase}_core"]
242
+ sources = ["local = #{model.indexes.first.name}_core"]
223
243
  if model.indexes.any? { |index| index.delta? }
224
- sources << "local = #{model.name.downcase}_delta"
244
+ sources << "local = #{model.indexes.first.name}_delta"
225
245
  end
226
246
 
227
247
  <<-INDEX
228
- index #{model.name.downcase}
248
+ index #{model.indexes.first.name}
229
249
  {
230
250
  type = distributed
231
251
  #{ sources.join("\n ") }