DrMark-thinking-sphinx 0.9.9 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. data/README +64 -2
  2. data/lib/thinking_sphinx.rb +88 -11
  3. data/lib/thinking_sphinx/active_record.rb +136 -21
  4. data/lib/thinking_sphinx/active_record/delta.rb +43 -62
  5. data/lib/thinking_sphinx/active_record/has_many_association.rb +1 -1
  6. data/lib/thinking_sphinx/active_record/search.rb +7 -0
  7. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  8. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  9. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +130 -0
  10. data/lib/thinking_sphinx/association.rb +17 -0
  11. data/lib/thinking_sphinx/attribute.rb +171 -97
  12. data/lib/thinking_sphinx/collection.rb +126 -2
  13. data/lib/thinking_sphinx/configuration.rb +120 -171
  14. data/lib/thinking_sphinx/core/string.rb +15 -0
  15. data/lib/thinking_sphinx/deltas.rb +27 -0
  16. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  17. data/lib/thinking_sphinx/deltas/default_delta.rb +67 -0
  18. data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
  19. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  20. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  22. data/lib/thinking_sphinx/facet.rb +58 -0
  23. data/lib/thinking_sphinx/facet_collection.rb +60 -0
  24. data/lib/thinking_sphinx/field.rb +18 -52
  25. data/lib/thinking_sphinx/index.rb +246 -199
  26. data/lib/thinking_sphinx/index/builder.rb +85 -16
  27. data/lib/thinking_sphinx/rails_additions.rb +85 -5
  28. data/lib/thinking_sphinx/search.rb +459 -190
  29. data/lib/thinking_sphinx/tasks.rb +128 -0
  30. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +53 -124
  31. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +2 -2
  32. data/spec/unit/thinking_sphinx/active_record_spec.rb +110 -30
  33. data/spec/unit/thinking_sphinx/attribute_spec.rb +16 -149
  34. data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
  35. data/spec/unit/thinking_sphinx/configuration_spec.rb +54 -412
  36. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  37. data/spec/unit/thinking_sphinx/field_spec.rb +0 -79
  38. data/spec/unit/thinking_sphinx/index/builder_spec.rb +1 -29
  39. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +1 -39
  40. data/spec/unit/thinking_sphinx/index_spec.rb +78 -226
  41. data/spec/unit/thinking_sphinx/search_spec.rb +29 -228
  42. data/spec/unit/thinking_sphinx_spec.rb +23 -19
  43. data/tasks/distribution.rb +48 -0
  44. data/tasks/rails.rake +1 -0
  45. data/tasks/testing.rb +86 -0
  46. data/vendor/after_commit/LICENSE +20 -0
  47. data/vendor/after_commit/README +16 -0
  48. data/vendor/after_commit/Rakefile +22 -0
  49. data/vendor/after_commit/init.rb +8 -0
  50. data/vendor/after_commit/lib/after_commit.rb +45 -0
  51. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  52. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  53. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  54. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  55. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  56. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  57. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  58. data/{lib → vendor/riddle/lib}/riddle.rb +9 -5
  59. data/{lib → vendor/riddle/lib}/riddle/client.rb +6 -26
  60. data/{lib → vendor/riddle/lib}/riddle/client/filter.rb +10 -1
  61. data/{lib → vendor/riddle/lib}/riddle/client/message.rb +0 -0
  62. data/{lib → vendor/riddle/lib}/riddle/client/response.rb +0 -0
  63. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  64. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  65. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  66. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  67. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  68. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  69. data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
  70. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  71. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  72. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  73. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  74. metadata +63 -10
  75. data/lib/test.rb +0 -46
  76. data/tasks/thinking_sphinx_tasks.rake +0 -1
  77. data/tasks/thinking_sphinx_tasks.rb +0 -86
@@ -0,0 +1,7 @@
1
+ module Delayed
2
+ module MessageSending
3
+ def send_later(method, *args)
4
+ Delayed::Job.enqueue Delayed::PerformableMethod.new(self, method.to_sym, args)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,55 @@
1
+ module Delayed
2
+ class PerformableMethod < Struct.new(:object, :method, :args)
3
+ CLASS_STRING_FORMAT = /^CLASS\:([A-Z][\w\:]+)$/
4
+ AR_STRING_FORMAT = /^AR\:([A-Z][\w\:]+)\:(\d+)$/
5
+
6
+ def initialize(object, method, args)
7
+ raise NoMethodError, "undefined method `#{method}' for #{self.inspect}" unless object.respond_to?(method)
8
+
9
+ self.object = dump(object)
10
+ self.args = args.map { |a| dump(a) }
11
+ self.method = method.to_sym
12
+ end
13
+
14
+ def display_name
15
+ case self.object
16
+ when CLASS_STRING_FORMAT then "#{$1}.#{method}"
17
+ when AR_STRING_FORMAT then "#{$1}##{method}"
18
+ else "Unknown##{method}"
19
+ end
20
+ end
21
+
22
+ def perform
23
+ load(object).send(method, *args.map{|a| load(a)})
24
+ rescue ActiveRecord::RecordNotFound
25
+ # We cannot do anything about objects which were deleted in the meantime
26
+ true
27
+ end
28
+
29
+ private
30
+
31
+ def load(arg)
32
+ case arg
33
+ when CLASS_STRING_FORMAT then $1.constantize
34
+ when AR_STRING_FORMAT then $1.constantize.find($2)
35
+ else arg
36
+ end
37
+ end
38
+
39
+ def dump(arg)
40
+ case arg
41
+ when Class then class_to_string(arg)
42
+ when ActiveRecord::Base then ar_to_string(arg)
43
+ else arg
44
+ end
45
+ end
46
+
47
+ def ar_to_string(obj)
48
+ "AR:#{obj.class}:#{obj.id}"
49
+ end
50
+
51
+ def class_to_string(obj)
52
+ "CLASS:#{obj.name}"
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,54 @@
1
+ module Delayed
2
+ class Worker
3
+ SLEEP = 5
4
+
5
+ cattr_accessor :logger
6
+ self.logger = if defined?(Merb::Logger)
7
+ Merb.logger
8
+ elsif defined?(RAILS_DEFAULT_LOGGER)
9
+ RAILS_DEFAULT_LOGGER
10
+ end
11
+
12
+ def initialize(options={})
13
+ @quiet = options[:quiet]
14
+ Delayed::Job.min_priority = options[:min_priority] if options.has_key?(:min_priority)
15
+ Delayed::Job.max_priority = options[:max_priority] if options.has_key?(:max_priority)
16
+ end
17
+
18
+ def start
19
+ say "*** Starting job worker #{Delayed::Job.worker_name}"
20
+
21
+ trap('TERM') { say 'Exiting...'; $exit = true }
22
+ trap('INT') { say 'Exiting...'; $exit = true }
23
+
24
+ loop do
25
+ result = nil
26
+
27
+ realtime = Benchmark.realtime do
28
+ result = Delayed::Job.work_off
29
+ end
30
+
31
+ count = result.sum
32
+
33
+ break if $exit
34
+
35
+ if count.zero?
36
+ sleep(SLEEP)
37
+ else
38
+ say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
39
+ end
40
+
41
+ break if $exit
42
+ end
43
+
44
+ ensure
45
+ Delayed::Job.clear_locks!
46
+ end
47
+
48
+ def say(text)
49
+ puts text unless @quiet
50
+ logger.info text if logger
51
+ end
52
+
53
+ end
54
+ end
@@ -1,9 +1,9 @@
1
1
  require 'socket'
2
2
  require 'timeout'
3
+
3
4
  require 'riddle/client'
4
- require 'riddle/client/filter'
5
- require 'riddle/client/message'
6
- require 'riddle/client/response'
5
+ require 'riddle/configuration'
6
+ require 'riddle/controller'
7
7
 
8
8
  module Riddle #:nodoc:
9
9
  class ConnectionError < StandardError #:nodoc:
@@ -15,12 +15,16 @@ module Riddle #:nodoc:
15
15
  Tiny = 8
16
16
  # Revision number for RubyForge's sake, taken from what Sphinx
17
17
  # outputs to the command line.
18
- Rev = 1371
18
+ Rev = 1533
19
19
  # Release number to mark my own fixes, beyond feature parity with
20
20
  # Sphinx itself.
21
- Release = 0
21
+ Release = 4
22
22
 
23
23
  String = [Major, Minor, Tiny].join('.')
24
24
  GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
25
25
  end
26
+
27
+ def self.escape(string)
28
+ string.gsub(/[\(\)\|\-!@~"&\/]/) { |char| "\\#{char}" }
29
+ end
26
30
  end
@@ -1,3 +1,7 @@
1
+ require 'riddle/client/filter'
2
+ require 'riddle/client/message'
3
+ require 'riddle/client/response'
4
+
1
5
  module Riddle
2
6
  class VersionError < StandardError; end
3
7
  class ResponseError < StandardError; end
@@ -110,31 +114,7 @@ module Riddle
110
114
  @server = server || "localhost"
111
115
  @port = port || 3312
112
116
 
113
- # defaults
114
- @offset = 0
115
- @limit = 20
116
- @max_matches = 1000
117
- @match_mode = :all
118
- @sort_mode = :relevance
119
- @sort_by = ''
120
- @weights = []
121
- @id_range = 0..0
122
- @filters = []
123
- @group_by = ''
124
- @group_function = :day
125
- @group_clause = '@group desc'
126
- @group_distinct = ''
127
- @cut_off = 0
128
- @retry_count = 0
129
- @retry_delay = 0
130
- @anchor = {}
131
- # string keys are index names, integer values are weightings
132
- @index_weights = {}
133
- @rank_mode = :proximity_bm25
134
- @max_query_time = 0
135
- # string keys are field names, integer values are weightings
136
- @field_weights = {}
137
- @timeout = 0
117
+ reset
138
118
 
139
119
  @queue = []
140
120
  end
@@ -476,7 +456,7 @@ module Riddle
476
456
  header = socket.recv(8)
477
457
  status, version, length = header.unpack('n2N')
478
458
 
479
- while response.length < length
459
+ while response.length < (length || 0)
480
460
  part = socket.recv(length - response.length)
481
461
  response << part if part
482
462
  end
@@ -33,7 +33,16 @@ module Riddle
33
33
  message.append_int self.values.length
34
34
  # using to_f is a hack from the php client - to workaround 32bit
35
35
  # signed ints on x32 platforms
36
- message.append_ints *self.values.collect { |val| val.to_f }
36
+ message.append_ints *self.values.collect { |val|
37
+ case val
38
+ when TrueClass
39
+ 1.0
40
+ when FalseClass
41
+ 0.0
42
+ else
43
+ val.to_f
44
+ end
45
+ }
37
46
  end
38
47
  message.append_int self.exclude? ? 1 : 0
39
48
 
File without changes
@@ -0,0 +1,33 @@
1
+ require 'riddle/configuration/section'
2
+
3
+ require 'riddle/configuration/distributed_index'
4
+ require 'riddle/configuration/index'
5
+ require 'riddle/configuration/indexer'
6
+ require 'riddle/configuration/remote_index'
7
+ require 'riddle/configuration/searchd'
8
+ require 'riddle/configuration/source'
9
+ require 'riddle/configuration/sql_source'
10
+ require 'riddle/configuration/xml_source'
11
+
12
+ module Riddle
13
+ class Configuration
14
+ class ConfigurationError < StandardError #:nodoc:
15
+ end
16
+
17
+ attr_reader :indexes, :searchd
18
+ attr_accessor :indexer
19
+
20
+ def initialize
21
+ @indexer = Riddle::Configuration::Indexer.new
22
+ @searchd = Riddle::Configuration::Searchd.new
23
+ @indexes = []
24
+ end
25
+
26
+ def render
27
+ (
28
+ [@indexer.render, @searchd.render] +
29
+ @indexes.collect { |index| index.render }
30
+ ).join("\n")
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,48 @@
1
+ module Riddle
2
+ class Configuration
3
+ class DistributedIndex < Riddle::Configuration::Section
4
+ self.settings = [:type, :local, :agent, :agent_connect_timeout,
5
+ :agent_query_timeout]
6
+
7
+ attr_accessor :name, :local_indexes, :remote_indexes,
8
+ :agent_connect_timeout, :agent_query_timeout
9
+
10
+ def initialize(name)
11
+ @name = name
12
+ @local_indexes = []
13
+ @remote_indexes = []
14
+ end
15
+
16
+ def type
17
+ "distributed"
18
+ end
19
+
20
+ def local
21
+ self.local_indexes
22
+ end
23
+
24
+ def agent
25
+ agents = remote_indexes.collect { |index| index.remote }.uniq
26
+ agents.collect { |agent|
27
+ agent + ":" + remote_indexes.select { |index|
28
+ index.remote == agent
29
+ }.collect { |index| index.name }.join(",")
30
+ }
31
+ end
32
+
33
+ def render
34
+ raise ConfigurationError unless valid?
35
+
36
+ (
37
+ ["index #{name}", "{"] +
38
+ settings_body +
39
+ ["}", ""]
40
+ ).join("\n")
41
+ end
42
+
43
+ def valid?
44
+ @local_indexes.length > 0 || @remote_indexes.length > 0
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,142 @@
1
+ module Riddle
2
+ class Configuration
3
+ class Index < Riddle::Configuration::Section
4
+ self.settings = [:source, :path, :docinfo, :mlock, :morphology,
5
+ :stopwords, :wordforms, :exceptions, :min_word_len, :charset_type,
6
+ :charset_table, :ignore_chars, :min_prefix_len, :min_infix_len,
7
+ :prefix_fields, :infix_fields, :enable_star, :ngram_len, :ngram_chars,
8
+ :phrase_boundary, :phrase_boundary_step, :html_strip,
9
+ :html_index_attrs, :html_remove_elements, :preopen]
10
+
11
+ attr_accessor :name, :parent, :sources, :path, :docinfo, :mlock,
12
+ :morphologies, :stopword_files, :wordform_files, :exception_files,
13
+ :min_word_len, :charset_type, :charset_table, :ignore_characters,
14
+ :min_prefix_len, :min_infix_len, :prefix_field_names,
15
+ :infix_field_names, :enable_star, :ngram_len, :ngram_characters,
16
+ :phrase_boundaries, :phrase_boundary_step, :html_strip,
17
+ :html_index_attrs, :html_remove_element_tags, :preopen
18
+
19
+ def initialize(name, *sources)
20
+ @name = name
21
+ @sources = sources
22
+ @morphologies = []
23
+ @stopword_files = []
24
+ @wordform_files = []
25
+ @exception_files = []
26
+ @ignore_characters = []
27
+ @prefix_field_names = []
28
+ @infix_field_names = []
29
+ @ngram_characters = []
30
+ @phrase_boundaries = []
31
+ @html_remove_element_tags = []
32
+ end
33
+
34
+ def source
35
+ @sources.collect { |s| s.name }
36
+ end
37
+
38
+ def morphology
39
+ nil_join @morphologies, ", "
40
+ end
41
+
42
+ def morphology=(morphology)
43
+ @morphologies = nil_split morphology, /,\s?/
44
+ end
45
+
46
+ def stopwords
47
+ nil_join @stopword_files, " "
48
+ end
49
+
50
+ def stopwords=(stopwords)
51
+ @stopword_files = nil_split stopwords, ' '
52
+ end
53
+
54
+ def wordforms
55
+ nil_join @wordform_files, " "
56
+ end
57
+
58
+ def wordforms=(wordforms)
59
+ @wordform_files = nil_split wordforms, ' '
60
+ end
61
+
62
+ def exceptions
63
+ nil_join @exception_files, " "
64
+ end
65
+
66
+ def exceptions=(exceptions)
67
+ @exception_files = nil_split exceptions, ' '
68
+ end
69
+
70
+ def ignore_chars
71
+ nil_join @ignore_characters, ", "
72
+ end
73
+
74
+ def ignore_chars=(ignore_chars)
75
+ @ignore_characters = nil_split ignore_chars, /,\s?/
76
+ end
77
+
78
+ def prefix_fields
79
+ nil_join @prefix_field_names, ", "
80
+ end
81
+
82
+ def infix_fields
83
+ nil_join @infix_field_names, ", "
84
+ end
85
+
86
+ def ngram_chars
87
+ nil_join @ngram_characters, ", "
88
+ end
89
+
90
+ def ngram_chars=(ngram_chars)
91
+ @ngram_characters = nil_split ngram_chars, /,\s?/
92
+ end
93
+
94
+ def phrase_boundary
95
+ nil_join @phrase_boundaries, ", "
96
+ end
97
+
98
+ def phrase_boundary=(phrase_boundary)
99
+ @phrase_boundaries = nil_split phrase_boundary, /,\s?/
100
+ end
101
+
102
+ def html_remove_elements
103
+ nil_join @html_remove_element_tags, ", "
104
+ end
105
+
106
+ def html_remove_elements=(html_remove_elements)
107
+ @html_remove_element_tags = nil_split html_remove_elements, /,\s?/
108
+ end
109
+
110
+ def render
111
+ raise ConfigurationError, "#{@name} #{@sources.inspect} #{@path} #{@parent}" unless valid?
112
+
113
+ inherited_name = "#{name}"
114
+ inherited_name << " : #{parent}" if parent
115
+ (
116
+ @sources.collect { |s| s.render } +
117
+ ["index #{inherited_name}", "{"] +
118
+ settings_body +
119
+ ["}", ""]
120
+ ).join("\n")
121
+ end
122
+
123
+ def valid?
124
+ (!@name.nil?) && (!( @sources.length == 0 || @path.nil? ) || !@parent.nil?)
125
+ end
126
+
127
+ private
128
+
129
+ def nil_split(string, pattern)
130
+ (string || "").split(pattern)
131
+ end
132
+
133
+ def nil_join(array, delimiter)
134
+ if array.length == 0
135
+ nil
136
+ else
137
+ array.join(delimiter)
138
+ end
139
+ end
140
+ end
141
+ end
142
+ end