DrMark-thinking-sphinx 0.9.9 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +64 -2
- data/lib/thinking_sphinx.rb +88 -11
- data/lib/thinking_sphinx/active_record.rb +136 -21
- data/lib/thinking_sphinx/active_record/delta.rb +43 -62
- data/lib/thinking_sphinx/active_record/has_many_association.rb +1 -1
- data/lib/thinking_sphinx/active_record/search.rb +7 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +130 -0
- data/lib/thinking_sphinx/association.rb +17 -0
- data/lib/thinking_sphinx/attribute.rb +171 -97
- data/lib/thinking_sphinx/collection.rb +126 -2
- data/lib/thinking_sphinx/configuration.rb +120 -171
- data/lib/thinking_sphinx/core/string.rb +15 -0
- data/lib/thinking_sphinx/deltas.rb +27 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +67 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/facet.rb +58 -0
- data/lib/thinking_sphinx/facet_collection.rb +60 -0
- data/lib/thinking_sphinx/field.rb +18 -52
- data/lib/thinking_sphinx/index.rb +246 -199
- data/lib/thinking_sphinx/index/builder.rb +85 -16
- data/lib/thinking_sphinx/rails_additions.rb +85 -5
- data/lib/thinking_sphinx/search.rb +459 -190
- data/lib/thinking_sphinx/tasks.rb +128 -0
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +53 -124
- data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +2 -2
- data/spec/unit/thinking_sphinx/active_record_spec.rb +110 -30
- data/spec/unit/thinking_sphinx/attribute_spec.rb +16 -149
- data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
- data/spec/unit/thinking_sphinx/configuration_spec.rb +54 -412
- data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/unit/thinking_sphinx/field_spec.rb +0 -79
- data/spec/unit/thinking_sphinx/index/builder_spec.rb +1 -29
- data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +1 -39
- data/spec/unit/thinking_sphinx/index_spec.rb +78 -226
- data/spec/unit/thinking_sphinx/search_spec.rb +29 -228
- data/spec/unit/thinking_sphinx_spec.rb +23 -19
- data/tasks/distribution.rb +48 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +86 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +8 -0
- data/vendor/after_commit/lib/after_commit.rb +45 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/{lib → vendor/riddle/lib}/riddle.rb +9 -5
- data/{lib → vendor/riddle/lib}/riddle/client.rb +6 -26
- data/{lib → vendor/riddle/lib}/riddle/client/filter.rb +10 -1
- data/{lib → vendor/riddle/lib}/riddle/client/message.rb +0 -0
- data/{lib → vendor/riddle/lib}/riddle/client/response.rb +0 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/controller.rb +44 -0
- metadata +63 -10
- data/lib/test.rb +0 -46
- data/tasks/thinking_sphinx_tasks.rake +0 -1
- data/tasks/thinking_sphinx_tasks.rb +0 -86
@@ -0,0 +1,55 @@
|
|
1
|
+
module Delayed
  # A deferred method call: a receiver, a method name and an argument list.
  #
  # Classes and ActiveRecord::Base instances (whether used as the receiver or
  # as an argument) are replaced on construction by short marker strings
  # ("CLASS:Name" / "AR:Name:id") and turned back into objects when the call
  # is performed. Every other value is stored as-is.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # Marker formats produced by #class_to_string and #ar_to_string.
    # Anchored with \A and \z (whole string) rather than ^ and $ (any line),
    # so that an ordinary multi-line string argument which merely contains a
    # line such as "CLASS:Foo" is never mistaken for a serialized reference.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # object - receiver of the deferred call.
    # method - name of the method to invoke (anything responding to to_sym).
    # args   - Array of arguments for the call.
    #
    # Raises NoMethodError if the receiver does not respond to the method.
    def initialize(object, method, args)
      # Report the receiver that lacks the method, not this half-built struct.
      raise NoMethodError, "undefined method `#{method}' for #{object.inspect}" unless object.respond_to?(method)

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # Human-readable label for the call: "Klass.method" for class receivers,
    # "Klass#method" for ActiveRecord receivers, "Unknown#method" otherwise.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{Regexp.last_match(1)}.#{method}"
      when AR_STRING_FORMAT    then "#{Regexp.last_match(1)}##{method}"
      else "Unknown##{method}"
      end
    end

    # Restores the receiver and arguments, then invokes the stored method.
    # Returns the method's result, or true when a referenced record no longer
    # exists.
    def perform
      load(object).send(method, *args.map { |a| load(a) })
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into the class or record it stands for;
    # any other value is returned unchanged.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then Regexp.last_match(1).constantize
      when AR_STRING_FORMAT    then Regexp.last_match(1).constantize.find(Regexp.last_match(2))
      else arg
      end
    end

    # Replaces classes and ActiveRecord instances with their marker strings;
    # any other value is returned unchanged.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    # Marker for a record: its class and id.
    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    # Marker for a class: its name.
    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Delayed
  # Runs Delayed::Job.work_off in a loop until a TERM or INT signal asks it
  # to stop, reporting progress through #say.
  class Worker
    # Seconds to sleep after a pass in which no jobs were counted.
    SLEEP = 5

    cattr_accessor :logger

    # Default logger: Merb's when Merb::Logger is defined, otherwise the
    # Rails default logger when that constant is defined, otherwise nil.
    self.logger =
      if defined?(Merb::Logger)
        Merb.logger
      elsif defined?(RAILS_DEFAULT_LOGGER)
        RAILS_DEFAULT_LOGGER
      end

    # options - Hash; recognised keys:
    #           :quiet        - when truthy, #say does not print to stdout.
    #           :min_priority - assigned to Delayed::Job.min_priority if given.
    #           :max_priority - assigned to Delayed::Job.max_priority if given.
    def initialize(options={})
      @quiet = options[:quiet]

      if options.has_key?(:min_priority)
        Delayed::Job.min_priority = options[:min_priority]
      end

      if options.has_key?(:max_priority)
        Delayed::Job.max_priority = options[:max_priority]
      end
    end

    # Main loop. Installs TERM/INT handlers that set the global $exit flag,
    # then keeps working off jobs, sleeping SLEEP seconds after an empty pass.
    # Locks are always cleared on the way out.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        result = nil
        realtime = Benchmark.realtime { result = Delayed::Job.work_off }

        # work_off's result is summed for the job count and its last element
        # is reported as the failure count.
        count = result.sum

        break if $exit

        if count.zero?
          sleep(SLEEP)
        else
          say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
        end

        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints text to stdout (unless quiet) and logs it at info level when a
    # logger is configured.
    def say(text)
      puts text unless @quiet
      logger.info(text) if logger
    end

  end
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
require 'socket'
|
2
2
|
require 'timeout'
|
3
|
+
|
3
4
|
require 'riddle/client'
|
4
|
-
require 'riddle/
|
5
|
-
require 'riddle/
|
6
|
-
require 'riddle/client/response'
|
5
|
+
require 'riddle/configuration'
|
6
|
+
require 'riddle/controller'
|
7
7
|
|
8
8
|
module Riddle #:nodoc:
|
9
9
|
class ConnectionError < StandardError #:nodoc:
|
@@ -15,12 +15,16 @@ module Riddle #:nodoc:
|
|
15
15
|
Tiny = 8
|
16
16
|
# Revision number for RubyForge's sake, taken from what Sphinx
|
17
17
|
# outputs to the command line.
|
18
|
-
Rev =
|
18
|
+
Rev = 1533
|
19
19
|
# Release number to mark my own fixes, beyond feature parity with
|
20
20
|
# Sphinx itself.
|
21
|
-
Release =
|
21
|
+
Release = 4
|
22
22
|
|
23
23
|
String = [Major, Minor, Tiny].join('.')
|
24
24
|
GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
|
25
25
|
end
|
26
|
+
|
27
|
+
# Returns a copy of string in which each of the characters
# ( ) | - ! @ ~ " & / is preceded by a backslash.
def self.escape(string)
  string.gsub(/[\(\)\|\-!@~"&\/]/) { |special| '\\' + special }
end
|
26
30
|
end
|
@@ -1,3 +1,7 @@
|
|
1
|
+
require 'riddle/client/filter'
|
2
|
+
require 'riddle/client/message'
|
3
|
+
require 'riddle/client/response'
|
4
|
+
|
1
5
|
module Riddle
|
2
6
|
class VersionError < StandardError; end
|
3
7
|
class ResponseError < StandardError; end
|
@@ -110,31 +114,7 @@ module Riddle
|
|
110
114
|
@server = server || "localhost"
|
111
115
|
@port = port || 3312
|
112
116
|
|
113
|
-
|
114
|
-
@offset = 0
|
115
|
-
@limit = 20
|
116
|
-
@max_matches = 1000
|
117
|
-
@match_mode = :all
|
118
|
-
@sort_mode = :relevance
|
119
|
-
@sort_by = ''
|
120
|
-
@weights = []
|
121
|
-
@id_range = 0..0
|
122
|
-
@filters = []
|
123
|
-
@group_by = ''
|
124
|
-
@group_function = :day
|
125
|
-
@group_clause = '@group desc'
|
126
|
-
@group_distinct = ''
|
127
|
-
@cut_off = 0
|
128
|
-
@retry_count = 0
|
129
|
-
@retry_delay = 0
|
130
|
-
@anchor = {}
|
131
|
-
# string keys are index names, integer values are weightings
|
132
|
-
@index_weights = {}
|
133
|
-
@rank_mode = :proximity_bm25
|
134
|
-
@max_query_time = 0
|
135
|
-
# string keys are field names, integer values are weightings
|
136
|
-
@field_weights = {}
|
137
|
-
@timeout = 0
|
117
|
+
reset
|
138
118
|
|
139
119
|
@queue = []
|
140
120
|
end
|
@@ -476,7 +456,7 @@ module Riddle
|
|
476
456
|
header = socket.recv(8)
|
477
457
|
status, version, length = header.unpack('n2N')
|
478
458
|
|
479
|
-
while response.length < length
|
459
|
+
while response.length < (length || 0)
|
480
460
|
part = socket.recv(length - response.length)
|
481
461
|
response << part if part
|
482
462
|
end
|
@@ -33,7 +33,16 @@ module Riddle
|
|
33
33
|
message.append_int self.values.length
|
34
34
|
# using to_f is a hack from the php client - to workaround 32bit
|
35
35
|
# signed ints on x32 platforms
|
36
|
-
message.append_ints *self.values.collect { |val|
|
36
|
+
message.append_ints *self.values.collect { |val|
|
37
|
+
case val
|
38
|
+
when TrueClass
|
39
|
+
1.0
|
40
|
+
when FalseClass
|
41
|
+
0.0
|
42
|
+
else
|
43
|
+
val.to_f
|
44
|
+
end
|
45
|
+
}
|
37
46
|
end
|
38
47
|
message.append_int self.exclude? ? 1 : 0
|
39
48
|
|
File without changes
|
File without changes
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'riddle/configuration/section'
|
2
|
+
|
3
|
+
require 'riddle/configuration/distributed_index'
|
4
|
+
require 'riddle/configuration/index'
|
5
|
+
require 'riddle/configuration/indexer'
|
6
|
+
require 'riddle/configuration/remote_index'
|
7
|
+
require 'riddle/configuration/searchd'
|
8
|
+
require 'riddle/configuration/source'
|
9
|
+
require 'riddle/configuration/sql_source'
|
10
|
+
require 'riddle/configuration/xml_source'
|
11
|
+
|
12
|
+
module Riddle
  # Top-level container for a configuration: one indexer section, one searchd
  # section and a list of index objects, rendered together as text.
  class Configuration
    class ConfigurationError < StandardError #:nodoc:
    end

    attr_reader :indexes, :searchd
    attr_accessor :indexer

    # Starts with fresh indexer and searchd sections and no indexes.
    def initialize
      @indexes = []
      @searchd = Riddle::Configuration::Searchd.new
      @indexer = Riddle::Configuration::Indexer.new
    end

    # Renders the indexer, then searchd, then every index in order, joining
    # the rendered pieces with newlines.
    def render
      pieces = [@indexer.render, @searchd.render]
      @indexes.each { |index| pieces << index.render }
      pieces.join("\n")
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Riddle
  class Configuration
    # A "distributed" index section built from local index names and remote
    # index objects. The section body comes from settings_body, which is not
    # defined in this class (presumably inherited from Section — confirm).
    class DistributedIndex < Riddle::Configuration::Section
      self.settings = [:type, :local, :agent, :agent_connect_timeout,
        :agent_query_timeout]

      attr_accessor :name, :local_indexes, :remote_indexes,
        :agent_connect_timeout, :agent_query_timeout

      # name - the index name used in the rendered "index <name>" header.
      def initialize(name)
        @name           = name
        @local_indexes  = []
        @remote_indexes = []
      end

      # Value of the :type setting; always "distributed".
      def type
        "distributed"
      end

      # Value of the :local setting: the list of local indexes.
      def local
        self.local_indexes
      end

      # Value of the :agent setting: one "<remote>:<name>,<name>..." string
      # per distinct remote, listing the names of the indexes on that remote.
      def agent
        remotes = remote_indexes.map { |index| index.remote }.uniq

        remotes.map do |remote|
          names = remote_indexes.select { |index| index.remote == remote }.
            map { |index| index.name }

          remote + ":" + names.join(",")
        end
      end

      # Renders the section as text.
      #
      # Raises ConfigurationError when there are neither local nor remote
      # indexes.
      def render
        raise ConfigurationError unless valid?

        lines = ["index #{name}", "{"]
        lines += settings_body
        lines += ["}", ""]
        lines.join("\n")
      end

      # True when at least one local or remote index is present.
      def valid?
        [@local_indexes, @remote_indexes].any? { |list| list.length > 0 }
      end
    end
  end
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
module Riddle
  class Configuration
    # An index section. List-valued settings are held as arrays (for example
    # morphologies, stopword_files) and exposed under their setting names
    # (morphology, stopwords, ...) as delimiter-joined strings, or nil when
    # the list is empty. The section body comes from settings_body, which is
    # not defined in this class (presumably inherited from Section — confirm).
    class Index < Riddle::Configuration::Section
      self.settings = [:source, :path, :docinfo, :mlock, :morphology,
        :stopwords, :wordforms, :exceptions, :min_word_len, :charset_type,
        :charset_table, :ignore_chars, :min_prefix_len, :min_infix_len,
        :prefix_fields, :infix_fields, :enable_star, :ngram_len, :ngram_chars,
        :phrase_boundary, :phrase_boundary_step, :html_strip,
        :html_index_attrs, :html_remove_elements, :preopen]

      attr_accessor :name, :parent, :sources, :path, :docinfo, :mlock,
        :morphologies, :stopword_files, :wordform_files, :exception_files,
        :min_word_len, :charset_type, :charset_table, :ignore_characters,
        :min_prefix_len, :min_infix_len, :prefix_field_names,
        :infix_field_names, :enable_star, :ngram_len, :ngram_characters,
        :phrase_boundaries, :phrase_boundary_step, :html_strip,
        :html_index_attrs, :html_remove_element_tags, :preopen

      # name    - the index name.
      # sources - zero or more source objects; each is asked for #name and
      #           #render by this class.
      def initialize(name, *sources)
        @name    = name
        @sources = sources

        @morphologies             = []
        @stopword_files           = []
        @wordform_files           = []
        @exception_files          = []
        @ignore_characters        = []
        @prefix_field_names       = []
        @infix_field_names        = []
        @ngram_characters         = []
        @phrase_boundaries        = []
        @html_remove_element_tags = []
      end

      # Value of the :source setting: the names of all sources.
      def source
        @sources.map { |src| src.name }
      end

      # Comma-separated morphologies, or nil when there are none.
      def morphology
        nil_join @morphologies, ", "
      end

      # Accepts a comma-separated string (or nil) and stores it as a list.
      def morphology=(value)
        @morphologies = nil_split value, /,\s?/
      end

      # Space-separated stopword files, or nil when there are none.
      def stopwords
        nil_join @stopword_files, " "
      end

      # Accepts a space-separated string (or nil) and stores it as a list.
      def stopwords=(value)
        @stopword_files = nil_split value, ' '
      end

      # Space-separated wordform files, or nil when there are none.
      def wordforms
        nil_join @wordform_files, " "
      end

      # Accepts a space-separated string (or nil) and stores it as a list.
      def wordforms=(value)
        @wordform_files = nil_split value, ' '
      end

      # Space-separated exception files, or nil when there are none.
      def exceptions
        nil_join @exception_files, " "
      end

      # Accepts a space-separated string (or nil) and stores it as a list.
      def exceptions=(value)
        @exception_files = nil_split value, ' '
      end

      # Comma-separated ignored characters, or nil when there are none.
      def ignore_chars
        nil_join @ignore_characters, ", "
      end

      # Accepts a comma-separated string (or nil) and stores it as a list.
      def ignore_chars=(value)
        @ignore_characters = nil_split value, /,\s?/
      end

      # Comma-separated prefix field names, or nil when there are none.
      def prefix_fields
        nil_join @prefix_field_names, ", "
      end

      # Comma-separated infix field names, or nil when there are none.
      def infix_fields
        nil_join @infix_field_names, ", "
      end

      # Comma-separated ngram characters, or nil when there are none.
      def ngram_chars
        nil_join @ngram_characters, ", "
      end

      # Accepts a comma-separated string (or nil) and stores it as a list.
      def ngram_chars=(value)
        @ngram_characters = nil_split value, /,\s?/
      end

      # Comma-separated phrase boundaries, or nil when there are none.
      def phrase_boundary
        nil_join @phrase_boundaries, ", "
      end

      # Accepts a comma-separated string (or nil) and stores it as a list.
      def phrase_boundary=(value)
        @phrase_boundaries = nil_split value, /,\s?/
      end

      # Comma-separated element tags to remove, or nil when there are none.
      def html_remove_elements
        nil_join @html_remove_element_tags, ", "
      end

      # Accepts a comma-separated string (or nil) and stores it as a list.
      def html_remove_elements=(value)
        @html_remove_element_tags = nil_split value, /,\s?/
      end

      # Renders every source followed by this index's own section. The header
      # reads "index <name> : <parent>" when a parent is set.
      #
      # Raises ConfigurationError (with name, sources, path and parent in the
      # message) when the index is not valid.
      def render
        raise ConfigurationError, "#{@name} #{@sources.inspect} #{@path} #{@parent}" unless valid?

        inherited_name = parent ? "#{name} : #{parent}" : "#{name}"

        lines = @sources.map { |src| src.render }
        lines += ["index #{inherited_name}", "{"]
        lines += settings_body
        lines += ["}", ""]
        lines.join("\n")
      end

      # An index is valid when it has a name and either its own definition
      # (at least one source plus a path) or a parent.
      def valid?
        return false if @name.nil?

        self_contained = @sources.length != 0 && !@path.nil?
        self_contained || !@parent.nil?
      end

      private

      # Splits string on pattern, treating nil as the empty string.
      def nil_split(string, pattern)
        text = string || ""
        text.split(pattern)
      end

      # Joins array with delimiter; an empty array yields nil, not "".
      def nil_join(array, delimiter)
        array.length == 0 ? nil : array.join(delimiter)
      end
    end
  end
end
|