ryansch-ts-resque-delta 1.1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +16 -0
- data/.rspec +3 -0
- data/Gemfile +25 -0
- data/Guardfile +16 -0
- data/LICENSE +20 -0
- data/README.markdown +82 -0
- data/Rakefile +10 -0
- data/config/redis-cucumber.conf +13 -0
- data/features/resque_deltas.feature +62 -0
- data/features/smart_indexing.feature +42 -0
- data/features/step_definitions/common_steps.rb +76 -0
- data/features/step_definitions/resque_delta_steps.rb +33 -0
- data/features/step_definitions/smart_indexing_steps.rb +3 -0
- data/features/support/env.rb +32 -0
- data/features/thinking_sphinx/database.example.yml +3 -0
- data/features/thinking_sphinx/db/migrations/create_delayed_betas.rb +4 -0
- data/features/thinking_sphinx/models/delayed_beta.rb +6 -0
- data/lib/flying_sphinx/resque_delta.rb +38 -0
- data/lib/flying_sphinx/resque_delta/delta_job.rb +14 -0
- data/lib/flying_sphinx/resque_delta/flag_as_deleted_job.rb +7 -0
- data/lib/thinking_sphinx/deltas/resque_delta.rb +118 -0
- data/lib/thinking_sphinx/deltas/resque_delta/core_index.rb +98 -0
- data/lib/thinking_sphinx/deltas/resque_delta/delta_job.rb +90 -0
- data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set.rb +56 -0
- data/lib/thinking_sphinx/deltas/resque_delta/index_utils.rb +47 -0
- data/lib/thinking_sphinx/deltas/resque_delta/railtie.rb +8 -0
- data/lib/thinking_sphinx/deltas/resque_delta/tasks.rb +38 -0
- data/lib/thinking_sphinx/deltas/resque_delta/version.rb +7 -0
- data/lib/ts-resque-delta.rb +2 -0
- data/spec/flying_sphinx/resque_delta/delta_job_spec.rb +32 -0
- data/spec/flying_sphinx/resque_delta/flag_as_deleted_job_spec.rb +23 -0
- data/spec/flying_sphinx/resque_delta_spec.rb +131 -0
- data/spec/spec_helper.rb +13 -0
- data/spec/thinking_sphinx/deltas/resque_delta/core_index_spec.rb +208 -0
- data/spec/thinking_sphinx/deltas/resque_delta/delta_job_spec.rb +172 -0
- data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set_spec.rb +126 -0
- data/spec/thinking_sphinx/deltas/resque_delta/index_utils_spec.rb +67 -0
- data/spec/thinking_sphinx/deltas/resque_delta_spec.rb +191 -0
- data/tasks/rails.rake +1 -0
- data/ts-resque-delta.gemspec +40 -0
- metadata +393 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'thinking_sphinx/deltas/resque_delta'
|
2
|
+
|
3
|
+
# Delta strategy that enqueues the FlyingSphinx job classes rather than the
# ThinkingSphinx ones of the class it inherits from.
class FlyingSphinx::ResqueDelta < ThinkingSphinx::Deltas::ResqueDelta
  # The job classes enqueued by this strategy.
  #
  # @return [Array<Class>]
  #
  def self.job_types
    [FlyingSphinx::ResqueDelta::DeltaJob, FlyingSphinx::ResqueDelta::FlagAsDeletedJob]
  end

  # Prefix the inherited lock helpers put in front of their Redis key names
  # when they are called on this class.
  #
  # @return [String]
  #
  def self.job_prefix
    'fs-delta'
  end

  # Enqueues a DeltaJob for every delta index of the model that is not locked
  # and, when an instance is given, a FlagAsDeletedJob carrying the model's
  # core index names and the instance's Sphinx document id.
  #
  # Nothing is enqueued when skip?(instance) is true.
  #
  # @param [Class] model the model whose delta indices should be processed
  # @param [Object, NilClass] instance the changed instance. Optional.
  # @return [Boolean] true
  #
  def index(model, instance = nil)
    return true if skip?(instance)

    model.delta_index_names.each do |delta_name|
      unless self.class.locked?(delta_name)
        # The job receives its index names as a single array argument.
        Resque.enqueue(FlyingSphinx::ResqueDelta::DeltaJob, [delta_name])
      end
    end

    if instance
      Resque.enqueue(
        FlyingSphinx::ResqueDelta::FlagAsDeletedJob,
        model.core_index_names,
        instance.sphinx_document_id
      )
    end

    true
  end
end
|
36
|
+
|
37
|
+
require 'flying_sphinx/resque_delta/delta_job'
|
38
|
+
require 'flying_sphinx/resque_delta/flag_as_deleted_job'
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Resque job that hands a set of indices to a FlyingSphinx::IndexRequest.
class FlyingSphinx::ResqueDelta::DeltaJob < ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
  @queue = :fs_delta

  # Builds a FlyingSphinx::IndexRequest for the given indices and performs it.
  # Nothing is requested when any of the indices is currently locked.
  #
  # @param [Array<String>, String] indices the names of the Sphinx indices
  #
  def self.perform(indices)
    # skip? looks up the lock for ONE index name. Handing it the whole array
    # would interpolate the array into a single lock key, so each name is
    # checked on its own.
    return if Array(indices).any? { |index| skip?(index) }

    FlyingSphinx::IndexRequest.new(indices).perform
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'resque'
|
2
|
+
require 'thinking_sphinx'
|
3
|
+
|
4
|
+
require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_set'
|
5
|
+
require 'thinking_sphinx/deltas/resque_delta/index_utils'
|
6
|
+
|
7
|
+
# Delayed Deltas for Thinking Sphinx, using Resque.
|
8
|
+
#
|
9
|
+
# This documentation is aimed at those reading the code. If you're looking for
|
10
|
+
# a guide to Thinking Sphinx and/or deltas, I recommend you start with the
|
11
|
+
# Thinking Sphinx site instead - or the README for this library at the very
|
12
|
+
# least.
|
13
|
+
#
|
14
|
+
# @author Patrick Allan
|
15
|
+
# @see http://ts.freelancing-gods.com Thinking Sphinx
|
16
|
+
#
|
17
|
+
class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
  # The job classes enqueued by this strategy.
  #
  # @return [Array<Class>]
  #
  def self.job_types
    [ThinkingSphinx::Deltas::ResqueDelta::DeltaJob]
  end

  # Prefix used for the Redis keys built by the lock helpers below.
  #
  # @return [String]
  #
  def self.job_prefix
    'ts-delta'
  end

  # LTRIM + LPOP deletes all items from the Resque queue without loading it
  # into client memory (unlike Resque.dequeue).
  # WARNING: This will clear ALL jobs in any queue used by a ResqueDelta job.
  # If you're sharing a queue with other jobs they'll be deleted!
  def self.clear_thinking_sphinx_queues
    queue_names = job_types.map { |job| job.instance_variable_get(:@queue) }.uniq

    queue_names.each do |queue_name|
      # Trim the list down to its first element, then pop that one too.
      Resque.redis.ltrim("queue:#{queue_name}", 0, 0)
      Resque.redis.lpop("queue:#{queue_name}")
    end
  end

  # Clear both the resque queues and any other state maintained in redis
  def self.clear!
    clear_thinking_sphinx_queues

    FlagAsDeletedSet.clear_all!
  end

  # Use simplistic locking. We're assuming that the user won't run more than one
  # `rake ts:si` or `rake ts:in` task at a time.
  #
  # Marks the given index as locked by writing 'true' under its lock key.
  def self.lock(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.set(lock_key, 'true')
  end

  # Removes the lock key of the given index.
  def self.unlock(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.del(lock_key)
  end

  # @return [Boolean] whether the lock key of the given index holds 'true'
  def self.locked?(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.get(lock_key) == 'true'
  end

  # Drops the state kept for an index before its core index is rebuilt: the
  # flag-as-deleted set of "<index_name>_core" and the queued delta jobs for
  # "<index_name>_delta".
  def self.prepare_for_core_index(index_name)
    FlagAsDeletedSet.clear!("#{index_name}_core")

    # Clear delta jobs; dequeue is fast for jobs with arguments.
    Resque.dequeue(
      ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
      "#{index_name}_delta"
    )
  end

  # Adds a job to the queue for processing the given model's delta index. When
  # an instance is provided, its document id is also recorded in the
  # flag-as-deleted set of each of the model's core indices.
  #
  # Nothing is queued or recorded if updates or deltas are disabled, or if the
  # instance (when given) is not toggled to be in the delta index. The first two
  # options are controlled via ThinkingSphinx.updates_enabled? and
  # ThinkingSphinx.deltas_enabled?. Delta indices that are locked are passed
  # over.
  #
  # @param [Class] model the ActiveRecord model to index.
  # @param [ActiveRecord::Base] instance the instance of the given model that
  #   has changed. Optional.
  # @return [Boolean] true
  #
  def index(model, instance = nil)
    return true if skip?(instance)

    model.delta_index_names.each do |delta_name|
      unless self.class.locked?(delta_name)
        Resque.enqueue(ThinkingSphinx::Deltas::ResqueDelta::DeltaJob, delta_name)
      end
    end

    return true unless instance

    model.core_index_names.each do |core_name|
      FlagAsDeletedSet.add(core_name, instance.sphinx_document_id)
    end

    true
  end

  private

  # Checks whether enqueueing should be skipped. Truthy when updates or deltas
  # are disabled, or when an instance is given and it is not toggled.
  #
  # @param [ActiveRecord::Base, NilClass] instance
  # @return [Boolean]
  #
  def skip?(instance)
    return true unless ThinkingSphinx.updates_enabled?
    return true unless ThinkingSphinx.deltas_enabled?

    instance && !toggled(instance)
  end
end
|
116
|
+
|
117
|
+
require 'thinking_sphinx/deltas/resque_delta/delta_job'
|
118
|
+
require 'thinking_sphinx/deltas/resque_delta/core_index'
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# Helpers for rebuilding core indices while the matching delta indices are
# locked.
class ThinkingSphinx::Deltas::ResqueDelta::CoreIndex
  # Public: Lock a delta index against indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   lock_delta('foo')
  #
  # Returns nothing.
  def lock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.lock("#{index_name}_delta")
  end

  # Public: Unlock a delta index for indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   unlock_delta('foo')
  #
  # Returns nothing.
  def unlock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.unlock("#{index_name}_delta")
  end

  # Public: Lock all delta indexes against indexing or new index jobs.
  #
  # Returns nothing.
  def lock_deltas
    # index_prefixes is the only index list this class defines.
    index_prefixes.each { |index_name| lock_delta(index_name) }
  end

  # Public: Unlock all delta indexes for indexing or new index jobs.
  #
  # Returns nothing.
  def unlock_deltas
    index_prefixes.each { |index_name| unlock_delta(index_name) }
  end

  # Public: Index all indices while locking each delta as we index the
  # corresponding core index. Stops at the first core index whose indexing
  # reports a non-zero status; after each successful core index a DeltaJob is
  # enqueued for the matching delta index.
  #
  # Returns true on success; false on failure.
  def smart_index
    # Load config like ts:in.
    unless ENV['INDEX_ONLY'] == 'true'
      puts "Generating Configuration to #{ts_config.config_file}"
      ts_config.build
    end
    FileUtils.mkdir_p(ts_config.searchd_file_path)

    # Index each core, one at a time. Wrap with delta locking logic.
    index_prefixes.each do |index_name|
      ret = nil

      with_delta_index_lock(index_name) do
        ThinkingSphinx::Deltas::ResqueDelta.prepare_for_core_index(index_name)
        ts_config.controller.index("#{index_name}_core", :verbose => true)
        # Status of the last child process, captured before the lock is
        # released -- presumably the indexer started by controller.index.
        ret = $?
      end

      return false if ret.to_i != 0

      Resque.enqueue(
        ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
        "#{index_name}_delta"
      )
    end

    true
  end

  # Public: Wraps the passed block with a delta index lock. The lock is
  # released even when the block raises, so a failed run cannot leave the
  # delta index locked.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   with_delta_index_lock('foo') { ... }
  #
  # Returns the value of the block.
  def with_delta_index_lock(index_name)
    lock_delta(index_name)
    yield
  ensure
    unlock_delta(index_name)
  end

  private

  # Internal: The configuration object supplied by IndexUtils.
  def ts_config
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.ts_config
  end

  # Internal: The index prefixes supplied by IndexUtils.
  def index_prefixes
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.index_prefixes
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'resque-lock-timeout'
|
2
|
+
|
3
|
+
# A simple job class that processes a given index.
|
4
|
+
#
|
5
|
+
class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob

  extend Resque::Plugins::LockTimeout
  @queue = :ts_delta
  @lock_timeout = 240

  # Runs Sphinx's indexer tool to process the index, then flags the saved
  # document ids as deleted in the matching core index. The flagging step only
  # happens when Sphinx is running; the whole job is a no-op while the index is
  # locked.
  #
  # @param [String] index the name of the Sphinx index
  #
  def self.perform(index)
    return if skip?(index)

    config = ThinkingSphinx::Configuration.instance

    # Delta Index
    output = `#{config.bin_path}#{config.indexer_binary_name} --config #{config.config_file} --rotate #{index}`
    puts output unless ThinkingSphinx.suppress_delta_output?

    # Flag As Deleted
    return unless ThinkingSphinx.sphinx_running?

    # From here on `index` names the core index, not the delta index.
    index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(index)

    # Get the document ids we've saved
    flag_as_deleted_ids = ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.processing_members(index)

    # Filter out the ids that aren't present in sphinx
    flag_as_deleted_ids = ThinkingSphinx::Search.bundle_searches(flag_as_deleted_ids) do |sphinx, id|
      sphinx.search_for_ids([], :index => index, :id_range => id..id)
    end.map(&:to_a).flatten

    # Each hash element should be of the form { id => [1] }. Built from an
    # array of pairs rather than a splat, so a large id list is not expanded
    # into one method argument per element.
    flag_hash = Hash[flag_as_deleted_ids.collect { |id| [id, [1]] }]

    config.client.update(index, ['sphinx_deleted'], flag_hash)
  end

  # Try again later if lock is in use.
  def self.lock_failed(*args)
    Resque.enqueue(self, *args)
  end

  # Run only one DeltaJob at a time regardless of index.
  #def self.identifier(*args)
    #nil
  #end

  # This allows us to have a concurrency safe version of ts-delayed-delta's
  # duplicates_exist:
  #
  # http://github.com/freelancing-god/ts-delayed-delta/blob/master/lib/thinkin
  # g_sphinx/deltas/delayed_delta/job.rb#L47
  #
  # The name of this method ensures that it runs within around_perform_lock.
  #
  # We've leveraged resque-lock-timeout to ensure that only one DeltaJob is
  # running at a time. Now, this around filter essentially ensures that only
  # one DeltaJob of each index type can sit at the queue at once. If the queue
  # has more than one, lrem will clear the rest off.
  #
  def self.around_perform_lock1(*args)
    # Remove all other instances of this job (with the same args) from the
    # queue. Uses LREM (http://code.google.com/p/redis/wiki/LremCommand) which
    # takes the form: "LREM key count value" and if count == 0 removes all
    # instances of value from the list.
    redis_job_value = Resque.encode(:class => self.to_s, :args => args)
    Resque.redis.lrem("queue:#{@queue}", 0, redis_job_value)

    # Grab the subset of flag as deleted document ids to work on
    core_index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(*args)
    ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.get_subset_for_processing(core_index)

    yield

    # Clear processing set. This line is only reached when the job body did
    # not raise; after a failure the processing set is left in place.
    ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.clear_processing(core_index)
  end

  # Whether the job should do nothing because the index is locked.
  #
  # NOTE: this is a public singleton method. A bare `protected` keyword does
  # not apply to `def self.` definitions, so none is used here.
  #
  # @param [String] index the name of the Sphinx index
  # @return [Boolean]
  #
  def self.skip?(index)
    ThinkingSphinx::Deltas::ResqueDelta.locked?(index)
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
  # Redis-backed sets of document ids, kept per core index. Ids are collected
  # in a "set" key and moved into a "processing" key in batches.
  module FlagAsDeletedSet
    extend self

    # Redis key of the set that collects new document ids for a core index.
    def set_name(core_name)
      "#{ThinkingSphinx::Deltas::ResqueDelta.job_prefix}:flag.deleted:#{core_name}:set"
    end

    # Redis key of the scratch set used while moving ids between sets.
    def temp_name(core_name)
      "#{ThinkingSphinx::Deltas::ResqueDelta.job_prefix}:flag.deleted:#{core_name}:temp"
    end

    # Redis key of the set holding the ids currently being processed.
    def processing_name(core_name)
      "#{ThinkingSphinx::Deltas::ResqueDelta.job_prefix}:flag.deleted:#{core_name}:processing"
    end

    # Records a document id in the collecting set of the core index.
    def add(core_name, document_id)
      collecting_key = set_name(core_name)
      Resque.redis.sadd(collecting_key, document_id)
    end

    # Deletes the collecting set of the core index, then deletes its
    # processing set from inside the DeltaJob lock of the matching delta index.
    def clear!(core_name)
      Resque.redis.del(set_name(core_name))

      # Clear processing set as well
      delta_name = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.core_to_delta(core_name)
      ThinkingSphinx::Deltas::ResqueDelta::DeltaJob.around_perform_lock(delta_name) {
        Resque.redis.del(processing_name(core_name))
      }
    end

    # Runs clear! for every core index reported by IndexUtils.
    def clear_all!
      ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.core_indices.each { |core_index| clear!(core_index) }
    end

    # Moves every id currently in the collecting set into the processing set,
    # going through the scratch set.
    def get_subset_for_processing(core_name)
      redis = Resque.redis

      # Copy set to temp
      redis.sunionstore(temp_name(core_name), set_name(core_name))
      # Store (set - temp) into set. This removes all items we copied into temp from set.
      redis.sdiffstore(set_name(core_name), set_name(core_name), temp_name(core_name))
      # Merge processing and temp together and store into processing.
      redis.sunionstore(processing_name(core_name), processing_name(core_name), temp_name(core_name))

      redis.del(temp_name(core_name))
    end

    # The members of the processing set, each converted with to_i.
    def processing_members(core_name)
      members = Resque.redis.smembers(processing_name(core_name))
      members.map { |member| member.to_i }
    end

    # Deletes the processing set of the core index.
    def clear_processing(core_name)
      processing_key = processing_name(core_name)
      Resque.redis.del(processing_key)
    end
  end
end
|