ryansch-ts-resque-delta 1.1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/.gitignore +16 -0
  2. data/.rspec +3 -0
  3. data/Gemfile +25 -0
  4. data/Guardfile +16 -0
  5. data/LICENSE +20 -0
  6. data/README.markdown +82 -0
  7. data/Rakefile +10 -0
  8. data/config/redis-cucumber.conf +13 -0
  9. data/features/resque_deltas.feature +62 -0
  10. data/features/smart_indexing.feature +42 -0
  11. data/features/step_definitions/common_steps.rb +76 -0
  12. data/features/step_definitions/resque_delta_steps.rb +33 -0
  13. data/features/step_definitions/smart_indexing_steps.rb +3 -0
  14. data/features/support/env.rb +32 -0
  15. data/features/thinking_sphinx/database.example.yml +3 -0
  16. data/features/thinking_sphinx/db/migrations/create_delayed_betas.rb +4 -0
  17. data/features/thinking_sphinx/models/delayed_beta.rb +6 -0
  18. data/lib/flying_sphinx/resque_delta.rb +38 -0
  19. data/lib/flying_sphinx/resque_delta/delta_job.rb +14 -0
  20. data/lib/flying_sphinx/resque_delta/flag_as_deleted_job.rb +7 -0
  21. data/lib/thinking_sphinx/deltas/resque_delta.rb +118 -0
  22. data/lib/thinking_sphinx/deltas/resque_delta/core_index.rb +98 -0
  23. data/lib/thinking_sphinx/deltas/resque_delta/delta_job.rb +90 -0
  24. data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set.rb +56 -0
  25. data/lib/thinking_sphinx/deltas/resque_delta/index_utils.rb +47 -0
  26. data/lib/thinking_sphinx/deltas/resque_delta/railtie.rb +8 -0
  27. data/lib/thinking_sphinx/deltas/resque_delta/tasks.rb +38 -0
  28. data/lib/thinking_sphinx/deltas/resque_delta/version.rb +7 -0
  29. data/lib/ts-resque-delta.rb +2 -0
  30. data/spec/flying_sphinx/resque_delta/delta_job_spec.rb +32 -0
  31. data/spec/flying_sphinx/resque_delta/flag_as_deleted_job_spec.rb +23 -0
  32. data/spec/flying_sphinx/resque_delta_spec.rb +131 -0
  33. data/spec/spec_helper.rb +13 -0
  34. data/spec/thinking_sphinx/deltas/resque_delta/core_index_spec.rb +208 -0
  35. data/spec/thinking_sphinx/deltas/resque_delta/delta_job_spec.rb +172 -0
  36. data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set_spec.rb +126 -0
  37. data/spec/thinking_sphinx/deltas/resque_delta/index_utils_spec.rb +67 -0
  38. data/spec/thinking_sphinx/deltas/resque_delta_spec.rb +191 -0
  39. data/tasks/rails.rake +1 -0
  40. data/ts-resque-delta.gemspec +40 -0
  41. metadata +393 -0
@@ -0,0 +1,3 @@
1
+ username: root
2
+ host: localhost
3
+ password:
@@ -0,0 +1,4 @@
1
# Creates the delayed_betas table: a required string `name` column and a
# required boolean `delta` column that defaults to false.
connection = ActiveRecord::Base.connection

connection.create_table(:delayed_betas, :force => true) do |table|
  table.column(:name,  :string,  :null => false)
  table.column(:delta, :boolean, :null => false, :default => false)
end
@@ -0,0 +1,6 @@
1
# Model whose Sphinx index delegates delta handling to
# ThinkingSphinx::Deltas::ResqueDelta.
class DelayedBeta < ActiveRecord::Base
  define_index do
    # Index the name column and make it sortable.
    indexes(:name, :sortable => true)

    # Use the Resque-backed delta strategy for this index.
    set_property(:delta => ThinkingSphinx::Deltas::ResqueDelta)
  end
end
@@ -0,0 +1,38 @@
1
+ require 'thinking_sphinx/deltas/resque_delta'
2
+
3
# Variant of ThinkingSphinx::Deltas::ResqueDelta that enqueues the
# FlyingSphinx job classes and namespaces its Redis keys under 'fs-delta'.
class FlyingSphinx::ResqueDelta < ThinkingSphinx::Deltas::ResqueDelta
  # The Resque job classes enqueued by this delta strategy.
  #
  # @return [Array<Class>]
  #
  def self.job_types
    [FlyingSphinx::ResqueDelta::DeltaJob, FlyingSphinx::ResqueDelta::FlagAsDeletedJob]
  end

  # Prefix used when building Redis keys for this strategy.
  #
  # @return [String]
  #
  def self.job_prefix
    'fs-delta'
  end

  # Enqueues one DeltaJob per unlocked delta index of the model and, when an
  # instance is given, a FlagAsDeletedJob for that instance's document id
  # across the model's core indices.
  #
  # Nothing is enqueued when skip?(instance) is true.
  #
  # @param [Class] model the model whose delta indices should be processed.
  # @param [Object, NilClass] instance the changed record. Optional.
  # @return [Boolean] true
  #
  def index(model, instance = nil)
    return true if skip?(instance)

    # Each job receives its delta index name wrapped in a one-element array.
    model.delta_index_names.each do |delta_name|
      unless self.class.locked?(delta_name)
        Resque.enqueue(FlyingSphinx::ResqueDelta::DeltaJob, [delta_name])
      end
    end

    if instance
      Resque.enqueue(
        FlyingSphinx::ResqueDelta::FlagAsDeletedJob,
        model.core_index_names,
        instance.sphinx_document_id
      )
    end

    true
  end
end
36
+
37
+ require 'flying_sphinx/resque_delta/delta_job'
38
+ require 'flying_sphinx/resque_delta/flag_as_deleted_job'
@@ -0,0 +1,14 @@
1
# Resque job that hands delta indices to a FlyingSphinx::IndexRequest.
class FlyingSphinx::ResqueDelta::DeltaJob < ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
  @queue = :fs_delta

  # Builds a FlyingSphinx::IndexRequest for the given indices and performs it.
  #
  # The inherited skip? interpolates its argument into a Redis lock key, so it
  # has to be asked about one index name at a time; handing it the whole array
  # produces a key that never matches a real lock. Locked indices are dropped
  # from the request, and no request is made when none remain.
  #
  # @param [Array<String>, String] indices the Sphinx index name(s) to process
  #
  def self.perform(indices)
    unlocked = Array(indices).reject { |index| skip?(index) }
    return if unlocked.empty?

    FlyingSphinx::IndexRequest.new(unlocked).perform
  end
end
@@ -0,0 +1,7 @@
1
# Resque job wrapper around FlyingSphinx::FlagAsDeletedJob.
class FlyingSphinx::ResqueDelta::FlagAsDeletedJob
  @queue = :fs_delta

  class << self
    # Builds a FlyingSphinx::FlagAsDeletedJob from the arguments and runs it.
    #
    # @param indices     passed through to FlyingSphinx::FlagAsDeletedJob.new
    # @param document_id passed through to FlyingSphinx::FlagAsDeletedJob.new
    #
    def perform(indices, document_id)
      job = FlyingSphinx::FlagAsDeletedJob.new(indices, document_id)
      job.perform
    end
  end
end
@@ -0,0 +1,118 @@
1
+ require 'resque'
2
+ require 'thinking_sphinx'
3
+
4
+ require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_set'
5
+ require 'thinking_sphinx/deltas/resque_delta/index_utils'
6
+
7
# Delayed Deltas for Thinking Sphinx, using Resque.
#
# These comments are written for people reading the code. For a guide to
# Thinking Sphinx and/or deltas, start with the Thinking Sphinx site - or at
# the very least the README for this library.
#
# @author Patrick Allan
# @see http://ts.freelancing-gods.com Thinking Sphinx
#
class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
  # The Resque job classes enqueued by this delta strategy.
  #
  # @return [Array<Class>]
  #
  def self.job_types
    [ThinkingSphinx::Deltas::ResqueDelta::DeltaJob]
  end

  # Prefix used when building Redis keys for this strategy.
  #
  # @return [String]
  #
  def self.job_prefix
    'ts-delta'
  end

  # Empties every Resque queue used by the job types, using LTRIM followed by
  # LPOP so the queue contents are never loaded into client memory (unlike
  # Resque.dequeue).
  #
  # WARNING: ALL jobs in those queues are removed. Any other jobs sharing a
  # queue with a ResqueDelta job will be deleted too!
  #
  def self.clear_thinking_sphinx_queues
    queue_names = job_types.map { |job| job.instance_variable_get(:@queue) }.uniq

    queue_names.each do |queue_name|
      Resque.redis.ltrim("queue:#{queue_name}", 0, 0)
      Resque.redis.lpop("queue:#{queue_name}")
    end
  end

  # Clears the Resque queues as well as the flag-as-deleted state kept in
  # redis.
  #
  def self.clear!
    clear_thinking_sphinx_queues
    FlagAsDeletedSet.clear_all!
  end

  # Locking is deliberately simplistic: it assumes no more than one
  # `rake ts:si` or `rake ts:in` task runs at a time.
  #
  # @param [String] index_name the index to mark as locked
  #
  def self.lock(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.set(lock_key, 'true')
  end

  # Removes the lock marker for the given index.
  #
  # @param [String] index_name the index to unlock
  #
  def self.unlock(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.del(lock_key)
  end

  # @param [String] index_name the index to check
  # @return [Boolean] whether the lock marker for the index is set
  #
  def self.locked?(index_name)
    lock_key = "#{job_prefix}:index:#{index_name}:locked"
    Resque.redis.get(lock_key) == 'true'
  end

  # Resets per-index state before the core index is rebuilt: clears the
  # flag-as-deleted set for "<index_name>_core" and dequeues pending delta
  # jobs for "<index_name>_delta".
  #
  # @param [String] index_name the index prefix (without _core / _delta)
  #
  def self.prepare_for_core_index(index_name)
    FlagAsDeletedSet.clear!("#{index_name}_core")

    # Clear delta jobs. Dequeue is fast for jobs with arguments.
    Resque.dequeue(
      ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
      "#{index_name}_delta"
    )
  end

  # Queues a job to process each unlocked delta index of the given model. If
  # an instance is provided, its document id is also recorded in the
  # flag-as-deleted set of every core index of the model.
  #
  # Nothing is queued or recorded if updates or deltas are disabled, or if
  # the instance (when given) is not toggled to be in the delta index. The
  # first two options are controlled via ThinkingSphinx.updates_enabled? and
  # ThinkingSphinx.deltas_enabled?.
  #
  # @param [Class] model the ActiveRecord model to index.
  # @param [ActiveRecord::Base] instance the instance of the given model that
  #   has changed. Optional.
  # @return [Boolean] true
  #
  def index(model, instance = nil)
    return true if skip?(instance)

    model.delta_index_names.each do |delta_name|
      unless self.class.locked?(delta_name)
        Resque.enqueue(ThinkingSphinx::Deltas::ResqueDelta::DeltaJob, delta_name)
      end
    end

    if instance
      model.core_index_names.each do |core_name|
        FlagAsDeletedSet.add(core_name, instance.sphinx_document_id)
      end
    end

    true
  end

  private

  # Decides whether indexing work should be skipped. Work goes ahead only when
  # updates and deltas are both enabled and the instance (if there is one) is
  # toggled.
  #
  # @param [ActiveRecord::Base, NilClass] instance
  # @return [Boolean]
  #
  def skip?(instance)
    return true unless ThinkingSphinx.updates_enabled?
    return true unless ThinkingSphinx.deltas_enabled?

    instance && !toggled(instance)
  end
end
116
+
117
+ require 'thinking_sphinx/deltas/resque_delta/delta_job'
118
+ require 'thinking_sphinx/deltas/resque_delta/core_index'
@@ -0,0 +1,98 @@
1
# Core indexing that locks each delta index while its core index is rebuilt.
class ThinkingSphinx::Deltas::ResqueDelta::CoreIndex
  # Public: Lock a delta index against indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   lock_delta('foo')
  #
  # Returns nothing.
  def lock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.lock("#{index_name}_delta")
  end

  # Public: Unlock a delta index for indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   unlock_delta('foo')
  #
  # Returns nothing.
  def unlock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.unlock("#{index_name}_delta")
  end

  # Public: Lock all delta indexes against indexing or new index jobs.
  #
  # Iterates the same index prefixes that smart_index uses.
  #
  # Returns nothing.
  def lock_deltas
    index_prefixes.each { |index_name| lock_delta(index_name) }
  end

  # Public: Unlock all delta indexes for indexing or new index jobs.
  #
  # Iterates the same index prefixes that smart_index uses.
  #
  # Returns nothing.
  def unlock_deltas
    index_prefixes.each { |index_name| unlock_delta(index_name) }
  end

  # Public: Index all indices while locking each delta as we index the
  # corresponding core index.
  #
  # The configuration file is regenerated first unless the INDEX_ONLY
  # environment variable is 'true'. Indexing stops at the first core index
  # whose indexer run reports a non-zero status.
  #
  # Returns true on success; false on failure.
  def smart_index
    # Load config like ts:in.
    unless ENV['INDEX_ONLY'] == 'true'
      puts "Generating Configuration to #{ts_config.config_file}"
      ts_config.build
    end
    FileUtils.mkdir_p(ts_config.searchd_file_path)

    # Index each core, one at a time. Wrap with delta locking logic.
    index_prefixes.each do |index_name|
      ret = nil

      with_delta_index_lock(index_name) do
        ThinkingSphinx::Deltas::ResqueDelta.prepare_for_core_index(index_name)
        ts_config.controller.index("#{index_name}_core", :verbose => true)
        # Status of the last child process, captured while the lock is held.
        ret = $?
      end

      return false if ret.to_i != 0

      # Queue a delta run for this index once its core index has been rebuilt
      # and the lock released.
      Resque.enqueue(
        ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
        "#{index_name}_delta"
      )
    end

    true
  end

  # Public: Wraps the passed block with a delta index lock. The lock is
  # released even when the block raises, so a failed run does not leave the
  # delta index locked.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   with_delta_index_lock('foo') { ... }
  #
  # Returns the value of the block.
  def with_delta_index_lock(index_name)
    lock_delta(index_name)

    begin
      yield
    ensure
      unlock_delta(index_name)
    end
  end

  private

  # Internal: The configuration object supplied by IndexUtils.
  def ts_config
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.ts_config
  end

  # Internal: The index prefixes (names without _core / _delta) supplied by
  # IndexUtils.
  def index_prefixes
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.index_prefixes
  end
end
@@ -0,0 +1,90 @@
1
+ require 'resque-lock-timeout'
2
+
3
# Resque job that processes a single delta index and then flags the saved
# document ids as deleted in the matching core index.
#
class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob

  extend Resque::Plugins::LockTimeout

  @queue        = :ts_delta
  @lock_timeout = 240

  # Runs Sphinx's indexer tool to process the index. Currently assumes Sphinx
  # is running.
  #
  # Does nothing when the index is locked. After indexing, and only when
  # Sphinx is running, the document ids held in the processing set of the
  # matching core index are flagged as deleted there.
  #
  # @param [String] index the name of the Sphinx index
  #
  def self.perform(index)
    return if skip?(index)

    config = ThinkingSphinx::Configuration.instance

    # Delta Index
    indexer = "#{config.bin_path}#{config.indexer_binary_name}"
    output  = `#{indexer} --config #{config.config_file} --rotate #{index}`
    puts output unless ThinkingSphinx.suppress_delta_output?

    # Flag As Deleted
    return unless ThinkingSphinx.sphinx_running?

    core_index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(index)

    # Get the document ids we've saved
    saved_ids = ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.processing_members(core_index)

    # Filter out the ids that aren't present in sphinx
    searches = ThinkingSphinx::Search.bundle_searches(saved_ids) do |sphinx, id|
      sphinx.search_for_ids([], :index => core_index, :id_range => id..id)
    end
    present_ids = searches.map(&:to_a).flatten

    # Each hash element should be of the form { id => [1] }
    flag_hash = Hash[present_ids.map { |id| [id, [1]] }]

    config.client.update(core_index, ['sphinx_deleted'], flag_hash)
  end

  # Try again later if lock is in use: the job is put back on the queue with
  # the same arguments.
  def self.lock_failed(*args)
    Resque.enqueue(self, *args)
  end

  # Run only one DeltaJob at a time regardless of index.
  #def self.identifier(*args)
    #nil
  #end

  # This allows us to have a concurrency safe version of ts-delayed-delta's
  # duplicates_exist:
  #
  # http://github.com/freelancing-god/ts-delayed-delta/blob/master/lib/thinkin
  # g_sphinx/deltas/delayed_delta/job.rb#L47
  #
  # The name of this method ensures that it runs within around_perform_lock.
  #
  # We've leveraged resque-lock-timeout to ensure that only one DeltaJob is
  # running at a time. Now, this around filter essentially ensures that only
  # one DeltaJob of each index type can sit at the queue at once. If the queue
  # has more than one, lrem will clear the rest off.
  #
  def self.around_perform_lock1(*args)
    # Remove all other instances of this job (with the same args) from the
    # queue. Uses LREM (http://code.google.com/p/redis/wiki/LremCommand) which
    # takes the form: "LREM key count value" and if count == 0 removes all
    # instances of value from the list.
    duplicate_payload = Resque.encode(:class => self.to_s, :args => args)
    Resque.redis.lrem("queue:#{@queue}", 0, duplicate_payload)

    flag_set   = ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet
    core_index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(*args)

    # Grab the subset of flag as deleted document ids to work on
    flag_set.get_subset_for_processing(core_index)

    yield

    # Clear processing set. This is skipped when the job raises, which leaves
    # the ids in the processing set.
    flag_set.clear_processing(core_index)
  end

  protected

  # Whether the given index is currently locked against delta indexing.
  #
  # @param [String] index the name of the Sphinx index
  # @return [Boolean]
  #
  def self.skip?(index)
    ThinkingSphinx::Deltas::ResqueDelta.locked?(index)
  end
end
@@ -0,0 +1,56 @@
1
class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
  # Redis-backed sets of document ids waiting to be flagged as deleted, kept
  # per core index. Ids are collected in the main set and moved into a
  # processing set when a delta job picks them up.
  module FlagAsDeletedSet
    extend self

    # Redis key of the set that collects new document ids for a core index.
    def set_name(core_name)
      key_for(core_name, 'set')
    end

    # Redis key of the scratch set used while moving ids to processing.
    def temp_name(core_name)
      key_for(core_name, 'temp')
    end

    # Redis key of the set holding the ids currently being processed.
    def processing_name(core_name)
      key_for(core_name, 'processing')
    end

    # Records a document id for the given core index.
    def add(core_name, document_id)
      Resque.redis.sadd(set_name(core_name), document_id)
    end

    # Deletes the collecting set for the core index, then deletes its
    # processing set from inside DeltaJob.around_perform_lock for the matching
    # delta index.
    def clear!(core_name)
      Resque.redis.del(set_name(core_name))

      # Clear processing set as well
      delta_name = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.core_to_delta(core_name)
      delta_job  = ThinkingSphinx::Deltas::ResqueDelta::DeltaJob

      delta_job.around_perform_lock(delta_name) do
        Resque.redis.del(processing_name(core_name))
      end
    end

    # Runs clear! for every core index reported by IndexUtils.
    def clear_all!
      core_indices = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.core_indices
      core_indices.each { |core_index| clear!(core_index) }
    end

    # Moves the collected ids of a core index into its processing set, going
    # through a temporary set so that only the ids that were copied are
    # removed from the collecting set.
    def get_subset_for_processing(core_name)
      redis      = Resque.redis
      collecting = set_name(core_name)
      scratch    = temp_name(core_name)
      processing = processing_name(core_name)

      # Copy set to temp
      redis.sunionstore(scratch, collecting)
      # Store (set - temp) into set. This removes all items we copied into temp from set.
      redis.sdiffstore(collecting, collecting, scratch)
      # Merge processing and temp together and store into processing.
      redis.sunionstore(processing, processing, scratch)

      redis.del(scratch)
    end

    # The ids in the processing set of the core index, converted to integers.
    def processing_members(core_name)
      Resque.redis.smembers(processing_name(core_name)).map(&:to_i)
    end

    # Deletes the processing set of the core index.
    def clear_processing(core_name)
      Resque.redis.del(processing_name(core_name))
    end

    private

    # Builds "<job_prefix>:flag.deleted:<core_name>:<suffix>".
    def key_for(core_name, suffix)
      "#{ThinkingSphinx::Deltas::ResqueDelta.job_prefix}:flag.deleted:#{core_name}:#{suffix}"
    end
  end
end