ts-resque-delta 1.1.5 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. data/Gemfile +24 -0
  2. data/Guardfile +17 -0
  3. data/README.markdown +1 -0
  4. data/Rakefile +16 -4
  5. data/cucumber.yml +2 -0
  6. data/features/smart_indexing.feature +43 -0
  7. data/features/step_definitions/common_steps.rb +16 -3
  8. data/features/step_definitions/resque_delta_steps.rb +1 -1
  9. data/features/step_definitions/smart_indexing_steps.rb +3 -0
  10. data/features/support/env.rb +3 -4
  11. data/lib/thinking_sphinx/deltas/resque_delta.rb +32 -10
  12. data/lib/thinking_sphinx/deltas/resque_delta/core_index.rb +101 -0
  13. data/lib/thinking_sphinx/deltas/resque_delta/delta_job.rb +72 -10
  14. data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set.rb +56 -0
  15. data/lib/thinking_sphinx/deltas/resque_delta/index_utils.rb +47 -0
  16. data/lib/thinking_sphinx/deltas/resque_delta/tasks.rb +4 -46
  17. data/lib/thinking_sphinx/deltas/resque_delta/version.rb +1 -1
  18. data/spec/spec_helper.rb +9 -5
  19. data/spec/thinking_sphinx/deltas/resque_delta/core_index_spec.rb +210 -0
  20. data/spec/thinking_sphinx/deltas/resque_delta/delta_job_spec.rb +138 -35
  21. data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set_spec.rb +126 -0
  22. data/spec/thinking_sphinx/deltas/resque_delta/index_utils_spec.rb +67 -0
  23. data/spec/thinking_sphinx/deltas/resque_delta_spec.rb +126 -53
  24. data/ts-resque-delta.gemspec +8 -2
  25. metadata +185 -180
  26. data/features/support/redis_test_setup.rb +0 -23
  27. data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_job.rb +0 -30
  28. data/spec/spec.opts +0 -1
  29. data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_job_spec.rb +0 -66
  30. data/tasks/testing.rb +0 -20
data/Gemfile CHANGED
@@ -2,3 +2,27 @@ source "http://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in ts-resque-delta.gemspec
4
4
  gemspec
5
+
6
group :development do
  # File-system events and notifications for OS X. On other platforms the
  # gems are still bundled but are not auto-required.
  case RUBY_PLATFORM
  when /darwin/i
    gem 'rb-fsevent'
    gem 'ruby_gntp'
  else
    gem 'rb-fsevent', :require => false
    gem 'growl', :require => false
  end

  # File-system events and notifications for Linux, handled the same way.
  case RUBY_PLATFORM
  when /linux/i
    gem 'rb-inotify'
    gem 'libnotify'
  else
    gem 'rb-inotify', :require => false
    gem 'libnotify', :require => false
  end
end
22
+
23
group :test do
  # ruby-debug is only bundled when running on a Ruby older than 1.9.
  gem 'ruby-debug' if RUBY_VERSION < '1.9'
end
data/Guardfile ADDED
@@ -0,0 +1,17 @@
1
# Watch the dependency manifests.
guard 'bundler' do
  watch('Gemfile')
  watch(/^.+\.gemspec/)
end

# Specs: a changed spec runs itself, a changed lib file maps to the spec with
# the same relative path, and a changed spec_helper maps to the whole suite.
rspec_options = {
  :version      => 2,
  :cli          => "-c --format progress",
  :all_on_start => false
}

guard 'rspec', rspec_options do
  watch(%r{^spec/.+_spec\.rb$})
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end
  watch('spec/spec_helper.rb') do
    "spec"
  end
end

# Features: support files map to every feature; a step definition file maps to
# the first feature file with the same base name, or to all features if none
# is found.
guard 'cucumber', :all_on_start => false do
  watch(%r{^features/.+\.feature$})
  watch(%r{^features/support/.+$}) do
    'features'
  end
  watch(%r{^features/step_definitions/(.+)_steps\.rb$}) do |match|
    Dir["**/#{match[1]}.feature"].first || 'features'
  end
end
17
+
data/README.markdown CHANGED
@@ -68,6 +68,7 @@ Contributors (for ts-delayed-delta)
68
68
  -----------------------------------
69
69
  * [Aaron Gibralter](https://github.com/agibralter)
70
70
  * [Ryan Schlesinger](https://github.com/ryansch) (Locking/`smart_index`)
71
+ * [Pat Allan](https://github.com/freelancing-god) (FlyingSphinx support)
71
72
 
72
73
  Original Contributors (for ts-delayed-delta)
73
74
  --------------------------------------------
data/Rakefile CHANGED
@@ -1,4 +1,16 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
3
- require File.join(File.dirname(__FILE__), 'tasks/testing')
4
- task :default => :spec
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+ require 'cucumber'
5
+ require 'cucumber/rake/task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec) do |t|
8
+ t.rspec_opts = ["-c", "--format progress"]
9
+ end
10
+
11
+ Cucumber::Rake::Task.new(:features) do |t|
12
+ end
13
+
14
+ task :all_tests => [:spec, :features]
15
+
16
+ task :default => :all_tests
data/cucumber.yml ADDED
@@ -0,0 +1,2 @@
1
+ ---
2
+ default: --color --format progress --strict
@@ -0,0 +1,43 @@
1
+ Feature: Smart Indexing
2
+ In order to have core indexing that works well with resque delta indexing
3
+ Developers
4
+ Should be able to use smart index to update core indices
5
+
6
+ Background:
7
+ Given Sphinx is running
8
+ And I am searching on delayed betas
9
+ And I have data
10
+
11
+ Scenario: Smart indexing should update core indices
12
+ When I run the smart indexer
13
+ And I wait for Sphinx to catch up
14
+ And I search for one
15
+ Then I should get 1 result
16
+
17
+ Scenario: Smart indexing should reset the delta index
18
+ Given I have indexed
19
+ When I change the name of delayed beta one to eleven
20
+ And I run the delayed jobs
21
+ And I wait for Sphinx to catch up
22
+
23
+ When I change the name of delayed beta eleven to one
24
+ And I run the smart indexer
25
+ And I run the delayed jobs
26
+ And I wait for Sphinx to catch up
27
+
28
+ When I search for eleven
29
+ Then I should get 0 results
30
+
31
+ Scenario: Delta Index running after smart indexing should not hide records
32
+ When I run the smart indexer
33
+ And I run the delayed jobs
34
+ And I wait for Sphinx to catch up
35
+
36
+ When I search for two
37
+ Then I should get 1 result
38
+
39
+ Scenario: Smart index should remove existing delta jobs
40
+ When I run the smart indexer
41
+ And I run one delayed job
42
+ And I wait for Sphinx to catch up
43
+ Then there should be no more DeltaJobs on the Resque queue
@@ -1,6 +1,6 @@
1
1
  Before do
2
2
  $queries_executed = []
3
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
3
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
4
4
  @model = nil
5
5
  @method = :search
6
6
  @query = ""
@@ -20,7 +20,7 @@ Given /^I am searching on (.+)$/ do |model|
20
20
  @model = model.gsub(/\s/, '_').singularize.camelize.constantize
21
21
  end
22
22
 
23
- Given "I have data and it has been indexed" do
23
+ Given "I have data" do
24
24
  DelayedBeta.create(:name => "one")
25
25
  DelayedBeta.create(:name => "two")
26
26
  DelayedBeta.create(:name => "three")
@@ -31,11 +31,19 @@ Given "I have data and it has been indexed" do
31
31
  DelayedBeta.create(:name => "eight")
32
32
  DelayedBeta.create(:name => "nine")
33
33
  DelayedBeta.create(:name => "ten")
34
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
34
+ end
35
+
36
+ Given "I have indexed" do
37
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
35
38
  ThinkingSphinx::Configuration.instance.controller.index
36
39
  sleep(1.5)
37
40
  end
38
41
 
42
+ Given "I have data and it has been indexed" do
43
+ step "I have data"
44
+ step "I have indexed"
45
+ end
46
+
39
47
  When "I wait for Sphinx to catch up" do
40
48
  sleep(0.5)
41
49
  end
@@ -49,6 +57,11 @@ Then /^I should get (\d+) results?$/ do |count|
49
57
  results.length.should == count.to_i
50
58
  end
51
59
 
60
+ Then /^I debug$/ do
61
+ debugger
62
+ 0
63
+ end
64
+
52
65
  def results
53
66
  @results ||= (@model || ThinkingSphinx).send(
54
67
  @method,
@@ -18,7 +18,7 @@ When /^I run one delayed job$/ do
18
18
  end
19
19
 
20
20
  When /^I cancel the jobs$/ do
21
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
21
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
22
22
  end
23
23
 
24
24
  When /^I change the name of delayed beta (\w+) to (\w+)$/ do |current, replacement|
@@ -0,0 +1,3 @@
1
# Runs a full smart index through CoreIndex with verbose output disabled.
When /^I run the smart indexer$/ do
  indexer = ThinkingSphinx::Deltas::ResqueDelta::CoreIndex.new
  indexer.smart_index(:verbose => false)
end
@@ -1,7 +1,8 @@
1
1
  require 'cucumber'
2
- require 'spec/expectations'
2
+ require 'rspec/expectations'
3
3
  require 'fileutils'
4
4
  require 'active_record'
5
+ require 'mock_redis'
5
6
 
6
7
  PROJECT_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
7
8
 
@@ -20,9 +21,7 @@ require 'thinking_sphinx/deltas/resque_delta'
20
21
 
21
22
  world.setup
22
23
 
23
- require 'redis_test_setup'
24
- RedisTestSetup.start_redis!(PROJECT_ROOT, :cucumber)
25
- Resque.redis = '127.0.0.1:6398'
24
+ Resque.redis = MockRedis.new
26
25
  Before do
27
26
  Resque.redis.flushall
28
27
  end
@@ -1,6 +1,9 @@
1
1
  require 'resque'
2
2
  require 'thinking_sphinx'
3
3
 
4
+ require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_set'
5
+ require 'thinking_sphinx/deltas/resque_delta/index_utils'
6
+
4
7
  # Delayed Deltas for Thinking Sphinx, using Resque.
5
8
  #
6
9
  # This documentation is aimed at those reading the code. If you're looking for
@@ -14,8 +17,7 @@ require 'thinking_sphinx'
14
17
  class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
15
18
  def self.job_types
16
19
  [
17
- ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
18
- ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedJob
20
+ ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
19
21
  ]
20
22
  end
21
23
 
@@ -25,13 +27,24 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
25
27
 
26
28
  # LTRIM + LPOP deletes all items from the Resque queue without loading it
27
29
  # into client memory (unlike Resque.dequeue).
28
- def self.cancel_thinking_sphinx_jobs
30
+ # WARNING: This will clear ALL jobs in any queue used by a ResqueDelta job.
31
+ # If you're sharing a queue with other jobs they'll be deleted!
32
+ def self.clear_thinking_sphinx_queues
29
33
  job_types.collect { |c| c.instance_variable_get(:@queue) }.uniq.each do |q|
30
34
  Resque.redis.ltrim("queue:#{q}", 0, 0)
31
35
  Resque.redis.lpop("queue:#{q}")
32
36
  end
33
37
  end
34
38
 
39
+ # Clear both the resque queues and any other state maintained in redis
40
def self.clear!
  # Empty the Resque queues used by the delta jobs first...
  clear_thinking_sphinx_queues
  # ...then clear every flag-as-deleted set.
  FlagAsDeletedSet.clear_all!
end
45
+
46
+ # Use simplistic locking. We're assuming that the user won't run more than one
47
+ # `rake ts:si` or `rake ts:in` task at a time.
35
48
  def self.lock(index_name)
36
49
  Resque.redis.set("#{job_prefix}:index:#{index_name}:locked", 'true')
37
50
  end
@@ -44,6 +57,17 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
44
57
  Resque.redis.get("#{job_prefix}:index:#{index_name}:locked") == 'true'
45
58
  end
46
59
 
60
# Clears per-index state before the core index for +index_name+ is rebuilt.
#
# index_name - The String index prefix (without the _core/_delta suffix).
def self.prepare_for_core_index(index_name)
  # Clear the flag-as-deleted set kept for the core index.
  FlagAsDeletedSet.clear!("#{index_name}_core")

  # Remove queued delta jobs for this index. dequeue is fast for jobs with
  # arguments.
  Resque.dequeue(
    ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
    "#{index_name}_delta"
  )
end
70
+
47
71
  # Adds a job to the queue for processing the given model's delta index. A job
48
72
  # for hiding the instance in the core index is also created, if an instance is
49
73
  # provided.
@@ -64,15 +88,13 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
64
88
  next if self.class.locked?(delta)
65
89
  Resque.enqueue(
66
90
  ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
67
- [delta]
91
+ delta
68
92
  )
69
93
  end
70
94
  if instance
71
- Resque.enqueue(
72
- ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedJob,
73
- model.core_index_names,
74
- instance.sphinx_document_id
75
- )
95
+ model.core_index_names.each do |core|
96
+ FlagAsDeletedSet.add(core, instance.sphinx_document_id)
97
+ end
76
98
  end
77
99
  true
78
100
  end
@@ -93,4 +115,4 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
93
115
  end
94
116
 
95
117
  require 'thinking_sphinx/deltas/resque_delta/delta_job'
96
- require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_job'
118
+ require 'thinking_sphinx/deltas/resque_delta/core_index'
@@ -0,0 +1,101 @@
1
# Drives core (full) indexing while coordinating with the Resque delta jobs:
# each delta index is locked while its core index is rebuilt, and a fresh
# delta job is enqueued afterwards.
class ThinkingSphinx::Deltas::ResqueDelta::CoreIndex
  # Public: Lock a delta index against indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   lock_delta('foo')
  #
  # Returns nothing.
  def lock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.lock("#{index_name}_delta")
  end

  # Public: Unlock a delta index for indexing or new index jobs.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   unlock_delta('foo')
  #
  # Returns nothing.
  def unlock_delta(index_name)
    ThinkingSphinx::Deltas::ResqueDelta.unlock("#{index_name}_delta")
  end

  # Public: Lock all delta indexes against indexing or new index jobs.
  #
  # Returns nothing.
  def lock_deltas
    # Uses the index_prefixes helper below; this class defines no
    # sphinx_indices method.
    index_prefixes.each { |index_name| lock_delta(index_name) }
  end

  # Public: Unlock all delta indexes for indexing or new index jobs.
  #
  # Returns nothing.
  def unlock_deltas
    index_prefixes.each { |index_name| unlock_delta(index_name) }
  end

  # Public: Index all indices while locking each delta as we index the
  # corresponding core index.
  #
  # opts - The Hash of options (default: {}):
  #        :verbose - Whether to print progress (default: true). Forced to
  #                   false when ENV['SILENT'] is 'true'.
  #
  # The Sphinx configuration is regenerated first unless ENV['INDEX_ONLY'] is
  # 'true'.
  #
  # Returns true on success; false on failure.
  def smart_index(opts = {})
    verbose = opts.fetch(:verbose, true)
    verbose = false if ENV['SILENT'] == 'true'

    # Load config like ts:in.
    unless ENV['INDEX_ONLY'] == 'true'
      puts "Generating Configuration to #{ts_config.config_file}" if verbose
      ts_config.build
    end
    FileUtils.mkdir_p(ts_config.searchd_file_path)

    # Index each core, one at a time. Wrap with delta locking logic.
    index_prefixes.each do |index_name|
      ret = nil

      with_delta_index_lock(index_name) do
        ThinkingSphinx::Deltas::ResqueDelta.prepare_for_core_index(index_name)
        ts_config.controller.index("#{index_name}_core", :verbose => verbose)
        # Capture the status of the last child process run by the indexer.
        ret = $?
      end

      # Stop at the first failed core index; the delta lock is already
      # released at this point.
      return false if ret.to_i != 0

      Resque.enqueue(
        ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
        "#{index_name}_delta"
      )
    end

    true
  end

  # Public: Wraps the passed block with a delta index lock. The lock is
  # released even if the block raises, so a failed index run cannot leave the
  # delta index locked.
  #
  # index_name - The String index prefix.
  #
  # Examples
  #
  #   with_delta_index_lock('foo') { ... }
  #
  # Returns nothing.
  def with_delta_index_lock(index_name)
    lock_delta(index_name)
    begin
      yield
    ensure
      unlock_delta(index_name)
    end
  end

  private

  # Internal: The Thinking Sphinx configuration, via IndexUtils.
  def ts_config
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.ts_config
  end

  # Internal: The index prefixes (names without _core/_delta), via IndexUtils.
  def index_prefixes
    ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.index_prefixes
  end
end
@@ -13,12 +13,34 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
13
13
  #
14
14
  # @param [String] index the name of the Sphinx index
15
15
  #
16
- def self.perform(indices)
17
- return if skip?(indices)
16
+ def self.perform(index)
17
+ return if skip?(index)
18
18
 
19
19
  config = ThinkingSphinx::Configuration.instance
20
- output = `#{config.bin_path}#{config.indexer_binary_name} --config #{config.config_file} --rotate #{indices.join(' ')}`
20
+
21
+ # Delta Index
22
+ output = `#{config.bin_path}#{config.indexer_binary_name} --config #{config.config_file} --rotate #{index}`
21
23
  puts output unless ThinkingSphinx.suppress_delta_output?
24
+
25
+ # Flag As Deleted
26
+ return unless ThinkingSphinx.sphinx_running?
27
+
28
+ index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(index)
29
+
30
+ # Get the document ids we've saved
31
+ flag_as_deleted_ids = ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.processing_members(index)
32
+
33
+ unless flag_as_deleted_ids.empty?
34
+ # Filter out the ids that aren't present in sphinx
35
+ flag_as_deleted_ids = filter_flag_as_deleted_ids(flag_as_deleted_ids, index)
36
+
37
+ unless flag_as_deleted_ids.empty?
38
+ # Each hash element should be of the form { id => [1] }
39
+ flag_hash = Hash[*flag_as_deleted_ids.collect {|id| [id, [1]] }.flatten(1)]
40
+
41
+ config.client.update(index, ['sphinx_deleted'], flag_hash)
42
+ end
43
+ end
22
44
  end
23
45
 
24
46
  # Try again later if lock is in use.
@@ -26,10 +48,10 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
26
48
  Resque.enqueue(self, *args)
27
49
  end
28
50
 
29
- # Run only one DeltaJob at a time regardless of indices.
30
- def self.identifier(*args)
31
- nil
32
- end
51
+ # Run only one DeltaJob at a time regardless of index.
52
+ #def self.identifier(*args)
53
+ #nil
54
+ #end
33
55
 
34
56
  # This allows us to have a concurrency safe version of ts-delayed-delta's
35
57
  # duplicates_exist:
@@ -51,14 +73,54 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
51
73
  # instances of value from the list.
52
74
  redis_job_value = Resque.encode(:class => self.to_s, :args => args)
53
75
  Resque.redis.lrem("queue:#{@queue}", 0, redis_job_value)
76
+
77
+ # Grab the subset of flag as deleted document ids to work on
78
+ core_index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(*args)
79
+ ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.get_subset_for_processing(core_index)
80
+
54
81
  yield
82
+
83
+ # Clear processing set
84
+ ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.clear_processing(core_index)
55
85
  end
56
86
 
57
87
  protected
58
88
 
59
- def self.skip?(indices)
60
- indices.any? do |index|
61
- ThinkingSphinx::Deltas::ResqueDelta.locked?(index)
89
# Whether the job for +index+ should be skipped: true when ResqueDelta reports
# the index as locked.
def self.skip?(index)
  resque_delta = ThinkingSphinx::Deltas::ResqueDelta
  resque_delta.locked?(index)
end
92
+
93
# Returns the document ids from +ids+ that a Sphinx search against +index+
# actually matches.
#
# ids   - The document ids to look up.
# index - The name of the index to search.
def self.filter_flag_as_deleted_ids(ids, index)
  found = []

  # Query in subsets produced by partition_ids (partition size 4096).
  partition_ids(ids, 4096) do |subset|
    response = ThinkingSphinx.search_for_ids(
      :with => {:@id => subset}, :index => index
    )
    matched = response.results[:matches].collect { |match| match[:doc] }
    found.concat(matched)
  end

  found
end
103
+
104
# Splits +ids+ into consecutive batches of at most +n+ elements.
#
# ids - The Array of ids to split.
# n   - The maximum batch size. A non-positive value means "do not split".
#
# Every batch is an Array, including the case where all ids fit in a single
# batch, so the block always receives a list of ids rather than one id.
#
# Yields each batch in order when a block is given.
# Returns the Array of batches (empty when +ids+ is empty).
def self.partition_ids(ids, n)
  return [] if ids.empty?

  batches = n > 0 ? ids.each_slice(n).to_a : [ids]
  batches.each { |batch| yield batch } if block_given?
  batches
end
64
126
  end