ts-resque-delta 1.1.5 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30)
  1. data/Gemfile +24 -0
  2. data/Guardfile +17 -0
  3. data/README.markdown +1 -0
  4. data/Rakefile +16 -4
  5. data/cucumber.yml +2 -0
  6. data/features/smart_indexing.feature +43 -0
  7. data/features/step_definitions/common_steps.rb +16 -3
  8. data/features/step_definitions/resque_delta_steps.rb +1 -1
  9. data/features/step_definitions/smart_indexing_steps.rb +3 -0
  10. data/features/support/env.rb +3 -4
  11. data/lib/thinking_sphinx/deltas/resque_delta.rb +32 -10
  12. data/lib/thinking_sphinx/deltas/resque_delta/core_index.rb +101 -0
  13. data/lib/thinking_sphinx/deltas/resque_delta/delta_job.rb +72 -10
  14. data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set.rb +56 -0
  15. data/lib/thinking_sphinx/deltas/resque_delta/index_utils.rb +47 -0
  16. data/lib/thinking_sphinx/deltas/resque_delta/tasks.rb +4 -46
  17. data/lib/thinking_sphinx/deltas/resque_delta/version.rb +1 -1
  18. data/spec/spec_helper.rb +9 -5
  19. data/spec/thinking_sphinx/deltas/resque_delta/core_index_spec.rb +210 -0
  20. data/spec/thinking_sphinx/deltas/resque_delta/delta_job_spec.rb +138 -35
  21. data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_set_spec.rb +126 -0
  22. data/spec/thinking_sphinx/deltas/resque_delta/index_utils_spec.rb +67 -0
  23. data/spec/thinking_sphinx/deltas/resque_delta_spec.rb +126 -53
  24. data/ts-resque-delta.gemspec +8 -2
  25. metadata +185 -180
  26. data/features/support/redis_test_setup.rb +0 -23
  27. data/lib/thinking_sphinx/deltas/resque_delta/flag_as_deleted_job.rb +0 -30
  28. data/spec/spec.opts +0 -1
  29. data/spec/thinking_sphinx/deltas/resque_delta/flag_as_deleted_job_spec.rb +0 -66
  30. data/tasks/testing.rb +0 -20
data/Gemfile CHANGED
@@ -2,3 +2,27 @@ source "http://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in ts-resque-delta.gemspec
4
4
  gemspec
5
+
6
+ group :development do
7
+ if RUBY_PLATFORM =~ /darwin/i
8
+ gem 'rb-fsevent'
9
+ gem 'ruby_gntp'
10
+ else
11
+ gem 'rb-fsevent', :require => false
12
+ gem 'growl', :require => false
13
+ end
14
+ if RUBY_PLATFORM =~ /linux/i
15
+ gem 'rb-inotify'
16
+ gem 'libnotify'
17
+ else
18
+ gem 'rb-inotify', :require => false
19
+ gem 'libnotify', :require => false
20
+ end
21
+ end
22
+
23
+ group :test do
24
+ if RUBY_VERSION >= '1.9'
25
+ else
26
+ gem 'ruby-debug'
27
+ end
28
+ end
data/Guardfile ADDED
@@ -0,0 +1,17 @@
1
+ guard 'bundler' do
2
+ watch('Gemfile')
3
+ watch(/^.+\.gemspec/)
4
+ end
5
+
6
+ guard 'rspec', :version => 2, :cli => "-c --format progress", :all_on_start => false do
7
+ watch(%r{^spec/.+_spec\.rb$})
8
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
9
+ watch('spec/spec_helper.rb') { "spec" }
10
+ end
11
+
12
+ guard 'cucumber', :all_on_start => false do
13
+ watch(%r{^features/.+\.feature$})
14
+ watch(%r{^features/support/.+$}) { 'features' }
15
+ watch(%r{^features/step_definitions/(.+)_steps\.rb$}) { |m| Dir[File.join("**/#{m[1]}.feature")][0] || 'features' }
16
+ end
17
+
data/README.markdown CHANGED
@@ -68,6 +68,7 @@ Contributors (for ts-delayed-delta)
68
68
  -----------------------------------
69
69
  * [Aaron Gibralter](https://github.com/agibralter)
70
70
  * [Ryan Schlesinger](https://github.com/ryansch) (Locking/`smart_index`)
71
+ * [Pat Allan](https://github.com/freelancing-god) (FlyingSphinx support)
71
72
 
72
73
  Original Contributors (for ts-delayed-delta)
73
74
  --------------------------------------------
data/Rakefile CHANGED
@@ -1,4 +1,16 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
3
- require File.join(File.dirname(__FILE__), 'tasks/testing')
4
- task :default => :spec
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+ require 'cucumber'
5
+ require 'cucumber/rake/task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec) do |t|
8
+ t.rspec_opts = ["-c", "--format progress"]
9
+ end
10
+
11
+ Cucumber::Rake::Task.new(:features) do |t|
12
+ end
13
+
14
+ task :all_tests => [:spec, :features]
15
+
16
+ task :default => :all_tests
data/cucumber.yml ADDED
@@ -0,0 +1,2 @@
1
+ ---
2
+ default: --color --format progress --strict
data/features/smart_indexing.feature ADDED
@@ -0,0 +1,43 @@
1
+ Feature: Smart Indexing
2
+ In order to have core indexing that works well with resque delta indexing
3
+ Developers
4
+ Should be able to use smart index to update core indices
5
+
6
+ Background:
7
+ Given Sphinx is running
8
+ And I am searching on delayed betas
9
+ And I have data
10
+
11
+ Scenario: Smart indexing should update core indices
12
+ When I run the smart indexer
13
+ And I wait for Sphinx to catch up
14
+ And I search for one
15
+ Then I should get 1 result
16
+
17
+ Scenario: Smart indexing should reset the delta index
18
+ Given I have indexed
19
+ When I change the name of delayed beta one to eleven
20
+ And I run the delayed jobs
21
+ And I wait for Sphinx to catch up
22
+
23
+ When I change the name of delayed beta eleven to one
24
+ And I run the smart indexer
25
+ And I run the delayed jobs
26
+ And I wait for Sphinx to catch up
27
+
28
+ When I search for eleven
29
+ Then I should get 0 results
30
+
31
+ Scenario: Delta Index running after smart indexing should not hide records
32
+ When I run the smart indexer
33
+ And I run the delayed jobs
34
+ And I wait for Sphinx to catch up
35
+
36
+ When I search for two
37
+ Then I should get 1 result
38
+
39
+ Scenario: Smart index should remove existing delta jobs
40
+ When I run the smart indexer
41
+ And I run one delayed job
42
+ And I wait for Sphinx to catch up
43
+ Then there should be no more DeltaJobs on the Resque queue
data/features/step_definitions/common_steps.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  Before do
2
2
  $queries_executed = []
3
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
3
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
4
4
  @model = nil
5
5
  @method = :search
6
6
  @query = ""
@@ -20,7 +20,7 @@ Given /^I am searching on (.+)$/ do |model|
20
20
  @model = model.gsub(/\s/, '_').singularize.camelize.constantize
21
21
  end
22
22
 
23
- Given "I have data and it has been indexed" do
23
+ Given "I have data" do
24
24
  DelayedBeta.create(:name => "one")
25
25
  DelayedBeta.create(:name => "two")
26
26
  DelayedBeta.create(:name => "three")
@@ -31,11 +31,19 @@ Given "I have data and it has been indexed" do
31
31
  DelayedBeta.create(:name => "eight")
32
32
  DelayedBeta.create(:name => "nine")
33
33
  DelayedBeta.create(:name => "ten")
34
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
34
+ end
35
+
36
+ Given "I have indexed" do
37
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
35
38
  ThinkingSphinx::Configuration.instance.controller.index
36
39
  sleep(1.5)
37
40
  end
38
41
 
42
+ Given "I have data and it has been indexed" do
43
+ step "I have data"
44
+ step "I have indexed"
45
+ end
46
+
39
47
  When "I wait for Sphinx to catch up" do
40
48
  sleep(0.5)
41
49
  end
@@ -49,6 +57,11 @@ Then /^I should get (\d+) results?$/ do |count|
49
57
  results.length.should == count.to_i
50
58
  end
51
59
 
60
+ Then /^I debug$/ do
61
+ debugger
62
+ 0
63
+ end
64
+
52
65
  def results
53
66
  @results ||= (@model || ThinkingSphinx).send(
54
67
  @method,
data/features/step_definitions/resque_delta_steps.rb CHANGED
@@ -18,7 +18,7 @@ When /^I run one delayed job$/ do
18
18
  end
19
19
 
20
20
  When /^I cancel the jobs$/ do
21
- ThinkingSphinx::Deltas::ResqueDelta.cancel_thinking_sphinx_jobs
21
+ ThinkingSphinx::Deltas::ResqueDelta.clear!
22
22
  end
23
23
 
24
24
  When /^I change the name of delayed beta (\w+) to (\w+)$/ do |current, replacement|
data/features/step_definitions/smart_indexing_steps.rb ADDED
@@ -0,0 +1,3 @@
1
+ When /^I run the smart indexer$/ do
2
+ ThinkingSphinx::Deltas::ResqueDelta::CoreIndex.new.smart_index(:verbose => false)
3
+ end
data/features/support/env.rb CHANGED
@@ -1,7 +1,8 @@
1
1
  require 'cucumber'
2
- require 'spec/expectations'
2
+ require 'rspec/expectations'
3
3
  require 'fileutils'
4
4
  require 'active_record'
5
+ require 'mock_redis'
5
6
 
6
7
  PROJECT_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
7
8
 
@@ -20,9 +21,7 @@ require 'thinking_sphinx/deltas/resque_delta'
20
21
 
21
22
  world.setup
22
23
 
23
- require 'redis_test_setup'
24
- RedisTestSetup.start_redis!(PROJECT_ROOT, :cucumber)
25
- Resque.redis = '127.0.0.1:6398'
24
+ Resque.redis = MockRedis.new
26
25
  Before do
27
26
  Resque.redis.flushall
28
27
  end
data/lib/thinking_sphinx/deltas/resque_delta.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  require 'resque'
2
2
  require 'thinking_sphinx'
3
3
 
4
+ require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_set'
5
+ require 'thinking_sphinx/deltas/resque_delta/index_utils'
6
+
4
7
  # Delayed Deltas for Thinking Sphinx, using Resque.
5
8
  #
6
9
  # This documentation is aimed at those reading the code. If you're looking for
@@ -14,8 +17,7 @@ require 'thinking_sphinx'
14
17
  class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
15
18
  def self.job_types
16
19
  [
17
- ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
18
- ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedJob
20
+ ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
19
21
  ]
20
22
  end
21
23
 
@@ -25,13 +27,24 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
25
27
 
26
28
  # LTRIM + LPOP deletes all items from the Resque queue without loading it
27
29
  # into client memory (unlike Resque.dequeue).
28
- def self.cancel_thinking_sphinx_jobs
30
+ # WARNING: This will clear ALL jobs in any queue used by a ResqueDelta job.
31
+ # If you're sharing a queue with other jobs they'll be deleted!
32
+ def self.clear_thinking_sphinx_queues
29
33
  job_types.collect { |c| c.instance_variable_get(:@queue) }.uniq.each do |q|
30
34
  Resque.redis.ltrim("queue:#{q}", 0, 0)
31
35
  Resque.redis.lpop("queue:#{q}")
32
36
  end
33
37
  end
34
38
 
39
+ # Clear both the resque queues and any other state maintained in redis
40
+ def self.clear!
41
+ self.clear_thinking_sphinx_queues
42
+
43
+ FlagAsDeletedSet.clear_all!
44
+ end
45
+
46
+ # Use simplistic locking. We're assuming that the user won't run more than one
47
+ # `rake ts:si` or `rake ts:in` task at a time.
35
48
  def self.lock(index_name)
36
49
  Resque.redis.set("#{job_prefix}:index:#{index_name}:locked", 'true')
37
50
  end
@@ -44,6 +57,17 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
44
57
  Resque.redis.get("#{job_prefix}:index:#{index_name}:locked") == 'true'
45
58
  end
46
59
 
60
+ def self.prepare_for_core_index(index_name)
61
+ core = "#{index_name}_core"
62
+ delta = "#{index_name}_delta"
63
+
64
+ FlagAsDeletedSet.clear!(core)
65
+
66
+ #clear delta jobs
67
+ # dequeue is fast for jobs with arguments
68
+ Resque.dequeue(ThinkingSphinx::Deltas::ResqueDelta::DeltaJob, delta)
69
+ end
70
+
47
71
  # Adds a job to the queue for processing the given model's delta index. A job
48
72
  # for hiding the instance in the core index is also created, if an instance is
49
73
  # provided.
@@ -64,15 +88,13 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
64
88
  next if self.class.locked?(delta)
65
89
  Resque.enqueue(
66
90
  ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
67
- [delta]
91
+ delta
68
92
  )
69
93
  end
70
94
  if instance
71
- Resque.enqueue(
72
- ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedJob,
73
- model.core_index_names,
74
- instance.sphinx_document_id
75
- )
95
+ model.core_index_names.each do |core|
96
+ FlagAsDeletedSet.add(core, instance.sphinx_document_id)
97
+ end
76
98
  end
77
99
  true
78
100
  end
@@ -93,4 +115,4 @@ class ThinkingSphinx::Deltas::ResqueDelta < ThinkingSphinx::Deltas::DefaultDelta
93
115
  end
94
116
 
95
117
  require 'thinking_sphinx/deltas/resque_delta/delta_job'
96
- require 'thinking_sphinx/deltas/resque_delta/flag_as_deleted_job'
118
+ require 'thinking_sphinx/deltas/resque_delta/core_index'
data/lib/thinking_sphinx/deltas/resque_delta/core_index.rb ADDED
@@ -0,0 +1,101 @@
1
+ class ThinkingSphinx::Deltas::ResqueDelta::CoreIndex
2
+ # Public: Lock a delta index against indexing or new index jobs.
3
+ #
4
+ # index_name - The String index prefix.
5
+ #
6
+ # Examples
7
+ #
8
+ # lock_delta('foo')
9
+ #
10
+ # Returns nothing.
11
+ def lock_delta(index_name)
12
+ ThinkingSphinx::Deltas::ResqueDelta.lock("#{index_name}_delta")
13
+ end
14
+
15
+ # Public: Unlock a delta index for indexing or new index jobs.
16
+ #
17
+ # index_name - The String index prefix.
18
+ #
19
+ # Examples
20
+ #
21
+ # unlock_delta('foo')
22
+ #
23
+ # Returns nothing.
24
+ def unlock_delta(index_name)
25
+ ThinkingSphinx::Deltas::ResqueDelta.unlock("#{index_name}_delta")
26
+ end
27
+
28
+ # Public: Lock all delta indexes against indexing or new index jobs.
29
+ #
30
+ # Returns nothing.
31
+ def lock_deltas
32
+ sphinx_indices.each { |index_name| lock_delta(index_name) }
33
+ end
34
+
35
+ # Public: Unlock all delta indexes for indexing or new index jobs.
36
+ #
37
+ # Returns nothing.
38
+ def unlock_deltas
39
+ sphinx_indices.each { |index_name| unlock_delta(index_name) }
40
+ end
41
+
42
+ # Public: Index all indices while locking each delta as we index the corresponding core index.
43
+ #
44
+ # Returns true on success; false on failure.
45
+ def smart_index(opts = {})
46
+ verbose = opts.fetch(:verbose, true)
47
+ verbose = false if ENV['SILENT'] == 'true'
48
+
49
+ # Load config like ts:in.
50
+ unless ENV['INDEX_ONLY'] == 'true'
51
+ puts "Generating Configuration to #{ts_config.config_file}" if verbose
52
+ ts_config.build
53
+ end
54
+ FileUtils.mkdir_p(ts_config.searchd_file_path)
55
+
56
+ # Index each core, one at a time. Wrap with delta locking logic.
57
+ index_prefixes.each do |index_name|
58
+ ret = nil
59
+
60
+ with_delta_index_lock(index_name) do
61
+ ThinkingSphinx::Deltas::ResqueDelta.prepare_for_core_index(index_name)
62
+ ts_config.controller.index("#{index_name}_core", :verbose => verbose)
63
+ ret = $?
64
+ end
65
+
66
+ return false if ret.to_i != 0
67
+
68
+ Resque.enqueue(
69
+ ThinkingSphinx::Deltas::ResqueDelta::DeltaJob,
70
+ "#{index_name}_delta"
71
+ )
72
+ end
73
+
74
+ true
75
+ end
76
+
77
+ # Public: Wraps the passed block with a delta index lock
78
+ #
79
+ # index_name - The String index prefix.
80
+ #
81
+ # Examples
82
+ #
83
+ # with_delta_index_lock('foo')
84
+ #
85
+ # Returns nothing.
86
+ def with_delta_index_lock(index_name)
87
+ lock_delta(index_name)
88
+ yield
89
+ unlock_delta(index_name)
90
+ end
91
+
92
+ private
93
+
94
+ def ts_config
95
+ ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.ts_config
96
+ end
97
+
98
+ def index_prefixes
99
+ ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.index_prefixes
100
+ end
101
+ end
data/lib/thinking_sphinx/deltas/resque_delta/delta_job.rb CHANGED
@@ -13,12 +13,34 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
13
13
  #
14
14
  # @param [String] index the name of the Sphinx index
15
15
  #
16
- def self.perform(indices)
17
- return if skip?(indices)
16
+ def self.perform(index)
17
+ return if skip?(index)
18
18
 
19
19
  config = ThinkingSphinx::Configuration.instance
20
- output = `#{config.bin_path}#{config.indexer_binary_name} --config #{config.config_file} --rotate #{indices.join(' ')}`
20
+
21
+ # Delta Index
22
+ output = `#{config.bin_path}#{config.indexer_binary_name} --config #{config.config_file} --rotate #{index}`
21
23
  puts output unless ThinkingSphinx.suppress_delta_output?
24
+
25
+ # Flag As Deleted
26
+ return unless ThinkingSphinx.sphinx_running?
27
+
28
+ index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(index)
29
+
30
+ # Get the document ids we've saved
31
+ flag_as_deleted_ids = ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.processing_members(index)
32
+
33
+ unless flag_as_deleted_ids.empty?
34
+ # Filter out the ids that aren't present in sphinx
35
+ flag_as_deleted_ids = filter_flag_as_deleted_ids(flag_as_deleted_ids, index)
36
+
37
+ unless flag_as_deleted_ids.empty?
38
+ # Each hash element should be of the form { id => [1] }
39
+ flag_hash = Hash[*flag_as_deleted_ids.collect {|id| [id, [1]] }.flatten(1)]
40
+
41
+ config.client.update(index, ['sphinx_deleted'], flag_hash)
42
+ end
43
+ end
22
44
  end
23
45
 
24
46
  # Try again later if lock is in use.
@@ -26,10 +48,10 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
26
48
  Resque.enqueue(self, *args)
27
49
  end
28
50
 
29
- # Run only one DeltaJob at a time regardless of indices.
30
- def self.identifier(*args)
31
- nil
32
- end
51
+ # Run only one DeltaJob at a time regardless of index.
52
+ #def self.identifier(*args)
53
+ #nil
54
+ #end
33
55
 
34
56
  # This allows us to have a concurrency safe version of ts-delayed-delta's
35
57
  # duplicates_exist:
@@ -51,14 +73,54 @@ class ThinkingSphinx::Deltas::ResqueDelta::DeltaJob
51
73
  # instances of value from the list.
52
74
  redis_job_value = Resque.encode(:class => self.to_s, :args => args)
53
75
  Resque.redis.lrem("queue:#{@queue}", 0, redis_job_value)
76
+
77
+ # Grab the subset of flag as deleted document ids to work on
78
+ core_index = ThinkingSphinx::Deltas::ResqueDelta::IndexUtils.delta_to_core(*args)
79
+ ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.get_subset_for_processing(core_index)
80
+
54
81
  yield
82
+
83
+ # Clear processing set
84
+ ThinkingSphinx::Deltas::ResqueDelta::FlagAsDeletedSet.clear_processing(core_index)
55
85
  end
56
86
 
57
87
  protected
58
88
 
59
- def self.skip?(indices)
60
- indices.any? do |index|
61
- ThinkingSphinx::Deltas::ResqueDelta.locked?(index)
89
+ def self.skip?(index)
90
+ ThinkingSphinx::Deltas::ResqueDelta.locked?(index)
91
+ end
92
+
93
+ def self.filter_flag_as_deleted_ids(ids, index)
94
+ search_results = []
95
+ partition_ids(ids, 4096) do |subset|
96
+ search_results += ThinkingSphinx.search_for_ids(
97
+ :with => {:@id => subset}, :index => index
98
+ ).results[:matches].collect { |match| match[:doc] }
99
+ end
100
+
101
+ search_results
102
+ end
103
+
104
+ def self.partition_ids(ids, n)
105
+ if n > 0 && n < ids.size
106
+ result = []
107
+ max_subarray_size = n - 1
108
+ i = j = 0
109
+ while i < ids.size && j < ids.size
110
+ j = i + max_subarray_size
111
+ result << ids.slice(i..j)
112
+ i += n
113
+ end
114
+ else
115
+ result = ids
62
116
  end
117
+
118
+ if block_given?
119
+ result.each do |ary|
120
+ yield ary
121
+ end
122
+ end
123
+
124
+ result
63
125
  end
64
126
  end