cleansweep 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.travis.yml +10 -0
- data/CHANGES.md +15 -5
- data/Gemfile +2 -0
- data/Gemfile.lock +82 -0
- data/README.md +15 -7
- data/Rakefile +1 -0
- data/cleansweep.gemspec +2 -1
- data/lib/clean_sweep/purge_runner.rb +18 -28
- data/lib/clean_sweep/purge_runner/logging.rb +46 -14
- data/lib/clean_sweep/table_schema.rb +13 -9
- data/lib/clean_sweep/version.rb +1 -1
- data/spec/factories/annotations.rb +19 -0
- data/spec/factories/comments.rb +2 -2
- data/spec/factories/tables.rb +49 -0
- data/spec/purge_runner_spec.rb +12 -11
- data/spec/spec_helper.rb +5 -3
- data/spec/table_schema_spec.rb +101 -71
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f772f74727a7d58fdebda097fb0b70572cb92c34
+  data.tar.gz: a82986ae0e26308e4842193441e427e998c4f5a0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 183922164f35fbd986ca9617fa1c73fc4133db90fc15a59552f48def16a8d39d8a8164bbef8d61ee334f6a8dffbfc74a34f5fbb182d446d7485372b6add8667a
+  data.tar.gz: 77e4f14d2e44e7400d4bb14a09719fea8a13aac2660fab0712355b053d8117e848f7dd967878b01ec5c87c6181eca821056f4e388b00c93cd4b005432d1c0ebc
data/.gitignore
CHANGED
data/.travis.yml
ADDED
data/CHANGES.md
CHANGED
@@ -1,8 +1,17 @@
 See the [documentation](http://bkayser.github.io/cleansweep) for details

-### Version 1.0.
+### Version 1.0.4

-*
+* Print dry run output using the logger
+* Add option `non_traversing` so you can explicitly not use an index. If an index
+  is not specified, now it will guess using the first non-unique index or primary key.
+* Added more tests
+* Added Travis CI build, metrics
+
+### Version 1.0.3
+
+* Small bug in instrumentation and target model reference
+* Support first unique index as primary when primary key not found

 ### Version 1.0.2

@@ -10,6 +19,7 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
 * Added `dest_columns` option as a map of column names in the source to column names in the destination.
 * More testing and bug fixing in real environments

-### Version 1.0.
-
-*
+### Version 1.0.1
+
+* Initial release
+
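For orientation, here is a minimal sketch of the new 1.0.4 options listed above in use. `Comment` is a placeholder ActiveRecord model, and the call shape follows the gem's README and the specs later in this diff; check those for the authoritative usage.

```ruby
# Sketch only -- Comment is a hypothetical ActiveRecord model.

# Explicitly skip index traversal with the new non_traversing option. This only
# makes sense when every scanned row is deleted, otherwise skipped rows are re-scanned.
CleanSweep::PurgeRunner.new(model: Comment, non_traversing: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago.to_date)
end.execute_in_batches

# With no :index and no :non_traversing, 1.0.4 picks an index automatically.
CleanSweep::PurgeRunner.new(model: Comment).execute_in_batches
```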
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,82 @@
+PATH
+  remote: .
+  specs:
+    cleansweep (1.0.4)
+      activerecord (>= 3.0)
+      mysql2 (~> 0.3)
+      newrelic_rpm
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activemodel (4.2.0)
+      activesupport (= 4.2.0)
+      builder (~> 3.1)
+    activerecord (4.2.0)
+      activemodel (= 4.2.0)
+      activesupport (= 4.2.0)
+      arel (~> 6.0)
+    activesupport (4.2.0)
+      i18n (~> 0.7)
+      json (~> 1.7, >= 1.7.7)
+      minitest (~> 5.1)
+      thread_safe (~> 0.3, >= 0.3.4)
+      tzinfo (~> 1.1)
+    arel (6.0.0)
+    awesome_print (1.6.1)
+    builder (3.2.2)
+    codeclimate-test-reporter (0.4.4)
+      simplecov (>= 0.7.1, < 1.0.0)
+    coderay (1.1.0)
+    diff-lcs (1.2.5)
+    docile (1.1.5)
+    factory_girl (4.5.0)
+      activesupport (>= 3.0.0)
+    i18n (0.7.0)
+    json (1.8.1)
+    method_source (0.8.2)
+    minitest (5.5.0)
+    multi_json (1.10.1)
+    mysql2 (0.3.17)
+    newrelic_rpm (3.9.9.275)
+    pry (0.10.1)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    rake (10.4.2)
+    rspec (3.1.0)
+      rspec-core (~> 3.1.0)
+      rspec-expectations (~> 3.1.0)
+      rspec-mocks (~> 3.1.0)
+    rspec-core (3.1.7)
+      rspec-support (~> 3.1.0)
+    rspec-expectations (3.1.2)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.1.0)
+    rspec-mocks (3.1.3)
+      rspec-support (~> 3.1.0)
+    rspec-support (3.1.2)
+    simplecov (0.9.1)
+      docile (~> 1.1.0)
+      multi_json (~> 1.0)
+      simplecov-html (~> 0.8.0)
+    simplecov-html (0.8.0)
+    slop (3.6.0)
+    thread_safe (0.3.4)
+    timecop (0.7.1)
+    tzinfo (1.2.2)
+      thread_safe (~> 0.1)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  awesome_print (~> 1.2)
+  bundler (~> 1.7)
+  cleansweep!
+  codeclimate-test-reporter
+  factory_girl (~> 4.4)
+  pry (~> 0)
+  rake (~> 10.0)
+  rspec (~> 3.1)
+  timecop (~> 0.7.1)
data/README.md
CHANGED
@@ -2,6 +2,10 @@ Cleansweep is a utility for scripting purges using ruby in an
 efficient, low-impact manner on mysql innodb tables. Based on the
 Percona `pt-archive` utility.

+[](https://travis-ci.org/bkayser/cleansweep)
+[](https://codeclimate.com/github/bkayser/cleansweep)
+[](https://codeclimate.com/github/bkayser/cleansweep)
+
 ## Installation

 Add this line to your application's Gemfile:
@@ -116,6 +120,9 @@ The chunk query looks like:
 You can scan the index in either direction. To specify descending
 order, use the `reverse: true` option.

+If no index is specified, it will pick the primary key or the first unique index if there
+is no primary key.
+
 ### Copying rows from one table to another

 You can use the same technique to copy rows from one table to another.
@@ -179,16 +186,13 @@ Now create as many jobs as you need for the tables which refer to these metrics:

 ```ruby
 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: Metric,
                             dest_columns: { 'metric_id' => 'id'} ).execute_in_batches

 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: ChartMetric).execute_in_batches

 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: SystemMetric).execute_in_batches
 ```

@@ -202,6 +206,10 @@ into an unsafe territory. The script will pause for 5 minutes and
 only start once the corresponding metric goes back down to 90% of the
 specified threshold.

+Note: You will need process privileges to be able to see the history list and
+replication client privileges to monitor the replication lag.
+
+
 ### Logging and monitoring progress

 You pass in a standard log instance to capture all running output. By
@@ -221,8 +229,8 @@ in your target table.

 ### Limitations

-* Only works for mysql
-*
+* Only works for mysql. I have only used it against 5.5.
+* Tested with ActiveRecord 3.1.\* - 4.0.\*.
 * Using a non-unique index risks missing duplicate rows unless you use the `first_only` option.
 * Using the `first_only` option risks rescanning many rows if you have many more duplicates than your
   chunk size
@@ -279,11 +287,11 @@ db called 'cstest'.

 ## License and Copyright

-Copyright 2014 New Relic, Inc., and Bill Kayser
+Copyright 2014-2015 New Relic, Inc., and Bill Kayser

 Covered by the MIT [LICENSE](LICENSE.txt).

-
+## Credits

 This was all inspired and informed by [Percona's `pt-archiver`
 script](http://www.percona.com/doc/percona-toolkit/2.1/pt-archiver.html)
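The logging and dry-run notes added to the README above combine roughly as follows. This is a sketch only: `Comment` is a placeholder model, `dry_run:` appears in the option list in purge_runner.rb further down this diff, and the `logger:` key is an assumption based on the README's "pass in a standard log instance" wording.

```ruby
require 'logger'

# Sketch only -- Comment is a hypothetical model; the logger: key is assumed.
purger = CleanSweep::PurgeRunner.new(model: Comment,
                                     logger: Logger.new($stdout),
                                     dry_run: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago.to_date)
end

# As of 1.0.4 a dry run sends the initial, chunk, and delete/insert queries to the
# logger (previously they were printed directly) and returns without deleting anything.
purger.execute_in_batches
```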
data/Rakefile
CHANGED
data/cleansweep.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
   spec.homepage = "http://bkayser.github.com/cleansweep"
   spec.license = "MIT"

-  spec.files = `git ls-files -z`.split("\x0")
+  spec.files = `git ls-files -z`.split("\x0").delete_if { | f | f =~ /^gemfiles/ }
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.test_files = spec.files.grep(%r{^spec/})
   spec.require_paths = ["lib"]
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency 'mysql2', '~> 0.3'

   spec.add_development_dependency 'pry', '~> 0'
+  spec.add_development_dependency 'timecop', '~> 0.7.1'
   spec.add_development_dependency 'bundler', '~> 1.7'
   spec.add_development_dependency 'rake', '~> 10.0'
   spec.add_development_dependency 'rspec', '~> 3.1'
data/lib/clean_sweep/purge_runner.rb
CHANGED
@@ -16,14 +16,23 @@ require 'stringio'
 #   The number of rows to copy in each block. Defaults to 500.
 # [:index]
 #   The index to traverse in ascending order doing the purge. Rows are read in the order of
-#   the index, which must be a btree index. If not specified,
+#   the index, which must be a btree index. If not specified, An index is chosen automatically
+#   in order of preference:
+#     1. PRIMARY KEY
+#     2. First UNIQUE index
+#     3. First non-UNIQUE index
+#     4. No index used if no indexes defined.
+# [:non_traversing]
+#   When true, specifies the table will not be traversed using an index.
+#   This only makes sense if you are deleting everything as you go along, otherwise you'll
+#   be re-scanning skipped rows.
 # [:reverse]
 #   Traverse the index in reverse order. For example, if your index is on <tt>account_id</tt>,
 #   <tt>timestamp</tt>, this option will move through the rows starting at the highest account
 #   number, then move through timestamps starting with the most recent.
 # [:first_only]
-#   Traverse only the first column of the index, and do so inclusively using the <tt
-#   instead of the strict <tt
+#   Traverse only the first column of the index, and do so inclusively using the <tt>'>='</tt> operator
+#   instead of the strict <tt>'>'</tt> operator. This is important if the index is not unique and there
 #   are a lot of duplicates. Otherwise the delete could miss rows. Not allowed in copy mode because you'd
 #   be inserting duplicate rows.
 # [:dry_run]
@@ -94,11 +103,12 @@ class CleanSweep::PurgeRunner
     @copy_mode = @target_model && options[:copy_only]

     @table_schema = CleanSweep::TableSchema.new @model,
-
-
-
+                                                non_traversing: options[:non_traversing],
+                                                index: options[:index],
+                                                reverse: options[:reverse],
+                                                copy_columns: options[:copy_columns],
                                                 first_only: options[:first_only],
-                                                dest_model:
+                                                dest_model: options[:dest_model],
                                                 dest_columns: options[:dest_columns]

     if (@max_history || @max_repl_lag)
@@ -134,7 +144,7 @@ class CleanSweep::PurgeRunner
   def execute_in_batches

     if @dry_run
-      print_queries
+      log :info, print_queries
       return 0
     end

@@ -205,26 +215,6 @@ class CleanSweep::PurgeRunner
   add_method_tracer :sleep
   add_method_tracer :execute_in_batches

-  def print_queries(io)
-    io.puts 'Initial Query:'
-    io.puts format_query(' ', @query.to_sql)
-    rows = @model.connection.select_rows @query.limit(1).to_sql
-    if rows.empty?
-      # Don't have any sample data to use for the sample queries, so use NULL values just
-      # so the query will print out.
-      rows << [nil] * 100
-    end
-    io.puts "Chunk Query:"
-    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, rows.first).to_sql)
-    if copy_mode?
-      io.puts "Insert Statement:"
-      io.puts format_query(' ', @table_schema.insert_statement(rows))
-    else
-      io.puts "Delete Statement:"
-      io.puts format_query(' ', @table_schema.delete_statement(rows))
-    end
-  end
-
   private

   def format_query indentation, query
data/lib/clean_sweep/purge_runner/logging.rb
CHANGED
@@ -6,19 +6,7 @@ module CleanSweep::PurgeRunner::Logging
       while (@report_interval_start < Time.now - @report_interval) do
         @report_interval_start += @report_interval
       end
-
-      elapsed = [1, (Time.now - @start).to_i].max
-      rate = (@total_deleted / elapsed).to_i
-      rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
-      report << "report:"
-      if copy_mode?
-        report << " #{@dry_run ? 'queried' : 'copied'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      else
-        report << " #{@dry_run ? 'queried' : 'deleted'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      end
-      report << " elapsed: #{'%12s' % format(elapsed)}"
-      report << " rate: #{rate}"
-      log :info, report.join("\n")
+      print_report
     end
   end

@@ -28,6 +16,34 @@ module CleanSweep::PurgeRunner::Logging
     @logger.send level, out
   end

+  def print_queries
+    io = StringIO.new
+    io.puts 'Initial Query:'
+    io.puts format_query(' ', @query.to_sql)
+    io.puts "Chunk Query:"
+    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, sample_rows.first).to_sql)
+    if copy_mode?
+      io.puts "Insert Statement:"
+      io.puts format_query(' ', @table_schema.insert_statement(sample_rows))
+    else
+      io.puts "Delete Statement:"
+      io.puts format_query(' ', @table_schema.delete_statement(sample_rows))
+    end
+    io.string
+  end
+
+  private
+
+  def sample_rows
+    @sample_rows ||= @model.connection.select_rows @query.limit(1).to_sql
+    if @sample_rows.empty?
+      # Don't have any sample data to use for the sample queries, so use NULL values just
+      # so the query will print out.
+      @sample_rows << [nil] * 100
+    end
+    @sample_rows
+  end
+
   def format(time)
     format_string = "%H:%M:%S"
     if (time.to_i > (24 * 60 * 60))
@@ -35,4 +51,20 @@ module CleanSweep::PurgeRunner::Logging
     end
     Time.at(time).strftime(format_string)
   end
-
+
+  def print_report
+    elapsed = [1, (Time.now - @start).to_i].max
+    rate = (@total_deleted / elapsed).to_i
+    rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
+    report = [ "report:" ]
+    action = case
+             when @dry_run then 'queried'
+             when copy_mode? then 'copied'
+             else 'deleted'
+             end
+    report << " #{action}: #{'%12i' % @total_deleted} #{@model.table_name} records"
+    report << " elapsed: #{'%12s' % format(elapsed)}"
+    report << " rate: #{rate}"
+    log :info, report.join("\n")
+  end
+end
data/lib/clean_sweep/table_schema.rb
CHANGED
@@ -14,8 +14,8 @@ class CleanSweep::TableSchema

   def initialize(model, options={})

-    traversing_key_name = options[:
-    ascending = options
+    traversing_key_name = options[:index]
+    ascending = !options[:reverse]
     first_only = options[:first_only]
     @model = model
     @dest_model = options[:dest_model] || @model
@@ -26,7 +26,7 @@ class CleanSweep::TableSchema
     @name = @model.table_name

     @columns =
-      (options[:
+      (options[:copy_columns] || []).map do | extra_col_name |
         CleanSweep::TableSchema::ColumnSchema.new extra_col_name, model
       end

@@ -38,11 +38,15 @@ class CleanSweep::TableSchema
     raise "Table #{model.table_name} must have a primary key" unless @primary_key

     @primary_key.add_columns_to @columns
-    if
-    traversing_key_name
-
-
-
+    if !options[:non_traversing]
+      if traversing_key_name
+        traversing_key_name.downcase!
+        raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
+        @traversing_key = key_schemas[traversing_key_name]
+        @traversing_key.add_columns_to @columns
+      else
+        @traversing_key = @primary_key
+      end
       @traversing_key.ascending = ascending
       @traversing_key.first_only = first_only
     end
@@ -74,7 +78,7 @@ class CleanSweep::TableSchema
   end

   def initial_scope
-    scope = @model.
+    scope = @model.select(quoted_column_names).from(from_clause)
     scope = @traversing_key.order(scope) if @traversing_key
     return scope
   end
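For reference, the renamed TableSchema constructor options visible in this hunk (`index:`, `reverse:`, `copy_columns:`, `non_traversing:`, `first_only:`) are exercised like this in the specs below. A sketch only, using the `Comment` test model from spec/factories/comments.rb:

```ruby
# Sketch mirroring calls in spec/table_schema_spec.rb after this change.
schema = CleanSweep::TableSchema.new Comment,
                                     index: 'comments_on_account_timestamp',
                                     reverse: true,           # traverse the index descending
                                     copy_columns: %w[seen],  # extra columns (previously extra_columns:)
                                     first_only: false
schema.column_names          # columns selected for each chunk
schema.initial_scope.to_sql  # first chunk query, including FORCE INDEX
```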
data/lib/clean_sweep/version.rb
CHANGED
data/spec/factories/annotations.rb
ADDED
@@ -0,0 +1,19 @@
+
+# Defines a table that does not have a primary key but does
+# have a unique key.
+class Annotation < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      annotations (
+        `article_id` int(11) NOT NULL,
+        `text` varchar(64),
+        key `index_on_text` (`text`),
+        unique key (`article_id`)
+      )
+    EOF
+    Annotation.delete_all
+  end
+
+end
data/spec/factories/comments.rb
CHANGED
@@ -5,7 +5,7 @@ class Comment < ActiveRecord::Base
       create temporary table if not exists
       comments (
         `id` int(11) primary key auto_increment,
-        `timestamp`
+        `timestamp` date,
         `account` int(11),
         `seen` boolean,
         key comments_on_account_timestamp(account, timestamp),
@@ -19,7 +19,7 @@ end

 FactoryGirl.define do
   factory :comment do | comment |
-    comment.timestamp
+    comment.timestamp Date.new
     comment.seen false
     comment.sequence(:account) { | n | (n % 3)* 100 }
   end
data/spec/factories/tables.rb
ADDED
@@ -0,0 +1,49 @@
+class TableWithPrimaryKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_primary_keys (
+        `pk` int(11) primary key auto_increment,
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        unique key key_unique (k2)
+      )
+    EOF
+  end
+
+end
+
+class TableWithUniqueKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_unique_keys (
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        unique key key_unique (k2)
+      )
+    EOF
+  end
+
+end
+
+class TableWithRegularKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_regular_keys (
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        key key_extra (k2)
+      )
+    EOF
+  end
+
+end
+
data/spec/purge_runner_spec.rb
CHANGED
@@ -1,15 +1,17 @@
 require 'spec_helper'

-
+# Time mocking features are available in Rails 4 but not Rails 3 and the Timecop
+# gem works with both.
+require 'timecop'
+
 describe CleanSweep::PurgeRunner do

   context 'PurgeRunner' do
-    include ActiveSupport::Testing::TimeHelpers
     before do
-
+      Timecop.freeze Time.parse("2014-12-02 13:47:43.000000 -0800")
     end
     after do
-
+      Timecop.return
     end

     context "using comments" do
@@ -66,21 +68,20 @@ describe CleanSweep::PurgeRunner do
     it 'prints out the queries in a dry run' do
       purger = CleanSweep::PurgeRunner.new model: Comment,
                                            index: 'comments_on_account_timestamp' do | scope |
-        scope.where('timestamp < ?', 1.week.ago)
+        scope.where('timestamp < ?', 1.week.ago.to_date)
       end
-      output =
-
-      expect(output.string).to eq <<EOF
+      output = purger.print_queries
+      expect(output).to eq <<EOF
 Initial Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25')
 ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Chunk Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Delete Statement:
 DELETE
@@ -105,7 +106,7 @@ EOF
       end
       expect(Comment.count).to eq(5)
       # Only old comments deleted before stopping
-      expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
+      expect(Comment.where('timestamp >= ?', 4.days.ago.to_date).count).to eq(5)
     end
     it "descends the index" do
       purger = CleanSweep::PurgeRunner.new model: Comment,
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,16 @@
 ENV['RACK_ENV'] = 'test'

+require "codeclimate-test-reporter"
+CodeClimate::TestReporter.start
 require 'clean_sweep'
 require 'factory_girl'
 require 'fileutils'
 require 'active_record'
 require 'mysql2'
+require 'timecop'
 RSpec.configure do |config|
   config.include FactoryGirl::Syntax::Methods
   config.formatter = :progress
-  #config.order = 'random'
-
   config.before(:suite) do
     FactoryGirl.find_definitions
   end
@@ -20,7 +21,7 @@ logdir = File.expand_path "../../log",__FILE__
 FileUtils.mkdir_p logdir
 logfile = File.open(File.join(logdir, "test.log"), "w+")
 ActiveRecord::Base.logger = Logger.new(logfile)
-
+Time.zone = 'America/Los_Angeles'
 database = {
     encoding: 'utf8',
     adapter: 'mysql2',
@@ -34,3 +35,4 @@ connection.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
 database[:database] = db_name

 ActiveRecord::Base.establish_connection(database)
+
data/spec/table_schema_spec.rb
CHANGED
@@ -2,110 +2,140 @@ require 'spec_helper'

 describe CleanSweep::TableSchema do

-
-
-
-
-
-
-
-  it 'should read comments' do
-    expect(schema.primary_key.columns.map(&:name)).to eq([:id])
-    expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+  context "using sample tables" do
+    it 'should pick the primary key' do
+      TableWithPrimaryKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithPrimaryKey
+      expect(schema.primary_key.name).to eq "primary"
+      expect(schema.traversing_key.name).to eq "primary"
     end

-  it
-
-
-
+    it "should identify unique key as primary key" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key.name).to eq "key_unique"
     end

-  it
-
+    it "should skip the traversing key if non_traversing is true" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey, non_traversing: true
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key).to be_nil
     end
-
-
-    expect(
+    it "should error out if there is no unique key at all" do
+      TableWithRegularKey.create_table
+      expect(->{CleanSweep::TableSchema.new TableWithRegularKey}).to raise_exception(RuntimeError, 'Table table_with_regular_keys must have a primary key')
     end

+  end

-
-
-
-    expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
+  context "on comments" do
+    before do
+      Comment.create_table
     end
-  end

-
+    context "using ascending account, timestamp index" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', ascending: true }

-
+      it 'should read comments' do
+        expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+        expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+      end

-
-
-
-
-
+      it 'should produce an ascending chunk clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-11-29'))")
+      end

+      it 'should produce all select columns' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end

-
-
-
-
+      it 'should produce the ascending order clause' do
+        expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
+      end
+
+
+      it 'should produce an insert statement' do
+        schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp'
+        rows = account_and_timestamp_rows
+        expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-01'),(1002,2,'2014-11-30'),(1005,5,'2014-11-29')")
+      end
     end

-
+    context "using descending account, timestamp index" do
+
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', reverse: true }
+
+      it 'should produce a descending where clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-11-29'))")
+      end
+

-
-
+      it 'should produce the descending order clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
+      end

-  it 'should select all the rows' do
-    expect(schema.column_names).to eq([:id, :account, :timestamp])
     end

-
-
-
-
+    context "using account, timestamp index first column only" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', first_only: true }
+
+      it 'should select all the rows' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end
+
+      it 'should only query using the first column of the index' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include(" (`comments`.`account` >= 5) ")
+
+      end

     end

-
+    it 'should not care about case' do
+      CleanSweep::TableSchema.new Comment, index: 'primary'
+    end

-
-
-
+    it 'should work without a descending index' do
+      schema = CleanSweep::TableSchema.new Comment, non_traversing: true
+      expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+      expect(schema.traversing_key).to be_nil
+    end

-
-
-
-
-  end
+    it 'should produce minimal select columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'PRIMARY'
+      expect(schema.column_names).to eq([:id])
+    end

-
-
-
-
+    it 'should produce the from clause with an index' do
+      schema = CleanSweep::TableSchema.new Comment, index:'comments_on_timestamp'
+      expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
+    end

-
-
-
-
+    it 'should include additional columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp', copy_columns: %w[seen id]
+      expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
+      rows = account_and_timestamp_rows
+      rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
+      expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-01'),(1,1002,2,'2014-11-30'),(1,1005,5,'2014-11-29')")

-
-    schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
-    expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
-    rows = account_and_timestamp_rows
-    rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
-    expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")
+    end

   end

-
   def account_and_timestamp_rows
     rows = []
-    t =
+    t = Date.parse '2014-12-01'
     rows << [1001, 5, t]
-    rows << [1002, 2, t - 1
-    rows << [1005, 5, t - 2
+    rows << [1002, 2, t - 1]
+    rows << [1005, 5, t - 2]
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cleansweep
 version: !ruby/object:Gem::Version
-  version: 1.0.
+  version: 1.0.4
 platform: ruby
 authors:
 - Bill Kayser
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2015-01-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
@@ -66,6 +66,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: timecop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,10 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".travis.yml"
 - CHANGES.md
 - Gemfile
+- Gemfile.lock
 - LICENSE.txt
 - README.md
 - Rakefile
@@ -162,8 +178,10 @@ files:
 - lib/clean_sweep/table_schema/index_schema.rb
 - lib/clean_sweep/version.rb
 - lib/cleansweep.rb
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb
@@ -192,8 +210,10 @@ signing_key:
 specification_version: 4
 summary: Utility to purge or archive rows in mysql tables
 test_files:
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb