cleansweep 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ceb5f4b259349242b4a2c4f11854bb1182b2015a
4
- data.tar.gz: 4e2292c3547793a58f5f69599241f595bfed9358
3
+ metadata.gz: f772f74727a7d58fdebda097fb0b70572cb92c34
4
+ data.tar.gz: a82986ae0e26308e4842193441e427e998c4f5a0
5
5
  SHA512:
6
- metadata.gz: b695e4a7a553ebedb460f20ec9dea0a12b7f3012ec62d0b9127ae27f299458d296beffb7b395069fe09f570c084a0f6f2b4df424fa04a3f74b1f34fde401fe39
7
- data.tar.gz: fde7d9b0ba62dbff94610402472144873e5df4d54a70dcb3545a4c929944be54adbd8ff9aad2d664ff518390bc56529914f93cff9433ecd408b2521d572c37a6
6
+ metadata.gz: 183922164f35fbd986ca9617fa1c73fc4133db90fc15a59552f48def16a8d39d8a8164bbef8d61ee334f6a8dffbfc74a34f5fbb182d446d7485372b6add8667a
7
+ data.tar.gz: 77e4f14d2e44e7400d4bb14a09719fea8a13aac2660fab0712355b053d8117e848f7dd967878b01ec5c87c6181eca821056f4e388b00c93cd4b005432d1c0ebc
data/.gitignore CHANGED
@@ -1,7 +1,6 @@
1
1
  /.bundle/
2
2
  .ruby-version
3
3
  /.yardoc
4
- /Gemfile.lock
5
4
  /_yardoc/
6
5
  /coverage/
7
6
  /doc/
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.4
4
+ - 1.9.3
5
+ gemfile:
6
+ - gemfiles/Gemfile.rails3
7
+ - gemfiles/Gemfile.rails4
8
+ addons:
9
+ code_climate:
10
+ repo_token: 7ec6fd701b7d2b206cdd233c2202b6e11c8ba6af01f8a93f5e24595008ac20a0
data/CHANGES.md CHANGED
@@ -1,8 +1,17 @@
1
1
  See the [documentation](http://bkayser.github.io/cleansweep) for details
2
2
 
3
- ### Version 1.0.1
3
+ ### Version 1.0.4
4
4
 
5
- * Initial release
5
+ * Print dry run output using the logger
6
+ * Add option `non_traversing` so you can explicitly not use an index. If an index
7
+ is not specified, it now defaults to the primary key, or to the first unique index if there is no primary key.
8
+ * Added more tests
9
+ * Added Travis CI build, metrics
10
+
11
+ ### Version 1.0.3
12
+
13
+ * Small bug in instrumentation and target model reference
14
+ * Support first unique index as primary when primary key not found
6
15
 
7
16
  ### Version 1.0.2
8
17
 
@@ -10,6 +19,7 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
10
19
  * Added `dest_columns` option as a map of column names in the source to column names in the destination.
11
20
  * More testing and bug fixing in real environments
12
21
 
13
- ### Version 1.0.3
14
- * Small bug in instrumentation and target model reference
15
- * Support first unique index as primary when primary key not found
22
+ ### Version 1.0.1
23
+
24
+ * Initial release
25
+
data/Gemfile CHANGED
@@ -1,4 +1,6 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ gem "codeclimate-test-reporter", group: :test, require: nil
4
+
3
5
  # Specify your gem's dependencies in cleansweep.gemspec
4
6
  gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,82 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ cleansweep (1.0.4)
5
+ activerecord (>= 3.0)
6
+ mysql2 (~> 0.3)
7
+ newrelic_rpm
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ activemodel (4.2.0)
13
+ activesupport (= 4.2.0)
14
+ builder (~> 3.1)
15
+ activerecord (4.2.0)
16
+ activemodel (= 4.2.0)
17
+ activesupport (= 4.2.0)
18
+ arel (~> 6.0)
19
+ activesupport (4.2.0)
20
+ i18n (~> 0.7)
21
+ json (~> 1.7, >= 1.7.7)
22
+ minitest (~> 5.1)
23
+ thread_safe (~> 0.3, >= 0.3.4)
24
+ tzinfo (~> 1.1)
25
+ arel (6.0.0)
26
+ awesome_print (1.6.1)
27
+ builder (3.2.2)
28
+ codeclimate-test-reporter (0.4.4)
29
+ simplecov (>= 0.7.1, < 1.0.0)
30
+ coderay (1.1.0)
31
+ diff-lcs (1.2.5)
32
+ docile (1.1.5)
33
+ factory_girl (4.5.0)
34
+ activesupport (>= 3.0.0)
35
+ i18n (0.7.0)
36
+ json (1.8.1)
37
+ method_source (0.8.2)
38
+ minitest (5.5.0)
39
+ multi_json (1.10.1)
40
+ mysql2 (0.3.17)
41
+ newrelic_rpm (3.9.9.275)
42
+ pry (0.10.1)
43
+ coderay (~> 1.1.0)
44
+ method_source (~> 0.8.1)
45
+ slop (~> 3.4)
46
+ rake (10.4.2)
47
+ rspec (3.1.0)
48
+ rspec-core (~> 3.1.0)
49
+ rspec-expectations (~> 3.1.0)
50
+ rspec-mocks (~> 3.1.0)
51
+ rspec-core (3.1.7)
52
+ rspec-support (~> 3.1.0)
53
+ rspec-expectations (3.1.2)
54
+ diff-lcs (>= 1.2.0, < 2.0)
55
+ rspec-support (~> 3.1.0)
56
+ rspec-mocks (3.1.3)
57
+ rspec-support (~> 3.1.0)
58
+ rspec-support (3.1.2)
59
+ simplecov (0.9.1)
60
+ docile (~> 1.1.0)
61
+ multi_json (~> 1.0)
62
+ simplecov-html (~> 0.8.0)
63
+ simplecov-html (0.8.0)
64
+ slop (3.6.0)
65
+ thread_safe (0.3.4)
66
+ timecop (0.7.1)
67
+ tzinfo (1.2.2)
68
+ thread_safe (~> 0.1)
69
+
70
+ PLATFORMS
71
+ ruby
72
+
73
+ DEPENDENCIES
74
+ awesome_print (~> 1.2)
75
+ bundler (~> 1.7)
76
+ cleansweep!
77
+ codeclimate-test-reporter
78
+ factory_girl (~> 4.4)
79
+ pry (~> 0)
80
+ rake (~> 10.0)
81
+ rspec (~> 3.1)
82
+ timecop (~> 0.7.1)
data/README.md CHANGED
@@ -2,6 +2,10 @@ Cleansweep is a utility for scripting purges using ruby in an
2
2
  efficient, low-impact manner on mysql innodb tables. Based on the
3
3
  Percona `pt-archive` utility.
4
4
 
5
+ [![Build Status](https://img.shields.io/travis/bkayser/cleansweep/master.svg?x=3)](https://travis-ci.org/bkayser/cleansweep)
6
+ [![Code Climate](https://codeclimate.com/github/bkayser/cleansweep/badges/gpa.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
7
+ [![Test Coverage](https://codeclimate.com/github/bkayser/cleansweep/badges/coverage.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
8
+
5
9
  ## Installation
6
10
 
7
11
  Add this line to your application's Gemfile:
@@ -116,6 +120,9 @@ The chunk query looks like:
116
120
  You can scan the index in either direction. To specify descending
117
121
  order, use the `reverse: true` option.
118
122
 
123
+ If no index is specified, it will pick the primary key or the first unique index if there
124
+ is no primary key.
125
+
119
126
  ### Copying rows from one table to another
120
127
 
121
128
  You can use the same technique to copy rows from one table to another.
@@ -179,16 +186,13 @@ Now create as many jobs as you need for the tables which refer to these metrics:
179
186
 
180
187
  ```ruby
181
188
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
182
- index: 'PRIMARY',
183
189
  dest_model: Metric,
184
190
  dest_columns: { 'metric_id' => 'id'} ).execute_in_batches
185
191
 
186
192
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
187
- index: 'PRIMARY',
188
193
  dest_model: ChartMetric).execute_in_batches
189
194
 
190
195
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
191
- index: 'PRIMARY',
192
196
  dest_model: SystemMetric).execute_in_batches
193
197
  ```
194
198
 
@@ -202,6 +206,10 @@ into an unsafe territory. The script will pause for 5 minutes and
202
206
  only start once the corresponding metric goes back down to 90% of the
203
207
  specified threshold.
204
208
 
209
+ Note: You will need the MySQL `PROCESS` privilege to be able to see the history list and
210
+ the `REPLICATION CLIENT` privilege to monitor the replication lag.
211
+
212
+
205
213
  ### Logging and monitoring progress
206
214
 
207
215
  You pass in a standard log instance to capture all running output. By
@@ -221,8 +229,8 @@ in your target table.
221
229
 
222
230
  ### Limitations
223
231
 
224
- * Only works for mysql (as far as I know). I have only used it against 5.5.
225
- * Should work with ActiveRecord 3.* - 4.*.
232
+ * Only works for mysql. I have only used it against 5.5.
233
+ * Tested with ActiveRecord 3.1.\* - 4.0.\*.
226
234
  * Using a non-unique index risks missing duplicate rows unless you use the `first_only` option.
227
235
  * Using the `first_only` option risks rescanning many rows if you have many more duplicates than your
228
236
  chunk size
@@ -279,11 +287,11 @@ db called 'cstest'.
279
287
 
280
288
  ## License and Copyright
281
289
 
282
- Copyright 2014 New Relic, Inc., and Bill Kayser
290
+ Copyright 2014-2015 New Relic, Inc., and Bill Kayser
283
291
 
284
292
  Covered by the MIT [LICENSE](LICENSE.txt).
285
293
 
286
- ### Credits
294
+ ## Credits
287
295
 
288
296
  This was all inspired and informed by [Percona's `pt-archiver`
289
297
  script](http://www.percona.com/doc/percona-toolkit/2.1/pt-archiver.html)
data/Rakefile CHANGED
@@ -1,3 +1,4 @@
1
1
  require "bundler/gem_tasks"
2
2
  require 'rspec/core/rake_task'
3
3
  RSpec::Core::RakeTask.new(:spec)
4
+ task :default => :spec
data/cleansweep.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.homepage = "http://bkayser.github.com/cleansweep"
21
21
  spec.license = "MIT"
22
22
 
23
- spec.files = `git ls-files -z`.split("\x0")
23
+ spec.files = `git ls-files -z`.split("\x0").delete_if { | f | f =~ /^gemfiles/ }
24
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.test_files = spec.files.grep(%r{^spec/})
26
26
  spec.require_paths = ["lib"]
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency 'mysql2', '~> 0.3'
31
31
 
32
32
  spec.add_development_dependency 'pry', '~> 0'
33
+ spec.add_development_dependency 'timecop', '~> 0.7.1'
33
34
  spec.add_development_dependency 'bundler', '~> 1.7'
34
35
  spec.add_development_dependency 'rake', '~> 10.0'
35
36
  spec.add_development_dependency 'rspec', '~> 3.1'
@@ -16,14 +16,23 @@ require 'stringio'
16
16
  # The number of rows to copy in each block. Defaults to 500.
17
17
  # [:index]
18
18
  # The index to traverse in ascending order doing the purge. Rows are read in the order of
19
- # the index, which must be a btree index. If not specified, <tt>PRIMARY</tt> is assumed.
19
+ # the index, which must be a btree index. If not specified, an index is chosen automatically
20
+ # in order of preference:
21
+ # 1. PRIMARY KEY
22
+ # 2. First UNIQUE index
23
+ # 3. First non-UNIQUE index
24
+ # 4. No index used if no indexes defined.
25
+ # [:non_traversing]
26
+ # When true, specifies the table will not be traversed using an index.
27
+ # This only makes sense if you are deleting everything as you go along, otherwise you'll
28
+ # be re-scanning skipped rows.
20
29
  # [:reverse]
21
30
  # Traverse the index in reverse order. For example, if your index is on <tt>account_id</tt>,
22
31
  # <tt>timestamp</tt>, this option will move through the rows starting at the highest account
23
32
  # number, then move through timestamps starting with the most recent.
24
33
  # [:first_only]
25
- # Traverse only the first column of the index, and do so inclusively using the <tt>&gt;=</tt> operator
26
- # instead of the strict <tt>&gt;</tt> operator. This is important if the index is not unique and there
34
+ # Traverse only the first column of the index, and do so inclusively using the <tt>'>='</tt> operator
35
+ # instead of the strict <tt>'>'</tt> operator. This is important if the index is not unique and there
27
36
  # are a lot of duplicates. Otherwise the delete could miss rows. Not allowed in copy mode because you'd
28
37
  # be inserting duplicate rows.
29
38
  # [:dry_run]
@@ -94,11 +103,12 @@ class CleanSweep::PurgeRunner
94
103
  @copy_mode = @target_model && options[:copy_only]
95
104
 
96
105
  @table_schema = CleanSweep::TableSchema.new @model,
97
- key_name: options[:index],
98
- ascending: !options[:reverse],
99
- extra_columns: options[:copy_columns],
106
+ non_traversing: options[:non_traversing],
107
+ index: options[:index],
108
+ reverse: options[:reverse],
109
+ copy_columns: options[:copy_columns],
100
110
  first_only: options[:first_only],
101
- dest_model: @target_model,
111
+ dest_model: options[:dest_model],
102
112
  dest_columns: options[:dest_columns]
103
113
 
104
114
  if (@max_history || @max_repl_lag)
@@ -134,7 +144,7 @@ class CleanSweep::PurgeRunner
134
144
  def execute_in_batches
135
145
 
136
146
  if @dry_run
137
- print_queries($stdout)
147
+ log :info, print_queries
138
148
  return 0
139
149
  end
140
150
 
@@ -205,26 +215,6 @@ class CleanSweep::PurgeRunner
205
215
  add_method_tracer :sleep
206
216
  add_method_tracer :execute_in_batches
207
217
 
208
- def print_queries(io)
209
- io.puts 'Initial Query:'
210
- io.puts format_query(' ', @query.to_sql)
211
- rows = @model.connection.select_rows @query.limit(1).to_sql
212
- if rows.empty?
213
- # Don't have any sample data to use for the sample queries, so use NULL values just
214
- # so the query will print out.
215
- rows << [nil] * 100
216
- end
217
- io.puts "Chunk Query:"
218
- io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, rows.first).to_sql)
219
- if copy_mode?
220
- io.puts "Insert Statement:"
221
- io.puts format_query(' ', @table_schema.insert_statement(rows))
222
- else
223
- io.puts "Delete Statement:"
224
- io.puts format_query(' ', @table_schema.delete_statement(rows))
225
- end
226
- end
227
-
228
218
  private
229
219
 
230
220
  def format_query indentation, query
@@ -6,19 +6,7 @@ module CleanSweep::PurgeRunner::Logging
6
6
  while (@report_interval_start < Time.now - @report_interval) do
7
7
  @report_interval_start += @report_interval
8
8
  end
9
- report = []
10
- elapsed = [1, (Time.now - @start).to_i].max
11
- rate = (@total_deleted / elapsed).to_i
12
- rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
13
- report << "report:"
14
- if copy_mode?
15
- report << " #{@dry_run ? 'queried' : 'copied'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
16
- else
17
- report << " #{@dry_run ? 'queried' : 'deleted'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
18
- end
19
- report << " elapsed: #{'%12s' % format(elapsed)}"
20
- report << " rate: #{rate}"
21
- log :info, report.join("\n")
9
+ print_report
22
10
  end
23
11
  end
24
12
 
@@ -28,6 +16,34 @@ module CleanSweep::PurgeRunner::Logging
28
16
  @logger.send level, out
29
17
  end
30
18
 
19
+ def print_queries
20
+ io = StringIO.new
21
+ io.puts 'Initial Query:'
22
+ io.puts format_query(' ', @query.to_sql)
23
+ io.puts "Chunk Query:"
24
+ io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, sample_rows.first).to_sql)
25
+ if copy_mode?
26
+ io.puts "Insert Statement:"
27
+ io.puts format_query(' ', @table_schema.insert_statement(sample_rows))
28
+ else
29
+ io.puts "Delete Statement:"
30
+ io.puts format_query(' ', @table_schema.delete_statement(sample_rows))
31
+ end
32
+ io.string
33
+ end
34
+
35
+ private
36
+
37
+ def sample_rows
38
+ @sample_rows ||= @model.connection.select_rows @query.limit(1).to_sql
39
+ if @sample_rows.empty?
40
+ # Don't have any sample data to use for the sample queries, so use NULL values just
41
+ # so the query will print out.
42
+ @sample_rows << [nil] * 100
43
+ end
44
+ @sample_rows
45
+ end
46
+
31
47
  def format(time)
32
48
  format_string = "%H:%M:%S"
33
49
  if (time.to_i > (24 * 60 * 60))
@@ -35,4 +51,20 @@ module CleanSweep::PurgeRunner::Logging
35
51
  end
36
52
  Time.at(time).strftime(format_string)
37
53
  end
38
- end
54
+
55
+ def print_report
56
+ elapsed = [1, (Time.now - @start).to_i].max
57
+ rate = (@total_deleted / elapsed).to_i
58
+ rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
59
+ report = [ "report:" ]
60
+ action = case
61
+ when @dry_run then 'queried'
62
+ when copy_mode? then 'copied'
63
+ else 'deleted'
64
+ end
65
+ report << " #{action}: #{'%12i' % @total_deleted} #{@model.table_name} records"
66
+ report << " elapsed: #{'%12s' % format(elapsed)}"
67
+ report << " rate: #{rate}"
68
+ log :info, report.join("\n")
69
+ end
70
+ end
@@ -14,8 +14,8 @@ class CleanSweep::TableSchema
14
14
 
15
15
  def initialize(model, options={})
16
16
 
17
- traversing_key_name = options[:key_name]
18
- ascending = options.include?(:ascending) ? options[:ascending] : true
17
+ traversing_key_name = options[:index]
18
+ ascending = !options[:reverse]
19
19
  first_only = options[:first_only]
20
20
  @model = model
21
21
  @dest_model = options[:dest_model] || @model
@@ -26,7 +26,7 @@ class CleanSweep::TableSchema
26
26
  @name = @model.table_name
27
27
 
28
28
  @columns =
29
- (options[:extra_columns] || []).map do | extra_col_name |
29
+ (options[:copy_columns] || []).map do | extra_col_name |
30
30
  CleanSweep::TableSchema::ColumnSchema.new extra_col_name, model
31
31
  end
32
32
 
@@ -38,11 +38,15 @@ class CleanSweep::TableSchema
38
38
  raise "Table #{model.table_name} must have a primary key" unless @primary_key
39
39
 
40
40
  @primary_key.add_columns_to @columns
41
- if traversing_key_name
42
- traversing_key_name.downcase!
43
- raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
44
- @traversing_key = key_schemas[traversing_key_name]
45
- @traversing_key.add_columns_to @columns
41
+ if !options[:non_traversing]
42
+ if traversing_key_name
43
+ traversing_key_name.downcase!
44
+ raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
45
+ @traversing_key = key_schemas[traversing_key_name]
46
+ @traversing_key.add_columns_to @columns
47
+ else
48
+ @traversing_key = @primary_key
49
+ end
46
50
  @traversing_key.ascending = ascending
47
51
  @traversing_key.first_only = first_only
48
52
  end
@@ -74,7 +78,7 @@ class CleanSweep::TableSchema
74
78
  end
75
79
 
76
80
  def initial_scope
77
- scope = @model.all.select(quoted_column_names).from(from_clause)
81
+ scope = @model.select(quoted_column_names).from(from_clause)
78
82
  scope = @traversing_key.order(scope) if @traversing_key
79
83
  return scope
80
84
  end
@@ -1,3 +1,3 @@
1
1
  module CleanSweep
2
- VERSION = "1.0.3"
2
+ VERSION = "1.0.4"
3
3
  end
@@ -0,0 +1,19 @@
1
+
2
+ # Defines a table that does not have a primary key but does
3
+ # have a unique key.
4
+ class Annotation < ActiveRecord::Base
5
+
6
+ def self.create_table
7
+ connection.execute <<-EOF
8
+ create temporary table if not exists
9
+ annotations (
10
+ `article_id` int(11) NOT NULL,
11
+ `text` varchar(64),
12
+ key `index_on_text` (`text`),
13
+ unique key (`article_id`)
14
+ )
15
+ EOF
16
+ Annotation.delete_all
17
+ end
18
+
19
+ end
@@ -5,7 +5,7 @@ class Comment < ActiveRecord::Base
5
5
  create temporary table if not exists
6
6
  comments (
7
7
  `id` int(11) primary key auto_increment,
8
- `timestamp` datetime,
8
+ `timestamp` date,
9
9
  `account` int(11),
10
10
  `seen` boolean,
11
11
  key comments_on_account_timestamp(account, timestamp),
@@ -19,7 +19,7 @@ end
19
19
 
20
20
  FactoryGirl.define do
21
21
  factory :comment do | comment |
22
- comment.timestamp Time.now
22
+ comment.timestamp Date.new
23
23
  comment.seen false
24
24
  comment.sequence(:account) { | n | (n % 3)* 100 }
25
25
  end
@@ -0,0 +1,49 @@
1
+ class TableWithPrimaryKey < ActiveRecord::Base
2
+
3
+ def self.create_table
4
+ connection.execute <<-EOF
5
+ create temporary table if not exists
6
+ table_with_primary_keys (
7
+ `pk` int(11) primary key auto_increment,
8
+ `k1` int(11),
9
+ `k2` int(11),
10
+ key key_nonunique (k1),
11
+ unique key key_unique (k2)
12
+ )
13
+ EOF
14
+ end
15
+
16
+ end
17
+
18
+ class TableWithUniqueKey < ActiveRecord::Base
19
+
20
+ def self.create_table
21
+ connection.execute <<-EOF
22
+ create temporary table if not exists
23
+ table_with_unique_keys (
24
+ `k1` int(11),
25
+ `k2` int(11),
26
+ key key_nonunique (k1),
27
+ unique key key_unique (k2)
28
+ )
29
+ EOF
30
+ end
31
+
32
+ end
33
+
34
+ class TableWithRegularKey < ActiveRecord::Base
35
+
36
+ def self.create_table
37
+ connection.execute <<-EOF
38
+ create temporary table if not exists
39
+ table_with_regular_keys (
40
+ `k1` int(11),
41
+ `k2` int(11),
42
+ key key_nonunique (k1),
43
+ key key_extra (k2)
44
+ )
45
+ EOF
46
+ end
47
+
48
+ end
49
+
@@ -1,15 +1,17 @@
1
1
  require 'spec_helper'
2
2
 
3
- require 'active_support/testing/time_helpers'
3
+ # Time mocking helpers are available in Rails 4 but not in Rails 3, whereas the Timecop
4
+ # gem works with both.
5
+ require 'timecop'
6
+
4
7
  describe CleanSweep::PurgeRunner do
5
8
 
6
9
  context 'PurgeRunner' do
7
- include ActiveSupport::Testing::TimeHelpers
8
10
  before do
9
- travel_to Time.parse("2014-12-02 13:47:43 -0800")
11
+ Timecop.freeze Time.parse("2014-12-02 13:47:43.000000 -0800")
10
12
  end
11
13
  after do
12
- travel_back
14
+ Timecop.return
13
15
  end
14
16
 
15
17
  context "using comments" do
@@ -66,21 +68,20 @@ describe CleanSweep::PurgeRunner do
66
68
  it 'prints out the queries in a dry run' do
67
69
  purger = CleanSweep::PurgeRunner.new model: Comment,
68
70
  index: 'comments_on_account_timestamp' do | scope |
69
- scope.where('timestamp < ?', 1.week.ago)
71
+ scope.where('timestamp < ?', 1.week.ago.to_date)
70
72
  end
71
- output = StringIO.new
72
- purger.print_queries(output)
73
- expect(output.string).to eq <<EOF
73
+ output = purger.print_queries
74
+ expect(output).to eq <<EOF
74
75
  Initial Query:
75
76
  SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
76
77
  FROM `comments` FORCE INDEX(comments_on_account_timestamp)
77
- WHERE (timestamp < '2014-11-25 21:47:43')
78
+ WHERE (timestamp < '2014-11-25')
78
79
  ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
79
80
  LIMIT 500
80
81
  Chunk Query:
81
82
  SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
82
83
  FROM `comments` FORCE INDEX(comments_on_account_timestamp)
83
- WHERE (timestamp < '2014-11-25 21:47:43') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18 21:47:43'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
84
+ WHERE (timestamp < '2014-11-25') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
84
85
  LIMIT 500
85
86
  Delete Statement:
86
87
  DELETE
@@ -105,7 +106,7 @@ EOF
105
106
  end
106
107
  expect(Comment.count).to eq(5)
107
108
  # Only old comments deleted before stopping
108
- expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
109
+ expect(Comment.where('timestamp >= ?', 4.days.ago.to_date).count).to eq(5)
109
110
  end
110
111
  it "descends the index" do
111
112
  purger = CleanSweep::PurgeRunner.new model: Comment,
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,16 @@
1
1
  ENV['RACK_ENV'] = 'test'
2
2
 
3
+ require "codeclimate-test-reporter"
4
+ CodeClimate::TestReporter.start
3
5
  require 'clean_sweep'
4
6
  require 'factory_girl'
5
7
  require 'fileutils'
6
8
  require 'active_record'
7
9
  require 'mysql2'
10
+ require 'timecop'
8
11
  RSpec.configure do |config|
9
12
  config.include FactoryGirl::Syntax::Methods
10
13
  config.formatter = :progress
11
- #config.order = 'random'
12
-
13
14
  config.before(:suite) do
14
15
  FactoryGirl.find_definitions
15
16
  end
@@ -20,7 +21,7 @@ logdir = File.expand_path "../../log",__FILE__
20
21
  FileUtils.mkdir_p logdir
21
22
  logfile = File.open(File.join(logdir, "test.log"), "w+")
22
23
  ActiveRecord::Base.logger = Logger.new(logfile)
23
-
24
+ Time.zone = 'America/Los_Angeles'
24
25
  database = {
25
26
  encoding: 'utf8',
26
27
  adapter: 'mysql2',
@@ -34,3 +35,4 @@ connection.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
34
35
  database[:database] = db_name
35
36
 
36
37
  ActiveRecord::Base.establish_connection(database)
38
+
@@ -2,110 +2,140 @@ require 'spec_helper'
2
2
 
3
3
  describe CleanSweep::TableSchema do
4
4
 
5
- before do
6
- Comment.create_table
7
- end
8
-
9
- context "using ascending account, timestamp index" do
10
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', ascending: true }
11
-
12
- it 'should read comments' do
13
- expect(schema.primary_key.columns.map(&:name)).to eq([:id])
14
- expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
5
+ context "using sample tables" do
6
+ it 'should pick the primary key' do
7
+ TableWithPrimaryKey.create_table
8
+ schema = CleanSweep::TableSchema.new TableWithPrimaryKey
9
+ expect(schema.primary_key.name).to eq "primary"
10
+ expect(schema.traversing_key.name).to eq "primary"
15
11
  end
16
12
 
17
- it 'should produce an ascending chunk clause' do
18
- rows = account_and_timestamp_rows
19
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
20
- .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-12-01 23:13:25'))")
13
+ it "should identify unique key as primary key" do
14
+ TableWithUniqueKey.create_table
15
+ schema = CleanSweep::TableSchema.new TableWithUniqueKey
16
+ expect(schema.primary_key.name).to eq("key_unique")
17
+ expect(schema.traversing_key.name).to eq "key_unique"
21
18
  end
22
19
 
23
- it 'should produce all select columns' do
24
- expect(schema.column_names).to eq([:id, :account, :timestamp])
20
+ it "should skip the traversing key if non_traversing is true" do
21
+ TableWithUniqueKey.create_table
22
+ schema = CleanSweep::TableSchema.new TableWithUniqueKey, non_traversing: true
23
+ expect(schema.primary_key.name).to eq("key_unique")
24
+ expect(schema.traversing_key).to be_nil
25
25
  end
26
-
27
- it 'should produce the ascending order clause' do
28
- expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
26
+ it "should error out if there is no unique key at all" do
27
+ TableWithRegularKey.create_table
28
+ expect(->{CleanSweep::TableSchema.new TableWithRegularKey}).to raise_exception(RuntimeError, 'Table table_with_regular_keys must have a primary key')
29
29
  end
30
30
 
31
+ end
31
32
 
32
- it 'should produce an insert statement' do
33
- schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp'
34
- rows = account_and_timestamp_rows
35
- expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
33
+ context "on comments" do
34
+ before do
35
+ Comment.create_table
36
36
  end
37
- end
38
37
 
39
- context "using descending account, timestamp index" do
38
+ context "using ascending account, timestamp index" do
39
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', ascending: true }
40
40
 
41
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', ascending: false }
41
+ it 'should read comments' do
42
+ expect(schema.primary_key.columns.map(&:name)).to eq([:id])
43
+ expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
44
+ end
42
45
 
43
- it 'should produce a descending where clause' do
44
- rows = account_and_timestamp_rows
45
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
46
- .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-12-01 23:13:25'))")
47
- end
46
+ it 'should produce an ascending chunk clause' do
47
+ rows = account_and_timestamp_rows
48
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
49
+ .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-11-29'))")
50
+ end
48
51
 
52
+ it 'should produce all select columns' do
53
+ expect(schema.column_names).to eq([:id, :account, :timestamp])
54
+ end
49
55
 
50
- it 'should produce the descending order clause' do
51
- rows = account_and_timestamp_rows
52
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
53
- .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
56
+ it 'should produce the ascending order clause' do
57
+ expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
58
+ end
59
+
60
+
61
+ it 'should produce an insert statement' do
62
+ schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp'
63
+ rows = account_and_timestamp_rows
64
+ expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-01'),(1002,2,'2014-11-30'),(1005,5,'2014-11-29')")
65
+ end
54
66
  end
55
67
 
56
- end
68
+ context "using descending account, timestamp index" do
69
+
70
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', reverse: true }
71
+
72
+ it 'should produce a descending where clause' do
73
+ rows = account_and_timestamp_rows
74
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
75
+ .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-11-29'))")
76
+ end
77
+
57
78
 
58
- context "using account, timestamp index first column only" do
59
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', first_only: true }
79
+ it 'should produce the descending order clause' do
80
+ rows = account_and_timestamp_rows
81
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
82
+ .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
83
+ end
60
84
 
61
- it 'should select all the rows' do
62
- expect(schema.column_names).to eq([:id, :account, :timestamp])
63
85
  end
64
86
 
65
- it 'should only query using the first column of the index' do
66
- rows = account_and_timestamp_rows
67
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
68
- .to include(" (`comments`.`account` >= 5) ")
87
+ context "using account, timestamp index first column only" do
88
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', first_only: true }
89
+
90
+ it 'should select all the rows' do
91
+ expect(schema.column_names).to eq([:id, :account, :timestamp])
92
+ end
93
+
94
+ it 'should only query using the first column of the index' do
95
+ rows = account_and_timestamp_rows
96
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
97
+ .to include(" (`comments`.`account` >= 5) ")
98
+
99
+ end
69
100
 
70
101
  end
71
102
 
72
- end
103
+ it 'should not care about case' do
104
+ CleanSweep::TableSchema.new Comment, index: 'primary'
105
+ end
73
106
 
74
- it 'should not care about case' do
75
- CleanSweep::TableSchema.new Comment, key_name: 'primary'
76
- end
107
+ it 'should work without a descending index' do
108
+ schema = CleanSweep::TableSchema.new Comment, non_traversing: true
109
+ expect(schema.primary_key.columns.map(&:name)).to eq([:id])
110
+ expect(schema.traversing_key).to be_nil
111
+ end
77
112
 
78
- it 'should work without a descending index' do
79
- schema = CleanSweep::TableSchema.new Comment
80
- expect(schema.primary_key.columns.map(&:name)).to eq([:id])
81
- expect(schema.traversing_key).to be_nil
82
- end
113
+ it 'should produce minimal select columns' do
114
+ schema = CleanSweep::TableSchema.new Comment, index: 'PRIMARY'
115
+ expect(schema.column_names).to eq([:id])
116
+ end
83
117
 
84
- it 'should produce minimal select columns' do
85
- schema = CleanSweep::TableSchema.new Comment, key_name: 'PRIMARY'
86
- expect(schema.column_names).to eq([:id])
87
- end
118
+ it 'should produce the from clause with an index' do
119
+ schema = CleanSweep::TableSchema.new Comment, index:'comments_on_timestamp'
120
+ expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
121
+ end
88
122
 
89
- it 'should produce the from clause with an index' do
90
- schema = CleanSweep::TableSchema.new Comment, key_name:'comments_on_timestamp'
91
- expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
92
- end
123
+ it 'should include additional columns' do
124
+ schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp', copy_columns: %w[seen id]
125
+ expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
126
+ rows = account_and_timestamp_rows
127
+ rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
128
+ expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-01'),(1,1002,2,'2014-11-30'),(1,1005,5,'2014-11-29')")
93
129
 
94
- it 'should include additional columns' do
95
- schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
96
- expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
97
- rows = account_and_timestamp_rows
98
- rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
99
- expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")
130
+ end
100
131
 
101
132
  end
102
133
 
103
-
104
134
  def account_and_timestamp_rows
105
135
  rows = []
106
- t = Time.parse '2014-12-01 17:13:25'
136
+ t = Date.parse '2014-12-01'
107
137
  rows << [1001, 5, t]
108
- rows << [1002, 2, t - 1.hour]
109
- rows << [1005, 5, t - 2.hours]
138
+ rows << [1002, 2, t - 1]
139
+ rows << [1005, 5, t - 2]
110
140
  end
111
141
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cleansweep
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill Kayser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-17 00:00:00.000000000 Z
11
+ date: 2015-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: timecop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.7.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.7.1
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: bundler
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,10 @@ extensions: []
146
160
  extra_rdoc_files: []
147
161
  files:
148
162
  - ".gitignore"
163
+ - ".travis.yml"
149
164
  - CHANGES.md
150
165
  - Gemfile
166
+ - Gemfile.lock
151
167
  - LICENSE.txt
152
168
  - README.md
153
169
  - Rakefile
@@ -162,8 +178,10 @@ files:
162
178
  - lib/clean_sweep/table_schema/index_schema.rb
163
179
  - lib/clean_sweep/version.rb
164
180
  - lib/cleansweep.rb
181
+ - spec/factories/annotations.rb
165
182
  - spec/factories/books.rb
166
183
  - spec/factories/comments.rb
184
+ - spec/factories/tables.rb
167
185
  - spec/purge_runner_spec.rb
168
186
  - spec/spec_helper.rb
169
187
  - spec/table_schema_spec.rb
@@ -192,8 +210,10 @@ signing_key:
192
210
  specification_version: 4
193
211
  summary: Utility to purge or archive rows in mysql tables
194
212
  test_files:
213
+ - spec/factories/annotations.rb
195
214
  - spec/factories/books.rb
196
215
  - spec/factories/comments.rb
216
+ - spec/factories/tables.rb
197
217
  - spec/purge_runner_spec.rb
198
218
  - spec/spec_helper.rb
199
219
  - spec/table_schema_spec.rb