cleansweep 1.0.3 → 1.0.4
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.travis.yml +10 -0
- data/CHANGES.md +15 -5
- data/Gemfile +2 -0
- data/Gemfile.lock +82 -0
- data/README.md +15 -7
- data/Rakefile +1 -0
- data/cleansweep.gemspec +2 -1
- data/lib/clean_sweep/purge_runner.rb +18 -28
- data/lib/clean_sweep/purge_runner/logging.rb +46 -14
- data/lib/clean_sweep/table_schema.rb +13 -9
- data/lib/clean_sweep/version.rb +1 -1
- data/spec/factories/annotations.rb +19 -0
- data/spec/factories/comments.rb +2 -2
- data/spec/factories/tables.rb +49 -0
- data/spec/purge_runner_spec.rb +12 -11
- data/spec/spec_helper.rb +5 -3
- data/spec/table_schema_spec.rb +101 -71
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f772f74727a7d58fdebda097fb0b70572cb92c34
+  data.tar.gz: a82986ae0e26308e4842193441e427e998c4f5a0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 183922164f35fbd986ca9617fa1c73fc4133db90fc15a59552f48def16a8d39d8a8164bbef8d61ee334f6a8dffbfc74a34f5fbb182d446d7485372b6add8667a
+  data.tar.gz: 77e4f14d2e44e7400d4bb14a09719fea8a13aac2660fab0712355b053d8117e848f7dd967878b01ec5c87c6181eca821056f4e388b00c93cd4b005432d1c0ebc
data/.gitignore
CHANGED
data/.travis.yml
ADDED
data/CHANGES.md
CHANGED
@@ -1,8 +1,17 @@
 See the [documentation](http://bkayser.github.io/cleansweep) for details
 
-### Version 1.0.
+### Version 1.0.4
 
-*
+* Print dry run output using the logger
+* Add option `non_traversing` so you can explicitly not use an index. If an index
+  is not specified, now it will guess using the first non-unique index or primary key.
+* Added more tests
+* Added Travis CI build, metrics
+
+### Version 1.0.3
+
+* Small bug in instrumentation and target model reference
+* Support first unique index as primary when primary key not found
 
 ### Version 1.0.2
 
@@ -10,6 +19,7 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
 * Added `dest_columns` option as a map of column names in the source to column names in the destination.
 * More testing and bug fixing in real environments
 
-### Version 1.0.
-
-*
+### Version 1.0.1
+
+* Initial release
+
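A minimal usage sketch of the two 1.0.4 changes called out above (dry-run output through the logger, and `non_traversing`). It assumes the `Comment` model from the gem's own specs; the `where` conditions are illustrative.

```ruby
require 'clean_sweep'

# No :index given: per the 1.0.4 changelog the runner now guesses one
# instead of requiring it to be named explicitly.
CleanSweep::PurgeRunner.new(model: Comment, dry_run: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago)   # dry run: the queries are only logged
end.execute_in_batches

# Explicitly skip index traversal; per the changelog this only makes sense
# when every scanned row is deleted as you go.
CleanSweep::PurgeRunner.new(model: Comment, non_traversing: true) do |scope|
  scope.where('seen = ?', true)
end.execute_in_batches
```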
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,82 @@
+PATH
+  remote: .
+  specs:
+    cleansweep (1.0.4)
+      activerecord (>= 3.0)
+      mysql2 (~> 0.3)
+      newrelic_rpm
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activemodel (4.2.0)
+      activesupport (= 4.2.0)
+      builder (~> 3.1)
+    activerecord (4.2.0)
+      activemodel (= 4.2.0)
+      activesupport (= 4.2.0)
+      arel (~> 6.0)
+    activesupport (4.2.0)
+      i18n (~> 0.7)
+      json (~> 1.7, >= 1.7.7)
+      minitest (~> 5.1)
+      thread_safe (~> 0.3, >= 0.3.4)
+      tzinfo (~> 1.1)
+    arel (6.0.0)
+    awesome_print (1.6.1)
+    builder (3.2.2)
+    codeclimate-test-reporter (0.4.4)
+      simplecov (>= 0.7.1, < 1.0.0)
+    coderay (1.1.0)
+    diff-lcs (1.2.5)
+    docile (1.1.5)
+    factory_girl (4.5.0)
+      activesupport (>= 3.0.0)
+    i18n (0.7.0)
+    json (1.8.1)
+    method_source (0.8.2)
+    minitest (5.5.0)
+    multi_json (1.10.1)
+    mysql2 (0.3.17)
+    newrelic_rpm (3.9.9.275)
+    pry (0.10.1)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    rake (10.4.2)
+    rspec (3.1.0)
+      rspec-core (~> 3.1.0)
+      rspec-expectations (~> 3.1.0)
+      rspec-mocks (~> 3.1.0)
+    rspec-core (3.1.7)
+      rspec-support (~> 3.1.0)
+    rspec-expectations (3.1.2)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.1.0)
+    rspec-mocks (3.1.3)
+      rspec-support (~> 3.1.0)
+    rspec-support (3.1.2)
+    simplecov (0.9.1)
+      docile (~> 1.1.0)
+      multi_json (~> 1.0)
+      simplecov-html (~> 0.8.0)
+    simplecov-html (0.8.0)
+    slop (3.6.0)
+    thread_safe (0.3.4)
+    timecop (0.7.1)
+    tzinfo (1.2.2)
+      thread_safe (~> 0.1)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  awesome_print (~> 1.2)
+  bundler (~> 1.7)
+  cleansweep!
+  codeclimate-test-reporter
+  factory_girl (~> 4.4)
+  pry (~> 0)
+  rake (~> 10.0)
+  rspec (~> 3.1)
+  timecop (~> 0.7.1)
data/README.md
CHANGED
@@ -2,6 +2,10 @@ Cleansweep is a utility for scripting purges using ruby in an
 efficient, low-impact manner on mysql innodb tables. Based on the
 Percona `pt-archive` utility.
 
+[![Build Status](https://img.shields.io/travis/bkayser/cleansweep/master.svg?x=3)](https://travis-ci.org/bkayser/cleansweep)
+[![Code Climate](https://codeclimate.com/github/bkayser/cleansweep/badges/gpa.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
+[![Test Coverage](https://codeclimate.com/github/bkayser/cleansweep/badges/coverage.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
+
 ## Installation
 
 Add this line to your application's Gemfile:
@@ -116,6 +120,9 @@ The chunk query looks like:
 You can scan the index in either direction. To specify descending
 order, use the `reverse: true` option.
 
+If no index is specified, it will pick the primary key or the first unique index if there
+is no primary key.
+
 ### Copying rows from one table to another
 
 You can use the same technique to copy rows from one table to another.
@@ -179,16 +186,13 @@ Now create as many jobs as you need for the tables which refer to these metrics:
 
 ```ruby
 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: Metric,
                             dest_columns: { 'metric_id' => 'id'} ).execute_in_batches
 
 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: ChartMetric).execute_in_batches
 
 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: SystemMetric).execute_in_batches
 ```
 
@@ -202,6 +206,10 @@ into an unsafe territory. The script will pause for 5 minutes and
 only start once the corresponding metric goes back down to 90% of the
 specified threshold.
 
+Note: You will need process privileges to be able to see the history list and
+replication client privileges to monitor the replication lag.
+
+
 ### Logging and monitoring progress
 
 You pass in a standard log instance to capture all running output. By
@@ -221,8 +229,8 @@ in your target table.
 
 ### Limitations
 
-* Only works for mysql
-*
+* Only works for mysql. I have only used it against 5.5.
+* Tested with ActiveRecord 3.1.\* - 4.0.\*.
 * Using a non-unique index risks missing duplicate rows unless you use the `first_only` option.
 * Using the `first_only` option risks rescanning many rows if you have many more duplicates than your
   chunk size
@@ -279,11 +287,11 @@ db called 'cstest'.
 
 ## License and Copyright
 
-Copyright 2014 New Relic, Inc., and Bill Kayser
+Copyright 2014-2015 New Relic, Inc., and Bill Kayser
 
 Covered by the MIT [LICENSE](LICENSE.txt).
 
-
+## Credits
 
 This was all inspired and informed by [Percona's `pt-archiver`
 script](http://www.percona.com/doc/percona-toolkit/2.1/pt-archiver.html)
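The privileges note added above refers to the built-in throttling checks. A hedged sketch of how they are wired up; the `max_history`/`max_repl_lag` option names are taken from the instance variables visible in the `purge_runner.rb` diff below, and the thresholds here are made up.

```ruby
# Pauses when the InnoDB history list (needs the PROCESS privilege) or the
# replication lag (needs REPLICATION CLIENT) crosses its limit, resuming
# once the metric drops back to 90% of the threshold as described above.
CleanSweep::PurgeRunner.new(model: Comment,
                            max_history:  100_000,
                            max_repl_lag: 60) do |scope|
  scope.where('timestamp < ?', 1.month.ago)
end.execute_in_batches
```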
data/Rakefile
CHANGED
data/cleansweep.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
   spec.homepage = "http://bkayser.github.com/cleansweep"
   spec.license = "MIT"
 
-  spec.files = `git ls-files -z`.split("\x0")
+  spec.files = `git ls-files -z`.split("\x0").delete_if { | f | f =~ /^gemfiles/ }
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.test_files = spec.files.grep(%r{^spec/})
   spec.require_paths = ["lib"]
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency 'mysql2', '~> 0.3'
 
   spec.add_development_dependency 'pry', '~> 0'
+  spec.add_development_dependency 'timecop', '~> 0.7.1'
   spec.add_development_dependency 'bundler', '~> 1.7'
   spec.add_development_dependency 'rake', '~> 10.0'
   spec.add_development_dependency 'rspec', '~> 3.1'
data/lib/clean_sweep/purge_runner.rb
CHANGED
@@ -16,14 +16,23 @@ require 'stringio'
 #    The number of rows to copy in each block. Defaults to 500.
 # [:index]
 #    The index to traverse in ascending order doing the purge. Rows are read in the order of
-#    the index, which must be a btree index. If not specified,
+#    the index, which must be a btree index. If not specified, An index is chosen automatically
+#    in order of preference:
+#      1. PRIMARY KEY
+#      2. First UNIQUE index
+#      3. First non-UNIQUE index
+#      4. No index used if no indexes defined.
+# [:non_traversing]
+#    When true, specifies the table will not be traversed using an index.
+#    This only makes sense if you are deleting everything as you go along, otherwise you'll
+#    be re-scanning skipped rows.
 # [:reverse]
 #    Traverse the index in reverse order. For example, if your index is on <tt>account_id</tt>,
 #    <tt>timestamp</tt>, this option will move through the rows starting at the highest account
 #    number, then move through timestamps starting with the most recent.
 # [:first_only]
-#    Traverse only the first column of the index, and do so inclusively using the <tt
-#    instead of the strict <tt
+#    Traverse only the first column of the index, and do so inclusively using the <tt>'>='</tt> operator
+#    instead of the strict <tt>'>'</tt> operator. This is important if the index is not unique and there
 #    are a lot of duplicates. Otherwise the delete could miss rows. Not allowed in copy mode because you'd
 #    be inserting duplicate rows.
 # [:dry_run]
@@ -94,11 +103,12 @@ class CleanSweep::PurgeRunner
    @copy_mode = @target_model && options[:copy_only]
 
    @table_schema = CleanSweep::TableSchema.new @model,
-
-
-
+                                               non_traversing: options[:non_traversing],
+                                               index: options[:index],
+                                               reverse: options[:reverse],
+                                               copy_columns: options[:copy_columns],
                                                first_only: options[:first_only],
-                                               dest_model:
+                                               dest_model: options[:dest_model],
                                                dest_columns: options[:dest_columns]
 
    if (@max_history || @max_repl_lag)
@@ -134,7 +144,7 @@ class CleanSweep::PurgeRunner
   def execute_in_batches
 
     if @dry_run
-      print_queries
+      log :info, print_queries
       return 0
     end
 
@@ -205,26 +215,6 @@ class CleanSweep::PurgeRunner
   add_method_tracer :sleep
   add_method_tracer :execute_in_batches
 
-  def print_queries(io)
-    io.puts 'Initial Query:'
-    io.puts format_query(' ', @query.to_sql)
-    rows = @model.connection.select_rows @query.limit(1).to_sql
-    if rows.empty?
-      # Don't have any sample data to use for the sample queries, so use NULL values just
-      # so the query will print out.
-      rows << [nil] * 100
-    end
-    io.puts "Chunk Query:"
-    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, rows.first).to_sql)
-    if copy_mode?
-      io.puts "Insert Statement:"
-      io.puts format_query(' ', @table_schema.insert_statement(rows))
-    else
-      io.puts "Delete Statement:"
-      io.puts format_query(' ', @table_schema.delete_statement(rows))
-    end
-  end
-
   private
 
   def format_query indentation, query
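A short sketch of the dry-run change above: `execute_in_batches` now hands the rendered queries to the logger at `:info` and returns 0, instead of writing them to an IO you pass in. The model and condition are illustrative.

```ruby
purger = CleanSweep::PurgeRunner.new(model: Comment,
                                     index: 'comments_on_account_timestamp',
                                     dry_run: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago)
end
purger.execute_in_batches   # logs the Initial/Chunk/Delete queries, returns 0
```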
data/lib/clean_sweep/purge_runner/logging.rb
CHANGED
@@ -6,19 +6,7 @@ module CleanSweep::PurgeRunner::Logging
       while (@report_interval_start < Time.now - @report_interval) do
         @report_interval_start += @report_interval
       end
-
-      elapsed = [1, (Time.now - @start).to_i].max
-      rate = (@total_deleted / elapsed).to_i
-      rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
-      report << "report:"
-      if copy_mode?
-        report << " #{@dry_run ? 'queried' : 'copied'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      else
-        report << " #{@dry_run ? 'queried' : 'deleted'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      end
-      report << " elapsed: #{'%12s' % format(elapsed)}"
-      report << " rate: #{rate}"
-      log :info, report.join("\n")
+      print_report
     end
   end
 
@@ -28,6 +16,34 @@ module CleanSweep::PurgeRunner::Logging
     @logger.send level, out
   end
 
+  def print_queries
+    io = StringIO.new
+    io.puts 'Initial Query:'
+    io.puts format_query(' ', @query.to_sql)
+    io.puts "Chunk Query:"
+    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, sample_rows.first).to_sql)
+    if copy_mode?
+      io.puts "Insert Statement:"
+      io.puts format_query(' ', @table_schema.insert_statement(sample_rows))
+    else
+      io.puts "Delete Statement:"
+      io.puts format_query(' ', @table_schema.delete_statement(sample_rows))
+    end
+    io.string
+  end
+
+  private
+
+  def sample_rows
+    @sample_rows ||= @model.connection.select_rows @query.limit(1).to_sql
+    if @sample_rows.empty?
+      # Don't have any sample data to use for the sample queries, so use NULL values just
+      # so the query will print out.
+      @sample_rows << [nil] * 100
+    end
+    @sample_rows
+  end
+
   def format(time)
     format_string = "%H:%M:%S"
     if (time.to_i > (24 * 60 * 60))
@@ -35,4 +51,20 @@ module CleanSweep::PurgeRunner::Logging
     end
     Time.at(time).strftime(format_string)
   end
-
+
+  def print_report
+    elapsed = [1, (Time.now - @start).to_i].max
+    rate = (@total_deleted / elapsed).to_i
+    rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
+    report = [ "report:" ]
+    action = case
+             when @dry_run then 'queried'
+             when copy_mode? then 'copied'
+             else 'deleted'
+             end
+    report << " #{action}: #{'%12i' % @total_deleted} #{@model.table_name} records"
+    report << " elapsed: #{'%12s' % format(elapsed)}"
+    report << " rate: #{rate}"
+    log :info, report.join("\n")
+  end
+end
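Since `print_queries` now builds its output in a `StringIO` and returns `io.string`, callers can capture the queries directly instead of supplying an IO; this is exactly how the spec further down exercises it. A hedged sketch with an illustrative condition:

```ruby
purger = CleanSweep::PurgeRunner.new(model: Comment, dry_run: true) do |scope|
  scope.where('seen = ?', false)   # illustrative condition
end
output = purger.print_queries      # same text a dry run sends to the logger
puts output
```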
data/lib/clean_sweep/table_schema.rb
CHANGED
@@ -14,8 +14,8 @@ class CleanSweep::TableSchema
 
   def initialize(model, options={})
 
-    traversing_key_name = options[:
-    ascending = options
+    traversing_key_name = options[:index]
+    ascending = !options[:reverse]
     first_only = options[:first_only]
     @model = model
     @dest_model = options[:dest_model] || @model
@@ -26,7 +26,7 @@ class CleanSweep::TableSchema
     @name = @model.table_name
 
     @columns =
-      (options[:
+      (options[:copy_columns] || []).map do | extra_col_name |
         CleanSweep::TableSchema::ColumnSchema.new extra_col_name, model
       end
 
@@ -38,11 +38,15 @@ class CleanSweep::TableSchema
     raise "Table #{model.table_name} must have a primary key" unless @primary_key
 
     @primary_key.add_columns_to @columns
-    if
-      traversing_key_name
-
-
-
+    if !options[:non_traversing]
+      if traversing_key_name
+        traversing_key_name.downcase!
+        raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
+        @traversing_key = key_schemas[traversing_key_name]
+        @traversing_key.add_columns_to @columns
+      else
+        @traversing_key = @primary_key
+      end
       @traversing_key.ascending = ascending
       @traversing_key.first_only = first_only
     end
@@ -74,7 +78,7 @@ class CleanSweep::TableSchema
   end
 
   def initial_scope
-    scope = @model.
+    scope = @model.select(quoted_column_names).from(from_clause)
     scope = @traversing_key.order(scope) if @traversing_key
     return scope
   end
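A hedged sketch of the `TableSchema` options as they stand after this change; the option keys mirror what `initialize` reads above, the values are examples, and `Comment` comes from the gem's specs.

```ruby
schema = CleanSweep::TableSchema.new Comment,
                                     index: 'comments_on_account_timestamp',
                                     reverse: true,            # walk the index downward
                                     copy_columns: %w[seen]    # extra columns to select
schema.traversing_key.columns.map(&:name)   # => [:account, :timestamp]

# With non_traversing: true no index is chosen and traversing_key is nil.
CleanSweep::TableSchema.new(Comment, non_traversing: true).traversing_key   # => nil
```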
data/lib/clean_sweep/version.rb
CHANGED
data/spec/factories/annotations.rb
ADDED
@@ -0,0 +1,19 @@
+
+# Defines a table that does not have a primary key but does
+# have a unique key.
+class Annotation < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+        annotations (
+          `article_id` int(11) NOT NULL,
+          `text` varchar(64),
+          key `index_on_text` (`text`),
+          unique key (`article_id`)
+        )
+    EOF
+    Annotation.delete_all
+  end
+
+end
data/spec/factories/comments.rb
CHANGED
@@ -5,7 +5,7 @@ class Comment < ActiveRecord::Base
      create temporary table if not exists
        comments (
          `id` int(11) primary key auto_increment,
-         `timestamp`
+         `timestamp` date,
          `account` int(11),
          `seen` boolean,
          key comments_on_account_timestamp(account, timestamp),
@@ -19,7 +19,7 @@ end
 
 FactoryGirl.define do
   factory :comment do | comment |
-    comment.timestamp
+    comment.timestamp Date.new
     comment.seen false
     comment.sequence(:account) { | n | (n % 3)* 100 }
   end
data/spec/factories/tables.rb
ADDED
@@ -0,0 +1,49 @@
+class TableWithPrimaryKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+        table_with_primary_keys (
+          `pk` int(11) primary key auto_increment,
+          `k1` int(11),
+          `k2` int(11),
+          key key_nonunique (k1),
+          unique key key_unique (k2)
+        )
+    EOF
+  end
+
+end
+
+class TableWithUniqueKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+        table_with_unique_keys (
+          `k1` int(11),
+          `k2` int(11),
+          key key_nonunique (k1),
+          unique key key_unique (k2)
+        )
+    EOF
+  end
+
+end
+
+class TableWithRegularKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+        table_with_regular_keys (
+          `k1` int(11),
+          `k2` int(11),
+          key key_nonunique (k1),
+          key key_extra (k2)
+        )
+    EOF
+  end
+
+end
+
data/spec/purge_runner_spec.rb
CHANGED
@@ -1,15 +1,17 @@
 require 'spec_helper'
 
-
+# Time mocking features are available in Rails 4 but not Rails 3 and the Timecop
+# gem works with both.
+require 'timecop'
+
 describe CleanSweep::PurgeRunner do
 
   context 'PurgeRunner' do
-    include ActiveSupport::Testing::TimeHelpers
     before do
-
+      Timecop.freeze Time.parse("2014-12-02 13:47:43.000000 -0800")
     end
     after do
-
+      Timecop.return
     end
 
     context "using comments" do
@@ -66,21 +68,20 @@ describe CleanSweep::PurgeRunner do
     it 'prints out the queries in a dry run' do
       purger = CleanSweep::PurgeRunner.new model: Comment,
                                            index: 'comments_on_account_timestamp' do | scope |
-        scope.where('timestamp < ?', 1.week.ago)
+        scope.where('timestamp < ?', 1.week.ago.to_date)
       end
-      output =
-
-      expect(output.string).to eq <<EOF
+      output = purger.print_queries
+      expect(output).to eq <<EOF
 Initial Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25')
 ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Chunk Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Delete Statement:
 DELETE
@@ -105,7 +106,7 @@ EOF
       end
       expect(Comment.count).to eq(5)
       # Only old comments deleted before stopping
-      expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
+      expect(Comment.where('timestamp >= ?', 4.days.ago.to_date).count).to eq(5)
     end
     it "descends the index" do
       purger = CleanSweep::PurgeRunner.new model: Comment,
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,16 @@
 ENV['RACK_ENV'] = 'test'
 
+require "codeclimate-test-reporter"
+CodeClimate::TestReporter.start
 require 'clean_sweep'
 require 'factory_girl'
 require 'fileutils'
 require 'active_record'
 require 'mysql2'
+require 'timecop'
 RSpec.configure do |config|
   config.include FactoryGirl::Syntax::Methods
   config.formatter = :progress
-  #config.order = 'random'
-
   config.before(:suite) do
     FactoryGirl.find_definitions
   end
@@ -20,7 +21,7 @@ logdir = File.expand_path "../../log",__FILE__
 FileUtils.mkdir_p logdir
 logfile = File.open(File.join(logdir, "test.log"), "w+")
 ActiveRecord::Base.logger = Logger.new(logfile)
-
+Time.zone = 'America/Los_Angeles'
 database = {
     encoding: 'utf8',
     adapter: 'mysql2',
@@ -34,3 +35,4 @@ connection.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
 database[:database] = db_name
 
 ActiveRecord::Base.establish_connection(database)
+
data/spec/table_schema_spec.rb
CHANGED
@@ -2,110 +2,140 @@ require 'spec_helper'
 
 describe CleanSweep::TableSchema do
 
-
-
-
-
-
-
-
-    it 'should read comments' do
-      expect(schema.primary_key.columns.map(&:name)).to eq([:id])
-      expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+  context "using sample tables" do
+    it 'should pick the primary key' do
+      TableWithPrimaryKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithPrimaryKey
+      expect(schema.primary_key.name).to eq "primary"
+      expect(schema.traversing_key.name).to eq "primary"
    end
 
-    it
-
-
-
+    it "should identify unique key as primary key" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key.name).to eq "key_unique"
    end
 
-    it
-
+    it "should skip the traversing key if non_traversing is true" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey, non_traversing: true
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key).to be_nil
    end
-
-
-      expect(
+    it "should error out if there is no unique key at all" do
+      TableWithRegularKey.create_table
+      expect(->{CleanSweep::TableSchema.new TableWithRegularKey}).to raise_exception(RuntimeError, 'Table table_with_regular_keys must have a primary key')
    end
 
+  end
 
-
-
-
-      expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
+  context "on comments" do
+    before do
+      Comment.create_table
    end
-    end
 
-
+    context "using ascending account, timestamp index" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', ascending: true }
 
-
+      it 'should read comments' do
+        expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+        expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+      end
 
-
-
-
-
-
+      it 'should produce an ascending chunk clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-11-29'))")
+      end
 
+      it 'should produce all select columns' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end
 
-
-
-
-
+      it 'should produce the ascending order clause' do
+        expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
+      end
+
+
+      it 'should produce an insert statement' do
+        schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp'
+        rows = account_and_timestamp_rows
+        expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-01'),(1002,2,'2014-11-30'),(1005,5,'2014-11-29')")
+      end
    end
 
-
+    context "using descending account, timestamp index" do
+
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', reverse: true }
+
+      it 'should produce a descending where clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-11-29'))")
+      end
+
 
-
-
+      it 'should produce the descending order clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
+      end
 
-      it 'should select all the rows' do
-        expect(schema.column_names).to eq([:id, :account, :timestamp])
    end
 
-
-
-
-
+    context "using account, timestamp index first column only" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', first_only: true }
+
+      it 'should select all the rows' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end
+
+      it 'should only query using the first column of the index' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include(" (`comments`.`account` >= 5) ")
+
+      end
 
    end
 
-
+    it 'should not care about case' do
+      CleanSweep::TableSchema.new Comment, index: 'primary'
+    end
 
-
-
-
+    it 'should work without a descending index' do
+      schema = CleanSweep::TableSchema.new Comment, non_traversing: true
+      expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+      expect(schema.traversing_key).to be_nil
+    end
 
-
-
-
-
-    end
+    it 'should produce minimal select columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'PRIMARY'
+      expect(schema.column_names).to eq([:id])
+    end
 
-
-
-
-
+    it 'should produce the from clause with an index' do
+      schema = CleanSweep::TableSchema.new Comment, index:'comments_on_timestamp'
+      expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
+    end
 
-
-
-
-
+    it 'should include additional columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp', copy_columns: %w[seen id]
+      expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
+      rows = account_and_timestamp_rows
+      rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
+      expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-01'),(1,1002,2,'2014-11-30'),(1,1005,5,'2014-11-29')")
 
-
-      schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
-      expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
-      rows = account_and_timestamp_rows
-      rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
-      expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")
+    end
 
   end
 
-
   def account_and_timestamp_rows
     rows = []
-    t =
+    t = Date.parse '2014-12-01'
     rows << [1001, 5, t]
-    rows << [1002, 2, t - 1
-    rows << [1005, 5, t - 2
+    rows << [1002, 2, t - 1]
+    rows << [1005, 5, t - 2]
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cleansweep
 version: !ruby/object:Gem::Version
-  version: 1.0.
+  version: 1.0.4
 platform: ruby
 authors:
 - Bill Kayser
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2015-01-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
@@ -66,6 +66,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: timecop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,10 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".travis.yml"
 - CHANGES.md
 - Gemfile
+- Gemfile.lock
 - LICENSE.txt
 - README.md
 - Rakefile
@@ -162,8 +178,10 @@ files:
 - lib/clean_sweep/table_schema/index_schema.rb
 - lib/clean_sweep/version.rb
 - lib/cleansweep.rb
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb
@@ -192,8 +210,10 @@ signing_key:
 specification_version: 4
 summary: Utility to purge or archive rows in mysql tables
 test_files:
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb