cleansweep 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.travis.yml +10 -0
- data/CHANGES.md +15 -5
- data/Gemfile +2 -0
- data/Gemfile.lock +82 -0
- data/README.md +15 -7
- data/Rakefile +1 -0
- data/cleansweep.gemspec +2 -1
- data/lib/clean_sweep/purge_runner.rb +18 -28
- data/lib/clean_sweep/purge_runner/logging.rb +46 -14
- data/lib/clean_sweep/table_schema.rb +13 -9
- data/lib/clean_sweep/version.rb +1 -1
- data/spec/factories/annotations.rb +19 -0
- data/spec/factories/comments.rb +2 -2
- data/spec/factories/tables.rb +49 -0
- data/spec/purge_runner_spec.rb +12 -11
- data/spec/spec_helper.rb +5 -3
- data/spec/table_schema_spec.rb +101 -71
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f772f74727a7d58fdebda097fb0b70572cb92c34
+  data.tar.gz: a82986ae0e26308e4842193441e427e998c4f5a0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 183922164f35fbd986ca9617fa1c73fc4133db90fc15a59552f48def16a8d39d8a8164bbef8d61ee334f6a8dffbfc74a34f5fbb182d446d7485372b6add8667a
+  data.tar.gz: 77e4f14d2e44e7400d4bb14a09719fea8a13aac2660fab0712355b053d8117e848f7dd967878b01ec5c87c6181eca821056f4e388b00c93cd4b005432d1c0ebc
data/.gitignore
CHANGED
data/.travis.yml
ADDED
data/CHANGES.md
CHANGED
@@ -1,8 +1,17 @@
 See the [documentation](http://bkayser.github.io/cleansweep) for details

-### Version 1.0.
+### Version 1.0.4

-*
+* Print dry run output using the logger
+* Add option `non_traversing` so you can explicitly not use an index. If an index
+  is not specified, now it will guess using the first non-unique index or primary key.
+* Added more tests
+* Added Travis CI build, metrics
+
+### Version 1.0.3
+
+* Small bug in instrumentation and target model reference
+* Support first unique index as primary when primary key not found

 ### Version 1.0.2

@@ -10,6 +19,7 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
 * Added `dest_columns` option as a map of column names in the source to column names in the destination.
 * More testing and bug fixing in real environments

-### Version 1.0.
-
-*
+### Version 1.0.1
+
+* Initial release
+
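For orientation, here is a minimal sketch of the new 1.0.4 options listed above in use. `Comment` is a placeholder ActiveRecord model, and the call shape follows the gem's README and the specs later in this diff; check those for the authoritative usage.

```ruby
# Sketch only -- Comment is a hypothetical ActiveRecord model.

# Explicitly skip index traversal with the new non_traversing option. This only
# makes sense when every scanned row is deleted, otherwise skipped rows are re-scanned.
CleanSweep::PurgeRunner.new(model: Comment, non_traversing: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago.to_date)
end.execute_in_batches

# With no :index and no :non_traversing, 1.0.4 picks an index automatically.
CleanSweep::PurgeRunner.new(model: Comment).execute_in_batches
```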
data/Gemfile
CHANGED
data/Gemfile.lock
ADDED
@@ -0,0 +1,82 @@
+PATH
+  remote: .
+  specs:
+    cleansweep (1.0.4)
+      activerecord (>= 3.0)
+      mysql2 (~> 0.3)
+      newrelic_rpm
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activemodel (4.2.0)
+      activesupport (= 4.2.0)
+      builder (~> 3.1)
+    activerecord (4.2.0)
+      activemodel (= 4.2.0)
+      activesupport (= 4.2.0)
+      arel (~> 6.0)
+    activesupport (4.2.0)
+      i18n (~> 0.7)
+      json (~> 1.7, >= 1.7.7)
+      minitest (~> 5.1)
+      thread_safe (~> 0.3, >= 0.3.4)
+      tzinfo (~> 1.1)
+    arel (6.0.0)
+    awesome_print (1.6.1)
+    builder (3.2.2)
+    codeclimate-test-reporter (0.4.4)
+      simplecov (>= 0.7.1, < 1.0.0)
+    coderay (1.1.0)
+    diff-lcs (1.2.5)
+    docile (1.1.5)
+    factory_girl (4.5.0)
+      activesupport (>= 3.0.0)
+    i18n (0.7.0)
+    json (1.8.1)
+    method_source (0.8.2)
+    minitest (5.5.0)
+    multi_json (1.10.1)
+    mysql2 (0.3.17)
+    newrelic_rpm (3.9.9.275)
+    pry (0.10.1)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    rake (10.4.2)
+    rspec (3.1.0)
+      rspec-core (~> 3.1.0)
+      rspec-expectations (~> 3.1.0)
+      rspec-mocks (~> 3.1.0)
+    rspec-core (3.1.7)
+      rspec-support (~> 3.1.0)
+    rspec-expectations (3.1.2)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.1.0)
+    rspec-mocks (3.1.3)
+      rspec-support (~> 3.1.0)
+    rspec-support (3.1.2)
+    simplecov (0.9.1)
+      docile (~> 1.1.0)
+      multi_json (~> 1.0)
+      simplecov-html (~> 0.8.0)
+    simplecov-html (0.8.0)
+    slop (3.6.0)
+    thread_safe (0.3.4)
+    timecop (0.7.1)
+    tzinfo (1.2.2)
+      thread_safe (~> 0.1)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  awesome_print (~> 1.2)
+  bundler (~> 1.7)
+  cleansweep!
+  codeclimate-test-reporter
+  factory_girl (~> 4.4)
+  pry (~> 0)
+  rake (~> 10.0)
+  rspec (~> 3.1)
+  timecop (~> 0.7.1)
data/README.md
CHANGED
@@ -2,6 +2,10 @@ Cleansweep is a utility for scripting purges using ruby in an
 efficient, low-impact manner on mysql innodb tables. Based on the
 Percona `pt-archive` utility.

+[](https://travis-ci.org/bkayser/cleansweep)
+[](https://codeclimate.com/github/bkayser/cleansweep)
+[](https://codeclimate.com/github/bkayser/cleansweep)
+
 ## Installation

 Add this line to your application's Gemfile:
@@ -116,6 +120,9 @@ The chunk query looks like:
 You can scan the index in either direction. To specify descending
 order, use the `reverse: true` option.

+If no index is specified, it will pick the primary key or the first unique index if there
+is no primary key.
+
 ### Copying rows from one table to another

 You can use the same technique to copy rows from one table to another.
@@ -179,16 +186,13 @@ Now create as many jobs as you need for the tables which refer to these metrics:

 ```ruby
 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: Metric,
                             dest_columns: { 'metric_id' => 'id'} ).execute_in_batches

 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: ChartMetric).execute_in_batches

 CleanSweep::PurgeRunner.new(model: ExpiredMetric,
-                            index: 'PRIMARY',
                             dest_model: SystemMetric).execute_in_batches
 ```

@@ -202,6 +206,10 @@ into an unsafe territory. The script will pause for 5 minutes and
 only start once the corresponding metric goes back down to 90% of the
 specified threshold.

+Note: You will need process privileges to be able to see the history list and
+replication client privileges to monitor the replication lag.
+
+
 ### Logging and monitoring progress

 You pass in a standard log instance to capture all running output. By
@@ -221,8 +229,8 @@ in your target table.

 ### Limitations

-* Only works for mysql
-*
+* Only works for mysql. I have only used it against 5.5.
+* Tested with ActiveRecord 3.1.\* - 4.0.\*.
 * Using a non-unique index risks missing duplicate rows unless you use the `first_only` option.
 * Using the `first_only` option risks rescanning many rows if you have many more duplicates than your
   chunk size
@@ -279,11 +287,11 @@ db called 'cstest'.

 ## License and Copyright

-Copyright 2014 New Relic, Inc., and Bill Kayser
+Copyright 2014-2015 New Relic, Inc., and Bill Kayser

 Covered by the MIT [LICENSE](LICENSE.txt).

-
+## Credits

 This was all inspired and informed by [Percona's `pt-archiver`
 script](http://www.percona.com/doc/percona-toolkit/2.1/pt-archiver.html)
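The logging and dry-run notes added to the README above combine roughly as follows. This is a sketch only: `Comment` is a placeholder model, `dry_run:` appears in the option list in purge_runner.rb further down this diff, and the `logger:` key is an assumption based on the README's "pass in a standard log instance" wording.

```ruby
require 'logger'

# Sketch only -- Comment is a hypothetical model; the logger: key is assumed.
purger = CleanSweep::PurgeRunner.new(model: Comment,
                                     logger: Logger.new($stdout),
                                     dry_run: true) do |scope|
  scope.where('timestamp < ?', 1.week.ago.to_date)
end

# As of 1.0.4 a dry run sends the initial, chunk, and delete/insert queries to the
# logger (previously they were printed directly) and returns without deleting anything.
purger.execute_in_batches
```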
data/Rakefile
CHANGED
data/cleansweep.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
   spec.homepage = "http://bkayser.github.com/cleansweep"
   spec.license = "MIT"

-  spec.files = `git ls-files -z`.split("\x0")
+  spec.files = `git ls-files -z`.split("\x0").delete_if { | f | f =~ /^gemfiles/ }
   spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
   spec.test_files = spec.files.grep(%r{^spec/})
   spec.require_paths = ["lib"]
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency 'mysql2', '~> 0.3'

   spec.add_development_dependency 'pry', '~> 0'
+  spec.add_development_dependency 'timecop', '~> 0.7.1'
   spec.add_development_dependency 'bundler', '~> 1.7'
   spec.add_development_dependency 'rake', '~> 10.0'
   spec.add_development_dependency 'rspec', '~> 3.1'
data/lib/clean_sweep/purge_runner.rb
CHANGED
@@ -16,14 +16,23 @@ require 'stringio'
 #   The number of rows to copy in each block. Defaults to 500.
 # [:index]
 #   The index to traverse in ascending order doing the purge. Rows are read in the order of
-#   the index, which must be a btree index. If not specified,
+#   the index, which must be a btree index. If not specified, An index is chosen automatically
+#   in order of preference:
+#     1. PRIMARY KEY
+#     2. First UNIQUE index
+#     3. First non-UNIQUE index
+#     4. No index used if no indexes defined.
+# [:non_traversing]
+#   When true, specifies the table will not be traversed using an index.
+#   This only makes sense if you are deleting everything as you go along, otherwise you'll
+#   be re-scanning skipped rows.
 # [:reverse]
 #   Traverse the index in reverse order. For example, if your index is on <tt>account_id</tt>,
 #   <tt>timestamp</tt>, this option will move through the rows starting at the highest account
 #   number, then move through timestamps starting with the most recent.
 # [:first_only]
-#   Traverse only the first column of the index, and do so inclusively using the <tt
-#   instead of the strict <tt
+#   Traverse only the first column of the index, and do so inclusively using the <tt>'>='</tt> operator
+#   instead of the strict <tt>'>'</tt> operator. This is important if the index is not unique and there
 #   are a lot of duplicates. Otherwise the delete could miss rows. Not allowed in copy mode because you'd
 #   be inserting duplicate rows.
 # [:dry_run]
@@ -94,11 +103,12 @@ class CleanSweep::PurgeRunner
     @copy_mode = @target_model && options[:copy_only]

     @table_schema = CleanSweep::TableSchema.new @model,
-
-
-
+                                                non_traversing: options[:non_traversing],
+                                                index: options[:index],
+                                                reverse: options[:reverse],
+                                                copy_columns: options[:copy_columns],
                                                 first_only: options[:first_only],
-                                                dest_model:
+                                                dest_model: options[:dest_model],
                                                 dest_columns: options[:dest_columns]

     if (@max_history || @max_repl_lag)
@@ -134,7 +144,7 @@ class CleanSweep::PurgeRunner
   def execute_in_batches

     if @dry_run
-      print_queries
+      log :info, print_queries
       return 0
     end

@@ -205,26 +215,6 @@ class CleanSweep::PurgeRunner
   add_method_tracer :sleep
   add_method_tracer :execute_in_batches

-  def print_queries(io)
-    io.puts 'Initial Query:'
-    io.puts format_query(' ', @query.to_sql)
-    rows = @model.connection.select_rows @query.limit(1).to_sql
-    if rows.empty?
-      # Don't have any sample data to use for the sample queries, so use NULL values just
-      # so the query will print out.
-      rows << [nil] * 100
-    end
-    io.puts "Chunk Query:"
-    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, rows.first).to_sql)
-    if copy_mode?
-      io.puts "Insert Statement:"
-      io.puts format_query(' ', @table_schema.insert_statement(rows))
-    else
-      io.puts "Delete Statement:"
-      io.puts format_query(' ', @table_schema.delete_statement(rows))
-    end
-  end
-
   private

   def format_query indentation, query
data/lib/clean_sweep/purge_runner/logging.rb
CHANGED
@@ -6,19 +6,7 @@ module CleanSweep::PurgeRunner::Logging
       while (@report_interval_start < Time.now - @report_interval) do
         @report_interval_start += @report_interval
       end
-
-      elapsed = [1, (Time.now - @start).to_i].max
-      rate = (@total_deleted / elapsed).to_i
-      rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
-      report << "report:"
-      if copy_mode?
-        report << " #{@dry_run ? 'queried' : 'copied'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      else
-        report << " #{@dry_run ? 'queried' : 'deleted'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
-      end
-      report << " elapsed: #{'%12s' % format(elapsed)}"
-      report << " rate: #{rate}"
-      log :info, report.join("\n")
+      print_report
     end
   end

@@ -28,6 +16,34 @@ module CleanSweep::PurgeRunner::Logging
     @logger.send level, out
   end

+  def print_queries
+    io = StringIO.new
+    io.puts 'Initial Query:'
+    io.puts format_query(' ', @query.to_sql)
+    io.puts "Chunk Query:"
+    io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, sample_rows.first).to_sql)
+    if copy_mode?
+      io.puts "Insert Statement:"
+      io.puts format_query(' ', @table_schema.insert_statement(sample_rows))
+    else
+      io.puts "Delete Statement:"
+      io.puts format_query(' ', @table_schema.delete_statement(sample_rows))
+    end
+    io.string
+  end
+
+  private
+
+  def sample_rows
+    @sample_rows ||= @model.connection.select_rows @query.limit(1).to_sql
+    if @sample_rows.empty?
+      # Don't have any sample data to use for the sample queries, so use NULL values just
+      # so the query will print out.
+      @sample_rows << [nil] * 100
+    end
+    @sample_rows
+  end
+
   def format(time)
     format_string = "%H:%M:%S"
     if (time.to_i > (24 * 60 * 60))
@@ -35,4 +51,20 @@ module CleanSweep::PurgeRunner::Logging
     end
     Time.at(time).strftime(format_string)
   end
-
+
+  def print_report
+    elapsed = [1, (Time.now - @start).to_i].max
+    rate = (@total_deleted / elapsed).to_i
+    rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
+    report = [ "report:" ]
+    action = case
+             when @dry_run then 'queried'
+             when copy_mode? then 'copied'
+             else 'deleted'
+             end
+    report << " #{action}: #{'%12i' % @total_deleted} #{@model.table_name} records"
+    report << " elapsed: #{'%12s' % format(elapsed)}"
+    report << " rate: #{rate}"
+    log :info, report.join("\n")
+  end
+end
data/lib/clean_sweep/table_schema.rb
CHANGED
@@ -14,8 +14,8 @@ class CleanSweep::TableSchema

   def initialize(model, options={})

-    traversing_key_name = options[:
-    ascending = options
+    traversing_key_name = options[:index]
+    ascending = !options[:reverse]
     first_only = options[:first_only]
     @model = model
     @dest_model = options[:dest_model] || @model
@@ -26,7 +26,7 @@ class CleanSweep::TableSchema
     @name = @model.table_name

     @columns =
-      (options[:
+      (options[:copy_columns] || []).map do | extra_col_name |
         CleanSweep::TableSchema::ColumnSchema.new extra_col_name, model
       end

@@ -38,11 +38,15 @@ class CleanSweep::TableSchema
     raise "Table #{model.table_name} must have a primary key" unless @primary_key

     @primary_key.add_columns_to @columns
-    if
-    traversing_key_name
-
-
-
+    if !options[:non_traversing]
+      if traversing_key_name
+        traversing_key_name.downcase!
+        raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
+        @traversing_key = key_schemas[traversing_key_name]
+        @traversing_key.add_columns_to @columns
+      else
+        @traversing_key = @primary_key
+      end
       @traversing_key.ascending = ascending
       @traversing_key.first_only = first_only
     end
@@ -74,7 +78,7 @@ class CleanSweep::TableSchema
   end

   def initial_scope
-    scope = @model.
+    scope = @model.select(quoted_column_names).from(from_clause)
     scope = @traversing_key.order(scope) if @traversing_key
     return scope
   end
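For reference, the renamed TableSchema constructor options visible in this hunk (`index:`, `reverse:`, `copy_columns:`, `non_traversing:`, `first_only:`) are exercised like this in the specs below. A sketch only, using the `Comment` test model from spec/factories/comments.rb:

```ruby
# Sketch mirroring calls in spec/table_schema_spec.rb after this change.
schema = CleanSweep::TableSchema.new Comment,
                                     index: 'comments_on_account_timestamp',
                                     reverse: true,           # traverse the index descending
                                     copy_columns: %w[seen],  # extra columns (previously extra_columns:)
                                     first_only: false
schema.column_names          # columns selected for each chunk
schema.initial_scope.to_sql  # first chunk query, including FORCE INDEX
```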
data/lib/clean_sweep/version.rb
CHANGED
data/spec/factories/annotations.rb
ADDED
@@ -0,0 +1,19 @@
+
+# Defines a table that does not have a primary key but does
+# have a unique key.
+class Annotation < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      annotations (
+        `article_id` int(11) NOT NULL,
+        `text` varchar(64),
+        key `index_on_text` (`text`),
+        unique key (`article_id`)
+      )
+    EOF
+    Annotation.delete_all
+  end
+
+end
data/spec/factories/comments.rb
CHANGED
@@ -5,7 +5,7 @@ class Comment < ActiveRecord::Base
       create temporary table if not exists
       comments (
         `id` int(11) primary key auto_increment,
-        `timestamp`
+        `timestamp` date,
         `account` int(11),
         `seen` boolean,
         key comments_on_account_timestamp(account, timestamp),
@@ -19,7 +19,7 @@ end

 FactoryGirl.define do
   factory :comment do | comment |
-    comment.timestamp
+    comment.timestamp Date.new
     comment.seen false
     comment.sequence(:account) { | n | (n % 3)* 100 }
   end
data/spec/factories/tables.rb
ADDED
@@ -0,0 +1,49 @@
+class TableWithPrimaryKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_primary_keys (
+        `pk` int(11) primary key auto_increment,
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        unique key key_unique (k2)
+      )
+    EOF
+  end
+
+end
+
+class TableWithUniqueKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_unique_keys (
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        unique key key_unique (k2)
+      )
+    EOF
+  end
+
+end
+
+class TableWithRegularKey < ActiveRecord::Base
+
+  def self.create_table
+    connection.execute <<-EOF
+      create temporary table if not exists
+      table_with_regular_keys (
+        `k1` int(11),
+        `k2` int(11),
+        key key_nonunique (k1),
+        key key_extra (k2)
+      )
+    EOF
+  end
+
+end
+
data/spec/purge_runner_spec.rb
CHANGED
@@ -1,15 +1,17 @@
 require 'spec_helper'

-
+# Time mocking features are available in Rails 4 but not Rails 3 and the Timecop
+# gem works with both.
+require 'timecop'
+
 describe CleanSweep::PurgeRunner do

   context 'PurgeRunner' do
-    include ActiveSupport::Testing::TimeHelpers
     before do
-
+      Timecop.freeze Time.parse("2014-12-02 13:47:43.000000 -0800")
     end
     after do
-
+      Timecop.return
     end

     context "using comments" do
@@ -66,21 +68,20 @@ describe CleanSweep::PurgeRunner do
     it 'prints out the queries in a dry run' do
       purger = CleanSweep::PurgeRunner.new model: Comment,
                                            index: 'comments_on_account_timestamp' do | scope |
-        scope.where('timestamp < ?', 1.week.ago)
+        scope.where('timestamp < ?', 1.week.ago.to_date)
       end
-      output =
-
-      expect(output.string).to eq <<EOF
+      output = purger.print_queries
+      expect(output).to eq <<EOF
 Initial Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25')
 ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Chunk Query:
 SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
 FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-WHERE (timestamp < '2014-11-25
+WHERE (timestamp < '2014-11-25') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
 LIMIT 500
 Delete Statement:
 DELETE
@@ -105,7 +106,7 @@ EOF
       end
       expect(Comment.count).to eq(5)
       # Only old comments deleted before stopping
-      expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
+      expect(Comment.where('timestamp >= ?', 4.days.ago.to_date).count).to eq(5)
     end
     it "descends the index" do
       purger = CleanSweep::PurgeRunner.new model: Comment,
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,16 @@
 ENV['RACK_ENV'] = 'test'

+require "codeclimate-test-reporter"
+CodeClimate::TestReporter.start
 require 'clean_sweep'
 require 'factory_girl'
 require 'fileutils'
 require 'active_record'
 require 'mysql2'
+require 'timecop'
 RSpec.configure do |config|
   config.include FactoryGirl::Syntax::Methods
   config.formatter = :progress
-  #config.order = 'random'
-
   config.before(:suite) do
     FactoryGirl.find_definitions
   end
@@ -20,7 +21,7 @@ logdir = File.expand_path "../../log",__FILE__
 FileUtils.mkdir_p logdir
 logfile = File.open(File.join(logdir, "test.log"), "w+")
 ActiveRecord::Base.logger = Logger.new(logfile)
-
+Time.zone = 'America/Los_Angeles'
 database = {
     encoding: 'utf8',
     adapter: 'mysql2',
@@ -34,3 +35,4 @@ connection.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
 database[:database] = db_name

 ActiveRecord::Base.establish_connection(database)
+
data/spec/table_schema_spec.rb
CHANGED
@@ -2,110 +2,140 @@ require 'spec_helper'

 describe CleanSweep::TableSchema do

-
-
-
-
-
-
-
-  it 'should read comments' do
-    expect(schema.primary_key.columns.map(&:name)).to eq([:id])
-    expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+  context "using sample tables" do
+    it 'should pick the primary key' do
+      TableWithPrimaryKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithPrimaryKey
+      expect(schema.primary_key.name).to eq "primary"
+      expect(schema.traversing_key.name).to eq "primary"
     end

-  it
-
-
-
+    it "should identify unique key as primary key" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key.name).to eq "key_unique"
     end

-  it
-
+    it "should skip the traversing key if non_traversing is true" do
+      TableWithUniqueKey.create_table
+      schema = CleanSweep::TableSchema.new TableWithUniqueKey, non_traversing: true
+      expect(schema.primary_key.name).to eq("key_unique")
+      expect(schema.traversing_key).to be_nil
     end
-
-
-    expect(
+    it "should error out if there is no unique key at all" do
+      TableWithRegularKey.create_table
+      expect(->{CleanSweep::TableSchema.new TableWithRegularKey}).to raise_exception(RuntimeError, 'Table table_with_regular_keys must have a primary key')
     end

+  end

-
-
-
-    expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
+  context "on comments" do
+    before do
+      Comment.create_table
     end
-  end

-
+    context "using ascending account, timestamp index" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', ascending: true }

-
+      it 'should read comments' do
+        expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+        expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
+      end

-
-
-
-
-
+      it 'should produce an ascending chunk clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-11-29'))")
+      end

+      it 'should produce all select columns' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end

-
-
-
-
+      it 'should produce the ascending order clause' do
+        expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
+      end
+
+
+      it 'should produce an insert statement' do
+        schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp'
+        rows = account_and_timestamp_rows
+        expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-01'),(1002,2,'2014-11-30'),(1005,5,'2014-11-29')")
+      end
     end

-
+    context "using descending account, timestamp index" do
+
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', reverse: true }
+
+      it 'should produce a descending where clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-11-29'))")
+      end
+

-
-
+      it 'should produce the descending order clause' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
+      end

-  it 'should select all the rows' do
-    expect(schema.column_names).to eq([:id, :account, :timestamp])
     end

-
-
-
-
+    context "using account, timestamp index first column only" do
+      let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', first_only: true }
+
+      it 'should select all the rows' do
+        expect(schema.column_names).to eq([:id, :account, :timestamp])
+      end
+
+      it 'should only query using the first column of the index' do
+        rows = account_and_timestamp_rows
+        expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
+          .to include(" (`comments`.`account` >= 5) ")
+
+      end

     end

-
+    it 'should not care about case' do
+      CleanSweep::TableSchema.new Comment, index: 'primary'
+    end

-
-
-
+    it 'should work without a descending index' do
+      schema = CleanSweep::TableSchema.new Comment, non_traversing: true
+      expect(schema.primary_key.columns.map(&:name)).to eq([:id])
+      expect(schema.traversing_key).to be_nil
+    end

-
-
-
-
-  end
+    it 'should produce minimal select columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'PRIMARY'
+      expect(schema.column_names).to eq([:id])
+    end

-
-
-
-
+    it 'should produce the from clause with an index' do
+      schema = CleanSweep::TableSchema.new Comment, index:'comments_on_timestamp'
+      expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
+    end

-
-
-
-
+    it 'should include additional columns' do
+      schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp', copy_columns: %w[seen id]
+      expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
+      rows = account_and_timestamp_rows
+      rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
+      expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-01'),(1,1002,2,'2014-11-30'),(1,1005,5,'2014-11-29')")

-
-    schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
-    expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
-    rows = account_and_timestamp_rows
-    rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
-    expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")
+    end

   end

-
   def account_and_timestamp_rows
     rows = []
-    t =
+    t = Date.parse '2014-12-01'
     rows << [1001, 5, t]
-    rows << [1002, 2, t - 1
-    rows << [1005, 5, t - 2
+    rows << [1002, 2, t - 1]
+    rows << [1005, 5, t - 2]
   end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cleansweep
 version: !ruby/object:Gem::Version
-  version: 1.0.
+  version: 1.0.4
 platform: ruby
 authors:
 - Bill Kayser
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2015-01-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
@@ -66,6 +66,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: timecop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.7.1
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,10 @@ extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".travis.yml"
 - CHANGES.md
 - Gemfile
+- Gemfile.lock
 - LICENSE.txt
 - README.md
 - Rakefile
@@ -162,8 +178,10 @@ files:
 - lib/clean_sweep/table_schema/index_schema.rb
 - lib/clean_sweep/version.rb
 - lib/cleansweep.rb
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb
@@ -192,8 +210,10 @@ signing_key:
 specification_version: 4
 summary: Utility to purge or archive rows in mysql tables
 test_files:
+- spec/factories/annotations.rb
 - spec/factories/books.rb
 - spec/factories/comments.rb
+- spec/factories/tables.rb
 - spec/purge_runner_spec.rb
 - spec/spec_helper.rb
 - spec/table_schema_spec.rb