cleansweep 1.0.2 → 1.0.3
- checksums.yaml +4 -4
- data/CHANGES.md +5 -1
- data/README.md +61 -3
- data/lib/clean_sweep/purge_runner.rb +2 -2
- data/lib/clean_sweep/purge_runner/mysql_status.rb +2 -2
- data/lib/clean_sweep/table_schema.rb +9 -4
- data/lib/clean_sweep/table_schema/index_schema.rb +7 -2
- data/lib/clean_sweep/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ceb5f4b259349242b4a2c4f11854bb1182b2015a
+  data.tar.gz: 4e2292c3547793a58f5f69599241f595bfed9358
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b695e4a7a553ebedb460f20ec9dea0a12b7f3012ec62d0b9127ae27f299458d296beffb7b395069fe09f570c084a0f6f2b4df424fa04a3f74b1f34fde401fe39
+  data.tar.gz: fde7d9b0ba62dbff94610402472144873e5df4d54a70dcb3545a4c929944be54adbd8ff9aad2d664ff518390bc56529914f93cff9433ecd408b2521d572c37a6
data/CHANGES.md
CHANGED

@@ -8,4 +8,8 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
 
 * Changed destination options so you can delete from a different table.
 * Added `dest_columns` option as a map of column names in the source to column names in the destination.
-* More testing and bug fixing in real environments
+* More testing and bug fixing in real environments
+
+### Version 1.0.3
+* Small bug in instrumentation and target model reference
+* Support first unique index as primary when primary key not found
data/README.md
CHANGED

@@ -77,7 +77,8 @@ statements used:
 Chunk Query:
     SELECT `id`,`account`,`timestamp`
     FROM `comments` FORCE INDEX(comments_on_account_timestamp)
-    WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))
+    WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))
+    ORDER BY `account` ASC,`timestamp` ASC
     LIMIT 500
 Delete Statement:
     DELETE
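The `WHERE` clause above is keyset pagination: each chunk resumes strictly after the last row the previous chunk returned, in index order, so no `OFFSET` scan is needed. A sketch of what a follow-up chunk query looks like, using invented values (account 42 and its timestamp) standing in for the last row seen:

```sql
-- Sketch of the next chunk's query; the starting values (42 and the
-- timestamp) are hypothetical, taken from the last row of the prior chunk.
SELECT `id`,`account`,`timestamp`
FROM `comments` FORCE INDEX(comments_on_account_timestamp)
WHERE (timestamp < '2014-11-25 21:47:43')
  AND (`account` > 42 OR (`account` = 42 AND `timestamp` > '2014-11-20 21:47:43'))
ORDER BY `account` ASC,`timestamp` ASC
LIMIT 500
```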
@@ -153,6 +154,46 @@ tables that referenced those account ids. To do that, specify a
 the delete statement on the destination table without removing rows
 from the source table.
 
+Here's an example:
+
+```sql
+create temporary table expired_metrics (
+  metric_id int,
+  account_id int,
+  primary key (account_id, metric_id)
+)
+```
+Then run a job to pull account_id, metric_id into the expired metrics table:
+
+```ruby
+copier = CleanSweep::PurgeRunner.new(index: 'index_on_metric_account_id',
+                                     model: AccountMetric,
+                                     dest_model: ExpiredMetric,
+                                     copy_only: true) do | model |
+  model.where("last_used_at < ?", expiration_date)
+end
+copier.execute_in_batches
+```
+
+Now create as many jobs as you need for the tables which refer to these metrics:
+
+```ruby
+CleanSweep::PurgeRunner.new(model: ExpiredMetric,
+                            index: 'PRIMARY',
+                            dest_model: Metric,
+                            dest_columns: { 'metric_id' => 'id' }).execute_in_batches
+
+CleanSweep::PurgeRunner.new(model: ExpiredMetric,
+                            index: 'PRIMARY',
+                            dest_model: ChartMetric).execute_in_batches
+
+CleanSweep::PurgeRunner.new(model: ExpiredMetric,
+                            index: 'PRIMARY',
+                            dest_model: SystemMetric).execute_in_batches
+```
+
+These will delete the expired metrics from all the tables that refer to them.
+
 ### Watching the history list and replication lag
 
 You can enter thresholds for the history list size and replication lag
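That section describes pausing the purge when MySQL falls behind. A minimal sketch of wiring in the thresholds, assuming the `max_history` and `max_repl_lag` option names from the gem's documentation (verify them against your version):

```ruby
# Minimal sketch, assuming the max_history/max_repl_lag options documented
# for PurgeRunner: pause when the InnoDB history list tops one million
# entries or replication lag exceeds ten minutes.
purger = CleanSweep::PurgeRunner.new(model: Comment,
                                     index: 'comments_on_account_timestamp',
                                     max_history: 1_000_000,
                                     max_repl_lag: 600) do |model|
  model.where('timestamp < ?', 1.week.ago)
end
purger.execute_in_batches
```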
@@ -196,12 +237,29 @@ There are a number of other options you can use to tune the script.
 For details look at the [API on the `PurgeRunner`
 class](http://bkayser.github.io/cleansweep/rdoc/CleanSweep/PurgeRunner.html)
 
-###
+### New Relic integration
 
 The script requires the [New Relic](http://github.com/newrelic/rpm)
 gem. It won't impact anything if you don't have a New Relic account to
 report to, but if you do use New Relic it is configured to show you
-detailed metrics. Also, I recommend turning off transaction traces for long
+detailed metrics.
+
+In order to see the data in New Relic your purge must be identified as
+a background transaction. If you are running in Resque or DelayedJob,
+it will automatically be tagged as such, but if you are just invoking
+your purge directly, you'll need to tag it as a background
+transaction. The easy way to do that is shown in this example:
+
+```ruby
+class Purge
+  include NewRelic::Agent::Instrumentation::ControllerInstrumentation
+  def run()
+    ...
+  end
+  add_transaction_tracer :run
+end
+```
+Also, I recommend turning off transaction traces for long
 purge jobs to reduce your memory footprint.
 
 ## Testing
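On the transaction-trace recommendation above: with the New Relic agent this is a `newrelic.yml` setting. A minimal sketch (a standard agent option, but confirm against your agent version):

```yaml
# newrelic.yml: turn off transaction traces for long-running purge jobs
common: &default_settings
  transaction_tracer:
    enabled: false
```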
data/lib/clean_sweep/purge_runner.rb
CHANGED

@@ -171,8 +171,8 @@ class CleanSweep::PurgeRunner
         statement = @table_schema.delete_statement(rows)
       end
       log :debug, statement if @logger.level == Logger::DEBUG
-      chunk_deleted = NewRelic::Agent.with_database_metric_name(@target_model, metric_op_name) do
-        @model.connection.update statement
+      chunk_deleted = NewRelic::Agent.with_database_metric_name((@target_model||@model), metric_op_name) do
+        (@target_model||@model).connection.update statement
       end
 
       @total_deleted += chunk_deleted
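The fix resolves the model once so that, when a purge has a destination, both the reported metric name and the connection the delete runs on come from the destination table. A standalone sketch of the fallback, using stand-in structs rather than the gem's classes:

```ruby
# Standalone sketch of the (@target_model || @model) fallback above: the
# destination model, when configured, supplies the table the delete runs on.
Model  = Struct.new(:table_name)
source = Model.new('comments')
dest   = Model.new('expired_comments')

[nil, dest].each do |target_model|
  effective = target_model || source
  puts "delete runs against #{effective.table_name}"
end
# => delete runs against comments
#    delete runs against expired_comments
```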
data/lib/clean_sweep/purge_runner/mysql_status.rb
CHANGED

@@ -16,7 +16,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
   def check!
     return if Time.now - @check_period < @last_check
     while (v = get_violations).any? do
-      @logger.warn("pausing
+      @logger.warn("pausing until threshold violations clear (#{v.to_a.map{ |key, value| "#{key} = #{value}"}.join(", ")})")
       @paused = true
       pause 5.minutes
     end
@@ -28,7 +28,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
     violations = {}
     if @max_history
       current = get_history_length
-      violations["history length"] = current if threshold(@max_history) < current
+      violations["history length"] = "#{(current/1_000_000.0)} m" if threshold(@max_history) < current
     end
     if @max_replication_lag
       current = get_replication_lag
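With these two changes the pause warning names each violated threshold, and history length is reported in millions (for example `2.5 m`). A standalone sketch of the message formatting, with the gem's `threshold` helper simplified to a plain comparison:

```ruby
# Standalone sketch of the new warning format; the gem's threshold() helper
# is simplified here to a direct comparison against the configured maximum.
violations      = {}
current_history = 2_500_000
max_history     = 1_000_000
violations["history length"] = "#{current_history / 1_000_000.0} m" if current_history > max_history

message = violations.to_a.map { |key, value| "#{key} = #{value}" }.join(", ")
puts "pausing until threshold violations clear (#{message})"
# => pausing until threshold violations clear (history length = 2.5 m)
```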
data/lib/clean_sweep/table_schema.rb
CHANGED

@@ -34,13 +34,13 @@ class CleanSweep::TableSchema
 
     # Primary key only supported, but we could probably get around this by adding
     # all columns as 'primary key columns'
-
+    @primary_key = find_primary_key(key_schemas)
+    raise "Table #{model.table_name} must have a primary key" unless @primary_key
 
-    @primary_key = key_schemas['primary']
     @primary_key.add_columns_to @columns
     if traversing_key_name
       traversing_key_name.downcase!
-      raise "BTREE Index #{traversing_key_name} not found" unless key_schemas.include? traversing_key_name
+      raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
       @traversing_key = key_schemas[traversing_key_name]
       @traversing_key.add_columns_to @columns
       @traversing_key.ascending = ascending
@@ -123,13 +123,18 @@ class CleanSweep::TableSchema
     column_details.each do | col |
       key_name = col[2].downcase
       col_name = col[4].downcase
+      unique = col[1] != 1
       type = col[10]
       next if key_name != 'PRIMARY' && type != 'BTREE' # Only BTREE indexes supported for traversing
-      indexes[key_name] ||= IndexSchema.new key_name, @model
+      indexes[key_name] ||= IndexSchema.new key_name, @model, unique
       indexes[key_name] << col_name
     end
     return indexes
   end
 
+  def find_primary_key(indexes)
+    indexes['primary'] || indexes.values.find { | index_schema | index_schema.unique? }
+  end
+
 end
 
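This is the 1.0.3 changelog entry "support first unique index as primary": when `SHOW INDEXES` yields no `PRIMARY` entry, the first unique index stands in, and the constructor raises if neither exists. A standalone sketch of the lookup, using stand-in index objects rather than the gem's `IndexSchema`:

```ruby
# Standalone sketch of the primary-key fallback: prefer the index named
# 'primary', otherwise the first unique index; nil means the caller raises.
Index = Struct.new(:name, :unique) do
  def unique?
    unique
  end
end

def find_primary_key(indexes)
  indexes['primary'] || indexes.values.find { |index_schema| index_schema.unique? }
end

with_pk    = { 'primary'    => Index.new('primary', true) }
without_pk = { 'uniq_email' => Index.new('uniq_email', true),
               'idx_name'   => Index.new('idx_name', false) }

puts find_primary_key(with_pk).name     # => primary
puts find_primary_key(without_pk).name  # => uniq_email
p    find_primary_key({})               # => nil ("must have a primary key" is raised)
```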
data/lib/clean_sweep/table_schema/index_schema.rb
CHANGED

@@ -1,11 +1,12 @@
-class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascending, :first_only, :dest_model
+class CleanSweep::TableSchema::IndexSchema
 
   attr_accessor :columns, :name, :model, :ascending, :first_only, :dest_model
 
-  def initialize name, model
+  def initialize name, model, unique = false
     @model = model
     @columns = []
     @name = name
+    @unique = unique
   end
 
   # Add a column
@@ -13,6 +14,10 @@ class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascending, :first_only, :dest_model
     @columns << CleanSweep::TableSchema::ColumnSchema.new(col_name, model)
   end
 
+  def unique?
+    @unique
+  end
+
   # Take columns referenced by this index and add them to the list if they
   # are not present. Record their position in the list because the position will
   # be where they are located in a row of values passed in later to #scope_to_next_chunk
data/lib/clean_sweep/version.rb
CHANGED
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cleansweep
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.0.3
 platform: ruby
 authors:
 - Bill Kayser
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-
+date: 2014-12-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord