cleansweep 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -1
- data/README.md +61 -3
- data/lib/clean_sweep/purge_runner.rb +2 -2
- data/lib/clean_sweep/purge_runner/mysql_status.rb +2 -2
- data/lib/clean_sweep/table_schema.rb +9 -4
- data/lib/clean_sweep/table_schema/index_schema.rb +7 -2
- data/lib/clean_sweep/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ceb5f4b259349242b4a2c4f11854bb1182b2015a
|
4
|
+
data.tar.gz: 4e2292c3547793a58f5f69599241f595bfed9358
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b695e4a7a553ebedb460f20ec9dea0a12b7f3012ec62d0b9127ae27f299458d296beffb7b395069fe09f570c084a0f6f2b4df424fa04a3f74b1f34fde401fe39
|
7
|
+
data.tar.gz: fde7d9b0ba62dbff94610402472144873e5df4d54a70dcb3545a4c929944be54adbd8ff9aad2d664ff518390bc56529914f93cff9433ecd408b2521d572c37a6
|
data/CHANGES.md
CHANGED
@@ -8,4 +8,8 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
|
|
8
8
|
|
9
9
|
* Changed destination options so you can delete from a different table.
|
10
10
|
* Added `dest_columns` option as a map of column names in the source to column names in the destination.
|
11
|
-
* More testing and bug fixing in real environments
|
11
|
+
* More testing and bug fixing in real environments
|
12
|
+
|
13
|
+
### Version 1.0.3
|
14
|
+
* Fixed a small bug in instrumentation and target model reference
|
15
|
+
* Support first unique index as primary when primary key not found
|
data/README.md
CHANGED
@@ -77,7 +77,8 @@ statements used:
|
|
77
77
|
Chunk Query:
|
78
78
|
SELECT `id`,`account`,`timestamp`
|
79
79
|
FROM `comments` FORCE INDEX(comments_on_account_timestamp)
|
80
|
-
WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))
|
80
|
+
WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))
|
81
|
+
ORDER BY `account` ASC,`timestamp` ASC
|
81
82
|
LIMIT 500
|
82
83
|
Delete Statement:
|
83
84
|
DELETE
|
@@ -153,6 +154,46 @@ tables that referenced those account ids. To do that, specify a
|
|
153
154
|
the delete statement on the destination table without removing rows
|
154
155
|
from the source table.
|
155
156
|
|
157
|
+
Here's an example:
|
158
|
+
|
159
|
+
```sql
|
160
|
+
create temporary table expired_metrics (
|
161
|
+
metric_id int,
|
162
|
+
account_id int,
|
163
|
+
primary key (account_id, metric_id)
|
164
|
+
);
|
165
|
+
```
|
166
|
+
Then run a job to pull account_id, metric_id into the expired metrics table:
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
copier = CleanSweep::PurgeRunner.new(index: 'index_on_metric_account_id',
|
170
|
+
model: AccountMetric,
|
171
|
+
dest_model: ExpiredMetric,
|
172
|
+
copy_only: true) do | model |
|
173
|
+
model.where("last_used_at < ?", expiration_date)
|
174
|
+
end
|
175
|
+
copier.execute_in_batches
|
176
|
+
```
|
177
|
+
|
178
|
+
Now create as many jobs as you need for the tables which refer to these metrics:
|
179
|
+
|
180
|
+
```ruby
|
181
|
+
CleanSweep::PurgeRunner.new(model: ExpiredMetric,
|
182
|
+
index: 'PRIMARY',
|
183
|
+
dest_model: Metric,
|
184
|
+
dest_columns: { 'metric_id' => 'id'} ).execute_in_batches
|
185
|
+
|
186
|
+
CleanSweep::PurgeRunner.new(model: ExpiredMetric,
|
187
|
+
index: 'PRIMARY',
|
188
|
+
dest_model: ChartMetric).execute_in_batches
|
189
|
+
|
190
|
+
CleanSweep::PurgeRunner.new(model: ExpiredMetric,
|
191
|
+
index: 'PRIMARY',
|
192
|
+
dest_model: SystemMetric).execute_in_batches
|
193
|
+
```
|
194
|
+
|
195
|
+
These will delete the expired metrics from all the tables that refer to them.
|
196
|
+
|
156
197
|
### Watching the history list and replication lag
|
157
198
|
|
158
199
|
You can enter thresholds for the history list size and replication lag
|
@@ -196,12 +237,29 @@ There are a number of other options you can use to tune the script.
|
|
196
237
|
For details look at the [API on the `PurgeRunner`
|
197
238
|
class](http://bkayser.github.io/cleansweep/rdoc/CleanSweep/PurgeRunner.html)
|
198
239
|
|
199
|
-
###
|
240
|
+
### New Relic integration
|
200
241
|
|
201
242
|
The script requires the [New Relic](http://github.com/newrelic/rpm)
|
202
243
|
gem. It won't impact anything if you don't have a New Relic account to
|
203
244
|
report to, but if you do use New Relic it is configured to show you
|
204
|
-
detailed metrics.
|
245
|
+
detailed metrics.
|
246
|
+
|
247
|
+
In order to see the data in New Relic your purge must be identified as
|
248
|
+
a background transaction. If you are running in Resque or DelayedJob,
|
249
|
+
it will automatically be tagged as such, but if you are just invoking
|
250
|
+
your purge directly, you'll need to tag it as a background
|
251
|
+
transaction. The easy way to do that is shown in this example:
|
252
|
+
|
253
|
+
```ruby
|
254
|
+
class Purge
|
255
|
+
include NewRelic::Agent::Instrumentation::ControllerInstrumentation
|
256
|
+
def run()
|
257
|
+
...
|
258
|
+
end
|
259
|
+
add_transaction_tracer :run
|
260
|
+
end
|
261
|
+
```
|
262
|
+
Also, I recommend turning off transaction traces for long
|
205
263
|
purge jobs to reduce your memory footprint.
|
206
264
|
|
207
265
|
## Testing
|
@@ -171,8 +171,8 @@ class CleanSweep::PurgeRunner
|
|
171
171
|
statement = @table_schema.delete_statement(rows)
|
172
172
|
end
|
173
173
|
log :debug, statement if @logger.level == Logger::DEBUG
|
174
|
-
chunk_deleted = NewRelic::Agent.with_database_metric_name(@target_model, metric_op_name) do
|
175
|
-
@model.connection.update statement
|
174
|
+
chunk_deleted = NewRelic::Agent.with_database_metric_name((@target_model||@model), metric_op_name) do
|
175
|
+
(@target_model||@model).connection.update statement
|
176
176
|
end
|
177
177
|
|
178
178
|
@total_deleted += chunk_deleted
|
@@ -16,7 +16,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
|
|
16
16
|
def check!
|
17
17
|
return if Time.now - @check_period < @last_check
|
18
18
|
while (v = get_violations).any? do
|
19
|
-
@logger.warn("pausing
|
19
|
+
@logger.warn("pausing until threshold violations clear (#{v.to_a.map{ |key, value| "#{key} = #{value}"}.join(", ")})")
|
20
20
|
@paused = true
|
21
21
|
pause 5.minutes
|
22
22
|
end
|
@@ -28,7 +28,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
|
|
28
28
|
violations = {}
|
29
29
|
if @max_history
|
30
30
|
current = get_history_length
|
31
|
-
violations["history length"] = current if threshold(@max_history) < current
|
31
|
+
violations["history length"] = "#{(current/1_000_000.0)} m" if threshold(@max_history) < current
|
32
32
|
end
|
33
33
|
if @max_replication_lag
|
34
34
|
current = get_replication_lag
|
@@ -34,13 +34,13 @@ class CleanSweep::TableSchema
|
|
34
34
|
|
35
35
|
# Primary key only supported, but we could probably get around this by adding
|
36
36
|
# all columns as 'primary key columns'
|
37
|
-
|
37
|
+
@primary_key = find_primary_key(key_schemas)
|
38
|
+
raise "Table #{model.table_name} must have a primary key" unless @primary_key
|
38
39
|
|
39
|
-
@primary_key = key_schemas['primary']
|
40
40
|
@primary_key.add_columns_to @columns
|
41
41
|
if traversing_key_name
|
42
42
|
traversing_key_name.downcase!
|
43
|
-
raise "BTREE Index #{traversing_key_name} not found" unless key_schemas.include? traversing_key_name
|
43
|
+
raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
|
44
44
|
@traversing_key = key_schemas[traversing_key_name]
|
45
45
|
@traversing_key.add_columns_to @columns
|
46
46
|
@traversing_key.ascending = ascending
|
@@ -123,13 +123,18 @@ class CleanSweep::TableSchema
|
|
123
123
|
column_details.each do | col |
|
124
124
|
key_name = col[2].downcase
|
125
125
|
col_name = col[4].downcase
|
126
|
+
unique = col[1] != 1
|
126
127
|
type = col[10]
|
127
128
|
next if key_name != 'PRIMARY' && type != 'BTREE' # Only BTREE indexes supported for traversing
|
128
|
-
indexes[key_name] ||= IndexSchema.new key_name, @model
|
129
|
+
indexes[key_name] ||= IndexSchema.new key_name, @model, unique
|
129
130
|
indexes[key_name] << col_name
|
130
131
|
end
|
131
132
|
return indexes
|
132
133
|
end
|
133
134
|
|
135
|
+
def find_primary_key(indexes)
|
136
|
+
indexes['primary'] || indexes.values.find { | index_schema | index_schema.unique? }
|
137
|
+
end
|
138
|
+
|
134
139
|
end
|
135
140
|
|
@@ -1,11 +1,12 @@
|
|
1
|
-
class CleanSweep::TableSchema::IndexSchema
|
1
|
+
class CleanSweep::TableSchema::IndexSchema
|
2
2
|
|
3
3
|
attr_accessor :columns, :name, :model, :ascending, :first_only, :dest_model
|
4
4
|
|
5
|
-
def initialize name, model
|
5
|
+
def initialize name, model, unique = false
|
6
6
|
@model = model
|
7
7
|
@columns = []
|
8
8
|
@name = name
|
9
|
+
@unique = unique
|
9
10
|
end
|
10
11
|
|
11
12
|
# Add a column
|
@@ -13,6 +14,10 @@ class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascendin
|
|
13
14
|
@columns << CleanSweep::TableSchema::ColumnSchema.new(col_name, model)
|
14
15
|
end
|
15
16
|
|
17
|
+
def unique?
|
18
|
+
@unique
|
19
|
+
end
|
20
|
+
|
16
21
|
# Take columns referenced by this index and add them to the list if they
|
17
22
|
# are not present. Record their position in the list because the position will
|
18
23
|
# be where they are located in a row of values passed in later to #scope_to_next_chunk
|
data/lib/clean_sweep/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cleansweep
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Kayser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|