cleansweep 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 171c5ce6b972df17162909a1538cf8ecc867e347
4
- data.tar.gz: 6d91698f6759a599e03683287ea230230d99a475
3
+ metadata.gz: ceb5f4b259349242b4a2c4f11854bb1182b2015a
4
+ data.tar.gz: 4e2292c3547793a58f5f69599241f595bfed9358
5
5
  SHA512:
6
- metadata.gz: 5373eb62b1acbf097681efde6a1ad08ad94c574ee5676546e0c798ba91d93a8a28efc97fe8f4ce95e8b5f0ee8f1e4b12e340c6cdf0a78e901589ecbc85f4fee5
7
- data.tar.gz: 199c96ba5a90457bd6d3310b59a293de1dd185d84b009d1f849ce13637210415606ba29fd704312186c8767aaaf9990e902bfcbc7d4e561f28f200112ef83f0e
6
+ metadata.gz: b695e4a7a553ebedb460f20ec9dea0a12b7f3012ec62d0b9127ae27f299458d296beffb7b395069fe09f570c084a0f6f2b4df424fa04a3f74b1f34fde401fe39
7
+ data.tar.gz: fde7d9b0ba62dbff94610402472144873e5df4d54a70dcb3545a4c929944be54adbd8ff9aad2d664ff518390bc56529914f93cff9433ecd408b2521d572c37a6
data/CHANGES.md CHANGED
@@ -8,4 +8,8 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
8
8
 
9
9
  * Changed destination options so you can delete from a different table.
10
10
  * Added `dest_columns` option as a map of column names in the source to column names in the destination.
11
- * More testing and bug fixing in real environments
11
+ * More testing and bug fixing in real environments
12
+
13
+ ### Version 1.0.3
14
+ * Fixed a small bug in instrumentation and target model reference
15
+ * Support first unique index as primary when primary key not found
data/README.md CHANGED
@@ -77,7 +77,8 @@ statements used:
77
77
  Chunk Query:
78
78
  SELECT `id`,`account`,`timestamp`
79
79
  FROM `comments` FORCE INDEX(comments_on_account_timestamp)
80
- WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))\n ORDER BY `account` ASC,`timestamp` ASC
80
+ WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))
81
+ ORDER BY `account` ASC,`timestamp` ASC
81
82
  LIMIT 500
82
83
  Delete Statement:
83
84
  DELETE
@@ -153,6 +154,46 @@ tables that referenced those account ids. To do that, specify a
153
154
  the delete statement on the destination table without removing rows
154
155
  from the source table.
155
156
 
157
+ Here's an example:
158
+
159
+ ```sql
160
+ create temporary table expired_metrics (
161
+ metric_id int,
162
+ account_id int,
163
+ primary key (account_id, metric_id)
164
+ )
165
+ ```
166
+ Then run a job to pull account_id, metric_id into the expired metrics table:
167
+
168
+ ```ruby
169
+ copier = CleanSweep::PurgeRunner.new index: 'index_on_metric_account_id',
170
+ model: AccountMetric,
171
+ dest_model: ExpiredMetric,
172
+ copy_only: true do | model |
173
+ model.where("last_used_at < ?", expiration_date)
174
+ end
175
+ copier.execute_in_batches
176
+ ```
177
+
178
+ Now create as many jobs as you need for the tables which refer to these metrics:
179
+
180
+ ```ruby
181
+ CleanSweep::PurgeRunner.new(model: ExpiredMetric,
182
+ index: 'PRIMARY',
183
+ dest_model: Metric,
184
+ dest_columns: { 'metric_id' => 'id'} ).execute_in_batches
185
+
186
+ CleanSweep::PurgeRunner.new(model: ExpiredMetric,
187
+ index: 'PRIMARY',
188
+ dest_model: ChartMetric).execute_in_batches
189
+
190
+ CleanSweep::PurgeRunner.new(model: ExpiredMetric,
191
+ index: 'PRIMARY',
192
+ dest_model: SystemMetric).execute_in_batches
193
+ ```
194
+
195
+ These will delete the expired metrics from all the tables that refer to them.
196
+
156
197
  ### Watching the history list and replication lag
157
198
 
158
199
  You can enter thresholds for the history list size and replication lag
@@ -196,12 +237,29 @@ There are a number of other options you can use to tune the script.
196
237
  For details look at the [API on the `PurgeRunner`
197
238
  class](http://bkayser.github.io/cleansweep/rdoc/CleanSweep/PurgeRunner.html)
198
239
 
199
- ### NewRelic integration
240
+ ### New Relic integration
200
241
 
201
242
  The script requires the [New Relic](http://github.com/newrelic/rpm)
202
243
  gem. It won't impact anything if you don't have a New Relic account to
203
244
  report to, but if you do use New Relic it is configured to show you
204
- detailed metrics. I recommend turning off transaction traces for long
245
+ detailed metrics.
246
+
247
+ In order to see the data in New Relic your purge must be identified as
248
+ a background transaction. If you are running in Resque or DelayedJob,
249
+ it will automatically be tagged as such, but if you are just invoking
250
+ your purge directly, you'll need to tag it as a background
251
+ transaction. The easy way to do that is shown in this example:
252
+
253
+ ```ruby
254
+ class Purge
255
+ include NewRelic::Agent::Instrumentation::ControllerInstrumentation
256
+ def run()
257
+ ...
258
+ end
259
+ add_transaction_tracer :run
260
+ end
261
+ ```
262
+ Also, I recommend turning off transaction traces for long
205
263
  purge jobs to reduce your memory footprint.
206
264
 
207
265
  ## Testing
@@ -171,8 +171,8 @@ class CleanSweep::PurgeRunner
171
171
  statement = @table_schema.delete_statement(rows)
172
172
  end
173
173
  log :debug, statement if @logger.level == Logger::DEBUG
174
- chunk_deleted = NewRelic::Agent.with_database_metric_name(@target_model, metric_op_name) do
175
- @model.connection.update statement
174
+ chunk_deleted = NewRelic::Agent.with_database_metric_name((@target_model||@model), metric_op_name) do
175
+ (@target_model||@model).connection.update statement
176
176
  end
177
177
 
178
178
  @total_deleted += chunk_deleted
@@ -16,7 +16,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
16
16
  def check!
17
17
  return if Time.now - @check_period < @last_check
18
18
  while (v = get_violations).any? do
19
- @logger.warn("pausing 5 minutes (#{v.to_a.map{ |key, value| "#{key} = #{value}"}.join(", ")})") if !paused?
19
+ @logger.warn("pausing until threshold violations clear (#{v.to_a.map{ |key, value| "#{key} = #{value}"}.join(", ")})")
20
20
  @paused = true
21
21
  pause 5.minutes
22
22
  end
@@ -28,7 +28,7 @@ class CleanSweep::PurgeRunner::MysqlStatus
28
28
  violations = {}
29
29
  if @max_history
30
30
  current = get_history_length
31
- violations["history length"] = current if threshold(@max_history) < current
31
+ violations["history length"] = "#{(current/1_000_000.0)} m" if threshold(@max_history) < current
32
32
  end
33
33
  if @max_replication_lag
34
34
  current = get_replication_lag
@@ -34,13 +34,13 @@ class CleanSweep::TableSchema
34
34
 
35
35
  # Primary key only supported, but we could probably get around this by adding
36
36
  # all columns as 'primary key columns'
37
- raise "Table #{model.table_name} must have a primary key" unless key_schemas.include? 'primary'
37
+ @primary_key = find_primary_key(key_schemas)
38
+ raise "Table #{model.table_name} must have a primary key" unless @primary_key
38
39
 
39
- @primary_key = key_schemas['primary']
40
40
  @primary_key.add_columns_to @columns
41
41
  if traversing_key_name
42
42
  traversing_key_name.downcase!
43
- raise "BTREE Index #{traversing_key_name} not found" unless key_schemas.include? traversing_key_name
43
+ raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
44
44
  @traversing_key = key_schemas[traversing_key_name]
45
45
  @traversing_key.add_columns_to @columns
46
46
  @traversing_key.ascending = ascending
@@ -123,13 +123,18 @@ class CleanSweep::TableSchema
123
123
  column_details.each do | col |
124
124
  key_name = col[2].downcase
125
125
  col_name = col[4].downcase
126
+ unique = col[1] != 1
126
127
  type = col[10]
127
128
  next if key_name != 'PRIMARY' && type != 'BTREE' # Only BTREE indexes supported for traversing
128
- indexes[key_name] ||= IndexSchema.new key_name, @model
129
+ indexes[key_name] ||= IndexSchema.new key_name, @model, unique
129
130
  indexes[key_name] << col_name
130
131
  end
131
132
  return indexes
132
133
  end
133
134
 
135
+ def find_primary_key(indexes)
136
+ indexes['primary'] || indexes.values.find { | index_schema | index_schema.unique? }
137
+ end
138
+
134
139
  end
135
140
 
@@ -1,11 +1,12 @@
1
- class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascending
1
+ class CleanSweep::TableSchema::IndexSchema
2
2
 
3
3
  attr_accessor :columns, :name, :model, :ascending, :first_only, :dest_model
4
4
 
5
- def initialize name, model
5
+ def initialize name, model, unique = false
6
6
  @model = model
7
7
  @columns = []
8
8
  @name = name
9
+ @unique = unique
9
10
  end
10
11
 
11
12
  # Add a column
@@ -13,6 +14,10 @@ class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascendin
13
14
  @columns << CleanSweep::TableSchema::ColumnSchema.new(col_name, model)
14
15
  end
15
16
 
17
+ def unique?
18
+ @unique
19
+ end
20
+
16
21
  # Take columns referenced by this index and add them to the list if they
17
22
  # are not present. Record their position in the list because the position will
18
23
  # be where they are located in a row of values passed in later to #scope_to_next_chunk
@@ -1,3 +1,3 @@
1
1
  module CleanSweep
2
- VERSION = "1.0.2"
2
+ VERSION = "1.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cleansweep
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill Kayser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-03 00:00:00.000000000 Z
11
+ date: 2014-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord