cleansweep 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ceb5f4b259349242b4a2c4f11854bb1182b2015a
4
- data.tar.gz: 4e2292c3547793a58f5f69599241f595bfed9358
3
+ metadata.gz: f772f74727a7d58fdebda097fb0b70572cb92c34
4
+ data.tar.gz: a82986ae0e26308e4842193441e427e998c4f5a0
5
5
  SHA512:
6
- metadata.gz: b695e4a7a553ebedb460f20ec9dea0a12b7f3012ec62d0b9127ae27f299458d296beffb7b395069fe09f570c084a0f6f2b4df424fa04a3f74b1f34fde401fe39
7
- data.tar.gz: fde7d9b0ba62dbff94610402472144873e5df4d54a70dcb3545a4c929944be54adbd8ff9aad2d664ff518390bc56529914f93cff9433ecd408b2521d572c37a6
6
+ metadata.gz: 183922164f35fbd986ca9617fa1c73fc4133db90fc15a59552f48def16a8d39d8a8164bbef8d61ee334f6a8dffbfc74a34f5fbb182d446d7485372b6add8667a
7
+ data.tar.gz: 77e4f14d2e44e7400d4bb14a09719fea8a13aac2660fab0712355b053d8117e848f7dd967878b01ec5c87c6181eca821056f4e388b00c93cd4b005432d1c0ebc
data/.gitignore CHANGED
@@ -1,7 +1,6 @@
1
1
  /.bundle/
2
2
  .ruby-version
3
3
  /.yardoc
4
- /Gemfile.lock
5
4
  /_yardoc/
6
5
  /coverage/
7
6
  /doc/
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.4
4
+ - 1.9.3
5
+ gemfile:
6
+ - gemfiles/Gemfile.rails3
7
+ - gemfiles/Gemfile.rails4
8
+ addons:
9
+ code_climate:
10
+ repo_token: 7ec6fd701b7d2b206cdd233c2202b6e11c8ba6af01f8a93f5e24595008ac20a0
data/CHANGES.md CHANGED
@@ -1,8 +1,17 @@
1
1
  See the [documentation](http://bkayser.github.io/cleansweep) for details
2
2
 
3
- ### Version 1.0.1
3
+ ### Version 1.0.4
4
4
 
5
- * Initial release
5
+ * Print dry run output using the logger
6
+ * Add option `non_traversing` so you can explicitly not use an index. If an index
7
+ is not specified, it now defaults to the primary key, or to the first unique index when there is no primary key.
8
+ * Added more tests
9
+ * Added Travis CI build, metrics
10
+
11
+ ### Version 1.0.3
12
+
13
+ * Small bug in instrumentation and target model reference
14
+ * Support first unique index as primary when primary key not found
6
15
 
7
16
  ### Version 1.0.2
8
17
 
@@ -10,6 +19,7 @@ See the [documentation](http://bkayser.github.io/cleansweep) for details
10
19
  * Added `dest_columns` option as a map of column names in the source to column names in the destination.
11
20
  * More testing and bug fixing in real environments
12
21
 
13
- ### Version 1.0.3
14
- * Small bug in instrumentation and target model reference
15
- * Support first unique index as primary when primary key not found
22
+ ### Version 1.0.1
23
+
24
+ * Initial release
25
+
data/Gemfile CHANGED
@@ -1,4 +1,6 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ gem "codeclimate-test-reporter", group: :test, require: nil
4
+
3
5
  # Specify your gem's dependencies in cleansweep.gemspec
4
6
  gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,82 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ cleansweep (1.0.4)
5
+ activerecord (>= 3.0)
6
+ mysql2 (~> 0.3)
7
+ newrelic_rpm
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ activemodel (4.2.0)
13
+ activesupport (= 4.2.0)
14
+ builder (~> 3.1)
15
+ activerecord (4.2.0)
16
+ activemodel (= 4.2.0)
17
+ activesupport (= 4.2.0)
18
+ arel (~> 6.0)
19
+ activesupport (4.2.0)
20
+ i18n (~> 0.7)
21
+ json (~> 1.7, >= 1.7.7)
22
+ minitest (~> 5.1)
23
+ thread_safe (~> 0.3, >= 0.3.4)
24
+ tzinfo (~> 1.1)
25
+ arel (6.0.0)
26
+ awesome_print (1.6.1)
27
+ builder (3.2.2)
28
+ codeclimate-test-reporter (0.4.4)
29
+ simplecov (>= 0.7.1, < 1.0.0)
30
+ coderay (1.1.0)
31
+ diff-lcs (1.2.5)
32
+ docile (1.1.5)
33
+ factory_girl (4.5.0)
34
+ activesupport (>= 3.0.0)
35
+ i18n (0.7.0)
36
+ json (1.8.1)
37
+ method_source (0.8.2)
38
+ minitest (5.5.0)
39
+ multi_json (1.10.1)
40
+ mysql2 (0.3.17)
41
+ newrelic_rpm (3.9.9.275)
42
+ pry (0.10.1)
43
+ coderay (~> 1.1.0)
44
+ method_source (~> 0.8.1)
45
+ slop (~> 3.4)
46
+ rake (10.4.2)
47
+ rspec (3.1.0)
48
+ rspec-core (~> 3.1.0)
49
+ rspec-expectations (~> 3.1.0)
50
+ rspec-mocks (~> 3.1.0)
51
+ rspec-core (3.1.7)
52
+ rspec-support (~> 3.1.0)
53
+ rspec-expectations (3.1.2)
54
+ diff-lcs (>= 1.2.0, < 2.0)
55
+ rspec-support (~> 3.1.0)
56
+ rspec-mocks (3.1.3)
57
+ rspec-support (~> 3.1.0)
58
+ rspec-support (3.1.2)
59
+ simplecov (0.9.1)
60
+ docile (~> 1.1.0)
61
+ multi_json (~> 1.0)
62
+ simplecov-html (~> 0.8.0)
63
+ simplecov-html (0.8.0)
64
+ slop (3.6.0)
65
+ thread_safe (0.3.4)
66
+ timecop (0.7.1)
67
+ tzinfo (1.2.2)
68
+ thread_safe (~> 0.1)
69
+
70
+ PLATFORMS
71
+ ruby
72
+
73
+ DEPENDENCIES
74
+ awesome_print (~> 1.2)
75
+ bundler (~> 1.7)
76
+ cleansweep!
77
+ codeclimate-test-reporter
78
+ factory_girl (~> 4.4)
79
+ pry (~> 0)
80
+ rake (~> 10.0)
81
+ rspec (~> 3.1)
82
+ timecop (~> 0.7.1)
data/README.md CHANGED
@@ -2,6 +2,10 @@ Cleansweep is a utility for scripting purges using ruby in an
2
2
  efficient, low-impact manner on mysql innodb tables. Based on the
3
3
  Percona `pt-archive` utility.
4
4
 
5
+ [![Build Status](https://img.shields.io/travis/bkayser/cleansweep/master.svg?x=3)](https://travis-ci.org/bkayser/cleansweep)
6
+ [![Code Climate](https://codeclimate.com/github/bkayser/cleansweep/badges/gpa.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
7
+ [![Test Coverage](https://codeclimate.com/github/bkayser/cleansweep/badges/coverage.svg?x=3)](https://codeclimate.com/github/bkayser/cleansweep)
8
+
5
9
  ## Installation
6
10
 
7
11
  Add this line to your application's Gemfile:
@@ -116,6 +120,9 @@ The chunk query looks like:
116
120
  You can scan the index in either direction. To specify descending
117
121
  order, use the `reverse: true` option.
118
122
 
123
+ If no index is specified, it will pick the primary key or the first unique index if there
124
+ is no primary key.
125
+
119
126
  ### Copying rows from one table to another
120
127
 
121
128
  You can use the same technique to copy rows from one table to another.
@@ -179,16 +186,13 @@ Now create as many jobs as you need for the tables which refer to these metrics:
179
186
 
180
187
  ```ruby
181
188
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
182
- index: 'PRIMARY',
183
189
  dest_model: Metric,
184
190
  dest_columns: { 'metric_id' => 'id'} ).execute_in_batches
185
191
 
186
192
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
187
- index: 'PRIMARY',
188
193
  dest_model: ChartMetric).execute_in_batches
189
194
 
190
195
  CleanSweep::PurgeRunner.new(model: ExpiredMetric,
191
- index: 'PRIMARY',
192
196
  dest_model: SystemMetric).execute_in_batches
193
197
  ```
194
198
 
@@ -202,6 +206,10 @@ into an unsafe territory. The script will pause for 5 minutes and
202
206
  only start once the corresponding metric goes back down to 90% of the
203
207
  specified threshold.
204
208
 
209
+ Note: You will need the MySQL `PROCESS` privilege to be able to see the history list and
210
+ the `REPLICATION CLIENT` privilege to monitor the replication lag.
211
+
212
+
205
213
  ### Logging and monitoring progress
206
214
 
207
215
  You pass in a standard log instance to capture all running output. By
@@ -221,8 +229,8 @@ in your target table.
221
229
 
222
230
  ### Limitations
223
231
 
224
- * Only works for mysql (as far as I know). I have only used it against 5.5.
225
- * Should work with ActiveRecord 3.* - 4.*.
232
+ * Only works for mysql. I have only used it against 5.5.
233
+ * Tested with ActiveRecord 3.1.\* - 4.0.\*.
226
234
  * Using a non-unique index risks missing duplicate rows unless you use the `first_only` option.
227
235
  * Using the `first_only` option risks rescanning many rows if you have many more duplicates than your
228
236
  chunk size
@@ -279,11 +287,11 @@ db called 'cstest'.
279
287
 
280
288
  ## License and Copyright
281
289
 
282
- Copyright 2014 New Relic, Inc., and Bill Kayser
290
+ Copyright 2014-2015 New Relic, Inc., and Bill Kayser
283
291
 
284
292
  Covered by the MIT [LICENSE](LICENSE.txt).
285
293
 
286
- ### Credits
294
+ ## Credits
287
295
 
288
296
  This was all inspired and informed by [Percona's `pt-archiver`
289
297
  script](http://www.percona.com/doc/percona-toolkit/2.1/pt-archiver.html)
data/Rakefile CHANGED
@@ -1,3 +1,4 @@
1
1
  require "bundler/gem_tasks"
2
2
  require 'rspec/core/rake_task'
3
3
  RSpec::Core::RakeTask.new(:spec)
4
+ task :default => :spec
data/cleansweep.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.homepage = "http://bkayser.github.com/cleansweep"
21
21
  spec.license = "MIT"
22
22
 
23
- spec.files = `git ls-files -z`.split("\x0")
23
+ spec.files = `git ls-files -z`.split("\x0").delete_if { | f | f =~ /^gemfiles/ }
24
24
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
25
25
  spec.test_files = spec.files.grep(%r{^spec/})
26
26
  spec.require_paths = ["lib"]
@@ -30,6 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.add_runtime_dependency 'mysql2', '~> 0.3'
31
31
 
32
32
  spec.add_development_dependency 'pry', '~> 0'
33
+ spec.add_development_dependency 'timecop', '~> 0.7.1'
33
34
  spec.add_development_dependency 'bundler', '~> 1.7'
34
35
  spec.add_development_dependency 'rake', '~> 10.0'
35
36
  spec.add_development_dependency 'rspec', '~> 3.1'
@@ -16,14 +16,23 @@ require 'stringio'
16
16
  # The number of rows to copy in each block. Defaults to 500.
17
17
  # [:index]
18
18
  # The index to traverse in ascending order doing the purge. Rows are read in the order of
19
- # the index, which must be a btree index. If not specified, <tt>PRIMARY</tt> is assumed.
19
+ # the index, which must be a btree index. If not specified, an index is chosen automatically
20
+ # in order of preference:
21
+ # 1. PRIMARY KEY
22
+ # 2. First UNIQUE index
23
+ # 3. First non-UNIQUE index
24
+ # 4. No index used if no indexes defined.
25
+ # [:non_traversing]
26
+ # When true, specifies the table will not be traversed using an index.
27
+ # This only makes sense if you are deleting everything as you go along; otherwise you'll
28
+ # be re-scanning skipped rows.
20
29
  # [:reverse]
21
30
  # Traverse the index in reverse order. For example, if your index is on <tt>account_id</tt>,
22
31
  # <tt>timestamp</tt>, this option will move through the rows starting at the highest account
23
32
  # number, then move through timestamps starting with the most recent.
24
33
  # [:first_only]
25
- # Traverse only the first column of the index, and do so inclusively using the <tt>&gt;=</tt> operator
26
- # instead of the strict <tt>&gt;</tt> operator. This is important if the index is not unique and there
34
+ # Traverse only the first column of the index, and do so inclusively using the <tt>'>='</tt> operator
35
+ # instead of the strict <tt>'>'</tt> operator. This is important if the index is not unique and there
27
36
  # are a lot of duplicates. Otherwise the delete could miss rows. Not allowed in copy mode because you'd
28
37
  # be inserting duplicate rows.
29
38
  # [:dry_run]
@@ -94,11 +103,12 @@ class CleanSweep::PurgeRunner
94
103
  @copy_mode = @target_model && options[:copy_only]
95
104
 
96
105
  @table_schema = CleanSweep::TableSchema.new @model,
97
- key_name: options[:index],
98
- ascending: !options[:reverse],
99
- extra_columns: options[:copy_columns],
106
+ non_traversing: options[:non_traversing],
107
+ index: options[:index],
108
+ reverse: options[:reverse],
109
+ copy_columns: options[:copy_columns],
100
110
  first_only: options[:first_only],
101
- dest_model: @target_model,
111
+ dest_model: options[:dest_model],
102
112
  dest_columns: options[:dest_columns]
103
113
 
104
114
  if (@max_history || @max_repl_lag)
@@ -134,7 +144,7 @@ class CleanSweep::PurgeRunner
134
144
  def execute_in_batches
135
145
 
136
146
  if @dry_run
137
- print_queries($stdout)
147
+ log :info, print_queries
138
148
  return 0
139
149
  end
140
150
 
@@ -205,26 +215,6 @@ class CleanSweep::PurgeRunner
205
215
  add_method_tracer :sleep
206
216
  add_method_tracer :execute_in_batches
207
217
 
208
- def print_queries(io)
209
- io.puts 'Initial Query:'
210
- io.puts format_query(' ', @query.to_sql)
211
- rows = @model.connection.select_rows @query.limit(1).to_sql
212
- if rows.empty?
213
- # Don't have any sample data to use for the sample queries, so use NULL values just
214
- # so the query will print out.
215
- rows << [nil] * 100
216
- end
217
- io.puts "Chunk Query:"
218
- io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, rows.first).to_sql)
219
- if copy_mode?
220
- io.puts "Insert Statement:"
221
- io.puts format_query(' ', @table_schema.insert_statement(rows))
222
- else
223
- io.puts "Delete Statement:"
224
- io.puts format_query(' ', @table_schema.delete_statement(rows))
225
- end
226
- end
227
-
228
218
  private
229
219
 
230
220
  def format_query indentation, query
@@ -6,19 +6,7 @@ module CleanSweep::PurgeRunner::Logging
6
6
  while (@report_interval_start < Time.now - @report_interval) do
7
7
  @report_interval_start += @report_interval
8
8
  end
9
- report = []
10
- elapsed = [1, (Time.now - @start).to_i].max
11
- rate = (@total_deleted / elapsed).to_i
12
- rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
13
- report << "report:"
14
- if copy_mode?
15
- report << " #{@dry_run ? 'queried' : 'copied'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
16
- else
17
- report << " #{@dry_run ? 'queried' : 'deleted'}: #{'%12i' % @total_deleted} #{@model.table_name} records"
18
- end
19
- report << " elapsed: #{'%12s' % format(elapsed)}"
20
- report << " rate: #{rate}"
21
- log :info, report.join("\n")
9
+ print_report
22
10
  end
23
11
  end
24
12
 
@@ -28,6 +16,34 @@ module CleanSweep::PurgeRunner::Logging
28
16
  @logger.send level, out
29
17
  end
30
18
 
19
+ def print_queries
20
+ io = StringIO.new
21
+ io.puts 'Initial Query:'
22
+ io.puts format_query(' ', @query.to_sql)
23
+ io.puts "Chunk Query:"
24
+ io.puts format_query(' ', @table_schema.scope_to_next_chunk(@query, sample_rows.first).to_sql)
25
+ if copy_mode?
26
+ io.puts "Insert Statement:"
27
+ io.puts format_query(' ', @table_schema.insert_statement(sample_rows))
28
+ else
29
+ io.puts "Delete Statement:"
30
+ io.puts format_query(' ', @table_schema.delete_statement(sample_rows))
31
+ end
32
+ io.string
33
+ end
34
+
35
+ private
36
+
37
+ def sample_rows
38
+ @sample_rows ||= @model.connection.select_rows @query.limit(1).to_sql
39
+ if @sample_rows.empty?
40
+ # Don't have any sample data to use for the sample queries, so use NULL values just
41
+ # so the query will print out.
42
+ @sample_rows << [nil] * 100
43
+ end
44
+ @sample_rows
45
+ end
46
+
31
47
  def format(time)
32
48
  format_string = "%H:%M:%S"
33
49
  if (time.to_i > (24 * 60 * 60))
@@ -35,4 +51,20 @@ module CleanSweep::PurgeRunner::Logging
35
51
  end
36
52
  Time.at(time).strftime(format_string)
37
53
  end
38
- end
54
+
55
+ def print_report
56
+ elapsed = [1, (Time.now - @start).to_i].max
57
+ rate = (@total_deleted / elapsed).to_i
58
+ rate = "#{rate > 0 ? '%12i' % rate : ('%12s' %'< 1')} records/second"
59
+ report = [ "report:" ]
60
+ action = case
61
+ when @dry_run then 'queried'
62
+ when copy_mode? then 'copied'
63
+ else 'deleted'
64
+ end
65
+ report << " #{action}: #{'%12i' % @total_deleted} #{@model.table_name} records"
66
+ report << " elapsed: #{'%12s' % format(elapsed)}"
67
+ report << " rate: #{rate}"
68
+ log :info, report.join("\n")
69
+ end
70
+ end
@@ -14,8 +14,8 @@ class CleanSweep::TableSchema
14
14
 
15
15
  def initialize(model, options={})
16
16
 
17
- traversing_key_name = options[:key_name]
18
- ascending = options.include?(:ascending) ? options[:ascending] : true
17
+ traversing_key_name = options[:index]
18
+ ascending = !options[:reverse]
19
19
  first_only = options[:first_only]
20
20
  @model = model
21
21
  @dest_model = options[:dest_model] || @model
@@ -26,7 +26,7 @@ class CleanSweep::TableSchema
26
26
  @name = @model.table_name
27
27
 
28
28
  @columns =
29
- (options[:extra_columns] || []).map do | extra_col_name |
29
+ (options[:copy_columns] || []).map do | extra_col_name |
30
30
  CleanSweep::TableSchema::ColumnSchema.new extra_col_name, model
31
31
  end
32
32
 
@@ -38,11 +38,15 @@ class CleanSweep::TableSchema
38
38
  raise "Table #{model.table_name} must have a primary key" unless @primary_key
39
39
 
40
40
  @primary_key.add_columns_to @columns
41
- if traversing_key_name
42
- traversing_key_name.downcase!
43
- raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
44
- @traversing_key = key_schemas[traversing_key_name]
45
- @traversing_key.add_columns_to @columns
41
+ if !options[:non_traversing]
42
+ if traversing_key_name
43
+ traversing_key_name.downcase!
44
+ raise "BTREE Index #{traversing_key_name} not found in #@name" unless key_schemas.include? traversing_key_name
45
+ @traversing_key = key_schemas[traversing_key_name]
46
+ @traversing_key.add_columns_to @columns
47
+ else
48
+ @traversing_key = @primary_key
49
+ end
46
50
  @traversing_key.ascending = ascending
47
51
  @traversing_key.first_only = first_only
48
52
  end
@@ -74,7 +78,7 @@ class CleanSweep::TableSchema
74
78
  end
75
79
 
76
80
  def initial_scope
77
- scope = @model.all.select(quoted_column_names).from(from_clause)
81
+ scope = @model.select(quoted_column_names).from(from_clause)
78
82
  scope = @traversing_key.order(scope) if @traversing_key
79
83
  return scope
80
84
  end
@@ -1,3 +1,3 @@
1
1
  module CleanSweep
2
- VERSION = "1.0.3"
2
+ VERSION = "1.0.4"
3
3
  end
@@ -0,0 +1,19 @@
1
+
2
+ # Defines a table that does not have a primary key but does
3
+ # have a unique key.
4
+ class Annotation < ActiveRecord::Base
5
+
6
+ def self.create_table
7
+ connection.execute <<-EOF
8
+ create temporary table if not exists
9
+ annotations (
10
+ `article_id` int(11) NOT NULL,
11
+ `text` varchar(64),
12
+ key `index_on_text` (`text`),
13
+ unique key (`article_id`)
14
+ )
15
+ EOF
16
+ Annotation.delete_all
17
+ end
18
+
19
+ end
@@ -5,7 +5,7 @@ class Comment < ActiveRecord::Base
5
5
  create temporary table if not exists
6
6
  comments (
7
7
  `id` int(11) primary key auto_increment,
8
- `timestamp` datetime,
8
+ `timestamp` date,
9
9
  `account` int(11),
10
10
  `seen` boolean,
11
11
  key comments_on_account_timestamp(account, timestamp),
@@ -19,7 +19,7 @@ end
19
19
 
20
20
  FactoryGirl.define do
21
21
  factory :comment do | comment |
22
- comment.timestamp Time.now
22
+ comment.timestamp Date.new
23
23
  comment.seen false
24
24
  comment.sequence(:account) { | n | (n % 3)* 100 }
25
25
  end
@@ -0,0 +1,49 @@
1
+ class TableWithPrimaryKey < ActiveRecord::Base
2
+
3
+ def self.create_table
4
+ connection.execute <<-EOF
5
+ create temporary table if not exists
6
+ table_with_primary_keys (
7
+ `pk` int(11) primary key auto_increment,
8
+ `k1` int(11),
9
+ `k2` int(11),
10
+ key key_nonunique (k1),
11
+ unique key key_unique (k2)
12
+ )
13
+ EOF
14
+ end
15
+
16
+ end
17
+
18
+ class TableWithUniqueKey < ActiveRecord::Base
19
+
20
+ def self.create_table
21
+ connection.execute <<-EOF
22
+ create temporary table if not exists
23
+ table_with_unique_keys (
24
+ `k1` int(11),
25
+ `k2` int(11),
26
+ key key_nonunique (k1),
27
+ unique key key_unique (k2)
28
+ )
29
+ EOF
30
+ end
31
+
32
+ end
33
+
34
+ class TableWithRegularKey < ActiveRecord::Base
35
+
36
+ def self.create_table
37
+ connection.execute <<-EOF
38
+ create temporary table if not exists
39
+ table_with_regular_keys (
40
+ `k1` int(11),
41
+ `k2` int(11),
42
+ key key_nonunique (k1),
43
+ key key_extra (k2)
44
+ )
45
+ EOF
46
+ end
47
+
48
+ end
49
+
@@ -1,15 +1,17 @@
1
1
  require 'spec_helper'
2
2
 
3
- require 'active_support/testing/time_helpers'
3
+ # Time mocking features are available in Rails 4 but not Rails 3, whereas the Timecop
4
+ # gem works with both.
5
+ require 'timecop'
6
+
4
7
  describe CleanSweep::PurgeRunner do
5
8
 
6
9
  context 'PurgeRunner' do
7
- include ActiveSupport::Testing::TimeHelpers
8
10
  before do
9
- travel_to Time.parse("2014-12-02 13:47:43 -0800")
11
+ Timecop.freeze Time.parse("2014-12-02 13:47:43.000000 -0800")
10
12
  end
11
13
  after do
12
- travel_back
14
+ Timecop.return
13
15
  end
14
16
 
15
17
  context "using comments" do
@@ -66,21 +68,20 @@ describe CleanSweep::PurgeRunner do
66
68
  it 'prints out the queries in a dry run' do
67
69
  purger = CleanSweep::PurgeRunner.new model: Comment,
68
70
  index: 'comments_on_account_timestamp' do | scope |
69
- scope.where('timestamp < ?', 1.week.ago)
71
+ scope.where('timestamp < ?', 1.week.ago.to_date)
70
72
  end
71
- output = StringIO.new
72
- purger.print_queries(output)
73
- expect(output.string).to eq <<EOF
73
+ output = purger.print_queries
74
+ expect(output).to eq <<EOF
74
75
  Initial Query:
75
76
  SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
76
77
  FROM `comments` FORCE INDEX(comments_on_account_timestamp)
77
- WHERE (timestamp < '2014-11-25 21:47:43')
78
+ WHERE (timestamp < '2014-11-25')
78
79
  ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
79
80
  LIMIT 500
80
81
  Chunk Query:
81
82
  SELECT `comments`.`id`,`comments`.`account`,`comments`.`timestamp`
82
83
  FROM `comments` FORCE INDEX(comments_on_account_timestamp)
83
- WHERE (timestamp < '2014-11-25 21:47:43') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18 21:47:43'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
84
+ WHERE (timestamp < '2014-11-25') AND (`comments`.`account` > 0 OR (`comments`.`account` = 0 AND `comments`.`timestamp` > '2014-11-18'))\n ORDER BY `comments`.`account` ASC,`comments`.`timestamp` ASC
84
85
  LIMIT 500
85
86
  Delete Statement:
86
87
  DELETE
@@ -105,7 +106,7 @@ EOF
105
106
  end
106
107
  expect(Comment.count).to eq(5)
107
108
  # Only old comments deleted before stopping
108
- expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
109
+ expect(Comment.where('timestamp >= ?', 4.days.ago.to_date).count).to eq(5)
109
110
  end
110
111
  it "descends the index" do
111
112
  purger = CleanSweep::PurgeRunner.new model: Comment,
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,16 @@
1
1
  ENV['RACK_ENV'] = 'test'
2
2
 
3
+ require "codeclimate-test-reporter"
4
+ CodeClimate::TestReporter.start
3
5
  require 'clean_sweep'
4
6
  require 'factory_girl'
5
7
  require 'fileutils'
6
8
  require 'active_record'
7
9
  require 'mysql2'
10
+ require 'timecop'
8
11
  RSpec.configure do |config|
9
12
  config.include FactoryGirl::Syntax::Methods
10
13
  config.formatter = :progress
11
- #config.order = 'random'
12
-
13
14
  config.before(:suite) do
14
15
  FactoryGirl.find_definitions
15
16
  end
@@ -20,7 +21,7 @@ logdir = File.expand_path "../../log",__FILE__
20
21
  FileUtils.mkdir_p logdir
21
22
  logfile = File.open(File.join(logdir, "test.log"), "w+")
22
23
  ActiveRecord::Base.logger = Logger.new(logfile)
23
-
24
+ Time.zone = 'America/Los_Angeles'
24
25
  database = {
25
26
  encoding: 'utf8',
26
27
  adapter: 'mysql2',
@@ -34,3 +35,4 @@ connection.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
34
35
  database[:database] = db_name
35
36
 
36
37
  ActiveRecord::Base.establish_connection(database)
38
+
@@ -2,110 +2,140 @@ require 'spec_helper'
2
2
 
3
3
  describe CleanSweep::TableSchema do
4
4
 
5
- before do
6
- Comment.create_table
7
- end
8
-
9
- context "using ascending account, timestamp index" do
10
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', ascending: true }
11
-
12
- it 'should read comments' do
13
- expect(schema.primary_key.columns.map(&:name)).to eq([:id])
14
- expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
5
+ context "using sample tables" do
6
+ it 'should pick the primary key' do
7
+ TableWithPrimaryKey.create_table
8
+ schema = CleanSweep::TableSchema.new TableWithPrimaryKey
9
+ expect(schema.primary_key.name).to eq "primary"
10
+ expect(schema.traversing_key.name).to eq "primary"
15
11
  end
16
12
 
17
- it 'should produce an ascending chunk clause' do
18
- rows = account_and_timestamp_rows
19
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
20
- .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-12-01 23:13:25'))")
13
+ it "should identify unique key as primary key" do
14
+ TableWithUniqueKey.create_table
15
+ schema = CleanSweep::TableSchema.new TableWithUniqueKey
16
+ expect(schema.primary_key.name).to eq("key_unique")
17
+ expect(schema.traversing_key.name).to eq "key_unique"
21
18
  end
22
19
 
23
- it 'should produce all select columns' do
24
- expect(schema.column_names).to eq([:id, :account, :timestamp])
20
+ it "should skip the traversing key if non_traversing is true" do
21
+ TableWithUniqueKey.create_table
22
+ schema = CleanSweep::TableSchema.new TableWithUniqueKey, non_traversing: true
23
+ expect(schema.primary_key.name).to eq("key_unique")
24
+ expect(schema.traversing_key).to be_nil
25
25
  end
26
-
27
- it 'should produce the ascending order clause' do
28
- expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
26
+ it "should error out if there is no unique key at all" do
27
+ TableWithRegularKey.create_table
28
+ expect(->{CleanSweep::TableSchema.new TableWithRegularKey}).to raise_exception(RuntimeError, 'Table table_with_regular_keys must have a primary key')
29
29
  end
30
30
 
31
+ end
31
32
 
32
- it 'should produce an insert statement' do
33
- schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp'
34
- rows = account_and_timestamp_rows
35
- expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
33
+ context "on comments" do
34
+ before do
35
+ Comment.create_table
36
36
  end
37
- end
38
37
 
39
- context "using descending account, timestamp index" do
38
+ context "using ascending account, timestamp index" do
39
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', ascending: true }
40
40
 
41
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', ascending: false }
41
+ it 'should read comments' do
42
+ expect(schema.primary_key.columns.map(&:name)).to eq([:id])
43
+ expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
44
+ end
42
45
 
43
- it 'should produce a descending where clause' do
44
- rows = account_and_timestamp_rows
45
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
46
- .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-12-01 23:13:25'))")
47
- end
46
+ it 'should produce an ascending chunk clause' do
47
+ rows = account_and_timestamp_rows
48
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
49
+ .to include("(`comments`.`account` > 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` > '2014-11-29'))")
50
+ end
48
51
 
52
+ it 'should produce all select columns' do
53
+ expect(schema.column_names).to eq([:id, :account, :timestamp])
54
+ end
49
55
 
50
- it 'should produce the descending order clause' do
51
- rows = account_and_timestamp_rows
52
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
53
- .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
56
+ it 'should produce the ascending order clause' do
57
+ expect(schema.initial_scope.to_sql).to include('`comments`.`account` ASC,`comments`.`timestamp` ASC')
58
+ end
59
+
60
+
61
+ it 'should produce an insert statement' do
62
+ schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp'
63
+ rows = account_and_timestamp_rows
64
+ expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1001,5,'2014-12-01'),(1002,2,'2014-11-30'),(1005,5,'2014-11-29')")
65
+ end
54
66
  end
55
67
 
56
- end
68
+ context "using descending account, timestamp index" do
69
+
70
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', reverse: true }
71
+
72
+ it 'should produce a descending where clause' do
73
+ rows = account_and_timestamp_rows
74
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
75
+ .to include("(`comments`.`account` < 5 OR (`comments`.`account` = 5 AND `comments`.`timestamp` < '2014-11-29'))")
76
+ end
77
+
57
78
 
58
- context "using account, timestamp index first column only" do
59
- let(:schema) { CleanSweep::TableSchema.new Comment, key_name:'comments_on_account_timestamp', first_only: true }
79
+ it 'should produce the descending order clause' do
80
+ rows = account_and_timestamp_rows
81
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
82
+ .to include("`comments`.`account` DESC,`comments`.`timestamp` DESC")
83
+ end
60
84
 
61
- it 'should select all the rows' do
62
- expect(schema.column_names).to eq([:id, :account, :timestamp])
63
85
  end
64
86
 
65
- it 'should only query using the first column of the index' do
66
- rows = account_and_timestamp_rows
67
- expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
68
- .to include(" (`comments`.`account` >= 5) ")
87
+ context "using account, timestamp index first column only" do
88
+ let(:schema) { CleanSweep::TableSchema.new Comment, index:'comments_on_account_timestamp', first_only: true }
89
+
90
+ it 'should select all the rows' do
91
+ expect(schema.column_names).to eq([:id, :account, :timestamp])
92
+ end
93
+
94
+ it 'should only query using the first column of the index' do
95
+ rows = account_and_timestamp_rows
96
+ expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
97
+ .to include(" (`comments`.`account` >= 5) ")
98
+
99
+ end
69
100
 
70
101
  end
71
102
 
72
- end
103
+ it 'should not care about case' do
104
+ CleanSweep::TableSchema.new Comment, index: 'primary'
105
+ end
73
106
 
74
- it 'should not care about case' do
75
- CleanSweep::TableSchema.new Comment, key_name: 'primary'
76
- end
107
+ it 'should work without a descending index' do
108
+ schema = CleanSweep::TableSchema.new Comment, non_traversing: true
109
+ expect(schema.primary_key.columns.map(&:name)).to eq([:id])
110
+ expect(schema.traversing_key).to be_nil
111
+ end
77
112
 
78
- it 'should work without a descending index' do
79
- schema = CleanSweep::TableSchema.new Comment
80
- expect(schema.primary_key.columns.map(&:name)).to eq([:id])
81
- expect(schema.traversing_key).to be_nil
82
- end
113
+ it 'should produce minimal select columns' do
114
+ schema = CleanSweep::TableSchema.new Comment, index: 'PRIMARY'
115
+ expect(schema.column_names).to eq([:id])
116
+ end
83
117
 
84
- it 'should produce minimal select columns' do
85
- schema = CleanSweep::TableSchema.new Comment, key_name: 'PRIMARY'
86
- expect(schema.column_names).to eq([:id])
87
- end
118
+ it 'should produce the from clause with an index' do
119
+ schema = CleanSweep::TableSchema.new Comment, index:'comments_on_timestamp'
120
+ expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
121
+ end
88
122
 
89
- it 'should produce the from clause with an index' do
90
- schema = CleanSweep::TableSchema.new Comment, key_name:'comments_on_timestamp'
91
- expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
92
- end
123
+ it 'should include additional columns' do
124
+ schema = CleanSweep::TableSchema.new Comment, index: 'comments_on_account_timestamp', copy_columns: %w[seen id]
125
+ expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
126
+ rows = account_and_timestamp_rows
127
+ rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
128
+ expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-01'),(1,1002,2,'2014-11-30'),(1,1005,5,'2014-11-29')")
93
129
 
94
- it 'should include additional columns' do
95
- schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
96
- expect(schema.column_names).to eq([:seen, :id, :account, :timestamp])
97
- rows = account_and_timestamp_rows
98
- rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
99
- expect(schema.insert_statement(rows)).to eq("insert into `comments` (`comments`.`seen`,`comments`.`id`,`comments`.`account`,`comments`.`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")
130
+ end
100
131
 
101
132
  end
102
133
 
103
-
104
134
  def account_and_timestamp_rows
105
135
  rows = []
106
- t = Time.parse '2014-12-01 17:13:25'
136
+ t = Date.parse '2014-12-01'
107
137
  rows << [1001, 5, t]
108
- rows << [1002, 2, t - 1.hour]
109
- rows << [1005, 5, t - 2.hours]
138
+ rows << [1002, 2, t - 1]
139
+ rows << [1005, 5, t - 2]
110
140
  end
111
141
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cleansweep
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill Kayser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-17 00:00:00.000000000 Z
11
+ date: 2015-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: timecop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.7.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.7.1
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: bundler
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,10 @@ extensions: []
146
160
  extra_rdoc_files: []
147
161
  files:
148
162
  - ".gitignore"
163
+ - ".travis.yml"
149
164
  - CHANGES.md
150
165
  - Gemfile
166
+ - Gemfile.lock
151
167
  - LICENSE.txt
152
168
  - README.md
153
169
  - Rakefile
@@ -162,8 +178,10 @@ files:
162
178
  - lib/clean_sweep/table_schema/index_schema.rb
163
179
  - lib/clean_sweep/version.rb
164
180
  - lib/cleansweep.rb
181
+ - spec/factories/annotations.rb
165
182
  - spec/factories/books.rb
166
183
  - spec/factories/comments.rb
184
+ - spec/factories/tables.rb
167
185
  - spec/purge_runner_spec.rb
168
186
  - spec/spec_helper.rb
169
187
  - spec/table_schema_spec.rb
@@ -192,8 +210,10 @@ signing_key:
192
210
  specification_version: 4
193
211
  summary: Utility to purge or archive rows in mysql tables
194
212
  test_files:
213
+ - spec/factories/annotations.rb
195
214
  - spec/factories/books.rb
196
215
  - spec/factories/comments.rb
216
+ - spec/factories/tables.rb
197
217
  - spec/purge_runner_spec.rb
198
218
  - spec/spec_helper.rb
199
219
  - spec/table_schema_spec.rb