cleansweep 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +201 -0
- data/Rakefile +3 -0
- data/cleansweep.gemspec +34 -0
- data/lib/clean_sweep/purge_runner/logging.rb +38 -0
- data/lib/clean_sweep/purge_runner/mysql_status.rb +82 -0
- data/lib/clean_sweep/purge_runner.rb +211 -0
- data/lib/clean_sweep/purge_stopped.rb +9 -0
- data/lib/clean_sweep/table_schema/column_schema.rb +23 -0
- data/lib/clean_sweep/table_schema/index_schema.rb +72 -0
- data/lib/clean_sweep/table_schema.rb +112 -0
- data/lib/clean_sweep/version.rb +3 -0
- data/lib/clean_sweep.rb +11 -0
- data/lib/cleansweep.rb +1 -0
- data/spec/factories/books.rb +36 -0
- data/spec/factories/comments.rb +26 -0
- data/spec/purge_runner_spec.rb +222 -0
- data/spec/spec_helper.rb +36 -0
- data/spec/table_schema_spec.rb +111 -0
- metadata +199 -0
# Describes a single index on the purged model's table. Knows how to
# contribute its columns to the select list, build an ORDER BY clause,
# and build the WHERE clause that advances traversal to the next chunk.
class CleanSweep::TableSchema::IndexSchema < Struct.new :name, :model, :ascending

  attr_accessor :columns, :name, :model, :ascending, :first_only

  def initialize name, model
    @model   = model
    @columns = []
    @name    = name
  end

  # Append a column (by name) to this index.
  def << col_name
    @columns << CleanSweep::TableSchema::ColumnSchema.new(col_name, model)
  end

  # Take columns referenced by this index and add them to the list if they
  # are not present. Record their position in the list because the position will
  # be where they are located in a row of values passed in later to #scope_to_next_chunk
  def add_columns_to select_columns
    @columns.each do |col|
      position = select_columns.index(col.name)
      unless position
        select_columns << col.name
        position = select_columns.size - 1
      end
      col.select_position = position
    end
  end

  # Apply this index's ORDER BY clause to +scope+ and return the new scope.
  def order(scope)
    dir = ascending ? 'ASC' : 'DESC'
    ordering =
      if @first_only
        "#{columns.first.quoted_name} #{dir}"
      else
        columns.map { |col| "#{col.quoted_name} #{dir}" }.join(",")
      end
    scope.order(ordering)
  end

  # Restrict +scope+ to rows beyond +last_row+ (the final row of the
  # previous chunk), using the memoized chunk clause template.
  def scope_to_next_chunk(scope, last_row)
    query_args =
      if @first_only
        lead = columns.first
        { lead.name => lead.value(last_row) }
      else
        columns.each_with_object({}) do |col, args|
          args[col.name] = col.value(last_row)
        end
      end
    scope.where(chunk_clause, query_args)
  end

  private

  # WHERE-clause template with :name placeholders; built once and cached.
  def chunk_clause
    @chunk_clause ||=
      if @first_only
        # If we're only using the first column, you have to do an inclusive comparison
        "#{columns.first.quoted_name} #{ascending ? ">=" : "<="} :#{columns.first.name}"
      else
        # If you are using all columns of the index, build the expression recursively
        add_term(columns.dup)
      end
  end

  # Recursively build "a > :a OR (a = :a AND b > :b OR (...))" across the
  # remaining columns. Consumes the array it is given (callers pass a dup).
  def add_term(remaining)
    col = remaining.shift
    clause = "#{col.quoted_name} #{ascending ? ">" : "<"} :#{col.name}"
    unless remaining.empty?
      clause << " OR (#{col.quoted_name} = :#{col.name} AND #{add_term remaining})"
    end
    clause
  end
end
# Inspects a model's table and its MySQL indexes to build the SELECT,
# DELETE, and INSERT statements used to traverse and purge/copy rows in
# chunks.
class CleanSweep::TableSchema

  # The list of columns used when selecting, the union of pk and traversing key columns
  attr_reader :select_columns

  # The schema for the primary key
  attr_reader :primary_key

  # The schema for the traversing key, or nil
  attr_reader :traversing_key

  # The table name of the model
  attr_reader :name

  # @param model [Class] ActiveRecord model whose table is traversed
  # @param options [Hash]
  #   :key_name       - name of a BTREE index to traverse (default: primary key only)
  #   :ascending      - traversal direction (default: true)
  #   :first_only     - compare only the first column of the traversing key
  #   :extra_columns  - additional column names to include in the select list
  # @raise [RuntimeError] if the table has no primary key or the named index is missing
  def initialize(model, options={})

    traversing_key_name = options[:key_name]
    ascending = options.include?(:ascending) ? options[:ascending] : true
    first_only = options[:first_only]
    @model = model
    @name = @model.table_name
    @select_columns = (options[:extra_columns] && options[:extra_columns].map(&:to_sym)) || []

    key_schemas = build_indexes

    # Primary key only supported, but we could probably get around this by adding
    # all columns as 'primary key columns'
    raise "Table #{model.table_name} must have a primary key" unless key_schemas.include? 'primary'

    @primary_key = key_schemas['primary']
    @primary_key.add_columns_to @select_columns
    if traversing_key_name
      # Use a non-mutating downcase: downcase! would modify the string the
      # caller passed in as an option, an unexpected side effect.
      traversing_key_name = traversing_key_name.downcase
      raise "BTREE Index #{traversing_key_name} not found" unless key_schemas.include? traversing_key_name
      @traversing_key = key_schemas[traversing_key_name]
      @traversing_key.add_columns_to @select_columns
      @traversing_key.ascending = ascending
      @traversing_key.first_only = first_only
    end

  end

  # Build a multi-row INSERT for copying +rows+ into +target_model+'s table.
  def insert_statement(target_model, rows)
    "insert into #{target_model.quoted_table_name} (#{quoted_column_names}) values #{quoted_row_values(rows)}"
  end

  # Build a DELETE matching each row in +rows+ by its primary key columns.
  def delete_statement(rows)
    rec_criteria = rows.map do | row |
      row_compares = []
      @primary_key.columns.each do |column|
        row_compares << "#{column.quoted_name} = #{column.quoted_value(row)}"
      end
      "(" + row_compares.join(" AND ") + ")"
    end
    "DELETE FROM #{@model.quoted_table_name} WHERE #{rec_criteria.join(" OR ")}"
  end

  # The scope selecting the first chunk: select columns, FORCE INDEX, and
  # the traversing key's ordering (when present).
  def initial_scope
    scope = @model.all.select(quoted_column_names).from(from_clause)
    scope = @traversing_key.order(scope) if @traversing_key
    return scope
  end

  # Advance +scope+ past +last_row+ using the traversing key; a schema with
  # no traversing key returns the scope unchanged.
  def scope_to_next_chunk scope, last_row
    if @traversing_key.blank?
      scope
    else
      @traversing_key.scope_to_next_chunk(scope, last_row)
    end
  end

  # True when the traversing key compares only its first column.
  def first_only?
    @traversing_key && @traversing_key.first_only
  end

  private

  # FROM clause with a FORCE INDEX hint when traversing a named index.
  def from_clause
    table_name = @model.quoted_table_name
    table_name += " FORCE INDEX(#{@traversing_key.name})" if @traversing_key
    return table_name
  end

  def quoted_column_names
    select_columns.map{|c| "`#{c}`"}.join(",")
  end

  # Quote each row's values via the connection and render as "(v,v,...),(...)".
  def quoted_row_values(rows)
    rows.map do |vec|
      quoted_column_values = vec.map do |col_value|
        @model.connection.quote(col_value)
      end.join(",")
      "(#{quoted_column_values})"
    end.join(",")
  end

  # Read `show indexes` and build an IndexSchema per key, keyed by the
  # downcased index name. Only the primary key and BTREE indexes are kept.
  def build_indexes
    indexes = {}
    column_details = @model.connection.select_rows "show indexes from #{@model.quoted_table_name}"
    column_details.each do | col |
      key_name = col[2].downcase
      col_name = col[4].downcase
      type = col[10]
      # BUG FIX: key_name is already downcased, so the original comparison
      # against 'PRIMARY' was always true and never exempted the primary key
      # from the BTREE filter. Compare against 'primary' (as used elsewhere).
      next if key_name != 'primary' && type != 'BTREE' # Only BTREE indexes supported for traversing
      indexes[key_name] ||= IndexSchema.new key_name, @model
      indexes[key_name] << col_name
    end
    return indexes
  end

end
data/lib/clean_sweep.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "clean_sweep/version"
|
2
|
+
|
3
|
+
module CleanSweep
|
4
|
+
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'clean_sweep/purge_stopped'
|
8
|
+
require 'clean_sweep/table_schema'
|
9
|
+
require 'clean_sweep/table_schema/column_schema'
|
10
|
+
require 'clean_sweep/table_schema/index_schema'
|
11
|
+
require 'clean_sweep/purge_runner'
|
data/lib/cleansweep.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'clean_sweep'
|
# Test model backed by a temporary `books` table with a composite
# secondary index (bin, id) used by index-traversal specs.
class Book < ActiveRecord::Base

  # Create the temporary table (no-op when it already exists).
  def self.create_table
    connection.execute <<-EOF
      create temporary table if not exists
         books (
           `id` int(11) auto_increment,
           `bin` int(11),
           `publisher` varchar(64),
           `title` varchar(64),
           primary key (id),
           key book_index_by_bin(bin, id)
         )
    EOF
  end

end

FactoryGirl.define do
  factory :book do |book|
    book.publisher "Random House"
    # Spread books over three bins: 0, 1000, 2000
    book.sequence(:bin) { |n| (n % 3) * 1000 }
    book.sequence(:title) { |n| "Jaws, Part #{n}" }
  end
end

# Destination model for copy specs; mirrors the books table layout.
class BookTemp < ActiveRecord::Base

  self.table_name = 'book_vault'

  # Clone the books table structure into a temporary book_vault table.
  def self.create_table
    connection.execute <<-EOF
      create temporary table if not exists book_vault like books
    EOF
  end
end
# Test model backed by a temporary `comments` table with two secondary
# indexes used by the traversal specs.
class Comment < ActiveRecord::Base

  # Create the temporary table and clear any rows left by earlier examples.
  def self.create_table
    connection.execute <<-EOF
      create temporary table if not exists
         comments (
           `id` int(11) primary key auto_increment,
           `timestamp` datetime,
           `account` int(11),
           `seen` boolean,
           key comments_on_account_timestamp(account, timestamp),
           key comments_on_timestamp(timestamp desc)
         )
    EOF
    connection.execute 'truncate table comments'
  end

end

FactoryGirl.define do
  factory :comment do |comment|
    comment.timestamp Time.now
    comment.seen false
    # Spread comments over three accounts: 0, 100, 200
    comment.sequence(:account) { |n| (n % 3) * 100 }
  end
end
require 'spec_helper'

require 'active_support/testing/time_helpers'
describe CleanSweep::PurgeRunner do

  context 'PurgeRunner' do
    include ActiveSupport::Testing::TimeHelpers
    # Freeze the clock so generated SQL timestamps are stable.
    before do
      travel_to Time.parse("2014-12-02 13:47:43 -0800")
    end
    after do
      travel_back
    end

    context "using comments" do
      before do
        Comment.create_table
      end
      context "with duplicate rows" do

        # This testcase demonstrates a weakness in the index traversal
        # which is that if you aren't using a unique index or the first_only option,
        # you can miss rows.
        #
        # In this case we have some duplicate rows but because the chunk_size is
        # set low, we don't get all the duplicates in one chunk. And they miss
        # the next chunk because we are looking for values greater than the
        # columns in the current chunk.
        #
        # If you use the first_only option it means it builds the where clause using only
        # the first column of the index, and it also uses the >=, <= operators instead
        # of >, <. So it picks up all the rows.
        #
        before do
          10.times { create(:comment, timestamp: 2.weeks.ago, seen: false) }
          10.times { create(:comment, timestamp: 2.weeks.ago, seen: false) }
          10.times { create(:comment, timestamp: 2.days.ago, seen: false) }
          10.times { create(:comment, timestamp: 2.days.ago, seen: false) }
          10.times { create(:comment, timestamp: 2.days.ago, seen: false) }
        end

        it "can miss some rows" do
          purger = CleanSweep::PurgeRunner.new model: Comment,
                                               index: 'comments_on_timestamp',
                                               chunk_size: 7 do |scope|
            scope.where('timestamp < ?', 1.week.ago)
          end
          expect( -> {
            purger.execute_in_batches
          }).to change(Comment, :count).from(50).to(43) # if it deleted all dups this would be 30, not 43
        end
        it "won't miss rows using first_only option" do
          purger = CleanSweep::PurgeRunner.new model: Comment,
                                               index: 'comments_on_timestamp',
                                               first_only: true,
                                               chunk_size: 7 do |scope|
            scope.where('timestamp < ?', 1.week.ago)
          end
          expect( -> {
            purger.execute_in_batches
          }).to change(Comment, :count).from(50).to(30) # first_only picks up every duplicate

        end

        it 'prints out the queries in a dry run' do
          purger = CleanSweep::PurgeRunner.new model: Comment,
                                               index: 'comments_on_account_timestamp' do |scope|
            scope.where('timestamp < ?', 1.week.ago)
          end
          output = StringIO.new
          purger.print_queries(output)
          expect(output.string).to eq <<EOF
Initial Query:
    SELECT `id`,`account`,`timestamp`
      FROM `comments` FORCE INDEX(comments_on_account_timestamp)
      WHERE (timestamp < '2014-11-25 21:47:43')
      ORDER BY `account` ASC,`timestamp` ASC
      LIMIT 500
Chunk Query:
    SELECT `id`,`account`,`timestamp`
      FROM `comments` FORCE INDEX(comments_on_account_timestamp)
      WHERE (timestamp < '2014-11-25 21:47:43') AND (`account` > 0 OR (`account` = 0 AND `timestamp` > '2014-11-18 21:47:43'))\n      ORDER BY `account` ASC,`timestamp` ASC
      LIMIT 500
Delete Statement:
    DELETE
      FROM `comments`
      WHERE (`id` = 2)
EOF
        end
      end
      context "with unique rows" do
        before do
          # Create 10 comments going back 0..9 days...
          10.times { |i| create(:comment, timestamp: i.days.ago) }
        end

        it "ascends the index" do
          purger = CleanSweep::PurgeRunner.new model: Comment,
                                               index: 'comments_on_timestamp',
                                               stop_after: 5
          begin
            purger.execute_in_batches
          rescue CleanSweep::PurgeStopped
          end
          expect(Comment.count).to eq(5)
          # Only old comments deleted before stopping
          expect(Comment.where('timestamp >= ?', 4.days.ago).count).to eq(5)
        end
        it "descends the index" do
          purger = CleanSweep::PurgeRunner.new model: Comment,
                                               index: 'comments_on_timestamp',
                                               reverse: true,
                                               stop_after: 5
          begin
            purger.execute_in_batches
          rescue CleanSweep::PurgeStopped
          end
          # Delete from the most recent comments, so only old ones are left.
          expect(Comment.count).to eq(5)
          expect(Comment.where('timestamp <= ?', 4.days.ago).count).to eq(5)

        end
      end
    end


    context "using books" do

      before do
        @total_book_size = 50
        Book.create_table
        @total_book_size.times { create(:book) }
      end

      after do
        Book.delete_all
      end

      it 'waits for history' do
        purger = CleanSweep::PurgeRunner.new model: Book,
                                             max_history: 100,
                                             chunk_size: 10
        mysql_status = purger.mysql_status
        # 50 books / 10 per chunk = 5 chunks, plus the final empty fetch
        expect(mysql_status).to receive(:check!).exactly(6).times

        purger.execute_in_batches

      end

      it 'should not check when there are no limits' do
        purger = CleanSweep::PurgeRunner.new model: Book,
                                             chunk_size: 4

        expect(purger.mysql_status).to be_nil
      end

      it 'purges books' do
        purger = CleanSweep::PurgeRunner.new model: Book,
                                             chunk_size: 4

        count = purger.execute_in_batches
        expect(count).to be(@total_book_size)
        expect(Book.count).to be 0
      end

      it 'copies books' do
        BookTemp.create_table
        purger = CleanSweep::PurgeRunner.new model: Book,
                                             dest_model: BookTemp,
                                             chunk_size: 4,
                                             index: 'book_index_by_bin'

        count = purger.execute_in_batches
        expect(count).to be(@total_book_size)
        expect(BookTemp.count).to eq(@total_book_size)
      end

    end
  end
end

describe CleanSweep::PurgeRunner::MysqlStatus do

  context "mysql status check tool" do

    let(:mysql_status) do
      CleanSweep::PurgeRunner::MysqlStatus.new model: Book, max_history: 100, max_repl_lag: 100
    end

    before do
      Book.create_table
    end

    it "fetches innodb status" do
      mysql_status.get_replication_lag
    end
    it "checks history and pauses" do
      allow(mysql_status).to receive(:get_history_length).and_return(101, 95, 89)
      expect(mysql_status).to receive(:pause).twice
      mysql_status.check!
    end
    it "checks replication and pauses" do
      allow(mysql_status).to receive(:get_replication_lag).and_return(101, 95, 89)
      expect(mysql_status).to receive(:pause).twice
      mysql_status.check!
    end

    it "checks and continues" do
      allow(mysql_status).to receive(:get_history_length).and_return(80)
      expect(mysql_status).not_to receive(:pause)
      mysql_status.check!
    end

    it "fetches slave status" do
      mysql_status.get_history_length
    end
  end

end

ENV['RACK_ENV'] = 'test'

require 'clean_sweep'
require 'factory_girl'
require 'fileutils'
require 'active_record'
require 'mysql2'
RSpec.configure do |config|
  config.include FactoryGirl::Syntax::Methods
  config.formatter = :progress
  #config.order = 'random'

  config.before(:suite) do
    FactoryGirl.find_definitions
  end

end

# Send ActiveRecord logging to log/test.log next to the project root.
logdir = File.expand_path "../../log", __FILE__
FileUtils.mkdir_p logdir
logfile = File.open(File.join(logdir, "test.log"), "w+")
ActiveRecord::Base.logger = Logger.new(logfile)

# Connection settings; credentials and schema are overridable via ENV.
database = {
    encoding: 'utf8',
    adapter: 'mysql2',
    username: ENV['DB_USERNAME'] || 'root',
    host: 'localhost',
    password: ENV['DB_PASSWORD'],
}
db_name = ENV['DB_SCHEMA'] || 'cstest'
# Create the test schema with a raw client before ActiveRecord connects.
client = Mysql2::Client.new(database)
client.query "CREATE DATABASE IF NOT EXISTS #{db_name}"
database[:database] = db_name

ActiveRecord::Base.establish_connection(database)
require 'spec_helper'

describe CleanSweep::TableSchema do

  before do
    Comment.create_table
  end

  context "using ascending account, timestamp index" do
    let(:schema) { CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', ascending: true }

    it 'should read comments' do
      expect(schema.primary_key.columns.map(&:name)).to eq([:id])
      expect(schema.traversing_key.columns.map(&:name)).to eq([:account, :timestamp])
    end

    it 'should produce an ascending chunk clause' do
      rows = account_and_timestamp_rows
      expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
        .to include("(`account` > 5 OR (`account` = 5 AND `timestamp` > '2014-12-01 23:13:25'))")
    end

    it 'should produce all select columns' do
      expect(schema.select_columns).to eq([:id, :account, :timestamp])
    end

    it 'should produce the ascending order clause' do
      expect(schema.initial_scope.to_sql).to include('`account` ASC,`timestamp` ASC')
    end


    it 'should produce an insert statement' do
      schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp'
      rows = account_and_timestamp_rows
      expect(schema.insert_statement(Comment, rows)).to eq("insert into `comments` (`id`,`account`,`timestamp`) values (1001,5,'2014-12-02 01:13:25'),(1002,2,'2014-12-02 00:13:25'),(1005,5,'2014-12-01 23:13:25')")
    end
  end

  context "using descending account, timestamp index" do

    let(:schema) { CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', ascending: false }

    it 'should produce a descending where clause' do
      rows = account_and_timestamp_rows
      expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
        .to include("(`account` < 5 OR (`account` = 5 AND `timestamp` < '2014-12-01 23:13:25'))")
    end


    it 'should produce the descending order clause' do
      rows = account_and_timestamp_rows
      expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
        .to include("`account` DESC,`timestamp` DESC")
    end

  end

  context "using account, timestamp index first column only" do
    let(:schema) { CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', first_only: true }

    it 'should select all the rows' do
      expect(schema.select_columns).to eq([:id, :account, :timestamp])
    end

    it 'should only query using the first column of the index' do
      rows = account_and_timestamp_rows
      expect(schema.scope_to_next_chunk(schema.initial_scope, rows.last).to_sql)
        .to include(" (`account` >= 5) ")

    end

  end

  it 'should not care about case' do
    CleanSweep::TableSchema.new Comment, key_name: 'primary'
  end

  it 'should work without a descending index' do
    schema = CleanSweep::TableSchema.new Comment
    expect(schema.primary_key.columns.map(&:name)).to eq([:id])
    expect(schema.traversing_key).to be_nil
  end

  it 'should produce minimal select columns' do
    schema = CleanSweep::TableSchema.new Comment, key_name: 'PRIMARY'
    expect(schema.select_columns).to eq([:id])
  end

  it 'should produce the from clause with an index' do
    schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_timestamp'
    expect(schema.initial_scope.to_sql).to include("`comments` FORCE INDEX(comments_on_timestamp)")
  end

  it 'should include additional columns' do
    schema = CleanSweep::TableSchema.new Comment, key_name: 'comments_on_account_timestamp', extra_columns: %w[seen id]
    expect(schema.select_columns).to eq([:seen, :id, :account, :timestamp])
    rows = account_and_timestamp_rows
    rows.map! { |row| row.unshift 1 } # Insert 'seen' value to beginning of row
    expect(schema.insert_statement(Comment, rows)).to eq("insert into `comments` (`seen`,`id`,`account`,`timestamp`) values (1,1001,5,'2014-12-02 01:13:25'),(1,1002,2,'2014-12-02 00:13:25'),(1,1005,5,'2014-12-01 23:13:25')")

  end


  # Three sample [id, account, timestamp] rows spaced an hour apart.
  def account_and_timestamp_rows
    rows = []
    t = Time.parse '2014-12-01 17:13:25'
    rows << [1001, 5, t]
    rows << [1002, 2, t - 1.hour]
    rows << [1005, 5, t - 2.hours]
  end
end