flydata 0.5.17 → 0.5.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/flydata-core/lib/flydata-core/event/event_handler_base.rb +56 -0
- data/flydata-core/lib/flydata-core/event/flydata_event.rb +29 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_handler_registry.rb +11 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_processor.rb +19 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_sender.rb +27 -0
- data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +4 -5
- data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +12 -2
- data/flydata-core/spec/event/flydata_event_handler_registry_spec.rb +33 -0
- data/flydata-core/spec/event/flydata_event_handler_spec.rb +39 -0
- data/flydata-core/spec/event/flydata_event_processor_spec.rb +42 -0
- data/flydata-core/spec/event/flydata_event_sender_spec.rb +33 -0
- data/flydata-core/spec/event/flydata_event_spec.rb +39 -0
- data/flydata-core/spec/event/shared_event.rb +33 -0
- data/flydata-core/spec/mysql/compatibility_checker_spec.rb +4 -5
- data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +25 -25
- data/flydata-core/spec/table_def/redshift_table_def_spec.rb +4 -4
- data/flydata.gemspec +0 -0
- data/lib/flydata/command/start.rb +15 -8
- data/lib/flydata/command/sync.rb +79 -61
- data/lib/flydata/errors.rb +1 -1
- data/lib/flydata/event/api_event_sender.rb +16 -0
- data/lib/flydata/fluent-plugins/mysql/ddl_query_handler.rb +8 -0
- data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +1 -1
- data/lib/flydata/output/forwarder.rb +7 -5
- data/lib/flydata/parser/mysql/dump_parser.rb +76 -12
- data/lib/flydata/queueable_thread.rb +32 -0
- data/lib/flydata/sync_file_manager.rb +45 -0
- data/spec/flydata/command/start_spec.rb +1 -1
- data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +10 -3
- data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +10 -10
- data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +11 -2
- data/spec/flydata/output/forwarder_spec.rb +4 -4
- metadata +15 -2
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'flydata-core/event/flydata_event_sender'
|
2
|
+
require 'flydata-core/event/flydata_event'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module FlydataCore
  module Event
    # Singleton event sender that hands events to Flydata::ApiClient.
    class ApiEventSender < FlydataEventSender
      include Singleton

      protected

      # Posts the JSON form of +event+ to the "/events/process" endpoint with
      # a JSON content-type header. +routing_key+ is accepted but not used by
      # this implementation. Returns whatever ApiClient#post returns.
      def send(event, routing_key = nil)
        #TODO async when async api code is pushed
        client = Flydata::ApiClient.instance
        request_options = { :headers => { :content_type => :json } }
        client.post("/events/process", request_options, event.to_json)
      end
    end
  end
end
|
@@ -5,6 +5,14 @@ module Mysql
|
|
5
5
|
class DdlQueryHandler < BinlogQueryHandler
|
6
6
|
DDL_TABLE_QUERY = /^(?:(?:ALTER|CREATE|DROP|RENAME) +(?:\w+ +)*TABLE +([^ ]+)|TRUNCATE +(?:TABLE +)?([^ ;]+))/i
|
7
7
|
|
8
|
+
# Fills in record['table_name'] from table_info(record) and then delegates
# to the parent implementation with the same +type+ and +record+.
# Each option object yielded by the parent is passed on to the caller's
# block, so a block is required.
def emit_record(type, record)
  # ddl event record doesn't have "table_name"
  record['table_name'] = table_info(record)[:table_name]
  super { |opt| yield opt }
end
|
15
|
+
|
8
16
|
# True when the :db_name reported by table_info(record) equals
# supported_database.
def acceptable_db?(record)
  supported_database == table_info(record)[:db_name]
end
|
@@ -60,15 +60,13 @@ module Flydata
|
|
60
60
|
@buffer_record_count += 1
|
61
61
|
@buffer_size += event_data.bytesize
|
62
62
|
end
|
63
|
-
|
64
|
-
send
|
65
|
-
else
|
66
|
-
false
|
67
|
-
end
|
63
|
+
buffer_full?
|
68
64
|
end
|
69
65
|
|
70
66
|
#TODO retry logic
|
71
67
|
def send
|
68
|
+
byte_size = nil
|
69
|
+
record_count = nil
|
72
70
|
if @buffer_size > 0
|
73
71
|
else
|
74
72
|
return false
|
@@ -113,6 +111,10 @@ module Flydata
|
|
113
111
|
{ byte_size: byte_size, record_count: record_count }
|
114
112
|
end
|
115
113
|
|
114
|
+
# True once the buffered byte count is strictly greater than the configured
# limit; a buffer exactly at the limit is not considered full.
def buffer_full?
  @buffer_size > @buffer_size_limit
end
|
117
|
+
|
116
118
|
#TODO: Check server status
|
117
119
|
def pickup_server
|
118
120
|
ret_server = @servers[@server_index]
|
@@ -38,8 +38,7 @@ module Flydata
|
|
38
38
|
@value_converters = {}
|
39
39
|
end
|
40
40
|
|
41
|
-
attr_accessor :table_name, :columns, :column_names, :primary_keys
|
42
|
-
attr_reader :value_converters
|
41
|
+
attr_accessor :table_name, :columns, :column_names, :primary_keys, :value_converters
|
43
42
|
|
44
43
|
def add_column(column)
|
45
44
|
cn = column[:column_name]
|
@@ -82,7 +81,7 @@ module Flydata
|
|
82
81
|
dump_cmd = generate_dump_cmd(@conf, file_path)
|
83
82
|
|
84
83
|
# RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
|
85
|
-
table_locker = create_table_locker
|
84
|
+
table_locker = create_table_locker(@conf["database"], @conf["tables"])
|
86
85
|
table_locker.resume # Lock tables
|
87
86
|
|
88
87
|
begin
|
@@ -181,12 +180,12 @@ AND TABLE_SCHEMA IN (%s)
|
|
181
180
|
AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
|
182
181
|
EOS
|
183
182
|
|
184
|
-
def create_table_locker
|
183
|
+
def create_table_locker(database, tables)
|
185
184
|
Fiber.new do
|
186
185
|
# short timeout to avoid blocking other queries
|
187
186
|
client = FlydataMysqlClient.new({reconnect: true, read_timeout:9}.merge(@db_opts))
|
188
187
|
|
189
|
-
q = flush_tables_with_read_lock_query(client)
|
188
|
+
q = flush_tables_with_read_lock_query(client, database, tables)
|
190
189
|
$log.debug "FLUSH TABLES query: #{q}"
|
191
190
|
thread_id = nil
|
192
191
|
begin
|
@@ -238,12 +237,12 @@ EOS
|
|
238
237
|
end
|
239
238
|
end
|
240
239
|
|
241
|
-
def flush_tables_with_read_lock_query(client)
|
240
|
+
# Builds the FLUSH TABLES query by substituting a table list into
# FLUSH_TABLES_QUERY_TEMPLATE.
#
# client   - connection handed to mysql_server_version
# database - database name used to qualify each table
# tbls     - table names to lock; nil is treated as an empty list
#
# The table list is left empty unless the server version compares >= "5.5".
# In that case each table is rendered as `database`.`table` and the entries
# are joined with commas.
def flush_tables_with_read_lock_query(client, database, tbls)
  tbls ||= []
  tables = ""
  # NOTE(review): this is a String comparison, so a version such as "10.1"
  # sorts below "5.5" -- confirm what mysql_server_version returns.
  if mysql_server_version(client) >= "5.5"
    # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
    tables = tbls.map { |t| "`#{database}`.`#{t}`" }.join(",")
  end
  FLUSH_TABLES_QUERY_TEMPLATE % [tables]
end
|
@@ -307,11 +306,12 @@ EOS
|
|
307
306
|
@option = option
|
308
307
|
end
|
309
308
|
|
310
|
-
def parse(
|
311
|
-
unless
|
309
|
+
def parse(dmpio, create_table_block, insert_record_block, check_point_block)
|
310
|
+
unless dmpio.kind_of?(IO)
|
312
311
|
raise ArgumentError.new("Invalid argument. The first parameter must be io.")
|
313
312
|
end
|
314
313
|
|
314
|
+
dump_io = nil
|
315
315
|
invalid_file = false
|
316
316
|
current_state = State::START
|
317
317
|
substate = nil
|
@@ -449,6 +449,7 @@ EOS
|
|
449
449
|
current_state = State::INSERT_RECORD
|
450
450
|
|
451
451
|
if insert_record_block.call(current_table, values_set)
|
452
|
+
values_set = nil
|
452
453
|
check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
|
453
454
|
end
|
454
455
|
end
|
@@ -457,13 +458,15 @@ EOS
|
|
457
458
|
begin
|
458
459
|
# resume(only when using dump file)
|
459
460
|
if @option[:last_pos] && (@option[:last_pos].to_i != -1)
|
460
|
-
|
461
|
+
dmpio.pos = @option[:last_pos].to_i
|
461
462
|
current_state = @option[:state]
|
462
463
|
substate = @option[:substate]
|
463
464
|
current_table = @option[:mysql_table]
|
464
|
-
bytesize =
|
465
|
+
bytesize = dmpio.pos
|
465
466
|
end
|
466
467
|
|
468
|
+
dump_io = AsyncIO.new(dmpio)
|
469
|
+
|
467
470
|
until dump_io.eof? do
|
468
471
|
case current_state
|
469
472
|
when State::START
|
@@ -480,6 +483,8 @@ EOS
|
|
480
483
|
state_parsing_insert_record.call
|
481
484
|
end
|
482
485
|
end
|
486
|
+
ensure
|
487
|
+
dump_io.close
|
483
488
|
end
|
484
489
|
@binlog_pos
|
485
490
|
end
|
@@ -679,6 +684,65 @@ EOT
|
|
679
684
|
client.close rescue nil
|
680
685
|
end
|
681
686
|
end
|
687
|
+
|
688
|
+
# Read and buffer data in a separate thread
#
# Wraps an object responding to +pos+, +eof?+ and +readline+. Lines are read
# ahead on a background thread into a bounded queue; each queue entry records
# the line, the position at which the line starts, and whether the end of
# input was reached. The consumer side (+readline+, +pos+, +eof?+) always
# reflects the next unread entry.
class AsyncIO
  MAX_ITEMS = 200

  # io      - the underlying IO-like object
  # options - :max_items sets the queue capacity (defaults to MAX_ITEMS)
  def initialize(io, options = {})
    max_items = options[:max_items] || MAX_ITEMS
    @io = io
    @queue = SizedQueue.new(max_items)
    @stop = false
    # Read the first entry synchronously so pos/eof? are usable right away.
    _readline
    @last = @queue.shift
    @thread = Thread.new(&method(:run))
  end

  # Returns the next line and advances to the following entry.
  # Raises EOFError when the end of input has been reached.
  def readline
    raise EOFError.new("end of file reached") if @last[:eof]

    line = @last[:line]
    @last = @queue.shift
    line
  end

  # Position in the underlying io at which the next unread line starts.
  def pos
    @last[:pos]
  end

  # True when there are no more lines to read.
  def eof?
    @last[:eof]
  end

  # Stops the reader thread and waits for it to finish. The underlying io is
  # not closed.
  def close
    @stop = true
    # remove an item if the queue is full. Otherwise, the thread will not
    # wake up.
    @queue.shift if @queue.size == @queue.max
    @thread.join
  end

  private

  # Reader thread body: keeps queueing lines until end of input or stop.
  def run
    until @io.eof?
      return if @stop

      _readline
    end
    # Queue the EOF marker, but not while closing: close frees at most one
    # slot, so a second push after the stop request could block forever and
    # hang the join in close.
    _readline unless @stop
  end

  # Reads one line (or the EOF marker) and pushes it onto the queue,
  # blocking while the queue is full.
  def _readline
    pos = @io.pos
    eof = @io.eof?
    line = eof ? nil : @io.readline
    @queue << { line: line, pos: pos, eof: eof }
  end
end
|
682
746
|
end
|
683
747
|
end
|
684
748
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Flydata

  # Runs submitted blocks one at a time, in submission order, on a single
  # background thread. Jobs wait in a bounded queue, so +run+ blocks the
  # caller while the queue is full.
  class QueueableThread
    MAX_JOBS = 60

    # max_jobs - capacity of the job queue
    def initialize(max_jobs = MAX_JOBS)
      @queue = SizedQueue.new(max_jobs)
      @stop = false
      @thread = Thread.new(&method(:run_loop))
      @thread.abort_on_exception = true
    end

    # Queues +block+ for execution on the worker thread.
    def run(&block)
      @queue << block
    end

    # Asks the worker to stop once the queue has drained and waits for it.
    def join
      @stop = true
      @queue << nil if @queue.empty? # wake up the thread
      @thread.join
    end

    private

    # Worker body: pops jobs until a stop was requested and the queue is
    # empty. A nil entry is only a wake-up signal and is skipped.
    def run_loop
      until @stop && @queue.empty?
        job = @queue.shift
        job.call if job
      end
    end
  end

end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'mysql2'
|
2
2
|
require 'flydata-core/query_job'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
module Flydata
|
5
6
|
class SyncFileManager
|
@@ -298,6 +299,50 @@ module Flydata
|
|
298
299
|
new_rev
|
299
300
|
end
|
300
301
|
|
302
|
+
# Removes the per-table control files of the given tables: the paths
# returned by the table_position_file_paths, table_binlog_pos_paths,
# table_binlog_pos_init_paths, table_rev_file_paths and table_ddl_file_paths
# helpers. Paths that do not exist are skipped.
# Does nothing (and returns nil) when no table is given.
def delete_table_control_files(*tables)
  return if tables.empty?

  files_to_delete = [
    table_position_file_paths(*tables),
    table_binlog_pos_paths(*tables),
    table_binlog_pos_init_paths(*tables),
    table_rev_file_paths(*tables),
    table_ddl_file_paths(*tables)
  ]
  files_to_delete.flatten.each do |path|
    # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
    FileUtils.rm(path) if File.exist?(path)
  end
end
|
315
|
+
|
316
|
+
# Lists the distinct table names that have a control file (*.pos,
# *.generated_ddl, *.init or *.rev) in the table positions directory.
# The table name is the part of the file name before its first dot.
# Returns an Array, empty when no control file is found.
def tables_from_positions_dir
  pattern = File.join(table_positions_dir_path, '*.{pos,generated_ddl,init,rev}')
  # Dir.glob always returns an Array, so no nil check is needed.
  Dir.glob(pattern).map { |path| File.basename(path)[/\A[^.]*/] }.uniq
end
|
327
|
+
|
328
|
+
# Removes the control files that are not tied to a single table: the paths
# returned by dump_file_path, dump_pos_path, mysql_table_marshal_dump_path,
# sync_info_file and stats_path. When +delete_binlog+ is true, binlog_path,
# sent_binlog_path and lock_pid_file are removed as well.
# Paths that do not exist are skipped.
def delete_non_table_control_files(delete_binlog = false)
  files_to_delete = [
    dump_file_path,
    dump_pos_path,
    mysql_table_marshal_dump_path,
    sync_info_file,
    stats_path
  ]
  if delete_binlog
    files_to_delete << binlog_path
    files_to_delete << sent_binlog_path
    files_to_delete << lock_pid_file
  end
  files_to_delete.flatten.each do |file_to_delete|
    # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
    FileUtils.rm(file_to_delete) if File.exist?(file_to_delete)
  end
end
|
345
|
+
|
301
346
|
def delete_table_binlog_pos(table_name)
|
302
347
|
file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
|
303
348
|
if File.exists?(file)
|
@@ -14,7 +14,7 @@ module Flydata
|
|
14
14
|
expect(sender).to receive(:start)
|
15
15
|
expect(Flydata::Command::Helper).to receive(:new).and_return(helper)
|
16
16
|
expect(helper).to receive(:stop)
|
17
|
-
expect(
|
17
|
+
expect(subject).to receive(:system) # Use Kernel#system to launch the helper
|
18
18
|
subject.run
|
19
19
|
|
20
20
|
end
|
@@ -19,9 +19,9 @@ module Mysql
|
|
19
19
|
before do
|
20
20
|
parser_class = ParserProvider.parser(:mysql, :mysql_alter_table)
|
21
21
|
allow(parser_class).to receive(:new).and_return(parser)
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
allow_any_instance_of(described_class).to receive(:check_empty_binlog)
|
23
|
+
record.delete('table_name')
|
24
|
+
record['normalized_query'] = "truncate table #{table};"
|
25
25
|
end
|
26
26
|
shared_examples "a process method receiving an exception" do
|
27
27
|
let(:an_error) { "an error" }
|
@@ -54,6 +54,13 @@ module Mysql
|
|
54
54
|
expect(subject.process(record)).to eq(nil)
|
55
55
|
end
|
56
56
|
end
|
57
|
+
context "when event binlog is older than table binlog.pos" do
|
58
|
+
it 'skip sending event' do
|
59
|
+
expect(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120").once
|
60
|
+
expect(ParserProvider).not_to receive(:parser)
|
61
|
+
expect(subject.process(record)).to eq(nil)
|
62
|
+
end
|
63
|
+
end
|
57
64
|
end
|
58
65
|
end
|
59
66
|
end
|
@@ -28,20 +28,20 @@ module Mysql
|
|
28
28
|
r
|
29
29
|
end
|
30
30
|
let(:query) { "a_query" }
|
31
|
-
let(:normalized_query) {
|
31
|
+
let(:normalized_query) { 'a_query' }
|
32
32
|
let(:event_length) { 20 }
|
33
33
|
let(:next_position) { 200 }
|
34
34
|
let(:timestamp) { 1427973738 }
|
35
35
|
let(:record) do
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
36
|
+
{
|
37
|
+
'db_name' => database,
|
38
|
+
'query' => query,
|
39
|
+
'table_name' => table,
|
40
|
+
'normalized_query' => normalized_query,
|
41
|
+
'next_position' => next_position,
|
42
|
+
'event_length' => event_length,
|
43
|
+
'timestamp' => "#{timestamp}",
|
44
|
+
}
|
45
45
|
end
|
46
46
|
subject { described_class.new(context) }
|
47
47
|
end
|
@@ -10,8 +10,9 @@ module Mysql
|
|
10
10
|
let(:truncate_query) { "TRUNCATE table foo" }
|
11
11
|
|
12
12
|
before do
|
13
|
-
|
14
|
-
|
13
|
+
record['query'] = truncate_query
|
14
|
+
record['normalized_query'] = truncate_query
|
15
|
+
record.delete('table_name')
|
15
16
|
end
|
16
17
|
|
17
18
|
shared_examples "process truncate queries correctly" do
|
@@ -74,6 +75,14 @@ module Mysql
|
|
74
75
|
include_examples "skip processing queries"
|
75
76
|
end
|
76
77
|
end
|
78
|
+
|
79
|
+
context 'when per-table binlog pos exists' do
|
80
|
+
let(:truncate_query) { "TRUNCATE #{table}" }
|
81
|
+
before do
|
82
|
+
allow(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120")
|
83
|
+
end
|
84
|
+
include_examples "skip processing queries"
|
85
|
+
end
|
77
86
|
end
|
78
87
|
end
|
79
88
|
end
|
@@ -68,12 +68,12 @@ module Flydata
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
context 'when the buffer size exceeds threthold' do
|
71
|
-
it do
|
71
|
+
it 'returns true but does not flush buffer' do
|
72
72
|
expect(forwarder.emit(record)).to be(false)
|
73
73
|
expect(forwarder.emit(record)).to be(false)
|
74
74
|
expect(forwarder.buffer_record_count).to be(2)
|
75
|
-
expect(forwarder.emit(record)).to eq(
|
76
|
-
expect(forwarder.buffer_record_count).to be(
|
75
|
+
expect(forwarder.emit(record)).to eq(true)
|
76
|
+
expect(forwarder.buffer_record_count).to be(3)
|
77
77
|
end
|
78
78
|
end
|
79
79
|
context 'when the error happens during the data sending' do
|
@@ -91,7 +91,7 @@ module Flydata
|
|
91
91
|
it 'retry and succeed sending data' do
|
92
92
|
forwarder.emit(record)
|
93
93
|
forwarder.emit(record)
|
94
|
-
expect(forwarder.emit(record)).to eq(
|
94
|
+
expect(forwarder.emit(record)).to eq(true)
|
95
95
|
end
|
96
96
|
end
|
97
97
|
end
|