flydata 0.5.17 → 0.5.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/flydata-core/lib/flydata-core/event/event_handler_base.rb +56 -0
- data/flydata-core/lib/flydata-core/event/flydata_event.rb +29 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_handler_registry.rb +11 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_processor.rb +19 -0
- data/flydata-core/lib/flydata-core/event/flydata_event_sender.rb +27 -0
- data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +4 -5
- data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +12 -2
- data/flydata-core/spec/event/flydata_event_handler_registry_spec.rb +33 -0
- data/flydata-core/spec/event/flydata_event_handler_spec.rb +39 -0
- data/flydata-core/spec/event/flydata_event_processor_spec.rb +42 -0
- data/flydata-core/spec/event/flydata_event_sender_spec.rb +33 -0
- data/flydata-core/spec/event/flydata_event_spec.rb +39 -0
- data/flydata-core/spec/event/shared_event.rb +33 -0
- data/flydata-core/spec/mysql/compatibility_checker_spec.rb +4 -5
- data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +25 -25
- data/flydata-core/spec/table_def/redshift_table_def_spec.rb +4 -4
- data/flydata.gemspec +0 -0
- data/lib/flydata/command/start.rb +15 -8
- data/lib/flydata/command/sync.rb +79 -61
- data/lib/flydata/errors.rb +1 -1
- data/lib/flydata/event/api_event_sender.rb +16 -0
- data/lib/flydata/fluent-plugins/mysql/ddl_query_handler.rb +8 -0
- data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +1 -1
- data/lib/flydata/output/forwarder.rb +7 -5
- data/lib/flydata/parser/mysql/dump_parser.rb +76 -12
- data/lib/flydata/queueable_thread.rb +32 -0
- data/lib/flydata/sync_file_manager.rb +45 -0
- data/spec/flydata/command/start_spec.rb +1 -1
- data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +10 -3
- data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +10 -10
- data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +11 -2
- data/spec/flydata/output/forwarder_spec.rb +4 -4
- metadata +15 -2
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'flydata-core/event/flydata_event_sender'
|
2
|
+
require 'flydata-core/event/flydata_event'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module FlydataCore
  module Event
    # Event sender that delivers events through the Flydata API client.
    # Mixed in with Singleton, so callers obtain it via ApiEventSender.instance.
    class ApiEventSender < FlydataEventSender
      include Singleton

      protected

      # Posts the JSON form of +event+ to the "/events/process" endpoint.
      #
      # event       - object responding to #to_json
      # routing_key - accepted for signature compatibility; not used here
      #
      # Returns whatever Flydata::ApiClient#post returns.
      def send(event, routing_key=nil)
        #TODO async when async api code is pushed
        api_client = Flydata::ApiClient.instance
        request_options = {:headers => {:content_type => :json}}
        api_client.post("/events/process", request_options, event.to_json)
      end
    end
  end
end
|
@@ -5,6 +5,14 @@ module Mysql
|
|
5
5
|
class DdlQueryHandler < BinlogQueryHandler
|
6
6
|
DDL_TABLE_QUERY = /^(?:(?:ALTER|CREATE|DROP|RENAME) +(?:\w+ +)*TABLE +([^ ]+)|TRUNCATE +(?:TABLE +)?([^ ;]+))/i
|
7
7
|
|
8
|
+
# Emits a DDL record through the parent implementation.
#
# A ddl event record doesn't have "table_name", so it is filled in from
# table_info(record) before delegating. The same arguments are passed to
# super, and every option object super yields is handed on to the caller's
# block.
def emit_record(type, record)
  ddl_table_name = table_info(record)[:table_name]
  record['table_name'] = ddl_table_name
  super { |opt| yield opt }
end
|
15
|
+
|
8
16
|
def acceptable_db?(record)
|
9
17
|
supported_database == table_info(record)[:db_name]
|
10
18
|
end
|
@@ -60,15 +60,13 @@ module Flydata
|
|
60
60
|
@buffer_record_count += 1
|
61
61
|
@buffer_size += event_data.bytesize
|
62
62
|
end
|
63
|
-
|
64
|
-
send
|
65
|
-
else
|
66
|
-
false
|
67
|
-
end
|
63
|
+
buffer_full?
|
68
64
|
end
|
69
65
|
|
70
66
|
#TODO retry logic
|
71
67
|
def send
|
68
|
+
byte_size = nil
|
69
|
+
record_count = nil
|
72
70
|
if @buffer_size > 0
|
73
71
|
else
|
74
72
|
return false
|
@@ -113,6 +111,10 @@ module Flydata
|
|
113
111
|
{ byte_size: byte_size, record_count: record_count }
|
114
112
|
end
|
115
113
|
|
114
|
+
# Reports whether the buffered data has outgrown the configured limit.
#
# Returns true only when @buffer_size is strictly greater than
# @buffer_size_limit; a buffer exactly at the limit is not considered full.
def buffer_full?
  @buffer_size > @buffer_size_limit
end
|
117
|
+
|
116
118
|
#TODO: Check server status
|
117
119
|
def pickup_server
|
118
120
|
ret_server = @servers[@server_index]
|
@@ -38,8 +38,7 @@ module Flydata
|
|
38
38
|
@value_converters = {}
|
39
39
|
end
|
40
40
|
|
41
|
-
attr_accessor :table_name, :columns, :column_names, :primary_keys
|
42
|
-
attr_reader :value_converters
|
41
|
+
attr_accessor :table_name, :columns, :column_names, :primary_keys, :value_converters
|
43
42
|
|
44
43
|
def add_column(column)
|
45
44
|
cn = column[:column_name]
|
@@ -82,7 +81,7 @@ module Flydata
|
|
82
81
|
dump_cmd = generate_dump_cmd(@conf, file_path)
|
83
82
|
|
84
83
|
# RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
|
85
|
-
table_locker = create_table_locker
|
84
|
+
table_locker = create_table_locker(@conf["database"], @conf["tables"])
|
86
85
|
table_locker.resume # Lock tables
|
87
86
|
|
88
87
|
begin
|
@@ -181,12 +180,12 @@ AND TABLE_SCHEMA IN (%s)
|
|
181
180
|
AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
|
182
181
|
EOS
|
183
182
|
|
184
|
-
def create_table_locker
|
183
|
+
def create_table_locker(database, tables)
|
185
184
|
Fiber.new do
|
186
185
|
# short timeout to avoid blocking other queries
|
187
186
|
client = FlydataMysqlClient.new({reconnect: true, read_timeout:9}.merge(@db_opts))
|
188
187
|
|
189
|
-
q = flush_tables_with_read_lock_query(client)
|
188
|
+
q = flush_tables_with_read_lock_query(client, database, tables)
|
190
189
|
$log.debug "FLUSH TABLES query: #{q}"
|
191
190
|
thread_id = nil
|
192
191
|
begin
|
@@ -238,12 +237,12 @@ EOS
|
|
238
237
|
end
|
239
238
|
end
|
240
239
|
|
241
|
-
def flush_tables_with_read_lock_query(client)
|
240
|
+
def flush_tables_with_read_lock_query(client, database, tbls)
|
241
|
+
tbls ||= []
|
242
242
|
tables = ""
|
243
243
|
if mysql_server_version(client) >= "5.5"
|
244
244
|
# FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
|
245
|
-
|
246
|
-
tables = result.collect{|r| r['tables']}.join(", ")
|
245
|
+
tables = tbls.collect{|t| "`#{database}`.`#{t}`"}.join(",")
|
247
246
|
end
|
248
247
|
FLUSH_TABLES_QUERY_TEMPLATE % [tables]
|
249
248
|
end
|
@@ -307,11 +306,12 @@ EOS
|
|
307
306
|
@option = option
|
308
307
|
end
|
309
308
|
|
310
|
-
def parse(
|
311
|
-
unless
|
309
|
+
def parse(dmpio, create_table_block, insert_record_block, check_point_block)
|
310
|
+
unless dmpio.kind_of?(IO)
|
312
311
|
raise ArgumentError.new("Invalid argument. The first parameter must be io.")
|
313
312
|
end
|
314
313
|
|
314
|
+
dump_io = nil
|
315
315
|
invalid_file = false
|
316
316
|
current_state = State::START
|
317
317
|
substate = nil
|
@@ -449,6 +449,7 @@ EOS
|
|
449
449
|
current_state = State::INSERT_RECORD
|
450
450
|
|
451
451
|
if insert_record_block.call(current_table, values_set)
|
452
|
+
values_set = nil
|
452
453
|
check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
|
453
454
|
end
|
454
455
|
end
|
@@ -457,13 +458,15 @@ EOS
|
|
457
458
|
begin
|
458
459
|
# resume(only when using dump file)
|
459
460
|
if @option[:last_pos] && (@option[:last_pos].to_i != -1)
|
460
|
-
|
461
|
+
dmpio.pos = @option[:last_pos].to_i
|
461
462
|
current_state = @option[:state]
|
462
463
|
substate = @option[:substate]
|
463
464
|
current_table = @option[:mysql_table]
|
464
|
-
bytesize =
|
465
|
+
bytesize = dmpio.pos
|
465
466
|
end
|
466
467
|
|
468
|
+
dump_io = AsyncIO.new(dmpio)
|
469
|
+
|
467
470
|
until dump_io.eof? do
|
468
471
|
case current_state
|
469
472
|
when State::START
|
@@ -480,6 +483,8 @@ EOS
|
|
480
483
|
state_parsing_insert_record.call
|
481
484
|
end
|
482
485
|
end
|
486
|
+
ensure
|
487
|
+
dump_io.close
|
483
488
|
end
|
484
489
|
@binlog_pos
|
485
490
|
end
|
@@ -679,6 +684,65 @@ EOT
|
|
679
684
|
client.close rescue nil
|
680
685
|
end
|
681
686
|
end
|
687
|
+
|
688
|
+
# Read and buffer data in a separate thread.
#
# Wraps an IO-like object (anything responding to #pos, #eof? and #readline)
# and pre-fetches its lines on a background thread into a bounded queue.
# Each queued entry records the line, the source position *before* the line
# was read, and whether the source was at EOF at that point.
class AsyncIO
  MAX_ITEMS = 200

  # io      - source object; read via #pos, #eof? and #readline
  # options - Hash; :max_items overrides the queue capacity (default MAX_ITEMS)
  def initialize(io, options = {})
    max_items = options[:max_items] || MAX_ITEMS
    @io = io
    @queue = SizedQueue.new(max_items)
    @stop = false
    # The first entry is read on the caller's thread so that #pos and #eof?
    # are usable as soon as the constructor returns.
    @queue << read_entry
    @last = @queue.shift
    @thread = Thread.new { run }
  end

  # Returns the current line and advances to the next buffered entry.
  #
  # Raises EOFError when the source is exhausted.
  # Re-raises any error the reader thread hit while reading the source.
  def readline
    entry = current_entry
    raise EOFError, "end of file reached" if entry[:eof]

    @last = @queue.shift
    entry[:line]
  end

  # Position of the source just before the current (not yet consumed) line.
  def pos
    current_entry[:pos]
  end

  # True once every line has been consumed.
  def eof?
    current_entry[:eof]
  end

  # Stops the reader thread and waits for it to finish. Buffered entries
  # that have not been consumed are discarded.
  #
  # Returns the (finished) reader thread.
  def close
    @stop = true
    # Keep emptying the queue until the reader has really exited. Freeing a
    # single slot is not enough: the reader may fill it again and then block
    # on its next push (e.g. the EOF marker), which would hang the join.
    loop do
      drain_queue
      break if @thread.join(0.01)
    end
    @thread
  end

  private

  # Reader thread body: queue entries until EOF has been queued or a stop
  # was requested. A read error is forwarded to the consumer instead of
  # silently killing the thread and leaving the consumer blocked.
  def run
    until @stop
      entry = read_entry
      @queue << entry
      break if entry[:eof]
    end
  rescue StandardError => e
    @queue << { error: e } unless @stop
  end

  # Builds one queue entry from the source without enqueuing it.
  def read_entry
    pos = @io.pos
    eof = @io.eof?
    { line: (eof ? nil : @io.readline), pos: pos, eof: eof }
  end

  # The entry the consumer is currently looking at; raises the reader's
  # error if that entry is an error marker.
  def current_entry
    raise @last[:error] if @last[:error]

    @last
  end

  # Removes everything currently queued without blocking. Uses #shift rather
  # than #clear so that a reader blocked on a full queue is woken up.
  def drain_queue
    loop { @queue.shift(true) }
  rescue ThreadError
    nil # queue is empty
  end
end
|
682
746
|
end
|
683
747
|
end
|
684
748
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Flydata
  # Executes submitted blocks one after another, in submission order, on a
  # single dedicated thread. Pending blocks wait in a SizedQueue, so #run
  # blocks the caller while the queue is at capacity.
  class QueueableThread
    MAX_JOBS = 60

    # max_jobs - capacity of the pending-job queue
    def initialize(max_jobs = MAX_JOBS)
      @stop = false
      @queue = SizedQueue.new(max_jobs)
      @thread = Thread.new { run_loop }
      # A failing job aborts the whole process rather than dying quietly.
      @thread.abort_on_exception = true
    end

    # Queues +block+ for execution on the worker thread.
    def run(&block)
      @queue.push(block)
    end

    # Lets the worker finish everything already queued, then waits for it
    # to terminate.
    def join
      @stop = true
      # The worker may be blocked waiting for a job; a nil entry wakes it up
      # so it can notice the stop request.
      @queue.push(nil) if @queue.empty?
      @thread.join
    end

    private

    # Worker body: keep taking jobs until a stop was requested and nothing
    # is left in the queue. nil entries are wake-up markers and are skipped.
    def run_loop
      until finished?
        job = @queue.shift
        job.call if job
      end
    end

    def finished?
      @stop && @queue.empty?
    end
  end
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'mysql2'
|
2
2
|
require 'flydata-core/query_job'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
module Flydata
|
5
6
|
class SyncFileManager
|
@@ -298,6 +299,50 @@ module Flydata
|
|
298
299
|
new_rev
|
299
300
|
end
|
300
301
|
|
302
|
+
# Deletes the per-table control files of the given tables.
#
# The candidate paths come from the table_position_file_paths,
# table_binlog_pos_paths, table_binlog_pos_init_paths, table_rev_file_paths
# and table_ddl_file_paths helpers; paths that do not exist are skipped.
#
# tables - table names (splat)
#
# Returns nil when no table is given, otherwise the list of candidate paths.
def delete_table_control_files(*tables)
  return if tables.empty?

  files_to_delete = [
    table_position_file_paths(*tables),
    table_binlog_pos_paths(*tables),
    table_binlog_pos_init_paths(*tables),
    table_rev_file_paths(*tables),
    table_ddl_file_paths(*tables)
  ]
  files_to_delete.flatten.each do |path|
    # File.exist? rather than File.exists?, which newer Rubies no longer have
    FileUtils.rm(path) if File.exist?(path)
  end
end
|
315
|
+
|
316
|
+
# Lists the tables that have at least one control file in the table
# positions directory.
#
# Files ending in .pos, .generated_ddl, .init or .rev are considered; the
# table name is taken to be the part of the file name before its first '.'.
#
# Returns an Array of unique table names (empty when nothing matches).
def tables_from_positions_dir
  pattern = File.join(table_positions_dir_path, '*.{pos,generated_ddl,init,rev}')
  # Dir.glob always returns an Array, so no nil check is needed.
  Dir.glob(pattern).map { |control_file| File.basename(control_file).partition('.').first }.uniq
end
|
327
|
+
|
328
|
+
# Deletes the control files that are not tied to a single table: the paths
# returned by dump_file_path, dump_pos_path, mysql_table_marshal_dump_path,
# sync_info_file and stats_path. Paths that do not exist are skipped.
#
# delete_binlog - when true, the paths returned by binlog_path,
#                 sent_binlog_path and lock_pid_file are deleted as well
#
# Returns the list of candidate paths.
def delete_non_table_control_files(delete_binlog = false)
  files_to_delete = [
    dump_file_path,
    dump_pos_path,
    mysql_table_marshal_dump_path,
    sync_info_file,
    stats_path
  ]
  files_to_delete.push(binlog_path, sent_binlog_path, lock_pid_file) if delete_binlog

  files_to_delete.flatten.each do |file_to_delete|
    # File.exist? rather than File.exists?, which newer Rubies no longer have
    FileUtils.rm(file_to_delete) if File.exist?(file_to_delete)
  end
end
|
345
|
+
|
301
346
|
def delete_table_binlog_pos(table_name)
|
302
347
|
file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
|
303
348
|
if File.exists?(file)
|
@@ -14,7 +14,7 @@ module Flydata
|
|
14
14
|
expect(sender).to receive(:start)
|
15
15
|
expect(Flydata::Command::Helper).to receive(:new).and_return(helper)
|
16
16
|
expect(helper).to receive(:stop)
|
17
|
-
expect(
|
17
|
+
expect(subject).to receive(:system) # Use Kernal#system to launch the helper
|
18
18
|
subject.run
|
19
19
|
|
20
20
|
end
|
@@ -19,9 +19,9 @@ module Mysql
|
|
19
19
|
before do
|
20
20
|
parser_class = ParserProvider.parser(:mysql, :mysql_alter_table)
|
21
21
|
allow(parser_class).to receive(:new).and_return(parser)
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
allow_any_instance_of(described_class).to receive(:check_empty_binlog)
|
23
|
+
record.delete('table_name')
|
24
|
+
record['normalized_query'] = "truncate table #{table};"
|
25
25
|
end
|
26
26
|
shared_examples "a process method receiving an exception" do
|
27
27
|
let(:an_error) { "an error" }
|
@@ -54,6 +54,13 @@ module Mysql
|
|
54
54
|
expect(subject.process(record)).to eq(nil)
|
55
55
|
end
|
56
56
|
end
|
57
|
+
context "when event binlog is older than table binlog.pos" do
|
58
|
+
it 'skip sending event' do
|
59
|
+
expect(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120").once
|
60
|
+
expect(ParserProvider).not_to receive(:parser)
|
61
|
+
expect(subject.process(record)).to eq(nil)
|
62
|
+
end
|
63
|
+
end
|
57
64
|
end
|
58
65
|
end
|
59
66
|
end
|
@@ -28,20 +28,20 @@ module Mysql
|
|
28
28
|
r
|
29
29
|
end
|
30
30
|
let(:query) { "a_query" }
|
31
|
-
let(:normalized_query) {
|
31
|
+
let(:normalized_query) { 'a_query' }
|
32
32
|
let(:event_length) { 20 }
|
33
33
|
let(:next_position) { 200 }
|
34
34
|
let(:timestamp) { 1427973738 }
|
35
35
|
let(:record) do
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
36
|
+
{
|
37
|
+
'db_name' => database,
|
38
|
+
'query' => query,
|
39
|
+
'table_name' => table,
|
40
|
+
'normalized_query' => normalized_query,
|
41
|
+
'next_position' => next_position,
|
42
|
+
'event_length' => event_length,
|
43
|
+
'timestamp' => "#{timestamp}",
|
44
|
+
}
|
45
45
|
end
|
46
46
|
subject { described_class.new(context) }
|
47
47
|
end
|
@@ -10,8 +10,9 @@ module Mysql
|
|
10
10
|
let(:truncate_query) { "TRUNCATE table foo" }
|
11
11
|
|
12
12
|
before do
|
13
|
-
|
14
|
-
|
13
|
+
record['query'] = truncate_query
|
14
|
+
record['normalized_query'] = truncate_query
|
15
|
+
record.delete('table_name')
|
15
16
|
end
|
16
17
|
|
17
18
|
shared_examples "process truncate queries correctly" do
|
@@ -74,6 +75,14 @@ module Mysql
|
|
74
75
|
include_examples "skip processing queries"
|
75
76
|
end
|
76
77
|
end
|
78
|
+
|
79
|
+
context 'when per-table binlog pos exists' do
|
80
|
+
let(:truncate_query) { "TRUNCATE #{table}" }
|
81
|
+
before do
|
82
|
+
allow(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120")
|
83
|
+
end
|
84
|
+
include_examples "skip processing queries"
|
85
|
+
end
|
77
86
|
end
|
78
87
|
end
|
79
88
|
end
|
@@ -68,12 +68,12 @@ module Flydata
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
context 'when the buffer size exceeds threthold' do
|
71
|
-
it do
|
71
|
+
it 'returns true but does not flush buffer' do
|
72
72
|
expect(forwarder.emit(record)).to be(false)
|
73
73
|
expect(forwarder.emit(record)).to be(false)
|
74
74
|
expect(forwarder.buffer_record_count).to be(2)
|
75
|
-
expect(forwarder.emit(record)).to eq(
|
76
|
-
expect(forwarder.buffer_record_count).to be(
|
75
|
+
expect(forwarder.emit(record)).to eq(true)
|
76
|
+
expect(forwarder.buffer_record_count).to be(3)
|
77
77
|
end
|
78
78
|
end
|
79
79
|
context 'when the error happens during the data sending' do
|
@@ -91,7 +91,7 @@ module Flydata
|
|
91
91
|
it 'retry and succeed sending data' do
|
92
92
|
forwarder.emit(record)
|
93
93
|
forwarder.emit(record)
|
94
|
-
expect(forwarder.emit(record)).to eq(
|
94
|
+
expect(forwarder.emit(record)).to eq(true)
|
95
95
|
end
|
96
96
|
end
|
97
97
|
end
|