flydata 0.5.17 → 0.5.20

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/flydata-core/lib/flydata-core/event/event_handler_base.rb +56 -0
  4. data/flydata-core/lib/flydata-core/event/flydata_event.rb +29 -0
  5. data/flydata-core/lib/flydata-core/event/flydata_event_handler_registry.rb +11 -0
  6. data/flydata-core/lib/flydata-core/event/flydata_event_processor.rb +19 -0
  7. data/flydata-core/lib/flydata-core/event/flydata_event_sender.rb +27 -0
  8. data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +4 -5
  9. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +12 -2
  10. data/flydata-core/spec/event/flydata_event_handler_registry_spec.rb +33 -0
  11. data/flydata-core/spec/event/flydata_event_handler_spec.rb +39 -0
  12. data/flydata-core/spec/event/flydata_event_processor_spec.rb +42 -0
  13. data/flydata-core/spec/event/flydata_event_sender_spec.rb +33 -0
  14. data/flydata-core/spec/event/flydata_event_spec.rb +39 -0
  15. data/flydata-core/spec/event/shared_event.rb +33 -0
  16. data/flydata-core/spec/mysql/compatibility_checker_spec.rb +4 -5
  17. data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +25 -25
  18. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +4 -4
  19. data/flydata.gemspec +0 -0
  20. data/lib/flydata/command/start.rb +15 -8
  21. data/lib/flydata/command/sync.rb +79 -61
  22. data/lib/flydata/errors.rb +1 -1
  23. data/lib/flydata/event/api_event_sender.rb +16 -0
  24. data/lib/flydata/fluent-plugins/mysql/ddl_query_handler.rb +8 -0
  25. data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +1 -1
  26. data/lib/flydata/output/forwarder.rb +7 -5
  27. data/lib/flydata/parser/mysql/dump_parser.rb +76 -12
  28. data/lib/flydata/queueable_thread.rb +32 -0
  29. data/lib/flydata/sync_file_manager.rb +45 -0
  30. data/spec/flydata/command/start_spec.rb +1 -1
  31. data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +10 -3
  32. data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +10 -10
  33. data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +11 -2
  34. data/spec/flydata/output/forwarder_spec.rb +4 -4
  35. metadata +15 -2
@@ -0,0 +1,16 @@
1
+ require 'flydata-core/event/flydata_event_sender'
2
+ require 'flydata-core/event/flydata_event'
3
+ require 'singleton'
4
+
5
module FlydataCore
  module Event
    # Event sender that delivers events through the Flydata API client.
    # The class mixes in Singleton, so the shared object is obtained with
    # ApiEventSender.instance.
    class ApiEventSender < FlydataEventSender
      include Singleton

      protected

      # POSTs the event, serialized with event.to_json, to "/events/process"
      # with a JSON content-type header.
      # routing_key is accepted but not used by this sender.
      def send(event, routing_key=nil)
        # TODO: async when async api code is pushed
        client = Flydata::ApiClient.instance
        request_options = { :headers => { :content_type => :json } }
        client.post("/events/process", request_options, event.to_json)
      end
    end
  end
end
@@ -5,6 +5,14 @@ module Mysql
5
5
  class DdlQueryHandler < BinlogQueryHandler
6
6
  DDL_TABLE_QUERY = /^(?:(?:ALTER|CREATE|DROP|RENAME) +(?:\w+ +)*TABLE +([^ ]+)|TRUNCATE +(?:TABLE +)?([^ ;]+))/i
7
7
 
8
# Fills in record['table_name'] from table_info(record) and then delegates to
# the parent implementation, forwarding the caller's block.
def emit_record(type, record)
  # ddl event records arrive without "table_name"
  info = table_info(record)
  record['table_name'] = info[:table_name]
  super { |opt| yield opt }
end
15
+
8
16
  def acceptable_db?(record)
9
17
  supported_database == table_info(record)[:db_name]
10
18
  end
@@ -16,7 +16,7 @@ module Mysql
16
16
  def process(record)
17
17
  emit_record(:truncate_table, record) do |opt|
18
18
  {
19
- table_name: table_info(record)[:table_name],
19
+ table_name: record['table_name'],
20
20
  query: record["query"]
21
21
  }
22
22
  end
@@ -60,15 +60,13 @@ module Flydata
60
60
  @buffer_record_count += 1
61
61
  @buffer_size += event_data.bytesize
62
62
  end
63
- if @buffer_size > @buffer_size_limit
64
- send
65
- else
66
- false
67
- end
63
+ buffer_full?
68
64
  end
69
65
 
70
66
  #TODO retry logic
71
67
  def send
68
+ byte_size = nil
69
+ record_count = nil
72
70
  if @buffer_size > 0
73
71
  else
74
72
  return false
@@ -113,6 +111,10 @@ module Flydata
113
111
  { byte_size: byte_size, record_count: record_count }
114
112
  end
115
113
 
114
# Reports whether the buffered byte count has outgrown the configured limit.
# The comparison is strict: a buffer exactly at the limit is not full.
def buffer_full?
  limit = @buffer_size_limit
  @buffer_size > limit
end
117
+
116
118
  #TODO: Check server status
117
119
  def pickup_server
118
120
  ret_server = @servers[@server_index]
@@ -38,8 +38,7 @@ module Flydata
38
38
  @value_converters = {}
39
39
  end
40
40
 
41
- attr_accessor :table_name, :columns, :column_names, :primary_keys
42
- attr_reader :value_converters
41
+ attr_accessor :table_name, :columns, :column_names, :primary_keys, :value_converters
43
42
 
44
43
  def add_column(column)
45
44
  cn = column[:column_name]
@@ -82,7 +81,7 @@ module Flydata
82
81
  dump_cmd = generate_dump_cmd(@conf, file_path)
83
82
 
84
83
  # RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
85
- table_locker = create_table_locker
84
+ table_locker = create_table_locker(@conf["database"], @conf["tables"])
86
85
  table_locker.resume # Lock tables
87
86
 
88
87
  begin
@@ -181,12 +180,12 @@ AND TABLE_SCHEMA IN (%s)
181
180
  AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
182
181
  EOS
183
182
 
184
- def create_table_locker
183
+ def create_table_locker(database, tables)
185
184
  Fiber.new do
186
185
  # short timeout to avoid blocking other queries
187
186
  client = FlydataMysqlClient.new({reconnect: true, read_timeout:9}.merge(@db_opts))
188
187
 
189
- q = flush_tables_with_read_lock_query(client)
188
+ q = flush_tables_with_read_lock_query(client, database, tables)
190
189
  $log.debug "FLUSH TABLES query: #{q}"
191
190
  thread_id = nil
192
191
  begin
@@ -238,12 +237,12 @@ EOS
238
237
  end
239
238
  end
240
239
 
241
- def flush_tables_with_read_lock_query(client)
240
+ def flush_tables_with_read_lock_query(client, database, tbls)
241
+ tbls ||= []
242
242
  tables = ""
243
243
  if mysql_server_version(client) >= "5.5"
244
244
  # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
245
- result = client.query(USER_TABLES_QUERY % ["mysql", client.query_options[:database].to_s].collect{|db| "'#{db}'"}.join(","))
246
- tables = result.collect{|r| r['tables']}.join(", ")
245
+ tables = tbls.collect{|t| "`#{database}`.`#{t}`"}.join(",")
247
246
  end
248
247
  FLUSH_TABLES_QUERY_TEMPLATE % [tables]
249
248
  end
@@ -307,11 +306,12 @@ EOS
307
306
  @option = option
308
307
  end
309
308
 
310
- def parse(dump_io, create_table_block, insert_record_block, check_point_block)
311
- unless dump_io.kind_of?(IO)
309
+ def parse(dmpio, create_table_block, insert_record_block, check_point_block)
310
+ unless dmpio.kind_of?(IO)
312
311
  raise ArgumentError.new("Invalid argument. The first parameter must be io.")
313
312
  end
314
313
 
314
+ dump_io = nil
315
315
  invalid_file = false
316
316
  current_state = State::START
317
317
  substate = nil
@@ -449,6 +449,7 @@ EOS
449
449
  current_state = State::INSERT_RECORD
450
450
 
451
451
  if insert_record_block.call(current_table, values_set)
452
+ values_set = nil
452
453
  check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
453
454
  end
454
455
  end
@@ -457,13 +458,15 @@ EOS
457
458
  begin
458
459
  # resume(only when using dump file)
459
460
  if @option[:last_pos] && (@option[:last_pos].to_i != -1)
460
- dump_io.pos = @option[:last_pos].to_i
461
+ dmpio.pos = @option[:last_pos].to_i
461
462
  current_state = @option[:state]
462
463
  substate = @option[:substate]
463
464
  current_table = @option[:mysql_table]
464
- bytesize = dump_io.pos
465
+ bytesize = dmpio.pos
465
466
  end
466
467
 
468
+ dump_io = AsyncIO.new(dmpio)
469
+
467
470
  until dump_io.eof? do
468
471
  case current_state
469
472
  when State::START
@@ -480,6 +483,8 @@ EOS
480
483
  state_parsing_insert_record.call
481
484
  end
482
485
  end
486
+ ensure
487
+ dump_io.close
483
488
  end
484
489
  @binlog_pos
485
490
  end
@@ -679,6 +684,65 @@ EOT
679
684
  client.close rescue nil
680
685
  end
681
686
  end
687
+
688
# Reads an IO line by line on a background thread and buffers the lines in a
# bounded queue, so the consumer can work on one line while the following
# ones are being read.
#
# Each buffered item records the line, the IO position taken just before that
# line was read, and whether the IO was at EOF. readline, pos and eof? all
# describe the next unread item.
class AsyncIO
  MAX_ITEMS = 200

  # io      - object to read from; pos, eof? and readline are called on it.
  # options - :max_items sets the queue capacity (defaults to MAX_ITEMS).
  def initialize(io, options = {})
    @io = io
    @queue = SizedQueue.new(options[:max_items] || MAX_ITEMS)
    @stop = false
    # Fetch the first item synchronously so pos and eof? can answer at once.
    _readline
    @last = @queue.shift
    @thread = Thread.new { run }
  end

  # Returns the next line and advances to the following buffered item.
  # Raises EOFError when the next item is the EOF marker.
  def readline
    raise EOFError, "end of file reached" if @last[:eof]

    line = @last[:line]
    @last = @queue.shift
    line
  end

  # Position of the underlying IO just before the next unread line.
  def pos
    @last[:pos]
  end

  # True when the next item is the EOF marker.
  def eof?
    @last[:eof]
  end

  # Stops the reader thread and waits for it to finish.
  def close
    @stop = true
    # A reader parked on a full queue cannot see @stop; free one slot so its
    # pending push completes and it gets to re-check the flag.
    @queue.shift if @queue.size == @queue.max
    @thread.join
  end

  private

  # Reader thread body: buffer lines until EOF or until close is requested.
  def run
    until @io.eof?
      return if @stop

      _readline
    end
    # Publish the EOF marker for the consumer. Once close has been requested
    # nobody will consume it, and pushing it onto a queue that is full again
    # would block this thread forever and hang close in Thread#join.
    _readline unless @stop
  end

  # Reads one line (nil at EOF) and enqueues it with the position and EOF
  # state observed before the read. Blocks while the queue is full.
  def _readline
    pos = @io.pos
    eof = @io.eof?
    line = eof ? nil : @io.readline
    @queue << { line: line, pos: pos, eof: eof }
  end
end
682
746
  end
683
747
  end
684
748
  end
@@ -0,0 +1,32 @@
1
module Flydata

  # Executes submitted blocks one at a time, in submission order, on a single
  # background thread that is fed through a bounded queue.
  class QueueableThread
    MAX_JOBS = 60

    # max_jobs - capacity of the job queue.
    def initialize(max_jobs = MAX_JOBS)
      @stop = false
      @queue = SizedQueue.new(max_jobs)
      @thread = Thread.new { run_loop }
      @thread.abort_on_exception = true
    end

    # Enqueues the given block for execution on the worker thread.
    def run(&block)
      @queue.push(block)
    end

    # Requests shutdown and waits for the worker, which exits once the stop
    # flag is set and the queue is empty.
    def join
      @stop = true
      # An idle worker is parked in @queue.shift; a nil entry wakes it up.
      @queue.push(nil) if @queue.empty?
      @thread.join
    end

    private

    # Worker body: pop and call jobs until stop is requested and the queue is
    # drained. nil entries are wake-up markers and are skipped.
    def run_loop
      while true
        break if @stop && @queue.empty?

        job = @queue.shift
        job.call unless job.nil?
      end
    end
  end

end
@@ -1,5 +1,6 @@
1
1
  require 'mysql2'
2
2
  require 'flydata-core/query_job'
3
+ require 'set'
3
4
 
4
5
  module Flydata
5
6
  class SyncFileManager
@@ -298,6 +299,50 @@ module Flydata
298
299
  new_rev
299
300
  end
300
301
 
302
# Deletes the per-table control files of the given tables.
#
# The candidate paths come from table_position_file_paths,
# table_binlog_pos_paths, table_binlog_pos_init_paths, table_rev_file_paths
# and table_ddl_file_paths; paths that do not exist are skipped.
#
# tables - table names. Returns nil without doing anything when none given.
def delete_table_control_files(*tables)
  # A splat parameter is always an Array, so only emptiness needs checking.
  return if tables.empty?

  files_to_delete = [
    table_position_file_paths(*tables),
    table_binlog_pos_paths(*tables),
    table_binlog_pos_init_paths(*tables),
    table_rev_file_paths(*tables),
    table_ddl_file_paths(*tables)
  ].flatten
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  files_to_delete.each do |path|
    FileUtils.rm(path) if File.exist?(path)
  end
end
315
+
316
# Lists the distinct table names that have control files in the table
# positions directory.
#
# Files ending in .pos, .generated_ddl, .init or .rev are considered; the
# table name is the part of the file name before its first dot.
#
# Returns an Array of names, empty when no control file is found.
def tables_from_positions_dir
  pattern = File.join(table_positions_dir_path, '*.{pos,generated_ddl,init,rev}')
  # Dir.glob always returns an Array, so no nil guard is needed.
  Dir.glob(pattern).map { |path| File.basename(path).partition('.').first }.uniq
end
327
+
328
# Deletes the control files that are not tied to a single table: the paths
# returned by dump_file_path, dump_pos_path, mysql_table_marshal_dump_path,
# sync_info_file and stats_path. Paths that do not exist are skipped.
#
# delete_binlog - when true, binlog_path, sent_binlog_path and lock_pid_file
#                 are deleted as well.
def delete_non_table_control_files(delete_binlog = false)
  files_to_delete = [
    dump_file_path,
    dump_pos_path,
    mysql_table_marshal_dump_path,
    sync_info_file,
    stats_path
  ]
  files_to_delete.push(binlog_path, sent_binlog_path, lock_pid_file) if delete_binlog
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  files_to_delete.flatten.each do |file_to_delete|
    FileUtils.rm(file_to_delete) if File.exist?(file_to_delete)
  end
end
345
+
301
346
  def delete_table_binlog_pos(table_name)
302
347
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
303
348
  if File.exists?(file)
@@ -14,7 +14,7 @@ module Flydata
14
14
  expect(sender).to receive(:start)
15
15
  expect(Flydata::Command::Helper).to receive(:new).and_return(helper)
16
16
  expect(helper).to receive(:stop)
17
- expect(helper).to receive(:start)
17
+ expect(subject).to receive(:system) # Use Kernel#system to launch the helper
18
18
  subject.run
19
19
 
20
20
  end
@@ -19,9 +19,9 @@ module Mysql
19
19
  before do
20
20
  parser_class = ParserProvider.parser(:mysql, :mysql_alter_table)
21
21
  allow(parser_class).to receive(:new).and_return(parser)
22
- allow(subject).to receive(:acceptable_table?).and_return(true)
23
- allow(subject).to receive(:acceptable_db?).and_return(true)
24
- allow(subject).to receive(:check_empty_binlog)
22
+ allow_any_instance_of(described_class).to receive(:check_empty_binlog)
23
+ record.delete('table_name')
24
+ record['normalized_query'] = "truncate table #{table};"
25
25
  end
26
26
  shared_examples "a process method receiving an exception" do
27
27
  let(:an_error) { "an error" }
@@ -54,6 +54,13 @@ module Mysql
54
54
  expect(subject.process(record)).to eq(nil)
55
55
  end
56
56
  end
57
+ context "when event binlog is older than table binlog.pos" do
58
+ it 'skip sending event' do
59
+ expect(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120").once
60
+ expect(ParserProvider).not_to receive(:parser)
61
+ expect(subject.process(record)).to eq(nil)
62
+ end
63
+ end
57
64
  end
58
65
  end
59
66
  end
@@ -28,20 +28,20 @@ module Mysql
28
28
  r
29
29
  end
30
30
  let(:query) { "a_query" }
31
- let(:normalized_query) { double('normalized_query') }
31
+ let(:normalized_query) { 'a_query' }
32
32
  let(:event_length) { 20 }
33
33
  let(:next_position) { 200 }
34
34
  let(:timestamp) { 1427973738 }
35
35
  let(:record) do
36
- r = double('record')
37
- allow(r).to receive(:[]).with("db_name").and_return(database)
38
- allow(r).to receive(:[]).with("query").and_return(query)
39
- allow(r).to receive(:[]).with("table_name").and_return(table)
40
- allow(r).to receive(:[]).with("normalized_query").and_return(normalized_query)
41
- allow(r).to receive(:[]).with("next_position").and_return(next_position)
42
- allow(r).to receive(:[]).with("event_length").and_return(event_length)
43
- allow(r).to receive(:[]).with("timestamp").and_return("#{timestamp}")
44
- r
36
+ {
37
+ 'db_name' => database,
38
+ 'query' => query,
39
+ 'table_name' => table,
40
+ 'normalized_query' => normalized_query,
41
+ 'next_position' => next_position,
42
+ 'event_length' => event_length,
43
+ 'timestamp' => "#{timestamp}",
44
+ }
45
45
  end
46
46
  subject { described_class.new(context) }
47
47
  end
@@ -10,8 +10,9 @@ module Mysql
10
10
  let(:truncate_query) { "TRUNCATE table foo" }
11
11
 
12
12
  before do
13
- allow(record).to receive(:[]).with("query").and_return(truncate_query)
14
- allow(record).to receive(:[]).with("normalized_query").and_return(truncate_query)
13
+ record['query'] = truncate_query
14
+ record['normalized_query'] = truncate_query
15
+ record.delete('table_name')
15
16
  end
16
17
 
17
18
  shared_examples "process truncate queries correctly" do
@@ -74,6 +75,14 @@ module Mysql
74
75
  include_examples "skip processing queries"
75
76
  end
76
77
  end
78
+
79
+ context 'when per-table binlog pos exists' do
80
+ let(:truncate_query) { "TRUNCATE #{table}" }
81
+ before do
82
+ allow(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120")
83
+ end
84
+ include_examples "skip processing queries"
85
+ end
77
86
  end
78
87
  end
79
88
  end
@@ -68,12 +68,12 @@ module Flydata
68
68
  end
69
69
  end
70
70
  context 'when the buffer size exceeds threthold' do
71
- it do
71
+ it 'returns true but does not flush buffer' do
72
72
  expect(forwarder.emit(record)).to be(false)
73
73
  expect(forwarder.emit(record)).to be(false)
74
74
  expect(forwarder.buffer_record_count).to be(2)
75
- expect(forwarder.emit(record)).to eq(sent_stats)
76
- expect(forwarder.buffer_record_count).to be(0)
75
+ expect(forwarder.emit(record)).to eq(true)
76
+ expect(forwarder.buffer_record_count).to be(3)
77
77
  end
78
78
  end
79
79
  context 'when the error happens during the data sending' do
@@ -91,7 +91,7 @@ module Flydata
91
91
  it 'retry and succeed sending data' do
92
92
  forwarder.emit(record)
93
93
  forwarder.emit(record)
94
- expect(forwarder.emit(record)).to eq(sent_stats)
94
+ expect(forwarder.emit(record)).to eq(true)
95
95
  end
96
96
  end
97
97
  end