flydata 0.5.17 → 0.5.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/flydata-core/lib/flydata-core/event/event_handler_base.rb +56 -0
  4. data/flydata-core/lib/flydata-core/event/flydata_event.rb +29 -0
  5. data/flydata-core/lib/flydata-core/event/flydata_event_handler_registry.rb +11 -0
  6. data/flydata-core/lib/flydata-core/event/flydata_event_processor.rb +19 -0
  7. data/flydata-core/lib/flydata-core/event/flydata_event_sender.rb +27 -0
  8. data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +4 -5
  9. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +12 -2
  10. data/flydata-core/spec/event/flydata_event_handler_registry_spec.rb +33 -0
  11. data/flydata-core/spec/event/flydata_event_handler_spec.rb +39 -0
  12. data/flydata-core/spec/event/flydata_event_processor_spec.rb +42 -0
  13. data/flydata-core/spec/event/flydata_event_sender_spec.rb +33 -0
  14. data/flydata-core/spec/event/flydata_event_spec.rb +39 -0
  15. data/flydata-core/spec/event/shared_event.rb +33 -0
  16. data/flydata-core/spec/mysql/compatibility_checker_spec.rb +4 -5
  17. data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +25 -25
  18. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +4 -4
  19. data/flydata.gemspec +0 -0
  20. data/lib/flydata/command/start.rb +15 -8
  21. data/lib/flydata/command/sync.rb +79 -61
  22. data/lib/flydata/errors.rb +1 -1
  23. data/lib/flydata/event/api_event_sender.rb +16 -0
  24. data/lib/flydata/fluent-plugins/mysql/ddl_query_handler.rb +8 -0
  25. data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +1 -1
  26. data/lib/flydata/output/forwarder.rb +7 -5
  27. data/lib/flydata/parser/mysql/dump_parser.rb +76 -12
  28. data/lib/flydata/queueable_thread.rb +32 -0
  29. data/lib/flydata/sync_file_manager.rb +45 -0
  30. data/spec/flydata/command/start_spec.rb +1 -1
  31. data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +10 -3
  32. data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +10 -10
  33. data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +11 -2
  34. data/spec/flydata/output/forwarder_spec.rb +4 -4
  35. metadata +15 -2
@@ -0,0 +1,16 @@
1
+ require 'flydata-core/event/flydata_event_sender'
2
+ require 'flydata-core/event/flydata_event'
3
+ require 'singleton'
4
+
5
+ module FlydataCore
6
+ module Event
7
+ class ApiEventSender <FlydataEventSender
8
+ include Singleton
9
+ protected
10
+ def send(event, routing_key=nil)
11
+ #TODO async when async api code is pushed
12
+ Flydata::ApiClient.instance.post("/events/process",{:headers => {:content_type => :json}}, event.to_json)
13
+ end
14
+ end
15
+ end
16
+ end
@@ -5,6 +5,14 @@ module Mysql
5
5
  class DdlQueryHandler < BinlogQueryHandler
6
6
  DDL_TABLE_QUERY = /^(?:(?:ALTER|CREATE|DROP|RENAME) +(?:\w+ +)*TABLE +([^ ]+)|TRUNCATE +(?:TABLE +)?([^ ;]+))/i
7
7
 
8
+ def emit_record(type, record)
9
+ # ddl event record doesn't have "table_name"
10
+ record['table_name'] = table_info(record)[:table_name]
11
+ super do |opt|
12
+ yield opt
13
+ end
14
+ end
15
+
8
16
  def acceptable_db?(record)
9
17
  supported_database == table_info(record)[:db_name]
10
18
  end
@@ -16,7 +16,7 @@ module Mysql
16
16
  def process(record)
17
17
  emit_record(:truncate_table, record) do |opt|
18
18
  {
19
- table_name: table_info(record)[:table_name],
19
+ table_name: record['table_name'],
20
20
  query: record["query"]
21
21
  }
22
22
  end
@@ -60,15 +60,13 @@ module Flydata
60
60
  @buffer_record_count += 1
61
61
  @buffer_size += event_data.bytesize
62
62
  end
63
- if @buffer_size > @buffer_size_limit
64
- send
65
- else
66
- false
67
- end
63
+ buffer_full?
68
64
  end
69
65
 
70
66
  #TODO retry logic
71
67
  def send
68
+ byte_size = nil
69
+ record_count = nil
72
70
  if @buffer_size > 0
73
71
  else
74
72
  return false
@@ -113,6 +111,10 @@ module Flydata
113
111
  { byte_size: byte_size, record_count: record_count }
114
112
  end
115
113
 
114
+ def buffer_full?
115
+ @buffer_size > @buffer_size_limit
116
+ end
117
+
116
118
  #TODO: Check server status
117
119
  def pickup_server
118
120
  ret_server = @servers[@server_index]
@@ -38,8 +38,7 @@ module Flydata
38
38
  @value_converters = {}
39
39
  end
40
40
 
41
- attr_accessor :table_name, :columns, :column_names, :primary_keys
42
- attr_reader :value_converters
41
+ attr_accessor :table_name, :columns, :column_names, :primary_keys, :value_converters
43
42
 
44
43
  def add_column(column)
45
44
  cn = column[:column_name]
@@ -82,7 +81,7 @@ module Flydata
82
81
  dump_cmd = generate_dump_cmd(@conf, file_path)
83
82
 
84
83
  # RDS doesn't allow obtaining binlog position using mysqldump. Get it separately and insert it into the dump file.
85
- table_locker = create_table_locker
84
+ table_locker = create_table_locker(@conf["database"], @conf["tables"])
86
85
  table_locker.resume # Lock tables
87
86
 
88
87
  begin
@@ -181,12 +180,12 @@ AND TABLE_SCHEMA IN (%s)
181
180
  AND ENGINE NOT IN ('MEMORY', 'CSV', 'PERFORMANCE_SCHEMA');
182
181
  EOS
183
182
 
184
- def create_table_locker
183
+ def create_table_locker(database, tables)
185
184
  Fiber.new do
186
185
  # short timeout to avoid blocking other queries
187
186
  client = FlydataMysqlClient.new({reconnect: true, read_timeout:9}.merge(@db_opts))
188
187
 
189
- q = flush_tables_with_read_lock_query(client)
188
+ q = flush_tables_with_read_lock_query(client, database, tables)
190
189
  $log.debug "FLUSH TABLES query: #{q}"
191
190
  thread_id = nil
192
191
  begin
@@ -238,12 +237,12 @@ EOS
238
237
  end
239
238
  end
240
239
 
241
- def flush_tables_with_read_lock_query(client)
240
+ def flush_tables_with_read_lock_query(client, database, tbls)
241
+ tbls ||= []
242
242
  tables = ""
243
243
  if mysql_server_version(client) >= "5.5"
244
244
  # FLUSH TABLES table_names,... WITH READ LOCK syntax is supported from MySQL 5.5
245
- result = client.query(USER_TABLES_QUERY % ["mysql", client.query_options[:database].to_s].collect{|db| "'#{db}'"}.join(","))
246
- tables = result.collect{|r| r['tables']}.join(", ")
245
+ tables = tbls.collect{|t| "`#{database}`.`#{t}`"}.join(",")
247
246
  end
248
247
  FLUSH_TABLES_QUERY_TEMPLATE % [tables]
249
248
  end
@@ -307,11 +306,12 @@ EOS
307
306
  @option = option
308
307
  end
309
308
 
310
- def parse(dump_io, create_table_block, insert_record_block, check_point_block)
311
- unless dump_io.kind_of?(IO)
309
+ def parse(dmpio, create_table_block, insert_record_block, check_point_block)
310
+ unless dmpio.kind_of?(IO)
312
311
  raise ArgumentError.new("Invalid argument. The first parameter must be io.")
313
312
  end
314
313
 
314
+ dump_io = nil
315
315
  invalid_file = false
316
316
  current_state = State::START
317
317
  substate = nil
@@ -449,6 +449,7 @@ EOS
449
449
  current_state = State::INSERT_RECORD
450
450
 
451
451
  if insert_record_block.call(current_table, values_set)
452
+ values_set = nil
452
453
  check_point_block.call(current_table, dump_io.pos, bytesize, @binlog_pos, current_state)
453
454
  end
454
455
  end
@@ -457,13 +458,15 @@ EOS
457
458
  begin
458
459
  # resume(only when using dump file)
459
460
  if @option[:last_pos] && (@option[:last_pos].to_i != -1)
460
- dump_io.pos = @option[:last_pos].to_i
461
+ dmpio.pos = @option[:last_pos].to_i
461
462
  current_state = @option[:state]
462
463
  substate = @option[:substate]
463
464
  current_table = @option[:mysql_table]
464
- bytesize = dump_io.pos
465
+ bytesize = dmpio.pos
465
466
  end
466
467
 
468
+ dump_io = AsyncIO.new(dmpio)
469
+
467
470
  until dump_io.eof? do
468
471
  case current_state
469
472
  when State::START
@@ -480,6 +483,8 @@ EOS
480
483
  state_parsing_insert_record.call
481
484
  end
482
485
  end
486
+ ensure
487
+ dump_io.close
483
488
  end
484
489
  @binlog_pos
485
490
  end
@@ -679,6 +684,65 @@ EOT
679
684
  client.close rescue nil
680
685
  end
681
686
  end
687
+
688
+ # Read and buffer data in a separate thread
689
+ class AsyncIO
690
+ MAX_ITEMS = 200
691
+ def initialize(io, options = {})
692
+ max_items = options[:max_items] ? options[:max_items] : MAX_ITEMS
693
+ @io = io
694
+ @queue = SizedQueue.new(max_items)
695
+ _readline
696
+ @last = @queue.shift
697
+ @stop = false
698
+ @thread = Thread.new(&method(:run))
699
+ end
700
+
701
+ def readline
702
+ if @last[:eof]
703
+ raise EOFError.new("end of file reached")
704
+ else
705
+ result = @last[:line]
706
+ @last = @queue.shift
707
+ result
708
+ end
709
+ end
710
+
711
+ def pos
712
+ @last[:pos]
713
+ end
714
+
715
+ def eof?
716
+ @last[:eof]
717
+ end
718
+
719
+ def close
720
+ @stop = true
721
+ # remove an item if the queue is full. Otherwise, the thread will not
722
+ # wake up.
723
+ @queue.shift if @queue.size == @queue.max
724
+ @thread.join
725
+ end
726
+
727
+ private
728
+
729
+ def run
730
+ until @io.eof?
731
+ return if @stop
732
+
733
+ _readline
734
+ end
735
+ _readline
736
+ end
737
+
738
+ def _readline
739
+ pos = @io.pos
740
+ eof = @io.eof?
741
+ line = eof ? nil : @io.readline
742
+ data = { line: line, pos: pos, eof: eof }
743
+ @queue << data
744
+ end
745
+ end
682
746
  end
683
747
  end
684
748
  end
@@ -0,0 +1,32 @@
1
+ module Flydata
2
+
3
+ class QueueableThread
4
+ MAX_JOBS = 60
5
+ def initialize(max_jobs = MAX_JOBS)
6
+ @queue = SizedQueue.new(max_jobs)
7
+ @stop = false
8
+ @thread = Thread.new(&method(:run_loop))
9
+ @thread.abort_on_exception = true
10
+ end
11
+
12
+ def run(&block)
13
+ @queue << block
14
+ end
15
+
16
+ def join
17
+ @stop = true
18
+ @queue << nil if @queue.empty? # wake up the thread
19
+ @thread.join
20
+ end
21
+
22
+ private
23
+
24
+ def run_loop
25
+ until @stop && @queue.empty?
26
+ block = @queue.shift
27
+ block.call if block
28
+ end
29
+ end
30
+ end
31
+
32
+ end
@@ -1,5 +1,6 @@
1
1
  require 'mysql2'
2
2
  require 'flydata-core/query_job'
3
+ require 'set'
3
4
 
4
5
  module Flydata
5
6
  class SyncFileManager
@@ -298,6 +299,50 @@ module Flydata
298
299
  new_rev
299
300
  end
300
301
 
302
+ def delete_table_control_files(*tables)
303
+ return if (tables.nil? or tables.empty?)
304
+ files_to_delete = [
305
+ table_position_file_paths(*tables),
306
+ table_binlog_pos_paths(*tables),
307
+ table_binlog_pos_init_paths(*tables),
308
+ table_rev_file_paths(*tables),
309
+ table_ddl_file_paths(*tables)
310
+ ]
311
+ files_to_delete.flatten.each do |path|
312
+ FileUtils.rm(path) if File.exists?(path)
313
+ end
314
+ end
315
+
316
+ def tables_from_positions_dir
317
+ all_table_control_files = Dir.glob(File.join(table_positions_dir_path, '*.{pos,generated_ddl,init,rev}'))
318
+ return if all_table_control_files.nil?
319
+ tables = Set.new
320
+ all_table_control_files.each do |control_file|
321
+ file_name = File.basename(control_file)
322
+ file_name = file_name.slice(0...(file_name.index('.')))
323
+ tables << file_name
324
+ end
325
+ tables.to_a
326
+ end
327
+
328
+ def delete_non_table_control_files(delete_binlog= false)
329
+ files_to_delete = [
330
+ dump_file_path,
331
+ dump_pos_path,
332
+ mysql_table_marshal_dump_path,
333
+ sync_info_file,
334
+ stats_path
335
+ ]
336
+ if delete_binlog
337
+ files_to_delete << binlog_path
338
+ files_to_delete << sent_binlog_path
339
+ files_to_delete << lock_pid_file
340
+ end
341
+ files_to_delete.flatten.each do |file_to_delete|
342
+ FileUtils.rm(file_to_delete) if File.exists?(file_to_delete)
343
+ end
344
+ end
345
+
301
346
  def delete_table_binlog_pos(table_name)
302
347
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
303
348
  if File.exists?(file)
@@ -14,7 +14,7 @@ module Flydata
14
14
  expect(sender).to receive(:start)
15
15
  expect(Flydata::Command::Helper).to receive(:new).and_return(helper)
16
16
  expect(helper).to receive(:stop)
17
- expect(helper).to receive(:start)
17
+ expect(subject).to receive(:system) # Use Kernal#system to launch the helper
18
18
  subject.run
19
19
 
20
20
  end
@@ -19,9 +19,9 @@ module Mysql
19
19
  before do
20
20
  parser_class = ParserProvider.parser(:mysql, :mysql_alter_table)
21
21
  allow(parser_class).to receive(:new).and_return(parser)
22
- allow(subject).to receive(:acceptable_table?).and_return(true)
23
- allow(subject).to receive(:acceptable_db?).and_return(true)
24
- allow(subject).to receive(:check_empty_binlog)
22
+ allow_any_instance_of(described_class).to receive(:check_empty_binlog)
23
+ record.delete('table_name')
24
+ record['normalized_query'] = "truncate table #{table};"
25
25
  end
26
26
  shared_examples "a process method receiving an exception" do
27
27
  let(:an_error) { "an error" }
@@ -54,6 +54,13 @@ module Mysql
54
54
  expect(subject.process(record)).to eq(nil)
55
55
  end
56
56
  end
57
+ context "when event binlog is older than table binlog.pos" do
58
+ it 'skip sending event' do
59
+ expect(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120").once
60
+ expect(ParserProvider).not_to receive(:parser)
61
+ expect(subject.process(record)).to eq(nil)
62
+ end
63
+ end
57
64
  end
58
65
  end
59
66
  end
@@ -28,20 +28,20 @@ module Mysql
28
28
  r
29
29
  end
30
30
  let(:query) { "a_query" }
31
- let(:normalized_query) { double('normalized_query') }
31
+ let(:normalized_query) { 'a_query' }
32
32
  let(:event_length) { 20 }
33
33
  let(:next_position) { 200 }
34
34
  let(:timestamp) { 1427973738 }
35
35
  let(:record) do
36
- r = double('record')
37
- allow(r).to receive(:[]).with("db_name").and_return(database)
38
- allow(r).to receive(:[]).with("query").and_return(query)
39
- allow(r).to receive(:[]).with("table_name").and_return(table)
40
- allow(r).to receive(:[]).with("normalized_query").and_return(normalized_query)
41
- allow(r).to receive(:[]).with("next_position").and_return(next_position)
42
- allow(r).to receive(:[]).with("event_length").and_return(event_length)
43
- allow(r).to receive(:[]).with("timestamp").and_return("#{timestamp}")
44
- r
36
+ {
37
+ 'db_name' => database,
38
+ 'query' => query,
39
+ 'table_name' => table,
40
+ 'normalized_query' => normalized_query,
41
+ 'next_position' => next_position,
42
+ 'event_length' => event_length,
43
+ 'timestamp' => "#{timestamp}",
44
+ }
45
45
  end
46
46
  subject { described_class.new(context) }
47
47
  end
@@ -10,8 +10,9 @@ module Mysql
10
10
  let(:truncate_query) { "TRUNCATE table foo" }
11
11
 
12
12
  before do
13
- allow(record).to receive(:[]).with("query").and_return(truncate_query)
14
- allow(record).to receive(:[]).with("normalized_query").and_return(truncate_query)
13
+ record['query'] = truncate_query
14
+ record['normalized_query'] = truncate_query
15
+ record.delete('table_name')
15
16
  end
16
17
 
17
18
  shared_examples "process truncate queries correctly" do
@@ -74,6 +75,14 @@ module Mysql
74
75
  include_examples "skip processing queries"
75
76
  end
76
77
  end
78
+
79
+ context 'when per-table binlog pos exists' do
80
+ let(:truncate_query) { "TRUNCATE #{table}" }
81
+ before do
82
+ allow(sync_fm).to receive(:get_table_binlog_pos).and_return("mysql-bin.000067\t120")
83
+ end
84
+ include_examples "skip processing queries"
85
+ end
77
86
  end
78
87
  end
79
88
  end
@@ -68,12 +68,12 @@ module Flydata
68
68
  end
69
69
  end
70
70
  context 'when the buffer size exceeds threthold' do
71
- it do
71
+ it 'returns true but does not flush buffer' do
72
72
  expect(forwarder.emit(record)).to be(false)
73
73
  expect(forwarder.emit(record)).to be(false)
74
74
  expect(forwarder.buffer_record_count).to be(2)
75
- expect(forwarder.emit(record)).to eq(sent_stats)
76
- expect(forwarder.buffer_record_count).to be(0)
75
+ expect(forwarder.emit(record)).to eq(true)
76
+ expect(forwarder.buffer_record_count).to be(3)
77
77
  end
78
78
  end
79
79
  context 'when the error happens during the data sending' do
@@ -91,7 +91,7 @@ module Flydata
91
91
  it 'retry and succeed sending data' do
92
92
  forwarder.emit(record)
93
93
  forwarder.emit(record)
94
- expect(forwarder.emit(record)).to eq(sent_stats)
94
+ expect(forwarder.emit(record)).to eq(true)
95
95
  end
96
96
  end
97
97
  end