flydata 0.6.3 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +2 -2
  3. data/VERSION +1 -1
  4. data/bin/fdredshift +78 -0
  5. data/circle.yml +1 -1
  6. data/ext/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  7. data/ext/flydata/{parser/mysql → source_mysql/parser}/dump_parser_ext.cpp +3 -3
  8. data/ext/flydata/source_mysql/parser/extconf.rb +3 -0
  9. data/ext/flydata/{parser/mysql → source_mysql/parser}/parser.txt +0 -0
  10. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.cpp +0 -0
  11. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.h +0 -0
  12. data/flydata-core/lib/flydata-core/mysql/binlog_pos.rb +34 -32
  13. data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +20 -0
  14. data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +12 -4
  15. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +60 -6
  16. data/flydata-core/spec/mysql/binlog_pos_spec.rb +474 -0
  17. data/flydata-core/spec/table_def/mysql_table_def_spec.rb +57 -0
  18. data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +174 -20
  19. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_AUTO_INCREMENT_keyword.dump +43 -0
  20. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_not_null_keyword.dump +43 -0
  21. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unique_keyword.dump +43 -0
  22. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unsigned_keyword.dump +43 -0
  23. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +41 -8
  24. data/flydata.gemspec +0 -0
  25. data/lib/flydata/cli.rb +11 -5
  26. data/lib/flydata/command/base.rb +14 -1
  27. data/lib/flydata/command/exclusive_runnable.rb +42 -12
  28. data/lib/flydata/command/helper.rb +6 -6
  29. data/lib/flydata/command/sender.rb +4 -3
  30. data/lib/flydata/command/setup.rb +30 -381
  31. data/lib/flydata/command/stop.rb +1 -0
  32. data/lib/flydata/command/sync.rb +273 -301
  33. data/lib/flydata/compatibility_check.rb +24 -117
  34. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +3 -3
  35. data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +2 -2
  36. data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +6 -6
  37. data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +0 -1
  38. data/lib/flydata/parser.rb +14 -0
  39. data/lib/flydata/{parser_provider.rb → parser/parser_provider.rb} +6 -4
  40. data/lib/flydata/parser/source_table.rb +33 -0
  41. data/lib/flydata/source.rb +105 -0
  42. data/lib/flydata/source/component.rb +21 -0
  43. data/lib/flydata/source/errors.rb +7 -0
  44. data/lib/flydata/source/generate_source_dump.rb +72 -0
  45. data/lib/flydata/source/parse_dump_and_send.rb +52 -0
  46. data/lib/flydata/source/setup.rb +31 -0
  47. data/lib/flydata/source/source_pos.rb +45 -0
  48. data/lib/flydata/source/sync.rb +56 -0
  49. data/lib/flydata/source/sync_generate_table_ddl.rb +43 -0
  50. data/lib/flydata/source_file/setup.rb +17 -0
  51. data/lib/flydata/source_file/sync.rb +14 -0
  52. data/lib/flydata/{command → source_mysql/command}/mysql.rb +2 -1
  53. data/lib/flydata/{command → source_mysql/command}/mysql_command_base.rb +2 -4
  54. data/lib/flydata/{command → source_mysql/command}/mysqlbinlog.rb +2 -1
  55. data/lib/flydata/{command → source_mysql/command}/mysqldump.rb +2 -1
  56. data/lib/flydata/source_mysql/generate_source_dump.rb +53 -0
  57. data/lib/flydata/source_mysql/mysql_compatibility_check.rb +114 -0
  58. data/lib/flydata/source_mysql/parse_dump_and_send.rb +28 -0
  59. data/lib/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  60. data/lib/flydata/{parser/mysql → source_mysql/parser}/dump_parser.rb +32 -67
  61. data/lib/flydata/{parser/mysql → source_mysql/parser}/mysql_alter_table.treetop +0 -0
  62. data/lib/flydata/source_mysql/setup.rb +24 -0
  63. data/lib/flydata/source_mysql/source_pos.rb +21 -0
  64. data/lib/flydata/source_mysql/sync.rb +45 -0
  65. data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +40 -0
  66. data/lib/flydata/{mysql → source_mysql}/table_ddl.rb +6 -17
  67. data/lib/flydata/source_zendesk/sync_generate_table_ddl.rb +30 -0
  68. data/lib/flydata/source_zendesk/zendesk_flydata_tabledefs.rb +133 -0
  69. data/lib/flydata/sync_file_manager.rb +132 -73
  70. data/lib/flydata/table_ddl.rb +18 -0
  71. data/spec/flydata/cli_spec.rb +1 -0
  72. data/spec/flydata/command/exclusive_runnable_spec.rb +19 -8
  73. data/spec/flydata/command/sender_spec.rb +1 -1
  74. data/spec/flydata/command/setup_spec.rb +4 -4
  75. data/spec/flydata/command/sync_spec.rb +97 -134
  76. data/spec/flydata/compatibility_check_spec.rb +16 -289
  77. data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +3 -3
  78. data/spec/flydata/fluent-plugins/mysql/dml_record_handler_spec.rb +1 -1
  79. data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +4 -2
  80. data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +1 -1
  81. data/spec/flydata/source_mysql/generate_source_dump_spec.rb +69 -0
  82. data/spec/flydata/source_mysql/mysql_compatibility_check_spec.rb +280 -0
  83. data/spec/flydata/{parser/mysql → source_mysql/parser}/alter_table_parser_spec.rb +2 -2
  84. data/spec/flydata/{parser/mysql → source_mysql/parser}/dump_parser_spec.rb +75 -70
  85. data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +137 -0
  86. data/spec/flydata/{mysql → source_mysql}/table_ddl_spec.rb +2 -2
  87. data/spec/flydata/source_spec.rb +140 -0
  88. data/spec/flydata/source_zendesk/sync_generate_table_ddl_spec.rb +33 -0
  89. data/spec/flydata/sync_file_manager_spec.rb +157 -77
  90. data/tmpl/redshift_mysql_data_entry.conf.tmpl +1 -1
  91. metadata +56 -23
  92. data/ext/flydata/parser/mysql/extconf.rb +0 -3
  93. data/lib/flydata/mysql/binlog_position.rb +0 -22
  94. data/spec/flydata/mysql/binlog_position_spec.rb +0 -35
@@ -0,0 +1,7 @@
1
+ module Flydata
2
+ module Source
3
+
4
+ class UnsupportedSourceError < RuntimeError; end
5
+
6
+ end
7
+ end
@@ -0,0 +1,72 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class GenerateSourceDump < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source, dp, options)
14
+ super(source, options)
15
+ @dp = dp
16
+ end
17
+ attr_reader :dp
18
+
19
+ # Public Interface: Run compatibility check
20
+ #
21
+ # Run whatever check (compatibility, connectivity, privilege, etc) to ensure
22
+ # that initial sync can run without an issue.
23
+ #
24
+ # dump_dir: A directory path string to the dump directory
25
+ # backup_dir: A directory path string to the backup directory
26
+ #
27
+ # Raises exception when check fails
28
+ def run_compatibility_check(dump_dir, backup_dir)
29
+ raise UnsupportedSourceError, "subclass must implement"
30
+ end
31
+
32
+ # Public Interface: Confirmation items
33
+ #
34
+ # Returns a hash of items to be shown as the final confirmation before
35
+ # initial sync.
36
+ # Example: Return value {"host" => "ubertas","port" => 3306}
37
+ # will be displayed as
38
+ # host: ubertas
39
+ # port: 3306
40
+ def confirmation_items
41
+ raise UnsupportedSourceError, "subclass must implement"
42
+ end
43
+
44
+ # Public Interface: Dump size
45
+ #
46
+ # tables: An array of table names to be dumped.
47
+ #
48
+ # Returns an approximate size of dump in bytes. The value may be a
49
+ # best-effort estimate. It doesn't have to be accurate.
50
+ def dump_size(tables)
51
+ raise UnsupportedSourceError, "subclass must implement"
52
+ end
53
+
54
+ # Public Interface: Dump data
55
+ #
56
+ # tables: An array of tables to be dumped.
57
+ # file_path: A file path string of the dump file to which data is written.
58
+ # This value may be nil, in which case contents are written to a
59
+ # pipe.
60
+ # src_pos_callback: A callback called when the source position of the dump
61
+ # becomes available. The callback takes the following arguments.
62
+ # io: Input IO to the dump.
63
+ # src_pos: Source position of the dump
64
+ #
65
+ # Returns none
66
+ def dump(tables, file_path = nil, &src_pos_callback)
67
+ raise UnsupportedSourceError, "subclass must implement"
68
+ end
69
+ end
70
+
71
+ end
72
+ end
@@ -0,0 +1,52 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class ParseDumpAndSend < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Value Converters
14
+ #
15
+ # Returns a hash of value converters which convert source values to FlyData values.
16
+ # Hash key is a data type for which the converter is called. A converter takes a
17
+ # source value and returns a converted value.
18
+ def value_converters
19
+ raise UnsupportedSourceError, "subclass must implement"
20
+ end
21
+
22
+ # Public Interface: Parse Dump
23
+ #
24
+ # dump_pos_info: A hash containing the dump position from where the parse
25
+ # starts
26
+ # dmpio: A read IO object to the dump
27
+ # create_table_block: A callback called with a table info before its data gets
28
+ # sent
29
+ # callback parameters:
30
+ # source_table: A SourceTable object
31
+ # insert_record_block: A callback called with parsed records
32
+ # callback parameters:
33
+ # source_table: A SourceTable object
34
+ # values_set: An array of arrays. Each array has values for a row.
35
+ # check_point_block: A callback called between transactions. Dump position
36
+ # gets saved for resume at this timing.
37
+ # callback parameters:
38
+ # source_table: A SourceTable object
39
+ # last_pos: An IO location as of the check point
40
+ # bytesize: Number of bytes which have been processed
41
+ # source_pos: Source position of the dump
42
+ # state: Current dump state
43
+ # substate: Current dump sub-state
44
+ #
45
+ # Returns none
46
+ def parse_dump(dump_pos_info, dmpio, create_table_block, insert_record_block, check_point_block)
47
+ raise UnsupportedSourceError, "subclass must implement"
48
+ end
49
+ end
50
+
51
+ end
52
+ end
@@ -0,0 +1,31 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class Setup < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Tells if Agent process needs restart.
14
+ #
15
+ # Returns true if Agent process needs restart at the end of #initial_run.
16
+ # Returns false otherwise.
17
+ def initial_run_need_restart?
18
+ raise UnsupportedSourceError, "subclass must implement"
19
+ end
20
+
21
+ # Public Interface: initial_run complete message
22
+ #
23
+ # Returns a string which is displayed as the completion message at the
24
+ # end of #initial_run, which gets called at the end of Agent install.
25
+ def initial_run_complete_message
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+ end
29
+
30
+ end
31
+ end
@@ -0,0 +1,45 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SourcePos < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source)
14
+ super(source)
15
+ end
16
+ #attr_reader :Should not use any attr_reader/attr_accessor if the variable is Source dependent
17
+
18
+ # Public Interface: Create source pos
19
+ #
20
+ # Returns a context-dependent position object that has the following methods:
21
+ # (including the Comparable mixin is preferable)
22
+ # - all comparison operators (<=>, <, <=, >=, >, ==, !=)
23
+ # - to_s
24
+ #
25
+ def create_source_pos(source_pos_str)
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+
29
+ # Public Interface: Back to last known safe source pos
30
+ #
31
+ # Returns a source position object
32
+ #
33
+ # A source pos may not be restart-able from the very position because
34
+ # there may be preceding records necessary to restart the source log
35
+ # playback. This method returns such a 'safe' position from which
36
+ # the source log can be played back.
37
+ #
38
+ def resume_pos(source_pos)
39
+ raise UnsupportedSourceError, "subclass must implement"
40
+ end
41
+
42
+ end
43
+
44
+ end
45
+ end
@@ -0,0 +1,56 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class Sync < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Set up data source
14
+ #
15
+ # Called right after the source instance is created. Perform one-time setup
16
+ # which is necessary to initialize the source for sync (and sync only)
17
+ #
18
+ # Raises exception when the source does not support sync
19
+ def setup
20
+ raise UnsupportedSourceError,"subclass must implement"
21
+ end
22
+
23
+ # Public Interface: Tells whether the source supports sync
24
+ #
25
+ # Returns true if the source supports sync, false otherwise.
26
+ def supported?
27
+ raise UnsupportedSourceError, "subclass must implement"
28
+ end
29
+
30
+ # Public Interface: Table lists
31
+ #
32
+ # Returns lists of tables in a hash. The following lists will be returned
33
+ # "tables" : An array of tables currently in sync
34
+ # "new_tables" : An array of tables for which no generate_table_ddl has been run yet
35
+ # "invalid_tables" : An array of tables that had an issue starting sync
36
+ def table_lists
37
+ raise UnsupportedSourceError, "subclass must implement"
38
+ end
39
+
40
+ # Public Interface: Data Servers
41
+ #
42
+ # Returns a comma separated list of data servers to which the agent sends data
43
+ def data_servers
44
+ raise UnsupportedSourceError, "subclass must implement"
45
+ end
46
+
47
+ # Public Interface: Forwarder
48
+ #
49
+ # Returns the forwarder type as a string. Values are 'tcpforwarder' or 'sslforwarder'.
50
+ def forwarder
51
+ raise UnsupportedSourceError, "subclass must implement"
52
+ end
53
+ end
54
+
55
+ end
56
+ end
@@ -0,0 +1,43 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SyncGenerateTableDdl < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source, dp, options)
14
+ super(source, options)
15
+ @dp = dp
16
+ end
17
+ attr_reader :dp
18
+
19
+ # Public Interface: Run compatibility check
20
+ #
21
+ # Run whatever check (compatibility, connectivity, privilege, etc) to ensure
22
+ # that the 'sync:generate_table_ddl' command can run without an issue.
23
+ #
24
+ # Raises exception when check fails
25
+ def run_compatibility_check
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+
29
+ # Public Interface: Generate FlyData table definitions for given tables
30
+ #
31
+ # tables - An array of table names
32
+ # options - A hash of options
33
+ #
34
+ # Returns flydata_tabledefs, errors
35
+ # flydata_tabledefs - An array of FlyData tabledefs
36
+ # errors - An array of error hashes for tables whose tabledef generation failed
37
+ def generate_flydata_tabledef(tables, options)
38
+ raise UnsupportedSourceError, "subclass must implement"
39
+ end
40
+ end
41
+
42
+ end
43
+ end
@@ -0,0 +1,17 @@
1
+ require 'flydata/source/setup'
2
+
3
+ module Flydata
4
+ module SourceFile
5
+
6
+ class Setup < Source::Setup
7
+ def initial_run_need_restart?
8
+ true
9
+ end
10
+
11
+ def initial_run_complete_message
12
+ :all_done
13
+ end
14
+ end
15
+
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ require 'flydata/source/sync'
2
+ require 'flydata/source/errors'
3
+
4
+ module Flydata
5
+ module SourceFile
6
+
7
+ class Sync < Source::Sync
8
+ def setup
9
+ raise Source::UnsupportedSourceError, "This data entry does not support 'sync' commands"
10
+ end
11
+ end
12
+
13
+ end
14
+ end
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysql < Sync
@@ -1,12 +1,13 @@
1
1
  require 'open3'
2
2
  require 'flydata-core/mysql/command_generator'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
 
7
8
  module MysqlCommandBase
8
9
  def run(*args)
9
- de = retrieve_sync_data_entry
10
+ de = data_entry
10
11
  cmd = generate_command(de['mysql_data_entry_preference'], args)
11
12
  return if cmd.to_s.empty?
12
13
  $stderr.puts "command:#{cmd}" if FLYDATA_DEBUG
@@ -37,10 +38,7 @@ module Flydata
37
38
  def reset; end
38
39
  def skip; end
39
40
  def generate_table_ddl; end
40
- def fix_binlogpos; end
41
41
  end
42
42
 
43
43
  end
44
44
  end
45
-
46
-
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysqlbinlog < Sync
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysqldump < Sync
@@ -0,0 +1,53 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_mysql/mysql_compatibility_check'
4
+ require 'flydata/source_mysql/parser/dump_parser'
5
+ require 'flydata-core/mysql/binlog_pos'
6
+
7
+ module Flydata
8
+ module SourceMysql
9
+
10
+ class GenerateSourceDump < Source::GenerateSourceDump
11
+ def run_compatibility_check(dump_dir, backup_dir)
12
+ %w(host username database).each do |k|
13
+ if de['mysql_data_entry_preference'][k].to_s.empty?
14
+ raise "'#{k}' is required. Set the value in the conf file " +
15
+ "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
16
+ end
17
+ end
18
+
19
+ MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference'], dump_dir: dump_dir, backup_dir: backup_dir).check
20
+ end
21
+
22
+ def confirmation_items
23
+ mp = de['mysql_data_entry_preference']
24
+ items = {
25
+ "host" => mp['host'],
26
+ "port" => mp['port'],
27
+ "username" => mp['username'],
28
+ "database" => mp['database'],
29
+ }
30
+ items["ssl"] = "Yes" if mp['ssl_ca']
31
+
32
+ items
33
+ end
34
+
35
+ def dump_size(tables)
36
+ opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
37
+ Parser::DatabaseSizeCheck.new(opts).get_db_bytesize
38
+ end
39
+
40
+ def dump(tables, file_path = nil, &src_pos_callback)
41
+ opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
42
+ dump_generator = Parser::MysqlDumpGeneratorNoMasterData.new(opts)
43
+
44
+ dump_generator.dump(file_path) do |io, binlog_hash|
45
+ source_pos = FlydataCore::Mysql::BinlogPos.new(binlog_hash)
46
+ src_pos_callback.call(io, source_pos)
47
+ end
48
+ nil
49
+ end
50
+ end
51
+
52
+ end
53
+ end