flydata 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +2 -2
  3. data/VERSION +1 -1
  4. data/bin/fdredshift +78 -0
  5. data/circle.yml +1 -1
  6. data/ext/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  7. data/ext/flydata/{parser/mysql → source_mysql/parser}/dump_parser_ext.cpp +3 -3
  8. data/ext/flydata/source_mysql/parser/extconf.rb +3 -0
  9. data/ext/flydata/{parser/mysql → source_mysql/parser}/parser.txt +0 -0
  10. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.cpp +0 -0
  11. data/ext/flydata/{parser/mysql → source_mysql/parser}/sql_parser.h +0 -0
  12. data/flydata-core/lib/flydata-core/mysql/binlog_pos.rb +34 -32
  13. data/flydata-core/lib/flydata-core/mysql/compatibility_checker.rb +20 -0
  14. data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +12 -4
  15. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +60 -6
  16. data/flydata-core/spec/mysql/binlog_pos_spec.rb +474 -0
  17. data/flydata-core/spec/table_def/mysql_table_def_spec.rb +57 -0
  18. data/flydata-core/spec/table_def/mysql_to_redshift_table_def_spec.rb +174 -20
  19. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_AUTO_INCREMENT_keyword.dump +43 -0
  20. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_not_null_keyword.dump +43 -0
  21. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unique_keyword.dump +43 -0
  22. data/flydata-core/spec/table_def/mysqldump_test_col_comment_with_unsigned_keyword.dump +43 -0
  23. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +41 -8
  24. data/flydata.gemspec +0 -0
  25. data/lib/flydata/cli.rb +11 -5
  26. data/lib/flydata/command/base.rb +14 -1
  27. data/lib/flydata/command/exclusive_runnable.rb +42 -12
  28. data/lib/flydata/command/helper.rb +6 -6
  29. data/lib/flydata/command/sender.rb +4 -3
  30. data/lib/flydata/command/setup.rb +30 -381
  31. data/lib/flydata/command/stop.rb +1 -0
  32. data/lib/flydata/command/sync.rb +273 -301
  33. data/lib/flydata/compatibility_check.rb +24 -117
  34. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +3 -3
  35. data/lib/flydata/fluent-plugins/mysql/alter_table_query_handler.rb +2 -2
  36. data/lib/flydata/fluent-plugins/mysql/binlog_record_handler.rb +6 -6
  37. data/lib/flydata/fluent-plugins/mysql/truncate_table_query_handler.rb +0 -1
  38. data/lib/flydata/parser.rb +14 -0
  39. data/lib/flydata/{parser_provider.rb → parser/parser_provider.rb} +6 -4
  40. data/lib/flydata/parser/source_table.rb +33 -0
  41. data/lib/flydata/source.rb +105 -0
  42. data/lib/flydata/source/component.rb +21 -0
  43. data/lib/flydata/source/errors.rb +7 -0
  44. data/lib/flydata/source/generate_source_dump.rb +72 -0
  45. data/lib/flydata/source/parse_dump_and_send.rb +52 -0
  46. data/lib/flydata/source/setup.rb +31 -0
  47. data/lib/flydata/source/source_pos.rb +45 -0
  48. data/lib/flydata/source/sync.rb +56 -0
  49. data/lib/flydata/source/sync_generate_table_ddl.rb +43 -0
  50. data/lib/flydata/source_file/setup.rb +17 -0
  51. data/lib/flydata/source_file/sync.rb +14 -0
  52. data/lib/flydata/{command → source_mysql/command}/mysql.rb +2 -1
  53. data/lib/flydata/{command → source_mysql/command}/mysql_command_base.rb +2 -4
  54. data/lib/flydata/{command → source_mysql/command}/mysqlbinlog.rb +2 -1
  55. data/lib/flydata/{command → source_mysql/command}/mysqldump.rb +2 -1
  56. data/lib/flydata/source_mysql/generate_source_dump.rb +53 -0
  57. data/lib/flydata/source_mysql/mysql_compatibility_check.rb +114 -0
  58. data/lib/flydata/source_mysql/parse_dump_and_send.rb +28 -0
  59. data/lib/flydata/{parser/mysql → source_mysql/parser}/.gitignore +0 -0
  60. data/lib/flydata/{parser/mysql → source_mysql/parser}/dump_parser.rb +32 -67
  61. data/lib/flydata/{parser/mysql → source_mysql/parser}/mysql_alter_table.treetop +0 -0
  62. data/lib/flydata/source_mysql/setup.rb +24 -0
  63. data/lib/flydata/source_mysql/source_pos.rb +21 -0
  64. data/lib/flydata/source_mysql/sync.rb +45 -0
  65. data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +40 -0
  66. data/lib/flydata/{mysql → source_mysql}/table_ddl.rb +6 -17
  67. data/lib/flydata/source_zendesk/sync_generate_table_ddl.rb +30 -0
  68. data/lib/flydata/source_zendesk/zendesk_flydata_tabledefs.rb +133 -0
  69. data/lib/flydata/sync_file_manager.rb +132 -73
  70. data/lib/flydata/table_ddl.rb +18 -0
  71. data/spec/flydata/cli_spec.rb +1 -0
  72. data/spec/flydata/command/exclusive_runnable_spec.rb +19 -8
  73. data/spec/flydata/command/sender_spec.rb +1 -1
  74. data/spec/flydata/command/setup_spec.rb +4 -4
  75. data/spec/flydata/command/sync_spec.rb +97 -134
  76. data/spec/flydata/compatibility_check_spec.rb +16 -289
  77. data/spec/flydata/fluent-plugins/mysql/alter_table_query_handler_spec.rb +3 -3
  78. data/spec/flydata/fluent-plugins/mysql/dml_record_handler_spec.rb +1 -1
  79. data/spec/flydata/fluent-plugins/mysql/shared_query_handler_context.rb +4 -2
  80. data/spec/flydata/fluent-plugins/mysql/truncate_query_handler_spec.rb +1 -1
  81. data/spec/flydata/source_mysql/generate_source_dump_spec.rb +69 -0
  82. data/spec/flydata/source_mysql/mysql_compatibility_check_spec.rb +280 -0
  83. data/spec/flydata/{parser/mysql → source_mysql/parser}/alter_table_parser_spec.rb +2 -2
  84. data/spec/flydata/{parser/mysql → source_mysql/parser}/dump_parser_spec.rb +75 -70
  85. data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +137 -0
  86. data/spec/flydata/{mysql → source_mysql}/table_ddl_spec.rb +2 -2
  87. data/spec/flydata/source_spec.rb +140 -0
  88. data/spec/flydata/source_zendesk/sync_generate_table_ddl_spec.rb +33 -0
  89. data/spec/flydata/sync_file_manager_spec.rb +157 -77
  90. data/tmpl/redshift_mysql_data_entry.conf.tmpl +1 -1
  91. metadata +56 -23
  92. data/ext/flydata/parser/mysql/extconf.rb +0 -3
  93. data/lib/flydata/mysql/binlog_position.rb +0 -22
  94. data/spec/flydata/mysql/binlog_position_spec.rb +0 -35
@@ -0,0 +1,7 @@
1
+ module Flydata
2
+ module Source
3
+
4
+ class UnsupportedSourceError < RuntimeError; end
5
+
6
+ end
7
+ end
@@ -0,0 +1,72 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class GenerateSourceDump < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source, dp, options)
14
+ super(source, options)
15
+ @dp = dp
16
+ end
17
+ attr_reader :dp
18
+
19
+ # Public Interface: Run compatibility check
20
+ #
21
+ # Run whatever check (compatibility, connectivity, privilege, etc) to ensure
22
+ # that initial sync can run without an issue.
23
+ #
24
+ # dump_dir: A directory path string to the dump directory
25
+ # backup_dir: A directory path string to the backup directory
26
+ #
27
+ # Raises exception when check fails
28
+ def run_compatibility_check(dump_dir, backup_dir)
29
+ raise UnsupportedSourceError, "subclass must implement"
30
+ end
31
+
32
+ # Public Interface: Confirmation items
33
+ #
34
+ # Returns a hash of items to be shown as the final confirmation before
35
+ # initial sync.
36
+ # Example: Return value {"host" => "ubertas","port" => 3306}
37
+ # will be displayed as
38
+ # host: ubertas
39
+ # port: 3306
40
+ def confirmation_items
41
+ raise UnsupportedSourceError, "subclass must implement"
42
+ end
43
+
44
+ # Public Interface: Dump size
45
+ #
46
+ # tables: An array of table names to be dumped.
47
+ #
48
+ # Returns an approximate size of dump in bytes. The value may be a
49
+ # best-effort estimate. It doesn't have to be accurate.
50
+ def dump_size(tables)
51
+ raise UnsupportedSourceError, "subclass must implement"
52
+ end
53
+
54
+ # Public Interface: Dump data
55
+ #
56
+ # tables: An array of tables to be dumped.
57
+ # file_path: A file path string of the dump file to which data is written.
58
+ # This value may be nil, in which case contents are written to a
59
+ # pipe.
60
+ # src_pos_callback: A callback called when the source position of the dump
61
+ # becomes available. The callback takes the following arguments.
62
+ # io: Input IO to the dump.
63
+ # src_pos: Source position of the dump
64
+ #
65
+ # Returns none
66
+ def dump(tables, file_path = nil, &src_pos_callback)
67
+ raise UnsupportedSourceError, "subclass must implement"
68
+ end
69
+ end
70
+
71
+ end
72
+ end
@@ -0,0 +1,52 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class ParseDumpAndSend < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Value Converters
14
+ #
15
+ # Returns a hash of value converters which convert source values to FlyData values.
16
+ # Hash key is a data type for which the converter is called. A conveter takes a
17
+ # source value and returns a converted value.
18
+ def value_converters
19
+ raise UnsupportedSourceError, "subclass must implement"
20
+ end
21
+
22
+ # Public Interface: Parse Dump
23
+ #
24
+ # dump_pos_info: A hash containing the dump position from where the parse
25
+ # starts
26
+ # dmpio: A read IO object to the dump
27
+ # create_table_block: A callback called with a table info before its data gets
28
+ # sent
29
+ # callback parameters:
30
+ # source_table: A SourceTable object
31
+ # insert_record_block: A callback called with parsed records
32
+ # callback parameters:
33
+ # source_table: A SourceTable object
34
+ # values_set: An array of arrays. Each array has values for a row.
35
+ # check_point_block: A callback called between transactions. Dump position
36
+ # gets saved for resume at this timing.
37
+ # callback parameters:
38
+ # source_table: A SourceTable object
39
+ # last_pos: An IO location as of the check point
40
+ # bytesize: Number of bytes which have been processed
41
+ # source_pos: Source position of the dump
42
+ # state: Current dump state
43
+ # substate: Current dump sub-state
44
+ #
45
+ # Returns none
46
+ def parse_dump(dump_pos_info, dmpio, create_table_block, insert_record_block, check_point_block)
47
+ raise UnsupportedSourceError, "subclass must implement"
48
+ end
49
+ end
50
+
51
+ end
52
+ end
@@ -0,0 +1,31 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class Setup < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Tells if Agent process needs restart.
14
+ #
15
+ # Returns true if Agent process needs restart at the end of #initial_run.
16
+ # Returns false otherwise.
17
+ def initial_run_need_restart?
18
+ raise UnsupportedSourceError, "subclass must implement"
19
+ end
20
+
21
+ # Public Interface: initial_run complete message
22
+ #
23
+ # Returns a string which is displayed as the completion message at the
24
+ # end of #initial_run, which gets called at the end of Agent install.
25
+ def initial_run_complete_message
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+ end
29
+
30
+ end
31
+ end
@@ -0,0 +1,45 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SourcePos < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source)
14
+ super(source)
15
+ end
16
+ # NOTE: Do not use any attr_reader/attr_accessor if the variable is Source dependent
17
+
18
+ # Public Interface: Create source pos
19
+ #
20
+ # Returns a context-dependent position object that has the following methods:
21
+ # (including the Comparable mixin is preferable)
22
+ # - all comparison operators (<=>, <, <=, >=, >, ==, !=)
23
+ # - to_s
24
+ #
25
+ def create_source_pos(source_pos_str)
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+
29
+ # Public Interface: Back to last known safe source pos
30
+ #
31
+ # Returns a source position object
32
+ #
33
+ # A source pos may not be restart-able from the very position because
34
+ # there may be preceding records necessary to restart the source log
35
+ # playback. This method returns such a 'safe' position from which
36
+ # the source log can be played back.
37
+ #
38
+ def resume_pos(source_pos)
39
+ raise UnsupportedSourceError, "subclass must implement"
40
+ end
41
+
42
+ end
43
+
44
+ end
45
+ end
@@ -0,0 +1,56 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class Sync < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface: Set up data source
14
+ #
15
+ # Called right after the source instance is created. Perform one-time setup
16
+ # which is necessary to initialize the source for sync (and sync only)
17
+ #
18
+ # Raises exception when the source does not support sync
19
+ def setup
20
+ raise UnsupportedSourceError,"subclass must implement"
21
+ end
22
+
23
+ # Public Interface: Tells if the source supports sync or not
24
+ #
25
+ # Returns true if the source supports sync. Returns false otherwise.
26
+ def supported?
27
+ raise UnsupportedSourceError, "subclass must implement"
28
+ end
29
+
30
+ # Public Interface: Table lists
31
+ #
32
+ # Returns lists of tables in a hash. The following lists will be returned
33
+ # "tables" : An array of tables currently in sync
34
+ # "new_tables" : An array of tables for which no generate_table_ddl has been run yet
35
+ # "invalid_tables" : An array of tables that had an issue starting sync
36
+ def table_lists
37
+ raise UnsupportedSourceError, "subclass must implement"
38
+ end
39
+
40
+ # Public Interface: Data Servers
41
+ #
42
+ # Returns a comma separated list of data servers to which the agent sends data
43
+ def data_servers
44
+ raise UnsupportedSourceError, "subclass must implement"
45
+ end
46
+
47
+ # Public Interface: Forwarder
48
+ #
49
+ # Returns the forwarder type as a string. Values are 'tcpforwarder' or 'sslforwarder'.
50
+ def forwarder
51
+ raise UnsupportedSourceError, "subclass must implement"
52
+ end
53
+ end
54
+
55
+ end
56
+ end
@@ -0,0 +1,43 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class SyncGenerateTableDdl < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ def initialize(source, dp, options)
14
+ super(source, options)
15
+ @dp = dp
16
+ end
17
+ attr_reader :dp
18
+
19
+ # Public Interface: Run compatibility check
20
+ #
21
+ # Run whatever check (compatibility, connectivity, privilege, etc) to ensure
22
+ # that the 'sync:generate_table_ddl' command can run without an issue.
23
+ #
24
+ # Raises exception when check fails
25
+ def run_compatibility_check
26
+ raise UnsupportedSourceError, "subclass must implement"
27
+ end
28
+
29
+ # Public Interface: Generate FlyData table definitions for given tables
30
+ #
31
+ # tables - An array of table names
32
+ # options - A hash of options
33
+ #
34
+ # Returns flydata_tabledefs, errors
35
+ # flydata_tabledefs - An array of FlyData tabledefs
36
+ # errors - An array of error hashes for tables whose tabledef generation failed
37
+ def generate_flydata_tabledef(tables, options)
38
+ raise UnsupportedSourceError, "subclass must implement"
39
+ end
40
+ end
41
+
42
+ end
43
+ end
@@ -0,0 +1,17 @@
1
+ require 'flydata/source/setup'
2
+
3
+ module Flydata
4
+ module SourceFile
5
+
6
+ class Setup < Source::Setup
7
+ def initial_run_need_restart?
8
+ true
9
+ end
10
+
11
+ def initial_run_complete_message
12
+ :all_done
13
+ end
14
+ end
15
+
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ require 'flydata/source/sync'
2
+ require 'flydata/source/errors'
3
+
4
+ module Flydata
5
+ module SourceFile
6
+
7
+ class Sync < Source::Sync
8
+ def setup
9
+ raise Source::UnsupportedSourceError, "This data entry does not support 'sync' commands"
10
+ end
11
+ end
12
+
13
+ end
14
+ end
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysql < Sync
@@ -1,12 +1,13 @@
1
1
  require 'open3'
2
2
  require 'flydata-core/mysql/command_generator'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
 
7
8
  module MysqlCommandBase
8
9
  def run(*args)
9
- de = retrieve_sync_data_entry
10
+ de = data_entry
10
11
  cmd = generate_command(de['mysql_data_entry_preference'], args)
11
12
  return if cmd.to_s.empty?
12
13
  $stderr.puts "command:#{cmd}" if FLYDATA_DEBUG
@@ -37,10 +38,7 @@ module Flydata
37
38
  def reset; end
38
39
  def skip; end
39
40
  def generate_table_ddl; end
40
- def fix_binlogpos; end
41
41
  end
42
42
 
43
43
  end
44
44
  end
45
-
46
-
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysqlbinlog < Sync
@@ -1,6 +1,7 @@
1
1
  require 'flydata/command/sync'
2
- require 'flydata/command/mysql_command_base'
2
+ require 'flydata/source_mysql/command/mysql_command_base'
3
3
 
4
+ # Command class must be in module Flydata::Command
4
5
  module Flydata
5
6
  module Command
6
7
  class Mysqldump < Sync
@@ -0,0 +1,53 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_mysql/mysql_compatibility_check'
4
+ require 'flydata/source_mysql/parser/dump_parser'
5
+ require 'flydata-core/mysql/binlog_pos'
6
+
7
+ module Flydata
8
+ module SourceMysql
9
+
10
+ class GenerateSourceDump < Source::GenerateSourceDump
11
+ def run_compatibility_check(dump_dir, backup_dir)
12
+ %w(host username database).each do |k|
13
+ if de['mysql_data_entry_preference'][k].to_s.empty?
14
+ raise "'#{k}' is required. Set the value in the conf file " +
15
+ "-> #{Flydata::Preference::DataEntryPreference.conf_path(de)}"
16
+ end
17
+ end
18
+
19
+ MysqlCompatibilityCheck.new(dp, de['mysql_data_entry_preference'], dump_dir: dump_dir, backup_dir: backup_dir).check
20
+ end
21
+
22
+ def confirmation_items
23
+ mp = de['mysql_data_entry_preference']
24
+ items = {
25
+ "host" => mp['host'],
26
+ "port" => mp['port'],
27
+ "username" => mp['username'],
28
+ "database" => mp['database'],
29
+ }
30
+ items["ssl"] = "Yes" if mp['ssl_ca']
31
+
32
+ items
33
+ end
34
+
35
+ def dump_size(tables)
36
+ opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
37
+ Parser::DatabaseSizeCheck.new(opts).get_db_bytesize
38
+ end
39
+
40
+ def dump(tables, file_path = nil, &src_pos_callback)
41
+ opts = de['mysql_data_entry_preference'].merge({"tables" => tables})
42
+ dump_generator = Parser::MysqlDumpGeneratorNoMasterData.new(opts)
43
+
44
+ dump_generator.dump(file_path) do |io, binlog_hash|
45
+ source_pos = FlydataCore::Mysql::BinlogPos.new(binlog_hash)
46
+ src_pos_callback.call(io, source_pos)
47
+ end
48
+ nil
49
+ end
50
+ end
51
+
52
+ end
53
+ end