flydata 0.6.11 → 0.6.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +4 -4
  4. data/VERSION +1 -1
  5. data/flydata-core/lib/flydata-core/postgresql/source_pos.rb +34 -0
  6. data/flydata-core/lib/flydata-core/table_def/base.rb +10 -0
  7. data/flydata-core/lib/flydata-core/table_def/postgresql_table_def.rb +20 -4
  8. data/flydata-core/spec/postgresql/source_pos_spec.rb +43 -0
  9. data/flydata-core/spec/table_def/base_spec.rb +51 -0
  10. data/flydata.gemspec +0 -0
  11. data/lib/flydata/command/sender.rb +9 -6
  12. data/lib/flydata/command/setup.rb +6 -12
  13. data/lib/flydata/command/sync.rb +31 -17
  14. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flydata_sync.rb +2 -3
  15. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +15 -14
  16. data/lib/flydata/parser/source_table.rb +4 -3
  17. data/lib/flydata/plugin_support/context.rb +46 -0
  18. data/lib/flydata/plugin_support/sync_record_emittable.rb +69 -0
  19. data/lib/flydata/source/component.rb +1 -1
  20. data/lib/flydata/source/generate_source_dump.rb +3 -2
  21. data/lib/flydata/source_mysql/mysql_compatibility_check.rb +12 -11
  22. data/lib/flydata/source_mysql/parser/dump_parser.rb +0 -4
  23. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/alter_table_query_handler.rb +8 -2
  24. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_position_file.rb +7 -1
  25. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_dispatcher.rb +10 -4
  26. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_handler.rb +8 -2
  27. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_record_dispatcher.rb +9 -3
  28. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_record_handler.rb +16 -34
  29. data/lib/flydata/source_mysql/plugin_support/context.rb +7 -0
  30. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/ddl_query_handler.rb +11 -19
  31. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/dml_record_handler.rb +8 -2
  32. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/drop_database_query_handler.rb +8 -2
  33. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/table_meta.rb +5 -1
  34. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/truncate_table_query_handler.rb +8 -2
  35. data/lib/flydata/source_postgresql/generate_source_dump.rb +175 -0
  36. data/lib/flydata/source_postgresql/parse_dump_and_send.rb +126 -0
  37. data/lib/flydata/source_postgresql/pg_client.rb +43 -0
  38. data/lib/flydata/source_postgresql/postgresql_component.rb +12 -0
  39. data/lib/flydata/source_postgresql/setup.rb +24 -0
  40. data/lib/flydata/source_postgresql/source_pos.rb +18 -0
  41. data/lib/flydata/source_postgresql/sync_generate_table_ddl.rb +7 -15
  42. data/lib/flydata/sync_file_manager.rb +39 -28
  43. data/spec/flydata/command/setup_spec.rb +0 -1
  44. data/spec/flydata/command/sync_spec.rb +2 -2
  45. data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +5 -6
  46. data/spec/flydata/plugin_support/context_spec.rb +27 -0
  47. data/spec/flydata/source_mysql/parser/dump_parser_spec.rb +4 -4
  48. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/alter_table_query_handler_spec.rb +3 -3
  49. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_dispatcher_spec.rb +5 -5
  50. data/spec/flydata/source_mysql/plugin_support/context_spec.rb +26 -0
  51. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/ddl_query_handler_spec.rb +3 -3
  52. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/dml_record_handler_spec.rb +2 -2
  53. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/drop_database_query_handler_spec.rb +3 -3
  54. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/shared_query_handler_context.rb +3 -1
  55. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/table_meta_spec.rb +3 -3
  56. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/truncate_query_handler_spec.rb +7 -4
  57. data/spec/flydata/source_postgresql/generate_source_dump_spec.rb +144 -0
  58. data/spec/flydata/sync_file_manager_spec.rb +1 -1
  59. metadata +38 -24
  60. data/lib/flydata/fluent-plugins/mysql/context.rb +0 -25
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/binlog_record_handler'
1
+ require 'flydata/source_mysql/plugin_support/binlog_record_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class DmlRecordHandler < BinlogRecordHandler
5
8
  ROW = :row
6
9
  OLD = :old
@@ -106,3 +109,6 @@ module Mysql
106
109
  end
107
110
  end
108
111
  end
112
+
113
+ end
114
+ end
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/ddl_query_handler'
1
+ require 'flydata/source_mysql/plugin_support/ddl_query_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class DropDatabaseQueryHandler < DatabaseDdlQueryHandler
5
8
  # For MySQL, database and schema are exchangable
6
9
  PATTERN = /^DROP (DATABASE|SCHEMA)/i
@@ -22,3 +25,6 @@ module Mysql
22
25
  end
23
26
  end
24
27
  end
28
+
29
+ end
30
+ end
@@ -3,7 +3,9 @@ require 'flydata-core/mysql/config'
3
3
  require 'flydata-core/table_def/mysql_table_def'
4
4
 
5
5
  module Flydata
6
- module Mysql
6
+ module SourceMysql
7
+
8
+ module PluginSupport
7
9
  class TableMeta
8
10
  MANDATORY_OPTS = [
9
11
  :host, :port, :username, :password,
@@ -57,4 +59,6 @@ EOT
57
59
  end
58
60
  end
59
61
  end
62
+
63
+ end
60
64
  end
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/ddl_query_handler'
1
+ require 'flydata/source_mysql/plugin_support/ddl_query_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class TruncateTableQueryHandler < TableDdlQueryHandler
5
8
  PATTERN = /^TRUNCATE/i
6
9
 
@@ -22,3 +25,6 @@ module Mysql
22
25
  end
23
26
  end
24
27
  end
28
+
29
+ end
30
+ end
@@ -0,0 +1,175 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_postgresql/postgresql_component'
4
+ require 'flydata/source_postgresql/pg_client'
5
+ require 'flydata-core/postgresql/source_pos'
6
+ require 'msgpack'
7
+
8
+ module Flydata
9
+ module SourcePostgresql
10
+
11
+ class GenerateSourceDump < Source::GenerateSourceDump
12
+ include PostgresqlComponent
13
+
14
# Verifies that every connection setting needed for a dump is present in the
# data entry preferences. Raises a RuntimeError naming the first missing key
# together with the conf file path. The two arguments are not used here.
def run_compatibility_check(dump_dir, backup_dir)
  %w(host username database schema).each do |required_key|
    # nil and "" are both treated as "not set"
    next unless de_prefs[required_key].to_s.empty?

    conf_path = Flydata::Preference::DataEntryPreference.conf_path(de)
    raise "'#{required_key}' is required. Set the value in the conf file -> #{conf_path}"
  end
end
22
+
23
# Returns a hash of the connection settings (host, port, username, database,
# schema) read from the data entry preferences, in that key order.
def confirmation_items
  %w(host port username database schema).each_with_object({}) do |key, items|
    items[key] = de_prefs[key]
  end
end
34
+
35
# Query template summing pg_total_relation_size over the selected tables.
# $1 is the schema name; %s is replaced with the table placeholders.
DUMP_SIZE_QUERY = <<EOS
SELECT sum(pg_total_relation_size(c.oid)) AS "total_size"
FROM pg_class c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE nspname = $1 AND relname in (%s)
EOS
TABLE_PLACEHOLDER_START_NUM = 2 # because $1 is used by table_schema

# Returns the "total_size" value reported by DUMP_SIZE_QUERY for the given
# table names, converted to an Integer.
def dump_size(tables)
  client = PGClient.new(de_prefs)
  bind_values = [de_prefs['schema']] + tables
  placeholder_opts = { placeholder_size: tables.size,
                       placeholder_start_num: TABLE_PLACEHOLDER_START_NUM }

  rows = client.query(DUMP_SIZE_QUERY, bind_values, placeholder_opts)
  rows.first['total_size'].to_i
end
50
+
51
# Dumps the given tables into the file at file_path.
#
# tables           - table names to dump
# file_path        - destination file; required (dumping via pipe is not
#                    implemented and raises a RuntimeError)
# src_pos_callback - block forwarded to get_source_pos
#
# Always returns nil.
def dump(tables, file_path = nil, &src_pos_callback)
  raise "dump via pipe has not been implemented yet" unless file_path

  # Block form guarantees the dump file is flushed and closed, even when a
  # table dump raises part-way through.
  File.open(file_path, "w") do |io|
    cli = PGClient.new(de_prefs)

    source_pos = get_source_pos(cli, &src_pos_callback)

    context = source.sync_generate_table_ddl(dp, nil)
    context.each_source_tabledef(tables, de_prefs) do |tabledef, _error|
      # tabledef may be nil; such entries are skipped
      dump_table(tabledef, source_pos, io, cli) if tabledef
    end
  end
  nil
end
68
+
69
+ private
70
+
71
CURRENT_SNAPSHOT_QUERY = <<EOS
SELECT txid_current_snapshot() AS current_snapshot;
EOS

# Reads the current transaction snapshot through cli, wraps it in a
# FlydataCore::Postgresql::SourcePos, passes (nil, source_pos) to the given
# block, and returns the source position.
def get_source_pos(cli, &src_pos_callback)
  snapshot = cli.query(CURRENT_SNAPSHOT_QUERY).first['current_snapshot']

  FlydataCore::Postgresql::SourcePos.new(snapshot).tap do |position|
    src_pos_callback.call(nil, position)
  end
end
84
+
85
# Maximum number of rows requested per chunk query.
NUM_ROWS = 500000

# Writes one table to io: first the table header (dump_source_table), then
# the rows in chunks of NUM_ROWS. Chunking stops as soon as a chunk returns
# fewer than NUM_ROWS rows.
def dump_table(tabledef, source_pos, io, cli)
  qualified_name = fq_table_name(de_prefs['schema'], tabledef.table_name)
  primary_keys = tabledef.pk_columns
  column_names = tabledef.column_names

  dump_source_table(tabledef, io)

  cursor = nil          # primary key values of the last row dumped so far
  fetched = NUM_ROWS    # forces the first iteration
  while fetched >= NUM_ROWS
    fetched, cursor = dump_table_chunk(qualified_name, source_pos, NUM_ROWS,
                                       column_names, primary_keys, cursor, io, cli)
  end
end
101
+
102
# Writes the table header record to io as msgpack: a hash holding the table
# name and, per column, its name and "format_type" (taken from the column
# definition's "data_type").
def dump_source_table(tabledef, io)
  column_entries = tabledef.column_def.each_with_object({}) do |(name, definition_src), entries|
    # NOTE(review): each column definition arrives as a Ruby source string and
    # is evaluated here; confirm these strings can never carry external input.
    definition = eval definition_src
    entries[name] = { "column_name" => name,
                      "format_type" => definition["data_type"] }
  end
  header = { "table_name" => tabledef.table_name,
             "columns" => column_entries }

  io.write(header.to_msgpack)
end
114
+
115
# Template slots: table name, WHERE clause, ORDER BY clause, row limit.
SELECT_QUERY = <<EOS
SELECT * FROM %s%s%s LIMIT %s;
EOS

# Runs one chunk query against fqtn and writes every returned row via
# dump_row. last_pks, when given, is passed as the query parameters.
#
# Returns [row_count, last_pks] where last_pks holds the primary key values
# of the final row written, or nil when the query returned no rows.
def dump_table_chunk(fqtn, source_pos, num_rows, columns, pk_columns, last_pks, io, cli)
  where_sql = build_where_clause(source_pos, pk_columns, last_pks)
  bind_values = last_pks || []
  order_sql = build_order_by_clause(pk_columns)
  sql = SELECT_QUERY % [fqtn, where_sql, order_sql, num_rows]

  rows_written = 0
  final_row = nil
  cli.query(sql, bind_values).each do |row|
    rows_written += 1
    final_row = row
    dump_row(row, columns, io)
  end

  next_pks = if final_row
    pk_columns.collect { |pk| final_row[pk] }
  end

  [rows_written, next_pks]
end
137
+
138
# Writes one row to io as a msgpack array whose values follow the order of
# the columns list.
def dump_row(row, columns, io)
  ordered_values = columns.collect { |column| row[column] }
  io.write(ordered_values.to_msgpack)
end
142
+
143
# Builds the WHERE clause for a chunk query.
#
# The clause always restricts rows to those visible in the snapshot held by
# source_pos. When last_pks is given (i.e. this is not the first chunk), the
# primary key conditions from pk_conditions are appended.
#
# Returns the clause String. The clause is returned explicitly: a trailing
# `if` would evaluate to nil for the first chunk and the snapshot filter
# would be dropped from the query.
def build_where_clause(source_pos, pk_columns, last_pks)
  clause = " WHERE txid_visible_in_snapshot(xmin::TEXT::BIGINT, '#{source_pos.snapshot_id}'::TXID_SNAPSHOT)"
  clause += pk_conditions(pk_columns) if last_pks
  clause
end
149
+
150
# Builds the ORDER BY clause listing the primary key columns, each wrapped in
# double quotes and separated by commas.
def build_order_by_clause(pk_columns)
  quoted_columns = pk_columns.collect { |column| %Q|"#{column}"| }
  " ORDER BY #{quoted_columns.join(",")}"
end
153
+
154
# Builds the " AND (...)" condition selecting rows that sort after the last
# dumped primary key. Column i is compared against placeholder $(i+1).
#
# For columns a, b the result is:
#   AND ("a" > $1 OR ("a" = $1 AND ("b" > $2)))
def pk_conditions(pk_columns)
  condition = nil
  # Walk from the last column to the first so each step wraps the previous one.
  (pk_columns.size - 1).downto(0) do |index|
    column = pk_columns[index]
    placeholder = index + 1
    condition =
      if condition
        %Q|"#{column}" > $#{placeholder} OR ("#{column}" = $#{placeholder} AND (#{condition}))|
      else
        %Q|"#{column}" > $#{placeholder}|
      end
  end
  " AND (#{condition})"
end
168
+
169
# Returns the double-quoted table name, prefixed with the double-quoted
# schema name when a schema is given.
def fq_table_name(schema, table)
  quoted_table = %Q|"#{table}"|
  return quoted_table unless schema

  %Q|"#{schema}".#{quoted_table}|
end
172
+ end
173
+
174
+ end
175
+ end
@@ -0,0 +1,126 @@
1
+ require 'flydata/source/parse_dump_and_send'
2
+ require 'flydata/source_postgresql/postgresql_component'
3
+ require 'flydata-core/table_def/postgresql_table_def'
4
+
5
+ module Flydata
6
+ module SourcePostgresql
7
+
8
# PostgreSQL flavor of the dump parse-and-send step.
class ParseDumpAndSend < Source::ParseDumpAndSend
  include PostgresqlComponent

  # Value converters defined for PostgreSQL table definitions.
  def value_converters
    FlydataCore::TableDef::PostgresqlTableDef::VALUE_CONVERTERS
  end

  # Parses the whole dump by delegating to DumpParser.
  #
  # dump format
  # dump file is in msgpack. Each table data starts with a source table hash
  # followed by row arrays.
  #
  # {"table_name"=>"users", "columns"=>{"id"=>{"column_name"=>"id", "format_type"=>"bigint"}, "name"=>{"column_name"=>"name", "format_type"=>"character varying"}, "another_id"=>{"column_name"=>"another_id", "format_type"=>"integer"}}}
  # ["2", "hay", "1"]
  # ["3", "hoe", "2"]
  def parse_dump(dump_pos_info, dmpio, create_table_block, insert_record_block,
                 check_point_block)
    DumpParser.new(dump_pos_info, dmpio, create_table_block,
                   insert_record_block, check_point_block).parse_all
  end
end
29
+
30
# Parser for the msgpack dump: a stream of table-info hashes, each followed
# by that table's row arrays. Rows are buffered and handed to the insert
# callback in batches; check point callbacks report progress.
class DumpParser
  MAX_ROW_BYTES = 1 * 1024 * 1024 # Parser holds rows until the total byte size
                                  # reaches this number

  # dump_pos_info       - hash; :source_pos is required, :last_pos and
  #                       :source_table are used to resume a previous parse
  # dmpio               - IO of the dump file
  # create_table_block  - called with the source table when a table starts
  # insert_record_block - called with (source table, rows) for each batch
  # check_point_block   - called with (source table, last_pos, last_pos,
  #                       source_pos, state)
  #
  # Raises ArgumentError when dump_pos_info has no :source_pos.
  def initialize(dump_pos_info, dmpio, create_table_block, insert_record_block,
                 check_point_block)
    @source_pos = dump_pos_info[:source_pos]
    raise ArgumentError, "source position is required" unless @source_pos
    @current_table = nil
    @last_pos = 0
    @row_head_pos = nil
    @rows = []
    @dmpio = dmpio
    @create_table_block = create_table_block
    @insert_record_block = insert_record_block
    @check_point_block = check_point_block

    resume(dump_pos_info)
  end

  # Unpacks every object from the dump IO, feeds it to #parse, then flushes
  # whatever is still buffered via #close.
  def parse_all
    u = MessagePack::Unpacker.new(@dmpio)
    u.each do |obj|
      # IO position minus the bytes still sitting in the unpacker's buffer
      @last_pos = @dmpio.pos - u.buffer.size
      parse(obj)
    end
    close
  end

  # Dispatches one unpacked object: a Hash is a table header, anything else
  # is treated as a data row.
  def parse(obj)
    obj.kind_of?(Hash) ? handle_table_info(obj) : handle_data_row(obj)
  end

  # Flushes rows still held in the buffer.
  def close
    unless @rows.empty?
      call_insert_record_block
      # Core logic expects a check point callback with CREATE_TABLE at the
      # end of table data insertion of each table. #handle_table_info takes
      # care of all tables but the last table. This is for the last table.
      call_check_point_block(Parser::State::CREATE_TABLE)
    end
  end


  private

  # Restores position and current table from dump_pos_info when it carries a
  # :last_pos; otherwise parsing starts from the beginning.
  def resume(dump_pos_info)
    last_pos = dump_pos_info[:last_pos] ? dump_pos_info[:last_pos].to_i : -1
    if last_pos == -1
      # no resume point
      return
    end

    @last_pos = last_pos
    @dmpio.pos = last_pos
    @current_table = dump_pos_info[:source_table]
  end

  # {"table_name"=>"users", "columns"=>{"id"=>{"column_name"=>"id", "format_type"=>"bigint"}, "name"=>{"column_name"=>"name", "format_type"=>"character varying"}, "another_id"=>{"column_name"=>"another_id", "format_type"=>"integer"}}}
  def handle_table_info(table_info)
    # Rows still buffered belong to the table that just ended. Send them
    # before @current_table changes, otherwise they would be inserted under
    # the next table.
    call_insert_record_block unless @rows.empty?
    call_check_point_block(Parser::State::CREATE_TABLE)
    # Keep only column_name/format_type of each column, keyed by symbol
    columns = table_info["columns"].inject({}) do |h, (k, v)|
      h[k] = %w(column_name format_type).inject({}) do |hh, kk|
        hh[kk.to_sym] = v[kk]
        hh
      end
      h
    end
    @current_table = Parser::SourceTable.new(table_info["table_name"], columns)
    @create_table_block.call(@current_table)
    call_check_point_block(Parser::State::INSERT_RECORD)
  end

  # Buffers a row; the buffer is flushed once the rows span more than
  # MAX_ROW_BYTES of the dump.
  def handle_data_row(row)
    @row_head_pos = @last_pos unless @row_head_pos
    @rows << row
    if @last_pos - @row_head_pos > MAX_ROW_BYTES
      call_insert_record_block
    end
  end

  def call_check_point_block(state)
    @check_point_block.call(@current_table, @last_pos, @last_pos, @source_pos,
                            state)
  end

  # Sends the buffered rows to the insert callback, emits an INSERT_RECORD
  # check point when the callback returns a truthy value, and resets the
  # buffer.
  def call_insert_record_block
    if @insert_record_block.call(@current_table, @rows)
      call_check_point_block(Parser::State::INSERT_RECORD)
    end
    @rows = []
    @row_head_pos = nil
  end
end
124
+
125
+ end
126
+ end
@@ -0,0 +1,43 @@
1
+ require 'pg'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
# Thin wrapper around a PG::Connection built from data entry preferences.
class PGClient
  # Opens a connection using the host/port/database/username/password found
  # in de_prefs.
  def initialize(de_prefs)
    @de_prefs = de_prefs
    @cli = PG::Connection.new(pg_opts_from_de_prefs(de_prefs))
  end

  # Runs query with params on the underlying connection.
  #
  # When opts has :placeholder_start_num, the query is first treated as a
  # format string and its %s is replaced by opts[:placeholder_size]
  # comma-separated placeholders starting at $<placeholder_start_num>.
  def query(query, params = [], opts = {})
    sql = query
    if opts.has_key?(:placeholder_start_num)
      list = placeholder_string(opts[:placeholder_size],
                                opts[:placeholder_start_num])
      sql = query % [list]
    end

    @cli.query(sql, params)
  end

  private

  # Maps preference keys to the connection option names; sslmode is always
  # :prefer.
  def pg_opts_from_de_prefs(de_prefs)
    pref_key_for = { host: 'host', port: 'port', dbname: 'database',
                     user: 'username', password: 'password' }
    connection_opts = pref_key_for.each_with_object({}) do |(option, pref_key), acc|
      acc[option] = de_prefs[pref_key]
    end
    connection_opts[:sslmode] = :prefer
    connection_opts
  end

  # e.g. placeholder_string(3, 2) => "$2,$3,$4"
  def placeholder_string(num_items, start_num)
    num_items.times.collect { |offset| "$#{offset + start_num}" }.join(",")
  end
end
41
+
42
+ end
43
+ end
@@ -0,0 +1,12 @@
1
+ module Flydata
2
+ module SourcePostgresql
3
+
4
+ # Postgresql specific Component helper methods
5
module PostgresqlComponent
  # Returns the 'postgresql_data_entry_preference' section of the data entry
  # (`de`, provided by the including class).
  def de_prefs
    data_entry = de
    data_entry['postgresql_data_entry_preference']
  end
end
10
+
11
+ end
12
+ end
@@ -0,0 +1,24 @@
1
+ require 'flydata/source/setup'
2
+ require 'flydata/sync_file_manager'
3
+
4
+ module Flydata
5
+ module SourcePostgresql
6
+
7
# PostgreSQL flavor of the setup step. Both public methods depend on whether
# a source position file already exists for the data entry.
class Setup < Source::Setup
  # True when a source position file already exists.
  def initial_run_need_restart?
    overinstall?
  end

  # :all_done when a source position file already exists, :initial_sync
  # otherwise.
  def initial_run_complete_message
    overinstall? ? :all_done : :initial_sync
  end

  private

  # Checks for the source position file reported by SyncFileManager.
  # Uses File.exist?; the File.exists? alias is deprecated and no longer
  # available on Ruby 3.2+.
  def overinstall?
    File.exist?(Flydata::SyncFileManager.new(de).source_pos_path)
  end
end
22
+
23
+ end
24
+ end