flydata 0.6.11 → 0.6.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +4 -4
  4. data/VERSION +1 -1
  5. data/flydata-core/lib/flydata-core/postgresql/source_pos.rb +34 -0
  6. data/flydata-core/lib/flydata-core/table_def/base.rb +10 -0
  7. data/flydata-core/lib/flydata-core/table_def/postgresql_table_def.rb +20 -4
  8. data/flydata-core/spec/postgresql/source_pos_spec.rb +43 -0
  9. data/flydata-core/spec/table_def/base_spec.rb +51 -0
  10. data/flydata.gemspec +0 -0
  11. data/lib/flydata/command/sender.rb +9 -6
  12. data/lib/flydata/command/setup.rb +6 -12
  13. data/lib/flydata/command/sync.rb +31 -17
  14. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flydata_sync.rb +2 -3
  15. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +15 -14
  16. data/lib/flydata/parser/source_table.rb +4 -3
  17. data/lib/flydata/plugin_support/context.rb +46 -0
  18. data/lib/flydata/plugin_support/sync_record_emittable.rb +69 -0
  19. data/lib/flydata/source/component.rb +1 -1
  20. data/lib/flydata/source/generate_source_dump.rb +3 -2
  21. data/lib/flydata/source_mysql/mysql_compatibility_check.rb +12 -11
  22. data/lib/flydata/source_mysql/parser/dump_parser.rb +0 -4
  23. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/alter_table_query_handler.rb +8 -2
  24. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_position_file.rb +7 -1
  25. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_dispatcher.rb +10 -4
  26. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_handler.rb +8 -2
  27. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_record_dispatcher.rb +9 -3
  28. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_record_handler.rb +16 -34
  29. data/lib/flydata/source_mysql/plugin_support/context.rb +7 -0
  30. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/ddl_query_handler.rb +11 -19
  31. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/dml_record_handler.rb +8 -2
  32. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/drop_database_query_handler.rb +8 -2
  33. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/table_meta.rb +5 -1
  34. data/lib/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/truncate_table_query_handler.rb +8 -2
  35. data/lib/flydata/source_postgresql/generate_source_dump.rb +175 -0
  36. data/lib/flydata/source_postgresql/parse_dump_and_send.rb +126 -0
  37. data/lib/flydata/source_postgresql/pg_client.rb +43 -0
  38. data/lib/flydata/source_postgresql/postgresql_component.rb +12 -0
  39. data/lib/flydata/source_postgresql/setup.rb +24 -0
  40. data/lib/flydata/source_postgresql/source_pos.rb +18 -0
  41. data/lib/flydata/source_postgresql/sync_generate_table_ddl.rb +7 -15
  42. data/lib/flydata/sync_file_manager.rb +39 -28
  43. data/spec/flydata/command/setup_spec.rb +0 -1
  44. data/spec/flydata/command/sync_spec.rb +2 -2
  45. data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +5 -6
  46. data/spec/flydata/plugin_support/context_spec.rb +27 -0
  47. data/spec/flydata/source_mysql/parser/dump_parser_spec.rb +4 -4
  48. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/alter_table_query_handler_spec.rb +3 -3
  49. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/binlog_query_dispatcher_spec.rb +5 -5
  50. data/spec/flydata/source_mysql/plugin_support/context_spec.rb +26 -0
  51. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/ddl_query_handler_spec.rb +3 -3
  52. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/dml_record_handler_spec.rb +2 -2
  53. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/drop_database_query_handler_spec.rb +3 -3
  54. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/shared_query_handler_context.rb +3 -1
  55. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/table_meta_spec.rb +3 -3
  56. data/spec/flydata/{fluent-plugins/mysql → source_mysql/plugin_support}/truncate_query_handler_spec.rb +7 -4
  57. data/spec/flydata/source_postgresql/generate_source_dump_spec.rb +144 -0
  58. data/spec/flydata/sync_file_manager_spec.rb +1 -1
  59. metadata +38 -24
  60. data/lib/flydata/fluent-plugins/mysql/context.rb +0 -25
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/binlog_record_handler'
1
+ require 'flydata/source_mysql/plugin_support/binlog_record_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class DmlRecordHandler < BinlogRecordHandler
5
8
  ROW = :row
6
9
  OLD = :old
@@ -106,3 +109,6 @@ module Mysql
106
109
  end
107
110
  end
108
111
  end
112
+
113
+ end
114
+ end
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/ddl_query_handler'
1
+ require 'flydata/source_mysql/plugin_support/ddl_query_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class DropDatabaseQueryHandler < DatabaseDdlQueryHandler
5
8
  # For MySQL, database and schema are exchangable
6
9
  PATTERN = /^DROP (DATABASE|SCHEMA)/i
@@ -22,3 +25,6 @@ module Mysql
22
25
  end
23
26
  end
24
27
  end
28
+
29
+ end
30
+ end
@@ -3,7 +3,9 @@ require 'flydata-core/mysql/config'
3
3
  require 'flydata-core/table_def/mysql_table_def'
4
4
 
5
5
  module Flydata
6
- module Mysql
6
+ module SourceMysql
7
+
8
+ module PluginSupport
7
9
  class TableMeta
8
10
  MANDATORY_OPTS = [
9
11
  :host, :port, :username, :password,
@@ -57,4 +59,6 @@ EOT
57
59
  end
58
60
  end
59
61
  end
62
+
63
+ end
60
64
  end
@@ -1,6 +1,9 @@
1
- require 'flydata/fluent-plugins/mysql/ddl_query_handler'
1
+ require 'flydata/source_mysql/plugin_support/ddl_query_handler'
2
2
 
3
- module Mysql
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ module PluginSupport
4
7
  class TruncateTableQueryHandler < TableDdlQueryHandler
5
8
  PATTERN = /^TRUNCATE/i
6
9
 
@@ -22,3 +25,6 @@ module Mysql
22
25
  end
23
26
  end
24
27
  end
28
+
29
+ end
30
+ end
@@ -0,0 +1,175 @@
1
+ require 'flydata/source/generate_source_dump'
2
+ require 'flydata/preference/data_entry_preference'
3
+ require 'flydata/source_postgresql/postgresql_component'
4
+ require 'flydata/source_postgresql/pg_client'
5
+ require 'flydata-core/postgresql/source_pos'
6
+ require 'msgpack'
7
+
8
+ module Flydata
9
+ module SourcePostgresql
10
+
11
+ class GenerateSourceDump < Source::GenerateSourceDump
12
+ include PostgresqlComponent
13
+
14
# Verifies that every mandatory PostgreSQL preference is present.
#
# dump_dir, backup_dir - accepted for interface compatibility; not used here.
#
# Raises RuntimeError naming the first of host/username/database/schema whose
# value is nil or empty, together with the conf file path to edit.
def run_compatibility_check(dump_dir, backup_dir)
  required_keys = %w(host username database schema)
  required_keys.each do |key|
    next unless de_prefs[key].to_s.empty?

    conf_path = Flydata::Preference::DataEntryPreference.conf_path(de)
    raise "'#{key}' is required. Set the value in the conf file " +
          "-> #{conf_path}"
  end
end
22
+
23
# Returns a Hash of the connection settings (host, port, username, database,
# schema - in that order) read from the data entry preferences.
# The password is deliberately not part of the list.
def confirmation_items
  %w(host port username database schema).each_with_object({}) do |key, items|
    items[key] = de_prefs[key]
  end
end
34
+
35
+ DUMP_SIZE_QUERY = <<EOS
36
+ SELECT sum(pg_total_relation_size(c.oid)) AS "total_size"
37
+ FROM pg_class c
38
+ LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
39
+ WHERE nspname = $1 AND relname in (%s)
40
+ EOS
41
+ TABLE_PLACEHOLDER_START_NUM = 2 # because $1 is used by table_schema
42
+
43
# Returns the total on-disk size (as an Integer) reported by DUMP_SIZE_QUERY
# for the given tables in the configured schema.
#
# tables - Array of table names.
#
# An empty list yields 0 without touching the database: the query template
# would otherwise expand to "relname in ()", which is not valid SQL.
def dump_size(tables)
  return 0 if tables.empty?

  cli = PGClient.new(de_prefs)

  # $1 is the schema; the table names fill the placeholders that follow it.
  res = cli.query(DUMP_SIZE_QUERY, [de_prefs['schema']] + tables,
                  placeholder_size: tables.size,
                  placeholder_start_num: TABLE_PLACEHOLDER_START_NUM)

  res.first['total_size'].to_i
end
50
+
51
# Dumps the given tables to file_path.
#
# tables           - table names handed to each_source_tabledef.
# file_path        - destination file; required (dumping to a pipe is not
#                    implemented).
# src_pos_callback - block forwarded to get_source_pos.
#
# Returns nil.
# Raises RuntimeError when file_path is not given.
def dump(tables, file_path = nil, &src_pos_callback)
  raise "dump via pipe has not been implemented yet" unless file_path

  # The block form guarantees the file is flushed and closed even when a
  # query or a table dump raises. Binary mode because the payload written by
  # the dump_* helpers is msgpack, not text.
  File.open(file_path, "wb") do |io|
    cli = PGClient.new(de_prefs)

    source_pos = get_source_pos(cli, &src_pos_callback)

    context = source.sync_generate_table_ddl(dp, nil)
    context.each_source_tabledef(tables, de_prefs) do |tabledef, _error|
      # Entries yielded without a tabledef are skipped.
      dump_table(tabledef, source_pos, io, cli) if tabledef
    end
  end
  nil
end
68
+
69
+ private
70
+
71
+ CURRENT_SNAPSHOT_QUERY = <<EOS
72
+ SELECT txid_current_snapshot() AS current_snapshot;
73
+ EOS
74
+
75
# Reads the current transaction snapshot through cli, wraps it in a
# FlydataCore::Postgresql::SourcePos, passes (nil, source_pos) to the given
# callback and returns the source position.
def get_source_pos(cli, &src_pos_callback)
  snapshot_row = cli.query(CURRENT_SNAPSHOT_QUERY).first

  FlydataCore::Postgresql::SourcePos.new(snapshot_row['current_snapshot']).tap do |pos|
    src_pos_callback.call(nil, pos)
  end
end
84
+
85
+ NUM_ROWS = 500000
86
+
87
# Writes one table to io: first its header (dump_source_table), then its rows
# in chunks of NUM_ROWS. Each chunk continues from the primary key values of
# the last row of the previous chunk; a chunk shorter than NUM_ROWS ends the
# loop.
def dump_table(tabledef, source_pos, io, cli)
  table = fq_table_name(de_prefs['schema'], tabledef.table_name)
  key_columns = tabledef.pk_columns
  all_columns = tabledef.column_names

  dump_source_table(tabledef, io)

  cursor = nil
  loop do
    fetched, cursor = dump_table_chunk(table, source_pos, NUM_ROWS,
                                       all_columns, key_columns, cursor, io, cli)
    break unless fetched >= NUM_ROWS
  end
end
101
+
102
# Writes the table header to io as a msgpack-encoded Hash of the form
#   {"table_name" => ..., "columns" => {name => {"column_name" => name,
#                                                "format_type" => data type}}}
#
# tabledef.column_def maps each column name to a String holding Ruby source
# for a Hash; its "data_type" entry becomes the column's format_type.
def dump_source_table(tabledef, io)
  column_entries = tabledef.column_def.each_with_object({}) do |(name, definition), entries|
    # NOTE(review): eval executes the column definition string as Ruby code.
    # This is only safe while the strings are generated by flydata itself -
    # confirm they can never carry externally supplied content.
    parsed = eval definition
    entries[name] = { "column_name" => name,
                      "format_type" => parsed["data_type"] }
  end
  header = { "table_name" => tabledef.table_name,
             "columns" => column_entries }

  io.write(header.to_msgpack)
end
114
+
115
+ SELECT_QUERY = <<EOS
116
+ SELECT * FROM %s%s%s LIMIT %s;
117
+ EOS
118
+
119
# Fetches up to num_rows rows of fqtn (ordered by primary key, starting after
# last_pks when given) and writes each of them to io via dump_row.
#
# Returns [number of rows written, primary key values of the last row], where
# the second element is nil when no row was returned.
def dump_table_chunk(fqtn, source_pos, num_rows, columns, pk_columns, last_pks, io, cli)
  where_sql = build_where_clause(source_pos, pk_columns, last_pks)
  bind_values = last_pks || []
  order_sql = build_order_by_clause(pk_columns)
  sql = format(SELECT_QUERY, fqtn, where_sql, order_sql, num_rows)

  rows_written = 0
  final_row = nil
  cli.query(sql, bind_values).each do |row|
    rows_written += 1
    final_row = row
    dump_row(row, columns, io)
  end

  next_pks = final_row ? pk_columns.map { |col| final_row[col] } : nil
  [rows_written, next_pks]
end
137
+
138
# Writes one row to io as a msgpack array holding the row's values in the
# order given by columns.
def dump_row(row, columns, io)
  values = columns.map { |name| row[name] }
  io.write(values.to_msgpack)
end
142
+
143
# Builds the WHERE clause for a chunk query.
#
# The clause always restricts rows to those whose xmin is visible in the
# snapshot recorded in source_pos. When last_pks is given (every chunk after
# the first one) the primary key conditions from pk_conditions are appended.
#
# Returns the clause String in both cases. The String must be returned
# explicitly: ending the method with the `if` made it return nil whenever
# last_pks was nil, so the first chunk ran without the snapshot filter.
def build_where_clause(source_pos, pk_columns, last_pks)
  clause = " WHERE txid_visible_in_snapshot(xmin::TEXT::BIGINT, '#{source_pos.snapshot_id}'::TXID_SNAPSHOT)"
  clause += pk_conditions(pk_columns) if last_pks
  clause
end
149
+
150
# Returns an ORDER BY clause listing the double-quoted primary key columns,
# comma separated, with a leading space.
def build_order_by_clause(pk_columns)
  quoted_columns = pk_columns.map { |name| "\"#{name}\"" }
  " ORDER BY " + quoted_columns.join(",")
end
153
+
154
# Builds the condition selecting rows located after the last dumped row.
#
# The columns are compared lexicographically against the placeholders
# $1..$n (one per primary key column, in order), e.g. for [a, b]:
#   "a" > $1 OR ("a" = $1 AND ("b" > $2))
#
# Returns the condition wrapped as " AND (...)".
def pk_conditions(pk_columns)
  # Fold from the last column to the first so that each column wraps the
  # comparison of the columns that follow it.
  nested = pk_columns.each_with_index.reverse_each.inject(nil) do |inner, (name, idx)|
    greater = %Q|"#{name}" > $#{idx + 1}|
    if inner
      %Q|#{greater} OR ("#{name}" = $#{idx + 1} AND (#{inner}))|
    else
      greater
    end
  end
  " AND (#{nested})"
end
168
+
169
# Returns the double-quoted table name, prefixed with the double-quoted
# schema when a schema is given.
def fq_table_name(schema, table)
  quoted_table = %Q|"#{table}"|
  return quoted_table unless schema

  %Q|"#{schema}".#{quoted_table}|
end
172
+ end
173
+
174
+ end
175
+ end
@@ -0,0 +1,126 @@
1
+ require 'flydata/source/parse_dump_and_send'
2
+ require 'flydata/source_postgresql/postgresql_component'
3
+ require 'flydata-core/table_def/postgresql_table_def'
4
+
5
+ module Flydata
6
+ module SourcePostgresql
7
+
8
+ class ParseDumpAndSend < Source::ParseDumpAndSend
9
+ include PostgresqlComponent
10
+
11
# Returns the value converters defined by the PostgreSQL table definition.
def value_converters
  FlydataCore::TableDef::PostgresqlTableDef::VALUE_CONVERTERS
end
14
+
15
+ # dump format
16
+ # dump file is in msgpack. Each table data starts with a source table hash
17
+ # followed by row arrays.
18
+ #
19
+ # {"table_name"=>"users", "columns"=>{"id"=>{"column_name"=>"id", "format_type"=>"bigint"}, "name"=>{"column_name"=>"name", "format_type"=>"character varying"}, "another_id"=>{"column_name"=>"another_id", "format_type"=>"integer"}}}
20
+ # ["2", "hay", "1"]
21
+ # ["3", "hoe", "2"]
22
# Parses the whole dump read from dmpio with a DumpParser, which reports its
# progress through the three given callbacks.
# Returns whatever DumpParser#parse_all returns.
def parse_dump(dump_pos_info, dmpio, create_table_block, insert_record_block,
               check_point_block)
  DumpParser.new(dump_pos_info, dmpio, create_table_block,
                 insert_record_block, check_point_block).parse_all
end
28
+ end
29
+
30
# Parses a dump stream made of msgpack objects: a table info Hash starts each
# table and is followed by that table's rows (Arrays). Tables, row batches
# and check points are reported through the blocks given to the constructor.
class DumpParser
  MAX_ROW_BYTES = 1 * 1024 * 1024 # Parser holds rows until the total byte size
                                  # reaches this number

  # dump_pos_info       - Hash. :source_pos is mandatory; :last_pos and
  #                       :source_table, when present, are used to resume.
  # dmpio               - IO of the dump.
  # create_table_block  - called with the source table of each table info.
  # insert_record_block - called with (source table, rows) for each batch.
  # check_point_block   - called with (source table, last position,
  #                       last position, source position, state).
  #
  # Raises ArgumentError when dump_pos_info has no :source_pos.
  def initialize(dump_pos_info, dmpio, create_table_block, insert_record_block,
                 check_point_block)
    @source_pos = dump_pos_info[:source_pos]
    raise ArgumentError.new("source position is required") unless @source_pos
    @current_table = nil
    @last_pos = 0
    @row_head_pos = nil
    @rows = []
    @dmpio = dmpio
    @create_table_block = create_table_block
    @insert_record_block = insert_record_block
    @check_point_block = check_point_block

    resume(dump_pos_info)
  end

  # Unpacks every object of the dump, dispatches it and finally closes the
  # parser.
  def parse_all
    u = MessagePack::Unpacker.new(@dmpio)
    u.each do |obj|
      # IO position minus what the unpacker still holds in its buffer
      # (presumably the offset right after obj - confirm against msgpack).
      @last_pos = @dmpio.pos - u.buffer.size
      parse(obj)
    end
    close
  end

  # Dispatches one unpacked object: a Hash is a table info, anything else is
  # a data row.
  def parse(obj)
    obj.kind_of?(Hash) ? handle_table_info(obj) : handle_data_row(obj)
  end

  # Flushes the rows still held and reports the end of the last table.
  def close
    call_insert_record_block unless @rows.empty?
    # Core logic expects a check point callback with CREATE_TABLE at the
    # end of table data insertion of each table. #handle_table_info takes
    # care of all tables but the last table. This is for the last table.
    # It is sent whenever a table is current, not only when rows were
    # pending: the last table may be empty or may have been flushed already.
    call_check_point_block(Parser::State::CREATE_TABLE) if @current_table
  end


  private

  # Restores position and current table from dump_pos_info when it carries a
  # resume point (:last_pos).
  def resume(dump_pos_info)
    last_pos = dump_pos_info[:last_pos] ? dump_pos_info[:last_pos].to_i : -1
    if last_pos == -1
      # no resume point
      return
    end

    @last_pos = last_pos
    @dmpio.pos = last_pos
    @current_table = dump_pos_info[:source_table]
  end

  # {"table_name"=>"users", "columns"=>{"id"=>{"column_name"=>"id", "format_type"=>"bigint"}, "name"=>{"column_name"=>"name", "format_type"=>"character varying"}, "another_id"=>{"column_name"=>"another_id", "format_type"=>"integer"}}}
  def handle_table_info(table_info)
    # Rows still held belong to the table that ends here. They must be
    # inserted before @current_table changes, otherwise they would be sent
    # as rows of the new table. No INSERT_RECORD check point is taken for
    # this flush because @last_pos already points past the new table info;
    # the CREATE_TABLE check point below covers it.
    call_insert_record_block(false) unless @rows.empty?
    call_check_point_block(Parser::State::CREATE_TABLE)
    columns = table_info["columns"].each_with_object({}) do |(name, attrs), h|
      h[name] = { column_name: attrs["column_name"],
                  format_type: attrs["format_type"] }
    end
    @current_table = Parser::SourceTable.new(table_info["table_name"], columns)
    @create_table_block.call(@current_table)
    call_check_point_block(Parser::State::INSERT_RECORD)
  end

  # Buffers the row and flushes the buffer once the rows held span more than
  # MAX_ROW_BYTES of the dump.
  def handle_data_row(row)
    @row_head_pos = @last_pos unless @row_head_pos
    @rows << row
    if @last_pos - @row_head_pos > MAX_ROW_BYTES
      call_insert_record_block
    end
  end

  def call_check_point_block(state)
    @check_point_block.call(@current_table, @last_pos, @last_pos, @source_pos,
                            state)
  end

  # Hands the buffered rows to the insert block and empties the buffer.
  # When check_point is true and the insert block returns a truthy value, an
  # INSERT_RECORD check point follows.
  def call_insert_record_block(check_point = true)
    inserted = @insert_record_block.call(@current_table, @rows)
    call_check_point_block(Parser::State::INSERT_RECORD) if inserted && check_point
    @rows = []
    @row_head_pos = nil
  end
end
124
+
125
+ end
126
+ end
@@ -0,0 +1,43 @@
1
+ require 'pg'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
# Thin wrapper around a PG::Connection configured from the data entry
# preferences.
class PGClient
  # de_prefs - Hash with 'host', 'port', 'database', 'username' and
  #            'password'. The connection is opened immediately.
  def initialize(de_prefs)
    @de_prefs = de_prefs
    @cli = PG::Connection.new(pg_opts_from_de_prefs(de_prefs))
  end

  # Runs query with the given bind params and returns the connection's result.
  #
  # opts - when :placeholder_start_num is present, query is treated as a
  #        format string whose single %s is replaced by :placeholder_size
  #        comma separated placeholders starting at $<placeholder_start_num>.
  def query(query, params = [], opts = {})
    if opts.has_key?(:placeholder_start_num)
      placeholders = placeholder_string(opts[:placeholder_size],
                                        opts[:placeholder_start_num])
      q = query % [placeholders]
    else
      q = query
    end

    @cli.query(q, params)
  end

  # Releases the underlying connection. Without this there is no way for a
  # caller to give the connection back before the object is garbage
  # collected. Calling it more than once is harmless.
  def close
    @cli.finish unless @cli.finished?
  end

  private

  # Translates the preference keys into PG::Connection options.
  def pg_opts_from_de_prefs(de_prefs)
    {
      host: de_prefs['host'],
      port: de_prefs['port'],
      dbname: de_prefs['database'],
      user: de_prefs['username'],
      password: de_prefs['password'],
      sslmode: :prefer,
    }
  end

  # e.g. placeholder_string(3, 2) => "$2,$3,$4"
  def placeholder_string(num_items, start_num)
    num_items.times.collect{|i| "$#{i + start_num}"}.join(",")
  end
end
41
+
42
+ end
43
+ end
@@ -0,0 +1,12 @@
1
+ module Flydata
2
+ module SourcePostgresql
3
+
4
# Postgresql specific Component helper methods
module PostgresqlComponent
  # Returns the 'postgresql_data_entry_preference' entry of the data entry
  # supplied by the including class through #de.
  def de_prefs
    de['postgresql_data_entry_preference']
  end
end
10
+
11
+ end
12
+ end
@@ -0,0 +1,24 @@
1
+ require 'flydata/source/setup'
2
+ require 'flydata/sync_file_manager'
3
+
4
+ module Flydata
5
+ module SourcePostgresql
6
+
7
+ class Setup < Source::Setup
8
# Returns the result of overinstall?: a restart is needed exactly when the
# source position file already exists.
def initial_run_need_restart?
  overinstall?
end
11
+
12
# Returns :all_done when overinstall? is true, :initial_sync otherwise.
def initial_run_complete_message
  return :all_done if overinstall?

  :initial_sync
end
15
+
16
+ private
17
+
18
# Returns true when the source position file of this data entry already
# exists.
#
# Uses File.exist?: the File.exists? alias is deprecated and no longer
# available in Ruby 3.2 and later.
def overinstall?
  File.exist?(Flydata::SyncFileManager.new(de).source_pos_path)
end
21
+ end
22
+
23
+ end
24
+ end