flydata 0.6.4 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Gemfile.lock +2 -0
  4. data/VERSION +1 -1
  5. data/flydata-core/lib/flydata-core/table_def/base.rb +31 -0
  6. data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +9 -30
  7. data/flydata-core/lib/flydata-core/table_def/postgresql_table_def.rb +111 -0
  8. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +4 -1
  9. data/flydata-core/spec/table_def/postgresql_table_def_spec.rb +348 -0
  10. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +25 -0
  11. data/flydata.gemspec +0 -0
  12. data/lib/flydata.rb +0 -7
  13. data/lib/flydata/command/base.rb +3 -2
  14. data/lib/flydata/fluent-plugins/flydata_plugin_ext/base.rb +5 -0
  15. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flush_support.rb +52 -0
  16. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flydata_sync.rb +55 -0
  17. data/lib/flydata/fluent-plugins/{idle_event_detector.rb → flydata_plugin_ext/idle_event_detector.rb} +0 -0
  18. data/lib/flydata/fluent-plugins/{preference.rb → flydata_plugin_ext/preference.rb} +2 -14
  19. data/lib/flydata/fluent-plugins/flydata_plugin_ext/transaction_support.rb +58 -0
  20. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +55 -135
  21. data/lib/flydata/fluent-plugins/mysql/dml_record_handler.rb +9 -4
  22. data/lib/flydata/helper/server.rb +7 -0
  23. data/lib/flydata/preference/data_entry_preference.rb +5 -13
  24. data/lib/flydata/source.rb +1 -1
  25. data/lib/flydata/source/data_entry.rb +29 -0
  26. data/lib/flydata/source/sync.rb +19 -0
  27. data/lib/flydata/source/sync_generate_table_ddl.rb +47 -7
  28. data/lib/flydata/source_mysql/data_entry.rb +22 -0
  29. data/lib/flydata/source_mysql/parser/dump_parser.rb +1 -1
  30. data/lib/flydata/source_mysql/parser/mysql_alter_table.treetop +8 -3
  31. data/lib/flydata/source_mysql/sync.rb +1 -8
  32. data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +11 -16
  33. data/lib/flydata/source_postgresql/data_entry.rb +21 -0
  34. data/lib/flydata/source_postgresql/sync.rb +29 -0
  35. data/lib/flydata/source_postgresql/sync_generate_table_ddl.rb +126 -0
  36. data/spec/flydata/fluent-plugins/{idle_event_detector_spec.rb → flydata_plugin_ext/idle_event_detector_spec.rb} +1 -1
  37. data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +10 -0
  38. data/spec/flydata/source_mysql/parser/alter_table_parser_spec.rb +119 -0
  39. data/spec/flydata/source_mysql/parser/dump_parser_spec.rb +32 -1
  40. data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +4 -4
  41. metadata +31 -5
@@ -91,10 +91,15 @@ module Mysql
91
91
  # It's a signless integer.
92
92
  intval = record[row_type][position]
93
93
  next unless (intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/)
94
- width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
95
- signless_val = SIGNLESS_INTEGER_PREFIX
96
- signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
97
- record[row_type][position] = signless_val
94
+ begin
95
+ width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
96
+ signless_val = SIGNLESS_INTEGER_PREFIX
97
+ signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
98
+ record[row_type][position] = signless_val
99
+ rescue => e
100
+ $log.debug "failed to encode signless integer. - exception:`#{e.class.to_s}` record:`#{record[row_type][position]}` column_type:`#{column_type}` width:`#{width}` intval:`#{intval}`"
101
+ raise
102
+ end
98
103
  end
99
104
  end
100
105
  end
@@ -1,5 +1,12 @@
1
1
  require 'serverengine'
2
2
  require 'flydata-core/logger'
3
+ require 'flydata/helper/worker'
4
+
5
+ # Require all helper files -
6
+ lib_dir = File.absolute_path(File.join(__FILE__, '../../..'))
7
+ FileUtils.cd(lib_dir) do
8
+ Dir["flydata/helper/**/*.rb"].each { |file| require file }
9
+ end
3
10
 
4
11
  module Flydata
5
12
  module Helper
@@ -1,17 +1,14 @@
1
1
  require 'fileutils'
2
- require 'flydata/fluent-plugins/preference'
2
+ require 'flydata/fluent-plugins/flydata_plugin_ext/preference'
3
3
 
4
4
  module Flydata
5
5
  module Preference
6
6
  class DataEntryPreference
7
7
  CONFS_HOME = File.join(FLYDATA_HOME, 'confs')
8
- CUSTOM_CONFIG_PARAMS = {
9
- RedshiftMysqlDataEntry: ::Fluent::MysqlBinlogFlydataInputPreference::CUSTOM_CONFIG_PARAMS
10
- }
11
8
 
12
9
  class << self
13
10
  # data_entry must be hash
14
- def load_conf(data_entry)
11
+ def load_conf(data_entry, source)
15
12
  path = conf_path(data_entry)
16
13
  raise "Conf file does not exist. path:#{path}" unless File.exists?(path)
17
14
  custom_conf = YAML::load(File.open(path, 'r'))
@@ -25,12 +22,12 @@ module Flydata
25
22
  data_entry[k] = v
26
23
  end
27
24
  end
28
- filter_data_entry(data_entry)
25
+ filter_data_entry(data_entry, source)
29
26
  data_entry
30
27
  end
31
28
 
32
- def filter_data_entry(de)
33
- configurable_params = CUSTOM_CONFIG_PARAMS[de['type'].to_sym]
29
+ def filter_data_entry(de, source)
30
+ configurable_params = source.data_entry.config_params
34
31
  return de if configurable_params.nil? or configurable_params.empty?
35
32
 
36
33
  configurable_params.each do |pref_name, param_info|
@@ -87,11 +84,6 @@ module Flydata
87
84
  type = ActiveSupport::Inflector.underscore(data_entry['type'])
88
85
  "#{type}.conf.tmpl"
89
86
  end
90
-
91
- def create(type)
92
- custom_config_params = CUSTOM_CONFIG_PARAMS[type]
93
- return nil unless custom_config_params
94
- end
95
87
  end
96
88
  end
97
89
  end
@@ -22,7 +22,7 @@ module Source
22
22
  "RedshiftMysqlDataEntry" => :source_mysql,
23
23
  "RedshiftFileDataEntry" => :source_file,
24
24
  "FileDataEntry" => :source_file,
25
- "RedshiftPostgresDataEntry" => :source_postgres,
25
+ "RedshiftPostgresqlDataEntry" => :source_postgresql,
26
26
  "RedshiftZendeskDataEntry" => :source_zendesk,
27
27
  }
28
28
  def self.component_class_for(component_sym, de)
@@ -0,0 +1,29 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class DataEntry < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface CONFIG_PARAMS
14
+ #
15
+ # Has the definition of data entry parameters specific to the data source.
16
+ # Override
17
+ CONFIG_PARAMS = {
18
+ sourcename_data_entry_preference: {
19
+ source_specific_param1: { key: "subclass must define" },
20
+ }
21
+ }
22
+
23
+ def config_params
24
+ self.class::CONFIG_PARAMS
25
+ end
26
+ end
27
+
28
+ end
29
+ end
@@ -50,6 +50,25 @@ class Sync < Component
50
50
  def forwarder
51
51
  raise UnsupportedSourceError, "subclass must implement"
52
52
  end
53
+
54
+ private
55
+
56
+ def setup_table_prefs(prefs)
57
+ if prefs['tables_append_only']
58
+ prefs['tables_append_only'] =
59
+ prefs['tables_append_only'].split(/(?:\s*,\s*|\s+)/).uniq
60
+ prefs['tables'] = (prefs['tables'].split(/(?:\s*,\s*|\s+)/) +
61
+ prefs['tables_append_only']).uniq
62
+ else
63
+ prefs['tables'] = prefs['tables'].split(/(?:\s*,\s*|\s+)/).uniq
64
+ end
65
+ prefs['invalid_tables'] =
66
+ prefs['invalid_tables'].kind_of?(String) ?
67
+ prefs['invalid_tables'].split(/(?:\s*,\s*|\s+)/).uniq : []
68
+ prefs['new_tables'] =
69
+ prefs['new_tables'].kind_of?(String) ?
70
+ prefs['new_tables'].split(/(?:\s*,\s*|\s+)/).uniq : []
71
+ end
53
72
  end
54
73
 
55
74
  end
@@ -26,15 +26,55 @@ class SyncGenerateTableDdl < Component
26
26
  raise UnsupportedSourceError, "subclass must implement"
27
27
  end
28
28
 
29
- # Public Interface: Generate FlyData table definitions for given tables
29
+ def generate_flydata_tabledef(tables, options)
30
+ prefs = data_entry_prefs
31
+ options = options.merge(prefs)
32
+ flydata_tabledefs = []
33
+ error_list = []
34
+ missing_tables = each_source_tabledef(tables, options) do |source_tabledef, error|
35
+ if error
36
+ error_list << error.err_hash
37
+ next
38
+ end
39
+ flydata_tabledefs << source_tabledef.to_flydata_tabledef
40
+ end
41
+ if missing_tables
42
+ missing_tables.each {|missing_table| error_list << { error: "table does not exist in the #{data_source_type_display_name}", table: missing_table } }
43
+ end
44
+
45
+ [flydata_tabledefs, error_list]
46
+ end
47
+
48
+ private
49
+
50
+ # Returns the name of the data source type. The name will be used in an error message.
51
+ def data_source_type_display_name
52
+ raise UnsupportedSourceError, "subclass must implement"
53
+ end
54
+
55
+ # Returns a data entry preference hash
56
+ def data_entry_prefs
57
+ raise UnsupportedSourceError, "subclass must implement"
58
+ end
59
+
60
+ # Calls `block` with the source tabledef or error for each table.
30
61
  #
31
- # tables - An array of table names
32
- # options - A hash of options
62
+ # tables: An array of table names
33
63
  #
34
- # Returns flydata_tabledefs, errors
35
- # flydata_tablesdefs - An array of FlyData tabledefs
36
- # errors - An array of error hashes for tables whose tabledef generation failed
37
- def generate_flydata_tabledef(tables, options)
64
+ # options: A hash including options. It includes the contents of
65
+ # `data_entry_prefs` and :skip_primary_key_check. When the option is true,
66
+ # the method must call the `block` for a table which is missing the primary
67
+ # key. Otherwise, it should call the `block` with an error.
68
+ #
69
+ # block: A callback block called for each table with the following
70
+ # arguments:
71
+ # source_tabledef: A TableDef object of the source (e.g. MysqlTableDef)
72
+ # The value will be nil if it failed to create the object.
73
+ # error: A FlydataCore::TableDefError object. If no error occurred,
74
+ # this will be nil.
75
+ #
76
+ # Returns an array of tables which do not exist in the source
77
+ def each_source_tabledef(tables, options, &block)
38
78
  raise UnsupportedSourceError, "subclass must implement"
39
79
  end
40
80
  end
@@ -0,0 +1,22 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ mysql_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ host: {},
13
+ username: {},
14
+ password: {encrypted: true},
15
+ ssl_ca_content: {},
16
+ ssl_cipher: {},
17
+ },
18
+ }
19
+ end
20
+
21
+ end
22
+ end
@@ -478,7 +478,7 @@ EOS
478
478
  def parse(line)
479
479
  start_ruby_prof
480
480
  bench_start_time = Time.now
481
- _parse2(line)
481
+ _parse(line)
482
482
  ensure
483
483
  stop_ruby_prof
484
484
  if ENV['FLYDATA_BENCHMARK']
@@ -745,7 +745,7 @@ grammar MysqlAlterTable
745
745
  rule data_type
746
746
  data_type_name meta_text unsigned zerofill {
747
747
  def data_type
748
- meta = (meta_text.text_value.size > 1) ? meta_text.text_value : ''
748
+ meta = (meta_text.text_value.size > 1) ? meta_text.text_value.strip : ''
749
749
  type = data_type_name.text_value.downcase + meta
750
750
  type = FlydataCore::TableDef::MysqlTableDef.convert_to_flydata_type(type)
751
751
  type << " unsigned" if !unsigned.terminal?
@@ -769,7 +769,7 @@ grammar MysqlAlterTable
769
769
  end
770
770
 
771
771
  rule meta_text
772
- '(' meta_value ')' / ''
772
+ ( nsp '(' nsp meta_value nsp ')' )?
773
773
  end
774
774
 
775
775
  rule meta_value
@@ -1154,12 +1154,17 @@ grammar MysqlAlterTable
1154
1154
  end
1155
1155
 
1156
1156
  rule value
1157
- quoted_value { def raw_value; text_raw_value; end }
1157
+ single_quoted_value { def raw_value; text_raw_value; end }
1158
1158
  / double_quoted_value { def raw_value; text_raw_value; end }
1159
1159
  / ident_sym { def raw_value; text_value; end }
1160
1160
  end
1161
1161
 
1162
1162
  rule quoted_value
1163
+ single_quoted_value { def raw_value; text_raw_value; end }
1164
+ / double_quoted_value { def raw_value; text_raw_value; end }
1165
+ end
1166
+
1167
+ rule single_quoted_value
1163
1168
  "'" text "'" { def text_raw_value; text.text_value; end }
1164
1169
  end
1165
1170
 
@@ -7,14 +7,7 @@ class Sync < Source::Sync
7
7
  def setup
8
8
  mp = de['mysql_data_entry_preference']
9
9
 
10
- if mp['tables_append_only']
11
- mp['tables_append_only'] = mp['tables_append_only'].split(",").uniq
12
- mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only']).uniq
13
- else
14
- mp['tables'] = mp['tables'].split(",").uniq
15
- end
16
- mp['invalid_tables'] = mp['invalid_tables'].kind_of?(String) ? mp['invalid_tables'].split(",").uniq : []
17
- mp['new_tables'] = mp['new_tables'].kind_of?(String) ? mp['new_tables'].split(",").uniq : []
10
+ setup_table_prefs(mp)
18
11
 
19
12
  unless mp['ssl_ca_content'].to_s.strip.empty?
20
13
  sync_fm = SyncFileManager.new(de)
@@ -16,23 +16,18 @@ class SyncGenerateTableDdl < Source::SyncGenerateTableDdl
16
16
  end
17
17
  end
18
18
 
19
- def generate_flydata_tabledef(tables, options)
20
- mp = de['mysql_data_entry_preference']
21
- options = options.merge(mp)
22
- flydata_tabledefs = []
23
- error_list = []
24
- missing_tables = FlydataCore::Mysql::CommandGenerator.each_mysql_tabledef(tables, options) do |mysql_tabledef, error|
25
- if error
26
- error_list << error.err_hash
27
- next
28
- end
29
- flydata_tabledefs << mysql_tabledef.to_flydata_tabledef
30
- end
31
- if missing_tables
32
- missing_tables.each {|missing_table| error_list << { error: 'table does not exist in the MySQL database', table: missing_table } }
33
- end
19
+ private
20
+
21
+ def data_source_type_display_name
22
+ "MySQL database"
23
+ end
24
+
25
+ def data_entry_prefs
26
+ de['mysql_data_entry_preference']
27
+ end
34
28
 
35
- [flydata_tabledefs, error_list]
29
+ def each_source_tabledef(tables, options, &block)
30
+ FlydataCore::Mysql::CommandGenerator.each_mysql_tabledef(tables, options, &block)
36
31
  end
37
32
  end
38
33
 
@@ -0,0 +1,21 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ postgresql_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ host: {},
13
+ username: {},
14
+ password: {encrypted: true},
15
+ schema: {},
16
+ }
17
+ }
18
+ end
19
+
20
+ end
21
+ end
@@ -0,0 +1,29 @@
1
+ require 'flydata/source/sync'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
+ class Sync < Source::Sync
7
+ def setup
8
+ setup_table_prefs(de['postgresql_data_entry_preference'])
9
+ end
10
+
11
+ def supported?
12
+ true
13
+ end
14
+
15
+ def table_lists
16
+ de['postgresql_data_entry_preference'].select {|key, value| %w(tables new_tables invalid_tables tables_append_only).include?(key)}
17
+ end
18
+
19
+ def data_servers
20
+ de['postgresql_data_entry_preference']['data_servers']
21
+ end
22
+
23
+ def forwarder
24
+ de['postgresql_data_entry_preference']['forwarder']
25
+ end
26
+ end
27
+
28
+ end
29
+ end
@@ -0,0 +1,126 @@
1
+ require 'flydata/source/sync_generate_table_ddl'
2
+ require 'flydata-core/table_def/postgresql_table_def'
3
+ require 'pg'
4
+
5
+ module Flydata
6
+ module SourcePostgresql
7
+
8
+ class SyncGenerateTableDdl < Source::SyncGenerateTableDdl
9
+ def run_compatibility_check
10
+ # do nothing for now
11
+ end
12
+
13
+ def data_source_type_display_name
14
+ "PostgreSQL database"
15
+ end
16
+
17
+ def data_entry_prefs
18
+ de['postgresql_data_entry_preference']
19
+ end
20
+
21
+ def each_source_tabledef(tables, options, &block)
22
+ pg_opts = {
23
+ host: options['host'],
24
+ port: options['port'],
25
+ dbname: options['database'],
26
+ user: options['username'],
27
+ password: options['password'],
28
+ sslmode: :prefer,
29
+ }
30
+ # PostgreSQL options.
31
+ tables = tables.clone
32
+ missing_tables = []
33
+ begin
34
+ if tables.to_s == '' || tables.to_s == '[]'
35
+ raise ArgumentError, "tables is nil or empty"
36
+ end
37
+ _each_tabledef(tables, options, pg_opts, &block)
38
+ rescue TableMissingError => e
39
+ tables.delete e.table
40
+ missing_tables << e.table
41
+ return missing_tables if tables.empty?
42
+ retry
43
+ end
44
+ missing_tables
45
+ end
46
+
47
+ private
48
+
49
+ COLUMNS_QUERY = <<EOS
50
+ SELECT c.table_name, c.column_name, c.data_type, c.character_octet_length,
51
+ c.numeric_precision, c.numeric_scale, c.is_nullable, c.column_default, i.indisprimary AS is_primary
52
+ FROM pg_index i
53
+ JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
54
+ RIGHT JOIN
55
+ (SELECT (table_catalog ||'.'|| table_schema ||'.'|| table_name)::regclass AS regid, *
56
+ FROM information_schema.columns) c
57
+ ON i.indrelid = c.regid AND a.attname = c.column_name
58
+ WHERE c.table_schema = $1 AND c.table_name IN (%s)
59
+ ORDER BY c.table_name, c.ordinal_position;
60
+ EOS
61
+ TABLE_PLACEHOLDER_START_NUM = 2 # because $1 is used by table_schema
62
+
63
+ def _each_tabledef(tables, options, pg_opts, &block)
64
+ cli = PG::Connection.new(pg_opts)
65
+
66
+ # TODO call the query for every 50 tables
67
+ placeholders = placeholder_string(tables.size, TABLE_PLACEHOLDER_START_NUM)
68
+ query = COLUMNS_QUERY % [placeholders]
69
+ res = cli.query(query, [options['schema']] + tables)
70
+
71
+ create_opt = {}
72
+ if options.has_key?(:skip_primary_key_check)
73
+ create_opt[:skip_primary_key_check] = options[:skip_primary_key_check]
74
+ end
75
+ current_table = nil
76
+ columns = []
77
+ completed_tables = []
78
+ res.each do |row|
79
+ table_name = row["table_name"]
80
+ unless table_name == current_table
81
+ unless columns.empty?
82
+ tabledef = create_tabledef_and_yield(columns, create_opt, &block)
83
+ completed_tables << current_table
84
+ columns = []
85
+ break unless tabledef
86
+ end
87
+ current_table = table_name
88
+ end
89
+ columns << row
90
+ end
91
+ unless columns.empty?
92
+ create_tabledef_and_yield(columns, create_opt, &block)
93
+ completed_tables << current_table
94
+ end
95
+ missing_tables = tables - completed_tables
96
+ unless missing_tables.empty?
97
+ raise TableMissingError.new("Table is missing", missing_tables.first)
98
+ end
99
+ end
100
+
101
+ def placeholder_string(num_items, start_num)
102
+ num_items.times.collect{|i| "$#{i + start_num}"}.join(",")
103
+ end
104
+
105
+ def create_tabledef_and_yield(columns, create_opt, &block)
106
+ pg_tabledef = nil
107
+ begin
108
+ pg_tabledef = FlydataCore::TableDef::PostgresqlTableDef.create(columns, create_opt)
109
+ yield(pg_tabledef, nil) if pg_tabledef
110
+ rescue FlydataCore::TableDefError => e
111
+ yield(nil, e)
112
+ end
113
+ pg_tabledef
114
+ end
115
+
116
+ class TableMissingError < RuntimeError
117
+ def initialize(message, table)
118
+ super(message)
119
+ @table = table
120
+ end
121
+ attr_reader :table
122
+ end
123
+ end
124
+
125
+ end
126
+ end