flydata 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -0
  3. data/Gemfile.lock +2 -0
  4. data/VERSION +1 -1
  5. data/flydata-core/lib/flydata-core/table_def/base.rb +31 -0
  6. data/flydata-core/lib/flydata-core/table_def/mysql_table_def.rb +9 -30
  7. data/flydata-core/lib/flydata-core/table_def/postgresql_table_def.rb +111 -0
  8. data/flydata-core/lib/flydata-core/table_def/redshift_table_def.rb +4 -1
  9. data/flydata-core/spec/table_def/postgresql_table_def_spec.rb +348 -0
  10. data/flydata-core/spec/table_def/redshift_table_def_spec.rb +25 -0
  11. data/flydata.gemspec +0 -0
  12. data/lib/flydata.rb +0 -7
  13. data/lib/flydata/command/base.rb +3 -2
  14. data/lib/flydata/fluent-plugins/flydata_plugin_ext/base.rb +5 -0
  15. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flush_support.rb +52 -0
  16. data/lib/flydata/fluent-plugins/flydata_plugin_ext/flydata_sync.rb +55 -0
  17. data/lib/flydata/fluent-plugins/{idle_event_detector.rb → flydata_plugin_ext/idle_event_detector.rb} +0 -0
  18. data/lib/flydata/fluent-plugins/{preference.rb → flydata_plugin_ext/preference.rb} +2 -14
  19. data/lib/flydata/fluent-plugins/flydata_plugin_ext/transaction_support.rb +58 -0
  20. data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +55 -135
  21. data/lib/flydata/fluent-plugins/mysql/dml_record_handler.rb +9 -4
  22. data/lib/flydata/helper/server.rb +7 -0
  23. data/lib/flydata/preference/data_entry_preference.rb +5 -13
  24. data/lib/flydata/source.rb +1 -1
  25. data/lib/flydata/source/data_entry.rb +29 -0
  26. data/lib/flydata/source/sync.rb +19 -0
  27. data/lib/flydata/source/sync_generate_table_ddl.rb +47 -7
  28. data/lib/flydata/source_mysql/data_entry.rb +22 -0
  29. data/lib/flydata/source_mysql/parser/dump_parser.rb +1 -1
  30. data/lib/flydata/source_mysql/parser/mysql_alter_table.treetop +8 -3
  31. data/lib/flydata/source_mysql/sync.rb +1 -8
  32. data/lib/flydata/source_mysql/sync_generate_table_ddl.rb +11 -16
  33. data/lib/flydata/source_postgresql/data_entry.rb +21 -0
  34. data/lib/flydata/source_postgresql/sync.rb +29 -0
  35. data/lib/flydata/source_postgresql/sync_generate_table_ddl.rb +126 -0
  36. data/spec/flydata/fluent-plugins/{idle_event_detector_spec.rb → flydata_plugin_ext/idle_event_detector_spec.rb} +1 -1
  37. data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +10 -0
  38. data/spec/flydata/source_mysql/parser/alter_table_parser_spec.rb +119 -0
  39. data/spec/flydata/source_mysql/parser/dump_parser_spec.rb +32 -1
  40. data/spec/flydata/source_mysql/sync_generate_table_ddl_spec.rb +4 -4
  41. metadata +31 -5
@@ -91,10 +91,15 @@ module Mysql
91
91
  # It's a signless integer.
92
92
  intval = record[row_type][position]
93
93
  next unless (intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/)
94
- width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
95
- signless_val = SIGNLESS_INTEGER_PREFIX
96
- signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
97
- record[row_type][position] = signless_val
94
+ begin
95
+ width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
96
+ signless_val = SIGNLESS_INTEGER_PREFIX
97
+ signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
98
+ record[row_type][position] = signless_val
99
+ rescue => e
100
+ $log.debug "failed to encode signless integer. - exception:`#{e.class.to_s}` record:`#{record[row_type][position]}` column_type:`#{column_type}` width:`#{width}` intval:`#{intval}`"
101
+ raise
102
+ end
98
103
  end
99
104
  end
100
105
  end
@@ -1,5 +1,12 @@
1
1
  require 'serverengine'
2
2
  require 'flydata-core/logger'
3
+ require 'flydata/helper/worker'
4
+
5
+ # Require all helper files -
6
+ lib_dir = File.absolute_path(File.join(__FILE__, '../../..'))
7
+ FileUtils.cd(lib_dir) do
8
+ Dir["flydata/helper/**/*.rb"].each { |file| require file }
9
+ end
3
10
 
4
11
  module Flydata
5
12
  module Helper
@@ -1,17 +1,14 @@
1
1
  require 'fileutils'
2
- require 'flydata/fluent-plugins/preference'
2
+ require 'flydata/fluent-plugins/flydata_plugin_ext/preference'
3
3
 
4
4
  module Flydata
5
5
  module Preference
6
6
  class DataEntryPreference
7
7
  CONFS_HOME = File.join(FLYDATA_HOME, 'confs')
8
- CUSTOM_CONFIG_PARAMS = {
9
- RedshiftMysqlDataEntry: ::Fluent::MysqlBinlogFlydataInputPreference::CUSTOM_CONFIG_PARAMS
10
- }
11
8
 
12
9
  class << self
13
10
  # data_entry must be hash
14
- def load_conf(data_entry)
11
+ def load_conf(data_entry, source)
15
12
  path = conf_path(data_entry)
16
13
  raise "Conf file does not exist. path:#{path}" unless File.exists?(path)
17
14
  custom_conf = YAML::load(File.open(path, 'r'))
@@ -25,12 +22,12 @@ module Flydata
25
22
  data_entry[k] = v
26
23
  end
27
24
  end
28
- filter_data_entry(data_entry)
25
+ filter_data_entry(data_entry, source)
29
26
  data_entry
30
27
  end
31
28
 
32
- def filter_data_entry(de)
33
- configurable_params = CUSTOM_CONFIG_PARAMS[de['type'].to_sym]
29
+ def filter_data_entry(de, source)
30
+ configurable_params = source.data_entry.config_params
34
31
  return de if configurable_params.nil? or configurable_params.empty?
35
32
 
36
33
  configurable_params.each do |pref_name, param_info|
@@ -87,11 +84,6 @@ module Flydata
87
84
  type = ActiveSupport::Inflector.underscore(data_entry['type'])
88
85
  "#{type}.conf.tmpl"
89
86
  end
90
-
91
- def create(type)
92
- custom_config_params = CUSTOM_CONFIG_PARAMS[type]
93
- return nil unless custom_config_params
94
- end
95
87
  end
96
88
  end
97
89
  end
@@ -22,7 +22,7 @@ module Source
22
22
  "RedshiftMysqlDataEntry" => :source_mysql,
23
23
  "RedshiftFileDataEntry" => :source_file,
24
24
  "FileDataEntry" => :source_file,
25
- "RedshiftPostgresDataEntry" => :source_postgres,
25
+ "RedshiftPostgresqlDataEntry" => :source_postgresql,
26
26
  "RedshiftZendeskDataEntry" => :source_zendesk,
27
27
  }
28
28
  def self.component_class_for(component_sym, de)
@@ -0,0 +1,29 @@
1
+ require 'flydata/source'
2
+ require 'flydata/source/component'
3
+ require 'flydata/source/errors'
4
+
5
+ module Flydata
6
+ module Source
7
+
8
+ class DataEntry < Component
9
+ def self.inherited(child_class)
10
+ Source.register(child_class, self)
11
+ end
12
+
13
+ # Public Interface CONFIG_PARAMS
14
+ #
15
+ # Has the definition of data entry parameters specific to the data source.
16
+ # Override
17
+ CONFIG_PARAMS = {
18
+ sourcename_data_entry_preference: {
19
+ source_specific_param1: { key: "subclass must define" },
20
+ }
21
+ }
22
+
23
+ def config_params
24
+ self.class::CONFIG_PARAMS
25
+ end
26
+ end
27
+
28
+ end
29
+ end
@@ -50,6 +50,25 @@ class Sync < Component
50
50
  def forwarder
51
51
  raise UnsupportedSourceError, "subclass must implement"
52
52
  end
53
+
54
+ private
55
+
56
+ def setup_table_prefs(prefs)
57
+ if prefs['tables_append_only']
58
+ prefs['tables_append_only'] =
59
+ prefs['tables_append_only'].split(/(?:\s*,\s*|\s+)/).uniq
60
+ prefs['tables'] = (prefs['tables'].split(/(?:\s*,\s*|\s+)/) +
61
+ prefs['tables_append_only']).uniq
62
+ else
63
+ prefs['tables'] = prefs['tables'].split(/(?:\s*,\s*|\s+)/).uniq
64
+ end
65
+ prefs['invalid_tables'] =
66
+ prefs['invalid_tables'].kind_of?(String) ?
67
+ prefs['invalid_tables'].split(/(?:\s*,\s*|\s+)/).uniq : []
68
+ prefs['new_tables'] =
69
+ prefs['new_tables'].kind_of?(String) ?
70
+ prefs['new_tables'].split(/(?:\s*,\s*|\s+)/).uniq : []
71
+ end
53
72
  end
54
73
 
55
74
  end
@@ -26,15 +26,55 @@ class SyncGenerateTableDdl < Component
26
26
  raise UnsupportedSourceError, "subclass must implement"
27
27
  end
28
28
 
29
- # Public Interface: Generate FlyData table definitions for given tables
29
+ def generate_flydata_tabledef(tables, options)
30
+ prefs = data_entry_prefs
31
+ options = options.merge(prefs)
32
+ flydata_tabledefs = []
33
+ error_list = []
34
+ missing_tables = each_source_tabledef(tables, options) do |source_tabledef, error|
35
+ if error
36
+ error_list << error.err_hash
37
+ next
38
+ end
39
+ flydata_tabledefs << source_tabledef.to_flydata_tabledef
40
+ end
41
+ if missing_tables
42
+ missing_tables.each {|missing_table| error_list << { error: "table does not exist in the #{data_source_type_display_name}", table: missing_table } }
43
+ end
44
+
45
+ [flydata_tabledefs, error_list]
46
+ end
47
+
48
+ private
49
+
50
+ # Returns the name of the data source type. The name will be used in an error message.
51
+ def data_source_type_display_name
52
+ raise UnsupportedSourceError, "subclass must implement"
53
+ end
54
+
55
+ # Returns a data entry preference hash
56
+ def data_entry_prefs
57
+ raise UnsupportedSourceError, "subclass must implement"
58
+ end
59
+
60
+ # Calls `block` with the source tabledef or error for each table.
30
61
  #
31
- # tables - An array of table names
32
- # options - A hash of options
62
+ # tables: An array of table names
33
63
  #
34
- # Returns flydata_tabledefs, errors
35
- # flydata_tablesdefs - An array of FlyData tabledefs
36
- # errors - An array of error hashes for tables whose tabledef generation failed
37
- def generate_flydata_tabledef(tables, options)
64
+ # options: A hash including options. It includes the contents of
65
+ # `data_entry_prefs` and :skip_primary_key_check. When the option is true,
66
+ # the method must call the `block` for a table which is missing the primary
67
+ # key. Otherwise, it should call the `block` with an error.
68
+ #
69
+ # block: A callback block called for each table with the following
70
+ # arguments:
71
+ # source_tabledef: A TableDef object of the source (e.g. MysqlTableDef)
72
+ # The value will be nil if it failed to create the object.
73
+ # error: A FlydataCore::TableDefError object. If no error occurred,
74
+ # this will be nil.
75
+ #
76
+ # Returns an array of tables which do not exist in the source
77
+ def each_source_tabledef(tables, options, &block)
38
78
  raise UnsupportedSourceError, "subclass must implement"
39
79
  end
40
80
  end
@@ -0,0 +1,22 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourceMysql
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ mysql_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ host: {},
13
+ username: {},
14
+ password: {encrypted: true},
15
+ ssl_ca_content: {},
16
+ ssl_cipher: {},
17
+ },
18
+ }
19
+ end
20
+
21
+ end
22
+ end
@@ -478,7 +478,7 @@ EOS
478
478
  def parse(line)
479
479
  start_ruby_prof
480
480
  bench_start_time = Time.now
481
- _parse2(line)
481
+ _parse(line)
482
482
  ensure
483
483
  stop_ruby_prof
484
484
  if ENV['FLYDATA_BENCHMARK']
@@ -745,7 +745,7 @@ grammar MysqlAlterTable
745
745
  rule data_type
746
746
  data_type_name meta_text unsigned zerofill {
747
747
  def data_type
748
- meta = (meta_text.text_value.size > 1) ? meta_text.text_value : ''
748
+ meta = (meta_text.text_value.size > 1) ? meta_text.text_value.strip : ''
749
749
  type = data_type_name.text_value.downcase + meta
750
750
  type = FlydataCore::TableDef::MysqlTableDef.convert_to_flydata_type(type)
751
751
  type << " unsigned" if !unsigned.terminal?
@@ -769,7 +769,7 @@ grammar MysqlAlterTable
769
769
  end
770
770
 
771
771
  rule meta_text
772
- '(' meta_value ')' / ''
772
+ ( nsp '(' nsp meta_value nsp ')' )?
773
773
  end
774
774
 
775
775
  rule meta_value
@@ -1154,12 +1154,17 @@ grammar MysqlAlterTable
1154
1154
  end
1155
1155
 
1156
1156
  rule value
1157
- quoted_value { def raw_value; text_raw_value; end }
1157
+ single_quoted_value { def raw_value; text_raw_value; end }
1158
1158
  / double_quoted_value { def raw_value; text_raw_value; end }
1159
1159
  / ident_sym { def raw_value; text_value; end }
1160
1160
  end
1161
1161
 
1162
1162
  rule quoted_value
1163
+ single_quoted_value { def raw_value; text_raw_value; end }
1164
+ / double_quoted_value { def raw_value; text_raw_value; end }
1165
+ end
1166
+
1167
+ rule single_quoted_value
1163
1168
  "'" text "'" { def text_raw_value; text.text_value; end }
1164
1169
  end
1165
1170
 
@@ -7,14 +7,7 @@ class Sync < Source::Sync
7
7
  def setup
8
8
  mp = de['mysql_data_entry_preference']
9
9
 
10
- if mp['tables_append_only']
11
- mp['tables_append_only'] = mp['tables_append_only'].split(",").uniq
12
- mp['tables'] = (mp['tables'].split(",") + mp['tables_append_only']).uniq
13
- else
14
- mp['tables'] = mp['tables'].split(",").uniq
15
- end
16
- mp['invalid_tables'] = mp['invalid_tables'].kind_of?(String) ? mp['invalid_tables'].split(",").uniq : []
17
- mp['new_tables'] = mp['new_tables'].kind_of?(String) ? mp['new_tables'].split(",").uniq : []
10
+ setup_table_prefs(mp)
18
11
 
19
12
  unless mp['ssl_ca_content'].to_s.strip.empty?
20
13
  sync_fm = SyncFileManager.new(de)
@@ -16,23 +16,18 @@ class SyncGenerateTableDdl < Source::SyncGenerateTableDdl
16
16
  end
17
17
  end
18
18
 
19
- def generate_flydata_tabledef(tables, options)
20
- mp = de['mysql_data_entry_preference']
21
- options = options.merge(mp)
22
- flydata_tabledefs = []
23
- error_list = []
24
- missing_tables = FlydataCore::Mysql::CommandGenerator.each_mysql_tabledef(tables, options) do |mysql_tabledef, error|
25
- if error
26
- error_list << error.err_hash
27
- next
28
- end
29
- flydata_tabledefs << mysql_tabledef.to_flydata_tabledef
30
- end
31
- if missing_tables
32
- missing_tables.each {|missing_table| error_list << { error: 'table does not exist in the MySQL database', table: missing_table } }
33
- end
19
+ private
20
+
21
+ def data_source_type_display_name
22
+ "MySQL database"
23
+ end
24
+
25
+ def data_entry_prefs
26
+ de['mysql_data_entry_preference']
27
+ end
34
28
 
35
- [flydata_tabledefs, error_list]
29
+ def each_source_tabledef(tables, options, &block)
30
+ FlydataCore::Mysql::CommandGenerator.each_mysql_tabledef(tables, options, &block)
36
31
  end
37
32
  end
38
33
 
@@ -0,0 +1,21 @@
1
+ require 'flydata/source/data_entry'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
+ class DataEntry < Source::DataEntry
7
+ CONFIG_PARAMS = {
8
+ postgresql_data_entry_preference: {
9
+ database: {},
10
+ tables: {},
11
+ tables_append_only: {},
12
+ host: {},
13
+ username: {},
14
+ password: {encrypted: true},
15
+ schema: {},
16
+ }
17
+ }
18
+ end
19
+
20
+ end
21
+ end
@@ -0,0 +1,29 @@
1
+ require 'flydata/source/sync'
2
+
3
+ module Flydata
4
+ module SourcePostgresql
5
+
6
+ class Sync < Source::Sync
7
+ def setup
8
+ setup_table_prefs(de['postgresql_data_entry_preference'])
9
+ end
10
+
11
+ def supported?
12
+ true
13
+ end
14
+
15
+ def table_lists
16
+ de['postgresql_data_entry_preference'].select {|key, value| %w(tables new_tables invalid_tables tables_append_only).include?(key)}
17
+ end
18
+
19
+ def data_servers
20
+ de['postgresql_data_entry_preference']['data_servers']
21
+ end
22
+
23
+ def forwarder
24
+ de['postgresql_data_entry_preference']['forwarder']
25
+ end
26
+ end
27
+
28
+ end
29
+ end
@@ -0,0 +1,126 @@
1
+ require 'flydata/source/sync_generate_table_ddl'
2
+ require 'flydata-core/table_def/postgresql_table_def'
3
+ require 'pg'
4
+
5
+ module Flydata
6
+ module SourcePostgresql
7
+
8
+ class SyncGenerateTableDdl < Source::SyncGenerateTableDdl
9
+ def run_compatibility_check
10
+ # do nothing for now
11
+ end
12
+
13
+ def data_source_type_display_name
14
+ "PostgreSQL database"
15
+ end
16
+
17
+ def data_entry_prefs
18
+ de['postgresql_data_entry_preference']
19
+ end
20
+
21
+ def each_source_tabledef(tables, options, &block)
22
+ pg_opts = {
23
+ host: options['host'],
24
+ port: options['port'],
25
+ dbname: options['database'],
26
+ user: options['username'],
27
+ password: options['password'],
28
+ sslmode: :prefer,
29
+ }
30
+ # PostgreSQL options.
31
+ tables = tables.clone
32
+ missing_tables = []
33
+ begin
34
+ if tables.to_s == '' || tables.to_s == '[]'
35
+ raise ArgumentError, "tables is nil or empty"
36
+ end
37
+ _each_tabledef(tables, options, pg_opts, &block)
38
+ rescue TableMissingError => e
39
+ tables.delete e.table
40
+ missing_tables << e.table
41
+ return missing_tables if tables.empty?
42
+ retry
43
+ end
44
+ missing_tables
45
+ end
46
+
47
+ private
48
+
49
+ COLUMNS_QUERY = <<EOS
50
+ SELECT c.table_name, c.column_name, c.data_type, c.character_octet_length,
51
+ c.numeric_precision, c.numeric_scale, c.is_nullable, c.column_default, i.indisprimary AS is_primary
52
+ FROM pg_index i
53
+ JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
54
+ RIGHT JOIN
55
+ (SELECT (table_catalog ||'.'|| table_schema ||'.'|| table_name)::regclass AS regid, *
56
+ FROM information_schema.columns) c
57
+ ON i.indrelid = c.regid AND a.attname = c.column_name
58
+ WHERE c.table_schema = $1 AND c.table_name IN (%s)
59
+ ORDER BY c.table_name, c.ordinal_position;
60
+ EOS
61
+ TABLE_PLACEHOLDER_START_NUM = 2 # because $1 is used by table_schema
62
+
63
+ def _each_tabledef(tables, options, pg_opts, &block)
64
+ cli = PG::Connection.new(pg_opts)
65
+
66
+ # TODO call the query for every 50 tables
67
+ placeholders = placeholder_string(tables.size, TABLE_PLACEHOLDER_START_NUM)
68
+ query = COLUMNS_QUERY % [placeholders]
69
+ res = cli.query(query, [options['schema']] + tables)
70
+
71
+ create_opt = {}
72
+ if options.has_key?(:skip_primary_key_check)
73
+ create_opt[:skip_primary_key_check] = options[:skip_primary_key_check]
74
+ end
75
+ current_table = nil
76
+ columns = []
77
+ completed_tables = []
78
+ res.each do |row|
79
+ table_name = row["table_name"]
80
+ unless table_name == current_table
81
+ unless columns.empty?
82
+ tabledef = create_tabledef_and_yield(columns, create_opt, &block)
83
+ completed_tables << current_table
84
+ columns = []
85
+ break unless tabledef
86
+ end
87
+ current_table = table_name
88
+ end
89
+ columns << row
90
+ end
91
+ unless columns.empty?
92
+ create_tabledef_and_yield(columns, create_opt, &block)
93
+ completed_tables << current_table
94
+ end
95
+ missing_tables = tables - completed_tables
96
+ unless missing_tables.empty?
97
+ raise TableMissingError.new("Table is missing", missing_tables.first)
98
+ end
99
+ end
100
+
101
+ def placeholder_string(num_items, start_num)
102
+ num_items.times.collect{|i| "$#{i + start_num}"}.join(",")
103
+ end
104
+
105
+ def create_tabledef_and_yield(columns, create_opt, &block)
106
+ pg_tabledef = nil
107
+ begin
108
+ pg_tabledef = FlydataCore::TableDef::PostgresqlTableDef.create(columns, create_opt)
109
+ yield(pg_tabledef, nil) if pg_tabledef
110
+ rescue FlydataCore::TableDefError => e
111
+ yield(nil, e)
112
+ end
113
+ pg_tabledef
114
+ end
115
+
116
+ class TableMissingError < RuntimeError
117
+ def initialize(message, table)
118
+ super(message)
119
+ @table = table
120
+ end
121
+ attr_reader :table
122
+ end
123
+ end
124
+
125
+ end
126
+ end