flydata 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ require_relative 'binlog_record_handler'
2
+
3
module Mysql
  # Abstract base for handlers that deal with binlog query events.
  # Construction and any shared helpers come from BinlogRecordHandler.
  class BinlogQueryHandler < BinlogRecordHandler
    # Regexp identifying the queries this handler takes care of: the handler
    # is used when the pattern matches the query.
    # Subclasses are expected to override this method.
    #
    # @raise [RuntimeError] always, in this base implementation
    def pattern
      fail "Not implemented."
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'fluent/plugin/in_mysql_binlog'
2
+ require 'binlog'
3
+ require_relative 'dml_record_handler'
4
+ require_relative 'binlog_query_dispatcher'
5
+
6
module Mysql
  # Routes a binlog event to the handler method named after its type
  # ("on_" followed by the downcased event type).
  class BinlogRecordDispatcher
    # Dispatch one binlog event.
    #
    # @param event an object responding to #event_type
    # @return the handler's return value, or nil when this dispatcher has no
    #   handler for the event type
    def dispatch(event)
      handler = "on_#{event.event_type.downcase}"
      # Event types without a handler are ignored.
      return unless respond_to?(handler)

      # TODO to_hash method call below can fail if event.event_type is
      # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
      # be fixed when we support record update.
      send(handler, Fluent::MysqlBinlogInput::BinlogUtil.to_hash(event))
    end
  end

  # Dispatcher that forwards row events to a DmlRecordHandler, query events
  # to a FlydataBinlogQueryDispatcher, and tracks the current binlog file on
  # rotate events.
  class FlydataBinlogRecordDispatcher < BinlogRecordDispatcher
    # @param context shared context; its current_binlog_file is reset to ""
    #   before the handlers are created
    def initialize(context)
      context.current_binlog_file = ""
      @context = context
      @query_dispatcher = FlydataBinlogQueryDispatcher.new(context)
      @dml_record_handler = DmlRecordHandler.new(context)
    end

    # Remember the binlog file announced by a rotate event.
    def on_rotate(record)
      @context.current_binlog_file = record["binlog_file"]
    end

    # on_write_rows / on_update_rows / on_delete_rows all hand the record to
    # the DML handler together with the row event type.
    %i[write_rows update_rows delete_rows].each do |row_event|
      define_method(:"on_#{row_event}") do |record|
        @dml_record_handler.process(record, row_event)
      end
    end

    # Query events go through the query dispatcher.
    def on_query(record)
      @query_dispatcher.dispatch(record)
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'fluent/plugin/in_mysql_binlog'
2
+ require 'binlog'
3
+ require_relative 'binlog_position'
4
+ require_relative 'query_parser'
5
+
6
module Mysql
  # Base class for handlers that turn a binlog record into one or more
  # Flydata records and emit them through Fluent::Engine.
  class BinlogRecordHandler
    TABLE_NAME = :table_name # A Flydata JSON tag to specify a table name
    TYPE = :type
    SEQ = :seq
    RESPECT_ORDER = :respect_order
    SRC_POS = :src_pos
    TABLE_REV = :table_rev

    # @param context shared context providing database, tables, tag, sync_fm
    #   and current_binlog_file
    def initialize(context)
      @context = context
      @first_empty_binlog = true

      # Load per-table binlog position
      @table_binlog_pos = {}
      @context.tables.each do |table_name|
        content = @context.sync_fm.get_table_binlog_pos(table_name)
        @table_binlog_pos[table_name] = BinLogPosition.new(content) if content
      end
    end

    private

    # True when the record belongs to the database being synced.
    def acceptable_db?(db_name)
      @context.database == db_name
    end

    # Decide whether an event for +table+ should be emitted.
    #
    # The table must be one of the context's tables. If a binlog position is
    # saved for the table, events at or before that position are rejected;
    # the first event past it deletes the saved position (in memory and via
    # sync_fm) so later events are no longer compared.
    #
    # The saved position is looked up by +table+, not by
    # record['table_name']: query records carry no table name, so keying on
    # the record let such events skip the position check entirely.
    def acceptable_table?(record, table)
      return false unless @context.tables.include?(table)

      saved_pos = @table_binlog_pos[table]
      return true unless saved_pos

      event_pos = BinLogPosition.new(
        "#{@context.current_binlog_file}\t#{event_position(record)}")
      return false if saved_pos >= event_pos

      @context.sync_fm.delete_table_binlog_pos(table)
      @table_binlog_pos.delete(table)
      true
    end

    # Emit the record(s) produced by the given block.
    #
    # The block is only called when the record's database (and table, when
    # the record names one) is acceptable. It returns a Hash or an Array of
    # Hashes; each gets the common Flydata fields and a sequence number and
    # is emitted with the binlog timestamp.
    #
    # @param type record type stored under TYPE
    # @param record [Hash] binlog record
    # @param opt [Hash] :increment_table_rev bumps the table revision once
    #   per emitted record
    # @raise [RuntimeError] when no table name can be determined
    def emit_record(type, record, opt = {})
      return unless acceptable_db?(record["db_name"])
      return unless record["table_name"].nil? || acceptable_table?(record, record["table_name"])

      check_empty_binlog

      records = yield
      records = [records] unless records.kind_of?(Array)
      return if records.empty? # nothing to emit

      table = records.first[TABLE_NAME] || record['table_name']
      raise "Missing table name. #{record}" if table.to_s.empty?
      return unless acceptable_table?(record, table)

      table_rev = @context.sync_fm.table_rev(table)
      position = event_position(record)

      # Add common information to each record
      records.each do |r|
        if opt[:increment_table_rev]
          table_rev = @context.sync_fm.increment_table_rev(table, table_rev)
        end
        r[TYPE] = type
        r[RESPECT_ORDER] = true
        r[TABLE_NAME] = table
        r[SRC_POS] = "#{@context.current_binlog_file}\t#{position}"
        r[TABLE_REV] = table_rev
      end

      # Use binlog's timestamp
      timestamp = record["timestamp"].to_i
      records.each do |row|
        @context.sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
          row[SEQ] = seq
          Fluent::Engine.emit(@context.tag, timestamp, row)
        end
      end
    end

    # Position of the event in the binlog: next_position minus event_length.
    def event_position(record)
      record['next_position'] - record['event_length']
    end

    # Log one warning per run of consecutive records that arrive while the
    # binlog file name is empty.
    def check_empty_binlog
      if @context.current_binlog_file.to_s.empty?
        if @first_empty_binlog
          $log.warn "Binlog file name is empty. Rotate event not received!"
          @first_empty_binlog = false
        end
      else
        @first_empty_binlog = true
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Mysql
  # Option holder shared by the binlog handlers. Every known option is
  # exposed through a reader and a writer.
  class Context
    MANDATORY_OPTS = [:database, :tables, :tag, :sync_fm]
    OPTIONAL_OPTS = [:current_binlog_file]

    attr_accessor(*(MANDATORY_OPTS + OPTIONAL_OPTS))

    # @param opts [Hash] option name => value; each pair is stored in the
    #   instance variable of the same name
    # @raise [RuntimeError] when any of MANDATORY_OPTS is not among opts' keys
    def initialize(opts)
      absent = MANDATORY_OPTS - opts.keys
      raise "Mandatory option(s) are missing: #{absent.join(', ')}" unless absent.empty?

      opts.each_pair { |name, value| instance_variable_set(:"@#{name}", value) }
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require_relative 'binlog_record_handler'
2
+
3
module Mysql
  # Handler for row events (insert / update / delete). Converts each row
  # into a Flydata record and emits it through BinlogRecordHandler#emit_record.
  class DmlRecordHandler < BinlogRecordHandler
    ROW = :row
    # Integer column type => size in bytes
    INTEGER_TYPES = {
      'TINY' => 1,
      'SHORT' => 2,
      'INT24' => 3,
      'LONG' => 4,
      'LONGLONG' => 8
    }
    SIGNLESS_INTEGER_PREFIX = '0SL'

    # Emit the rows of a binlog record.
    #
    # @param record [Hash] binlog record with "rows" and "columns"
    # @param type [Symbol] :write_rows, :delete_rows or :update_rows
    # @raise [RuntimeError] for any other type
    def process(record, type)
      case type
      when :write_rows then emit_insert(record)
      when :delete_rows then emit_delete(record)
      when :update_rows then emit_update(record)
      else raise "Invalid type:#{type}"
      end
    end

    private

    def emit_insert(record)
      emit_rows(:insert, record)
    end

    def emit_delete(record)
      emit_rows(:delete, record)
    end

    # For update, row has two arrays (old and new values). Use new values.
    def emit_update(record)
      emit_rows(:update, record) { |old_and_new| old_and_new.last }
    end

    # Build one { ROW => ... } record per row and emit them. An optional
    # block lets the caller pick or transform the raw row first.
    def emit_rows(type, record)
      emit_record(type, record) do
        flydata_records = record["rows"].map do |row|
          row = yield(row) if block_given? # Give the caller a chance to generate the correct row
          { ROW => convert_to_flydata_row_format(row) }
        end
        encode_signless_integer(flydata_records, record["columns"])
        flydata_records
      end
    end

    # Turn a row (list of values) into a hash keyed by the 1-based column
    # position as a string. String values are round-tripped through UTF-16
    # with undefined/invalid characters replaced, ending up as UTF-8.
    def convert_to_flydata_row_format(row)
      row.each_with_index.each_with_object({}) do |(value, index), converted|
        if value.kind_of?(String)
          value = value.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
        end
        converted[(index + 1).to_s] = value
      end
    end

    # Replace integer column values in place with SIGNLESS_INTEGER_PREFIX
    # followed by a fixed-width hex string (two hex characters per byte of
    # the column type). Values that are neither Numeric nor digit strings are
    # left untouched.
    def encode_signless_integer(records, column_types)
      records.each do |rec|
        values = rec[ROW]
        values.keys.each do |position|
          column_type = column_types[position.to_i - 1]
          next unless INTEGER_TYPES.key?(column_type)

          # It's a signless integer.
          intval = values[position]
          next unless intval.kind_of?(Numeric) || intval =~ /^-?[\d]+$/

          width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
          # sprintf renders negatives with a leading "..f"; expand it to
          # f's and keep the rightmost +width+ characters.
          hex = sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
          values[position] = SIGNLESS_INTEGER_PREFIX + hex
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require_relative '../../table_def'
2
+
3
module Mysql
  # Interface for SQL query parsers.
  class QueryParser
    # Return hash object
    def parse(query)
    end
  end

  # Parser for "ALTER TABLE <table> ADD COLUMN <column definition>".
  class AlterTableAddColumnParser < QueryParser
    # Statement prefix up to and including "ADD COLUMN".
    STATEMENT_HEAD = /^\s*alter\s+table\s+`?(?<table_name>[^\s]+)`?\s+add\s+column\s+/i
    # "FIRST" as a whole word, so that names such as first_name do not match.
    FIRST_OPTION = /\sFIRST\b/i
    # "AFTER col" with the column either backtick-quoted or bare.
    AFTER_OPTION = /\sAFTER\s+(?:`(?<quoted>[^`]+)`|(?<bare>[^\s`;,)]+))/i

    # Parse an ADD COLUMN statement.
    #
    # Returns an array with one hash per added column:
    # {
    #   subtype : :add_column,
    #   table_name : table_name,
    #   column : {
    #     ...whatever MysqlTableDef.parse_one_column_def yields for the column,
    #     position: :first,      # when FIRST is given
    #     after: column_name     # when AFTER column_name is given
    #   }
    # }
    #
    # Any failure is logged through $log and re-raised.
    #
    # @raise [RuntimeError] when the query is not an ADD COLUMN statement or
    #   adds several columns at once
    def parse(query)
      do_parse(query)
    rescue => e
      $log.error("Failed to parse query. query:'#{query}' error:#{e}")
      raise
    end

    private

    def do_parse(query)
      m = STATEMENT_HEAD.match(query)
      raise "Not an ALTER TABLE ... ADD COLUMN query" unless m

      # The greedy capture keeps a closing backtick; drop the quoting.
      table_name = m['table_name'].delete('`')
      column_part = m.post_match

      columns = if column_part[0] == '('
                  parse_multiple_columns(column_part[1..-2]) # delete parenthesis (...,...,...)
                else
                  [parse_one_column(column_part)]
                end
      columns.collect do |col|
        {
          subtype: :add_column,
          table_name: table_name,
          column: col
        }
      end
    end

    # Parse a single column definition and its FIRST / AFTER placement.
    # AFTER is tested before FIRST so that "AFTER first" names a column.
    def parse_one_column(query)
      Flydata::TableDef::MysqlTableDef.parse_one_column_def(query) do |column, column_query, pos|
        option_str = " " + column_query[pos..-1].to_s
        if (after = AFTER_OPTION.match(option_str))
          column[:after] = after[:quoted] || after[:bare]
        elsif FIRST_OPTION.match(option_str)
          column[:position] = :first
        end
        column
      end
    end

    def parse_multiple_columns(query)
      raise "Not supported to add multiple columns"
    end
  end
end
@@ -2,26 +2,16 @@ module Flydata
2
2
  module FileUtil
3
3
  class SyncFileManager
4
4
  DUMP_DIR = ENV['FLYDATA_DUMP'] || File.join(FLYDATA_HOME, 'dump')
5
+ BACKUP_DIR = ENV['FLYDATA_BACKUP'] || File.join(FLYDATA_HOME, 'backup')
5
6
  TABLE_POSITIONS_DIR = ENV['FLYDATA_TABLE_POSITIONS'] || File.join(FLYDATA_HOME, 'positions')
6
7
  def initialize(data_entry)
7
8
  @data_entry = data_entry
8
9
  end
9
10
 
10
11
  # Path of the dump file: "<data entry name>.dump" inside dump_dir.
  # The lines marked "-" below are removed by this diff; the directory is now
  # obtained from the dump_dir method.
  def dump_file_path
11
- dump_dir = @data_entry['mysql_data_entry_preference']['mysqldump_dir']
12
- if dump_dir
13
- dump_dir = dump_dir.dup
14
- dump_dir[0] = ENV['HOME'] if dump_dir.match(/^~$|^~\//)
15
- else
16
- dump_dir = DUMP_DIR.dup
17
- end
18
- if File.exists?(dump_dir) and not Dir.exists?(dump_dir)
19
- raise "'mysqldump_dir'(#{dump_dir}) must be a directory."
20
- end
21
- FileUtils.mkdir_p(dump_dir) unless Dir.exists?(dump_dir)
22
12
  File.join(dump_dir, @data_entry['name']) + ".dump"
23
13
  end
24
-
14
+
25
15
  # dump pos file for resume
26
16
  def dump_pos_path
27
17
  dump_file_path + ".pos"
@@ -67,6 +57,13 @@ module Flydata
67
57
  File.join(FLYDATA_HOME, @data_entry['name'] + ".binlog.pos")
68
58
  end
69
59
 
60
# Overwrite the "<table>.pos" file of every given table (inside the table
# positions directory) with the single character "0".
#
# @param tables [Array<String>] table names
def reset_table_position_files(tables)
  tables.each do |table_name|
    pos_path = File.join(table_positions_dir_path, table_name + ".pos")
    File.write(pos_path, '0')
  end
end
66
+
70
67
  def table_positions_dir_path
71
68
  TABLE_POSITIONS_DIR
72
69
  end
@@ -99,7 +96,97 @@ module Flydata
99
96
  retry
100
97
  end
101
98
  end
99
+
100
# Path of the "sync.info" file inside the dump directory.
def sync_info_file
  directory = dump_dir
  File.join(directory, "sync.info")
end
103
+
104
# Write the sync info file as "<initial_sync>\t<tables>".
#
# A list of tables is stored space-separated. Joining it with the tab
# separator, as before, spread the tables over several tab fields and
# load_sync_info (which reads the second field only) lost all but the first.
#
# @param initial_sync [Boolean]
# @param tables [String, Array<String>] space-separated names or a list
def save_sync_info(initial_sync, tables)
  table_list = Array(tables).join(" ")
  File.open(sync_info_file, "w") do |f|
    f.write([initial_sync, table_list].join("\t"))
  end
end
109
+
110
# Read back what save_sync_info wrote.
#
# Uses File.exist? (File.exists? no longer exists in Ruby 3.2+), closes the
# file after reading and strips a trailing newline before splitting.
#
# @return [Hash, nil] { initial_sync: Boolean, tables: second tab field },
#   or nil when the sync info file does not exist
# @raise [EOFError] when the file is empty
def load_sync_info
  path = sync_info_file
  return nil unless File.exist?(path)

  items = File.open(path, 'r') { |f| f.readline }.chomp.split("\t")
  { initial_sync: (items[0] == 'true'),
    tables: items[1] }
end
116
+
117
# First line of "<table>.binlog.pos" in the table positions directory.
#
# Uses File.exist? (File.exists? no longer exists in Ruby 3.2+) and closes
# the file after reading.
#
# @return [String, nil] the line as stored, or nil when the file is missing
# @raise [EOFError] when the file is empty
def get_table_binlog_pos(table_name)
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
  return nil unless File.exist?(file)

  File.open(file, 'r') { |f| f.readline }
end
122
+
123
# Path of the revision file ("<table>.rev") of a table inside the table
# positions directory.
def table_rev_file_path(table_name)
  rev_file_name = table_name + ".rev"
  File.join(table_positions_dir_path, rev_file_name)
end
126
+
127
# Paths of all "*.rev" files found in the table positions directory.
def table_rev_file_paths
  rev_pattern = File.join(table_positions_dir_path, "*.rev")
  Dir.glob(rev_pattern)
end
130
+
131
# Current revision of a table, read from its revision file.
#
# Uses File.exist? (File.exists? no longer exists in Ruby 3.2+) and reads
# the file without opening it for writing.
#
# @return [Integer] the stored revision; 1 when the file is missing or empty
def table_rev(table_name)
  file = table_rev_file_path(table_name)
  return 1 unless File.exist?(file) # default revision is 1

  stored = File.read(file)
  stored.empty? ? 1 : stored.to_i
end
143
+
144
# Store base_rev + 1 in the table's revision file, replacing its previous
# content.
#
# @return the new revision
def increment_table_rev(table_name, base_rev)
  rev_path = table_rev_file_path(table_name)
  bumped = base_rev + 1
  File.write(rev_path, bumped)
  bumped
end
102
152
 
153
# Remove "<table>.binlog.pos" from the table positions directory.
# When the file is already gone a notice is printed to standard output
# instead.
#
# Uses File.exist? (File.exists? no longer exists in Ruby 3.2+).
def delete_table_binlog_pos(table_name)
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
  if File.exist?(file)
    FileUtils.rm(file, :force => true)
  else
    puts "#{file} does not exist. Something is wrong. Did you delete the file manually when flydata was running?"
  end
end
161
+
162
# Write binlog_content(binlog_pos) to "<table>.binlog.pos" in the dump
# directory, once per table.
#
# @param tables [String, Array<String>] space-separated table names; a list
#   of names is accepted as well, matching the sibling methods that take
#   table lists
# @param binlog_pos passed through to binlog_content
def save_table_binlog_pos(tables, binlog_pos)
  table_names = tables.respond_to?(:split) ? tables.split(" ") : Array(tables)
  table_names.each do |table_name|
    file = File.join(dump_dir, table_name + ".binlog.pos")
    File.open(file, "w") do |f|
      f.write(binlog_content(binlog_pos))
    end
  end
end
170
+
171
# Move each table's "<table>.binlog.pos" from the dump directory into the
# table positions directory, creating that directory when needed.
#
# Uses File.exist? (File.exists? / Dir.exists? no longer exist in Ruby 3.2+).
#
# @param tables [Array<String>] table names
# @raise [RuntimeError] when a table's file is missing from the dump directory
def move_table_binlog_files(tables)
  FileUtils.mkdir_p(table_positions_dir_path)
  tables.each do |table_name|
    file = File.join(dump_dir, table_name + ".binlog.pos")
    raise "#{file} does not exist. Error!!" unless File.exist?(file)

    FileUtils.mv(file, table_positions_dir_path)
  end
end
181
+
182
# Move the whole dump directory into a new, timestamp-named
# (YYYYmmddHHMMSS) directory under BACKUP_DIR, creating BACKUP_DIR when
# needed.
#
# Dir.exists? no longer exists in Ruby 3.2+; mkdir_p already tolerates an
# existing directory, so no separate check is made.
#
# Raises if the timestamped directory already exists (FileUtils.mkdir).
def backup_dump_dir
  backup_dir = BACKUP_DIR.dup
  FileUtils.mkdir_p(backup_dir)
  dest_dir = File.join(backup_dir, Time.now.strftime("%Y%m%d%H%M%S"))
  FileUtils.mkdir(dest_dir)
  FileUtils.mv(dump_dir, dest_dir)
end
189
+
103
190
  private
104
191
 
105
192
  def dump_pos_content(status, table_name, last_pos, binlog_pos, state = nil, substate = nil)
@@ -115,6 +202,26 @@ module Flydata
115
202
  return nil unless File.exists?(path)
116
203
  Marshal.load(File.open(path, 'r'))
117
204
  end
205
+
206
# Directory where dump files are kept, created on demand.
#
# Taken from the data entry's mysql_data_entry_preference 'mysqldump_dir'
# when set (a leading "~" is replaced by ENV['HOME']), otherwise DUMP_DIR.
#
# Uses File.exist? / Dir.exist? (the "exists?" spellings no longer exist in
# Ruby 3.2+).
#
# @return [String] the directory path
# @raise [RuntimeError] when the path exists but is not a directory
def dump_dir
  pref = @data_entry['mysql_data_entry_preference']
  configured = pref && pref['mysqldump_dir']
  dir = if configured
          expanded = configured.dup
          # Only "~" alone or a leading "~/" refers to the home directory.
          expanded[0] = ENV['HOME'] if expanded.match(/\A~(\/|\z)/)
          expanded
        else
          DUMP_DIR.dup
        end
  if File.exist?(dir) && !Dir.exist?(dir)
    raise "'mysqldump_dir'(#{dir}) must be a directory."
  end
  FileUtils.mkdir_p(dir) unless Dir.exist?(dir)
  dir
end
118
225
  end
119
226
  end
120
227
  end