flydata 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ require_relative 'binlog_record_handler'
2
+
3
module Mysql
  # Base class for handlers of binlog query events.
  # A handler is used when the regexp returned by #pattern matches the query.
  class BinlogQueryHandler < BinlogRecordHandler
    # Returns the regexp that selects the queries this handler applies to.
    # The base implementation always raises; subclasses provide the regexp.
    def pattern
      raise RuntimeError, "Not implemented."
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'fluent/plugin/in_mysql_binlog'
2
+ require 'binlog'
3
+ require_relative 'dml_record_handler'
4
+ require_relative 'binlog_query_dispatcher'
5
+
6
module Mysql
  # Dispatches a binlog event to the receiver's `on_<event type>` method.
  # The method name is built from the lower-cased event type; events for
  # which the receiver has no such public method are ignored.
  class BinlogRecordDispatcher
    # Converts the event to a hash and hands it to the matching handler.
    # Returns the handler's result, or nil when there is no handler.
    def dispatch(event)
      handler = "on_#{event.event_type.downcase}"
      # Unhandled event type: nothing to do.
      return unless respond_to?(handler)

      # TODO to_hash method call below can fail if event.event_type is
      # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
      # be fixed when we support record update.
      send(handler, Fluent::MysqlBinlogInput::BinlogUtil.to_hash(event))
    end
  end

  # Dispatcher that tracks the current binlog file on the context and
  # forwards row events to a DmlRecordHandler and query events to a
  # FlydataBinlogQueryDispatcher.
  class FlydataBinlogRecordDispatcher < BinlogRecordDispatcher
    # The context's current binlog file name is reset to "" before the
    # collaborators are built with that same context.
    def initialize(context)
      context.current_binlog_file = ""
      @context = context
      @query_dispatcher = FlydataBinlogQueryDispatcher.new(context)
      @dml_record_handler = DmlRecordHandler.new(context)
    end

    # Remembers the binlog file name carried by a rotate event.
    def on_rotate(record)
      @context.current_binlog_file = record["binlog_file"]
    end

    # on_write_rows / on_update_rows / on_delete_rows all forward the record
    # to the DML handler together with the event kind.
    [:write_rows, :update_rows, :delete_rows].each do |kind|
      define_method("on_#{kind}") do |record|
        @dml_record_handler.process(record, kind)
      end
    end

    # Query events are routed through the query dispatcher.
    def on_query(record)
      @query_dispatcher.dispatch(record)
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'fluent/plugin/in_mysql_binlog'
2
+ require 'binlog'
3
+ require_relative 'binlog_position'
4
+ require_relative 'query_parser'
5
+
6
module Mysql
  # Base class for binlog event handlers. It filters events by database,
  # table and per-table binlog position, decorates the records produced by
  # the caller's block with common tags and emits them through Fluent.
  class BinlogRecordHandler
    TABLE_NAME = :table_name # A Flydata JSON tag to specify a table name
    TYPE = :type
    SEQ = :seq
    RESPECT_ORDER = :respect_order
    SRC_POS = :src_pos
    TABLE_REV = :table_rev

    # context must respond to tables, database, tag, sync_fm and
    # current_binlog_file (see the calls below).
    def initialize(context)
      @context = context
      @first_empty_binlog = true

      # Load per-table binlog position
      @table_binlog_pos = {}
      @context.tables.each do |table_name|
        content = @context.sync_fm.get_table_binlog_pos(table_name)
        @table_binlog_pos[table_name] = BinLogPosition.new(content) if content
      end
    end

    private

    def acceptable_db?(db_name)
      @context.database == db_name
    end

    # Position of the event inside the current binlog file.
    def event_position(record)
      record['next_position'] - record['event_length']
    end

    # True when +table+ is one of the context's tables and the event is past
    # the binlog position saved for that table (if any). The first event past
    # the saved position removes the saved position.
    #
    # The saved position is looked up by +table+, not by the event's own
    # 'table_name' entry, so that events whose table name comes from the
    # generated records (no 'table_name' on the event) are filtered as well.
    def acceptable_table?(record, table)
      return false unless @context.tables.include?(table)

      saved_pos = @table_binlog_pos[table]
      return true unless saved_pos

      event_pos = BinLogPosition.new("#{@context.current_binlog_file}\t#{event_position(record)}")
      return false if saved_pos >= event_pos

      @context.sync_fm.delete_table_binlog_pos(table)
      @table_binlog_pos.delete(table)
      true
    end

    # Emits the record(s) returned by the given block.
    #
    # type   - value stored under the TYPE tag of every emitted record
    # record - binlog event hash ("db_name", "table_name", "next_position",
    #          "event_length" and "timestamp" are read)
    # opt    - :increment_table_rev bumps the table revision once per record
    #
    # The block returns one hash or an array of hashes. Nothing is emitted
    # when the database/table is not acceptable or the block yields no record.
    # Raises RuntimeError when no table name can be determined.
    def emit_record(type, record, opt = {})
      return unless acceptable_db?(record["db_name"])
      return unless record["table_name"].nil? || acceptable_table?(record, record["table_name"])

      check_empty_binlog

      records = yield
      records = [records] unless records.kind_of?(Array)
      records = records.compact
      # Nothing to emit (e.g. an event without rows); avoids calling [] on nil.
      return if records.empty?

      table = records.first[TABLE_NAME] || record['table_name']
      raise "Missing table name. #{record}" if table.to_s.empty?
      return unless acceptable_table?(record, table)

      table_rev = @context.sync_fm.table_rev(table)
      src_pos = "#{@context.current_binlog_file}\t#{event_position(record)}"

      # Add common information to each record
      records.each do |r|
        if opt[:increment_table_rev]
          table_rev = @context.sync_fm.increment_table_rev(table, table_rev)
        end
        r[TYPE] = type
        r[RESPECT_ORDER] = true
        r[TABLE_NAME] = table
        r[SRC_POS] = src_pos
        r[TABLE_REV] = table_rev
      end

      # Use binlog's timestamp
      timestamp = record["timestamp"].to_i
      records.each do |row|
        @context.sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
          row[SEQ] = seq
          Fluent::Engine.emit(@context.tag, timestamp, row)
        end
      end
    end

    # Log one warning per run of consecutive records that have an empty
    # binlog file name.
    def check_empty_binlog
      if @context.current_binlog_file.to_s.empty?
        if @first_empty_binlog
          $log.warn "Binlog file name is empty. Rotate event not received!"
          @first_empty_binlog = false
        end
      else
        @first_empty_binlog = true
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Mysql
  # Value holder for the options shared by the binlog handlers.
  # An accessor is defined for every mandatory and optional option.
  class Context
    MANDATORY_OPTS = [
      :database, :tables, :tag, :sync_fm,
    ].freeze
    OPTIONAL_OPTS = [
      :current_binlog_file,
    ].freeze

    attr_accessor(*MANDATORY_OPTS, *OPTIONAL_OPTS)

    # opts - hash of option name => value. Names may be symbols or strings.
    #
    # Raises RuntimeError listing every mandatory option that is absent.
    # Every given option is stored in an instance variable of the same name.
    def initialize(opts)
      # Normalize keys so that string-keyed options are not reported missing.
      opts = opts.each_with_object({}) { |(name, value), h| h[name.to_sym] = value }

      missing_opts = MANDATORY_OPTS - opts.keys
      unless missing_opts.empty?
        raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
      end

      opts.each do |name, value|
        instance_variable_set(:"@#{name}", value)
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require_relative 'binlog_record_handler'
2
+
3
module Mysql
  # Handles row events (insert / update / delete) and emits one record per row.
  class DmlRecordHandler < BinlogRecordHandler
    ROW = :row
    # Size in bytes of each integer column type, keyed by the type names
    # found in record["columns"].
    INTEGER_TYPES = {'TINY' => 1,
                     'SHORT' => 2,
                     'INT24' => 3,
                     'LONG' => 4,
                     'LONGLONG' => 8
    }.freeze
    SIGNLESS_INTEGER_PREFIX = '0SL'

    # Emits the rows of +record+ according to +type+
    # (:write_rows, :delete_rows or :update_rows).
    # Raises RuntimeError for any other type.
    def process(record, type)
      case type
      when :write_rows
        emit_insert(record)
      when :delete_rows
        emit_delete(record)
      when :update_rows
        emit_update(record)
      else
        raise "Invalid type:#{type}"
      end
    end

    private

    def emit_insert(record)
      emit_rows(:insert, record)
    end

    def emit_delete(record)
      emit_rows(:delete, record)
    end

    def emit_update(record)
      emit_rows(:update, record) do |row|
        row.last # For update, row has two arrays (old and new values) Use new values
      end
    end

    # Builds one { ROW => {...} } hash per entry of record["rows"] and emits
    # them through emit_record. An optional block may map each raw row to the
    # row that should actually be emitted.
    def emit_rows(type, record)
      emit_record(type, record) do
        records = record["rows"].collect do |row|
          row = yield(row) if block_given? # Give the caller a chance to generate the correct row
          { ROW => convert_to_flydata_row_format(row) }
        end
        encode_signless_integer(records, record["columns"])
        records
      end
    end

    # Turns an array of values into a hash keyed by the 1-based column
    # position as a string. String values go through a UTF-16 round trip that
    # replaces invalid and undefined characters.
    def convert_to_flydata_row_format(row)
      row.each_with_index.each_with_object({}) do |(v, i), h|
        if v.kind_of?(String)
          v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
        end
        h[(i + 1).to_s] = v
      end
    end

    # Replaces the values of integer columns with SIGNLESS_INTEGER_PREFIX
    # followed by the fixed-width hex form of the value, so that the sign
    # information is not baked into the value. Values that are neither
    # numeric nor a plain decimal string are left untouched.
    def encode_signless_integer(records, column_types)
      records.each do |record|
        row = record[ROW]
        row.keys.each do |position|
          byte_size = INTEGER_TYPES[column_types[position.to_i - 1]]
          next unless byte_size

          intval = row[position]
          # \A and \z anchor the whole string; ^ and $ would accept a digit
          # line inside a multi-line string, which sprintf then rejects.
          decimal = intval.kind_of?(String) && intval =~ /\A-?\d+\z/
          next unless intval.kind_of?(Numeric) || decimal

          width = byte_size * 2 # * 2 because a single byte requires two characters (e.g. ff)
          # Negative values are formatted as "..f<digits>"; expand the marker
          # and keep the last `width` characters.
          hex = sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
          row[position] = SIGNLESS_INTEGER_PREFIX + hex
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require_relative '../../table_def'
2
+
3
module Mysql
  # Base class of the query parsers.
  class QueryParser
    # Return hash object
    def parse(query)
    end
  end

  # Parses "ALTER TABLE <table> ADD COLUMN <column definition>" queries.
  class AlterTableAddColumnParser < QueryParser
    # Returns an array with one hash per added column:
    # {
    #   subtype : :add_column,
    #   table_name : table_name,
    #   column : column hash produced by MysqlTableDef.parse_one_column_def,
    #            plus position: :first or after: column_name when the query
    #            has a FIRST / AFTER clause
    # }
    # Any failure is logged through $log and re-raised.
    def parse(query)
      do_parse(query)
    rescue
      $log.error("Failed to parse query. query:'#{query}' error:#{$!}")
      raise
    end

    private

    def do_parse(query)
      m = /^\s*alter\s+table\s+`?(?<table_name>[^\s]+)`?\s+add\s+column\s+/i.match(query)
      raise "Query is not an ALTER TABLE ... ADD COLUMN statement" unless m

      # The greedy capture keeps the closing backtick of a quoted name.
      table_name = m['table_name'].delete('`')
      column_defs = m.post_match

      columns = if column_defs[0] == '('
        parse_multiple_columns(column_defs[1..-2]) # delete parenthesis (...,...,...)
      else
        [parse_one_column(column_defs)]
      end
      columns.collect do |col|
        {
          subtype: :add_column,
          table_name: table_name,
          column: col
        }
      end
    end

    # Parses a single column definition and records its FIRST / AFTER clause.
    def parse_one_column(query)
      Flydata::TableDef::MysqlTableDef.parse_one_column_def(query) do |column, column_query, pos|
        option_str = " #{column_query[pos..-1]}"
        # \b keeps "AFTER first_name" from being read as FIRST.
        if /\sFIRST\b/i.match(option_str)
          column[:position] = :first
        elsif (after = /\sAFTER\s+(?:`([^`]+)`|([^\s`;]+))/i.match(option_str))
          # Quoted names may contain spaces; bare names end at whitespace or ';'.
          column[:after] = after[1] || after[2]
        end
        column
      end
    end

    def parse_multiple_columns(query)
      raise "Not supported to add multiple columns"
    end
  end
end
@@ -2,26 +2,16 @@ module Flydata
2
2
  module FileUtil
3
3
  class SyncFileManager
4
4
  DUMP_DIR = ENV['FLYDATA_DUMP'] || File.join(FLYDATA_HOME, 'dump')
5
+ BACKUP_DIR = ENV['FLYDATA_BACKUP'] || File.join(FLYDATA_HOME, 'backup')
5
6
  TABLE_POSITIONS_DIR = ENV['FLYDATA_TABLE_POSITIONS'] || File.join(FLYDATA_HOME, 'positions')
6
7
  def initialize(data_entry)
7
8
  @data_entry = data_entry
8
9
  end
9
10
 
10
11
  def dump_file_path
11
- dump_dir = @data_entry['mysql_data_entry_preference']['mysqldump_dir']
12
- if dump_dir
13
- dump_dir = dump_dir.dup
14
- dump_dir[0] = ENV['HOME'] if dump_dir.match(/^~$|^~\//)
15
- else
16
- dump_dir = DUMP_DIR.dup
17
- end
18
- if File.exists?(dump_dir) and not Dir.exists?(dump_dir)
19
- raise "'mysqldump_dir'(#{dump_dir}) must be a directory."
20
- end
21
- FileUtils.mkdir_p(dump_dir) unless Dir.exists?(dump_dir)
22
12
  File.join(dump_dir, @data_entry['name']) + ".dump"
23
13
  end
24
-
14
+
25
15
  # dump pos file for resume
26
16
  def dump_pos_path
27
17
  dump_file_path + ".pos"
@@ -67,6 +57,13 @@ module Flydata
67
57
  File.join(FLYDATA_HOME, @data_entry['name'] + ".binlog.pos")
68
58
  end
69
59
 
60
# Writes '0' into "<table>.pos" under the table positions directory for
# every given table, replacing any previous content.
def reset_table_position_files(tables)
  tables.each do |table_name|
    pos_path = File.join(table_positions_dir_path, table_name + ".pos")
    File.write(pos_path, '0')
  end
end
66
+
70
67
  def table_positions_dir_path
71
68
  TABLE_POSITIONS_DIR
72
69
  end
@@ -99,7 +96,97 @@ module Flydata
99
96
  retry
100
97
  end
101
98
  end
99
+
100
# Path of the "sync.info" file inside the dump directory.
def sync_info_file
  info_name = "sync.info"
  File.join(dump_dir, info_name)
end
103
+
104
# Overwrites the sync info file with "<initial_sync>\t<tables>".
def save_sync_info(initial_sync, tables)
  File.write(sync_info_file, [initial_sync, tables].join("\t"))
end
109
+
110
# Reads the first line of the sync info file.
#
# Returns nil when the file does not exist, otherwise a hash with
#   initial_sync: true when the first tab-separated field is 'true'
#   tables:       the second tab-separated field, as stored
def load_sync_info
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return nil unless File.exist?(sync_info_file)
  # Block form closes the file handle after the line is read.
  items = File.open(sync_info_file, 'r') { |f| f.readline }.split("\t")
  { initial_sync: (items[0] == 'true'),
    tables: items[1] }
end
116
+
117
# Returns the first line of "<table_name>.binlog.pos" in the table positions
# directory, or nil when that file does not exist.
def get_table_binlog_pos(table_name)
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return nil unless File.exist?(file)
  # Block form closes the file handle after the line is read.
  File.open(file, 'r') { |f| f.readline }
end
122
+
123
# Path of the revision file of a table: "<positions dir>/<table_name>.rev".
def table_rev_file_path(table_name)
  rev_name = table_name + ".rev"
  File.join(table_positions_dir_path, rev_name)
end
126
+
127
# Paths of all "*.rev" files in the table positions directory.
def table_rev_file_paths
  rev_glob = File.join(table_positions_dir_path, "*.rev")
  Dir.glob(rev_glob)
end
130
+
131
# Returns the revision stored in the table's revision file as an integer.
# A missing or empty file yields the default revision 1.
def table_rev(table_name)
  file = table_rev_file_path(table_name)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return 1 unless File.exist?(file) #default revision is 1
  # Read-only access is enough; the file is never written here.
  content = File.read(file)
  content.empty? ? 1 : content.to_i
end
143
+
144
# Stores base_rev + 1 in the table's revision file and returns the new value.
def increment_table_rev(table_name, base_rev)
  next_rev = base_rev + 1
  File.write(table_rev_file_path(table_name), next_rev.to_s)
  next_rev
end
102
152
 
153
# Removes "<table_name>.binlog.pos" from the table positions directory.
# When the file is already gone, a notice is printed instead.
def delete_table_binlog_pos(table_name)
  file = File.join(table_positions_dir_path, table_name + ".binlog.pos")
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if File.exist?(file)
    FileUtils.rm(file, :force => true)
  else
    puts "#{file} does not exist. Something is wrong. Did you delete the file manually when flydata was running?"
  end
end
161
+
162
# For every table name in the space-separated +tables+ string, writes
# binlog_content(binlog_pos) to "<table>.binlog.pos" in the dump directory.
def save_table_binlog_pos(tables, binlog_pos)
  table_names = tables.split(" ")
  table_names.each do |table_name|
    pos_path = File.join(dump_dir, table_name + ".binlog.pos")
    File.open(pos_path, "w") { |io| io.write(binlog_content(binlog_pos)) }
  end
end
170
+
171
# Moves "<table>.binlog.pos" of every given table from the dump directory
# into the table positions directory, creating that directory if needed.
# Raises RuntimeError as soon as one of the files is missing; files moved
# before that point stay moved.
def move_table_binlog_files(tables)
  # mkdir_p is a no-op for an existing directory, so no Dir.exists? check
  # (removed in Ruby 3.2) is needed.
  FileUtils.mkdir_p(table_positions_dir_path)
  tables.each do |table_name|
    file = File.join(dump_dir, table_name + ".binlog.pos")
    raise "#{file} does not exist. Error!!" unless File.exist?(file)
    FileUtils.mv(file, table_positions_dir_path)
  end
end
181
+
182
# Moves the dump directory into a new sub-directory of BACKUP_DIR that is
# named after the current time (YYYYmmddHHMMSS).
def backup_dump_dir
  backup_dir = BACKUP_DIR.dup
  # mkdir_p is a no-op for an existing directory, so no Dir.exists? check
  # (removed in Ruby 3.2) is needed.
  FileUtils.mkdir_p(backup_dir)
  dest_dir = File.join(backup_dir, Time.now.strftime("%Y%m%d%H%M%S"))
  FileUtils.mkdir(dest_dir)
  FileUtils.mv(dump_dir, dest_dir)
end
189
+
103
190
  private
104
191
 
105
192
  def dump_pos_content(status, table_name, last_pos, binlog_pos, state = nil, substate = nil)
@@ -115,6 +202,26 @@ module Flydata
115
202
  return nil unless File.exists?(path)
116
203
  Marshal.load(File.open(path, 'r'))
117
204
  end
205
+
206
# Resolves the dump directory, creates it when missing and returns its path.
#
# The directory comes from the data entry's
# ['mysql_data_entry_preference']['mysqldump_dir'] when present (a leading
# "~" is replaced by ENV['HOME']), otherwise from DUMP_DIR.
# Raises RuntimeError when the path exists but is not a directory.
def dump_dir
  pref = @data_entry['mysql_data_entry_preference']
  configured = pref && pref['mysqldump_dir']

  dir = if configured
    expanded = configured.dup
    expanded[0] = ENV['HOME'] if expanded.match(/^~$|^~\//)
    expanded
  else
    DUMP_DIR.dup
  end

  # File.exist? / File.directory? replace File.exists? / Dir.exists?, which
  # were removed in Ruby 3.2.
  if File.exist?(dir) && !File.directory?(dir)
    raise "'mysqldump_dir'(#{dir}) must be a directory."
  end
  FileUtils.mkdir_p(dir)
  dir
end
118
225
  end
119
226
  end
120
227
  end