flydata 0.0.5.6 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -0
- data/Gemfile.lock +36 -1
- data/VERSION +1 -1
- data/bin/fdmysqldump +59 -0
- data/flydata.gemspec +49 -5
- data/lib/flydata.rb +3 -1
- data/lib/flydata/api/data_entry.rb +4 -0
- data/lib/flydata/api/redshift_cluster.rb +15 -0
- data/lib/flydata/cli.rb +1 -0
- data/lib/flydata/command/base.rb +8 -2
- data/lib/flydata/command/conf.rb +48 -0
- data/lib/flydata/command/encrypt.rb +18 -0
- data/lib/flydata/command/sender.rb +10 -3
- data/lib/flydata/command/setlogdel.rb +1 -1
- data/lib/flydata/command/setup.rb +26 -3
- data/lib/flydata/command/sync.rb +962 -0
- data/lib/flydata/command/version.rb +10 -0
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +305 -0
- data/lib/flydata/fluent-plugins/out_forward_ssl.rb +91 -0
- data/lib/flydata/fluent-plugins/preference.rb +92 -0
- data/lib/flydata/helpers.rb +13 -1
- data/lib/flydata/preference/data_entry_preference.rb +98 -0
- data/lib/flydata/sync_file_manager.rb +120 -0
- data/lib/flydata/table_def.rb +2 -0
- data/lib/flydata/table_def/mysql_table_def.rb +128 -0
- data/lib/flydata/table_def/redshift_table_def.rb +144 -0
- data/lib/flydata/util/encryptor.rb +53 -0
- data/spec/fluent_plugins_spec_helper.rb +19 -0
- data/spec/flydata/command/sender_spec.rb +3 -29
- data/spec/flydata/command/sync_spec.rb +1049 -0
- data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +204 -0
- data/spec/flydata/util/encryptor_spec.rb +96 -0
- data/spec/spec_helper.rb +1 -0
- data/tmpl/redshift_mysql_data_entry.conf.tmpl +11 -0
- metadata +153 -4
@@ -0,0 +1,305 @@
|
|
1
|
+
module Fluent
|
2
|
+
|
3
|
+
require 'fluent/plugin/in_mysql_binlog'
|
4
|
+
require 'binlog'
|
5
|
+
require 'kodama'
|
6
|
+
require File.dirname(__FILE__) + '/preference'
|
7
|
+
|
8
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
|
9
|
+
require 'flydata/sync_file_manager'
|
10
|
+
|
11
|
+
# Fluentd input plugin (registered as 'mysql_binlog_flydata') that hands each
# MySQL binlog event to a FlydataMysqlBinlogRecordHandler.
class MysqlBinlogFlydataInput < MysqlBinlogInput
  include MysqlBinlogFlydataInputPreference
  Plugin.register_input('mysql_binlog_flydata', self)

  def initialize
    super
  end

  config_param :database, :string
  config_param :tables, :string

  # Validates the configuration and builds the record handler.
  #
  # Raises a RuntimeError when the position file does not exist, because
  # an initial synchronization has to create it first.
  def configure(conf)
    super
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    unless File.exist?(@position_file)
      raise "No position file(#{@position_file}). Initial synchronization is required before starting."
    end
    load_custom_conf
    $log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
    # The tables setting is a comma separated list; turn it into an array.
    @tables = @tables.split(/,\s*/)
    @sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
    @record_handler = FlydataMysqlBinlogRecordHandler.new(
      database: @database,
      tables: @tables,
      tag: @tag,
      sync_fm: @sync_fm)
  end

  # Starts the parent plugin, then creates the table positions directory
  # if it is missing.
  def start
    super
    positions_path = @sync_fm.table_positions_dir_path
    Dir.mkdir positions_path unless File.exist? positions_path
  end

  # Dispatches one binlog event to the record handler.
  #
  # Any exception is logged together with the current content of the
  # position file. StandardErrors are swallowed on purpose (see below);
  # everything else is re-raised.
  def event_listener(event)
    begin
      @record_handler.dispatch(event)
    rescue Exception => e
      # Reading the position file must not hide the original error, so a
      # failure here is reported inline instead of being raised.
      position = begin
        File.open(@position_file) {|f| f.read }
      rescue StandardError => read_error
        "(position file unreadable: #{read_error.message})"
      end
      $log.error "error occured while processing #{event.event_type} event at #{position}"
      $log.error e.message
      $log.error e.backtrace.join("\n")
      # Not reraising a StandardError because the underlying code can't handle an error well.
      raise unless e.kind_of?(StandardError)
    end
  end

end
|
58
|
+
|
59
|
+
# Base record handler: routes a binlog event to the handler method named
# after the event type.
class MysqlBinlogRecordHandler
  # Calls `on_<downcased event type>` with the event converted to a hash.
  # Events for which this object has no such public method are ignored and
  # nil is returned.
  def dispatch(event)
    handler_name = "on_#{event.event_type.downcase}"

    # Unhandled event types are silently skipped.
    # $log.trace "Unhandled type: #{record["event_type"]}"
    return unless respond_to?(handler_name)

    # TODO to_hash method call below can fail if event.event_type is
    # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
    # be fixed when we support record update.
    converted = MysqlBinlogInput::BinlogUtil.to_hash(event)
    send(handler_name, converted)
  end
end
|
73
|
+
|
74
|
+
# Record handler that turns row events of the configured database/tables
# into FlyData records and emits them through Fluentd's Engine.
class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
  TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
  TYPE = 'type'
  ROW = 'row'
  SEQ = 'seq'
  RESPECT_ORDER = 'respect_order'
  # Binlog integer column type => size in bytes (used to compute the hex width).
  INTEGER_TYPES = {'TINY' => 1,
                   'SHORT' => 2,
                   'INT24' => 3,
                   'LONG' => 4,
                   'LONGLONG' => 8
  }
  SIGNLESS_INTEGER_PREFIX = '0SL'

  # opts must contain :database, :tables, :tag and :sync_fm.
  # Raises a RuntimeError naming the missing keys otherwise.
  def initialize(opts)
    mandatory_opts = [:database, :tables, :tag, :sync_fm]

    missing_opts = mandatory_opts - opts.keys
    unless missing_opts.empty?
      raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
    end
    @database = opts[:database]
    @tables = opts[:tables]
    @tag = opts[:tag]
    @sync_fm = opts[:sync_fm]

    @query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
  end

  def on_write_rows(record)
    emit_insert(record)
  end

  def on_update_rows(record)
    emit_update(record)
  end

  def on_delete_rows(record)
    emit_delete(record)
  end

  # Query events are delegated to the query handler.
  def on_query(record)
    @query_handler.dispatch(record)
  end

  # Called by the query handler when an ALTER TABLE is seen; only logs for now.
  def on_table_changed(table)
    $log.trace "Table #{table} has changed. Reloading the table column"
  end

  private

  # True when the record belongs to the configured database and one of the
  # configured tables.
  def acceptable?(record)
    (@database == record["db_name"]) && @tables.include?(record["table_name"])
  end

  def emit_insert(record)
    emit_record(:insert, record)
  end

  def emit_delete(record)
    emit_record(:delete, record)
  end

  def emit_update(record)
    emit_record(:update, record) do |row|
      row.last # For update, row has two arrays (old and new values) Use new values
    end
  end

  # Builds one FlyData record per row of the event and emits each of them
  # with the next sequence number of its table.
  #
  # An optional block receives each raw row and returns the row to use.
  # Records that are not acceptable? are dropped.
  def emit_record(type, record)
    return unless acceptable?(record)

    table = record['table_name']

    records = record["rows"].collect do |row|
      row = yield(row) if block_given? # Give the caller a chance to generate the correct row
      # Column values are keyed by their 1-based position, as strings.
      values = row.each.with_index(1).each_with_object({}) do |(v, i), h|
        if v.kind_of?(String)
          # Round trip through UTF-16 so that invalid/undefined sequences get replaced.
          v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
        end
        h[i.to_s] = v
      end
      { TYPE => type, TABLE_NAME => table,
        RESPECT_ORDER => true, # Continuous sync needs record order to be kept
        ROW => values
      }
    end

    encode_signless_integer(records, record["columns"])

    # Use binlog's timestamp
    timestamp = record["timestamp"].to_i
    records.each do |row|
      @sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
        row[SEQ] = seq
        Engine.emit(@tag, timestamp, row)
      end
    end
  end

  # Rewrites, in place, every integer column value of the given records as
  # SIGNLESS_INTEGER_PREFIX followed by a fixed-width hex string (two's
  # complement for negative values).
  #
  # column_types holds the binlog column type name of each column, in
  # column order. Values that are neither Numeric nor a string consisting
  # solely of an optionally negative decimal number are left untouched.
  def encode_signless_integer(records, column_types)
    records.each do |record|
      record[ROW].keys.each do |position|
        index = position.to_i - 1
        column_type = column_types[index]
        next unless INTEGER_TYPES.key?(column_type)

        # It's a signless integer.
        intval = record[ROW][position]
        # \A..\z instead of ^..$: a multi-line string such as "12\nabc" must
        # not pass the check, otherwise sprintf('%x') below would raise.
        next unless (intval.kind_of?(Numeric) || intval =~ /\A-?[\d]+\z/)
        width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
        signless_val = SIGNLESS_INTEGER_PREFIX
        # sprintf renders negative numbers as "..f<digits>"; expand the "..f"
        # marker into enough f's and keep the last `width` characters.
        signless_val += sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
        record[ROW][position] = signless_val
      end
    end
  end
end
|
193
|
+
|
194
|
+
# Base query handler: matches a normalized query against a table of
# [pattern, method name] pairs and calls the first matching method.
# Subclasses fill @mapping_table.
class MysqlBinlogQueryHandler
  def initialize
    @mapping_table = []
  end

  # Normalizes record["query"] and invokes the handler method of the first
  # pattern that matches it, passing the record and the normalized query.
  #
  # Raises a RuntimeError when a pattern matches but this object has no
  # public method with the mapped name.
  def dispatch(record)
    # Normalization does not depend on the pattern, so do it once.
    query = normalize_query(record["query"])
    @mapping_table.each do |pattern, method_name|
      next unless pattern.match(query)

      unless respond_to?(method_name)
        raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
      end
      send(method_name, record, query)
      break
    end
  end

  private

  def normalize_query(query)
    strip_comments(query)
  end

  # Removes SQL comments and collapses whitespace so that patterns can be
  # anchored at the start of the statement.
  #
  # NOTE: comment markers inside string literals are not recognized and are
  # stripped as well.
  def strip_comments(query)
    query.to_s
         .gsub(/--(?=\s|\z)[^\n]*/, ' ') # -- style comments, up to the end of the line or of the query
         .gsub(%r{/\*.*?\*/}m, ' ')      # /* */ style comments: non-greedy, may span several lines
         .gsub(/\s+/, ' ')               # replace multiple spaces with a space
         .strip                          # a leading comment must not leave a leading space
  end
end
|
224
|
+
|
225
|
+
# Query handler that notifies the record handler about ALTER TABLE queries.
class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
  # opts must contain :record_handler; a RuntimeError is raised otherwise.
  def initialize(opts)
    absent = [:record_handler] - opts.keys
    raise "mandatory options are missing: #{absent.join(", ")}" unless absent.empty?

    @opts = opts

    # Normalized queries starting with "alter table" go to on_alter_table.
    @mapping_table = [
      [/^alter table/i, :on_alter_table],
    ]
  end

  # Extracts the table name following "alter table" and reports it to the
  # record handler. Raises a RuntimeError when no table name is found.
  def on_alter_table(record, query)
    matched = /alter table\s+(?<table>[^\s]+)/i.match(query)
    raise "This alter table query has no table name? '#{query}'" if matched.nil?

    @opts[:record_handler].on_table_changed(matched[:table])
  end
end
|
248
|
+
|
249
|
+
end
|
250
|
+
|
251
|
+
# HACK
|
252
|
+
# Monkey patch the class to manage string's character encoding.
|
253
|
+
module Binlog

  class Client
    # Keep a handle on the original implementation so the wrapper can call it.
    original_wait = instance_method(:wait_for_next_event)

    # Wraps wait_for_next_event: row events get a singleton `rows` method
    # that tags every string value as UTF-8.
    define_method(:wait_for_next_event) do
      event = original_wait.bind(self).call
      if event.kind_of?(Binlog::RowEvent)
        class << event
          def rows
            # HACK
            # Assuming all string values are UTF-8
            # To make this right, MySQL client's encoding must be set to UTF-8
            # But how?
            as_utf8 = lambda do |val|
              val.force_encoding("UTF-8") if val.respond_to?(:force_encoding)
              val
            end

            super.map do |row|
              row.map do |value|
                # Update has two rows in it, so a value can itself be an array.
                value.kind_of?(Array) ? value.map(&as_utf8) : as_utf8.call(value)
              end
            end
          end
        end
      end
      event
    end
  end

end
|
287
|
+
|
288
|
+
# HACK
|
289
|
+
# Monkey patch so that we can replace Kodama's logger
|
290
|
+
module Kodama

  Client.class_eval do
    attr_accessor :logger

    class << self
      # Except for replacing logger, the implementation is a copy of the original
      # method: build a client, assign the global $log as its logger, let the
      # caller's block configure it, then start it.
      def start(options = {}, &block)
        kodama_client = new(mysql_url(options))
        kodama_client.logger = $log
        block.call(kodama_client)
        kodama_client.start
      end
    end
  end

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Fluent
|
2
|
+
|
3
|
+
|
4
|
+
# Forward output plugin (registered as 'forward_ssl') that sends chunks over
# an SSL socket to each node's ssl_port.
class ForwardSslOutput < ForwardOutput
  Plugin.register_output('forward_ssl', self)

  # Besides the parent's configuration, copies the 'ssl_port' setting of
  # each config element to the node with the same host.
  def configure(conf)
    super
    conf.elements.each do |e|
      if e['ssl_port']
        node = @nodes.find {|n| n.host == e['host'] }
        node.set_ssl_port(e['ssl_port']) if node
      end
    end
  end

  # Writes one chunk to the node over SSL. The socket is always closed.
  def send_data(node, tag, chunk)
    sock = connect_ssl(node)
    begin
      # Copy of ForwardOutput send_data
      # beginArray(2)
      sock.write FORWARD_HEADER

      # writeRaw(tag)
      sock.write tag.to_msgpack # tag

      # beginRaw(size)
      sz = chunk.size
      #if sz < 32
      #  # FixRaw
      #  sock.write [0xa0 | sz].pack('C')
      #elsif sz < 65536
      #  # raw 16
      #  sock.write [0xda, sz].pack('Cn')
      #else
      # raw 32
      sock.write [0xdb, sz].pack('CN')
      #end

      # writeRawBody(packed_es)
      chunk.write_to(sock)

      node.heartbeat(false)
    ensure
      sock.close
    end
  end

  # Opens a plain TCP connection to the node's regular port.
  def connect(node)
    tcp_sock = TCPSocket.new(node.resolved_host, node.port)
    set_tcp_sock_opts(tcp_sock)
    tcp_sock
  end

  # Opens a TCP connection to the node's ssl_port and performs the SSL
  # handshake on it. Returns the connected SSL socket; closing it also
  # closes the underlying TCP socket (sync_close).
  #
  # If anything fails after the TCP socket was opened, the TCP socket is
  # closed before the error propagates so that it does not leak.
  def connect_ssl(node)
    handshake_done = false
    tcp_sock = TCPSocket.new(node.resolved_host, node.ssl_port)
    set_tcp_sock_opts(tcp_sock)
    ssl_ctx = ssl_ctx_with_verification
    ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx)
    ssl_sock.sync_close = true
    ssl_sock.connect
    handshake_done = true
    ssl_sock
  ensure
    if !handshake_done && tcp_sock && !tcp_sock.closed?
      tcp_sock.close
    end
  end

  private
  # Builds an SSL context that verifies the peer certificate against the
  # default certificate store paths.
  #
  # NOTE(review): only the certificate chain is verified; no hostname check
  # (e.g. SSLSocket#post_connection_check) is performed on the connection.
  # Confirm whether that is intended.
  def ssl_ctx_with_verification
    cert_store = OpenSSL::X509::Store.new
    cert_store.set_default_paths
    ssl_ctx = OpenSSL::SSL::SSLContext.new
    ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
    ssl_ctx.cert_store = cert_store
    ssl_ctx
  end

  # Applies SO_LINGER and SO_SNDTIMEO, both derived from @send_timeout.
  def set_tcp_sock_opts(tcp_sock)
    opt = [1, @send_timeout.to_i].pack('I!I!')  # { int l_onoff; int l_linger; }
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
    opt = [@send_timeout.to_i, 0].pack('L!L!')  # struct timeval
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
  end

  # Reopen the parent's Node class to give it an ssl_port attribute.
  class ForwardOutput::Node
    attr_reader :ssl_port
    def set_ssl_port(ssl_port)
      @ssl_port = ssl_port
    end
  end

end
|
89
|
+
|
90
|
+
|
91
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require_relative '../util/encryptor'
|
3
|
+
|
4
|
+
module Fluent
|
5
|
+
# Mixin for plugins whose settings can be overridden by a YAML "custom conf"
# file and post-processed by per-setting options (e.g. decryption).
module DataEntryPreferenceConfigurable
  # type (symbol) => { key (symbol) => option hash }.
  # NOTE: being a class variable of this module, the registry is shared by
  # every class that includes the module.
  @@supported_custom_confs = Hash.new{|h,k| h[k] = {}}

  # Adds the class-level DSL and the two config params every including
  # class needs.
  def self.included(base)
    base.extend ClassMethods
    base.class_eval do
      config_param :custom_conf_path, :string, default: nil
      config_param :key, :string, default: nil
    end
  end

  # Loads the YAML file at file_path (when given and present) and applies
  # it, then the registered options, to every registered setting.
  def load_custom_conf(file_path = @custom_conf_path)
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    custom_conf = if file_path && File.exist?(file_path)
      YAML.load_file(file_path)
    else
      nil
    end
    @@supported_custom_confs.each do |type, settings|
      settings.each do |key, option|
        apply_custom_conf(custom_conf, key, type, option) if custom_conf
        apply_custom_option(key, option)
      end
    end
  end

  # Copies conf[type][key] (string keys) into the instance variable named
  # option[:var_name], or key when no :var_name is given. Does nothing when
  # the section or the value is missing/falsy.
  def apply_custom_conf(conf, key, type, option)
    section = conf[type.to_s]
    value = section && section[key.to_s]
    return unless value

    var_name = option[:var_name] || key
    instance_variable_set(:"@#{var_name}", value)
  end

  # Runs the setting's current value through every registered option and
  # stores the result back when it changed.
  def apply_custom_option(key, option)
    var_name = option[:var_name] || key
    original_value = instance_variable_get(:"@#{var_name}")
    value = original_value
    option.each do |option_name, option_value|
      value = Fluent::DataEntryPreferenceConfigurable.replace_value_with_option(
        key, value, option_name, option_value, key: @key)
    end
    if original_value != value
      instance_variable_set(:"@#{var_name}", value)
    end
  end

  module ClassMethods
    # Registers a setting `key` under the custom conf section `type`.
    def custom_config_param(key, type, option = {})
      conf = class_variable_get(:@@supported_custom_confs)
      conf[type.to_sym][key.to_sym] = option
    end
  end

  # Returns param_value transformed according to one option.
  # Currently only :encrypted (with a truthy option_value) has an effect:
  # the value is decrypted with opts[:key]. Anything else returns
  # param_value unchanged.
  def self.replace_value_with_option(param_name, param_value, option_name, option_value, opts = {})
    ret = param_value
    case option_name
    when :encrypted
      if option_value
        ret = Flydata::Util::Encryptor.decrypt(
                param_value, opts[:key], param_name)
      end
    end
    ret
  end
end
|
69
|
+
|
70
|
+
# Declares which settings of the MySQL binlog input can come from the
# custom conf file, and registers them on any class including this module.
module MysqlBinlogFlydataInputPreference
  # custom conf section => { setting => options }
  CUSTOM_CONFIG_PARAMS = {
    mysql_data_entry_preference: {
      database: {},
      tables:   {},
      host:     {},
      username: {},
      password: {encrypted: true},
    },
  }

  # Mixes DataEntryPreferenceConfigurable into the including class, then
  # registers every entry of CUSTOM_CONFIG_PARAMS through its
  # custom_config_param class method.
  def self.included(base)
    base.send(:include, DataEntryPreferenceConfigurable)
    CUSTOM_CONFIG_PARAMS.each_pair do |section, params|
      params.each_pair do |name, option|
        base.custom_config_param(name, section, option)
      end
    end
  end
end
|
92
|
+
end
|