flydata 0.0.5.6 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -0
- data/Gemfile.lock +36 -1
- data/VERSION +1 -1
- data/bin/fdmysqldump +59 -0
- data/flydata.gemspec +49 -5
- data/lib/flydata.rb +3 -1
- data/lib/flydata/api/data_entry.rb +4 -0
- data/lib/flydata/api/redshift_cluster.rb +15 -0
- data/lib/flydata/cli.rb +1 -0
- data/lib/flydata/command/base.rb +8 -2
- data/lib/flydata/command/conf.rb +48 -0
- data/lib/flydata/command/encrypt.rb +18 -0
- data/lib/flydata/command/sender.rb +10 -3
- data/lib/flydata/command/setlogdel.rb +1 -1
- data/lib/flydata/command/setup.rb +26 -3
- data/lib/flydata/command/sync.rb +962 -0
- data/lib/flydata/command/version.rb +10 -0
- data/lib/flydata/fluent-plugins/in_mysql_binlog_flydata.rb +305 -0
- data/lib/flydata/fluent-plugins/out_forward_ssl.rb +91 -0
- data/lib/flydata/fluent-plugins/preference.rb +92 -0
- data/lib/flydata/helpers.rb +13 -1
- data/lib/flydata/preference/data_entry_preference.rb +98 -0
- data/lib/flydata/sync_file_manager.rb +120 -0
- data/lib/flydata/table_def.rb +2 -0
- data/lib/flydata/table_def/mysql_table_def.rb +128 -0
- data/lib/flydata/table_def/redshift_table_def.rb +144 -0
- data/lib/flydata/util/encryptor.rb +53 -0
- data/spec/fluent_plugins_spec_helper.rb +19 -0
- data/spec/flydata/command/sender_spec.rb +3 -29
- data/spec/flydata/command/sync_spec.rb +1049 -0
- data/spec/flydata/fluent-plugins/in_mysql_binlog_flydata_spec.rb +204 -0
- data/spec/flydata/util/encryptor_spec.rb +96 -0
- data/spec/spec_helper.rb +1 -0
- data/tmpl/redshift_mysql_data_entry.conf.tmpl +11 -0
- metadata +153 -4
@@ -0,0 +1,305 @@
|
|
1
|
+
module Fluent
|
2
|
+
|
3
|
+
require 'fluent/plugin/in_mysql_binlog'
|
4
|
+
require 'binlog'
|
5
|
+
require 'kodama'
|
6
|
+
require File.dirname(__FILE__) + '/preference'
|
7
|
+
|
8
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
|
9
|
+
require 'flydata/sync_file_manager'
|
10
|
+
|
11
|
+
# Fluentd input plugin ('mysql_binlog_flydata') that reads MySQL binlog events
# and hands them to a FlydataMysqlBinlogRecordHandler, which emits them as
# Flydata sync records.
class MysqlBinlogFlydataInput < MysqlBinlogInput
  include MysqlBinlogFlydataInputPreference
  Plugin.register_input('mysql_binlog_flydata', self)

  def initialize
    super
  end

  config_param :database, :string
  config_param :tables, :string

  # Validates that a binlog position file exists, applies the custom
  # (preference) configuration, splits the comma separated table list and
  # builds the record handler.
  #
  # Raises RuntimeError when the position file is missing.
  def configure(conf)
    super
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    unless File.exist?(@position_file)
      raise "No position file(#{@position_file}). Initial synchronization is required before starting."
    end
    load_custom_conf
    $log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
    @tables = @tables.split(/,\s*/)
    @sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
    @record_handler = FlydataMysqlBinlogRecordHandler.new(
                        database: @database,
                        tables: @tables,
                        tag: @tag,
                        sync_fm: @sync_fm)
  end

  # Starts the parent plugin, then makes sure the directory holding the
  # per-table position files exists.
  def start
    super
    positions_path = @sync_fm.table_positions_dir_path
    Dir.mkdir positions_path unless File.exist? positions_path
  end

  # Dispatches one binlog event to the record handler.
  #
  # Any failure is logged together with the current binlog position.
  # StandardErrors are swallowed after logging; everything else is re-raised.
  def event_listener(event)
    @record_handler.dispatch(event)
  rescue Exception => e
    # Reading the position file must not be allowed to raise here: a failure
    # would hide the original error and escape as a StandardError.
    position = begin
      File.open(@position_file) {|f| f.read }
    rescue StandardError => read_error
      "(position file unreadable: #{read_error.message})"
    end
    $log.error "error occured while processing #{event.event_type} event at #{position}"
    $log.error e.message
    $log.error e.backtrace.join("\n")
    # Not reraising a StandardError because the underlying code can't handle an error well.
    raise unless e.kind_of?(StandardError)
  end

end
|
58
|
+
|
59
|
+
# Base class for binlog record handlers. Routes an event to an
# "on_<event type>" method when the concrete handler defines one.
class MysqlBinlogRecordHandler
  # Converts the event to a hash and passes it to the matching handler method.
  # Events without a (public) handler method are ignored and nil is returned.
  def dispatch(event)
    handler = "on_#{event.event_type.downcase}"
    # $log.trace "Unhandled type: #{record["event_type"]}"
    return unless respond_to?(handler)

    # TODO to_hash method call below can fail if event.event_type is
    # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
    # be fixed when we support record update.
    send(handler, MysqlBinlogInput::BinlogUtil.to_hash(event))
  end
end
|
73
|
+
|
74
|
+
# Turns binlog row events into Flydata records and emits them through the
# Fluentd engine. Only events for the configured database and tables are
# emitted.
class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
  TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
  TYPE = 'type'
  ROW = 'row'
  SEQ = 'seq'
  RESPECT_ORDER = 'respect_order'
  # Column type name => storage width in bytes
  INTEGER_TYPES = {'TINY' => 1,
                   'SHORT' => 2,
                   'INT24' => 3,
                   'LONG' => 4,
                   'LONGLONG' => 8
  }
  SIGNLESS_INTEGER_PREFIX = '0SL'

  # opts must contain :database, :tables, :tag and :sync_fm.
  # Raises RuntimeError listing the missing keys otherwise.
  def initialize(opts)
    mandatory_opts = [:database, :tables, :tag, :sync_fm]

    missing_opts = mandatory_opts - opts.keys
    unless missing_opts.empty?
      raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
    end
    @database = opts[:database]
    @tables = opts[:tables]
    @tag = opts[:tag]
    @sync_fm = opts[:sync_fm]

    @query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
  end

  def on_write_rows(record)
    emit_insert(record)
  end

  def on_update_rows(record)
    emit_update(record)
  end

  def on_delete_rows(record)
    emit_delete(record)
  end

  def on_query(record)
    @query_handler.dispatch(record)
  end

  # Called by the query handler when an ALTER TABLE is seen; only logs.
  def on_table_changed(table)
    $log.trace "Table #{table} has changed. Reloading the table column"
  end

  private

  # True when the record belongs to the configured database and tables.
  def acceptable?(record)
    @database == record["db_name"] && @tables.include?(record["table_name"])
  end

  def emit_insert(record)
    emit_record(:insert, record)
  end

  def emit_delete(record)
    emit_record(:delete, record)
  end

  def emit_update(record)
    emit_record(:update, record) do |row|
      row.last # For update, row has two arrays (old and new values) Use new values
    end
  end

  # Builds one Flydata record per binlog row and emits each with the table's
  # next sequence number. An optional block may transform each raw row first.
  def emit_record(type, record)
    return unless acceptable?(record)

    table = record['table_name']

    records = record["rows"].map do |row|
      row = yield(row) if block_given? # Give the caller a chance to generate the correct row
      { TYPE => type, TABLE_NAME => table,
        RESPECT_ORDER => true, # Continuous sync needs record order to be kept
        ROW => row_to_hash(row)
      }
    end

    encode_signless_integer(records, record["columns"])

    # Use binlog's timestamp
    timestamp = record["timestamp"].to_i
    records.each do |row|
      @sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
        row[SEQ] = seq
        Engine.emit(@tag, timestamp, row)
      end
    end
  end

  # Maps column values to a hash keyed by 1-based column position ("1", "2", ...).
  # String values are round-tripped through UTF-16 so that invalid or
  # undefined byte sequences are replaced.
  def row_to_hash(row)
    row.each.with_index(1).each_with_object({}) do |(value, position), hash|
      if value.kind_of?(String)
        value = value.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
      end
      hash[position.to_s] = value
    end
  end

  # Rewrites every integer-typed column value as SIGNLESS_INTEGER_PREFIX
  # followed by the fixed-width two's complement hex form of the value.
  # Values that are neither Numeric nor an integer-looking String are left
  # untouched.
  def encode_signless_integer(records, column_types)
    records.each do |record|
      row = record[ROW]
      row.keys.each do |position|
        column_type = column_types[position.to_i - 1]
        next unless INTEGER_TYPES.key?(column_type)

        intval = row[position]
        if intval.kind_of?(String)
          # Anchored to the whole string so that multi-line values are rejected.
          next unless intval =~ /\A-?\d+\Z/
          # Convert explicitly as base 10: sprintf would otherwise treat a
          # leading zero as an octal prefix.
          intval = intval.to_i
        elsif !intval.kind_of?(Numeric)
          next
        end

        width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
        # Negative numbers are formatted as "..f<digits>"; expand the marker to
        # a run of f's and keep the lowest `width` hex digits.
        hex = sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
        row[position] = SIGNLESS_INTEGER_PREFIX + hex
      end
    end
  end
end
|
193
|
+
|
194
|
+
# Base class for binlog query handlers. Subclasses fill @mapping_table with
# [pattern, method_name] pairs; dispatch calls the method of the first pattern
# that matches the normalized query.
class MysqlBinlogQueryHandler
  def initialize
    @mapping_table = []
  end

  # Normalizes record["query"] and invokes the handler method mapped to the
  # first matching pattern with (record, normalized_query).
  #
  # Always returns nil. Raises RuntimeError when a pattern matches but the
  # mapped method is not (publicly) defined on this object.
  def dispatch(record)
    return if @mapping_table.empty?

    # Normalize once; the result is the same for every pattern.
    query = normalize_query(record["query"])
    _pattern, method_name = @mapping_table.find { |pattern, _| pattern.match(query) }
    return if method_name.nil?

    unless respond_to?(method_name)
      raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
    end
    send(method_name, record, query)
    nil
  end

  private

  # Removes comments and surrounding whitespace so that patterns anchored at
  # the start of the statement still match a query that begins with a comment.
  def normalize_query(query)
    strip_comments(query).strip
  end

  # Removes SQL comments and collapses whitespace runs into a single space.
  # This is a textual approximation: comment markers inside string literals
  # are not recognized as literals.
  def strip_comments(query)
    # /* */ style comments: non-greedy so each comment is removed on its own,
    # multi-line so that a comment may span several lines.
    query = query.gsub(%r{/\*.*?\*/}m, ' ')
    # -- style comments: run to the end of the line, with or without a
    # trailing newline.
    query = query.gsub(/--(?:[ \t][^\n]*)?$/, ' ')
    query.gsub(/\s+/, ' ') # replace multiple spaces with a space
  end
end
|
224
|
+
|
225
|
+
# Query handler that notifies the record handler about ALTER TABLE statements.
class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
  # opts must contain :record_handler; raises RuntimeError otherwise.
  def initialize(opts)
    absent = [:record_handler] - opts.keys
    raise "mandatory options are missing: #{absent.join(", ")}" unless absent.empty?

    @opts = opts
    @mapping_table = [
      [/^alter table/i, :on_alter_table],
    ]
  end

  # Extracts the table name following "alter table" and reports it to the
  # record handler. Raises RuntimeError when no table name can be found.
  def on_alter_table(record, query)
    table_pattern = /alter table\s+(?<table>[^\s]+)/i
    found = table_pattern.match(query)
    raise "This alter table query has no table name? '#{query}'" if found.nil?

    @opts[:record_handler].on_table_changed(found[:table])
  end
end
|
248
|
+
|
249
|
+
end
|
250
|
+
|
251
|
+
# HACK
|
252
|
+
# Monkey patch the class to manage string's character encoding.
|
253
|
+
module Binlog

  class Client
    # Keep a handle on the original implementation so the wrapper can call it.
    original_wait = instance_method(:wait_for_next_event)

    # Wraps wait_for_next_event so that row events return their string values
    # tagged as UTF-8.
    define_method(:wait_for_next_event) do
      event = original_wait.bind(self).call
      if event.kind_of?(Binlog::RowEvent)
        class << event
          def rows
            # HACK
            # Assuming all string values are UTF-8
            # To make this right, MySQL client's encoding must be set to UTF-8
            # But how?
            as_utf8 = lambda do |val|
              val.force_encoding("UTF-8") if val.respond_to?(:force_encoding)
              val
            end
            super.map do |row|
              row.map do |value|
                # Update has two rows in it
                value.kind_of?(Array) ? value.map(&as_utf8) : as_utf8.call(value)
              end
            end
          end
        end
      end
      event
    end
  end

end
|
287
|
+
|
288
|
+
# HACK
|
289
|
+
# Monkey patch so that we can replace Kodama's logger
|
290
|
+
module Kodama

  Client.class_eval do
    attr_accessor :logger

    class << self
      # Except for replacing logger, the implementation is a copy of the original
      # method.
      #
      # Builds a client for the given connection options, points its logger at
      # $log, lets the block configure it and then starts it.
      def start(options = {}, &block)
        kodama_client = new(mysql_url(options))
        kodama_client.logger = $log
        block.call(kodama_client)
        kodama_client.start
      end
    end
  end

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Fluent
|
2
|
+
|
3
|
+
|
4
|
+
# Fluentd output plugin ('forward_ssl') that forwards chunks like
# ForwardOutput does, but over a TLS connection to each node's ssl_port.
class ForwardSslOutput < ForwardOutput
  Plugin.register_output('forward_ssl', self)

  # After the parent configuration, copies the 'ssl_port' of every config
  # element onto the node that has the same host.
  def configure(conf)
    super
    conf.elements.each do |e|
      next unless e['ssl_port']

      node = @nodes.find {|n| n.host == e['host'] }
      node.set_ssl_port(e['ssl_port']) if node
    end
  end

  # Sends one chunk to the node over TLS using the forward protocol framing:
  # header, msgpack-encoded tag, raw32 length, chunk body. The socket is
  # always closed afterwards.
  def send_data(node, tag, chunk)
    sock = connect_ssl(node)
    begin
      # Copy of ForwardOutput send_data
      # beginArray(2)
      sock.write FORWARD_HEADER

      # writeRaw(tag)
      sock.write tag.to_msgpack  # tag

      # beginRaw(size)
      sz = chunk.size
      #if sz < 32
      #  # FixRaw
      #  sock.write [0xa0 | sz].pack('C')
      #elsif sz < 65536
      #  # raw 16
      #  sock.write [0xda, sz].pack('Cn')
      #else
      # raw 32
      sock.write [0xdb, sz].pack('CN')
      #end

      # writeRawBody(packed_es)
      chunk.write_to(sock)

      node.heartbeat(false)
    ensure
      sock.close
    end
  end

  # Opens a plain TCP connection to the node's regular port.
  def connect(node)
    tcp_sock = TCPSocket.new(node.resolved_host, node.port)
    set_tcp_sock_opts(tcp_sock)
    tcp_sock
  end

  # Opens a TLS connection to the node's ssl_port and returns the connected
  # SSL socket. Closing the SSL socket also closes the TCP socket.
  #
  # NOTE(review): only the certificate chain is verified (VERIFY_PEER); the
  # peer host name is never checked (no post_connection_check call) — confirm
  # whether that is intended.
  def connect_ssl(node)
    tcp_sock = TCPSocket.new(node.resolved_host, node.ssl_port)
    connected = false
    begin
      set_tcp_sock_opts(tcp_sock)
      ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx_with_verification)
      ssl_sock.sync_close = true
      ssl_sock.connect
      connected = true
      ssl_sock
    ensure
      # Do not leak the TCP socket when socket setup or the handshake fails.
      tcp_sock.close unless connected || tcp_sock.closed?
    end
  end

  private

  # SSL context that verifies the peer certificate against the system's
  # default certificate store.
  def ssl_ctx_with_verification
    cert_store = OpenSSL::X509::Store.new
    cert_store.set_default_paths
    ssl_ctx = OpenSSL::SSL::SSLContext.new
    ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
    ssl_ctx.cert_store = cert_store
    ssl_ctx
  end

  # Applies SO_LINGER and SO_SNDTIMEO, both based on @send_timeout.
  def set_tcp_sock_opts(tcp_sock)
    opt = [1, @send_timeout.to_i].pack('I!I!')  # { int l_onoff; int l_linger; }
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
    opt = [@send_timeout.to_i, 0].pack('L!L!')  # struct timeval
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
  end

  # Adds an ssl_port attribute to the forward node.
  class ForwardOutput::Node
    attr_reader :ssl_port
    def set_ssl_port(ssl_port)
      @ssl_port = ssl_port
    end
  end

end
|
89
|
+
|
90
|
+
|
91
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require_relative '../util/encryptor'
|
3
|
+
|
4
|
+
module Fluent
|
5
|
+
# Mixin that lets a plugin override its configuration instance variables from
# a YAML "custom conf" file and post-process them (e.g. decrypt them).
#
# Including classes register overridable parameters with custom_config_param.
# The registry is a class variable of this module, so it is shared by every
# including class.
module DataEntryPreferenceConfigurable
  @@supported_custom_confs = Hash.new{|h,k| h[k] = {}}

  # Adds the class-level DSL and the :custom_conf_path / :key config params.
  def self.included(base)
    base.extend ClassMethods
    base.class_eval do
      config_param :custom_conf_path, :string, default: nil
      config_param :key, :string, default: nil
    end
  end

  # Loads the YAML file at file_path (when given and present) and, for every
  # registered parameter, first applies the value from the file and then the
  # parameter's options.
  def load_custom_conf(file_path = @custom_conf_path)
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    custom_conf = if file_path && File.exist?(file_path)
      YAML.load_file(file_path)
    end
    @@supported_custom_confs.each do |type, settings|
      settings.each do |key, option|
        apply_custom_conf(custom_conf, key, type, option) if custom_conf
        apply_custom_option(key, option)
      end
    end
  end

  # Copies conf[type][key] (string keys) into the instance variable named by
  # option[:var_name], or by key when no :var_name is given. Missing or falsy
  # values leave the instance variable untouched.
  def apply_custom_conf(conf, key, type, option)
    section = conf[type.to_s]
    value = section && section[key.to_s]
    return unless value

    var_name = option[:var_name] || key
    instance_variable_set(:"@#{var_name}", value)
  end

  # Runs the current value of the parameter's instance variable through each
  # of its options and stores the result when it changed.
  def apply_custom_option(key, option)
    var_name = option[:var_name] || key
    original_value = instance_variable_get(:"@#{var_name}")
    value = option.inject(original_value) do |current, (option_name, option_value)|
      DataEntryPreferenceConfigurable.replace_value_with_option(
        key, current, option_name, option_value, key: @key)
    end
    instance_variable_set(:"@#{var_name}", value) if original_value != value
  end

  module ClassMethods
    # Registers key as an overridable parameter in the custom conf section
    # named type, with the given options (e.g. encrypted: true).
    def custom_config_param(key, type, option = {})
      conf = class_variable_get(:@@supported_custom_confs)
      conf[type.to_sym][key.to_sym] = option
    end
  end

  # Returns param_value transformed according to one option. Only a truthy
  # :encrypted option changes the value: it is decrypted with opts[:key].
  # Any other option returns param_value unchanged.
  def self.replace_value_with_option(param_name, param_value, option_name, option_value, opts = {})
    return param_value unless option_name == :encrypted && option_value

    Flydata::Util::Encryptor.decrypt(param_value, opts[:key], param_name)
  end
end
|
69
|
+
|
70
|
+
# Declares which settings of the MySQL binlog input can be overridden from the
# custom conf file, grouped by the conf section they are read from.
module MysqlBinlogFlydataInputPreference
  CUSTOM_CONFIG_PARAMS = {
    mysql_data_entry_preference: {
      database: {},
      tables:   {},
      host:     {},
      username: {},
      password: {encrypted: true},
    },
  }

  # Mixes DataEntryPreferenceConfigurable into the including class and
  # registers every entry of CUSTOM_CONFIG_PARAMS on it.
  def self.included(base)
    base.send(:include, DataEntryPreferenceConfigurable)
    CUSTOM_CONFIG_PARAMS.each do |type, custom_conf|
      custom_conf.each { |key, option| base.custom_config_param(key, type, option) }
    end
  end
end
|
92
|
+
end
|