flydata 0.0.5.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
module Flydata
  module Command
    # Command that prints the flydata version recorded in the gem's VERSION file.
    class Version < Base
      # Location of the VERSION file, resolved relative to FLYDATA_GEM_HOME.
      VERSION_PATH = File.join(FLYDATA_GEM_HOME, 'VERSION')

      # Writes `flydata version "<first line of VERSION>"` to stdout.
      #
      # Raises EOFError when the VERSION file is empty and Errno::ENOENT when
      # it does not exist (both come straight from File.open / IO#readline).
      def run
        # Block form so the file handle is closed as soon as the line is read.
        version = File.open(VERSION_PATH, 'r') { |f| f.readline.strip }
        puts "flydata version \"#{version}\""
      end
    end
  end
end
@@ -0,0 +1,305 @@
1
+ module Fluent
2
+
3
+ require 'fluent/plugin/in_mysql_binlog'
4
+ require 'binlog'
5
+ require 'kodama'
6
+ require File.dirname(__FILE__) + '/preference'
7
+
8
+ require File.expand_path(File.join(File.dirname(__FILE__), '../../flydata'))
9
+ require 'flydata/sync_file_manager'
10
+
11
# Input plugin registered as 'mysql_binlog_flydata'. It extends
# MysqlBinlogInput and hands every received binlog event to a
# FlydataMysqlBinlogRecordHandler.
class MysqlBinlogFlydataInput < MysqlBinlogInput
  include MysqlBinlogFlydataInputPreference
  Plugin.register_input('mysql_binlog_flydata', self)

  def initialize
    super
  end

  config_param :database, :string
  config_param :tables, :string

  # Validates the configuration and builds the record handler.
  #
  # conf - the plugin configuration, passed through to the superclass.
  #
  # Raises RuntimeError when the position file does not exist.
  def configure(conf)
    super
    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
    unless File.exist?(@position_file)
      raise "No position file(#{@position_file}). Initial synchronization is required before starting."
    end
    load_custom_conf
    $log.info "mysql host:\"#{@host}\" username:\"#{@username}\" database:\"#{@database}\" tables:\"#{@tables}\""
    # @tables arrives as a comma separated string; keep it as an array from here on.
    @tables = @tables.split(/,\s*/)
    @sync_fm = Flydata::FileUtil::SyncFileManager.new(nil) # Passing nil for data_entry as this class does not use methods which require data_entry
    @record_handler = FlydataMysqlBinlogRecordHandler.new(
      database: @database,
      tables: @tables,
      tag: @tag,
      sync_fm: @sync_fm)
  end

  # Starts the plugin and makes sure the table positions directory exists.
  def start
    super
    positions_path = @sync_fm.table_positions_dir_path
    Dir.mkdir positions_path unless File.exist? positions_path
  end

  # Dispatches one binlog event to the record handler.
  #
  # Any exception is logged together with the current binlog position.
  # StandardErrors are swallowed on purpose; everything else is re-raised.
  def event_listener(event)
    begin
      @record_handler.dispatch(event)
    rescue Exception => e
      position = read_position_for_log
      $log.error "error occured while processing #{event.event_type} event at #{position}"
      $log.error e.message
      $log.error e.backtrace.join("\n")
      # Not reraising a StandardError because the underlying code can't handle an error well.
      raise unless e.kind_of?(StandardError)
    end
  end

  private

  # Returns the content of the position file for logging purposes.
  # A failure to read it must not escape from the rescue clause of
  # event_listener (that would propagate a new StandardError, which is
  # exactly what event_listener avoids), so it is turned into a placeholder.
  def read_position_for_log
    File.open(@position_file) {|f| f.read }
  rescue StandardError => read_error
    "(position unavailable: #{read_error.message})"
  end
end
58
+
59
# Base class that routes a binlog event to an `on_<event type>` method.
class MysqlBinlogRecordHandler
  # Calls `on_<downcased event type>` with the event converted to a hash.
  #
  # event - an object responding to `event_type`.
  #
  # Returns the handler's result, or nil when this object has no public
  # handler for the event type (such events are silently ignored).
  def dispatch(event)
    handler = "on_#{event.event_type.downcase}"
    return nil unless respond_to?(handler)

    # TODO to_hash method call below can fail if event.event_type is
    # "Update_rows". This seems to be a bug of ruby-binlog. The bug must
    # be fixed when we support record update.
    send(handler, MysqlBinlogInput::BinlogUtil.to_hash(event))
  end
end
73
+
74
# Record handler that turns row events of the configured database/tables
# into Flydata records and emits them through Engine.emit.
class FlydataMysqlBinlogRecordHandler < MysqlBinlogRecordHandler
  TABLE_NAME = 'table_name' # A Flydata JSON tag to specify a table name
  TYPE = 'type'
  ROW = 'row'
  SEQ = 'seq'
  RESPECT_ORDER = 'respect_order'
  # Integer column type name => width in bytes (used to size the hex encoding).
  INTEGER_TYPES = {'TINY' => 1,
                   'SHORT' => 2,
                   'INT24' => 3,
                   'LONG' => 4,
                   'LONGLONG' => 8
  }
  SIGNLESS_INTEGER_PREFIX = '0SL'

  # opts - Hash that must contain :database, :tables, :tag and :sync_fm.
  #
  # Raises RuntimeError when any of the mandatory keys is missing.
  def initialize(opts)
    mandatory_opts = [:database, :tables, :tag, :sync_fm]

    missing_opts = mandatory_opts - opts.keys
    unless missing_opts.empty?
      raise "Mandatory option(s) are missing: #{missing_opts.join(', ')}"
    end
    @database = opts[:database]
    @tables = opts[:tables]
    @tag = opts[:tag]
    @sync_fm = opts[:sync_fm]

    @query_handler = FlydataMysqlBinlogQueryHandler.new(record_handler: self)
  end

  def on_write_rows(record)
    emit_insert(record)
  end

  def on_update_rows(record)
    emit_update(record)
  end

  def on_delete_rows(record)
    emit_delete(record)
  end

  def on_query(record)
    @query_handler.dispatch(record)
  end

  # Called by the query handler when an ALTER TABLE query is seen.
  # Currently this only logs the event.
  def on_table_changed(table)
    $log.trace "Table #{table} has changed. Reloading the table column"
  end

  private

  # True when the record belongs to the configured database and to one of
  # the configured tables.
  def acceptable?(record)
    (@database == record["db_name"]) && @tables.include?(record["table_name"])
  end

  def emit_insert(record)
    emit_record(:insert, record)
  end

  def emit_delete(record)
    emit_record(:delete, record)
  end

  def emit_update(record)
    emit_record(:update, record) do |row|
      row.last # For update, row has two arrays (old and new values) Use new values
    end
  end

  # Builds one Flydata record per row of `record` and emits each of them.
  #
  # type   - :insert, :update or :delete.
  # record - Hash form of the binlog event; reads "db_name", "table_name",
  #          "rows", "columns" and "timestamp".
  #
  # An optional block receives each raw row and returns the row to emit.
  # Returns nil without emitting anything when the record is not acceptable.
  def emit_record(type, record)
    return unless acceptable?(record)

    table = record['table_name']

    records = record["rows"].collect do |row|
      row = yield(row) if block_given? # Give the caller a chance to generate the correct row
      { TYPE => type, TABLE_NAME => table,
        RESPECT_ORDER => true, # Continuous sync needs record order to be kept
        ROW => indexed_row(row)
      }
    end

    encode_signless_integer(records, record["columns"])

    # Use binlog's timestamp
    timestamp = record["timestamp"].to_i
    records.each do |row|
      @sync_fm.increment_and_save_table_position(row[TABLE_NAME]) do |seq|
        row[SEQ] = seq
        Engine.emit(@tag, timestamp, row)
      end
    end
  end

  # Converts an array of column values into a hash keyed by the 1-based
  # column position as a string ("1", "2", ...). String values are round
  # tripped through UTF-16 so that undefined/invalid byte sequences are
  # replaced before the value is emitted as UTF-8.
  def indexed_row(values)
    values.each.with_index(1).each_with_object({}) do |(v, i), h|
      if v.kind_of?(String)
        v = v.encode('utf-16', :undef => :replace, :invalid => :replace).encode('utf-8')
      end
      h[i.to_s] = v
    end
  end

  # Replaces every integer column value in `records` (in place) with
  # SIGNLESS_INTEGER_PREFIX followed by a fixed width hex string whose
  # width is taken from INTEGER_TYPES.
  #
  # records      - Array of Flydata records as built by emit_record.
  # column_types - Array of column type names, indexed by column position - 1.
  def encode_signless_integer(records, column_types)
    records.each do |record|
      record[ROW].keys.each do |position|
        column_type = column_types[position.to_i - 1]
        next unless INTEGER_TYPES.key?(column_type)

        # It's a signless integer.
        intval = record[ROW][position]
        next unless integer_like?(intval)

        width = INTEGER_TYPES[column_type] * 2 # * 2 because a single byte requires two characters (e.g. ff)
        # For negative numbers sprintf yields a "..f" prefix; expand it to
        # a run of f's and keep the rightmost `width` characters.
        hex = sprintf("%0#{width}x", intval).gsub(/\.\.f/, 'f' * width).slice(-width..-1)
        record[ROW][position] = SIGNLESS_INTEGER_PREFIX + hex
      end
    end
  end

  # True for Numeric values and for Strings made only of an optional minus
  # sign and digits. Other objects (nil included) are rejected without
  # calling =~ on them, since Object#=~ no longer exists on Ruby 3.2+.
  # \A and \z anchor the whole string, so multi-line strings do not pass.
  def integer_like?(value)
    return true if value.kind_of?(Numeric)
    value.kind_of?(String) && !(value =~ /\A-?\d+\z/).nil?
  end
end
193
+
194
# Base class that routes a query event to a handler method chosen by
# matching the normalized query text against @mapping_table, an array of
# [pattern, method_name] pairs filled in by subclasses.
class MysqlBinlogQueryHandler
  # Matches, in a single left-to-right scan, either
  #   * a /* ... */ comment (non-greedy, may span lines; as before, the
  #     character right after "/*" must not be "*"), or
  #   * a "--" comment: "--" followed by whitespace or end of input, up to
  #     the end of that line.
  # NOTE(review): comment markers inside SQL string literals are not
  # recognised as literals and are stripped as well.
  COMMENT_PATTERN = %r{/\*[^*].*?\*/|--(?=\s|\z)[^\n]*}m

  def initialize
    @mapping_table = []
  end

  # Invokes the handler of the first pattern matching the normalized query.
  #
  # record - Hash with the query text under "query".
  #
  # The handler receives (record, normalized_query).
  # Raises RuntimeError when a pattern matches but the mapped method is
  # not defined as a public method of this object.
  def dispatch(record)
    query = nil
    @mapping_table.each do |pattern, method_name|
      # Normalize once, and only when there is at least one pattern to test.
      query ||= normalize_query(record["query"])
      next unless pattern.match(query)

      unless respond_to?(method_name)
        raise "method '#{method_name}' is not defined in #{self.class.name} although its matching pattern is defined"
      end
      send(method_name, record, query)
      break
    end
  end

  private

  def normalize_query(query)
    strip_comments(query)
  end

  # Returns `query` with comments removed, every run of whitespace
  # collapsed into one space and leading/trailing whitespace removed, so
  # that patterns anchored with ^ still match a query that started with a
  # comment.
  def strip_comments(query)
    query.gsub(COMMENT_PATTERN, ' ').gsub(/\s+/, ' ').strip
  end
end
224
+
225
# Query handler that notifies a record handler about ALTER TABLE queries.
class FlydataMysqlBinlogQueryHandler < MysqlBinlogQueryHandler
  # opts - Hash that must contain :record_handler, an object responding to
  #        on_table_changed(table_name).
  #
  # Raises RuntimeError when :record_handler is missing.
  def initialize(opts)
    absent = [:record_handler].reject { |name| opts.keys.include?(name) }
    raise "mandatory options are missing: #{absent.join(", ")}" unless absent.empty?

    @opts = opts
    @mapping_table = [
      [/^alter table/i, :on_alter_table],
    ]
  end

  # Extracts the token following "alter table" from the query and reports
  # it to the record handler.
  #
  # Raises RuntimeError when no such token can be found.
  def on_alter_table(record, query)
    found = /alter table\s+(?<table>[^\s]+)/i.match(query)
    raise "This alter table query has no table name? '#{query}'" unless found

    @opts[:record_handler].on_table_changed(found[:table])
  end
end
248
+
249
+ end
250
+
251
+ # HACK
252
+ # Monkey patch the class to manage string's character encoding.
253
module Binlog

  class Client
    # Keep a handle on the original implementation so that the replacement
    # below can delegate to it.
    original_wait = instance_method(:wait_for_next_event)

    # Wraps wait_for_next_event: row events get a singleton `rows` method
    # that tags every string value as UTF-8 before returning the rows.
    define_method(:wait_for_next_event) do
      next_event = original_wait.bind(self).call
      if next_event.kind_of?(Binlog::RowEvent)
        class << next_event
          def rows
            raw_rows = super
            # HACK
            # Assuming all string values are UTF-8
            # To make this right, MySQL client's encoding must be set to UTF-8
            # But how?
            raw_rows.map do |row|
              row.map do |cell|
                if cell.kind_of?(Array)
                  # Update has two rows in it
                  cell.map do |item|
                    item.force_encoding("UTF-8") if item.respond_to?(:force_encoding)
                    item
                  end
                else
                  cell.force_encoding("UTF-8") if cell.respond_to?(:force_encoding)
                  cell
                end
              end
            end
          end
        end
      end
      next_event
    end
  end

end
287
+
288
+ # HACK
289
+ # Monkey patch so that we can replace Kodama's logger
290
module Kodama

  Client.class_eval do
    attr_accessor :logger

    # Except for replacing logger, the implementation is a copy of the original
    # method.
    #
    # Builds a client from `options`, sets its logger to $log, passes the
    # client to the given block and finally starts it.
    def self.start(options = {}, &block)
      prepared = new(mysql_url(options)).tap do |client|
        client.logger = $log
        block.call(client)
      end
      prepared.start
    end
  end

end
@@ -0,0 +1,91 @@
1
+ module Fluent
2
+
3
+
4
# Output plugin registered as 'forward_ssl'. It sends chunks the same way
# ForwardOutput does, but over an SSL socket connected to each node's
# ssl_port.
class ForwardSslOutput < ForwardOutput
  Plugin.register_output('forward_ssl', self)

  # Besides the superclass configuration, copies the 'ssl_port' value of
  # each configuration element onto the node with the same 'host'.
  def configure(conf)
    super
    conf.elements.each do |e|
      if e['ssl_port']
        node = @nodes.find {|n| n.host == e['host'] }
        node.set_ssl_port(e['ssl_port']) if node
      end
    end
  end

  # Writes one chunk to `node` over a fresh SSL connection, which is
  # always closed afterwards.
  def send_data(node, tag, chunk)
    sock = connect_ssl(node)
    begin
      # Copy of ForwardOutput send_data
      # beginArray(2)
      sock.write FORWARD_HEADER

      # writeRaw(tag)
      sock.write tag.to_msgpack  # tag

      # beginRaw(size)
      # The raw 32 header (0xdb) is written regardless of the chunk size;
      # the FixRaw / raw 16 variants are intentionally not used here.
      sz = chunk.size
      sock.write [0xdb, sz].pack('CN')

      # writeRawBody(packed_es)
      chunk.write_to(sock)

      node.heartbeat(false)
    ensure
      sock.close
    end
  end

  # Returns a plain TCP socket connected to the node's regular port.
  def connect(node)
    open_tcp_socket(node.resolved_host, node.port)
  end

  # Returns a connected OpenSSL::SSL::SSLSocket for the node's ssl_port.
  # The underlying TCP socket is closed when the SSL setup or handshake
  # raises, so a failed connection attempt does not leak a descriptor.
  #
  # NOTE(review): the context uses VERIFY_PEER but no hostname check
  # (post_connection_check) is performed - confirm whether the peer's
  # certificate should also be matched against the node's host name.
  def connect_ssl(node)
    tcp_sock = open_tcp_socket(node.resolved_host, node.ssl_port)
    handshake_done = false
    begin
      ssl_sock = OpenSSL::SSL::SSLSocket.new(tcp_sock, ssl_ctx_with_verification)
      ssl_sock.sync_close = true
      ssl_sock.connect
      handshake_done = true
      ssl_sock
    ensure
      tcp_sock.close unless handshake_done || tcp_sock.closed?
    end
  end

  private
  # SSL context that verifies the peer certificate against the default
  # certificate store paths.
  def ssl_ctx_with_verification
    cert_store = OpenSSL::X509::Store.new
    cert_store.set_default_paths
    ssl_ctx = OpenSSL::SSL::SSLContext.new
    ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_PEER
    ssl_ctx.cert_store = cert_store
    ssl_ctx
  end

  # Applies SO_LINGER and SO_SNDTIMEO, both based on @send_timeout.
  def set_tcp_sock_opts(tcp_sock)
    opt = [1, @send_timeout.to_i].pack('I!I!')  # { int l_onoff; int l_linger; }
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
    opt = [@send_timeout.to_i, 0].pack('L!L!')  # struct timeval
    tcp_sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
  end

  # Opens a TCP socket with the plugin's socket options applied. The
  # socket is closed again if applying the options raises.
  def open_tcp_socket(host, port)
    tcp_sock = TCPSocket.new(host, port)
    configured = false
    begin
      set_tcp_sock_opts(tcp_sock)
      configured = true
      tcp_sock
    ensure
      tcp_sock.close unless configured
    end
  end

  # Adds an ssl_port attribute to the nodes managed by ForwardOutput.
  class ForwardOutput::Node
    attr_reader :ssl_port
    def set_ssl_port(ssl_port)
      @ssl_port = ssl_port
    end
  end

end
89
+
90
+
91
+ end
@@ -0,0 +1,92 @@
1
+ require 'yaml'
2
+ require_relative '../util/encryptor'
3
+
4
+ module Fluent
5
# Mixin that lets a plugin override instance variables from a YAML
# "custom conf" file and post-process them (e.g. decrypt them).
#
# Including classes must provide a `config_param` class method; they gain
# the `custom_config_param` class method to declare supported settings.
module DataEntryPreferenceConfigurable
  # type (Symbol) => { key (Symbol) => option Hash }, shared by all includers.
  @@supported_custom_confs = Hash.new{|h,k| h[k] = {}}

  def self.included(base)
    base.extend ClassMethods
    base.class_eval do
      config_param :custom_conf_path, :string, default: nil
      config_param :key, :string, default: nil
    end
  end

  # Applies the custom conf file (when present) and the declared options
  # to the instance variables of this object.
  #
  # file_path - path of the YAML file; defaults to @custom_conf_path. A
  #             nil or non-existing path simply skips the file part.
  def load_custom_conf(file_path = @custom_conf_path)
    # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
    custom_conf = if file_path && File.exist?(file_path)
                    YAML.load_file(file_path)
                  else
                    nil
                  end
    @@supported_custom_confs.each do |type, settings|
      settings.each do |key, option|
        apply_custom_conf(custom_conf, key, type, option) if custom_conf
        apply_custom_option(key, option)
      end
    end
  end

  # Copies conf[type][key] (string keys) into the instance variable named
  # by option[:var_name], or by `key` when no :var_name is given. Missing
  # or falsy values leave the instance variable untouched.
  def apply_custom_conf(conf, key, type, option)
    if conf[type.to_s] && (value = conf[type.to_s][key.to_s])
      var_name = option[:var_name] || key
      instance_variable_set(:"@#{var_name}", value)
    end
  end

  # Runs the current value of the setting through every declared option
  # (see replace_value_with_option) and stores the result when it changed.
  def apply_custom_option(key, option)
    var_name = option[:var_name] || key
    original_value = instance_variable_get(:"@#{var_name}")
    value = original_value
    option.each do |option_name, option_value|
      value = Fluent::DataEntryPreferenceConfigurable.replace_value_with_option(
        key, value, option_name, option_value, key: @key)
    end
    if original_value != value
      instance_variable_set(:"@#{var_name}", value)
    end
  end

  module ClassMethods
    # Declares `key` of conf section `type` as a supported custom setting.
    def custom_config_param(key, type, option = {})
      conf = class_variable_get(:@@supported_custom_confs)
      conf[type.to_sym][key.to_sym] = option
    end
  end

  # Returns param_value transformed according to one option. Only a truthy
  # :encrypted option changes the value (it is decrypted with opts[:key]);
  # every other option returns param_value unchanged.
  def self.replace_value_with_option(param_name, param_value, option_name, option_value, opts = {})
    ret = param_value
    case option_name
    when :encrypted
      if option_value
        ret = Flydata::Util::Encryptor.decrypt(
                param_value, opts[:key], param_name)
      end
    end
    ret
  end
end
69
+
70
# Declares the custom conf settings supported by the MySQL binlog input.
module MysqlBinlogFlydataInputPreference
  # conf section => { setting name => option Hash }
  CUSTOM_CONFIG_PARAMS = {
    mysql_data_entry_preference: {
      database: {},
      tables:   {},
      host:     {},
      username: {},
      password: {encrypted: true},
    },
  }

  # Mixes DataEntryPreferenceConfigurable into the including class and
  # registers every entry of CUSTOM_CONFIG_PARAMS through its
  # custom_config_param class method.
  def self.included(base)
    base.send(:include, DataEntryPreferenceConfigurable)
    CUSTOM_CONFIG_PARAMS.each_pair do |type, params|
      params.each_pair do |key, option|
        base.send(:custom_config_param, key, type, option)
      end
    end
  end
end
92
+ end