fluent-plugin-sql-enchanced 0.5.3 → 0.5.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/fluent/plugin/in_sql.rb +328 -166
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b75bc87abe0cbb08f1daf17600a67674cb67c6f
|
4
|
+
data.tar.gz: f3b4b9627884ce8d777a0be24dbe592f9d965193
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac85a5965b87fa1a280837decf528147f4e2156fc6a1966fc9d184cf75675ecb827ac9e3104cb4ad44e2c15432ce4c8e6449c4411f2f55b00e3e3a83a4b062c1
|
7
|
+
data.tar.gz: 0048de749270caaf8311900d194ce726376b6c52146e1e9c84b798c66d184254477c985906836dabe83be9ff31fa3e7754ddb01870a6d5188057777d1d276405
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.
|
1
|
+
0.5.5
|
data/lib/fluent/plugin/in_sql.rb
CHANGED
@@ -1,194 +1,356 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
1
|
+
#
|
2
|
+
# Fluent
|
3
|
+
#
|
4
|
+
# Copyright (C) 2013 FURUHASHI Sadayuki
|
5
|
+
#
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License.
|
8
|
+
# You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
#
|
18
|
+
module Fluent
|
19
|
+
|
20
|
+
require 'active_record'
|
21
|
+
require 'redis'
|
22
|
+
require 'pry'
|
23
|
+
|
24
|
+
SERVER_PREFIX = "ip#{Socket.ip_address_list.detect(&:ipv4_private?).try(:ip_address).delete('.')}"
|
25
|
+
# contents = IO.read("/etc/data_enchilada/data_enchilada.properties").strip
|
26
|
+
|
27
|
+
# KAFKA_SERVER = contents.lines.first.split("=")[1].strip
|
28
|
+
KAFKA_SERVER = '10.1.0.57'
|
29
|
+
|
30
|
+
class SQLInput < Input
|
31
|
+
Plugin.register_input('sql', self)
|
32
|
+
|
33
|
+
config_param :host, :string
|
34
|
+
config_param :port, :integer, :default => nil
|
35
|
+
config_param :adapter, :string
|
36
|
+
config_param :database, :string
|
37
|
+
config_param :username, :string, :default => nil
|
38
|
+
config_param :password, :string, :default => nil, :secret => true
|
39
|
+
config_param :socket, :string, :default => nil
|
40
|
+
|
41
|
+
config_param :state_file, :string, :default => nil
|
42
|
+
config_param :tag_prefix, :string, :default => nil
|
43
|
+
config_param :select_interval, :time, :default => 60
|
44
|
+
config_param :select_limit, :time, :default => 500
|
45
|
+
|
46
|
+
unless method_defined?(:log)
|
47
|
+
define_method(:log) { $log }
|
48
|
+
end
|
49
|
+
|
50
|
+
class TableElement
|
51
|
+
include Configurable
|
52
|
+
|
53
|
+
config_param :table, :string
|
54
|
+
config_param :tag, :string, :default => nil
|
55
|
+
config_param :update_column, :string, :default => nil
|
56
|
+
config_param :time_column, :string, :default => nil
|
57
|
+
config_param :primary_key, :string, :default => nil
|
58
|
+
|
59
|
+
def configure(conf)
|
60
|
+
super
|
53
61
|
end
|
54
62
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
63
|
+
def init(tag_prefix, base_model, router)
|
64
|
+
@router = router
|
65
|
+
@tag = "#{tag_prefix}.#{@tag}" if tag_prefix
|
66
|
+
|
67
|
+
# creates a model for this table
|
68
|
+
table_name = @table
|
69
|
+
primary_key = @primary_key
|
70
|
+
@model = Class.new(base_model) do
|
71
|
+
self.table_name = table_name
|
72
|
+
self.inheritance_column = '_never_use_'
|
73
|
+
self.primary_key = primary_key if primary_key
|
74
|
+
|
75
|
+
#self.include_root_in_json = false
|
76
|
+
|
77
|
+
def read_attribute_for_serialization(n)
|
78
|
+
v = send(n)
|
79
|
+
if v.respond_to?(:to_msgpack)
|
80
|
+
v
|
81
|
+
else
|
82
|
+
v.to_s
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# ActiveRecord requires model class to have a name.
|
88
|
+
class_name = table_name.singularize.camelize
|
89
|
+
base_model.const_set(class_name, @model)
|
90
|
+
|
91
|
+
# Sets model_name otherwise ActiveRecord causes errors
|
92
|
+
model_name = ActiveModel::Name.new(@model, nil, class_name)
|
93
|
+
@model.define_singleton_method(:model_name) { model_name }
|
94
|
+
|
95
|
+
# if update_column is not set, here uses primary key
|
96
|
+
unless @update_column
|
97
|
+
columns = Hash[@model.columns.map {|c| [c.name, c] }]
|
98
|
+
pk = columns[@model.primary_key]
|
99
|
+
unless pk
|
100
|
+
raise "Composite primary key is not supported. Set update_column parameter to <table> section."
|
101
|
+
end
|
102
|
+
@update_column = pk.name
|
103
|
+
end
|
76
104
|
end
|
77
105
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
106
|
+
# emits next records and returns the last record of emitted records
|
107
|
+
def emit_next_records(last_record, limit)
|
108
|
+
relation = @model
|
109
|
+
if last_record && last_update_value = last_record[@update_column]
|
110
|
+
relation = relation.where("#{@update_column} > ?", last_update_value)
|
111
|
+
end
|
112
|
+
relation = relation.order("#{@update_column} ASC")
|
113
|
+
relation = relation.limit(limit) if limit > 0
|
114
|
+
|
115
|
+
now = Engine.now
|
116
|
+
entry_name = @model.table_name.singularize
|
117
|
+
|
118
|
+
me = MultiEventStream.new
|
119
|
+
|
120
|
+
relation.each do |obj|
|
121
|
+
record = obj.serializable_hash rescue nil
|
122
|
+
if record
|
123
|
+
if @time_column && tv = obj.read_attribute(@time_column)
|
124
|
+
if tv.is_a?(Time)
|
125
|
+
time = tv.to_i
|
126
|
+
else
|
127
|
+
time = Time.parse(tv.to_s).to_i rescue now
|
128
|
+
end
|
129
|
+
else
|
130
|
+
time = now
|
131
|
+
end
|
132
|
+
me.add(time, record)
|
133
|
+
last_record = record
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
last_record = last_record.dup if last_record # some plugin rewrites record :(
|
138
|
+
@router.emit_stream(@tag, me)
|
139
|
+
|
140
|
+
return last_record
|
88
141
|
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def configure(conf)
|
145
|
+
super
|
89
146
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
:select_limit,
|
94
|
-
:state_file
|
95
|
-
]
|
147
|
+
unless @state_file
|
148
|
+
$log.warn "'state_file PATH' parameter is not set to a 'sql' source."
|
149
|
+
$log.warn "this parameter is highly recommended to save the last rows to resume tailing."
|
96
150
|
end
|
97
151
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
152
|
+
@tables = conf.elements.select {|e|
|
153
|
+
e.name == 'table'
|
154
|
+
}.map {|e|
|
155
|
+
te = TableElement.new
|
156
|
+
te.configure(e)
|
157
|
+
te
|
158
|
+
}
|
159
|
+
|
160
|
+
if config['all_tables']
|
161
|
+
@all_tables = true
|
102
162
|
end
|
163
|
+
end
|
164
|
+
|
165
|
+
SKIP_TABLE_REGEXP = /\Aschema_migrations\Z/i
|
166
|
+
|
167
|
+
def start
|
168
|
+
@state_store = @state_file.nil? ? MemoryStateStore.new : StateStore.new(@state_file)
|
169
|
+
|
170
|
+
config = {
|
171
|
+
:adapter => @adapter,
|
172
|
+
:host => @host,
|
173
|
+
:port => @port,
|
174
|
+
:database => @database,
|
175
|
+
:username => @username,
|
176
|
+
:password => @password,
|
177
|
+
:socket => @socket,
|
178
|
+
}
|
103
179
|
|
104
|
-
|
105
|
-
|
180
|
+
# creates subclass of ActiveRecord::Base so that it can have different
|
181
|
+
# database configuration from ActiveRecord::Base.
|
182
|
+
@base_model = Class.new(ActiveRecord::Base) do
|
183
|
+
# base model doesn't have corresponding phisical table
|
184
|
+
self.abstract_class = true
|
106
185
|
end
|
107
186
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
187
|
+
# ActiveRecord requires the base_model to have a name. Here sets name
|
188
|
+
# of an anonymous class by assigning it to a constant. In Ruby, class has
|
189
|
+
# a name of a constant assigned first
|
190
|
+
SQLInput.const_set("BaseModel_#{rand(1 << 31)}", @base_model)
|
191
|
+
|
192
|
+
# Now base_model can have independent configuration from ActiveRecord::Base
|
193
|
+
@base_model.establish_connection(config)
|
194
|
+
|
195
|
+
if @all_tables
|
196
|
+
# get list of tables from the database
|
197
|
+
@tables = @base_model.connection.tables.map do |table_name|
|
198
|
+
if table_name.match(SKIP_TABLE_REGEXP)
|
199
|
+
# some tables such as "schema_migrations" should be ignored
|
200
|
+
nil
|
201
|
+
else
|
202
|
+
te = TableElement.new
|
203
|
+
te.configure({
|
204
|
+
'table' => table_name,
|
205
|
+
'tag' => table_name,
|
206
|
+
'update_column' => nil,
|
207
|
+
})
|
208
|
+
te
|
209
|
+
end
|
210
|
+
end.compact
|
116
211
|
end
|
117
212
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
config << conf(key)
|
127
|
-
config << "\n"
|
128
|
-
end
|
129
|
-
tables = send(:table).reject{|t| t.values.join('') == ''} rescue []
|
130
|
-
if tables.present? && all_tables != '1'
|
131
|
-
tables.each do |tab|
|
132
|
-
config << "\n"
|
133
|
-
config << indent
|
134
|
-
config << "<table>\n"
|
135
|
-
tab.each do |key, value|
|
136
|
-
config << indent
|
137
|
-
config << indent
|
138
|
-
config << "#{key} #{value}"
|
139
|
-
config << "\n"
|
140
|
-
end
|
141
|
-
config << indent
|
142
|
-
config << "</table>\n"
|
213
|
+
init_redis
|
214
|
+
|
215
|
+
# ignore tables if TableElement#init failed
|
216
|
+
@tables.reject! do |te|
|
217
|
+
begin
|
218
|
+
schema_name = "#{SERVER_PREFIX}_#{@tag_prefix}_#{te.tag.presence || te.table}_#{Digest::MD5.new.hexdigest(@base_model.connection.columns(te.table).map{|c| c.name}.to_s)[0..5]}"
|
219
|
+
unless get_schema_from_redis_by_name schema_name
|
220
|
+
generate_schema te, schema_name
|
143
221
|
end
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
222
|
+
|
223
|
+
te.init(@tag_prefix, @base_model, router)
|
224
|
+
log.info "Selecting '#{te.table}' table"
|
225
|
+
false
|
226
|
+
rescue => e
|
227
|
+
log.warn "Can't handle '#{te.table}' table. Ignoring.", :error => e.message, :error_class => e.class
|
228
|
+
log.warn_backtrace e.backtrace
|
229
|
+
true
|
148
230
|
end
|
231
|
+
end
|
149
232
|
|
233
|
+
@stop_flag = false
|
234
|
+
@thread = Thread.new(&method(:thread_main))
|
235
|
+
end
|
150
236
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
@
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
when '</table>'
|
168
|
-
to_table = false
|
169
|
-
@tables << table
|
170
|
-
table = {}
|
171
|
-
when 'all_tables'
|
172
|
-
@setting[:all_tables] = true
|
173
|
-
else
|
174
|
-
param = str.split(' ')
|
175
|
-
if to_table
|
176
|
-
table[param.first] = param.second
|
177
|
-
else
|
178
|
-
@setting[param.first] = param.second unless param.first == 'type'
|
179
|
-
end
|
237
|
+
def shutdown
|
238
|
+
@stop_flag = true
|
239
|
+
end
|
240
|
+
|
241
|
+
def thread_main
|
242
|
+
until @stop_flag
|
243
|
+
sleep @select_interval
|
244
|
+
|
245
|
+
@tables.each do |t|
|
246
|
+
begin
|
247
|
+
last_record = @state_store.last_records[t.table]
|
248
|
+
@state_store.last_records[t.table] = t.emit_next_records(last_record, @select_limit)
|
249
|
+
@state_store.update!
|
250
|
+
rescue => e
|
251
|
+
log.error "unexpected error", :error => e.message, :error_class => e.class
|
252
|
+
log.error_backtrace e.backtrace
|
180
253
|
end
|
181
|
-
|
182
|
-
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
def init_redis
|
259
|
+
$redis = Redis.new
|
260
|
+
end
|
261
|
+
|
262
|
+
def generate_schema table, schema_name
|
263
|
+
require "avro_turf"
|
264
|
+
require 'avro_turf/messaging'
|
265
|
+
require "avro/builder"
|
266
|
+
avro = AvroTurf::Messaging.new(registry_url: "http://#{KAFKA_SERVER}:8081")
|
267
|
+
fields = @base_model.connection.columns(table.table).map do |col|
|
268
|
+
col_type = if col.sql_type.include? 'bigint'
|
269
|
+
'long'
|
270
|
+
elsif ['int', 'bool'].any? {|needle| col.sql_type.include?(needle)}
|
271
|
+
'int'
|
272
|
+
elsif ['float', 'double', 'real'].any? {|needle| col.sql_type.include?(needle)}
|
273
|
+
'float'
|
274
|
+
else
|
275
|
+
'string'
|
276
|
+
end
|
277
|
+
{
|
278
|
+
'name' => col.name,
|
279
|
+
'type' => ['null', col_type]
|
280
|
+
}
|
281
|
+
end
|
282
|
+
field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
|
283
|
+
fields << {"name" => "enchilada_timestamp", "type" => "long"}
|
284
|
+
fields << {"name" => "enchilada_time_with_format", "type" => "string"}
|
285
|
+
schema_json = {
|
286
|
+
"type": "record",
|
287
|
+
"name": schema_name,
|
288
|
+
"fields": fields
|
289
|
+
}.to_json
|
290
|
+
registry = avro.instance_variable_get('@registry')
|
291
|
+
schema = Avro::Schema.parse(schema_json)
|
292
|
+
schema_id = registry.register("#{schema_name}-value", schema)
|
293
|
+
|
294
|
+
stored_schema = {
|
295
|
+
'schema_json' => schema_json,
|
296
|
+
'schema_id' => schema_id,
|
297
|
+
'field_types' => field_types,
|
298
|
+
'schema' => schema
|
299
|
+
}
|
300
|
+
|
301
|
+
set_schema_to_redis(schema_name, stored_schema)
|
302
|
+
end
|
303
|
+
|
304
|
+
def set_schema_to_redis schema_name, schema
|
305
|
+
$redis.set(schema_name, schema.to_json)
|
306
|
+
end
|
307
|
+
|
308
|
+
def get_schema_from_redis_by_name schema_name
|
309
|
+
stored_schema = $redis.get(schema_name)
|
310
|
+
end
|
311
|
+
|
312
|
+
class StateStore
|
313
|
+
def initialize(path)
|
314
|
+
require 'yaml'
|
315
|
+
|
316
|
+
@path = path
|
317
|
+
if File.exists?(@path)
|
318
|
+
@data = YAML.load_file(@path)
|
319
|
+
if @data == false || @data == []
|
320
|
+
# this happens if an users created an empty file accidentally
|
321
|
+
@data = {}
|
322
|
+
elsif !@data.is_a?(Hash)
|
323
|
+
raise "state_file on #{@path.inspect} is invalid"
|
183
324
|
end
|
325
|
+
else
|
326
|
+
@data = {}
|
184
327
|
end
|
185
|
-
self.new @setting
|
186
328
|
end
|
187
329
|
|
188
|
-
def
|
189
|
-
|
330
|
+
def last_records
|
331
|
+
@data['last_records'] ||= {}
|
332
|
+
end
|
333
|
+
|
334
|
+
def update!
|
335
|
+
File.open(@path, 'w') {|f|
|
336
|
+
f.write YAML.dump(@data)
|
337
|
+
}
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
class MemoryStateStore
|
342
|
+
def initialize
|
343
|
+
@data = {}
|
344
|
+
end
|
345
|
+
|
346
|
+
def last_records
|
347
|
+
@data['last_records'] ||= {}
|
348
|
+
end
|
349
|
+
|
350
|
+
def update!
|
190
351
|
end
|
191
352
|
end
|
192
353
|
end
|
354
|
+
|
193
355
|
end
|
194
356
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sql-enchanced
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|