fluent-plugin-sql-enchanced 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/fluent/plugin/in_sql.rb +328 -166
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8b75bc87abe0cbb08f1daf17600a67674cb67c6f
|
4
|
+
data.tar.gz: f3b4b9627884ce8d777a0be24dbe592f9d965193
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ac85a5965b87fa1a280837decf528147f4e2156fc6a1966fc9d184cf75675ecb827ac9e3104cb4ad44e2c15432ce4c8e6449c4411f2f55b00e3e3a83a4b062c1
|
7
|
+
data.tar.gz: 0048de749270caaf8311900d194ce726376b6c52146e1e9c84b798c66d184254477c985906836dabe83be9ff31fa3e7754ddb01870a6d5188057777d1d276405
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.3
|
1
|
+
0.5.5
|
data/lib/fluent/plugin/in_sql.rb
CHANGED
@@ -1,194 +1,356 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
1
|
+
#
|
2
|
+
# Fluent
|
3
|
+
#
|
4
|
+
# Copyright (C) 2013 FURUHASHI Sadayuki
|
5
|
+
#
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License.
|
8
|
+
# You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
# See the License for the specific language governing permissions and
|
16
|
+
# limitations under the License.
|
17
|
+
#
|
18
|
+
module Fluent
|
19
|
+
|
20
|
+
require 'active_record'
|
21
|
+
require 'redis'
|
22
|
+
require 'pry'
|
23
|
+
|
24
|
+
SERVER_PREFIX = "ip#{Socket.ip_address_list.detect(&:ipv4_private?).try(:ip_address).delete('.')}"
|
25
|
+
# contents = IO.read("/etc/data_enchilada/data_enchilada.properties").strip
|
26
|
+
|
27
|
+
# KAFKA_SERVER = contents.lines.first.split("=")[1].strip
|
28
|
+
KAFKA_SERVER = '10.1.0.57'
|
29
|
+
|
30
|
+
class SQLInput < Input
|
31
|
+
Plugin.register_input('sql', self)
|
32
|
+
|
33
|
+
config_param :host, :string
|
34
|
+
config_param :port, :integer, :default => nil
|
35
|
+
config_param :adapter, :string
|
36
|
+
config_param :database, :string
|
37
|
+
config_param :username, :string, :default => nil
|
38
|
+
config_param :password, :string, :default => nil, :secret => true
|
39
|
+
config_param :socket, :string, :default => nil
|
40
|
+
|
41
|
+
config_param :state_file, :string, :default => nil
|
42
|
+
config_param :tag_prefix, :string, :default => nil
|
43
|
+
config_param :select_interval, :time, :default => 60
|
44
|
+
config_param :select_limit, :time, :default => 500
|
45
|
+
|
46
|
+
unless method_defined?(:log)
|
47
|
+
define_method(:log) { $log }
|
48
|
+
end
|
49
|
+
|
50
|
+
class TableElement
|
51
|
+
include Configurable
|
52
|
+
|
53
|
+
config_param :table, :string
|
54
|
+
config_param :tag, :string, :default => nil
|
55
|
+
config_param :update_column, :string, :default => nil
|
56
|
+
config_param :time_column, :string, :default => nil
|
57
|
+
config_param :primary_key, :string, :default => nil
|
58
|
+
|
59
|
+
def configure(conf)
|
60
|
+
super
|
53
61
|
end
|
54
62
|
|
55
|
-
def
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
63
|
+
def init(tag_prefix, base_model, router)
|
64
|
+
@router = router
|
65
|
+
@tag = "#{tag_prefix}.#{@tag}" if tag_prefix
|
66
|
+
|
67
|
+
# creates a model for this table
|
68
|
+
table_name = @table
|
69
|
+
primary_key = @primary_key
|
70
|
+
@model = Class.new(base_model) do
|
71
|
+
self.table_name = table_name
|
72
|
+
self.inheritance_column = '_never_use_'
|
73
|
+
self.primary_key = primary_key if primary_key
|
74
|
+
|
75
|
+
#self.include_root_in_json = false
|
76
|
+
|
77
|
+
def read_attribute_for_serialization(n)
|
78
|
+
v = send(n)
|
79
|
+
if v.respond_to?(:to_msgpack)
|
80
|
+
v
|
81
|
+
else
|
82
|
+
v.to_s
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# ActiveRecord requires model class to have a name.
|
88
|
+
class_name = table_name.singularize.camelize
|
89
|
+
base_model.const_set(class_name, @model)
|
90
|
+
|
91
|
+
# Sets model_name otherwise ActiveRecord causes errors
|
92
|
+
model_name = ActiveModel::Name.new(@model, nil, class_name)
|
93
|
+
@model.define_singleton_method(:model_name) { model_name }
|
94
|
+
|
95
|
+
# if update_column is not set, here uses primary key
|
96
|
+
unless @update_column
|
97
|
+
columns = Hash[@model.columns.map {|c| [c.name, c] }]
|
98
|
+
pk = columns[@model.primary_key]
|
99
|
+
unless pk
|
100
|
+
raise "Composite primary key is not supported. Set update_column parameter to <table> section."
|
101
|
+
end
|
102
|
+
@update_column = pk.name
|
103
|
+
end
|
76
104
|
end
|
77
105
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
106
|
+
# emits next records and returns the last record of emitted records
|
107
|
+
def emit_next_records(last_record, limit)
|
108
|
+
relation = @model
|
109
|
+
if last_record && last_update_value = last_record[@update_column]
|
110
|
+
relation = relation.where("#{@update_column} > ?", last_update_value)
|
111
|
+
end
|
112
|
+
relation = relation.order("#{@update_column} ASC")
|
113
|
+
relation = relation.limit(limit) if limit > 0
|
114
|
+
|
115
|
+
now = Engine.now
|
116
|
+
entry_name = @model.table_name.singularize
|
117
|
+
|
118
|
+
me = MultiEventStream.new
|
119
|
+
|
120
|
+
relation.each do |obj|
|
121
|
+
record = obj.serializable_hash rescue nil
|
122
|
+
if record
|
123
|
+
if @time_column && tv = obj.read_attribute(@time_column)
|
124
|
+
if tv.is_a?(Time)
|
125
|
+
time = tv.to_i
|
126
|
+
else
|
127
|
+
time = Time.parse(tv.to_s).to_i rescue now
|
128
|
+
end
|
129
|
+
else
|
130
|
+
time = now
|
131
|
+
end
|
132
|
+
me.add(time, record)
|
133
|
+
last_record = record
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
last_record = last_record.dup if last_record # some plugin rewrites record :(
|
138
|
+
@router.emit_stream(@tag, me)
|
139
|
+
|
140
|
+
return last_record
|
88
141
|
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def configure(conf)
|
145
|
+
super
|
89
146
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
:select_limit,
|
94
|
-
:state_file
|
95
|
-
]
|
147
|
+
unless @state_file
|
148
|
+
$log.warn "'state_file PATH' parameter is not set to a 'sql' source."
|
149
|
+
$log.warn "this parameter is highly recommended to save the last rows to resume tailing."
|
96
150
|
end
|
97
151
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
152
|
+
@tables = conf.elements.select {|e|
|
153
|
+
e.name == 'table'
|
154
|
+
}.map {|e|
|
155
|
+
te = TableElement.new
|
156
|
+
te.configure(e)
|
157
|
+
te
|
158
|
+
}
|
159
|
+
|
160
|
+
if config['all_tables']
|
161
|
+
@all_tables = true
|
102
162
|
end
|
163
|
+
end
|
164
|
+
|
165
|
+
SKIP_TABLE_REGEXP = /\Aschema_migrations\Z/i
|
166
|
+
|
167
|
+
def start
|
168
|
+
@state_store = @state_file.nil? ? MemoryStateStore.new : StateStore.new(@state_file)
|
169
|
+
|
170
|
+
config = {
|
171
|
+
:adapter => @adapter,
|
172
|
+
:host => @host,
|
173
|
+
:port => @port,
|
174
|
+
:database => @database,
|
175
|
+
:username => @username,
|
176
|
+
:password => @password,
|
177
|
+
:socket => @socket,
|
178
|
+
}
|
103
179
|
|
104
|
-
|
105
|
-
|
180
|
+
# creates subclass of ActiveRecord::Base so that it can have different
|
181
|
+
# database configuration from ActiveRecord::Base.
|
182
|
+
@base_model = Class.new(ActiveRecord::Base) do
|
183
|
+
# base model doesn't have corresponding phisical table
|
184
|
+
self.abstract_class = true
|
106
185
|
end
|
107
186
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
187
|
+
# ActiveRecord requires the base_model to have a name. Here sets name
|
188
|
+
# of an anonymous class by assigning it to a constant. In Ruby, class has
|
189
|
+
# a name of a constant assigned first
|
190
|
+
SQLInput.const_set("BaseModel_#{rand(1 << 31)}", @base_model)
|
191
|
+
|
192
|
+
# Now base_model can have independent configuration from ActiveRecord::Base
|
193
|
+
@base_model.establish_connection(config)
|
194
|
+
|
195
|
+
if @all_tables
|
196
|
+
# get list of tables from the database
|
197
|
+
@tables = @base_model.connection.tables.map do |table_name|
|
198
|
+
if table_name.match(SKIP_TABLE_REGEXP)
|
199
|
+
# some tables such as "schema_migrations" should be ignored
|
200
|
+
nil
|
201
|
+
else
|
202
|
+
te = TableElement.new
|
203
|
+
te.configure({
|
204
|
+
'table' => table_name,
|
205
|
+
'tag' => table_name,
|
206
|
+
'update_column' => nil,
|
207
|
+
})
|
208
|
+
te
|
209
|
+
end
|
210
|
+
end.compact
|
116
211
|
end
|
117
212
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
config << conf(key)
|
127
|
-
config << "\n"
|
128
|
-
end
|
129
|
-
tables = send(:table).reject{|t| t.values.join('') == ''} rescue []
|
130
|
-
if tables.present? && all_tables != '1'
|
131
|
-
tables.each do |tab|
|
132
|
-
config << "\n"
|
133
|
-
config << indent
|
134
|
-
config << "<table>\n"
|
135
|
-
tab.each do |key, value|
|
136
|
-
config << indent
|
137
|
-
config << indent
|
138
|
-
config << "#{key} #{value}"
|
139
|
-
config << "\n"
|
140
|
-
end
|
141
|
-
config << indent
|
142
|
-
config << "</table>\n"
|
213
|
+
init_redis
|
214
|
+
|
215
|
+
# ignore tables if TableElement#init failed
|
216
|
+
@tables.reject! do |te|
|
217
|
+
begin
|
218
|
+
schema_name = "#{SERVER_PREFIX}_#{@tag_prefix}_#{te.tag.presence || te.table}_#{Digest::MD5.new.hexdigest(@base_model.connection.columns(te.table).map{|c| c.name}.to_s)[0..5]}"
|
219
|
+
unless get_schema_from_redis_by_name schema_name
|
220
|
+
generate_schema te, schema_name
|
143
221
|
end
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
222
|
+
|
223
|
+
te.init(@tag_prefix, @base_model, router)
|
224
|
+
log.info "Selecting '#{te.table}' table"
|
225
|
+
false
|
226
|
+
rescue => e
|
227
|
+
log.warn "Can't handle '#{te.table}' table. Ignoring.", :error => e.message, :error_class => e.class
|
228
|
+
log.warn_backtrace e.backtrace
|
229
|
+
true
|
148
230
|
end
|
231
|
+
end
|
149
232
|
|
233
|
+
@stop_flag = false
|
234
|
+
@thread = Thread.new(&method(:thread_main))
|
235
|
+
end
|
150
236
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
@
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
when '</table>'
|
168
|
-
to_table = false
|
169
|
-
@tables << table
|
170
|
-
table = {}
|
171
|
-
when 'all_tables'
|
172
|
-
@setting[:all_tables] = true
|
173
|
-
else
|
174
|
-
param = str.split(' ')
|
175
|
-
if to_table
|
176
|
-
table[param.first] = param.second
|
177
|
-
else
|
178
|
-
@setting[param.first] = param.second unless param.first == 'type'
|
179
|
-
end
|
237
|
+
def shutdown
|
238
|
+
@stop_flag = true
|
239
|
+
end
|
240
|
+
|
241
|
+
def thread_main
|
242
|
+
until @stop_flag
|
243
|
+
sleep @select_interval
|
244
|
+
|
245
|
+
@tables.each do |t|
|
246
|
+
begin
|
247
|
+
last_record = @state_store.last_records[t.table]
|
248
|
+
@state_store.last_records[t.table] = t.emit_next_records(last_record, @select_limit)
|
249
|
+
@state_store.update!
|
250
|
+
rescue => e
|
251
|
+
log.error "unexpected error", :error => e.message, :error_class => e.class
|
252
|
+
log.error_backtrace e.backtrace
|
180
253
|
end
|
181
|
-
|
182
|
-
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
def init_redis
|
259
|
+
$redis = Redis.new
|
260
|
+
end
|
261
|
+
|
262
|
+
def generate_schema table, schema_name
|
263
|
+
require "avro_turf"
|
264
|
+
require 'avro_turf/messaging'
|
265
|
+
require "avro/builder"
|
266
|
+
avro = AvroTurf::Messaging.new(registry_url: "http://#{KAFKA_SERVER}:8081")
|
267
|
+
fields = @base_model.connection.columns(table.table).map do |col|
|
268
|
+
col_type = if col.sql_type.include? 'bigint'
|
269
|
+
'long'
|
270
|
+
elsif ['int', 'bool'].any? {|needle| col.sql_type.include?(needle)}
|
271
|
+
'int'
|
272
|
+
elsif ['float', 'double', 'real'].any? {|needle| col.sql_type.include?(needle)}
|
273
|
+
'float'
|
274
|
+
else
|
275
|
+
'string'
|
276
|
+
end
|
277
|
+
{
|
278
|
+
'name' => col.name,
|
279
|
+
'type' => ['null', col_type]
|
280
|
+
}
|
281
|
+
end
|
282
|
+
field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
|
283
|
+
fields << {"name" => "enchilada_timestamp", "type" => "long"}
|
284
|
+
fields << {"name" => "enchilada_time_with_format", "type" => "string"}
|
285
|
+
schema_json = {
|
286
|
+
"type": "record",
|
287
|
+
"name": schema_name,
|
288
|
+
"fields": fields
|
289
|
+
}.to_json
|
290
|
+
registry = avro.instance_variable_get('@registry')
|
291
|
+
schema = Avro::Schema.parse(schema_json)
|
292
|
+
schema_id = registry.register("#{schema_name}-value", schema)
|
293
|
+
|
294
|
+
stored_schema = {
|
295
|
+
'schema_json' => schema_json,
|
296
|
+
'schema_id' => schema_id,
|
297
|
+
'field_types' => field_types,
|
298
|
+
'schema' => schema
|
299
|
+
}
|
300
|
+
|
301
|
+
set_schema_to_redis(schema_name, stored_schema)
|
302
|
+
end
|
303
|
+
|
304
|
+
def set_schema_to_redis schema_name, schema
|
305
|
+
$redis.set(schema_name, schema.to_json)
|
306
|
+
end
|
307
|
+
|
308
|
+
def get_schema_from_redis_by_name schema_name
|
309
|
+
stored_schema = $redis.get(schema_name)
|
310
|
+
end
|
311
|
+
|
312
|
+
class StateStore
|
313
|
+
def initialize(path)
|
314
|
+
require 'yaml'
|
315
|
+
|
316
|
+
@path = path
|
317
|
+
if File.exists?(@path)
|
318
|
+
@data = YAML.load_file(@path)
|
319
|
+
if @data == false || @data == []
|
320
|
+
# this happens if an users created an empty file accidentally
|
321
|
+
@data = {}
|
322
|
+
elsif !@data.is_a?(Hash)
|
323
|
+
raise "state_file on #{@path.inspect} is invalid"
|
183
324
|
end
|
325
|
+
else
|
326
|
+
@data = {}
|
184
327
|
end
|
185
|
-
self.new @setting
|
186
328
|
end
|
187
329
|
|
188
|
-
def
|
189
|
-
|
330
|
+
def last_records
|
331
|
+
@data['last_records'] ||= {}
|
332
|
+
end
|
333
|
+
|
334
|
+
def update!
|
335
|
+
File.open(@path, 'w') {|f|
|
336
|
+
f.write YAML.dump(@data)
|
337
|
+
}
|
338
|
+
end
|
339
|
+
end
|
340
|
+
|
341
|
+
class MemoryStateStore
|
342
|
+
def initialize
|
343
|
+
@data = {}
|
344
|
+
end
|
345
|
+
|
346
|
+
def last_records
|
347
|
+
@data['last_records'] ||= {}
|
348
|
+
end
|
349
|
+
|
350
|
+
def update!
|
190
351
|
end
|
191
352
|
end
|
192
353
|
end
|
354
|
+
|
193
355
|
end
|
194
356
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sql-enchanced
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fluentd
|