fluent-plugin-sql-enchanced 0.5.3 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/fluent/plugin/in_sql.rb +328 -166
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddedbc4c3f5babcc5b2eaf9f3a41b730f2d370d1
4
- data.tar.gz: 7a15b6a37feac936116e4c06933c135a3166eb73
3
+ metadata.gz: 8b75bc87abe0cbb08f1daf17600a67674cb67c6f
4
+ data.tar.gz: f3b4b9627884ce8d777a0be24dbe592f9d965193
5
5
  SHA512:
6
- metadata.gz: bf80c88deee559b0b425613401f6c7ac517edf604f1a142473ca69824cd2b4d63cef08a10f6c3ef7c74430d3ccdd03bf0d21b92aa80c1e37593d33eb42adc00a
7
- data.tar.gz: 3f18833a4a466ffa103b75c347df05773387394f06a00523f58d3a5a89e7c357a0e26eb1be6b85b19408678629b703148489c411ca14793b239f97ae59b648d2
6
+ metadata.gz: ac85a5965b87fa1a280837decf528147f4e2156fc6a1966fc9d184cf75675ecb827ac9e3104cb4ad44e2c15432ce4c8e6449c4411f2f55b00e3e3a83a4b062c1
7
+ data.tar.gz: 0048de749270caaf8311900d194ce726376b6c52146e1e9c84b798c66d184254477c985906836dabe83be9ff31fa3e7754ddb01870a6d5188057777d1d276405
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.3
1
+ 0.5.5
@@ -1,194 +1,356 @@
1
- class Fluentd
2
- module Setting
3
- class InSql
4
- include ActiveModel::Model
5
- include Common
6
-
7
- KEYS = [
8
- :host,
9
- :port,
10
- :database,
11
- :adapter,
12
- :username,
13
- :password,
14
- :tag_prefix,
15
- :select_interval,
16
- :select_limit,
17
- :state_file,
18
- :table,
19
- :all_tables
20
- ].freeze
21
-
22
- attr_accessor(*KEYS)
23
- attr_accessor(:fields_descriptions)
24
-
25
- validates :host, presence: true
26
- validates :database, presence: true
27
- validates :adapter, presence: true
28
- validates :username, presence: true
29
- validates :password, presence: true
30
- validates :state_file, presence: true
31
-
32
- def self.initial_params
33
- {
34
- host: 'localhost',
35
- database: 'rdb_database',
36
- adapter: 'mysql2',
37
- username: 'myusername',
38
- password: 'mypassword',
39
- tag_prefix: 'my.rdb',
40
- select_interval: '60s',
41
- select_limit: '500',
42
- state_file: "/tmp/data_enchilada-sql-#{Fluentd.instance.id}-#{Time.now.to_i}.pos",
43
- table: [
44
- {
45
- table: 'rdb_table',
46
- tag: 'rdb_table_tag',
47
- update_column: 'updated_at',
48
- time_column: 'updated_at',
49
- primary_key: ''
50
- }
51
- ]
52
- }
1
+ #
2
+ # Fluent
3
+ #
4
+ # Copyright (C) 2013 FURUHASHI Sadayuki
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ module Fluent
19
+
20
+ require 'active_record'
21
+ require 'redis'
22
+ require 'pry'
23
+
24
+ SERVER_PREFIX = "ip#{Socket.ip_address_list.detect(&:ipv4_private?).try(:ip_address).delete('.')}"
25
+ # contents = IO.read("/etc/data_enchilada/data_enchilada.properties").strip
26
+
27
+ # KAFKA_SERVER = contents.lines.first.split("=")[1].strip
28
+ KAFKA_SERVER = '10.1.0.57'
29
+
30
+ class SQLInput < Input
31
+ Plugin.register_input('sql', self)
32
+
33
+ config_param :host, :string
34
+ config_param :port, :integer, :default => nil
35
+ config_param :adapter, :string
36
+ config_param :database, :string
37
+ config_param :username, :string, :default => nil
38
+ config_param :password, :string, :default => nil, :secret => true
39
+ config_param :socket, :string, :default => nil
40
+
41
+ config_param :state_file, :string, :default => nil
42
+ config_param :tag_prefix, :string, :default => nil
43
+ config_param :select_interval, :time, :default => 60
44
+ config_param :select_limit, :time, :default => 500
45
+
46
+ unless method_defined?(:log)
47
+ define_method(:log) { $log }
48
+ end
49
+
50
+ class TableElement
51
+ include Configurable
52
+
53
+ config_param :table, :string
54
+ config_param :tag, :string, :default => nil
55
+ config_param :update_column, :string, :default => nil
56
+ config_param :time_column, :string, :default => nil
57
+ config_param :primary_key, :string, :default => nil
58
+
59
+ def configure(conf)
60
+ super
53
61
  end
54
62
 
55
- def fields_descriptions
56
- {
57
- host: '* RDBMS host (required)',
58
- port: 'RDBMS port (optional)',
59
- database: '* RDBMS database name (required)',
60
- adapter: '* RDBMS driver name. You should install corresponding gem before start (mysql2 gem for mysql2 adapter, pg gem for postgresql adapter, etc. (required)',
61
- username: '* RDBMS login user name (required)',
62
- password: '* RDBMS login password (required)',
63
- tag_prefix: 'prefix of tags of events. actual tag will be this_tag_prefix.tables_tag (optional, but recommended)',
64
- select_interval: 'interval to run SQLs (optional)',
65
- select_limit: 'LIMIT of number of rows for each SQL (optional)',
66
- state_file: '* path to a file to store last rows (required)',
67
- all_tables: 'reads all tables instead of configuring each tables in &lt;table&gt; sections (optional)',
68
- table: {
69
- tag: 'tag name of events (optional; default value is table name)',
70
- table: '* RDBM table name',
71
- update_column: '* see above description',
72
- time_column: "(optional): if this option is set, this plugin uses this column's value as the the event's time. Otherwise it uses current time.",
73
- primary_key: "(optional): if you want to get data from the table which doesn't have primary key like PostgreSQL's View, set this parameter."
74
- }
75
- }
63
+ def init(tag_prefix, base_model, router)
64
+ @router = router
65
+ @tag = "#{tag_prefix}.#{@tag}" if tag_prefix
66
+
67
+ # creates a model for this table
68
+ table_name = @table
69
+ primary_key = @primary_key
70
+ @model = Class.new(base_model) do
71
+ self.table_name = table_name
72
+ self.inheritance_column = '_never_use_'
73
+ self.primary_key = primary_key if primary_key
74
+
75
+ #self.include_root_in_json = false
76
+
77
+ def read_attribute_for_serialization(n)
78
+ v = send(n)
79
+ if v.respond_to?(:to_msgpack)
80
+ v
81
+ else
82
+ v.to_s
83
+ end
84
+ end
85
+ end
86
+
87
+ # ActiveRecord requires model class to have a name.
88
+ class_name = table_name.singularize.camelize
89
+ base_model.const_set(class_name, @model)
90
+
91
+ # Sets model_name otherwise ActiveRecord causes errors
92
+ model_name = ActiveModel::Name.new(@model, nil, class_name)
93
+ @model.define_singleton_method(:model_name) { model_name }
94
+
95
+ # if update_column is not set, here uses primary key
96
+ unless @update_column
97
+ columns = Hash[@model.columns.map {|c| [c.name, c] }]
98
+ pk = columns[@model.primary_key]
99
+ unless pk
100
+ raise "Composite primary key is not supported. Set update_column parameter to <table> section."
101
+ end
102
+ @update_column = pk.name
103
+ end
76
104
  end
77
105
 
78
- def common_options
79
- [
80
- :host,
81
- :port,
82
- :database,
83
- :adapter,
84
- :username,
85
- :password,
86
- :tag_prefix
87
- ]
106
+ # emits next records and returns the last record of emitted records
107
+ def emit_next_records(last_record, limit)
108
+ relation = @model
109
+ if last_record && last_update_value = last_record[@update_column]
110
+ relation = relation.where("#{@update_column} > ?", last_update_value)
111
+ end
112
+ relation = relation.order("#{@update_column} ASC")
113
+ relation = relation.limit(limit) if limit > 0
114
+
115
+ now = Engine.now
116
+ entry_name = @model.table_name.singularize
117
+
118
+ me = MultiEventStream.new
119
+
120
+ relation.each do |obj|
121
+ record = obj.serializable_hash rescue nil
122
+ if record
123
+ if @time_column && tv = obj.read_attribute(@time_column)
124
+ if tv.is_a?(Time)
125
+ time = tv.to_i
126
+ else
127
+ time = Time.parse(tv.to_s).to_i rescue now
128
+ end
129
+ else
130
+ time = now
131
+ end
132
+ me.add(time, record)
133
+ last_record = record
134
+ end
135
+ end
136
+
137
+ last_record = last_record.dup if last_record # some plugin rewrites record :(
138
+ @router.emit_stream(@tag, me)
139
+
140
+ return last_record
88
141
  end
142
+ end
143
+
144
+ def configure(conf)
145
+ super
89
146
 
90
- def advanced_options
91
- [
92
- :select_interval,
93
- :select_limit,
94
- :state_file
95
- ]
147
+ unless @state_file
148
+ $log.warn "'state_file PATH' parameter is not set to a 'sql' source."
149
+ $log.warn "this parameter is highly recommended to save the last rows to resume tailing."
96
150
  end
97
151
 
98
- def table=(value)
99
- @tables = value.map do |t|
100
- t.map{|field, value| ([field.to_sym, value] if table_fields.include?(field.to_sym))}.to_h
101
- end
152
+ @tables = conf.elements.select {|e|
153
+ e.name == 'table'
154
+ }.map {|e|
155
+ te = TableElement.new
156
+ te.configure(e)
157
+ te
158
+ }
159
+
160
+ if config['all_tables']
161
+ @all_tables = true
102
162
  end
163
+ end
164
+
165
+ SKIP_TABLE_REGEXP = /\Aschema_migrations\Z/i
166
+
167
+ def start
168
+ @state_store = @state_file.nil? ? MemoryStateStore.new : StateStore.new(@state_file)
169
+
170
+ config = {
171
+ :adapter => @adapter,
172
+ :host => @host,
173
+ :port => @port,
174
+ :database => @database,
175
+ :username => @username,
176
+ :password => @password,
177
+ :socket => @socket,
178
+ }
103
179
 
104
- def table
105
- @tables
180
+ # creates subclass of ActiveRecord::Base so that it can have different
181
+ # database configuration from ActiveRecord::Base.
182
+ @base_model = Class.new(ActiveRecord::Base) do
183
+ # base model doesn't have corresponding phisical table
184
+ self.abstract_class = true
106
185
  end
107
186
 
108
- def table_fields
109
- [
110
- :table,
111
- :tag,
112
- :update_column,
113
- :time_column,
114
- :primary_key
115
- ]
187
+ # ActiveRecord requires the base_model to have a name. Here sets name
188
+ # of an anonymous class by assigning it to a constant. In Ruby, class has
189
+ # a name of a constant assigned first
190
+ SQLInput.const_set("BaseModel_#{rand(1 << 31)}", @base_model)
191
+
192
+ # Now base_model can have independent configuration from ActiveRecord::Base
193
+ @base_model.establish_connection(config)
194
+
195
+ if @all_tables
196
+ # get list of tables from the database
197
+ @tables = @base_model.connection.tables.map do |table_name|
198
+ if table_name.match(SKIP_TABLE_REGEXP)
199
+ # some tables such as "schema_migrations" should be ignored
200
+ nil
201
+ else
202
+ te = TableElement.new
203
+ te.configure({
204
+ 'table' => table_name,
205
+ 'tag' => table_name,
206
+ 'update_column' => nil,
207
+ })
208
+ te
209
+ end
210
+ end.compact
116
211
  end
117
212
 
118
- def to_config
119
- indent = " "
120
- config = "<source>\n"
121
- config << "#{indent}type #{plugin_type_name}\n"
122
- self.class.const_get(:KEYS).each do |key|
123
- next if key == :table
124
- next if key == :all_tables
125
- config << indent
126
- config << conf(key)
127
- config << "\n"
128
- end
129
- tables = send(:table).reject{|t| t.values.join('') == ''} rescue []
130
- if tables.present? && all_tables != '1'
131
- tables.each do |tab|
132
- config << "\n"
133
- config << indent
134
- config << "<table>\n"
135
- tab.each do |key, value|
136
- config << indent
137
- config << indent
138
- config << "#{key} #{value}"
139
- config << "\n"
140
- end
141
- config << indent
142
- config << "</table>\n"
213
+ init_redis
214
+
215
+ # ignore tables if TableElement#init failed
216
+ @tables.reject! do |te|
217
+ begin
218
+ schema_name = "#{SERVER_PREFIX}_#{@tag_prefix}_#{te.tag.presence || te.table}_#{Digest::MD5.new.hexdigest(@base_model.connection.columns(te.table).map{|c| c.name}.to_s)[0..5]}"
219
+ unless get_schema_from_redis_by_name schema_name
220
+ generate_schema te, schema_name
143
221
  end
144
- else
145
- config << indent
146
- config << 'all_tables'
147
- config << "\n"
222
+
223
+ te.init(@tag_prefix, @base_model, router)
224
+ log.info "Selecting '#{te.table}' table"
225
+ false
226
+ rescue => e
227
+ log.warn "Can't handle '#{te.table}' table. Ignoring.", :error => e.message, :error_class => e.class
228
+ log.warn_backtrace e.backtrace
229
+ true
148
230
  end
231
+ end
149
232
 
233
+ @stop_flag = false
234
+ @thread = Thread.new(&method(:thread_main))
235
+ end
150
236
 
151
- config << "</source>\n"
152
- config.gsub(/^[ ]*\n/m, "")
153
- end
154
-
155
- def self.create_from_config config
156
- @setting = {}
157
- @tables = []
158
- table = {}
159
- to_table = false
160
- str_params = config.split("\r\n").map{|str| str.squish}
161
- str_params.each do |str|
162
- case str
163
- when '<source>', '</source>'
164
- next
165
- when '<table>'
166
- to_table = true
167
- when '</table>'
168
- to_table = false
169
- @tables << table
170
- table = {}
171
- when 'all_tables'
172
- @setting[:all_tables] = true
173
- else
174
- param = str.split(' ')
175
- if to_table
176
- table[param.first] = param.second
177
- else
178
- @setting[param.first] = param.second unless param.first == 'type'
179
- end
237
+ def shutdown
238
+ @stop_flag = true
239
+ end
240
+
241
+ def thread_main
242
+ until @stop_flag
243
+ sleep @select_interval
244
+
245
+ @tables.each do |t|
246
+ begin
247
+ last_record = @state_store.last_records[t.table]
248
+ @state_store.last_records[t.table] = t.emit_next_records(last_record, @select_limit)
249
+ @state_store.update!
250
+ rescue => e
251
+ log.error "unexpected error", :error => e.message, :error_class => e.class
252
+ log.error_backtrace e.backtrace
180
253
  end
181
- if @tables.present?
182
- @setting[:table] = @tables
254
+ end
255
+ end
256
+ end
257
+
258
+ def init_redis
259
+ $redis = Redis.new
260
+ end
261
+
262
+ def generate_schema table, schema_name
263
+ require "avro_turf"
264
+ require 'avro_turf/messaging'
265
+ require "avro/builder"
266
+ avro = AvroTurf::Messaging.new(registry_url: "http://#{KAFKA_SERVER}:8081")
267
+ fields = @base_model.connection.columns(table.table).map do |col|
268
+ col_type = if col.sql_type.include? 'bigint'
269
+ 'long'
270
+ elsif ['int', 'bool'].any? {|needle| col.sql_type.include?(needle)}
271
+ 'int'
272
+ elsif ['float', 'double', 'real'].any? {|needle| col.sql_type.include?(needle)}
273
+ 'float'
274
+ else
275
+ 'string'
276
+ end
277
+ {
278
+ 'name' => col.name,
279
+ 'type' => ['null', col_type]
280
+ }
281
+ end
282
+ field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
283
+ fields << {"name" => "enchilada_timestamp", "type" => "long"}
284
+ fields << {"name" => "enchilada_time_with_format", "type" => "string"}
285
+ schema_json = {
286
+ "type": "record",
287
+ "name": schema_name,
288
+ "fields": fields
289
+ }.to_json
290
+ registry = avro.instance_variable_get('@registry')
291
+ schema = Avro::Schema.parse(schema_json)
292
+ schema_id = registry.register("#{schema_name}-value", schema)
293
+
294
+ stored_schema = {
295
+ 'schema_json' => schema_json,
296
+ 'schema_id' => schema_id,
297
+ 'field_types' => field_types,
298
+ 'schema' => schema
299
+ }
300
+
301
+ set_schema_to_redis(schema_name, stored_schema)
302
+ end
303
+
304
+ def set_schema_to_redis schema_name, schema
305
+ $redis.set(schema_name, schema.to_json)
306
+ end
307
+
308
+ def get_schema_from_redis_by_name schema_name
309
+ stored_schema = $redis.get(schema_name)
310
+ end
311
+
312
+ class StateStore
313
+ def initialize(path)
314
+ require 'yaml'
315
+
316
+ @path = path
317
+ if File.exists?(@path)
318
+ @data = YAML.load_file(@path)
319
+ if @data == false || @data == []
320
+ # this happens if an users created an empty file accidentally
321
+ @data = {}
322
+ elsif !@data.is_a?(Hash)
323
+ raise "state_file on #{@path.inspect} is invalid"
183
324
  end
325
+ else
326
+ @data = {}
184
327
  end
185
- self.new @setting
186
328
  end
187
329
 
188
- def plugin_name
189
- "sql"
330
+ def last_records
331
+ @data['last_records'] ||= {}
332
+ end
333
+
334
+ def update!
335
+ File.open(@path, 'w') {|f|
336
+ f.write YAML.dump(@data)
337
+ }
338
+ end
339
+ end
340
+
341
+ class MemoryStateStore
342
+ def initialize
343
+ @data = {}
344
+ end
345
+
346
+ def last_records
347
+ @data['last_records'] ||= {}
348
+ end
349
+
350
+ def update!
190
351
  end
191
352
  end
192
353
  end
354
+
193
355
  end
194
356
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-sql-enchanced
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-31 00:00:00.000000000 Z
11
+ date: 2017-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd