fluent-plugin-sql-enchanced 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/fluent/plugin/in_sql.rb +328 -166
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ddedbc4c3f5babcc5b2eaf9f3a41b730f2d370d1
4
- data.tar.gz: 7a15b6a37feac936116e4c06933c135a3166eb73
3
+ metadata.gz: 8b75bc87abe0cbb08f1daf17600a67674cb67c6f
4
+ data.tar.gz: f3b4b9627884ce8d777a0be24dbe592f9d965193
5
5
  SHA512:
6
- metadata.gz: bf80c88deee559b0b425613401f6c7ac517edf604f1a142473ca69824cd2b4d63cef08a10f6c3ef7c74430d3ccdd03bf0d21b92aa80c1e37593d33eb42adc00a
7
- data.tar.gz: 3f18833a4a466ffa103b75c347df05773387394f06a00523f58d3a5a89e7c357a0e26eb1be6b85b19408678629b703148489c411ca14793b239f97ae59b648d2
6
+ metadata.gz: ac85a5965b87fa1a280837decf528147f4e2156fc6a1966fc9d184cf75675ecb827ac9e3104cb4ad44e2c15432ce4c8e6449c4411f2f55b00e3e3a83a4b062c1
7
+ data.tar.gz: 0048de749270caaf8311900d194ce726376b6c52146e1e9c84b798c66d184254477c985906836dabe83be9ff31fa3e7754ddb01870a6d5188057777d1d276405
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.3
1
+ 0.5.5
data/lib/fluent/plugin/in_sql.rb CHANGED
@@ -1,194 +1,356 @@
1
- class Fluentd
2
- module Setting
3
- class InSql
4
- include ActiveModel::Model
5
- include Common
6
-
7
- KEYS = [
8
- :host,
9
- :port,
10
- :database,
11
- :adapter,
12
- :username,
13
- :password,
14
- :tag_prefix,
15
- :select_interval,
16
- :select_limit,
17
- :state_file,
18
- :table,
19
- :all_tables
20
- ].freeze
21
-
22
- attr_accessor(*KEYS)
23
- attr_accessor(:fields_descriptions)
24
-
25
- validates :host, presence: true
26
- validates :database, presence: true
27
- validates :adapter, presence: true
28
- validates :username, presence: true
29
- validates :password, presence: true
30
- validates :state_file, presence: true
31
-
32
- def self.initial_params
33
- {
34
- host: 'localhost',
35
- database: 'rdb_database',
36
- adapter: 'mysql2',
37
- username: 'myusername',
38
- password: 'mypassword',
39
- tag_prefix: 'my.rdb',
40
- select_interval: '60s',
41
- select_limit: '500',
42
- state_file: "/tmp/data_enchilada-sql-#{Fluentd.instance.id}-#{Time.now.to_i}.pos",
43
- table: [
44
- {
45
- table: 'rdb_table',
46
- tag: 'rdb_table_tag',
47
- update_column: 'updated_at',
48
- time_column: 'updated_at',
49
- primary_key: ''
50
- }
51
- ]
52
- }
1
+ #
2
+ # Fluent
3
+ #
4
+ # Copyright (C) 2013 FURUHASHI Sadayuki
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+ module Fluent
19
+
20
+ require 'active_record'
21
+ require 'redis'
22
+ require 'pry'
23
+
24
+ SERVER_PREFIX = "ip#{Socket.ip_address_list.detect(&:ipv4_private?).try(:ip_address).delete('.')}"
25
+ # contents = IO.read("/etc/data_enchilada/data_enchilada.properties").strip
26
+
27
+ # KAFKA_SERVER = contents.lines.first.split("=")[1].strip
28
+ KAFKA_SERVER = '10.1.0.57'
29
+
30
+ class SQLInput < Input
31
+ Plugin.register_input('sql', self)
32
+
33
+ config_param :host, :string
34
+ config_param :port, :integer, :default => nil
35
+ config_param :adapter, :string
36
+ config_param :database, :string
37
+ config_param :username, :string, :default => nil
38
+ config_param :password, :string, :default => nil, :secret => true
39
+ config_param :socket, :string, :default => nil
40
+
41
+ config_param :state_file, :string, :default => nil
42
+ config_param :tag_prefix, :string, :default => nil
43
+ config_param :select_interval, :time, :default => 60
44
+ config_param :select_limit, :time, :default => 500
45
+
46
+ unless method_defined?(:log)
47
+ define_method(:log) { $log }
48
+ end
49
+
50
+ class TableElement
51
+ include Configurable
52
+
53
+ config_param :table, :string
54
+ config_param :tag, :string, :default => nil
55
+ config_param :update_column, :string, :default => nil
56
+ config_param :time_column, :string, :default => nil
57
+ config_param :primary_key, :string, :default => nil
58
+
59
+ def configure(conf)
60
+ super
53
61
  end
54
62
 
55
- def fields_descriptions
56
- {
57
- host: '* RDBMS host (required)',
58
- port: 'RDBMS port (optional)',
59
- database: '* RDBMS database name (required)',
60
- adapter: '* RDBMS driver name. You should install corresponding gem before start (mysql2 gem for mysql2 adapter, pg gem for postgresql adapter, etc. (required)',
61
- username: '* RDBMS login user name (required)',
62
- password: '* RDBMS login password (required)',
63
- tag_prefix: 'prefix of tags of events. actual tag will be this_tag_prefix.tables_tag (optional, but recommended)',
64
- select_interval: 'interval to run SQLs (optional)',
65
- select_limit: 'LIMIT of number of rows for each SQL (optional)',
66
- state_file: '* path to a file to store last rows (required)',
67
- all_tables: 'reads all tables instead of configuring each tables in &lt;table&gt; sections (optional)',
68
- table: {
69
- tag: 'tag name of events (optional; default value is table name)',
70
- table: '* RDBM table name',
71
- update_column: '* see above description',
72
- time_column: "(optional): if this option is set, this plugin uses this column's value as the the event's time. Otherwise it uses current time.",
73
- primary_key: "(optional): if you want to get data from the table which doesn't have primary key like PostgreSQL's View, set this parameter."
74
- }
75
- }
63
+ def init(tag_prefix, base_model, router)
64
+ @router = router
65
+ @tag = "#{tag_prefix}.#{@tag}" if tag_prefix
66
+
67
+ # creates a model for this table
68
+ table_name = @table
69
+ primary_key = @primary_key
70
+ @model = Class.new(base_model) do
71
+ self.table_name = table_name
72
+ self.inheritance_column = '_never_use_'
73
+ self.primary_key = primary_key if primary_key
74
+
75
+ #self.include_root_in_json = false
76
+
77
+ def read_attribute_for_serialization(n)
78
+ v = send(n)
79
+ if v.respond_to?(:to_msgpack)
80
+ v
81
+ else
82
+ v.to_s
83
+ end
84
+ end
85
+ end
86
+
87
+ # ActiveRecord requires model class to have a name.
88
+ class_name = table_name.singularize.camelize
89
+ base_model.const_set(class_name, @model)
90
+
91
+ # Sets model_name otherwise ActiveRecord causes errors
92
+ model_name = ActiveModel::Name.new(@model, nil, class_name)
93
+ @model.define_singleton_method(:model_name) { model_name }
94
+
95
+ # if update_column is not set, here uses primary key
96
+ unless @update_column
97
+ columns = Hash[@model.columns.map {|c| [c.name, c] }]
98
+ pk = columns[@model.primary_key]
99
+ unless pk
100
+ raise "Composite primary key is not supported. Set update_column parameter to <table> section."
101
+ end
102
+ @update_column = pk.name
103
+ end
76
104
  end
77
105
 
78
- def common_options
79
- [
80
- :host,
81
- :port,
82
- :database,
83
- :adapter,
84
- :username,
85
- :password,
86
- :tag_prefix
87
- ]
106
+ # emits next records and returns the last record of emitted records
107
+ def emit_next_records(last_record, limit)
108
+ relation = @model
109
+ if last_record && last_update_value = last_record[@update_column]
110
+ relation = relation.where("#{@update_column} > ?", last_update_value)
111
+ end
112
+ relation = relation.order("#{@update_column} ASC")
113
+ relation = relation.limit(limit) if limit > 0
114
+
115
+ now = Engine.now
116
+ entry_name = @model.table_name.singularize
117
+
118
+ me = MultiEventStream.new
119
+
120
+ relation.each do |obj|
121
+ record = obj.serializable_hash rescue nil
122
+ if record
123
+ if @time_column && tv = obj.read_attribute(@time_column)
124
+ if tv.is_a?(Time)
125
+ time = tv.to_i
126
+ else
127
+ time = Time.parse(tv.to_s).to_i rescue now
128
+ end
129
+ else
130
+ time = now
131
+ end
132
+ me.add(time, record)
133
+ last_record = record
134
+ end
135
+ end
136
+
137
+ last_record = last_record.dup if last_record # some plugin rewrites record :(
138
+ @router.emit_stream(@tag, me)
139
+
140
+ return last_record
88
141
  end
142
+ end
143
+
144
+ def configure(conf)
145
+ super
89
146
 
90
- def advanced_options
91
- [
92
- :select_interval,
93
- :select_limit,
94
- :state_file
95
- ]
147
+ unless @state_file
148
+ $log.warn "'state_file PATH' parameter is not set to a 'sql' source."
149
+ $log.warn "this parameter is highly recommended to save the last rows to resume tailing."
96
150
  end
97
151
 
98
- def table=(value)
99
- @tables = value.map do |t|
100
- t.map{|field, value| ([field.to_sym, value] if table_fields.include?(field.to_sym))}.to_h
101
- end
152
+ @tables = conf.elements.select {|e|
153
+ e.name == 'table'
154
+ }.map {|e|
155
+ te = TableElement.new
156
+ te.configure(e)
157
+ te
158
+ }
159
+
160
+ if config['all_tables']
161
+ @all_tables = true
102
162
  end
163
+ end
164
+
165
+ SKIP_TABLE_REGEXP = /\Aschema_migrations\Z/i
166
+
167
+ def start
168
+ @state_store = @state_file.nil? ? MemoryStateStore.new : StateStore.new(@state_file)
169
+
170
+ config = {
171
+ :adapter => @adapter,
172
+ :host => @host,
173
+ :port => @port,
174
+ :database => @database,
175
+ :username => @username,
176
+ :password => @password,
177
+ :socket => @socket,
178
+ }
103
179
 
104
- def table
105
- @tables
180
+ # creates subclass of ActiveRecord::Base so that it can have different
181
+ # database configuration from ActiveRecord::Base.
182
+ @base_model = Class.new(ActiveRecord::Base) do
183
+ # base model doesn't have corresponding phisical table
184
+ self.abstract_class = true
106
185
  end
107
186
 
108
- def table_fields
109
- [
110
- :table,
111
- :tag,
112
- :update_column,
113
- :time_column,
114
- :primary_key
115
- ]
187
+ # ActiveRecord requires the base_model to have a name. Here sets name
188
+ # of an anonymous class by assigning it to a constant. In Ruby, class has
189
+ # a name of a constant assigned first
190
+ SQLInput.const_set("BaseModel_#{rand(1 << 31)}", @base_model)
191
+
192
+ # Now base_model can have independent configuration from ActiveRecord::Base
193
+ @base_model.establish_connection(config)
194
+
195
+ if @all_tables
196
+ # get list of tables from the database
197
+ @tables = @base_model.connection.tables.map do |table_name|
198
+ if table_name.match(SKIP_TABLE_REGEXP)
199
+ # some tables such as "schema_migrations" should be ignored
200
+ nil
201
+ else
202
+ te = TableElement.new
203
+ te.configure({
204
+ 'table' => table_name,
205
+ 'tag' => table_name,
206
+ 'update_column' => nil,
207
+ })
208
+ te
209
+ end
210
+ end.compact
116
211
  end
117
212
 
118
- def to_config
119
- indent = " "
120
- config = "<source>\n"
121
- config << "#{indent}type #{plugin_type_name}\n"
122
- self.class.const_get(:KEYS).each do |key|
123
- next if key == :table
124
- next if key == :all_tables
125
- config << indent
126
- config << conf(key)
127
- config << "\n"
128
- end
129
- tables = send(:table).reject{|t| t.values.join('') == ''} rescue []
130
- if tables.present? && all_tables != '1'
131
- tables.each do |tab|
132
- config << "\n"
133
- config << indent
134
- config << "<table>\n"
135
- tab.each do |key, value|
136
- config << indent
137
- config << indent
138
- config << "#{key} #{value}"
139
- config << "\n"
140
- end
141
- config << indent
142
- config << "</table>\n"
213
+ init_redis
214
+
215
+ # ignore tables if TableElement#init failed
216
+ @tables.reject! do |te|
217
+ begin
218
+ schema_name = "#{SERVER_PREFIX}_#{@tag_prefix}_#{te.tag.presence || te.table}_#{Digest::MD5.new.hexdigest(@base_model.connection.columns(te.table).map{|c| c.name}.to_s)[0..5]}"
219
+ unless get_schema_from_redis_by_name schema_name
220
+ generate_schema te, schema_name
143
221
  end
144
- else
145
- config << indent
146
- config << 'all_tables'
147
- config << "\n"
222
+
223
+ te.init(@tag_prefix, @base_model, router)
224
+ log.info "Selecting '#{te.table}' table"
225
+ false
226
+ rescue => e
227
+ log.warn "Can't handle '#{te.table}' table. Ignoring.", :error => e.message, :error_class => e.class
228
+ log.warn_backtrace e.backtrace
229
+ true
148
230
  end
231
+ end
149
232
 
233
+ @stop_flag = false
234
+ @thread = Thread.new(&method(:thread_main))
235
+ end
150
236
 
151
- config << "</source>\n"
152
- config.gsub(/^[ ]*\n/m, "")
153
- end
154
-
155
- def self.create_from_config config
156
- @setting = {}
157
- @tables = []
158
- table = {}
159
- to_table = false
160
- str_params = config.split("\r\n").map{|str| str.squish}
161
- str_params.each do |str|
162
- case str
163
- when '<source>', '</source>'
164
- next
165
- when '<table>'
166
- to_table = true
167
- when '</table>'
168
- to_table = false
169
- @tables << table
170
- table = {}
171
- when 'all_tables'
172
- @setting[:all_tables] = true
173
- else
174
- param = str.split(' ')
175
- if to_table
176
- table[param.first] = param.second
177
- else
178
- @setting[param.first] = param.second unless param.first == 'type'
179
- end
237
+ def shutdown
238
+ @stop_flag = true
239
+ end
240
+
241
+ def thread_main
242
+ until @stop_flag
243
+ sleep @select_interval
244
+
245
+ @tables.each do |t|
246
+ begin
247
+ last_record = @state_store.last_records[t.table]
248
+ @state_store.last_records[t.table] = t.emit_next_records(last_record, @select_limit)
249
+ @state_store.update!
250
+ rescue => e
251
+ log.error "unexpected error", :error => e.message, :error_class => e.class
252
+ log.error_backtrace e.backtrace
180
253
  end
181
- if @tables.present?
182
- @setting[:table] = @tables
254
+ end
255
+ end
256
+ end
257
+
258
+ def init_redis
259
+ $redis = Redis.new
260
+ end
261
+
262
+ def generate_schema table, schema_name
263
+ require "avro_turf"
264
+ require 'avro_turf/messaging'
265
+ require "avro/builder"
266
+ avro = AvroTurf::Messaging.new(registry_url: "http://#{KAFKA_SERVER}:8081")
267
+ fields = @base_model.connection.columns(table.table).map do |col|
268
+ col_type = if col.sql_type.include? 'bigint'
269
+ 'long'
270
+ elsif ['int', 'bool'].any? {|needle| col.sql_type.include?(needle)}
271
+ 'int'
272
+ elsif ['float', 'double', 'real'].any? {|needle| col.sql_type.include?(needle)}
273
+ 'float'
274
+ else
275
+ 'string'
276
+ end
277
+ {
278
+ 'name' => col.name,
279
+ 'type' => ['null', col_type]
280
+ }
281
+ end
282
+ field_types = fields.map{|field| [field['name'], (field['type'] - ['null']).first]}.to_h
283
+ fields << {"name" => "enchilada_timestamp", "type" => "long"}
284
+ fields << {"name" => "enchilada_time_with_format", "type" => "string"}
285
+ schema_json = {
286
+ "type": "record",
287
+ "name": schema_name,
288
+ "fields": fields
289
+ }.to_json
290
+ registry = avro.instance_variable_get('@registry')
291
+ schema = Avro::Schema.parse(schema_json)
292
+ schema_id = registry.register("#{schema_name}-value", schema)
293
+
294
+ stored_schema = {
295
+ 'schema_json' => schema_json,
296
+ 'schema_id' => schema_id,
297
+ 'field_types' => field_types,
298
+ 'schema' => schema
299
+ }
300
+
301
+ set_schema_to_redis(schema_name, stored_schema)
302
+ end
303
+
304
+ def set_schema_to_redis schema_name, schema
305
+ $redis.set(schema_name, schema.to_json)
306
+ end
307
+
308
+ def get_schema_from_redis_by_name schema_name
309
+ stored_schema = $redis.get(schema_name)
310
+ end
311
+
312
+ class StateStore
313
+ def initialize(path)
314
+ require 'yaml'
315
+
316
+ @path = path
317
+ if File.exists?(@path)
318
+ @data = YAML.load_file(@path)
319
+ if @data == false || @data == []
320
+ # this happens if an users created an empty file accidentally
321
+ @data = {}
322
+ elsif !@data.is_a?(Hash)
323
+ raise "state_file on #{@path.inspect} is invalid"
183
324
  end
325
+ else
326
+ @data = {}
184
327
  end
185
- self.new @setting
186
328
  end
187
329
 
188
- def plugin_name
189
- "sql"
330
+ def last_records
331
+ @data['last_records'] ||= {}
332
+ end
333
+
334
+ def update!
335
+ File.open(@path, 'w') {|f|
336
+ f.write YAML.dump(@data)
337
+ }
338
+ end
339
+ end
340
+
341
+ class MemoryStateStore
342
+ def initialize
343
+ @data = {}
344
+ end
345
+
346
+ def last_records
347
+ @data['last_records'] ||= {}
348
+ end
349
+
350
+ def update!
190
351
  end
191
352
  end
192
353
  end
354
+
193
355
  end
194
356
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-sql-enchanced
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-31 00:00:00.000000000 Z
11
+ date: 2017-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd