fluent-plugin-sql 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -14
- data/VERSION +1 -1
- data/fluent-plugin-sql.gemspec +1 -0
- data/lib/fluent/plugin/in_sql.rb +43 -3
- metadata +4 -3
data/README.md
CHANGED
@@ -2,18 +2,18 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
This sql input plugin reads records from a RDBMS periodically. Thus you can
|
5
|
+
This sql input plugin reads records from a RDBMS periodically. Thus you can copy tables to other storages through Fluentd.
|
6
6
|
|
7
7
|
## How does it work?
|
8
8
|
|
9
|
-
This plugin runs following SQL
|
9
|
+
This plugin runs following SQL periodically:
|
10
10
|
|
11
11
|
SELECT * FROM *table* WHERE *update\_column* > *last\_update\_column\_value* ORDER BY *update_column* ASC LIMIT 500
|
12
12
|
|
13
|
-
What you need to configure is *update\_column*. The column
|
14
|
-
If you omit to set
|
13
|
+
What you need to configure is *update\_column*. The column should be an incremental column (such as an AUTO\_INCREMENT primary key) so that this plugin reads newly INSERTed rows. Alternatively, you can use a column that is incremented every time you update the row (such as a `last_updated_at` column) so that this plugin reads the UPDATEd rows as well.
|
14
|
+
If you omit the *update\_column* parameter, it uses the primary key.
|
15
15
|
|
16
|
-
It stores last selected rows to a file named state\_file to not forget the last row when
|
16
|
+
It stores last selected rows to a file (named *state\_file*) to not forget the last row when Fluentd restarts.
|
17
17
|
|
18
18
|
## Configuration
|
19
19
|
|
@@ -26,25 +26,25 @@ It stores last selected rows to a file named state\_file to not forget the last
|
|
26
26
|
user myusername
|
27
27
|
password mypassword
|
28
28
|
|
29
|
-
tag_prefix my.rdb
|
29
|
+
tag_prefix my.rdb # optional, but recommended
|
30
30
|
|
31
|
-
select_interval 60s
|
32
|
-
select_limit 500
|
31
|
+
select_interval 60s # optional
|
32
|
+
select_limit 500 # optional
|
33
33
|
|
34
34
|
state_file /var/run/fluentd/sql_state
|
35
35
|
|
36
36
|
<table>
|
37
|
-
tag table1
|
38
37
|
table table1
|
38
|
+
tag table1 # optional
|
39
39
|
update_column update_col1
|
40
|
-
time_column time_col2
|
40
|
+
time_column time_col2 # optional
|
41
41
|
</table>
|
42
42
|
|
43
43
|
<table>
|
44
|
-
tag table2
|
45
44
|
table table2
|
45
|
+
tag table2 # optional
|
46
46
|
update_column updated_at
|
47
|
-
time_column updated_at
|
47
|
+
time_column updated_at # optional
|
48
48
|
</table>
|
49
49
|
|
50
50
|
# detects all tables instead of <table> sections
|
@@ -67,6 +67,11 @@ It stores last selected rows to a file named state\_file to not forget the last
|
|
67
67
|
|
68
68
|
* **tag** tag name of events (optional; default value is table name)
|
69
69
|
* **table** RDBM table name
|
70
|
-
* **update_column
|
71
|
-
* **time_column** (optional)
|
70
|
+
* **update_column**: see above description
|
71
|
+
* **time_column** (optional): if this option is set, this plugin uses this column's value as the event's time. Otherwise it uses the current time.
|
72
72
|
|
73
|
+
## Limitation
|
74
|
+
|
75
|
+
You should make sure target tables have an index (and/or partitions) on the *update\_column*. Otherwise SELECT causes a full table scan and serious performance problems.
|
76
|
+
|
77
|
+
You can't replicate DELETEd rows.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.2
|
data/fluent-plugin-sql.gemspec
CHANGED
@@ -15,6 +15,7 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
16
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
17
|
gem.require_paths = ['lib']
|
18
|
+
gem.license = "Apache 2.0"
|
18
19
|
|
19
20
|
gem.add_dependency "fluentd", "~> 0.10.0"
|
20
21
|
gem.add_dependency 'activerecord', ['3.2.12']
|
data/lib/fluent/plugin/in_sql.rb
CHANGED
@@ -54,16 +54,32 @@ module Fluent
|
|
54
54
|
def init(tag_prefix, base_model)
|
55
55
|
@tag = "#{tag_prefix}.#{@tag}" if tag_prefix
|
56
56
|
|
57
|
+
# creates a model for this table
|
57
58
|
table_name = @table
|
58
59
|
@model = Class.new(base_model) do
|
59
60
|
self.table_name = table_name
|
60
61
|
self.inheritance_column = '_never_use_'
|
62
|
+
#self.include_root_in_json = false
|
63
|
+
|
64
|
+
def read_attribute_for_serialization(n)
|
65
|
+
v = send(n)
|
66
|
+
if v.respond_to?(:to_msgpack)
|
67
|
+
v
|
68
|
+
else
|
69
|
+
v.to_s
|
70
|
+
end
|
71
|
+
end
|
61
72
|
end
|
73
|
+
|
74
|
+
# ActiveRecord requires model class to have a name.
|
62
75
|
class_name = table_name.singularize.camelize
|
63
76
|
base_model.const_set(class_name, @model)
|
77
|
+
|
78
|
+
# Sets model_name otherwise ActiveRecord causes errors
|
64
79
|
model_name = ActiveModel::Name.new(@model, nil, class_name)
|
65
80
|
@model.define_singleton_method(:model_name) { model_name }
|
66
81
|
|
82
|
+
# if update_column is not set, here uses primary key
|
67
83
|
unless @update_column
|
68
84
|
columns = Hash[@model.columns.map {|c| [c.name, c] }]
|
69
85
|
pk = columns[@model.primary_key]
|
@@ -74,6 +90,7 @@ module Fluent
|
|
74
90
|
end
|
75
91
|
end
|
76
92
|
|
93
|
+
# emits next records and returns the last record of emitted records
|
77
94
|
def emit_next_records(last_record, limit)
|
78
95
|
relation = @model
|
79
96
|
if last_record && last_update_value = last_record[@update_column]
|
@@ -86,10 +103,14 @@ module Fluent
|
|
86
103
|
|
87
104
|
me = MultiEventStream.new
|
88
105
|
relation.each do |obj|
|
89
|
-
record = obj.
|
106
|
+
record = obj.serializable_hash rescue nil
|
90
107
|
if record
|
91
|
-
if tv =
|
92
|
-
|
108
|
+
if @time_column && tv = obj.read_attribute(@time_column)
|
109
|
+
if tv.is_a?(Time)
|
110
|
+
time = tv.to_i
|
111
|
+
else
|
112
|
+
time = Time.parse(tv.to_s).to_i rescue now
|
113
|
+
end
|
93
114
|
else
|
94
115
|
time = now
|
95
116
|
end
|
@@ -98,6 +119,7 @@ module Fluent
|
|
98
119
|
end
|
99
120
|
end
|
100
121
|
|
122
|
+
last_record = last_record.dup # some plugin rewrites record :(
|
101
123
|
Engine.emit_stream(@tag, me)
|
102
124
|
|
103
125
|
return last_record
|
@@ -134,15 +156,26 @@ module Fluent
|
|
134
156
|
:password => @password,
|
135
157
|
}
|
136
158
|
|
159
|
+
# creates subclass of ActiveRecord::Base so that it can have different
|
160
|
+
# database configuration from ActiveRecord::Base.
|
137
161
|
@base_model = Class.new(ActiveRecord::Base) do
|
162
|
+
# base model doesn't have corresponding physical table
|
138
163
|
self.abstract_class = true
|
139
164
|
end
|
165
|
+
|
166
|
+
# ActiveRecord requires the base_model to have a name. Here sets name
|
167
|
+
# of an anonymous class by assigning it to a constant. In Ruby, class has
|
168
|
+
# a name of a constant assigned first
|
140
169
|
SQLInput.const_set("BaseModel_#{rand(1<<31)}", @base_model)
|
170
|
+
|
171
|
+
# Now base_model can have independent configuration from ActiveRecord::Base
|
141
172
|
@base_model.establish_connection(config)
|
142
173
|
|
143
174
|
if @all_tables
|
175
|
+
# get list of tables from the database
|
144
176
|
@tables = @base_model.connection.tables.map do |table_name|
|
145
177
|
if table_name.match(SKIP_TABLE_REGEXP)
|
178
|
+
# some tables such as "schema_migrations" should be ignored
|
146
179
|
nil
|
147
180
|
else
|
148
181
|
te = TableElement.new
|
@@ -156,6 +189,7 @@ module Fluent
|
|
156
189
|
end.compact
|
157
190
|
end
|
158
191
|
|
192
|
+
# ignore tables if TableElement#init failed
|
159
193
|
@tables.reject! do |te|
|
160
194
|
begin
|
161
195
|
te.init(@tag_prefix, @base_model)
|
@@ -198,6 +232,12 @@ module Fluent
|
|
198
232
|
@path = path
|
199
233
|
if File.exists?(@path)
|
200
234
|
@data = YAML.load_file(@path)
|
235
|
+
if @data == false || @data == []
|
236
|
+
# this happens if a user created an empty file accidentally
|
237
|
+
@data = {}
|
238
|
+
elsif !@data.is_a?(Hash)
|
239
|
+
raise "state_file on #{@path.inspect} is invalid"
|
240
|
+
end
|
201
241
|
else
|
202
242
|
@data = {}
|
203
243
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sql
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-09
|
12
|
+
date: 2013-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: fluentd
|
@@ -104,7 +104,8 @@ files:
|
|
104
104
|
- fluent-plugin-sql.gemspec
|
105
105
|
- lib/fluent/plugin/in_sql.rb
|
106
106
|
homepage: https://github.com/frsyuki/fluent-plugin-sql
|
107
|
-
licenses:
|
107
|
+
licenses:
|
108
|
+
- Apache 2.0
|
108
109
|
post_install_message:
|
109
110
|
rdoc_options: []
|
110
111
|
require_paths:
|