fluent-plugin-mysql-replicator 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +74 -3
- data/fluent-plugin-mysql-replicator.gemspec +2 -2
- data/lib/fluent/plugin/in_mysql_replicator.rb +7 -4
- data/lib/fluent/plugin/in_mysql_replicator_multi.rb +201 -0
- data/setup_mysql_replicator_multi.sql +28 -0
- data/test/helper.rb +1 -0
- data/test/plugin/test_in_mysql_replicator.rb +2 -0
- data/test/plugin/test_in_mysql_replicator_multi.rb +26 -0
- metadata +8 -4
data/README.md
CHANGED
@@ -14,7 +14,15 @@ gem install fluent-plugin-mysql-replicator
|
|
14
14
|
/usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
|
15
15
|
`````
|
16
16
|
|
17
|
-
## Tutorial
|
17
|
+
## Tutorial for Quickstart
|
18
|
+
|
19
|
+
It is useful for these purposes.
|
20
|
+
|
21
|
+
* try out this plugin.
|
22
|
+
* replicate a small table with fewer than a million records.
|
23
|
+
|
24
|
+
**Note:**
|
25
|
+
When syncing a table with 300 million rows, it will consume around 800MB of memory in a Ruby 1.9.3 environment.
|
18
26
|
|
19
27
|
#### configuration
|
20
28
|
|
@@ -57,9 +65,72 @@ $ tail -f /var/log/td-agent/td-agent.log
|
|
57
65
|
2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
|
58
66
|
`````
|
59
67
|
|
60
|
-
##
|
68
|
+
## Tutorial for Production
|
69
|
+
|
70
|
+
It is very useful for replicating millions of records and/or multiple tables with multiple threads.
|
71
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
72
|
+
|
73
|
+
**Note:**
|
74
|
+
When syncing a table with 300 million rows, it will consume around 20MB of memory in a Ruby 1.9.3 environment.
|
75
|
+
|
76
|
+
#### prepare
|
77
|
+
|
78
|
+
* create database and tables.
|
79
|
+
* add replicator configuration.
|
80
|
+
|
81
|
+
```
|
82
|
+
$ cat setup_mysql_replicator_multi.sql
|
83
|
+
CREATE DATABASE replicator_manager;
|
84
|
+
USE replicator_manager;
|
85
|
+
|
86
|
+
CREATE TABLE `hash_tables` (
|
87
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
88
|
+
`setting_name` varchar(255) NOT NULL,
|
89
|
+
`setting_query_pk` int(11) NOT NULL,
|
90
|
+
`setting_query_hash` varchar(255) NOT NULL,
|
91
|
+
PRIMARY KEY (`id`),
|
92
|
+
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
93
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
94
|
+
|
95
|
+
CREATE TABLE `settings` (
|
96
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
97
|
+
`name` varchar(255) NOT NULL,
|
98
|
+
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
99
|
+
`port` int(11) NOT NULL DEFAULT '3306',
|
100
|
+
`username` varchar(255) NOT NULL,
|
101
|
+
`password` varchar(255) NOT NULL,
|
102
|
+
`database` varchar(255) NOT NULL,
|
103
|
+
`query` TEXT NOT NULL,
|
104
|
+
`interval` int(11) NOT NULL,
|
105
|
+
`tag` varchar(255) NOT NULL,
|
106
|
+
`primary_key` varchar(11) DEFAULT 'id',
|
107
|
+
`enable_delete` int(11) DEFAULT '1',
|
108
|
+
PRIMARY KEY (`id`),
|
109
|
+
UNIQUE KEY `name` (`name`)
|
110
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
111
|
+
```
|
112
|
+
|
113
|
+
```
|
114
|
+
$ mysql
|
115
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
116
|
+
mysql> insert into settings ...snip...;
|
117
|
+
```
|
61
118
|
|
62
|
-
|
119
|
+
#### configuration
|
120
|
+
|
121
|
+
`````
|
122
|
+
<source>
|
123
|
+
type mysql_replicator_multi
|
124
|
+
manager_host localhost
|
125
|
+
manager_username your_mysql_user
|
126
|
+
manager_password your_mysql_password
|
127
|
+
manager_database replicator_manager
|
128
|
+
</source>
|
129
|
+
|
130
|
+
<match replicator.*>
|
131
|
+
type stdout
|
132
|
+
</match>
|
133
|
+
`````
|
63
134
|
|
64
135
|
## TODO
|
65
136
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = "fluent-plugin-mysql-replicator"
|
4
|
-
s.version = "0.0
|
4
|
+
s.version = "0.1.0"
|
5
5
|
s.authors = ["Kentaro Yoshida"]
|
6
6
|
s.email = ["y.ken.studio@gmail.com"]
|
7
7
|
s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
|
8
|
-
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL
|
8
|
+
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It will support multiple table replication to another RDB/noSQL like Elasticsearch.}
|
9
9
|
|
10
10
|
s.files = `git ls-files`.split("\n")
|
11
11
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -18,6 +18,7 @@ module Fluent
|
|
18
18
|
config_param :tag, :string
|
19
19
|
config_param :query, :string
|
20
20
|
config_param :primary_key, :string, :default => 'id'
|
21
|
+
config_param :enable_delete, :bool, :default => 'yes'
|
21
22
|
|
22
23
|
def configure(conf)
|
23
24
|
super
|
@@ -59,10 +60,12 @@ module Fluent
|
|
59
60
|
table_hash[row[@primary_key]] = current_hash
|
60
61
|
end
|
61
62
|
ids = current_ids
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
unless @enable_delete
|
64
|
+
deleted_ids = previous_ids - current_ids
|
65
|
+
if deleted_ids.count > 0
|
66
|
+
hash_delete_by_list(table_hash, deleted_ids)
|
67
|
+
deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
|
68
|
+
end
|
66
69
|
end
|
67
70
|
sleep @interval
|
68
71
|
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Fluent
|
2
|
+
class MysqlReplicatorMultiInput < Fluent::Input
|
3
|
+
Plugin.register_input('mysql_replicator_multi', self)
|
4
|
+
|
5
|
+
# Loads the libraries the plugin relies on (SHA1 digests and the mysql2
# client) before handing control to the parent class initializer.
def initialize
  require 'digest/sha1'
  require 'mysql2'
  super
end
|
10
|
+
|
11
|
+
config_param :manager_host, :string, :default => 'localhost'
|
12
|
+
config_param :manager_port, :integer, :default => 3306
|
13
|
+
config_param :manager_username, :string, :default => nil
|
14
|
+
config_param :manager_password, :string, :default => ''
|
15
|
+
config_param :manager_database, :string, :default => 'replicator_manager'
|
16
|
+
|
17
|
+
# Lets the parent class process the configuration, then fixes the pause
# used between connection attempts at ten seconds.
#
# conf - the configuration object handed over by the framework.
def configure(conf)
  super(conf)
  retry_pause = Config.time_value('10sec')
  @reconnect_interval = retry_pause
end
|
21
|
+
|
22
|
+
# Spawns one polling thread per entry returned by get_settings.
#
# The threads are kept in @threads and share @mutex. When no settings are
# found an error is logged. Any StandardError raised while starting is
# logged (message and backtrace) instead of being propagated.
def start
  @threads = []
  @mutex = Mutex.new
  get_settings.each do |setting|
    worker = Thread.new { poll(setting) }
    @threads.push(worker)
  end
  if @threads.empty?
    $log.error "mysql_replicator_multi: stop working due to empty configuration"
  end
rescue StandardError => e
  $log.error "error: #{e.message}"
  $log.error e.backtrace.join("\n")
end
|
37
|
+
|
38
|
+
# Kills every polling thread recorded in @threads.
#
# Array() turns a nil @threads (shutdown called although start never ran)
# into an empty list, so this is a no-op instead of a NoMethodError.
def shutdown
  Array(@threads).each(&:kill)
end
|
43
|
+
|
44
|
+
# Reads every row of the manager database's `settings` table.
#
# Returns an Array with one entry per row, in the order the query yields them.
# The connection obtained for this lookup is closed before returning, even
# when the query raises, so it is not left open for the plugin's lifetime.
def get_settings
  manager_db = get_manager_connection
  begin
    manager_db.query("SELECT * FROM settings").to_a
  ensure
    manager_db.close
  end
end
|
53
|
+
|
54
|
+
# Polls one replication source in an endless loop, running insert/update and
# delete detection for every row the configured query returns.
#
# config - Hash with String keys describing the source; this method reads
#          'primary_key', 'query' and 'interval' and passes the whole Hash on
#          to get_origin_connection and the detect_* helpers.
#
# Any StandardError is logged through $log (message and backtrace) and ends
# the loop for this source.
def poll(config)
  begin
    # NOTE(review): @manager_db is an instance variable, so every polling
    # thread overwrites it; all per-row work below runs under @mutex.
    @manager_db = get_manager_connection
    # Keep the keys in the log output and hide only the password value.
    masked_config = Hash[config.map { |k, v| [k, (k == 'password') ? v.to_s.gsub(/./, '*') : v] }]
    @mutex.synchronize {
      $log.info "mysql_replicator_multi: polling start. :config=>#{masked_config}"
    }
    primary_key = config['primary_key']
    # previous_id deliberately survives across passes: detect_delete compares
    # the first id of a new pass with the last id of the previous one.
    previous_id = current_id = 0
    loop do
      db = get_origin_connection(config)
      begin
        db.query(config['query']).each do |row|
          # synchronize (rather than lock/unlock) releases the mutex even when
          # a helper raises, so one failing source cannot block the others.
          @mutex.synchronize do
            current_id = row[primary_key]
            detect_insert_update(config, row)
            detect_delete(config, current_id, previous_id)
            previous_id = current_id
          end
        end
      ensure
        # Close the origin connection on the error path as well.
        db.close
      end
      sleep config['interval']
    end
  rescue StandardError => e
    $log.error "error: #{e.message}"
    $log.error e.backtrace.join("\n")
  end
end
|
81
|
+
|
82
|
+
# Compares the SHA1 digest of a fetched row with the digest stored for its
# primary key and, when they differ, emits an event and records the new digest.
#
# config - Hash with String keys; 'primary_key', 'name' and 'tag' are read.
# row    - the fetched row, a Hash keyed by column name.
#
# An empty stored digest means :insert, a different one means :update, an
# identical one means nothing is emitted.
def detect_insert_update(config, row)
  setting_name = config['name']
  row_id = row[config['primary_key']]
  known_digest = get_stored_hash(setting_name, row_id)
  fresh_digest = Digest::SHA1.hexdigest(row.flatten.join)

  change =
    if known_digest.empty?
      :insert
    elsif known_digest != fresh_digest
      :update
    end
  return if change.nil?

  emit_record("#{config['tag']}.#{change}", row)
  update_hashtable(:event => change, :ids => row_id, :setting_name => setting_name, :hash => fresh_digest)
end
|
99
|
+
|
100
|
+
# Looks up the digest stored in `hash_tables` for one primary key of one setting.
#
# setting_name - value matched against hash_tables.setting_name.
# id           - primary key value; coerced with to_i before being put in the SQL.
#
# Returns the stored digest String, or an empty String when no row exists, so
# callers can test the result with `empty?` without depending on what the
# result set's `each` happens to return.
def get_stored_hash(setting_name, id)
  query = "SELECT setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
  @manager_db.query(query).each do |row|
    return row['setting_query_hash']
  end
  ''
end
|
106
|
+
|
107
|
+
# Emits a delete event for every stored primary key that collect_gap_ids
# reports as missing, then removes those keys from the hash table.
#
# config      - Hash with String keys; 'enable_delete', 'name', 'tag' and
#               'primary_key' are read. Nothing happens unless
#               'enable_delete' equals 1.
# current_id  - primary key of the row being processed.
# previous_id - primary key of the row processed just before it.
def detect_delete(config, current_id, previous_id)
  return if config['enable_delete'] != 1

  gone_ids = collect_gap_ids(config['name'], current_id, previous_id)
  return if gone_ids.empty?

  gone_ids.each do |gone_id|
    emit_record("#{config['tag']}.delete", {config['primary_key'] => gone_id})
  end
  update_hashtable(:event => :delete, :ids => gone_ids, :setting_name => config['name'])
end
|
118
|
+
|
119
|
+
# Finds primary keys that are still recorded in `hash_tables` for a setting
# but were skipped between two consecutively processed rows.
#
# setting_name - value matched against hash_tables.setting_name.
# current_id   - primary key of the row being processed.
# previous_id  - primary key of the row processed just before it (0 initially).
#
# Three situations produce a lookup; consecutive ids produce none:
# * gap:         current_id - previous_id > 1 -> keys strictly between them.
# * wrap-around: previous_id > current_id     -> keys above previous_id and keys
#                below current_id. The second part was missing before, so rows
#                deleted ahead of the first remaining row were never reported.
# * single row:  previous_id == current_id    -> every other key.
#
# NOTE(review): this presumes the source query yields rows in ascending
# primary-key order and that both ids are numeric - confirm against callers.
#
# Returns an Array of primary key values (empty when nothing is missing).
def collect_gap_ids(setting_name, current_id, previous_id)
  condition =
    if (current_id - previous_id) > 1
      "setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
    elsif previous_id > current_id
      "(setting_query_pk > #{previous_id.to_i} OR setting_query_pk < #{current_id.to_i})"
    elsif previous_id == current_id
      "setting_query_pk <> #{current_id.to_i}"
    end
  return [] if condition.nil?

  query = "SELECT setting_query_pk FROM hash_tables WHERE setting_name = '#{setting_name}' AND #{condition}"
  @manager_db.query(query).map { |row| row['setting_query_pk'] }
end
|
141
|
+
|
142
|
+
# Applies one change per id to the `hash_tables` bookkeeping table.
#
# opts - Hash with Symbol keys:
#        :event        - :insert, :update or :delete; any other value runs no query.
#        :ids          - a single id or an Array of ids.
#        :setting_name - value for hash_tables.setting_name.
#        :hash         - digest to store (used by :insert and :update).
#
# Array() wraps any scalar id, not only Integers, so a single non-Integer id
# no longer fails on a missing `each`.
def update_hashtable(opts)
  setting_name = opts[:setting_name]
  Array(opts[:ids]).each do |id|
    query =
      case opts[:event]
      when :insert
        "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash) values('#{setting_name}','#{id}','#{opts[:hash]}')"
      when :update
        "update hash_tables set setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{setting_name}' AND setting_query_pk = '#{id}'"
      when :delete
        "delete from hash_tables WHERE setting_name = '#{setting_name}' AND setting_query_pk = '#{id}'"
      end
    @manager_db.query(query) unless query.nil?
  end
end
|
156
|
+
|
157
|
+
# Forwards a single record to the engine under the given tag, stamped with
# the engine's current time.
def emit_record(tag, record)
  emitted_at = Engine.now
  Engine.emit(tag, emitted_at, record)
end
|
160
|
+
|
161
|
+
# Opens a connection to the manager database described by the manager_*
# settings, retrying until it succeeds.
#
# Each failed attempt is logged as a warning and followed by a pause of
# @reconnect_interval seconds. Only StandardError is retried, so Interrupt,
# SystemExit and similar are no longer swallowed by the retry loop.
#
# Returns the new Mysql2::Client.
def get_manager_connection
  begin
    return Mysql2::Client.new(
      :host => @manager_host,
      :port => @manager_port,
      :username => @manager_username,
      :password => @manager_password,
      :database => @manager_database,
      :encoding => 'utf8',
      :reconnect => true,
      :stream => false,
      :cache_rows => false
    )
  rescue StandardError => e
    $log.warn "mysql_replicator_multi: #{e}"
    sleep @reconnect_interval
    retry
  end
end
|
180
|
+
|
181
|
+
# Opens a streaming connection to the replication source described by config,
# retrying until it succeeds.
#
# config - Hash with String keys; 'host', 'port', 'username', 'password' and
#          'database' are read.
#
# The port comes from config['port']. The previous lookup of
# config['manager_port'] named a key this file never sets on config, so the
# configured port was ignored.
#
# Each failed attempt is logged as a warning and followed by a pause of
# @reconnect_interval seconds. Only StandardError is retried, so Interrupt,
# SystemExit and similar are no longer swallowed by the retry loop.
#
# Returns the new Mysql2::Client.
def get_origin_connection(config)
  begin
    return Mysql2::Client.new(
      :host => config['host'],
      :port => config['port'],
      :username => config['username'],
      :password => config['password'],
      :database => config['database'],
      :encoding => 'utf8',
      :reconnect => true,
      :stream => true,
      :cache_rows => false
    )
  rescue StandardError => e
    $log.warn "mysql_replicator_multi: #{e}"
    sleep @reconnect_interval
    retry
  end
end
|
200
|
+
end
|
201
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
CREATE DATABASE replicator_manager;
|
2
|
+
USE replicator_manager;
|
3
|
+
|
4
|
+
CREATE TABLE `hash_tables` (
|
5
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
6
|
+
`setting_name` varchar(255) NOT NULL,
|
7
|
+
`setting_query_pk` int(11) NOT NULL,
|
8
|
+
`setting_query_hash` varchar(255) NOT NULL,
|
9
|
+
PRIMARY KEY (`id`),
|
10
|
+
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
11
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
12
|
+
|
13
|
+
CREATE TABLE `settings` (
|
14
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
15
|
+
`name` varchar(255) NOT NULL,
|
16
|
+
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
17
|
+
`port` int(11) NOT NULL DEFAULT '3306',
|
18
|
+
`username` varchar(255) NOT NULL,
|
19
|
+
`password` varchar(255) NOT NULL,
|
20
|
+
`database` varchar(255) NOT NULL,
|
21
|
+
`query` TEXT NOT NULL,
|
22
|
+
`interval` int(11) NOT NULL,
|
23
|
+
`tag` varchar(255) NOT NULL,
|
24
|
+
`primary_key` varchar(11) DEFAULT 'id',
|
25
|
+
`enable_delete` int(11) DEFAULT '1',
|
26
|
+
PRIMARY KEY (`id`),
|
27
|
+
UNIQUE KEY `name` (`name`)
|
28
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
data/test/helper.rb
CHANGED
@@ -28,11 +28,13 @@ class MysqlReplicatorInputTest < Test::Unit::TestCase
|
|
28
28
|
interval 30
|
29
29
|
tag input.mysql
|
30
30
|
query SELECT id, text from search_text
|
31
|
+
enable_delete no
|
31
32
|
]
|
32
33
|
d.instance.inspect
|
33
34
|
assert_equal 'localhost', d.instance.host
|
34
35
|
assert_equal 3306, d.instance.port
|
35
36
|
assert_equal 30, d.instance.interval
|
36
37
|
assert_equal 'input.mysql', d.instance.tag
|
38
|
+
assert_equal false, d.instance.enable_delete
|
37
39
|
end
|
38
40
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Configuration smoke test for the mysql_replicator_multi input plugin.
class MysqlReplicatorMultiInputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    manager_host      localhost
    manager_port      3306
    manager_username  foo
    manager_password  bar
  ]

  # Builds a configured test driver for the plugin.
  #
  # The plugin is an input, so the input test driver is used instead of the
  # output one. `tag` is kept only so existing callers keep working; the input
  # driver takes no tag.
  def create_driver(conf=CONFIG, tag='test')
    Fluent::Test::InputTestDriver.new(Fluent::MysqlReplicatorMultiInput).configure(conf)
  end

  # Checks that explicit settings are picked up and that manager_database
  # falls back to its default when it is not configured.
  def test_configure
    d = create_driver(CONFIG)
    d.instance.inspect
    assert_equal 'localhost', d.instance.manager_host
    assert_equal 3306, d.instance.manager_port
    assert_equal 'foo', d.instance.manager_username
    assert_equal 'bar', d.instance.manager_password
    assert_equal 'replicator_manager', d.instance.manager_database
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-mysql-replicator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-12-
|
12
|
+
date: 2013-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -74,8 +74,11 @@ files:
|
|
74
74
|
- Rakefile
|
75
75
|
- fluent-plugin-mysql-replicator.gemspec
|
76
76
|
- lib/fluent/plugin/in_mysql_replicator.rb
|
77
|
+
- lib/fluent/plugin/in_mysql_replicator_multi.rb
|
78
|
+
- setup_mysql_replicator_multi.sql
|
77
79
|
- test/helper.rb
|
78
80
|
- test/plugin/test_in_mysql_replicator.rb
|
81
|
+
- test/plugin/test_in_mysql_replicator_multi.rb
|
79
82
|
homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
|
80
83
|
licenses: []
|
81
84
|
post_install_message:
|
@@ -99,8 +102,9 @@ rubyforge_project:
|
|
99
102
|
rubygems_version: 1.8.23
|
100
103
|
signing_key:
|
101
104
|
specification_version: 3
|
102
|
-
summary: Fluentd input plugin to track insert/update/delete event from MySQL
|
103
|
-
|
105
|
+
summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
|
106
|
+
It will support multiple table replication to another RDB/noSQL like Elasticsearch.
|
104
107
|
test_files:
|
105
108
|
- test/helper.rb
|
106
109
|
- test/plugin/test_in_mysql_replicator.rb
|
110
|
+
- test/plugin/test_in_mysql_replicator_multi.rb
|