fluent-plugin-mysql-replicator 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +74 -3
- data/fluent-plugin-mysql-replicator.gemspec +2 -2
- data/lib/fluent/plugin/in_mysql_replicator.rb +7 -4
- data/lib/fluent/plugin/in_mysql_replicator_multi.rb +201 -0
- data/setup_mysql_replicator_multi.sql +28 -0
- data/test/helper.rb +1 -0
- data/test/plugin/test_in_mysql_replicator.rb +2 -0
- data/test/plugin/test_in_mysql_replicator_multi.rb +26 -0
- metadata +8 -4
data/README.md
CHANGED
@@ -14,7 +14,15 @@ gem install fluent-plugin-mysql-replicator
|
|
14
14
|
/usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
|
15
15
|
`````
|
16
16
|
|
17
|
-
## Tutorial
|
17
|
+
## Tutorial for Quickstart
|
18
|
+
|
19
|
+
It is useful for these purposes.
|
20
|
+
|
21
|
+
* try out this plugin.
|
22
|
+
* replicate a small table (under a million records).
|
23
|
+
|
24
|
+
**Note:**
|
25
|
+
When syncing a table with 300 million rows, it will consume around 800MB of memory in a Ruby 1.9.3 environment.
|
18
26
|
|
19
27
|
#### configuration
|
20
28
|
|
@@ -57,9 +65,72 @@ $ tail -f /var/log/td-agent/td-agent.log
|
|
57
65
|
2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
|
58
66
|
`````
|
59
67
|
|
60
|
-
##
|
68
|
+
## Tutorial for Production
|
69
|
+
|
70
|
+
It is very useful for replicating millions of records and/or multiple tables with multiple threads.
|
71
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
72
|
+
|
73
|
+
**Note:**
|
74
|
+
When syncing a table with 300 million rows, it will consume around 20MB of memory in a Ruby 1.9.3 environment.
|
75
|
+
|
76
|
+
#### prepare
|
77
|
+
|
78
|
+
* create database and tables.
|
79
|
+
* add replicator configuration.
|
80
|
+
|
81
|
+
```
|
82
|
+
$ cat setup_mysql_replicator_multi.sql
|
83
|
+
CREATE DATABASE replicator_manager;
|
84
|
+
USE replicator_manager;
|
85
|
+
|
86
|
+
CREATE TABLE `hash_tables` (
|
87
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
88
|
+
`setting_name` varchar(255) NOT NULL,
|
89
|
+
`setting_query_pk` int(11) NOT NULL,
|
90
|
+
`setting_query_hash` varchar(255) NOT NULL,
|
91
|
+
PRIMARY KEY (`id`),
|
92
|
+
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
93
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
94
|
+
|
95
|
+
CREATE TABLE `settings` (
|
96
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
97
|
+
`name` varchar(255) NOT NULL,
|
98
|
+
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
99
|
+
`port` int(11) NOT NULL DEFAULT '3306',
|
100
|
+
`username` varchar(255) NOT NULL,
|
101
|
+
`password` varchar(255) NOT NULL,
|
102
|
+
`database` varchar(255) NOT NULL,
|
103
|
+
`query` TEXT NOT NULL,
|
104
|
+
`interval` int(11) NOT NULL,
|
105
|
+
`tag` varchar(255) NOT NULL,
|
106
|
+
`primary_key` varchar(11) DEFAULT 'id',
|
107
|
+
`enable_delete` int(11) DEFAULT '1',
|
108
|
+
PRIMARY KEY (`id`),
|
109
|
+
UNIQUE KEY `name` (`name`)
|
110
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
111
|
+
```
|
112
|
+
|
113
|
+
```
|
114
|
+
$ mysql
|
115
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
116
|
+
mysql> insert into source ...snip...;
|
117
|
+
```
|
61
118
|
|
62
|
-
|
119
|
+
#### configuration
|
120
|
+
|
121
|
+
`````
|
122
|
+
<source>
|
123
|
+
type mysql_replicator_multi
|
124
|
+
manager_host localhost
|
125
|
+
manager_username your_mysql_user
|
126
|
+
manager_password your_mysql_password
|
127
|
+
manager_database replicator_manager
|
128
|
+
</source>
|
129
|
+
|
130
|
+
<match replicator.*>
|
131
|
+
type stdout
|
132
|
+
</match>
|
133
|
+
`````
|
63
134
|
|
64
135
|
## TODO
|
65
136
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = "fluent-plugin-mysql-replicator"
|
4
|
-
s.version = "0.0
|
4
|
+
s.version = "0.1.0"
|
5
5
|
s.authors = ["Kentaro Yoshida"]
|
6
6
|
s.email = ["y.ken.studio@gmail.com"]
|
7
7
|
s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
|
8
|
-
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL
|
8
|
+
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It will support multiple table replication to another RDB/noSQL like Elasticsearch.}
|
9
9
|
|
10
10
|
s.files = `git ls-files`.split("\n")
|
11
11
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -18,6 +18,7 @@ module Fluent
|
|
18
18
|
config_param :tag, :string
|
19
19
|
config_param :query, :string
|
20
20
|
config_param :primary_key, :string, :default => 'id'
|
21
|
+
config_param :enable_delete, :bool, :default => 'yes'
|
21
22
|
|
22
23
|
def configure(conf)
|
23
24
|
super
|
@@ -59,10 +60,12 @@ module Fluent
|
|
59
60
|
table_hash[row[@primary_key]] = current_hash
|
60
61
|
end
|
61
62
|
ids = current_ids
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
unless @enable_delete
|
64
|
+
deleted_ids = previous_ids - current_ids
|
65
|
+
if deleted_ids.count > 0
|
66
|
+
hash_delete_by_list(table_hash, deleted_ids)
|
67
|
+
deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
|
68
|
+
end
|
66
69
|
end
|
67
70
|
sleep @interval
|
68
71
|
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Fluent
|
2
|
+
class MysqlReplicatorMultiInput < Fluent::Input
|
3
|
+
Plugin.register_input('mysql_replicator_multi', self)
|
4
|
+
|
5
|
+
def initialize
|
6
|
+
require 'mysql2'
|
7
|
+
require 'digest/sha1'
|
8
|
+
super
|
9
|
+
end
|
10
|
+
|
11
|
+
config_param :manager_host, :string, :default => 'localhost'
|
12
|
+
config_param :manager_port, :integer, :default => 3306
|
13
|
+
config_param :manager_username, :string, :default => nil
|
14
|
+
config_param :manager_password, :string, :default => ''
|
15
|
+
config_param :manager_database, :string, :default => 'replicator_manager'
|
16
|
+
|
17
|
+
def configure(conf)
|
18
|
+
super
|
19
|
+
@reconnect_interval = Config.time_value('10sec')
|
20
|
+
end
|
21
|
+
|
22
|
+
def start
|
23
|
+
begin
|
24
|
+
@threads = []
|
25
|
+
@mutex = Mutex.new
|
26
|
+
get_settings.each do |config|
|
27
|
+
@threads << Thread.new {
|
28
|
+
poll(config)
|
29
|
+
}
|
30
|
+
end
|
31
|
+
$log.error "mysql_replicator_multi: stop working due to empty configuration" if @threads.empty?
|
32
|
+
rescue StandardError => e
|
33
|
+
$log.error "error: #{e.message}"
|
34
|
+
$log.error e.backtrace.join("\n")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def shutdown
|
39
|
+
@threads.each do |thread|
|
40
|
+
Thread.kill(thread)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def get_settings
|
45
|
+
manager_db = get_manager_connection
|
46
|
+
settings = []
|
47
|
+
query = "SELECT * FROM settings"
|
48
|
+
manager_db.query(query).each do |row|
|
49
|
+
settings << row
|
50
|
+
end
|
51
|
+
return settings
|
52
|
+
end
|
53
|
+
|
54
|
+
def poll(config)
|
55
|
+
begin
|
56
|
+
@manager_db = get_manager_connection
|
57
|
+
masked_config = config.map {|k,v| (k == 'password') ? v.to_s.gsub(/./, '*') : v}
|
58
|
+
@mutex.synchronize {
|
59
|
+
$log.info "mysql_replicator_multi: polling start. :config=>#{masked_config}"
|
60
|
+
}
|
61
|
+
primary_key = config['primary_key']
|
62
|
+
previous_id = current_id = 0
|
63
|
+
loop do
|
64
|
+
db = get_origin_connection(config)
|
65
|
+
db.query(config['query']).each do |row|
|
66
|
+
@mutex.lock
|
67
|
+
current_id = row[primary_key]
|
68
|
+
detect_insert_update(config, row)
|
69
|
+
detect_delete(config, current_id, previous_id)
|
70
|
+
previous_id = current_id
|
71
|
+
@mutex.unlock
|
72
|
+
end
|
73
|
+
db.close
|
74
|
+
sleep config['interval']
|
75
|
+
end
|
76
|
+
rescue StandardError => e
|
77
|
+
$log.error "error: #{e.message}"
|
78
|
+
$log.error e.backtrace.join("\n")
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def detect_insert_update(config, row)
|
83
|
+
primary_key = config['primary_key']
|
84
|
+
current_id = row[primary_key]
|
85
|
+
stored_hash = get_stored_hash(config['name'], current_id)
|
86
|
+
current_hash = Digest::SHA1.hexdigest(row.flatten.join)
|
87
|
+
|
88
|
+
event = nil
|
89
|
+
if stored_hash.empty?
|
90
|
+
event = :insert
|
91
|
+
elsif stored_hash != current_hash
|
92
|
+
event = :update
|
93
|
+
end
|
94
|
+
unless event.nil?
|
95
|
+
emit_record("#{config['tag']}.#{event.to_s}", row)
|
96
|
+
update_hashtable({:event => event, :ids => current_id, :setting_name => config['name'], :hash => current_hash})
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def get_stored_hash(setting_name, id)
|
101
|
+
query = "SELECT setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
|
102
|
+
@manager_db.query(query).each do |row|
|
103
|
+
return row['setting_query_hash']
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def detect_delete(config, current_id, previous_id)
|
108
|
+
return unless config['enable_delete'] == 1
|
109
|
+
deleted_ids = collect_gap_ids(config['name'], current_id, previous_id)
|
110
|
+
unless deleted_ids.empty?
|
111
|
+
event = :delete
|
112
|
+
deleted_ids.each do |id|
|
113
|
+
emit_record("#{config['tag']}.#{event.to_s}", {config['primary_key'] => id})
|
114
|
+
end
|
115
|
+
update_hashtable({:event => event, :ids => deleted_ids, :setting_name => config['name']})
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def collect_gap_ids(setting_name, current_id, previous_id)
|
120
|
+
if (current_id - previous_id) > 1
|
121
|
+
query = "SELECT setting_query_pk FROM hash_tables
|
122
|
+
WHERE setting_name = '#{setting_name}'
|
123
|
+
AND setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
|
124
|
+
elsif previous_id > current_id
|
125
|
+
query = "SELECT setting_query_pk FROM hash_tables
|
126
|
+
WHERE setting_name = '#{setting_name}'
|
127
|
+
AND setting_query_pk > #{previous_id.to_i}"
|
128
|
+
elsif previous_id == current_id
|
129
|
+
query = "SELECT setting_query_pk FROM hash_tables
|
130
|
+
WHERE setting_name = '#{setting_name}'
|
131
|
+
AND (setting_query_pk > #{current_id.to_i} OR setting_query_pk < #{current_id.to_i})"
|
132
|
+
end
|
133
|
+
ids = Array.new
|
134
|
+
unless query.nil?
|
135
|
+
@manager_db.query(query).each do |row|
|
136
|
+
ids << row['setting_query_pk']
|
137
|
+
end
|
138
|
+
end
|
139
|
+
return ids
|
140
|
+
end
|
141
|
+
|
142
|
+
def update_hashtable(opts)
|
143
|
+
ids = opts[:ids].is_a?(Integer) ? [opts[:ids]] : opts[:ids]
|
144
|
+
ids.each do |id|
|
145
|
+
case opts[:event]
|
146
|
+
when :insert
|
147
|
+
query = "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash) values('#{opts[:setting_name]}','#{id}','#{opts[:hash]}')"
|
148
|
+
when :update
|
149
|
+
query = "update hash_tables set setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
|
150
|
+
when :delete
|
151
|
+
query = "delete from hash_tables WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
|
152
|
+
end
|
153
|
+
@manager_db.query(query) unless query.nil?
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
def emit_record(tag, record)
|
158
|
+
Engine.emit(tag, Engine.now, record)
|
159
|
+
end
|
160
|
+
|
161
|
+
def get_manager_connection
|
162
|
+
begin
|
163
|
+
return Mysql2::Client.new(
|
164
|
+
:host => @manager_host,
|
165
|
+
:port => @manager_port,
|
166
|
+
:username => @manager_username,
|
167
|
+
:password => @manager_password,
|
168
|
+
:database => @manager_database,
|
169
|
+
:encoding => 'utf8',
|
170
|
+
:reconnect => true,
|
171
|
+
:stream => false,
|
172
|
+
:cache_rows => false
|
173
|
+
)
|
174
|
+
rescue Exception => e
|
175
|
+
$log.warn "mysql_replicator_multi: #{e}"
|
176
|
+
sleep @reconnect_interval
|
177
|
+
retry
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
def get_origin_connection(config)
|
182
|
+
begin
|
183
|
+
return Mysql2::Client.new(
|
184
|
+
:host => config['host'],
|
185
|
+
:port => config['manager_port'],
|
186
|
+
:username => config['username'],
|
187
|
+
:password => config['password'],
|
188
|
+
:database => config['database'],
|
189
|
+
:encoding => 'utf8',
|
190
|
+
:reconnect => true,
|
191
|
+
:stream => true,
|
192
|
+
:cache_rows => false
|
193
|
+
)
|
194
|
+
rescue Exception => e
|
195
|
+
$log.warn "mysql_replicator_multi: #{e}"
|
196
|
+
sleep @reconnect_interval
|
197
|
+
retry
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
CREATE DATABASE replicator_manager;
|
2
|
+
USE replicator_manager;
|
3
|
+
|
4
|
+
CREATE TABLE `hash_tables` (
|
5
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
6
|
+
`setting_name` varchar(255) NOT NULL,
|
7
|
+
`setting_query_pk` int(11) NOT NULL,
|
8
|
+
`setting_query_hash` varchar(255) NOT NULL,
|
9
|
+
PRIMARY KEY (`id`),
|
10
|
+
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
11
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
12
|
+
|
13
|
+
CREATE TABLE `settings` (
|
14
|
+
`id` int(11) NOT NULL AUTO_INCREMENT,
|
15
|
+
`name` varchar(255) NOT NULL,
|
16
|
+
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
17
|
+
`port` int(11) NOT NULL DEFAULT '3306',
|
18
|
+
`username` varchar(255) NOT NULL,
|
19
|
+
`password` varchar(255) NOT NULL,
|
20
|
+
`database` varchar(255) NOT NULL,
|
21
|
+
`query` TEXT NOT NULL,
|
22
|
+
`interval` int(11) NOT NULL,
|
23
|
+
`tag` varchar(255) NOT NULL,
|
24
|
+
`primary_key` varchar(11) DEFAULT 'id',
|
25
|
+
`enable_delete` int(11) DEFAULT '1',
|
26
|
+
PRIMARY KEY (`id`),
|
27
|
+
UNIQUE KEY `name` (`name`)
|
28
|
+
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
data/test/helper.rb
CHANGED
@@ -28,11 +28,13 @@ class MysqlReplicatorInputTest < Test::Unit::TestCase
|
|
28
28
|
interval 30
|
29
29
|
tag input.mysql
|
30
30
|
query SELECT id, text from search_text
|
31
|
+
enable_delete no
|
31
32
|
]
|
32
33
|
d.instance.inspect
|
33
34
|
assert_equal 'localhost', d.instance.host
|
34
35
|
assert_equal 3306, d.instance.port
|
35
36
|
assert_equal 30, d.instance.interval
|
36
37
|
assert_equal 'input.mysql', d.instance.tag
|
38
|
+
assert_equal false, d.instance.enable_delete
|
37
39
|
end
|
38
40
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class MysqlReplicatorMultiInputTest < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
Fluent::Test.setup
|
6
|
+
end
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
manager_host localhost
|
10
|
+
manager_port 3306
|
11
|
+
manager_username foo
|
12
|
+
manager_password bar
|
13
|
+
]
|
14
|
+
|
15
|
+
def create_driver(conf=CONFIG,tag='test')
|
16
|
+
Fluent::Test::OutputTestDriver.new(Fluent::MysqlReplicatorMultiInput, tag).configure(conf)
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_configure
|
20
|
+
d = create_driver(CONFIG)
|
21
|
+
d.instance.inspect
|
22
|
+
assert_equal 'localhost', d.instance.manager_host
|
23
|
+
assert_equal 3306, d.instance.manager_port
|
24
|
+
assert_equal 'replicator_manager', d.instance.manager_database
|
25
|
+
end
|
26
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-mysql-replicator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-12-
|
12
|
+
date: 2013-12-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -74,8 +74,11 @@ files:
|
|
74
74
|
- Rakefile
|
75
75
|
- fluent-plugin-mysql-replicator.gemspec
|
76
76
|
- lib/fluent/plugin/in_mysql_replicator.rb
|
77
|
+
- lib/fluent/plugin/in_mysql_replicator_multi.rb
|
78
|
+
- setup_mysql_replicator_multi.sql
|
77
79
|
- test/helper.rb
|
78
80
|
- test/plugin/test_in_mysql_replicator.rb
|
81
|
+
- test/plugin/test_in_mysql_replicator_multi.rb
|
79
82
|
homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
|
80
83
|
licenses: []
|
81
84
|
post_install_message:
|
@@ -99,8 +102,9 @@ rubyforge_project:
|
|
99
102
|
rubygems_version: 1.8.23
|
100
103
|
signing_key:
|
101
104
|
specification_version: 3
|
102
|
-
summary: Fluentd input plugin to track insert/update/delete event from MySQL
|
103
|
-
|
105
|
+
summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
|
106
|
+
It will support multiple table replication to another RDB/noSQL like Elasticsearch.
|
104
107
|
test_files:
|
105
108
|
- test/helper.rb
|
106
109
|
- test/plugin/test_in_mysql_replicator.rb
|
110
|
+
- test/plugin/test_in_mysql_replicator_multi.rb
|