fluent-plugin-mysql-replicator 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +9 -0
- data/README.md +34 -179
- data/Tutorial-mysql_replicator.md +64 -0
- data/Tutorial-mysql_replicator_multi.md +129 -0
- data/example/mysql_multi_table_to_elasticsearch.md +4 -1
- data/example/mysql_multi_table_to_solr.md +7 -1
- data/example/mysql_single_table_to_elasticsearch.md +4 -1
- data/example/mysql_single_table_to_solr.md +7 -1
- data/fluent-plugin-mysql-replicator.gemspec +2 -2
- data/lib/fluent/plugin/in_mysql_replicator.rb +15 -4
- data/lib/fluent/plugin/in_mysql_replicator_multi.rb +16 -4
- data/setup_mysql_replicator_multi.sql +7 -3
- metadata +6 -4
data/.travis.yml
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
language: ruby
|
2
2
|
|
3
3
|
rvm:
|
4
|
+
- 2.1.0
|
4
5
|
- 2.0.0
|
5
6
|
- 1.9.3
|
7
|
+
|
8
|
+
services:
|
9
|
+
- elasticsearch
|
10
|
+
|
11
|
+
before_script:
|
12
|
+
- mysql < setup_mysql_replicator_multi.sql
|
13
|
+
- curl https://raw.github.com/moliware/travis-solr/master/travis-solr.sh | SOLR_VERSION=4.6.1 bash
|
14
|
+
|
data/README.md
CHANGED
@@ -8,18 +8,17 @@ It's comming support replicate to another RDB/noSQL.
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
|
12
|
-
### native gem
|
13
|
-
gem install fluent-plugin-mysql-replicator
|
11
|
+
install with gem or fluent-gem command as:
|
14
12
|
|
15
|
-
|
16
|
-
|
13
|
+
`````
|
14
|
+
# for system installed fluentd
|
15
|
+
$ gem install fluent-plugin-mysql-replicator
|
17
16
|
|
18
|
-
|
19
|
-
|
17
|
+
# for td-agent
|
18
|
+
$ sudo /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
|
20
19
|
`````
|
21
20
|
|
22
|
-
**Note:** RPM package available which does not conflict system installed Ruby or td-agent.
|
21
|
+
**Note:** [recommended] An RPM package is available which does not conflict with the system-installed Ruby or td-agent.
|
23
22
|
https://github.com/y-ken/yamabiko/releases
|
24
23
|
|
25
24
|
|
@@ -56,194 +55,50 @@ $ tail -f /var/log/td-agent/td-agent.log
|
|
56
55
|
2013-11-25 18:22:45 +0900 replicator.myweb.search_test.delete.id: {"id":"1"}
|
57
56
|
`````
|
58
57
|
|
59
|
-
##
|
60
|
-
|
61
|
-
* [mysql_single_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_elasticsearch.md)
|
62
|
-
* [mysql_multi_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_elasticsearch.md)
|
63
|
-
* [mysql_single_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_solr.md)
|
64
|
-
* [mysql_multi_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_solr.md)
|
65
|
-
|
66
|
-
## Tutorial for Quickstart (mysql_replicator)
|
67
|
-
|
68
|
-
It is useful for these purpose.
|
69
|
-
|
70
|
-
* try it on this plugin quickly.
|
71
|
-
* replicate small record under a millons table.
|
72
|
-
|
73
|
-
**Note:**
|
74
|
-
On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
|
75
|
-
|
76
|
-
### configuration
|
77
|
-
|
78
|
-
`````
|
79
|
-
<source>
|
80
|
-
type mysql_replicator
|
81
|
-
|
82
|
-
# Set connection settings for replicate source.
|
83
|
-
host localhost
|
84
|
-
username your_mysql_user
|
85
|
-
password your_mysql_password
|
86
|
-
database myweb
|
87
|
-
|
88
|
-
# Set replicate query configuration.
|
89
|
-
query SELECT id, text, updated_at from search_test;
|
90
|
-
primary_key id # specify unique key (default: id)
|
91
|
-
interval 10s # execute query interval (default: 1m)
|
92
|
-
|
93
|
-
# Enable detect deletion event not only insert/update events. (default: yes)
|
94
|
-
# It is useful to use `enable_delete no` that keep following recently updated record with this query.
|
95
|
-
# `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
|
96
|
-
enable_delete yes
|
97
|
-
|
98
|
-
# Format output tag for each events. Placeholders usage as described below.
|
99
|
-
tag replicator.myweb.search_test.${event}.${primary_key}
|
100
|
-
# ${event} : the variation of row event type by insert/update/delete.
|
101
|
-
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
102
|
-
</source>
|
103
|
-
|
104
|
-
<match replicator.*>
|
105
|
-
type copy
|
106
|
-
<store>
|
107
|
-
type stdout
|
108
|
-
</store>
|
109
|
-
<store>
|
110
|
-
type mysql_replicator_elasticsearch
|
111
|
-
|
112
|
-
# Set Elasticsearch connection.
|
113
|
-
host localhost
|
114
|
-
port 9200
|
115
|
-
|
116
|
-
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
117
|
-
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
118
|
-
|
119
|
-
# Set frequency of sending bulk request to Elasticsearch node.
|
120
|
-
flush_interval 5s
|
121
|
-
|
122
|
-
# Queued chunks are flushed at shutdown process. (recommend for more stability)
|
123
|
-
flush_at_shutdown yes
|
124
|
-
buffer_type file
|
125
|
-
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
126
|
-
</store>
|
127
|
-
</match>
|
128
|
-
`````
|
129
|
-
|
130
|
-
## Tutorial for Production (mysql_replicator_multi)
|
131
|
-
|
132
|
-
It is very useful to replicate a millions of records and/or multiple tables with multiple threads.
|
133
|
-
This architecture is storing hash table in mysql management table instead of ruby internal memory.
|
134
|
-
|
135
|
-
**Note:**
|
136
|
-
On syncing 300 million rows table, it will consume around 20MB of memory with ruby 1.9.3 environment.
|
58
|
+
## Tutorial
|
137
59
|
|
138
|
-
###
|
60
|
+
### mysql_replicator
|
139
61
|
|
140
|
-
It
|
62
|
+
It is an easy way to try out this plugin quickly.
|
63
|
+
More details are described in [Tutorial-mysql_replicator.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/Tutorial-mysql_replicator.md).
|
141
64
|
|
142
|
-
|
143
|
-
* add replicator configuration.
|
65
|
+
**Features**
|
144
66
|
|
145
|
-
|
67
|
+
* Table (or view table) synchronization supported.
|
68
|
+
* Replicates small tables (fewer than millions of records).
|
69
|
+
* Recommended for use with insert-only tables.
|
70
|
+
* Nested documents are supported via placeholders that access a temporary table created on each loop.
|
146
71
|
|
147
|
-
|
148
|
-
$ cat setup_mysql_replicator_multi.sql
|
149
|
-
CREATE DATABASE replicator_manager;
|
150
|
-
USE replicator_manager;
|
72
|
+
**Examples**
|
151
73
|
|
152
|
-
|
153
|
-
|
154
|
-
`setting_name` varchar(255) NOT NULL,
|
155
|
-
`setting_query_pk` int(11) NOT NULL,
|
156
|
-
`setting_query_hash` varchar(255) NOT NULL,
|
157
|
-
PRIMARY KEY (`id`),
|
158
|
-
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
159
|
-
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
160
|
-
|
161
|
-
CREATE TABLE `settings` (
|
162
|
-
`id` int(11) NOT NULL AUTO_INCREMENT,
|
163
|
-
`is_active` int(11) NOT NULL DEFAULT '1',
|
164
|
-
`name` varchar(255) NOT NULL,
|
165
|
-
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
166
|
-
`port` int(11) NOT NULL DEFAULT '3306',
|
167
|
-
`username` varchar(255) NOT NULL,
|
168
|
-
`password` varchar(255) NOT NULL,
|
169
|
-
`database` varchar(255) NOT NULL,
|
170
|
-
`query` TEXT NOT NULL,
|
171
|
-
`interval` int(11) NOT NULL,
|
172
|
-
`primary_key` varchar(255) DEFAULT 'id',
|
173
|
-
`enable_delete` int(11) DEFAULT '1',
|
174
|
-
`enable_loose_insert` int(11) DEFAULT '0',
|
175
|
-
`enable_loose_delete` int(11) DEFAULT '0',
|
176
|
-
PRIMARY KEY (`id`),
|
177
|
-
UNIQUE KEY `name` (`name`)
|
178
|
-
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
179
|
-
```
|
180
|
-
|
181
|
-
##### add replicator configuration.
|
182
|
-
|
183
|
-
```
|
184
|
-
$ mysql -umysqluser -p
|
185
|
-
|
186
|
-
-- For the first time, load schema.
|
187
|
-
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
188
|
-
|
189
|
-
-- Add replicate source connection and query settings like below.
|
190
|
-
mysql> INSERT INTO `settings`
|
191
|
-
(`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
|
192
|
-
VALUES
|
193
|
-
(NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
|
194
|
-
```
|
195
|
-
|
196
|
-
it is a sample which you have inserted row.
|
197
|
-
|
198
|
-
| id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete | enable_loose_insert | enable_loose_delete |
|
199
|
-
|----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|----|----|
|
200
|
-
| 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 | 0 | 0 |
|
201
|
-
|
202
|
-
### configuration
|
203
|
-
|
204
|
-
`````
|
205
|
-
<source>
|
206
|
-
type mysql_replicator_multi
|
207
|
-
|
208
|
-
# Database connection setting for manager table.
|
209
|
-
manager_host localhost
|
210
|
-
manager_username your_mysql_user
|
211
|
-
manager_password your_mysql_password
|
212
|
-
manager_database replicator_manager
|
213
|
-
|
214
|
-
# Format output tag for each events. Placeholders usage as described below.
|
215
|
-
tag replicator.${name}.${event}.${primary_key}
|
216
|
-
# ${name} : the value of `replicator_manager.settings.name` in manager table.
|
217
|
-
# ${event} : the variation of row event type by insert/update/delete.
|
218
|
-
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
219
|
-
</source>
|
74
|
+
* [mysql_single_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_elasticsearch.md)
|
75
|
+
* [mysql_single_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_solr.md)
|
220
76
|
|
221
|
-
|
222
|
-
type mysql_replicator_elasticsearch
|
77
|
+
### mysql_replicator_multi
|
223
78
|
|
224
|
-
|
225
|
-
|
226
|
-
|
79
|
+
It replicates millions of records and/or multiple tables with multiple threads.
|
80
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
81
|
+
See tutorial at [Tutorial-mysql_replicator_multi.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/Tutorial-mysql_replicator_multi.md)
|
227
82
|
|
228
|
-
|
229
|
-
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
83
|
+
**Features**
|
230
84
|
|
231
|
-
|
232
|
-
|
85
|
+
* Table (or view table) synchronization supported.
|
86
|
+
* Multiple-table synchronization is supported, with each DSN stored in the MySQL management table.
|
87
|
+
* Uses a MySQL database as the hash table cache to support replicating tables with millions of records.
|
88
|
+
* Recommended for making whole copies of tables.
|
89
|
+
* Nested documents are supported via placeholders that access a temporary table created on each loop.
|
233
90
|
|
234
|
-
|
235
|
-
max_retry_wait 1800
|
91
|
+
**Examples**
|
236
92
|
|
237
|
-
|
238
|
-
|
239
|
-
</match>
|
240
|
-
`````
|
93
|
+
* [mysql_multi_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_elasticsearch.md)
|
94
|
+
* [mysql_multi_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_solr.md)
|
241
95
|
|
242
96
|
## TODO
|
243
97
|
|
244
98
|
Pull requests such as the following are very welcome!
|
245
99
|
|
246
|
-
* more
|
100
|
+
* more documents
|
101
|
+
* more tests with mock.
|
247
102
|
* support string type of primary_key.
|
248
103
|
* support reload setting on demand.
|
249
104
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
## Tutorial for Quickstart (mysql_replicator)
|
2
|
+
|
3
|
+
It is useful for these purposes.
|
4
|
+
|
5
|
+
* try it on this plugin quickly.
|
6
|
+
* replicate small tables (fewer than millions of records).
|
7
|
+
|
8
|
+
**Note:**
|
9
|
+
On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
|
10
|
+
|
11
|
+
### configuration
|
12
|
+
|
13
|
+
`````
|
14
|
+
<source>
|
15
|
+
type mysql_replicator
|
16
|
+
|
17
|
+
# Set connection settings for replicate source.
|
18
|
+
host localhost
|
19
|
+
username your_mysql_user
|
20
|
+
password your_mysql_password
|
21
|
+
database myweb
|
22
|
+
|
23
|
+
# Set replicate query configuration.
|
24
|
+
query SELECT id, text, updated_at from search_test;
|
25
|
+
primary_key id # specify unique key (default: id)
|
26
|
+
interval 10s # execute query interval (default: 1m)
|
27
|
+
|
28
|
+
# Enable detect deletion event not only insert/update events. (default: yes)
|
29
|
+
# It is useful to use `enable_delete no` that keep following recently updated record with this query.
|
30
|
+
# `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
|
31
|
+
enable_delete yes
|
32
|
+
|
33
|
+
# Format output tag for each events. Placeholders usage as described below.
|
34
|
+
tag replicator.myweb.search_test.${event}.${primary_key}
|
35
|
+
# ${event} : the variation of row event type by insert/update/delete.
|
36
|
+
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
37
|
+
</source>
|
38
|
+
|
39
|
+
<match replicator.*>
|
40
|
+
type copy
|
41
|
+
<store>
|
42
|
+
type stdout
|
43
|
+
</store>
|
44
|
+
<store>
|
45
|
+
type mysql_replicator_elasticsearch
|
46
|
+
|
47
|
+
# Set Elasticsearch connection.
|
48
|
+
host localhost
|
49
|
+
port 9200
|
50
|
+
|
51
|
+
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
52
|
+
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
53
|
+
|
54
|
+
# Set frequency of sending bulk request to Elasticsearch node.
|
55
|
+
flush_interval 5s
|
56
|
+
|
57
|
+
# Queued chunks are flushed at shutdown process. (recommend for more stability)
|
58
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
59
|
+
flush_at_shutdown yes
|
60
|
+
buffer_type file
|
61
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
62
|
+
</store>
|
63
|
+
</match>
|
64
|
+
`````
|
@@ -0,0 +1,129 @@
|
|
1
|
+
## Tutorial for Production (mysql_replicator_multi)
|
2
|
+
|
3
|
+
It is very useful for replicating millions of records and/or multiple tables with multiple threads.
|
4
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
5
|
+
|
6
|
+
**Note:**
|
7
|
+
On syncing 300 million rows table, it will consume around 20MB of memory with ruby 1.9.3 environment.
|
8
|
+
|
9
|
+
### prepare
|
10
|
+
|
11
|
+
It is done with the following two steps.
|
12
|
+
|
13
|
+
* create database and tables.
|
14
|
+
* add replicator configuration.
|
15
|
+
|
16
|
+
##### create database and tables.
|
17
|
+
|
18
|
+
```
|
19
|
+
$ mysql -umysqluser -p
|
20
|
+
|
21
|
+
-- For the first time, load schema.
|
22
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
23
|
+
```
|
24
|
+
|
25
|
+
see [setup_mysql_replicator_multi.sql](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/setup_mysql_replicator_multi.sql)
|
26
|
+
|
27
|
+
##### add replicator configuration.
|
28
|
+
|
29
|
+
Let's download sql first.
|
30
|
+
|
31
|
+
```
|
32
|
+
$ wget https://raw2.github.com/y-ken/fluent-plugin-mysql-replicator/master/setup_mysql_replicator_multi.sql
|
33
|
+
```
|
34
|
+
|
35
|
+
```sql
|
36
|
+
-- Build
|
37
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
38
|
+
|
39
|
+
-- Set working database
|
40
|
+
mysql> use replicator_manager;
|
41
|
+
|
42
|
+
-- Add replicate source connection and query settings like below.
|
43
|
+
mysql> INSERT INTO `settings`
|
44
|
+
(`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
|
45
|
+
VALUES
|
46
|
+
(NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
|
47
|
+
```
|
48
|
+
|
49
|
+
This is a sample of the row you have inserted.
|
50
|
+
|
51
|
+
<table>
|
52
|
+
<thead><tr>
|
53
|
+
<th>id</th>
|
54
|
+
<th>is_active</th>
|
55
|
+
<th>name</th>
|
56
|
+
<th>host</th>
|
57
|
+
<th>port</th>
|
58
|
+
<th>username</th>
|
59
|
+
<th>password</th>
|
60
|
+
<th>database</th>
|
61
|
+
<th>query</th>
|
62
|
+
<th>prepared_query</th>
|
63
|
+
<th>interval</th>
|
64
|
+
<th>primary_key</th>
|
65
|
+
<th>enable_delete</th>
|
66
|
+
<th>enable_loose_insert</th>
|
67
|
+
<th>enable_loose_delete</th>
|
68
|
+
</tr></thead>
|
69
|
+
<tbody><tr>
|
70
|
+
<td>1</td>
|
71
|
+
<td>1</td>
|
72
|
+
<td>mydb.mytable</td>
|
73
|
+
<td>192.168.100.221</td>
|
74
|
+
<td>3306</td>
|
75
|
+
<td>mysqluser</td>
|
76
|
+
<td>mysqlpassword</td>
|
77
|
+
<td>mydb</td>
|
78
|
+
<td>SELECT id, text from mytable;</td>
|
79
|
+
<td> </td>
|
80
|
+
<td>5</td>
|
81
|
+
<td>id</td>
|
82
|
+
<td>1</td>
|
83
|
+
<td>0</td>
|
84
|
+
<td>0</td>
|
85
|
+
</tr></tbody>
|
86
|
+
</table>
|
87
|
+
|
88
|
+
### configuration
|
89
|
+
|
90
|
+
`````
|
91
|
+
<source>
|
92
|
+
type mysql_replicator_multi
|
93
|
+
|
94
|
+
# Database connection setting for manager table.
|
95
|
+
manager_host localhost
|
96
|
+
manager_username your_mysql_user
|
97
|
+
manager_password your_mysql_password
|
98
|
+
manager_database replicator_manager
|
99
|
+
|
100
|
+
# Format output tag for each events. Placeholders usage as described below.
|
101
|
+
tag replicator.${name}.${event}.${primary_key}
|
102
|
+
# ${name} : the value of `replicator_manager.settings.name` in manager table.
|
103
|
+
# ${event} : the variation of row event type by insert/update/delete.
|
104
|
+
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
105
|
+
</source>
|
106
|
+
|
107
|
+
<match replicator.**>
|
108
|
+
type mysql_replicator_elasticsearch
|
109
|
+
|
110
|
+
# Set Elasticsearch connection.
|
111
|
+
host localhost
|
112
|
+
port 9200
|
113
|
+
|
114
|
+
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
115
|
+
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
116
|
+
|
117
|
+
# Set frequency of sending bulk request to Elasticsearch node.
|
118
|
+
flush_interval 5s
|
119
|
+
|
120
|
+
# Set maximum retry interval (required fluentd >= 0.10.41)
|
121
|
+
max_retry_wait 1800
|
122
|
+
|
123
|
+
# Queued chunks are flushed at shutdown process.
|
124
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
125
|
+
flush_at_shutdown yes
|
126
|
+
buffer_type file
|
127
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
128
|
+
</match>
|
129
|
+
`````
|
@@ -38,6 +38,9 @@ It is a guide to replicate multiple mysql table to elasticsearch.
|
|
38
38
|
max_retry_wait 1800
|
39
39
|
|
40
40
|
# Queued chunks are flushed at shutdown process.
|
41
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
41
42
|
flush_at_shutdown yes
|
43
|
+
buffer_type file
|
44
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
42
45
|
</match>
|
43
|
-
```
|
46
|
+
```
|
@@ -39,7 +39,10 @@ It is a guide to replicate multiple mysql table to solr.
|
|
39
39
|
max_retry_wait 1800
|
40
40
|
|
41
41
|
# Queued chunks are flushed at shutdown process.
|
42
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
42
43
|
flush_at_shutdown yes
|
44
|
+
buffer_type file
|
45
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
43
46
|
</match>
|
44
47
|
```
|
45
48
|
|
@@ -64,6 +67,9 @@ When you use default core (won't specify), change the value of `tag_format` like
|
|
64
67
|
max_retry_wait 1800
|
65
68
|
|
66
69
|
# Queued chunks are flushed at shutdown process.
|
70
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
67
71
|
flush_at_shutdown yes
|
72
|
+
buffer_type file
|
73
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_solr
|
68
74
|
</match>
|
69
|
-
```
|
75
|
+
```
|
@@ -47,6 +47,9 @@ It is a guide to replicate single mysql table to elasticsearch.
|
|
47
47
|
max_retry_wait 1800
|
48
48
|
|
49
49
|
# Queued chunks are flushed at shutdown process.
|
50
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
50
51
|
flush_at_shutdown yes
|
52
|
+
buffer_type file
|
53
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
51
54
|
</match>
|
52
|
-
```
|
55
|
+
```
|
@@ -48,7 +48,10 @@ It is a guide to replicate single mysql table to solr.
|
|
48
48
|
max_retry_wait 1800
|
49
49
|
|
50
50
|
# Queued chunks are flushed at shutdown process.
|
51
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
51
52
|
flush_at_shutdown yes
|
53
|
+
buffer_type file
|
54
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
52
55
|
</match>
|
53
56
|
```
|
54
57
|
|
@@ -74,6 +77,9 @@ On this case, the solr url will be set `http://localhost:8983/solr`
|
|
74
77
|
max_retry_wait 1800
|
75
78
|
|
76
79
|
# Queued chunks are flushed at shutdown process.
|
80
|
+
# It's sample for td-agent. If you use Yamabiko, replace path from 'td-agent' to 'yamabiko'.
|
77
81
|
flush_at_shutdown yes
|
82
|
+
buffer_type file
|
83
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_solr
|
78
84
|
</match>
|
79
|
-
```
|
85
|
+
```
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = "fluent-plugin-mysql-replicator"
|
4
|
-
s.version = "0.
|
4
|
+
s.version = "0.4.0"
|
5
5
|
s.authors = ["Kentaro Yoshida"]
|
6
6
|
s.email = ["y.ken.studio@gmail.com"]
|
7
7
|
s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
|
8
|
-
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication
|
8
|
+
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication and generate nested document for Elasticsearch/Solr. It's comming support replicate to another RDB/noSQL.}
|
9
9
|
|
10
10
|
s.files = `git ls-files`.split("\n")
|
11
11
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -15,6 +15,7 @@ module Fluent
|
|
15
15
|
config_param :database, :string, :default => nil
|
16
16
|
config_param :encoding, :string, :default => 'utf8'
|
17
17
|
config_param :query, :string
|
18
|
+
config_param :prepared_query, :string, :default => nil
|
18
19
|
config_param :primary_key, :string, :default => 'id'
|
19
20
|
config_param :interval, :string, :default => '1m'
|
20
21
|
config_param :enable_delete, :bool, :default => true
|
@@ -28,7 +29,7 @@ module Fluent
|
|
28
29
|
raise Fluent::ConfigError, "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
|
29
30
|
end
|
30
31
|
|
31
|
-
$log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>#{@query} :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
|
32
|
+
$log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>#{@query} :prepared_query=>#{@prepared_query} :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
|
32
33
|
end
|
33
34
|
|
34
35
|
def start
|
@@ -57,10 +58,20 @@ module Fluent
|
|
57
58
|
start_time = Time.now
|
58
59
|
previous_ids = ids
|
59
60
|
current_ids = Array.new
|
61
|
+
prepared_con = get_connection()
|
62
|
+
@prepared_query.split(/;/).each do |query|
|
63
|
+
prepared_con.query(query)
|
64
|
+
end
|
60
65
|
query(@query).each do |row|
|
61
66
|
current_ids << row[@primary_key]
|
62
67
|
current_hash = Digest::SHA1.hexdigest(row.flatten.join)
|
63
68
|
row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
|
69
|
+
row.select {|k, v| v.to_s.match(/^SELECT/i) }.each do |k, v|
|
70
|
+
row[k] = [] unless row[k].is_a?(Array)
|
71
|
+
prepared_con.query(v.gsub(/\$\{([^\}]+)\}/, row[$1].to_s)).each do |nest_row|
|
72
|
+
row[k] << nest_row
|
73
|
+
end
|
74
|
+
end
|
64
75
|
if row[@primary_key].nil?
|
65
76
|
$log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{primary_key}"
|
66
77
|
break
|
@@ -112,10 +123,10 @@ module Fluent
|
|
112
123
|
Engine.emit(tag, Engine.now, record)
|
113
124
|
end
|
114
125
|
|
115
|
-
def query(query)
|
116
|
-
@mysql ||= get_connection
|
126
|
+
def query(query, con = nil)
|
117
127
|
begin
|
118
|
-
|
128
|
+
mysql = get_connection if con.nil?
|
129
|
+
return mysql.query(query)
|
119
130
|
rescue Exception => e
|
120
131
|
$log.warn "mysql_replicator: #{e}"
|
121
132
|
sleep @interval
|
@@ -75,9 +75,21 @@ module Fluent
|
|
75
75
|
loop do
|
76
76
|
rows_count = 0
|
77
77
|
start_time = Time.now
|
78
|
+
unless config['prepared_query'].nil?
|
79
|
+
nest_db = get_origin_connection(config)
|
80
|
+
config['prepared_query'].strip.split(/;/).each do |query|
|
81
|
+
nest_db.query(query)
|
82
|
+
end
|
83
|
+
end
|
78
84
|
db = get_origin_connection(config)
|
79
85
|
db.query(config['query']).each do |row|
|
80
86
|
row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
|
87
|
+
row.select {|k, v| v.to_s.match(/^SELECT/i) }.each do |k, v|
|
88
|
+
row[k] = [] unless row[k].is_a?(Array)
|
89
|
+
nest_db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
|
90
|
+
row[k] << nest_row
|
91
|
+
end
|
92
|
+
end
|
81
93
|
current_id = row[primary_key]
|
82
94
|
@mutex.synchronize {
|
83
95
|
if row[primary_key].nil?
|
@@ -148,17 +160,17 @@ module Fluent
|
|
148
160
|
setting_name = config['name']
|
149
161
|
if (current_id - previous_id) > 1 && config['enable_loose_delete'] == 0
|
150
162
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
151
|
-
WHERE setting_name = '#{setting_name}'
|
163
|
+
WHERE setting_name = '#{setting_name}'
|
152
164
|
AND setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
|
153
165
|
elsif (current_id - previous_id) > 1 && config['enable_loose_delete'] == 1
|
154
166
|
return [*previous_id...current_id] - [current_id,previous_id]
|
155
167
|
elsif previous_id > current_id
|
156
168
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
157
|
-
WHERE setting_name = '#{setting_name}'
|
169
|
+
WHERE setting_name = '#{setting_name}'
|
158
170
|
AND setting_query_pk > #{previous_id.to_i}"
|
159
171
|
elsif previous_id == current_id
|
160
172
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
161
|
-
WHERE setting_name = '#{setting_name}'
|
173
|
+
WHERE setting_name = '#{setting_name}'
|
162
174
|
AND (setting_query_pk > #{current_id.to_i} OR setting_query_pk < #{current_id.to_i})"
|
163
175
|
end
|
164
176
|
ids = Array.new
|
@@ -217,7 +229,7 @@ module Fluent
|
|
217
229
|
|
218
230
|
def flush_hash_table
|
219
231
|
return if @hash_table_bulk_insert.empty?
|
220
|
-
query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
|
232
|
+
query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
|
221
233
|
VALUES #{@hash_table_bulk_insert.join(',')}
|
222
234
|
ON DUPLICATE KEY UPDATE setting_query_hash = VALUES(setting_query_hash)"
|
223
235
|
@manager_db.query(query)
|
@@ -1,7 +1,7 @@
|
|
1
|
-
CREATE DATABASE replicator_manager;
|
1
|
+
CREATE DATABASE IF NOT EXISTS replicator_manager;
|
2
2
|
USE replicator_manager;
|
3
3
|
|
4
|
-
CREATE TABLE `hash_tables` (
|
4
|
+
CREATE TABLE IF NOT EXISTS `hash_tables` (
|
5
5
|
`id` int(11) NOT NULL AUTO_INCREMENT,
|
6
6
|
`setting_name` varchar(255) NOT NULL,
|
7
7
|
`setting_query_pk` int(11) NOT NULL,
|
@@ -10,7 +10,7 @@ CREATE TABLE `hash_tables` (
|
|
10
10
|
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
11
11
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
12
12
|
|
13
|
-
CREATE TABLE `settings` (
|
13
|
+
CREATE TABLE IF NOT EXISTS `settings` (
|
14
14
|
`id` int(11) NOT NULL AUTO_INCREMENT,
|
15
15
|
`is_active` int(11) NOT NULL DEFAULT '1',
|
16
16
|
`name` varchar(255) NOT NULL,
|
@@ -20,10 +20,14 @@ CREATE TABLE `settings` (
|
|
20
20
|
`password` varchar(255) NOT NULL,
|
21
21
|
`database` varchar(255) NOT NULL,
|
22
22
|
`query` TEXT NOT NULL,
|
23
|
+
-- Use this field to pre-execute a query (e.g. creating a TEMPORARY TABLE) to improve the performance of generating nested documents.
|
24
|
+
`prepared_query` TEXT NOT NULL,
|
23
25
|
`interval` int(11) NOT NULL,
|
24
26
|
`primary_key` varchar(255) DEFAULT 'id',
|
25
27
|
`enable_delete` int(11) DEFAULT '1',
|
28
|
+
-- On enabling 'enable_loose_insert: 1', make it faster synchronization to skip checking hash_tables.
|
26
29
|
`enable_loose_insert` int(11) DEFAULT '0',
|
30
|
+
-- On enabling 'enable_loose_delete: 1', turn on speculative delete but performance penalty on non-contiguous primary key.
|
27
31
|
`enable_loose_delete` int(11) DEFAULT '0',
|
28
32
|
PRIMARY KEY (`id`),
|
29
33
|
UNIQUE KEY `name` (`name`)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-mysql-replicator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -104,6 +104,8 @@ files:
|
|
104
104
|
- LICENSE
|
105
105
|
- README.md
|
106
106
|
- Rakefile
|
107
|
+
- Tutorial-mysql_replicator.md
|
108
|
+
- Tutorial-mysql_replicator_multi.md
|
107
109
|
- example/mysql_multi_table_to_elasticsearch.md
|
108
110
|
- example/mysql_multi_table_to_solr.md
|
109
111
|
- example/mysql_single_table_to_elasticsearch.md
|
@@ -143,8 +145,8 @@ rubygems_version: 1.8.23
|
|
143
145
|
signing_key:
|
144
146
|
specification_version: 3
|
145
147
|
summary: Fluentd input plugin to track insert/update/delete event from MySQL database
|
146
|
-
server. Not only that, it could multiple table replication
|
147
|
-
It's comming support replicate to another RDB/noSQL.
|
148
|
+
server. Not only that, it could multiple table replication and generate nested document
|
149
|
+
for Elasticsearch/Solr. It's comming support replicate to another RDB/noSQL.
|
148
150
|
test_files:
|
149
151
|
- test/helper.rb
|
150
152
|
- test/plugin/test_in_mysql_replicator.rb
|