fluent-plugin-mysql-replicator 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +9 -0
- data/README.md +34 -179
- data/Tutorial-mysql_replicator.md +64 -0
- data/Tutorial-mysql_replicator_multi.md +129 -0
- data/example/mysql_multi_table_to_elasticsearch.md +4 -1
- data/example/mysql_multi_table_to_solr.md +7 -1
- data/example/mysql_single_table_to_elasticsearch.md +4 -1
- data/example/mysql_single_table_to_solr.md +7 -1
- data/fluent-plugin-mysql-replicator.gemspec +2 -2
- data/lib/fluent/plugin/in_mysql_replicator.rb +15 -4
- data/lib/fluent/plugin/in_mysql_replicator_multi.rb +16 -4
- data/setup_mysql_replicator_multi.sql +7 -3
- metadata +6 -4
data/.travis.yml
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
language: ruby
|
2
2
|
|
3
3
|
rvm:
|
4
|
+
- 2.1.0
|
4
5
|
- 2.0.0
|
5
6
|
- 1.9.3
|
7
|
+
|
8
|
+
services:
|
9
|
+
- elasticsearch
|
10
|
+
|
11
|
+
before_script:
|
12
|
+
- mysql < setup_mysql_replicator_multi.sql
|
13
|
+
- curl https://raw.github.com/moliware/travis-solr/master/travis-solr.sh | SOLR_VERSION=4.6.1 bash
|
14
|
+
|
data/README.md
CHANGED
@@ -8,18 +8,17 @@ It's comming support replicate to another RDB/noSQL.
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
|
12
|
-
### native gem
|
13
|
-
gem install fluent-plugin-mysql-replicator
|
11
|
+
Install with the gem or fluent-gem command as follows:
|
14
12
|
|
15
|
-
|
16
|
-
|
13
|
+
`````
|
14
|
+
# for system installed fluentd
|
15
|
+
$ gem install fluent-plugin-mysql-replicator
|
17
16
|
|
18
|
-
|
19
|
-
|
17
|
+
# for td-agent
|
18
|
+
$ sudo /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
|
20
19
|
`````
|
21
20
|
|
22
|
-
**Note:** RPM package available which does not conflict system installed Ruby or td-agent.
|
21
|
+
**Note:** [recommended] An RPM package is available which does not conflict with the system-installed Ruby or td-agent.
|
23
22
|
https://github.com/y-ken/yamabiko/releases
|
24
23
|
|
25
24
|
|
@@ -56,194 +55,50 @@ $ tail -f /var/log/td-agent/td-agent.log
|
|
56
55
|
2013-11-25 18:22:45 +0900 replicator.myweb.search_test.delete.id: {"id":"1"}
|
57
56
|
`````
|
58
57
|
|
59
|
-
##
|
60
|
-
|
61
|
-
* [mysql_single_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_elasticsearch.md)
|
62
|
-
* [mysql_multi_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_elasticsearch.md)
|
63
|
-
* [mysql_single_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_solr.md)
|
64
|
-
* [mysql_multi_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_solr.md)
|
65
|
-
|
66
|
-
## Tutorial for Quickstart (mysql_replicator)
|
67
|
-
|
68
|
-
It is useful for these purpose.
|
69
|
-
|
70
|
-
* try it on this plugin quickly.
|
71
|
-
* replicate small record under a millons table.
|
72
|
-
|
73
|
-
**Note:**
|
74
|
-
On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
|
75
|
-
|
76
|
-
### configuration
|
77
|
-
|
78
|
-
`````
|
79
|
-
<source>
|
80
|
-
type mysql_replicator
|
81
|
-
|
82
|
-
# Set connection settings for replicate source.
|
83
|
-
host localhost
|
84
|
-
username your_mysql_user
|
85
|
-
password your_mysql_password
|
86
|
-
database myweb
|
87
|
-
|
88
|
-
# Set replicate query configuration.
|
89
|
-
query SELECT id, text, updated_at from search_test;
|
90
|
-
primary_key id # specify unique key (default: id)
|
91
|
-
interval 10s # execute query interval (default: 1m)
|
92
|
-
|
93
|
-
# Enable detect deletion event not only insert/update events. (default: yes)
|
94
|
-
# It is useful to use `enable_delete no` that keep following recently updated record with this query.
|
95
|
-
# `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
|
96
|
-
enable_delete yes
|
97
|
-
|
98
|
-
# Format output tag for each events. Placeholders usage as described below.
|
99
|
-
tag replicator.myweb.search_test.${event}.${primary_key}
|
100
|
-
# ${event} : the variation of row event type by insert/update/delete.
|
101
|
-
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
102
|
-
</source>
|
103
|
-
|
104
|
-
<match replicator.*>
|
105
|
-
type copy
|
106
|
-
<store>
|
107
|
-
type stdout
|
108
|
-
</store>
|
109
|
-
<store>
|
110
|
-
type mysql_replicator_elasticsearch
|
111
|
-
|
112
|
-
# Set Elasticsearch connection.
|
113
|
-
host localhost
|
114
|
-
port 9200
|
115
|
-
|
116
|
-
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
117
|
-
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
118
|
-
|
119
|
-
# Set frequency of sending bulk request to Elasticsearch node.
|
120
|
-
flush_interval 5s
|
121
|
-
|
122
|
-
# Queued chunks are flushed at shutdown process. (recommend for more stability)
|
123
|
-
flush_at_shutdown yes
|
124
|
-
buffer_type file
|
125
|
-
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
126
|
-
</store>
|
127
|
-
</match>
|
128
|
-
`````
|
129
|
-
|
130
|
-
## Tutorial for Production (mysql_replicator_multi)
|
131
|
-
|
132
|
-
It is very useful to replicate a millions of records and/or multiple tables with multiple threads.
|
133
|
-
This architecture is storing hash table in mysql management table instead of ruby internal memory.
|
134
|
-
|
135
|
-
**Note:**
|
136
|
-
On syncing 300 million rows table, it will consume around 20MB of memory with ruby 1.9.3 environment.
|
58
|
+
## Tutorial
|
137
59
|
|
138
|
-
###
|
60
|
+
### mysql_replicator
|
139
61
|
|
140
|
-
It
|
62
|
+
It is an easy way to try out this plugin quickly.
|
63
|
+
More details are described in [Tutorial-mysql_replicator.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/Tutorial-mysql_replicator.md)
|
141
64
|
|
142
|
-
|
143
|
-
* add replicator configuration.
|
65
|
+
**Features**
|
144
66
|
|
145
|
-
|
67
|
+
* Table (or view table) synchronization supported.
|
68
|
+
* Replicates small tables of under a million records.
|
69
|
+
* It is recommended for use with insert-only tables.
|
70
|
+
* Nested documents are supported via placeholders that access a temporary table created on each loop.
|
146
71
|
|
147
|
-
|
148
|
-
$ cat setup_mysql_replicator_multi.sql
|
149
|
-
CREATE DATABASE replicator_manager;
|
150
|
-
USE replicator_manager;
|
72
|
+
**Examples**
|
151
73
|
|
152
|
-
|
153
|
-
|
154
|
-
`setting_name` varchar(255) NOT NULL,
|
155
|
-
`setting_query_pk` int(11) NOT NULL,
|
156
|
-
`setting_query_hash` varchar(255) NOT NULL,
|
157
|
-
PRIMARY KEY (`id`),
|
158
|
-
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
159
|
-
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
160
|
-
|
161
|
-
CREATE TABLE `settings` (
|
162
|
-
`id` int(11) NOT NULL AUTO_INCREMENT,
|
163
|
-
`is_active` int(11) NOT NULL DEFAULT '1',
|
164
|
-
`name` varchar(255) NOT NULL,
|
165
|
-
`host` varchar(255) NOT NULL DEFAULT 'localhost',
|
166
|
-
`port` int(11) NOT NULL DEFAULT '3306',
|
167
|
-
`username` varchar(255) NOT NULL,
|
168
|
-
`password` varchar(255) NOT NULL,
|
169
|
-
`database` varchar(255) NOT NULL,
|
170
|
-
`query` TEXT NOT NULL,
|
171
|
-
`interval` int(11) NOT NULL,
|
172
|
-
`primary_key` varchar(255) DEFAULT 'id',
|
173
|
-
`enable_delete` int(11) DEFAULT '1',
|
174
|
-
`enable_loose_insert` int(11) DEFAULT '0',
|
175
|
-
`enable_loose_delete` int(11) DEFAULT '0',
|
176
|
-
PRIMARY KEY (`id`),
|
177
|
-
UNIQUE KEY `name` (`name`)
|
178
|
-
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
179
|
-
```
|
180
|
-
|
181
|
-
##### add replicator configuration.
|
182
|
-
|
183
|
-
```
|
184
|
-
$ mysql -umysqluser -p
|
185
|
-
|
186
|
-
-- For the first time, load schema.
|
187
|
-
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
188
|
-
|
189
|
-
-- Add replicate source connection and query settings like below.
|
190
|
-
mysql> INSERT INTO `settings`
|
191
|
-
(`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
|
192
|
-
VALUES
|
193
|
-
(NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
|
194
|
-
```
|
195
|
-
|
196
|
-
it is a sample which you have inserted row.
|
197
|
-
|
198
|
-
| id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete | enable_loose_insert | enable_loose_delete |
|
199
|
-
|----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|----|----|
|
200
|
-
| 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 | 0 | 0 |
|
201
|
-
|
202
|
-
### configuration
|
203
|
-
|
204
|
-
`````
|
205
|
-
<source>
|
206
|
-
type mysql_replicator_multi
|
207
|
-
|
208
|
-
# Database connection setting for manager table.
|
209
|
-
manager_host localhost
|
210
|
-
manager_username your_mysql_user
|
211
|
-
manager_password your_mysql_password
|
212
|
-
manager_database replicator_manager
|
213
|
-
|
214
|
-
# Format output tag for each events. Placeholders usage as described below.
|
215
|
-
tag replicator.${name}.${event}.${primary_key}
|
216
|
-
# ${name} : the value of `replicator_manager.settings.name` in manager table.
|
217
|
-
# ${event} : the variation of row event type by insert/update/delete.
|
218
|
-
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
219
|
-
</source>
|
74
|
+
* [mysql_single_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_elasticsearch.md)
|
75
|
+
* [mysql_single_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_single_table_to_solr.md)
|
220
76
|
|
221
|
-
|
222
|
-
type mysql_replicator_elasticsearch
|
77
|
+
### mysql_replicator_multi
|
223
78
|
|
224
|
-
|
225
|
-
|
226
|
-
|
79
|
+
It replicates millions of records and/or multiple tables with multiple threads.
|
80
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
81
|
+
See tutorial at [Tutorial-mysql_replicator_multi.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/Tutorial-mysql_replicator_multi.md)
|
227
82
|
|
228
|
-
|
229
|
-
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
83
|
+
**Features**
|
230
84
|
|
231
|
-
|
232
|
-
|
85
|
+
* Table (or view table) synchronization supported.
|
86
|
+
* Multiple table synchronization supported and its DSN stored in MySQL management table.
|
87
|
+
* Uses a MySQL database as the hash table cache to support replicating tables of over a million records.
|
88
|
+
* It is recommended for making a whole copy of tables.
|
89
|
+
* Nested documents are supported via placeholders that access a temporary table created on each loop.
|
233
90
|
|
234
|
-
|
235
|
-
max_retry_wait 1800
|
91
|
+
**Examples**
|
236
92
|
|
237
|
-
|
238
|
-
|
239
|
-
</match>
|
240
|
-
`````
|
93
|
+
* [mysql_multi_table_to_elasticsearch.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_elasticsearch.md)
|
94
|
+
* [mysql_multi_table_to_solr.md](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/example/mysql_multi_table_to_solr.md)
|
241
95
|
|
242
96
|
## TODO
|
243
97
|
|
244
98
|
Pull requests are very welcome like below!!
|
245
99
|
|
246
|
-
* more
|
100
|
+
* more documents
|
101
|
+
* more tests with mock.
|
247
102
|
* support string type of primary_key.
|
248
103
|
* support reload setting on demand.
|
249
104
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
## Tutorial for Quickstart (mysql_replicator)
|
2
|
+
|
3
|
+
It is useful for these purposes.
|
4
|
+
|
5
|
+
* try out this plugin quickly.
|
6
|
+
* replicate small tables of under a million records.
|
7
|
+
|
8
|
+
**Note:**
|
9
|
+
When syncing a table of 300 million rows, it will consume around 800MB of memory in a Ruby 1.9.3 environment.
|
10
|
+
|
11
|
+
### configuration
|
12
|
+
|
13
|
+
`````
|
14
|
+
<source>
|
15
|
+
type mysql_replicator
|
16
|
+
|
17
|
+
# Set connection settings for replicate source.
|
18
|
+
host localhost
|
19
|
+
username your_mysql_user
|
20
|
+
password your_mysql_password
|
21
|
+
database myweb
|
22
|
+
|
23
|
+
# Set replicate query configuration.
|
24
|
+
query SELECT id, text, updated_at from search_test;
|
25
|
+
primary_key id # specify unique key (default: id)
|
26
|
+
interval 10s # execute query interval (default: 1m)
|
27
|
+
|
28
|
+
# Enable detection of deletion events, not only insert/update events. (default: yes)
|
29
|
+
# Setting `enable_delete no` is useful when the query only follows recently updated records, like this one:
|
30
|
+
# `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
|
31
|
+
enable_delete yes
|
32
|
+
|
33
|
+
# Format output tag for each event. Placeholder usage is described below.
|
34
|
+
tag replicator.myweb.search_test.${event}.${primary_key}
|
35
|
+
# ${event} : the variation of row event type by insert/update/delete.
|
36
|
+
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
37
|
+
</source>
|
38
|
+
|
39
|
+
<match replicator.*>
|
40
|
+
type copy
|
41
|
+
<store>
|
42
|
+
type stdout
|
43
|
+
</store>
|
44
|
+
<store>
|
45
|
+
type mysql_replicator_elasticsearch
|
46
|
+
|
47
|
+
# Set Elasticsearch connection.
|
48
|
+
host localhost
|
49
|
+
port 9200
|
50
|
+
|
51
|
+
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
52
|
+
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
53
|
+
|
54
|
+
# Set frequency of sending bulk request to Elasticsearch node.
|
55
|
+
flush_interval 5s
|
56
|
+
|
57
|
+
# Queued chunks are flushed at shutdown process. (recommended for more stability)
|
58
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
59
|
+
flush_at_shutdown yes
|
60
|
+
buffer_type file
|
61
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
62
|
+
</store>
|
63
|
+
</match>
|
64
|
+
`````
|
@@ -0,0 +1,129 @@
|
|
1
|
+
## Tutorial for Production (mysql_replicator_multi)
|
2
|
+
|
3
|
+
It is very useful for replicating millions of records and/or multiple tables with multiple threads.
|
4
|
+
This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
|
5
|
+
|
6
|
+
**Note:**
|
7
|
+
When syncing a table of 300 million rows, it will consume around 20MB of memory in a Ruby 1.9.3 environment.
|
8
|
+
|
9
|
+
### prepare
|
10
|
+
|
11
|
+
It is done in the following two steps.
|
12
|
+
|
13
|
+
* create database and tables.
|
14
|
+
* add replicator configuration.
|
15
|
+
|
16
|
+
##### create database and tables.
|
17
|
+
|
18
|
+
```
|
19
|
+
$ mysql -umysqluser -p
|
20
|
+
|
21
|
+
-- For the first time, load schema.
|
22
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
23
|
+
```
|
24
|
+
|
25
|
+
See [setup_mysql_replicator_multi.sql](https://github.com/y-ken/fluent-plugin-mysql-replicator/blob/master/setup_mysql_replicator_multi.sql)
|
26
|
+
|
27
|
+
##### add replicator configuration.
|
28
|
+
|
29
|
+
First, download the SQL file.
|
30
|
+
|
31
|
+
```
|
32
|
+
$ wget https://raw2.github.com/y-ken/fluent-plugin-mysql-replicator/master/setup_mysql_replicator_multi.sql
|
33
|
+
```
|
34
|
+
|
35
|
+
```sql
|
36
|
+
-- Build
|
37
|
+
mysql> source /path/to/setup_mysql_replicator_multi.sql
|
38
|
+
|
39
|
+
-- Set working database
|
40
|
+
mysql> use replicator_manager;
|
41
|
+
|
42
|
+
-- Add replicate source connection and query settings like below.
|
43
|
+
mysql> INSERT INTO `settings`
|
44
|
+
(`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
|
45
|
+
VALUES
|
46
|
+
(NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
|
47
|
+
```
|
48
|
+
|
49
|
+
This is a sample of the row you have inserted.
|
50
|
+
|
51
|
+
<table>
|
52
|
+
<thead><tr>
|
53
|
+
<th>id</th>
|
54
|
+
<th>is_active</th>
|
55
|
+
<th>name</th>
|
56
|
+
<th>host</th>
|
57
|
+
<th>port</th>
|
58
|
+
<th>username</th>
|
59
|
+
<th>password</th>
|
60
|
+
<th>database</th>
|
61
|
+
<th>query</th>
|
62
|
+
<th>prepared_query</th>
|
63
|
+
<th>interval</th>
|
64
|
+
<th>primary_key</th>
|
65
|
+
<th>enable_delete</th>
|
66
|
+
<th>enable_loose_insert</th>
|
67
|
+
<th>enable_loose_delete</th>
|
68
|
+
</tr></thead>
|
69
|
+
<tbody><tr>
|
70
|
+
<td>1</td>
|
71
|
+
<td>1</td>
|
72
|
+
<td>mydb.mytable</td>
|
73
|
+
<td>192.168.100.221</td>
|
74
|
+
<td>3306</td>
|
75
|
+
<td>mysqluser</td>
|
76
|
+
<td>mysqlpassword</td>
|
77
|
+
<td>mydb</td>
|
78
|
+
<td>SELECT id, text from mytable;</td>
|
79
|
+
<td> </td>
|
80
|
+
<td>5</td>
|
81
|
+
<td>id</td>
|
82
|
+
<td>1</td>
|
83
|
+
<td>0</td>
|
84
|
+
<td>0</td>
|
85
|
+
</tr></tbody>
|
86
|
+
</table>
|
87
|
+
|
88
|
+
### configuration
|
89
|
+
|
90
|
+
`````
|
91
|
+
<source>
|
92
|
+
type mysql_replicator_multi
|
93
|
+
|
94
|
+
# Database connection setting for manager table.
|
95
|
+
manager_host localhost
|
96
|
+
manager_username your_mysql_user
|
97
|
+
manager_password your_mysql_password
|
98
|
+
manager_database replicator_manager
|
99
|
+
|
100
|
+
# Format output tag for each event. Placeholder usage is described below.
|
101
|
+
tag replicator.${name}.${event}.${primary_key}
|
102
|
+
# ${name} : the value of `replicator_manager.settings.name` in manager table.
|
103
|
+
# ${event} : the variation of row event type by insert/update/delete.
|
104
|
+
# ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
|
105
|
+
</source>
|
106
|
+
|
107
|
+
<match replicator.**>
|
108
|
+
type mysql_replicator_elasticsearch
|
109
|
+
|
110
|
+
# Set Elasticsearch connection.
|
111
|
+
host localhost
|
112
|
+
port 9200
|
113
|
+
|
114
|
+
# Set Elasticsearch index, type, and unique id (primary_key) from tag.
|
115
|
+
tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+)\.(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
|
116
|
+
|
117
|
+
# Set frequency of sending bulk request to Elasticsearch node.
|
118
|
+
flush_interval 5s
|
119
|
+
|
120
|
+
# Set maximum retry interval (requires fluentd >= 0.10.41)
|
121
|
+
max_retry_wait 1800
|
122
|
+
|
123
|
+
# Queued chunks are flushed at shutdown process.
|
124
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
125
|
+
flush_at_shutdown yes
|
126
|
+
buffer_type file
|
127
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
128
|
+
</match>
|
129
|
+
`````
|
@@ -38,6 +38,9 @@ It is a guide to replicate multiple mysql table to elasticsearch.
|
|
38
38
|
max_retry_wait 1800
|
39
39
|
|
40
40
|
# Queued chunks are flushed at shutdown process.
|
41
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
41
42
|
flush_at_shutdown yes
|
43
|
+
buffer_type file
|
44
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
42
45
|
</match>
|
43
|
-
```
|
46
|
+
```
|
@@ -39,7 +39,10 @@ It is a guide to replicate multiple mysql table to solr.
|
|
39
39
|
max_retry_wait 1800
|
40
40
|
|
41
41
|
# Queued chunks are flushed at shutdown process.
|
42
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
42
43
|
flush_at_shutdown yes
|
44
|
+
buffer_type file
|
45
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
43
46
|
</match>
|
44
47
|
```
|
45
48
|
|
@@ -64,6 +67,9 @@ When you use default core (won't specify), change the value of `tag_format` like
|
|
64
67
|
max_retry_wait 1800
|
65
68
|
|
66
69
|
# Queued chunks are flushed at shutdown process.
|
70
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
67
71
|
flush_at_shutdown yes
|
72
|
+
buffer_type file
|
73
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_solr
|
68
74
|
</match>
|
69
|
-
```
|
75
|
+
```
|
@@ -47,6 +47,9 @@ It is a guide to replicate single mysql table to elasticsearch.
|
|
47
47
|
max_retry_wait 1800
|
48
48
|
|
49
49
|
# Queued chunks are flushed at shutdown process.
|
50
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
50
51
|
flush_at_shutdown yes
|
52
|
+
buffer_type file
|
53
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
51
54
|
</match>
|
52
|
-
```
|
55
|
+
```
|
@@ -48,7 +48,10 @@ It is a guide to replicate single mysql table to solr.
|
|
48
48
|
max_retry_wait 1800
|
49
49
|
|
50
50
|
# Queued chunks are flushed at shutdown process.
|
51
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
51
52
|
flush_at_shutdown yes
|
53
|
+
buffer_type file
|
54
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_elasticsearch
|
52
55
|
</match>
|
53
56
|
```
|
54
57
|
|
@@ -74,6 +77,9 @@ On this case, the solr url will be set `http://localhost:8983/solr`
|
|
74
77
|
max_retry_wait 1800
|
75
78
|
|
76
79
|
# Queued chunks are flushed at shutdown process.
|
80
|
+
# This is a sample for td-agent. If you use Yamabiko, replace 'td-agent' in the path with 'yamabiko'.
|
77
81
|
flush_at_shutdown yes
|
82
|
+
buffer_type file
|
83
|
+
buffer_path /var/log/td-agent/buffer/mysql_replicator_solr
|
78
84
|
</match>
|
79
|
-
```
|
85
|
+
```
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |s|
|
3
3
|
s.name = "fluent-plugin-mysql-replicator"
|
4
|
-
s.version = "0.
|
4
|
+
s.version = "0.4.0"
|
5
5
|
s.authors = ["Kentaro Yoshida"]
|
6
6
|
s.email = ["y.ken.studio@gmail.com"]
|
7
7
|
s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
|
8
|
-
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication
|
8
|
+
s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication and generate nested document for Elasticsearch/Solr. It's comming support replicate to another RDB/noSQL.}
|
9
9
|
|
10
10
|
s.files = `git ls-files`.split("\n")
|
11
11
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -15,6 +15,7 @@ module Fluent
|
|
15
15
|
config_param :database, :string, :default => nil
|
16
16
|
config_param :encoding, :string, :default => 'utf8'
|
17
17
|
config_param :query, :string
|
18
|
+
config_param :prepared_query, :string, :default => nil
|
18
19
|
config_param :primary_key, :string, :default => 'id'
|
19
20
|
config_param :interval, :string, :default => '1m'
|
20
21
|
config_param :enable_delete, :bool, :default => true
|
@@ -28,7 +29,7 @@ module Fluent
|
|
28
29
|
raise Fluent::ConfigError, "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
|
29
30
|
end
|
30
31
|
|
31
|
-
$log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>#{@query} :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
|
32
|
+
$log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>#{@query} :prepared_query=>#{@prepared_query} :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
|
32
33
|
end
|
33
34
|
|
34
35
|
def start
|
@@ -57,10 +58,20 @@ module Fluent
|
|
57
58
|
start_time = Time.now
|
58
59
|
previous_ids = ids
|
59
60
|
current_ids = Array.new
|
61
|
+
prepared_con = get_connection()
|
62
|
+
@prepared_query.split(/;/).each do |query|
|
63
|
+
prepared_con.query(query)
|
64
|
+
end
|
60
65
|
query(@query).each do |row|
|
61
66
|
current_ids << row[@primary_key]
|
62
67
|
current_hash = Digest::SHA1.hexdigest(row.flatten.join)
|
63
68
|
row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
|
69
|
+
row.select {|k, v| v.to_s.match(/^SELECT/i) }.each do |k, v|
|
70
|
+
row[k] = [] unless row[k].is_a?(Array)
|
71
|
+
prepared_con.query(v.gsub(/\$\{([^\}]+)\}/, row[$1].to_s)).each do |nest_row|
|
72
|
+
row[k] << nest_row
|
73
|
+
end
|
74
|
+
end
|
64
75
|
if row[@primary_key].nil?
|
65
76
|
$log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{primary_key}"
|
66
77
|
break
|
@@ -112,10 +123,10 @@ module Fluent
|
|
112
123
|
Engine.emit(tag, Engine.now, record)
|
113
124
|
end
|
114
125
|
|
115
|
-
def query(query)
|
116
|
-
@mysql ||= get_connection
|
126
|
+
def query(query, con = nil)
|
117
127
|
begin
|
118
|
-
|
128
|
+
mysql = get_connection if con.nil?
|
129
|
+
return mysql.query(query)
|
119
130
|
rescue Exception => e
|
120
131
|
$log.warn "mysql_replicator: #{e}"
|
121
132
|
sleep @interval
|
@@ -75,9 +75,21 @@ module Fluent
|
|
75
75
|
loop do
|
76
76
|
rows_count = 0
|
77
77
|
start_time = Time.now
|
78
|
+
unless config['prepared_query'].nil?
|
79
|
+
nest_db = get_origin_connection(config)
|
80
|
+
config['prepared_query'].strip.split(/;/).each do |query|
|
81
|
+
nest_db.query(query)
|
82
|
+
end
|
83
|
+
end
|
78
84
|
db = get_origin_connection(config)
|
79
85
|
db.query(config['query']).each do |row|
|
80
86
|
row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
|
87
|
+
row.select {|k, v| v.to_s.match(/^SELECT/i) }.each do |k, v|
|
88
|
+
row[k] = [] unless row[k].is_a?(Array)
|
89
|
+
nest_db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
|
90
|
+
row[k] << nest_row
|
91
|
+
end
|
92
|
+
end
|
81
93
|
current_id = row[primary_key]
|
82
94
|
@mutex.synchronize {
|
83
95
|
if row[primary_key].nil?
|
@@ -148,17 +160,17 @@ module Fluent
|
|
148
160
|
setting_name = config['name']
|
149
161
|
if (current_id - previous_id) > 1 && config['enable_loose_delete'] == 0
|
150
162
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
151
|
-
WHERE setting_name = '#{setting_name}'
|
163
|
+
WHERE setting_name = '#{setting_name}'
|
152
164
|
AND setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
|
153
165
|
elsif (current_id - previous_id) > 1 && config['enable_loose_delete'] == 1
|
154
166
|
return [*previous_id...current_id] - [current_id,previous_id]
|
155
167
|
elsif previous_id > current_id
|
156
168
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
157
|
-
WHERE setting_name = '#{setting_name}'
|
169
|
+
WHERE setting_name = '#{setting_name}'
|
158
170
|
AND setting_query_pk > #{previous_id.to_i}"
|
159
171
|
elsif previous_id == current_id
|
160
172
|
query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
|
161
|
-
WHERE setting_name = '#{setting_name}'
|
173
|
+
WHERE setting_name = '#{setting_name}'
|
162
174
|
AND (setting_query_pk > #{current_id.to_i} OR setting_query_pk < #{current_id.to_i})"
|
163
175
|
end
|
164
176
|
ids = Array.new
|
@@ -217,7 +229,7 @@ module Fluent
|
|
217
229
|
|
218
230
|
def flush_hash_table
|
219
231
|
return if @hash_table_bulk_insert.empty?
|
220
|
-
query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
|
232
|
+
query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
|
221
233
|
VALUES #{@hash_table_bulk_insert.join(',')}
|
222
234
|
ON DUPLICATE KEY UPDATE setting_query_hash = VALUES(setting_query_hash)"
|
223
235
|
@manager_db.query(query)
|
@@ -1,7 +1,7 @@
|
|
1
|
-
CREATE DATABASE replicator_manager;
|
1
|
+
CREATE DATABASE IF NOT EXISTS replicator_manager;
|
2
2
|
USE replicator_manager;
|
3
3
|
|
4
|
-
CREATE TABLE `hash_tables` (
|
4
|
+
CREATE TABLE IF NOT EXISTS `hash_tables` (
|
5
5
|
`id` int(11) NOT NULL AUTO_INCREMENT,
|
6
6
|
`setting_name` varchar(255) NOT NULL,
|
7
7
|
`setting_query_pk` int(11) NOT NULL,
|
@@ -10,7 +10,7 @@ CREATE TABLE `hash_tables` (
|
|
10
10
|
UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
|
11
11
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
12
12
|
|
13
|
-
CREATE TABLE `settings` (
|
13
|
+
CREATE TABLE IF NOT EXISTS `settings` (
|
14
14
|
`id` int(11) NOT NULL AUTO_INCREMENT,
|
15
15
|
`is_active` int(11) NOT NULL DEFAULT '1',
|
16
16
|
`name` varchar(255) NOT NULL,
|
@@ -20,10 +20,14 @@ CREATE TABLE `settings` (
|
|
20
20
|
`password` varchar(255) NOT NULL,
|
21
21
|
`database` varchar(255) NOT NULL,
|
22
22
|
`query` TEXT NOT NULL,
|
23
|
+
-- Use this field to pre-execute a query (e.g. creating a TEMPORARY TABLE) to improve the performance of generating nested documents.
|
24
|
+
`prepared_query` TEXT NOT NULL,
|
23
25
|
`interval` int(11) NOT NULL,
|
24
26
|
`primary_key` varchar(255) DEFAULT 'id',
|
25
27
|
`enable_delete` int(11) DEFAULT '1',
|
28
|
+
-- Setting 'enable_loose_insert: 1' makes synchronization faster by skipping the hash_tables check.
|
26
29
|
`enable_loose_insert` int(11) DEFAULT '0',
|
30
|
+
-- Setting 'enable_loose_delete: 1' turns on speculative delete, at a performance penalty on non-contiguous primary keys.
|
27
31
|
`enable_loose_delete` int(11) DEFAULT '0',
|
28
32
|
PRIMARY KEY (`id`),
|
29
33
|
UNIQUE KEY `name` (`name`)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-mysql-replicator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -104,6 +104,8 @@ files:
|
|
104
104
|
- LICENSE
|
105
105
|
- README.md
|
106
106
|
- Rakefile
|
107
|
+
- Tutorial-mysql_replicator.md
|
108
|
+
- Tutorial-mysql_replicator_multi.md
|
107
109
|
- example/mysql_multi_table_to_elasticsearch.md
|
108
110
|
- example/mysql_multi_table_to_solr.md
|
109
111
|
- example/mysql_single_table_to_elasticsearch.md
|
@@ -143,8 +145,8 @@ rubygems_version: 1.8.23
|
|
143
145
|
signing_key:
|
144
146
|
specification_version: 3
|
145
147
|
summary: Fluentd input plugin to track insert/update/delete event from MySQL database
|
146
|
-
server. Not only that, it could multiple table replication
|
147
|
-
It's comming support replicate to another RDB/noSQL.
|
148
|
+
server. Not only that, it could multiple table replication and generate nested document
|
149
|
+
for Elasticsearch/Solr. It's comming support replicate to another RDB/noSQL.
|
148
150
|
test_files:
|
149
151
|
- test/helper.rb
|
150
152
|
- test/plugin/test_in_mysql_replicator.rb
|