fluent-plugin-mysql-replicator 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- Fluentd input plugin to track insert/update/delete event from MySQL database server.
5
+ Fluentd input plugin to track insert/update/delete event from MySQL database server.
6
+ In addition, it can replicate multiple tables into Elasticsearch nodes.
7
+ Support for replicating to other RDB/NoSQL stores is coming.
6
8
 
7
9
  ## Installation
8
10
 
@@ -24,26 +26,56 @@ It is useful for these purpose.
24
26
  **Note:**
25
27
  On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
26
28
 
27
- #### configuration
29
+ ### configuration
28
30
 
29
31
  `````
30
32
  <source>
31
33
  type mysql_replicator
34
+
35
+ # Set connection settings for replicate source.
32
36
  host localhost
33
37
  username your_mysql_user
34
38
  password your_mysql_password
35
39
  database myweb
36
- interval 5s
37
- tag replicator
38
- query SELECT id, text from search_test
40
+
41
+ # Set replicate query configuration.
42
+ query SELECT id, text, updated_at from search_test;
43
+ primary_key id # specify unique key (default: id)
44
+ interval 10s # execute query interval (default: 1m)
45
+
46
+ # Enable detection of delete events in addition to insert/update events. (default: yes)
47
+ # Setting `enable_delete no` is useful when the query only follows recently updated records, as in this query.
48
+ # `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
49
+ enable_delete yes
50
+
51
+ # Format the output tag for each event. Placeholder usage is described below.
52
+ tag replicator.myweb.search_test.${event}.${primary_key}
53
+ # ${event} : the variation of row event type by insert/update/delete.
54
+ # ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
39
55
  </source>
40
56
 
41
57
  <match replicator.*>
42
- type stdout
58
+ type copy
59
+ <store>
60
+ type stdout
61
+ </store>
62
+ <store>
63
+ type mysql_replicator_elasticsearch
64
+
65
+ # Set Elasticsearch connection.
66
+ host localhost
67
+ port 9200
68
+
69
+ # Set Elasticsearch index, type, and unique id (primary_key) from tag.
70
+ tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
71
+
72
+ # Set frequency of sending bulk request to Elasticsearch node.
73
+ flush_interval 5s
74
+ </store>
43
75
  </match>
44
76
  `````
45
77
 
46
- #### sample query
78
+ ### sample query
47
79
 
48
80
  `````
49
81
  $ mysql -e "create database myweb"
@@ -56,13 +88,13 @@ $ sleep 10
56
88
  $ mysql myweb -e "delete from search_test where text='bbb'"
57
89
  `````
58
90
 
59
- #### result
91
+ ### result
60
92
 
61
93
  `````
62
94
  $ tail -f /var/log/td-agent/td-agent.log
63
- 2013-11-25 18:22:25 +0900 replicator.insert: {"id":"1","text":"aaa"}
64
- 2013-11-25 18:22:35 +0900 replicator.update: {"id":"1","text":"bbb"}
65
- 2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
95
+ 2013-11-25 18:22:25 +0900 replicator.insert.id: {"id":"1","text":"aaa"}
96
+ 2013-11-25 18:22:35 +0900 replicator.update.id: {"id":"1","text":"bbb"}
97
+ 2013-11-25 18:22:45 +0900 replicator.delete.id: {"id":"1"}
66
98
  `````
67
99
 
68
100
  ## Tutorial for Production
@@ -73,11 +105,15 @@ This architecture is storing hash table in mysql management table instead of rub
73
105
  **Note:**
74
106
  On syncing 300 million rows table, it will consume around 20MB of memory with ruby 1.9.3 environment.
75
107
 
76
- #### prepare
108
+ ### prepare
109
+
110
+ It is done with the following two steps.
77
111
 
78
112
  * create database and tables.
79
113
  * add replicator configuration.
80
114
 
115
+ ##### create database and tables.
116
+
81
117
  ```
82
118
  $ cat setup_mysql_replicator_multi.sql
83
119
  CREATE DATABASE replicator_manager;
@@ -110,13 +146,28 @@ CREATE TABLE `settings` (
110
146
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
111
147
  ```
112
148
 
149
+ ##### add replicator configuration.
150
+
113
151
  ```
114
- $ mysql
152
+ $ mysql -umysqluser -p
153
+
154
+ -- For the first time, load schema.
115
155
  mysql> source /path/to/setup_mysql_replicator_multi.sql
116
- mysql> insert into source ...snip...;
156
+
157
+ -- Add replicate source connection and query settings like below.
158
+ mysql> INSERT INTO `settings`
159
+ (`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
160
+ VALUES
161
+ (NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
117
162
  ```
118
163
 
119
- #### configuration
164
+ This is a sample of the row you have inserted.
165
+
166
+ | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |
167
+ |----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|
168
+ | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 |
169
+
170
+ ### configuration
120
171
 
121
172
  `````
122
173
  <source>
@@ -136,16 +187,27 @@ mysql> insert into source ...snip...;
136
187
  </source>
137
188
 
138
189
  <match replicator.**>
139
- type stdout
190
+ type mysql_replicator_elasticsearch
191
+
192
+ # Set Elasticsearch connection.
193
+ host localhost
194
+ port 9200
195
+
196
+ # Set Elasticsearch index, type, and unique id (primary_key) from tag.
197
+ tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
198
+
199
+ # Set frequency of sending bulk request to Elasticsearch node.
200
+ flush_interval 5s
140
201
  </match>
141
202
  `````
142
203
 
143
204
  ## TODO
144
205
 
145
- * support string type primary_key.
146
- * support reload setting on demand.
206
+ Pull requests are very welcome, for example for the items below!!
147
207
 
148
- Pull requests are very welcome!!
208
+ * more tests.
209
+ * support string type of primary_key.
210
+ * support reload setting on demand.
149
211
 
150
212
  ## Copyright
151
213
 
@@ -1,11 +1,11 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.1.1"
4
+ s.version = "0.2.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
8
- s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It's comming support multiple table replication to another RDB/noSQL like Elasticsearch.}
8
+ s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication into Elasticsearch nodes. It's comming support replicate to another RDB/noSQL.}
9
9
 
10
10
  s.files = `git ls-files`.split("\n")
11
11
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -13,6 +13,8 @@ Gem::Specification.new do |s|
13
13
  s.require_paths = ["lib"]
14
14
 
15
15
  s.add_development_dependency "rake"
16
+ s.add_development_dependency "webmock"
17
+
16
18
  s.add_runtime_dependency "fluentd"
17
19
  s.add_runtime_dependency "mysql2"
18
20
  end
@@ -14,16 +14,21 @@ module Fluent
14
14
  config_param :password, :string, :default => nil
15
15
  config_param :database, :string, :default => nil
16
16
  config_param :encoding, :string, :default => 'utf8'
17
- config_param :interval, :string, :default => '1m'
18
- config_param :tag, :string
19
17
  config_param :query, :string
20
18
  config_param :primary_key, :string, :default => 'id'
21
- config_param :enable_delete, :bool, :default => 'yes'
19
+ config_param :interval, :string, :default => '1m'
20
+ config_param :enable_delete, :bool, :default => true
21
+ config_param :tag, :string, :default => nil
22
22
 
23
23
  def configure(conf)
24
24
  super
25
25
  @interval = Config.time_value(@interval)
26
- $log.info "adding mysql_replicator job: [#{@query}] interval: #{@interval}sec"
26
+
27
+ if @tag.nil?
28
+ raise Fluent::ConfigError, "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
29
+ end
30
+
31
+ $log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>[#{@query}] :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
27
32
  end
28
33
 
29
34
  def start
@@ -54,18 +59,23 @@ module Fluent
54
59
  current_hash = Digest::SHA1.hexdigest(row.flatten.join)
55
60
  row.each {|k, v| row[k] = v.to_s if v.is_a? Time}
56
61
  if !table_hash.include?(row[@primary_key])
57
- emit_record(:insert, row)
62
+ tag = format_tag(@tag, {:event => :insert})
63
+ emit_record(tag, row)
58
64
  elsif table_hash[row[@primary_key]] != current_hash
59
- emit_record(:update, row)
65
+ tag = format_tag(@tag, {:event => :update})
66
+ emit_record(tag, row)
60
67
  end
61
68
  table_hash[row[@primary_key]] = current_hash
62
69
  end
63
70
  ids = current_ids
64
- unless @enable_delete
71
+ if @enable_delete
65
72
  deleted_ids = previous_ids - current_ids
66
73
  if deleted_ids.count > 0
67
74
  hash_delete_by_list(table_hash, deleted_ids)
68
- deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
75
+ deleted_ids.each do |id|
76
+ tag = format_tag(@tag, {:event => :delete})
77
+ emit_record(tag, {@primary_key => id})
78
+ end
69
79
  end
70
80
  end
71
81
  sleep @interval
@@ -76,15 +86,22 @@ module Fluent
76
86
  deleted_keys.each{|k| hash.delete(k)}
77
87
  end
78
88
 
79
- def emit_record(type, record)
80
- tag = "#{@tag}.#{type.to_s}"
89
+ def format_tag(tag, param)
90
+ pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key}
91
+ tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
92
+ $log.warn "mysql_replicator: missing placeholder. tag:#{tag} placeholder:#{$1}" unless pattern.include?($1)
93
+ pattern[$1]
94
+ end
95
+ end
96
+
97
+ def emit_record(tag, record)
81
98
  Engine.emit(tag, Engine.now, record)
82
99
  end
83
100
 
84
101
  def query(query)
85
102
  @mysql ||= get_connection
86
103
  begin
87
- return @mysql.query(query, :cast => false, :cache_rows => false)
104
+ return @mysql.query(query)
88
105
  rescue Exception => e
89
106
  $log.warn "mysql_replicator: #{e}"
90
107
  sleep @interval
@@ -0,0 +1,67 @@
1
+ require 'net/http'
2
+ require 'date'
3
+
4
+ class Fluent::MysqlReplicatorElasticsearchOutput < Fluent::BufferedOutput
5
+ Fluent::Plugin.register_output('mysql_replicator_elasticsearch', self)
6
+
7
+ config_param :host, :string, :default => 'localhost'
8
+ config_param :port, :integer, :default => 9200
9
+ config_param :tag_format, :string, :default => nil
10
+
11
+ DEFAULT_TAG_FORMAT = /(?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
12
+
13
+ def initialize
14
+ super
15
+ end
16
+
17
+ def configure(conf)
18
+ super
19
+
20
+ if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
21
+ @tag_format = DEFAULT_TAG_FORMAT
22
+ else
23
+ @tag_format = Regexp.new(conf['tag_format'])
24
+ end
25
+ end
26
+
27
+ def start
28
+ super
29
+ end
30
+
31
+ def format(tag, time, record)
32
+ [tag, time, record].to_msgpack
33
+ end
34
+
35
+ def shutdown
36
+ super
37
+ end
38
+
39
+ def write(chunk)
40
+ bulk_message = []
41
+
42
+ chunk.msgpack_each do |tag, time, record|
43
+ tag_parts = tag.match(@tag_format)
44
+ target_index = tag_parts['index_name']
45
+ target_type = tag_parts['type_name']
46
+ id_key = tag_parts['primary_key']
47
+
48
+ if tag_parts['event'] == 'delete'
49
+ meta = { "delete" => {"_index" => target_index, "_type" => target_type, "_id" => record[id_key]} }
50
+ bulk_message << Yajl::Encoder.encode(meta)
51
+ else
52
+ meta = { "index" => {"_index" => target_index, "_type" => target_type} }
53
+ if id_key && record[id_key]
54
+ meta['index']['_id'] = record[id_key]
55
+ end
56
+ bulk_message << Yajl::Encoder.encode(meta)
57
+ bulk_message << Yajl::Encoder.encode(record)
58
+ end
59
+ end
60
+ bulk_message << ""
61
+
62
+ http = Net::HTTP.new(@host, @port.to_i)
63
+ request = Net::HTTP::Post.new('/_bulk', {'content-type' => 'application/json; charset=utf-8'})
64
+ request.body = bulk_message.join("\n")
65
+ http.request(request).value
66
+ end
67
+ end
data/test/helper.rb CHANGED
@@ -24,6 +24,7 @@ end
24
24
 
25
25
  require 'fluent/plugin/in_mysql_replicator'
26
26
  require 'fluent/plugin/in_mysql_replicator_multi'
27
+ require 'fluent/plugin/out_mysql_replicator_elasticsearch'
27
28
 
28
29
  class Test::Unit::TestCase
29
30
  end
@@ -0,0 +1,127 @@
1
+ require 'helper'
2
+ require 'webmock/test_unit'
3
+
4
+ WebMock.disable_net_connect!
5
+
6
+ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
7
+ attr_accessor :index_cmds, :content_type
8
+
9
+ def setup
10
+ Fluent::Test.setup
11
+ @driver = nil
12
+ end
13
+
14
+ def driver(tag='myindex.mytype.insert.id', conf='')
15
+ @driver ||= Fluent::Test::BufferedOutputTestDriver.new(Fluent::MysqlReplicatorElasticsearchOutput, tag).configure(conf)
16
+ end
17
+
18
+ def sample_record
19
+ {'age' => 26, 'request_id' => '42'}
20
+ end
21
+
22
+ def stub_elastic(url="http://localhost:9200/_bulk")
23
+ stub_request(:post, url).with do |req|
24
+ @content_type = req.headers["Content-Type"]
25
+ @index_cmds = req.body.split("\n").map {|r| JSON.parse(r) }
26
+ end
27
+ end
28
+
29
+ def stub_elastic_unavailable(url="http://localhost:9200/_bulk")
30
+ stub_request(:post, url).to_return(:status => [503, "Service Unavailable"])
31
+ end
32
+
33
+ def test_wrties_with_proper_content_type
34
+ stub_elastic
35
+ driver.emit(sample_record)
36
+ driver.run
37
+ assert_equal("application/json; charset=utf-8", @content_type)
38
+ end
39
+
40
+ def test_writes_to_speficied_index
41
+ driver.configure("index_name myindex\n")
42
+ stub_elastic
43
+ driver.emit(sample_record)
44
+ driver.run
45
+ assert_equal('myindex', index_cmds.first['index']['_index'])
46
+ end
47
+
48
+ def test_writes_to_speficied_type
49
+ driver.configure("type_name mytype\n")
50
+ stub_elastic
51
+ driver.emit(sample_record)
52
+ driver.run
53
+ assert_equal('mytype', index_cmds.first['index']['_type'])
54
+ end
55
+
56
+ def test_writes_to_speficied_host
57
+ driver.configure("host 192.168.33.50\n")
58
+ elastic_request = stub_elastic("http://192.168.33.50:9200/_bulk")
59
+ driver.emit(sample_record)
60
+ driver.run
61
+ assert_requested(elastic_request)
62
+ end
63
+
64
+ def test_writes_to_speficied_port
65
+ driver.configure("port 9201\n")
66
+ elastic_request = stub_elastic("http://localhost:9201/_bulk")
67
+ driver.emit(sample_record)
68
+ driver.run
69
+ assert_requested(elastic_request)
70
+ end
71
+
72
+ def test_makes_bulk_request
73
+ stub_elastic
74
+ driver.emit(sample_record)
75
+ driver.emit(sample_record.merge('age' => 27))
76
+ driver.run
77
+ assert_equal(4, index_cmds.count)
78
+ end
79
+
80
+ def test_all_records_are_preserved_in_bulk
81
+ stub_elastic
82
+ driver.emit(sample_record)
83
+ driver.emit(sample_record.merge('age' => 27))
84
+ driver.run
85
+ assert_equal(26, index_cmds[1]['age'])
86
+ assert_equal(27, index_cmds[3]['age'])
87
+ end
88
+
89
+
90
+ def test_doesnt_add_logstash_timestamp_by_default
91
+ stub_elastic
92
+ driver.emit(sample_record)
93
+ driver.run
94
+ assert_nil(index_cmds[1]['@timestamp'])
95
+ end
96
+
97
+
98
+ def test_doesnt_add_tag_key_by_default
99
+ stub_elastic
100
+ driver.emit(sample_record)
101
+ driver.run
102
+ assert_nil(index_cmds[1]['tag'])
103
+ end
104
+
105
+ def test_doesnt_add_id_key_if_missing_when_configured
106
+ driver.configure("id_key another_request_id\n")
107
+ stub_elastic
108
+ driver.emit(sample_record)
109
+ driver.run
110
+ assert(!index_cmds[0]['index'].has_key?('_id'))
111
+ end
112
+
113
+ def test_adds_id_key_when_not_configured
114
+ stub_elastic
115
+ driver.emit(sample_record)
116
+ driver.run
117
+ assert(!index_cmds[0]['index'].has_key?('_id'))
118
+ end
119
+
120
+ def test_request_error
121
+ stub_elastic_unavailable
122
+ driver.emit(sample_record)
123
+ assert_raise(Net::HTTPFatalError) {
124
+ driver.run
125
+ }
126
+ end
127
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: webmock
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: fluentd
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -75,10 +91,12 @@ files:
75
91
  - fluent-plugin-mysql-replicator.gemspec
76
92
  - lib/fluent/plugin/in_mysql_replicator.rb
77
93
  - lib/fluent/plugin/in_mysql_replicator_multi.rb
94
+ - lib/fluent/plugin/out_mysql_replicator_elasticsearch.rb
78
95
  - setup_mysql_replicator_multi.sql
79
96
  - test/helper.rb
80
97
  - test/plugin/test_in_mysql_replicator.rb
81
98
  - test/plugin/test_in_mysql_replicator_multi.rb
99
+ - test/plugin/test_out_mysql_replicator_elasticsearch.rb
82
100
  homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
83
101
  licenses: []
84
102
  post_install_message:
@@ -102,9 +120,11 @@ rubyforge_project:
102
120
  rubygems_version: 1.8.23
103
121
  signing_key:
104
122
  specification_version: 3
105
- summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
106
- It's comming support multiple table replication to another RDB/noSQL like Elasticsearch.
123
+ summary: Fluentd input plugin to track insert/update/delete event from MySQL database
124
+ server. Not only that, it could multiple table replication into Elasticsearch nodes.
125
+ It's comming support replicate to another RDB/noSQL.
107
126
  test_files:
108
127
  - test/helper.rb
109
128
  - test/plugin/test_in_mysql_replicator.rb
110
129
  - test/plugin/test_in_mysql_replicator_multi.rb
130
+ - test/plugin/test_out_mysql_replicator_elasticsearch.rb