fluent-plugin-mysql-replicator 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- Fluentd input plugin to track insert/update/delete event from MySQL database server.
5
+ Fluentd input plugin to track insert/update/delete event from MySQL database server.
6
+ Not only that, it can replicate multiple tables into Elasticsearch nodes.
7
+ It's coming to support replication to other RDB/NoSQL stores.
6
8
 
7
9
  ## Installation
8
10
 
@@ -24,26 +26,56 @@ It is useful for these purpose.
24
26
  **Note:**
25
27
  On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
26
28
 
27
- #### configuration
29
+ ### configuration
28
30
 
29
31
  `````
30
32
  <source>
31
33
  type mysql_replicator
34
+
35
+ # Set connection settings for replicate source.
32
36
  host localhost
33
37
  username your_mysql_user
34
38
  password your_mysql_password
35
39
  database myweb
36
- interval 5s
37
- tag replicator
38
- query SELECT id, text from search_test
40
+
41
+ # Set replicate query configuration.
42
+ query SELECT id, text, updated_at from search_test;
43
+ primary_key id # specify unique key (default: id)
44
+ interval 10s # execute query interval (default: 1m)
45
+
46
+ # Enable detection of delete events in addition to insert/update events. (default: yes)
47
+ # It is useful to set `enable_delete no` to keep tracking only recently updated records with a query like this:
48
+ # `SELECT * FROM search_test WHERE DATE_ADD(updated_at, INTERVAL 5 MINUTE) > NOW();`
49
+ enable_delete yes
50
+
51
+ # Format output tag for each events. Placeholders usage as described below.
52
+ tag replicator.myweb.search_test.${event}.${primary_key}
53
+ # ${event} : the variation of row event type by insert/update/delete.
54
+ # ${primary_key} : the value of `replicator_manager.settings.primary_key` in manager table.
39
55
  </source>
40
56
 
41
57
  <match replicator.*>
42
- type stdout
58
+ type copy
59
+ <store>
60
+ type stdout
61
+ </store>
62
+ <store>
63
+ type mysql_replicator_elasticsearch
64
+
65
+ # Set Elasticsearch connection.
66
+ host localhost
67
+ port 9200
68
+
69
+ # Set Elasticsearch index, type, and unique id (primary_key) from tag.
70
+ tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
71
+
72
+ # Set frequency of sending bulk request to Elasticsearch node.
73
+ flush_interval 5s
74
+ </store>
43
75
  </match>
44
76
  `````
45
77
 
46
- #### sample query
78
+ ### sample query
47
79
 
48
80
  `````
49
81
  $ mysql -e "create database myweb"
@@ -56,13 +88,13 @@ $ sleep 10
56
88
  $ mysql myweb -e "delete from search_test where text='bbb'"
57
89
  `````
58
90
 
59
- #### result
91
+ ### result
60
92
 
61
93
  `````
62
94
  $ tail -f /var/log/td-agent/td-agent.log
63
- 2013-11-25 18:22:25 +0900 replicator.insert: {"id":"1","text":"aaa"}
64
- 2013-11-25 18:22:35 +0900 replicator.update: {"id":"1","text":"bbb"}
65
- 2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
95
+ 2013-11-25 18:22:25 +0900 replicator.insert.id: {"id":"1","text":"aaa"}
96
+ 2013-11-25 18:22:35 +0900 replicator.update.id: {"id":"1","text":"bbb"}
97
+ 2013-11-25 18:22:45 +0900 replicator.delete.id: {"id":"1"}
66
98
  `````
67
99
 
68
100
  ## Tutorial for Production
@@ -73,11 +105,15 @@ This architecture is storing hash table in mysql management table instead of rub
73
105
  **Note:**
74
106
  On syncing 300 million rows table, it will consume around 20MB of memory with ruby 1.9.3 environment.
75
107
 
76
- #### prepare
108
+ ### prepare
109
+
110
+ It is done with the following two steps.
77
111
 
78
112
  * create database and tables.
79
113
  * add replicator configuration.
80
114
 
115
+ ##### create database and tables.
116
+
81
117
  ```
82
118
  $ cat setup_mysql_replicator_multi.sql
83
119
  CREATE DATABASE replicator_manager;
@@ -110,13 +146,28 @@ CREATE TABLE `settings` (
110
146
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
111
147
  ```
112
148
 
149
+ ##### add replicator configuration.
150
+
113
151
  ```
114
- $ mysql
152
+ $ mysql -umysqluser -p
153
+
154
+ -- For the first time, load schema.
115
155
  mysql> source /path/to/setup_mysql_replicator_multi.sql
116
- mysql> insert into source ...snip...;
156
+
157
+ -- Add replicate source connection and query settings like below.
158
+ mysql> INSERT INTO `settings`
159
+ (`id`, `is_active`, `name`, `host`, `port`, `username`, `password`, `database`, `query`, `interval`, `primary_key`, `enable_delete`)
160
+ VALUES
161
+ (NULL, 1, 'mydb.mytable', '192.168.100.221', 3306, 'mysqluser', 'mysqlpassword', 'mydb', 'SELECT id, text from mytable;', 5, 'id', 1);
117
162
  ```
118
163
 
119
- #### configuration
164
+ Here is a sample of the row you have inserted.
165
+
166
+ | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |
167
+ |----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|
168
+ | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 |
169
+
170
+ ### configuration
120
171
 
121
172
  `````
122
173
  <source>
@@ -136,16 +187,27 @@ mysql> insert into source ...snip...;
136
187
  </source>
137
188
 
138
189
  <match replicator.**>
139
- type stdout
190
+ type mysql_replicator_elasticsearch
191
+
192
+ # Set Elasticsearch connection.
193
+ host localhost
194
+ port 9200
195
+
196
+ # Set Elasticsearch index, type, and unique id (primary_key) from tag.
197
+ tag_format (?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$
198
+
199
+ # Set frequency of sending bulk request to Elasticsearch node.
200
+ flush_interval 5s
140
201
  </match>
141
202
  `````
142
203
 
143
204
  ## TODO
144
205
 
145
- * support string type primary_key.
146
- * support reload setting on demand.
206
+ Pull requests are very welcome like below!!
147
207
 
148
- Pull requests are very welcome!!
208
+ * more tests.
209
+ * support string type of primary_key.
210
+ * support reload setting on demand.
149
211
 
150
212
  ## Copyright
151
213
 
@@ -1,11 +1,11 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.1.1"
4
+ s.version = "0.2.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
8
- s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It's comming support multiple table replication to another RDB/noSQL like Elasticsearch.}
8
+ s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server. Not only that, it could multiple table replication into Elasticsearch nodes. It's comming support replicate to another RDB/noSQL.}
9
9
 
10
10
  s.files = `git ls-files`.split("\n")
11
11
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -13,6 +13,8 @@ Gem::Specification.new do |s|
13
13
  s.require_paths = ["lib"]
14
14
 
15
15
  s.add_development_dependency "rake"
16
+ s.add_development_dependency "webmock"
17
+
16
18
  s.add_runtime_dependency "fluentd"
17
19
  s.add_runtime_dependency "mysql2"
18
20
  end
@@ -14,16 +14,21 @@ module Fluent
14
14
  config_param :password, :string, :default => nil
15
15
  config_param :database, :string, :default => nil
16
16
  config_param :encoding, :string, :default => 'utf8'
17
- config_param :interval, :string, :default => '1m'
18
- config_param :tag, :string
19
17
  config_param :query, :string
20
18
  config_param :primary_key, :string, :default => 'id'
21
- config_param :enable_delete, :bool, :default => 'yes'
19
+ config_param :interval, :string, :default => '1m'
20
+ config_param :enable_delete, :bool, :default => true
21
+ config_param :tag, :string, :default => nil
22
22
 
23
23
  def configure(conf)
24
24
  super
25
25
  @interval = Config.time_value(@interval)
26
- $log.info "adding mysql_replicator job: [#{@query}] interval: #{@interval}sec"
26
+
27
+ if @tag.nil?
28
+ raise Fluent::ConfigError, "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
29
+ end
30
+
31
+ $log.info "adding mysql_replicator worker. :tag=>#{tag} :query=>[#{@query}] :interval=>#{@interval}sec :enable_delete=>#{enable_delete}"
27
32
  end
28
33
 
29
34
  def start
@@ -54,18 +59,23 @@ module Fluent
54
59
  current_hash = Digest::SHA1.hexdigest(row.flatten.join)
55
60
  row.each {|k, v| row[k] = v.to_s if v.is_a? Time}
56
61
  if !table_hash.include?(row[@primary_key])
57
- emit_record(:insert, row)
62
+ tag = format_tag(@tag, {:event => :insert})
63
+ emit_record(tag, row)
58
64
  elsif table_hash[row[@primary_key]] != current_hash
59
- emit_record(:update, row)
65
+ tag = format_tag(@tag, {:event => :update})
66
+ emit_record(tag, row)
60
67
  end
61
68
  table_hash[row[@primary_key]] = current_hash
62
69
  end
63
70
  ids = current_ids
64
- unless @enable_delete
71
+ if @enable_delete
65
72
  deleted_ids = previous_ids - current_ids
66
73
  if deleted_ids.count > 0
67
74
  hash_delete_by_list(table_hash, deleted_ids)
68
- deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
75
+ deleted_ids.each do |id|
76
+ tag = format_tag(@tag, {:event => :delete})
77
+ emit_record(tag, {@primary_key => id})
78
+ end
69
79
  end
70
80
  end
71
81
  sleep @interval
@@ -76,15 +86,22 @@ module Fluent
76
86
  deleted_keys.each{|k| hash.delete(k)}
77
87
  end
78
88
 
79
- def emit_record(type, record)
80
- tag = "#{@tag}.#{type.to_s}"
89
+ def format_tag(tag, param)
90
+ pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key}
91
+ tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
92
+ $log.warn "mysql_replicator: missing placeholder. tag:#{tag} placeholder:#{$1}" unless pattern.include?($1)
93
+ pattern[$1]
94
+ end
95
+ end
96
+
97
+ def emit_record(tag, record)
81
98
  Engine.emit(tag, Engine.now, record)
82
99
  end
83
100
 
84
101
  def query(query)
85
102
  @mysql ||= get_connection
86
103
  begin
87
- return @mysql.query(query, :cast => false, :cache_rows => false)
104
+ return @mysql.query(query)
88
105
  rescue Exception => e
89
106
  $log.warn "mysql_replicator: #{e}"
90
107
  sleep @interval
@@ -0,0 +1,67 @@
1
+ require 'net/http'
2
+ require 'date'
3
+
4
+ class Fluent::MysqlReplicatorElasticsearchOutput < Fluent::BufferedOutput
5
+ Fluent::Plugin.register_output('mysql_replicator_elasticsearch', self)
6
+
7
+ config_param :host, :string, :default => 'localhost'
8
+ config_param :port, :integer, :default => 9200
9
+ config_param :tag_format, :string, :default => nil
10
+
11
+ DEFAULT_TAG_FORMAT = /(?<index_name>[^\.]+)\.(?<type_name>[^\.]+).(?<event>[^\.]+)\.(?<primary_key>[^\.]+)$/
12
+
13
+ def initialize
14
+ super
15
+ end
16
+
17
+ def configure(conf)
18
+ super
19
+
20
+ if @tag_format.nil? || @tag_format == DEFAULT_TAG_FORMAT
21
+ @tag_format = DEFAULT_TAG_FORMAT
22
+ else
23
+ @tag_format = Regexp.new(conf['tag_format'])
24
+ end
25
+ end
26
+
27
+ def start
28
+ super
29
+ end
30
+
31
+ def format(tag, time, record)
32
+ [tag, time, record].to_msgpack
33
+ end
34
+
35
+ def shutdown
36
+ super
37
+ end
38
+
39
+ def write(chunk)
40
+ bulk_message = []
41
+
42
+ chunk.msgpack_each do |tag, time, record|
43
+ tag_parts = tag.match(@tag_format)
44
+ target_index = tag_parts['index_name']
45
+ target_type = tag_parts['type_name']
46
+ id_key = tag_parts['primary_key']
47
+
48
+ if tag_parts['event'] == 'delete'
49
+ meta = { "delete" => {"_index" => target_index, "_type" => target_type, "_id" => record[id_key]} }
50
+ bulk_message << Yajl::Encoder.encode(meta)
51
+ else
52
+ meta = { "index" => {"_index" => target_index, "_type" => target_type} }
53
+ if id_key && record[id_key]
54
+ meta['index']['_id'] = record[id_key]
55
+ end
56
+ bulk_message << Yajl::Encoder.encode(meta)
57
+ bulk_message << Yajl::Encoder.encode(record)
58
+ end
59
+ end
60
+ bulk_message << ""
61
+
62
+ http = Net::HTTP.new(@host, @port.to_i)
63
+ request = Net::HTTP::Post.new('/_bulk', {'content-type' => 'application/json; charset=utf-8'})
64
+ request.body = bulk_message.join("\n")
65
+ http.request(request).value
66
+ end
67
+ end
data/test/helper.rb CHANGED
@@ -24,6 +24,7 @@ end
24
24
 
25
25
  require 'fluent/plugin/in_mysql_replicator'
26
26
  require 'fluent/plugin/in_mysql_replicator_multi'
27
+ require 'fluent/plugin/out_mysql_replicator_elasticsearch'
27
28
 
28
29
  class Test::Unit::TestCase
29
30
  end
@@ -0,0 +1,127 @@
1
+ require 'helper'
2
+ require 'webmock/test_unit'
3
+
4
+ WebMock.disable_net_connect!
5
+
6
+ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
7
+ attr_accessor :index_cmds, :content_type
8
+
9
+ def setup
10
+ Fluent::Test.setup
11
+ @driver = nil
12
+ end
13
+
14
+ def driver(tag='myindex.mytype.insert.id', conf='')
15
+ @driver ||= Fluent::Test::BufferedOutputTestDriver.new(Fluent::MysqlReplicatorElasticsearchOutput, tag).configure(conf)
16
+ end
17
+
18
+ def sample_record
19
+ {'age' => 26, 'request_id' => '42'}
20
+ end
21
+
22
+ def stub_elastic(url="http://localhost:9200/_bulk")
23
+ stub_request(:post, url).with do |req|
24
+ @content_type = req.headers["Content-Type"]
25
+ @index_cmds = req.body.split("\n").map {|r| JSON.parse(r) }
26
+ end
27
+ end
28
+
29
+ def stub_elastic_unavailable(url="http://localhost:9200/_bulk")
30
+ stub_request(:post, url).to_return(:status => [503, "Service Unavailable"])
31
+ end
32
+
33
+ def test_wrties_with_proper_content_type
34
+ stub_elastic
35
+ driver.emit(sample_record)
36
+ driver.run
37
+ assert_equal("application/json; charset=utf-8", @content_type)
38
+ end
39
+
40
+ def test_writes_to_speficied_index
41
+ driver.configure("index_name myindex\n")
42
+ stub_elastic
43
+ driver.emit(sample_record)
44
+ driver.run
45
+ assert_equal('myindex', index_cmds.first['index']['_index'])
46
+ end
47
+
48
+ def test_writes_to_speficied_type
49
+ driver.configure("type_name mytype\n")
50
+ stub_elastic
51
+ driver.emit(sample_record)
52
+ driver.run
53
+ assert_equal('mytype', index_cmds.first['index']['_type'])
54
+ end
55
+
56
+ def test_writes_to_speficied_host
57
+ driver.configure("host 192.168.33.50\n")
58
+ elastic_request = stub_elastic("http://192.168.33.50:9200/_bulk")
59
+ driver.emit(sample_record)
60
+ driver.run
61
+ assert_requested(elastic_request)
62
+ end
63
+
64
+ def test_writes_to_speficied_port
65
+ driver.configure("port 9201\n")
66
+ elastic_request = stub_elastic("http://localhost:9201/_bulk")
67
+ driver.emit(sample_record)
68
+ driver.run
69
+ assert_requested(elastic_request)
70
+ end
71
+
72
+ def test_makes_bulk_request
73
+ stub_elastic
74
+ driver.emit(sample_record)
75
+ driver.emit(sample_record.merge('age' => 27))
76
+ driver.run
77
+ assert_equal(4, index_cmds.count)
78
+ end
79
+
80
+ def test_all_records_are_preserved_in_bulk
81
+ stub_elastic
82
+ driver.emit(sample_record)
83
+ driver.emit(sample_record.merge('age' => 27))
84
+ driver.run
85
+ assert_equal(26, index_cmds[1]['age'])
86
+ assert_equal(27, index_cmds[3]['age'])
87
+ end
88
+
89
+
90
+ def test_doesnt_add_logstash_timestamp_by_default
91
+ stub_elastic
92
+ driver.emit(sample_record)
93
+ driver.run
94
+ assert_nil(index_cmds[1]['@timestamp'])
95
+ end
96
+
97
+
98
+ def test_doesnt_add_tag_key_by_default
99
+ stub_elastic
100
+ driver.emit(sample_record)
101
+ driver.run
102
+ assert_nil(index_cmds[1]['tag'])
103
+ end
104
+
105
+ def test_doesnt_add_id_key_if_missing_when_configured
106
+ driver.configure("id_key another_request_id\n")
107
+ stub_elastic
108
+ driver.emit(sample_record)
109
+ driver.run
110
+ assert(!index_cmds[0]['index'].has_key?('_id'))
111
+ end
112
+
113
+ def test_adds_id_key_when_not_configured
114
+ stub_elastic
115
+ driver.emit(sample_record)
116
+ driver.run
117
+ assert(!index_cmds[0]['index'].has_key?('_id'))
118
+ end
119
+
120
+ def test_request_error
121
+ stub_elastic_unavailable
122
+ driver.emit(sample_record)
123
+ assert_raise(Net::HTTPFatalError) {
124
+ driver.run
125
+ }
126
+ end
127
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: webmock
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: fluentd
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -75,10 +91,12 @@ files:
75
91
  - fluent-plugin-mysql-replicator.gemspec
76
92
  - lib/fluent/plugin/in_mysql_replicator.rb
77
93
  - lib/fluent/plugin/in_mysql_replicator_multi.rb
94
+ - lib/fluent/plugin/out_mysql_replicator_elasticsearch.rb
78
95
  - setup_mysql_replicator_multi.sql
79
96
  - test/helper.rb
80
97
  - test/plugin/test_in_mysql_replicator.rb
81
98
  - test/plugin/test_in_mysql_replicator_multi.rb
99
+ - test/plugin/test_out_mysql_replicator_elasticsearch.rb
82
100
  homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
83
101
  licenses: []
84
102
  post_install_message:
@@ -102,9 +120,11 @@ rubyforge_project:
102
120
  rubygems_version: 1.8.23
103
121
  signing_key:
104
122
  specification_version: 3
105
- summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
106
- It's comming support multiple table replication to another RDB/noSQL like Elasticsearch.
123
+ summary: Fluentd input plugin to track insert/update/delete event from MySQL database
124
+ server. Not only that, it could multiple table replication into Elasticsearch nodes.
125
+ It's comming support replicate to another RDB/noSQL.
107
126
  test_files:
108
127
  - test/helper.rb
109
128
  - test/plugin/test_in_mysql_replicator.rb
110
129
  - test/plugin/test_in_mysql_replicator_multi.rb
130
+ - test/plugin/test_out_mysql_replicator_elasticsearch.rb