fluent-plugin-mysql-replicator 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -14,7 +14,15 @@ gem install fluent-plugin-mysql-replicator
14
14
  /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
15
15
  `````
16
16
 
17
- ## Tutorial
17
+ ## Tutorial for Quickstart
18
+
19
+ It is useful for these purposes:
20
+
21
+ * trying out this plugin.
22
+ * replicating a small table (fewer than a few million records).
23
+
24
+ **Note:**
25
+ Syncing a table of 300 million rows consumes around 800MB of memory in a ruby 1.9.3 environment.
18
26
 
19
27
  #### configuration
20
28
 
@@ -57,9 +65,72 @@ $ tail -f /var/log/td-agent/td-agent.log
57
65
  2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
58
66
  `````
59
67
 
60
- ## Performance
68
+ ## Tutorial for Production
69
+
70
+ It is well suited to replicating millions of records and/or multiple tables using multiple threads.
71
+ This architecture stores the hash table in a MySQL management table instead of in Ruby's process memory.
72
+
73
+ **Note:**
74
+ Syncing a table of 300 million rows consumes around 20MB of memory in a ruby 1.9.3 environment.
75
+
76
+ #### prepare
77
+
78
+ * create database and tables.
79
+ * add replicator configuration.
80
+
81
+ ```
82
+ $ cat setup_mysql_replicator_multi.sql
83
+ CREATE DATABASE replicator_manager;
84
+ USE replicator_manager;
85
+
86
+ CREATE TABLE `hash_tables` (
87
+ `id` int(11) NOT NULL AUTO_INCREMENT,
88
+ `setting_name` varchar(255) NOT NULL,
89
+ `setting_query_pk` int(11) NOT NULL,
90
+ `setting_query_hash` varchar(255) NOT NULL,
91
+ PRIMARY KEY (`id`),
92
+ UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
93
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
94
+
95
+ CREATE TABLE `settings` (
96
+ `id` int(11) NOT NULL AUTO_INCREMENT,
97
+ `name` varchar(255) NOT NULL,
98
+ `host` varchar(255) NOT NULL DEFAULT 'localhost',
99
+ `port` int(11) NOT NULL DEFAULT '3306',
100
+ `username` varchar(255) NOT NULL,
101
+ `password` varchar(255) NOT NULL,
102
+ `database` varchar(255) NOT NULL,
103
+ `query` TEXT NOT NULL,
104
+ `interval` int(11) NOT NULL,
105
+ `tag` varchar(255) NOT NULL,
106
+ `primary_key` varchar(11) DEFAULT 'id',
107
+ `enable_delete` int(11) DEFAULT '1',
108
+ PRIMARY KEY (`id`),
109
+ UNIQUE KEY `name` (`name`)
110
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
111
+ ```
112
+
113
+ ```
114
+ $ mysql
115
+ mysql> source /path/to/setup_mysql_replicator_multi.sql
116
+ mysql> insert into settings ...snip...;
117
+ ```
61
118
 
62
- On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
119
+ #### configuration
120
+
121
+ `````
122
+ <source>
123
+ type mysql_replicator_multi
124
+ manager_host localhost
125
+ manager_username your_mysql_user
126
+ manager_password your_mysql_password
127
+ manager_database replicator_manager
128
+ </source>
129
+
130
+ <match replicator.*>
131
+ type stdout
132
+ </match>
133
+ `````
63
134
 
64
135
  ## TODO
65
136
 
@@ -1,11 +1,11 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.0.2"
4
+ s.version = "0.1.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
8
- s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server.}
8
+ s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It will support multiple table replication to another RDB/noSQL like Elasticsearch.}
9
9
 
10
10
  s.files = `git ls-files`.split("\n")
11
11
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -18,6 +18,7 @@ module Fluent
18
18
  config_param :tag, :string
19
19
  config_param :query, :string
20
20
  config_param :primary_key, :string, :default => 'id'
21
+ config_param :enable_delete, :bool, :default => 'yes'
21
22
 
22
23
  def configure(conf)
23
24
  super
@@ -59,10 +60,12 @@ module Fluent
59
60
  table_hash[row[@primary_key]] = current_hash
60
61
  end
61
62
  ids = current_ids
62
- deleted_ids = previous_ids - current_ids
63
- if deleted_ids.count > 0
64
- hash_delete_by_list(table_hash, deleted_ids)
65
- deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
63
# Detect and emit deletions only when the enable_delete option is on.
# The previous `unless @enable_delete` guard was inverted: it skipped delete
# detection exactly when the option was enabled and ran it when disabled.
if @enable_delete
  # Primary keys seen in the previous cycle but missing from this one.
  deleted_ids = previous_ids - current_ids
  if deleted_ids.count > 0
    hash_delete_by_list(table_hash, deleted_ids)
    deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
  end
end
67
70
  sleep @interval
68
71
  end
@@ -0,0 +1,201 @@
1
+ module Fluent
2
+ class MysqlReplicatorMultiInput < Fluent::Input
3
+ Plugin.register_input('mysql_replicator_multi', self)
4
+
5
# Loads the libraries this plugin needs when an instance is created, then
# delegates to the parent initializer.
def initialize
  %w[mysql2 digest/sha1].each { |library| require library }
  super
end
10
+
11
+ config_param :manager_host, :string, :default => 'localhost'
12
+ config_param :manager_port, :integer, :default => 3306
13
+ config_param :manager_username, :string, :default => nil
14
+ config_param :manager_password, :string, :default => ''
15
+ config_param :manager_database, :string, :default => 'replicator_manager'
16
+
17
# Applies the plugin configuration via the parent class and sets the fixed
# delay used between connection attempts.
#
# conf - the configuration element handed over by the caller.
def configure(conf)
  super(conf)
  # Wait time before a failed MySQL connection attempt is retried.
  @reconnect_interval = Config.time_value('10sec')
end
21
+
22
# Spawns one polling thread per row returned by #get_settings.
# Any StandardError raised while starting is logged instead of propagated.
def start
  @threads = []
  # Shared lock handed to every polling thread (see #poll).
  @mutex = Mutex.new
  get_settings.each do |setting|
    @threads << Thread.new { poll(setting) }
  end
  if @threads.empty?
    $log.error "mysql_replicator_multi: stop working due to empty configuration"
  end
rescue StandardError => e
  $log.error "error: #{e.message}"
  $log.error e.backtrace.join("\n")
end
37
+
38
# Stops every polling thread created by #start and waits for them to finish.
#
# Compared with a bare Thread.kill loop this
# * tolerates being called when #start never assigned @threads, and
# * joins each thread so that no worker is still running when it returns.
def shutdown
  workers = @threads || []
  # Signal all workers first so they terminate concurrently ...
  workers.each { |worker| worker.kill }
  # ... then wait for each of them to actually finish.
  workers.each { |worker| worker.join }
end
43
+
44
# Reads every row of the `settings` table from the manager database.
#
# Returns an Array with one entry per settings row (as yielded by the
# connection's query result).
#
# The connection opened here is only needed for this single query, so it is
# closed before returning instead of being leaked.
def get_settings
  manager_db = get_manager_connection
  begin
    manager_db.query("SELECT * FROM settings").to_a
  ensure
    manager_db.close
  end
end
53
+
54
# Polling loop executed by one thread per settings row.
#
# config - Hash of one `settings` row; this method reads the keys
#          'primary_key', 'query', 'interval' and 'password'.
#
# Each cycle opens a connection to the origin database, runs config['query'],
# and for every row calls #detect_insert_update and #detect_delete while
# holding @mutex. Any StandardError ends the loop and is logged.
#
# Fixes over the previous version:
# * rows are processed inside Mutex#synchronize, so an exception can no longer
#   leave the shared mutex locked and block every other polling thread;
# * the origin connection is closed in an ensure clause;
# * the logged configuration keeps its keys (password value masked);
# * the manager connection is created once under the mutex instead of being
#   overwritten (and the old one abandoned) by every thread.
def poll(config)
  # All users of @manager_db run while holding @mutex, so one shared
  # connection is sufficient.
  @mutex.synchronize { @manager_db ||= get_manager_connection }

  masked_config = Hash[config.map { |key, value|
    [key, key == 'password' ? value.to_s.gsub(/./, '*') : value]
  }]
  @mutex.synchronize {
    $log.info "mysql_replicator_multi: polling start. :config=>#{masked_config}"
  }

  primary_key = config['primary_key']
  previous_id = 0
  loop do
    db = get_origin_connection(config)
    begin
      db.query(config['query']).each do |row|
        @mutex.synchronize do
          current_id = row[primary_key]
          detect_insert_update(config, row)
          detect_delete(config, current_id, previous_id)
          previous_id = current_id
        end
      end
    ensure
      db.close
    end
    sleep config['interval']
  end
rescue StandardError => e
  $log.error "error: #{e.message}"
  $log.error e.backtrace.join("\n")
end
81
+
82
# Compares the SHA1 digest of a fetched row with the digest stored for its
# primary key and emits an insert or update event when they differ.
#
# config - Hash of one settings row ('primary_key', 'name' and 'tag' are read).
# row    - Hash of one record returned by the replication query.
#
# Nothing is emitted when the stored digest equals the current one.
def detect_insert_update(config, row)
  row_id = row[config['primary_key']]
  known_digest = get_stored_hash(config['name'], row_id)
  fresh_digest = Digest::SHA1.hexdigest(row.flatten.join)

  change =
    if known_digest.empty?
      :insert # no digest stored yet for this primary key
    elsif known_digest != fresh_digest
      :update # content changed since the last stored digest
    end
  return if change.nil?

  emit_record("#{config['tag']}.#{change}", row)
  update_hashtable({:event => change, :ids => row_id, :setting_name => config['name'], :hash => fresh_digest})
end
99
+
100
# Looks up the digest stored in `hash_tables` for one record.
#
# setting_name - name of the settings row the record belongs to.
# id           - primary key of the record (coerced with to_i).
#
# Returns the stored digest String, or an empty String when no row exists,
# so callers can rely on #empty? regardless of what the driver's result
# iteration returns.
#
# setting_name is escaped through the connection before being placed into
# the statement, so a name containing a quote no longer breaks the query.
def get_stored_hash(setting_name, id)
  name = @manager_db.escape(setting_name.to_s)
  query = "SELECT setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{name}'"
  @manager_db.query(query).each do |row|
    return row['setting_query_hash']
  end
  ''
end
106
+
107
# Emits a delete event for every stored primary key that #collect_gap_ids
# reports between the previous and the current row, then removes those keys
# from the hash table.
#
# config      - Hash of one settings row ('enable_delete', 'name', 'tag' and
#               'primary_key' are read).
# current_id  - primary key of the row being processed.
# previous_id - primary key of the row processed just before it.
#
# Does nothing unless config['enable_delete'] equals 1.
def detect_delete(config, current_id, previous_id)
  return unless config['enable_delete'] == 1

  gone = collect_gap_ids(config['name'], current_id, previous_id)
  return if gone.empty?

  kind = :delete
  gone.each do |id|
    emit_record("#{config['tag']}.#{kind}", {config['primary_key'] => id})
  end
  update_hashtable({:event => kind, :ids => gone, :setting_name => config['name']})
end
118
+
119
# Finds primary keys that are still recorded in `hash_tables` but were
# skipped between two consecutively fetched rows, i.e. rows that have
# disappeared from the origin table.
#
# setting_name - name of the settings row being replicated.
# current_id   - primary key of the row being processed.
# previous_id  - primary key of the row processed just before it.
#
# Returns an Array of stored primary keys, empty when the two ids are
# adjacent.
#
# NOTE(review): like the original logic this presumes the replication query
# yields rows in ascending primary key order — confirm for each setting.
def collect_gap_ids(setting_name, current_id, previous_id)
  current = current_id.to_i
  previous = previous_id.to_i

  range_condition =
    if (current - previous) > 1
      # Hole inside one pass: everything strictly between the two ids.
      "setting_query_pk > #{previous} AND setting_query_pk < #{current}"
    elsif previous > current
      # A new pass started: ids above the last row of the previous pass are
      # gone, and so are ids below the first row of this pass. The lower
      # bound used to be missing, so deleting the lowest rows went unnoticed.
      "(setting_query_pk > #{previous} OR setting_query_pk < #{current})"
    elsif previous == current
      # Only one row is left: every other stored id is gone.
      "(setting_query_pk > #{current} OR setting_query_pk < #{current})"
    end
  return [] if range_condition.nil?

  name = @manager_db.escape(setting_name.to_s)
  query = "SELECT setting_query_pk FROM hash_tables WHERE setting_name = '#{name}' AND #{range_condition}"
  ids = []
  @manager_db.query(query).each do |row|
    ids << row['setting_query_pk']
  end
  ids
end
141
+
142
# Applies an insert, update or delete to `hash_tables` for one or many ids.
#
# opts - Hash with
#        :event        - :insert, :update or :delete (anything else is ignored)
#        :ids          - a single primary key or an Array of primary keys
#        :setting_name - name of the settings row the ids belong to
#        :hash         - digest to store (used by :insert and :update)
#
# String values are escaped through the connection and ids are coerced with
# to_i (the column is an integer), so values containing quotes cannot break
# the statement. Array() accepts any scalar id, not only Integer instances.
def update_hashtable(opts)
  name = @manager_db.escape(opts[:setting_name].to_s)
  digest = @manager_db.escape(opts[:hash].to_s)

  Array(opts[:ids]).each do |id|
    pk = id.to_i
    query =
      case opts[:event]
      when :insert
        "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash) values('#{name}',#{pk},'#{digest}')"
      when :update
        "update hash_tables set setting_query_hash = '#{digest}' WHERE setting_name = '#{name}' AND setting_query_pk = #{pk}"
      when :delete
        "delete from hash_tables WHERE setting_name = '#{name}' AND setting_query_pk = #{pk}"
      end
    @manager_db.query(query) unless query.nil?
  end
end
156
+
157
# Hands one record to the engine under the given tag, stamped with the
# engine's current time.
#
# tag    - event tag to emit under.
# record - the event payload.
def emit_record(tag, record)
  emitted_at = Engine.now
  Engine.emit(tag, emitted_at, record)
end
160
+
161
# Opens a connection to the manager database described by the manager_*
# configuration parameters.
#
# Returns the new Mysql2::Client.
#
# A failed attempt is logged as a warning and retried after
# @reconnect_interval seconds; retrying without an upper bound is deliberate
# so the plugin keeps waiting for the database to come back.
# Only StandardError is retried: rescuing Exception would also swallow
# Interrupt/SystemExit and make the retry loop impossible to stop.
def get_manager_connection
  begin
    return Mysql2::Client.new(
      :host => @manager_host,
      :port => @manager_port,
      :username => @manager_username,
      :password => @manager_password,
      :database => @manager_database,
      :encoding => 'utf8',
      :reconnect => true,
      :stream => false,
      :cache_rows => false
    )
  rescue StandardError => e
    $log.warn "mysql_replicator_multi: #{e}"
    sleep @reconnect_interval
    retry
  end
end
180
+
181
# Opens a streaming connection to the origin database described by one
# settings row.
#
# config - Hash of one `settings` row; 'host', 'port', 'username',
#          'password' and 'database' are read.
#
# Returns the new Mysql2::Client.
#
# The port is taken from config['port'], the column defined in the settings
# table. The previous key 'manager_port' does not exist in that table, so the
# configured port was never applied.
#
# A failed attempt is logged as a warning and retried after
# @reconnect_interval seconds (deliberately unbounded). Only StandardError is
# retried so Interrupt/SystemExit are not swallowed.
def get_origin_connection(config)
  begin
    return Mysql2::Client.new(
      :host => config['host'],
      :port => config['port'],
      :username => config['username'],
      :password => config['password'],
      :database => config['database'],
      :encoding => 'utf8',
      :reconnect => true,
      :stream => true,
      :cache_rows => false
    )
  rescue StandardError => e
    $log.warn "mysql_replicator_multi: #{e}"
    sleep @reconnect_interval
    retry
  end
end
200
+ end
201
+ end
@@ -0,0 +1,28 @@
1
+ CREATE DATABASE replicator_manager;
2
+ USE replicator_manager;
3
+
4
+ CREATE TABLE `hash_tables` (
5
+ `id` int(11) NOT NULL AUTO_INCREMENT,
6
+ `setting_name` varchar(255) NOT NULL,
7
+ `setting_query_pk` int(11) NOT NULL,
8
+ `setting_query_hash` varchar(255) NOT NULL,
9
+ PRIMARY KEY (`id`),
10
+ UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
11
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
12
+
13
+ CREATE TABLE `settings` (
14
+ `id` int(11) NOT NULL AUTO_INCREMENT,
15
+ `name` varchar(255) NOT NULL,
16
+ `host` varchar(255) NOT NULL DEFAULT 'localhost',
17
+ `port` int(11) NOT NULL DEFAULT '3306',
18
+ `username` varchar(255) NOT NULL,
19
+ `password` varchar(255) NOT NULL,
20
+ `database` varchar(255) NOT NULL,
21
+ `query` TEXT NOT NULL,
22
+ `interval` int(11) NOT NULL,
23
+ `tag` varchar(255) NOT NULL,
24
+ `primary_key` varchar(11) DEFAULT 'id',
25
+ `enable_delete` int(11) DEFAULT '1',
26
+ PRIMARY KEY (`id`),
27
+ UNIQUE KEY `name` (`name`)
28
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -23,6 +23,7 @@ unless ENV.has_key?('VERBOSE')
23
23
  end
24
24
 
25
25
  require 'fluent/plugin/in_mysql_replicator'
26
+ require 'fluent/plugin/in_mysql_replicator_multi'
26
27
 
27
28
  class Test::Unit::TestCase
28
29
  end
@@ -28,11 +28,13 @@ class MysqlReplicatorInputTest < Test::Unit::TestCase
28
28
  interval 30
29
29
  tag input.mysql
30
30
  query SELECT id, text from search_text
31
+ enable_delete no
31
32
  ]
32
33
  d.instance.inspect
33
34
  assert_equal 'localhost', d.instance.host
34
35
  assert_equal 3306, d.instance.port
35
36
  assert_equal 30, d.instance.interval
36
37
  assert_equal 'input.mysql', d.instance.tag
38
+ assert_equal false, d.instance.enable_delete
37
39
  end
38
40
  end
@@ -0,0 +1,26 @@
1
+ require 'helper'
2
+
3
# Configuration test for the mysql_replicator_multi input plugin.
class MysqlReplicatorMultiInputTest < Test::Unit::TestCase
  # Minimal configuration; manager_database is left out on purpose so the
  # test can check its default value.
  CONFIG = %[
    manager_host localhost
    manager_port 3306
    manager_username foo
    manager_password bar
  ]

  def setup
    Fluent::Test.setup
  end

  # Builds a test driver around the plugin and configures it with +conf+.
  def create_driver(conf=CONFIG,tag='test')
    driver = Fluent::Test::OutputTestDriver.new(Fluent::MysqlReplicatorMultiInput, tag)
    driver.configure(conf)
  end

  # The configured values and the default database name must be readable
  # from the plugin instance.
  def test_configure
    plugin = create_driver(CONFIG).instance
    plugin.inspect
    assert_equal 'localhost', plugin.manager_host
    assert_equal 3306, plugin.manager_port
    assert_equal 'replicator_manager', plugin.manager_database
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-06 00:00:00.000000000 Z
12
+ date: 2013-12-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -74,8 +74,11 @@ files:
74
74
  - Rakefile
75
75
  - fluent-plugin-mysql-replicator.gemspec
76
76
  - lib/fluent/plugin/in_mysql_replicator.rb
77
+ - lib/fluent/plugin/in_mysql_replicator_multi.rb
78
+ - setup_mysql_replicator_multi.sql
77
79
  - test/helper.rb
78
80
  - test/plugin/test_in_mysql_replicator.rb
81
+ - test/plugin/test_in_mysql_replicator_multi.rb
79
82
  homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
80
83
  licenses: []
81
84
  post_install_message:
@@ -99,8 +102,9 @@ rubyforge_project:
99
102
  rubygems_version: 1.8.23
100
103
  signing_key:
101
104
  specification_version: 3
102
- summary: Fluentd input plugin to track insert/update/delete event from MySQL database
103
- server.
105
+ summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
106
+ It will support multiple table replication to another RDB/noSQL like Elasticsearch.
104
107
  test_files:
105
108
  - test/helper.rb
106
109
  - test/plugin/test_in_mysql_replicator.rb
110
+ - test/plugin/test_in_mysql_replicator_multi.rb