fluent-plugin-mysql-replicator 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -14,7 +14,15 @@ gem install fluent-plugin-mysql-replicator
14
14
  /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-mysql-replicator
15
15
  `````
16
16
 
17
- ## Tutorial
17
+ ## Tutorial for Quickstart
18
+
19
+ It is useful for these purposes:
20
+
21
+ * trying out this plugin.
22
+ * replicating small tables that hold fewer than millions of records.
23
+
24
+ **Note:**
25
+ Syncing a table with 300 million rows consumes around 800MB of memory in a Ruby 1.9.3 environment.
18
26
 
19
27
  #### configuration
20
28
 
@@ -57,9 +65,72 @@ $ tail -f /var/log/td-agent/td-agent.log
57
65
  2013-11-25 18:22:45 +0900 replicator.delete: {"id":"1"}
58
66
  `````
59
67
 
60
- ## Performance
68
+ ## Tutorial for Production
69
+
70
+ It is very useful for replicating millions of records and/or multiple tables using multiple threads.
71
+ This architecture stores the hash table in a MySQL management table instead of in Ruby's internal memory.
72
+
73
+ **Note:**
74
+ Syncing a table with 300 million rows consumes around 20MB of memory in a Ruby 1.9.3 environment.
75
+
76
+ #### prepare
77
+
78
+ * create database and tables.
79
+ * add replicator configuration.
80
+
81
+ ```
82
+ $ cat setup_mysql_replicator_multi.sql
83
+ CREATE DATABASE replicator_manager;
84
+ USE replicator_manager;
85
+
86
+ CREATE TABLE `hash_tables` (
87
+ `id` int(11) NOT NULL AUTO_INCREMENT,
88
+ `setting_name` varchar(255) NOT NULL,
89
+ `setting_query_pk` int(11) NOT NULL,
90
+ `setting_query_hash` varchar(255) NOT NULL,
91
+ PRIMARY KEY (`id`),
92
+ UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
93
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
94
+
95
+ CREATE TABLE `settings` (
96
+ `id` int(11) NOT NULL AUTO_INCREMENT,
97
+ `name` varchar(255) NOT NULL,
98
+ `host` varchar(255) NOT NULL DEFAULT 'localhost',
99
+ `port` int(11) NOT NULL DEFAULT '3306',
100
+ `username` varchar(255) NOT NULL,
101
+ `password` varchar(255) NOT NULL,
102
+ `database` varchar(255) NOT NULL,
103
+ `query` TEXT NOT NULL,
104
+ `interval` int(11) NOT NULL,
105
+ `tag` varchar(255) NOT NULL,
106
+ `primary_key` varchar(11) DEFAULT 'id',
107
+ `enable_delete` int(11) DEFAULT '1',
108
+ PRIMARY KEY (`id`),
109
+ UNIQUE KEY `name` (`name`)
110
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
111
+ ```
112
+
113
+ ```
114
+ $ mysql
115
+ mysql> source /path/to/setup_mysql_replicator_multi.sql
116
+ mysql> insert into settings ...snip...;
117
+ ```
61
118
 
62
- On syncing 300 million rows table, it will consume around 800MB of memory with ruby 1.9.3 environment.
119
+ #### configuration
120
+
121
+ `````
122
+ <source>
123
+ type mysql_replicator_multi
124
+ manager_host localhost
125
+ manager_username your_mysql_user
126
+ manager_password your_mysql_password
127
+ manager_database replicator_manager
128
+ </source>
129
+
130
+ <match replicator.*>
131
+ type stdout
132
+ </match>
133
+ `````
63
134
 
64
135
  ## TODO
65
136
 
@@ -1,11 +1,11 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.0.2"
4
+ s.version = "0.1.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
8
- s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL database server.}
8
+ s.summary = %q{Fluentd input plugin to track insert/update/delete event from MySQL databases. It will support multiple table replication to another RDB/noSQL like Elasticsearch.}
9
9
 
10
10
  s.files = `git ls-files`.split("\n")
11
11
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -18,6 +18,7 @@ module Fluent
18
18
  config_param :tag, :string
19
19
  config_param :query, :string
20
20
  config_param :primary_key, :string, :default => 'id'
21
+ config_param :enable_delete, :bool, :default => 'yes'
21
22
 
22
23
  def configure(conf)
23
24
  super
@@ -59,10 +60,12 @@ module Fluent
59
60
  table_hash[row[@primary_key]] = current_hash
60
61
  end
61
62
  ids = current_ids
62
- deleted_ids = previous_ids - current_ids
63
- if deleted_ids.count > 0
64
- hash_delete_by_list(table_hash, deleted_ids)
65
- deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
63
+ unless @enable_delete
64
+ deleted_ids = previous_ids - current_ids
65
+ if deleted_ids.count > 0
66
+ hash_delete_by_list(table_hash, deleted_ids)
67
+ deleted_ids.each {|id| emit_record(:delete, {@primary_key => id})}
68
+ end
66
69
  end
67
70
  sleep @interval
68
71
  end
@@ -0,0 +1,201 @@
1
module Fluent
  # Fluentd input plugin that polls several MySQL queries, one thread per row
  # of the manager database's `settings` table, and emits
  # "<tag>.insert" / "<tag>.update" / "<tag>.delete" events.
  #
  # Change detection compares a SHA1 digest of each fetched row against the
  # digest stored in the manager database's `hash_tables` table, so the
  # per-row state lives in MySQL rather than in Ruby memory.
  class MysqlReplicatorMultiInput < Fluent::Input
    Plugin.register_input('mysql_replicator_multi', self)

    # Options shared by the manager connection and the origin connections.
    CONNECTION_DEFAULTS = {
      :encoding => 'utf8',
      :reconnect => true,
      :cache_rows => false
    }.freeze

    # Libraries are required here (not at file load) so that merely loading
    # the plugin file does not need mysql2 to be installed.
    def initialize
      require 'mysql2'
      require 'digest/sha1'
      super
    end

    config_param :manager_host, :string, :default => 'localhost'
    config_param :manager_port, :integer, :default => 3306
    config_param :manager_username, :string, :default => nil
    config_param :manager_password, :string, :default => ''
    config_param :manager_database, :string, :default => 'replicator_manager'

    # Applies the Fluentd configuration and fixes the pause between
    # connection attempts at 10 seconds.
    def configure(conf)
      super
      @reconnect_interval = Config.time_value('10sec')
    end

    # Reads every row of `settings` and starts one polling thread per row.
    # Any error raised while doing so is logged instead of propagated.
    def start
      @threads = []
      @mutex = Mutex.new
      get_settings.each do |config|
        @threads << Thread.new { poll(config) }
      end
      $log.error "mysql_replicator_multi: stop working due to empty configuration" if @threads.empty?
    rescue StandardError => e
      $log.error "error: #{e.message}"
      $log.error e.backtrace.join("\n")
    end

    # Kills every polling thread. Tolerates being called when #start never
    # got as far as creating the thread list.
    def shutdown
      Array(@threads).each do |thread|
        Thread.kill(thread)
      end
    end

    # Returns all rows of the manager `settings` table as an Array of Hashes.
    # Uses a short-lived connection that is always closed afterwards.
    def get_settings
      manager_db = get_manager_connection
      manager_db.query("SELECT * FROM settings").to_a
    ensure
      manager_db.close if manager_db
    end

    # Polling loop for one `settings` row: runs config['query'] against the
    # origin database every config['interval'] seconds and feeds each row to
    # the insert/update and delete detectors.
    #
    # An error inside the loop is logged and ends this thread.
    def poll(config)
      @mutex.synchronize do
        # One manager connection is shared by all threads; every use of it
        # happens while @mutex is held, so it is never used concurrently.
        @manager_db ||= get_manager_connection
        $log.info "mysql_replicator_multi: polling start. :config=>#{masked_config(config)}"
      end
      primary_key = config['primary_key']
      previous_id = current_id = 0
      loop do
        db = get_origin_connection(config)
        begin
          db.query(config['query']).each do |row|
            # synchronize (rather than lock/unlock) releases the mutex even
            # when a detector raises, so other threads are not dead-locked.
            @mutex.synchronize do
              current_id = row[primary_key]
              detect_insert_update(config, row)
              detect_delete(config, current_id, previous_id)
              previous_id = current_id
            end
          end
        ensure
          db.close
        end
        sleep config['interval']
      end
    rescue StandardError => e
      $log.error "error: #{e.message}"
      $log.error e.backtrace.join("\n")
    end

    # Emits "<tag>.insert" when no digest is stored for the row's primary
    # key, "<tag>.update" when the stored digest differs from the current
    # one, and records the new digest in `hash_tables`. Does nothing when the
    # digests match.
    def detect_insert_update(config, row)
      current_id = row[config['primary_key']]
      stored_hash = get_stored_hash(config['name'], current_id)
      current_hash = Digest::SHA1.hexdigest(row.flatten.join)

      event =
        if stored_hash.empty?
          :insert
        elsif stored_hash != current_hash
          :update
        end
      return if event.nil?

      emit_record("#{config['tag']}.#{event}", row)
      update_hashtable(:event => event, :ids => current_id,
                       :setting_name => config['name'], :hash => current_hash)
    end

    # Returns the digest stored for (setting_name, id), or an empty value
    # (responds to #empty? with true) when nothing is stored yet.
    def get_stored_hash(setting_name, id)
      query = "SELECT setting_query_hash FROM hash_tables" \
              " WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{escape(setting_name)}'"
      row = @manager_db.query(query).first
      row ? row['setting_query_hash'] : ''
    end

    # When the setting has enable_delete == 1, emits "<tag>.delete" for every
    # stored primary key that lies in the gap between the previous and the
    # current row, and removes those keys from `hash_tables`.
    def detect_delete(config, current_id, previous_id)
      return unless config['enable_delete'] == 1
      deleted_ids = collect_gap_ids(config['name'], current_id, previous_id)
      return if deleted_ids.empty?

      deleted_ids.each do |id|
        emit_record("#{config['tag']}.delete", {config['primary_key'] => id})
      end
      update_hashtable(:event => :delete, :ids => deleted_ids, :setting_name => config['name'])
    end

    # Returns the stored primary keys that were skipped between two
    # consecutively fetched rows.
    #
    # NOTE(review): this relies on the origin query returning rows in
    # ascending primary-key order - confirm for each configured query.
    def collect_gap_ids(setting_name, current_id, previous_id)
      previous_pk = previous_id.to_i
      current_pk = current_id.to_i
      condition =
        if (current_id - previous_id) > 1
          # Keys strictly between two neighbouring rows.
          "setting_query_pk > #{previous_pk} AND setting_query_pk < #{current_pk}"
        elsif previous_id > current_id
          # A new pass has started: keys after the last row of the previous
          # pass and keys before the first row of this pass are both gone.
          "(setting_query_pk > #{previous_pk} OR setting_query_pk < #{current_pk})"
        elsif previous_id == current_id
          # Same single row again: every other stored key is gone.
          "(setting_query_pk > #{current_pk} OR setting_query_pk < #{current_pk})"
        end
      return [] if condition.nil?

      query = "SELECT setting_query_pk FROM hash_tables" \
              " WHERE setting_name = '#{escape(setting_name)}' AND #{condition}"
      @manager_db.query(query).map { |row| row['setting_query_pk'] }
    end

    # Applies one event to `hash_tables` for each given primary key.
    #
    # opts[:event]        - :insert, :update or :delete (anything else is ignored)
    # opts[:ids]          - a single primary key or an Array of them
    # opts[:setting_name] - value of settings.name
    # opts[:hash]         - row digest (used by :insert and :update)
    def update_hashtable(opts)
      name = escape(opts[:setting_name])
      digest = escape(opts[:hash])
      Array(opts[:ids]).each do |id|
        pk = id.to_i
        query =
          case opts[:event]
          when :insert
            "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash)" \
            " values('#{name}',#{pk},'#{digest}')"
          when :update
            "update hash_tables set setting_query_hash = '#{digest}'" \
            " WHERE setting_name = '#{name}' AND setting_query_pk = #{pk}"
          when :delete
            "delete from hash_tables WHERE setting_name = '#{name}' AND setting_query_pk = #{pk}"
          end
        @manager_db.query(query) unless query.nil?
      end
    end

    # Emits one record to the Fluentd engine with the current time.
    def emit_record(tag, record)
      Engine.emit(tag, Engine.now, record)
    end

    # Opens a connection to the manager database, retrying until it succeeds.
    def get_manager_connection
      connect_with_retry(
        :host => @manager_host,
        :port => @manager_port,
        :username => @manager_username,
        :password => @manager_password,
        :database => @manager_database,
        :stream => false
      )
    end

    # Opens a streaming connection to the origin database described by one
    # `settings` row, retrying until it succeeds.
    def get_origin_connection(config)
      connect_with_retry(
        :host => config['host'],
        # The settings column is `port`; there is no `manager_port` column.
        :port => config['port'],
        :username => config['username'],
        :password => config['password'],
        :database => config['database'],
        :stream => true
      )
    end

    private

    # Creates a Mysql2 client; on failure logs a warning, waits
    # @reconnect_interval seconds and tries again. The retry is deliberately
    # unbounded so the plugin keeps waiting for a database to come back.
    # Only StandardError is rescued so process-level signals are not swallowed.
    def connect_with_retry(options)
      Mysql2::Client.new(CONNECTION_DEFAULTS.merge(options))
    rescue StandardError => e
      $log.warn "mysql_replicator_multi: #{e}"
      sleep @reconnect_interval
      retry
    end

    # Copy of a settings row with the password replaced by asterisks, for logging.
    def masked_config(config)
      Hash[config.map { |key, value| [key, key == 'password' ? value.to_s.gsub(/./, '*') : value] }]
    end

    # Escapes a value for use inside a single-quoted SQL string literal.
    def escape(value)
      @manager_db.escape(value.to_s)
    end
  end
end
@@ -0,0 +1,28 @@
1
-- Manager database used by the mysql_replicator_multi input plugin.
-- The script can be run more than once: objects that already exist are kept.
CREATE DATABASE IF NOT EXISTS replicator_manager;
USE replicator_manager;

-- Row digests: one row per (setting_name, primary key of the replicated row).
-- The plugin inserts, updates and deletes rows here to detect changes.
CREATE TABLE IF NOT EXISTS `hash_tables` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `setting_name` varchar(255) NOT NULL,
  `setting_query_pk` int(11) NOT NULL,
  `setting_query_hash` varchar(255) NOT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `setting_query_pk` (`setting_query_pk`,`setting_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Replication settings: the plugin starts one polling thread per row.
-- `enable_delete` stays an integer column because the plugin compares it with 1.
CREATE TABLE IF NOT EXISTS `settings` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL,
  `host` varchar(255) NOT NULL DEFAULT 'localhost',
  `port` int(11) NOT NULL DEFAULT '3306',
  `username` varchar(255) NOT NULL,
  `password` varchar(255) NOT NULL,
  `database` varchar(255) NOT NULL,
  `query` TEXT NOT NULL,
  `interval` int(11) NOT NULL,
  `tag` varchar(255) NOT NULL,
  `primary_key` varchar(11) DEFAULT 'id',
  `enable_delete` int(11) DEFAULT '1',
  PRIMARY KEY (`id`),
  UNIQUE KEY `name` (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -23,6 +23,7 @@ unless ENV.has_key?('VERBOSE')
23
23
  end
24
24
 
25
25
  require 'fluent/plugin/in_mysql_replicator'
26
+ require 'fluent/plugin/in_mysql_replicator_multi'
26
27
 
27
28
  class Test::Unit::TestCase
28
29
  end
@@ -28,11 +28,13 @@ class MysqlReplicatorInputTest < Test::Unit::TestCase
28
28
  interval 30
29
29
  tag input.mysql
30
30
  query SELECT id, text from search_text
31
+ enable_delete no
31
32
  ]
32
33
  d.instance.inspect
33
34
  assert_equal 'localhost', d.instance.host
34
35
  assert_equal 3306, d.instance.port
35
36
  assert_equal 30, d.instance.interval
36
37
  assert_equal 'input.mysql', d.instance.tag
38
+ assert_equal false, d.instance.enable_delete
37
39
  end
38
40
  end
@@ -0,0 +1,26 @@
1
+ require 'helper'
2
+
3
# Configuration tests for the mysql_replicator_multi input plugin.
# Only #configure is exercised, so no MySQL server is contacted.
class MysqlReplicatorMultiInputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    manager_host      localhost
    manager_port      3306
    manager_username  foo
    manager_password  bar
  ]

  # Builds a configured driver around the plugin.
  # NOTE(review): an output test driver is used for an input plugin here;
  # it is sufficient for configure-only tests - confirm before adding tests
  # that run the plugin.
  def create_driver(conf=CONFIG,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::MysqlReplicatorMultiInput, tag).configure(conf)
  end

  # Explicit values are taken from CONFIG; manager_database is not set there,
  # so the plugin default is expected.
  def test_configure
    d = create_driver(CONFIG)
    d.instance.inspect
    assert_equal 'localhost', d.instance.manager_host
    assert_equal 3306, d.instance.manager_port
    assert_equal 'foo', d.instance.manager_username
    assert_equal 'bar', d.instance.manager_password
    assert_equal 'replicator_manager', d.instance.manager_database
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-06 00:00:00.000000000 Z
12
+ date: 2013-12-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -74,8 +74,11 @@ files:
74
74
  - Rakefile
75
75
  - fluent-plugin-mysql-replicator.gemspec
76
76
  - lib/fluent/plugin/in_mysql_replicator.rb
77
+ - lib/fluent/plugin/in_mysql_replicator_multi.rb
78
+ - setup_mysql_replicator_multi.sql
77
79
  - test/helper.rb
78
80
  - test/plugin/test_in_mysql_replicator.rb
81
+ - test/plugin/test_in_mysql_replicator_multi.rb
79
82
  homepage: https://github.com/y-ken/fluent-plugin-mysql-replicator
80
83
  licenses: []
81
84
  post_install_message:
@@ -99,8 +102,9 @@ rubyforge_project:
99
102
  rubygems_version: 1.8.23
100
103
  signing_key:
101
104
  specification_version: 3
102
- summary: Fluentd input plugin to track insert/update/delete event from MySQL database
103
- server.
105
+ summary: Fluentd input plugin to track insert/update/delete event from MySQL databases.
106
+ It will support multiple table replication to another RDB/noSQL like Elasticsearch.
104
107
  test_files:
105
108
  - test/helper.rb
106
109
  - test/plugin/test_in_mysql_replicator.rb
110
+ - test/plugin/test_in_mysql_replicator_multi.rb