fluent-plugin-mysql-replicator 0.2.3 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  ## Overview
4
4
 
5
5
  Fluentd input plugin to track insert/update/delete events from a MySQL database server.
6
- Not only that, it could multiple table replication into Elasticsearch nodes.
6
+ Not only that, it can replicate multiple tables into Elasticsearch/Solr.
7
7
  Support for replicating to other RDB/NoSQL stores is coming.
8
8
 
9
9
  ## Installation
@@ -162,6 +162,8 @@ CREATE TABLE `settings` (
162
162
  `interval` int(11) NOT NULL,
163
163
  `primary_key` varchar(255) DEFAULT 'id',
164
164
  `enable_delete` int(11) DEFAULT '1',
165
+ `enable_loose_insert` int(11) DEFAULT '0',
166
+ `enable_loose_delete` int(11) DEFAULT '0',
165
167
  PRIMARY KEY (`id`),
166
168
  UNIQUE KEY `name` (`name`)
167
169
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -184,9 +186,9 @@ VALUES
184
186
 
185
187
  This is a sample of the row you have inserted.
186
188
 
187
- | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |
189
+ | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |enable_loose_insert |enable_loose_delete |
188
190
  |----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|---------------------|---------------------|
189
- | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 |
191
+ | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 | 0 | 0 |
190
192
 
191
193
  ### configuration
192
194
 
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.2.3"
4
+ s.version = "0.3.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
@@ -53,6 +53,7 @@ module Fluent
53
53
  table_hash = Hash.new
54
54
  ids = Array.new
55
55
  loop do
56
+ rows_count = 0
56
57
  start_time = Time.now
57
58
  previous_ids = ids
58
59
  current_ids = Array.new
@@ -72,6 +73,7 @@ module Fluent
72
73
  emit_record(tag, row)
73
74
  end
74
75
  table_hash[row[@primary_key]] = current_hash
76
+ rows_count += 1
75
77
  end
76
78
  ids = current_ids
77
79
  if @enable_delete
@@ -89,7 +91,7 @@ module Fluent
89
91
  end
90
92
  end
91
93
  elapsed_time = sprintf("%0.02f", Time.now - start_time)
92
- $log.info "mysql_replicator: finished execution :tag=>#{tag} :elapsed_time=>#{elapsed_time} sec"
94
+ $log.info "mysql_replicator: finished execution :tag=>#{tag} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
93
95
  sleep @interval
94
96
  end
95
97
  end
@@ -100,7 +102,7 @@ module Fluent
100
102
 
101
103
  def format_tag(tag, param)
102
104
  pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key}
103
- tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
105
+ tag.gsub(/(\${[a-z_]+})/) do
104
106
  $log.warn "mysql_replicator: missing placeholder. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
105
107
  pattern[$1]
106
108
  end
@@ -15,6 +15,9 @@ module Fluent
15
15
  config_param :manager_database, :string, :default => 'replicator_manager'
16
16
  config_param :tag, :string, :default => nil
17
17
 
18
+ config_param :bulk_insert_count, :integer, :default => 50
19
+ config_param :bulk_insert_timeout, :integer, :default => 10
20
+
18
21
  def configure(conf)
19
22
  super
20
23
  @reconnect_interval = Config.time_value('10sec')
@@ -27,6 +30,13 @@ module Fluent
27
30
  begin
28
31
  @threads = []
29
32
  @mutex = Mutex.new
33
+ @manager_db = get_manager_connection
34
+ @manager_db.query("SET SESSION wait_timeout=1800;")
35
+ @threads << Thread.new {
36
+ @hash_table_bulk_insert = []
37
+ @hash_table_bulk_insert_last_time = Time.now
38
+ hash_table_flusher
39
+ }
30
40
  get_settings.each do |config|
31
41
  @threads << Thread.new {
32
42
  poll(config)
@@ -57,33 +67,35 @@ module Fluent
57
67
 
58
68
  def poll(config)
59
69
  begin
60
- @manager_db = get_manager_connection
61
70
  masked_config = Hash[config.map {|k,v| (k == 'password') ? [k, v.to_s.gsub(/./, '*')] : [k,v]}]
62
71
  @mutex.synchronize {
63
72
  $log.info "mysql_replicator_multi: polling start. :config=>#{masked_config}"
64
73
  }
65
74
  primary_key = config['primary_key']
66
- previous_id = current_id = 0
75
+ previous_id = current_id = nil
67
76
  loop do
77
+ rows_count = 0
68
78
  start_time = Time.now
69
79
  db = get_origin_connection(config)
70
80
  db.query(config['query']).each do |row|
71
- @mutex.lock
72
81
  row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
73
82
  current_id = row[primary_key]
74
- if row[primary_key].nil?
75
- $log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
76
- @mutex.unlock
77
- break
78
- end
79
- detect_insert_update(config, row)
80
- detect_delete(config, current_id, previous_id)
83
+ @mutex.synchronize {
84
+ if row[primary_key].nil?
85
+ $log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
86
+ break
87
+ end
88
+ detect_insert_update(config, row)
89
+ detect_delete(config, current_id, previous_id)
90
+ }
81
91
  previous_id = current_id
82
- @mutex.unlock
92
+ rows_count += 1
83
93
  end
84
94
  db.close
85
95
  elapsed_time = sprintf("%0.02f", Time.now - start_time)
86
- $log.info "mysql_replicator_multi: finished execution :setting_name=>#{config['name']} :elapsed_time=>#{elapsed_time} sec"
96
+ @mutex.synchronize {
97
+ $log.info "mysql_replicator_multi: execution finished. :setting_name=>#{config['name']} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
98
+ }
87
99
  sleep config['interval']
88
100
  end
89
101
  rescue StandardError => e
@@ -98,7 +110,7 @@ module Fluent
98
110
  def detect_insert_update(config, row)
99
111
  primary_key = config['primary_key']
100
112
  current_id = row[primary_key]
101
- stored_hash = get_stored_hash(config['name'], current_id)
113
+ stored_hash = config['enable_loose_insert'] == 1 ? "" : get_stored_hash(config['name'], current_id)
102
114
  current_hash = Digest::SHA1.hexdigest(row.flatten.join)
103
115
 
104
116
  event = nil
@@ -110,20 +122,19 @@ module Fluent
110
122
  unless event.nil?
111
123
  tag = format_tag(@tag, {:name => config['name'], :event => event, :primary_key => config['primary_key']})
112
124
  emit_record(tag, row)
113
- update_hashtable({:event => event, :ids => current_id, :setting_name => config['name'], :hash => current_hash})
125
+ update_hashtable({:event => event, :id => current_id, :setting_name => config['name'], :hash => current_hash})
114
126
  end
115
127
  end
116
128
 
117
129
  def get_stored_hash(setting_name, id)
118
- query = "SELECT setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
119
- @manager_db.query(query).each do |row|
120
- return row['setting_query_hash']
121
- end
130
+ query = "SELECT SQL_NO_CACHE setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
131
+ row = @manager_db.query(query).first
132
+ return row.nil? ? "" : row['setting_query_hash']
122
133
  end
123
134
 
124
135
  def detect_delete(config, current_id, previous_id)
125
- return unless config['enable_delete'] == 1
126
- deleted_ids = collect_gap_ids(config['name'], current_id, previous_id)
136
+ return if config['enable_delete'] != 1 || previous_id.nil?
137
+ deleted_ids = collect_gap_ids(config, current_id, previous_id)
127
138
  unless deleted_ids.empty?
128
139
  event = :delete
129
140
  deleted_ids.each do |id|
@@ -134,17 +145,20 @@ module Fluent
134
145
  end
135
146
  end
136
147
 
137
- def collect_gap_ids(setting_name, current_id, previous_id)
138
- if (current_id - previous_id) > 1
139
- query = "SELECT setting_query_pk FROM hash_tables
148
+ def collect_gap_ids(config, current_id, previous_id)
149
+ setting_name = config['name']
150
+ if (current_id - previous_id) > 1 && config['enable_loose_delete'] == 0
151
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
140
152
  WHERE setting_name = '#{setting_name}'
141
153
  AND setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
154
+ elsif (current_id - previous_id) > 1 && config['enable_loose_delete'] == 1
155
+ return [*previous_id...current_id] - [current_id,previous_id]
142
156
  elsif previous_id > current_id
143
- query = "SELECT setting_query_pk FROM hash_tables
157
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
144
158
  WHERE setting_name = '#{setting_name}'
145
159
  AND setting_query_pk > #{previous_id.to_i}"
146
160
  elsif previous_id == current_id
147
- query = "SELECT setting_query_pk FROM hash_tables
161
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
148
162
  WHERE setting_name = '#{setting_name}'
149
163
  AND (setting_query_pk > #{current_id.to_i} OR setting_query_pk < #{current_id.to_i})"
150
164
  end
@@ -158,28 +172,60 @@ module Fluent
158
172
  end
159
173
 
160
174
  def update_hashtable(opts)
161
- ids = opts[:ids].is_a?(Integer) ? [opts[:ids]] : opts[:ids]
162
- ids.each do |id|
163
- case opts[:event]
164
- when :insert
165
- query = "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash) values('#{opts[:setting_name]}','#{id}','#{opts[:hash]}')"
166
- when :update
167
- query = "update hash_tables set setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
168
- when :delete
169
- query = "delete from hash_tables WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
170
- end
171
- @manager_db.query(query) unless query.nil?
175
+ case opts[:event]
176
+ when :insert
177
+ add_hash_table_buffer(opts[:setting_name], opts[:id], opts[:hash])
178
+ when :update
179
+ query = "UPDATE hash_tables SET setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{opts[:id]}'"
180
+ when :delete
181
+ query = "DELETE FROM hash_tables WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk IN(#{opts[:ids].join(',')})"
172
182
  end
183
+ @manager_db.query(query) unless query.nil?
173
184
  end
174
185
 
175
186
  def format_tag(tag, param)
176
187
  pattern = {'${name}' => param[:name], '${event}' => param[:event].to_s, '${primary_key}' => param[:primary_key]}
177
- tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
178
- $log.warn "mysql_replicator_multi: missing placeholder. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
188
+ tag.gsub(/(\${[a-z_]+})/) do
189
+ $log.warn "mysql_replicator_multi: unknown placeholder found. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
179
190
  pattern[$1]
180
191
  end
181
192
  end
182
193
 
194
+ def add_hash_table_buffer(setting_name, id, hash)
195
+ @hash_table_bulk_insert << "('#{setting_name}','#{id}','#{hash}')"
196
+ flush_hash_table if @hash_table_bulk_insert.size >= @bulk_insert_count
197
+ end
198
+
199
+ def hash_table_flusher
200
+ begin
201
+ loop do
202
+ if @hash_table_bulk_insert.empty? || @bulk_insert_timeout > (Time.now - @hash_table_bulk_insert_last_time)
203
+ sleep @bulk_insert_timeout
204
+ next
205
+ end
206
+ @mutex.synchronize {
207
+ flush_hash_table
208
+ }
209
+ end
210
+ rescue StandardError => e
211
+ @mutex.synchronize {
212
+ $log.error "mysql_replicator_multi: failed to flush buffered query. :config=>#{masked_config}"
213
+ $log.error "error: #{e.message}"
214
+ $log.error e.backtrace.join("\n")
215
+ }
216
+ end
217
+ end
218
+
219
+ def flush_hash_table
220
+ return if @hash_table_bulk_insert.empty?
221
+ query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
222
+ VALUES #{@hash_table_bulk_insert.join(',')}
223
+ ON DUPLICATE KEY UPDATE setting_query_hash = VALUES(setting_query_hash)"
224
+ @manager_db.query(query)
225
+ @hash_table_bulk_insert.clear
226
+ @hash_table_bulk_insert_last_time = Time.now
227
+ end
228
+
183
229
  def emit_record(tag, record)
184
230
  Engine.emit(tag, Engine.now, record)
185
231
  end
@@ -23,6 +23,8 @@ CREATE TABLE `settings` (
23
23
  `interval` int(11) NOT NULL,
24
24
  `primary_key` varchar(255) DEFAULT 'id',
25
25
  `enable_delete` int(11) DEFAULT '1',
26
+ `enable_loose_insert` int(11) DEFAULT '0',
27
+ `enable_loose_delete` int(11) DEFAULT '0',
26
28
  PRIMARY KEY (`id`),
27
29
  UNIQUE KEY `name` (`name`)
28
30
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-16 00:00:00.000000000 Z
12
+ date: 2013-12-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake