fluent-plugin-mysql-replicator 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  ## Overview
4
4
 
5
5
  Fluentd input plugin to track insert/update/delete events from a MySQL database server.
6
- Not only that, it could multiple table replication into Elasticsearch nodes.
6
+ In addition, it can replicate multiple tables into Elasticsearch/Solr.
7
7
  Support for replicating to other RDB/NoSQL stores is coming.
8
8
 
9
9
  ## Installation
@@ -162,6 +162,8 @@ CREATE TABLE `settings` (
162
162
  `interval` int(11) NOT NULL,
163
163
  `primary_key` varchar(255) DEFAULT 'id',
164
164
  `enable_delete` int(11) DEFAULT '1',
165
+ `enable_loose_insert` int(11) DEFAULT '0',
166
+ `enable_loose_delete` int(11) DEFAULT '0',
165
167
  PRIMARY KEY (`id`),
166
168
  UNIQUE KEY `name` (`name`)
167
169
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -184,9 +186,9 @@ VALUES
184
186
 
185
187
  This is a sample of the row you have inserted.
186
188
 
187
- | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |
189
+ | id | is_active | name | host | port | username | password | database | query | interval | primary_key | enable_delete |enable_loose_insert |enable_loose_delete |
188
190
  |----|-----------|--------------|-----------------|------|-----------|---------------|----------|------------------------------|----------|-------------|---------------|---------------------|---------------------|
189
- | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 |
191
+ | 1 | 1 | mydb.mytable | 192.168.100.221 | 3306 | mysqluser | mysqlpassword | mydb | SELECT id, text from mytable; | 5 | id | 1 | 0 | 0 |
190
192
 
191
193
  ### configuration
192
194
 
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "0.2.3"
4
+ s.version = "0.3.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
@@ -53,6 +53,7 @@ module Fluent
53
53
  table_hash = Hash.new
54
54
  ids = Array.new
55
55
  loop do
56
+ rows_count = 0
56
57
  start_time = Time.now
57
58
  previous_ids = ids
58
59
  current_ids = Array.new
@@ -72,6 +73,7 @@ module Fluent
72
73
  emit_record(tag, row)
73
74
  end
74
75
  table_hash[row[@primary_key]] = current_hash
76
+ rows_count += 1
75
77
  end
76
78
  ids = current_ids
77
79
  if @enable_delete
@@ -89,7 +91,7 @@ module Fluent
89
91
  end
90
92
  end
91
93
  elapsed_time = sprintf("%0.02f", Time.now - start_time)
92
- $log.info "mysql_replicator: finished execution :tag=>#{tag} :elapsed_time=>#{elapsed_time} sec"
94
+ $log.info "mysql_replicator: finished execution :tag=>#{tag} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
93
95
  sleep @interval
94
96
  end
95
97
  end
@@ -100,7 +102,7 @@ module Fluent
100
102
 
101
103
  def format_tag(tag, param)
102
104
  pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key}
103
- tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
105
+ tag.gsub(/(\${[a-z_]+})/) do
104
106
  $log.warn "mysql_replicator: missing placeholder. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
105
107
  pattern[$1]
106
108
  end
@@ -15,6 +15,9 @@ module Fluent
15
15
  config_param :manager_database, :string, :default => 'replicator_manager'
16
16
  config_param :tag, :string, :default => nil
17
17
 
18
+ config_param :bulk_insert_count, :integer, :default => 50
19
+ config_param :bulk_insert_timeout, :integer, :default => 10
20
+
18
21
  def configure(conf)
19
22
  super
20
23
  @reconnect_interval = Config.time_value('10sec')
@@ -27,6 +30,13 @@ module Fluent
27
30
  begin
28
31
  @threads = []
29
32
  @mutex = Mutex.new
33
+ @manager_db = get_manager_connection
34
+ @manager_db.query("SET SESSION wait_timeout=1800;")
35
+ @threads << Thread.new {
36
+ @hash_table_bulk_insert = []
37
+ @hash_table_bulk_insert_last_time = Time.now
38
+ hash_table_flusher
39
+ }
30
40
  get_settings.each do |config|
31
41
  @threads << Thread.new {
32
42
  poll(config)
@@ -57,33 +67,35 @@ module Fluent
57
67
 
58
68
  def poll(config)
59
69
  begin
60
- @manager_db = get_manager_connection
61
70
  masked_config = Hash[config.map {|k,v| (k == 'password') ? [k, v.to_s.gsub(/./, '*')] : [k,v]}]
62
71
  @mutex.synchronize {
63
72
  $log.info "mysql_replicator_multi: polling start. :config=>#{masked_config}"
64
73
  }
65
74
  primary_key = config['primary_key']
66
- previous_id = current_id = 0
75
+ previous_id = current_id = nil
67
76
  loop do
77
+ rows_count = 0
68
78
  start_time = Time.now
69
79
  db = get_origin_connection(config)
70
80
  db.query(config['query']).each do |row|
71
- @mutex.lock
72
81
  row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date)}
73
82
  current_id = row[primary_key]
74
- if row[primary_key].nil?
75
- $log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
76
- @mutex.unlock
77
- break
78
- end
79
- detect_insert_update(config, row)
80
- detect_delete(config, current_id, previous_id)
83
+ @mutex.synchronize {
84
+ if row[primary_key].nil?
85
+ $log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
86
+ break
87
+ end
88
+ detect_insert_update(config, row)
89
+ detect_delete(config, current_id, previous_id)
90
+ }
81
91
  previous_id = current_id
82
- @mutex.unlock
92
+ rows_count += 1
83
93
  end
84
94
  db.close
85
95
  elapsed_time = sprintf("%0.02f", Time.now - start_time)
86
- $log.info "mysql_replicator_multi: finished execution :setting_name=>#{config['name']} :elapsed_time=>#{elapsed_time} sec"
96
+ @mutex.synchronize {
97
+ $log.info "mysql_replicator_multi: execution finished. :setting_name=>#{config['name']} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
98
+ }
87
99
  sleep config['interval']
88
100
  end
89
101
  rescue StandardError => e
@@ -98,7 +110,7 @@ module Fluent
98
110
  def detect_insert_update(config, row)
99
111
  primary_key = config['primary_key']
100
112
  current_id = row[primary_key]
101
- stored_hash = get_stored_hash(config['name'], current_id)
113
+ stored_hash = config['enable_loose_insert'] == 1 ? "" : get_stored_hash(config['name'], current_id)
102
114
  current_hash = Digest::SHA1.hexdigest(row.flatten.join)
103
115
 
104
116
  event = nil
@@ -110,20 +122,19 @@ module Fluent
110
122
  unless event.nil?
111
123
  tag = format_tag(@tag, {:name => config['name'], :event => event, :primary_key => config['primary_key']})
112
124
  emit_record(tag, row)
113
- update_hashtable({:event => event, :ids => current_id, :setting_name => config['name'], :hash => current_hash})
125
+ update_hashtable({:event => event, :id => current_id, :setting_name => config['name'], :hash => current_hash})
114
126
  end
115
127
  end
116
128
 
117
129
  def get_stored_hash(setting_name, id)
118
- query = "SELECT setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
119
- @manager_db.query(query).each do |row|
120
- return row['setting_query_hash']
121
- end
130
+ query = "SELECT SQL_NO_CACHE setting_query_hash FROM hash_tables WHERE setting_query_pk = #{id.to_i} AND setting_name = '#{setting_name}'"
131
+ row = @manager_db.query(query).first
132
+ return row.nil? ? "" : row['setting_query_hash']
122
133
  end
123
134
 
124
135
  def detect_delete(config, current_id, previous_id)
125
- return unless config['enable_delete'] == 1
126
- deleted_ids = collect_gap_ids(config['name'], current_id, previous_id)
136
+ return if config['enable_delete'] != 1 || previous_id.nil?
137
+ deleted_ids = collect_gap_ids(config, current_id, previous_id)
127
138
  unless deleted_ids.empty?
128
139
  event = :delete
129
140
  deleted_ids.each do |id|
@@ -134,17 +145,20 @@ module Fluent
134
145
  end
135
146
  end
136
147
 
137
- def collect_gap_ids(setting_name, current_id, previous_id)
138
- if (current_id - previous_id) > 1
139
- query = "SELECT setting_query_pk FROM hash_tables
148
+ def collect_gap_ids(config, current_id, previous_id)
149
+ setting_name = config['name']
150
+ if (current_id - previous_id) > 1 && config['enable_loose_delete'] == 0
151
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
140
152
  WHERE setting_name = '#{setting_name}'
141
153
  AND setting_query_pk > #{previous_id.to_i} AND setting_query_pk < #{current_id.to_i}"
154
+ elsif (current_id - previous_id) > 1 && config['enable_loose_delete'] == 1
155
+ return [*previous_id...current_id] - [current_id,previous_id]
142
156
  elsif previous_id > current_id
143
- query = "SELECT setting_query_pk FROM hash_tables
157
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
144
158
  WHERE setting_name = '#{setting_name}'
145
159
  AND setting_query_pk > #{previous_id.to_i}"
146
160
  elsif previous_id == current_id
147
- query = "SELECT setting_query_pk FROM hash_tables
161
+ query = "SELECT SQL_NO_CACHE setting_query_pk FROM hash_tables
148
162
  WHERE setting_name = '#{setting_name}'
149
163
  AND (setting_query_pk > #{current_id.to_i} OR setting_query_pk < #{current_id.to_i})"
150
164
  end
@@ -158,28 +172,60 @@ module Fluent
158
172
  end
159
173
 
160
174
  def update_hashtable(opts)
161
- ids = opts[:ids].is_a?(Integer) ? [opts[:ids]] : opts[:ids]
162
- ids.each do |id|
163
- case opts[:event]
164
- when :insert
165
- query = "insert into hash_tables (setting_name,setting_query_pk,setting_query_hash) values('#{opts[:setting_name]}','#{id}','#{opts[:hash]}')"
166
- when :update
167
- query = "update hash_tables set setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
168
- when :delete
169
- query = "delete from hash_tables WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{id}'"
170
- end
171
- @manager_db.query(query) unless query.nil?
175
+ case opts[:event]
176
+ when :insert
177
+ add_hash_table_buffer(opts[:setting_name], opts[:id], opts[:hash])
178
+ when :update
179
+ query = "UPDATE hash_tables SET setting_query_hash = '#{opts[:hash]}' WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk = '#{opts[:id]}'"
180
+ when :delete
181
+ query = "DELETE FROM hash_tables WHERE setting_name = '#{opts[:setting_name]}' AND setting_query_pk IN(#{opts[:ids].join(',')})"
172
182
  end
183
+ @manager_db.query(query) unless query.nil?
173
184
  end
174
185
 
175
186
  def format_tag(tag, param)
176
187
  pattern = {'${name}' => param[:name], '${event}' => param[:event].to_s, '${primary_key}' => param[:primary_key]}
177
- tag.gsub(/\${[a-z_]+(\[[0-9]+\])?}/, pattern) do
178
- $log.warn "mysql_replicator_multi: missing placeholder. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
188
+ tag.gsub(/(\${[a-z_]+})/) do
189
+ $log.warn "mysql_replicator_multi: unknown placeholder found. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
179
190
  pattern[$1]
180
191
  end
181
192
  end
182
193
 
194
+ def add_hash_table_buffer(setting_name, id, hash)
195
+ @hash_table_bulk_insert << "('#{setting_name}','#{id}','#{hash}')"
196
+ flush_hash_table if @hash_table_bulk_insert.size >= @bulk_insert_count
197
+ end
198
+
199
+ def hash_table_flusher
200
+ begin
201
+ loop do
202
+ if @hash_table_bulk_insert.empty? || @bulk_insert_timeout > (Time.now - @hash_table_bulk_insert_last_time)
203
+ sleep @bulk_insert_timeout
204
+ next
205
+ end
206
+ @mutex.synchronize {
207
+ flush_hash_table
208
+ }
209
+ end
210
+ rescue StandardError => e
211
+ @mutex.synchronize {
212
+ $log.error "mysql_replicator_multi: failed to flush buffered query. :config=>#{masked_config}"
213
+ $log.error "error: #{e.message}"
214
+ $log.error e.backtrace.join("\n")
215
+ }
216
+ end
217
+ end
218
+
219
+ def flush_hash_table
220
+ return if @hash_table_bulk_insert.empty?
221
+ query = "INSERT INTO hash_tables (setting_name,setting_query_pk,setting_query_hash)
222
+ VALUES #{@hash_table_bulk_insert.join(',')}
223
+ ON DUPLICATE KEY UPDATE setting_query_hash = VALUES(setting_query_hash)"
224
+ @manager_db.query(query)
225
+ @hash_table_bulk_insert.clear
226
+ @hash_table_bulk_insert_last_time = Time.now
227
+ end
228
+
183
229
  def emit_record(tag, record)
184
230
  Engine.emit(tag, Engine.now, record)
185
231
  end
@@ -23,6 +23,8 @@ CREATE TABLE `settings` (
23
23
  `interval` int(11) NOT NULL,
24
24
  `primary_key` varchar(255) DEFAULT 'id',
25
25
  `enable_delete` int(11) DEFAULT '1',
26
+ `enable_loose_insert` int(11) DEFAULT '0',
27
+ `enable_loose_delete` int(11) DEFAULT '0',
26
28
  PRIMARY KEY (`id`),
27
29
  UNIQUE KEY `name` (`name`)
28
30
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-16 00:00:00.000000000 Z
12
+ date: 2013-12-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake