fluent-plugin-mysql-replicator 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c523ddf1d5052d11c6c7a0f7e9172d319c881257d791a0006cd1a6601f5a948
4
- data.tar.gz: ca9f218dfa56543f141d8911578250194281bfbad16d8f5ed6b4980120d99c8d
3
+ metadata.gz: 16c6600b210b85bad55965f707eadd53017558fd8a6a2b55cdf28bc1e86b7be3
4
+ data.tar.gz: 07d8b7367ee234afb128845fb8ef9169929cb18caf4c1813040efe951e72f68e
5
5
  SHA512:
6
- metadata.gz: a5266cd4f824e781d53eaa613c37f260c7814badcee87fb6c49a1321e9c9ac8bc8d9505b2ea800f3a8ccde237b2b0eeb808e187463996c2b1aecc93065128869
7
- data.tar.gz: 70834ef8629d34bbd53aa1ff9d66ba706d6df5b48cd310d8a554c7d25628682633860123f63855e851482abda385e6288c4fcd01da848cbffc56bc0525553968
6
+ metadata.gz: 65ec0c92b273597f7f7ff976fc42878e4fa413d8471947125a6873d7209142bec87a59aa37e3c491fd1a81dea4a9d448e3280cbca1eafd381fdee241d723bcae
7
+ data.tar.gz: 20fb6ffe6e217e5d8ee0108c5243fad7f6abd910f1a2d336f65bcb8ba0388ac22e329d408edaa5ad8f298c959cffc2656932d71302d6b2b38c21c28cab14585d
@@ -0,0 +1,17 @@
1
+ FROM ruby:3.3-slim
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ ENV BUNDLE_PATH=/workspace/vendor/bundle
5
+
6
+ RUN apt-get update \
7
+ && apt-get install -y --no-install-recommends \
8
+ build-essential \
9
+ default-libmysqlclient-dev \
10
+ git \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ WORKDIR /workspace
14
+
15
+ RUN gem install bundler
16
+
17
+ CMD ["bash"]
@@ -0,0 +1,16 @@
1
+ {
2
+ "name": "fluent-plugin-mysql-replicator",
3
+ "build": {
4
+ "dockerfile": "Dockerfile"
5
+ },
6
+ "workspaceFolder": "/workspace",
7
+ "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
8
+ "settings": {
9
+ "terminal.integrated.shell.linux": "/bin/bash"
10
+ },
11
+ "extensions": [
12
+ "rebornix.Ruby"
13
+ ],
14
+ "postCreateCommand": "bundle config set --local path \"$BUNDLE_PATH\" && bundle install --jobs 4 --retry 3",
15
+ "remoteUser": "root"
16
+ }
data/.dockerignore ADDED
@@ -0,0 +1,4 @@
1
+ vendor/bundle
2
+ .bundle
3
+ *.gem
4
+ pkg
@@ -0,0 +1,102 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+
8
+ jobs:
9
+ unit:
10
+ name: Unit tests (Ruby ${{ matrix.ruby }})
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ ruby: ['3.2', '3.3', '3.4', '4.0']
16
+ steps:
17
+ - uses: actions/checkout@v5
18
+ - name: Install MySQL client headers
19
+ run: sudo apt-get update && sudo apt-get install -y default-libmysqlclient-dev
20
+ - uses: ruby/setup-ruby@v1
21
+ with:
22
+ ruby-version: ${{ matrix.ruby }}
23
+ bundler-cache: true
24
+ - run: bundle exec rake test
25
+
26
+ e2e:
27
+ name: E2E replication (ES ${{ matrix.es_label }})
28
+ runs-on: ubuntu-latest
29
+
30
+ strategy:
31
+ fail-fast: false
32
+ matrix:
33
+ include:
34
+ # The plugin auto-detects the version and adjusts "_type" accordingly.
35
+ - es_label: '6.8'
36
+ es_image: docker.elastic.co/elasticsearch/elasticsearch:6.8.23
37
+ es_major: '6'
38
+ - es_label: '8.x'
39
+ es_image: docker.elastic.co/elasticsearch/elasticsearch:8.18.0
40
+ es_major: '8'
41
+ - es_label: '9.x'
42
+ es_image: docker.elastic.co/elasticsearch/elasticsearch:9.0.0
43
+ es_major: '9'
44
+
45
+ services:
46
+ mysql:
47
+ image: mysql:8.4
48
+ env:
49
+ MYSQL_ROOT_PASSWORD: root
50
+ ports:
51
+ - 3306:3306
52
+ options: >-
53
+ --health-cmd="mysqladmin ping -h localhost -proot"
54
+ --health-interval=10s
55
+ --health-timeout=5s
56
+ --health-retries=10
57
+
58
+ elasticsearch:
59
+ image: ${{ matrix.es_image }}
60
+ env:
61
+ discovery.type: single-node
62
+ xpack.security.enabled: 'false'
63
+ ES_JAVA_OPTS: -Xms512m -Xmx512m
64
+ ports:
65
+ - 9200:9200
66
+
67
+ env:
68
+ MYSQL_HOST: 127.0.0.1
69
+ MYSQL_PORT: 3306
70
+ MYSQL_USER: root
71
+ MYSQL_PASSWORD: root
72
+ MYSQL_DATABASE: e2e_source
73
+ ES_HOST: 127.0.0.1
74
+ ES_PORT: 9200
75
+ ES_MAJOR_VERSION: ${{ matrix.es_major }}
76
+
77
+ steps:
78
+ - uses: actions/checkout@v5
79
+
80
+ - name: Install MySQL client headers
81
+ run: sudo apt-get update && sudo apt-get install -y default-libmysqlclient-dev
82
+
83
+ - uses: ruby/setup-ruby@v1
84
+ with:
85
+ ruby-version: '3.4'
86
+ bundler-cache: true
87
+
88
+ - name: Wait for Elasticsearch
89
+ run: |
90
+ for i in $(seq 1 30); do
91
+ if curl -fs "http://127.0.0.1:9200/_cluster/health?wait_for_status=yellow&timeout=5s" >/dev/null; then
92
+ echo "Elasticsearch is up"; exit 0
93
+ fi
94
+ echo "waiting for Elasticsearch ($i)..."; sleep 5
95
+ done
96
+ echo "Elasticsearch did not become ready"; exit 1
97
+
98
+ - name: Run single-table replication E2E test
99
+ run: bundle exec ruby test/e2e/replication_single_e2e_test.rb
100
+
101
+ - name: Run multi-table replication E2E test
102
+ run: bundle exec ruby test/e2e/replication_multi_e2e_test.rb
data/.gitignore CHANGED
@@ -3,3 +3,4 @@
3
3
  Gemfile.lock
4
4
  pkg/*
5
5
  vendor/*
6
+ test/e2e/*.log
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # fluent-plugin-mysql-replicator [![Build Status](https://travis-ci.org/y-ken/fluent-plugin-mysql-replicator.png?branch=master)](https://travis-ci.org/y-ken/fluent-plugin-mysql-replicator)
1
+ # fluent-plugin-mysql-replicator [![CI](https://github.com/y-ken/fluent-plugin-mysql-replicator/actions/workflows/ci.yml/badge.svg)](https://github.com/y-ken/fluent-plugin-mysql-replicator/actions/workflows/ci.yml)
2
2
 
3
3
  ## Overview
4
4
 
@@ -31,15 +31,33 @@ install with gem or fluent-gem command as:
31
31
 
32
32
  `````
33
33
  # for system installed fluentd
34
- $ gem install fluent-plugin-mysql-replicator -v 1.0.2
34
+ $ gem install fluent-plugin-mysql-replicator -v 1.1.0
35
35
 
36
36
  # for td-agent2
37
37
  $ sudo td-agent-gem install fluent-plugin-mysql-replicator -v 0.6.1
38
38
 
39
39
  # for td-agent3
40
- $ sudo td-agent-gem install fluent-plugin-mysql-replicator -v 1.0.2
40
+ $ sudo td-agent-gem install fluent-plugin-mysql-replicator -v 1.1.0
41
41
  `````
42
42
 
43
+ ## Development container
44
+
45
+ This repository includes a VS Code Dev Container configuration under `.devcontainer/`.
46
+ Use Docker and Remote Containers / Dev Containers in VS Code to build and open the workspace inside a container.
47
+ The container installs Ruby, Bundler, and required native build dependencies.
48
+
49
+ After opening the repository in the Dev Container, run:
50
+
51
+ ```
52
+ bundle config set --local path vendor/bundle && bundle install
53
+ ```
54
+
55
+ Then run tests like:
56
+
57
+ ```
58
+ bundle exec ruby -Itest test/plugin/test_out_mysql_replicator_elasticsearch.rb
59
+ ```
60
+
43
61
  ## Included plugins
44
62
 
45
63
  * Input Plugin: mysql_replicator
@@ -47,6 +65,15 @@ $ sudo td-agent-gem install fluent-plugin-mysql-replicator -v 1.0.2
47
65
  * Output Plugin: mysql_replicator_elasticsearch
48
66
  * Output Plugin: mysql_replicator_solr (experimental)
49
67
 
68
+ ## Elasticsearch version compatibility
69
+
70
+ `mysql_replicator_elasticsearch` works with Elasticsearch 6.x through 9.x.
71
+
72
+ Mapping types were removed in Elasticsearch 8.x (and deprecated in 7.x), so the
73
+ `_type` field can no longer be sent in bulk requests. The plugin detects the
74
+ Elasticsearch version on the first write and automatically omits `_type` for
75
+ 7.x and later, so no extra configuration is required.
76
+
50
77
  ## Output example
51
78
 
52
79
  It is an example when detecting insert/update/delete events.
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |s|
3
3
  s.name = "fluent-plugin-mysql-replicator"
4
- s.version = "1.0.2"
4
+ s.version = "1.1.0"
5
5
  s.authors = ["Kentaro Yoshida"]
6
6
  s.email = ["y.ken.studio@gmail.com"]
7
7
  s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
@@ -85,37 +85,49 @@ module Fluent::Plugin
85
85
  while @running
86
86
  rows_count = 0
87
87
  start_time = Time.now
88
- unless config['prepared_query'].nil?
89
- nest_db = get_origin_connection(config)
90
- config['prepared_query'].strip.split(/;/).each do |query|
91
- nest_db.query(query)
88
+ db = nil
89
+ nest_db = nil
90
+ begin
91
+ unless config['prepared_query'].nil?
92
+ nest_db = get_origin_connection(config)
93
+ config['prepared_query'].strip.split(/;/).each do |query|
94
+ nest_db.query(query)
95
+ end
92
96
  end
93
- end
94
- db = get_origin_connection(config)
95
- db.query(config['query']).each do |row|
96
- row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
97
- row.select {|k, v| v.to_s.strip.match(/^SELECT[^\$]+\$\{[^\}]+\}/i) }.each do |k, v|
98
- row[k] = [] unless row[k].is_a?(Array)
99
- nest_db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
100
- nest_row.each {|k, v| nest_row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
101
- row[k] << nest_row
97
+ db = get_origin_connection(config)
98
+ db.query(config['query']).each do |row|
99
+ row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
100
+ row.select {|k, v| v.to_s.strip.match(/^SELECT[^\$]+\$\{[^\}]+\}/i) }.each do |k, v|
101
+ row[k] = [] unless row[k].is_a?(Array)
102
+ nest_db.query(v.gsub(/\$\{([^\}]+)\}/) {|matched| row[$1].to_s}).each do |nest_row|
103
+ nest_row.each {|k, v| nest_row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
104
+ row[k] << nest_row
105
+ end
102
106
  end
107
+ current_id = row[primary_key]
108
+ @mutex.synchronize {
109
+ if row[primary_key].nil?
110
+ log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
111
+ break
112
+ end
113
+ detect_insert_update(config, row)
114
+ detect_delete(config, current_id, previous_id)
115
+ }
116
+ previous_id = current_id
117
+ rows_count += 1
103
118
  end
104
- current_id = row[primary_key]
119
+ rescue Mysql2::Error => e
120
+ raise e unless config['enable_retry'] == 1
121
+
105
122
  @mutex.synchronize {
106
- if row[primary_key].nil?
107
- log.error "mysql_replicator_multi: missing primary_key. :setting_name=>#{config['name']} :primary_key=>#{primary_key}"
108
- break
109
- end
110
- detect_insert_update(config, row)
111
- detect_delete(config, current_id, previous_id)
123
+ log.error "mysql_replicator_multi: failed due to an error caused by the database. :setting_name=>#{config['name']}"
124
+ log.error "error: #{e.message}"
125
+ log.error e.backtrace.join("\n")
112
126
  }
113
- previous_id = current_id
114
- rows_count += 1
115
- end
116
- db.close
117
- unless config['prepared_query'].nil?
118
- nest_db.close
127
+ sleep config['retry_interval']
128
+ ensure
129
+ db.close if db
130
+ nest_db.close if nest_db && !config['prepared_query'].nil?
119
131
  end
120
132
  elapsed_time = sprintf("%0.02f", Time.now - start_time)
121
133
  @mutex.synchronize {
@@ -290,7 +302,11 @@ module Fluent::Plugin
290
302
  :cache_rows => false
291
303
  )
292
304
  rescue Mysql2::Error => e
293
- raise "mysql_replicator_multi: #{e}"
305
+ if config['enable_retry'] == 1
306
+ raise e
307
+ else
308
+ raise "mysql_replicator_multi: #{e}"
309
+ end
294
310
  end
295
311
  end
296
312
  end
@@ -1,5 +1,6 @@
1
1
  require 'net/http'
2
2
  require 'date'
3
+ require 'yajl'
3
4
  require 'fluent/plugin/output'
4
5
 
5
6
  class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Output
@@ -38,6 +39,8 @@ class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Outpu
38
39
 
39
40
  def start
40
41
  super
42
+ # nil means "not yet detected"; resolved on the first write.
43
+ @suppress_type = nil
41
44
  end
42
45
 
43
46
  def format(tag, time, record)
@@ -57,6 +60,8 @@ class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Outpu
57
60
  end
58
61
 
59
62
  def write(chunk)
63
+ detect_type_suppression if @suppress_type.nil?
64
+
60
65
  bulk_message = []
61
66
 
62
67
  chunk.msgpack_each do |tag, time, record|
@@ -66,28 +71,60 @@ class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Outpu
66
71
  id_key = tag_parts['primary_key']
67
72
 
68
73
  if tag_parts['event'] == 'delete'
69
- meta = { "delete" => {"_index" => target_index, "_type" => target_type, "_id" => record[id_key]} }
74
+ action = {"_index" => target_index, "_id" => record[id_key]}
75
+ action['_type'] = target_type unless @suppress_type
76
+ meta = { "delete" => action }
70
77
  bulk_message << Yajl::Encoder.encode(meta)
71
78
  else
72
- meta = { "index" => {"_index" => target_index, "_type" => target_type} }
79
+ action = {"_index" => target_index}
80
+ action['_type'] = target_type unless @suppress_type
73
81
  if id_key && record[id_key]
74
- meta['index']['_id'] = record[id_key]
82
+ action['_id'] = record[id_key]
75
83
  end
84
+ meta = { "index" => action }
76
85
  bulk_message << Yajl::Encoder.encode(meta)
77
86
  bulk_message << Yajl::Encoder.encode(record)
78
87
  end
79
88
  end
80
89
  bulk_message << ""
81
90
 
82
- http = Net::HTTP.new(@host, @port.to_i)
83
- http.use_ssl = @ssl
84
-
85
91
  request = Net::HTTP::Post.new('/_bulk', {'content-type' => 'application/json; charset=utf-8'})
86
92
  if @username && @password
87
93
  request.basic_auth(@username, @password)
88
94
  end
89
95
 
90
96
  request.body = bulk_message.join("\n")
91
- http.request(request).value
97
+ new_http.request(request).value
98
+ end
99
+
100
+ private
101
+
102
+ def new_http
103
+ http = Net::HTTP.new(@host, @port.to_i)
104
+ http.use_ssl = @ssl
105
+ http
106
+ end
107
+
108
+ # Mapping types were removed in Elasticsearch 8.x and deprecated in 7.x.
109
+ # Detect the major version once and omit "_type" for 7.x and later.
110
+ def detect_type_suppression
111
+ major = elasticsearch_major_version
112
+ @suppress_type = !major.nil? && major >= 7
113
+ if major
114
+ log.info "mysql_replicator_elasticsearch: detected Elasticsearch #{major}.x, suppress_type=#{@suppress_type}"
115
+ else
116
+ log.warn "mysql_replicator_elasticsearch: could not detect Elasticsearch version, sending '_type' (assuming 6.x)"
117
+ end
118
+ end
119
+
120
+ def elasticsearch_major_version
121
+ request = Net::HTTP::Get.new('/')
122
+ request.basic_auth(@username, @password) if @username && @password
123
+ response = new_http.request(request)
124
+ number = Yajl::Parser.parse(response.body).dig('version', 'number')
125
+ number.to_s.split('.').first.to_i
126
+ rescue => e
127
+ log.warn "mysql_replicator_elasticsearch: version detection failed: #{e.message}"
128
+ nil
92
129
  end
93
130
  end
@@ -29,6 +29,10 @@ CREATE TABLE IF NOT EXISTS `settings` (
29
29
  `enable_loose_insert` int(11) DEFAULT '0',
30
30
  -- On enabling 'enable_loose_delete: 1', turn on speculative delete but performance penalty on non-contiguous primary key.
31
31
  `enable_loose_delete` int(11) DEFAULT '0',
32
+ -- On enabling 'enable_retry: 1', automatically retries when an error occurs due to MySQL.
33
+ `enable_retry` int(11) DEFAULT '1',
34
+ -- Additional interval (in seconds) to wait before retrying after a MySQL error. Defaults to 30.
35
+ `retry_interval` int(11) NOT NULL DEFAULT '30',
32
36
  PRIMARY KEY (`id`),
33
37
  UNIQUE KEY `name` (`name`)
34
38
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -0,0 +1,94 @@
1
+ # Shared helpers for the end-to-end replication tests.
2
+ #
3
+ # Connection settings come from environment variables (with local defaults):
4
+ # MYSQL_HOST MYSQL_PORT MYSQL_USER MYSQL_PASSWORD
5
+ # ES_HOST ES_PORT ES_MAJOR_VERSION
6
+ require 'mysql2'
7
+ require 'net/http'
8
+ require 'json'
9
+
10
+ module E2E
11
+ module_function
12
+
13
+ ROOT = File.expand_path('../..', __dir__)
14
+
15
+ def mysql_config(database: nil)
16
+ cfg = {
17
+ host: ENV['MYSQL_HOST'] || '127.0.0.1',
18
+ port: (ENV['MYSQL_PORT'] || 3306).to_i,
19
+ username: ENV['MYSQL_USER'] || 'root',
20
+ password: ENV['MYSQL_PASSWORD'] || 'root',
21
+ }
22
+ cfg[:database] = database if database
23
+ cfg
24
+ end
25
+
26
+ def es_base
27
+ "http://#{ENV['ES_HOST'] || '127.0.0.1'}:#{ENV['ES_PORT'] || '9200'}"
28
+ end
29
+
30
+ # Major version of the Elasticsearch under test (defaults to 6).
31
+ def es_major_version
32
+ (ENV['ES_MAJOR_VERSION'] || '6').to_i
33
+ end
34
+
35
+ # Realtime GET by id. Returns [http_status, parsed_body].
36
+ # Elasticsearch 7.x+ dropped custom mapping types, so documents are addressed
37
+ # via the "_doc" endpoint instead of the original type name.
38
+ def es_get(index, type, id)
39
+ type_path = es_major_version >= 7 ? '_doc' : type
40
+ res = Net::HTTP.get_response(URI("#{es_base}/#{index}/#{type_path}/#{id}"))
41
+ [res.code.to_i, (JSON.parse(res.body) rescue {})]
42
+ end
43
+
44
+ # Drop an index so a test starts from a clean slate (ignores "not found").
45
+ def es_delete_index(index)
46
+ uri = URI("#{es_base}/#{index}")
47
+ Net::HTTP.start(uri.host, uri.port) { |h| h.request(Net::HTTP::Delete.new(uri)) }
48
+ rescue StandardError
49
+ # ignore: the index may not exist yet
50
+ end
51
+
52
+ # Poll a condition until it becomes truthy, otherwise fail with a timeout.
53
+ def wait_until(description, timeout: 90, log_path: nil)
54
+ deadline = Time.now + timeout
55
+ loop do
56
+ return if yield
57
+ fail_with("timeout (#{timeout}s) waiting for: #{description}", log_path) if Time.now > deadline
58
+ sleep 1
59
+ end
60
+ end
61
+
62
+ def fail_with(message, log_path = nil)
63
+ warn "\n[E2E] FAILED: #{message}"
64
+ if log_path && File.exist?(log_path)
65
+ warn "\n----- fluentd.log (tail) -----"
66
+ warn File.readlines(log_path).last(60).join
67
+ warn "------------------------------"
68
+ end
69
+ exit 1
70
+ end
71
+
72
+ def step(message)
73
+ puts "[E2E] #{message}"
74
+ end
75
+
76
+ # Boot fluentd as a child process. Returns the pid; logs go to log_path.
77
+ def spawn_fluentd(conf, log_path)
78
+ log = File.open(log_path, 'w')
79
+ Process.spawn(
80
+ 'bundle', 'exec', 'fluentd',
81
+ '-c', conf,
82
+ '-p', File.join(ROOT, 'lib', 'fluent', 'plugin'),
83
+ '--no-supervisor',
84
+ out: log, err: log, chdir: ROOT
85
+ )
86
+ end
87
+
88
+ def stop_fluentd(pid)
89
+ Process.kill('TERM', pid)
90
+ Process.wait(pid)
91
+ rescue Errno::ESRCH, Errno::ECHILD
92
+ # already gone
93
+ end
94
+ end
@@ -0,0 +1,30 @@
1
+ # E2E pipeline for the multi-table input plugin.
2
+ # in_mysql_replicator_multi reads replication settings from the management
3
+ # database (replicator_manager.settings) and persists per-row hashes into
4
+ # replicator_manager.hash_tables. See test/e2e/replication_multi_e2e_test.rb.
5
+ #
6
+ # bulk_insert_count is set to 1 so hash_tables is flushed immediately on each
7
+ # insert, which keeps the test timing deterministic.
8
+ <source>
9
+ @type mysql_replicator_multi
10
+ manager_host "#{ENV['MYSQL_HOST'] || '127.0.0.1'}"
11
+ manager_port "#{ENV['MYSQL_PORT'] || '3306'}"
12
+ manager_username "#{ENV['MYSQL_USER'] || 'root'}"
13
+ manager_password "#{ENV['MYSQL_PASSWORD'] || 'root'}"
14
+ manager_database "#{ENV['MYSQL_MANAGER_DATABASE'] || 'replicator_manager'}"
15
+ tag multiindex.multitype.${event}.${primary_key}
16
+ bulk_insert_count 1
17
+ bulk_insert_timeout 2
18
+ </source>
19
+
20
+ <match multiindex.**>
21
+ @type mysql_replicator_elasticsearch
22
+ host "#{ENV['ES_HOST'] || '127.0.0.1'}"
23
+ port "#{ENV['ES_PORT'] || '9200'}"
24
+ <buffer>
25
+ @type memory
26
+ flush_mode interval
27
+ flush_interval 1s
28
+ flush_thread_count 1
29
+ </buffer>
30
+ </match>
@@ -0,0 +1,28 @@
1
+ # E2E pipeline: MySQL (in_mysql_replicator) -> Elasticsearch (out_mysql_replicator_elasticsearch)
2
+ # Connection details are injected via environment variables so the same config
3
+ # works both in CI and locally. See test/e2e/replication_single_e2e_test.rb.
4
+ <source>
5
+ @type mysql_replicator
6
+ host "#{ENV['MYSQL_HOST'] || '127.0.0.1'}"
7
+ port "#{ENV['MYSQL_PORT'] || '3306'}"
8
+ username "#{ENV['MYSQL_USER'] || 'root'}"
9
+ password "#{ENV['MYSQL_PASSWORD'] || 'root'}"
10
+ database "#{ENV['MYSQL_DATABASE'] || 'e2e_source'}"
11
+ query SELECT id, name, age FROM users
12
+ primary_key id
13
+ interval 2s
14
+ enable_delete true
15
+ tag myindex.mytype.${event}.${primary_key}
16
+ </source>
17
+
18
+ <match myindex.**>
19
+ @type mysql_replicator_elasticsearch
20
+ host "#{ENV['ES_HOST'] || '127.0.0.1'}"
21
+ port "#{ENV['ES_PORT'] || '9200'}"
22
+ <buffer>
23
+ @type memory
24
+ flush_mode interval
25
+ flush_interval 1s
26
+ flush_thread_count 1
27
+ </buffer>
28
+ </match>
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ # End-to-end test for the multi-table input plugin (in_mysql_replicator_multi).
3
+ #
4
+ # Unlike the single plugin, this one is driven by a management database:
5
+ # - replicator_manager.settings : one row per replication job
6
+ # - replicator_manager.hash_tables: persisted per-row hashes (delete detection)
7
+ #
8
+ # This test builds that management DB from setup_mysql_replicator_multi.sql,
9
+ # registers a settings row pointing at a source table, boots Fluentd wiring
10
+ # in_mysql_replicator_multi -> out_mysql_replicator_elasticsearch, and asserts
11
+ # that INSERT / UPDATE / DELETE propagate to Elasticsearch AND that the
12
+ # hash_tables state is maintained.
13
+ #
14
+ # Delete detection in the multi plugin is gap-based against hash_tables, so the
15
+ # test seeds three rows and deletes the *middle* one (id=2) to exercise it.
16
+ require_relative 'e2e_helper'
17
+ include E2E
18
+
19
+ CONF = File.join(ROOT, 'test', 'e2e', 'fluent_multi.conf')
20
+ LOG_PATH = File.join(ROOT, 'test', 'e2e', 'fluentd_multi.log')
21
+ SETUP_SQL = File.join(ROOT, 'setup_mysql_replicator_multi.sql')
22
+
23
+ SOURCE_DB = ENV['MYSQL_DATABASE'] || 'e2e_source'
24
+ MANAGER_DB = ENV['MYSQL_MANAGER_DATABASE'] || 'replicator_manager'
25
+ SETTING_NAME = 'users_to_es'
26
+ INDEX = 'multiindex'
27
+ TYPE = 'multitype'
28
+
29
+ def hash_table_count(client, pk: nil)
30
+ where = "setting_name = '#{SETTING_NAME}'"
31
+ where += " AND setting_query_pk = #{pk}" if pk
32
+ client.query("SELECT COUNT(*) AS c FROM `#{MANAGER_DB}`.hash_tables WHERE #{where}").first['c']
33
+ end
34
+
35
+ # --- 1. Seed the source database --------------------------------------------
36
+ step "seeding MySQL source database '#{SOURCE_DB}'"
37
+ es_delete_index(INDEX)
38
+ client = Mysql2::Client.new(mysql_config)
39
+ client.query("DROP DATABASE IF EXISTS `#{SOURCE_DB}`")
40
+ client.query("CREATE DATABASE `#{SOURCE_DB}`")
41
+ client.query("USE `#{SOURCE_DB}`")
42
+ client.query(<<~SQL)
43
+ CREATE TABLE users (
44
+ id INT NOT NULL AUTO_INCREMENT,
45
+ name VARCHAR(255) NOT NULL,
46
+ age INT NOT NULL,
47
+ PRIMARY KEY (id)
48
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8
49
+ SQL
50
+ client.query("INSERT INTO users (name, age) VALUES ('alice', 20), ('bob', 30), ('carol', 40)")
51
+
52
+ # --- 2. Build the management database and register a replication setting ----
53
+ step "building management database '#{MANAGER_DB}' from setup SQL"
54
+ client.query("DROP DATABASE IF EXISTS `#{MANAGER_DB}`")
55
+ File.read(SETUP_SQL).split(/;\s*$/).map(&:strip).reject(&:empty?).each do |stmt|
56
+ client.query(stmt)
57
+ end
58
+
59
+ step "registering settings row '#{SETTING_NAME}'"
60
+ cfg = mysql_config
61
+ client.query(<<~SQL)
62
+ INSERT INTO `#{MANAGER_DB}`.settings
63
+ (is_active, name, host, port, username, password, `database`,
64
+ query, prepared_query, `interval`, primary_key, enable_delete)
65
+ VALUES
66
+ (1, '#{SETTING_NAME}', '#{cfg[:host]}', #{cfg[:port]},
67
+ '#{cfg[:username]}', '#{client.escape(cfg[:password].to_s)}', '#{SOURCE_DB}',
68
+ 'SELECT id, name, age FROM users ORDER BY id', '', 2, 'id', 1)
69
+ SQL
70
+
71
+ # --- 3. Boot Fluentd --------------------------------------------------------
72
+ step "starting Fluentd (#{CONF})"
73
+ fluentd_pid = spawn_fluentd(CONF, LOG_PATH)
74
+
75
+ begin
76
+ # --- 4. INSERT is replicated to Elasticsearch -----------------------------
77
+ step "asserting INSERT replication"
78
+ {1 => 'alice', 2 => 'bob', 3 => 'carol'}.each do |id, name|
79
+ wait_until("user #{id} (#{name}) indexed in Elasticsearch", log_path: LOG_PATH) do
80
+ code, body = es_get(INDEX, TYPE, id)
81
+ code == 200 && body.dig('_source', 'name') == name
82
+ end
83
+ end
84
+ step " INSERT OK"
85
+
86
+ # --- 5. hash_tables state is persisted ------------------------------------
87
+ step "asserting hash_tables persistence"
88
+ wait_until("3 rows recorded in hash_tables for '#{SETTING_NAME}'", log_path: LOG_PATH) do
89
+ hash_table_count(client) == 3
90
+ end
91
+ step " hash_tables OK"
92
+
93
+ # --- 6. UPDATE is replicated ----------------------------------------------
94
+ step "asserting UPDATE replication"
95
+ client.query("UPDATE `#{SOURCE_DB}`.users SET age = 21 WHERE id = 1")
96
+ wait_until("user 1 age updated to 21 in Elasticsearch", log_path: LOG_PATH) do
97
+ code, body = es_get(INDEX, TYPE, 1)
98
+ code == 200 && body.dig('_source', 'age') == 21
99
+ end
100
+ step " UPDATE OK"
101
+
102
+ # --- 7. DELETE (middle id) is replicated ----------------------------------
103
+ step "asserting DELETE replication (middle id=2)"
104
+ client.query("DELETE FROM `#{SOURCE_DB}`.users WHERE id = 2")
105
+ wait_until("user 2 removed from Elasticsearch", log_path: LOG_PATH) do
106
+ code, _ = es_get(INDEX, TYPE, 2)
107
+ code == 404
108
+ end
109
+ wait_until("hash_tables entry for id=2 removed", log_path: LOG_PATH) do
110
+ hash_table_count(client, pk: 2) == 0
111
+ end
112
+ # Surviving rows must remain.
113
+ code1, = es_get(INDEX, TYPE, 1)
114
+ code3, = es_get(INDEX, TYPE, 3)
115
+ fail_with("surviving rows were unexpectedly removed (id1=#{code1}, id3=#{code3})", LOG_PATH) unless code1 == 200 && code3 == 200
116
+ step " DELETE OK"
117
+
118
+ puts "\n[E2E] PASSED (multi): insert/update/delete replicated and hash_tables maintained"
119
+ ensure
120
+ step "stopping Fluentd"
121
+ stop_fluentd(fluentd_pid)
122
+ end
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env ruby
2
+ # End-to-end test for the single-table input plugin (in_mysql_replicator).
3
+ #
4
+ # It seeds a source table in MySQL, boots a real Fluentd process wiring
5
+ # in_mysql_replicator -> out_mysql_replicator_elasticsearch (fluent_single.conf),
6
+ # and asserts that INSERT / UPDATE / DELETE on MySQL are replicated to
7
+ # Elasticsearch.
8
+ require_relative 'e2e_helper'
9
+ include E2E
10
+
11
+ CONF = File.join(ROOT, 'test', 'e2e', 'fluent_single.conf')
12
+ LOG_PATH = File.join(ROOT, 'test', 'e2e', 'fluentd_single.log')
13
+
14
+ DB = ENV['MYSQL_DATABASE'] || 'e2e_source'
15
+ INDEX = 'myindex'
16
+ TYPE = 'mytype'
17
+
18
+ # --- 1. Seed the source database --------------------------------------------
19
+ step "seeding MySQL source database '#{DB}'"
20
+ es_delete_index(INDEX)
21
+ client = Mysql2::Client.new(mysql_config)
22
+ client.query("DROP DATABASE IF EXISTS `#{DB}`")
23
+ client.query("CREATE DATABASE `#{DB}`")
24
+ client.query("USE `#{DB}`")
25
+ client.query(<<~SQL)
26
+ CREATE TABLE users (
27
+ id INT NOT NULL AUTO_INCREMENT,
28
+ name VARCHAR(255) NOT NULL,
29
+ age INT NOT NULL,
30
+ PRIMARY KEY (id)
31
+ ) ENGINE=InnoDB DEFAULT CHARSET=utf8
32
+ SQL
33
+ client.query("INSERT INTO users (name, age) VALUES ('alice', 20), ('bob', 30)")
34
+
35
+ # --- 2. Boot Fluentd --------------------------------------------------------
36
+ step "starting Fluentd (#{CONF})"
37
+ fluentd_pid = spawn_fluentd(CONF, LOG_PATH)
38
+
39
+ begin
40
+ # --- 3. INSERT is replicated ----------------------------------------------
41
+ step "asserting INSERT replication"
42
+ wait_until("user 1 (alice) indexed in Elasticsearch", log_path: LOG_PATH) do
43
+ code, body = es_get(INDEX, TYPE, 1)
44
+ code == 200 && body.dig('_source', 'name') == 'alice'
45
+ end
46
+ wait_until("user 2 (bob) indexed in Elasticsearch", log_path: LOG_PATH) do
47
+ code, body = es_get(INDEX, TYPE, 2)
48
+ code == 200 && body.dig('_source', 'name') == 'bob'
49
+ end
50
+ step " INSERT OK"
51
+
52
+ # --- 4. UPDATE is replicated ----------------------------------------------
53
+ step "asserting UPDATE replication"
54
+ client.query("UPDATE users SET age = 21 WHERE id = 1")
55
+ wait_until("user 1 age updated to 21 in Elasticsearch", log_path: LOG_PATH) do
56
+ code, body = es_get(INDEX, TYPE, 1)
57
+ code == 200 && body.dig('_source', 'age') == 21
58
+ end
59
+ step " UPDATE OK"
60
+
61
+ # --- 5. DELETE is replicated ----------------------------------------------
62
+ step "asserting DELETE replication"
63
+ client.query("DELETE FROM users WHERE id = 2")
64
+ wait_until("user 2 removed from Elasticsearch", log_path: LOG_PATH) do
65
+ code, _ = es_get(INDEX, TYPE, 2)
66
+ code == 404
67
+ end
68
+ step " DELETE OK"
69
+
70
+ puts "\n[E2E] PASSED (single): insert/update/delete replicated MySQL -> Elasticsearch"
71
+ ensure
72
+ step "stopping Fluentd"
73
+ stop_fluentd(fluentd_pid)
74
+ end
@@ -21,7 +21,18 @@ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
21
21
  {'age' => 26, 'request_id' => '42'}
22
22
  end
23
23
 
24
+ # The plugin detects the Elasticsearch version on the first write via GET /,
25
+ # so stub that endpoint (defaulting to 6.x, which keeps "_type").
26
+ def stub_elastic_version(url, version="6.8.23")
27
+ stub_request(:get, url).to_return(
28
+ :status => 200,
29
+ :headers => {"Content-Type" => "application/json"},
30
+ :body => %({"version":{"number":"#{version}"}})
31
+ )
32
+ end
33
+
24
34
  def stub_elastic(url="http://localhost:9200/_bulk")
35
+ stub_elastic_version(url.sub('/_bulk', '/'))
25
36
  stub_request(:post, url).with do |req|
26
37
  @content_type = req.headers["Content-Type"]
27
38
  @index_cmds = req.body.split("\n").map {|r| JSON.parse(r) }
@@ -29,6 +40,7 @@ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
29
40
  end
30
41
 
31
42
  def stub_elastic_unavailable(url="http://localhost:9200/_bulk")
43
+ stub_elastic_version(url.sub('/_bulk', '/'))
32
44
  stub_request(:post, url).to_return(:status => [503, "Service Unavailable"])
33
45
  end
34
46
 
@@ -58,6 +70,16 @@ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
58
70
  assert_equal('mytype', index_cmds.first['index']['_type'])
59
71
  end
60
72
 
73
+ def test_auto_detects_es8_and_omits_type
74
+ stub_elastic
75
+ # Override the version endpoint to report Elasticsearch 8.x.
76
+ stub_elastic_version("http://localhost:9200/", "8.18.0")
77
+ driver.run(default_tag: @tag) do
78
+ driver.feed(sample_record)
79
+ end
80
+ assert(!index_cmds.first['index'].has_key?('_type'))
81
+ end
82
+
61
83
  def test_writes_to_speficied_host
62
84
  driver.configure("host 192.168.33.50\n")
63
85
  elastic_request = stub_elastic("http://192.168.33.50:9200/_bulk")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-mysql-replicator
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kentaro Yoshida
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-27 00:00:00.000000000 Z
11
+ date: 2026-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -107,8 +107,11 @@ executables: []
107
107
  extensions: []
108
108
  extra_rdoc_files: []
109
109
  files:
110
+ - ".devcontainer/Dockerfile"
111
+ - ".devcontainer/devcontainer.json"
112
+ - ".dockerignore"
113
+ - ".github/workflows/ci.yml"
110
114
  - ".gitignore"
111
- - ".travis.yml"
112
115
  - Gemfile
113
116
  - LICENSE
114
117
  - README.md
@@ -125,6 +128,11 @@ files:
125
128
  - lib/fluent/plugin/out_mysql_replicator_elasticsearch.rb
126
129
  - lib/fluent/plugin/out_mysql_replicator_solr.rb
127
130
  - setup_mysql_replicator_multi.sql
131
+ - test/e2e/e2e_helper.rb
132
+ - test/e2e/fluent_multi.conf
133
+ - test/e2e/fluent_single.conf
134
+ - test/e2e/replication_multi_e2e_test.rb
135
+ - test/e2e/replication_single_e2e_test.rb
128
136
  - test/helper.rb
129
137
  - test/plugin/test_in_mysql_replicator.rb
130
138
  - test/plugin/test_in_mysql_replicator_multi.rb
@@ -149,13 +157,18 @@ required_rubygems_version: !ruby/object:Gem::Requirement
149
157
  - !ruby/object:Gem::Version
150
158
  version: '0'
151
159
  requirements: []
152
- rubygems_version: 3.1.6
160
+ rubygems_version: 3.0.3.1
153
161
  signing_key:
154
162
  specification_version: 4
155
163
  summary: Fluentd input plugin to track insert/update/delete event from MySQL database
156
164
  server. Not only that, it could multiple table replication and generate nested document
157
165
  for Elasticsearch/Solr. It's comming support replicate to another RDB/noSQL.
158
166
  test_files:
167
+ - test/e2e/e2e_helper.rb
168
+ - test/e2e/fluent_multi.conf
169
+ - test/e2e/fluent_single.conf
170
+ - test/e2e/replication_multi_e2e_test.rb
171
+ - test/e2e/replication_single_e2e_test.rb
159
172
  - test/helper.rb
160
173
  - test/plugin/test_in_mysql_replicator.rb
161
174
  - test/plugin/test_in_mysql_replicator_multi.rb
data/.travis.yml DELETED
@@ -1,18 +0,0 @@
1
- language: ruby
2
-
3
- rvm:
4
- - 2.4.0
5
- - 2.3.3
6
- - 2.2
7
- - 2.1
8
-
9
- services:
10
- - mysql
11
- - elasticsearch
12
-
13
- before_install:
14
- - gem update bundler
15
-
16
- before_script:
17
- - mysql < setup_mysql_replicator_multi.sql
18
- - curl https://raw.github.com/moliware/travis-solr/master/travis-solr.sh | SOLR_VERSION=4.6.1 bash