fluent-plugin-mysql-replicator 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/README.md +47 -0
- data/fluent-plugin-mysql-replicator.gemspec +1 -1
- data/lib/fluent/plugin/in_mysql_replicator.rb +19 -9
- data/lib/fluent/plugin/out_mysql_replicator_elasticsearch.rb +22 -5
- data/test/plugin/test_in_mysql_replicator.rb +26 -0
- data/test/plugin/test_out_mysql_replicator_elasticsearch.rb +26 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 50b2ac9d97e49b3febf1c20fc98c815b3d6a919d299a24511a72b8b1637d5585
|
|
4
|
+
data.tar.gz: f39b735a34afd31a77f5e82339c449265dd4ef455456a85e2902dab91db79130
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d57b725ee936f2ffc8ae55631e75759c68de863022cb5e5740be87eae6a756b9ddc5a531825bb603a02f6a7f57752ce75d389ff3fd777ba86e5126f83f392710
|
|
7
|
+
data.tar.gz: 8b644659039eae41a70dbe27f206673fb1420372321c00af802925c52b7d8fe3b5abb880d0aaf059521ceb80fbd414c1a41a7b4fef6c6dd8fd2a05ef746fdf05
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,18 @@ All notable changes to this project are documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.3.0] - 2026-06-16
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Date-based Elasticsearch index names. If the index-name segment of the tag
|
|
12
|
+
contains `strftime` tokens (e.g. `%Y%m%d`), they are expanded using the
|
|
13
|
+
record's event time, enabling Logstash-style dated indices such as
|
|
14
|
+
`myindex-20180831`. Index names without a `%` are unchanged. ([#27])
|
|
15
|
+
- Composite primary key support in `mysql_replicator`. `primary_key` now accepts
|
|
16
|
+
a comma-separated list of columns; the combination is used for change
|
|
17
|
+
detection and as the Elasticsearch document `_id` (values joined by `,`). A
|
|
18
|
+
single-column key behaves exactly as before. ([#7])
|
|
19
|
+
|
|
8
20
|
## [1.2.0] - 2026-06-16
|
|
9
21
|
|
|
10
22
|
### Added
|
|
@@ -73,6 +85,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
73
85
|
- First 1.0 release, targeting the Fluentd v0.14+ plugin API. Earlier 0.x
|
|
74
86
|
history is available in the git log.
|
|
75
87
|
|
|
88
|
+
[1.3.0]: https://github.com/y-ken/fluent-plugin-mysql-replicator/compare/v1.2.0...v1.3.0
|
|
76
89
|
[1.2.0]: https://github.com/y-ken/fluent-plugin-mysql-replicator/compare/v1.1.0...v1.2.0
|
|
77
90
|
[1.1.0]: https://github.com/y-ken/fluent-plugin-mysql-replicator/compare/v1.0.3...v1.1.0
|
|
78
91
|
[1.0.3]: https://github.com/y-ken/fluent-plugin-mysql-replicator/compare/v1.0.2...v1.0.3
|
|
@@ -81,7 +94,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
81
94
|
[1.0.0]: https://github.com/y-ken/fluent-plugin-mysql-replicator/releases/tag/v1.0.0
|
|
82
95
|
|
|
83
96
|
[#4]: https://github.com/y-ken/fluent-plugin-mysql-replicator/issues/4
|
|
97
|
+
[#7]: https://github.com/y-ken/fluent-plugin-mysql-replicator/pull/7
|
|
84
98
|
[#18]: https://github.com/y-ken/fluent-plugin-mysql-replicator/pull/18
|
|
99
|
+
[#27]: https://github.com/y-ken/fluent-plugin-mysql-replicator/issues/27
|
|
85
100
|
[#39]: https://github.com/y-ken/fluent-plugin-mysql-replicator/pull/39
|
|
86
101
|
[#40]: https://github.com/y-ken/fluent-plugin-mysql-replicator/issues/40
|
|
87
102
|
[#42]: https://github.com/y-ken/fluent-plugin-mysql-replicator/issues/42
|
data/README.md
CHANGED
|
@@ -122,6 +122,53 @@ Notes:
|
|
|
122
122
|
* Malformed JSON and non-string values are left untouched, so enabling the option
|
|
123
123
|
never corrupts non-JSON data.
|
|
124
124
|
|
|
125
|
+
## Date-based index names
|
|
126
|
+
|
|
127
|
+
`mysql_replicator_elasticsearch` resolves the target index name from the tag
|
|
128
|
+
(via `tag_format`). If that index-name segment contains `strftime` tokens such
|
|
129
|
+
as `%Y%m%d`, they are expanded using the record's event time, so you can create
|
|
130
|
+
Logstash-style dated indices like `myindex-20180831`.
|
|
131
|
+
|
|
132
|
+
Put the tokens in the index-name part of the input plugin's `tag` (the segment
|
|
133
|
+
must not contain `.`, so use `%Y%m%d` or `%Y-%m-%d`):
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
<source>
|
|
137
|
+
@type mysql_replicator
|
|
138
|
+
# ...
|
|
139
|
+
tag myindex-%Y%m%d.mytype.${event}.${primary_key}
|
|
140
|
+
</source>
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Index names that contain no `%` are left unchanged, so this is fully backward
|
|
144
|
+
compatible.
|
|
145
|
+
|
|
146
|
+
> **Note on deletions:** delete events target the index computed from the delete
|
|
147
|
+
> event's own time, so date-rotated indices are best suited to insert-only data
|
|
148
|
+
> (a record inserted on a previous day lives in that day's index, so a delete
> event emitted on a later day targets a different index and will not remove it).
|
|
149
|
+
|
|
150
|
+
## Composite primary keys
|
|
151
|
+
|
|
152
|
+
`primary_key` accepts a comma-separated list of columns, so tables keyed by more
|
|
153
|
+
than one column are supported:
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
<source>
|
|
157
|
+
@type mysql_replicator
|
|
158
|
+
# ...
|
|
159
|
+
query SELECT tenant_id, id, name FROM items
|
|
160
|
+
primary_key tenant_id,id
|
|
161
|
+
</source>
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Change detection (insert/update/delete) then keys on the combination of those
|
|
165
|
+
columns, and the Elasticsearch document `_id` becomes their values joined by `,`
|
|
166
|
+
(e.g. `10,7`). A single-column `primary_key` (the default `id`) behaves exactly
|
|
167
|
+
as before.
|
|
168
|
+
|
|
169
|
+
This applies to `mysql_replicator`; `mysql_replicator_multi` still expects a
|
|
170
|
+
single-column primary key.
|
|
171
|
+
|
|
125
172
|
## Output example
|
|
126
173
|
|
|
127
174
|
This is an example of the output when insert/update/delete events are detected.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
|
2
2
|
Gem::Specification.new do |s|
|
|
3
3
|
s.name = "fluent-plugin-mysql-replicator"
|
|
4
|
-
s.version = "1.
|
|
4
|
+
s.version = "1.3.0"
|
|
5
5
|
s.authors = ["Kentaro Yoshida"]
|
|
6
6
|
s.email = ["y.ken.studio@gmail.com"]
|
|
7
7
|
s.homepage = "https://github.com/y-ken/fluent-plugin-mysql-replicator"
|
|
@@ -17,7 +17,10 @@ module Fluent::Plugin
|
|
|
17
17
|
config_param :encoding, :string, :default => 'utf8'
|
|
18
18
|
config_param :query, :string
|
|
19
19
|
config_param :prepared_query, :string, :default => nil
|
|
20
|
-
|
|
20
|
+
# A single column name, or a comma-separated list for a composite key
|
|
21
|
+
# (e.g. "tenant_id,id"). The id used for change detection and the
|
|
22
|
+
# Elasticsearch document _id is the combination of these columns.
|
|
23
|
+
config_param :primary_key, :array, :default => ['id']
|
|
21
24
|
config_param :interval, :string, :default => '1m'
|
|
22
25
|
config_param :enable_delete, :bool, :default => true
|
|
23
26
|
# Comma-separated column names whose MySQL JSON values should be parsed into
|
|
@@ -73,7 +76,8 @@ module Fluent::Plugin
|
|
|
73
76
|
end
|
|
74
77
|
rows, con = query(@query, con)
|
|
75
78
|
rows.each do |row|
|
|
76
|
-
|
|
79
|
+
id = extract_id(row)
|
|
80
|
+
current_ids << id
|
|
77
81
|
current_hash = Digest::SHA1.hexdigest(row.flatten.join)
|
|
78
82
|
row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
|
|
79
83
|
parse_json_columns!(row, @json_columns)
|
|
@@ -86,18 +90,18 @@ module Fluent::Plugin
|
|
|
86
90
|
end
|
|
87
91
|
prepared_con.close
|
|
88
92
|
end
|
|
89
|
-
if
|
|
90
|
-
log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{primary_key}"
|
|
93
|
+
if id.any?(&:nil?)
|
|
94
|
+
log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{@primary_key.join(',')} :id=>#{id}"
|
|
91
95
|
break
|
|
92
96
|
end
|
|
93
|
-
if !table_hash.include?(
|
|
97
|
+
if !table_hash.include?(id)
|
|
94
98
|
tag = format_tag(@tag, {:event => :insert})
|
|
95
99
|
emit_record(tag, row)
|
|
96
|
-
elsif table_hash[
|
|
100
|
+
elsif table_hash[id] != current_hash
|
|
97
101
|
tag = format_tag(@tag, {:event => :update})
|
|
98
102
|
emit_record(tag, row)
|
|
99
103
|
end
|
|
100
|
-
table_hash[
|
|
104
|
+
table_hash[id] = current_hash
|
|
101
105
|
rows_count += 1
|
|
102
106
|
end
|
|
103
107
|
con.close
|
|
@@ -108,7 +112,7 @@ module Fluent::Plugin
|
|
|
108
112
|
hash_delete_by_list(table_hash, deleted_ids)
|
|
109
113
|
deleted_ids.each do |id|
|
|
110
114
|
tag = format_tag(@tag, {:event => :delete})
|
|
111
|
-
emit_record(tag,
|
|
115
|
+
emit_record(tag, Hash[@primary_key.zip(id)])
|
|
112
116
|
end
|
|
113
117
|
end
|
|
114
118
|
end
|
|
@@ -122,6 +126,12 @@ module Fluent::Plugin
|
|
|
122
126
|
deleted_keys.each{|k| hash.delete(k)}
|
|
123
127
|
end
|
|
124
128
|
|
|
129
|
+
# A row's id is the array of its primary-key column values, supporting
|
|
130
|
+
# composite keys. It is a single-element array for a single-column key.
|
|
131
|
+
def extract_id(row)
|
|
132
|
+
@primary_key.map {|col| row[col] }
|
|
133
|
+
end
|
|
134
|
+
|
|
125
135
|
# Returns the primary keys that disappeared since the previous poll.
|
|
126
136
|
#
|
|
127
137
|
# The first poll only establishes a baseline: there is no previous snapshot
|
|
@@ -152,7 +162,7 @@ module Fluent::Plugin
|
|
|
152
162
|
end
|
|
153
163
|
|
|
154
164
|
def format_tag(tag, param)
|
|
155
|
-
pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key}
|
|
165
|
+
pattern = {'${event}' => param[:event].to_s, '${primary_key}' => @primary_key.join(',')}
|
|
156
166
|
tag.gsub(/(\${[a-z_]+})/) do
|
|
157
167
|
log.warn "mysql_replicator: missing placeholder. :tag=>#{tag} :placeholder=>#{$1}" unless pattern.include?($1)
|
|
158
168
|
pattern[$1]
|
|
@@ -66,20 +66,20 @@ class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Outpu
|
|
|
66
66
|
|
|
67
67
|
chunk.msgpack_each do |tag, time, record|
|
|
68
68
|
tag_parts = tag.match(@tag_format)
|
|
69
|
-
target_index = tag_parts['index_name']
|
|
69
|
+
target_index = resolve_index_name(tag_parts['index_name'], time)
|
|
70
70
|
target_type = tag_parts['type_name']
|
|
71
|
-
|
|
71
|
+
id_keys = tag_parts['primary_key'].to_s.split(',')
|
|
72
72
|
|
|
73
73
|
if tag_parts['event'] == 'delete'
|
|
74
|
-
action = {"_index" => target_index, "_id" => record
|
|
74
|
+
action = {"_index" => target_index, "_id" => join_id(record, id_keys)}
|
|
75
75
|
action['_type'] = target_type unless @suppress_type
|
|
76
76
|
meta = { "delete" => action }
|
|
77
77
|
bulk_message << Yajl::Encoder.encode(meta)
|
|
78
78
|
else
|
|
79
79
|
action = {"_index" => target_index}
|
|
80
80
|
action['_type'] = target_type unless @suppress_type
|
|
81
|
-
if
|
|
82
|
-
action['_id'] = record
|
|
81
|
+
if !id_keys.empty? && id_keys.all? {|k| !record[k].nil? }
|
|
82
|
+
action['_id'] = join_id(record, id_keys)
|
|
83
83
|
end
|
|
84
84
|
meta = { "index" => action }
|
|
85
85
|
bulk_message << Yajl::Encoder.encode(meta)
|
|
@@ -105,6 +105,23 @@ class Fluent::Plugin::MysqlReplicatorElasticsearchOutput < Fluent::Plugin::Outpu
|
|
|
105
105
|
http
|
|
106
106
|
end
|
|
107
107
|
|
|
108
|
+
# Build the document _id from one or more primary-key columns. A single key
|
|
109
|
+
# yields its value; a composite key yields the values joined by ",".
|
|
110
|
+
def join_id(record, id_keys)
|
|
111
|
+
id_keys.map {|k| record[k] }.join(',')
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Expand strftime tokens (e.g. "%Y%m%d") in the index name using the record's
|
|
115
|
+
# event time, enabling date-based indices such as "myindex-20180831". Index
|
|
116
|
+
# names without a "%" are returned unchanged.
|
|
117
|
+
def resolve_index_name(index_name, time)
|
|
118
|
+
return index_name unless index_name && index_name.include?('%')
|
|
119
|
+
Time.at(time.to_i).strftime(index_name)
|
|
120
|
+
rescue => e
|
|
121
|
+
log.warn "mysql_replicator_elasticsearch: failed to expand index name '#{index_name}': #{e.message}"
|
|
122
|
+
index_name
|
|
123
|
+
end
|
|
124
|
+
|
|
108
125
|
# Mapping types were removed in Elasticsearch 8.x and deprecated in 7.x.
|
|
109
126
|
# Detect the major version once and omit "_type" for 7.x and later.
|
|
110
127
|
def detect_type_suppression
|
|
@@ -137,4 +137,30 @@ class MysqlReplicatorInputTest < Test::Unit::TestCase
|
|
|
137
137
|
assert_false nested?(12345)
|
|
138
138
|
assert_false nested?(nil)
|
|
139
139
|
end
|
|
140
|
+
|
|
141
|
+
# --- #7: composite primary key support ---
|
|
142
|
+
|
|
143
|
+
def composite_driver
|
|
144
|
+
create_driver(%[
|
|
145
|
+
tag input.mysql
|
|
146
|
+
query SELECT tenant_id, id, text from t
|
|
147
|
+
primary_key tenant_id,id
|
|
148
|
+
])
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
def test_primary_key_defaults_to_id_array
|
|
152
|
+
assert_equal ['id'], create_driver.instance.primary_key
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def test_primary_key_parses_composite_list
|
|
156
|
+
assert_equal ['tenant_id', 'id'], composite_driver.instance.primary_key
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def test_extract_id_single_key_is_one_element_array
|
|
160
|
+
assert_equal [7], create_driver.instance.extract_id({'id' => 7, 'text' => 'x'})
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def test_extract_id_returns_composite_values
|
|
164
|
+
assert_equal [10, 7], composite_driver.instance.extract_id({'tenant_id' => 10, 'id' => 7, 'text' => 'x'})
|
|
165
|
+
end
|
|
140
166
|
end
|
|
@@ -61,6 +61,32 @@ class MysqlReplicatorElasticsearchOutput < Test::Unit::TestCase
|
|
|
61
61
|
assert_equal('myindex', index_cmds.first['index']['_index'])
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
def test_expands_strftime_tokens_in_index_name
|
|
65
|
+
stub_elastic
|
|
66
|
+
time = Time.utc(2018, 8, 31, 12, 0, 0).to_i
|
|
67
|
+
driver.run(default_tag: 'myindex-%Y%m%d.mytype.insert.id') do
|
|
68
|
+
driver.feed(time, sample_record)
|
|
69
|
+
end
|
|
70
|
+
expected = "myindex-#{Time.at(time).strftime('%Y%m%d')}"
|
|
71
|
+
assert_equal(expected, index_cmds.first['index']['_index'])
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def test_index_name_without_token_is_unchanged
|
|
75
|
+
stub_elastic
|
|
76
|
+
driver.run(default_tag: 'plainindex.mytype.insert.id') do
|
|
77
|
+
driver.feed(sample_record)
|
|
78
|
+
end
|
|
79
|
+
assert_equal('plainindex', index_cmds.first['index']['_index'])
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def test_composite_primary_key_builds_joined_id
|
|
83
|
+
stub_elastic
|
|
84
|
+
driver.run(default_tag: 'myindex.mytype.insert.tenant_id,id') do
|
|
85
|
+
driver.feed({'tenant_id' => 10, 'id' => 7, 'text' => 'x'})
|
|
86
|
+
end
|
|
87
|
+
assert_equal('10,7', index_cmds.first['index']['_id'])
|
|
88
|
+
end
|
|
89
|
+
|
|
64
90
|
def test_writes_to_speficied_type
|
|
65
91
|
driver.configure("type_name mytype\n")
|
|
66
92
|
stub_elastic
|