gamma_replication 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +162 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/gamma_replication +34 -0
- data/gamma_replication.gemspec +40 -0
- data/lib/gamma_replication/command/base_replication.rb +147 -0
- data/lib/gamma_replication/command/dryrun.rb +21 -0
- data/lib/gamma_replication/command/start.rb +29 -0
- data/lib/gamma_replication/command.rb +34 -0
- data/lib/gamma_replication/database_connector/mysql_connector.rb +37 -0
- data/lib/gamma_replication/database_connector.rb +6 -0
- data/lib/gamma_replication/database_settings.rb +22 -0
- data/lib/gamma_replication/hook.rb +65 -0
- data/lib/gamma_replication/maxwell_client.rb +34 -0
- data/lib/gamma_replication/parser/data_parser.rb +175 -0
- data/lib/gamma_replication/table.rb +28 -0
- data/lib/gamma_replication/version.rb +5 -0
- data/lib/gamma_replication.rb +20 -0
- metadata +151 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 2c388f3c2fc5c5b7b1bad0f7b2c6d9f249883979710b2f81b1f405a5557ec201
|
4
|
+
data.tar.gz: 78d48d17c39c15b838c6e2a4a842473dda65711eaa8f6e21ae11de8314c4d945
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7615b3b2e9cc4fdf11139303436fbdf906c291a477ff8d09263a9773615f2bfeb7a742c093eb262138f79ff93d9aac148ceaa425c105a7653852e10d1dfc8580
|
7
|
+
data.tar.gz: 57bc2df77112be468ed5a07361b2e45230fca972ef71d973a00f8aa4b99adafb968f3095275fb03f5d9be3bd2a8be540141460892f0f9f468a79369e15c2a1cc
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Shinsuke Nishio
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
# GammaReplication
|
2
|
+
|
3
|
+
GammaReplication is a tool that reads MySQL binlog using [Maxwell's Daemon](https://github.com/zendesk/maxwell) and replicates data to another MySQL database while masking sensitive information.
|
4
|
+
|
5
|
+
## Features
|
6
|
+
|
7
|
+
- Real-time replication using MySQL binlog
|
8
|
+
- Column-level data masking
|
9
|
+
- Flexible hook system for custom data transformation
|
10
|
+
- Dry-run mode for operation verification
|
11
|
+
|
12
|
+
## Requirements
|
13
|
+
|
14
|
+
- Ruby 3.0.0 or higher
|
15
|
+
- MySQL 5.7 or higher
|
16
|
+
- Maxwell's Daemon
|
17
|
+
- Java 8 or higher (for Maxwell's Daemon)
|
18
|
+
|
19
|
+
## Directory Structure
|
20
|
+
|
21
|
+
The tool launches Maxwell's Daemon from `./maxwell/bin/maxwell`, so the `maxwell/` directory must sit in the directory you run `gamma_replication` from:
|
22
|
+
|
23
|
+
```
|
24
|
+
your_project/
|
25
|
+
├── maxwell/
|
26
|
+
│ └── bin/
|
27
|
+
│ └── maxwell
|
28
|
+
└── your_application_files
|
29
|
+
```
|
30
|
+
|
31
|
+
## Installation
|
32
|
+
|
33
|
+
```bash
|
34
|
+
gem install gamma_replication
|
35
|
+
```
|
36
|
+
|
37
|
+
Or add this line to your application's Gemfile:
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
gem 'gamma_replication'
|
41
|
+
```
|
42
|
+
|
43
|
+
## Setup
|
44
|
+
|
45
|
+
1. Set up Maxwell's Daemon:
|
46
|
+
```bash
|
47
|
+
# Download Maxwell's Daemon
|
48
|
+
wget https://github.com/zendesk/maxwell/releases/download/v1.42.2/maxwell-1.42.2.tar.gz
|
49
|
+
tar xvf maxwell-1.42.2.tar.gz
|
50
|
+
mv maxwell-1.42.2 maxwell
|
51
|
+
|
52
|
+
# The maxwell executable will be available at maxwell/bin/maxwell
|
53
|
+
```
|
54
|
+
|
55
|
+
2. Create configuration files:
|
56
|
+
|
57
|
+
```bash
|
58
|
+
bin/setup
|
59
|
+
```
|
60
|
+
|
61
|
+
This command will create the following files:
|
62
|
+
- `config.properties`: Maxwell configuration
|
63
|
+
- `settings.yml`: Database connection settings
|
64
|
+
- `data.yml`: Table and masking configuration
|
65
|
+
- `hooks/`: Masking scripts
|
66
|
+
|
67
|
+
3. Configure MySQL:
|
68
|
+
- Enable binlog in your MySQL configuration:
|
69
|
+
```ini
|
70
|
+
[mysqld]
|
71
|
+
server-id=1
|
72
|
+
log-bin=master
|
73
|
+
binlog_format=row
|
74
|
+
```
|
75
|
+
- Create a user with replication privileges:
|
76
|
+
```sql
|
77
|
+
CREATE USER 'maxwell'@'%' IDENTIFIED BY 'maxwell';
|
78
|
+
GRANT ALL ON maxwell.* TO 'maxwell'@'%';
|
79
|
+
GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'maxwell'@'%';
|
80
|
+
```
|
81
|
+
|
82
|
+
4. Edit configuration files:
|
83
|
+
|
84
|
+
### settings.yml
|
85
|
+
```yaml
|
86
|
+
in_database_config:
|
87
|
+
host: localhost
|
88
|
+
port: 3306
|
89
|
+
username: repl_user
|
90
|
+
password: password
|
91
|
+
database: source_db
|
92
|
+
|
93
|
+
out_database_config:
|
94
|
+
host: localhost
|
95
|
+
port: 3306
|
96
|
+
username: root
|
97
|
+
password: password
|
98
|
+
database: target_db
|
99
|
+
```
|
100
|
+
|
101
|
+
### data.yml
|
102
|
+
```yaml
|
103
|
+
- data:
|
104
|
+
table: "users"
|
105
|
+
hooks:
|
106
|
+
- column:
|
107
|
+
name:
|
108
|
+
- "email"
|
109
|
+
scripts:
|
110
|
+
- "hooks/mask_email.rb"
|
111
|
+
- column:
|
112
|
+
name:
|
113
|
+
- "phone_number"
|
114
|
+
scripts:
|
115
|
+
- "hooks/mask_phone_number.rb"
|
116
|
+
```
|
117
|
+
|
118
|
+
## Usage
|
119
|
+
|
120
|
+
### Start Replication
|
121
|
+
|
122
|
+
```bash
|
123
|
+
gamma_replication start -s settings.yml -d data.yml -m config.properties
|
124
|
+
```
|
125
|
+
|
126
|
+
### Dry Run (Check SQL)
|
127
|
+
|
128
|
+
```bash
|
129
|
+
gamma_replication dryrun -s settings.yml -d data.yml -m config.properties
|
130
|
+
```
|
131
|
+
|
132
|
+
## Custom Masking
|
133
|
+
|
134
|
+
Create Ruby scripts in the `hooks/` directory to implement custom masking logic:
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
class MaskEmail
|
138
|
+
def execute(apply, column, value)
|
139
|
+
return value unless apply
|
140
|
+
"masked_#{value}"
|
141
|
+
end
|
142
|
+
end
|
143
|
+
```
|
144
|
+
|
145
|
+
## Development
|
146
|
+
|
147
|
+
1. Clone the repository
|
148
|
+
2. Run `bin/setup` to install dependencies
|
149
|
+
3. Run `rake spec` to run the tests
|
150
|
+
4. Run `bin/console` for an interactive prompt
|
151
|
+
|
152
|
+
## License
|
153
|
+
|
154
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
155
|
+
|
156
|
+
## Contributing
|
157
|
+
|
158
|
+
1. Fork it
|
159
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
160
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
161
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
162
|
+
5. Create a new Pull Request
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "gamma_replication"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
require "irb"
|
11
|
+
# Open an interactive IRB session; the gem has already been required above.
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
$LOAD_PATH.unshift File.expand_path("../lib", __dir__)
|
5
|
+
require "rubygems"
|
6
|
+
require "gamma_replication"
|
7
|
+
require "thor"
|
8
|
+
|
9
|
+
# Thor command-line interface exposing the `start`, `dryrun` and `version`
# commands of gamma_replication.
class GammaReplicationCLI < Thor
  # Tell Thor to exit with a non-zero status when a command fails (for
  # example when a required option is missing). Without this, Thor exits
  # with status 0 on errors and prints a deprecation warning.
  #
  # @return [Boolean] always true
  def self.exit_on_failure?
    true
  end

  desc "start", "Start replication using Maxwell"
  option :settings, aliases: "-s", desc: "Database Settings yaml", required: true
  option :data, aliases: "-d", desc: "Table Sync Settings yaml", required: true
  option :hook_dir, aliases: "-h", desc: "Hook script directory", default: "."
  option :maxwell_config, aliases: "-m", desc: "Maxwell configuration file path", required: true
  # Builds the Start command from the parsed options and runs it.
  def start
    GammaReplication::Command::Start.new(options).execute
  end

  desc "dryrun", "Run replication in dry-run mode (output SQL to STDOUT)"
  option :settings, aliases: "-s", desc: "Database Settings yaml", required: true
  option :data, aliases: "-d", desc: "Table Sync Settings yaml", required: true
  option :hook_dir, aliases: "-h", desc: "Hook script directory", default: "."
  option :maxwell_config, aliases: "-m", desc: "Maxwell configuration file path", required: true
  # Builds the Dryrun command from the parsed options and runs it.
  def dryrun
    GammaReplication::Command::Dryrun.new(options).execute
  end

  desc "version", "version"
  # Prints the gem version.
  def version
    puts GammaReplication::VERSION
  end
end
|
33
|
+
|
34
|
+
# Dispatch the command-line arguments to the matching Thor command.
GammaReplicationCLI.start(ARGV)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/gamma_replication/version"
|
4
|
+
|
5
|
+
# Gem specification: identity, packaged files, executable and dependencies.
Gem::Specification.new do |spec|
  spec.name = "gamma_replication"
  spec.version = GammaReplication::VERSION
  spec.authors = ["Shinsuke Nishio"]
  spec.email = ["nishio@densan-labs.net"]

  spec.summary = "MySQL replication tool with data masking capability"
  spec.description = "A tool to replicate MySQL data with the ability to mask sensitive information using Maxwell's Daemon"
  spec.homepage = "https://github.com/nishio-dens/gamma_replication"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.0.0"

  # Project links; all of them are derived from the homepage.
  homepage = spec.homepage
  spec.metadata["homepage_uri"] = homepage
  spec.metadata["source_code_uri"] = homepage
  spec.metadata["changelog_uri"] = "#{homepage}/blob/main/CHANGELOG.md"

  # Glob patterns selecting the files added to the gem when it is released.
  packaged_patterns = %w[
    lib/**/*.rb
    bin/*
    *.gemspec
    README.md
    LICENSE.txt
  ]
  spec.files = Dir.glob(packaged_patterns)
  spec.bindir = "exe"
  spec.executables = ["gamma_replication"]
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "activesupport", ">= 7.0"
  spec.add_dependency "mysql2", "~> 0.5.5"
  spec.add_dependency "thor", "~> 1.3"

  # Development-only dependencies.
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rubocop", "~> 1.21"
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  module Command
    # Shared driver for the replication commands. It reads change events
    # emitted by Maxwell, runs the configured hooks on each row and turns the
    # result into an INSERT / UPDATE / DELETE statement. Subclasses decide
    # what to do with that statement by implementing #execute_query, and
    # whether hooks run in apply mode by implementing #apply_mode?.
    class BaseReplication < Base
      # @param opts [Hash] command options; reads :settings, :data,
      #   :hook_dir (defaults to ".") and :maxwell_config (defaults to
      #   "config.properties").
      def initialize(opts)
        super()
        setup_database(opts)
        setup_parser(opts)
        setup_maxwell(opts)
      end

      # Loads the table settings, warns about duplicated tables, calls the
      # optional #before_start hook and then processes every event yielded
      # by the Maxwell client.
      def execute
        tables = @data_parser.gamma_tables
        output_setting_warning(tables)

        # Index settings by table name; a later duplicate overrides an earlier one.
        @table_settings = tables.each_with_object({}) do |table, hash|
          hash[table.table_name] = table
        end
        before_start if respond_to?(:before_start)
        @maxwell_client.start do |data|
          process_maxwell_data(data)
        end
      end

      private

      # Loads the database settings and creates the source/target connectors.
      def setup_database(opts)
        @database_settings = GammaReplication::DatabaseSettings.new(opts[:settings])
        @in_client = GammaReplication::DatabaseConnector::MysqlConnector.new(@database_settings.in_database)
        @out_client = GammaReplication::DatabaseConnector::MysqlConnector.new(@database_settings.out_database)
      end

      # Creates the parser for the table/hook settings file.
      def setup_parser(opts)
        @hook_root_dir = opts[:hook_dir] || "."
        @data_parser = GammaReplication::Parser::DataParser.new(opts[:data], @hook_root_dir, @in_client, @out_client,
                                                                apply: apply_mode?)
      end

      # Creates the client that launches Maxwell.
      def setup_maxwell(opts)
        @maxwell_client = GammaReplication::MaxwellClient.new(
          config_path: opts[:maxwell_config] || "config.properties"
        )
      end

      # Handles one Maxwell event. Any error is logged and swallowed so that
      # a single bad event does not stop the replication loop.
      def process_maxwell_data(data)
        return unless should_process_data?(data)

        process_data_by_type(data)
      rescue StandardError => e
        logger.error(e)
      end

      # @return [Boolean] true when the event belongs to a configured table
      #   of the configured source database.
      def should_process_data?(data)
        table_name = data["table"]
        table_setting = @table_settings[table_name]
        return false unless table_setting
        return false if @database_settings.in_database["database"] != data["database"]

        true
      end

      # Dispatches on the event type; other types are ignored. Errors
      # propagate to #process_maxwell_data, which logs them.
      def process_data_by_type(data)
        table_setting = @table_settings[data["table"]]
        case data["type"]
        when "insert"
          process_insert(table_setting, data)
        when "update"
          process_update(table_setting, data)
        when "delete"
          process_delete(table_setting, data)
        end
      end

      # Builds and executes an INSERT for the hook-processed row.
      def process_insert(table_setting, data)
        record = data["data"]
        processed_record = apply_hooks(table_setting, record)

        columns = processed_record.keys.map { |column| quote_identifier(column) }
        values = processed_record.values.map { |v| format_value(v) }

        query = "INSERT INTO #{quote_identifier(table_setting.table_name)} " \
                "(#{columns.join(",")}) VALUES (#{values.join(",")})"
        execute_query(query)
      end

      # Builds and executes an UPDATE for the hook-processed row. The row is
      # located by the previous primary key value when the event carries one.
      def process_update(table_setting, data)
        record = data["data"]
        old_record = data["old"]
        processed_record = apply_hooks(table_setting, record)

        set_clause = processed_record.map { |k, v| "#{quote_identifier(k)} = #{format_value(v)}" }.join(",")
        where_clause = build_where_clause(old_record, record, table_setting.primary_key)

        query = "UPDATE #{quote_identifier(table_setting.table_name)} SET #{set_clause} WHERE #{where_clause}"
        execute_query(query)
      end

      # Builds and executes a DELETE for the removed row.
      def process_delete(table_setting, data)
        # Maxwell puts the deleted row in "data"; "old" is only populated for
        # update events. Fall back to "old" for producers that do set it.
        deleted_record = data["data"].presence || data["old"]
        where_clause = build_where_clause(deleted_record, nil, table_setting.primary_key)

        query = "DELETE FROM #{quote_identifier(table_setting.table_name)} WHERE #{where_clause}"
        execute_query(query)
      end

      # Runs every hook of the table, in order, on a copy of the record.
      #
      # @return [Hash] the transformed record
      def apply_hooks(table_setting, record)
        result = record.dup
        table_setting.hooks.each do |hook|
          result = hook.execute_script(result)
        end
        result
      end

      # Builds "<primary key> = <value>", preferring the value from
      # old_record and falling back to new_record.
      #
      # @raise [ArgumentError] when neither record carries the primary key
      def build_where_clause(old_record, new_record, primary_key)
        key_value = old_record[primary_key] if old_record.present?
        key_value = new_record[primary_key] if key_value.blank? && new_record.present?
        if key_value.nil?
          raise ArgumentError, "primary key #{primary_key} is missing from the event; cannot build WHERE clause"
        end

        "#{quote_identifier(primary_key)} = #{format_value(key_value)}"
      end

      # Converts a Ruby value into an SQL literal.
      def format_value(value)
        case value
        when nil
          "NULL"
        when Numeric
          value.to_s
        when Time
          "'#{value.strftime("%Y-%m-%d %H:%M:%S")}'"
        when Hash, Array
          # Structured values (e.g. JSON columns) must be written as JSON
          # text; Ruby's #to_s would produce inspect output instead.
          "'#{@out_client.client.escape(JSON.generate(value))}'"
        else
          "'#{@out_client.client.escape(value.to_s)}'"
        end
      end

      # Wraps a table or column name in backticks (doubling embedded
      # backticks) so reserved words and unusual names stay valid SQL.
      def quote_identifier(name)
        "`#{name.to_s.gsub("`", "``")}`"
      end

      # @return [Boolean] whether hooks should run in apply mode
      # @raise [NotImplementedError] unless overridden by the subclass
      def apply_mode?
        raise NotImplementedError, "#{self.class} must implement #apply_mode?"
      end

      # Receives each generated SQL statement.
      #
      # @raise [NotImplementedError] unless overridden by the subclass
      def execute_query(query)
        raise NotImplementedError, "#{self.class} must implement #execute_query"
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  module Command
    # Replication command that never touches the target database: every
    # generated SQL statement is written to the log instead.
    class Dryrun < BaseReplication
      # Announces dry-run mode.
      def before_start
        logger.info("Starting DryRun mode...")
      end

      # Hooks are told not to apply their transformation.
      def apply_mode?
        false
      end

      # Logs the statement instead of running it.
      def execute_query(query)
        logger.info("DryRun: #{query}")
      end

      private :apply_mode?, :execute_query
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  module Command
    # Replication command that executes the generated SQL on the target
    # database.
    class Start < BaseReplication
      # Performs the base setup and then disables foreign key checks on the
      # target connection.
      def initialize(*)
        super
        @out_client.client.query("SET FOREIGN_KEY_CHECKS = 0")
      end

      # Hooks are told to apply their transformation.
      def apply_mode?
        true
      end

      # Runs the statement on the target database. Failures are logged and
      # swallowed so that replication keeps going.
      def execute_query(query)
        logger.info("Executing: #{query}") if ENV["DEBUG"]
        @out_client.client.query(query)
      rescue StandardError => e
        logger.error("Query execution failed: #{e.message}")
        logger.error(Array(e.backtrace).join("\n"))
      end

      # Re-enables foreign key checks on the target connection.
      def finalize
        @out_client.client.query("SET FOREIGN_KEY_CHECKS = 1")
        # No ancestor visible in this gem defines #finalize, so an
        # unconditional `super` would raise NoMethodError.
        super if defined?(super)
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  module Command
    # Common helpers for commands: table lookup, duplicate-setting warnings
    # and a lazily created logger writing to standard output.
    class Base
      # Returns the tables described by the parser; the two client arguments
      # are accepted but unused.
      def gamma_tables(_in_client, _out_client, data_parser)
        data_parser.gamma_tables
      end

      # Logs one warning per table name that appears more than once.
      def output_setting_warning(tables)
        find_duplicate_tables(tables).each { |name| log_duplicate_table_warning(name) }
      end

      private

      # @return [Array] table names occurring more than once, in first-seen order
      def find_duplicate_tables(tables)
        occurrences = tables.map(&:table_name).tally
        occurrences.select { |_name, count| count > 1 }.keys
      end

      def log_duplicate_table_warning(table_name)
        logger.warn("Table *#{table_name}* settings are duplicated. Please review your data settings.")
      end

      # Memoized logger bound to $stdout.
      def logger
        @logger ||= Logger.new($stdout)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mysql2"
|
4
|
+
|
5
|
+
module GammaReplication
  module DatabaseConnector
    # Lazily opens and caches Mysql2 connections described by a settings hash
    # (keys read: :host, :port, :username, :password, :database).
    class MysqlConnector
      DEFAULT_PORT = 3306

      attr_reader :config

      # @param config [Hash] connection settings
      def initialize(config)
        @config = config
      end

      # Returns a connection to the given database, creating it on first use.
      # Connections are cached per database name, so asking for a different
      # database no longer returns the connection opened for the first one.
      #
      # @param database_name [String] defaults to the configured database
      def client(database_name = @config[:database])
        @clients ||= {}
        @clients[database_name] ||= connect(database_name)
      end

      # Returns a cached connection to information_schema.
      def schema_client
        @schema_client ||= connect("information_schema")
      end

      private

      # Opens a new connection to the given database using the configured
      # host and credentials; port and password fall back to defaults.
      def connect(database)
        Mysql2::Client.new(
          host: @config[:host],
          port: @config[:port] || DEFAULT_PORT,
          username: @config[:username],
          password: @config[:password] || "",
          database: database
        )
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  # Connection settings for the source ("in") and target ("out") databases,
  # read from a YAML file with the top-level keys in_database_config and
  # out_database_config.
  class DatabaseSettings
    attr_reader :settings, :in_database, :out_database

    # Readers named after the YAML keys; they return the same objects as
    # #in_database and #out_database.
    alias in_database_config in_database
    alias out_database_config out_database

    # @param yaml_path [String] path of the settings file
    def initialize(yaml_path)
      parsed = YAML.safe_load_file(yaml_path, permitted_classes: [Symbol, Hash],
                                              symbolize_names: true)
      @settings = parsed.with_indifferent_access
      @in_database, @out_database = @settings.values_at(:in_database_config, :out_database_config)
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  # A user-supplied transformation script applied to replicated rows. The
  # script file is expected to define a class whose name is the camelized
  # file name (e.g. mask_email.rb -> MaskEmail) with an #execute method.
  class Hook
    attr_accessor :hook_type, :column_name, :script_path, :root_dir, :apply

    # Loads the script and runs it against a copy of the record.
    #
    # @param record [Hash] row data keyed by column name
    # @return the hook result: the updated record for column hooks, or
    #   whatever the script returns for row hooks
    # @raise [RuntimeError] when the script file is missing or the hook fails
    def execute_script(record)
      validate_script_exists
      result = record.dup
      load_script_file
      execute_hook(result)
    end

    private

    def validate_script_exists
      path = script_file_path
      raise "Hook Scripts Not Found. path: #{path}" unless File.exist?(path)
    end

    # Script location: script_path resolved under root_dir.
    def script_file_path
      File.join(root_dir, script_path)
    end

    # The file is (re)loaded on every call, so edits to the script are
    # picked up without restarting.
    def load_script_file
      load script_file_path
    end

    # Instantiates the hook class and runs it. Failures are re-raised as a
    # RuntimeError that keeps the original error class and message, so a
    # bug inside the script is not mistaken for a missing class.
    def execute_hook(record)
      instance = create_hook_instance
      process_hook(instance, record)
    rescue StandardError => e
      raise "Invalid Hook Class #{hook_class_name}: #{e.class}: #{e.message}"
    end

    def create_hook_instance
      hook_class_name.constantize.new
    end

    # Class name derived from the script file name without its extension.
    def hook_class_name
      File.basename(script_file_path, ".*").camelize
    end

    # Dispatches on hook_type ("column" or "row").
    def process_hook(instance, record)
      case hook_type.to_s
      when "column"
        process_column_hook(instance, record)
      when "row"
        process_row_hook(instance, record)
      else
        raise "Invalid hook type: #{hook_type}"
      end
    end

    # Replaces one column's value with the script result.
    def process_column_hook(instance, record)
      column = column_name.to_s
      record[column] = instance.execute(apply, column, record[column])
      record
    end

    # Hands the whole record to the script and returns its result.
    def process_row_hook(instance, record)
      instance.execute(apply, record)
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "open3"
|
5
|
+
|
6
|
+
module GammaReplication
  # Launches Maxwell as a child process and yields each JSON event it prints
  # on standard output.
  class MaxwellClient
    attr_reader :config_path, :maxwell_path

    # @param config_path [String] Maxwell configuration file passed via --config
    # @param maxwell_path [String] directory containing bin/maxwell
    def initialize(config_path:, maxwell_path: "./maxwell")
      @config_path = config_path
      @maxwell_path = maxwell_path
    end

    # Runs Maxwell and calls the block with each parsed output line. Blocks
    # until the child process closes its output.
    #
    # @yieldparam data the parsed JSON document of one output line
    def start(&block)
      # Pass the command as an argument vector so no shell is involved:
      # paths containing spaces or shell metacharacters stay intact.
      command = [File.join(maxwell_path, "bin", "maxwell"), "--config", config_path.to_s]

      IO.popen(command) do |io|
        io.each do |line|
          payload = line.strip
          next if payload.empty? # blank lines are not events

          data = JSON.parse(payload)
          block.call(data) if block_given?
        rescue JSON::ParserError => e
          logger.error("Failed to parse Maxwell output: #{e.message}")
        end
      end
    end

    private

    # Memoized logger bound to $stdout.
    def logger
      @logger ||= Logger.new($stdout)
    end
  end
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  module Parser
    # Reads the table/hook settings YAML and combines it with the table and
    # column definitions found in the source and target databases.
    class DataParser
      # @param data_yaml_path [String] path of the table settings file
      # @param hook_root_dir [String] directory hook script paths are resolved against
      # @param in_client connector for the source database
      # @param out_client connector for the target database
      # @param apply [Boolean] value handed to every hook as its apply flag
      def initialize(data_yaml_path, hook_root_dir, in_client, out_client, apply: false)
        @data_settings = YAML.load_file(data_yaml_path).map(&:with_indifferent_access)
        @hook_root_dir = hook_root_dir
        @in_client = in_client
        @out_client = out_client
        @apply = apply
      end

      # @return [Array<GammaReplication::Table>] the configured tables with
      #   their hooks attached
      def gamma_tables
        known_tables = database_exist_tables
        @data_settings.flat_map { |entry| parse_data_settings(entry[:data], known_tables) }
      end

      private

      def parse_data_settings(data, exist_tables)
        find_target_tables(data, exist_tables).map { |table| apply_table_settings(table, data) }
      end

      # Resolves the `table` setting: "*" selects every known table except
      # those listed in `table_without`; otherwise only the named tables that
      # exist are returned.
      def find_target_tables(data, exist_tables)
        requested = Array(data[:table])
        unless requested.join == "*"
          return requested.filter_map do |name|
            exist_tables.find { |candidate| candidate.table_name == name }
          end
        end

        excluded = Array(data[:table_without]) || []
        exist_tables.reject { |candidate| excluded.include?(candidate.table_name) }
      end

      # Attaches the parsed hooks (or an empty list) to the table.
      def apply_table_settings(table, data)
        table.hooks = data[:hooks].present? ? parse_hooks(data[:hooks], table) : []
        table
      end

      # Builds a Table for every table name found in either database.
      def database_exist_tables
        source_names = select_table_definitions(@in_client)
        target_names = select_table_definitions(@out_client)

        (source_names | target_names).map do |name|
          build_table_info(name, source_names, target_names)
        end
      end

      def build_table_info(table, in_tables, out_tables)
        info = GammaReplication::Table.new
        info.table_name = table
        info.in_exist = in_tables.include?(table)
        info.out_exist = out_tables.include?(table)
        info.in_exist_columns = select_column_definitions(@in_client, table)
        info.out_exist_columns = select_column_definitions(@out_client, table)
        info
      end

      # @return [Array<String>] table names of the client's configured database
      def select_table_definitions(client)
        rows = client.schema_client.query(build_table_query(client)).to_a
        rows.map { |row| row["TABLE_NAME"] }
      end

      def build_table_query(client)
        database = client.schema_client.escape(client.config[:database])
        <<~SQL
          SELECT
            *
          FROM
            TABLES
          INNER JOIN
            COLLATION_CHARACTER_SET_APPLICABILITY CCSA
          ON
            TABLES.TABLE_COLLATION = CCSA.COLLATION_NAME
          WHERE
            TABLE_SCHEMA = '#{database}'
          ORDER BY
            TABLE_NAME
        SQL
      end

      # @return [Array<String>] column names of the table, in ordinal order
      def select_column_definitions(client, table_name)
        rows = client.schema_client.query(build_column_query(client, table_name)).to_a
        rows.map { |row| row["COLUMN_NAME"] }
      end

      def build_column_query(client, table_name)
        database = client.schema_client.escape(client.config[:database])
        escaped_table = client.schema_client.escape(table_name)
        <<~SQL
          SELECT
            *
          FROM
            COLUMNS
          WHERE
            TABLE_SCHEMA = '#{database}'
            AND TABLE_NAME = '#{escaped_table}'
          ORDER BY
            TABLE_NAME, ORDINAL_POSITION
        SQL
      end

      # Expands the hook settings of one table into Hook objects.
      def parse_hooks(hooks, table)
        built = Array(hooks).flat_map do |hook|
          create_hooks_by_type(determine_hook_type(hook), hook, table)
        end
        built.compact
      end

      # @return [Symbol, nil] :row or :column, nil when neither key is set
      def determine_hook_type(hook)
        return :row if hook[:row].present?

        :column if hook[:column].present?
      end

      def create_hooks_by_type(type, hook, table)
        raise "Unknown Hook Type" unless %i[row column].include?(type)

        if type == :row
          create_row_hooks(hook[:row], table)
        else
          create_column_hooks(hook[:column], table)
        end
      end

      # One row hook per script.
      def create_row_hooks(options, table)
        validate_row_hook_options!(options, table)
        Array(options[:scripts]).map { |script| build_hook(:row, nil, script) }
      end

      # One column hook per (column name, script) pair.
      def create_column_hooks(options, table)
        validate_column_hook_options!(options, table)
        scripts = Array(options[:scripts])
        Array(options[:name]).flat_map do |column_name|
          scripts.map { |script| build_hook(:column, column_name, script) }
        end
      end

      def build_hook(type, column_name, script)
        hook = GammaReplication::Hook.new
        hook.hook_type = type
        hook.column_name = column_name
        hook.script_path = script
        hook.root_dir = @hook_root_dir
        hook.apply = @apply
        hook
      end

      def validate_row_hook_options!(options, table)
        return unless options[:scripts].blank?

        raise "Required scripts arguments. table: #{table.table_name}, hook_type: row"
      end

      def validate_column_hook_options!(options, table)
        if options[:name].blank?
          raise "Required column name arguments. table: #{table.table_name}, hook_type: column"
        end
        return unless options[:scripts].blank?

        raise "Required scripts arguments. table: #{table.table_name}, hook_type: column"
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module GammaReplication
  # A replicated table: its name, the hooks to run on its rows and what is
  # known about its presence and columns in the source and target databases.
  class Table
    DEFAULT_PRIMARY_KEY = "id"

    attr_accessor :table_name, :hooks,
                  :in_exist, :out_exist, :in_exist_columns, :out_exist_columns

    def initialize
      @hooks = []
      @in_exist_columns = []
      @out_exist_columns = []
    end

    # Feeds a copy of the record through every hook, in order, each hook
    # receiving the previous hook's result.
    def record_value(record)
      hooks.inject(record.dup) { |current, hook| hook.execute_script(current) }
    end

    # @return [String] the primary key column name (always the default)
    def primary_key
      DEFAULT_PRIMARY_KEY
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "active_support/all"
|
4
|
+
require "yaml"
|
5
|
+
require_relative "gamma_replication/version"
|
6
|
+
require_relative "gamma_replication/database_settings"
|
7
|
+
require_relative "gamma_replication/hook"
|
8
|
+
require_relative "gamma_replication/table"
|
9
|
+
require_relative "gamma_replication/database_connector"
|
10
|
+
require_relative "gamma_replication/database_connector/mysql_connector"
|
11
|
+
require_relative "gamma_replication/command"
|
12
|
+
require_relative "gamma_replication/command/base_replication"
|
13
|
+
require_relative "gamma_replication/command/start"
|
14
|
+
require_relative "gamma_replication/command/dryrun"
|
15
|
+
require_relative "gamma_replication/parser/data_parser"
|
16
|
+
require_relative "gamma_replication/maxwell_client"
|
17
|
+
|
18
|
+
# Top-level namespace of the gem.
module GammaReplication
  # Base error class for the gem.
  class Error < StandardError; end
end
|
metadata
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gamma_replication
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Shinsuke Nishio
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '7.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '7.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mysql2
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.5.5
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.5.5
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: thor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '13.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '13.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.21'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.21'
|
97
|
+
description: A tool to replicate MySQL data with the ability to mask sensitive information
|
98
|
+
using Maxwell's Daemon
|
99
|
+
email:
|
100
|
+
- nishio@densan-labs.net
|
101
|
+
executables:
|
102
|
+
- gamma_replication
|
103
|
+
extensions: []
|
104
|
+
extra_rdoc_files: []
|
105
|
+
files:
|
106
|
+
- LICENSE.txt
|
107
|
+
- README.md
|
108
|
+
- bin/console
|
109
|
+
- bin/setup
|
110
|
+
- exe/gamma_replication
|
111
|
+
- gamma_replication.gemspec
|
112
|
+
- lib/gamma_replication.rb
|
113
|
+
- lib/gamma_replication/command.rb
|
114
|
+
- lib/gamma_replication/command/base_replication.rb
|
115
|
+
- lib/gamma_replication/command/dryrun.rb
|
116
|
+
- lib/gamma_replication/command/start.rb
|
117
|
+
- lib/gamma_replication/database_connector.rb
|
118
|
+
- lib/gamma_replication/database_connector/mysql_connector.rb
|
119
|
+
- lib/gamma_replication/database_settings.rb
|
120
|
+
- lib/gamma_replication/hook.rb
|
121
|
+
- lib/gamma_replication/maxwell_client.rb
|
122
|
+
- lib/gamma_replication/parser/data_parser.rb
|
123
|
+
- lib/gamma_replication/table.rb
|
124
|
+
- lib/gamma_replication/version.rb
|
125
|
+
homepage: https://github.com/nishio-dens/gamma_replication
|
126
|
+
licenses:
|
127
|
+
- MIT
|
128
|
+
metadata:
|
129
|
+
homepage_uri: https://github.com/nishio-dens/gamma_replication
|
130
|
+
source_code_uri: https://github.com/nishio-dens/gamma_replication
|
131
|
+
changelog_uri: https://github.com/nishio-dens/gamma_replication/blob/main/CHANGELOG.md
|
132
|
+
post_install_message:
|
133
|
+
rdoc_options: []
|
134
|
+
require_paths:
|
135
|
+
- lib
|
136
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: 3.0.0
|
141
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
requirements: []
|
147
|
+
rubygems_version: 3.5.16
|
148
|
+
signing_key:
|
149
|
+
specification_version: 4
|
150
|
+
summary: MySQL replication tool with data masking capability
|
151
|
+
test_files: []
|