whodunit-chronicles 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,138 @@
1
+ name: whodunit-chronicles
2
+
3
+ # ──────────────────────────────────────────────────────────────────────────────
4
+ # Local development + CI test databases
5
+ #
6
+ # Quickstart:
7
+ # docker compose up -d
8
+ # bundle exec rake test
9
+ #
10
+ # Stop + wipe volumes:
11
+ # docker compose down -v
12
+ # ──────────────────────────────────────────────────────────────────────────────
13
+
14
+ services:
15
+ # ── PostgreSQL (logical replication enabled) ─────────────────────────────
16
+ postgres:
17
+ image: postgres:16-alpine
18
+ environment:
19
+ POSTGRES_USER: chronicles
20
+ POSTGRES_PASSWORD: chronicles
21
+ POSTGRES_DB: chronicles_test
22
+ ports:
23
+ - "${POSTGRES_PORT:-5432}:5432"
24
+ command: >
25
+ postgres
26
+ -c wal_level=logical
27
+ -c max_replication_slots=10
28
+ -c max_wal_senders=10
29
+ -c log_replication_commands=on
30
+ volumes:
31
+ - pg_data:/var/lib/postgresql/data
32
+ - ./docker/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
33
+ healthcheck:
34
+ test: ["CMD-SHELL", "pg_isready -U chronicles -d chronicles_test"]
35
+ interval: 5s
36
+ timeout: 5s
37
+ retries: 10
38
+ start_period: 10s
39
+
40
+ # Separate audit database (mirrors production split)
41
+ postgres_audit:
42
+ image: postgres:16-alpine
43
+ environment:
44
+ POSTGRES_USER: chronicles
45
+ POSTGRES_PASSWORD: chronicles
46
+ POSTGRES_DB: chronicles_audit_test
47
+ ports:
48
+ - "${POSTGRES_AUDIT_PORT:-5433}:5432"
49
+ volumes:
50
+ - pg_audit_data:/var/lib/postgresql/data
51
+ healthcheck:
52
+ test: ["CMD-SHELL", "pg_isready -U chronicles -d chronicles_audit_test"]
53
+ interval: 5s
54
+ timeout: 5s
55
+ retries: 10
56
+ start_period: 10s
57
+
58
+ # ── MySQL (binary logging enabled) ───────────────────────────────────────
59
+ mysql:
60
+ image: mysql:8.0
61
+ environment:
62
+ MYSQL_ROOT_PASSWORD: chronicles_root
63
+ MYSQL_USER: chronicles
64
+ MYSQL_PASSWORD: chronicles
65
+ MYSQL_DATABASE: chronicles_test
66
+ ports:
67
+ - "3306:3306"
68
+ command: >
69
+ mysqld
70
+ --server-id=1
71
+ --log-bin=mysql-bin
72
+ --binlog-format=ROW
73
+ --binlog-row-image=FULL
74
+ --expire-logs-days=1
75
+ --gtid-mode=ON
76
+ --enforce-gtid-consistency=ON
77
+ volumes:
78
+ - mysql_data:/var/lib/mysql
79
+ - ./docker/mysql/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
80
+ healthcheck:
81
+ test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "chronicles", "-pchronicles"]
82
+ interval: 5s
83
+ timeout: 5s
84
+ retries: 15
85
+ start_period: 30s
86
+
87
+ # Separate audit database for MySQL
88
+ mysql_audit:
89
+ image: mysql:8.0
90
+ environment:
91
+ MYSQL_ROOT_PASSWORD: chronicles_root
92
+ MYSQL_USER: chronicles
93
+ MYSQL_PASSWORD: chronicles
94
+ MYSQL_DATABASE: chronicles_audit_test
95
+ ports:
96
+ - "3307:3306"
97
+ volumes:
98
+ - mysql_audit_data:/var/lib/mysql
99
+ healthcheck:
100
+ test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-u", "chronicles", "-pchronicles"]
101
+ interval: 5s
102
+ timeout: 5s
103
+ retries: 15
104
+ start_period: 30s
105
+
106
+ # ── MariaDB (binary logging enabled) ─────────────────────────────────────
107
+ mariadb:
108
+ image: mariadb:11
109
+ environment:
110
+ MARIADB_ROOT_PASSWORD: chronicles_root
111
+ MARIADB_USER: chronicles
112
+ MARIADB_PASSWORD: chronicles
113
+ MARIADB_DATABASE: chronicles_test
114
+ ports:
115
+ - "3308:3306"
116
+ command: >
117
+ mariadbd
118
+ --server-id=2
119
+ --log-bin=mariadb-bin
120
+ --binlog-format=ROW
121
+ --binlog-row-image=FULL
122
+ --expire-logs-days=1
123
+ volumes:
124
+ - mariadb_data:/var/lib/mysql
125
+ - ./docker/mysql/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
126
+ healthcheck:
127
+ test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"]
128
+ interval: 5s
129
+ timeout: 5s
130
+ retries: 15
131
+ start_period: 30s
132
+
133
+ volumes:
134
+ pg_data:
135
+ pg_audit_data:
136
+ mysql_data:
137
+ mysql_audit_data:
138
+ mariadb_data:
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Whodunit
  module Chronicles
    # Lazily loads the correct database adapter gem at runtime.
    #
    # This avoids forcing all users to install both `pg` and `trilogy`
    # regardless of which database they actually use.
    #
    # @example
    #   adapter = AdapterLoader.load(:postgresql)
    #   adapter = AdapterLoader.load(:mysql)
    #   adapter = AdapterLoader.load(:mariadb)
    module AdapterLoader
      # Map of adapter type symbols to their required gem and class path.
      # :mysql and :mariadb resolve to the same adapter class and gem.
      ADAPTER_REGISTRY = {
        postgresql: {
          gem: 'pg',
          require: 'whodunit/chronicles/adapters/postgresql',
          class: 'Whodunit::Chronicles::Adapters::PostgreSQL',
          hint: "Add `gem 'pg', '~> 1.5'` to your Gemfile.",
        },
        mysql: {
          gem: 'trilogy',
          require: 'whodunit/chronicles/adapters/mysql',
          class: 'Whodunit::Chronicles::Adapters::MySQL',
          hint: "Add `gem 'trilogy', '~> 2.9'` to your Gemfile.",
        },
        mariadb: {
          gem: 'trilogy',
          require: 'whodunit/chronicles/adapters/mysql',
          class: 'Whodunit::Chronicles::Adapters::MySQL',
          hint: "Add `gem 'trilogy', '~> 2.9'` to your Gemfile.",
        },
      }.freeze

      # Load and instantiate an adapter by type.
      #
      # @param type [Symbol, String] one of :postgresql, :mysql, :mariadb
      # @param options [Hash] options forwarded to the adapter constructor
      # @return [Adapters::Base] the instantiated adapter
      # @raise [Whodunit::Chronicles::ConfigurationError] for unknown adapter types
      #   (including nil and other values that cannot be turned into a symbol)
      # @raise [Whodunit::Chronicles::AdapterLoadError] when the required gem is missing
      def self.load(type, **options)
        # Values without #to_sym (nil, Integer, ...) are reported as unknown
        # adapter types instead of leaking a NoMethodError.
        config = ADAPTER_REGISTRY[type.to_sym] if type.respond_to?(:to_sym)

        unless config
          known = ADAPTER_REGISTRY.keys.map(&:inspect).join(', ')
          raise ConfigurationError,
                "Unknown adapter type #{type.inspect}. Known adapters: #{known}"
        end

        load_gem!(config)
        require config[:require]
        Object.const_get(config[:class]).new(**options)
      rescue LoadError => e
        # A LoadError can only occur after the registry lookup succeeded,
        # so `config` is always set here.
        raise AdapterLoadError,
              "Could not load the '#{config[:gem]}' gem required for the " \
              "#{type} adapter.\n#{config[:hint]}\nOriginal error: #{e.message}"
      end

      # Requires the gem named in the registry entry.
      #
      # @api private
      # @param config [Hash] an ADAPTER_REGISTRY entry
      def self.load_gem!(config)
        require config[:gem]
      end
      private_class_method :load_gem!
    end
  end
end
@@ -0,0 +1,261 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'trilogy'
4
+ require 'uri'
5
+
6
+ module Whodunit
7
+ module Chronicles
8
+ module Adapters
9
+ # MySQL/MariaDB binary log streaming adapter
10
+ #
11
+ # Uses MySQL's binary log replication to stream database changes
12
+ # without impacting application performance.
13
+ class MySQL < Chronicles::StreamAdapter
14
+ DEFAULT_SERVER_ID = 1001
15
+
16
+ attr_reader :connection, :database_url, :server_id, :binlog_file, :binlog_position
17
+
18
# Build an adapter that has not connected yet.
#
# @param database_url [String] connection URL; defaults to the Chronicles config value
# @param server_id [Integer] replica server id used by this adapter
# @param logger [Logger] logger handed to the parent adapter
def initialize(
  database_url: Chronicles.config.database_url,
  server_id: DEFAULT_SERVER_ID,
  logger: Chronicles.logger
)
  super(logger: logger)

  # Connection and binlog coordinates stay unset until a connection is made.
  @connection = nil
  @binlog_file = nil
  @binlog_position = nil
  @binlog_checksum = true

  @server_id = server_id
  @database_url = database_url
end
31
+
32
# Start streaming binary log changes.
#
# Connects, validates the server settings, records the current binlog
# position and then streams events to the given block.
#
# @yield [change_event] each change event produced by the stream
# @raise [ArgumentError] when no block is given
# @raise [ReplicationError] when streaming fails
def start_streaming(&block)
  raise ArgumentError, 'Block required for processing events' unless block

  log(:info, 'Starting MySQL binary log streaming')

  establish_connection
  ensure_setup

  # Resolve the start position before flagging the adapter as running, so a
  # failure here cannot leave `running` stuck at true.
  fetch_current_position

  log(:info, 'Starting replication from position',
      file: @binlog_file, position: @binlog_position)

  begin
    self.running = true
    stream_binlog_events(&block)
  rescue StandardError => e
    # backtrace can be nil for exceptions that were never raised normally
    log(:error, 'Streaming error', error: e.message, backtrace: e.backtrace&.first(5))
    raise ReplicationError, "Failed to stream changes: #{e.message}"
  ensure
    self.running = false
  end
end
56
+
57
# Stop streaming: log the request, clear the running flag and drop the
# database connection.
def stop_streaming
  log(:info, 'Stopping MySQL binary log streaming')

  self.running = false
  close_connection
end
63
+
64
# Get the current replication position.
#
# Returns the cached coordinates when both are known. Otherwise it connects
# (if needed) and fetches them from the server first.
#
# @return [String] position formatted as "<binlog file>:<offset>"
def current_position
  unless @binlog_file && @binlog_position
    # The position query runs on @connection, which is nil until a
    # connection is established.
    establish_connection
    fetch_current_position
  end

  "#{@binlog_file}:#{@binlog_position}"
end
71
+
72
# Set up binary log replication: connect, run the binlog-format and
# server-id validations, then issue the binlog checksum statement.
def setup
  log(:info, 'Setting up MySQL binary log replication')

  %i[
    establish_connection
    validate_binlog_format
    validate_server_id
    enable_binlog_checksum
  ].each { |step| send(step) }

  log(:info, 'MySQL setup completed successfully')
end
83
+
84
# Tear down replication. Nothing is removed on the server; the only
# cleanup is closing the connection, bracketed by two log lines.
def teardown
  log(:info, 'Tearing down MySQL binary log replication')

  close_connection

  log(:info, 'MySQL teardown completed')
end
90
+
91
# Test the database connection.
#
# Runs a trivial query for the server's hostname, version and server id,
# and logs them on success.
#
# @return [Boolean] true when the query succeeds, false on any StandardError
def test_connection
  establish_connection

  # Read the row through each_hash so columns can be looked up by name;
  # plain enumeration of a Trilogy result yields positional arrays.
  info = @connection.query('SELECT @@hostname, @@version, @@server_id').each_hash.first

  log(:info, 'Connection test successful',
      hostname: info['@@hostname'],
      version: info['@@version'],
      server_id: info['@@server_id'])

  true
rescue StandardError => e
  log(:error, 'Connection test failed', error: e.message)
  false
end
107
+
108
+ private
109
+
110
# Open a connection unless a live one already exists.
#
# A handle whose ping fails or raises is treated as stale: it is closed
# best-effort and replaced, instead of the ping error being reported as a
# connection failure.
#
# @raise [AdapterLoadError] when a new connection cannot be opened
def establish_connection
  if @connection
    alive = begin
      @connection.ping
    rescue StandardError
      false
    end
    return if alive

    stale = @connection
    @connection = nil
    begin
      stale.close
    rescue StandardError
      nil # the handle is being discarded anyway
    end
  end

  parsed_url = parse_database_url(@database_url)

  @connection = Trilogy.new(
    host: parsed_url[:host],
    port: parsed_url[:port] || 3306,
    username: parsed_url[:username],
    password: parsed_url[:password],
    database: parsed_url[:database],
    ssl: parsed_url[:ssl],
  )

  log(:debug, 'Established MySQL connection',
      host: parsed_url[:host],
      database: parsed_url[:database])
rescue StandardError => e
  log(:error, 'Failed to establish connection', error: e.message)
  raise AdapterLoadError, "Connection failed: #{e.message}"
end
131
+
132
# Close the connection, if any, and forget the handle.
#
# The reference is cleared even when `close` raises, so a broken handle
# is never kept around; the error itself still propagates.
def close_connection
  @connection&.close
ensure
  @connection = nil
end
136
+
137
# Split a database URL into connection settings.
#
# Username, password and database name are percent-decoded, so
# credentials with reserved characters (e.g. "p%40ss") are passed on in
# their real form. SSL is enabled only by an exact `ssl=true` query
# parameter, not by any parameter that merely ends in it.
#
# @param url [String] e.g. "mysql://user:secret@host:3306/db?ssl=true"
# @return [Hash] :host, :port, :username, :password, :database, :ssl
#   (:ssl is nil when the URL has no query string)
def parse_database_url(url)
  uri = URI.parse(url)

  # '+' is a literal plus in these URI parts, so protect it from the
  # form-decoder's "+ means space" rule.
  unescape = ->(part) { part && URI.decode_www_form_component(part.gsub('+', '%2B')) }

  {
    host: uri.host,
    port: uri.port,
    username: unescape.call(uri.user),
    password: unescape.call(uri.password),
    database: unescape.call(uri.path&.delete_prefix('/')),
    ssl: uri.query && uri.query.split('&').include?('ssl=true'),
  }
end
148
+
149
# Pre-flight checks run before streaming: the binlog format first, then
# the server id. Returns whatever the server-id check returns.
def ensure_setup
  validate_binlog_format

  validate_server_id
end
153
+
154
# Check that the server's binlog format is one this adapter accepts.
#
# @raise [ReplicationError] unless @@binlog_format is ROW or MIXED
def validate_binlog_format
  # each_hash gives a column-name-keyed row; plain enumeration of a
  # Trilogy result yields positional arrays.
  row = @connection.query('SELECT @@binlog_format').each_hash.first
  format = row['@@binlog_format']

  unless %w[ROW MIXED].include?(format)
    raise ReplicationError,
          "Binary log format must be ROW or MIXED, currently: #{format}"
  end

  log(:debug, 'Binary log format validated', format: format)
end
165
+
166
# Check that this adapter's replica server id differs from the id of the
# server it connects to.
#
# @raise [ReplicationError] when both ids are equal
def validate_server_id
  # each_hash gives a column-name-keyed row; plain enumeration of a
  # Trilogy result yields positional arrays.
  row = @connection.query('SELECT @@server_id').each_hash.first
  current_server_id = row['@@server_id'].to_i

  if current_server_id == @server_id
    raise ReplicationError,
          "Server ID conflict: #{@server_id} is already in use"
  end

  log(:debug, 'Server ID validated',
      current: current_server_id,
      replication: @server_id)
end
179
+
180
# Copy the server's global binlog_checksum setting into the
# @master_binlog_checksum session variable, then log at debug level.
def enable_binlog_checksum
  statement = 'SET @master_binlog_checksum = @@global.binlog_checksum'
  @connection.query(statement)

  log(:debug, 'Binary log checksum enabled')
end
184
+
185
# Read the server's current binlog coordinates into @binlog_file and
# @binlog_position.
#
# @raise [ReplicationError] when the status query returns no row
def fetch_current_position
  # each_hash gives a column-name-keyed row; plain enumeration of a
  # Trilogy result yields positional arrays.
  status = @connection.query('SHOW MASTER STATUS').each_hash.first

  raise ReplicationError, 'Unable to fetch master status - binary logging may be disabled' unless status

  @binlog_file = status['File']
  @binlog_position = status['Position']
  log(:debug, 'Fetched master position',
      file: @binlog_file,
      position: @binlog_position)
end
197
+
198
# Run the three streaming phases in order: register as a replica, request
# the binlog dump, then process the stream with the caller's block.
# Any StandardError is logged and re-raised unchanged.
def stream_binlog_events(&block)
  register_replica_server
  request_binlog_dump
  process_binlog_stream(&block)
rescue StandardError => failure
  log(:error, 'Binary log streaming error', error: failure.message)
  raise
end
211
+
212
# Placeholder for replica registration.
#
# A full implementation would issue the COM_REGISTER_SLAVE protocol
# command, which needs low-level MySQL protocol handling. For now this
# only logs the server id that would be registered.
def register_replica_server
  log(:debug, 'Registering as replica server', server_id: @server_id)
end
220
+
221
# Placeholder for the binlog dump request.
#
# A full implementation would issue the COM_BINLOG_DUMP protocol command,
# which needs binary protocol handling. For now this only logs the
# coordinates the dump would start from.
def request_binlog_dump
  start_point = { file: @binlog_file, position: @binlog_position }

  log(:debug, 'Requesting binary log dump', **start_point)
end
229
+
230
# Placeholder for binlog stream processing.
#
# A real implementation would read binlog events from the stream, parse
# their headers and data, convert them to ChangeEvent objects and yield
# each one. This version logs that it is a placeholder, warns that the
# protocol is not implemented, and yields one hard-coded sample event to
# demonstrate the interface.
#
# @yield [change_event] a single sample ChangeEvent, only when a block is given
def process_binlog_stream(&block)
  log(:info, 'Processing binary log stream (placeholder implementation)')
  log(:warn, 'MySQL binary log streaming requires full protocol implementation')

  sample_attributes = {
    table_name: 'example_table',
    action: 'INSERT',
    primary_key: { id: 1 },
    new_data: { id: 1, name: 'test' },
    old_data: nil,
    timestamp: Time.now,
    metadata: { position: current_position },
  }
  sample_event = ChangeEvent.new(**sample_attributes)

  block.call(sample_event) if block
end
258
+ end
259
+ end
260
+ end
261
+ end
@@ -9,7 +9,7 @@ module Whodunit
9
9
  #
10
10
  # Uses PostgreSQL's logical replication functionality to stream
11
11
  # database changes via WAL decoding without impacting application performance.
12
- class PostgreSQL < StreamAdapter
12
+ class PostgreSQL < Chronicles::StreamAdapter
13
13
  DEFAULT_PLUGIN = 'pgoutput'
14
14
 
15
15
  attr_reader :connection, :replication_connection, :publication_name, :slot_name
@@ -95,8 +95,8 @@ module Whodunit
95
95
  def changes
96
96
  return {} unless update? && old_data && new_data
97
97
 
98
- changed_columns.each_with_object({}) do |column, changes_hash|
99
- changes_hash[column] = [old_data[column], new_data[column]]
98
+ changed_columns.to_h do |column|
99
+ [column, [old_data[column], new_data[column]]]
100
100
  end
101
101
  end
102
102
 
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
module Whodunit
  module Chronicles
    # Fans out a single change event to multiple processors in sequence.
    #
    # Use this to build pipelines — e.g. simultaneously storing audit records,
    # streaming to Grafana, and triggering alerts — without coupling any single
    # processor to the others.
    #
    # Each processor runs independently. If one raises, the error is logged and
    # the remaining processors still execute (fail-open by default). Set
    # `fail_fast: true` to instead halt the chain on the first error.
    #
    # @example Basic pipeline
    #   service = Whodunit::Chronicles::Service.new(
    #     adapter: adapter,
    #     processor: Whodunit::Chronicles::CompositeProcessor.new([
    #       AuditStoreProcessor.new,
    #       AlertingProcessor.new,
    #       GrafanaProcessor.new
    #     ])
    #   )
    #
    # @example Halt on first error
    #   CompositeProcessor.new(processors, fail_fast: true)
    #
    class CompositeProcessor
      # @param processors [Array<#process>] ordered list of processors to invoke
      # @param fail_fast [Boolean] when true, halt the chain on the first error
      # @param logger [Logger, nil] optional logger; defaults to Chronicles logger
      # @raise [ArgumentError] when processors is not an Array or is empty
      def initialize(processors, fail_fast: false, logger: nil)
        raise ArgumentError, 'processors must be an Array' unless processors.is_a?(Array)
        raise ArgumentError, 'processors cannot be empty' if processors.empty?

        # Keep a private copy so #add / #<< never mutate the caller's array
        # and later changes to that array never alter this chain.
        @processors = processors.dup
        @fail_fast = fail_fast
        @logger = logger || Whodunit::Chronicles.logger
      end

      # Process a change event through every processor in the chain.
      #
      # @param change_event [ChangeEvent] the event to process
      # @return [nil]
      # @raise [ProcessingError] only when fail_fast is true and a child raises
      def process(change_event)
        errors = []

        @processors.each do |processor|
          processor.process(change_event)
        rescue StandardError => e
          raise ProcessingError, "#{processor.class} failed: #{e.message}" if @fail_fast

          @logger.error { "CompositeProcessor: #{processor.class} raised #{e.class}: #{e.message}" }
          errors << e
        end

        unless errors.empty?
          @logger.warn do
            "CompositeProcessor: #{errors.size} processor(s) failed for " \
              "#{change_event.table_name}##{change_event.action}"
          end
        end

        nil
      end

      # @return [Integer] the number of processors in the chain
      def size
        @processors.size
      end

      # @return [Array<Class>] the processor classes in chain order
      def processor_classes
        @processors.map(&:class)
      end

      # Append a processor to the end of the chain.
      #
      # @param processor [#process] the processor to add
      # @return [self]
      def add(processor)
        @processors << processor
        self
      end
      alias << add
    end
  end
end
@@ -30,21 +30,20 @@ module Whodunit
30
30
  # @raise [ConfigurationError] if configuration is invalid
31
31
  def validate!
32
32
  raise ConfigurationError, 'database_url is required' if database_url.nil?
33
- raise ConfigurationError, 'adapter must be :postgresql' unless adapter == :postgresql
33
+ raise ConfigurationError, 'adapter must be :postgresql or :mysql' unless %i[postgresql mysql].include?(adapter)
34
34
  raise ConfigurationError, 'batch_size must be positive' unless batch_size.positive?
35
35
  raise ConfigurationError, 'max_retry_attempts must be positive' unless max_retry_attempts.positive?
36
36
  raise ConfigurationError, 'retry_delay must be positive' unless retry_delay.positive?
37
37
 
38
- validate_publication_name!
39
- validate_slot_name!
38
+ validate_adapter_specific_settings!
40
39
  end
41
40
 
42
- # Check if a table should be audited based on filters
41
+ # Check if a table should be chronicled based on filters
43
42
  #
44
43
  # @param table_name [String] The table name to check
45
44
  # @param schema_name [String] The schema name to check
46
- # @return [Boolean] true if the table should be audited
47
- def audit_table?(table_name, schema_name = 'public')
45
+ # @return [Boolean] true if the table should be chronicled
46
+ def chronicle_table?(table_name, schema_name = 'public')
48
47
  return false if filtered_by_schema?(schema_name)
49
48
  return false if filtered_by_table?(table_name)
50
49
 
@@ -53,18 +52,30 @@ module Whodunit
53
52
 
54
53
  private
55
54
 
56
- def validate_publication_name!
57
- return if /\A[a-zA-Z_][a-zA-Z0-9_]*\z/.match?(publication_name)
58
-
59
- raise ConfigurationError, 'publication_name must be a valid PostgreSQL identifier'
55
+ def validate_adapter_specific_settings!
56
+ case adapter
57
+ when :postgresql
58
+ validate_postgresql_settings!
59
+ when :mysql
60
+ validate_mysql_settings!
61
+ end
60
62
  end
61
63
 
62
- def validate_slot_name!
63
- return if /\A[a-zA-Z_][a-zA-Z0-9_]*\z/.match?(replication_slot_name)
64
+ def validate_postgresql_settings!
65
+ if publication_name && !/\A[a-zA-Z_][a-zA-Z0-9_]*\z/.match?(publication_name)
66
+ raise ConfigurationError, 'publication_name must be a valid PostgreSQL identifier'
67
+ end
68
+
69
+ return unless replication_slot_name && !/\A[a-zA-Z_][a-zA-Z0-9_]*\z/.match?(replication_slot_name)
64
70
 
65
71
  raise ConfigurationError, 'replication_slot_name must be a valid PostgreSQL identifier'
66
72
  end
67
73
 
74
+ def validate_mysql_settings!
75
+ # MySQL-specific validations can be added here in the future
76
+ # For now, MySQL settings are less restrictive
77
+ end
78
+
68
79
  def filtered_by_schema?(schema_name)
69
80
  return false unless schema_filter
70
81