whodunit-chronicles 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
+ # frozen_string_literal: true
+
+ require 'pg'
+ require 'uri'
+
+ module Whodunit
+   module Chronicles
+     module Adapters
+       # PostgreSQL logical replication adapter
+       #
+       # Uses PostgreSQL's logical replication functionality to stream
+       # database changes via WAL decoding without impacting application performance.
+       class PostgreSQL < StreamAdapter
+         DEFAULT_PLUGIN = 'pgoutput'
+
+         attr_reader :connection, :replication_connection, :publication_name, :slot_name
+
+         def initialize(
+           database_url: Chronicles.config.database_url,
+           publication_name: Chronicles.config.publication_name,
+           slot_name: Chronicles.config.replication_slot_name,
+           logger: Chronicles.logger
+         )
+           super(logger: logger)
+           @database_url = database_url
+           @publication_name = publication_name
+           @slot_name = slot_name
+           @connection = nil
+           @replication_connection = nil
+           @last_lsn = nil
+         end
+
+         # Start streaming logical replication changes
+         def start_streaming(&)
+           raise ArgumentError, 'Block required for processing events' unless block_given?
+
+           log(:info, 'Starting PostgreSQL logical replication streaming')
+
+           establish_connections
+           ensure_setup
+
+           self.running = true
+           self.position = confirmed_flush_lsn || '0/0'
+
+           log(:info, 'Starting replication from LSN', lsn: @position)
+
+           begin
+             stream_changes(&)
+           rescue StandardError => e
+             log(:error, 'Streaming error', error: e.message, backtrace: e.backtrace.first(5))
+             raise ReplicationError, "Failed to stream changes: #{e.message}"
+           ensure
+             self.running = false
+           end
+         end
+
+         # Stop streaming
+         def stop_streaming
+           log(:info, 'Stopping PostgreSQL logical replication streaming')
+           self.running = false
+           close_connections
+         end
+
+         # Get current replication position
+         def current_position
+           @last_lsn || confirmed_flush_lsn
+         end
+
+         # Set up logical replication (publication and slot)
+         def setup
+           log(:info, 'Setting up PostgreSQL logical replication')
+
+           establish_connection
+           create_publication
+           create_replication_slot
+
+           log(:info, 'PostgreSQL setup completed successfully')
+         end
+
+         # Remove logical replication setup
+         def teardown
+           log(:info, 'Tearing down PostgreSQL logical replication')
+
+           establish_connection
+           drop_replication_slot
+           drop_publication
+
+           log(:info, 'PostgreSQL teardown completed')
+         ensure
+           close_connections
+         end
+
+         # Test database connection
+         def test_connection
+           establish_connection
+           result = @connection.exec('SELECT current_database(), current_user, version()')
+           db_info = result.first
+
+           log(:info, 'Connection test successful',
+               database: db_info['current_database'],
+               user: db_info['current_user'],
+               version: db_info['version'])
+
+           true
+         rescue PG::Error => e
+           log(:error, 'Connection test failed', error: e.message)
+           false
+         ensure
+           result&.clear
+         end
+
+         private
+
+         def establish_connections
+           establish_connection
+           establish_replication_connection
+         end
+
+         def establish_connection
+           return if @connection && !@connection.finished?
+
+           @connection = PG.connect(@database_url)
+           @connection.type_map_for_results = PG::BasicTypeMapForResults.new(@connection)
+         end
+
+         def establish_replication_connection
+           return if @replication_connection && !@replication_connection.finished?
+
+           # Parse connection URL and add replication parameter
+           uri = URI.parse(@database_url)
+           repl_params = URI.decode_www_form(uri.query || '')
+           repl_params << %w[replication database]
+           uri.query = URI.encode_www_form(repl_params)
+
+           @replication_connection = PG.connect(uri.to_s)
+         end
+
+         def close_connections
+           @connection&.close
+           @replication_connection&.close
+           @connection = nil
+           @replication_connection = nil
+         end
+
+         def ensure_setup
+           unless publication_exists?
+             raise ReplicationError, "Publication '#{publication_name}' does not exist. Run #setup first."
+           end
+
+           return if replication_slot_exists?
+
+           raise ReplicationError, "Replication slot '#{slot_name}' does not exist. Run #setup first."
+         end
+
+         def stream_changes(&)
+           copy_sql = build_copy_statement
+           log(:debug, 'Starting COPY command', sql: copy_sql)
+
+           @replication_connection.exec(copy_sql)
+
+           while running?
+             data = @replication_connection.get_copy_data(async: false)
+             break unless data
+
+             process_wal_data(data, &)
+           end
+         end
+
+         def build_copy_statement
+           options = [
+             "proto_version '1'",
+             "publication_names '#{publication_name}'",
+           ].join(', ')
+
+           # pgoutput produces binary output, which the textual
+           # pg_logical_slot_get_changes() rejects; use the binary variant.
+           "COPY (SELECT * FROM pg_logical_slot_get_binary_changes('#{slot_name}', NULL, NULL, #{options})) TO STDOUT"
+         end
+
+         def process_wal_data(data)
+           # Parse pgoutput protocol message
+           # This is a simplified version - a full implementation would need
+           # to properly decode the binary protocol
+           log(:debug, 'Processing WAL data', size: data.bytesize)
+
+           # For now, we'll parse text-based logical decoding output
+           # In production, this should parse the binary pgoutput format
+           change_event = parse_logical_message(data)
+           yield(change_event) if change_event
+         rescue StandardError => e
+           log(:error, 'Error processing WAL data', error: e.message, data: data.inspect)
+         end
+
+         def parse_logical_message(data)
+           # Simplified parser for demonstration
+           # Real implementation would parse the pgoutput binary protocol
+           lines = data.strip.split("\n")
+           return unless lines.any?
+
+           # This is a placeholder - would need full pgoutput protocol parsing
+           log(:debug, 'Parsed logical message', lines: lines.size)
+           nil
+         end
+
+         def create_publication
+           if publication_exists?
+             log(:info, 'Publication already exists', name: publication_name)
+             return
+           end
+
+           sql = "CREATE PUBLICATION #{publication_name} FOR ALL TABLES"
+           @connection.exec(sql)
+           log(:info, 'Created publication', name: publication_name)
+         end
+
+         def drop_publication
+           return unless publication_exists?
+
+           sql = "DROP PUBLICATION IF EXISTS #{publication_name}"
+           @connection.exec(sql)
+           log(:info, 'Dropped publication', name: publication_name)
+         end
+
+         def create_replication_slot
+           if replication_slot_exists?
+             log(:info, 'Replication slot already exists', name: slot_name)
+             return
+           end
+
+           sql = "SELECT pg_create_logical_replication_slot('#{slot_name}', '#{DEFAULT_PLUGIN}')"
+           result = @connection.exec(sql)
+           slot_info = result.first
+
+           log(:info, 'Created replication slot',
+               name: slot_name,
+               lsn: slot_info['lsn'])
+         ensure
+           result&.clear
+         end
+
+         def drop_replication_slot
+           return unless replication_slot_exists?
+
+           sql = "SELECT pg_drop_replication_slot('#{slot_name}')"
+           @connection.exec(sql)
+           log(:info, 'Dropped replication slot', name: slot_name)
+         end
+
+         def publication_exists?
+           sql = 'SELECT 1 FROM pg_publication WHERE pubname = $1'
+           result = @connection.exec_params(sql, [publication_name])
+           exists = result.ntuples.positive?
+           result.clear
+           exists
+         end
+
+         def replication_slot_exists?
+           sql = 'SELECT 1 FROM pg_replication_slots WHERE slot_name = $1'
+           result = @connection.exec_params(sql, [slot_name])
+           exists = result.ntuples.positive?
+           result.clear
+           exists
+         end
+
+         def confirmed_flush_lsn
+           sql = 'SELECT confirmed_flush_lsn FROM pg_replication_slots WHERE slot_name = $1'
+           result = @connection.exec_params(sql, [slot_name])
+
+           if result.ntuples.positive?
+             lsn = result.first['confirmed_flush_lsn']
+             result.clear
+             lsn
+           else
+             result.clear
+             nil
+           end
+         end
+       end
+     end
+   end
+ end
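
Taken end to end, the adapter's lifecycle is setup (create the publication and replication slot), start_streaming (consume WAL data), and teardown. A minimal driver sketch, using only the constructor keywords and public methods shown above; the connection URL and names are placeholders, not values shipped with the gem:

    adapter = Whodunit::Chronicles::Adapters::PostgreSQL.new(
      database_url: 'postgres://localhost/app_db',   # placeholder URL
      publication_name: 'whodunit_chronicles',
      slot_name: 'whodunit_chronicles_slot'
    )

    adapter.setup    # idempotent: skips an existing publication/slot
    begin
      adapter.start_streaming do |change_event|
        p change_event    # one decoded change per yield
      end
    ensure
      adapter.stop_streaming
    end

Note that in this pre-release parse_logical_message always returns nil, so the block is never invoked; the loop simply drains COPY data until get_copy_data returns nil. Real decoding would start from the message-type byte that PostgreSQL's logical replication protocol places first in every pgoutput message ('B' begin, 'C' commit, 'R' relation, 'I' insert, 'U' update, 'D' delete, 'T' truncate). A sketch of that first step, with names invented here for illustration:

    # Hypothetical helper, not part of the gem: classify a raw pgoutput
    # message by its leading type byte, per the logical replication protocol.
    PGOUTPUT_MESSAGE_TYPES = {
      'B' => :begin, 'C' => :commit, 'R' => :relation, 'I' => :insert,
      'U' => :update, 'D' => :delete, 'T' => :truncate
    }.freeze

    def pgoutput_message_type(data)
      PGOUTPUT_MESSAGE_TYPES.fetch(data[0], :unknown)
    end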
@@ -0,0 +1,270 @@
+ # frozen_string_literal: true
+
+ require 'json'
+ require 'pg'
+
+ module Whodunit
+   module Chronicles
+     # Processes database change events and creates audit records
+     #
+     # Transforms ChangeEvent objects into structured audit records
+     # with complete object serialization and metadata.
+     class AuditProcessor
+       attr_reader :logger, :audit_connection
+
+       def initialize(
+         audit_database_url: Chronicles.config.audit_database_url,
+         logger: Chronicles.logger
+       )
+         @audit_database_url = audit_database_url
+         @logger = logger
+         @audit_connection = nil
+       end
+
+       # Process a change event and create an audit record
+       #
+       # @param change_event [ChangeEvent] The database change to audit
+       # @return [Hash] The created audit record
+       def process(change_event)
+         ensure_audit_connection
+
+         audit_record = build_audit_record(change_event)
+         persist_audit_record(audit_record)
+
+         log(:debug, 'Processed change event',
+             table: change_event.qualified_table_name,
+             action: change_event.action,
+             audit_id: audit_record[:id])
+
+         audit_record
+       rescue StandardError => e
+         log(:error, 'Failed to process change event',
+             error: e.message,
+             event: change_event.to_s)
+         raise
+       end
+
+       # Process multiple change events in a batch
+       #
+       # @param change_events [Array<ChangeEvent>] Array of change events
+       # @return [Array<Hash>] Array of created audit records
+       def process_batch(change_events)
+         return [] if change_events.empty?
+
+         ensure_audit_connection
+
+         audit_records = change_events.map { |event| build_audit_record(event) }
+         persist_audit_records_batch(audit_records)
+
+         log(:info, 'Processed batch of change events', count: change_events.size)
+
+         audit_records
+       rescue StandardError => e
+         log(:error, 'Failed to process batch',
+             error: e.message,
+             count: change_events.size)
+         raise
+       end
+
+       # Close audit database connection
+       def close
+         @audit_connection&.close
+         @audit_connection = nil
+       end
+
+       private
+
+       def ensure_audit_connection
+         return if @audit_connection && !@audit_connection.finished?
+
+         @audit_connection = PG.connect(@audit_database_url || Chronicles.config.database_url)
+         @audit_connection.type_map_for_results = PG::BasicTypeMapForResults.new(@audit_connection)
+
+         ensure_audit_table_exists
+       end
+
+       def ensure_audit_table_exists
+         create_sql = <<~SQL
+           CREATE TABLE IF NOT EXISTS whodunit_chronicles_audits (
+             id BIGSERIAL PRIMARY KEY,
+             table_name TEXT NOT NULL,
+             schema_name TEXT NOT NULL DEFAULT 'public',
+             record_id JSONB,
+             action TEXT NOT NULL CHECK (action IN ('INSERT', 'UPDATE', 'DELETE')),
+             old_data JSONB,
+             new_data JSONB,
+             changes JSONB,
+             user_id BIGINT,
+             user_type TEXT,
+             transaction_id TEXT,
+             sequence_number INTEGER,
+             occurred_at TIMESTAMP WITH TIME ZONE NOT NULL,
+             created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+             metadata JSONB DEFAULT '{}'::jsonb,
+
+             -- Ensure the old/new payloads match the recorded action
+             CONSTRAINT valid_data_for_action CHECK (
+               (action = 'INSERT' AND old_data IS NULL AND new_data IS NOT NULL) OR
+               (action = 'UPDATE' AND old_data IS NOT NULL AND new_data IS NOT NULL) OR
+               (action = 'DELETE' AND old_data IS NOT NULL AND new_data IS NULL)
+             )
+           );
+
+           -- Performance indexes. CONCURRENTLY is deliberately not used here:
+           -- these statements run in one implicit transaction, and
+           -- CREATE INDEX CONCURRENTLY cannot run inside a transaction block.
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_table_record
+             ON whodunit_chronicles_audits (table_name, (record_id->>'id'));
+
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_occurred_at
+             ON whodunit_chronicles_audits (occurred_at DESC);
+
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_user
+             ON whodunit_chronicles_audits (user_id, user_type);
+
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_action
+             ON whodunit_chronicles_audits (action);
+
+           -- GIN indexes for JSONB columns
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_record_id_gin
+             ON whodunit_chronicles_audits USING GIN (record_id);
+
+           CREATE INDEX IF NOT EXISTS idx_chronicles_audits_changes_gin
+             ON whodunit_chronicles_audits USING GIN (changes);
+         SQL
+
+         @audit_connection.exec(create_sql)
+       rescue PG::Error => e
+         # Tolerate races where another process created the table or indexes first
+         raise unless e.message.include?('already exists')
+       end
+
+       def build_audit_record(change_event)
+         user_info = extract_user_info(change_event)
+
+         {
+           id: nil, # Will be set by database
+           table_name: change_event.table_name,
+           schema_name: change_event.schema_name,
+           record_id: change_event.primary_key,
+           action: change_event.action,
+           old_data: change_event.old_data,
+           new_data: change_event.new_data,
+           changes: change_event.changes,
+           user_id: user_info[:user_id],
+           user_type: user_info[:user_type],
+           transaction_id: change_event.transaction_id,
+           sequence_number: change_event.sequence_number,
+           occurred_at: change_event.timestamp,
+           created_at: Time.now,
+           metadata: build_metadata(change_event),
+         }
+       end
+
+       def extract_user_info(change_event)
+         data = change_event.current_data || {}
+
+         # Look for Whodunit user attribution fields
+         user_id = data['creator_id'] || data['updater_id'] || data['deleter_id']
+
+         {
+           user_id: user_id,
+           user_type: user_id ? 'User' : nil,
+         }
+       end
+
+       def build_metadata(change_event)
+         {
+           table_schema: change_event.schema_name,
+           qualified_table_name: change_event.qualified_table_name,
+           changed_columns: change_event.changed_columns,
+           adapter_metadata: change_event.metadata,
+           chronicles_version: Chronicles::VERSION,
+         }
+       end
+
+       def persist_audit_record(audit_record)
+         sql = <<~SQL
+           INSERT INTO whodunit_chronicles_audits (
+             table_name, schema_name, record_id, action, old_data, new_data, changes,
+             user_id, user_type, transaction_id, sequence_number, occurred_at, created_at, metadata
+           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
+           RETURNING id
+         SQL
+
+         params = [
+           audit_record[:table_name],
+           audit_record[:schema_name],
+           audit_record[:record_id].to_json,
+           audit_record[:action],
+           audit_record[:old_data]&.to_json,
+           audit_record[:new_data]&.to_json,
+           audit_record[:changes].to_json,
+           audit_record[:user_id],
+           audit_record[:user_type],
+           audit_record[:transaction_id],
+           audit_record[:sequence_number],
+           audit_record[:occurred_at],
+           audit_record[:created_at],
+           audit_record[:metadata].to_json,
+         ]
+
+         result = @audit_connection.exec_params(sql, params)
+         audit_record[:id] = result.first['id'].to_i
+         result.clear
+
+         audit_record
+       end
+
+       def persist_audit_records_batch(audit_records)
+         return audit_records if audit_records.empty?
+
+         # Use multi-row INSERT for better performance
+         values_clauses = []
+         all_params = []
+         param_index = 1
+
+         audit_records.each do |record|
+           param_positions = (param_index..(param_index + 13)).map { |i| "$#{i}" }.join(', ')
+           values_clauses << "(#{param_positions})"
+
+           all_params.push(
+             record[:table_name],
+             record[:schema_name],
+             record[:record_id].to_json,
+             record[:action],
+             record[:old_data]&.to_json,
+             record[:new_data]&.to_json,
+             record[:changes].to_json,
+             record[:user_id],
+             record[:user_type],
+             record[:transaction_id],
+             record[:sequence_number],
+             record[:occurred_at],
+             record[:created_at],
+             record[:metadata].to_json,
+           )
+
+           param_index += 14
+         end
+
+         sql = <<~SQL
+           INSERT INTO whodunit_chronicles_audits (
+             table_name, schema_name, record_id, action, old_data, new_data, changes,
+             user_id, user_type, transaction_id, sequence_number, occurred_at, created_at, metadata
+           ) VALUES #{values_clauses.join(', ')}
+           RETURNING id
+         SQL
+
+         result = @audit_connection.exec_params(sql, all_params)
+
+         # Set IDs on the audit records
+         result.each_with_index do |row, index|
+           audit_records[index][:id] = row['id'].to_i
+         end
+
+         result.clear
+         audit_records
+       end
+
+       def log(level, message, context = {})
+         logger.public_send(level, message, processor: 'AuditProcessor', **context)
+       end
+     end
+   end
+ end
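
On the consumer side, AuditProcessor turns each ChangeEvent into one row of whodunit_chronicles_audits. A minimal wiring sketch, reusing the adapter from the sketch above; the audit database URL is a placeholder and may be omitted to fall back to the primary database:

    processor = Whodunit::Chronicles::AuditProcessor.new(
      audit_database_url: 'postgres://localhost/audit_db'   # placeholder
    )

    adapter.start_streaming do |change_event|
      processor.process(change_event)   # one INSERT ... RETURNING id per event
    end
    processor.close

Where events are buffered upstream, process_batch issues a single multi-row INSERT instead: each record contributes exactly 14 positional parameters, which is why param_index advances by 14 per record and the first row's placeholders span $1 through $14.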