orfeas_pam_dsl 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,560 @@
1
+ # frozen_string_literal: true
2
+
3
require 'json'
require 'time'

module PamDsl
  # GDPR compliance tools for data subject rights.
  #
  # This class provides generic GDPR compliance functionality that works with
  # any event store or data source through configurable extractors. Every
  # public report method reads the subject's events through the configured
  # +event_reader+ and derives its result from those events only.
  #
  # @example With Lyra events
  #   compliance = PamDsl::GDPRCompliance.new(
  #     subject_id: user.id,
  #     subject_type: 'User',
  #     event_reader: ->(sid, stype) {
  #       Lyra.config.event_store.read.to_a.select { |e| relates_to_subject?(e, sid, stype) }
  #     }
  #   )
  #   report = compliance.data_export
  #
  # @example With RubyEventStore
  #   compliance = PamDsl::GDPRCompliance.new(
  #     subject_id: user.id,
  #     event_reader: ->(sid, stype) {
  #       event_store.read.stream("User$#{sid}").to_a
  #     },
  #     attribute_extractor: ->(e) { e.data[:attributes] || {} },
  #     timestamp_extractor: ->(e) { e.metadata[:timestamp] }
  #   )
  #
  class GDPRCompliance
    # Seconds in an average Gregorian year (365.2425 days). Used for the
    # built-in retention durations when Integer#years is not available.
    SECONDS_PER_YEAR = 31_556_952

    # Characters that must be replaced inside XML character data.
    XML_ESCAPES = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
      "'" => '&apos;'
    }.freeze

    private_constant :SECONDS_PER_YEAR, :XML_ESCAPES

    attr_reader :subject_id, :subject_type, :options

    # Initialize GDPR compliance handler
    #
    # @param subject_id [Object] The data subject's identifier
    # @param subject_type [String] The type/class of the subject (default: 'User')
    # @param event_reader [Proc] Block that returns events for a subject: ->(subject_id, subject_type) { [...] }
    # @param attribute_extractor [Proc] Block to extract attributes from event: ->(event) { hash }
    # @param timestamp_extractor [Proc] Block to extract timestamp from event: ->(event) { time }
    # @param operation_extractor [Proc] Block to extract operation from event: ->(event) { :created/:updated/:destroyed }
    # @param model_class_extractor [Proc] Block to extract model class from event: ->(event) { string }
    # @param model_id_extractor [Proc] Block to extract model id from event: ->(event) { id }
    # @param changes_extractor [Proc] Block to extract changes from event: ->(event) { hash }
    # @param retention_policy [Hash] Retention periods by model class
    #
    def initialize(subject_id:, subject_type: 'User', event_reader:, **options)
      @subject_id = subject_id
      @subject_type = subject_type
      @event_reader = event_reader
      @options = options
      @event_snapshot = nil

      # Set up extractors with sensible defaults
      @attribute_extractor = options[:attribute_extractor] || method(:default_attribute_extractor)
      @timestamp_extractor = options[:timestamp_extractor] || method(:default_timestamp_extractor)
      @operation_extractor = options[:operation_extractor] || method(:default_operation_extractor)
      @model_class_extractor = options[:model_class_extractor] || method(:default_model_class_extractor)
      @model_id_extractor = options[:model_id_extractor] || method(:default_model_id_extractor)
      @changes_extractor = options[:changes_extractor] || method(:default_changes_extractor)
      @retention_policy = options[:retention_policy] || default_retention_policy
    end

    # Right to Access (GDPR Article 15)
    # Export all data about the subject
    #
    # @return [Hash] Complete data export including events, PII inventory, and lineage
    #
    def data_export
      events = collect_all_events

      {
        subject: subject_descriptor,
        generated_at: Time.now,
        events: events.map { |e| event_summary(e) },
        pii_inventory: collect_pii_inventory(events),
        data_lineage: trace_data_lineage(events),
        processing_activities: collect_processing_activities(events)
      }
    end

    # Right to be Forgotten (GDPR Article 17)
    # Identify all events containing subject's data for deletion planning
    #
    # @return [Hash] Report on affected data and recommended deletion strategy
    #
    def right_to_be_forgotten_report
      events = collect_all_events

      {
        subject: subject_descriptor,
        total_events: events.count,
        events_with_pii: events.count { |e| has_pii?(e) },
        affected_streams: affected_streams(events),
        affected_models: affected_models(events),
        deletion_strategy: recommend_deletion_strategy(events),
        dependencies: find_dependencies(events)
      }
    end

    # Right to Data Portability (GDPR Article 20)
    # Export data in machine-readable format
    #
    # @param format [Symbol] Export format (:json, :csv, :xml, :hash)
    # @return [String, Hash] Exported data in requested format; any format
    #   other than :json, :csv or :xml returns the raw Hash
    #
    def portable_export(format: :json)
      events = collect_all_events

      data = {
        version: "1.0",
        subject: subject_descriptor,
        exported_at: Time.now.iso8601,
        data: collect_portable_data(events)
      }

      case format
      when :json then data.to_json
      when :csv then to_csv(data)
      when :xml then to_xml(data)
      else data
      end
    end

    # Right to Rectification (GDPR Article 16)
    # Track all corrections made to subject's data
    #
    # @return [Array<Hash>] History of all data corrections (one entry per
    #   :updated event whose attributes contain PII)
    #
    def rectification_history
      corrections = collect_all_events.select do |event|
        extract_operation(event) == :updated && has_pii?(event)
      end

      corrections.map do |event|
        changes = extract_changes(event)
        {
          timestamp: extract_timestamp(event),
          model: extract_model_class(event),
          record_id: extract_model_id(event),
          changes: changes,
          corrected_fields: identify_pii_changes(changes),
          metadata: event_metadata(event)
        }
      end
    end

    # Processing Activities Record (GDPR Article 30)
    # Document all processing activities for the subject
    #
    # @return [Array<Hash>] Processing activities grouped by source
    #   (events without a metadata source are grouped under 'unknown')
    #
    def processing_activities
      by_source = collect_all_events.group_by do |event|
        event_metadata(event)[:source] || 'unknown'
      end

      by_source.map do |source, source_events|
        {
          source: source,
          purpose: infer_purpose(source, source_events),
          legal_basis: determine_legal_basis(source_events),
          data_categories: categorize_data(source_events),
          recipients: identify_recipients(source_events),
          retention_period: determine_retention_period(source_events),
          events_count: source_events.count
        }
      end
    end

    # Data Retention Compliance Check
    # Verify that data retention policies are being followed
    #
    # @return [Array<Hash>] Compliance status per model class
    #
    def retention_compliance_check
      now = Time.now

      collect_all_events.group_by { |e| extract_model_class(e) }.map do |model_class, model_events|
        duration = retention_policy_for(model_class)[:duration]
        cutoff = now - duration
        expired = model_events.select do |event|
          timestamp = extract_timestamp(event)
          timestamp && timestamp < cutoff
        end

        {
          model_class: model_class,
          total_events: model_events.count,
          expired_events: expired.count,
          retention_period: duration,
          compliance_status: expired.empty? ? :compliant : :requires_action,
          # Events without an event_id are identified by their object_id.
          expired_event_ids: expired.map { |e| e.respond_to?(:event_id) ? e.event_id : e.object_id }
        }
      end
    end

    # Consent Audit
    # Track and verify consent for data processing
    #
    # An event counts as a consent event when its data has a truthy :consent,
    # its metadata has a :consent_id, or its model class name contains
    # "consent" (case-insensitive).
    #
    # @return [Hash] Current consents, history, and processing legitimacy
    #
    def consent_audit
      events = collect_all_events

      consent_events = events.select do |event|
        event_data(event)[:consent] ||
          event_metadata(event)[:consent_id] ||
          extract_model_class(event).to_s.match?(/consent/i)
      end

      history = consent_events.map do |event|
        data = event_data(event)
        {
          timestamp: extract_timestamp(event),
          consent_type: data[:consent_type],
          granted: data[:granted],
          purpose: data[:purpose],
          event_id: event_identifier(event)
        }
      end

      {
        current_consents: extract_current_consents(consent_events),
        consent_history: history,
        processing_legitimacy: verify_processing_legitimacy(events, consent_events)
      }
    end

    # Generate a complete GDPR compliance report
    #
    # The event reader is invoked once and the same snapshot is shared by
    # every section, so the sections are mutually consistent.
    #
    # @return [Hash] Comprehensive compliance report
    #
    def full_report
      with_event_snapshot do
        {
          subject: subject_descriptor,
          generated_at: Time.now,
          data_export: data_export,
          erasure_report: right_to_be_forgotten_report,
          rectification_history: rectification_history,
          processing_activities: processing_activities,
          retention_compliance: retention_compliance_check,
          consent_audit: consent_audit
        }
      end
    end

    private

    # Identification hash embedded in every report.
    def subject_descriptor
      { id: subject_id, type: subject_type }
    end

    # Collect all events for the subject. Uses the snapshot taken by
    # #full_report when one is active, otherwise asks the reader.
    def collect_all_events
      @event_snapshot || read_events
    end

    # Invoke the configured reader; a nil result is treated as "no events".
    def read_events
      @event_reader.call(subject_id, subject_type).to_a
    end

    # Read the events once and reuse them for every report built in the block.
    def with_event_snapshot
      @event_snapshot = read_events
      yield
    ensure
      @event_snapshot = nil
    end

    # Event payload, or an empty hash when the event exposes none.
    def event_data(event)
      event.respond_to?(:data) ? (event.data || {}) : {}
    end

    # Event metadata, or an empty hash when the event exposes none.
    def event_metadata(event)
      event.respond_to?(:metadata) ? (event.metadata || {}) : {}
    end

    # Event id, or nil when the event has no #event_id.
    def event_identifier(event)
      event.respond_to?(:event_id) ? event.event_id : nil
    end

    # Look a key up as given first, then as a String.
    def indifferent_fetch(hash, key)
      hash[key] || hash[key.to_s]
    end

    # Default extractors that work with common event structures
    def default_attribute_extractor(event)
      return event.attributes if event.respond_to?(:attributes) && !event.attributes.is_a?(Method)

      indifferent_fetch(event_data(event), :attributes) || {}
    end

    # Tries, in order: event.timestamp, the data payload, the metadata.
    # The metadata is consulted even when the event also carries data.
    def default_timestamp_extractor(event)
      return event.timestamp if event.respond_to?(:timestamp) && event.timestamp

      indifferent_fetch(event_data(event), :timestamp) ||
        indifferent_fetch(event_metadata(event), :timestamp)
    end

    def default_operation_extractor(event)
      return event.operation if event.respond_to?(:operation)

      op = indifferent_fetch(event_data(event), :operation)
      op.is_a?(String) ? op.to_sym : op
    end

    def default_model_class_extractor(event)
      return event.model_class if event.respond_to?(:model_class)

      indifferent_fetch(event_data(event), :model_class)
    end

    def default_model_id_extractor(event)
      return event.model_id if event.respond_to?(:model_id)

      indifferent_fetch(event_data(event), :model_id)
    end

    # Uses event.changes unless that method is owned by something whose name
    # contains 'ActiveRecord'; in that case the data payload is used instead.
    def default_changes_extractor(event)
      if event.respond_to?(:changes)
        changes = event.changes
        unless changes.is_a?(Method)
          begin
            return changes unless event.method(:changes).owner.to_s.include?('ActiveRecord')
          rescue StandardError
            # The method object could not be inspected; trust the value.
            return changes
          end
        end
      end

      indifferent_fetch(event_data(event), :changes) || {}
    end

    # Extraction helpers using configured extractors
    def extract_attributes(event)
      @attribute_extractor.call(event)
    end

    def extract_timestamp(event)
      @timestamp_extractor.call(event)
    end

    def extract_operation(event)
      @operation_extractor.call(event)
    end

    def extract_model_class(event)
      @model_class_extractor.call(event)
    end

    def extract_model_id(event)
      @model_id_extractor.call(event)
    end

    def extract_changes(event)
      @changes_extractor.call(event)
    end

    # True when PIIDetector reports at least one PII field in the attributes.
    def has_pii?(event)
      PIIDetector.detect(extract_attributes(event)).any?
    end

    def event_summary(event)
      {
        event_id: event_identifier(event),
        timestamp: extract_timestamp(event),
        operation: extract_operation(event),
        model: extract_model_class(event),
        record_id: extract_model_id(event),
        has_pii: has_pii?(event)
      }
    end

    def collect_pii_inventory(events)
      PIIDetector.extract_pii_from_records(
        events,
        attribute_extractor: ->(e) { extract_attributes(e) },
        metadata_extractor: lambda { |e|
          {
            event_id: event_identifier(e),
            timestamp: extract_timestamp(e),
            model_class: extract_model_class(e),
            model_id: extract_model_id(e)
          }
        }
      )
    end

    # Per PII field, the chronological list of events that carried it.
    # Events without a timestamp sort first.
    def trace_data_lineage(events)
      sorted_events = events.sort_by { |e| extract_timestamp(e) || Time.at(0) }

      sorted_events.each_with_object({}) do |event, lineage|
        PIIDetector.detect(extract_attributes(event)).each do |field, info|
          (lineage[field] ||= []) << {
            event_id: event_identifier(event),
            timestamp: extract_timestamp(event),
            operation: extract_operation(event),
            value: info[:value],
            source: event.respond_to?(:metadata) ? event_metadata(event)[:source] : nil
          }
        end
      end
    end

    def collect_processing_activities(events)
      events.map do |event|
        {
          timestamp: extract_timestamp(event),
          activity: extract_operation(event),
          model: extract_model_class(event),
          purpose: infer_purpose(event_metadata(event)[:source], [event]),
          data_processed: PIIDetector.detect(extract_attributes(event)).keys
        }
      end
    end

    def affected_streams(events)
      events.map { |e| "#{extract_model_class(e)}-#{extract_model_id(e)}" }.uniq
    end

    def affected_models(events)
      events.map { |e| extract_model_class(e) }.uniq
    end

    def recommend_deletion_strategy(events)
      events.count < 100 ? :direct_deletion : :batch_deletion
    end

    # Attribute subsets ({ field => value }) whose key ends in "_id" and whose
    # value equals the subject id. Events with no such attribute contribute
    # nothing (previously each contributed an empty hash).
    def find_dependencies(events)
      references = events.map do |event|
        extract_attributes(event).select { |k, v| k.to_s.end_with?('_id') && v == subject_id }
      end

      references.reject(&:empty?).uniq
    end

    def collect_portable_data(events)
      events.group_by { |e| extract_model_class(e) }.transform_values do |model_events|
        aggregate_state(model_events)
      end
    end

    # Replay events in timestamp order into a single state hash.
    def aggregate_state(events)
      sorted = events.sort_by { |e| extract_timestamp(e) || Time.at(0) }

      sorted.each_with_object({}) do |event, state|
        case extract_operation(event)
        when :created
          state.merge!(extract_attributes(event))
        when :updated
          # Each change is destructured as [old, new]; only the new value is kept.
          extract_changes(event).each { |k, (_old, new_value)| state[k] = new_value }
        when :destroyed
          state[:_deleted] = true
          state[:_deleted_at] = extract_timestamp(event)
        end
      end
    end

    def identify_pii_changes(changes)
      changes.select { |field, _| PIIDetector.contains_pii?(field) }
    end

    # Map a source name to a human-readable purpose. The events argument is
    # accepted for interface compatibility and is not inspected.
    def infer_purpose(source, _events)
      case source.to_s
      when /registration/i then "Account creation and management"
      when /payment/i then "Payment processing"
      when /email/i then "Communication"
      when /analytics/i then "Analytics and improvements"
      else "Application functionality"
      end
    end

    # Always "Consent"; the events are not inspected.
    def determine_legal_basis(_events)
      "Consent" # Could be: Consent, Contract, Legal obligation, Vital interests, Public task, Legitimate interests
    end

    def categorize_data(events)
      pii_types = events.flat_map do |event|
        PIIDetector.detect(extract_attributes(event)).values.map { |v| v[:type] }
      end.uniq

      pii_types.map { |t| t.to_s.split('_').map(&:capitalize).join(' ') }
    end

    def identify_recipients(events)
      events.flat_map { |event| event_metadata(event)[:recipients] || [] }.uniq
    end

    # Retention duration for the model class of the first event, falling back
    # to the :default policy and finally to "Not specified".
    def determine_retention_period(events)
      return "Not specified" if events.empty?

      model_class = extract_model_class(events.first)
      policy = @retention_policy[model_class] || @retention_policy[model_class.to_s]
      policy&.dig(:duration) || @retention_policy[:default]&.dig(:duration) || "Not specified"
    end

    # Policy hash for a model class: exact key, then its String form, then
    # :default, then a built-in seven-year policy.
    def retention_policy_for(model_class)
      @retention_policy[model_class] ||
        @retention_policy[model_class.to_s] ||
        @retention_policy[:default] ||
        { duration: years(7) }
    end

    def default_retention_policy
      {
        default: { duration: years(7) },
        'Payment' => { duration: years(10) },
        'Invoice' => { duration: years(10) },
        'Student' => { duration: years(10) },
        'Enrollment' => { duration: years(10) }
      }
    end

    # A duration of +count+ years: Integer#years when the host application
    # provides it, otherwise a plain number of seconds.
    def years(count)
      count.respond_to?(:years) ? count.years : count * SECONDS_PER_YEAR
    end

    # Latest consent event per purpose.
    def extract_current_consents(consent_events)
      consent_events.group_by { |e| event_data(e)[:purpose] }.transform_values do |events|
        latest = events.max_by { |e| extract_timestamp(e) || Time.at(0) }
        data = event_data(latest)
        {
          granted: data[:granted],
          timestamp: extract_timestamp(latest),
          expires_at: data[:expires_at]
        }
      end
    end

    def verify_processing_legitimacy(all_events, consent_events)
      all_events.map do |event|
        timestamp = extract_timestamp(event)
        purpose = infer_purpose(event_metadata(event)[:source], [event])
        consent = find_applicable_consent(consent_events, purpose, timestamp)
        granted = consent ? event_data(consent)[:granted] : false

        {
          event_id: event_identifier(event),
          timestamp: timestamp,
          purpose: purpose,
          has_consent: granted,
          legitimate: granted
        }
      end
    end

    # Most recent consent event for +purpose+ that was recorded at or before
    # +timestamp+ and had not expired by then. Choosing the latest record (not
    # the first one found) lets a later withdrawal override an earlier grant.
    def find_applicable_consent(consent_events, purpose, timestamp)
      return nil unless timestamp

      applicable = consent_events.select do |consent|
        data = event_data(consent)
        consent_time = extract_timestamp(consent)

        data[:purpose] == purpose &&
          consent_time && consent_time <= timestamp &&
          (data[:expires_at].nil? || data[:expires_at] > timestamp)
      end

      applicable.max_by { |consent| extract_timestamp(consent) }
    end

    def to_csv(data)
      # Loaded lazily so the class works without csv unless CSV export is used.
      require 'csv'

      CSV.generate do |csv|
        csv << ["Field", "Value"]
        flatten_hash(data).each { |k, v| csv << [k, v] }
      end
    end

    def to_xml(data)
      "<?xml version=\"1.0\"?>\n<data_export>\n#{hash_to_xml(data, 1)}</data_export>"
    end

    # Flatten nested hashes into [dotted.key, value] pairs.
    def flatten_hash(hash, prefix = "")
      hash.flat_map do |k, v|
        key = prefix.empty? ? k.to_s : "#{prefix}.#{k}"
        v.is_a?(Hash) ? flatten_hash(v, key) : [[key, v]]
      end
    end

    # Render a hash as nested XML elements. Values are escaped and keys are
    # sanitized so that arbitrary subject data cannot break the document.
    def hash_to_xml(hash, indent = 0)
      spaces = " " * indent

      hash.map do |k, v|
        tag = xml_tag_name(k)
        if v.is_a?(Hash)
          "#{spaces}<#{tag}>\n#{hash_to_xml(v, indent + 1)}#{spaces}</#{tag}>\n"
        elsif v.is_a?(Array)
          "#{spaces}<#{tag}>#{xml_escape(v.join(', '))}</#{tag}>\n"
        else
          "#{spaces}<#{tag}>#{xml_escape(v)}</#{tag}>\n"
        end
      end.join
    end

    # Escape the five XML special characters in a value's string form.
    def xml_escape(value)
      value.to_s.gsub(/[&<>"']/, XML_ESCAPES)
    end

    # Turn a hash key into a usable element name: characters outside
    # [A-Za-z0-9_.-] become "_", and a leading "_" is added when the name
    # would not start with a letter or underscore (including empty names).
    def xml_tag_name(key)
      name = key.to_s.gsub(/[^A-Za-z0-9_.\-]/, '_')
      name.match?(/\A[A-Za-z_]/) ? name : "_#{name}"
    end
  end
end