orfeas_pam_dsl 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +84 -0
- data/MIT-LICENSE +21 -0
- data/README.md +1365 -0
- data/Rakefile +11 -0
- data/lib/pam_dsl/consent.rb +110 -0
- data/lib/pam_dsl/field.rb +76 -0
- data/lib/pam_dsl/gdpr_compliance.rb +560 -0
- data/lib/pam_dsl/pii_detector.rb +442 -0
- data/lib/pam_dsl/pii_masker.rb +121 -0
- data/lib/pam_dsl/policy.rb +175 -0
- data/lib/pam_dsl/policy_comparator.rb +296 -0
- data/lib/pam_dsl/policy_generator.rb +558 -0
- data/lib/pam_dsl/purpose.rb +78 -0
- data/lib/pam_dsl/railtie.rb +25 -0
- data/lib/pam_dsl/registry.rb +50 -0
- data/lib/pam_dsl/reporter.rb +789 -0
- data/lib/pam_dsl/retention.rb +102 -0
- data/lib/pam_dsl/tasks/privacy.rake +139 -0
- data/lib/pam_dsl/version.rb +3 -0
- data/lib/pam_dsl.rb +67 -0
- metadata +136 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PamDsl
  # Detects personally identifiable information (PII) by inspecting field names.
  # This is the canonical PII detection implementation used by both PAM DSL and Lyra.
  #
  # Detection is purely name based: values are carried through to the result but
  # are never inspected.
  #
  # Supports two matching modes:
  # - **Partial matching (default)**: Matches field names containing PII keywords
  #   e.g., `primary_email`, `customer_phone` are detected
  # - **Exact matching**: Only matches specific known field names
  #   e.g., only `email`, `user_email` etc. are matched
  #
  # @example Basic usage
  #   PamDsl::PIIDetector.detect({ email: "test@example.com", name: "John" })
  #   # => { email: { type: :email, ... }, name: { type: :name, ... } }
  #
  # @example Check single field
  #   PamDsl::PIIDetector.contains_pii?(:email)  # => true
  #   PamDsl::PIIDetector.contains_pii?(:count)  # => false
  #
  # @example Configure matching mode
  #   PamDsl::PIIDetector.partial_match = false  # Use exact matching
  #
  class PIIDetector
    # Boundary patterns for partial matching
    # Start: beginning of string or after underscore
    # End: end of string, underscore, or followed by uppercase (camelCase)
    START_BOUNDARY = '(?:^|_)'
    END_BOUNDARY = '(?:$|_|(?=[A-Z]))'

    # Sensitivity levels for different PII types
    SENSITIVITY_LEVELS = {
      internal: 1,      # Low sensitivity - internal use
      confidential: 2,  # Medium sensitivity - requires protection
      restricted: 3     # High sensitivity - requires encryption/special handling
    }.freeze

    # Fields matching these patterns are NOT PII (timestamps, counters, flags, amounts)
    # These are checked BEFORE PII patterns to avoid false positives
    #
    # NOTE(review): because exclusions win, snake_case names such as `auth_token`,
    # `password_digest`, `password_hash` and `encrypted_password` never reach the
    # :token / :credential patterns below even though those patterns list them.
    # Confirm whether that is intended.
    EXCLUSION_PATTERNS = [
      /_at\z/i,           # Timestamps: created_at, email_sent_at, verified_at
      /_on\z/i,           # Date fields: published_on, sent_on
      /_date\z/i,         # Date fields: send_date (birth_date is exempted, see EXCLUSION_EXEMPTIONS)
      /_time\z/i,         # Time fields: start_time, end_time
      /_count\z/i,        # Counters: email_count, login_count
      /_amount\z/i,       # Amounts: vat_amount, total_amount
      /_total\z/i,        # Totals: grand_total
      /_enabled\z/i,      # Flags: email_enabled, phone_verified
      /_verified\z/i,     # Verification flags
      /_confirmed\z/i,    # Confirmation flags
      /_sent\z/i,         # Status flags: email_sent
      /_notified\z/i,     # Notification flags
      /_status\z/i,       # Status fields: payment_status
      /_type\z/i,         # Type fields: user_type
      /_uuid\z/i,         # UUIDs
      /_code\z/i,         # Codes: country_code, currency_code (postal codes are exempted)
      /\A(id|uuid)\z/i,   # Primary keys
      /\Ais_/i,           # Boolean flags: is_active
      /\Ahas_/i,          # Boolean flags: has_consent
      /encrypted_/i,      # Already encrypted
      /_digest\z/i,       # Hashes: password_digest
      /_token\z/i,        # Tokens: auth_token
      /_hash\z/i,         # Hashes: password_hash
      /\A(created|updated|deleted|sent|received)_/i # Prefixed timestamps
    ].freeze

    # Field names that must never be excluded, even when they match one of
    # EXCLUSION_PATTERNS, because they carry PII.
    EXCLUSION_EXEMPTIONS = [
      /postal_code|zip_code|postcode/i, # address codes are PII
      /birth_?date\z/i,                 # birth_date ends in _date but is date-of-birth PII
      /\Aface_id\z/i,                   # face_id is biometric PII
      /\Anational_id\z/i,               # national_id is ssn PII
      /\Avat_id\z/i,                    # vat_id is identifier PII
      /\Atax_id\z/i                     # tax_id is identifier PII
    ].freeze

    # Exact match PII patterns (used when partial_match = false)
    # These are specific known field names for each PII type
    EXACT_PII_PATTERNS = {
      email: {
        pattern: /\A(email|e_mail|mail_address|user_email|contact_email|billing_email)\z/i,
        sensitivity: :confidential
      },
      name: {
        pattern: /\A(first_?name|given_?name|fname|last_?name|surname|family_?name|lname|full_?name|display_?name|father_?name|fathername|mother_?name|parent_?name|name|firstname|lastname)\z/i,
        sensitivity: :internal
      },
      phone: {
        pattern: /\A(phone|telephone|mobile|cell|fax|phone_number|mobile_number|cell_phone)\z/i,
        sensitivity: :confidential
      },
      ip_address: {
        pattern: /\A(ip|ip_address|remote_ip|client_ip|source_ip)\z/i,
        sensitivity: :internal
      },
      address: {
        pattern: /\A(address|street|city|state|province|country|postal|zip|postcode|postal_code|zip_code|address_line_?\d?|street_address|billing_address|shipping_address|home_address|work_address)\z/i,
        sensitivity: :confidential
      },
      identifier: {
        pattern: /\A(vat_number|vat_id|tax_id|tin|tax_number|vat_reg_number|afm|passport|passport_number|driver_license|drivers_license|license_number|id_number)\z/i,
        sensitivity: :restricted
      },
      ssn: {
        pattern: /\A(ssn|social_security|social_security_number|national_id)\z/i,
        sensitivity: :restricted
      },
      date_of_birth: {
        pattern: /\A(dob|date_of_birth|birth_date|birthday|birthdate)\z/i,
        sensitivity: :confidential
      },
      credit_card: {
        pattern: /\A(credit_card|card_number|card_number_first\d|card_number_last\d|cvv|cvc|ccn)\z/i,
        sensitivity: :restricted
      },
      financial: {
        pattern: /\A(iban|swift|bic|bank_account|account_number|routing_number|salary|income)\z/i,
        sensitivity: :restricted
      },
      health: {
        pattern: /\A(health_condition|medical_record|diagnosis|prescription|medical_history|medical|health)\z/i,
        sensitivity: :restricted
      },
      biometric: {
        pattern: /\A(fingerprint|face_id|biometric|biometric_data|retina_scan|retina|face)\z/i,
        sensitivity: :restricted
      },
      location: {
        pattern: /\A(latitude|longitude|lat|lng|geo_location|gps_coordinates|gps|location)\z/i,
        sensitivity: :confidential
      },
      credential: {
        pattern: /\A(encrypted_password|password_salt|password_digest|password_hash|api_key|spree_api_key|secret_key|access_key)\z/i,
        sensitivity: :restricted
      },
      token: {
        pattern: /\A(reset_password_token|remember_token|confirmation_token|unlock_token|authentication_token|auth_token|session_token|bearer_token|guest_token|persistence_token|perishable_token|access_token|refresh_token)\z/i,
        sensitivity: :restricted
      },
      payment_token: {
        pattern: /\A(gateway_customer_profile_id|gateway_payment_profile_id|stripe_customer_id|paypal_account_id|payment_token)\z/i,
        sensitivity: :restricted
      }
    }.freeze

    # Partial match PII patterns (used when partial_match = true)
    # These use boundary patterns to match field names containing PII keywords
    # NOTE: Order matters - more specific patterns must come before general ones
    #
    # NOTE(review): the email pattern is the only one compiled with /i, which also
    # makes the `[A-Z]` lookahead in END_BOUNDARY case-insensitive (so e.g.
    # `emails` matches). Confirm whether that is intended.
    PARTIAL_PII_PATTERNS = {
      email: {
        pattern: /#{START_BOUNDARY}email#{END_BOUNDARY}/i,
        sensitivity: :confidential
      },
      name: {
        pattern: /#{START_BOUNDARY}(first_?[Nn]ame|last_?[Nn]ame|full_?[Nn]ame|[Nn]ame)#{END_BOUNDARY}/,
        sensitivity: :internal
      },
      phone: {
        pattern: /#{START_BOUNDARY}([Pp]hone|[Tt]elephone|[Mm]obile|[Cc]ell)#{END_BOUNDARY}/,
        sensitivity: :confidential
      },
      ip_address: {
        pattern: /#{START_BOUNDARY}(ip_?[Aa]ddress|remote_?[Ii]p|client_?[Ii]p|source_?[Ii]p)#{END_BOUNDARY}/,
        sensitivity: :internal
      },
      address: {
        pattern: /#{START_BOUNDARY}([Aa]ddress|[Ss]treet|[Cc]ity|[Zz]ip|[Pp]ostal|[Cc]ountry)#{END_BOUNDARY}/,
        sensitivity: :confidential
      },
      identifier: {
        pattern: /#{START_BOUNDARY}([Pp]assport|[Ll]icense|id_?[Nn]umber|tax_?[Ii]d|tax_?[Nn]umber|[Tt]in|national_?[Ii]d_?[Nn]umber|[Vv]at|[Vv]at_?[Nn]umber|[Aa]fm)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      ssn: {
        pattern: /#{START_BOUNDARY}([Ss]sn|social_?[Ss]ecurity|national_?[Ii]d)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      date_of_birth: {
        pattern: /#{START_BOUNDARY}([Bb]irth|[Dd]ob|[Bb]irthday|date_?[Oo]f_?[Bb]irth)#{END_BOUNDARY}/,
        sensitivity: :confidential
      },
      credit_card: {
        pattern: /#{START_BOUNDARY}(credit_?[Cc]ard|card_?[Nn]umber|[Cc]cn|[Cc]vv|[Cc]vc)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      financial: {
        pattern: /#{START_BOUNDARY}([Ss]alary|[Ii]ncome|bank_?[Aa]ccount|[Ii]ban)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      health: {
        pattern: /#{START_BOUNDARY}([Mm]edical|[Hh]ealth|[Dd]iagnosis|[Pp]rescription)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      biometric: {
        pattern: /#{START_BOUNDARY}([Ff]ingerprint|[Ff]ace|[Rr]etina|[Bb]iometric)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      location: {
        pattern: /#{START_BOUNDARY}([Ll]atitude|[Ll]ongitude|[Ll]ocation|[Gg]ps)#{END_BOUNDARY}/,
        sensitivity: :confidential
      },
      credential: {
        pattern: /#{START_BOUNDARY}([Pp]assword|[Ss]ecret|api_?[Kk]ey)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      token: {
        pattern: /#{START_BOUNDARY}([Aa]uth_?[Tt]oken|[Ss]ession_?[Tt]oken|[Bb]earer_?[Tt]oken|[Aa]ccess_?[Tt]oken|[Rr]efresh_?[Tt]oken)#{END_BOUNDARY}/,
        sensitivity: :restricted
      },
      payment_token: {
        pattern: /#{START_BOUNDARY}([Gg]ateway.*[Pp]rofile|[Ss]tripe.*[Ii]d|[Pp]aypal.*[Ii]d|[Pp]ayment_?[Tt]oken)#{END_BOUNDARY}/,
        sensitivity: :restricted
      }
    }.freeze

    # Sensitive PII types that require extra protection
    SENSITIVE_TYPES = %i[ssn credit_card financial health biometric identifier credential token payment_token].freeze

    class << self
      # Enable or disable partial matching (default: true)
      # When true, matches field names containing PII keywords (e.g., primary_email, customer_phone)
      # When false, only matches specific known field names (e.g., email, phone)
      attr_writer :partial_match

      # @return [Boolean] the configured matching mode; true when never set
      def partial_match
        @partial_match.nil? ? true : @partial_match
      end

      # Reset to default settings
      def reset!
        @partial_match = true
      end

      # Get the current PII patterns based on matching mode
      #
      # @return [Hash] PARTIAL_PII_PATTERNS or EXACT_PII_PATTERNS
      def pii_patterns
        partial_match ? PARTIAL_PII_PATTERNS : EXACT_PII_PATTERNS
      end

      # Detect PII in a hash of attributes
      #
      # @param attributes [Hash] Key-value pairs to check for PII
      # @return [Hash] Keys that contain PII, with their type, value, and sensitivity.
      #   Keys are returned exactly as given (symbol or string).
      #
      def detect(attributes)
        attributes.each_with_object({}) do |(key, value), pii_fields|
          pii_info = detect_field(key.to_s)
          next unless pii_info

          pii_fields[key] = {
            type: pii_info[:type],
            value: value,
            sensitive: sensitive?(pii_info[:type]),
            sensitivity: pii_info[:sensitivity]
          }
        end
      end

      # Check if a field name contains PII
      #
      # @param field_name [String, Symbol] Field name to check
      # @return [Boolean] true if field contains PII
      #
      def contains_pii?(field_name)
        !detect_field(field_name.to_s).nil?
      end

      # Get the PII type for a field
      #
      # @param field_name [String, Symbol] Field name to check
      # @return [Symbol, nil] PII type or nil if not PII
      #
      def pii_type(field_name)
        detect_field(field_name.to_s)&.dig(:type)
      end

      # Get the sensitivity level for a field
      #
      # @param field_name [String, Symbol] Field name to check
      # @return [Symbol, nil] Sensitivity level or nil if not PII
      #
      def sensitivity(field_name)
        detect_field(field_name.to_s)&.dig(:sensitivity)
      end

      # Check if a PII type is considered sensitive (requires extra protection)
      #
      # @param pii_type [Symbol] The PII type
      # @return [Boolean]
      #
      def sensitive?(pii_type)
        SENSITIVE_TYPES.include?(pii_type)
      end

      # Extract PII from a collection of records (events, database rows, etc.)
      #
      # This is a generic method that works with any record type by using blocks
      # to extract the relevant data. This allows PAM DSL to work with Lyra events,
      # RubyEventStore events, ActiveRecord models, or any other data source.
      #
      # Records whose extracted attributes are nil/empty, or contain no PII, are
      # skipped. Metadata is merged into each entry last, so metadata keys named
      # :field, :value, :pii_type or :sensitivity overwrite the detected ones.
      #
      # @param records [Enumerable] Collection of records to scan
      # @param attribute_extractor [Proc] Block that extracts attributes hash from a record
      # @param metadata_extractor [Proc, nil] Optional block that extracts metadata from a record
      #   Should return a hash with keys like :id, :timestamp, :type, :record_id
      # @return [Hash] PII inventory grouped by PII type
      #
      # @example With Lyra events
      #   PamDsl::PIIDetector.extract_pii_from_records(
      #     events,
      #     attribute_extractor: ->(e) { e.attributes },
      #     metadata_extractor: ->(e) { { id: e.event_id, timestamp: e.timestamp, type: e.model_class, record_id: e.model_id } }
      #   )
      #
      # @example With RubyEventStore events
      #   PamDsl::PIIDetector.extract_pii_from_records(
      #     events,
      #     attribute_extractor: ->(e) { e.data[:attributes] || {} },
      #     metadata_extractor: ->(e) { { id: e.event_id, timestamp: e.metadata[:timestamp] } }
      #   )
      #
      # @example With ActiveRecord models
      #   PamDsl::PIIDetector.extract_pii_from_records(
      #     User.all,
      #     attribute_extractor: ->(u) { u.attributes },
      #     metadata_extractor: ->(u) { { id: u.id, type: u.class.name } }
      #   )
      #
      # @example Simple usage (no metadata)
      #   PamDsl::PIIDetector.extract_pii_from_records(
      #     data_rows,
      #     attribute_extractor: ->(row) { row }
      #   )
      #
      def extract_pii_from_records(records, attribute_extractor:, metadata_extractor: nil)
        # Block default gives every PII type its own array.
        pii_inventory = Hash.new { |h, k| h[k] = [] }

        records.each do |record|
          attributes = attribute_extractor.call(record)
          next if attributes.nil? || attributes.empty?

          pii_fields = detect(attributes)
          next if pii_fields.empty?

          metadata = metadata_extractor ? metadata_extractor.call(record) : {}

          pii_fields.each do |field, info|
            entry = {
              field: field,
              value: info[:value],
              pii_type: info[:type],
              sensitivity: info[:sensitivity]
            }
            # Non-hash metadata is ignored rather than raising.
            entry.merge!(metadata) if metadata.is_a?(Hash)

            pii_inventory[info[:type]] << entry
          end
        end

        pii_inventory
      end

      # Mask a PII value for safe display
      #
      # @param value [Object] The value to mask
      # @param pii_type [Symbol] The type of PII
      # @return [String, Object, nil] Masked value; nil is passed through, and the
      #   type-specific helpers return the original value when it is too short or
      #   malformed to mask
      #
      def mask(value, pii_type)
        return value if value.nil?

        case pii_type
        when :email
          mask_email(value)
        when :phone
          mask_phone(value)
        when :ssn, :credit_card, :identifier, :financial
          "***REDACTED***"
        when :credential
          "[HIDDEN]"
        when :token, :payment_token
          mask_token(value)
        when :name
          mask_name(value)
        else
          mask_generic(value)
        end
      end

      private

      # Classify a single field name.
      #
      # @param field_name [String] Field name (already converted to a String)
      # @return [Hash, nil] `{ type:, sensitivity: }` for the first matching
      #   pattern in declaration order, or nil when excluded / not PII
      def detect_field(field_name)
        # Check exclusion patterns first to avoid false positives
        return nil if excluded_field?(field_name)

        type, config = pii_patterns.find { |_type, cfg| field_name.match?(cfg[:pattern]) }
        type && { type: type, sensitivity: config[:sensitivity] }
      end

      # Decide whether a field name is a known non-PII shape (timestamp, flag, ...).
      #
      # Exemptions are consulted first so that PII names which merely look like an
      # excluded shape (postal_code, birth_date, ...) still reach the PII patterns.
      #
      # @param field_name [String]
      # @return [Boolean]
      def excluded_field?(field_name)
        return false if EXCLUSION_EXEMPTIONS.any? { |pattern| field_name.match?(pattern) }

        EXCLUSION_PATTERNS.any? { |pattern| field_name.match?(pattern) }
      end

      # Keep the first character of the local part and the whole domain.
      # Values without an "@" are returned untouched.
      def mask_email(email)
        address = email.to_s
        return email unless address.include?('@')

        # Split on the LAST "@" so the real domain is kept even when the local
        # part itself contains "@".
        local, _separator, domain = address.rpartition('@')
        "#{local[0]}***@#{domain}"
      end

      # Keep only the last four digits; values with fewer than four digits are
      # returned untouched.
      def mask_phone(phone)
        digits = phone.to_s.gsub(/\D/, '')
        return phone if digits.length < 4

        "***-***-#{digits[-4..]}"
      end

      # Keep the first whitespace-separated word and hide the rest.
      def mask_name(name)
        parts = name.to_s.split(' ')
        return name if parts.empty?

        "#{parts.first} ***"
      end

      # Keep the first two and last two characters; strings of four characters or
      # fewer are returned as-is (stringified).
      def mask_generic(value)
        str = value.to_s
        return str if str.length <= 4

        "#{str[0..1]}***#{str[-2..]}"
      end

      # Keep the first and last four characters; tokens of eight characters or
      # fewer are replaced entirely.
      def mask_token(value)
        str = value.to_s
        return "[TOKEN]" if str.length <= 8

        "#{str[0..3]}...#{str[-4..]}"
      end
    end
  end
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module PamDsl
  # Batch masking of PII inside attribute hashes, for safe display or logging.
  #
  # Field detection and the type-specific partial masks are delegated to
  # PIIDetector; this class only chooses, per strategy, between the partial mask
  # and a fixed "[REDACTED]" placeholder.
  #
  # @example Mask all PII in a hash
  #   data = { email: "alice@example.com", name: "Alice", status: "active" }
  #   masked = PamDsl::PIIMasker.mask(data)
  #   # => { email: "a***@example.com", name: "Alice ***", status: "active" }
  #
  # @example Full redaction mode
  #   masked = PamDsl::PIIMasker.mask(data, strategy: :full)
  #   # => { email: "[REDACTED]", name: "[REDACTED]", status: "active" }
  #
  # @example Mask a single field
  #   PamDsl::PIIMasker.mask_field("alice@example.com", :email)
  #   # => "a***@example.com"
  #
  class PIIMasker
    # Masking strategies
    STRATEGIES = %i[partial full redact_sensitive].freeze

    class << self
      # Return a copy of +attributes+ in which every PII field is masked.
      #
      # @param attributes [Hash] Key-value pairs to mask
      # @param strategy [Symbol] Masking strategy:
      #   - :partial (default) - Type-specific partial masking (e.g., "a***@example.com")
      #   - :full - Complete redaction with "[REDACTED]"
      #   - :redact_sensitive - Full redaction only for sensitive types (ssn, credit_card, etc.)
      #   Any other value behaves like :partial.
      # @return [Hash] New hash with PII fields masked; nil or empty input is
      #   returned as-is
      #
      def mask(attributes, strategy: :partial)
        return attributes if attributes.nil? || attributes.empty?

        copy = attributes.dup
        PIIDetector.detect(attributes).each_with_object(copy) do |(field, details), result|
          result[field] = mask_value(details[:value], details[:type], strategy, details[:sensitive])
        end
      end

      # Mask all PII in a collection of records
      #
      # @param records [Enumerable] Collection of records to mask
      # @param attribute_extractor [Proc] Block that extracts attributes hash from a record
      # @param attribute_setter [Proc] Block that returns a new record with masked attributes
      # @param strategy [Symbol] Masking strategy (see #mask)
      # @return [Array] New array with masked records
      #
      # @example With hashes
      #   records = [{ email: "a@example.com" }, { email: "b@example.com" }]
      #   PIIMasker.mask_records(
      #     records,
      #     attribute_extractor: ->(r) { r },
      #     attribute_setter: ->(r, masked) { masked }
      #   )
      #
      # @example With objects
      #   PIIMasker.mask_records(
      #     events,
      #     attribute_extractor: ->(e) { e.data },
      #     attribute_setter: ->(e, masked) { e.class.new(e.event_id, masked, e.metadata) }
      #   )
      #
      def mask_records(records, attribute_extractor:, attribute_setter:, strategy: :partial)
        records.map do |record|
          redacted = mask(attribute_extractor.call(record), strategy: strategy)
          attribute_setter.call(record, redacted)
        end
      end

      # Mask a single value, inferring its PII type from the field name
      #
      # @param value [Object] The value to mask
      # @param field_name [String, Symbol] Field name to determine PII type
      # @param strategy [Symbol] Masking strategy (see #mask)
      # @return [Object] Masked value, or original if field is not PII
      #
      def mask_field(value, field_name, strategy: :partial)
        detected_type = PIIDetector.pii_type(field_name)
        return value unless detected_type

        mask_value(value, detected_type, strategy, PIIDetector.sensitive?(detected_type))
      end

      # Mask a single value whose PII type is already known
      #
      # @param value [Object] The value to mask
      # @param pii_type [Symbol] The PII type (:email, :phone, :ssn, etc.)
      # @param strategy [Symbol] Masking strategy
      # @return [Object] "[REDACTED]" or whatever PIIDetector.mask returns
      #
      def mask_by_type(value, pii_type, strategy: :partial)
        mask_value(value, pii_type, strategy, PIIDetector.sensitive?(pii_type))
      end

      private

      # Apply +strategy+ to one value: :full always redacts, :redact_sensitive
      # redacts only when +sensitive+ is truthy, anything else masks partially.
      def mask_value(value, pii_type, strategy, sensitive)
        redact_entirely = strategy == :full || (strategy == :redact_sensitive && sensitive)
        return "[REDACTED]" if redact_entirely

        PIIDetector.mask(value, pii_type)
      end
    end
  end
end
|