dpdp-erasure-cli 1.0.4 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
+ # AUTO-GENERATED BY INTROSPECTOR
2
+ # REVIEW REQUIRED: DPO must validate every table, join condition, and PII column before production use.
3
+ # Generated At: 2026-06-12T05:19:07.235Z
4
+
5
+ legal_attestation:
6
+ dpo_identifier: PENDING_REVIEW
7
+ configuration_version: introspector-draft
8
+ legal_review_date: PENDING_REVIEW
9
+ schema_hash: ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c
10
+ generated_by: compliance-introspector-v1
11
+ acknowledgment: PENDING_REVIEW
12
+
13
+ legal_disclaimer:
14
+ text: "Auto-generated by Compliance Worker. The DPO/Developer is responsible for verifying all logical links and PII mappings."
15
+
16
+ rules:
17
+ - id: dpdp_standard
18
+ root_table: public.users
19
+ max_depth: 32
20
+ targets:
21
+ - table: public.users
22
+ # Introspector Confidence: 0.950 (email)
23
+ # Introspector Confidence: 0.920 (indian_mobile)
24
+ pii_columns: [email, phone_number]
25
+ - table: public.kyc_documents
26
+ parent: public.users
27
+ join: "public.users.id = public.kyc_documents.user_id"
28
+ parent_columns: [id]
29
+ child_columns: [user_id]
30
+ pii_columns: []
31
+ - table: public.orders
32
+ parent: public.users
33
+ join: "public.users.id = public.orders.user_id"
34
+ parent_columns: [id]
35
+ child_columns: [user_id]
36
+ pii_columns: []
37
+ - table: public.support_tickets
38
+ parent: public.users
39
+ join: "public.users.id = public.support_tickets.user_id"
40
+ parent_columns: [id]
41
+ child_columns: [user_id]
42
+ # Introspector Confidence: 0.900 (indian_mobile)
43
+ pii_columns: [description]
44
+ primary_key_columns: [id]
45
+ action: redact
46
+ mutation_rules:
47
+ description: HMAC
48
+ - table: public.user_addresses
49
+ parent: public.users
50
+ join: "public.users.id = public.user_addresses.user_id"
51
+ parent_columns: [id]
52
+ child_columns: [user_id]
53
+ # Introspector Confidence: 0.780 (indian_pin_code)
54
+ pii_columns: [pincode]
55
+ primary_key_columns: [id]
56
+ action: redact
57
+ mutation_rules:
58
+ pincode: HMAC
59
+ - table: public.user_devices
60
+ parent: public.users
61
+ join: "public.users.id = public.user_devices.user_id"
62
+ parent_columns: [id]
63
+ child_columns: [user_id]
64
+ # Introspector Confidence: 0.820 (metadata)
65
+ pii_columns: [last_ip_address]
66
+ primary_key_columns: [id]
67
+ action: redact
68
+ mutation_rules:
69
+ last_ip_address: HMAC
70
+ - table: public.user_preferences
71
+ parent: public.users
72
+ join: "public.users.id = public.user_preferences.user_id"
73
+ parent_columns: [id]
74
+ child_columns: [user_id]
75
+ pii_columns: []
76
+ - table: public.order_items
77
+ parent: public.orders
78
+ join: "public.orders.id = public.order_items.order_id"
79
+ parent_columns: [id]
80
+ child_columns: [order_id]
81
+ pii_columns: []
82
+ - table: public.payments
83
+ parent: public.orders
84
+ join: "public.orders.id = public.payments.order_id"
85
+ parent_columns: [id]
86
+ child_columns: [order_id]
87
+ pii_columns: []
88
+ - table: public.ticket_messages
89
+ parent: public.support_tickets
90
+ join: "public.support_tickets.id = public.ticket_messages.ticket_id"
91
+ parent_columns: [id]
92
+ child_columns: [ticket_id]
93
+ # Introspector Confidence: 0.900 (indian_mobile)
94
+ pii_columns: [message_body]
95
+ primary_key_columns: [id]
96
+ action: redact
97
+ mutation_rules:
98
+ message_body: HMAC
99
+ - table: public.abandoned_carts
100
+ parent: public.users
101
+ join: "LOGICAL_LINK (customer_id)"
102
+ parent_columns: [customer_id]
103
+ child_columns: [customer_id]
104
+ pii_columns: []
105
+ - table: public.audit_logs
106
+ parent: public.users
107
+ join: "LOGICAL_LINK (actor_id)"
108
+ parent_columns: [actor_id]
109
+ child_columns: [actor_id]
110
+ # Introspector Confidence: 0.820 (ipv4)
111
+ pii_columns: [ip_address]
112
+ primary_key_columns: [id]
113
+ action: redact
114
+ mutation_rules:
115
+ ip_address: HMAC
116
+ - table: public.legacy_crm_notes
117
+ parent: public.users
118
+ join: "LOGICAL_LINK (client_id)"
119
+ parent_columns: [client_id]
120
+ child_columns: [client_id]
121
+ # Introspector Confidence: 0.950 (email, indian_mobile)
122
+ pii_columns: [agent_notes]
123
+ primary_key_columns: [id]
124
+ action: redact
125
+ mutation_rules:
126
+ agent_notes: HMAC
127
+ - table: public.marketing_campaign_clicks
128
+ parent: public.users
129
+ join: "LOGICAL_LINK (target_email)"
130
+ parent_columns: [target_email]
131
+ child_columns: [target_email]
132
+ # Introspector Confidence: 0.950 (email)
133
+ pii_columns: [target_email]
134
+ primary_key_columns: [id]
135
+ action: redact
136
+ mutation_rules:
137
+ target_email: HMAC
138
+ - table: public.third_party_telemetry
139
+ parent: public.users
140
+ join: "LOGICAL_LINK (user_uuid)"
141
+ parent_columns: [user_uuid]
142
+ child_columns: [user_uuid]
143
+ pii_columns: []
144
+
145
+ # [Potential Logical Link] public.users.customer_id <-> public.abandoned_carts.customer_id - Table exposes customer_id which conceptually maps to the root entity.
146
+ # [Potential Logical Link] public.users.actor_id <-> public.audit_logs.actor_id - Table exposes actor_id which conceptually maps to the root entity.
147
+ # [Potential Logical Link] public.kyc_documents.user_id <-> public.orders.user_id - Both tables expose user_id but no physical foreign key was found.
148
+ # [Potential Logical Link] public.kyc_documents.user_id <-> public.support_tickets.user_id - Both tables expose user_id but no physical foreign key was found.
149
+ # [Potential Logical Link] public.kyc_documents.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
150
+ # [Potential Logical Link] public.kyc_documents.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
151
+ # [Potential Logical Link] public.kyc_documents.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
152
+ # [Potential Logical Link] public.orders.user_id <-> public.support_tickets.user_id - Both tables expose user_id but no physical foreign key was found.
153
+ # [Potential Logical Link] public.orders.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
154
+ # [Potential Logical Link] public.orders.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
155
+ # [Potential Logical Link] public.orders.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
156
+ # [Potential Logical Link] public.support_tickets.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
157
+ # [Potential Logical Link] public.support_tickets.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
158
+ # [Potential Logical Link] public.support_tickets.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
159
+ # [Potential Logical Link] public.user_addresses.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
160
+ # [Potential Logical Link] public.user_addresses.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
161
+ # [Potential Logical Link] public.user_devices.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
162
+ # [Potential Logical Link] public.users.client_id <-> public.legacy_crm_notes.client_id - Table exposes client_id which conceptually maps to the root entity.
163
+ # [Potential Logical Link] public.users.target_email <-> public.marketing_campaign_clicks.target_email - Table exposes target_email which conceptually maps to the root entity.
164
+ # [Potential Logical Link] public.users.user_uuid <-> public.third_party_telemetry.user_uuid - Table exposes user_uuid which conceptually maps to the root entity.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dpdp-erasure-cli",
3
- "version": "1.0.4",
3
+ "version": "1.0.12",
4
4
  "license": "Apache-2.0",
5
5
  "keywords": [
6
6
  "dpdp",
@@ -18,6 +18,10 @@
18
18
  "bin": {
19
19
  "dpdp-cli": "./src/modules/cli/index.ts"
20
20
  },
21
+ "repository": {
22
+ "type": "git",
23
+ "url": "https://github.com/devxdh/dpdp-erasure-engine.git"
24
+ },
21
25
  "publishConfig": {
22
26
  "access": "public"
23
27
  },
@@ -50,4 +54,4 @@
50
54
  "postgres": "^3.4.9",
51
55
  "zod": "^4.4.2"
52
56
  }
53
- }
57
+ }
package/report.json ADDED
@@ -0,0 +1,370 @@
1
+ {
2
+ "summary": {
3
+ "rootTable": "public.users",
4
+ "generatedAt": "2026-06-12T05:19:07.235Z",
5
+ "schemaHash": "ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c",
6
+ "targetCount": 15,
7
+ "tablesWithPii": 8,
8
+ "piiColumnCount": 9,
9
+ "highConfidenceCount": 6,
10
+ "reviewRequiredCount": 3,
11
+ "potentialLogicalLinkCount": 20
12
+ },
13
+ "findings": [
14
+ {
15
+ "table": "public.legacy_crm_notes",
16
+ "column": "agent_notes",
17
+ "dataType": "text",
18
+ "confidence": 0.95,
19
+ "metadataScore": 0,
20
+ "contentMatchRatio": 1,
21
+ "sampleSize": 50,
22
+ "matchedSignatures": [
23
+ "email",
24
+ "indian_mobile"
25
+ ]
26
+ },
27
+ {
28
+ "table": "public.marketing_campaign_clicks",
29
+ "column": "target_email",
30
+ "dataType": "character varying",
31
+ "confidence": 0.95,
32
+ "metadataScore": 0.92,
33
+ "contentMatchRatio": 1,
34
+ "sampleSize": 50,
35
+ "matchedSignatures": [
36
+ "email"
37
+ ]
38
+ },
39
+ {
40
+ "table": "public.users",
41
+ "column": "email",
42
+ "dataType": "character varying",
43
+ "confidence": 0.95,
44
+ "metadataScore": 0.92,
45
+ "contentMatchRatio": 1,
46
+ "sampleSize": 50,
47
+ "matchedSignatures": [
48
+ "email"
49
+ ]
50
+ },
51
+ {
52
+ "table": "public.users",
53
+ "column": "phone_number",
54
+ "dataType": "character varying",
55
+ "confidence": 0.92,
56
+ "metadataScore": 0.92,
57
+ "contentMatchRatio": 1,
58
+ "sampleSize": 50,
59
+ "matchedSignatures": [
60
+ "indian_mobile"
61
+ ]
62
+ },
63
+ {
64
+ "table": "public.support_tickets",
65
+ "column": "description",
66
+ "dataType": "text",
67
+ "confidence": 0.9,
68
+ "metadataScore": 0,
69
+ "contentMatchRatio": 1,
70
+ "sampleSize": 50,
71
+ "matchedSignatures": [
72
+ "indian_mobile"
73
+ ]
74
+ },
75
+ {
76
+ "table": "public.ticket_messages",
77
+ "column": "message_body",
78
+ "dataType": "text",
79
+ "confidence": 0.9,
80
+ "metadataScore": 0,
81
+ "contentMatchRatio": 1,
82
+ "sampleSize": 50,
83
+ "matchedSignatures": [
84
+ "indian_mobile"
85
+ ]
86
+ },
87
+ {
88
+ "table": "public.audit_logs",
89
+ "column": "ip_address",
90
+ "dataType": "inet",
91
+ "confidence": 0.82,
92
+ "metadataScore": 0.82,
93
+ "contentMatchRatio": 1,
94
+ "sampleSize": 50,
95
+ "matchedSignatures": [
96
+ "ipv4"
97
+ ]
98
+ },
99
+ {
100
+ "table": "public.user_devices",
101
+ "column": "last_ip_address",
102
+ "dataType": "inet",
103
+ "confidence": 0.82,
104
+ "metadataScore": 0.82,
105
+ "contentMatchRatio": 0,
106
+ "sampleSize": 0,
107
+ "matchedSignatures": []
108
+ },
109
+ {
110
+ "table": "public.user_addresses",
111
+ "column": "pincode",
112
+ "dataType": "character varying",
113
+ "confidence": 0.78,
114
+ "metadataScore": 0.62,
115
+ "contentMatchRatio": 1,
116
+ "sampleSize": 50,
117
+ "matchedSignatures": [
118
+ "indian_pin_code"
119
+ ]
120
+ }
121
+ ],
122
+ "potentialLogicalLinks": [
123
+ {
124
+ "sourceTable": {
125
+ "schema": "public",
126
+ "table": "users"
127
+ },
128
+ "targetTable": {
129
+ "schema": "public",
130
+ "table": "abandoned_carts"
131
+ },
132
+ "column": "customer_id",
133
+ "reason": "Table exposes customer_id which conceptually maps to the root entity."
134
+ },
135
+ {
136
+ "sourceTable": {
137
+ "schema": "public",
138
+ "table": "users"
139
+ },
140
+ "targetTable": {
141
+ "schema": "public",
142
+ "table": "audit_logs"
143
+ },
144
+ "column": "actor_id",
145
+ "reason": "Table exposes actor_id which conceptually maps to the root entity."
146
+ },
147
+ {
148
+ "sourceTable": {
149
+ "schema": "public",
150
+ "table": "kyc_documents"
151
+ },
152
+ "targetTable": {
153
+ "schema": "public",
154
+ "table": "orders"
155
+ },
156
+ "column": "user_id",
157
+ "reason": "Both tables expose user_id but no physical foreign key was found."
158
+ },
159
+ {
160
+ "sourceTable": {
161
+ "schema": "public",
162
+ "table": "kyc_documents"
163
+ },
164
+ "targetTable": {
165
+ "schema": "public",
166
+ "table": "support_tickets"
167
+ },
168
+ "column": "user_id",
169
+ "reason": "Both tables expose user_id but no physical foreign key was found."
170
+ },
171
+ {
172
+ "sourceTable": {
173
+ "schema": "public",
174
+ "table": "kyc_documents"
175
+ },
176
+ "targetTable": {
177
+ "schema": "public",
178
+ "table": "user_addresses"
179
+ },
180
+ "column": "user_id",
181
+ "reason": "Both tables expose user_id but no physical foreign key was found."
182
+ },
183
+ {
184
+ "sourceTable": {
185
+ "schema": "public",
186
+ "table": "kyc_documents"
187
+ },
188
+ "targetTable": {
189
+ "schema": "public",
190
+ "table": "user_devices"
191
+ },
192
+ "column": "user_id",
193
+ "reason": "Both tables expose user_id but no physical foreign key was found."
194
+ },
195
+ {
196
+ "sourceTable": {
197
+ "schema": "public",
198
+ "table": "kyc_documents"
199
+ },
200
+ "targetTable": {
201
+ "schema": "public",
202
+ "table": "user_preferences"
203
+ },
204
+ "column": "user_id",
205
+ "reason": "Both tables expose user_id but no physical foreign key was found."
206
+ },
207
+ {
208
+ "sourceTable": {
209
+ "schema": "public",
210
+ "table": "orders"
211
+ },
212
+ "targetTable": {
213
+ "schema": "public",
214
+ "table": "support_tickets"
215
+ },
216
+ "column": "user_id",
217
+ "reason": "Both tables expose user_id but no physical foreign key was found."
218
+ },
219
+ {
220
+ "sourceTable": {
221
+ "schema": "public",
222
+ "table": "orders"
223
+ },
224
+ "targetTable": {
225
+ "schema": "public",
226
+ "table": "user_addresses"
227
+ },
228
+ "column": "user_id",
229
+ "reason": "Both tables expose user_id but no physical foreign key was found."
230
+ },
231
+ {
232
+ "sourceTable": {
233
+ "schema": "public",
234
+ "table": "orders"
235
+ },
236
+ "targetTable": {
237
+ "schema": "public",
238
+ "table": "user_devices"
239
+ },
240
+ "column": "user_id",
241
+ "reason": "Both tables expose user_id but no physical foreign key was found."
242
+ },
243
+ {
244
+ "sourceTable": {
245
+ "schema": "public",
246
+ "table": "orders"
247
+ },
248
+ "targetTable": {
249
+ "schema": "public",
250
+ "table": "user_preferences"
251
+ },
252
+ "column": "user_id",
253
+ "reason": "Both tables expose user_id but no physical foreign key was found."
254
+ },
255
+ {
256
+ "sourceTable": {
257
+ "schema": "public",
258
+ "table": "support_tickets"
259
+ },
260
+ "targetTable": {
261
+ "schema": "public",
262
+ "table": "user_addresses"
263
+ },
264
+ "column": "user_id",
265
+ "reason": "Both tables expose user_id but no physical foreign key was found."
266
+ },
267
+ {
268
+ "sourceTable": {
269
+ "schema": "public",
270
+ "table": "support_tickets"
271
+ },
272
+ "targetTable": {
273
+ "schema": "public",
274
+ "table": "user_devices"
275
+ },
276
+ "column": "user_id",
277
+ "reason": "Both tables expose user_id but no physical foreign key was found."
278
+ },
279
+ {
280
+ "sourceTable": {
281
+ "schema": "public",
282
+ "table": "support_tickets"
283
+ },
284
+ "targetTable": {
285
+ "schema": "public",
286
+ "table": "user_preferences"
287
+ },
288
+ "column": "user_id",
289
+ "reason": "Both tables expose user_id but no physical foreign key was found."
290
+ },
291
+ {
292
+ "sourceTable": {
293
+ "schema": "public",
294
+ "table": "user_addresses"
295
+ },
296
+ "targetTable": {
297
+ "schema": "public",
298
+ "table": "user_devices"
299
+ },
300
+ "column": "user_id",
301
+ "reason": "Both tables expose user_id but no physical foreign key was found."
302
+ },
303
+ {
304
+ "sourceTable": {
305
+ "schema": "public",
306
+ "table": "user_addresses"
307
+ },
308
+ "targetTable": {
309
+ "schema": "public",
310
+ "table": "user_preferences"
311
+ },
312
+ "column": "user_id",
313
+ "reason": "Both tables expose user_id but no physical foreign key was found."
314
+ },
315
+ {
316
+ "sourceTable": {
317
+ "schema": "public",
318
+ "table": "user_devices"
319
+ },
320
+ "targetTable": {
321
+ "schema": "public",
322
+ "table": "user_preferences"
323
+ },
324
+ "column": "user_id",
325
+ "reason": "Both tables expose user_id but no physical foreign key was found."
326
+ },
327
+ {
328
+ "sourceTable": {
329
+ "schema": "public",
330
+ "table": "users"
331
+ },
332
+ "targetTable": {
333
+ "schema": "public",
334
+ "table": "legacy_crm_notes"
335
+ },
336
+ "column": "client_id",
337
+ "reason": "Table exposes client_id which conceptually maps to the root entity."
338
+ },
339
+ {
340
+ "sourceTable": {
341
+ "schema": "public",
342
+ "table": "users"
343
+ },
344
+ "targetTable": {
345
+ "schema": "public",
346
+ "table": "marketing_campaign_clicks"
347
+ },
348
+ "column": "target_email",
349
+ "reason": "Table exposes target_email which conceptually maps to the root entity."
350
+ },
351
+ {
352
+ "sourceTable": {
353
+ "schema": "public",
354
+ "table": "users"
355
+ },
356
+ "targetTable": {
357
+ "schema": "public",
358
+ "table": "third_party_telemetry"
359
+ },
360
+ "column": "user_uuid",
361
+ "reason": "Table exposes user_uuid which conceptually maps to the root entity."
362
+ }
363
+ ],
364
+ "nextSteps": [
365
+ "Review every PII column and potential logical link with the application owner.",
366
+ "Copy reviewed targets into compliance.worker.yml and complete legal_attestation.",
367
+ "Run compliance-worker check-integrity before allowing live worker boot.",
368
+ "Sign the reviewed manifest with compliance-worker sign after DPO approval."
369
+ ]
370
+ }
package/report.md ADDED
@@ -0,0 +1,57 @@
1
+ # Compliance Introspector Report
2
+
3
+ ## Summary
4
+
5
+ - Root table: `public.users`
6
+ - Generated at: `2026-06-12T05:19:07.235Z`
7
+ - Schema hash: `ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c`
8
+ - DAG targets: 15
9
+ - Tables with PII: 8
10
+ - PII columns: 9
11
+ - High-confidence findings: 6
12
+ - Review-required findings: 3
13
+ - Potential logical links: 20
14
+
15
+ ## PII Findings
16
+
17
+ | Table | Column | Type | Confidence | Metadata | Content | Signatures |
18
+ | --- | --- | --- | ---: | ---: | ---: | --- |
19
+ | `public.legacy_crm_notes` | `agent_notes` | `text` | 0.950 | 0.000 | 1.000 | email, indian_mobile |
20
+ | `public.marketing_campaign_clicks` | `target_email` | `character varying` | 0.950 | 0.920 | 1.000 | email |
21
+ | `public.users` | `email` | `character varying` | 0.950 | 0.920 | 1.000 | email |
22
+ | `public.users` | `phone_number` | `character varying` | 0.920 | 0.920 | 1.000 | indian_mobile |
23
+ | `public.support_tickets` | `description` | `text` | 0.900 | 0.000 | 1.000 | indian_mobile |
24
+ | `public.ticket_messages` | `message_body` | `text` | 0.900 | 0.000 | 1.000 | indian_mobile |
25
+ | `public.audit_logs` | `ip_address` | `inet` | 0.820 | 0.820 | 1.000 | ipv4 |
26
+ | `public.user_devices` | `last_ip_address` | `inet` | 0.820 | 0.820 | 0.000 | metadata |
27
+ | `public.user_addresses` | `pincode` | `character varying` | 0.780 | 0.620 | 1.000 | indian_pin_code |
28
+
29
+ ## Potential Logical Links
30
+
31
+ - `public.users.customer_id` <-> `public.abandoned_carts.customer_id`: Table exposes customer_id which conceptually maps to the root entity.
32
+ - `public.users.actor_id` <-> `public.audit_logs.actor_id`: Table exposes actor_id which conceptually maps to the root entity.
33
+ - `public.kyc_documents.user_id` <-> `public.orders.user_id`: Both tables expose user_id but no physical foreign key was found.
34
+ - `public.kyc_documents.user_id` <-> `public.support_tickets.user_id`: Both tables expose user_id but no physical foreign key was found.
35
+ - `public.kyc_documents.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
36
+ - `public.kyc_documents.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
37
+ - `public.kyc_documents.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
38
+ - `public.orders.user_id` <-> `public.support_tickets.user_id`: Both tables expose user_id but no physical foreign key was found.
39
+ - `public.orders.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
40
+ - `public.orders.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
41
+ - `public.orders.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
42
+ - `public.support_tickets.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
43
+ - `public.support_tickets.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
44
+ - `public.support_tickets.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
45
+ - `public.user_addresses.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
46
+ - `public.user_addresses.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
47
+ - `public.user_devices.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
48
+ - `public.users.client_id` <-> `public.legacy_crm_notes.client_id`: Table exposes client_id which conceptually maps to the root entity.
49
+ - `public.users.target_email` <-> `public.marketing_campaign_clicks.target_email`: Table exposes target_email which conceptually maps to the root entity.
50
+ - `public.users.user_uuid` <-> `public.third_party_telemetry.user_uuid`: Table exposes user_uuid which conceptually maps to the root entity.
51
+
52
+ ## Next Steps
53
+
54
+ - Review every PII column and potential logical link with the application owner.
55
+ - Copy reviewed targets into compliance.worker.yml and complete legal_attestation.
56
+ - Run compliance-worker check-integrity before allowing live worker boot.
57
+ - Sign the reviewed manifest with compliance-worker sign after DPO approval.
@@ -137,6 +137,7 @@ const METADATA_PATTERNS: Array<{ pattern: RegExp; score: number }> = [
137
137
  { pattern: /(^|_)(driving_license|driving_licence|license_number|licence_number|dl_number|dl_no)($|_)/i, score: MEDIUM_METADATA_SCORE },
138
138
  { pattern: /(^|_)(address|street|postal_code|zip_code|pin_code|pincode)($|_)/i, score: WEAK_METADATA_SCORE },
139
139
  { pattern: /(^|_)(device_fingerprint|device_id|advertising_id|gaid|idfa)($|_)/i, score: WEAK_METADATA_SCORE },
140
+ { pattern: /(^|_)(document_number|identity_number|id_number)($|_)/i, score: WEAK_METADATA_SCORE },
140
141
  ];
141
142
 
142
143
  function qualifiedKey(table: QualifiedTable): string {
@@ -336,12 +337,23 @@ function classifyLeafDetailed(value: string, columnName: string = ""): ContentSi
336
337
  const bytes = textEncoder.encode(value.trim());
337
338
  try {
338
339
  const normalized = textDecoder.decode(bytes).trim();
339
- return CONTENT_SIGNATURES
340
- .filter((signature) =>
341
- signatureHasMetadataSupport(signature, columnName) &&
342
- signature.pattern.test(normalized) &&
343
- (!signature.validate || signature.validate(normalized))
344
- );
340
+ // Split into tokens and strip leading/trailing punctuation so regexes can match substrings
341
+ const tokens = normalized.split(/\s+/).map((t) => t.replace(/^[^\w\+]+|[^\w]+$/g, ""));
342
+ const candidates = Array.from(new Set([normalized, ...tokens])).filter((t) => t.length > 0);
343
+
344
+ const matches = new Set<ContentSignature>();
345
+ for (const candidate of candidates) {
346
+ for (const signature of CONTENT_SIGNATURES) {
347
+ if (
348
+ signatureHasMetadataSupport(signature, columnName) &&
349
+ signature.pattern.test(candidate) &&
350
+ (!signature.validate || signature.validate(candidate))
351
+ ) {
352
+ matches.add(signature);
353
+ }
354
+ }
355
+ }
356
+ return Array.from(matches);
345
357
  } finally {
346
358
  bytes.fill(0);
347
359
  }
@@ -236,7 +236,7 @@ export async function discoverPotentialLogicalLinks(
236
236
  const byColumn = new Map<string, QualifiedTable[]>();
237
237
  for (const row of rows) {
238
238
  const normalized = row.column_name.toLowerCase();
239
- if (!/^(?:user_id|account_id|customer_id|member_id|subject_id|.*_user_id)$/.test(normalized)) {
239
+ if (!/^(?:user_id|account_id|customer_id|client_id|actor_id|user_uuid|member_id|subject_id|.*_user_id|target_email|user_email)$/.test(normalized)) {
240
240
  continue;
241
241
  }
242
242
 
@@ -247,7 +247,25 @@ export async function discoverPotentialLogicalLinks(
247
247
 
248
248
  const links: PotentialLogicalLink[] = [];
249
249
  const emitted = new Set<string>();
250
+
250
251
  for (const [column, tables] of byColumn.entries()) {
252
+ for (const table of tables) {
253
+ // Explicitly link any orphan table that has an identity-like column to the root table
254
+ if (table.schema === root.schema && table.table === root.table) {
255
+ continue;
256
+ }
257
+ const key = physicalLinkKey(root, table, column);
258
+ if (!physicalLinks.has(key) && !emitted.has(key)) {
259
+ emitted.add(key);
260
+ links.push({
261
+ sourceTable: root,
262
+ targetTable: table,
263
+ column,
264
+ reason: `Table exposes ${column} which conceptually maps to the root entity.`,
265
+ });
266
+ }
267
+ }
268
+
251
269
  if (tables.length < 2) {
252
270
  continue;
253
271
  }
@@ -38,14 +38,6 @@ export async function runIntrospector(options: RunIntrospectorOptions): Promise<
38
38
  maxDepth,
39
39
  });
40
40
 
41
- const classifiedColumns = await classifyDagTargets({
42
- sql: options.sql,
43
- targets: dag,
44
- samplePercent: options.samplePercent,
45
- sampleLimit: options.sampleLimit,
46
- threshold: options.threshold,
47
- });
48
-
49
41
  const [schemaHash, potentialLogicalLinks] = await Promise.all([
50
42
  detectSchemaDrift(options.sql, root.schema),
51
43
  discoverPotentialLogicalLinks(
@@ -58,7 +50,49 @@ export async function runIntrospector(options: RunIntrospectorOptions): Promise<
58
50
  ),
59
51
  ]);
60
52
 
61
- const targets: IntrospectorTargetDraft[] = dag.map((target) => ({
53
+ const dagTableKeys = new Set(dag.map((t) => targetKey(t.table.schema, t.table.table)));
54
+ const logicalTargets: typeof dag = [];
55
+
56
+ for (const link of potentialLogicalLinks) {
57
+ const sourceKey = targetKey(link.sourceTable.schema, link.sourceTable.table);
58
+ const targetKeyStr = targetKey(link.targetTable.schema, link.targetTable.table);
59
+
60
+ if (dagTableKeys.has(sourceKey) && !dagTableKeys.has(targetKeyStr)) {
61
+ dagTableKeys.add(targetKeyStr);
62
+ logicalTargets.push({
63
+ table: link.targetTable,
64
+ parentTable: link.sourceTable,
65
+ constraintName: null,
66
+ childColumns: [link.column],
67
+ parentColumns: [link.column],
68
+ depth: maxDepth,
69
+ fkCondition: `LOGICAL_LINK (${link.column})`,
70
+ });
71
+ } else if (dagTableKeys.has(targetKeyStr) && !dagTableKeys.has(sourceKey)) {
72
+ dagTableKeys.add(sourceKey);
73
+ logicalTargets.push({
74
+ table: link.sourceTable,
75
+ parentTable: link.targetTable,
76
+ constraintName: null,
77
+ childColumns: [link.column],
78
+ parentColumns: [link.column],
79
+ depth: maxDepth,
80
+ fkCondition: `LOGICAL_LINK (${link.column})`,
81
+ });
82
+ }
83
+ }
84
+
85
+ const fullTargets = [...dag, ...logicalTargets];
86
+
87
+ const classifiedColumns = await classifyDagTargets({
88
+ sql: options.sql,
89
+ targets: fullTargets,
90
+ samplePercent: options.samplePercent,
91
+ sampleLimit: options.sampleLimit,
92
+ threshold: options.threshold,
93
+ });
94
+
95
+ const targets: IntrospectorTargetDraft[] = fullTargets.map((target) => ({
62
96
  table: target.table,
63
97
  parentTable: target.parentTable,
64
98
  fkCondition: target.fkCondition,