dpdp-erasure-cli 1.0.11 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compliance.worker.yml +164 -0
- package/package.json +2 -2
- package/report.json +370 -0
- package/report.md +57 -0
- package/src/modules/introspector/classifier.ts +18 -6
- package/src/modules/introspector/dag.ts +19 -1
- package/src/modules/introspector/run.ts +43 -9
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# AUTO-GENERATED BY INTROSPECTOR
|
|
2
|
+
# REVIEW REQUIRED: DPO must validate every table, join condition, and PII column before production use.
|
|
3
|
+
# Generated At: 2026-06-12T05:19:07.235Z
|
|
4
|
+
|
|
5
|
+
legal_attestation:
|
|
6
|
+
dpo_identifier: PENDING_REVIEW
|
|
7
|
+
configuration_version: introspector-draft
|
|
8
|
+
legal_review_date: PENDING_REVIEW
|
|
9
|
+
schema_hash: ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c
|
|
10
|
+
generated_by: compliance-introspector-v1
|
|
11
|
+
acknowledgment: PENDING_REVIEW
|
|
12
|
+
|
|
13
|
+
legal_disclaimer:
|
|
14
|
+
text: "Auto-generated by Compliance Worker. The DPO/Developer is responsible for verifying all logical links and PII mappings."
|
|
15
|
+
|
|
16
|
+
rules:
|
|
17
|
+
- id: dpdp_standard
|
|
18
|
+
root_table: public.users
|
|
19
|
+
max_depth: 32
|
|
20
|
+
targets:
|
|
21
|
+
- table: public.users
|
|
22
|
+
# Introspector Confidence: 0.950 (email)
|
|
23
|
+
# Introspector Confidence: 0.920 (indian_mobile)
|
|
24
|
+
pii_columns: [email, phone_number]
|
|
25
|
+
- table: public.kyc_documents
|
|
26
|
+
parent: public.users
|
|
27
|
+
join: "public.users.id = public.kyc_documents.user_id"
|
|
28
|
+
parent_columns: [id]
|
|
29
|
+
child_columns: [user_id]
|
|
30
|
+
pii_columns: []
|
|
31
|
+
- table: public.orders
|
|
32
|
+
parent: public.users
|
|
33
|
+
join: "public.users.id = public.orders.user_id"
|
|
34
|
+
parent_columns: [id]
|
|
35
|
+
child_columns: [user_id]
|
|
36
|
+
pii_columns: []
|
|
37
|
+
- table: public.support_tickets
|
|
38
|
+
parent: public.users
|
|
39
|
+
join: "public.users.id = public.support_tickets.user_id"
|
|
40
|
+
parent_columns: [id]
|
|
41
|
+
child_columns: [user_id]
|
|
42
|
+
# Introspector Confidence: 0.900 (indian_mobile)
|
|
43
|
+
pii_columns: [description]
|
|
44
|
+
primary_key_columns: [id]
|
|
45
|
+
action: redact
|
|
46
|
+
mutation_rules:
|
|
47
|
+
description: HMAC
|
|
48
|
+
- table: public.user_addresses
|
|
49
|
+
parent: public.users
|
|
50
|
+
join: "public.users.id = public.user_addresses.user_id"
|
|
51
|
+
parent_columns: [id]
|
|
52
|
+
child_columns: [user_id]
|
|
53
|
+
# Introspector Confidence: 0.780 (indian_pin_code)
|
|
54
|
+
pii_columns: [pincode]
|
|
55
|
+
primary_key_columns: [id]
|
|
56
|
+
action: redact
|
|
57
|
+
mutation_rules:
|
|
58
|
+
pincode: HMAC
|
|
59
|
+
- table: public.user_devices
|
|
60
|
+
parent: public.users
|
|
61
|
+
join: "public.users.id = public.user_devices.user_id"
|
|
62
|
+
parent_columns: [id]
|
|
63
|
+
child_columns: [user_id]
|
|
64
|
+
# Introspector Confidence: 0.820 (metadata)
|
|
65
|
+
pii_columns: [last_ip_address]
|
|
66
|
+
primary_key_columns: [id]
|
|
67
|
+
action: redact
|
|
68
|
+
mutation_rules:
|
|
69
|
+
last_ip_address: HMAC
|
|
70
|
+
- table: public.user_preferences
|
|
71
|
+
parent: public.users
|
|
72
|
+
join: "public.users.id = public.user_preferences.user_id"
|
|
73
|
+
parent_columns: [id]
|
|
74
|
+
child_columns: [user_id]
|
|
75
|
+
pii_columns: []
|
|
76
|
+
- table: public.order_items
|
|
77
|
+
parent: public.orders
|
|
78
|
+
join: "public.orders.id = public.order_items.order_id"
|
|
79
|
+
parent_columns: [id]
|
|
80
|
+
child_columns: [order_id]
|
|
81
|
+
pii_columns: []
|
|
82
|
+
- table: public.payments
|
|
83
|
+
parent: public.orders
|
|
84
|
+
join: "public.orders.id = public.payments.order_id"
|
|
85
|
+
parent_columns: [id]
|
|
86
|
+
child_columns: [order_id]
|
|
87
|
+
pii_columns: []
|
|
88
|
+
- table: public.ticket_messages
|
|
89
|
+
parent: public.support_tickets
|
|
90
|
+
join: "public.support_tickets.id = public.ticket_messages.ticket_id"
|
|
91
|
+
parent_columns: [id]
|
|
92
|
+
child_columns: [ticket_id]
|
|
93
|
+
# Introspector Confidence: 0.900 (indian_mobile)
|
|
94
|
+
pii_columns: [message_body]
|
|
95
|
+
primary_key_columns: [id]
|
|
96
|
+
action: redact
|
|
97
|
+
mutation_rules:
|
|
98
|
+
message_body: HMAC
|
|
99
|
+
- table: public.abandoned_carts
|
|
100
|
+
parent: public.users
|
|
101
|
+
join: "LOGICAL_LINK (customer_id)"
|
|
102
|
+
parent_columns: [customer_id]
|
|
103
|
+
child_columns: [customer_id]
|
|
104
|
+
pii_columns: []
|
|
105
|
+
- table: public.audit_logs
|
|
106
|
+
parent: public.users
|
|
107
|
+
join: "LOGICAL_LINK (actor_id)"
|
|
108
|
+
parent_columns: [actor_id]
|
|
109
|
+
child_columns: [actor_id]
|
|
110
|
+
# Introspector Confidence: 0.820 (ipv4)
|
|
111
|
+
pii_columns: [ip_address]
|
|
112
|
+
primary_key_columns: [id]
|
|
113
|
+
action: redact
|
|
114
|
+
mutation_rules:
|
|
115
|
+
ip_address: HMAC
|
|
116
|
+
- table: public.legacy_crm_notes
|
|
117
|
+
parent: public.users
|
|
118
|
+
join: "LOGICAL_LINK (client_id)"
|
|
119
|
+
parent_columns: [client_id]
|
|
120
|
+
child_columns: [client_id]
|
|
121
|
+
# Introspector Confidence: 0.950 (email, indian_mobile)
|
|
122
|
+
pii_columns: [agent_notes]
|
|
123
|
+
primary_key_columns: [id]
|
|
124
|
+
action: redact
|
|
125
|
+
mutation_rules:
|
|
126
|
+
agent_notes: HMAC
|
|
127
|
+
- table: public.marketing_campaign_clicks
|
|
128
|
+
parent: public.users
|
|
129
|
+
join: "LOGICAL_LINK (target_email)"
|
|
130
|
+
parent_columns: [target_email]
|
|
131
|
+
child_columns: [target_email]
|
|
132
|
+
# Introspector Confidence: 0.950 (email)
|
|
133
|
+
pii_columns: [target_email]
|
|
134
|
+
primary_key_columns: [id]
|
|
135
|
+
action: redact
|
|
136
|
+
mutation_rules:
|
|
137
|
+
target_email: HMAC
|
|
138
|
+
- table: public.third_party_telemetry
|
|
139
|
+
parent: public.users
|
|
140
|
+
join: "LOGICAL_LINK (user_uuid)"
|
|
141
|
+
parent_columns: [user_uuid]
|
|
142
|
+
child_columns: [user_uuid]
|
|
143
|
+
pii_columns: []
|
|
144
|
+
|
|
145
|
+
# [Potential Logical Link] public.users.customer_id <-> public.abandoned_carts.customer_id - Table exposes customer_id which conceptually maps to the root entity.
|
|
146
|
+
# [Potential Logical Link] public.users.actor_id <-> public.audit_logs.actor_id - Table exposes actor_id which conceptually maps to the root entity.
|
|
147
|
+
# [Potential Logical Link] public.kyc_documents.user_id <-> public.orders.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
148
|
+
# [Potential Logical Link] public.kyc_documents.user_id <-> public.support_tickets.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
149
|
+
# [Potential Logical Link] public.kyc_documents.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
150
|
+
# [Potential Logical Link] public.kyc_documents.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
151
|
+
# [Potential Logical Link] public.kyc_documents.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
152
|
+
# [Potential Logical Link] public.orders.user_id <-> public.support_tickets.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
153
|
+
# [Potential Logical Link] public.orders.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
154
|
+
# [Potential Logical Link] public.orders.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
155
|
+
# [Potential Logical Link] public.orders.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
156
|
+
# [Potential Logical Link] public.support_tickets.user_id <-> public.user_addresses.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
157
|
+
# [Potential Logical Link] public.support_tickets.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
158
|
+
# [Potential Logical Link] public.support_tickets.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
159
|
+
# [Potential Logical Link] public.user_addresses.user_id <-> public.user_devices.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
160
|
+
# [Potential Logical Link] public.user_addresses.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
161
|
+
# [Potential Logical Link] public.user_devices.user_id <-> public.user_preferences.user_id - Both tables expose user_id but no physical foreign key was found.
|
|
162
|
+
# [Potential Logical Link] public.users.client_id <-> public.legacy_crm_notes.client_id - Table exposes client_id which conceptually maps to the root entity.
|
|
163
|
+
# [Potential Logical Link] public.users.target_email <-> public.marketing_campaign_clicks.target_email - Table exposes target_email which conceptually maps to the root entity.
|
|
164
|
+
# [Potential Logical Link] public.users.user_uuid <-> public.third_party_telemetry.user_uuid - Table exposes user_uuid which conceptually maps to the root entity.
|
package/package.json
CHANGED
package/report.json
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
{
|
|
2
|
+
"summary": {
|
|
3
|
+
"rootTable": "public.users",
|
|
4
|
+
"generatedAt": "2026-06-12T05:19:07.235Z",
|
|
5
|
+
"schemaHash": "ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c",
|
|
6
|
+
"targetCount": 15,
|
|
7
|
+
"tablesWithPii": 8,
|
|
8
|
+
"piiColumnCount": 9,
|
|
9
|
+
"highConfidenceCount": 6,
|
|
10
|
+
"reviewRequiredCount": 3,
|
|
11
|
+
"potentialLogicalLinkCount": 20
|
|
12
|
+
},
|
|
13
|
+
"findings": [
|
|
14
|
+
{
|
|
15
|
+
"table": "public.legacy_crm_notes",
|
|
16
|
+
"column": "agent_notes",
|
|
17
|
+
"dataType": "text",
|
|
18
|
+
"confidence": 0.95,
|
|
19
|
+
"metadataScore": 0,
|
|
20
|
+
"contentMatchRatio": 1,
|
|
21
|
+
"sampleSize": 50,
|
|
22
|
+
"matchedSignatures": [
|
|
23
|
+
"email",
|
|
24
|
+
"indian_mobile"
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"table": "public.marketing_campaign_clicks",
|
|
29
|
+
"column": "target_email",
|
|
30
|
+
"dataType": "character varying",
|
|
31
|
+
"confidence": 0.95,
|
|
32
|
+
"metadataScore": 0.92,
|
|
33
|
+
"contentMatchRatio": 1,
|
|
34
|
+
"sampleSize": 50,
|
|
35
|
+
"matchedSignatures": [
|
|
36
|
+
"email"
|
|
37
|
+
]
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"table": "public.users",
|
|
41
|
+
"column": "email",
|
|
42
|
+
"dataType": "character varying",
|
|
43
|
+
"confidence": 0.95,
|
|
44
|
+
"metadataScore": 0.92,
|
|
45
|
+
"contentMatchRatio": 1,
|
|
46
|
+
"sampleSize": 50,
|
|
47
|
+
"matchedSignatures": [
|
|
48
|
+
"email"
|
|
49
|
+
]
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"table": "public.users",
|
|
53
|
+
"column": "phone_number",
|
|
54
|
+
"dataType": "character varying",
|
|
55
|
+
"confidence": 0.92,
|
|
56
|
+
"metadataScore": 0.92,
|
|
57
|
+
"contentMatchRatio": 1,
|
|
58
|
+
"sampleSize": 50,
|
|
59
|
+
"matchedSignatures": [
|
|
60
|
+
"indian_mobile"
|
|
61
|
+
]
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"table": "public.support_tickets",
|
|
65
|
+
"column": "description",
|
|
66
|
+
"dataType": "text",
|
|
67
|
+
"confidence": 0.9,
|
|
68
|
+
"metadataScore": 0,
|
|
69
|
+
"contentMatchRatio": 1,
|
|
70
|
+
"sampleSize": 50,
|
|
71
|
+
"matchedSignatures": [
|
|
72
|
+
"indian_mobile"
|
|
73
|
+
]
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"table": "public.ticket_messages",
|
|
77
|
+
"column": "message_body",
|
|
78
|
+
"dataType": "text",
|
|
79
|
+
"confidence": 0.9,
|
|
80
|
+
"metadataScore": 0,
|
|
81
|
+
"contentMatchRatio": 1,
|
|
82
|
+
"sampleSize": 50,
|
|
83
|
+
"matchedSignatures": [
|
|
84
|
+
"indian_mobile"
|
|
85
|
+
]
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
"table": "public.audit_logs",
|
|
89
|
+
"column": "ip_address",
|
|
90
|
+
"dataType": "inet",
|
|
91
|
+
"confidence": 0.82,
|
|
92
|
+
"metadataScore": 0.82,
|
|
93
|
+
"contentMatchRatio": 1,
|
|
94
|
+
"sampleSize": 50,
|
|
95
|
+
"matchedSignatures": [
|
|
96
|
+
"ipv4"
|
|
97
|
+
]
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"table": "public.user_devices",
|
|
101
|
+
"column": "last_ip_address",
|
|
102
|
+
"dataType": "inet",
|
|
103
|
+
"confidence": 0.82,
|
|
104
|
+
"metadataScore": 0.82,
|
|
105
|
+
"contentMatchRatio": 0,
|
|
106
|
+
"sampleSize": 0,
|
|
107
|
+
"matchedSignatures": []
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
"table": "public.user_addresses",
|
|
111
|
+
"column": "pincode",
|
|
112
|
+
"dataType": "character varying",
|
|
113
|
+
"confidence": 0.78,
|
|
114
|
+
"metadataScore": 0.62,
|
|
115
|
+
"contentMatchRatio": 1,
|
|
116
|
+
"sampleSize": 50,
|
|
117
|
+
"matchedSignatures": [
|
|
118
|
+
"indian_pin_code"
|
|
119
|
+
]
|
|
120
|
+
}
|
|
121
|
+
],
|
|
122
|
+
"potentialLogicalLinks": [
|
|
123
|
+
{
|
|
124
|
+
"sourceTable": {
|
|
125
|
+
"schema": "public",
|
|
126
|
+
"table": "users"
|
|
127
|
+
},
|
|
128
|
+
"targetTable": {
|
|
129
|
+
"schema": "public",
|
|
130
|
+
"table": "abandoned_carts"
|
|
131
|
+
},
|
|
132
|
+
"column": "customer_id",
|
|
133
|
+
"reason": "Table exposes customer_id which conceptually maps to the root entity."
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"sourceTable": {
|
|
137
|
+
"schema": "public",
|
|
138
|
+
"table": "users"
|
|
139
|
+
},
|
|
140
|
+
"targetTable": {
|
|
141
|
+
"schema": "public",
|
|
142
|
+
"table": "audit_logs"
|
|
143
|
+
},
|
|
144
|
+
"column": "actor_id",
|
|
145
|
+
"reason": "Table exposes actor_id which conceptually maps to the root entity."
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"sourceTable": {
|
|
149
|
+
"schema": "public",
|
|
150
|
+
"table": "kyc_documents"
|
|
151
|
+
},
|
|
152
|
+
"targetTable": {
|
|
153
|
+
"schema": "public",
|
|
154
|
+
"table": "orders"
|
|
155
|
+
},
|
|
156
|
+
"column": "user_id",
|
|
157
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"sourceTable": {
|
|
161
|
+
"schema": "public",
|
|
162
|
+
"table": "kyc_documents"
|
|
163
|
+
},
|
|
164
|
+
"targetTable": {
|
|
165
|
+
"schema": "public",
|
|
166
|
+
"table": "support_tickets"
|
|
167
|
+
},
|
|
168
|
+
"column": "user_id",
|
|
169
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"sourceTable": {
|
|
173
|
+
"schema": "public",
|
|
174
|
+
"table": "kyc_documents"
|
|
175
|
+
},
|
|
176
|
+
"targetTable": {
|
|
177
|
+
"schema": "public",
|
|
178
|
+
"table": "user_addresses"
|
|
179
|
+
},
|
|
180
|
+
"column": "user_id",
|
|
181
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"sourceTable": {
|
|
185
|
+
"schema": "public",
|
|
186
|
+
"table": "kyc_documents"
|
|
187
|
+
},
|
|
188
|
+
"targetTable": {
|
|
189
|
+
"schema": "public",
|
|
190
|
+
"table": "user_devices"
|
|
191
|
+
},
|
|
192
|
+
"column": "user_id",
|
|
193
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
"sourceTable": {
|
|
197
|
+
"schema": "public",
|
|
198
|
+
"table": "kyc_documents"
|
|
199
|
+
},
|
|
200
|
+
"targetTable": {
|
|
201
|
+
"schema": "public",
|
|
202
|
+
"table": "user_preferences"
|
|
203
|
+
},
|
|
204
|
+
"column": "user_id",
|
|
205
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"sourceTable": {
|
|
209
|
+
"schema": "public",
|
|
210
|
+
"table": "orders"
|
|
211
|
+
},
|
|
212
|
+
"targetTable": {
|
|
213
|
+
"schema": "public",
|
|
214
|
+
"table": "support_tickets"
|
|
215
|
+
},
|
|
216
|
+
"column": "user_id",
|
|
217
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
"sourceTable": {
|
|
221
|
+
"schema": "public",
|
|
222
|
+
"table": "orders"
|
|
223
|
+
},
|
|
224
|
+
"targetTable": {
|
|
225
|
+
"schema": "public",
|
|
226
|
+
"table": "user_addresses"
|
|
227
|
+
},
|
|
228
|
+
"column": "user_id",
|
|
229
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"sourceTable": {
|
|
233
|
+
"schema": "public",
|
|
234
|
+
"table": "orders"
|
|
235
|
+
},
|
|
236
|
+
"targetTable": {
|
|
237
|
+
"schema": "public",
|
|
238
|
+
"table": "user_devices"
|
|
239
|
+
},
|
|
240
|
+
"column": "user_id",
|
|
241
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
242
|
+
},
|
|
243
|
+
{
|
|
244
|
+
"sourceTable": {
|
|
245
|
+
"schema": "public",
|
|
246
|
+
"table": "orders"
|
|
247
|
+
},
|
|
248
|
+
"targetTable": {
|
|
249
|
+
"schema": "public",
|
|
250
|
+
"table": "user_preferences"
|
|
251
|
+
},
|
|
252
|
+
"column": "user_id",
|
|
253
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
"sourceTable": {
|
|
257
|
+
"schema": "public",
|
|
258
|
+
"table": "support_tickets"
|
|
259
|
+
},
|
|
260
|
+
"targetTable": {
|
|
261
|
+
"schema": "public",
|
|
262
|
+
"table": "user_addresses"
|
|
263
|
+
},
|
|
264
|
+
"column": "user_id",
|
|
265
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
"sourceTable": {
|
|
269
|
+
"schema": "public",
|
|
270
|
+
"table": "support_tickets"
|
|
271
|
+
},
|
|
272
|
+
"targetTable": {
|
|
273
|
+
"schema": "public",
|
|
274
|
+
"table": "user_devices"
|
|
275
|
+
},
|
|
276
|
+
"column": "user_id",
|
|
277
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
"sourceTable": {
|
|
281
|
+
"schema": "public",
|
|
282
|
+
"table": "support_tickets"
|
|
283
|
+
},
|
|
284
|
+
"targetTable": {
|
|
285
|
+
"schema": "public",
|
|
286
|
+
"table": "user_preferences"
|
|
287
|
+
},
|
|
288
|
+
"column": "user_id",
|
|
289
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
"sourceTable": {
|
|
293
|
+
"schema": "public",
|
|
294
|
+
"table": "user_addresses"
|
|
295
|
+
},
|
|
296
|
+
"targetTable": {
|
|
297
|
+
"schema": "public",
|
|
298
|
+
"table": "user_devices"
|
|
299
|
+
},
|
|
300
|
+
"column": "user_id",
|
|
301
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
302
|
+
},
|
|
303
|
+
{
|
|
304
|
+
"sourceTable": {
|
|
305
|
+
"schema": "public",
|
|
306
|
+
"table": "user_addresses"
|
|
307
|
+
},
|
|
308
|
+
"targetTable": {
|
|
309
|
+
"schema": "public",
|
|
310
|
+
"table": "user_preferences"
|
|
311
|
+
},
|
|
312
|
+
"column": "user_id",
|
|
313
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
314
|
+
},
|
|
315
|
+
{
|
|
316
|
+
"sourceTable": {
|
|
317
|
+
"schema": "public",
|
|
318
|
+
"table": "user_devices"
|
|
319
|
+
},
|
|
320
|
+
"targetTable": {
|
|
321
|
+
"schema": "public",
|
|
322
|
+
"table": "user_preferences"
|
|
323
|
+
},
|
|
324
|
+
"column": "user_id",
|
|
325
|
+
"reason": "Both tables expose user_id but no physical foreign key was found."
|
|
326
|
+
},
|
|
327
|
+
{
|
|
328
|
+
"sourceTable": {
|
|
329
|
+
"schema": "public",
|
|
330
|
+
"table": "users"
|
|
331
|
+
},
|
|
332
|
+
"targetTable": {
|
|
333
|
+
"schema": "public",
|
|
334
|
+
"table": "legacy_crm_notes"
|
|
335
|
+
},
|
|
336
|
+
"column": "client_id",
|
|
337
|
+
"reason": "Table exposes client_id which conceptually maps to the root entity."
|
|
338
|
+
},
|
|
339
|
+
{
|
|
340
|
+
"sourceTable": {
|
|
341
|
+
"schema": "public",
|
|
342
|
+
"table": "users"
|
|
343
|
+
},
|
|
344
|
+
"targetTable": {
|
|
345
|
+
"schema": "public",
|
|
346
|
+
"table": "marketing_campaign_clicks"
|
|
347
|
+
},
|
|
348
|
+
"column": "target_email",
|
|
349
|
+
"reason": "Table exposes target_email which conceptually maps to the root entity."
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
"sourceTable": {
|
|
353
|
+
"schema": "public",
|
|
354
|
+
"table": "users"
|
|
355
|
+
},
|
|
356
|
+
"targetTable": {
|
|
357
|
+
"schema": "public",
|
|
358
|
+
"table": "third_party_telemetry"
|
|
359
|
+
},
|
|
360
|
+
"column": "user_uuid",
|
|
361
|
+
"reason": "Table exposes user_uuid which conceptually maps to the root entity."
|
|
362
|
+
}
|
|
363
|
+
],
|
|
364
|
+
"nextSteps": [
|
|
365
|
+
"Review every PII column and potential logical link with the application owner.",
|
|
366
|
+
"Copy reviewed targets into compliance.worker.yml and complete legal_attestation.",
|
|
367
|
+
"Run compliance-worker check-integrity before allowing live worker boot.",
|
|
368
|
+
"Sign the reviewed manifest with compliance-worker sign after DPO approval."
|
|
369
|
+
]
|
|
370
|
+
}
|
package/report.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Compliance Introspector Report
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
- Root table: `public.users`
|
|
6
|
+
- Generated at: `2026-06-12T05:19:07.235Z`
|
|
7
|
+
- Schema hash: `ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c`
|
|
8
|
+
- DAG targets: 15
|
|
9
|
+
- Tables with PII: 8
|
|
10
|
+
- PII columns: 9
|
|
11
|
+
- High-confidence findings: 6
|
|
12
|
+
- Review-required findings: 3
|
|
13
|
+
- Potential logical links: 20
|
|
14
|
+
|
|
15
|
+
## PII Findings
|
|
16
|
+
|
|
17
|
+
| Table | Column | Type | Confidence | Metadata | Content | Signatures |
|
|
18
|
+
| --- | --- | --- | ---: | ---: | ---: | --- |
|
|
19
|
+
| `public.legacy_crm_notes` | `agent_notes` | `text` | 0.950 | 0.000 | 1.000 | email, indian_mobile |
|
|
20
|
+
| `public.marketing_campaign_clicks` | `target_email` | `character varying` | 0.950 | 0.920 | 1.000 | email |
|
|
21
|
+
| `public.users` | `email` | `character varying` | 0.950 | 0.920 | 1.000 | email |
|
|
22
|
+
| `public.users` | `phone_number` | `character varying` | 0.920 | 0.920 | 1.000 | indian_mobile |
|
|
23
|
+
| `public.support_tickets` | `description` | `text` | 0.900 | 0.000 | 1.000 | indian_mobile |
|
|
24
|
+
| `public.ticket_messages` | `message_body` | `text` | 0.900 | 0.000 | 1.000 | indian_mobile |
|
|
25
|
+
| `public.audit_logs` | `ip_address` | `inet` | 0.820 | 0.820 | 1.000 | ipv4 |
|
|
26
|
+
| `public.user_devices` | `last_ip_address` | `inet` | 0.820 | 0.820 | 0.000 | metadata |
|
|
27
|
+
| `public.user_addresses` | `pincode` | `character varying` | 0.780 | 0.620 | 1.000 | indian_pin_code |
|
|
28
|
+
|
|
29
|
+
## Potential Logical Links
|
|
30
|
+
|
|
31
|
+
- `public.users.customer_id` <-> `public.abandoned_carts.customer_id`: Table exposes customer_id which conceptually maps to the root entity.
|
|
32
|
+
- `public.users.actor_id` <-> `public.audit_logs.actor_id`: Table exposes actor_id which conceptually maps to the root entity.
|
|
33
|
+
- `public.kyc_documents.user_id` <-> `public.orders.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
34
|
+
- `public.kyc_documents.user_id` <-> `public.support_tickets.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
35
|
+
- `public.kyc_documents.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
36
|
+
- `public.kyc_documents.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
37
|
+
- `public.kyc_documents.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
38
|
+
- `public.orders.user_id` <-> `public.support_tickets.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
39
|
+
- `public.orders.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
40
|
+
- `public.orders.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
41
|
+
- `public.orders.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
42
|
+
- `public.support_tickets.user_id` <-> `public.user_addresses.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
43
|
+
- `public.support_tickets.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
44
|
+
- `public.support_tickets.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
45
|
+
- `public.user_addresses.user_id` <-> `public.user_devices.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
46
|
+
- `public.user_addresses.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
47
|
+
- `public.user_devices.user_id` <-> `public.user_preferences.user_id`: Both tables expose user_id but no physical foreign key was found.
|
|
48
|
+
- `public.users.client_id` <-> `public.legacy_crm_notes.client_id`: Table exposes client_id which conceptually maps to the root entity.
|
|
49
|
+
- `public.users.target_email` <-> `public.marketing_campaign_clicks.target_email`: Table exposes target_email which conceptually maps to the root entity.
|
|
50
|
+
- `public.users.user_uuid` <-> `public.third_party_telemetry.user_uuid`: Table exposes user_uuid which conceptually maps to the root entity.
|
|
51
|
+
|
|
52
|
+
## Next Steps
|
|
53
|
+
|
|
54
|
+
- Review every PII column and potential logical link with the application owner.
|
|
55
|
+
- Copy reviewed targets into compliance.worker.yml and complete legal_attestation.
|
|
56
|
+
- Run compliance-worker check-integrity before allowing live worker boot.
|
|
57
|
+
- Sign the reviewed manifest with compliance-worker sign after DPO approval.
|
|
@@ -137,6 +137,7 @@ const METADATA_PATTERNS: Array<{ pattern: RegExp; score: number }> = [
|
|
|
137
137
|
{ pattern: /(^|_)(driving_license|driving_licence|license_number|licence_number|dl_number|dl_no)($|_)/i, score: MEDIUM_METADATA_SCORE },
|
|
138
138
|
{ pattern: /(^|_)(address|street|postal_code|zip_code|pin_code|pincode)($|_)/i, score: WEAK_METADATA_SCORE },
|
|
139
139
|
{ pattern: /(^|_)(device_fingerprint|device_id|advertising_id|gaid|idfa)($|_)/i, score: WEAK_METADATA_SCORE },
|
|
140
|
+
{ pattern: /(^|_)(document_number|identity_number|id_number)($|_)/i, score: WEAK_METADATA_SCORE },
|
|
140
141
|
];
|
|
141
142
|
|
|
142
143
|
function qualifiedKey(table: QualifiedTable): string {
|
|
@@ -336,12 +337,23 @@ function classifyLeafDetailed(value: string, columnName: string = ""): ContentSi
|
|
|
336
337
|
const bytes = textEncoder.encode(value.trim());
|
|
337
338
|
try {
|
|
338
339
|
const normalized = textDecoder.decode(bytes).trim();
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
340
|
+
// Split into tokens and strip leading/trailing punctuation so regexes can match substrings
|
|
341
|
+
const tokens = normalized.split(/\s+/).map((t) => t.replace(/^[^\w\+]+|[^\w]+$/g, ""));
|
|
342
|
+
const candidates = Array.from(new Set([normalized, ...tokens])).filter((t) => t.length > 0);
|
|
343
|
+
|
|
344
|
+
const matches = new Set<ContentSignature>();
|
|
345
|
+
for (const candidate of candidates) {
|
|
346
|
+
for (const signature of CONTENT_SIGNATURES) {
|
|
347
|
+
if (
|
|
348
|
+
signatureHasMetadataSupport(signature, columnName) &&
|
|
349
|
+
signature.pattern.test(candidate) &&
|
|
350
|
+
(!signature.validate || signature.validate(candidate))
|
|
351
|
+
) {
|
|
352
|
+
matches.add(signature);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
return Array.from(matches);
|
|
345
357
|
} finally {
|
|
346
358
|
bytes.fill(0);
|
|
347
359
|
}
|
|
@@ -236,7 +236,7 @@ export async function discoverPotentialLogicalLinks(
|
|
|
236
236
|
const byColumn = new Map<string, QualifiedTable[]>();
|
|
237
237
|
for (const row of rows) {
|
|
238
238
|
const normalized = row.column_name.toLowerCase();
|
|
239
|
-
if (!/^(?:user_id|account_id|customer_id|member_id|subject_id|.*_user_id)$/.test(normalized)) {
|
|
239
|
+
if (!/^(?:user_id|account_id|customer_id|client_id|actor_id|user_uuid|member_id|subject_id|.*_user_id|target_email|user_email)$/.test(normalized)) {
|
|
240
240
|
continue;
|
|
241
241
|
}
|
|
242
242
|
|
|
@@ -247,7 +247,25 @@ export async function discoverPotentialLogicalLinks(
|
|
|
247
247
|
|
|
248
248
|
const links: PotentialLogicalLink[] = [];
|
|
249
249
|
const emitted = new Set<string>();
|
|
250
|
+
|
|
250
251
|
for (const [column, tables] of byColumn.entries()) {
|
|
252
|
+
for (const table of tables) {
|
|
253
|
+
// Explicitly link any orphan table that has an identity-like column to the root table
|
|
254
|
+
if (table.schema === root.schema && table.table === root.table) {
|
|
255
|
+
continue;
|
|
256
|
+
}
|
|
257
|
+
const key = physicalLinkKey(root, table, column);
|
|
258
|
+
if (!physicalLinks.has(key) && !emitted.has(key)) {
|
|
259
|
+
emitted.add(key);
|
|
260
|
+
links.push({
|
|
261
|
+
sourceTable: root,
|
|
262
|
+
targetTable: table,
|
|
263
|
+
column,
|
|
264
|
+
reason: `Table exposes ${column} which conceptually maps to the root entity.`,
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
251
269
|
if (tables.length < 2) {
|
|
252
270
|
continue;
|
|
253
271
|
}
|
|
@@ -38,14 +38,6 @@ export async function runIntrospector(options: RunIntrospectorOptions): Promise<
|
|
|
38
38
|
maxDepth,
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
-
const classifiedColumns = await classifyDagTargets({
|
|
42
|
-
sql: options.sql,
|
|
43
|
-
targets: dag,
|
|
44
|
-
samplePercent: options.samplePercent,
|
|
45
|
-
sampleLimit: options.sampleLimit,
|
|
46
|
-
threshold: options.threshold,
|
|
47
|
-
});
|
|
48
|
-
|
|
49
41
|
const [schemaHash, potentialLogicalLinks] = await Promise.all([
|
|
50
42
|
detectSchemaDrift(options.sql, root.schema),
|
|
51
43
|
discoverPotentialLogicalLinks(
|
|
@@ -58,7 +50,49 @@ export async function runIntrospector(options: RunIntrospectorOptions): Promise<
|
|
|
58
50
|
),
|
|
59
51
|
]);
|
|
60
52
|
|
|
61
|
-
const
|
|
53
|
+
const dagTableKeys = new Set(dag.map((t) => targetKey(t.table.schema, t.table.table)));
|
|
54
|
+
const logicalTargets: typeof dag = [];
|
|
55
|
+
|
|
56
|
+
for (const link of potentialLogicalLinks) {
|
|
57
|
+
const sourceKey = targetKey(link.sourceTable.schema, link.sourceTable.table);
|
|
58
|
+
const targetKeyStr = targetKey(link.targetTable.schema, link.targetTable.table);
|
|
59
|
+
|
|
60
|
+
if (dagTableKeys.has(sourceKey) && !dagTableKeys.has(targetKeyStr)) {
|
|
61
|
+
dagTableKeys.add(targetKeyStr);
|
|
62
|
+
logicalTargets.push({
|
|
63
|
+
table: link.targetTable,
|
|
64
|
+
parentTable: link.sourceTable,
|
|
65
|
+
constraintName: null,
|
|
66
|
+
childColumns: [link.column],
|
|
67
|
+
parentColumns: [link.column],
|
|
68
|
+
depth: maxDepth,
|
|
69
|
+
fkCondition: `LOGICAL_LINK (${link.column})`,
|
|
70
|
+
});
|
|
71
|
+
} else if (dagTableKeys.has(targetKeyStr) && !dagTableKeys.has(sourceKey)) {
|
|
72
|
+
dagTableKeys.add(sourceKey);
|
|
73
|
+
logicalTargets.push({
|
|
74
|
+
table: link.sourceTable,
|
|
75
|
+
parentTable: link.targetTable,
|
|
76
|
+
constraintName: null,
|
|
77
|
+
childColumns: [link.column],
|
|
78
|
+
parentColumns: [link.column],
|
|
79
|
+
depth: maxDepth,
|
|
80
|
+
fkCondition: `LOGICAL_LINK (${link.column})`,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
const fullTargets = [...dag, ...logicalTargets];
|
|
86
|
+
|
|
87
|
+
const classifiedColumns = await classifyDagTargets({
|
|
88
|
+
sql: options.sql,
|
|
89
|
+
targets: fullTargets,
|
|
90
|
+
samplePercent: options.samplePercent,
|
|
91
|
+
sampleLimit: options.sampleLimit,
|
|
92
|
+
threshold: options.threshold,
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
const targets: IntrospectorTargetDraft[] = fullTargets.map((target) => ({
|
|
62
96
|
table: target.table,
|
|
63
97
|
parentTable: target.parentTable,
|
|
64
98
|
fkCondition: target.fkCondition,
|