dpdp-erasure-cli 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # dpdp-erasure-cli
2
2
 
3
- [![npm version](https://badge.fury.io/js/dpdp-erasure-cli.svg)](https://badge.fury.io/js/dpdp-erasure-cli)
3
+ [![npm version](https://img.shields.io/npm/v/dpdp-erasure-cli?color=14b8a6&style=flat-square)](https://www.npmjs.com/package/dpdp-erasure-cli)
4
4
 
5
5
  **The DPDP Erasure Engine CLI** is an automated, AI-assisted privacy toolkit that helps you securely discover, map, and cryptographically shred PII (Personally Identifiable Information) in your database.
6
6
 
@@ -20,6 +20,19 @@ Manually deleting a user across dozens of microservice tables is dangerous and p
20
20
 
21
21
  ---
22
22
 
23
+ ## ⚠️ Introspector Limitations (100% Transparency)
24
+
25
+ While our Introspector is incredibly powerful at analyzing metadata, foreign keys, and block-sampling text to find common identifiers (like Emails, Phone Numbers, Aadhaar, PAN, SSN, Credit Cards), it is fundamentally a regex and heuristic engine—not a sentient AI.
26
+
27
+ **What we CANNOT do:**
28
+ 1. **Generic Column Names:** If your production database has a column named `info` or `data` and it happens to contain a user's *First Name* or *Last Name* embedded inside a generic string, our engine cannot confidently flag it as PII. We can only guess "Name" PII if the column is named descriptively (e.g., `full_name`, `first_name`, `last_name`).
29
+ 2. **Passwords, Tokens, and Secrets:** We cannot differentiate a random SHA-256 password hash or an API token from an ordinary ID string unless the column has a clear name like `password`, `secret`, `token`, or `api_key`.
30
+ 3. **Roles and Permissions:** Similarly, we cannot guess if an integer or string denotes an administrative permission unless the column gives us a hint (like `role_id` or `access_level`).
31
+
32
+ **The Solution:** The Introspector is designed to do 95% of the heavy lifting. **The remaining 5% requires a human DPO or Developer.** You must always review the generated `compliance.worker.yml` and manually add any deeply hidden sensitive columns before deploying.
33
+
34
+ ---
35
+
23
36
  ## 🚀 Installation
24
37
 
25
38
  This CLI relies on [Bun](https://bun.sh/) for native cryptographic bindings and high-performance execution.
@@ -1,6 +1,6 @@
1
1
  # AUTO-GENERATED BY INTROSPECTOR
2
2
  # REVIEW REQUIRED: DPO must validate every table, join condition, and PII column before production use.
3
- # Generated At: 2026-06-12T05:19:07.235Z
3
+ # Generated At: 2026-06-12T08:15:53.497Z
4
4
 
5
5
  legal_attestation:
6
6
  dpo_identifier: PENDING_REVIEW
@@ -13,6 +13,20 @@ legal_attestation:
13
13
  legal_disclaimer:
14
14
  text: "Auto-generated by Compliance Worker. The DPO/Developer is responsible for verifying all logical links and PII mappings."
15
15
 
16
+ # ===================================================================================
17
+ # HOW TO READ THIS MANIFEST:
18
+ # - 'targets': The list of tables the worker will delete/redact from.
19
+ # - 'parent': The table that owns this data. The worker deletes parent-first or child-first depending on the DB constraints.
20
+ # - 'join': The SQL condition used to link the child table to the parent table.
21
+ # - 'pii_columns': Columns identified as containing Personally Identifiable Information (PII).
22
+ # - 'action': 'redact' (anonymizes the row but keeps it); when 'action' is omitted, the target is implicitly 'delete' (removes the row entirely).
23
+ #
24
+ # IMPORTANT:
25
+ # 1. Review all 'join' conditions. If the Introspector guessed a join for an orphaned table, verify it.
26
+ # 2. Review all 'pii_columns' to ensure no sensitive columns were missed.
27
+ # 3. Replace 'PENDING_REVIEW' in legal_attestation once verified.
28
+ # ===================================================================================
29
+
16
30
  rules:
17
31
  - id: dpdp_standard
18
32
  root_table: public.users
@@ -98,14 +112,14 @@ rules:
98
112
  message_body: HMAC
99
113
  - table: public.abandoned_carts
100
114
  parent: public.users
101
- join: "LOGICAL_LINK (customer_id)"
102
- parent_columns: [customer_id]
115
+ join: "public.users.id = public.abandoned_carts.customer_id"
116
+ parent_columns: [id]
103
117
  child_columns: [customer_id]
104
118
  pii_columns: []
105
119
  - table: public.audit_logs
106
120
  parent: public.users
107
- join: "LOGICAL_LINK (actor_id)"
108
- parent_columns: [actor_id]
121
+ join: "public.users.id = public.audit_logs.actor_id"
122
+ parent_columns: [id]
109
123
  child_columns: [actor_id]
110
124
  # Introspector Confidence: 0.820 (ipv4)
111
125
  pii_columns: [ip_address]
@@ -115,8 +129,8 @@ rules:
115
129
  ip_address: HMAC
116
130
  - table: public.legacy_crm_notes
117
131
  parent: public.users
118
- join: "LOGICAL_LINK (client_id)"
119
- parent_columns: [client_id]
132
+ join: "public.users.id = public.legacy_crm_notes.client_id"
133
+ parent_columns: [id]
120
134
  child_columns: [client_id]
121
135
  # Introspector Confidence: 0.950 (email, indian_mobile)
122
136
  pii_columns: [agent_notes]
@@ -126,8 +140,8 @@ rules:
126
140
  agent_notes: HMAC
127
141
  - table: public.marketing_campaign_clicks
128
142
  parent: public.users
129
- join: "LOGICAL_LINK (target_email)"
130
- parent_columns: [target_email]
143
+ join: "public.users.email = public.marketing_campaign_clicks.target_email"
144
+ parent_columns: [email]
131
145
  child_columns: [target_email]
132
146
  # Introspector Confidence: 0.950 (email)
133
147
  pii_columns: [target_email]
@@ -137,8 +151,8 @@ rules:
137
151
  target_email: HMAC
138
152
  - table: public.third_party_telemetry
139
153
  parent: public.users
140
- join: "LOGICAL_LINK (user_uuid)"
141
- parent_columns: [user_uuid]
154
+ join: "public.users.id = public.third_party_telemetry.user_uuid"
155
+ parent_columns: [id]
142
156
  child_columns: [user_uuid]
143
157
  pii_columns: []
144
158
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dpdp-erasure-cli",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "license": "Apache-2.0",
5
5
  "keywords": [
6
6
  "dpdp",
@@ -54,4 +54,4 @@
54
54
  "postgres": "^3.4.9",
55
55
  "zod": "^4.4.2"
56
56
  }
57
- }
57
+ }
package/report.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "summary": {
3
3
  "rootTable": "public.users",
4
- "generatedAt": "2026-06-12T05:19:07.235Z",
4
+ "generatedAt": "2026-06-12T08:15:53.497Z",
5
5
  "schemaHash": "ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c",
6
6
  "targetCount": 15,
7
7
  "tablesWithPii": 8,
package/report.md CHANGED
@@ -3,7 +3,7 @@
3
3
  ## Summary
4
4
 
5
5
  - Root table: `public.users`
6
- - Generated at: `2026-06-12T05:19:07.235Z`
6
+ - Generated at: `2026-06-12T08:15:53.497Z`
7
7
  - Schema hash: `ea9e816d30fcee6bd4f322f1fb769e853c2e7ee5b19263aaa54f5ef80189212c`
8
8
  - DAG targets: 15
9
9
  - Tables with PII: 8
@@ -10,18 +10,18 @@ import boxen from "boxen";
10
10
  export const UI = {
11
11
  header: (title: string) => {
12
12
  console.log(
13
- boxen(pc.bold(pc.blue(`COMPLIANCE WORKER — ${title.toUpperCase()}`)), {
13
+ boxen(pc.bold(pc.cyan(`COMPLIANCE WORKER — ${title.toUpperCase()}`)), {
14
14
  padding: { top: 0, bottom: 0, left: 2, right: 2 },
15
15
  margin: { top: 1, bottom: 1 },
16
16
  borderStyle: "round",
17
- borderColor: "blue",
17
+ borderColor: "cyan",
18
18
  })
19
19
  );
20
20
  },
21
21
 
22
22
  divider: () => console.log(pc.gray("─".repeat(process.stdout.columns || 60))),
23
23
 
24
- spinner: (text: string): Ora => ora({ text, color: "blue" }).start(),
24
+ spinner: (text: string): Ora => ora({ text, color: "cyan" }).start(),
25
25
 
26
26
  success: (msg: string) => console.log(`\n${pc.green("✔")} ${pc.bold(msg)}`),
27
27
  error: (msg: string) => console.error(`\n${pc.red("✖")} ${pc.bold(msg)}`),
@@ -121,6 +121,10 @@ const CONTENT_SIGNATURES: ContentSignature[] = [
121
121
  ];
122
122
 
123
123
  const METADATA_PATTERNS: Array<{ pattern: RegExp; score: number }> = [
124
+ { pattern: /(^|_)(full_name|first_name|last_name|middle_name|surname|given_name|display_name)($|_)/i, score: STRONG_METADATA_SCORE },
125
+ { pattern: /(^|_)name($|_)/i, score: MEDIUM_METADATA_SCORE },
126
+ { pattern: /(^|_)(password|passwd|pwd|secret|token|api_key|access_token|refresh_token|auth_token|hash|salt)($|_)/i, score: STRONG_METADATA_SCORE },
127
+ { pattern: /(^|_)(role|roles|permission|permissions|group|groups|acl|access_level)($|_)/i, score: WEAK_METADATA_SCORE },
124
128
  { pattern: /(^|_)(email|e_mail|email_address|mail_address|contact_email)($|_)/i, score: STRONG_METADATA_SCORE },
125
129
  { pattern: /(^|_)(phone|mobile|msisdn|telephone|contact_number|whatsapp)(_number|_no)?($|_)/i, score: STRONG_METADATA_SCORE },
126
130
  { pattern: /(^|_)(aadhaar|aadhar|uidai)(_number|_no|_id)?($|_)/i, score: STRONG_METADATA_SCORE },
@@ -53,10 +53,10 @@ describe("Introspector PII classifier", () => {
53
53
  expect(classifyLeaf("560001", "postal_code")).toContain("indian_pin_code");
54
54
  });
55
55
 
56
- it("does not infer personal names without a dedicated NER model", () => {
57
- expect(metadataScore("full_name")).toBe(0);
58
- expect(metadataScore("first_name")).toBe(0);
59
- expect(metadataScore("customer_name")).toBe(0);
56
+ it("infers personal names based on standard developer metadata patterns", () => {
57
+ expect(metadataScore("full_name")).toBe(0.92);
58
+ expect(metadataScore("first_name")).toBe(0.92);
59
+ expect(metadataScore("customer_name")).toBe(0.82);
60
60
  expect(classifyLeaf("Priya Sharma")).toEqual([]);
61
61
  });
62
62
  });
@@ -201,6 +201,7 @@ describe("Offline Introspector", () => {
201
201
  expect(users?.piiColumns.map((column) => column.column).sort()).toEqual([
202
202
  "card_number",
203
203
  "email",
204
+ "full_name",
204
205
  "gstin",
205
206
  "phone",
206
207
  "upi_id",
@@ -215,7 +216,6 @@ describe("Offline Introspector", () => {
215
216
  expect(yaml).toContain(`root_table: ${schema}.users`);
216
217
  expect(yaml).toContain(`table: ${schema}.profiles`);
217
218
  expect(yaml).toContain("pii_columns: [pan, aadhaar_payload, nested_payload]");
218
- expect(yaml).not.toContain("full_name");
219
219
  expect(yaml).toContain("schema_hash:");
220
220
  expect(yaml).toContain("generated_by: compliance-introspector-v1");
221
221
  expect(yaml).toContain("legal_disclaimer:");