dpdp-erasure-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/.env.example +55 -0
  2. package/Dockerfile +33 -0
  3. package/compliance.worker.yaml +64 -0
  4. package/package.json +41 -0
  5. package/src/constants/index.ts +1 -0
  6. package/src/errors/fail.ts +110 -0
  7. package/src/errors/index.ts +4 -0
  8. package/src/errors/inferer.ts +166 -0
  9. package/src/errors/registry.ts +122 -0
  10. package/src/errors/types.ts +65 -0
  11. package/src/errors/worker.ts +161 -0
  12. package/src/index.ts +328 -0
  13. package/src/lib/crypto/digest.ts +22 -0
  14. package/src/lib/crypto/encoding.ts +78 -0
  15. package/src/lib/crypto/index.ts +2 -0
  16. package/src/lib/index.ts +1 -0
  17. package/src/modules/bootstrap/index.ts +2 -0
  18. package/src/modules/bootstrap/integrity.ts +38 -0
  19. package/src/modules/bootstrap/preflight.ts +296 -0
  20. package/src/modules/cli/check-integrity.ts +48 -0
  21. package/src/modules/cli/dry-run.ts +90 -0
  22. package/src/modules/cli/graph.ts +87 -0
  23. package/src/modules/cli/index.ts +184 -0
  24. package/src/modules/cli/init.ts +115 -0
  25. package/src/modules/cli/inspect.ts +86 -0
  26. package/src/modules/cli/introspector.ts +117 -0
  27. package/src/modules/cli/keygen.ts +38 -0
  28. package/src/modules/cli/scan.ts +126 -0
  29. package/src/modules/cli/sign.ts +50 -0
  30. package/src/modules/cli/ui.ts +61 -0
  31. package/src/modules/cli/verify-schema.ts +31 -0
  32. package/src/modules/cli/verify.ts +85 -0
  33. package/src/modules/config/compatibility.ts +271 -0
  34. package/src/modules/config/index.ts +4 -0
  35. package/src/modules/config/reader.ts +149 -0
  36. package/src/modules/config/signature.ts +69 -0
  37. package/src/modules/config/validation.ts +658 -0
  38. package/src/modules/crypto/aes.ts +158 -0
  39. package/src/modules/crypto/envelope.ts +48 -0
  40. package/src/modules/crypto/hmac.ts +60 -0
  41. package/src/modules/crypto/index.ts +3 -0
  42. package/src/modules/db/drift.ts +36 -0
  43. package/src/modules/db/graph.ts +203 -0
  44. package/src/modules/db/index.ts +4 -0
  45. package/src/modules/db/migrations.ts +254 -0
  46. package/src/modules/db/sql-debug.ts +61 -0
  47. package/src/modules/engine/blob/index.ts +3 -0
  48. package/src/modules/engine/blob/s3.ts +455 -0
  49. package/src/modules/engine/blob/store.ts +236 -0
  50. package/src/modules/engine/blob/types.ts +44 -0
  51. package/src/modules/engine/helpers/identity.ts +47 -0
  52. package/src/modules/engine/helpers/index.ts +4 -0
  53. package/src/modules/engine/helpers/outbox.ts +118 -0
  54. package/src/modules/engine/helpers/runtime.ts +115 -0
  55. package/src/modules/engine/helpers/types.ts +61 -0
  56. package/src/modules/engine/index.ts +6 -0
  57. package/src/modules/engine/notifier/config.ts +147 -0
  58. package/src/modules/engine/notifier/dispatcher.ts +300 -0
  59. package/src/modules/engine/notifier/index.ts +3 -0
  60. package/src/modules/engine/notifier/payload.ts +51 -0
  61. package/src/modules/engine/notifier/reservation.ts +153 -0
  62. package/src/modules/engine/notifier/types.ts +38 -0
  63. package/src/modules/engine/shredder.ts +254 -0
  64. package/src/modules/engine/types.ts +146 -0
  65. package/src/modules/engine/vault/compiled-targets.ts +562 -0
  66. package/src/modules/engine/vault/context.ts +254 -0
  67. package/src/modules/engine/vault/dry-run.ts +94 -0
  68. package/src/modules/engine/vault/execution.ts +485 -0
  69. package/src/modules/engine/vault/index.ts +3 -0
  70. package/src/modules/engine/vault/purge.ts +82 -0
  71. package/src/modules/engine/vault/retention.ts +124 -0
  72. package/src/modules/engine/vault/satellite-mutation.ts +193 -0
  73. package/src/modules/engine/vault/satellite.ts +103 -0
  74. package/src/modules/engine/vault/shadow.ts +36 -0
  75. package/src/modules/engine/vault/static-plan.ts +116 -0
  76. package/src/modules/engine/vault/store.ts +34 -0
  77. package/src/modules/engine/vault/vault.ts +84 -0
  78. package/src/modules/introspector/classifier.ts +502 -0
  79. package/src/modules/introspector/dag.ts +276 -0
  80. package/src/modules/introspector/index.ts +7 -0
  81. package/src/modules/introspector/naming.ts +75 -0
  82. package/src/modules/introspector/report.ts +153 -0
  83. package/src/modules/introspector/run.ts +123 -0
  84. package/src/modules/introspector/s3-sampler.ts +227 -0
  85. package/src/modules/introspector/types.ts +131 -0
  86. package/src/modules/introspector/yaml.ts +101 -0
  87. package/src/modules/network/api/control-plane.ts +275 -0
  88. package/src/modules/network/api/index.ts +1 -0
  89. package/src/modules/network/api/validation.ts +71 -0
  90. package/src/modules/network/index.ts +4 -0
  91. package/src/modules/network/object-store/aws/client.ts +444 -0
  92. package/src/modules/network/object-store/aws/credentials.ts +271 -0
  93. package/src/modules/network/object-store/aws/index.ts +2 -0
  94. package/src/modules/network/object-store/aws/sigv4.ts +190 -0
  95. package/src/modules/network/object-store/aws/type.ts +6 -0
  96. package/src/modules/network/object-store/index.ts +1 -0
  97. package/src/modules/network/outbox/dispatcher.ts +183 -0
  98. package/src/modules/network/outbox/index.ts +3 -0
  99. package/src/modules/network/outbox/process.ts +133 -0
  100. package/src/modules/network/outbox/shared.ts +56 -0
  101. package/src/modules/network/outbox/store.ts +346 -0
  102. package/src/modules/network/outbox/types.ts +54 -0
  103. package/src/modules/network/request-signing.ts +61 -0
  104. package/src/modules/worker/index.ts +2 -0
  105. package/src/modules/worker/tasks.ts +58 -0
  106. package/src/modules/worker/types.ts +89 -0
  107. package/src/modules/worker/worker.ts +243 -0
  108. package/src/secrets/index.ts +4 -0
  109. package/src/secrets/kms/index.ts +2 -0
  110. package/src/secrets/kms/signature.ts +82 -0
  111. package/src/secrets/kms/validation.ts +64 -0
  112. package/src/secrets/reader.ts +42 -0
  113. package/src/secrets/repository/crypto.ts +89 -0
  114. package/src/secrets/repository/index.ts +2 -0
  115. package/src/secrets/repository/methods.ts +37 -0
  116. package/src/secrets/resolvers.ts +247 -0
  117. package/src/secrets/signature.ts +78 -0
  118. package/src/types/index.ts +1 -0
  119. package/src/types/types.ts +23 -0
  120. package/src/utils/identifiers.ts +48 -0
  121. package/src/utils/index.ts +3 -0
  122. package/src/utils/json.ts +35 -0
  123. package/src/utils/logger.ts +161 -0
  124. package/src/validation/zod.ts +70 -0
  125. package/tests/adversarial.test.ts +464 -0
  126. package/tests/blob-s3.test.ts +216 -0
  127. package/tests/config.test.ts +395 -0
  128. package/tests/control-plane-client.test.ts +108 -0
  129. package/tests/crypto.test.ts +106 -0
  130. package/tests/errors.test.ts +69 -0
  131. package/tests/fetch-dispatcher.test.ts +213 -0
  132. package/tests/graph.test.ts +84 -0
  133. package/tests/helpers/index.ts +101 -0
  134. package/tests/index-preflight.test.ts +168 -0
  135. package/tests/introspector-classifier.test.ts +62 -0
  136. package/tests/introspector-report.test.ts +85 -0
  137. package/tests/introspector.test.ts +394 -0
  138. package/tests/kms.test.ts +124 -0
  139. package/tests/logger.test.ts +61 -0
  140. package/tests/notifier.test.ts +303 -0
  141. package/tests/outbox.test.ts +478 -0
  142. package/tests/purge-policy.test.ts +124 -0
  143. package/tests/retention.test.ts +103 -0
  144. package/tests/s3-client.test.ts +110 -0
  145. package/tests/satellite.test.ts +119 -0
  146. package/tests/schema-compatibility.test.ts +237 -0
  147. package/tests/schema-integrity.test.ts +64 -0
  148. package/tests/shredder.test.ts +163 -0
  149. package/tests/vault.compiled-targets.test.ts +243 -0
  150. package/tests/vault.replica.test.ts +59 -0
  151. package/tests/vault.test.ts +279 -0
  152. package/tests/worker.retry.test.ts +291 -0
  153. package/tests/worker.test.ts +200 -0
  154. package/tsconfig.json +19 -0
  155. package/vitest.config.ts +13 -0
@@ -0,0 +1,276 @@
1
+ import { MAX_DEPTH } from "@/constants";
2
+ import type { CompileDagOptions, DagTarget, PotentialLogicalLink, QualifiedTable } from "./types";
3
+ import { fail } from "@/errors";
4
+ import { formatJoinCondition, parseQualifiedTable } from "./naming";
5
+ import { parseBuildCommand } from "typescript";
6
+
7
/**
 * One foreign-key edge as returned by the recursive graph query in `compileStaticDag`.
 */
interface DagRow {
  /** Schema that owns the foreign-key constraint. */
  constraint_schema: string;
  /** Name of the foreign-key constraint. */
  constraint_name: string;
  /** Schema of the table that declares the foreign key. */
  child_schema: string;
  /** Table that declares the foreign key. */
  child_table: string;
  /** Schema of the referenced table. */
  parent_schema: string;
  /** Referenced table. */
  parent_table: string;
  /** Foreign-key columns on the child, aggregated in `ordinal_position` order. */
  child_columns: string[];
  /** Referenced columns on the parent, aggregated in the same order as `child_columns`. */
  parent_columns: string[];
  /** Number of edges between the root table and this child along the traversed path. */
  depth: number;
}
18
+
19
/**
 * One row of `information_schema.columns` as selected by `discoverPotentialLogicalLinks`.
 */
interface ColumnRow {
  /** Schema that contains the table. */
  table_schema: string;
  /** Table that exposes the column. */
  table_name: string;
  /** Column name exactly as reported by the catalog. */
  column_name: string;
}
24
+
25
/**
 * Pairs a schema name with a table name.
 *
 * @param schema - Schema name.
 * @param table - Table name.
 * @returns Qualified table reference holding both parts unchanged.
 */
function toQualifiedTable(schema: string, table: string): QualifiedTable {
  const qualified: QualifiedTable = { schema, table };
  return qualified;
}
28
+
29
/**
 * Builds the depth-0 DAG entry for the root table.
 *
 * The root has no parent and no constraint, so its column lists are empty and its
 * join condition is the literal marker `"ROOT"`.
 *
 * @param root - Root table of the DAG.
 * @returns Target describing the root itself.
 */
function rootTarget(root: QualifiedTable): DagTarget {
  const target: DagTarget = {
    table: root,
    parentTable: null,
    constraintName: null,
    childColumns: [],
    parentColumns: [],
    depth: 0,
    fkCondition: "ROOT",
  };
  return target;
}
40
+
41
/**
 * Compiles foreign-key dependencies from `information_schema` into a static DAG target list.
 *
 * The query is read-only, bounded by `maxDepth`, and scoped to the root table schema so
 * unrelated tenant/test schemas cannot slow or block compilation. Composite foreign keys are
 * preserved by aligning `key_column_usage.position_in_unique_constraint` with the referenced key columns.
 *
 * `constraint_column_usage` exposes one row per referenced column, so it is matched on the
 * referenced column name as well as the table. Without that predicate a foreign key with N
 * columns would produce N x N rows and duplicated entries in the aggregated column arrays.
 *
 * @param options - Database handle, root table, default schema, and recursion breaker.
 * @returns Root target plus dependent satellite tables with explicit join predicates.
 * @throws {WorkerError} If the depth breaker is invalid.
 */
export async function compileStaticDag(options: CompileDagOptions): Promise<DagTarget[]> {
  const maxDepth = options.maxDepth ?? MAX_DEPTH;
  if (!Number.isInteger(maxDepth) || maxDepth < 1 || maxDepth > MAX_DEPTH) {
    fail({
      code: "INTROSPECTOR_DEPTH_INVALID",
      title: "Invalid introspector depth",
      // Report the real upper bound instead of a hard-coded number that can drift from MAX_DEPTH.
      detail: `Static DAG maxDepth must be an integer between 1 and ${MAX_DEPTH}.`,
      category: "validation",
      retryable: false,
      fatal: true,
      context: { maxDepth },
    });
  }

  const root = parseQualifiedTable(options.rootTable, options.defaultSchema);
  const rows = await options.sql<DagRow[]>`
    WITH RECURSIVE fk_columns AS (
      SELECT
        tc.constraint_schema,
        tc.constraint_name,
        kcu.table_schema AS child_schema,
        kcu.table_name AS child_table,
        pk.table_schema AS parent_schema,
        pk.table_name AS parent_table,
        kcu.column_name AS child_column,
        pk.column_name AS parent_column,
        kcu.ordinal_position
      FROM information_schema.table_constraints AS tc
      JOIN information_schema.key_column_usage AS kcu
        ON kcu.constraint_schema = tc.constraint_schema
        AND kcu.constraint_name = tc.constraint_name
        AND kcu.table_schema = tc.table_schema
        AND kcu.table_name = tc.table_name
      JOIN information_schema.referential_constraints AS rc
        ON rc.constraint_schema = tc.constraint_schema
        AND rc.constraint_name = tc.constraint_name
      JOIN information_schema.key_column_usage AS pk
        ON pk.constraint_schema = rc.unique_constraint_schema
        AND pk.constraint_name = rc.unique_constraint_name
        AND pk.ordinal_position = kcu.position_in_unique_constraint
      JOIN information_schema.constraint_column_usage AS ccu
        ON ccu.constraint_schema = tc.constraint_schema
        AND ccu.constraint_name = tc.constraint_name
      WHERE tc.constraint_type = 'FOREIGN KEY'
        AND kcu.table_schema = ${root.schema}
        AND ccu.table_schema = pk.table_schema
        AND ccu.table_name = pk.table_name
        AND ccu.column_name = pk.column_name
    ),
    fk_edges AS (
      SELECT
        constraint_schema,
        constraint_name,
        child_schema,
        child_table,
        parent_schema,
        parent_table,
        array_agg(child_column ORDER BY ordinal_position) AS child_columns,
        array_agg(parent_column ORDER BY ordinal_position) AS parent_columns
      FROM fk_columns
      GROUP BY
        constraint_schema,
        constraint_name,
        child_schema,
        child_table,
        parent_schema,
        parent_table
    ),
    graph AS (
      SELECT
        constraint_schema,
        constraint_name,
        child_schema,
        child_table,
        parent_schema,
        parent_table,
        child_columns,
        parent_columns,
        1 AS depth,
        ARRAY[parent_schema || '.' || parent_table, child_schema || '.' || child_table] AS visited
      FROM fk_edges
      WHERE parent_schema = ${root.schema}
        AND parent_table = ${root.table}

      UNION ALL

      SELECT
        edge.constraint_schema,
        edge.constraint_name,
        edge.child_schema,
        edge.child_table,
        edge.parent_schema,
        edge.parent_table,
        edge.child_columns,
        edge.parent_columns,
        graph.depth + 1 AS depth,
        graph.visited || (edge.child_schema || '.' || edge.child_table)
      FROM fk_edges AS edge
      JOIN graph
        ON edge.parent_schema = graph.child_schema
        AND edge.parent_table = graph.child_table
      WHERE graph.depth < ${maxDepth}
        AND NOT (edge.child_schema || '.' || edge.child_table) = ANY(graph.visited)
    )
    SELECT
      constraint_schema,
      constraint_name,
      child_schema,
      child_table,
      parent_schema,
      parent_table,
      child_columns,
      parent_columns,
      depth
    FROM graph
    ORDER BY depth ASC, child_schema ASC, child_table ASC, constraint_name ASC
  `;

  const targets = new Map<string, DagTarget>();
  targets.set(`${root.schema}.${root.table}`, rootTarget(root));

  for (const row of rows) {
    const table = toQualifiedTable(row.child_schema, row.child_table);
    const parentTable = toQualifiedTable(row.parent_schema, row.parent_table);
    // One entry per (child table, constraint). Rows arrive ordered by ascending depth, so when
    // the same edge is reached along several paths the last (deepest) row overwrites earlier ones.
    const key = `${table.schema}.${table.table}:${row.constraint_schema}.${row.constraint_name}`;
    targets.set(key, {
      table,
      parentTable,
      constraintName: row.constraint_name,
      childColumns: row.child_columns,
      parentColumns: row.parent_columns,
      depth: row.depth,
      fkCondition: formatJoinCondition(parentTable, row.parent_columns, table, row.child_columns),
    });
  }

  // Array.from creates a fresh array, so sorting in place does not touch the map.
  return Array.from(targets.values()).sort((left, right) => {
    const byDepth = left.depth - right.depth;
    if (byDepth !== 0) {
      return byDepth;
    }
    return `${left.table.schema}.${left.table.table}`.localeCompare(`${right.table.schema}.${right.table.table}`);
  });
}
195
+
196
/**
 * Builds an order-independent key for a (table, table, column) relationship.
 *
 * The two dot-qualified table names are placed in ascending string order so that
 * swapping `left` and `right` yields the same key.
 *
 * @param left - One side of the relationship.
 * @param right - Other side of the relationship.
 * @param column - Column name shared by the relationship.
 * @returns Key in the form `first|second|column`.
 */
function physicalLinkKey(left: QualifiedTable, right: QualifiedTable, column: string): string {
  const leftName = `${left.schema}.${left.table}`;
  const rightName = `${right.schema}.${right.table}`;
  const ordered = leftName <= rightName ? [leftName, rightName] : [rightName, leftName];
  return `${ordered[0]}|${ordered[1]}|${column}`;
}
200
+
201
/**
 * Surfaces candidate relationships that are not backed by a physical foreign key.
 *
 * Only catalog metadata is read: every column in the root table's schema whose lower-cased
 * name looks like a subject identifier (`user_id`, `account_id`, `customer_id`, `member_id`,
 * `subject_id`, or anything ending in `_user_id`) is grouped by its exact column name, and each
 * pair of tables sharing such a column is reported unless the FK DAG already links that pair
 * through that column. A reported link is a hint for human review, not proof of a relationship.
 *
 * @param options - Database handle, root table, and default schema.
 * @param physicalDag - FK DAG used to suppress already-modeled relationships.
 * @returns Potential logical links for human review in the generated YAML.
 */
export async function discoverPotentialLogicalLinks(
  options: Pick<CompileDagOptions, "sql" | "rootTable" | "defaultSchema">,
  physicalDag: readonly DagTarget[],
): Promise<PotentialLogicalLink[]> {
  const root = parseQualifiedTable(options.rootTable, options.defaultSchema);
  const columnRows = await options.sql<ColumnRow[]>`
    SELECT table_schema, table_name, column_name
    FROM information_schema.columns
    WHERE table_schema = ${root.schema}
      AND table_schema NOT IN ('pg_catalog', 'information_schema')
    ORDER BY table_name ASC, ordinal_position ASC
  `;

  // Keys of relationships the FK DAG already models, registered for both column sides.
  const modeledKeys = new Set<string>();
  for (const target of physicalDag) {
    const { parentTable } = target;
    if (!parentTable) {
      continue;
    }

    const register = (column: string): void => {
      modeledKeys.add(physicalLinkKey(parentTable, target.table, column));
    };
    target.childColumns.forEach(register);
    target.parentColumns.forEach(register);
  }

  // Group tables by the exact column name; the pattern is matched case-insensitively.
  const identifierPattern = /^(?:user_id|account_id|customer_id|member_id|subject_id|.*_user_id)$/;
  const tablesByColumn = new Map<string, QualifiedTable[]>();
  for (const { table_schema, table_name, column_name } of columnRows) {
    if (!identifierPattern.test(column_name.toLowerCase())) {
      continue;
    }

    const owner = toQualifiedTable(table_schema, table_name);
    const bucket = tablesByColumn.get(column_name);
    if (bucket) {
      bucket.push(owner);
    } else {
      tablesByColumn.set(column_name, [owner]);
    }
  }

  const links: PotentialLogicalLink[] = [];
  const emitted = new Set<string>();
  for (const [column, tables] of tablesByColumn) {
    // Visit every unordered pair once; a single-table bucket yields no pairs.
    tables.forEach((left, leftIndex) => {
      for (const right of tables.slice(leftIndex + 1)) {
        const key = physicalLinkKey(left, right, column);
        if (modeledKeys.has(key) || emitted.has(key)) {
          continue;
        }

        emitted.add(key);
        links.push({
          sourceTable: left,
          targetTable: right,
          column,
          reason: `Both tables expose ${column} but no physical foreign key was found.`,
        });
      }
    });
  }

  return links;
}
@@ -0,0 +1,7 @@
1
+ export * from "./run";
2
+ export * from "./classifier";
3
+ export * from "./report";
4
+ export * from "./types";
5
+ export * from "./naming";
6
+ export * from "./dag";
7
+ export * from "./s3-sampler";
@@ -0,0 +1,75 @@
1
+ import { assertIdentifier } from "@/utils";
2
+ import type { QualifiedTable } from "./types";
3
+
4
/**
 * Parses a CLI table reference into validated schema and table identifiers.
 *
 * A bare `table` is paired with `defaultSchema`; `schema.table` supplies both parts. Each part
 * is passed through `assertIdentifier` with a label naming the part being validated, so a bad
 * table name is reported as a table problem rather than a schema problem.
 *
 * @param value - `table` or `schema.table` reference.
 * @param defaultSchema - Schema used when `value` omits one.
 * @returns Validated qualified table reference.
 */
export function parseQualifiedTable(value: string, defaultSchema: string = "public"): QualifiedTable {
  const parts = value.split(".");
  if (parts.length > 2 || parts.some((part) => part.trim() === "")) {
    // Malformed shape (too many segments or a blank segment): hand the whole input to the
    // validator so the rejection carries the full reference.
    // NOTE(review): this relies on assertIdentifier rejecting such input — confirm.
    assertIdentifier(value, "qualified table reference");
  }

  if (parts.length === 1) {
    return {
      schema: assertIdentifier(defaultSchema, "default schema"),
      table: assertIdentifier(parts[0]!, "root table"),
    };
  }

  return {
    schema: assertIdentifier(parts[0]!, "root schema"),
    table: assertIdentifier(parts[1]!, "root table"),
  };
}
29
+
30
/**
 * Renders a qualified table as `schema.table`.
 *
 * @param table - Qualified table reference.
 * @returns Schema and table name joined by a single dot, without quoting.
 */
export function formatQualifiedTable(table: QualifiedTable): string {
  const { schema, table: name } = table;
  return `${schema}.${name}`;
}
39
+
40
/**
 * Renders the join predicate between a parent and a child table as unquoted text, keeping the
 * YAML readable for DPO review.
 *
 * One `parent.col = child.col` equality is produced per parent column and the equalities are
 * joined with ` AND `. When the child list has no entry at a position, the first child column
 * is used, and `UNKNOWN` when the child list is empty.
 *
 * @param parent - Referenced parent table.
 * @param parentColumns - Referenced parent columns.
 * @param child - Dependent child table.
 * @param childColumns - Foreign key columns on the child.
 * @returns Stable join condition string.
 */
export function formatJoinCondition(
  parent: QualifiedTable,
  parentColumns: string[],
  child: QualifiedTable,
  childColumns: string[]
): string {
  const equalityAt = (parentColumn: string, position: number): string => {
    const childColumn = childColumns[position] ?? childColumns[0] ?? "UNKNOWN";
    const leftSide = `${formatQualifiedTable(parent)}.${parentColumn}`;
    const rightSide = `${formatQualifiedTable(child)}.${childColumn}`;
    return `${leftSide} = ${rightSide}`;
  };

  const equalities = parentColumns.map(equalityAt);
  return equalities.join(" AND ");
}
62
+
63
/**
 * Emits a string as a YAML scalar, adding quotes only when the text falls outside a safe alphabet.
 *
 * Text made solely of ASCII letters, digits, `_`, `.`, `:` and `-` is returned verbatim; anything
 * else (including the empty string) is returned as a JSON string literal.
 *
 * NOTE(review): values such as `true`, `null` or `123` match the safe alphabet and are emitted
 * unquoted; a YAML reader would presumably resolve them as non-string types — confirm callers
 * never pass such values.
 *
 * @param value - Scalar value.
 * @returns YAML-safe scalar representation.
 */
export function yamlScalar(value: string): string {
  const isPlain = /^[A-Za-z0-9_.:-]+$/.test(value);
  return isPlain ? value : JSON.stringify(value);
}
@@ -0,0 +1,153 @@
1
+ import { formatQualifiedTable } from "./naming";
2
+ import type {
3
+ IntrospectorDraft,
4
+ IntrospectorReport,
5
+ IntrospectorReportFinding,
6
+ IntrospectorReportSummary,
7
+ } from "./types";
8
+
9
/**
 * Confidence cut-off used by the report summary: findings at or above this value are counted
 * as high-confidence, findings below it as review-required.
 */
const HIGH_CONFIDENCE_THRESHOLD = 0.9;
10
+
11
/**
 * Formats a score with exactly three digits after the decimal point.
 *
 * @param value - Numeric score.
 * @returns Fixed-point text such as `0.500`.
 */
function formatScore(value: number): string {
  const fractionDigits = 3;
  return value.toFixed(fractionDigits);
}
14
+
15
/**
 * Builds the `table.column` text used as the tie-breaker when ordering findings.
 *
 * @param finding - Report finding.
 * @returns Table and column joined by a dot.
 */
function findingKey(finding: IntrospectorReportFinding): string {
  const { table, column } = finding;
  return `${table}.${column}`;
}
18
+
19
/**
 * Comparator that orders findings by descending confidence, falling back to a locale
 * comparison of their `table.column` keys when the confidences are equal.
 *
 * @param left - First finding.
 * @param right - Second finding.
 * @returns Negative, zero, or positive number as expected by `Array.prototype.sort`.
 */
function sortFindings(
  left: IntrospectorReportFinding,
  right: IntrospectorReportFinding
): number {
  const confidenceDelta = right.confidence - left.confidence;
  return confidenceDelta === 0
    ? findingKey(left).localeCompare(findingKey(right))
    : confidenceDelta;
}
30
+
31
/**
 * Turns an introspector draft into a review report.
 *
 * Every PII column of every target becomes one finding; findings are ordered by descending
 * confidence and then by `table.column`. The summary counts targets, distinct tables with PII,
 * findings at or above versus below the high-confidence threshold, and potential logical links.
 *
 * @param draft - Static DAG, PII taxonomy, and schema metadata produced by the introspector.
 * @returns A report object suitable for CLI output, JSON export, or Markdown review artifacts.
 */
export function buildIntrospectorReport(draft: IntrospectorDraft): IntrospectorReport {
  const findings: IntrospectorReportFinding[] = [];
  for (const target of draft.targets) {
    for (const column of target.piiColumns) {
      findings.push({
        table: formatQualifiedTable(target.table),
        column: column.column,
        dataType: column.dataType,
        confidence: column.confidence,
        metadataScore: column.metadataScore,
        contentMatchRatio: column.contentMatchRatio,
        sampleSize: column.sampleSize,
        matchedSignatures: column.matchedSignatures,
      });
    }
  }
  findings.sort(sortFindings);

  // Single pass over the ordered findings to collect the summary counters.
  const tablesWithPii = new Set<string>();
  let highConfidenceCount = 0;
  let reviewRequiredCount = 0;
  for (const finding of findings) {
    tablesWithPii.add(finding.table);
    if (finding.confidence >= HIGH_CONFIDENCE_THRESHOLD) {
      highConfidenceCount += 1;
    }
    if (finding.confidence < HIGH_CONFIDENCE_THRESHOLD) {
      reviewRequiredCount += 1;
    }
  }

  const summary: IntrospectorReportSummary = {
    rootTable: formatQualifiedTable(draft.root),
    generatedAt: draft.generatedAt,
    schemaHash: draft.schemaHash,
    targetCount: draft.targets.length,
    tablesWithPii: tablesWithPii.size,
    piiColumnCount: findings.length,
    highConfidenceCount,
    reviewRequiredCount,
    potentialLogicalLinkCount: draft.potentialLogicalLinks.length,
  };

  const nextSteps = [
    "Review every PII column and potential logical link with the application owner.",
    "Copy reviewed targets into compliance.worker.yml and complete legal_attestation.",
    "Run compliance-worker check-integrity before allowing live worker boot.",
    "Sign the reviewed manifest with compliance-worker sign after DPO approval.",
  ];

  return {
    summary,
    findings,
    potentialLogicalLinks: draft.potentialLogicalLinks,
    nextSteps,
  };
}
76
+
77
/**
 * Renders a Markdown report for DPO/developer review.
 *
 * The document contains a summary list, a PII findings table (or a placeholder sentence when
 * there are no findings), the potential logical links, and the next steps. Each findings row is
 * emitted with pipe-delimited cells so it lines up with the seven-column table header.
 *
 * @param report - Report model created by `buildIntrospectorReport`.
 * @returns Markdown content containing summary, findings, review warnings, and next steps.
 */
export function renderIntrospectorMarkdown(report: IntrospectorReport): string {
  const lines = [
    "# Compliance Introspector Report",
    "",
    "## Summary",
    "",
    `- Root table: \`${report.summary.rootTable}\``,
    `- Generated at: \`${report.summary.generatedAt}\``,
    `- Schema hash: \`${report.summary.schemaHash}\``,
    `- DAG targets: ${report.summary.targetCount}`,
    `- Tables with PII: ${report.summary.tablesWithPii}`,
    `- PII columns: ${report.summary.piiColumnCount}`,
    `- High-confidence findings: ${report.summary.highConfidenceCount}`,
    `- Review-required findings: ${report.summary.reviewRequiredCount}`,
    `- Potential logical links: ${report.summary.potentialLogicalLinkCount}`,
    "",
    "## PII Findings",
    "",
  ];

  if (report.findings.length === 0) {
    lines.push("No PII columns crossed the configured confidence threshold.", "");
  } else {
    lines.push("| Table | Column | Type | Confidence | Metadata | Content | Signatures |");
    lines.push("| --- | --- | --- | ---: | ---: | ---: | --- |");
    for (const finding of report.findings) {
      const cells = [
        `\`${finding.table}\``,
        `\`${finding.column}\``,
        `\`${finding.dataType}\``,
        formatScore(finding.confidence),
        formatScore(finding.metadataScore),
        formatScore(finding.contentMatchRatio),
        // Findings without a content signature were classified from metadata alone.
        finding.matchedSignatures.length > 0 ? finding.matchedSignatures.join(", ") : "metadata",
      ];
      // Cells must be separated by pipes; joining with a bare space collapses the row into one cell.
      lines.push(`| ${cells.join(" | ")} |`);
    }
    lines.push("");
  }

  lines.push("## Potential Logical Links", "");
  if (report.potentialLogicalLinks.length === 0) {
    lines.push("None detected.", "");
  } else {
    for (const link of report.potentialLogicalLinks) {
      lines.push(
        `- \`${formatQualifiedTable(link.sourceTable)}.${link.column}\` <-> ` +
          `\`${formatQualifiedTable(link.targetTable)}.${link.column}\`: ${link.reason}`
      );
    }
    lines.push("");
  }

  lines.push("## Next Steps", "");
  for (const step of report.nextSteps) {
    lines.push(`- ${step}`);
  }
  lines.push("");

  return lines.join("\n");
}
144
+
145
/**
 * Serializes the report as two-space-indented JSON followed by a trailing newline.
 *
 * @param report - Report model created by `buildIntrospectorReport`.
 * @returns Pretty-printed JSON report.
 */
export function renderIntrospectorJson(report: IntrospectorReport): string {
  const indentWidth = 2;
  const serialized = JSON.stringify(report, null, indentWidth);
  return `${serialized}\n`;
}
@@ -0,0 +1,123 @@
1
+ import { MAX_DEPTH } from "@/constants";
2
+ import { formatQualifiedTable, parseQualifiedTable } from "./naming";
3
+ import type {
4
+ IntrospectorDraft,
5
+ IntrospectorTargetDraft,
6
+ RunIntrospectorOptions,
7
+ VerifySchemaIntegrityOptions
8
+ } from "./types";
9
+ import { compileStaticDag, discoverPotentialLogicalLinks } from "./dag";
10
+ import { classifyDagTargets } from "./classifier";
11
+ import { detectSchemaDrift } from "@modules/db";
12
+ import { renderIntrospectorYaml } from "./yaml";
13
+ import { fail } from "@/errors";
14
+ import { readWorkerConfig } from "../config";
15
+
16
/**
 * Builds the `schema.table` lookup key used to fetch classified columns for a target.
 *
 * @param schema - Schema name.
 * @param table - Table name.
 * @returns Dot-joined key.
 */
function targetKey(schema: string, table: string): string {
  return schema + "." + table;
}
19
+
20
/**
 * Executes the Introspector pipeline end to end.
 *
 * Steps, in order: compile the FK DAG for the root table, classify PII columns on the DAG
 * targets, then compute the schema hash and the potential logical links concurrently, and
 * finally assemble the draft and render it as YAML.
 *
 * @param options - SQL handle, root table, sampling controls, and output timestamp.
 * @returns Draft model and rendered YAML content.
 */
export async function runIntrospector(options: RunIntrospectorOptions): Promise<{
  draft: IntrospectorDraft;
  yaml: string;
}> {
  const maxDepth = options.maxDepth ?? MAX_DEPTH;
  const root = parseQualifiedTable(options.rootTable, options.defaultSchema);
  const { sql } = options;
  // Downstream steps receive the normalized `schema.table` form of the root.
  const rootTable = formatQualifiedTable(root);

  const dag = await compileStaticDag({ sql, rootTable, defaultSchema: root.schema, maxDepth });

  const classifiedColumns = await classifyDagTargets({
    sql,
    targets: dag,
    samplePercent: options.samplePercent,
    sampleLimit: options.sampleLimit,
    threshold: options.threshold,
  });

  const schemaHashPending = detectSchemaDrift(sql, root.schema);
  const logicalLinksPending = discoverPotentialLogicalLinks(
    { sql, rootTable, defaultSchema: root.schema },
    dag,
  );
  const [schemaHash, potentialLogicalLinks] = await Promise.all([schemaHashPending, logicalLinksPending]);

  const targets: IntrospectorTargetDraft[] = dag.map((target) => {
    const { table, parentTable, fkCondition, childColumns, parentColumns, depth } = target;
    return {
      table,
      parentTable,
      fkCondition,
      childColumns,
      parentColumns,
      depth,
      // Tables without classified columns get an empty list.
      piiColumns: classifiedColumns.get(targetKey(table.schema, table.table)) ?? [],
    };
  });

  const generatedAt = (options.generatedAt ?? new Date()).toISOString();
  const draft: IntrospectorDraft = {
    root,
    maxDepth,
    generatedAt,
    schemaHash,
    targets,
    potentialLogicalLinks,
  };

  const yaml = renderIntrospectorYaml(draft);
  return { draft, yaml };
}
85
+
86
/**
 * Checks the live schema hash against the hash recorded in the worker manifest.
 *
 * The manifest is read with an environment built from `process.env` overlaid by `options.env`.
 * When neither source provides `DPDP_MASTER_KEY` or `DPDP_HMAC_KEY`, a 64-character string of
 * zeros is substituted for the missing key. The expected hash is
 * `legal_attestation.schema_hash`, falling back to `integrity.expected_schema_hash`.
 *
 * @param options - SQL handle, manifest path, and optional env map for key placeholders.
 * @returns Live schema hash when the manifest is current.
 * @throws {WorkerError} When the legal attestation hash is absent or stale.
 */
export async function verifySchemaIntegrity(options: VerifySchemaIntegrityOptions): Promise<string> {
  const overrides = options.env;
  const placeholderKey = "0".repeat(64);
  const env = {
    ...process.env,
    ...overrides,
    DPDP_MASTER_KEY: overrides?.DPDP_MASTER_KEY ?? process.env.DPDP_MASTER_KEY ?? placeholderKey,
    DPDP_HMAC_KEY: overrides?.DPDP_HMAC_KEY ?? process.env.DPDP_HMAC_KEY ?? placeholderKey,
  };

  const config = await readWorkerConfig(env, options.configPath);
  const appSchema = config.database.app_schema;
  const expectedHash = config.legal_attestation.schema_hash ?? config.integrity.expected_schema_hash;
  const liveHash = await detectSchemaDrift(options.sql, appSchema);

  if (liveHash === expectedHash) {
    return liveHash;
  }

  fail({
    code: "INTROSPECTOR_SCHEMA_VERIFY_FAILED",
    title: "Schema verification failed",
    detail: `Live schema hash ${liveHash} does not match legal attestation hash ${expectedHash}.`,
    category: "integrity",
    retryable: false,
    fatal: true,
    context: {
      appSchema,
      expectedHash,
      liveHash,
    },
  });

  return liveHash;
}