@datasynx/agentic-ai-cartography 2.0.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +32 -0
- package/README.md +115 -6
- package/dist/api-bin.js +24 -0
- package/dist/api-bin.js.map +1 -0
- package/dist/{bookmarks-VS56KVCO.js → bookmarks-WXHE7GN7.js} +6 -3
- package/dist/{chunk-CJ2PITFA.js → chunk-2SZ5QHGH.js} +71 -9
- package/dist/chunk-2SZ5QHGH.js.map +1 -0
- package/dist/chunk-7QEBFMN4.js +3278 -0
- package/dist/chunk-7QEBFMN4.js.map +1 -0
- package/dist/chunk-7VZH5PFV.js +1134 -0
- package/dist/chunk-7VZH5PFV.js.map +1 -0
- package/dist/chunk-B2AKONVW.js +2465 -0
- package/dist/chunk-B2AKONVW.js.map +1 -0
- package/dist/chunk-WCR47QA2.js +277 -0
- package/dist/chunk-WCR47QA2.js.map +1 -0
- package/dist/cli.js +2367 -663
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +9405 -57913
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3048 -69
- package/dist/index.d.ts +3048 -69
- package/dist/index.js +9150 -2607
- package/dist/index.js.map +1 -1
- package/dist/mcp-bin.js +17 -26
- package/dist/mcp-bin.js.map +1 -1
- package/dist/types-TJWXAQ2L.js +66 -0
- package/llms-full.txt +758 -0
- package/llms.txt +24 -0
- package/package.json +27 -9
- package/scripts/build-llms.mjs +89 -0
- package/scripts/build-mcpb.mjs +31 -0
- package/scripts/gen-api-schemas.ts +29 -0
- package/scripts/gen-docs.ts +123 -0
- package/scripts/sync-version.mjs +51 -0
- package/scripts/validate-server-json.mjs +54 -0
- package/server.json +4 -4
- package/dist/chunk-CJ2PITFA.js.map +0 -1
- package/dist/chunk-D6SRSLBF.js +0 -48
- package/dist/chunk-J6FDZ6HZ.js +0 -142
- package/dist/chunk-J6FDZ6HZ.js.map +0 -1
- package/dist/chunk-UGSNG3QJ.js +0 -49
- package/dist/chunk-UGSNG3QJ.js.map +0 -1
- package/dist/chunk-W7YE6AAH.js +0 -1516
- package/dist/chunk-W7YE6AAH.js.map +0 -1
- package/dist/onnxruntime_binding-6Q6HXASN.node +0 -0
- package/dist/onnxruntime_binding-EKZT2NRK.node +0 -0
- package/dist/onnxruntime_binding-P6S7V3CI.node +0 -0
- package/dist/onnxruntime_binding-PJNNIIUO.node +0 -0
- package/dist/onnxruntime_binding-UN6SPTQK.node +0 -0
- package/dist/sdk-A6NLO3DJ.js +0 -12294
- package/dist/sdk-A6NLO3DJ.js.map +0 -1
- package/dist/sdk-G5D4WQZ4.js +0 -12293
- package/dist/sdk-G5D4WQZ4.js.map +0 -1
- package/dist/sdk-QSTAREST.js +0 -4869
- package/dist/sdk-QSTAREST.js.map +0 -1
- package/dist/sqlite-vec-EZN67B2V.js +0 -40
- package/dist/sqlite-vec-EZN67B2V.js.map +0 -1
- package/dist/sqlite-vec-UK5YYE5T.js +0 -39
- package/dist/sqlite-vec-UK5YYE5T.js.map +0 -1
- package/dist/transformers.node-BTYUTJK5.js +0 -42884
- package/dist/transformers.node-BTYUTJK5.js.map +0 -1
- package/dist/transformers.node-J6PRTTOX.js +0 -42883
- package/dist/transformers.node-J6PRTTOX.js.map +0 -1
- package/dist/types-JG27FR3E.js +0 -29
- package/dist/types-JG27FR3E.js.map +0 -1
- package/scripts/postinstall.mjs +0 -7
- /package/dist/{bookmarks-VS56KVCO.js.map → bookmarks-WXHE7GN7.js.map} +0 -0
- /package/dist/{chunk-D6SRSLBF.js.map → types-TJWXAQ2L.js.map} +0 -0
|
@@ -0,0 +1,3278 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
CostEntrySchema,
|
|
4
|
+
DEFAULT_ANOMALY_THRESHOLDS,
|
|
5
|
+
DRIFT_FIELDS,
|
|
6
|
+
EDGE_RELATIONSHIPS,
|
|
7
|
+
NODE_TYPES,
|
|
8
|
+
NODE_TYPE_GROUPS,
|
|
9
|
+
SharingLevelSchema
|
|
10
|
+
} from "./chunk-WCR47QA2.js";
|
|
11
|
+
import {
|
|
12
|
+
HOME,
|
|
13
|
+
IS_LINUX,
|
|
14
|
+
IS_MAC,
|
|
15
|
+
IS_WIN,
|
|
16
|
+
PLATFORM,
|
|
17
|
+
commandExists,
|
|
18
|
+
dbScanDirs,
|
|
19
|
+
findFiles,
|
|
20
|
+
hostname,
|
|
21
|
+
logDebug,
|
|
22
|
+
machineId,
|
|
23
|
+
osUser,
|
|
24
|
+
run,
|
|
25
|
+
scanAllBookmarks,
|
|
26
|
+
scanAllHistory,
|
|
27
|
+
scanWindowsDbServices,
|
|
28
|
+
scanWindowsPrograms
|
|
29
|
+
} from "./chunk-2SZ5QHGH.js";
|
|
30
|
+
|
|
31
|
+
// src/tools.ts
|
|
32
|
+
import { z } from "zod";
|
|
33
|
+
|
|
34
|
+
// src/sanitize.ts
|
|
35
|
+
// Inclusive [first, last] code-point intervals that are removed from untrusted text.
var STRIP_RANGES = [
  [0, 8], // C0 controls below tab; 0x09 (tab), 0x0A (LF) and 0x0D (CR) are kept
  [11, 12],
  [14, 31],
  [127, 127], // DEL
  [128, 159], // C1 controls
  [173, 173], // soft hyphen
  [8203, 8207], // ZWSP, ZWNJ, ZWJ, LRM, RLM
  [8234, 8238], // bidi embeddings & overrides
  [8288, 8292], // word joiner, invisible math operators
  [8294, 8297], // bidi isolates
  [8298, 8303], // deprecated format characters
  [65279, 65279] // BOM / ZWNBSP
];
// Flat lookup of every code point covered by STRIP_RANGES.
var STRIP = /* @__PURE__ */ new Set();
STRIP_RANGES.forEach(([first, last]) => {
  let codePoint = first;
  while (codePoint <= last) {
    STRIP.add(codePoint);
    codePoint += 1;
  }
});
/**
 * Normalizes text to NFC and removes every code point listed in STRIP.
 *
 * @param {string} text - Text to clean; falsy values are handed back unchanged.
 * @returns {string} The NFC-normalized text without the stripped code points.
 */
function sanitizeUntrusted(text) {
  if (!text) return text;
  const kept = [];
  // Iterating the string walks whole code points, so astral characters stay intact.
  for (const symbol of text.normalize("NFC")) {
    if (STRIP.has(symbol.codePointAt(0))) continue;
    kept.push(symbol);
  }
  return kept.join("");
}
|
|
71
|
+
/**
 * Recursively applies sanitizeUntrusted to every string found in a value.
 *
 * Arrays are mapped element by element and objects are copied into a new plain
 * object with the same own enumerable string keys. Keys themselves are copied
 * unchanged; every other value type is returned as-is.
 *
 * @param {*} value - Any value, typically parsed JSON from an untrusted source.
 * @returns {*} A sanitized copy for strings, arrays and objects; otherwise `value`.
 */
function sanitizeValue(value) {
  if (typeof value === "string") return sanitizeUntrusted(value);
  if (Array.isArray(value)) return value.map((item) => sanitizeValue(item));
  if (value && typeof value === "object") {
    // Object.fromEntries defines own data properties. A plain `out[k] = v`
    // would treat an own "__proto__" key (which JSON.parse can produce) as a
    // prototype setter, dropping the key and re-parenting the result.
    return Object.fromEntries(
      Object.entries(value).map(([key, inner]) => [key, sanitizeValue(inner)])
    );
  }
  return value;
}
|
|
81
|
+
|
|
82
|
+
// src/scanners/cloud-util.ts
|
|
83
|
+
/**
 * Parses command output as JSON without ever throwing.
 *
 * @param {string} raw - Raw command output.
 * @returns {*} The parsed value, or `undefined` when `raw` is not a string, is
 *   blank, starts with "(" or is not valid JSON.
 */
function safeJson(raw) {
  // A runner that yields nothing (undefined/null) is treated like empty output
  // instead of crashing on `.trim()`.
  if (typeof raw !== "string") return void 0;
  const s = raw.trim();
  // Output beginning with "(" is never JSON; scanners in this file use strings
  // such as "(none found)" as placeholders.
  if (!s || s.startsWith("(")) return void 0;
  try {
    return JSON.parse(s);
  } catch {
    return void 0;
  }
}
|
|
92
|
+
// Recognized `key=value` hint keys and the argument kind passed to
// assertSafeScanArg for each of them.
var HINT_ARG_KINDS = {
  namespace: "k8s-namespace",
  region: "aws-region",
  profile: "aws-profile",
  project: "gcp-project",
  subscription: "azure-subscription",
  "resource-group": "azure-resource-group"
};
/**
 * Splits a scan hint into recognized `key=value` arguments and free text.
 *
 * Tokens are separated by whitespace and/or commas. A token whose key is listed
 * in HINT_ARG_KINDS is checked with assertSafeScanArg and stored on the result
 * ("resource-group" is stored as `resourceGroup`). Every other token is kept,
 * in order, in `free`.
 *
 * @param {string | null | undefined} hint - Raw hint text.
 * @returns {{ free: string, namespace?: string, region?: string, profile?: string,
 *   project?: string, subscription?: string, resourceGroup?: string }}
 * @throws Whatever assertSafeScanArg throws for a rejected value (that helper
 *   is defined outside this block).
 */
function parseScanHint(hint) {
  const out = { free: "" };
  const free = [];
  for (const tok of (hint ?? "").split(/[\s,]+/).filter(Boolean)) {
    const eq = tok.indexOf("=");
    // eq <= 0 means there is no "=" or the key part is empty.
    const key = eq > 0 ? tok.slice(0, eq) : "";
    // Own-property check: a bare HINT_ARG_KINDS[key] lookup also resolves
    // inherited names such as "constructor" or "toString", which made those
    // tokens disappear from `free` and reach assertSafeScanArg with a bogus kind.
    if (!key || !Object.prototype.hasOwnProperty.call(HINT_ARG_KINDS, key)) {
      free.push(tok);
      continue;
    }
    const value = tok.slice(eq + 1);
    assertSafeScanArg(HINT_ARG_KINDS[key], value);
    out[key === "resource-group" ? "resourceGroup" : key] = value;
  }
  out.free = free.join(" ");
  return out;
}
|
|
127
|
+
/**
 * Renders report sections as text.
 *
 * @param {Array<[string, string]>} sections - `[heading, content]` pairs.
 * @returns {string} Each section as a "=== heading ===" line followed by its
 *   content, with a blank line between sections.
 */
function buildReport(sections) {
  const renderSection = ([heading, content]) => `=== ${heading} ===\n${content}`;
  return sections.map(renderSection).join("\n\n");
}
|
|
131
|
+
|
|
132
|
+
// src/scanners/cloud-aws.ts
|
|
133
|
+
/**
 * Scanner that inventories AWS resources by running read-only `aws` CLI
 * commands (STS identity, EC2, RDS, ElastiCache, EKS, ELBv2) and turning their
 * JSON output into graph nodes.
 */
var cloudAwsScanner = {
  id: "cloud-aws",
  title: "AWS infrastructure",
  platforms: "all",
  allowedCommands: ["aws"],
  // Available when the `aws` executable is found (a ctx-provided probe wins).
  detect: (ctx) => Boolean((ctx.commandExists ?? commandExists)("aws")),
  /**
   * @param {object} ctx - Scan context; uses `ctx.hint` (optional `region=` and
   *   `profile=` arguments) and `ctx.run(command, options)`.
   * @returns {Promise<{ nodes: object[], edges: object[], report: string }>}
   *   Discovered nodes, an always-empty edge list, and the raw command outputs
   *   rendered by buildReport.
   */
  async scan(ctx) {
    const { region, profile } = parseScanHint(ctx.hint);
    // Export the hinted region under both names: AWS CLI v2 gives AWS_REGION
    // precedence over AWS_DEFAULT_REGION, so setting only the latter let an
    // inherited AWS_REGION silently override the hint.
    const env = region ? { ...process.env, AWS_REGION: region, AWS_DEFAULT_REGION: region } : process.env;
    const pf = profile ? ` --profile ${profile}` : "";
    const runA = createScanRunner((c) => ctx.run(c, { timeout: 2e4, env }), { threshold: 3 });
    const nodes = [];
    const edges = [];
    const report = [];
    // Runs one `aws <subcommand>` call, records its raw output under `section`
    // and returns the parsed JSON (undefined when it cannot be parsed).
    const query = (section, subcommand) => {
      const raw = runA(`aws ${subcommand}${pf} --output json`);
      report.push([section, raw]);
      return safeJson(raw);
    };
    report.push(["IDENTITY", runA(`aws sts get-caller-identity${pf} --output json`)]);
    const ec2 = query("EC2", "ec2 describe-instances");
    for (const r of ec2?.Reservations ?? []) {
      for (const i of r.Instances ?? []) {
        const id = String(i.InstanceId ?? "");
        if (!id) continue;
        nodes.push({
          id: `host:aws:${id}`,
          type: "host",
          name: id,
          discoveredVia: "aws-ec2",
          confidence: 0.95,
          tags: ["cloud", "aws", "ec2"],
          metadata: redactValue({ instanceType: i.InstanceType, state: i.State?.Name, privateIp: i.PrivateIpAddress })
        });
      }
    }
    const rds = query("RDS", "rds describe-db-instances");
    for (const db of rds?.DBInstances ?? []) {
      const id = String(db.DBInstanceIdentifier ?? "");
      if (!id) continue;
      nodes.push({
        id: `database_server:rds:${id}`,
        type: "database_server",
        name: id,
        discoveredVia: "aws-rds",
        confidence: 0.95,
        tags: ["cloud", "aws", "rds"],
        metadata: redactValue({ engine: db.Engine, status: db.DBInstanceStatus, endpoint: db.Endpoint?.Address, port: db.Endpoint?.Port })
      });
    }
    const cache = query("ELASTICACHE", "elasticache describe-cache-clusters");
    for (const c of cache?.CacheClusters ?? []) {
      const id = String(c.CacheClusterId ?? "");
      if (!id) continue;
      nodes.push({
        id: `cache_server:aws:${id}`,
        type: "cache_server",
        name: id,
        discoveredVia: "aws-elasticache",
        confidence: 0.95,
        tags: ["cloud", "aws", "elasticache"],
        metadata: redactValue({ engine: c.Engine, status: c.CacheClusterStatus })
      });
    }
    const eks = query("EKS", "eks list-clusters");
    // `clusters` is a list of cluster names rather than objects.
    for (const name of eks?.clusters ?? []) {
      if (!name) continue;
      nodes.push({
        id: `k8s_cluster:eks:${name}`,
        type: "k8s_cluster",
        name,
        discoveredVia: "aws-eks",
        confidence: 0.95,
        tags: ["cloud", "aws", "eks", "kubernetes"],
        metadata: { provider: "eks" }
      });
    }
    const elb = query("ELB_V2", "elbv2 describe-load-balancers");
    for (const lb of elb?.LoadBalancers ?? []) {
      // Load balancers are keyed by DNS name; entries without one are skipped.
      const dns = String(lb.DNSName ?? "");
      if (!dns) continue;
      nodes.push({
        id: `web_service:aws:${dns}`,
        type: "web_service",
        name: lb.LoadBalancerName ?? dns,
        discoveredVia: "aws-elbv2",
        confidence: 0.9,
        tags: ["cloud", "aws", "elb"],
        metadata: redactValue({ dnsName: dns, type: lb.Type, state: lb.State?.Code })
      });
    }
    return { nodes, edges, report: buildReport(report) };
  }
};
|
|
232
|
+
|
|
233
|
+
// src/scanners/cloud-gcp.ts
|
|
234
|
+
/**
 * Returns the text after the final "/" of a slash-separated value.
 *
 * @param {string | undefined} value - e.g. a resource path or URL.
 * @returns {string | undefined} The trailing segment; the whole value when it
 *   ends with "/" or contains no "/"; `undefined` for falsy input.
 */
function lastSegment(value) {
  if (!value) return void 0;
  const tail = value.slice(value.lastIndexOf("/") + 1);
  return tail || value;
}
|
|
239
|
+
/**
 * Scanner that inventories Google Cloud resources by running `gcloud ... list`
 * commands (Compute Engine, Cloud SQL, GKE, Memorystore Redis, Cloud Run,
 * Pub/Sub) and converting each JSON array entry into a graph node.
 */
var cloudGcpScanner = {
  id: "cloud-gcp",
  title: "Google Cloud Platform infrastructure",
  platforms: "all",
  allowedCommands: ["gcloud"],
  // Available when the `gcloud` executable is found (a ctx-provided probe wins).
  detect: (ctx) => {
    const exists = ctx.commandExists ?? commandExists;
    return Boolean(exists("gcloud"));
  },
  /**
   * @param {object} ctx - Scan context; uses `ctx.hint` (optional `project=`
   *   argument) and `ctx.run(command, options)`.
   * @returns {Promise<{ nodes: object[], edges: object[], report: string }>}
   *   Discovered nodes, an always-empty edge list, and the raw command outputs
   *   rendered by buildReport.
   */
  async scan(ctx) {
    const { project } = parseScanHint(ctx.hint);
    const projectFlag = project ? ` --project ${project}` : "";
    const runG = createScanRunner((command) => ctx.run(command, { timeout: 2e4 }), { threshold: 3 });
    const nodes = [];
    const edges = [];
    const report = [];
    // Runs one listing command, records its raw output under `section`, and
    // appends a node for every entry that `describe` does not reject (null).
    const harvest = (section, command, describe) => {
      const raw = runG(command);
      report.push([section, raw]);
      for (const entry of safeJson(raw) ?? []) {
        const node = describe(entry);
        if (node) nodes.push(node);
      }
    };
    report.push(["IDENTITY", runG(`gcloud config list account --format=json`)]);
    harvest("COMPUTE_INSTANCES", `gcloud compute instances list${projectFlag} --format=json`, (instance) => {
      const name = String(instance.name ?? "");
      if (!name) return null;
      return {
        id: `host:gcp:${name}`,
        type: "host",
        name,
        discoveredVia: "gcp-compute",
        confidence: 0.95,
        tags: ["cloud", "gcp", "compute"],
        // machineType and zone are reduced to their last path segment.
        metadata: { machineType: lastSegment(instance.machineType), status: instance.status, zone: lastSegment(instance.zone) }
      };
    });
    harvest("SQL_INSTANCES", `gcloud sql instances list${projectFlag} --format=json`, (sql) => {
      const name = String(sql.name ?? "");
      if (!name) return null;
      return {
        id: `database_server:gcp-sql:${name}`,
        type: "database_server",
        name,
        discoveredVia: "gcp-sql",
        confidence: 0.95,
        tags: ["cloud", "gcp", "cloudsql"],
        metadata: { engine: sql.databaseVersion, state: sql.state, region: sql.region }
      };
    });
    harvest("GKE_CLUSTERS", `gcloud container clusters list${projectFlag} --format=json`, (cluster) => {
      const name = String(cluster.name ?? "");
      if (!name) return null;
      return {
        id: `k8s_cluster:gke:${name}`,
        type: "k8s_cluster",
        name,
        discoveredVia: "gcp-gke",
        confidence: 0.95,
        tags: ["cloud", "gcp", "gke", "kubernetes"],
        metadata: { status: cluster.status, location: cluster.location }
      };
    });
    // NOTE(review): gcloud's documented flag for this command appears to be
    // `--region` (singular) — confirm that `--regions=-` is accepted.
    harvest("REDIS", `gcloud redis instances list --regions=-${projectFlag} --format=json`, (redis) => {
      const name = lastSegment(String(redis.name ?? ""));
      if (!name) return null;
      return {
        id: `cache_server:gcp:${name}`,
        type: "cache_server",
        name,
        discoveredVia: "gcp-redis",
        confidence: 0.95,
        tags: ["cloud", "gcp", "memorystore"],
        metadata: { tier: redis.tier, state: redis.state }
      };
    });
    harvest("CLOUD_RUN", `gcloud run services list --platform managed${projectFlag} --format=json`, (service) => {
      const name = String(service.metadata?.name ?? "");
      if (!name) return null;
      return {
        id: `web_service:gcp-run:${name}`,
        type: "web_service",
        name,
        discoveredVia: "gcp-run",
        confidence: 0.9,
        tags: ["cloud", "gcp", "cloudrun"],
        metadata: redactValue({ url: service.status?.url })
      };
    });
    harvest("PUBSUB", `gcloud pubsub topics list${projectFlag} --format=json`, (topic) => {
      // The node is named after the last path segment; the full name is kept in metadata.
      const name = lastSegment(String(topic.name ?? ""));
      if (!name) return null;
      return {
        id: `topic:gcp:${name}`,
        type: "topic",
        name,
        discoveredVia: "gcp-pubsub",
        confidence: 0.9,
        tags: ["cloud", "gcp", "pubsub"],
        metadata: { fullName: topic.name }
      };
    });
    return { nodes, edges, report: buildReport(report) };
  }
};
|
|
346
|
+
|
|
347
|
+
// src/scanners/cloud-azure.ts
|
|
348
|
+
/**
 * Scanner that inventories Azure resources by running `az ... list` commands
 * (VMs, AKS, SQL servers, PostgreSQL servers, Redis, web apps) and converting
 * each JSON array entry into a graph node.
 */
var cloudAzureScanner = {
  id: "cloud-azure",
  title: "Azure infrastructure",
  platforms: "all",
  allowedCommands: ["az"],
  // Available when the `az` executable is found (a ctx-provided probe wins).
  detect: (ctx) => {
    const exists = ctx.commandExists ?? commandExists;
    return Boolean(exists("az"));
  },
  /**
   * @param {object} ctx - Scan context; uses `ctx.hint` (optional
   *   `subscription=` and `resource-group=` arguments) and `ctx.run(command, options)`.
   * @returns {Promise<{ nodes: object[], edges: object[], report: string }>}
   *   Discovered nodes, an always-empty edge list, and the raw command outputs
   *   rendered by buildReport.
   */
  async scan(ctx) {
    const { subscription, resourceGroup } = parseScanHint(ctx.hint);
    const subscriptionFlag = subscription ? ` --subscription ${subscription}` : "";
    const groupFlag = resourceGroup ? ` --resource-group ${resourceGroup}` : "";
    const scope = subscriptionFlag + groupFlag;
    const runZ = createScanRunner((command) => ctx.run(command, { timeout: 2e4 }), { threshold: 3 });
    const nodes = [];
    const edges = [];
    const report = [];
    // Runs one listing command, records its raw output under `section`, and
    // appends a node for every entry that `describe` does not reject (null).
    const harvest = (section, command, describe) => {
      const raw = runZ(command);
      report.push([section, raw]);
      for (const entry of safeJson(raw) ?? []) {
        const node = describe(entry);
        if (node) nodes.push(node);
      }
    };
    // The identity query is scoped by subscription only, never by resource group.
    report.push(["IDENTITY", runZ(`az account show --output json${subscriptionFlag}`)]);
    harvest("VMS", `az vm list${scope} --output json`, (vm) => {
      const name = String(vm.name ?? "");
      if (!name) return null;
      return {
        id: `host:azure:${name}`,
        type: "host",
        name,
        discoveredVia: "azure-vm",
        confidence: 0.95,
        tags: ["cloud", "azure", "vm"],
        // NOTE(review): Azure CLI documents power state under `--show-details`;
        // confirm powerState is ever populated by a plain `az vm list`.
        metadata: { vmSize: vm.hardwareProfile?.vmSize, location: vm.location, powerState: vm.powerState }
      };
    });
    harvest("AKS", `az aks list${scope} --output json`, (aks) => {
      const name = String(aks.name ?? "");
      if (!name) return null;
      return {
        id: `k8s_cluster:aks:${name}`,
        type: "k8s_cluster",
        name,
        discoveredVia: "azure-aks",
        confidence: 0.95,
        tags: ["cloud", "azure", "aks", "kubernetes"],
        metadata: { location: aks.location, version: aks.kubernetesVersion, state: aks.provisioningState }
      };
    });
    harvest("SQL_SERVERS", `az sql server list${scope} --output json`, (server) => {
      const name = String(server.name ?? "");
      if (!name) return null;
      return {
        id: `database_server:azure-sql:${name}`,
        type: "database_server",
        name,
        discoveredVia: "azure-sql",
        confidence: 0.95,
        tags: ["cloud", "azure", "sql"],
        metadata: { engine: "sqlserver", location: server.location, version: server.version }
      };
    });
    harvest("POSTGRES", `az postgres server list${scope} --output json`, (server) => {
      const name = String(server.name ?? "");
      if (!name) return null;
      return {
        id: `database_server:azure-postgres:${name}`,
        type: "database_server",
        name,
        discoveredVia: "azure-postgres",
        confidence: 0.95,
        tags: ["cloud", "azure", "postgres"],
        metadata: { engine: "postgresql", location: server.location, version: server.version }
      };
    });
    harvest("REDIS", `az redis list${scope} --output json`, (redis) => {
      const name = String(redis.name ?? "");
      if (!name) return null;
      return {
        id: `cache_server:azure:${name}`,
        type: "cache_server",
        name,
        discoveredVia: "azure-redis",
        confidence: 0.95,
        tags: ["cloud", "azure", "redis"],
        metadata: { location: redis.location, state: redis.provisioningState }
      };
    });
    harvest("WEBAPPS", `az webapp list${scope} --output json`, (site) => {
      const name = String(site.name ?? "");
      if (!name) return null;
      return {
        id: `web_service:azure:${name}`,
        type: "web_service",
        name,
        discoveredVia: "azure-webapp",
        confidence: 0.9,
        tags: ["cloud", "azure", "webapp"],
        metadata: redactValue({ hostName: site.defaultHostName, state: site.state })
      };
    });
    return { nodes, edges, report: buildReport(report) };
  }
};
|
|
457
|
+
|
|
458
|
+
// src/scanners/k8s.ts
|
|
459
|
+
// Upper bound on how many running pods the Kubernetes scanner catalogues.
var MAX_PODS = 200;
/**
 * Tests whether a set of labels satisfies an equality-based selector.
 *
 * @param {Record<string, string>} selector - Required label values.
 * @param {Record<string, string>} labels - Labels to test.
 * @returns {boolean} True when the selector is non-empty and every one of its
 *   entries has a strictly equal value in `labels`; an empty selector never matches.
 */
function selectorMatches(selector, labels) {
  const required = Object.entries(selector);
  if (required.length === 0) return false;
  for (const [key, expected] of required) {
    if (labels[key] !== expected) return false;
  }
  return true;
}
|
|
465
|
+
/**
 * Scanner that maps the current kubectl context: the cluster itself, its
 * nodes, services and running pods, plus edges from the cluster to nodes and
 * pods and from services to the pods their label selectors match.
 */
var k8sScanner = {
  id: "k8s",
  title: "Kubernetes resources",
  platforms: "all",
  allowedCommands: ["kubectl"],
  // Available when the `kubectl` executable is found (a ctx-provided probe wins).
  detect: (ctx) => {
    const exists = ctx.commandExists ?? commandExists;
    return Boolean(exists("kubectl"));
  },
  /**
   * @param {object} ctx - Scan context; uses `ctx.hint` (optional `namespace=`
   *   argument) and `ctx.run(command, options)`.
   * @returns {Promise<{ nodes: object[], edges: object[], report: string }>}
   */
  async scan(ctx) {
    const { namespace } = parseScanHint(ctx.hint);
    // Services and pods are limited to the hinted namespace; nodes are not namespaced.
    const scopeFlag = namespace ? `-n ${namespace}` : "--all-namespaces";
    const runK = createScanRunner((command) => ctx.run(command, { timeout: 15e3 }), { threshold: 3 });
    const nodes = [];
    const edges = [];
    const report = [];
    const knownIds = /* @__PURE__ */ new Set();
    // Adds a node once; later nodes with an already-seen id are ignored.
    const register = (node) => {
      if (!knownIds.has(node.id)) {
        knownIds.add(node.id);
        nodes.push(node);
      }
    };
    // Runs a `kubectl get ... -o json` command, records the raw output and
    // returns the `items` of the parsed list (empty when unparseable).
    const listItems = (section, command) => {
      const raw = runK(command);
      report.push([section, raw]);
      return safeJson(raw)?.items ?? [];
    };
    const currentContext = runK("kubectl config current-context").trim();
    report.push(["CONTEXT", currentContext]);
    // Output starting with "(" is not treated as a context name; fall back to "current".
    const clusterName = (currentContext.startsWith("(") ? "" : currentContext) || "current";
    const clusterId = `k8s_cluster:${clusterName}`;
    register({
      id: clusterId,
      type: "k8s_cluster",
      name: clusterName,
      discoveredVia: "kubectl-context",
      confidence: 0.95,
      tags: ["kubernetes", "cluster"],
      metadata: { context: clusterName }
    });
    for (const machine of listItems("NODES", "kubectl get nodes -o json")) {
      const name = String(machine.metadata?.name ?? "");
      if (!name) continue;
      const hostId = `host:k8s:${name}`;
      register({
        id: hostId,
        type: "host",
        name,
        discoveredVia: "kubectl-node",
        confidence: 0.9,
        tags: ["kubernetes", "node"],
        metadata: { cluster: clusterName }
      });
      edges.push({ sourceId: clusterId, targetId: hostId, relationship: "contains", evidence: "kubectl get nodes", confidence: 0.9 });
    }
    const services = listItems("SERVICES", `kubectl get services ${scopeFlag} -o json`);
    for (const service of services) {
      const name = String(service.metadata?.name ?? "");
      const ns = String(service.metadata?.namespace ?? "default");
      if (!name) continue;
      const qualified = `${ns}/${name}`;
      register({
        id: `web_service:k8s:${qualified}`,
        type: "web_service",
        name: qualified,
        discoveredVia: "kubectl-service",
        confidence: 0.9,
        tags: ["kubernetes", "service"],
        metadata: { namespace: ns, type: service.spec?.type, clusterIP: service.spec?.clusterIP }
      });
    }
    const runningPods = listItems("PODS_RUNNING", `kubectl get pods ${scopeFlag} --field-selector=status.phase=Running -o json`);
    if (runningPods.length > MAX_PODS) {
      report.push(["PODS_OVERFLOW", `${runningPods.length} running pods found; first ${MAX_PODS} catalogued`]);
    }
    for (const pod of runningPods.slice(0, MAX_PODS)) {
      const name = String(pod.metadata?.name ?? "");
      const ns = String(pod.metadata?.namespace ?? "default");
      if (!name) continue;
      const qualified = `${ns}/${name}`;
      const podId = `pod:${qualified}`;
      register({
        id: podId,
        type: "pod",
        name: qualified,
        discoveredVia: "kubectl-pod",
        confidence: 0.9,
        tags: ["kubernetes", "pod"],
        metadata: { namespace: ns, node: pod.spec?.nodeName }
      });
      edges.push({ sourceId: clusterId, targetId: podId, relationship: "contains", evidence: "kubectl get pods", confidence: 0.9 });
      const podLabels = pod.metadata?.labels ?? {};
      // Link every same-namespace service whose selector matches this pod's labels.
      for (const service of services) {
        const serviceNs = String(service.metadata?.namespace ?? "default");
        const selector = service.spec?.selector;
        if (serviceNs !== ns || !selector || !selectorMatches(selector, podLabels)) continue;
        const serviceId = `web_service:k8s:${serviceNs}/${String(service.metadata?.name ?? "")}`;
        edges.push({ sourceId: serviceId, targetId: podId, relationship: "connects_to", evidence: "label selector match", confidence: 0.85 });
      }
    }
    return { nodes, edges, report: buildReport(report) };
  }
};
|
|
567
|
+
|
|
568
|
+
// src/scanners/databases.ts
|
|
569
|
+
/**
 * Returns the final component of a path that uses "/" or "\" separators.
 *
 * @param {string} path - POSIX or Windows style path.
 * @returns {string} The text after the last separator, or the whole path when
 *   it ends with a separator or contains none.
 */
function baseName(path) {
  const lastSeparator = Math.max(path.lastIndexOf("/"), path.lastIndexOf("\\"));
  return path.slice(lastSeparator + 1) || path;
}
|
|
573
|
+
/**
 * Scanner for database software on the local machine: PostgreSQL, MySQL,
 * MongoDB and Redis via their CLI clients, SQLite files in application
 * directories, Windows database services, and (with the free-text hint word
 * "deep") a recursive search of HOME for SQLite and database config files.
 */
var databasesScanner = {
  id: "databases",
  title: "Local database servers",
  platforms: "all",
  allowedCommands: ["psql", "mysql", "mongosh", "redis-cli", "sqlite3", "pg_lsclusters", "find", "head", "grep", "awk", "Get-Service", "Get-ChildItem", "Select-Object", "Where-Object"],
  // Available when at least one of the database CLI clients is installed.
  detect: (ctx) => {
    const exists = ctx.commandExists ?? commandExists;
    return ["psql", "mysql", "mongosh", "redis-cli"].some((c) => Boolean(exists(c)));
  },
  /**
   * @param {object} ctx - Scan context; uses `ctx.hint`, `ctx.run(command, options)`
   *   and optionally `ctx.commandExists`.
   * @returns {Promise<{ nodes: object[], edges: object[], report: string }>}
   *   Discovered nodes, an always-empty edge list, and the rendered report.
   */
  async scan(ctx) {
    const exists = ctx.commandExists ?? commandExists;
    // Commands run with a 10 s timeout unless the caller asks for another one.
    const run2 = (cmd, opts) => ctx.run(cmd, { timeout: opts?.timeout ?? 1e4 });
    const deep = parseScanHint(ctx.hint).free.split(/\s+/).includes("deep");
    const nodes = [];
    const edges = [];
    const report = [];
    const seen = /* @__PURE__ */ new Set();
    // Adds a node once; later nodes with an already-seen id are ignored.
    const add = (n) => {
      if (seen.has(n.id)) return;
      seen.add(n.id);
      nodes.push(n);
    };
    const nonEmptyLines = (text) => text.split("\n").map((l) => l.trim()).filter(Boolean);
    // Registers one SQLite node per path in a newline-separated listing.
    // NOTE(review): ids use only the file's base name, so same-named files in
    // different directories collapse into a single node.
    const addSqliteFiles = (listing, discoveredVia, confidence, tags) => {
      for (const path of nonEmptyLines(listing)) {
        const base = baseName(path);
        add({
          id: `database:sqlite:${base}`,
          type: "database",
          name: base,
          discoveredVia,
          confidence,
          tags: [...tags],
          metadata: redactValue({ path })
        });
      }
    };
    if (exists("psql")) {
      const out = IS_WIN ? run2("psql -lqt") : run2(`psql -lqt 2>/dev/null | grep -v "template0\\|template1" | awk '{print $1}' | grep -v "^$\\|^|"`);
      report.push(["POSTGRES_DATABASES", out || "(psql not running or requires auth)"]);
      if (out) {
        const lines = nonEmptyLines(out);
        // The POSIX pipeline already reduces the listing to database names. On
        // Windows the raw `psql -lqt` rows ("name | owner | ...") arrive here,
        // so take the first column and drop continuation rows and templates.
        const databases = IS_WIN ? lines.map((l) => l.split("|")[0].trim()).filter((name) => name && name !== "template0" && name !== "template1") : lines;
        add({
          id: "database_server:postgresql:localhost",
          type: "database_server",
          name: "PostgreSQL (localhost)",
          discoveredVia: "psql",
          confidence: 0.9,
          tags: ["local", "postgresql"],
          metadata: redactValue({ engine: "postgresql", host: "localhost", databases })
        });
      }
      if (!IS_WIN) {
        const clusters = run2("pg_lsclusters 2>/dev/null");
        if (clusters) report.push(["POSTGRES_CLUSTERS", clusters]);
      }
    }
    if (exists("mysql")) {
      const out = IS_WIN ? run2('mysql --connect-timeout=3 -e "SHOW DATABASES;"') : run2('mysql --connect-timeout=3 -e "SHOW DATABASES;" 2>/dev/null');
      report.push(["MYSQL_DATABASES", out || "(mysql not running or requires auth)"]);
      if (out) {
        // "Database" is the column header of the SHOW DATABASES output.
        const databases = nonEmptyLines(out).filter((l) => l !== "Database");
        add({
          id: "database_server:mysql:localhost",
          type: "database_server",
          name: "MySQL (localhost)",
          discoveredVia: "mysql",
          confidence: 0.9,
          tags: ["local", "mysql"],
          metadata: redactValue({ engine: "mysql", host: "localhost", databases })
        });
      }
    }
    if (exists("mongosh")) {
      const evalExpr = "JSON.stringify(db.adminCommand({listDatabases:1}))";
      const out = IS_WIN ? run2(`mongosh --quiet --eval "${evalExpr}"`) : run2(`mongosh --quiet --eval "${evalExpr}" 2>/dev/null`);
      report.push(["MONGODB_DATABASES", out || "(mongosh not available)"]);
      if (out) {
        add({
          id: "database_server:mongodb:localhost",
          type: "database_server",
          name: "MongoDB (localhost)",
          discoveredVia: "mongosh",
          confidence: 0.9,
          tags: ["local", "mongodb"],
          metadata: { engine: "mongodb", host: "localhost" }
        });
      }
    }
    if (exists("redis-cli")) {
      const out = IS_WIN ? run2("redis-cli info server") : run2("redis-cli info server 2>/dev/null | head -5");
      report.push(["REDIS_INFO", out || "(redis-cli not available)"]);
      if (out) {
        add({
          id: "cache_server:redis:localhost",
          type: "cache_server",
          name: "Redis (localhost)",
          discoveredVia: "redis-cli",
          confidence: 0.9,
          tags: ["local", "redis"],
          metadata: { engine: "redis", host: "localhost" }
        });
      }
    }
    const appDirs = dbScanDirs();
    if (appDirs.length > 0) {
      const out = findFiles(appDirs, ["*.sqlite", "*.sqlite3", "*.db"], 4, 80);
      report.push(["SQLITE_APP_FILES", out || "(none found)"]);
      addSqliteFiles(out, "sqlite-file", 0.7, ["local", "sqlite"]);
    }
    if (IS_WIN) {
      report.push(["DB_SERVICES", scanWindowsDbServices() || "(no database services found)"]);
    }
    if (deep) {
      // Both platforms get 30 s for the recursive search; the Windows branch
      // previously fell back to the 10 s default.
      const deepOut = IS_WIN ? run2(
        `Get-ChildItem -Path '${HOME}' -Recurse -Depth 6 -Include '*.sqlite','*.sqlite3','*.db' -ErrorAction SilentlyContinue | Where-Object { $_.FullName -notmatch 'node_modules|\\.git' } | Select-Object -First 100 -ExpandProperty FullName`,
        { timeout: 3e4 }
      ) : run2(`find "${HOME}" -maxdepth 6 \\( -name "*.sqlite" -o -name "*.sqlite3" -o -name "*.db" \\) -not -path "*/node_modules/*" -not -path "*/.git/*" 2>/dev/null | head -100`, { timeout: 3e4 });
      report.push(["SQLITE_DEEP_SCAN", deepOut || "(none found)"]);
      addSqliteFiles(deepOut, "sqlite-deep-scan", 0.6, ["local", "sqlite", "deep"]);
      // Config files are only listed in the report; they produce no nodes.
      const configOut = IS_WIN ? run2(
        `Get-ChildItem -Path '${HOME}' -Recurse -Depth 4 -Include '.env','.env.local','database.yml','database.json','docker-compose.yml' -ErrorAction SilentlyContinue | Select-Object -First 20 -ExpandProperty FullName`,
        { timeout: 15e3 }
      ) : run2(`find "${HOME}" -maxdepth 4 \\( -name ".env" -o -name ".env.local" -o -name "database.yml" -o -name "database.json" -o -name "docker-compose.yml" \\) 2>/dev/null | head -20`, { timeout: 15e3 });
      report.push(["DB_CONFIG_FILES", configOut || "(none found)"]);
    }
    return { nodes, edges, report: buildReport(report) };
  }
};
|
|
708
|
+
|
|
709
|
+
// src/tools.ts
|
|
710
|
+
/**
 * Wrap a command runner with a consecutive-failure circuit breaker.
 *
 * @param {(cmd: string, options: object) => string} runFn - Executes a command; a falsy
 *   return value is counted as a failure.
 * @param {object} [opts]
 * @param {number} [opts.threshold=3] - Consecutive failures after which the breaker trips.
 * @param {number} [opts.timeout=20000] - Timeout forwarded to `runFn`.
 * @param {object} [opts.env] - Environment forwarded to `runFn`.
 * @returns {(cmd: string) => string} Runner returning the command output, or a
 *   parenthesised placeholder when the command failed or was skipped.
 */
function createScanRunner(runFn, opts = {}) {
  const threshold = opts.threshold ?? 3;
  const breaker = { failures: 0, open: false };

  return (cmd) => {
    // Once open, the breaker never closes again for this runner instance.
    if (breaker.open) {
      logDebug(`Circuit breaker: skipping "${cmd}" (${breaker.failures} consecutive failures)`);
      return "(skipped \u2014 circuit breaker: too many consecutive failures)";
    }

    const output = runFn(cmd, { timeout: opts.timeout ?? 2e4, env: opts.env });
    if (output) {
      // Any success resets the streak.
      breaker.failures = 0;
      return output;
    }

    breaker.failures += 1;
    if (breaker.failures >= threshold) {
      breaker.open = true;
      logDebug(`Circuit breaker tripped after ${threshold} failures, last command: "${cmd}"`);
    }
    return "(error or not available)";
  };
}
|
|
732
|
+
/**
 * Sanitize untrusted text and cap its length.
 *
 * @param {string} raw - Text to sanitize (passed through `sanitizeUntrusted`).
 * @param {number} max - Maximum number of characters (string length units) to keep.
 * @returns {string} The sanitized text, truncated to `max` characters and followed by a
 *   notice stating how many characters were omitted when it was too long.
 */
function clampText(raw, max) {
  const clean = sanitizeUntrusted(raw);
  const overflow = clean.length - max;
  if (overflow <= 0) return clean;
  const notice = `\n\n\u2026 [output truncated: ${overflow} more characters omitted \u2014 narrow the scan (e.g. a namespace/region/hint) or query the catalog instead]`;
  return clean.slice(0, max) + notice;
}
|
|
739
|
+
/**
 * Reduce a target string to `host[:port]`, dropping credentials, path and query.
 *
 * Inputs that start with "http" are parsed as-is; anything else is parsed with a
 * synthetic `tcp://` prefix so that bare `host:port` strings work. When URL parsing
 * fails (for example, ids such as `type:engine:host` whose "port" is not numeric),
 * a regex fallback removes the path, the query and any `userinfo@` prefix.
 *
 * @param {string} target - Raw identifier or URL.
 * @returns {string} The stripped value, or the trimmed input when stripping would
 *   leave an empty string.
 */
function stripSensitive(target) {
  const raw = target.trim();
  if (!raw) return raw;
  try {
    const url = new URL(raw.startsWith("http") ? raw : `tcp://${raw}`);
    const stripped = `${url.hostname}${url.port ? ":" + url.port : ""}`;
    return stripped || raw;
  } catch {
    // Fallback for strings the URL parser rejects. Drop everything up to the last "@"
    // (the userinfo) rather than the host: the previous /@.*:/ replacement turned
    // "user:pass@host:port" into "user:pass:port", leaking the secret.
    const stripped = raw
      .replace(/\/.*$/, "")
      .replace(/\?.*$/, "")
      .replace(/^.*@/, "");
    return stripped || raw;
  }
}
|
|
751
|
+
/**
 * Allow-list patterns for scan arguments that are forwarded to scanners, keyed by
 * argument kind. Each pattern is anchored so the whole value must match.
 */
var SCAN_ARG_PATTERNS = {
  "k8s-namespace": /^[a-z0-9]([-a-z0-9]*[a-z0-9])?$/,
  "aws-region": /^[A-Za-z0-9-]+$/,
  "aws-profile": /^[A-Za-z0-9_.-]+$/,
  "gcp-project": /^[a-z0-9][a-z0-9.-]*(:[a-z0-9][a-z0-9-]*)?$/,
  "azure-subscription": /^[0-9a-fA-F-]+$/,
  "azure-resource-group": /^[A-Za-z0-9_.()-]+$/
};

/**
 * Validate a scan argument against the allow-list pattern for its kind.
 *
 * @param {string} kind - One of the keys of `SCAN_ARG_PATTERNS`.
 * @param {string} value - The user-supplied argument.
 * @returns {string} `value`, unchanged, when it is acceptable.
 * @throws {Error} When `kind` is not a known key, or when `value` is not a string
 *   or does not match the pattern for `kind`.
 */
function assertSafeScanArg(kind, value) {
  // Own-property lookup only: an inherited key such as "constructor" must not
  // resolve to a non-regex value and crash with an opaque TypeError.
  const pattern = Object.prototype.hasOwnProperty.call(SCAN_ARG_PATTERNS, kind)
    ? SCAN_ARG_PATTERNS[kind]
    : undefined;
  if (!pattern) {
    throw new Error(`Unknown scan argument kind "${String(kind)}"`);
  }
  // RegExp#test coerces its argument, so `undefined` would be tested as "undefined"
  // and pass several patterns; require a real string.
  if (typeof value !== "string" || !pattern.test(value)) {
    throw new Error(`Invalid ${kind} "${String(value)}": contains characters that are not allowed`);
  }
  return value;
}
|
|
765
|
+
/**
 * Mask the password part of `scheme://user:password@` credentials inside a string.
 *
 * The username may be empty, so password-only DSNs such as `redis://:secret@host`
 * are masked as well.
 *
 * @param {string} value - Text that may contain connection strings.
 * @returns {string} The text with every matched password replaced by `***`.
 */
function redactSecrets(value) {
  // Group 1 keeps "scheme://user" (user possibly empty); the password runs up to the
  // next "@" and may not contain "/", "@" or whitespace.
  return value.replace(/([a-z][a-z0-9+.-]*:\/\/[^:@/\s]*):[^@/\s]+@/gi, "$1:***@");
}
|
|
768
|
+
/**
 * Recursively apply `redactSecrets` to every string inside a value.
 *
 * @param {unknown} value - A string, array, object or any other value.
 * @returns {unknown} Strings are redacted, arrays and objects are rebuilt as new
 *   containers with redacted contents (object keys are left untouched), and all
 *   other values are returned unchanged.
 */
function redactValue(value) {
  if (typeof value === "string") {
    return redactSecrets(value);
  }
  if (Array.isArray(value)) {
    return value.map((item) => redactValue(item));
  }
  if (value === null || typeof value !== "object") {
    return value;
  }
  const copy = {};
  for (const key of Object.keys(value)) {
    copy[key] = redactValue(value[key]);
  }
  return copy;
}
|
|
778
|
+
// Tool annotation presets attached to the tool definitions built below.

// Used by the scan_* tools.
var READ_SCAN = {
  readOnlyHint: true,
  openWorldHint: true
};

// Used by get_catalog and ask_user.
var READ_LOCAL = {
  readOnlyHint: true,
  openWorldHint: false
};

// Used by save_node and save_edge.
var WRITE_CATALOG = {
  readOnlyHint: false,
  destructiveHint: false,
  idempotentHint: true,
  openWorldHint: false
};
|
|
781
|
+
/**
 * Build the list of cartography tool descriptors (name, description, zod input
 * shape, annotations and async handler) bound to one catalog session.
 *
 * @param {object} db - Catalog store; this function calls `upsertNode`, `insertEdge`,
 *   `getNodes` and `getEdges` on it.
 * @param {string} sessionId - Session the catalog reads and writes are scoped to.
 * @param {object} [opts]
 * @param {number} [opts.maxResponseBytes=100000] - Cap passed to `clampText` for
 *   scanner output (compared against string length).
 * @param {(question: string, context?: string) => Promise<string>} [opts.onAskUser] -
 *   Callback used by `ask_user`; without it the tool answers with a
 *   non-interactive placeholder.
 * @returns {Promise<Array<{name: string, description: string, inputShape: object, annotations: object, handler: Function}>>}
 */
async function buildCartographyToolHandlers(db, sessionId, opts = {}) {
  const maxResponseBytes = opts.maxResponseBytes ?? 1e5;

  // Wrap scanner/command output: sanitized and length-capped by clampText.
  const textResult = (raw) => ({ content: [{ type: "text", text: clampText(raw, maxResponseBytes) }] });

  // Run one scanner: detect first, then emit structured nodes/edges plus its report.
  const runScannerTool = async (scanner, hint) => {
    const ctx = { hint, platform: PLATFORM, run };
    if (!await scanner.detect(ctx)) return textResult(`(${scanner.title}: CLI not available)`);
    const result = await scanner.scan(ctx);
    const structured = `=== NODES (${result.nodes.length}) / EDGES (${result.edges.length}) ===\n` + JSON.stringify({ nodes: result.nodes, edges: result.edges });
    return textResult([structured, result.report ?? ""].filter(Boolean).join("\n\n"));
  };

  const tool = (name, description, inputShape, handler, extra) => ({ name, description, inputShape, annotations: extra.annotations, handler });

  const tools = [
    tool("save_node", "Save an infrastructure node to the catalog", {
      id: z.string(),
      type: z.enum(NODE_TYPES),
      name: z.string(),
      discoveredVia: z.string(),
      confidence: z.number().min(0).max(1),
      metadata: z.record(z.string(), z.unknown()).optional(),
      tags: z.array(z.string()).optional(),
      domain: z.string().optional().describe('Business domain, e.g. "Marketing", "Finance"'),
      subDomain: z.string().optional().describe('Sub-domain, e.g. "Forecast client orders"'),
      qualityScore: z.number().min(0).max(100).optional().describe("Data quality score 0\u2013100")
    }, async (args) => {
      const node = {
        id: stripSensitive(args["id"]),
        type: args["type"],
        name: args["name"],
        discoveredVia: args["discoveredVia"],
        confidence: args["confidence"],
        metadata: redactValue(args["metadata"] ?? {}),
        tags: args["tags"] ?? [],
        domain: args["domain"],
        subDomain: args["subDomain"],
        qualityScore: args["qualityScore"]
      };
      db.upsertNode(sessionId, node);
      return { content: [{ type: "text", text: `\u2713 Node: ${node.id}` }] };
    }, { annotations: WRITE_CATALOG }),

    tool("save_edge", "Save a relationship (edge) between two nodes \u2014 ALWAYS save edges when connections are clear", {
      sourceId: z.string(),
      targetId: z.string(),
      relationship: z.enum(EDGE_RELATIONSHIPS),
      evidence: z.string(),
      confidence: z.number().min(0).max(1)
    }, async (args) => {
      // Normalise endpoint ids exactly like save_node normalises node ids. Otherwise
      // an edge can reference an id that was never stored (dangling edge) and can
      // carry credentials or paths that save_node would have stripped.
      const sourceId = stripSensitive(args["sourceId"]);
      const targetId = stripSensitive(args["targetId"]);
      db.insertEdge(sessionId, {
        sourceId,
        targetId,
        relationship: args["relationship"],
        evidence: redactSecrets(args["evidence"]),
        confidence: args["confidence"]
      });
      return { content: [{ type: "text", text: `\u2713 ${sourceId}\u2192${targetId}` }] };
    }, { annotations: WRITE_CATALOG }),

    tool("get_catalog", "Get the current catalog \u2014 use before save_node to avoid duplicates", {
      includeEdges: z.boolean().default(true)
    }, async (args) => {
      const nodes = db.getNodes(sessionId);
      const edges = args["includeEdges"] ? db.getEdges(sessionId) : [];
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            count: { nodes: nodes.length, edges: edges.length },
            nodeIds: nodes.map((n) => n.id)
          })
        }]
      };
    }, { annotations: READ_LOCAL }),

    tool("ask_user", "Ask the user a question \u2014 for clarifications, missing context, or consent (e.g. before scanning browser history)", {
      question: z.string().describe("The question for the user (clear and specific)"),
      context: z.string().optional().describe("Optional context explaining why this is relevant")
    }, async (args) => {
      const question = args["question"];
      const context = args["context"];
      if (opts.onAskUser) {
        const answer = await opts.onAskUser(question, context);
        return { content: [{ type: "text", text: answer }] };
      }
      return {
        content: [{ type: "text", text: "(Non-interactive mode \u2014 please continue without this information)" }]
      };
    }, { annotations: READ_LOCAL }),

    tool("scan_bookmarks", "Scan all browser bookmarks \u2014 hostnames only, no personal data (Chrome, Chromium, Edge, Brave, Vivaldi, Opera, Firefox)", {
      minConfidence: z.number().min(0).max(1).default(0.5).optional()
    }, async () => {
      const hosts = await scanAllBookmarks();
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            count: hosts.length,
            hosts: hosts.map((h) => ({
              hostname: h.hostname,
              port: h.port,
              protocol: h.protocol,
              source: h.source
            })),
            note: "Hostnames only \u2014 no paths, no personal data. Classify each as a business tool (save_node) or ignore (social media, news, shopping)."
          })
        }]
      };
    }, { annotations: READ_SCAN }),

    tool("scan_browser_history", "Scan browser history \u2014 anonymized hostnames + visit frequency. ALWAYS call ask_user for consent before using this tool.", {
      minVisits: z.number().min(1).default(3).optional().describe("Minimum visit count to include a host (filters rarely-visited sites)")
    }, async (args) => {
      const minVisits = args["minVisits"] ?? 3;
      const hosts = await scanAllHistory();
      const filtered = hosts.filter((h) => h.visitCount >= minVisits);
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            count: filtered.length,
            note: "Anonymized \u2014 hostnames only, no URLs, no paths, no personal data. Classify business tools as saas_tool nodes.",
            hosts: filtered.map((h) => ({
              hostname: h.hostname,
              visitCount: h.visitCount,
              protocol: h.protocol,
              source: h.source
            }))
          })
        }]
      };
    }, { annotations: READ_SCAN }),

    tool("scan_local_databases", "Scan for local database files and running DB servers \u2014 PostgreSQL databases, MySQL, SQLite files from installed apps", {
      deep: z.boolean().default(false).optional().describe("Also search home directory recursively for SQLite/DB files (slower)")
    }, async (args) => {
      const deep = args["deep"] ?? false;
      return runScannerTool(databasesScanner, deep ? "deep" : "");
    }, { annotations: READ_SCAN }),

    tool("scan_k8s_resources", "Scan Kubernetes cluster via kubectl \u2014 100% readonly (get, describe)", {
      namespace: z.string().regex(SCAN_ARG_PATTERNS["k8s-namespace"], "invalid Kubernetes namespace").optional().describe("Filter by namespace \u2014 empty = all namespaces")
    }, async (args) => {
      const ns = args["namespace"];
      // Re-validated here in addition to the zod shape above.
      if (ns) assertSafeScanArg("k8s-namespace", ns);
      return runScannerTool(k8sScanner, ns ? `namespace=${ns}` : "");
    }, { annotations: READ_SCAN }),

    tool("scan_aws_resources", "Scan AWS infrastructure via AWS CLI \u2014 100% readonly (describe, list)", {
      region: z.string().regex(SCAN_ARG_PATTERNS["aws-region"], "invalid AWS region").optional().describe("AWS Region \u2014 default: AWS_DEFAULT_REGION or profile"),
      profile: z.string().regex(SCAN_ARG_PATTERNS["aws-profile"], "invalid AWS profile").optional().describe("AWS CLI profile")
    }, async (args) => {
      const region = args["region"];
      const profile = args["profile"];
      if (region) assertSafeScanArg("aws-region", region);
      if (profile) assertSafeScanArg("aws-profile", profile);
      const hint = [region ? `region=${region}` : "", profile ? `profile=${profile}` : ""].filter(Boolean).join(" ");
      return runScannerTool(cloudAwsScanner, hint);
    }, { annotations: READ_SCAN }),

    tool("scan_gcp_resources", "Scan Google Cloud Platform via gcloud CLI \u2014 100% readonly (list, describe)", {
      project: z.string().regex(SCAN_ARG_PATTERNS["gcp-project"], "invalid GCP project id").optional().describe("GCP Project ID \u2014 default: current gcloud project")
    }, async (args) => {
      const project = args["project"];
      if (project) assertSafeScanArg("gcp-project", project);
      return runScannerTool(cloudGcpScanner, project ? `project=${project}` : "");
    }, { annotations: READ_SCAN }),

    tool("scan_azure_resources", "Scan Azure infrastructure via az CLI \u2014 100% readonly (list, show)", {
      subscription: z.string().regex(SCAN_ARG_PATTERNS["azure-subscription"], "invalid Azure subscription id").optional().describe("Azure Subscription ID"),
      resourceGroup: z.string().regex(SCAN_ARG_PATTERNS["azure-resource-group"], "invalid Azure resource group").optional().describe("Filter by resource group")
    }, async (args) => {
      const sub = args["subscription"];
      const rg = args["resourceGroup"];
      if (sub) assertSafeScanArg("azure-subscription", sub);
      if (rg) assertSafeScanArg("azure-resource-group", rg);
      const hint = [sub ? `subscription=${sub}` : "", rg ? `resource-group=${rg}` : ""].filter(Boolean).join(" ");
      return runScannerTool(cloudAzureScanner, hint);
    }, { annotations: READ_SCAN }),

    tool("scan_installed_apps", "Scan all installed apps and tools \u2014 IDEs, office, dev tools, business apps, databases", {
      searchHint: z.string().optional().describe('Optional search term to find specific tools (e.g. "hubspot windsurf cursor")')
    }, async (args) => {
      const hint = args["searchHint"];
      const results = {};
      results["PLATFORM"] = `${PLATFORM} (${IS_WIN ? "Windows" : IS_MAC ? "macOS" : "Linux"})`;

      // Package-manager / app-directory listings, one section per source.
      if (IS_MAC) {
        results["APPLICATIONS"] = run("ls /Applications/ 2>/dev/null | head -200") || "(empty)";
        results["USER_APPLICATIONS"] = run("ls ~/Applications/ 2>/dev/null | head -100") || "(empty)";
        results["BREW_CASKS"] = run("brew list --cask 2>/dev/null | head -100") || "(brew not installed)";
        results["BREW_FORMULAE"] = run("brew list --formula 2>/dev/null | head -150") || "(brew not installed)";
        results["SPOTLIGHT_APPS"] = run(`mdfind "kMDItemKind == 'Application'" 2>/dev/null | grep -v "^/System" | grep -v "^/Library/Apple" | head -100`) || "(Spotlight not available)";
      } else if (IS_LINUX) {
        results["DPKG"] = run("dpkg --list 2>/dev/null | awk '{print $2}' | head -200") || "(dpkg not available)";
        results["SNAP"] = run("snap list 2>/dev/null | head -50") || "(snap not available)";
        results["FLATPAK"] = run("flatpak list 2>/dev/null | head -50") || "(flatpak not available)";
        results["DESKTOP_FILES"] = run("ls /usr/share/applications/*.desktop ~/.local/share/applications/*.desktop 2>/dev/null | xargs -I{} basename {} .desktop 2>/dev/null | head -100") || "(no .desktop files)";
        results["RPM"] = run("rpm -qa 2>/dev/null | head -200") || "(rpm not available)";
      } else if (IS_WIN) {
        results["WINGET"] = run("winget list --accept-source-agreements", { timeout: 2e4 }) || "(winget not available)";
        results["INSTALLED_PROGRAMS"] = scanWindowsPrograms() || "(registry scan failed)";
        results["CHOCO"] = run("choco list --local-only", { timeout: 15e3 }) || "(chocolatey not installed)";
        results["SCOOP"] = run("scoop list", { timeout: 15e3 }) || "(scoop not installed)";
      }

      const knownTools = [
        // IDEs & Editors
        "code", "code-insiders", "cursor", "windsurf", "zed", "vim", "nvim", "emacs", "nano",
        "sublime_text", "atom", "idea", "webstorm", "pycharm", "goland", "datagrip", "clion",
        "rider", "phpstorm", "rubymine", "appcode",
        // Dev Tools
        "git", "gh", "docker", "docker-compose", "podman", "kubectl", "helm", "terraform",
        "ansible", "node", "npm", "npx", "yarn", "pnpm", "bun", "deno", "python", "python3",
        "pip", "pip3", "pipenv", "poetry", "conda", "ruby", "gem", "bundler", "rails", "java",
        "mvn", "gradle", "kotlin", "go", "cargo", "rustc", "php", "composer", "dotnet",
        // Databases
        "psql", "mysql", "mysqladmin", "mongo", "mongosh", "redis-cli", "sqlite3", "clickhouse-client",
        // Cloud CLIs
        "aws", "gcloud", "az", "heroku", "fly", "vercel", "netlify", "wrangler",
        // Infra
        "vagrant", "packer", "consul", "vault", "nomad",
        // Communication / SaaS
        "slack", "discord", "zoom", "teams", "skype", "telegram", "signal",
        // Browsers
        "google-chrome", "chromium", "firefox", "safari", "brave", "opera", "edge",
        // Windows-specific
        ...IS_WIN ? ["pwsh", "powershell", "wsl", "winget", "choco", "scoop", "notepad++"] : [],
        // Monitoring / Analytics
        "datadog-agent", "newrelic-agent", "prometheus", "grafana-cli",
        // Other tools
        "ngrok", "stripe", "supabase", "neon"
      ];

      const found = [];
      const notFound = [];
      for (const t of knownTools) {
        const r = commandExists(t);
        if (r) found.push(`${t}: ${r}`);
        else notFound.push(t);
      }
      results["TOOLS_FOUND"] = found.join("\n") || "(none found)";
      results["TOOLS_NOT_FOUND"] = notFound.join(", ");

      if (hint) {
        const terms = hint.split(/[\s,]+/).filter(Boolean);
        const hintResults = [];
        for (const term of terms) {
          // Only [a-zA-Z0-9._-] survives, so the term is safe to embed in the
          // shell / PowerShell command strings below.
          const safe = term.replace(/[^a-zA-Z0-9._-]/g, "");
          if (!safe) continue;
          const cmdPath = commandExists(safe);
          if (cmdPath) {
            hintResults.push(`${term}: ${cmdPath}`);
            continue;
          }
          let fallback = "";
          if (IS_WIN) {
            fallback = run(
              `Get-ChildItem -Path 'C:\\Program Files','C:\\Program Files (x86)','${HOME}\\AppData\\Local\\Programs' -Recurse -Depth 3 -Filter '*${safe}*' -ErrorAction SilentlyContinue | Select-Object -First 5 -ExpandProperty FullName`,
              { timeout: 1e4 }
            );
          } else if (IS_MAC) {
            fallback = run(`mdfind -name "${safe}" 2>/dev/null | head -5`);
          } else {
            fallback = run(`find /usr/bin /usr/local/bin /opt/homebrew/bin ~/.local/bin /Applications ~/Applications 2>/dev/null -iname "*${safe}*" -maxdepth 3 2>/dev/null | head -5`);
          }
          hintResults.push(fallback ? `${term}: ${fallback}` : `${term}: (not found)`);
        }
        results["HINT_SEARCH"] = hintResults.join("\n");
      }

      const out = Object.entries(results).map(([k, v]) => `=== ${k} ===\n${v}`).join("\n\n");
      return textResult(out);
    }, { annotations: READ_SCAN })
  ];

  return tools;
}
|
|
1130
|
+
/**
 * Convert the cartography tool descriptors into Claude Agent SDK tool definitions.
 *
 * The SDK is imported lazily, inside the function, before the handlers are built.
 *
 * @param {object} db - Catalog store forwarded to `buildCartographyToolHandlers`.
 * @param {string} sessionId - Session id forwarded to `buildCartographyToolHandlers`.
 * @param {object} [opts] - Options forwarded to `buildCartographyToolHandlers`.
 * @returns {Promise<Array>} One SDK tool definition per descriptor, in the same order.
 */
async function buildCartographyToolDefinitions(db, sessionId, opts = {}) {
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
  const descriptors = await buildCartographyToolHandlers(db, sessionId, opts);
  return descriptors.map(({ name, description, inputShape, handler, annotations }) => {
    return sdk.tool(name, description, inputShape, handler, { annotations });
  });
}
|
|
1137
|
+
/**
 * Create an in-process SDK MCP server named "cartography" exposing all cartography tools.
 *
 * @param {object} db - Catalog store forwarded to `buildCartographyToolDefinitions`.
 * @param {string} sessionId - Session id forwarded to `buildCartographyToolDefinitions`.
 * @param {object} [opts] - Options forwarded to `buildCartographyToolDefinitions`.
 * @returns {Promise<object>} The value returned by the SDK's `createSdkMcpServer`.
 */
async function createCartographyTools(db, sessionId, opts = {}) {
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
  const toolDefinitions = await buildCartographyToolDefinitions(db, sessionId, opts);
  const serverConfig = { name: "cartography", version: "0.1.0", tools: toolDefinitions };
  return sdk.createSdkMcpServer(serverConfig);
}
|
|
1142
|
+
|
|
1143
|
+
// src/db.ts
|
|
1144
|
+
import Database from "better-sqlite3";
|
|
1145
|
+
import { mkdirSync } from "fs";
|
|
1146
|
+
import { dirname } from "path";
|
|
1147
|
+
import { createHash } from "crypto";
|
|
1148
|
+
import { z as z3 } from "zod";
|
|
1149
|
+
|
|
1150
|
+
// src/compliance/types.ts
|
|
1151
|
+
import { z as z2 } from "zod";
|
|
1152
|
+
// Names of the node-type groups, used as the allowed values of a rule scope's `groups`.
var NODE_TYPE_GROUP_KEYS = Object.keys(NODE_TYPE_GROUPS);

// Which nodes a rule applies to: by group, by concrete type, or both (each optional).
var RuleScopeSchema = z2.object({
  groups: z2.array(z2.enum(NODE_TYPE_GROUP_KEYS)).optional(),
  types: z2.array(z2.enum(NODE_TYPES)).optional()
});

// Node fields a condition may read.
var FieldPathSchema = z2.enum([
  "type", "name", "domain", "subDomain", "confidence",
  "qualityScore", "owner", "tags", "metadataKeys", "metadataValues"
]);

// Named patterns usable with the "matches" operator.
var PATTERN_NAMES = ["dsn_with_credentials", "owner_key", "public_exposure"];

// A single leaf condition: field + operator, with an optional literal value or named pattern.
var ConditionSchema = z2.object({
  field: FieldPathSchema,
  op: z2.enum(["present", "absent", "lt", "lte", "gt", "gte", "eq", "includes", "matches"]),
  value: z2.union([z2.string(), z2.number()]).optional(),
  pattern: z2.enum(PATTERN_NAMES).optional()
});

// Recursive check tree: a leaf condition or an all / any / not combinator.
// z2.lazy defers evaluation so the schema can refer to itself.
var RuleCheckSchema = z2.lazy(() => {
  return z2.union([
    ConditionSchema,
    z2.object({ all: z2.array(RuleCheckSchema) }),
    z2.object({ any: z2.array(RuleCheckSchema) }),
    z2.object({ not: RuleCheckSchema })
  ]);
});
|
|
1184
|
+
// Severity levels and the numeric weight associated with each.
var SEVERITIES = ["critical", "high", "medium", "low"];
var SeveritySchema = z2.enum(SEVERITIES);
var SEVERITY_WEIGHT = { critical: 4, high: 3, medium: 2, low: 1 };

// One compliance rule: identity, classification, scope and the check to evaluate.
var ComplianceRuleSchema = z2.object({
  id: z2.string(),
  control: z2.string().describe('External control id, e.g. "CIS-1.4"'),
  framework: z2.enum(["CIS", "SOC2", "ISO27001", "baseline"]),
  title: z2.string(),
  severity: SeveritySchema,
  rationale: z2.string(),
  scope: RuleScopeSchema,
  /**
   * Optional applicability predicate (decision #7): a scoped node is only counted
   * when this is absent or evaluates true — so a rule needing a signal that's absent
   * everywhere becomes `not_applicable` rather than failing on sparse data.
   */
  applicableWhen: RuleCheckSchema.optional(),
  check: RuleCheckSchema
});

// A named, versioned collection of at least one rule; rule ids must be unique.
var RulesetSchema = z2.object({
  name: z2.string(),
  version: z2.string(),
  framework: z2.string(),
  description: z2.string(),
  rules: z2.array(ComplianceRuleSchema).min(1)
}).superRefine((ruleset, ctx) => {
  const seenIds = /* @__PURE__ */ new Set();
  ruleset.rules.forEach(({ id }) => {
    // Report every repeated occurrence of an id, not just the first repeat.
    if (seenIds.has(id)) {
      ctx.addIssue({ code: "custom", message: `duplicate rule id: ${id}`, path: ["rules"] });
    }
    seenIds.add(id);
  });
});
|
|
1216
|
+
// Outcome of evaluating one rule against the topology.
var ControlResultSchema = z2.object({
  ruleId: z2.string(),
  control: z2.string(),
  framework: z2.string(),
  severity: SeveritySchema,
  status: z2.enum(["pass", "fail", "not_applicable"]),
  applicableCount: z2.number().int(),
  passedCount: z2.number().int(),
  failingNodeIds: z2.array(z2.string())
});

// Full report: overall score and status, aggregate counts, per-control results and gaps.
var ComplianceReportSchema = z2.object({
  rulesetName: z2.string(),
  rulesetVersion: z2.string(),
  generatedAt: z2.string(),
  // Nullable: a report may carry no score at all.
  score: z2.number().min(0).max(100).nullable(),
  status: z2.enum(["pass", "fail", "not_applicable"]),
  totals: z2.object({
    rules: z2.number(),
    applicable: z2.number(),
    passed: z2.number(),
    failed: z2.number(),
    notApplicable: z2.number()
  }),
  bySeverity: z2.record(
    SeveritySchema,
    z2.object({ passed: z2.number(), failed: z2.number() })
  ),
  controls: z2.array(ControlResultSchema),
  gaps: z2.array(
    z2.object({
      ruleId: z2.string(),
      control: z2.string(),
      severity: SeveritySchema,
      title: z2.string(),
      nodeIds: z2.array(z2.string())
    })
  )
});
|
|
1249
|
+
|
|
1250
|
+
// src/compliance/engine.ts
|
|
1251
|
+
// Whole-string, case-insensitive match for keys that name an owner
// (owner, team, maintainer, contact, owned-by / owned_by / ownedby).
var OWNER_KEY_RE = /^(owner|team|maintainer|contact|owned[-_]?by)$/i;

// Case-insensitive substring match for public-exposure signals: the address 0.0.0.0
// (optionally "/0") not preceded by a digit, or the words public / internet / exposed.
var PUBLIC_EXPOSURE_RE = /(^|[^0-9])0\.0\.0\.0(\/0)?|public|internet|exposed/i;
|
|
1253
|
+
/**
 * Read the value a condition refers to from a node.
 *
 * @param {object} node - Catalog node.
 * @param {string} field - One of the field paths accepted by `FieldPathSchema`.
 * @returns {unknown} The node property for plain fields; the list of metadata keys
 *   for "metadataKeys"; the metadata values for "metadataValues", with non-string
 *   values JSON-stringified; `undefined` for any other field name.
 */
function readField(node, field) {
  if (field === "metadataKeys") {
    return Object.keys(node.metadata ?? {});
  }
  if (field === "metadataValues") {
    const values = Object.values(node.metadata ?? {});
    return values.map((entry) => (typeof entry === "string" ? entry : JSON.stringify(entry)));
  }
  // Every remaining supported field maps to the node property of the same name.
  const directFields = ["type", "name", "domain", "subDomain", "confidence", "qualityScore", "owner", "tags"];
  return directFields.includes(field) ? node[field] : void 0;
}
|
|
1277
|
+
/**
 * Decide whether a field value counts as "present".
 *
 * @param {unknown} value
 * @returns {boolean} `false` for `undefined`, `null`, empty arrays and empty strings;
 *   `true` for everything else (including `0` and `false`).
 */
function isPresent(value) {
  if (value == null) return false;
  const hasLength = Array.isArray(value) || typeof value === "string";
  return hasLength ? value.length > 0 : true;
}
|
|
1283
|
+
/**
 * Test a field value against one of the named patterns.
 *
 * @param {unknown} value - A string, or an array whose string elements are tested.
 * @param {string} pattern - "dsn_with_credentials", "owner_key" or "public_exposure".
 * @returns {boolean|undefined} For arrays, whether any string element matches. For
 *   strings, the match result (`undefined` when `pattern` is not a known name).
 *   `false` for any other value type.
 */
function matchesPattern(value, pattern) {
  const hits = (text) => {
    if (pattern === "dsn_with_credentials") {
      // A credential was present if redaction changes the string.
      return redactSecrets(text) !== text;
    }
    if (pattern === "owner_key") return OWNER_KEY_RE.test(text);
    if (pattern === "public_exposure") return PUBLIC_EXPOSURE_RE.test(text);
    return void 0;
  };
  if (Array.isArray(value)) {
    return value.some((item) => typeof item === "string" && hits(item));
  }
  return typeof value === "string" && hits(value);
}
|
|
1298
|
+
/**
 * Evaluate one leaf condition against a node.
 *
 * @param {object} node - Catalog node.
 * @param {{field: string, op: string, value?: string|number, pattern?: string}} cond
 * @returns {boolean|undefined} The result of the operator. Numeric comparisons are
 *   `false` unless both sides are numbers; "includes" is `false` unless the field is
 *   an array and a value is given; "matches" is `false` when no pattern is given.
 *   An unrecognised operator yields `undefined`.
 */
function evaluateCondition(node, cond) {
  const actual = readField(node, cond.field);
  const operator = cond.op;

  if (operator === "present") return isPresent(actual);
  if (operator === "absent") return !isPresent(actual);

  if (operator === "lt" || operator === "lte" || operator === "gt" || operator === "gte") {
    // Ordering is only defined between two numbers; no coercion is attempted.
    if (typeof actual !== "number" || typeof cond.value !== "number") return false;
    if (operator === "lt") return actual < cond.value;
    if (operator === "lte") return actual <= cond.value;
    if (operator === "gt") return actual > cond.value;
    return actual >= cond.value;
  }

  if (operator === "eq") return actual === cond.value;
  if (operator === "includes") {
    return Array.isArray(actual) && cond.value !== void 0 && actual.includes(cond.value);
  }
  if (operator === "matches") {
    return cond.pattern !== void 0 && matchesPattern(actual, cond.pattern);
  }
  return void 0;
}
|
|
1321
|
+
/**
 * Evaluate a check tree against a node.
 *
 * @param {object} node - Catalog node.
 * @param {object} check - `{ all: [...] }`, `{ any: [...] }`, `{ not: check }`, or a
 *   leaf condition (anything without those keys).
 * @returns {boolean|undefined} `all` requires every child to hold (true when empty),
 *   `any` requires at least one (false when empty), `not` negates its child; a leaf
 *   returns whatever `evaluateCondition` returns.
 */
function evaluateCheck(node, check) {
  const holds = (child) => evaluateCheck(node, child);
  if ("all" in check) {
    return check.all.every(holds);
  }
  if ("any" in check) {
    return check.any.some(holds);
  }
  if ("not" in check) {
    return !holds(check.not);
  }
  return evaluateCondition(node, check);
}
|
|
1327
|
+
/**
 * Resolve a rule's scope to the set of node types it covers.
 *
 * @param {{scope: {groups?: string[], types?: string[]}}} rule
 * @returns {Set<string>|null} `null` when the scope names neither groups nor types;
 *   otherwise the union of every type in the named groups (looked up in
 *   `NODE_TYPE_GROUPS`) and the explicitly listed types.
 */
function scopedTypes(rule) {
  const scope = rule.scope;
  const hasGroups = Boolean(scope.groups && scope.groups.length !== 0);
  const hasTypes = Boolean(scope.types && scope.types.length !== 0);
  if (!hasGroups && !hasTypes) return null;

  const covered = /* @__PURE__ */ new Set();
  for (const groupName of scope.groups ?? []) {
    for (const typeName of NODE_TYPE_GROUPS[groupName]) {
      covered.add(typeName);
    }
  }
  for (const typeName of scope.types ?? []) {
    covered.add(typeName);
  }
  return covered;
}
|
|
1335
|
+
/**
 * Select the nodes a rule applies to.
 *
 * @param {object[]} nodes - Candidate nodes.
 * @param {object} rule - Rule providing `scope` and, optionally, `applicableWhen`.
 * @returns {object[]} Nodes whose type is in scope (every type when the scope is
 *   empty) and for which `applicableWhen`, if defined, evaluates truthy.
 */
function applicableNodes(nodes, rule) {
  const allowedTypes = scopedTypes(rule);
  const predicate = rule.applicableWhen;
  return nodes.filter((candidate) => {
    const inScope = allowedTypes === null || allowedTypes.has(candidate.type);
    if (!inScope) return false;
    return predicate === void 0 || evaluateCheck(candidate, predicate);
  });
}
|
|
1341
|
+
/**
 * Evaluate one rule over the topology and summarise the outcome.
 *
 * @param {{nodes: object[]}} input - Topology; only `nodes` is read here.
 * @param {object} rule - Compliance rule.
 * @returns {object} Control result: "not_applicable" when no node is applicable,
 *   otherwise "pass" or "fail" with the counts and the sorted ids of failing nodes.
 */
function evaluateRule(input, rule) {
  const { id: ruleId, control, framework, severity } = rule;
  const candidates = applicableNodes(input.nodes, rule);

  if (candidates.length === 0) {
    return {
      ruleId,
      control,
      framework,
      severity,
      status: "not_applicable",
      applicableCount: 0,
      passedCount: 0,
      failingNodeIds: []
    };
  }

  const failingNodeIds = [];
  for (const candidate of candidates) {
    if (!evaluateCheck(candidate, rule.check)) failingNodeIds.push(candidate.id);
  }
  // Default (lexicographic) sort keeps the output order stable across runs.
  failingNodeIds.sort();

  const status = failingNodeIds.length === 0 ? "pass" : "fail";
  return {
    ruleId,
    control,
    framework,
    severity,
    status,
    applicableCount: candidates.length,
    passedCount: candidates.length - failingNodeIds.length,
    failingNodeIds
  };
}
|
|
1356
|
+
/**
 * Scores a topology against a ruleset.
 *
 * Nodes and rules are sorted by id before evaluation so the report is
 * independent of input order. Rules with no applicable nodes are counted as
 * `notApplicable` and excluded from the score. The score is the
 * severity-weighted share of applicable rules that passed, as a rounded
 * percentage, or `null` when no rule is applicable.
 *
 * @param {{nodes: object[], edges: object[]}} input - Topology to score.
 * @param {{name: string, version: string, rules: object[]}} ruleset - Rules to apply.
 * @param {{now?: string}} [opts] - `now` overrides the `generatedAt` timestamp.
 * @returns {object} Report with `score`, `status`, `totals`, `bySeverity`,
 *   per-rule `controls`, and `gaps` (one entry per failed rule).
 */
function scoreTopology(input, ruleset, opts) {
  const byId = (a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
  const nodes = [...input.nodes].sort(byId);
  const rules = [...ruleset.rules].sort(byId);
  const controls = rules.map((r) => evaluateRule({ nodes, edges: input.edges }, r));
  const bySeverity = Object.fromEntries(SEVERITIES.map((s) => [s, { passed: 0, failed: 0 }]));
  let applicable = 0;
  let passed = 0;
  let failed = 0;
  let notApplicable = 0;
  let weightTotal = 0;
  let weightPassed = 0;
  const gaps = [];
  controls.forEach((c, index) => {
    if (c.status === "not_applicable") {
      notApplicable++;
      return;
    }
    const w = SEVERITY_WEIGHT[c.severity];
    applicable++;
    weightTotal += w;
    if (c.status === "pass") {
      passed++;
      weightPassed += w;
      bySeverity[c.severity].passed++;
    } else {
      failed++;
      bySeverity[c.severity].failed++;
      // controls[index] was produced from rules[index], so read the title
      // positionally instead of searching by id: this avoids a linear scan per
      // failure and stays correct even if two rules share an id.
      gaps.push({ ruleId: c.ruleId, control: c.control, severity: c.severity, title: rules[index].title, nodeIds: c.failingNodeIds });
    }
  });
  const score = applicable === 0 ? null : Math.round(100 * weightPassed / weightTotal);
  const status = applicable === 0 ? "not_applicable" : failed === 0 ? "pass" : "fail";
  return {
    rulesetName: ruleset.name,
    rulesetVersion: ruleset.version,
    generatedAt: opts?.now ?? (/* @__PURE__ */ new Date()).toISOString(),
    score,
    status,
    totals: { rules: rules.length, applicable, passed, failed, notApplicable },
    bySeverity,
    controls,
    gaps
  };
}
|
|
1397
|
+
|
|
1398
|
+
// src/diff.ts
|
|
1399
|
+
// Identity key for an edge: source, target and relationship joined by NUL.
var edgeKey = ({ sourceId, targetId, relationship }) => `${sourceId}\0${targetId}\0${relationship}`;
|
|
1400
|
+
/**
 * Serialises a value to a JSON-like string with object keys in sorted order,
 * so two structurally equal values produce the same text regardless of key
 * insertion order.
 *
 * Primitives go through `JSON.stringify`; anything it renders as `undefined`
 * becomes the text "null".
 *
 * @param {*} value - Value to serialise.
 * @returns {string} Deterministic string form of `value`.
 */
function stableStringify(value) {
  const isContainer = value !== null && typeof value === "object";
  if (!isContainer) {
    const encoded = JSON.stringify(value);
    return encoded ?? "null";
  }
  if (Array.isArray(value)) {
    const items = value.map((item) => stableStringify(item));
    return "[" + items.join(",") + "]";
  }
  const members = Object.keys(value)
    .sort()
    .map((key) => JSON.stringify(key) + ":" + stableStringify(value[key]));
  return "{" + members.join(",") + "}";
}
|
|
1406
|
+
/**
 * Lists which of the tracked drift fields differ between two node versions.
 *
 * `tags` are compared as an unordered collection, `metadata` and `cost` by
 * their key-order-independent serialisation, and every other field by strict
 * equality.
 *
 * @param {object} a - Earlier node.
 * @param {object} b - Later node.
 * @returns {string[]} Names of the changed fields, in DRIFT_FIELDS order.
 */
function driftedFields(a, b) {
  const tagSignature = (node) => [...node.tags].sort().join("\0");
  const changed = [];
  for (const field of DRIFT_FIELDS) {
    let differs;
    switch (field) {
      case "tags":
        differs = tagSignature(a) !== tagSignature(b);
        break;
      case "metadata":
      case "cost":
        differs = stableStringify(a[field]) !== stableStringify(b[field]);
        break;
      default:
        differs = a[field] !== b[field];
    }
    if (differs) {
      changed.push(field);
    }
  }
  return changed;
}
|
|
1419
|
+
/**
 * Computes the difference between two topologies.
 *
 * Nodes are matched by `id`; a matched node counts as changed when any tracked
 * drift field differs. Edges are matched by their source/target/relationship
 * key and are only ever added, removed or unchanged.
 *
 * @param {{nodes: object[], edges: object[]}} base - Earlier topology.
 * @param {{nodes: object[], edges: object[]}} current - Later topology.
 * @returns {object} `nodes` and `edges` detail plus a numeric `summary`.
 */
function diffTopology(base, current) {
  const indexBy = (items, keyOf) => new Map(items.map((item) => [keyOf(item), item]));

  const previousNodes = indexBy(base.nodes, (n) => n.id);
  const currentNodes = indexBy(current.nodes, (n) => n.id);
  const added = [];
  const changed = [];
  let unchangedNodes = 0;
  currentNodes.forEach((after, id) => {
    const before = previousNodes.get(id);
    if (!before) {
      added.push(after);
      return;
    }
    const changedFields = driftedFields(before, after);
    if (changedFields.length === 0) {
      unchangedNodes++;
      return;
    }
    changed.push({ id, before, after, changedFields, confidenceDelta: after.confidence - before.confidence });
  });
  const removed = [...previousNodes]
    .filter(([id]) => !currentNodes.has(id))
    .map(([, node]) => node);

  const previousEdges = indexBy(base.edges, (e) => edgeKey(e));
  const currentEdges = indexBy(current.edges, (e) => edgeKey(e));
  const edgesAdded = [];
  let unchangedEdges = 0;
  currentEdges.forEach((edge, key) => {
    if (previousEdges.has(key)) {
      unchangedEdges++;
    } else {
      edgesAdded.push(edge);
    }
  });
  const edgesRemoved = [...previousEdges]
    .filter(([key]) => !currentEdges.has(key))
    .map(([, edge]) => edge);

  return {
    nodes: { added, removed, changed, unchanged: unchangedNodes },
    edges: { added: edgesAdded, removed: edgesRemoved, unchanged: unchangedEdges },
    summary: {
      nodesAdded: added.length,
      nodesRemoved: removed.length,
      nodesChanged: changed.length,
      edgesAdded: edgesAdded.length,
      edgesRemoved: edgesRemoved.length
    }
  };
}
|
|
1466
|
+
|
|
1467
|
+
// src/anomaly.ts
|
|
1468
|
+
// Every node type that belongs to at least one NODE_TYPE_GROUPS group.
var MANAGED_TYPES = new Set(Object.values(NODE_TYPE_GROUPS).flatMap((groupTypes) => groupTypes));
|
|
1469
|
+
/**
 * Tells whether a node lacks a business domain.
 *
 * @param {{domain?: string|null}} n - Node to inspect.
 * @returns {boolean} True when `domain` is null/undefined, empty, or the
 *   placeholder "(none)".
 */
function hasNoDomain(n) {
  const { domain } = n;
  if (domain == null) {
    return true;
  }
  return domain === "" || domain === "(none)";
}
|
|
1472
|
+
/**
 * Flags nodes with no or very few edges.
 *
 * A node whose degree is 0 (or missing from `degree`) is reported with high
 * severity; a node whose degree is at most `thresholds.orphanWeakDegree` is
 * reported with low severity.
 *
 * @param {object[]} nodes - Nodes to inspect.
 * @param {Map<string, number>} degree - Edge count per node id.
 * @param {{orphanWeakDegree: number}} [thresholds] - Anomaly thresholds.
 * @returns {object[]} One "orphan" finding per flagged node, in input order.
 */
function detectOrphans(nodes, degree, thresholds = DEFAULT_ANOMALY_THRESHOLDS) {
  const findings = [];
  for (const { id } of nodes) {
    const edgeCount = degree.get(id) ?? 0;
    const isIsolated = edgeCount === 0;
    if (!isIsolated && !(edgeCount <= thresholds.orphanWeakDegree)) {
      continue;
    }
    findings.push({
      nodeId: id,
      kind: "orphan",
      severity: isIsolated ? "high" : "low",
      reason: isIsolated ? "zero-degree node (no edges)" : `weakly-connected node (degree ${edgeCount})`
    });
  }
  return findings;
}
|
|
1484
|
+
/**
 * Flags nodes that look like shadow IT.
 *
 * A node whose type is not in MANAGED_TYPES is always reported (medium). A
 * managed node is reported only when it has no business domain and its
 * confidence and/or quality score fall below the thresholds: high severity when
 * both are low, medium when only one is.
 *
 * @param {object[]} nodes - Nodes to inspect.
 * @param {{shadowConfidence: number, shadowQuality: number}} [thresholds] - Anomaly thresholds.
 * @returns {object[]} "shadow-it" findings in input order.
 */
function detectShadowIt(nodes, thresholds = DEFAULT_ANOMALY_THRESHOLDS) {
  const findings = [];
  const report = (nodeId, severity, reason) => {
    findings.push({ nodeId, kind: "shadow-it", severity, reason });
  };
  for (const n of nodes) {
    if (!MANAGED_TYPES.has(n.type)) {
      report(n.id, "medium", `unmanaged node type "${n.type}"`);
      continue;
    }
    if (!hasNoDomain(n)) {
      continue;
    }
    const lowConf = n.confidence < thresholds.shadowConfidence;
    // A missing quality score is never treated as low quality.
    const lowQual = n.qualityScore != null && n.qualityScore < thresholds.shadowQuality;
    if (!lowConf && !lowQual) {
      continue;
    }
    const parts = [];
    if (lowConf) {
      parts.push(`low confidence ${n.confidence.toFixed(2)}`);
    }
    if (lowQual) {
      parts.push(`low quality ${n.qualityScore}`);
    }
    report(n.id, lowConf && lowQual ? "high" : "medium", `${parts.join(" and ")} with no business domain`);
  }
  return findings;
}
|
|
1505
|
+
/**
 * Runs the orphan and shadow-IT detectors and returns their findings merged,
 * ordered by node id and then by kind (both via `localeCompare`).
 *
 * @param {object[]} nodes - Nodes to inspect.
 * @param {Map<string, number>} degree - Edge count per node id.
 * @param {object} [thresholds] - Anomaly thresholds passed to both detectors.
 * @returns {object[]} Sorted findings.
 */
function detectAnomalies(nodes, degree, thresholds = DEFAULT_ANOMALY_THRESHOLDS) {
  const orphanFindings = detectOrphans(nodes, degree, thresholds);
  const shadowFindings = detectShadowIt(nodes, thresholds);
  const byNodeThenKind = (left, right) => {
    const byNode = left.nodeId.localeCompare(right.nodeId);
    return byNode !== 0 ? byNode : left.kind.localeCompare(right.kind);
  };
  return orphanFindings.concat(shadowFindings).sort(byNodeThenKind);
}
|
|
1508
|
+
/**
 * Returns the findings in `current` whose (nodeId, kind) pair does not occur
 * in `base`.
 *
 * @param {object[]} base - Findings from the earlier run.
 * @param {object[]} current - Findings from the later run.
 * @returns {object[]} Findings new to `current`, in their original order.
 */
function newAnomalies(base, current) {
  const keyOf = ({ nodeId, kind }) => `${nodeId}|${kind}`;
  const known = new Set(base.map((finding) => keyOf(finding)));
  return current.filter((finding) => !known.has(keyOf(finding)));
}
|
|
1512
|
+
|
|
1513
|
+
// src/db.ts
|
|
1514
|
+
// Tenant used whenever none is supplied or the supplied one is unusable.
var DEFAULT_TENANT = "local";
/**
 * Normalises a caller-supplied tenant identifier.
 *
 * The value is stringified, passed through `sanitizeUntrusted`, trimmed and
 * capped at 128 characters. It is accepted only if it then consists solely of
 * word characters and `.@:+-`.
 *
 * @param {*} raw - Tenant value from the caller.
 * @returns {string} The cleaned tenant, or DEFAULT_TENANT when `raw` is
 *   null/undefined or fails validation.
 */
function normalizeTenant(raw) {
  if (raw === null || raw === void 0) {
    return DEFAULT_TENANT;
  }
  const candidate = sanitizeUntrusted(String(raw)).trim().slice(0, 128);
  const isValid = /^[\w.@:+-]{1,128}$/.test(candidate);
  if (!isValid) {
    return DEFAULT_TENANT;
  }
  return candidate;
}
|
|
1520
|
+
// Matches a trailing ":80" or ":443".
var DEFAULT_PORTS = /:(80|443)$/;
/**
 * Canonicalises a node id: trims it, lower-cases it, collapses each whitespace
 * run to one space, and drops a trailing ":80" / ":443".
 *
 * @param {string} id - Raw identifier.
 * @returns {string} Normalised identifier.
 */
function normalizeId(id) {
  const lowered = id.trim().toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ");
  return singleSpaced.replace(DEFAULT_PORTS, "");
}
|
|
1524
|
+
// Metadata keys that are copied by keyMetaOf.
var KEY_META_KEYS = ["host", "port", "path", "url"];
/**
 * Extracts the KEY_META_KEYS entries from a metadata object, skipping any that
 * are null or undefined.
 *
 * @param {object} metadata - Node metadata.
 * @returns {object} New object holding only the present key-metadata entries.
 */
function keyMetaOf(metadata) {
  return KEY_META_KEYS.reduce((picked, key) => {
    const value = metadata[key];
    if (value !== null && value !== void 0) {
      picked[key] = value;
    }
    return picked;
  }, {});
}
|
|
1532
|
+
/**
 * Derives a 32-hex-character fingerprint from a node's type, its trimmed and
 * lower-cased name, and its key metadata (serialised as sorted `key=value`
 * pairs). The value is the first half of a SHA-256 digest.
 *
 * @param {string} type - Node type.
 * @param {string} name - Node name.
 * @param {object} keyMeta - Key metadata entries.
 * @returns {string} 32 lowercase hex characters.
 */
function contentHash(type, name, keyMeta) {
  const pairs = [];
  for (const key of Object.keys(keyMeta).sort()) {
    pairs.push(`${key}=${String(keyMeta[key])}`);
  }
  const payload = `${type}\u241E${name.trim().toLowerCase()}\u241E${pairs.join("|")}`;
  const digest = createHash("sha256").update(payload).digest("hex");
  return digest.slice(0, 32);
}
|
|
1537
|
+
/**
 * Builds an organisation-qualified id of the form `<org>:<normalised id>`.
 *
 * @param {string|null|undefined} organization - Organisation name; a missing or
 *   blank value is replaced by "_".
 * @param {string} id - Raw node id, normalised before use.
 * @returns {string} The qualified id.
 */
function globalId(organization, id) {
  const trimmedOrg = organization ? organization.trim() : "";
  const scope = trimmedOrg ? trimmedOrg : "_";
  return [scope, normalizeId(id)].join(":");
}
|
|
1541
|
+
/**
 * Parses JSON text, returning `fallback` instead of throwing when the text is
 * not valid JSON.
 *
 * @param {string} raw - JSON text.
 * @param {*} fallback - Value returned on a parse failure.
 * @returns {*} The parsed value or `fallback`.
 */
function safeJsonParse(raw, fallback) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Deliberately best-effort: malformed input yields the fallback.
    return fallback;
  }
  return parsed;
}
|
|
1548
|
+
// Shape of a raw `sessions` row; field names are the snake_case column names
// declared for that table in SCHEMA. `mode` admits only "discover", matching
// the table's CHECK constraint. NOTE(review): `z3` is presumably the bundler's
// alias for zod — confirm.
var SessionRowSchema = z3.object({
  id: z3.string(),
  mode: z3.literal("discover"),
  started_at: z3.string(),
  completed_at: z3.string().nullable().optional(),
  config: z3.string(),
  name: z3.string().nullable().optional(),
  // Falls back to DEFAULT_TENANT when the field is absent.
  tenant: z3.string().default(DEFAULT_TENANT),
  hostname: z3.string().nullable().optional(),
  user: z3.string().nullable().optional(),
  machine_id: z3.string().nullable().optional(),
  organization: z3.string().nullable().optional(),
  last_scanned_at: z3.string().nullable().optional()
});
|
|
1562
|
+
// Shape of a raw `nodes` row; field names are the snake_case column names
// declared for that table in SCHEMA. `type` must be one of NODE_TYPES.
var NodeRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  type: z3.enum(NODE_TYPES),
  name: z3.string(),
  discovered_via: z3.string().nullable().optional(),
  discovered_at: z3.string(),
  path_id: z3.string().nullable().optional(),
  depth: z3.number().default(0),
  confidence: z3.number().default(0.5),
  // Kept as strings here; the "{}" / "[]" defaults suggest JSON text that is
  // decoded elsewhere — TODO confirm against the row mapper.
  metadata: z3.string().default("{}"),
  tags: z3.string().default("[]"),
  domain: z3.string().nullable().optional(),
  sub_domain: z3.string().nullable().optional(),
  quality_score: z3.number().nullable().optional(),
  owner: z3.string().nullable().optional(),
  cost: z3.string().nullable().optional(),
  global_id: z3.string().nullable().optional(),
  content_hash: z3.string().nullable().optional()
});
|
|
1582
|
+
// Shape of a raw `node_contributors` row; field names match that table's
// columns in SCHEMA. Only `organization` may be null or absent.
var ContributorRowSchema = z3.object({
  global_id: z3.string(),
  machine_id: z3.string(),
  hostname: z3.string(),
  user: z3.string(),
  organization: z3.string().nullable().optional(),
  at: z3.string(),
  confidence: z3.number().default(0.5)
});
|
|
1591
|
+
// Shape of a raw `drift_runs` row; field names match that table's columns in
// SCHEMA. The five counters are required numbers.
var DriftRunRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  base_session_id: z3.string().nullable().optional(),
  ran_at: z3.string(),
  nodes_added: z3.number(),
  nodes_removed: z3.number(),
  nodes_changed: z3.number(),
  edges_added: z3.number(),
  edges_removed: z3.number(),
  // Stored as a string — presumably the serialised diff; verify against the writer.
  delta: z3.string()
});
|
|
1603
|
+
// Shape of a raw `edges` row; field names match that table's columns in
// SCHEMA. `relationship` must be one of EDGE_RELATIONSHIPS.
var EdgeRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  source_id: z3.string(),
  target_id: z3.string(),
  relationship: z3.enum(EDGE_RELATIONSHIPS),
  evidence: z3.string().nullable().optional(),
  confidence: z3.number().default(0.5),
  discovered_at: z3.string()
});
|
|
1613
|
+
// Shape of a raw `pending_shares` row; field names match that table's columns
// in SCHEMA. The `kind`, `status` and `decided_by` enums list the same values
// as the table's CHECK constraints.
var PendingShareRowSchema = z3.object({
  content_hash: z3.string(),
  session_id: z3.string(),
  node_id: z3.string().nullable().optional(),
  kind: z3.enum(["node", "edge"]),
  payload: z3.string(),
  status: z3.enum(["pending", "approved", "shared", "withheld"]),
  decided_by: z3.enum(["user", "rule"]).nullable().optional(),
  created_at: z3.string(),
  decided_at: z3.string().nullable().optional(),
  shared_at: z3.string().nullable().optional()
});
|
|
1625
|
+
// Shape of a raw `activity_events` row; field names match that table's columns
// in SCHEMA. `id`, `session_id`, `timestamp`, `event_type`, `process` and
// `pid` are required; everything else may be null or absent.
var EventRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  task_id: z3.string().nullable().optional(),
  timestamp: z3.string(),
  event_type: z3.string(),
  process: z3.string(),
  pid: z3.number(),
  target: z3.string().nullable().optional(),
  target_type: z3.string().nullable().optional(),
  port: z3.number().nullable().optional(),
  duration_ms: z3.number().nullable().optional(),
  command: z3.string().nullable().optional(),
  result_bytes: z3.number().nullable().optional()
});
|
|
1640
|
+
// Shape of a raw `tasks` row; field names match that table's columns in
// SCHEMA. `status` lists the same values as the table's CHECK constraint.
var TaskRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  description: z3.string().nullable().optional(),
  started_at: z3.string(),
  completed_at: z3.string().nullable().optional(),
  // String columns defaulting to "[]" — presumably JSON arrays; TODO confirm.
  steps: z3.string().default("[]"),
  involved_services: z3.string().default("[]"),
  status: z3.enum(["active", "completed", "cancelled"])
});
|
|
1650
|
+
// Shape of a raw `workflows` row; field names match that table's columns in
// SCHEMA. `occurrences` defaults to 1 when absent.
var WorkflowRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  name: z3.string().nullable().optional(),
  pattern: z3.string(),
  // String columns defaulting to "[]" — presumably JSON arrays; TODO confirm.
  task_ids: z3.string().default("[]"),
  occurrences: z3.number().default(1),
  first_seen: z3.string(),
  last_seen: z3.string(),
  avg_duration_ms: z3.number().nullable().optional(),
  involved_services: z3.string().default("[]")
});
|
|
1662
|
+
// Shape of a raw `connections` row; field names match that table's columns in
// SCHEMA. Only `type` may be null or absent.
var ConnectionRowSchema = z3.object({
  id: z3.string(),
  session_id: z3.string(),
  source_asset_id: z3.string(),
  target_asset_id: z3.string(),
  type: z3.string().nullable().optional(),
  created_at: z3.string()
});
|
|
1670
|
+
/**
 * Finds the first NODE_TYPE_GROUPS group whose member list contains `type`.
 *
 * @param {string} type - Node type to classify.
 * @returns {string} The group name, or "other" when no group lists the type.
 */
function typeGroup(type) {
  const match = Object.entries(NODE_TYPE_GROUPS).find(([, members]) => members.includes(type));
  return match ? match[0] : "other";
}
|
|
1676
|
+
/**
 * Builds a display name for a session from its summary, in the form
 * `<group>+<group> · <count> node(s) · <YYYY-MM-DD>`.
 *
 * The groups are the two type groups with the most nodes (ties broken by
 * name). A session with zero nodes is named `empty · 0 nodes · <date>`.
 *
 * @param {{totals: {nodes: number}, nodesByType: Object<string, number>}} summary - Session summary.
 * @param {string} startedAt - Start timestamp; its first 10 characters are used as the date.
 * @returns {string} The derived name.
 */
function deriveSessionName(summary, startedAt) {
  const day = startedAt.slice(0, 10);
  const total = summary.totals.nodes;
  if (total === 0) {
    return `empty \xB7 0 nodes \xB7 ${day}`;
  }
  // Aggregate per-type counts into per-group counts.
  const tally = new Map();
  Object.entries(summary.nodesByType).forEach(([type, n]) => {
    const group = typeGroup(type);
    tally.set(group, (tally.get(group) ?? 0) + n);
  });
  const ranked = Array.from(tally).sort(
    ([groupA, countA], [groupB, countB]) => countB - countA || groupA.localeCompare(groupB)
  );
  const label = ranked.slice(0, 2).map(([group]) => group).join("+");
  const noun = total === 1 ? "node" : "nodes";
  return `${label} \xB7 ${total} ${noun} \xB7 ${day}`;
}
|
|
1689
|
+
var SCHEMA = `
|
|
1690
|
+
PRAGMA journal_mode = WAL;
|
|
1691
|
+
PRAGMA foreign_keys = ON;
|
|
1692
|
+
PRAGMA busy_timeout = 5000;
|
|
1693
|
+
|
|
1694
|
+
CREATE TABLE IF NOT EXISTS sessions (
|
|
1695
|
+
id TEXT PRIMARY KEY,
|
|
1696
|
+
mode TEXT NOT NULL CHECK (mode IN ('discover')),
|
|
1697
|
+
started_at TEXT NOT NULL,
|
|
1698
|
+
completed_at TEXT,
|
|
1699
|
+
config TEXT NOT NULL DEFAULT '{}',
|
|
1700
|
+
name TEXT,
|
|
1701
|
+
tenant TEXT NOT NULL DEFAULT 'local',
|
|
1702
|
+
hostname TEXT,
|
|
1703
|
+
user TEXT,
|
|
1704
|
+
machine_id TEXT,
|
|
1705
|
+
organization TEXT,
|
|
1706
|
+
last_scanned_at TEXT
|
|
1707
|
+
);
|
|
1708
|
+
|
|
1709
|
+
CREATE TABLE IF NOT EXISTS nodes (
|
|
1710
|
+
id TEXT NOT NULL,
|
|
1711
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1712
|
+
type TEXT NOT NULL,
|
|
1713
|
+
name TEXT NOT NULL,
|
|
1714
|
+
discovered_via TEXT,
|
|
1715
|
+
discovered_at TEXT NOT NULL,
|
|
1716
|
+
path_id TEXT,
|
|
1717
|
+
depth INTEGER DEFAULT 0,
|
|
1718
|
+
confidence REAL DEFAULT 0.5,
|
|
1719
|
+
metadata TEXT NOT NULL DEFAULT '{}',
|
|
1720
|
+
tags TEXT NOT NULL DEFAULT '[]',
|
|
1721
|
+
domain TEXT,
|
|
1722
|
+
sub_domain TEXT,
|
|
1723
|
+
quality_score REAL,
|
|
1724
|
+
owner TEXT,
|
|
1725
|
+
cost TEXT,
|
|
1726
|
+
tenant TEXT NOT NULL DEFAULT 'local',
|
|
1727
|
+
global_id TEXT,
|
|
1728
|
+
content_hash TEXT,
|
|
1729
|
+
PRIMARY KEY (id, session_id)
|
|
1730
|
+
);
|
|
1731
|
+
|
|
1732
|
+
CREATE TABLE IF NOT EXISTS connections (
|
|
1733
|
+
id TEXT PRIMARY KEY,
|
|
1734
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1735
|
+
source_asset_id TEXT NOT NULL,
|
|
1736
|
+
target_asset_id TEXT NOT NULL,
|
|
1737
|
+
type TEXT,
|
|
1738
|
+
created_at TEXT NOT NULL,
|
|
1739
|
+
tenant TEXT NOT NULL DEFAULT 'local'
|
|
1740
|
+
);
|
|
1741
|
+
|
|
1742
|
+
CREATE TABLE IF NOT EXISTS edges (
|
|
1743
|
+
id TEXT PRIMARY KEY,
|
|
1744
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1745
|
+
source_id TEXT NOT NULL,
|
|
1746
|
+
target_id TEXT NOT NULL,
|
|
1747
|
+
relationship TEXT NOT NULL,
|
|
1748
|
+
evidence TEXT,
|
|
1749
|
+
confidence REAL DEFAULT 0.5,
|
|
1750
|
+
discovered_at TEXT NOT NULL,
|
|
1751
|
+
tenant TEXT NOT NULL DEFAULT 'local'
|
|
1752
|
+
);
|
|
1753
|
+
|
|
1754
|
+
CREATE TABLE IF NOT EXISTS activity_events (
|
|
1755
|
+
id TEXT PRIMARY KEY,
|
|
1756
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1757
|
+
task_id TEXT,
|
|
1758
|
+
timestamp TEXT NOT NULL,
|
|
1759
|
+
event_type TEXT NOT NULL,
|
|
1760
|
+
process TEXT NOT NULL,
|
|
1761
|
+
pid INTEGER NOT NULL,
|
|
1762
|
+
target TEXT,
|
|
1763
|
+
target_type TEXT,
|
|
1764
|
+
port INTEGER,
|
|
1765
|
+
duration_ms INTEGER,
|
|
1766
|
+
command TEXT,
|
|
1767
|
+
result_bytes INTEGER,
|
|
1768
|
+
tenant TEXT NOT NULL DEFAULT 'local'
|
|
1769
|
+
);
|
|
1770
|
+
|
|
1771
|
+
CREATE TABLE IF NOT EXISTS tasks (
|
|
1772
|
+
id TEXT PRIMARY KEY,
|
|
1773
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1774
|
+
description TEXT,
|
|
1775
|
+
started_at TEXT NOT NULL,
|
|
1776
|
+
completed_at TEXT,
|
|
1777
|
+
steps TEXT NOT NULL DEFAULT '[]',
|
|
1778
|
+
involved_services TEXT NOT NULL DEFAULT '[]',
|
|
1779
|
+
status TEXT DEFAULT 'active' CHECK (status IN ('active','completed','cancelled')),
|
|
1780
|
+
tenant TEXT NOT NULL DEFAULT 'local'
|
|
1781
|
+
);
|
|
1782
|
+
|
|
1783
|
+
CREATE TABLE IF NOT EXISTS workflows (
|
|
1784
|
+
id TEXT PRIMARY KEY,
|
|
1785
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1786
|
+
name TEXT,
|
|
1787
|
+
pattern TEXT NOT NULL,
|
|
1788
|
+
task_ids TEXT NOT NULL DEFAULT '[]',
|
|
1789
|
+
occurrences INTEGER DEFAULT 1,
|
|
1790
|
+
first_seen TEXT NOT NULL,
|
|
1791
|
+
last_seen TEXT NOT NULL,
|
|
1792
|
+
avg_duration_ms INTEGER,
|
|
1793
|
+
involved_services TEXT NOT NULL DEFAULT '[]',
|
|
1794
|
+
tenant TEXT NOT NULL DEFAULT 'local'
|
|
1795
|
+
);
|
|
1796
|
+
|
|
1797
|
+
CREATE TABLE IF NOT EXISTS node_approvals (
|
|
1798
|
+
pattern TEXT PRIMARY KEY,
|
|
1799
|
+
action TEXT NOT NULL CHECK (action IN ('save','ignore','auto')),
|
|
1800
|
+
created_at TEXT NOT NULL
|
|
1801
|
+
);
|
|
1802
|
+
|
|
1803
|
+
CREATE TABLE IF NOT EXISTS sharing_policy (
|
|
1804
|
+
pattern TEXT PRIMARY KEY, -- '*' row holds the global default
|
|
1805
|
+
level TEXT NOT NULL CHECK (level IN ('none','anonymized','full')),
|
|
1806
|
+
created_at TEXT NOT NULL
|
|
1807
|
+
);
|
|
1808
|
+
|
|
1809
|
+
CREATE TABLE IF NOT EXISTS pseudonym_reversal (
|
|
1810
|
+
token TEXT PRIMARY KEY,
|
|
1811
|
+
ciphertext TEXT NOT NULL, -- base64(iv \u2016 tag \u2016 AES-256-GCM(plaintext)) under reversalKey(orgKey)
|
|
1812
|
+
created_at TEXT NOT NULL
|
|
1813
|
+
);
|
|
1814
|
+
|
|
1815
|
+
CREATE TABLE IF NOT EXISTS pending_shares (
|
|
1816
|
+
content_hash TEXT PRIMARY KEY, -- sha256 over the policy-transformed payload (stable dedup key)
|
|
1817
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1818
|
+
node_id TEXT, -- original node id (NULL for edge rows)
|
|
1819
|
+
kind TEXT NOT NULL CHECK (kind IN ('node','edge')),
|
|
1820
|
+
payload TEXT NOT NULL, -- JSON of the already-anonymized projection (exactly what would leave)
|
|
1821
|
+
status TEXT NOT NULL CHECK (status IN ('pending','approved','shared','withheld')),
|
|
1822
|
+
decided_by TEXT CHECK (decided_by IN ('user','rule')),
|
|
1823
|
+
created_at TEXT NOT NULL,
|
|
1824
|
+
decided_at TEXT,
|
|
1825
|
+
shared_at TEXT
|
|
1826
|
+
);
|
|
1827
|
+
|
|
1828
|
+
CREATE TABLE IF NOT EXISTS node_contributors (
|
|
1829
|
+
global_id TEXT NOT NULL,
|
|
1830
|
+
machine_id TEXT NOT NULL,
|
|
1831
|
+
hostname TEXT NOT NULL,
|
|
1832
|
+
user TEXT NOT NULL,
|
|
1833
|
+
organization TEXT,
|
|
1834
|
+
at TEXT NOT NULL,
|
|
1835
|
+
confidence REAL NOT NULL DEFAULT 0.5,
|
|
1836
|
+
PRIMARY KEY (global_id, machine_id)
|
|
1837
|
+
);
|
|
1838
|
+
|
|
1839
|
+
CREATE TABLE IF NOT EXISTS drift_runs (
|
|
1840
|
+
id TEXT PRIMARY KEY,
|
|
1841
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1842
|
+
base_session_id TEXT,
|
|
1843
|
+
ran_at TEXT NOT NULL,
|
|
1844
|
+
nodes_added INTEGER NOT NULL,
|
|
1845
|
+
nodes_removed INTEGER NOT NULL,
|
|
1846
|
+
nodes_changed INTEGER NOT NULL,
|
|
1847
|
+
edges_added INTEGER NOT NULL,
|
|
1848
|
+
edges_removed INTEGER NOT NULL,
|
|
1849
|
+
delta TEXT NOT NULL
|
|
1850
|
+
);
|
|
1851
|
+
|
|
1852
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_session ON nodes(session_id);
|
|
1853
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(session_id, type);
|
|
1854
|
+
CREATE INDEX IF NOT EXISTS idx_edges_session ON edges(session_id);
|
|
1855
|
+
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(session_id, source_id);
|
|
1856
|
+
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(session_id, target_id);
|
|
1857
|
+
CREATE INDEX IF NOT EXISTS idx_events_session ON activity_events(session_id);
|
|
1858
|
+
CREATE INDEX IF NOT EXISTS idx_events_task ON activity_events(task_id);
|
|
1859
|
+
CREATE INDEX IF NOT EXISTS idx_tasks_session ON tasks(session_id);
|
|
1860
|
+
CREATE INDEX IF NOT EXISTS idx_connections_session ON connections(session_id);
|
|
1861
|
+
CREATE INDEX IF NOT EXISTS idx_connections_lookup ON connections(session_id, source_asset_id, target_asset_id);
|
|
1862
|
+
CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions(tenant);
|
|
1863
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_tenant_session ON nodes(tenant, session_id);
|
|
1864
|
+
CREATE INDEX IF NOT EXISTS idx_edges_tenant_session ON edges(tenant, session_id);
|
|
1865
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_global ON nodes(global_id);
|
|
1866
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_content ON nodes(content_hash);
|
|
1867
|
+
CREATE INDEX IF NOT EXISTS idx_contrib_global ON node_contributors(global_id);
|
|
1868
|
+
CREATE INDEX IF NOT EXISTS idx_drift_runs_session ON drift_runs(session_id);
|
|
1869
|
+
CREATE INDEX IF NOT EXISTS idx_drift_runs_ran_at ON drift_runs(ran_at);
|
|
1870
|
+
CREATE INDEX IF NOT EXISTS idx_pending_status ON pending_shares(status);
|
|
1871
|
+
CREATE INDEX IF NOT EXISTS idx_pending_session ON pending_shares(session_id);
|
|
1872
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_tenant_global ON nodes(tenant, global_id);
|
|
1873
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_tenant_content ON nodes(tenant, content_hash);
|
|
1874
|
+
CREATE INDEX IF NOT EXISTS idx_contrib_org ON node_contributors(organization, global_id);
|
|
1875
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_owner ON nodes(session_id, owner);
|
|
1876
|
+
`;
|
|
1877
|
+
var CartographyDB = class {
|
|
1878
|
+
db;
|
|
1879
|
+
/** 3.6 anomaly settings; defaults apply when no `anomaly` config is supplied. */
|
|
1880
|
+
anomalyEnabled;
|
|
1881
|
+
anomalyThresholds;
|
|
1882
|
+
constructor(dbPath, opts) {
|
|
1883
|
+
mkdirSync(dirname(dbPath), { recursive: true });
|
|
1884
|
+
this.db = new Database(dbPath);
|
|
1885
|
+
this.db.pragma("journal_mode = WAL");
|
|
1886
|
+
this.db.pragma("foreign_keys = ON");
|
|
1887
|
+
this.db.pragma("busy_timeout = 5000");
|
|
1888
|
+
this.anomalyEnabled = opts?.anomaly?.enabled ?? true;
|
|
1889
|
+
this.anomalyThresholds = opts?.anomaly ?? DEFAULT_ANOMALY_THRESHOLDS;
|
|
1890
|
+
this.migrate();
|
|
1891
|
+
}
|
|
1892
|
+
migrate() {
|
|
1893
|
+
const version = this.db.pragma("user_version", { simple: true });
|
|
1894
|
+
if (version === 0) {
|
|
1895
|
+
this.db.exec(SCHEMA);
|
|
1896
|
+
this.db.pragma("user_version = 14");
|
|
1897
|
+
return;
|
|
1898
|
+
} else if (version === 1) {
|
|
1899
|
+
const cols = this.db.prepare("PRAGMA table_info(nodes)").all().map((c) => c.name);
|
|
1900
|
+
if (!cols.includes("domain")) this.db.exec("ALTER TABLE nodes ADD COLUMN domain TEXT");
|
|
1901
|
+
if (!cols.includes("sub_domain")) this.db.exec("ALTER TABLE nodes ADD COLUMN sub_domain TEXT");
|
|
1902
|
+
if (!cols.includes("quality_score")) this.db.exec("ALTER TABLE nodes ADD COLUMN quality_score REAL");
|
|
1903
|
+
this.db.exec(`
|
|
1904
|
+
CREATE TABLE IF NOT EXISTS connections (
|
|
1905
|
+
id TEXT PRIMARY KEY,
|
|
1906
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
1907
|
+
source_asset_id TEXT NOT NULL,
|
|
1908
|
+
target_asset_id TEXT NOT NULL,
|
|
1909
|
+
type TEXT,
|
|
1910
|
+
created_at TEXT NOT NULL
|
|
1911
|
+
);
|
|
1912
|
+
CREATE INDEX IF NOT EXISTS idx_connections_session ON connections(session_id);
|
|
1913
|
+
CREATE INDEX IF NOT EXISTS idx_connections_lookup ON connections(session_id, source_asset_id, target_asset_id);
|
|
1914
|
+
`);
|
|
1915
|
+
this.db.pragma("user_version = 3");
|
|
1916
|
+
}
|
|
1917
|
+
if (version === 2) {
|
|
1918
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_connections_lookup ON connections(session_id, source_asset_id, target_asset_id)");
|
|
1919
|
+
this.db.pragma("user_version = 3");
|
|
1920
|
+
}
|
|
1921
|
+
const current = this.db.pragma("user_version", { simple: true });
|
|
1922
|
+
if (current < 4) {
|
|
1923
|
+
this.db.exec(`
|
|
1924
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(session_id, type);
|
|
1925
|
+
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(session_id, source_id);
|
|
1926
|
+
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(session_id, target_id);
|
|
1927
|
+
`);
|
|
1928
|
+
this.db.pragma("user_version = 4");
|
|
1929
|
+
}
|
|
1930
|
+
const v4 = this.db.pragma("user_version", { simple: true });
|
|
1931
|
+
if (v4 < 5) {
|
|
1932
|
+
const cols = this.db.prepare("PRAGMA table_info(sessions)").all().map((c) => c.name);
|
|
1933
|
+
if (!cols.includes("name")) this.db.exec("ALTER TABLE sessions ADD COLUMN name TEXT");
|
|
1934
|
+
this.db.pragma("user_version = 5");
|
|
1935
|
+
}
|
|
1936
|
+
const v5 = this.db.pragma("user_version", { simple: true });
|
|
1937
|
+
if (v5 < 6) {
|
|
1938
|
+
const cols = this.db.prepare("PRAGMA table_info(activity_events)").all().map((c) => c.name);
|
|
1939
|
+
if (!cols.includes("command")) this.db.exec("ALTER TABLE activity_events ADD COLUMN command TEXT");
|
|
1940
|
+
if (!cols.includes("result_bytes")) this.db.exec("ALTER TABLE activity_events ADD COLUMN result_bytes INTEGER");
|
|
1941
|
+
this.db.pragma("user_version = 6");
|
|
1942
|
+
}
|
|
1943
|
+
const v6 = this.db.pragma("user_version", { simple: true });
|
|
1944
|
+
if (v6 < 7) {
|
|
1945
|
+
const tables = ["sessions", "nodes", "edges", "connections", "activity_events", "tasks", "workflows"];
|
|
1946
|
+
for (const t of tables) {
|
|
1947
|
+
const cols = this.db.prepare(`PRAGMA table_info(${t})`).all().map((c) => c.name);
|
|
1948
|
+
if (cols.length > 0 && !cols.includes("tenant")) {
|
|
1949
|
+
this.db.exec(`ALTER TABLE ${t} ADD COLUMN tenant TEXT NOT NULL DEFAULT '${DEFAULT_TENANT}'`);
|
|
1950
|
+
}
|
|
1951
|
+
}
|
|
1952
|
+
const hasTable = (name) => this.db.prepare(`PRAGMA table_info(${name})`).all().length > 0;
|
|
1953
|
+
if (hasTable("sessions")) this.db.exec("CREATE INDEX IF NOT EXISTS idx_sessions_tenant ON sessions(tenant)");
|
|
1954
|
+
if (hasTable("nodes")) this.db.exec("CREATE INDEX IF NOT EXISTS idx_nodes_tenant_session ON nodes(tenant, session_id)");
|
|
1955
|
+
if (hasTable("edges")) this.db.exec("CREATE INDEX IF NOT EXISTS idx_edges_tenant_session ON edges(tenant, session_id)");
|
|
1956
|
+
this.db.pragma("user_version = 7");
|
|
1957
|
+
}
|
|
1958
|
+
const v7 = this.db.pragma("user_version", { simple: true });
|
|
1959
|
+
if (v7 < 8) {
|
|
1960
|
+
const sc = this.db.prepare("PRAGMA table_info(sessions)").all().map((c) => c.name);
|
|
1961
|
+
if (sc.length > 0) {
|
|
1962
|
+
for (const col of ["hostname", "user", "machine_id", "organization"]) {
|
|
1963
|
+
if (!sc.includes(col)) this.db.exec(`ALTER TABLE sessions ADD COLUMN ${col} TEXT`);
|
|
1964
|
+
}
|
|
1965
|
+
}
|
|
1966
|
+
const nc = this.db.prepare("PRAGMA table_info(nodes)").all().map((c) => c.name);
|
|
1967
|
+
if (nc.length > 0) {
|
|
1968
|
+
if (!nc.includes("global_id")) this.db.exec("ALTER TABLE nodes ADD COLUMN global_id TEXT");
|
|
1969
|
+
if (!nc.includes("content_hash")) this.db.exec("ALTER TABLE nodes ADD COLUMN content_hash TEXT");
|
|
1970
|
+
}
|
|
1971
|
+
this.db.exec(`
|
|
1972
|
+
CREATE TABLE IF NOT EXISTS node_contributors (
|
|
1973
|
+
global_id TEXT NOT NULL,
|
|
1974
|
+
machine_id TEXT NOT NULL,
|
|
1975
|
+
hostname TEXT NOT NULL,
|
|
1976
|
+
user TEXT NOT NULL,
|
|
1977
|
+
organization TEXT,
|
|
1978
|
+
at TEXT NOT NULL,
|
|
1979
|
+
confidence REAL NOT NULL DEFAULT 0.5,
|
|
1980
|
+
PRIMARY KEY (global_id, machine_id)
|
|
1981
|
+
);
|
|
1982
|
+
CREATE INDEX IF NOT EXISTS idx_contrib_global ON node_contributors(global_id);
|
|
1983
|
+
`);
|
|
1984
|
+
if (nc.length > 0) {
|
|
1985
|
+
this.db.exec(`
|
|
1986
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_global ON nodes(global_id);
|
|
1987
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_content ON nodes(content_hash);
|
|
1988
|
+
`);
|
|
1989
|
+
}
|
|
1990
|
+
this.db.pragma("user_version = 8");
|
|
1991
|
+
}
|
|
1992
|
+
const v8 = this.db.pragma("user_version", { simple: true });
|
|
1993
|
+
if (v8 < 9) {
|
|
1994
|
+
const sc = this.db.prepare("PRAGMA table_info(sessions)").all().map((c) => c.name);
|
|
1995
|
+
if (sc.length > 0 && !sc.includes("last_scanned_at")) {
|
|
1996
|
+
this.db.exec("ALTER TABLE sessions ADD COLUMN last_scanned_at TEXT");
|
|
1997
|
+
}
|
|
1998
|
+
this.db.pragma("user_version = 9");
|
|
1999
|
+
}
|
|
2000
|
+
const v9 = this.db.pragma("user_version", { simple: true });
|
|
2001
|
+
if (v9 < 10) {
|
|
2002
|
+
this.db.exec(`
|
|
2003
|
+
CREATE TABLE IF NOT EXISTS drift_runs (
|
|
2004
|
+
id TEXT PRIMARY KEY,
|
|
2005
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
2006
|
+
base_session_id TEXT,
|
|
2007
|
+
ran_at TEXT NOT NULL,
|
|
2008
|
+
nodes_added INTEGER NOT NULL,
|
|
2009
|
+
nodes_removed INTEGER NOT NULL,
|
|
2010
|
+
nodes_changed INTEGER NOT NULL,
|
|
2011
|
+
edges_added INTEGER NOT NULL,
|
|
2012
|
+
edges_removed INTEGER NOT NULL,
|
|
2013
|
+
delta TEXT NOT NULL
|
|
2014
|
+
);
|
|
2015
|
+
CREATE INDEX IF NOT EXISTS idx_drift_runs_session ON drift_runs(session_id);
|
|
2016
|
+
CREATE INDEX IF NOT EXISTS idx_drift_runs_ran_at ON drift_runs(ran_at);
|
|
2017
|
+
`);
|
|
2018
|
+
this.db.pragma("user_version = 10");
|
|
2019
|
+
}
|
|
2020
|
+
const v10 = this.db.pragma("user_version", { simple: true });
|
|
2021
|
+
if (v10 < 11) {
|
|
2022
|
+
this.db.exec(`
|
|
2023
|
+
CREATE TABLE IF NOT EXISTS sharing_policy (
|
|
2024
|
+
pattern TEXT PRIMARY KEY,
|
|
2025
|
+
level TEXT NOT NULL CHECK (level IN ('none','anonymized','full')),
|
|
2026
|
+
created_at TEXT NOT NULL
|
|
2027
|
+
);
|
|
2028
|
+
CREATE TABLE IF NOT EXISTS pseudonym_reversal (
|
|
2029
|
+
token TEXT PRIMARY KEY,
|
|
2030
|
+
ciphertext TEXT NOT NULL,
|
|
2031
|
+
created_at TEXT NOT NULL
|
|
2032
|
+
);
|
|
2033
|
+
`);
|
|
2034
|
+
this.db.pragma("user_version = 11");
|
|
2035
|
+
}
|
|
2036
|
+
const v11 = this.db.pragma("user_version", { simple: true });
|
|
2037
|
+
if (v11 < 12) {
|
|
2038
|
+
this.db.exec(`
|
|
2039
|
+
CREATE TABLE IF NOT EXISTS pending_shares (
|
|
2040
|
+
content_hash TEXT PRIMARY KEY,
|
|
2041
|
+
session_id TEXT NOT NULL REFERENCES sessions(id),
|
|
2042
|
+
node_id TEXT,
|
|
2043
|
+
kind TEXT NOT NULL CHECK (kind IN ('node','edge')),
|
|
2044
|
+
payload TEXT NOT NULL,
|
|
2045
|
+
status TEXT NOT NULL CHECK (status IN ('pending','approved','shared','withheld')),
|
|
2046
|
+
decided_by TEXT CHECK (decided_by IN ('user','rule')),
|
|
2047
|
+
created_at TEXT NOT NULL,
|
|
2048
|
+
decided_at TEXT,
|
|
2049
|
+
shared_at TEXT
|
|
2050
|
+
);
|
|
2051
|
+
CREATE INDEX IF NOT EXISTS idx_pending_status ON pending_shares(status);
|
|
2052
|
+
CREATE INDEX IF NOT EXISTS idx_pending_session ON pending_shares(session_id);
|
|
2053
|
+
`);
|
|
2054
|
+
this.db.pragma("user_version = 12");
|
|
2055
|
+
}
|
|
2056
|
+
const v12 = this.db.pragma("user_version", { simple: true });
|
|
2057
|
+
if (v12 < 13) {
|
|
2058
|
+
const hasNodes = this.db.prepare("PRAGMA table_info(nodes)").all().length > 0;
|
|
2059
|
+
if (hasNodes) {
|
|
2060
|
+
this.db.exec(`
|
|
2061
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_tenant_global ON nodes(tenant, global_id);
|
|
2062
|
+
CREATE INDEX IF NOT EXISTS idx_nodes_tenant_content ON nodes(tenant, content_hash);
|
|
2063
|
+
`);
|
|
2064
|
+
}
|
|
2065
|
+
const hasContrib = this.db.prepare("PRAGMA table_info(node_contributors)").all().length > 0;
|
|
2066
|
+
if (hasContrib) {
|
|
2067
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_contrib_org ON node_contributors(organization, global_id)");
|
|
2068
|
+
}
|
|
2069
|
+
this.db.pragma("user_version = 13");
|
|
2070
|
+
}
|
|
2071
|
+
const v13 = this.db.pragma("user_version", { simple: true });
|
|
2072
|
+
if (v13 < 14) {
|
|
2073
|
+
const hasNodes = this.db.prepare("PRAGMA table_info(nodes)").all();
|
|
2074
|
+
if (hasNodes.length > 0) {
|
|
2075
|
+
const cols = hasNodes.map((c) => c.name);
|
|
2076
|
+
if (!cols.includes("owner")) this.db.exec("ALTER TABLE nodes ADD COLUMN owner TEXT");
|
|
2077
|
+
if (!cols.includes("cost")) this.db.exec("ALTER TABLE nodes ADD COLUMN cost TEXT");
|
|
2078
|
+
this.db.exec("CREATE INDEX IF NOT EXISTS idx_nodes_owner ON nodes(session_id, owner)");
|
|
2079
|
+
}
|
|
2080
|
+
this.db.pragma("user_version = 14");
|
|
2081
|
+
}
|
|
2082
|
+
}
|
|
2083
|
+
close() {
|
|
2084
|
+
this.db.pragma("optimize");
|
|
2085
|
+
this.db.close();
|
|
2086
|
+
}
|
|
2087
|
+
/**
|
|
2088
|
+
* Advanced: the underlying better-sqlite3 connection. Used by the optional
|
|
2089
|
+
* semantic-search layer to load the `sqlite-vec` extension and manage its
|
|
2090
|
+
* virtual table. Prefer the typed methods above for everything else.
|
|
2091
|
+
*/
|
|
2092
|
+
rawConnection() {
|
|
2093
|
+
return this.db;
|
|
2094
|
+
}
|
|
2095
|
+
// ── Sessions ────────────────────────────
|
|
2096
|
+
/**
|
|
2097
|
+
* Create a discovery session, stamping its tenant from (in precedence order)
|
|
2098
|
+
* the explicit `tenantId` arg → `config.organization` → DEFAULT_TENANT. The
|
|
2099
|
+
* tenant is normalized once here; every child row written under this session
|
|
2100
|
+
* inherits it via {@link tenantOf}.
|
|
2101
|
+
*/
|
|
2102
|
+
createSession(mode, config, tenantId) {
|
|
2103
|
+
const id = crypto.randomUUID();
|
|
2104
|
+
const tenant = normalizeTenant(tenantId ?? config.organization);
|
|
2105
|
+
this.db.prepare(
|
|
2106
|
+
`INSERT INTO sessions (id, mode, started_at, config, tenant, hostname, user, machine_id, organization)
|
|
2107
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
2108
|
+
).run(
|
|
2109
|
+
id,
|
|
2110
|
+
mode,
|
|
2111
|
+
(/* @__PURE__ */ new Date()).toISOString(),
|
|
2112
|
+
JSON.stringify(config),
|
|
2113
|
+
tenant,
|
|
2114
|
+
hostname(),
|
|
2115
|
+
osUser(),
|
|
2116
|
+
machineId(),
|
|
2117
|
+
config.organization ?? null
|
|
2118
|
+
);
|
|
2119
|
+
return id;
|
|
2120
|
+
}
|
|
2121
|
+
/** The tenant that owns a session (DEFAULT_TENANT if the session is unknown). */
|
|
2122
|
+
tenantOf(sessionId) {
|
|
2123
|
+
const r = this.db.prepare("SELECT tenant FROM sessions WHERE id = ?").get(sessionId);
|
|
2124
|
+
return r?.tenant ?? DEFAULT_TENANT;
|
|
2125
|
+
}
|
|
2126
|
+
endSession(id) {
|
|
2127
|
+
this.db.prepare("UPDATE sessions SET completed_at = ? WHERE id = ?").run((/* @__PURE__ */ new Date()).toISOString(), id);
|
|
2128
|
+
}
|
|
2129
|
+
getSession(id) {
|
|
2130
|
+
const row = this.db.prepare("SELECT * FROM sessions WHERE id = ?").get(id);
|
|
2131
|
+
return row ? this.mapSession(row) : void 0;
|
|
2132
|
+
}
|
|
2133
|
+
/**
|
|
2134
|
+
* Resolve the newest session, optionally constrained to a `mode` and/or
|
|
2135
|
+
* `tenantId`. Omitting `tenantId` preserves the original (unscoped) behavior;
|
|
2136
|
+
* passing one returns only that tenant's sessions, which is how the tenant
|
|
2137
|
+
* boundary is enforced at session resolution.
|
|
2138
|
+
*/
|
|
2139
|
+
getLatestSession(mode, tenantId) {
|
|
2140
|
+
const clauses = [];
|
|
2141
|
+
const params = [];
|
|
2142
|
+
if (mode) {
|
|
2143
|
+
clauses.push("mode = ?");
|
|
2144
|
+
params.push(mode);
|
|
2145
|
+
}
|
|
2146
|
+
if (tenantId !== void 0) {
|
|
2147
|
+
clauses.push("tenant = ?");
|
|
2148
|
+
params.push(tenantId);
|
|
2149
|
+
}
|
|
2150
|
+
const where = clauses.length ? `WHERE ${clauses.join(" AND ")} ` : "";
|
|
2151
|
+
const row = this.db.prepare(`SELECT * FROM sessions ${where}ORDER BY rowid DESC LIMIT 1`).get(...params);
|
|
2152
|
+
return row ? this.mapSession(row) : void 0;
|
|
2153
|
+
}
|
|
2154
|
+
getSessions(tenantId) {
|
|
2155
|
+
const rows = tenantId !== void 0 ? this.db.prepare("SELECT * FROM sessions WHERE tenant = ? ORDER BY rowid DESC").all(tenantId) : this.db.prepare("SELECT * FROM sessions ORDER BY rowid DESC").all();
|
|
2156
|
+
return rows.map((r) => this.mapSession(r));
|
|
2157
|
+
}
|
|
2158
|
+
mapSession(r) {
|
|
2159
|
+
const v = SessionRowSchema.parse(r);
|
|
2160
|
+
return {
|
|
2161
|
+
id: v.id,
|
|
2162
|
+
mode: v.mode,
|
|
2163
|
+
startedAt: v.started_at,
|
|
2164
|
+
completedAt: v.completed_at ?? void 0,
|
|
2165
|
+
config: v.config,
|
|
2166
|
+
name: v.name ?? void 0,
|
|
2167
|
+
tenant: v.tenant,
|
|
2168
|
+
hostname: v.hostname ?? void 0,
|
|
2169
|
+
user: v.user ?? void 0,
|
|
2170
|
+
machineId: v.machine_id ?? void 0,
|
|
2171
|
+
organization: v.organization ?? void 0,
|
|
2172
|
+
lastScannedAt: v.last_scanned_at ?? void 0
|
|
2173
|
+
};
|
|
2174
|
+
}
|
|
2175
|
+
/** Record that a session was (re-)scanned now (ISO 8601 UTC). */
|
|
2176
|
+
touchSession(id) {
|
|
2177
|
+
this.db.prepare("UPDATE sessions SET last_scanned_at = ? WHERE id = ?").run((/* @__PURE__ */ new Date()).toISOString(), id);
|
|
2178
|
+
}
|
|
2179
|
+
/** Set (or clear) a session's human-friendly name. */
|
|
2180
|
+
setSessionName(id, name) {
|
|
2181
|
+
this.db.prepare("UPDATE sessions SET name = ? WHERE id = ?").run(name, id);
|
|
2182
|
+
}
|
|
2183
|
+
/**
|
|
2184
|
+
* Compare two discovery sessions and report drift (added/removed/changed nodes
|
|
2185
|
+
* and added/removed edges). Read-only; no schema changes. Throws if either
|
|
2186
|
+
* session id does not exist.
|
|
2187
|
+
*/
|
|
2188
|
+
diffSessions(baseId, currentId) {
|
|
2189
|
+
const base = this.getSession(baseId);
|
|
2190
|
+
if (!base) throw new Error(`Base session not found: ${baseId}`);
|
|
2191
|
+
const current = this.getSession(currentId);
|
|
2192
|
+
if (!current) throw new Error(`Current session not found: ${currentId}`);
|
|
2193
|
+
const baseData = { nodes: this.getNodes(baseId), edges: this.getEdges(baseId) };
|
|
2194
|
+
const curData = { nodes: this.getNodes(currentId), edges: this.getEdges(currentId) };
|
|
2195
|
+
const delta = diffTopology(baseData, curData);
|
|
2196
|
+
const baseAnoms = this.getGraphSummary(baseId).anomalies;
|
|
2197
|
+
const curAnoms = this.getGraphSummary(currentId).anomalies;
|
|
2198
|
+
return {
|
|
2199
|
+
base: { sessionId: baseId, startedAt: base.startedAt, nodeCount: baseData.nodes.length, edgeCount: baseData.edges.length },
|
|
2200
|
+
current: { sessionId: currentId, startedAt: current.startedAt, nodeCount: curData.nodes.length, edgeCount: curData.edges.length },
|
|
2201
|
+
...delta,
|
|
2202
|
+
anomalies: { base: baseAnoms, current: curAnoms, added: newAnomalies(baseAnoms, curAnoms) }
|
|
2203
|
+
};
|
|
2204
|
+
}
|
|
2205
|
+
/**
|
|
2206
|
+
* Score a session against a compliance ruleset (3.4) — a thin wrapper over the
|
|
2207
|
+
* pure `scoreTopology` engine (mirrors `diffSessions`). Throws only when the
|
|
2208
|
+
* session id is unknown; the engine never throws on data shape.
|
|
2209
|
+
*/
|
|
2210
|
+
scoreSession(sessionId, ruleset, opts) {
|
|
2211
|
+
if (!this.getSession(sessionId)) throw new Error(`Session not found: ${sessionId}`);
|
|
2212
|
+
return scoreTopology({ nodes: this.getNodes(sessionId), edges: this.getEdges(sessionId) }, ruleset, opts);
|
|
2213
|
+
}
|
|
2214
|
+
// ── Scheduled discovery: prior-session selection + drift runs (2.5) ──────────
|
|
2215
|
+
/**
|
|
2216
|
+
* The most recent session of `mode` (and optionally `tenantId`) other than
|
|
2217
|
+
* `excludeId`. Used by scheduled discovery to pick an unambiguous diff base
|
|
2218
|
+
* (newest-first by `rowid`), avoiding the `getLatestSession` just-created-session
|
|
2219
|
+
* ambiguity. Returns `undefined` when no such prior session exists.
|
|
2220
|
+
*/
|
|
2221
|
+
getPreviousSession(excludeId, mode, tenantId) {
|
|
2222
|
+
const clauses = ["id != ?"];
|
|
2223
|
+
const params = [excludeId];
|
|
2224
|
+
if (mode) {
|
|
2225
|
+
clauses.push("mode = ?");
|
|
2226
|
+
params.push(mode);
|
|
2227
|
+
}
|
|
2228
|
+
if (tenantId !== void 0) {
|
|
2229
|
+
clauses.push("tenant = ?");
|
|
2230
|
+
params.push(tenantId);
|
|
2231
|
+
}
|
|
2232
|
+
const row = this.db.prepare(`SELECT * FROM sessions WHERE ${clauses.join(" AND ")} ORDER BY rowid DESC LIMIT 1`).get(...params);
|
|
2233
|
+
return row ? this.mapSession(row) : void 0;
|
|
2234
|
+
}
|
|
2235
|
+
/**
|
|
2236
|
+
* Persist one scheduled-discovery drift run: the summary counts plus the full
|
|
2237
|
+
* {@link TopologyDelta} (for audit/replay). Returns the generated row id.
|
|
2238
|
+
* `ranAt` is stamped now (ISO 8601 UTC).
|
|
2239
|
+
*/
|
|
2240
|
+
recordDriftRun(sessionId, baseSessionId, delta) {
|
|
2241
|
+
const id = crypto.randomUUID();
|
|
2242
|
+
const s = delta.summary;
|
|
2243
|
+
this.db.prepare(`
|
|
2244
|
+
INSERT INTO drift_runs
|
|
2245
|
+
(id, session_id, base_session_id, ran_at,
|
|
2246
|
+
nodes_added, nodes_removed, nodes_changed, edges_added, edges_removed, delta)
|
|
2247
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2248
|
+
`).run(
|
|
2249
|
+
id,
|
|
2250
|
+
sessionId,
|
|
2251
|
+
baseSessionId ?? null,
|
|
2252
|
+
(/* @__PURE__ */ new Date()).toISOString(),
|
|
2253
|
+
s.nodesAdded,
|
|
2254
|
+
s.nodesRemoved,
|
|
2255
|
+
s.nodesChanged,
|
|
2256
|
+
s.edgesAdded,
|
|
2257
|
+
s.edgesRemoved,
|
|
2258
|
+
JSON.stringify(delta)
|
|
2259
|
+
);
|
|
2260
|
+
return id;
|
|
2261
|
+
}
|
|
2262
|
+
/** Recent drift runs, newest-first (`LIMIT`, default 50). */
|
|
2263
|
+
getDriftRuns(limit = 50) {
|
|
2264
|
+
const rows = this.db.prepare("SELECT * FROM drift_runs ORDER BY rowid DESC LIMIT ?").all(Math.max(1, Math.floor(limit)));
|
|
2265
|
+
return rows.map((r) => this.mapDriftRun(r));
|
|
2266
|
+
}
|
|
2267
|
+
/** The most recent drift run, or `undefined` when none has been recorded. */
|
|
2268
|
+
getLatestDriftRun() {
|
|
2269
|
+
const row = this.db.prepare("SELECT * FROM drift_runs ORDER BY rowid DESC LIMIT 1").get();
|
|
2270
|
+
return row ? this.mapDriftRun(row) : void 0;
|
|
2271
|
+
}
|
|
2272
|
+
mapDriftRun(r) {
|
|
2273
|
+
const v = DriftRunRowSchema.parse(r);
|
|
2274
|
+
const emptyDelta = {
|
|
2275
|
+
nodes: { added: [], removed: [], changed: [], unchanged: 0 },
|
|
2276
|
+
edges: { added: [], removed: [], unchanged: 0 },
|
|
2277
|
+
summary: { nodesAdded: 0, nodesRemoved: 0, nodesChanged: 0, edgesAdded: 0, edgesRemoved: 0 }
|
|
2278
|
+
};
|
|
2279
|
+
return {
|
|
2280
|
+
id: v.id,
|
|
2281
|
+
sessionId: v.session_id,
|
|
2282
|
+
baseSessionId: v.base_session_id ?? void 0,
|
|
2283
|
+
ranAt: v.ran_at,
|
|
2284
|
+
summary: {
|
|
2285
|
+
nodesAdded: v.nodes_added,
|
|
2286
|
+
nodesRemoved: v.nodes_removed,
|
|
2287
|
+
nodesChanged: v.nodes_changed,
|
|
2288
|
+
edgesAdded: v.edges_added,
|
|
2289
|
+
edgesRemoved: v.edges_removed
|
|
2290
|
+
},
|
|
2291
|
+
delta: safeJsonParse(v.delta, emptyDelta)
|
|
2292
|
+
};
|
|
2293
|
+
}
|
|
2294
|
+
// ── Nodes ───────────────────────────────
|
|
2295
|
+
upsertNode(sessionId, node, depth = 0, attribution) {
|
|
2296
|
+
const tenant = this.tenantOf(sessionId);
|
|
2297
|
+
const ch = contentHash(node.type, node.name, keyMetaOf(node.metadata ?? {}));
|
|
2298
|
+
const drift = this.db.prepare(
|
|
2299
|
+
`SELECT global_id FROM nodes
|
|
2300
|
+
WHERE content_hash = ? AND COALESCE(global_id, '') LIKE ?
|
|
2301
|
+
LIMIT 1`
|
|
2302
|
+
).get(ch, `${globalId(tenant, "")}%`);
|
|
2303
|
+
const gid = drift?.global_id ?? globalId(tenant, node.id);
|
|
2304
|
+
this.db.prepare(`
|
|
2305
|
+
INSERT OR REPLACE INTO nodes
|
|
2306
|
+
(id, session_id, type, name, discovered_via, discovered_at, depth, confidence, metadata, tags,
|
|
2307
|
+
domain, sub_domain, quality_score, owner, cost, tenant, global_id, content_hash)
|
|
2308
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2309
|
+
`).run(
|
|
2310
|
+
node.id,
|
|
2311
|
+
sessionId,
|
|
2312
|
+
node.type,
|
|
2313
|
+
sanitizeUntrusted(node.name),
|
|
2314
|
+
node.discoveredVia,
|
|
2315
|
+
(/* @__PURE__ */ new Date()).toISOString(),
|
|
2316
|
+
depth,
|
|
2317
|
+
node.confidence,
|
|
2318
|
+
JSON.stringify(sanitizeValue(node.metadata ?? {})),
|
|
2319
|
+
JSON.stringify((node.tags ?? []).map(sanitizeUntrusted)),
|
|
2320
|
+
node.domain != null ? sanitizeUntrusted(node.domain) : null,
|
|
2321
|
+
node.subDomain != null ? sanitizeUntrusted(node.subDomain) : null,
|
|
2322
|
+
node.qualityScore ?? null,
|
|
2323
|
+
node.owner != null ? sanitizeUntrusted(node.owner) : null,
|
|
2324
|
+
node.cost ? JSON.stringify(CostEntrySchema.parse(node.cost)) : null,
|
|
2325
|
+
tenant,
|
|
2326
|
+
gid,
|
|
2327
|
+
ch
|
|
2328
|
+
);
|
|
2329
|
+
if (attribution) {
|
|
2330
|
+
this.db.prepare(`
|
|
2331
|
+
INSERT INTO node_contributors (global_id, machine_id, hostname, user, organization, at, confidence)
|
|
2332
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
2333
|
+
ON CONFLICT(global_id, machine_id) DO UPDATE SET
|
|
2334
|
+
confidence = MAX(confidence, excluded.confidence),
|
|
2335
|
+
at = excluded.at,
|
|
2336
|
+
hostname = excluded.hostname,
|
|
2337
|
+
user = excluded.user,
|
|
2338
|
+
organization = excluded.organization
|
|
2339
|
+
`).run(
|
|
2340
|
+
gid,
|
|
2341
|
+
attribution.machineId,
|
|
2342
|
+
sanitizeUntrusted(attribution.hostname),
|
|
2343
|
+
sanitizeUntrusted(attribution.user),
|
|
2344
|
+
attribution.organization != null ? sanitizeUntrusted(attribution.organization) : null,
|
|
2345
|
+
attribution.at,
|
|
2346
|
+
attribution.confidence
|
|
2347
|
+
);
|
|
2348
|
+
}
|
|
2349
|
+
}
|
|
2350
|
+
/** All contributors that observed a logical node, ordered by observation time. */
|
|
2351
|
+
getContributors(globalId2) {
|
|
2352
|
+
const rows = this.db.prepare(
|
|
2353
|
+
"SELECT * FROM node_contributors WHERE global_id = ? ORDER BY at"
|
|
2354
|
+
).all(globalId2);
|
|
2355
|
+
return rows.map((r) => {
|
|
2356
|
+
const v = ContributorRowSchema.parse(r);
|
|
2357
|
+
return {
|
|
2358
|
+
machineId: v.machine_id,
|
|
2359
|
+
hostname: v.hostname,
|
|
2360
|
+
user: v.user,
|
|
2361
|
+
organization: v.organization ?? void 0,
|
|
2362
|
+
at: v.at,
|
|
2363
|
+
confidence: v.confidence
|
|
2364
|
+
};
|
|
2365
|
+
});
|
|
2366
|
+
}
|
|
2367
|
+
getNodes(sessionId, opts) {
|
|
2368
|
+
let sql = "SELECT * FROM nodes WHERE session_id = ?";
|
|
2369
|
+
if (opts?.limit) {
|
|
2370
|
+
sql += ` LIMIT ${opts.limit}`;
|
|
2371
|
+
if (opts.offset) sql += ` OFFSET ${opts.offset}`;
|
|
2372
|
+
}
|
|
2373
|
+
const rows = this.db.prepare(sql).all(sessionId);
|
|
2374
|
+
return rows.map((r) => this.mapNode(r));
|
|
2375
|
+
}
|
|
2376
|
+
getNodeCount(sessionId) {
|
|
2377
|
+
const row = this.db.prepare("SELECT COUNT(*) as cnt FROM nodes WHERE session_id = ?").get(sessionId);
|
|
2378
|
+
return row.cnt;
|
|
2379
|
+
}
|
|
2380
|
+
mapNode(r) {
|
|
2381
|
+
const v = NodeRowSchema.parse(r);
|
|
2382
|
+
return {
|
|
2383
|
+
id: v.id,
|
|
2384
|
+
sessionId: v.session_id,
|
|
2385
|
+
type: v.type,
|
|
2386
|
+
name: v.name,
|
|
2387
|
+
discoveredVia: v.discovered_via ?? "",
|
|
2388
|
+
discoveredAt: v.discovered_at,
|
|
2389
|
+
depth: v.depth,
|
|
2390
|
+
confidence: v.confidence,
|
|
2391
|
+
metadata: safeJsonParse(v.metadata, {}),
|
|
2392
|
+
tags: safeJsonParse(v.tags, []),
|
|
2393
|
+
pathId: v.path_id ?? void 0,
|
|
2394
|
+
domain: v.domain ?? void 0,
|
|
2395
|
+
subDomain: v.sub_domain ?? void 0,
|
|
2396
|
+
qualityScore: v.quality_score ?? void 0,
|
|
2397
|
+
owner: v.owner ?? void 0,
|
|
2398
|
+
cost: v.cost ? safeJsonParse(v.cost, void 0) : void 0,
|
|
2399
|
+
globalId: v.global_id ?? void 0,
|
|
2400
|
+
contentHash: v.content_hash ?? void 0
|
|
2401
|
+
};
|
|
2402
|
+
}
|
|
2403
|
+
/**
|
|
2404
|
+
* Update only the cost/owner of an existing node, without touching any other
|
|
2405
|
+
* field (unlike upsertNode's INSERT OR REPLACE) — the idempotent enrichment
|
|
2406
|
+
* primitive (3.3). `undefined` leaves a field unchanged; `null` clears it.
|
|
2407
|
+
* No-op (returns false) if the node is absent. Cost is re-validated before write.
|
|
2408
|
+
*/
|
|
2409
|
+
enrichNodeAttribution(sessionId, nodeId, attr) {
|
|
2410
|
+
const sets = [];
|
|
2411
|
+
const vals = [];
|
|
2412
|
+
if (attr.owner !== void 0) {
|
|
2413
|
+
sets.push("owner = ?");
|
|
2414
|
+
vals.push(attr.owner == null ? null : sanitizeUntrusted(attr.owner));
|
|
2415
|
+
}
|
|
2416
|
+
if (attr.cost !== void 0) {
|
|
2417
|
+
sets.push("cost = ?");
|
|
2418
|
+
vals.push(attr.cost == null ? null : JSON.stringify(CostEntrySchema.parse(attr.cost)));
|
|
2419
|
+
}
|
|
2420
|
+
if (sets.length === 0) return false;
|
|
2421
|
+
const info = this.db.prepare(
|
|
2422
|
+
`UPDATE nodes SET ${sets.join(", ")} WHERE session_id = ? AND id = ?`
|
|
2423
|
+
).run(...vals, sessionId, nodeId);
|
|
2424
|
+
return info.changes > 0;
|
|
2425
|
+
}
|
|
2426
|
+
deleteNode(sessionId, nodeId) {
|
|
2427
|
+
this.db.prepare("DELETE FROM nodes WHERE session_id = ? AND id = ?").run(sessionId, nodeId);
|
|
2428
|
+
this.db.prepare(
|
|
2429
|
+
"DELETE FROM edges WHERE session_id = ? AND (source_id = ? OR target_id = ?)"
|
|
2430
|
+
).run(sessionId, nodeId, nodeId);
|
|
2431
|
+
}
|
|
2432
|
+
// ── Edges ───────────────────────────────
|
|
2433
|
+
insertEdge(sessionId, edge) {
|
|
2434
|
+
const id = crypto.randomUUID();
|
|
2435
|
+
const tenant = this.tenantOf(sessionId);
|
|
2436
|
+
this.db.prepare(`
|
|
2437
|
+
INSERT OR IGNORE INTO edges
|
|
2438
|
+
(id, session_id, source_id, target_id, relationship, evidence, confidence, discovered_at, tenant)
|
|
2439
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2440
|
+
`).run(
|
|
2441
|
+
id,
|
|
2442
|
+
sessionId,
|
|
2443
|
+
edge.sourceId,
|
|
2444
|
+
edge.targetId,
|
|
2445
|
+
edge.relationship,
|
|
2446
|
+
sanitizeUntrusted(edge.evidence),
|
|
2447
|
+
edge.confidence,
|
|
2448
|
+
(/* @__PURE__ */ new Date()).toISOString(),
|
|
2449
|
+
tenant
|
|
2450
|
+
);
|
|
2451
|
+
}
|
|
2452
|
+
/**
|
|
2453
|
+
* Delete every edge matching the logical key (source, target, relationship)
|
|
2454
|
+
* within a session. `insertEdge` writes a random PK, so logical identity is the
|
|
2455
|
+
* only way to prune an edge that disappeared while both endpoints survived.
|
|
2456
|
+
*/
|
|
2457
|
+
deleteEdgeByKey(sessionId, sourceId, targetId, relationship) {
|
|
2458
|
+
this.db.prepare(
|
|
2459
|
+
"DELETE FROM edges WHERE session_id = ? AND source_id = ? AND target_id = ? AND relationship = ?"
|
|
2460
|
+
).run(sessionId, sourceId, targetId, relationship);
|
|
2461
|
+
}
|
|
2462
|
+
/**
|
|
2463
|
+
* Apply a precomputed {@link TopologyDelta} to one session in a single
|
|
2464
|
+
* transaction (2.1 incremental discovery): prune removed nodes (cascading their
|
|
2465
|
+
* edges via {@link deleteNode}), upsert added/changed nodes, delete removed edges
|
|
2466
|
+
* by logical key, insert added edges, and stamp `last_scanned_at`. Unchanged rows
|
|
2467
|
+
* are left untouched (stable `discovered_at`).
|
|
2468
|
+
*
|
|
2469
|
+
* `attribution` (2.9) is forwarded to every added/changed node's upsert so a
|
|
2470
|
+
* rescan keeps/appends the running machine's contributor instead of leaving it
|
|
2471
|
+
* out. Unchanged nodes are not re-upserted, so their existing contributors
|
|
2472
|
+
* survive the rescan. `NodeRow extends DiscoveryNode`, so the delta rows pass
|
|
2473
|
+
* straight into `upsertNode` with no mapper.
|
|
2474
|
+
*/
|
|
2475
|
+
applyTopologyDelta(sessionId, delta, attribution) {
|
|
2476
|
+
const apply = this.db.transaction(() => {
|
|
2477
|
+
for (const n of delta.nodes.removed) this.deleteNode(sessionId, n.id);
|
|
2478
|
+
for (const n of delta.nodes.added) {
|
|
2479
|
+
this.upsertNode(sessionId, n, n.depth, attribution ? { ...attribution, confidence: n.confidence } : void 0);
|
|
2480
|
+
}
|
|
2481
|
+
for (const c of delta.nodes.changed) {
|
|
2482
|
+
this.upsertNode(sessionId, c.after, c.after.depth, attribution ? { ...attribution, confidence: c.after.confidence } : void 0);
|
|
2483
|
+
}
|
|
2484
|
+
for (const e of delta.edges.removed) this.deleteEdgeByKey(sessionId, e.sourceId, e.targetId, e.relationship);
|
|
2485
|
+
for (const e of delta.edges.added) this.insertEdge(sessionId, e);
|
|
2486
|
+
this.touchSession(sessionId);
|
|
2487
|
+
});
|
|
2488
|
+
apply();
|
|
2489
|
+
}
|
|
2490
|
+
getEdges(sessionId, opts) {
|
|
2491
|
+
let sql = "SELECT * FROM edges WHERE session_id = ?";
|
|
2492
|
+
if (opts?.limit) {
|
|
2493
|
+
sql += ` LIMIT ${opts.limit}`;
|
|
2494
|
+
if (opts.offset) sql += ` OFFSET ${opts.offset}`;
|
|
2495
|
+
}
|
|
2496
|
+
const rows = this.db.prepare(sql).all(sessionId);
|
|
2497
|
+
return rows.map((r) => {
|
|
2498
|
+
const v = EdgeRowSchema.parse(r);
|
|
2499
|
+
return {
|
|
2500
|
+
id: v.id,
|
|
2501
|
+
sessionId: v.session_id,
|
|
2502
|
+
sourceId: v.source_id,
|
|
2503
|
+
targetId: v.target_id,
|
|
2504
|
+
relationship: v.relationship,
|
|
2505
|
+
evidence: v.evidence ?? "",
|
|
2506
|
+
confidence: v.confidence,
|
|
2507
|
+
discoveredAt: v.discovered_at
|
|
2508
|
+
};
|
|
2509
|
+
});
|
|
2510
|
+
}
|
|
2511
|
+
// ── Events ──────────────────────────────
|
|
2512
|
+
insertEvent(sessionId, event, taskId) {
|
|
2513
|
+
const id = crypto.randomUUID();
|
|
2514
|
+
const tenant = this.tenantOf(sessionId);
|
|
2515
|
+
this.db.prepare(`
|
|
2516
|
+
INSERT INTO activity_events
|
|
2517
|
+
(id, session_id, task_id, timestamp, event_type, process, pid, target, target_type, port, command, result_bytes, tenant)
|
|
2518
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2519
|
+
`).run(
|
|
2520
|
+
id,
|
|
2521
|
+
sessionId,
|
|
2522
|
+
taskId ?? null,
|
|
2523
|
+
(/* @__PURE__ */ new Date()).toISOString(),
|
|
2524
|
+
event.eventType,
|
|
2525
|
+
event.process,
|
|
2526
|
+
event.pid,
|
|
2527
|
+
event.target ?? null,
|
|
2528
|
+
event.targetType ?? null,
|
|
2529
|
+
event.port ?? null,
|
|
2530
|
+
event.command ?? null,
|
|
2531
|
+
event.resultBytes ?? null,
|
|
2532
|
+
tenant
|
|
2533
|
+
);
|
|
2534
|
+
}
|
|
2535
|
+
getEvents(sessionId, since) {
|
|
2536
|
+
const rows = since ? this.db.prepare("SELECT * FROM activity_events WHERE session_id = ? AND timestamp > ? ORDER BY timestamp").all(sessionId, since) : this.db.prepare("SELECT * FROM activity_events WHERE session_id = ? ORDER BY timestamp").all(sessionId);
|
|
2537
|
+
return rows.map((r) => {
|
|
2538
|
+
const v = EventRowSchema.parse(r);
|
|
2539
|
+
return {
|
|
2540
|
+
id: v.id,
|
|
2541
|
+
sessionId: v.session_id,
|
|
2542
|
+
taskId: v.task_id ?? void 0,
|
|
2543
|
+
timestamp: v.timestamp,
|
|
2544
|
+
eventType: v.event_type,
|
|
2545
|
+
process: v.process,
|
|
2546
|
+
pid: v.pid,
|
|
2547
|
+
target: v.target ?? void 0,
|
|
2548
|
+
targetType: v.target_type ?? void 0,
|
|
2549
|
+
port: v.port ?? void 0,
|
|
2550
|
+
durationMs: v.duration_ms ?? void 0,
|
|
2551
|
+
command: v.command ?? void 0,
|
|
2552
|
+
resultBytes: v.result_bytes ?? void 0
|
|
2553
|
+
};
|
|
2554
|
+
});
|
|
2555
|
+
}
|
|
2556
|
+
// ── Tasks ───────────────────────────────
|
|
2557
|
+
startTask(sessionId, description) {
|
|
2558
|
+
const id = crypto.randomUUID();
|
|
2559
|
+
const tenant = this.tenantOf(sessionId);
|
|
2560
|
+
this.db.prepare(`
|
|
2561
|
+
INSERT INTO tasks (id, session_id, description, started_at, steps, involved_services, status, tenant)
|
|
2562
|
+
VALUES (?, ?, ?, ?, '[]', '[]', 'active', ?)
|
|
2563
|
+
`).run(id, sessionId, description ?? null, (/* @__PURE__ */ new Date()).toISOString(), tenant);
|
|
2564
|
+
return id;
|
|
2565
|
+
}
|
|
2566
|
+
endCurrentTask(sessionId) {
|
|
2567
|
+
this.db.prepare(`
|
|
2568
|
+
UPDATE tasks SET status = 'completed', completed_at = ?
|
|
2569
|
+
WHERE session_id = ? AND status = 'active'
|
|
2570
|
+
`).run((/* @__PURE__ */ new Date()).toISOString(), sessionId);
|
|
2571
|
+
}
|
|
2572
|
+
updateTaskDescription(sessionId, description) {
|
|
2573
|
+
this.db.prepare(`
|
|
2574
|
+
UPDATE tasks SET description = ?
|
|
2575
|
+
WHERE session_id = ? AND status = 'active'
|
|
2576
|
+
`).run(description, sessionId);
|
|
2577
|
+
}
|
|
2578
|
+
getActiveTask(sessionId) {
|
|
2579
|
+
const row = this.db.prepare(
|
|
2580
|
+
"SELECT * FROM tasks WHERE session_id = ? AND status = 'active' LIMIT 1"
|
|
2581
|
+
).get(sessionId);
|
|
2582
|
+
return row ? this.mapTask(row) : void 0;
|
|
2583
|
+
}
|
|
2584
|
+
getTasks(sessionId) {
|
|
2585
|
+
const rows = this.db.prepare("SELECT * FROM tasks WHERE session_id = ? ORDER BY started_at").all(sessionId);
|
|
2586
|
+
return rows.map((r) => this.mapTask(r));
|
|
2587
|
+
}
|
|
2588
|
+
mapTask(r) {
|
|
2589
|
+
const v = TaskRowSchema.parse(r);
|
|
2590
|
+
return {
|
|
2591
|
+
id: v.id,
|
|
2592
|
+
sessionId: v.session_id,
|
|
2593
|
+
description: v.description ?? void 0,
|
|
2594
|
+
startedAt: v.started_at,
|
|
2595
|
+
completedAt: v.completed_at ?? void 0,
|
|
2596
|
+
steps: v.steps,
|
|
2597
|
+
involvedServices: v.involved_services,
|
|
2598
|
+
status: v.status
|
|
2599
|
+
};
|
|
2600
|
+
}
|
|
2601
|
+
// ── Workflows ───────────────────────────
|
|
2602
|
+
insertWorkflow(sessionId, data) {
|
|
2603
|
+
const id = crypto.randomUUID();
|
|
2604
|
+
const tenant = this.tenantOf(sessionId);
|
|
2605
|
+
this.db.prepare(`
|
|
2606
|
+
INSERT INTO workflows
|
|
2607
|
+
(id, session_id, name, pattern, task_ids, occurrences,
|
|
2608
|
+
first_seen, last_seen, avg_duration_ms, involved_services, tenant)
|
|
2609
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2610
|
+
`).run(
|
|
2611
|
+
id,
|
|
2612
|
+
sessionId,
|
|
2613
|
+
data.name ?? null,
|
|
2614
|
+
data.pattern,
|
|
2615
|
+
data.taskIds,
|
|
2616
|
+
data.occurrences,
|
|
2617
|
+
data.firstSeen,
|
|
2618
|
+
data.lastSeen,
|
|
2619
|
+
data.avgDurationMs,
|
|
2620
|
+
data.involvedServices,
|
|
2621
|
+
tenant
|
|
2622
|
+
);
|
|
2623
|
+
}
|
|
2624
|
+
getWorkflows(sessionId) {
|
|
2625
|
+
const rows = this.db.prepare("SELECT * FROM workflows WHERE session_id = ?").all(sessionId);
|
|
2626
|
+
return rows.map((r) => {
|
|
2627
|
+
const v = WorkflowRowSchema.parse(r);
|
|
2628
|
+
return {
|
|
2629
|
+
id: v.id,
|
|
2630
|
+
sessionId: v.session_id,
|
|
2631
|
+
name: v.name ?? void 0,
|
|
2632
|
+
pattern: v.pattern,
|
|
2633
|
+
taskIds: v.task_ids,
|
|
2634
|
+
occurrences: v.occurrences,
|
|
2635
|
+
firstSeen: v.first_seen,
|
|
2636
|
+
lastSeen: v.last_seen,
|
|
2637
|
+
avgDurationMs: v.avg_duration_ms ?? 0,
|
|
2638
|
+
involvedServices: v.involved_services
|
|
2639
|
+
};
|
|
2640
|
+
});
|
|
2641
|
+
}
|
|
2642
|
+
// ── Connections (user-created hex map links) ─────────────────────────────
|
|
2643
|
+
upsertConnection(sessionId, conn) {
|
|
2644
|
+
const existing = this.db.prepare(
|
|
2645
|
+
"SELECT id FROM connections WHERE session_id = ? AND source_asset_id = ? AND target_asset_id = ?"
|
|
2646
|
+
).get(sessionId, conn.sourceAssetId, conn.targetAssetId);
|
|
2647
|
+
if (existing) return existing.id;
|
|
2648
|
+
const id = crypto.randomUUID();
|
|
2649
|
+
const tenant = this.tenantOf(sessionId);
|
|
2650
|
+
this.db.prepare(`
|
|
2651
|
+
INSERT INTO connections (id, session_id, source_asset_id, target_asset_id, type, created_at, tenant)
|
|
2652
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
2653
|
+
`).run(id, sessionId, conn.sourceAssetId, conn.targetAssetId, conn.type ?? null, (/* @__PURE__ */ new Date()).toISOString(), tenant);
|
|
2654
|
+
return id;
|
|
2655
|
+
}
|
|
2656
|
+
getConnections(sessionId) {
|
|
2657
|
+
const rows = this.db.prepare("SELECT * FROM connections WHERE session_id = ?").all(sessionId);
|
|
2658
|
+
return rows.map((r) => {
|
|
2659
|
+
const v = ConnectionRowSchema.parse(r);
|
|
2660
|
+
return {
|
|
2661
|
+
id: v.id,
|
|
2662
|
+
sessionId: v.session_id,
|
|
2663
|
+
sourceAssetId: v.source_asset_id,
|
|
2664
|
+
targetAssetId: v.target_asset_id,
|
|
2665
|
+
type: v.type ?? void 0,
|
|
2666
|
+
createdAt: v.created_at
|
|
2667
|
+
};
|
|
2668
|
+
});
|
|
2669
|
+
}
|
|
2670
|
+
deleteConnection(sessionId, connectionId) {
|
|
2671
|
+
this.db.prepare("DELETE FROM connections WHERE session_id = ? AND id = ?").run(sessionId, connectionId);
|
|
2672
|
+
}
|
|
2673
|
+
// ── Approvals ───────────────────────────
|
|
2674
|
+
setApproval(pattern, action) {
|
|
2675
|
+
this.db.prepare(`
|
|
2676
|
+
INSERT OR REPLACE INTO node_approvals (pattern, action, created_at) VALUES (?, ?, ?)
|
|
2677
|
+
`).run(pattern, action, (/* @__PURE__ */ new Date()).toISOString());
|
|
2678
|
+
}
|
|
2679
|
+
getApproval(pattern) {
|
|
2680
|
+
const row = this.db.prepare("SELECT action FROM node_approvals WHERE pattern = ?").get(pattern);
|
|
2681
|
+
return row?.action;
|
|
2682
|
+
}
|
|
2683
|
+
// ── Sharing policy (2.10 consent) ───────────────────────────────────────────
|
|
2684
|
+
/**
|
|
2685
|
+
* Set (or replace) the sharing level for a pattern. The `'*'` pattern is the
|
|
2686
|
+
* global default; any other pattern is an override (glob over the node id).
|
|
2687
|
+
* Validated via {@link SharingLevelSchema} before write; `created_at` is ISO UTC.
|
|
2688
|
+
*/
|
|
2689
|
+
setSharingLevel(pattern, level) {
|
|
2690
|
+
const valid = SharingLevelSchema.parse(level);
|
|
2691
|
+
this.db.prepare(
|
|
2692
|
+
"INSERT OR REPLACE INTO sharing_policy (pattern, level, created_at) VALUES (?, ?, ?)"
|
|
2693
|
+
).run(pattern, valid, (/* @__PURE__ */ new Date()).toISOString());
|
|
2694
|
+
}
|
|
2695
|
+
/**
|
|
2696
|
+
* The full sharing policy: the `'*'` row resolves to `defaultLevel` (`'none'`
|
|
2697
|
+
* when absent — the opt-in floor), every other row becomes an override. The
|
|
2698
|
+
* glob-precedence resolution itself lives in `src/sharing.ts` so it is unit
|
|
2699
|
+
* testable in isolation; this returns the raw policy it consumes.
|
|
2700
|
+
*/
|
|
2701
|
+
getSharingPolicy() {
|
|
2702
|
+
const rows = this.db.prepare("SELECT pattern, level FROM sharing_policy").all();
|
|
2703
|
+
let defaultLevel = "none";
|
|
2704
|
+
const overrides = [];
|
|
2705
|
+
for (const r of rows) {
|
|
2706
|
+
const level = SharingLevelSchema.parse(r.level);
|
|
2707
|
+
if (r.pattern === "*") defaultLevel = level;
|
|
2708
|
+
else overrides.push({ pattern: r.pattern, level });
|
|
2709
|
+
}
|
|
2710
|
+
return { defaultLevel, overrides };
|
|
2711
|
+
}
|
|
2712
|
+
/** Remove a pattern override. The global default (`'*'`) cannot be cleared this way. */
|
|
2713
|
+
clearSharingOverride(pattern) {
|
|
2714
|
+
this.db.prepare("DELETE FROM sharing_policy WHERE pattern = ? AND pattern != '*'").run(pattern);
|
|
2715
|
+
}
|
|
2716
|
+
// ── Pseudonym reversal map (2.10 admin-reversible anonymization) ─────────────
|
|
2717
|
+
/**
|
|
2718
|
+
* Persist the encrypted plaintext behind a pseudonym token. Idempotent: the
|
|
2719
|
+
* token is deterministic, so repeated writes `INSERT OR REPLACE` and never grow
|
|
2720
|
+
* the table. `ciphertext` is base64(iv ‖ tag ‖ AES-256-GCM(plaintext)).
|
|
2721
|
+
*/
|
|
2722
|
+
saveReversal(token, ciphertext) {
|
|
2723
|
+
this.db.prepare(
|
|
2724
|
+
"INSERT OR REPLACE INTO pseudonym_reversal (token, ciphertext, created_at) VALUES (?, ?, ?)"
|
|
2725
|
+
).run(token, ciphertext, (/* @__PURE__ */ new Date()).toISOString());
|
|
2726
|
+
}
|
|
2727
|
+
/** Read the stored ciphertext for a pseudonym token (admin reversal path). */
|
|
2728
|
+
getReversal(token) {
|
|
2729
|
+
const row = this.db.prepare("SELECT ciphertext FROM pseudonym_reversal WHERE token = ?").get(token);
|
|
2730
|
+
return row?.ciphertext;
|
|
2731
|
+
}
|
|
2732
|
+
// ── Pending-review share queue (2.11 central-DB sync) ────────────────────────
|
|
2733
|
+
/**
|
|
2734
|
+
* Enqueue one proposed share item. Idempotent via `INSERT OR IGNORE` on the
|
|
2735
|
+
* `content_hash` PK: re-classifying the same (transformed) item never duplicates
|
|
2736
|
+
* a row nor resets an existing decision. `payload` is the already-policy-
|
|
2737
|
+
* transformed projection (the exact bytes a push would send) — never raw node
|
|
2738
|
+
* data for `anonymized`/`none` items.
|
|
2739
|
+
*/
|
|
2740
|
+
enqueuePending(item) {
|
|
2741
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
2742
|
+
const decided = item.status === "pending" ? null : now;
|
|
2743
|
+
this.db.prepare(`
|
|
2744
|
+
INSERT OR IGNORE INTO pending_shares
|
|
2745
|
+
(content_hash, session_id, node_id, kind, payload, status, decided_by, created_at, decided_at)
|
|
2746
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
2747
|
+
`).run(
|
|
2748
|
+
item.contentHash,
|
|
2749
|
+
item.sessionId,
|
|
2750
|
+
item.nodeId ?? null,
|
|
2751
|
+
item.kind,
|
|
2752
|
+
JSON.stringify(item.payload),
|
|
2753
|
+
item.status,
|
|
2754
|
+
item.decidedBy ?? null,
|
|
2755
|
+
now,
|
|
2756
|
+
decided
|
|
2757
|
+
);
|
|
2758
|
+
}
|
|
2759
|
+
/** Queued share items, optionally filtered by status and/or session. */
|
|
2760
|
+
getPendingShares(filter) {
|
|
2761
|
+
const clauses = [];
|
|
2762
|
+
const params = [];
|
|
2763
|
+
if (filter?.status) {
|
|
2764
|
+
clauses.push("status = ?");
|
|
2765
|
+
params.push(filter.status);
|
|
2766
|
+
}
|
|
2767
|
+
if (filter?.sessionId) {
|
|
2768
|
+
clauses.push("session_id = ?");
|
|
2769
|
+
params.push(filter.sessionId);
|
|
2770
|
+
}
|
|
2771
|
+
const where = clauses.length ? `WHERE ${clauses.join(" AND ")} ` : "";
|
|
2772
|
+
const rows = this.db.prepare(`SELECT * FROM pending_shares ${where}ORDER BY rowid`).all(...params);
|
|
2773
|
+
return rows.map((r) => this.mapPendingShare(r));
|
|
2774
|
+
}
|
|
2775
|
+
/** Queue size by status (every status key present, zero-filled). */
|
|
2776
|
+
countPendingByStatus() {
|
|
2777
|
+
const out = { pending: 0, approved: 0, shared: 0, withheld: 0 };
|
|
2778
|
+
const rows = this.db.prepare("SELECT status, COUNT(*) c FROM pending_shares GROUP BY status").all();
|
|
2779
|
+
for (const r of rows) {
|
|
2780
|
+
if (r.status in out) out[r.status] = r.c;
|
|
2781
|
+
}
|
|
2782
|
+
return out;
|
|
2783
|
+
}
|
|
2784
|
+
/** `content_hash` values already pushed (status `shared`) — for re-share suppression. */
|
|
2785
|
+
getSharedHashes() {
|
|
2786
|
+
const rows = this.db.prepare("SELECT content_hash FROM pending_shares WHERE status = 'shared'").all();
|
|
2787
|
+
return new Set(rows.map((r) => r.content_hash));
|
|
2788
|
+
}
|
|
2789
|
+
/**
|
|
2790
|
+
* Transition one queued item. Stamps `decided_at` on any non-`pending` status and
|
|
2791
|
+
* `shared_at` when moving to `shared`. `decidedBy` records the actor (`'user'` or
|
|
2792
|
+
* `'rule'`) for the audit trail.
|
|
2793
|
+
*/
|
|
2794
|
+
setPendingStatus(contentHash2, status, decidedBy) {
|
|
2795
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
2796
|
+
const sharedAt = status === "shared" ? now : null;
|
|
2797
|
+
this.db.prepare(`
|
|
2798
|
+
UPDATE pending_shares
|
|
2799
|
+
SET status = ?, decided_by = COALESCE(?, decided_by), decided_at = ?, shared_at = COALESCE(?, shared_at)
|
|
2800
|
+
WHERE content_hash = ?
|
|
2801
|
+
`).run(status, decidedBy ?? null, now, sharedAt, contentHash2);
|
|
2802
|
+
}
|
|
2803
|
+
/** Approved items cleared to push (FIFO), optionally capped by `limit`. */
|
|
2804
|
+
getApprovedShares(limit) {
|
|
2805
|
+
let sql = "SELECT * FROM pending_shares WHERE status = 'approved' ORDER BY rowid";
|
|
2806
|
+
if (limit) sql += ` LIMIT ${Math.max(1, Math.floor(limit))}`;
|
|
2807
|
+
const rows = this.db.prepare(sql).all();
|
|
2808
|
+
return rows.map((r) => this.mapPendingShare(r));
|
|
2809
|
+
}
|
|
2810
|
+
mapPendingShare(r) {
|
|
2811
|
+
const v = PendingShareRowSchema.parse(r);
|
|
2812
|
+
return {
|
|
2813
|
+
contentHash: v.content_hash,
|
|
2814
|
+
sessionId: v.session_id,
|
|
2815
|
+
nodeId: v.node_id ?? void 0,
|
|
2816
|
+
kind: v.kind,
|
|
2817
|
+
payload: safeJsonParse(v.payload, null),
|
|
2818
|
+
status: v.status,
|
|
2819
|
+
decidedBy: v.decided_by ?? void 0,
|
|
2820
|
+
createdAt: v.created_at,
|
|
2821
|
+
decidedAt: v.decided_at ?? void 0,
|
|
2822
|
+
sharedAt: v.shared_at ?? void 0
|
|
2823
|
+
};
|
|
2824
|
+
}
|
|
2825
|
+
// ── Pruning ──────────────────────────────
|
|
2826
|
+
/**
|
|
2827
|
+
* Delete a session and all its associated data (nodes, edges, events, tasks, workflows, connections).
|
|
2828
|
+
*/
|
|
2829
|
+
deleteSession(sessionId) {
|
|
2830
|
+
this.db.prepare("DELETE FROM connections WHERE session_id = ?").run(sessionId);
|
|
2831
|
+
this.db.prepare("DELETE FROM workflows WHERE session_id = ?").run(sessionId);
|
|
2832
|
+
this.db.prepare("DELETE FROM activity_events WHERE session_id = ?").run(sessionId);
|
|
2833
|
+
this.db.prepare("DELETE FROM tasks WHERE session_id = ?").run(sessionId);
|
|
2834
|
+
this.db.prepare("DELETE FROM edges WHERE session_id = ?").run(sessionId);
|
|
2835
|
+
this.db.prepare("DELETE FROM nodes WHERE session_id = ?").run(sessionId);
|
|
2836
|
+
this.db.prepare("DELETE FROM sessions WHERE id = ?").run(sessionId);
|
|
2837
|
+
}
|
|
2838
|
+
/**
|
|
2839
|
+
* Prune sessions older than the given ISO date string. Returns count of deleted sessions.
|
|
2840
|
+
*/
|
|
2841
|
+
pruneSessions(olderThan) {
|
|
2842
|
+
const rows = this.db.prepare(
|
|
2843
|
+
"SELECT id FROM sessions WHERE started_at < ?"
|
|
2844
|
+
).all(olderThan);
|
|
2845
|
+
for (const row of rows) {
|
|
2846
|
+
this.deleteSession(row.id);
|
|
2847
|
+
}
|
|
2848
|
+
return rows.length;
|
|
2849
|
+
}
|
|
2850
|
+
// ── Graph queries (read-only context layer) ─────────────────────────────────
|
|
2851
|
+
/** Fetch a single node by id within a session. */
|
|
2852
|
+
getNode(sessionId, nodeId) {
|
|
2853
|
+
const row = this.db.prepare("SELECT * FROM nodes WHERE session_id = ? AND id = ?").get(sessionId, nodeId);
|
|
2854
|
+
return row ? this.mapNode(row) : void 0;
|
|
2855
|
+
}
|
|
2856
|
+
/** Batch-fetch nodes by id, keyed for O(1) lookup. Chunked to stay under SQLite's bind-variable limit. */
|
|
2857
|
+
getNodesByIds(sessionId, ids) {
|
|
2858
|
+
const out = /* @__PURE__ */ new Map();
|
|
2859
|
+
for (let i = 0; i < ids.length; i += 900) {
|
|
2860
|
+
const chunk = ids.slice(i, i + 900);
|
|
2861
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
2862
|
+
const rows = this.db.prepare(
|
|
2863
|
+
`SELECT * FROM nodes WHERE session_id = ? AND id IN (${placeholders})`
|
|
2864
|
+
).all(sessionId, ...chunk);
|
|
2865
|
+
for (const r of rows) {
|
|
2866
|
+
const n = this.mapNode(r);
|
|
2867
|
+
out.set(n.id, n);
|
|
2868
|
+
}
|
|
2869
|
+
}
|
|
2870
|
+
return out;
|
|
2871
|
+
}
|
|
2872
|
+
/** Fetch all nodes of one or more types. */
|
|
2873
|
+
getNodesByType(sessionId, types) {
|
|
2874
|
+
if (types.length === 0) return [];
|
|
2875
|
+
const placeholders = types.map(() => "?").join(",");
|
|
2876
|
+
const rows = this.db.prepare(
|
|
2877
|
+
`SELECT * FROM nodes WHERE session_id = ? AND type IN (${placeholders})`
|
|
2878
|
+
).all(sessionId, ...types);
|
|
2879
|
+
return rows.map((r) => this.mapNode(r));
|
|
2880
|
+
}
|
|
2881
|
+
/**
|
|
2882
|
+
* Lexical search over node id, name, domain, sub-domain and tags.
|
|
2883
|
+
* Case-insensitive substring match — the deterministic fallback for semantic search.
|
|
2884
|
+
*/
|
|
2885
|
+
searchNodes(sessionId, query, opts) {
|
|
2886
|
+
const q = `%${query.trim().toLowerCase()}%`;
|
|
2887
|
+
const params = [sessionId, q, q, q, q, q];
|
|
2888
|
+
let sql = `
|
|
2889
|
+
SELECT * FROM nodes
|
|
2890
|
+
WHERE session_id = ?
|
|
2891
|
+
AND (
|
|
2892
|
+
lower(id) LIKE ? OR lower(name) LIKE ?
|
|
2893
|
+
OR lower(COALESCE(domain, '')) LIKE ?
|
|
2894
|
+
OR lower(COALESCE(sub_domain, '')) LIKE ?
|
|
2895
|
+
OR lower(tags) LIKE ?
|
|
2896
|
+
)`;
|
|
2897
|
+
if (opts?.types && opts.types.length > 0) {
|
|
2898
|
+
sql += ` AND type IN (${opts.types.map(() => "?").join(",")})`;
|
|
2899
|
+
params.push(...opts.types);
|
|
2900
|
+
}
|
|
2901
|
+
sql += " ORDER BY confidence DESC";
|
|
2902
|
+
if (opts?.limit) sql += ` LIMIT ${Math.max(1, Math.floor(opts.limit))}`;
|
|
2903
|
+
const rows = this.db.prepare(sql).all(...params);
|
|
2904
|
+
return rows.map((r) => this.mapNode(r));
|
|
2905
|
+
}
|
|
2906
|
+
/**
|
|
2907
|
+
* Traverse the dependency graph from a node using a recursive CTE with a
|
|
2908
|
+
* path-based cycle guard. `downstream` follows source→target (what the node
|
|
2909
|
+
* depends on / points to); `upstream` follows target→source (what depends on it).
|
|
2910
|
+
*/
|
|
2911
|
+
getDependencies(sessionId, nodeId, opts = {}) {
|
|
2912
|
+
const direction = opts.direction ?? "downstream";
|
|
2913
|
+
const maxDepth = Math.max(1, Math.min(opts.maxDepth ?? 8, 64));
|
|
2914
|
+
const root = this.getNode(sessionId, nodeId);
|
|
2915
|
+
const depthById = /* @__PURE__ */ new Map();
|
|
2916
|
+
const collect = (dir) => {
|
|
2917
|
+
const [from, to] = dir === "downstream" ? ["source_id", "target_id"] : ["target_id", "source_id"];
|
|
2918
|
+
const sql = `
|
|
2919
|
+
WITH RECURSIVE walk(node_id, depth, path) AS (
|
|
2920
|
+
SELECT ?, 0, char(10) || ? || char(10)
|
|
2921
|
+
UNION ALL
|
|
2922
|
+
SELECT e.${to}, w.depth + 1, w.path || e.${to} || char(10)
|
|
2923
|
+
FROM edges e JOIN walk w ON e.${from} = w.node_id
|
|
2924
|
+
WHERE e.session_id = ?
|
|
2925
|
+
AND w.depth < ?
|
|
2926
|
+
AND instr(w.path, char(10) || e.${to} || char(10)) = 0
|
|
2927
|
+
)
|
|
2928
|
+
SELECT node_id, MIN(depth) AS depth FROM walk WHERE node_id != ? GROUP BY node_id`;
|
|
2929
|
+
const rows = this.db.prepare(sql).all(nodeId, nodeId, sessionId, maxDepth, nodeId);
|
|
2930
|
+
for (const r of rows) {
|
|
2931
|
+
const prev = depthById.get(r.node_id);
|
|
2932
|
+
if (prev === void 0 || r.depth < prev) depthById.set(r.node_id, r.depth);
|
|
2933
|
+
}
|
|
2934
|
+
};
|
|
2935
|
+
if (direction === "both") {
|
|
2936
|
+
collect("downstream");
|
|
2937
|
+
collect("upstream");
|
|
2938
|
+
} else collect(direction);
|
|
2939
|
+
const byId = this.getNodesByIds(sessionId, [...depthById.keys()]);
|
|
2940
|
+
const nodes = [...depthById.entries()].map(([id, depth]) => {
|
|
2941
|
+
const n = byId.get(id);
|
|
2942
|
+
return n ? { ...n, depth } : void 0;
|
|
2943
|
+
}).filter((n) => n !== void 0).sort((a, b) => a.depth - b.depth);
|
|
2944
|
+
const reachable = /* @__PURE__ */ new Set([nodeId, ...depthById.keys()]);
|
|
2945
|
+
const edges = this.getEdges(sessionId).filter((e) => reachable.has(e.sourceId) && reachable.has(e.targetId));
|
|
2946
|
+
return { root, direction, maxDepth, nodes, edges };
|
|
2947
|
+
}
|
|
2948
|
+
/** Lightweight aggregate index of the whole topology — the progressive-disclosure summary. */
|
|
2949
|
+
getGraphSummary(sessionId) {
|
|
2950
|
+
const totals = {
|
|
2951
|
+
nodes: this.db.prepare("SELECT COUNT(*) c FROM nodes WHERE session_id = ?").get(sessionId).c,
|
|
2952
|
+
edges: this.db.prepare("SELECT COUNT(*) c FROM edges WHERE session_id = ?").get(sessionId).c
|
|
2953
|
+
};
|
|
2954
|
+
const byType = {};
|
|
2955
|
+
for (const r of this.db.prepare("SELECT type, COUNT(*) c FROM nodes WHERE session_id = ? GROUP BY type").all(sessionId)) {
|
|
2956
|
+
byType[r.type] = r.c;
|
|
2957
|
+
}
|
|
2958
|
+
const byDomain = {};
|
|
2959
|
+
for (const r of this.db.prepare("SELECT COALESCE(domain, '(none)') d, COUNT(*) c FROM nodes WHERE session_id = ? GROUP BY d").all(sessionId)) {
|
|
2960
|
+
byDomain[r.d] = r.c;
|
|
2961
|
+
}
|
|
2962
|
+
const byRelationship = {};
|
|
2963
|
+
for (const r of this.db.prepare("SELECT relationship rel, COUNT(*) c FROM edges WHERE session_id = ? GROUP BY rel").all(sessionId)) {
|
|
2964
|
+
byRelationship[r.rel] = r.c;
|
|
2965
|
+
}
|
|
2966
|
+
const degreeRows = this.db.prepare(`
|
|
2967
|
+
SELECT n.id, n.name, n.type, n.confidence, COUNT(e.id) AS degree
|
|
2968
|
+
FROM nodes n
|
|
2969
|
+
LEFT JOIN edges e ON e.session_id = n.session_id AND (e.source_id = n.id OR e.target_id = n.id)
|
|
2970
|
+
WHERE n.session_id = ?
|
|
2971
|
+
GROUP BY n.id, n.name, n.type
|
|
2972
|
+
`).all(sessionId);
|
|
2973
|
+
const degree = /* @__PURE__ */ new Map();
|
|
2974
|
+
for (const r of degreeRows) degree.set(r.id, r.degree);
|
|
2975
|
+
const topConnected = [...degreeRows].sort((a, b) => b.degree - a.degree || b.confidence - a.confidence || a.id.localeCompare(b.id)).slice(0, 10).map(({ id, name, type, degree: degree2 }) => ({ id, name, type, degree: degree2 }));
|
|
2976
|
+
const anomalies = this.anomalyEnabled ? detectAnomalies(this.getNodes(sessionId), degree, this.anomalyThresholds) : [];
|
|
2977
|
+
const contributors = this.db.prepare(`
|
|
2978
|
+
SELECT COUNT(DISTINCT c.machine_id) AS n
|
|
2979
|
+
FROM node_contributors c
|
|
2980
|
+
JOIN nodes n ON n.global_id = c.global_id
|
|
2981
|
+
WHERE n.session_id = ?
|
|
2982
|
+
`).get(sessionId).n;
|
|
2983
|
+
const costByDomain = this.db.prepare(`
|
|
2984
|
+
SELECT COALESCE(domain, '(none)') domain,
|
|
2985
|
+
json_extract(cost, '$.currency') currency,
|
|
2986
|
+
json_extract(cost, '$.period') period,
|
|
2987
|
+
SUM(json_extract(cost, '$.amount')) total,
|
|
2988
|
+
COUNT(*) nodes
|
|
2989
|
+
FROM nodes
|
|
2990
|
+
WHERE session_id = ? AND cost IS NOT NULL
|
|
2991
|
+
GROUP BY domain, currency, period
|
|
2992
|
+
ORDER BY total DESC
|
|
2993
|
+
`).all(sessionId);
|
|
2994
|
+
const costByOwner = this.db.prepare(`
|
|
2995
|
+
SELECT COALESCE(owner, '(unowned)') owner,
|
|
2996
|
+
json_extract(cost, '$.currency') currency,
|
|
2997
|
+
json_extract(cost, '$.period') period,
|
|
2998
|
+
SUM(json_extract(cost, '$.amount')) total,
|
|
2999
|
+
COUNT(*) nodes
|
|
3000
|
+
FROM nodes
|
|
3001
|
+
WHERE session_id = ? AND cost IS NOT NULL
|
|
3002
|
+
GROUP BY owner, currency, period
|
|
3003
|
+
ORDER BY total DESC
|
|
3004
|
+
`).all(sessionId);
|
|
3005
|
+
const withCost = this.db.prepare(
|
|
3006
|
+
"SELECT COUNT(*) c FROM nodes WHERE session_id = ? AND cost IS NOT NULL"
|
|
3007
|
+
).get(sessionId).c;
|
|
3008
|
+
return {
|
|
3009
|
+
sessionId,
|
|
3010
|
+
totals,
|
|
3011
|
+
nodesByType: byType,
|
|
3012
|
+
nodesByDomain: byDomain,
|
|
3013
|
+
edgesByRelationship: byRelationship,
|
|
3014
|
+
topConnected,
|
|
3015
|
+
anomalies,
|
|
3016
|
+
contributors,
|
|
3017
|
+
costByDomain,
|
|
3018
|
+
costByOwner,
|
|
3019
|
+
costCoverage: { withCost, total: totals.nodes }
|
|
3020
|
+
};
|
|
3021
|
+
}
|
|
3022
|
+
// ── Central collector store (2.12) ──────────────────────────────────────────
|
|
3023
|
+
/**
|
|
3024
|
+
* Resolve (creating once) the synthetic collector session that owns every
|
|
3025
|
+
* central node/edge for a tenant. Central nodes are merged by `(org, global_id)`,
|
|
3026
|
+
* not by session, so they live under a single deterministic session id
|
|
3027
|
+
* (`central:{org}`) — this satisfies the existing `(id, session_id)` node PK and
|
|
3028
|
+
* the `session_id` foreign key without a destructive schema change. Idempotent.
|
|
3029
|
+
*/
|
|
3030
|
+
ensureCentralSession(org) {
|
|
3031
|
+
const tenant = normalizeTenant(org);
|
|
3032
|
+
const id = `central:${tenant}`;
|
|
3033
|
+
const existing = this.db.prepare("SELECT id FROM sessions WHERE id = ?").get(id);
|
|
3034
|
+
if (existing) return id;
|
|
3035
|
+
this.db.prepare(
|
|
3036
|
+
`INSERT INTO sessions (id, mode, started_at, config, tenant, organization)
|
|
3037
|
+
VALUES (?, 'discover', ?, '{}', ?, ?)`
|
|
3038
|
+
).run(id, (/* @__PURE__ */ new Date()).toISOString(), tenant, org);
|
|
3039
|
+
return id;
|
|
3040
|
+
}
|
|
3041
|
+
/**
|
|
3042
|
+
* Find an existing central node within a tenant by its primary identity
|
|
3043
|
+
* (`global_id`), returning its stored `id` so a merge keeps a single row.
|
|
3044
|
+
*/
|
|
3045
|
+
findCentralNodeIdByGlobalId(org, gid) {
|
|
3046
|
+
const tenant = normalizeTenant(org);
|
|
3047
|
+
const row = this.db.prepare(
|
|
3048
|
+
"SELECT id FROM nodes WHERE tenant = ? AND global_id = ? LIMIT 1"
|
|
3049
|
+
).get(tenant, gid);
|
|
3050
|
+
return row?.id;
|
|
3051
|
+
}
|
|
3052
|
+
/**
|
|
3053
|
+
* Secondary merge lookup: an existing central node in the tenant whose
|
|
3054
|
+
* `content_hash` matches (catches `id` drift between machines for the same
|
|
3055
|
+
* logical resource). Returns its stored `id` and `global_id`.
|
|
3056
|
+
*/
|
|
3057
|
+
findCentralNodeByContentHash(org, ch) {
|
|
3058
|
+
const tenant = normalizeTenant(org);
|
|
3059
|
+
const row = this.db.prepare(
|
|
3060
|
+
"SELECT id, global_id FROM nodes WHERE tenant = ? AND content_hash = ? LIMIT 1"
|
|
3061
|
+
).get(tenant, ch);
|
|
3062
|
+
if (!row || row.global_id == null) return void 0;
|
|
3063
|
+
return { id: row.id, globalId: row.global_id };
|
|
3064
|
+
}
|
|
3065
|
+
/**
|
|
3066
|
+
* Merge one incoming node into the central store for a tenant and append the
|
|
3067
|
+
* contributor (2.12). Resolves identity by `(tenant, global_id)` primary, then
|
|
3068
|
+
* `(tenant, content_hash)` secondary; on a hit it keeps the existing row's id
|
|
3069
|
+
* (so the logical node stays a single row), unions tags, keeps the higher
|
|
3070
|
+
* confidence, and merges metadata (incoming values win on key conflict). The
|
|
3071
|
+
* incoming `globalId`/`contentHash` are precomputed by the merge core so they
|
|
3072
|
+
* are consistent with what the lookups used. Returns whether a new row was
|
|
3073
|
+
* created or an existing one was merged. Runs in one transaction.
|
|
3074
|
+
*/
|
|
3075
|
+
upsertCentralNode(org, node, identity, contributor) {
|
|
3076
|
+
const tenant = normalizeTenant(org);
|
|
3077
|
+
const sessionId = this.ensureCentralSession(tenant);
|
|
3078
|
+
const txn = this.db.transaction(() => {
|
|
3079
|
+
let targetId = this.findCentralNodeIdByGlobalId(tenant, identity.globalId);
|
|
3080
|
+
let outcome = "merged";
|
|
3081
|
+
if (!targetId) {
|
|
3082
|
+
const byHash = this.findCentralNodeByContentHash(tenant, identity.contentHash);
|
|
3083
|
+
targetId = byHash?.id;
|
|
3084
|
+
}
|
|
3085
|
+
if (!targetId) {
|
|
3086
|
+
targetId = node.id;
|
|
3087
|
+
outcome = "created";
|
|
3088
|
+
}
|
|
3089
|
+
const existing = this.getCentralNode(tenant, sessionId, targetId);
|
|
3090
|
+
const mergedTags = Array.from(/* @__PURE__ */ new Set([...existing?.tags ?? [], ...node.tags ?? []]));
|
|
3091
|
+
const mergedMeta = { ...existing?.metadata ?? {}, ...node.metadata ?? {} };
|
|
3092
|
+
const confidence = Math.max(existing?.confidence ?? 0, node.confidence);
|
|
3093
|
+
this.db.prepare(`
|
|
3094
|
+
INSERT OR REPLACE INTO nodes
|
|
3095
|
+
(id, session_id, type, name, discovered_via, discovered_at, depth, confidence, metadata, tags,
|
|
3096
|
+
domain, sub_domain, quality_score, tenant, global_id, content_hash)
|
|
3097
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
3098
|
+
`).run(
|
|
3099
|
+
targetId,
|
|
3100
|
+
sessionId,
|
|
3101
|
+
node.type,
|
|
3102
|
+
sanitizeUntrusted(node.name),
|
|
3103
|
+
node.discoveredVia,
|
|
3104
|
+
existing?.discoveredAt ?? (/* @__PURE__ */ new Date()).toISOString(),
|
|
3105
|
+
0,
|
|
3106
|
+
confidence,
|
|
3107
|
+
JSON.stringify(sanitizeValue(mergedMeta)),
|
|
3108
|
+
JSON.stringify(mergedTags.map(sanitizeUntrusted)),
|
|
3109
|
+
node.domain != null ? sanitizeUntrusted(node.domain) : existing?.domain ?? null,
|
|
3110
|
+
node.subDomain != null ? sanitizeUntrusted(node.subDomain) : existing?.subDomain ?? null,
|
|
3111
|
+
node.qualityScore ?? existing?.qualityScore ?? null,
|
|
3112
|
+
tenant,
|
|
3113
|
+
identity.globalId,
|
|
3114
|
+
identity.contentHash
|
|
3115
|
+
);
|
|
3116
|
+
this.db.prepare(`
|
|
3117
|
+
INSERT INTO node_contributors (global_id, machine_id, hostname, user, organization, at, confidence)
|
|
3118
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
3119
|
+
ON CONFLICT(global_id, machine_id) DO UPDATE SET
|
|
3120
|
+
confidence = MAX(confidence, excluded.confidence),
|
|
3121
|
+
at = excluded.at,
|
|
3122
|
+
hostname = excluded.hostname,
|
|
3123
|
+
user = excluded.user,
|
|
3124
|
+
organization = excluded.organization
|
|
3125
|
+
`).run(
|
|
3126
|
+
identity.globalId,
|
|
3127
|
+
contributor.machineId,
|
|
3128
|
+
sanitizeUntrusted(contributor.hostname),
|
|
3129
|
+
sanitizeUntrusted(contributor.user),
|
|
3130
|
+
contributor.organization != null ? sanitizeUntrusted(contributor.organization) : tenant,
|
|
3131
|
+
contributor.at,
|
|
3132
|
+
contributor.confidence
|
|
3133
|
+
);
|
|
3134
|
+
return outcome;
|
|
3135
|
+
});
|
|
3136
|
+
return txn();
|
|
3137
|
+
}
|
|
3138
|
+
/** Insert an edge into the central store for a tenant (idempotent on logical key). */
|
|
3139
|
+
insertCentralEdge(org, edge) {
|
|
3140
|
+
const tenant = normalizeTenant(org);
|
|
3141
|
+
const sessionId = this.ensureCentralSession(tenant);
|
|
3142
|
+
const dup = this.db.prepare(
|
|
3143
|
+
"SELECT 1 FROM edges WHERE tenant = ? AND source_id = ? AND target_id = ? AND relationship = ? LIMIT 1"
|
|
3144
|
+
).get(tenant, edge.sourceId, edge.targetId, edge.relationship);
|
|
3145
|
+
if (dup) return;
|
|
3146
|
+
this.insertEdge(sessionId, edge);
|
|
3147
|
+
}
|
|
3148
|
+
/** A central node by tenant + stored id (the merge target after identity resolution). */
|
|
3149
|
+
getCentralNode(org, sessionId, nodeId) {
|
|
3150
|
+
const tenant = normalizeTenant(org);
|
|
3151
|
+
const row = this.db.prepare(
|
|
3152
|
+
"SELECT * FROM nodes WHERE tenant = ? AND session_id = ? AND id = ?"
|
|
3153
|
+
).get(tenant, sessionId, nodeId);
|
|
3154
|
+
return row ? this.mapNode(row) : void 0;
|
|
3155
|
+
}
|
|
3156
|
+
/** All contributors for a logical (global_id) node across an org. */
|
|
3157
|
+
getContributorsByGlobalId(gid) {
|
|
3158
|
+
return this.getContributors(gid);
|
|
3159
|
+
}
|
|
3160
|
+
/**
|
|
3161
|
+
* Org-wide aggregate summary (2.12) — the central analogue of
|
|
3162
|
+
* {@link getGraphSummary}, scoped `WHERE tenant = ?` so it merges every machine's
|
|
3163
|
+
* contribution into one organization-wide view. Cross-tenant isolation is
|
|
3164
|
+
* structural: org A's rows never appear in org B's counts.
|
|
3165
|
+
*/
|
|
3166
|
+
getOrgSummary(org) {
|
|
3167
|
+
const tenant = normalizeTenant(org);
|
|
3168
|
+
const totals = {
|
|
3169
|
+
nodes: this.db.prepare("SELECT COUNT(*) c FROM nodes WHERE tenant = ?").get(tenant).c,
|
|
3170
|
+
edges: this.db.prepare("SELECT COUNT(*) c FROM edges WHERE tenant = ?").get(tenant).c
|
|
3171
|
+
};
|
|
3172
|
+
const byType = {};
|
|
3173
|
+
for (const r of this.db.prepare("SELECT type, COUNT(*) c FROM nodes WHERE tenant = ? GROUP BY type").all(tenant)) {
|
|
3174
|
+
byType[r.type] = r.c;
|
|
3175
|
+
}
|
|
3176
|
+
const byDomain = {};
|
|
3177
|
+
for (const r of this.db.prepare("SELECT COALESCE(domain, '(none)') d, COUNT(*) c FROM nodes WHERE tenant = ? GROUP BY d").all(tenant)) {
|
|
3178
|
+
byDomain[r.d] = r.c;
|
|
3179
|
+
}
|
|
3180
|
+
const byRelationship = {};
|
|
3181
|
+
for (const r of this.db.prepare("SELECT relationship rel, COUNT(*) c FROM edges WHERE tenant = ? GROUP BY rel").all(tenant)) {
|
|
3182
|
+
byRelationship[r.rel] = r.c;
|
|
3183
|
+
}
|
|
3184
|
+
const topConnected = this.db.prepare(`
|
|
3185
|
+
SELECT n.id, n.name, n.type, COUNT(e.id) AS degree
|
|
3186
|
+
FROM nodes n
|
|
3187
|
+
LEFT JOIN edges e ON e.tenant = n.tenant AND (e.source_id = n.id OR e.target_id = n.id)
|
|
3188
|
+
WHERE n.tenant = ?
|
|
3189
|
+
GROUP BY n.id, n.name, n.type
|
|
3190
|
+
ORDER BY degree DESC, n.confidence DESC
|
|
3191
|
+
LIMIT 10
|
|
3192
|
+
`).all(tenant);
|
|
3193
|
+
const contributors = this.db.prepare(`
|
|
3194
|
+
SELECT COUNT(DISTINCT c.machine_id) AS n
|
|
3195
|
+
FROM node_contributors c
|
|
3196
|
+
JOIN nodes n ON n.global_id = c.global_id
|
|
3197
|
+
WHERE n.tenant = ?
|
|
3198
|
+
`).get(tenant).n;
|
|
3199
|
+
return { org: tenant, totals, nodesByType: byType, nodesByDomain: byDomain, edgesByRelationship: byRelationship, topConnected, contributors };
|
|
3200
|
+
}
|
|
3201
|
+
// ── Stats ───────────────────────────────
|
|
3202
|
+
getStats(sessionId) {
|
|
3203
|
+
const nodes = this.db.prepare("SELECT COUNT(*) as c FROM nodes WHERE session_id = ?").get(sessionId).c;
|
|
3204
|
+
const edges = this.db.prepare("SELECT COUNT(*) as c FROM edges WHERE session_id = ?").get(sessionId).c;
|
|
3205
|
+
const events = this.db.prepare("SELECT COUNT(*) as c FROM activity_events WHERE session_id = ?").get(sessionId).c;
|
|
3206
|
+
const tasks = this.db.prepare("SELECT COUNT(*) as c FROM tasks WHERE session_id = ?").get(sessionId).c;
|
|
3207
|
+
return { nodes, edges, events, tasks };
|
|
3208
|
+
}
|
|
3209
|
+
};
|
|
3210
|
+
|
|
3211
|
+
// src/api/auth.ts
|
|
3212
|
+
// Host spellings treated as loopback-only binds (stored lower-case).
var LOOPBACK_HOSTS = /* @__PURE__ */ new Set(["127.0.0.1", "localhost", "::1", "[::1]"]);
/**
 * Whether `host` names the local loopback interface.
 *
 * Host names are case-insensitive, so the comparison ignores case
 * (`"LOCALHOST"` counts as loopback). Only the exact spellings in
 * `LOOPBACK_HOSTS` are recognised; other addresses in 127.0.0.0/8 are not.
 *
 * @param host Bind host to classify.
 * @returns `true` for a recognised loopback spelling; `false` otherwise,
 *   including for non-string input.
 */
function isLoopbackHost(host) {
  return typeof host === "string" && LOOPBACK_HOSTS.has(host.toLowerCase());
}
|
|
3216
|
+
/**
 * Compare two strings without short-circuiting on the first difference.
 *
 * The length difference is folded into the accumulator instead of returning
 * early, and the loop always runs over `a` only. When `a` is the
 * caller-supplied value and `b` the secret (as in `checkBearer`), the running
 * time therefore does not depend on the secret's length or on where the
 * first mismatch occurs. This is best-effort hardening at the JavaScript level.
 *
 * @param a First string (drives the loop length).
 * @param b Second string.
 * @returns `true` only when both strings have the same length and code units.
 */
function timingSafeEqual(a, b) {
  let diff = a.length ^ b.length;
  for (let i = 0; i < a.length; i++) {
    // Past the end of `b`, charCodeAt yields NaN, which XOR treats as 0.
    diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
  }
  return diff === 0;
}
|
|
3222
|
+
/**
 * Extract the credential from an `Authorization: Bearer <token>` header value.
 *
 * The header is trimmed, the scheme is matched case-insensitively, and at
 * least one whitespace character must separate the scheme from the token.
 *
 * @param header Raw header value (may be missing).
 * @returns The token with leading whitespace removed, or `undefined` when the
 *   header is missing, uses another scheme, or carries no token.
 */
function bearerToken(header) {
  const value = header ? header.trim() : "";
  const scheme = value.slice(0, 6);
  const remainder = value.slice(6);
  const hasScheme = value.length >= 7 && scheme.toLowerCase() === "bearer";
  if (!hasScheme || !/^\s/.test(remainder)) {
    return void 0;
  }
  const credentials = remainder.trimStart();
  return credentials.length > 0 ? credentials : void 0;
}
|
|
3231
|
+
/**
 * Check a request's `Authorization` header against the configured token.
 *
 * When no token is configured (falsy `token`), every request passes.
 * Otherwise the header must carry a Bearer credential that matches the token
 * under `timingSafeEqual`.
 *
 * @param authorizationHeader Raw `Authorization` header value (may be missing).
 * @param token Configured token, or a falsy value to disable the check.
 * @returns `true` when the request is authorised.
 */
function checkBearer(authorizationHeader, token) {
  if (!token) {
    return true;
  }
  const provided = bearerToken(authorizationHeader);
  if (provided === void 0) {
    return false;
  }
  return timingSafeEqual(provided, token);
}
|
|
3236
|
+
/**
 * Guard against exposing the server on a non-loopback interface by accident.
 *
 * Loopback hosts always pass. Any other host must come with an explicit
 * `allowedHosts` value (anything other than `undefined`) and a non-empty
 * `token`. The allowlist is checked first.
 *
 * @param opts `{ host, allowedHosts, token }` bind options.
 * @throws {Error} When a non-loopback host lacks an allowlist or a token.
 */
function assertSafeBind(opts) {
  const { host, allowedHosts, token } = opts;
  if (isLoopbackHost(host)) {
    return;
  }
  const missingAllowlist = allowedHosts === void 0;
  if (missingAllowlist) {
    throw new Error(
      `Refusing to bind a non-loopback host (${host}) without an explicit allowedHosts allowlist. Pass { allowedHosts: ['your.public.host:port'] } to opt in, or bind 127.0.0.1 for local-only use.`
    );
  }
  if (!token) {
    throw new Error(
      `Refusing to bind a non-loopback host (${host}) without an auth token. Pass { token } (or --token / CARTOGRAPHY_HTTP_TOKEN) so requests must carry 'Authorization: Bearer <token>'.`
    );
  }
}
|
|
3249
|
+
/**
 * Default `host:port` allowlist for a bind address: the bind host itself plus
 * the `localhost` and `127.0.0.1` spellings for the same port.
 *
 * A bare IPv6 literal (contains `:` and is not already bracketed) is wrapped
 * in brackets, because `host:port` authorities write IPv6 addresses as
 * `[::1]:8080`; the unbracketed `::1:8080` could never match a `Host` header.
 *
 * @param host Bind host (name, IPv4 address, or IPv6 literal with or without brackets).
 * @param port Bind port.
 * @returns Three `host:port` strings, bind host first.
 */
function defaultAllowedHosts(host, port) {
  const isBareIpv6 = typeof host === "string" && host.includes(":") && !host.startsWith("[");
  const authorityHost = isBareIpv6 ? `[${host}]` : host;
  return [`${authorityHost}:${port}`, `localhost:${port}`, `127.0.0.1:${port}`];
}
|
|
3252
|
+
|
|
3253
|
+
export {
|
|
3254
|
+
sanitizeUntrusted,
|
|
3255
|
+
cloudAwsScanner,
|
|
3256
|
+
cloudGcpScanner,
|
|
3257
|
+
cloudAzureScanner,
|
|
3258
|
+
k8sScanner,
|
|
3259
|
+
databasesScanner,
|
|
3260
|
+
stripSensitive,
|
|
3261
|
+
redactValue,
|
|
3262
|
+
buildCartographyToolHandlers,
|
|
3263
|
+
createCartographyTools,
|
|
3264
|
+
RulesetSchema,
|
|
3265
|
+
stableStringify,
|
|
3266
|
+
diffTopology,
|
|
3267
|
+
DEFAULT_TENANT,
|
|
3268
|
+
normalizeTenant,
|
|
3269
|
+
keyMetaOf,
|
|
3270
|
+
contentHash,
|
|
3271
|
+
globalId,
|
|
3272
|
+
deriveSessionName,
|
|
3273
|
+
CartographyDB,
|
|
3274
|
+
checkBearer,
|
|
3275
|
+
assertSafeBind,
|
|
3276
|
+
defaultAllowedHosts
|
|
3277
|
+
};
|
|
3278
|
+
//# sourceMappingURL=chunk-7QEBFMN4.js.map
|