@datasynx/agentic-ai-cartography 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2465 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ CartographyDB,
4
+ RulesetSchema,
5
+ assertSafeBind,
6
+ checkBearer,
7
+ cloudAwsScanner,
8
+ cloudAzureScanner,
9
+ cloudGcpScanner,
10
+ contentHash,
11
+ databasesScanner,
12
+ defaultAllowedHosts,
13
+ deriveSessionName,
14
+ diffTopology,
15
+ globalId,
16
+ k8sScanner,
17
+ keyMetaOf,
18
+ normalizeTenant,
19
+ redactValue,
20
+ sanitizeUntrusted,
21
+ stableStringify,
22
+ stripSensitive
23
+ } from "./chunk-7QEBFMN4.js";
24
+ import {
25
+ EdgeSchema,
26
+ NODE_TYPES,
27
+ NODE_TYPE_GROUPS,
28
+ NodeSchema,
29
+ SECURITY_METADATA_KEYS,
30
+ SEVERITIES,
31
+ defaultConfig
32
+ } from "./chunk-WCR47QA2.js";
33
+ import {
34
+ IS_WIN,
35
+ PLATFORM,
36
+ checkReadOnly,
37
+ commandExists,
38
+ findFiles,
39
+ logDebug,
40
+ logError,
41
+ logInfo,
42
+ logWarn,
43
+ run,
44
+ scanAllBookmarks,
45
+ scanEstablishedConnections,
46
+ scanListeningPorts
47
+ } from "./chunk-2SZ5QHGH.js";
48
+
49
+ // src/scanners/spi.ts
50
+ import { z } from "zod";
51
/**
 * Runtime shape every scanner object must satisfy: non-empty `id` and `title`,
 * `platforms` that is either the literal "all" or a non-empty list of
 * "linux" / "darwin" / "win32", an optional list of non-empty command names,
 * and callable `detect` / `scan` members.
 */
var ScannerShape = (() => {
  const nonEmptyText = () => z.string().min(1);
  const callable = (message) => z.custom((candidate) => typeof candidate === "function", { message });
  const platformList = z.array(z.enum(["linux", "darwin", "win32"])).nonempty();
  return z.object({
    id: nonEmptyText(),
    title: nonEmptyText(),
    platforms: z.union([z.literal("all"), platformList]),
    allowedCommands: z.array(nonEmptyText()).optional(),
    detect: callable("detect must be a function"),
    scan: callable("scan must be a function")
  });
})();

/**
 * Checks `value` against {@link ScannerShape} and hands the same object back.
 * The parse result is discarded on purpose: callers receive the original
 * reference, not a parsed copy. Throws whatever `ScannerShape.parse` throws
 * when the shape does not match.
 */
function validateScanner(value) {
  ScannerShape.parse(value);
  return value;
}
70
+
71
+ // src/scanners/types.ts
72
+ var ScannerRegistry = class {
73
+ scanners = /* @__PURE__ */ new Map();
74
+ register(scanner) {
75
+ if (this.scanners.has(scanner.id)) throw new Error(`scanner already registered: ${scanner.id}`);
76
+ this.scanners.set(scanner.id, scanner);
77
+ return this;
78
+ }
79
+ /**
80
+ * Register a {@link Scanner} produced by an external plugin package. Validates
81
+ * the shape (throws `ZodError` on mismatch), namespaces the id to
82
+ * `plugin:<pkg>:<id>` (avoiding collisions with built-ins and across plugins),
83
+ * wraps `scan()` so the scanner's `ctx.run` is gated by its declared
84
+ * `allowedCommands` intersected with the central read-only allowlist
85
+ * ({@link checkReadOnly}), and freezes the wrapper. Reuses the duplicate-id
86
+ * guard in {@link register}.
87
+ *
88
+ * The gated runner delegates the actual execution to the host-supplied
89
+ * `ctx.run` (the platform runner), so the global read-only floor still applies
90
+ * and `allowedCommands` is a *second*, scanner-scoped least-privilege boundary.
91
+ * A command runs only if its leading executable is declared AND the whole
92
+ * command passes `checkReadOnly`; otherwise the runner returns `''` (the
93
+ * documented "blocked → ''" contract).
94
+ */
95
+ registerExternal(pkg, scanner) {
96
+ const valid = validateScanner(scanner);
97
+ const id = `plugin:${pkg}:${valid.id}`;
98
+ const declared = valid.allowedCommands ?? [];
99
+ const wrapped = Object.freeze({
100
+ id,
101
+ title: valid.title,
102
+ platforms: valid.platforms,
103
+ allowedCommands: declared,
104
+ detect: (ctx) => valid.detect(ctx),
105
+ scan: (ctx) => valid.scan({
106
+ ...ctx,
107
+ run: (cmd, opts) => {
108
+ const exe = cmd.trim().split(/\s+/)[0] ?? "";
109
+ if (!declared.includes(exe)) return "";
110
+ if (!checkReadOnly(cmd).allowed) return "";
111
+ return ctx.run(cmd, opts);
112
+ }
113
+ })
114
+ });
115
+ return this.register(wrapped);
116
+ }
117
+ get(id) {
118
+ return this.scanners.get(id);
119
+ }
120
+ list() {
121
+ return [...this.scanners.values()];
122
+ }
123
+ /** Scanners whose `platforms` include the given platform. */
124
+ forPlatform(platform) {
125
+ return this.list().filter((s) => s.platforms === "all" || s.platforms.includes(platform));
126
+ }
127
+ };
128
+
129
+ // src/scanners/bookmarks.ts
130
/**
 * Host patterns treated as personal (non-business) sites. An entry ending in
 * "." is a domain-label prefix ("facebook." matches "facebook.com" and
 * "m.facebook.com"); an entry without a trailing dot ("x.com") is a full
 * domain suffix.
 */
var PERSONAL = [
  "facebook.",
  "instagram.",
  "twitter.",
  "x.com",
  "tiktok.",
  "reddit.",
  "youtube.",
  "netflix.",
  "spotify.",
  "twitch.",
  "pinterest.",
  "snapchat.",
  "whatsapp.",
  "amazon.",
  "ebay.",
  "aliexpress.",
  "cnn.",
  "bbc.",
  "nytimes.",
  "espn.",
  "booking.",
  "airbnb.",
  "tripadvisor.",
  "wikipedia."
];

/**
 * Reports whether `host` belongs to one of the {@link PERSONAL} sites.
 *
 * Matching is case-insensitive and anchored on DNS label boundaries: a pattern
 * must start at the beginning of the host or right after a ".", and a pattern
 * without a trailing dot must also end at the end of the host or before a ".".
 * A plain substring test would flag "dropbox.com" and "box.com" as personal
 * because they contain "x.com".
 *
 * @param host hostname to test; null/undefined is treated as an empty host.
 * @returns true when the host matches a personal-site pattern.
 */
function isPersonalHost(host) {
  const h = String(host ?? "").toLowerCase();
  return PERSONAL.some((pattern) => {
    const isLabelPrefix = pattern.endsWith(".");
    for (let at = h.indexOf(pattern); at !== -1; at = h.indexOf(pattern, at + 1)) {
      const end = at + pattern.length;
      const startsLabel = at === 0 || h[at - 1] === ".";
      const endsLabel = isLabelPrefix || end === h.length || h[end] === ".";
      if (startsLabel && endsLabel) return true;
    }
    return false;
  });
}
160
+ var BUSINESS = [
161
+ "github.",
162
+ "gitlab.",
163
+ "bitbucket.",
164
+ "atlassian.",
165
+ "jira.",
166
+ "confluence.",
167
+ "notion.",
168
+ "linear.",
169
+ "slack.",
170
+ "zoom.",
171
+ "figma.",
172
+ "miro.",
173
+ "vercel.",
174
+ "netlify.",
175
+ "heroku.",
176
+ "datadog",
177
+ "sentry.",
178
+ "grafana.",
179
+ "pagerduty.",
180
+ "aws.amazon.",
181
+ "console.cloud.google",
182
+ "portal.azure",
183
+ "cloudflare.",
184
+ "hubspot.",
185
+ "salesforce.",
186
+ "stripe.",
187
+ "twilio.",
188
+ "sendgrid.",
189
+ "mailchimp.",
190
+ "segment.",
191
+ "mixpanel.",
192
+ "amplitude.",
193
+ "looker.",
194
+ "tableau.",
195
+ "snowflake.",
196
+ "databricks.",
197
+ "mongodb.",
198
+ "redis.",
199
+ "elastic.",
200
+ "openai.",
201
+ "anthropic.",
202
+ "huggingface.",
203
+ "docker.",
204
+ "npmjs.",
205
+ "pypi.",
206
+ "circleci.",
207
+ "travis-ci.",
208
+ "jenkins.",
209
+ "terraform.",
210
+ "hashicorp.",
211
+ "okta.",
212
+ "auth0.",
213
+ "1password.",
214
+ "asana.",
215
+ "trello.",
216
+ "monday."
217
+ ];
218
+ function classify(hostname) {
219
+ const h = hostname.toLowerCase();
220
+ if (isPersonalHost(h)) return null;
221
+ if (BUSINESS.some((b) => h.includes(b))) return { type: "saas_tool", confidence: 0.7 };
222
+ if (/^\d+\.\d+\.\d+\.\d+$/.test(h) || /\.(internal|local|corp|lan)\b/.test(h)) {
223
+ return { type: "web_service", confidence: 0.6 };
224
+ }
225
+ return null;
226
+ }
227
/**
 * Scanner that turns browser bookmarks into topology nodes.
 * Bookmarks are read through `ctx.scanBookmarks` when the context supplies it,
 * otherwise through `scanAllBookmarks`. Each host accepted by `classify`
 * becomes one node; repeated `<type>:<hostname>` ids are emitted once.
 * No edges are produced.
 */
var bookmarksScanner = {
  id: "bookmarks",
  title: "Browser bookmarks",
  platforms: "all",
  detect: () => true,
  async scan(ctx) {
    const readBookmarks = ctx.scanBookmarks ?? scanAllBookmarks;
    const entries = await readBookmarks();
    // Map keeps first-seen order, which is the order nodes are returned in.
    const byId = /* @__PURE__ */ new Map();
    for (const entry of entries) {
      const verdict = classify(entry.hostname);
      if (!verdict) continue;
      const nodeId = `${verdict.type}:${entry.hostname}`;
      if (byId.has(nodeId)) continue;
      // The port is only recorded when the bookmark carries a truthy one.
      const metadata = entry.port ? { protocol: entry.protocol, port: entry.port } : { protocol: entry.protocol };
      byId.set(nodeId, {
        id: nodeId,
        type: verdict.type,
        name: entry.hostname,
        discoveredVia: "bookmark",
        confidence: verdict.confidence,
        tags: ["bookmark"],
        metadata
      });
    }
    return { nodes: [...byId.values()], edges: [] };
  }
};
255
+
256
+ // src/scanners/installed-apps.ts
257
+ var KNOWN_TOOLS = {
258
+ ide: ["code", "code-insiders", "cursor", "windsurf", "zed", "nvim", "vim", "emacs", "idea", "webstorm", "pycharm", "goland", "datagrip", "clion", "rider", "phpstorm"],
259
+ "dev-tool": ["git", "gh", "docker", "docker-compose", "podman", "kubectl", "helm", "terraform", "ansible", "vagrant", "packer", "consul", "vault", "nomad"],
260
+ runtime: ["node", "npm", "pnpm", "yarn", "bun", "deno", "python", "python3", "pip", "poetry", "ruby", "rails", "java", "mvn", "gradle", "go", "cargo", "rustc", "php", "composer", "dotnet"],
261
+ database: ["psql", "mysql", "mongosh", "redis-cli", "sqlite3", "clickhouse-client"],
262
+ cloud: ["aws", "gcloud", "az", "heroku", "fly", "vercel", "netlify", "wrangler", "supabase"],
263
+ browser: ["google-chrome", "chromium", "firefox", "brave", "opera"],
264
+ observability: ["prometheus", "grafana-cli", "datadog-agent", "newrelic-agent"]
265
+ };
266
+ var installedAppsScanner = {
267
+ id: "installed-apps",
268
+ title: "Installed apps & developer tools",
269
+ platforms: "all",
270
+ allowedCommands: ["which", "command", "Get-Command"],
271
+ detect: () => true,
272
+ async scan(ctx) {
273
+ const nodes = [];
274
+ const hintTerms = (ctx.hint ?? "").toLowerCase().split(/[\s,]+/).filter(Boolean);
275
+ for (const [category, tools] of Object.entries(KNOWN_TOOLS)) {
276
+ for (const tool of tools) {
277
+ const path = (ctx.commandExists ?? commandExists)(tool);
278
+ if (!path) continue;
279
+ const boosted = hintTerms.some((t) => tool.includes(t));
280
+ nodes.push({
281
+ id: `saas_tool:${tool}`,
282
+ type: "saas_tool",
283
+ name: tool,
284
+ discoveredVia: "installed-app",
285
+ confidence: boosted ? 0.95 : 0.9,
286
+ tags: [category],
287
+ metadata: { category, path }
288
+ });
289
+ }
290
+ }
291
+ return { nodes, edges: [] };
292
+ }
293
+ };
294
+
295
+ // src/scanners/ports.ts
296
/** Well-known TCP ports mapped to the node type and service name they imply. */
var PORT_MAP = {
  5432: { type: "database_server", service: "postgresql" },
  3306: { type: "database_server", service: "mysql" },
  1433: { type: "database_server", service: "sqlserver" },
  27017: { type: "database_server", service: "mongodb" },
  9200: { type: "database_server", service: "elasticsearch" },
  6379: { type: "cache_server", service: "redis" },
  11211: { type: "cache_server", service: "memcached" },
  9092: { type: "message_broker", service: "kafka" },
  5672: { type: "message_broker", service: "rabbitmq" },
  4222: { type: "message_broker", service: "nats" },
  9090: { type: "web_service", service: "prometheus" },
  3e3: { type: "web_service", service: "http-app" },
  8080: { type: "web_service", service: "http-app" },
  8e3: { type: "web_service", service: "http-app" },
  80: { type: "web_service", service: "http" },
  443: { type: "web_service", service: "https" },
  8200: { type: "web_service", service: "vault" },
  8500: { type: "web_service", service: "consul" },
  2379: { type: "web_service", service: "etcd" },
  5601: { type: "web_service", service: "kibana" },
  15672: { type: "web_service", service: "rabbitmq-management" }
};

/**
 * Pulls the known ports (keys of {@link PORT_MAP}) out of raw socket-listing
 * text, de-duplicated and in first-seen order.
 *
 * A port is a run of 2-5 digits introduced by ":" ("0.0.0.0:5432") or "."
 * ("127.0.0.1.5432"). The run must not be followed by another "." or ":",
 * because that means it is an address component rather than the trailing port:
 * without that guard "192.168.1.80:22" reports port 80 and an IPv6 hextet such
 * as ":8080:" reports port 8080.
 *
 * @param raw text output of a listening-socket listing; null/undefined is
 *            treated as empty.
 * @returns array of port numbers present in PORT_MAP.
 */
function extractListeningPorts(raw) {
  const ports = /* @__PURE__ */ new Set();
  for (const m of String(raw ?? "").matchAll(/[:.](\d{2,5})\b(?![.:])/g)) {
    const p = Number(m[1]);
    if (p in PORT_MAP) ports.add(p);
  }
  return [...ports];
}
327
/**
 * Scanner that maps locally listening well-known ports to service nodes.
 * The raw listing comes from `ctx.scanListeningPorts` when supplied, otherwise
 * from `scanListeningPorts`; ports are extracted with `extractListeningPorts`
 * and described through `PORT_MAP`. No edges are produced.
 */
var portsScanner = {
  id: "local-ports",
  title: "Local listening ports",
  platforms: "all",
  allowedCommands: ["ss", "lsof", "Get-NetTCPConnection"],
  detect: () => true,
  async scan(ctx) {
    const listSockets = ctx.scanListeningPorts ?? scanListeningPorts;
    const listing = listSockets();
    const nodes = extractListeningPorts(listing).map((port) => {
      const known = PORT_MAP[port];
      return {
        id: `${known.type}:localhost:${port}`,
        type: known.type,
        name: `${known.service} (:${port})`,
        discoveredVia: "listening-port",
        confidence: 0.9,
        tags: ["local", known.service],
        metadata: { port, service: known.service, host: "localhost" }
      };
    });
    return { nodes, edges: [] };
  }
};
351
+
352
+ // src/scanners/confidence.ts
353
/** Confidence assigned to an edge, keyed by the kind of evidence behind it. */
var CONFIDENCE = {
  "established-connection": 0.85,
  "config-declared": 0.7,
  "connection-string": 0.6,
  "co-location": 0.4
};

/**
 * Formats one evidence string: `[<kind>] <detail> @ <ISO-8601 timestamp>`,
 * where the timestamp is the moment of the call.
 */
function evidenceLine(kind, detail) {
  const observedAt = (/* @__PURE__ */ new Date()).toISOString();
  return `[${kind}] ${detail} @ ${observedAt}`;
}
362
+
363
+ // src/scanners/connections.ts
364
/** One to five decimal digits — the textual form of a port. */
var PORT_RE = /^\d{1,5}$/;

/**
 * Splits a `host:port` token at its last colon.
 * Returns `{ host, port }`, or `null` when the token is blank, has no colon
 * after its first character, or the part after the colon is not a number in
 * 1..65535. A leading "[" and a trailing "]" are removed from the host.
 */
function splitHostPort(token) {
  const text = token.trim();
  const colonAt = text.lastIndexOf(":");
  // colonAt <= 0 also covers the empty token (lastIndexOf gives -1).
  if (text === "" || colonAt <= 0) return null;
  const portText = text.slice(colonAt + 1);
  if (!PORT_RE.test(portText)) return null;
  const port = Number(portText);
  const inRange = port >= 1 && port <= 65535;
  if (!inRange) return null;
  const host = text.slice(0, colonAt).replace(/^\[|\]$/g, "");
  return { host, port };
}
377
+ function parseEstablished(raw) {
378
+ const out = [];
379
+ const seen = /* @__PURE__ */ new Set();
380
+ const push = (local, remote) => {
381
+ const key = `${local.port}->${remote.host}:${remote.port}`;
382
+ if (seen.has(key)) return;
383
+ seen.add(key);
384
+ out.push({ localPort: local.port, remoteHost: remote.host, remotePort: remote.port });
385
+ };
386
+ for (const line of raw.split(/\r?\n/)) {
387
+ const l = line.trim();
388
+ if (!l) continue;
389
+ const arrow = l.match(/([^\s]+:\d{1,5})\s*->\s*([^\s(]+:\d{1,5})/);
390
+ if (arrow) {
391
+ const local = splitHostPort(arrow[1]);
392
+ const remote = splitHostPort(arrow[2]);
393
+ if (local && remote) push(local, remote);
394
+ continue;
395
+ }
396
+ if (/^ESTAB\b/i.test(l) || /\bESTAB\b/i.test(l)) {
397
+ const cols = l.split(/\s+/).filter(Boolean);
398
+ const hostPorts = cols.map((c) => splitHostPort(c)).filter((x) => x !== null);
399
+ if (hostPorts.length >= 2) {
400
+ const local = hostPorts[hostPorts.length - 2];
401
+ const remote = hostPorts[hostPorts.length - 1];
402
+ push(local, remote);
403
+ }
404
+ }
405
+ }
406
+ return out;
407
+ }
408
/**
 * Scanner that turns established connections into `connects_to` edges.
 * The raw listing comes from `ctx.scanEstablishedConnections` when supplied,
 * otherwise from `scanEstablishedConnections`, and is parsed by
 * `parseEstablished`. When at least one connection exists a single
 * `host:localhost` node is emitted; every connection whose remote port is in
 * `PORT_MAP` yields one edge from that node to `<type>:localhost:<port>`
 * (one edge per target id).
 */
var connectionsScanner = {
  id: "local-connections",
  title: "Local established connections",
  platforms: "all",
  allowedCommands: ["ss", "lsof", "netstat", "Get-NetTCPConnection"],
  detect: () => true,
  async scan(ctx) {
    const listConnections = ctx.scanEstablishedConnections ?? scanEstablishedConnections;
    const connections = parseEstablished(listConnections());
    if (connections.length === 0) return { nodes: [], edges: [] };

    const selfId = "host:localhost";
    const nodes = [{
      id: selfId,
      type: "host",
      name: "localhost",
      discoveredVia: "established-connection",
      confidence: 0.9,
      metadata: { host: "localhost" },
      tags: ["local"]
    }];

    const edges = [];
    const linkedTargets = /* @__PURE__ */ new Set();
    for (const { localPort, remoteHost, remotePort } of connections) {
      const known = PORT_MAP[remotePort];
      if (!known) continue;
      const targetId = `${known.type}:localhost:${remotePort}`;
      if (linkedTargets.has(targetId)) continue;
      linkedTargets.add(targetId);
      edges.push({
        sourceId: selfId,
        targetId,
        relationship: "connects_to",
        evidence: evidenceLine("established-connection", `${localPort} -> ${remoteHost}:${remotePort}`),
        confidence: CONFIDENCE["established-connection"]
      });
    }
    return { nodes, edges };
  }
};
447
+
448
+ // src/scanners/service-config.ts
449
/** Environment-variable names that may hold a service connection URL. */
var CONN_VAR_RE = /^(DATABASE_URL|[A-Z0-9_]*_DATABASE_URL|REDIS_URL|[A-Z0-9_]*_REDIS_URL|MONGO_URL|MONGODB_URI|AMQP_URL|[A-Z0-9_]*_URL)$/;

/** Port implied by a URL scheme when the URL itself does not carry one. */
var SCHEME_PORT = {
  postgres: 5432,
  postgresql: 5432,
  mysql: 3306,
  mongodb: 27017,
  redis: 6379,
  amqp: 5672,
  kafka: 9092,
  elasticsearch: 9200
};

/**
 * Directories searched for service configuration: the current working
 * directory, plus the two nginx directories when `IS_WIN` is false.
 */
function configDirs() {
  const cwd = process.cwd();
  return IS_WIN ? [cwd] : [cwd, "/etc/nginx", "/etc/nginx/conf.d"];
}
465
/**
 * Masks credentials in a connection string.
 * The user-info part between `scheme://` and `@` becomes `****`, and the value
 * of every `password=` / `pwd=` parameter (any letter case) becomes `****`.
 */
function redactConnectionString(url) {
  const maskUserInfo = (_whole, scheme) => `${scheme}****@`;
  const withoutUserInfo = url.replace(/(\b[a-z][a-z0-9+.-]*:\/\/)([^/\s]+)@/i, maskUserInfo);
  return withoutUserInfo.replace(/\b(password|pwd)=([^&\s]+)/gi, "$1=****");
}
468
/**
 * Collects the `host:port` targets named in nginx configuration text.
 * All `server <host>:<port>` entries come first, followed by all
 * `proxy_pass http(s)://<host>:<port>` entries; a repeated `host:port` pair is
 * reported once.
 */
function parseNginxUpstreams(raw) {
  const directives = [
    /\bserver\s+([a-zA-Z0-9_.-]+):(\d{1,5})/g,
    /\bproxy_pass\s+https?:\/\/([a-zA-Z0-9_.-]+):(\d{1,5})/g
  ];
  // Keyed by "host:port" with the numeric port; Map keeps first-seen order.
  const targets = /* @__PURE__ */ new Map();
  for (const directive of directives) {
    for (const hit of raw.matchAll(directive)) {
      const host = hit[1];
      const port = Number(hit[2]);
      const key = `${host}:${port}`;
      if (!targets.has(key)) targets.set(key, { host, port });
    }
  }
  return [...targets.values()];
}
481
/**
 * Line-based extraction of `depends_on` relations from Compose-file text.
 * This is an indentation-driven scan, not a YAML parser.
 *
 * Recognised shapes inside a `services:` block:
 *  - inline list:  `depends_on: [db, "cache"]`
 *  - block list:   `- db`, `- "db"`, `- 'db'`
 *  - mapping form: `db:` on its own line under `depends_on:`
 *
 * Quoted block-list items are accepted so the block form agrees with the
 * inline form (which already strips quotes), and every form de-duplicates, so
 * a dependency named twice is reported once.
 *
 * @param raw Compose file contents.
 * @returns one `{ service, dependsOn }` entry per service header found, in
 *          file order; `dependsOn` is empty when the service declares none.
 */
function parseComposeDeps(raw) {
  const result = [];
  let inServices = false;
  let servicesIndent = -1;
  let current = null;
  // Indentation of the first service header; later headers must sit at the same depth.
  let serviceIndent = -1;
  let inDeps = false;
  let depsIndent = -1;
  const addDep = (dep) => {
    if (dep && !current.dependsOn.includes(dep)) current.dependsOn.push(dep);
  };
  for (const rawLine of raw.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const indent = rawLine.length - rawLine.trimStart().length;
    if (/^services:\s*$/.test(trimmed)) {
      inServices = true;
      servicesIndent = indent;
      continue;
    }
    if (!inServices) continue;
    // A key at or left of `services:` ends the services block.
    if (indent <= servicesIndent && !/^services:/.test(trimmed)) {
      inServices = false;
      current = null;
      inDeps = false;
      continue;
    }
    const svcMatch = trimmed.match(/^([a-zA-Z0-9._-]+):\s*$/);
    const isServiceHeader = !!svcMatch && indent > servicesIndent && (serviceIndent === -1 || indent === serviceIndent);
    if (isServiceHeader) {
      current = { service: svcMatch[1], dependsOn: [] };
      result.push(current);
      serviceIndent = indent;
      inDeps = false;
      continue;
    }
    if (!current) continue;
    if (/^depends_on:/.test(trimmed)) {
      inDeps = true;
      depsIndent = indent;
      const inline = trimmed.match(/^depends_on:\s*\[(.*)\]\s*$/);
      if (inline) {
        for (const dep of inline[1].split(",").map((d) => d.trim().replace(/['"]/g, ""))) addDep(dep);
        // The inline form is complete on this line; nothing follows underneath.
        inDeps = false;
      }
      continue;
    }
    if (!inDeps) continue;
    if (indent <= depsIndent) {
      // Dedented past `depends_on:` — the dependency list is over.
      inDeps = false;
      continue;
    }
    const listItem = trimmed.match(/^-\s*(['"]?)([a-zA-Z0-9._-]+)\1\s*$/);
    const mapItem = trimmed.match(/^([a-zA-Z0-9._-]+):\s*$/);
    addDep(listItem?.[2] ?? mapItem?.[1]);
  }
  return result;
}
545
/**
 * Interprets an environment variable value as a service connection URL.
 *
 * Returns `null` unless the value starts with a scheme listed in
 * `SCHEME_PORT` and the resulting port (explicit in the URL, else the scheme
 * default) is a key of `PORT_MAP`. Otherwise returns the relationship
 * (`depends_on` for amqp/kafka, `reads_from` for everything else), the
 * service name and port, and an evidence line built from the variable name
 * and the redacted URL.
 */
function parseConnectionString(name, url) {
  const schemeMatch = url.match(/^([a-z][a-z0-9+.-]*):\/\//i);
  if (!schemeMatch) return null;
  const scheme = schemeMatch[1].toLowerCase();
  if (!(scheme in SCHEME_PORT)) return null;

  // Explicit port: the last ":<digits>" of the authority, directly before "/", "?" or the end.
  const explicitPort = url.match(/:\/\/[^/]*?:(\d{1,5})(?:[/?]|$)/);
  const port = explicitPort ? Number(explicitPort[1]) : SCHEME_PORT[scheme];
  if (!port || !(port in PORT_MAP)) return null;

  const isBroker = scheme === "amqp" || scheme === "kafka";
  return {
    relationship: isBroker ? "depends_on" : "reads_from",
    service: PORT_MAP[port].service,
    port,
    evidence: evidenceLine("connection-string", `${name}=${redactConnectionString(url)}`)
  };
}
562
/**
 * Tells whether `NAME=value` text (one assignment per line) contains at least
 * one variable whose name matches `CONN_VAR_RE` and whose value
 * `parseConnectionString` accepts. Lines without "=" or with an empty name
 * are ignored.
 */
function hasConnVar(env) {
  return env.split(/\r?\n/).some((line) => {
    const eqAt = line.indexOf("=");
    if (eqAt <= 0) return false;
    const name = line.slice(0, eqAt).trim();
    if (!CONN_VAR_RE.test(name)) return false;
    const value = line.slice(eqAt + 1).trim();
    return Boolean(parseConnectionString(name, value));
  });
}
572
/**
 * Scanner that derives dependency edges from configuration:
 *  - connection URLs found in the `printenv` output,
 *  - `server` / `proxy_pass` targets in nginx-style `*.conf` files,
 *  - `depends_on` relations in Compose files.
 *
 * File contents are read with `ctx.run('cat "<file>"')`. File names come from
 * the file system and are interpolated into that command line, so any path
 * containing a double quote, "$" or a backtick is skipped instead of being
 * read: inside double quotes those characters would end the quoting or start
 * an expansion. (This assumes `ctx.run` hands the string to a shell — the
 * runner is defined outside this block.)
 *
 * Each node id is emitted once per scan.
 */
var serviceConfigScanner = {
  id: "service-config",
  title: "Service & reverse-proxy config",
  platforms: "all",
  allowedCommands: ["cat", "grep", "find", "printenv", "ls", "Get-ChildItem"],
  /** Runs when any candidate config file exists or the environment holds a usable connection URL. */
  detect(ctx) {
    const files = (ctx.findFiles ?? findFiles)(configDirs(), ["*.conf", "nginx.conf", "docker-compose.y*ml", "compose.y*ml"], 4, 50);
    if (files.trim().length > 0) return true;
    return hasConnVar(ctx.run("printenv"));
  },
  async scan(ctx) {
    const nodes = [];
    const edges = [];
    const selfId = "host:localhost";
    const emittedIds = /* @__PURE__ */ new Set();
    // First occurrence of an id wins; later duplicates are dropped.
    const emitNode = (node) => {
      if (emittedIds.has(node.id)) return;
      emittedIds.add(node.id);
      nodes.push(node);
    };
    const emitSelf = () => emitNode({ id: selfId, type: "host", name: "localhost", discoveredVia: "service-config", confidence: 0.9, metadata: { host: "localhost" }, tags: ["local"] });
    // Paths that cannot be embedded safely between double quotes.
    const isQuotable = (file) => !/["$`]/.test(file);
    const findFilesFn = ctx.findFiles ?? findFiles;
    const listFiles = (patterns) => findFilesFn(configDirs(), patterns, 4, 50).split(/\r?\n/).map((s) => s.trim()).filter(Boolean).filter(isQuotable);

    // 1) Connection URLs in the environment.
    for (const line of ctx.run("printenv").split(/\r?\n/)) {
      const eq = line.indexOf("=");
      if (eq <= 0) continue;
      const name = line.slice(0, eq).trim();
      const value = line.slice(eq + 1).trim();
      if (!CONN_VAR_RE.test(name)) continue;
      const parsed = parseConnectionString(name, value);
      if (!parsed) continue;
      emitSelf();
      edges.push({
        sourceId: selfId,
        targetId: `${PORT_MAP[parsed.port].type}:localhost:${parsed.port}`,
        relationship: parsed.relationship,
        evidence: parsed.evidence,
        confidence: CONFIDENCE["connection-string"]
      });
    }

    // 2) nginx upstream / proxy targets on known ports.
    for (const file of listFiles(["*.conf", "nginx.conf"])) {
      const content = ctx.run(`cat "${file}"`);
      if (!content) continue;
      for (const { host, port } of parseNginxUpstreams(content)) {
        if (!(port in PORT_MAP)) continue;
        emitSelf();
        edges.push({
          sourceId: selfId,
          targetId: `${PORT_MAP[port].type}:localhost:${port}`,
          relationship: "depends_on",
          evidence: evidenceLine("config-declared", `nginx ${file}: ${host}:${port}`),
          confidence: CONFIDENCE["config-declared"]
        });
      }
    }

    // 3) Compose `depends_on` relations between containers.
    for (const file of listFiles(["docker-compose.y*ml", "compose.y*ml"])) {
      const content = ctx.run(`cat "${file}"`);
      if (!content) continue;
      for (const { service, dependsOn } of parseComposeDeps(content)) {
        if (dependsOn.length === 0) continue;
        const srcId = `container:${service}`;
        emitNode({ id: srcId, type: "container", name: service, discoveredVia: "service-config", confidence: 0.7, metadata: { compose: file }, tags: ["compose"] });
        for (const dep of dependsOn) {
          const depId = `container:${dep}`;
          emitNode({ id: depId, type: "container", name: dep, discoveredVia: "service-config", confidence: 0.7, metadata: { compose: file }, tags: ["compose"] });
          edges.push({
            sourceId: srcId,
            targetId: depId,
            relationship: "depends_on",
            evidence: evidenceLine("config-declared", `compose ${file}: ${service} depends_on ${dep}`),
            confidence: CONFIDENCE["config-declared"]
          });
        }
      }
    }
    return { nodes, edges };
  }
};
650
+
651
+ // src/scanners/registry.ts
652
/**
 * Builds a fresh registry holding the built-in scanners, registered in the
 * order listed here.
 */
function defaultRegistry() {
  const builtIns = [
    bookmarksScanner,
    installedAppsScanner,
    portsScanner,
    cloudAwsScanner,
    cloudGcpScanner,
    cloudAzureScanner,
    k8sScanner,
    databasesScanner,
    connectionsScanner,
    serviceConfigScanner
  ];
  const registry = new ScannerRegistry();
  for (const scanner of builtIns) registry.register(scanner);
  return registry;
}
655
+
656
+ // src/scanners/loader.ts
657
/**
 * Imports each plugin package and lets it register scanners on `registry`.
 *
 * A plugin module must default-export an object with a `register(api)`
 * function; `api.registerScanner(scanner)` forwards to
 * `registry.registerExternal(pkg, scanner)`. A package that fails to import,
 * lacks that export, or throws/rejects during registration is logged and
 * skipped — one bad plugin never aborts the others.
 *
 * `register` is awaited so an asynchronous plugin is handled like a
 * synchronous one: its rejection lands in the catch below instead of becoming
 * an unhandled rejection, and the package is only reported as loaded once its
 * registration has finished.
 *
 * @param registry registry receiving the plugin scanners.
 * @param pkgs     module specifiers to import, processed in order.
 * @returns the specifiers whose registration completed.
 */
async function loadPlugins(registry, pkgs) {
  const loaded = [];
  for (const pkg of pkgs) {
    try {
      const mod = await import(pkg);
      const plugin = mod.default;
      if (!plugin || typeof plugin.register !== "function") {
        logWarn("scanner plugin missing default export", { pkg });
        continue;
      }
      const api = {
        registerScanner: (scanner) => {
          registry.registerExternal(pkg, scanner);
        }
      };
      await plugin.register(api);
      loaded.push(pkg);
      logInfo("scanner plugin loaded", { pkg, name: plugin.name });
    } catch (err) {
      logWarn("scanner plugin load failed (skipped)", {
        pkg,
        error: err instanceof Error ? err.message : String(err)
      });
    }
  }
  return loaded;
}
684
+
685
+ // src/discovery/local.ts
686
+ function projectScan(sessionId, nodes, edges) {
687
+ const ts = (/* @__PURE__ */ new Date()).toISOString();
688
+ return {
689
+ nodes: nodes.map((n) => ({
690
+ ...n,
691
+ sessionId,
692
+ discoveredAt: ts,
693
+ depth: 0,
694
+ tags: n.tags ?? [],
695
+ metadata: n.metadata ?? {}
696
+ })),
697
+ edges: edges.map((e) => ({ ...e, id: "", sessionId, discoveredAt: ts }))
698
+ };
699
+ }
700
/**
 * Runs every scanner registered for the current platform and stores the
 * merged result for `sessionId`.
 *
 * - Registry: `opts.registry` when given, else a fresh default registry;
 *   `opts.plugins` are loaded only in the latter case.
 * - Scanner context: built-in helpers, overridable through `opts.ctx`.
 * - Per scanner: skipped when `detect` is falsy; a throw from `detect` or
 *   `scan` is reported through `opts.onProgress` and the loop moves on.
 * - Merge: for a repeated node id the entry with the strictly higher
 *   confidence is kept; edges are kept only when both endpoints are known.
 * - `opts.mode === "update"`: the scan is diffed against what the db holds and
 *   applied as a delta (attribution confidence 0.5). Otherwise nodes are
 *   upserted and edges inserted one by one.
 *
 * Attribution is attached only when the stored session has a `machineId`.
 *
 * @returns counts of nodes and edges, the ids of scanners that ran, and — in
 *          update mode — the computed delta.
 */
async function runLocalDiscovery(db, sessionId, opts = {}) {
  const registry = opts.registry ?? defaultRegistry();
  const shouldLoadPlugins = !opts.registry && opts.plugins && opts.plugins.length > 0;
  if (shouldLoadPlugins) {
    await loadPlugins(registry, opts.plugins);
  }
  const ctx = {
    hint: opts.hint,
    platform: PLATFORM,
    run,
    commandExists,
    scanListeningPorts,
    scanEstablishedConnections,
    findFiles,
    scanBookmarks: scanAllBookmarks,
    ...opts.ctx
  };

  const bestNodeById = /* @__PURE__ */ new Map();
  const collectedEdges = [];
  const ran = [];
  for (const scanner of registry.forPlatform(PLATFORM)) {
    try {
      const applicable = await scanner.detect(ctx);
      if (!applicable) continue;
      const result = await scanner.scan(ctx);
      ran.push(scanner.id);
      for (const candidate of result.nodes) {
        const known = bestNodeById.get(candidate.id);
        const shouldReplace = !known || candidate.confidence > known.confidence;
        if (shouldReplace) bestNodeById.set(candidate.id, candidate);
      }
      collectedEdges.push(...result.edges);
      opts.onProgress?.(`${scanner.title}: +${result.nodes.length} nodes`);
    } catch (err) {
      opts.onProgress?.(`${scanner.title}: failed (${err instanceof Error ? err.message : String(err)})`);
    }
  }

  const session = db.getSession(sessionId);
  let baseAttribution = void 0;
  if (session?.machineId) {
    baseAttribution = {
      machineId: session.machineId,
      hostname: session.hostname ?? "unknown-host",
      user: session.user ?? "unknown-user",
      organization: session.tenant,
      at: (/* @__PURE__ */ new Date()).toISOString()
    };
  }
  const attributionWith = (confidence) => baseAttribution ? { ...baseAttribution, confidence } : void 0;

  // Drop edges that point at nodes no scanner produced.
  const validEdges = collectedEdges.filter((edge) => bestNodeById.has(edge.sourceId) && bestNodeById.has(edge.targetId));

  if (opts.mode === "update") {
    const prior = { nodes: db.getNodes(sessionId), edges: db.getEdges(sessionId) };
    const current = projectScan(sessionId, [...bestNodeById.values()], validEdges);
    const delta = diffTopology(prior, current);
    db.applyTopologyDelta(sessionId, delta, attributionWith(0.5));
    return { nodes: current.nodes.length, edges: current.edges.length, scanners: ran, delta };
  }

  for (const node of bestNodeById.values()) {
    db.upsertNode(sessionId, node, 0, attributionWith(node.confidence));
  }
  for (const edge of validEdges) {
    db.insertEdge(sessionId, edge);
  }
  return { nodes: bestNodeById.size, edges: validEdges.length, scanners: ran };
}
755
/**
 * Binds a registry and plugin list into a discovery callback.
 * The callback forwards `hint` and `mode` to `runLocalDiscovery` and returns
 * the node and edge counts, plus `delta` when the run produced a truthy one.
 */
function localDiscoveryFn(registry, plugins) {
  return async (db, sessionId, opts) => {
    const settings = { hint: opts.hint, mode: opts.mode, registry, plugins };
    const outcome = await runLocalDiscovery(db, sessionId, settings);
    const counts = { nodes: outcome.nodes, edges: outcome.edges };
    return outcome.delta ? { ...counts, delta: outcome.delta } : counts;
  };
}
761
+
762
+ // src/compliance/rulesets/baseline.ts
763
/**
 * Built-in "baseline" ruleset (five rules, BASE-1 … BASE-5), validated by
 * `RulesetSchema.parse` when this module is evaluated.
 */
var baseline = RulesetSchema.parse({
  name: "baseline",
  version: "1.0.0",
  framework: "baseline",
  description: "Deterministic baseline hygiene controls scored against signals available today.",
  rules: [
    {
      id: "BASE-1", control: "BASE-1", framework: "baseline", severity: "medium",
      title: "Asset has an owner",
      rationale: "Every asset should have a clear owning team/person for accountability.",
      scope: {},
      check: {
        any: [
          { field: "owner", op: "present" },
          { field: "domain", op: "present" },
          { field: "tags", op: "matches", pattern: "owner_key" },
          { field: "metadataKeys", op: "matches", pattern: "owner_key" }
        ]
      }
    },
    {
      id: "BASE-2", control: "BASE-2", framework: "baseline", severity: "low",
      title: "Service/data asset is assigned a business domain",
      rationale: "Domain assignment enables blast-radius and ownership analysis.",
      scope: { groups: ["data", "web"] },
      check: { field: "domain", op: "present" }
    },
    {
      id: "BASE-3", control: "BASE-3", framework: "baseline", severity: "high",
      title: "Critical asset is discovered with adequate confidence",
      rationale: "Low-confidence critical assets indicate incomplete or unreliable discovery.",
      scope: { groups: ["data", "infra"] },
      check: { field: "confidence", op: "gte", value: 0.5 }
    },
    {
      id: "BASE-4", control: "BASE-4", framework: "baseline", severity: "critical",
      title: "No embedded credentials / plaintext DSN in metadata",
      rationale: "A connection string with embedded credentials is a direct secret-exposure risk.",
      scope: {},
      check: {
        not: { field: "metadataValues", op: "matches", pattern: "dsn_with_credentials" }
      }
    },
    {
      id: "BASE-5", control: "BASE-5", framework: "baseline", severity: "medium",
      title: "Data store carries an acceptable quality score",
      rationale: "Where a quality score exists, a low score flags an under-governed data store.",
      scope: { groups: ["data"] },
      applicableWhen: { field: "qualityScore", op: "present" },
      check: { field: "qualityScore", op: "gte", value: 50 }
    }
  ]
});
827
+
828
+ // src/compliance/rulesets/cis.ts
829
/**
 * Built-in "cis" ruleset (three rules), validated by `RulesetSchema.parse`
 * when this module is evaluated. Its own description marks it as an
 * illustrative starter subset.
 */
var cis = RulesetSchema.parse({
  name: "cis",
  version: "0.1.0",
  framework: "CIS",
  description: "CIS starter subset \u2014 illustrative controls, not a certified benchmark.",
  rules: [
    {
      id: "CIS-1.1", control: "CIS-1.1", framework: "CIS", severity: "critical",
      title: "No plaintext credentials in service configuration",
      rationale: "CIS hardening forbids embedded secrets in config/metadata.",
      scope: {},
      check: {
        not: { field: "metadataValues", op: "matches", pattern: "dsn_with_credentials" }
      }
    },
    {
      id: "CIS-2.1", control: "CIS-2.1", framework: "CIS", severity: "high",
      title: "Data store is not publicly exposed",
      rationale: "Data stores should not bind to public/0.0.0.0 addresses.",
      scope: { groups: ["data"] },
      check: {
        not: { field: "metadataValues", op: "matches", pattern: "public_exposure" }
      }
    },
    {
      id: "CIS-3.1", control: "CIS-3.1", framework: "CIS", severity: "medium",
      title: "Asset inventory has an accountable owner",
      rationale: "CIS asset management requires an assigned owner.",
      scope: {},
      check: {
        any: [
          { field: "owner", op: "present" },
          { field: "metadataKeys", op: "matches", pattern: "owner_key" }
        ]
      }
    }
  ]
});
867
+
868
+ // src/compliance/rulesets/soc2.ts
869
/**
 * Built-in "soc2" ruleset (three rules), validated by `RulesetSchema.parse`
 * when this module is evaluated. Its own description marks it as an
 * illustrative starter subset.
 */
var soc2 = RulesetSchema.parse({
  name: "soc2",
  version: "0.1.0",
  framework: "SOC2",
  description: "SOC 2 starter subset \u2014 illustrative controls, not a certified control set.",
  rules: [
    {
      id: "CC6.1", control: "CC6.1", framework: "SOC2", severity: "critical",
      title: "Logical access \u2014 no embedded credentials",
      rationale: "CC6.1 logical access controls preclude plaintext secrets in config.",
      scope: {},
      check: {
        not: { field: "metadataValues", op: "matches", pattern: "dsn_with_credentials" }
      }
    },
    {
      id: "CC6.6", control: "CC6.6", framework: "SOC2", severity: "high",
      title: "Boundary protection \u2014 data stores not public",
      rationale: "CC6.6 requires protection of system boundaries from external access.",
      scope: { groups: ["data"] },
      check: {
        not: { field: "metadataValues", op: "matches", pattern: "public_exposure" }
      }
    },
    {
      id: "CC1.2", control: "CC1.2", framework: "SOC2", severity: "medium",
      title: "Accountability \u2014 asset has an owner",
      rationale: "CC1.2 establishes accountability; assets need an assigned owner.",
      scope: {},
      check: {
        any: [
          { field: "owner", op: "present" },
          { field: "domain", op: "present" }
        ]
      }
    }
  ]
});
907
+
908
+ // src/compliance/rulesets/iso27001.ts
909
// ISO/IEC 27001 Annex A starter ruleset. Validated by RulesetSchema.parse when the module loads.
var iso27001 = RulesetSchema.parse((() => {
  const framework = "ISO27001";
  // Builds the negated "metadataValues matches <pattern>" check shared by two rules below.
  const noMetadataValue = (pattern) => ({ not: { field: "metadataValues", op: "matches", pattern } });
  const rules = [
    {
      id: "A.8.1",
      control: "A.8.1",
      framework,
      severity: "medium",
      title: "Inventory of assets \u2014 ownership assigned",
      rationale: "A.8.1 requires an inventory of assets each with an identified owner.",
      scope: {},
      check: { any: [{ field: "owner", op: "present" }, { field: "metadataKeys", op: "matches", pattern: "owner_key" }] }
    },
    {
      id: "A.8.12",
      control: "A.8.12",
      framework,
      severity: "critical",
      title: "Data leakage prevention \u2014 no embedded secrets",
      rationale: "A.8.12 mandates measures against data leakage such as exposed credentials.",
      scope: {},
      check: noMetadataValue("dsn_with_credentials")
    },
    {
      id: "A.8.20",
      control: "A.8.20",
      framework,
      severity: "high",
      title: "Network security \u2014 data stores not publicly exposed",
      rationale: "A.8.20 requires networks to be secured; data stores should not be public.",
      scope: { groups: ["data"] },
      check: noMetadataValue("public_exposure")
    }
  ];
  return {
    name: "iso27001",
    version: "0.1.0",
    framework,
    description: "ISO/IEC 27001 Annex A starter subset \u2014 illustrative, not certified.",
    rules
  };
})());
947
+
948
+ // src/compliance/rulesets/registry.ts
949
// Built-in rulesets keyed by the name callers pass to getRuleset.
var RULESETS = { baseline, cis, soc2, iso27001 };
/**
 * Look up a built-in ruleset by name.
 *
 * Only own keys of RULESETS are considered. A plain `RULESETS[name]` would
 * return inherited Object.prototype members (a function for "constructor" or
 * "toString") instead of reporting the name as unknown.
 *
 * @param name Ruleset name, e.g. "cis".
 * @returns The parsed ruleset, or undefined when no ruleset has that name.
 */
function getRuleset(name) {
  return Object.prototype.hasOwnProperty.call(RULESETS, name) ? RULESETS[name] : void 0;
}
953
/**
 * Summarise every registered ruleset.
 * @returns One `{ name, version, framework, ruleCount }` record per entry of RULESETS.
 */
function listRulesets() {
  const summaries = [];
  for (const ruleset of Object.values(RULESETS)) {
    const { name, version, framework, rules } = ruleset;
    summaries.push({ name, version, framework, ruleCount: rules.length });
  }
  return summaries;
}
956
+
957
+ // src/sinks/stdout.ts
958
/** Drift sink that writes each alert to stdout as a single JSON line. */
var StdoutSink = class {
  name = "stdout";
  /**
   * Serialise the alert (after passing it through redactValue) and write it,
   * newline-terminated, to process.stdout; then log a short info record.
   */
  async emit(alert) {
    const line = JSON.stringify(redactValue(alert));
    process.stdout.write(line + "\n");
    const details = { sink: this.name, severity: alert.severity, items: alert.items.length };
    logInfo("drift alert emitted", details);
  }
};
965
+
966
+ // src/sinks/webhook.ts
967
// Host names accepted for plain-http webhooks. URL#hostname keeps the brackets on IPv6 literals.
var LOOPBACK_HOSTS = /* @__PURE__ */ new Set(["localhost", "127.0.0.1", "[::1]", "::1"]);
/**
 * Decide whether a webhook URL may be used.
 *
 * @param url Candidate URL string.
 * @param env Environment to consult; CARTOGRAPHY_ALLOW_INSECURE_SYNC === "1" accepts anything.
 * @returns true for https URLs and for http URLs whose host is in LOOPBACK_HOSTS;
 *          false for unparseable URLs and every other scheme/host.
 */
function isSecureWebhookUrl(url, env = process.env) {
  if (env.CARTOGRAPHY_ALLOW_INSECURE_SYNC === "1") return true;
  let protocol;
  let hostname;
  try {
    ({ protocol, hostname } = new URL(url));
  } catch {
    return false;
  }
  switch (protocol) {
    case "https:":
      return true;
    case "http:":
      return LOOPBACK_HOSTS.has(hostname);
    default:
      return false;
  }
}
980
/** Drift sink that POSTs each alert as JSON to a configured webhook URL. */
var WebhookSink = class {
  constructor(opts) {
    this.opts = opts;
  }
  name = "webhook";
  /**
   * Deliver one alert. Never throws: missing fetch, missing URL, a refused
   * (insecure) URL, a non-2xx response and network errors are all logged and
   * swallowed.
   */
  async emit(alert) {
    const sink = this.name;
    if (typeof fetch !== "function") {
      logWarn("webhook sink unavailable: global fetch missing", { sink });
      return;
    }
    const { url, token, timeoutMs } = this.opts;
    if (!url) {
      logWarn("webhook sink unavailable: no url configured", { sink });
      return;
    }
    // Computed lazily so stripSensitive only runs on the paths that actually log.
    const loggedHost = () => stripSensitive(url);
    if (!isSecureWebhookUrl(url)) {
      logWarn("webhook sink refused: insecure scheme (use https:// or a loopback host)", {
        sink,
        host: loggedHost()
      });
      return;
    }
    try {
      const authHeader = token ? { authorization: `Bearer ${token}` } : {};
      const request = {
        method: "POST",
        headers: { "content-type": "application/json", ...authHeader },
        body: JSON.stringify(redactValue(alert)),
        signal: AbortSignal.timeout(timeoutMs ?? 1e4)
      };
      const res = await fetch(url, request);
      if (res.ok) return;
      logError("webhook sink failed", { sink, host: loggedHost(), status: res.status });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      logError("webhook sink failed", { sink, host: loggedHost(), reason });
    }
  }
};
1024
+
1025
+ // src/sinks/index.ts
1026
/**
 * Instantiate the drift sinks described by the `drift` config section.
 *
 * Falls back to a single stdout sink when nothing is configured or when every
 * configured entry was skipped. Webhook entries without a url are skipped;
 * any non-"webhook" type yields a stdout sink.
 */
function buildSinks(drift) {
  const configured = drift?.sinks;
  const configs = configured && configured.length > 0 ? configured : [{ type: "stdout" }];
  const sinks = configs.flatMap((cfg) => {
    if (cfg.type !== "webhook") return [new StdoutSink()];
    if (!cfg.url) return [];
    const options = {
      url: cfg.url,
      token: cfg.token ?? process.env.CARTOGRAPHY_DRIFT_TOKEN,
      timeoutMs: cfg.timeoutMs
    };
    return [new WebhookSink(options)];
  });
  if (sinks.length > 0) return sinks;
  return [new StdoutSink()];
}
1043
+
1044
+ // src/drift.ts
1045
/**
 * Highest severity among `items`, ranked by position in SEVERITIES.
 * Unknown severities (index -1) never raise the rank; an empty list yields SEVERITIES[0].
 */
function maxSeverity(items) {
  const topRank = items.reduce(
    (best, item) => Math.max(best, SEVERITIES.indexOf(item.severity)),
    0
  );
  return SEVERITIES[topRank];
}
1053
// Lookup set over SECURITY_METADATA_KEYS; candidate keys are lower-cased before the lookup.
var SECURITY_KEY_SET = new Set(SECURITY_METADATA_KEYS);
/**
 * List the security-relevant metadata keys whose value differs between
 * `change.before` and `change.after`.
 *
 * @returns Sorted key names; empty when "metadata" is not among the changed fields.
 */
function securityRelevantChange(change) {
  if (!change.changedFields.includes("metadata")) return [];
  const before = change.before.metadata ?? {};
  const after = change.after.metadata ?? {};
  const candidates = /* @__PURE__ */ new Set([...Object.keys(before), ...Object.keys(after)]);
  return [...candidates]
    .filter((key) => SECURITY_KEY_SET.has(key.toLowerCase()))
    // stableStringify is used for the comparison so values are compared by content.
    .filter((key) => stableStringify(before[key]) !== stableStringify(after[key]))
    .sort();
}
1068
// Human-readable reference for an edge: "<source> -<relationship>-> <target>".
var edgeRef = (sourceId, rel, targetId) => `${sourceId} -${rel}-> ${targetId}`;
/**
 * Turn a topology diff into a drift alert.
 *
 * Severity per item: added nodes/edges are "info", removed nodes/edges are
 * "warning", changed nodes are "critical" when a security-relevant metadata
 * key changed and "info" otherwise. The alert severity is the maximum.
 *
 * @param diff Diff with `nodes.{added,removed,changed}`, `edges.{added,removed}`, `base`, `current`, `summary`.
 * @param now  Timestamp source for `generatedAt` (defaults to the current time).
 */
function classifyDrift(diff, now = /* @__PURE__ */ new Date()) {
  const nodeItem = (kind, severity) => (n) => ({ kind, ref: n.id, label: n.name, nodeType: n.type, severity });
  const edgeItem = (kind, severity) => (e) => ({
    kind,
    ref: edgeRef(e.sourceId, e.relationship, e.targetId),
    label: `${e.sourceId} \u2192 ${e.targetId}`,
    severity
  });
  const changedItem = (c) => {
    const securityFields = securityRelevantChange(c);
    const isSecurity = securityFields.length > 0;
    return {
      kind: "node-changed",
      ref: c.id,
      label: c.after.name,
      nodeType: c.after.type,
      severity: isSecurity ? "critical" : "info",
      changedFields: c.changedFields,
      ...isSecurity ? { securityFields } : {}
    };
  };
  const items = [
    ...diff.nodes.added.map(nodeItem("node-added", "info")),
    ...diff.nodes.removed.map(nodeItem("node-removed", "warning")),
    ...diff.nodes.changed.map(changedItem),
    ...diff.edges.added.map(edgeItem("edge-added", "info")),
    ...diff.edges.removed.map(edgeItem("edge-removed", "warning"))
  ];
  return {
    base: diff.base,
    current: diff.current,
    summary: diff.summary,
    severity: maxSeverity(items),
    items,
    generatedAt: now.toISOString()
  };
}
1115
/**
 * Keep only the alert items ranked at or above `min` in SEVERITIES and
 * recompute the alert's overall severity from what remains.
 * All other alert fields are copied through unchanged.
 */
function filterBySeverity(alert, min) {
  const threshold = SEVERITIES.indexOf(min);
  const kept = [];
  for (const item of alert.items) {
    if (SEVERITIES.indexOf(item.severity) >= threshold) kept.push(item);
  }
  return { ...alert, items: kept, severity: maxSeverity(kept) };
}
1120
/**
 * Diff two sessions, classify the drift, dispatch it to the configured sinks
 * and record a "drift_alert_dispatched" event on the current session.
 *
 * @param db     Catalog handle (getSessions, diffSessions, insertEvent are used).
 * @param config Loaded config; `config.drift` selects sinks and the default minimum severity.
 * @param opts   Optional `base` / `current` session ids and `minSeverity` override.
 *               Without ids, the first two entries of db.getSessions() are used
 *               (index 0 as current, index 1 as base).
 * @returns The filtered alert, or null when there is nothing to compare.
 */
async function runDrift(db, config, opts = {}) {
  const sessions = db.getSessions();
  const currentId = opts.current ?? sessions[0]?.id;
  const baseId = opts.base ?? sessions[1]?.id;
  if (!(baseId && currentId)) {
    logInfo("drift: fewer than two sessions, skipping");
    return null;
  }
  if (baseId === currentId) {
    logWarn("drift: base and current session are the same, skipping", { session: currentId });
    return null;
  }
  const classified = classifyDrift(db.diffSessions(baseId, currentId));
  const alert = filterBySeverity(classified, opts.minSeverity ?? config.drift?.minSeverity ?? "info");
  const sinks = buildSinks(config.drift);
  // allSettled: one failing sink must not prevent the others from receiving the alert.
  const outcomes = await Promise.allSettled(sinks.map((sink) => sink.emit(alert)));
  for (const [index, outcome] of outcomes.entries()) {
    if (outcome.status !== "rejected") continue;
    const { reason } = outcome;
    logError("drift sink rejected", {
      sink: sinks[index]?.name,
      reason: reason instanceof Error ? reason.message : String(reason)
    });
  }
  const record = {
    base: baseId,
    current: currentId,
    severity: alert.severity,
    items: alert.items.length,
    sinks: sinks.map((sink) => sink.name)
  };
  db.insertEvent(currentId, {
    eventType: "drift_alert_dispatched",
    process: "drift",
    pid: process.pid,
    command: JSON.stringify(redactValue(record))
  });
  return alert;
}
1159
+
1160
+ // src/orgkey.ts
1161
import { randomBytes, hkdfSync } from "crypto";
import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync, statSync } from "fs";
import { homedir } from "os";
import { dirname, join } from "path";
1165
/**
 * Location of the org-key file.
 * @param home Home directory to resolve against (defaults to os.homedir()).
 * @returns `<home>/.cartography/org-key`
 */
function orgKeyPath(home = homedir()) {
  const segments = [home, ".cartography", "org-key"];
  return join(...segments);
}
1168
// Size in bytes of the stored org secret and of the keys derived from it.
var KEY_BYTES = 32;
/**
 * Load the hex-encoded org secret stored at `path`.
 *
 * When the file is missing, or does not decode to exactly KEY_BYTES bytes, a
 * fresh random secret is generated, written to `path` and returned.
 *
 * @param path Location of the org-key file.
 * @returns The raw secret as a Buffer of KEY_BYTES bytes.
 */
function loadFileSecret(path) {
  if (existsSync(path)) {
    try {
      const mode = statSync(path).mode & 0o777;
      // Any group/other permission bit set means the secret is readable beyond its owner.
      if (mode & 0o077) {
        logWarn("org-key file is not 0600 \u2014 restrict it: chmod 600 " + path);
      }
    } catch {
      // Best-effort permission check only: a failing stat must not block loading the key.
    }
    const hex = readFileSync(path, "utf8").trim();
    const buf = Buffer.from(hex, "hex");
    if (buf.length === KEY_BYTES) return buf;
    logWarn("org-key file was malformed \u2014 regenerating a fresh org key");
  }
  mkdirSync(dirname(path), { recursive: true });
  const secret = randomBytes(KEY_BYTES);
  try {
    // writeFileSync's `mode` is only honoured when the file is created. When a
    // malformed file with loose permissions is being overwritten, tighten it
    // BEFORE the new secret is written into it.
    if (existsSync(path)) chmodSync(path, 0o600);
  } catch (err) {
    logWarn("could not restrict org-key file to 0600: " + (err instanceof Error ? err.message : String(err)));
  }
  writeFileSync(path, secret.toString("hex"), { mode: 0o600 });
  logInfo("created org key at ~/.cartography/org-key (mode 0600) \u2014 distribute to admins out-of-band");
  return secret;
}
1189
/**
 * Load (or create) the file secret and derive the organization-specific key.
 *
 * @param opts.keyPath      Override for the key file location (defaults to orgKeyPath()).
 * @param opts.organization Used, trimmed, as the HKDF `info`; blank or missing means "default".
 * @returns A KEY_BYTES-long Buffer derived with HKDF-SHA256 and an empty salt.
 */
function loadOrgKey(opts = {}) {
  const secret = loadFileSecret(opts.keyPath ?? orgKeyPath());
  const organization = opts.organization?.trim();
  const info = organization ? organization : "default";
  const derived = hkdfSync("sha256", secret, Buffer.alloc(0), info, KEY_BYTES);
  return Buffer.from(derived);
}
1195
/**
 * Replace the stored org secret with a fresh random one and return the newly
 * derived organization key.
 *
 * Reversal entries encrypted under the previous key can no longer be
 * decrypted afterwards (see the warning logged below).
 *
 * @param opts.keyPath      Override for the key file location (defaults to orgKeyPath()).
 * @param opts.organization Used, trimmed, as the HKDF `info`; blank or missing means "default".
 * @returns A KEY_BYTES-long Buffer derived with HKDF-SHA256 and an empty salt.
 */
function rotateOrgKey(opts = {}) {
  const path = opts.keyPath ?? orgKeyPath();
  mkdirSync(dirname(path), { recursive: true });
  const secret = randomBytes(KEY_BYTES);
  try {
    // Rotation normally overwrites an existing file, and writeFileSync's `mode`
    // is only honoured on creation. Tighten the existing file BEFORE the new
    // secret is written so it never sits in a group/world-readable file.
    if (existsSync(path)) chmodSync(path, 0o600);
  } catch (err) {
    logWarn("could not restrict org-key file to 0600: " + (err instanceof Error ? err.message : String(err)));
  }
  writeFileSync(path, secret.toString("hex"), { mode: 0o600 });
  logWarn("org key rotated \u2014 pseudonym_reversal entries created under the previous key are now UNRECOVERABLE");
  const info = opts.organization && opts.organization.trim() ? opts.organization.trim() : "default";
  return Buffer.from(hkdfSync("sha256", secret, Buffer.alloc(0), info, KEY_BYTES));
}
1204
/**
 * Derive the 32-byte HMAC sub-key from the org key
 * (HKDF-SHA256, salt "cartography-hmac-v1", info "hmac").
 */
function hmacKey(orgKey) {
  const salt = Buffer.from("cartography-hmac-v1");
  const derived = hkdfSync("sha256", orgKey, salt, "hmac", 32);
  return Buffer.from(derived);
}
1207
/**
 * Derive the 32-byte reversal-encryption sub-key from the org key
 * (HKDF-SHA256, salt "cartography-reversal-v1", info "reversal").
 */
function reversalKey(orgKey) {
  const salt = Buffer.from("cartography-reversal-v1");
  const derived = hkdfSync("sha256", orgKey, salt, "reversal", 32);
  return Buffer.from(derived);
}
1210
+
1211
+ // src/anonymize.ts
1212
+ import { createHmac, createCipheriv, createDecipheriv, randomBytes as randomBytes2 } from "crypto";
1213
// Fragment matchers used by the anonymizer. All are global so String#replace rewrites every hit.

// IPv4 addresses in the 10.0.0.0/8, 192.168.0.0/16 and 172.16.0.0/12 ranges.
var PRIVATE_IP = /\b(?:10(?:\.\d{1,3}){3}|192\.168(?:\.\d{1,3}){2}|172\.(?:1[6-9]|2\d|3[01])(?:\.\d{1,3}){2})\b/g;
// Dotted host names ending in an alphabetic label of two or more letters (case-insensitive).
var HOSTNAME = /\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}\b/gi;
// Absolute POSIX paths with at least two segments, at start of string or right after whitespace, = : " ' or (.
var POSIX_PATH = /(?:^|(?<=\s|=|:|"|'|\())(\/[A-Za-z0-9._-]+(?:\/[A-Za-z0-9._-]+)+)/g;
// Windows paths that start with a drive letter, a colon and a backslash.
var WIN_PATH = /\b[A-Za-z]:\\[A-Za-z0-9._\\-]+/g;
1217
// Lower-case base32 alphabet (a-z followed by 2-7).
var B32_ALPHABET = "abcdefghijklmnopqrstuvwxyz234567";
/**
 * Encode bytes as lower-case base32 without padding.
 * @param buf Iterable of byte values (e.g. a Buffer).
 * @returns The encoded string; empty for empty input.
 */
function base32(buf) {
  let acc = 0;
  let pending = 0;
  const chars = [];
  for (const byte of buf) {
    // Only the low `pending` bits of acc are ever read, so 32-bit wrap-around of
    // the older high bits is harmless.
    acc = (acc << 8) | byte;
    pending += 8;
    for (; pending >= 5; pending -= 5) {
      chars.push(B32_ALPHABET[(acc >>> (pending - 5)) & 31]);
    }
  }
  if (pending > 0) {
    // Left-align the remaining bits inside a final 5-bit group.
    chars.push(B32_ALPHABET[(acc << (5 - pending)) & 31]);
  }
  return chars.join("");
}
1233
/**
 * Encrypt a UTF-8 string with AES-256-GCM under `key`.
 * @returns base64 of `iv (12 bytes) || auth tag (16 bytes) || ciphertext`, using a fresh random IV per call.
 */
function encryptPlaintext(plaintext, key) {
  const nonce = randomBytes2(12);
  const cipher = createCipheriv("aes-256-gcm", key, nonce);
  const head = cipher.update(plaintext, "utf8");
  const tail = cipher.final();
  // The tag is only available once final() has run.
  const authTag = cipher.getAuthTag();
  return Buffer.concat([nonce, authTag, head, tail]).toString("base64");
}
1240
/**
 * Decrypt a value produced by encryptPlaintext
 * (base64 of `iv (12 bytes) || auth tag (16 bytes) || ciphertext`, AES-256-GCM).
 *
 * Inputs shorter than the 28-byte header are rejected up front, and the
 * decipher is pinned to a 16-byte tag. Without that, a truncated record
 * produces a shorter tag slice, which Node's GCM implementation accepts as a
 * (much weaker) short authentication tag.
 *
 * @param encoded base64 string as stored.
 * @param key     32-byte AES key.
 * @returns The plaintext, or undefined when the input is malformed or fails authentication.
 */
function decryptPlaintext(encoded, key) {
  const IV_BYTES = 12;
  const TAG_BYTES = 16;
  try {
    const raw = Buffer.from(encoded, "base64");
    if (raw.length < IV_BYTES + TAG_BYTES) return void 0;
    const iv = raw.subarray(0, IV_BYTES);
    const tag = raw.subarray(IV_BYTES, IV_BYTES + TAG_BYTES);
    const ct = raw.subarray(IV_BYTES + TAG_BYTES);
    const decipher = createDecipheriv("aes-256-gcm", key, iv, { authTagLength: TAG_BYTES });
    decipher.setAuthTag(tag);
    // final() throws when the tag does not authenticate the ciphertext.
    return Buffer.concat([decipher.update(ct), decipher.final()]).toString("utf8");
  } catch {
    return void 0;
  }
}
1253
/**
 * Replace one sensitive fragment with a token of the form `anon:<kind>:<base32>`.
 *
 * The suffix is the base32 encoding of the first 12 bytes of
 * HMAC-SHA256(hmacKey(orgKey), plaintext), so equal inputs under the same org
 * key produce equal tokens. When `db` is given, the encrypted plaintext is
 * stored under the token via db.saveReversal.
 */
function pseudonymizeFragment(plaintext, kind, orgKey, db) {
  const mac = createHmac("sha256", hmacKey(orgKey));
  mac.update(plaintext);
  const suffix = base32(mac.digest().subarray(0, 12));
  const token = `anon:${kind}:${suffix}`;
  if (db) {
    const sealed = encryptPlaintext(plaintext, reversalKey(orgKey));
    db.saveReversal(token, sealed);
  }
  return token;
}
1259
/**
 * Pseudonymize every sensitive fragment found in a string.
 *
 * Passes run in a fixed order: private IPs, Windows paths, POSIX paths,
 * `user@host` pairs, then remaining host names.
 */
function pseudonymizeString(s, orgKey, db) {
  // Replacer factory: only the full match (first callback argument) is used.
  const tokenFor = (kind) => (fragment) => pseudonymizeFragment(fragment, kind, orgKey, db);
  const userAtHost = (_m, user, host) => `${pseudonymizeFragment(user, "user", orgKey, db)}@${pseudonymizeFragment(host, "host", orgKey, db)}`;
  return s
    .replace(PRIVATE_IP, tokenFor("ip"))
    .replace(WIN_PATH, tokenFor("path"))
    .replace(POSIX_PATH, tokenFor("path"))
    .replace(/\b([a-z0-9._-]+)@((?:[a-z0-9-]+\.)+[a-z]{2,})\b/gi, userAtHost)
    .replace(HOSTNAME, tokenFor("host"));
}
1271
/**
 * Recursively pseudonymize every string inside `value`.
 *
 * Strings go through pseudonymizeString, arrays are mapped element-wise,
 * objects are rebuilt from their own enumerable entries (keys are kept as-is),
 * and every other value is returned unchanged.
 *
 * The object copy is built with Object.fromEntries so that a key literally
 * named "__proto__" (possible in JSON-parsed data) is copied as an ordinary
 * own property. Plain `out[k] = v` assignment would instead replace the
 * copy's prototype and silently drop the key.
 */
function pseudonymize(value, orgKey, db) {
  if (typeof value === "string") return pseudonymizeString(value, orgKey, db);
  if (Array.isArray(value)) return value.map((v) => pseudonymize(v, orgKey, db));
  if (value && typeof value === "object") {
    return Object.fromEntries(
      Object.entries(value).map(([k, v]) => [k, pseudonymize(v, orgKey, db)])
    );
  }
  return value;
}
1281
/**
 * Recover the plaintext behind a pseudonym token.
 * @returns The plaintext; undefined when the token has no stored reversal
 *          entry or when decryption fails (the latter is also logged as an error).
 */
function reversePseudonym(token, orgKey, db) {
  const stored = db.getReversal(token);
  if (stored == null) return void 0;
  const recovered = decryptPlaintext(stored, reversalKey(orgKey));
  if (recovered !== void 0) return recovered;
  logError("reversePseudonym: ciphertext failed authentication (tampered or wrong/rotated org key)");
  return void 0;
}
1290
+
1291
+ // src/mcp/server.ts
1292
+ import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
1293
+ import { z as z2 } from "zod";
1294
+
1295
+ // src/nlq/resolve.ts
1296
// Traversal direction for each parsed relation; "list" performs no traversal.
var RELATION_TO_DIRECTION = {
  "depends-on": "downstream",
  "depended-on-by": "upstream",
  "connected-to": "both",
  "list": void 0
};
// Queries are cut to this many characters before parsing.
var MAX_QUERY_LEN = 1e3;
// [trigger pattern, node types] pairs used to derive a node-type filter from the query text.
var GROUP_WORDS = [
  [/\bservices?\b/i, NODE_TYPE_GROUPS.web],
  [/\b(?:databases?|datastores?|data ?stores?)\b/i, NODE_TYPE_GROUPS.data],
  [/\b(?:queues?|topics?|brokers?|messaging)\b/i, NODE_TYPE_GROUPS.messaging],
  [/\b(?:hosts?|containers?|pods?|clusters?|infra(?:structure)?)\b/i, NODE_TYPE_GROUPS.infra],
  [/\bsaas\b/i, NODE_TYPE_GROUPS.saas]
];
// Phrasings asking who depends on <subject> (parsed as relation "depended-on-by").
var UPSTREAM = [
  /^(?:.*?)\b(?:that|which)\s+depends?\s+(?:on|upon)\s+(?<subject>.+)$/i,
  /^(?:.*?)\bdepending\s+(?:on|upon)\s+(?<subject>.+)$/i,
  /^(?:.*?)\b(?:that|which)\s+relies?\s+on\s+(?<subject>.+)$/i,
  /^\s*what\s+depends?\s+(?:on|upon)\s+(?<subject>.+)$/i
];
// Phrasings asking what <subject> depends on (parsed as relation "depends-on").
var DOWNSTREAM = [
  /\bwhat\s+(?:does|do)\s+(?<subject>.+?)\s+depend\s+(?:on|upon)\b/i,
  /^(?<subject>.+?)['’]s\s+dependencies\b/i,
  /\bdependencies\s+of\s+(?<subject>.+)$/i,
  /^(?<subject>.+?)\s+dependencies\b/i,
  /^(?<subject>.+?)\s+depends?\s+(?:on|upon)\s+.*$/i
];
// Phrasings asking what is connected to <subject> (parsed as relation "connected-to").
var CONNECTED = [
  /\bwhat\s+is\s+connected\s+to\s+(?<subject>.+)$/i,
  /\b(?:connected|related|linked)\s+to\s+(?<subject>.+)$/i,
  /\btalks?\s+to\s+(?<subject>.+)$/i
];
1328
/**
 * Try each pattern in order and return the first non-empty `subject` named group.
 * @returns The captured subject, or null when no pattern yields one.
 */
function firstMatch(query, pats) {
  for (const pattern of pats) {
    const subject = query.match(pattern)?.groups?.subject;
    if (subject) return subject;
  }
  return null;
}
1335
/**
 * Normalise a captured subject: drop trailing ?.!, punctuation, drop one
 * leading article/determiner (the, a, an, my, our, all), then trim.
 */
function cleanSubject(s) {
  const withoutTrailingPunctuation = s.replace(/[?.!,]+\s*$/g, "");
  const withoutLeadingArticle = withoutTrailingPunctuation.replace(/^\s*(?:the|a|an|my|our|all)\s+/i, "");
  return withoutLeadingArticle.trim();
}
1338
/**
 * Collect the node types of every GROUP_WORDS entry whose pattern matches `text`.
 * @returns De-duplicated types in first-seen order, or undefined when nothing matched.
 */
function detectTypeFilter(text) {
  const matchedTypes = GROUP_WORDS
    .filter(([pattern]) => pattern.test(text))
    .flatMap(([, types]) => [...types]);
  const unique = /* @__PURE__ */ new Set(matchedTypes);
  return unique.size > 0 ? [...unique] : void 0;
}
1343
/**
 * Parse a plain-English topology question into a structured intent.
 *
 * The query is sanitised, cut to MAX_QUERY_LEN and trimmed. Pattern families
 * are tried in the order upstream, downstream, connected; the first hit fixes
 * the relation and subject. With no hit the intent is a "list" over the whole
 * query and is flagged `degraded`.
 *
 * @returns `{ query, subjectQuery, relation, direction, typeFilter, degraded }`
 */
function parseNlQuery(raw) {
  const query = sanitizeUntrusted(raw).slice(0, MAX_QUERY_LEN).trim();
  const families = [
    ["depended-on-by", UPSTREAM],
    ["depends-on", DOWNSTREAM],
    ["connected-to", CONNECTED]
  ];
  let relation = "list";
  let subj = null;
  for (const [candidate, patterns] of families) {
    subj = firstMatch(query, patterns);
    if (subj !== null) {
      relation = candidate;
      break;
    }
  }
  const degraded = subj === null;
  const subjectQuery = cleanSubject(subj ?? query);
  // Type words are looked for outside the subject so the subject's own wording does not add a filter.
  const scanText = degraded ? query : query.replace(subj, " ");
  const typeFilter = detectTypeFilter(scanText);
  const direction = RELATION_TO_DIRECTION[relation];
  return { query, subjectQuery, relation, direction, typeFilter, degraded };
}
1356
/**
 * Execute a parsed natural-language intent against one session.
 *
 * "list" intents return the search hits directly (with the type filter passed
 * to the search). Every other relation anchors on the best search hit,
 * traverses its dependencies in the intent's direction, then applies the type
 * filter to the traversed nodes.
 *
 * @param search Async `(db, sessionId, query, opts)` search function returning `{ node }` hits.
 * @param opts   Optional `anchorLimit` (default 5) and `maxDepth` (default 8).
 * @returns `{ intent, anchors, nodes, paths }`
 */
async function executeNlQuery(db, sessionId, search, intent, opts = {}) {
  const { relation, subjectQuery, typeFilter, direction } = intent;
  const anchorLimit = opts.anchorLimit ?? 5;
  if (relation === "list") {
    const listed = await search(db, sessionId, subjectQuery, { types: typeFilter, limit: anchorLimit });
    logDebug("nlq.execute", { relation: "list", typeFilter: typeFilter?.length ?? 0, anchors: listed.length });
    return { intent, anchors: listed, nodes: listed.map((hit) => hit.node), paths: [] };
  }
  const anchors = await search(db, sessionId, subjectQuery, { limit: anchorLimit });
  if (anchors.length === 0) {
    logDebug("nlq.execute", { relation, anchors: 0, results: 0 });
    return { intent, anchors, nodes: [], paths: [] };
  }
  // Only the top-ranked anchor is used as the traversal root.
  const root = anchors[0].node;
  const traversal = db.getDependencies(sessionId, root.id, { direction, maxDepth: opts.maxDepth ?? 8 });
  let nodes = traversal.nodes;
  if (typeFilter) {
    const allowed = new Set(typeFilter);
    nodes = traversal.nodes.filter((n) => allowed.has(n.type));
  }
  logDebug("nlq.execute", {
    relation,
    direction,
    typeFilter: typeFilter?.length ?? 0,
    anchors: anchors.length,
    results: nodes.length
  });
  return { intent, anchors, nodes, paths: traversal.edges };
}
1381
+
1382
+ // src/mcp/server.ts
1383
// Identity reported to MCP clients.
var SERVER_NAME = "cartography";
var SERVER_VERSION = "2.3.0";
// Node-type groups behind the "services" and "databases" listings.
var SERVICE_TYPES = NODE_TYPE_GROUPS.web;
var DATA_TYPES = NODE_TYPE_GROUPS.data;
/**
 * Default search backend: delegates to db.searchNodes and wraps each node as
 * a `{ node }` hit (no score).
 */
var lexicalSearch = async (db, sessionId, query, opts) => {
  const { types, limit } = opts;
  const found = db.searchNodes(sessionId, query, { types, limit });
  return found.map((node) => ({ node }));
};
1388
/**
 * Reduce a node to its compact form: id, type, name, confidence, plus
 * `domain` when truthy and `tags` when the list is non-empty.
 */
function compactNode(n) {
  const { id, type, name, confidence } = n;
  const domainPart = n.domain ? { domain: n.domain } : {};
  const tagsPart = n.tags.length ? { tags: n.tags } : {};
  return { id, type, name, confidence, ...domainPart, ...tagsPart };
}
1398
/** Wrap `data` as an MCP tool result holding one text item of pretty-printed (2-space) JSON. */
function json(data) {
  const text = JSON.stringify(data, null, 2);
  return { content: [{ type: "text", text }] };
}
1401
/** True when the summary object has an `org` property (own or inherited). */
function isOrgSummary(s) {
  return Reflect.has(s, "org");
}
1404
/**
 * Render a graph summary as a markdown overview.
 *
 * Accepts either an organization summary (recognised by its `org` property)
 * or a single-session summary. The anomalies and cost sections are emitted
 * for single-session summaries only; the cost sections additionally require
 * cost data to be present.
 */
function summaryText(s) {
  const orgScoped = "org" in s;
  // Entries of a name -> count record as bullet lines, highest count first.
  const bullets = (record) => Object.entries(record).sort((a, b) => b[1] - a[1]).map(([label, count]) => ` - ${label}: ${count}`);
  const out = [];
  if (orgScoped) {
    out.push(`# Organization topology \u2014 org ${s.org} (${s.contributors} contributor${s.contributors === 1 ? "" : "s"})`);
  } else {
    out.push(`# Infrastructure topology \u2014 session ${s.sessionId}`);
  }
  out.push(``, `Totals: ${s.totals.nodes} nodes, ${s.totals.edges} edges`);
  out.push(``, `Nodes by type:`, ...bullets(s.nodesByType));
  out.push(``, `Nodes by domain:`, ...bullets(s.nodesByDomain));
  out.push(``, `Edges by relationship:`, ...bullets(s.edgesByRelationship));
  out.push(``, `Most connected:`, ...s.topConnected.map((n) => ` - ${n.id} (${n.type}) \u2014 degree ${n.degree}`));
  if (!orgScoped) {
    out.push(``, `Anomalies (${s.anomalies.length}):`);
    if (s.anomalies.length === 0) {
      out.push(" - none");
    } else {
      // At most the first 20 anomalies are listed.
      out.push(...s.anomalies.slice(0, 20).map((a) => ` - [${a.severity}] ${a.kind}: ${a.nodeId} \u2014 ${a.reason}`));
    }
    if (s.costByDomain.length) {
      out.push(``, `Cost by domain:`, ...s.costByDomain.map((c) => ` - ${c.domain} [${c.currency}/${c.period}]: ${c.total} (${c.nodes} nodes)`));
    }
    if (s.costCoverage.withCost) {
      out.push(``, `Cost coverage: ${s.costCoverage.withCost}/${s.costCoverage.total} nodes attributed`);
    }
  }
  out.push(``, `Read cartography://nodes/{id} or cartography://dependencies/{id} for detail.`);
  return out.join("\n");
}
1436
+ function createMcpServer(opts = {}) {
1437
+ const db = opts.db ?? new CartographyDB(opts.dbPath ?? defaultConfig().dbPath);
1438
+ const search = opts.search ?? lexicalSearch;
1439
+ const tenant = normalizeTenant(opts.tenant);
1440
+ const org = opts.org !== void 0 ? normalizeTenant(opts.org) : void 0;
1441
+ const resolveSession = () => {
1442
+ if (opts.session && opts.session !== "latest") {
1443
+ const s = db.getSession(opts.session);
1444
+ return s && s.tenant === tenant ? s.id : void 0;
1445
+ }
1446
+ return db.getLatestSession("discover", tenant)?.id ?? db.getLatestSession(void 0, tenant)?.id;
1447
+ };
1448
+ const server = new McpServer(
1449
+ { name: SERVER_NAME, version: SERVER_VERSION },
1450
+ {
1451
+ capabilities: { resources: { subscribe: true, listChanged: true }, tools: {}, prompts: {}, logging: {} },
1452
+ instructions: "Cartography exposes a discovered infrastructure/SaaS topology. Start by reading cartography://graph/summary for a low-token overview, then drill into specific nodes via cartography://nodes/{id} or query with the query_infrastructure / get_dependencies tools."
1453
+ }
1454
+ );
1455
+ server.registerResource(
1456
+ "graph-summary",
1457
+ "cartography://graph/summary",
1458
+ { title: "Topology summary", description: "Low-token aggregate index of the whole landscape \u2014 read this first.", mimeType: "text/markdown" },
1459
+ (uri) => {
1460
+ if (org !== void 0) {
1461
+ return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: summaryText(db.getOrgSummary(org)) }] };
1462
+ }
1463
+ const sid = resolveSession();
1464
+ if (!sid) return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: "No discovery session found. Run discovery first." }] };
1465
+ return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: summaryText(db.getGraphSummary(sid)) }] };
1466
+ }
1467
+ );
1468
+ server.registerResource(
1469
+ "cost-summary",
1470
+ "cartography://cost/summary",
1471
+ { title: "Cost summary", description: "Cost rolled up by domain and owner (currency/period-bucketed).", mimeType: "application/json" },
1472
+ (uri) => {
1473
+ const sid = resolveSession();
1474
+ if (!sid) return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ error: "No discovery session found." }) }] };
1475
+ const s = db.getGraphSummary(sid);
1476
+ return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ costByDomain: s.costByDomain, costByOwner: s.costByOwner, costCoverage: s.costCoverage }, null, 2) }] };
1477
+ }
1478
+ );
1479
+ server.registerResource(
1480
+ "nodes-index",
1481
+ "cartography://nodes",
1482
+ { title: "Node index", description: "Lightweight list of all nodes (id, type, name only).", mimeType: "application/json" },
1483
+ (uri) => {
1484
+ const sid = resolveSession();
1485
+ const nodes = sid ? db.getNodes(sid) : [];
1486
+ return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ count: nodes.length, nodes: nodes.map((n) => ({ id: n.id, type: n.type, name: n.name })) }, null, 2) }] };
1487
+ }
1488
+ );
1489
+ server.registerResource(
1490
+ "node-detail",
1491
+ new ResourceTemplate("cartography://nodes/{id}", { list: void 0 }),
1492
+ { title: "Node detail", description: "Full node record plus its incident edges.", mimeType: "application/json" },
1493
+ (uri, variables) => {
1494
+ const sid = resolveSession();
1495
+ const id = decodeURIComponent(String(variables["id"]));
1496
+ const node = sid ? db.getNode(sid, id) : void 0;
1497
+ if (!node) return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ error: `node not found: ${id}` }) }] };
1498
+ const edges = db.getEdges(sid).filter((e) => e.sourceId === id || e.targetId === id);
1499
+ return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ node, edges }, null, 2) }] };
1500
+ }
1501
+ );
1502
+ const typedListResource = (name, uri, title, types) => server.registerResource(name, uri, { title, description: `Nodes of type: ${types.join(", ")}.`, mimeType: "application/json" }, (u) => {
1503
+ const sid = resolveSession();
1504
+ const nodes = sid ? db.getNodesByType(sid, types) : [];
1505
+ return { contents: [{ uri: u.href, mimeType: "application/json", text: JSON.stringify({ count: nodes.length, nodes: nodes.map(compactNode) }, null, 2) }] };
1506
+ });
1507
+ typedListResource("services", "cartography://services", "Services", SERVICE_TYPES);
1508
+ typedListResource("databases", "cartography://databases", "Data stores", DATA_TYPES);
1509
+ server.registerResource(
1510
+ "dependencies",
1511
+ new ResourceTemplate("cartography://dependencies/{id}", { list: void 0 }),
1512
+ { title: "Dependencies", description: "Transitive downstream dependencies of a node.", mimeType: "application/json" },
1513
+ (uri, variables) => {
1514
+ const sid = resolveSession();
1515
+ const id = decodeURIComponent(String(variables["id"]));
1516
+ if (!sid) return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({ error: "no session" }) }] };
1517
+ const r = db.getDependencies(sid, id, { direction: "downstream", maxDepth: 8 });
1518
+ return { contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify({
1519
+ root: id,
1520
+ count: r.nodes.length,
1521
+ nodes: r.nodes.map((n) => ({ ...compactNode(n), depth: n.depth })),
1522
+ edges: r.edges.map((e) => ({ from: e.sourceId, to: e.targetId, rel: e.relationship, confidence: e.confidence, evidence: e.evidence }))
1523
+ }, null, 2) }] };
1524
+ }
1525
+ );
1526
+ server.registerResource(
1527
+ "sessions",
1528
+ "cartography://sessions",
1529
+ { title: "Discovery sessions", description: "All discovery sessions in the catalog.", mimeType: "application/json" },
1530
+ (uri) => ({ contents: [{ uri: uri.href, mimeType: "application/json", text: JSON.stringify(db.getSessions(tenant), null, 2) }] })
1531
+ );
1532
+ const readOnly = { readOnlyHint: true, openWorldHint: false };
1533
+ server.registerTool(
1534
+ "get_summary",
1535
+ { title: "Get topology summary", description: "Low-token overview of the whole landscape (counts, types, domains, most-connected, anomalies).", inputSchema: {}, annotations: readOnly },
1536
+ () => {
1537
+ if (org !== void 0) return json(db.getOrgSummary(org));
1538
+ const sid = resolveSession();
1539
+ if (!sid) return json({ error: "No discovery session found." });
1540
+ return json(db.getGraphSummary(sid));
1541
+ }
1542
+ );
1543
+ server.registerTool(
1544
+ "get_cost_summary",
1545
+ { title: "Get cost summary", description: "FinOps rollup: cost by domain and owner, currency/period-bucketed (3.3).", inputSchema: {}, annotations: readOnly },
1546
+ () => {
1547
+ const sid = resolveSession();
1548
+ if (!sid) return json({ error: "No discovery session found." });
1549
+ const s = db.getGraphSummary(sid);
1550
+ return json({ costByDomain: s.costByDomain, costByOwner: s.costByOwner, costCoverage: s.costCoverage });
1551
+ }
1552
+ );
1553
+ server.registerTool(
1554
+ "query_infrastructure",
1555
+ {
1556
+ title: "Query infrastructure",
1557
+ description: "Search the topology by name/id/domain (optionally filtered by node type). Returns compact node records.",
1558
+ inputSchema: {
1559
+ query: z2.string().describe('Free-text query, e.g. "postgres", "auth", "github"'),
1560
+ types: z2.array(z2.enum(NODE_TYPES)).optional().describe("Restrict to these node types"),
1561
+ limit: z2.number().int().min(1).max(200).default(25).optional()
1562
+ },
1563
+ annotations: readOnly
1564
+ },
1565
+ async (args) => {
1566
+ const sid = resolveSession();
1567
+ if (!sid) return json({ error: "No discovery session found." });
1568
+ const results = await search(db, sid, args.query, { types: args.types, limit: args.limit ?? 25 });
1569
+ return json({ count: results.length, results: results.map((r) => ({ ...compactNode(r.node), ...r.score !== void 0 ? { score: r.score } : {} })) });
1570
+ }
1571
+ );
1572
+ server.registerTool(
1573
+ "search_topology",
1574
+ {
1575
+ title: "Search topology (semantic)",
1576
+ description: "Find nodes related to a concept by meaning (semantic search when available, lexical otherwise).",
1577
+ inputSchema: { query: z2.string(), limit: z2.number().int().min(1).max(100).default(10).optional() },
1578
+ annotations: readOnly
1579
+ },
1580
+ async (args) => {
1581
+ const sid = resolveSession();
1582
+ if (!sid) return json({ error: "No discovery session found." });
1583
+ const results = await search(db, sid, args.query, { limit: args.limit ?? 10 });
1584
+ return json({ count: results.length, results: results.map((r) => ({ ...compactNode(r.node), ...r.score !== void 0 ? { score: r.score } : {} })) });
1585
+ }
1586
+ );
1587
+ server.registerTool(
1588
+ "list_services",
1589
+ {
1590
+ title: "List services",
1591
+ description: "List discovered services or data stores.",
1592
+ inputSchema: { kind: z2.enum(["services", "databases", "all"]).default("all").optional() },
1593
+ annotations: readOnly
1594
+ },
1595
+ (args) => {
1596
+ const sid = resolveSession();
1597
+ if (!sid) return json({ error: "No discovery session found." });
1598
+ const kind = args.kind ?? "all";
1599
+ const types = kind === "services" ? SERVICE_TYPES : kind === "databases" ? DATA_TYPES : [...SERVICE_TYPES, ...DATA_TYPES];
1600
+ return json(db.getNodesByType(sid, types).map(compactNode));
1601
+ }
1602
+ );
1603
+ server.registerTool(
1604
+ "get_node",
1605
+ { title: "Get node", description: "Fetch a single node with its incident edges.", inputSchema: { id: z2.string() }, annotations: readOnly },
1606
+ (args) => {
1607
+ const sid = resolveSession();
1608
+ if (!sid) return json({ error: "No discovery session found." });
1609
+ const node = db.getNode(sid, args.id);
1610
+ if (!node) return json({ error: `node not found: ${args.id}` });
1611
+ const edges = db.getEdges(sid).filter((e) => e.sourceId === args.id || e.targetId === args.id);
1612
+ return json({ node, edges });
1613
+ }
1614
+ );
1615
+ server.registerTool(
1616
+ "get_dependencies",
1617
+ {
1618
+ title: "Get dependencies",
1619
+ description: "Traverse the dependency graph from a node (downstream/upstream/both) with a depth limit.",
1620
+ inputSchema: {
1621
+ id: z2.string(),
1622
+ direction: z2.enum(["downstream", "upstream", "both"]).default("downstream").optional(),
1623
+ maxDepth: z2.number().int().min(1).max(64).default(8).optional(),
1624
+ minConfidence: z2.number().min(0).max(1).optional().describe("Drop edges below this confidence (0..1).")
1625
+ },
1626
+ annotations: readOnly
1627
+ },
1628
+ (args) => {
1629
+ const sid = resolveSession();
1630
+ if (!sid) return json({ error: "No discovery session found." });
1631
+ const r = db.getDependencies(sid, args.id, { direction: args.direction ?? "downstream", maxDepth: args.maxDepth ?? 8 });
1632
+ const minConfidence = args.minConfidence ?? 0;
1633
+ return json({
1634
+ root: r.root ? compactNode(r.root) : null,
1635
+ direction: r.direction,
1636
+ count: r.nodes.length,
1637
+ nodes: r.nodes.map((n) => ({ ...compactNode(n), depth: n.depth })),
1638
+ edges: r.edges.filter((e) => e.confidence >= minConfidence).map((e) => ({ from: e.sourceId, to: e.targetId, rel: e.relationship, confidence: e.confidence, evidence: e.evidence }))
1639
+ });
1640
+ }
1641
+ );
1642
+ server.registerTool(
1643
+ "query_natural_language",
1644
+ {
1645
+ title: "Query in natural language",
1646
+ description: 'Answer a plain-English topology question (e.g. "services that depend on the payments DB"). Deterministically parses the question into a structured intent, then anchors via search and traverses dependencies, applying any node-type filter to the results. Echoes the parsed intent for explainability. Read-only, LLM-free.',
1647
+ inputSchema: {
1648
+ query: z2.string().min(1).max(1e3).describe("Natural-language question about the topology"),
1649
+ maxDepth: z2.number().int().min(1).max(64).default(8).optional()
1650
+ },
1651
+ annotations: readOnly
1652
+ },
1653
+ async (args) => {
1654
+ const sid = resolveSession();
1655
+ if (!sid) return json({ error: "No discovery session found." });
1656
+ const intent = parseNlQuery(args.query);
1657
+ const r = await executeNlQuery(db, sid, search, intent, { maxDepth: args.maxDepth ?? 8 });
1658
+ return json({
1659
+ intent: {
1660
+ query: r.intent.query,
1661
+ subjectQuery: r.intent.subjectQuery,
1662
+ relation: r.intent.relation,
1663
+ direction: r.intent.direction ?? null,
1664
+ typeFilter: r.intent.typeFilter ?? null,
1665
+ degraded: r.intent.degraded
1666
+ },
1667
+ anchors: r.anchors.map((a) => ({ ...compactNode(a.node), ...a.score !== void 0 ? { score: a.score } : {} })),
1668
+ count: r.nodes.length,
1669
+ nodes: r.nodes.map((n) => ({ ...compactNode(n), ...n.depth !== void 0 ? { depth: n.depth } : {} })),
1670
+ paths: r.paths.map((e) => ({ from: e.sourceId, to: e.targetId, rel: e.relationship }))
1671
+ });
1672
+ }
1673
+ );
1674
+ server.registerTool(
1675
+ "diff_topology",
1676
+ {
1677
+ title: "Diff topology (drift detection)",
1678
+ description: "Compare two discovery sessions and report added/removed/changed nodes and added/removed edges, plus newly-appearing structural anomalies (3.6). Defaults to the two most recent sessions (base = second-most-recent, current = most-recent).",
1679
+ inputSchema: {
1680
+ base: z2.string().optional().describe("Baseline session id (default: second-most-recent session)"),
1681
+ current: z2.string().optional().describe("Current session id (default: most-recent session)")
1682
+ },
1683
+ annotations: readOnly
1684
+ },
1685
+ (args) => {
1686
+ const sessions = db.getSessions(tenant);
1687
+ const currentId = args.current ?? sessions[0]?.id;
1688
+ const baseId = args.base ?? sessions[1]?.id;
1689
+ if (!baseId || !currentId) return json({ error: "Need at least two discovery sessions to diff." });
1690
+ if (baseId === currentId) return json({ error: "Base and current session are the same." });
1691
+ const d = db.diffSessions(baseId, currentId);
1692
+ return json({
1693
+ base: d.base,
1694
+ current: d.current,
1695
+ summary: d.summary,
1696
+ nodes: {
1697
+ added: d.nodes.added.map(compactNode),
1698
+ removed: d.nodes.removed.map(compactNode),
1699
+ changed: d.nodes.changed.map((c) => ({ ...compactNode(c.after), changedFields: c.changedFields }))
1700
+ },
1701
+ edges: {
1702
+ added: d.edges.added.map((e) => ({ from: e.sourceId, to: e.targetId, rel: e.relationship })),
1703
+ removed: d.edges.removed.map((e) => ({ from: e.sourceId, to: e.targetId, rel: e.relationship }))
1704
+ },
1705
+ anomalies: {
1706
+ added: d.anomalies.added,
1707
+ baseCount: d.anomalies.base.length,
1708
+ currentCount: d.anomalies.current.length
1709
+ }
1710
+ });
1711
+ }
1712
+ );
1713
+ server.registerTool(
1714
+ "classify_drift",
1715
+ {
1716
+ title: "Classify drift (severity-ranked drift detection)",
1717
+ description: "Compare two discovery sessions and return a severity-classified drift alert (info|warning|critical per item plus an overall severity). Defaults to the two most recent. Read-only: never dispatches to sinks.",
1718
+ inputSchema: {
1719
+ base: z2.string().optional().describe("Baseline session id (default: second-most-recent)"),
1720
+ current: z2.string().optional().describe("Current session id (default: most-recent)"),
1721
+ minSeverity: z2.enum(["info", "warning", "critical"]).optional().describe("Drop items below this severity")
1722
+ },
1723
+ annotations: readOnly
1724
+ },
1725
+ (args) => {
1726
+ const sessions = db.getSessions(tenant);
1727
+ const currentId = args.current ?? sessions[0]?.id;
1728
+ const baseId = args.base ?? sessions[1]?.id;
1729
+ if (!baseId || !currentId) return json({ error: "Need at least two discovery sessions to diff." });
1730
+ if (baseId === currentId) return json({ error: "Base and current session are the same." });
1731
+ let alert = classifyDrift(db.diffSessions(baseId, currentId));
1732
+ if (args.minSeverity) alert = filterBySeverity(alert, args.minSeverity);
1733
+ return json(redactValue(alert));
1734
+ }
1735
+ );
1736
+ server.registerTool(
1737
+ "score_compliance",
1738
+ {
1739
+ title: "Score compliance",
1740
+ description: "Grade the served session against a compliance ruleset (baseline/cis/soc2/iso27001 starter sets) and list gaps with the node ids that caused them. Read-only; never throws.",
1741
+ inputSchema: {
1742
+ ruleset: z2.string().default("baseline").optional().describe("Ruleset name (default: baseline)"),
1743
+ session: z2.string().optional().describe("Session id (default: the served session)")
1744
+ },
1745
+ annotations: readOnly
1746
+ },
1747
+ (args) => {
1748
+ const sid = args.session ?? resolveSession();
1749
+ if (!sid) return json({ error: "No discovery session found." });
1750
+ const name = args.ruleset ?? "baseline";
1751
+ const rs = getRuleset(name);
1752
+ if (!rs) return json({ error: `Unknown ruleset: ${name}`, available: listRulesets().map((r) => r.name) });
1753
+ return json(db.scoreSession(sid, rs));
1754
+ }
1755
+ );
1756
+ server.registerTool(
1757
+ "get_activity_events",
1758
+ {
1759
+ title: "Get activity events (audit trail)",
1760
+ description: "Recent executed tool calls and their result sizes for the current session.",
1761
+ inputSchema: { limit: z2.number().int().min(1).max(500).default(50).optional() },
1762
+ annotations: readOnly
1763
+ },
1764
+ (args) => {
1765
+ const sid = resolveSession();
1766
+ if (!sid) return json({ error: "No discovery session found." });
1767
+ const events = db.getEvents(sid).slice(-(args.limit ?? 50));
1768
+ return json({ count: events.length, events });
1769
+ }
1770
+ );
1771
+ if (opts.discovery) {
1772
+ const discovery = opts.discovery;
1773
+ server.registerTool(
1774
+ "run_discovery",
1775
+ {
1776
+ title: "Run discovery",
1777
+ description: "Scan the local system (read-only) and update the catalog. Returns counts of nodes/edges found. Pass `update: true` to rescan the served session in place and return the delta (2.1 incremental discovery).",
1778
+ inputSchema: {
1779
+ hint: z2.string().optional().describe("Optional focus, e.g. tool names to look for"),
1780
+ update: z2.boolean().optional().describe("Rescan the served session in place and return the delta instead of creating a new session")
1781
+ },
1782
+ // Scans read-only but writes results to the local catalog, so not a read-only tool; never destructive.
1783
+ annotations: { readOnlyHint: false, destructiveHint: false, openWorldHint: true }
1784
+ },
1785
+ async (args) => {
1786
+ let sid = resolveSession();
1787
+ if (args.update) {
1788
+ if (!sid) return json({ error: "No session to update; run discovery first." });
1789
+ } else if (!sid) {
1790
+ sid = db.createSession("discover", defaultConfig(), tenant);
1791
+ }
1792
+ const result = await discovery(db, sid, { hint: args.hint, mode: args.update ? "update" : "replace" });
1793
+ const sess = db.getSession(sid);
1794
+ if (sess && !sess.name) db.setSessionName(sid, deriveSessionName(db.getGraphSummary(sid), sess.startedAt));
1795
+ server.server.sendResourceUpdated({ uri: "cartography://graph/summary" }).catch((err) => {
1796
+ process.stderr.write(`[cartography-mcp] resource update notification failed: ${err instanceof Error ? err.message : String(err)}
1797
+ `);
1798
+ });
1799
+ server.server.sendResourceListChanged?.();
1800
+ return json({
1801
+ session: sid,
1802
+ nodes: result.nodes,
1803
+ edges: result.edges,
1804
+ ...result.delta ? { summary: result.delta.summary } : {}
1805
+ });
1806
+ }
1807
+ );
1808
+ }
1809
+ server.registerPrompt(
1810
+ "audit-attack-surface",
1811
+ { title: "Audit attack surface", description: "Review the discovered topology for externally-reachable services and risky dependencies." },
1812
+ () => ({
1813
+ messages: [{
1814
+ role: "user",
1815
+ content: { type: "text", text: "Read cartography://graph/summary and cartography://services. Identify externally-reachable services, data stores with broad inbound dependencies, and any node with low confidence that warrants verification. Use get_dependencies to assess blast radius. Summarize the attack surface and concrete hardening recommendations." }
1816
+ }]
1817
+ })
1818
+ );
1819
+ server.registerPrompt(
1820
+ "map-service-dependencies",
1821
+ {
1822
+ title: "Map service dependencies",
1823
+ description: "Produce a dependency map for a given service.",
1824
+ argsSchema: { service: z2.string().describe("Service node id or name") }
1825
+ },
1826
+ (args) => ({
1827
+ messages: [{
1828
+ role: "user",
1829
+ content: { type: "text", text: `Use query_infrastructure to locate "${args.service}", then get_dependencies (direction=both) to map everything it depends on and everything that depends on it. Present the result as a clear dependency tree and call out single points of failure.` }
1830
+ }]
1831
+ })
1832
+ );
1833
+ server.registerPrompt(
1834
+ "compare-environments",
1835
+ { title: "Compare environments", description: "Summarize infrastructure drift between two discovery snapshots." },
1836
+ () => ({
1837
+ messages: [{
1838
+ role: "user",
1839
+ content: { type: "text", text: "Call diff_topology to compare the two most recent discovery sessions. Summarize what was added, removed, and changed. Flag newly externally-reachable services and removed dependencies that could indicate an outage or decommission. Recommend what an operator should verify." }
1840
+ }]
1841
+ })
1842
+ );
1843
+ server.registerPrompt(
1844
+ "onboard-to-system",
1845
+ { title: "Onboard to system", description: "Explain the system landscape to a new engineer." },
1846
+ () => ({
1847
+ messages: [{
1848
+ role: "user",
1849
+ content: { type: "text", text: "Read cartography://graph/summary, then cartography://services and cartography://databases. Write a concise onboarding briefing for a new engineer: what the major systems are, how they connect, which data stores are central, and where to look first." }
1850
+ }]
1851
+ })
1852
+ );
1853
+ server.registerPrompt(
1854
+ "find-single-points-of-failure",
1855
+ { title: "Find single points of failure", description: "Rank chokepoints whose loss has the largest blast radius." },
1856
+ () => ({
1857
+ messages: [{
1858
+ role: "user",
1859
+ content: { type: "text", text: "Call get_summary and read topConnected (the most-connected nodes). For each, call get_dependencies (direction=both) to measure how many services depend on it. Identify single points of failure \u2014 nodes whose loss would disconnect or degrade the largest blast radius \u2014 rank them by impact, and recommend redundancy or mitigation for each." }
1860
+ }]
1861
+ })
1862
+ );
1863
+ server.registerPrompt(
1864
+ "generate-runbook",
1865
+ {
1866
+ title: "Generate operations runbook",
1867
+ description: "Produce an operations/onboarding runbook from the topology.",
1868
+ argsSchema: { service: z2.string().optional().describe("Optional service id/name to scope the runbook") }
1869
+ },
1870
+ (args) => ({
1871
+ messages: [{
1872
+ role: "user",
1873
+ content: { type: "text", text: args.service ? `Use query_infrastructure to locate "${args.service}", then get_dependencies (direction=both). Write an operations runbook for it: purpose, upstream/downstream dependencies, startup/shutdown order, health checks, common failure modes, and escalation steps.` : "Read cartography://graph/summary, then call get_summary and list_services. Write a system-wide operations runbook: major components, how they connect, critical data stores, startup/shutdown order, health checks, and where an on-call engineer should look first." }
1874
+ }]
1875
+ })
1876
+ );
1877
+ return server;
1878
+ }
1879
+
1880
+ // src/mcp/transports.ts
1881
+ import { randomUUID } from "crypto";
1882
+ import http from "http";
1883
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
1884
+ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
1885
/**
 * Serve an MCP server over stdio.
 *
 * Creates a fresh `StdioServerTransport` and resolves once `server.connect`
 * has completed for it.
 *
 * @param server MCP server instance exposing `connect(transport)`.
 */
async function runStdio(server) {
  await server.connect(new StdioServerTransport());
}
1889
+ var MAX_INGEST_BYTES = 5 * 1024 * 1024;
1890
+ async function readCappedBody(req, cap) {
1891
+ const chunks = [];
1892
+ let total = 0;
1893
+ let overflow = false;
1894
+ for await (const chunk of req) {
1895
+ if (overflow) continue;
1896
+ const buf = chunk;
1897
+ total += buf.length;
1898
+ if (total > cap) {
1899
+ overflow = true;
1900
+ chunks.length = 0;
1901
+ continue;
1902
+ }
1903
+ chunks.push(buf);
1904
+ }
1905
+ if (overflow) return { overflow: true, value: void 0 };
1906
+ if (chunks.length === 0) return { overflow: false, value: void 0 };
1907
+ try {
1908
+ return { overflow: false, value: JSON.parse(Buffer.concat(chunks).toString("utf8")) };
1909
+ } catch {
1910
+ return { overflow: false, value: void 0 };
1911
+ }
1912
+ }
1913
+ async function readJsonBody(req) {
1914
+ const chunks = [];
1915
+ for await (const chunk of req) chunks.push(chunk);
1916
+ if (chunks.length === 0) return void 0;
1917
+ try {
1918
+ return JSON.parse(Buffer.concat(chunks).toString("utf8"));
1919
+ } catch {
1920
+ return void 0;
1921
+ }
1922
+ }
1923
/**
 * Start the HTTP front end: streamable-HTTP MCP sessions under `/mcp` and,
 * when `opts.onIngest` is supplied, a JSON ingest endpoint under `/ingest`.
 *
 * Request handling order:
 *  1. Paths that start with neither `/mcp` nor (ingest enabled) `/ingest` -> 404.
 *  2. `checkBearer(authorization, token)` failing -> 401.
 *  3. Ingest: Host header must equal one of `allowedHosts` (case-insensitive)
 *     -> else 403; non-POST -> 405; body above `MAX_INGEST_BYTES` -> 413;
 *     otherwise the parsed body is handed to `onIngest` and its
 *     `{ status, body }` is written back as JSON.
 *  4. MCP: a known `mcp-session-id` is routed to its existing transport; an
 *     unknown/missing id is only accepted on POST (else 400), which creates a
 *     new transport connected to a server obtained from `factory()`.
 *  Any exception is logged to stderr and answered with 500 if headers are unsent.
 *
 * @param factory Zero-argument function returning an MCP server to connect.
 * @param opts Optional `host` (default "127.0.0.1"), `port` (default 3737),
 *   `allowedHosts`, `allowedOrigins`, `token`, and `onIngest`.
 * @returns The listening `http.Server`.
 * @throws Whatever `assertSafeBind` throws for the requested bind settings, and
 *   rejects with the server's startup error (e.g. EADDRINUSE) if binding fails.
 */
async function runHttp(factory, opts = {}) {
  const host = opts.host ?? "127.0.0.1";
  const port = opts.port ?? 3737;
  assertSafeBind({ host, port, ...opts.allowedHosts ? { allowedHosts: opts.allowedHosts } : {}, ...opts.token ? { token: opts.token } : {} });
  const allowedHosts = opts.allowedHosts ?? defaultAllowedHosts(host, port);
  const token = opts.token;
  // Live MCP transports keyed by the session id they were initialized with.
  const transports = new Map();
  const httpServer = http.createServer(async (req, res) => {
    try {
      const url = req.url ?? "";
      const isIngest = url.startsWith("/ingest") && opts.onIngest !== void 0;
      if (!url.startsWith("/mcp") && !isIngest) {
        res.writeHead(404, { "content-type": "application/json" }).end('{"error":"not found"}');
        return;
      }
      if (!checkBearer(req.headers["authorization"], token)) {
        res.writeHead(401, { "content-type": "application/json", "www-authenticate": "Bearer" }).end('{"error":"unauthorized"}');
        return;
      }
      if (isIngest) {
        // The MCP transport is configured with its own host allow-list below;
        // the ingest path bypasses that transport, so the Host header is
        // checked here explicitly.
        const hostHeader = (req.headers["host"] ?? "").toLowerCase();
        if (!allowedHosts.some((h) => h.toLowerCase() === hostHeader)) {
          res.writeHead(403, { "content-type": "application/json" }).end('{"error":"host not allowed"}');
          return;
        }
        const onIngest = opts.onIngest;
        if (req.method !== "POST") {
          res.writeHead(405, { "content-type": "application/json", "allow": "POST" }).end('{"error":"method not allowed"}');
          return;
        }
        const { overflow, value } = await readCappedBody(req, MAX_INGEST_BYTES);
        if (overflow) {
          res.writeHead(413, { "content-type": "application/json" }).end('{"error":"payload too large"}');
          return;
        }
        const out = onIngest(value);
        res.writeHead(out.status, { "content-type": "application/json" }).end(JSON.stringify(out.body));
        return;
      }
      const sessionId = req.headers["mcp-session-id"];
      const existing = sessionId ? transports.get(sessionId) : void 0;
      if (existing) {
        // Only POST requests carry a JSON body to pre-parse for the transport.
        const body2 = req.method === "POST" ? await readJsonBody(req) : void 0;
        await existing.handleRequest(req, res, body2);
        return;
      }
      if (req.method !== "POST") {
        res.writeHead(400, { "content-type": "application/json" }).end('{"error":"missing or unknown mcp-session-id"}');
        return;
      }
      const body = await readJsonBody(req);
      const transport = new StreamableHTTPServerTransport({
        sessionIdGenerator: () => randomUUID(),
        enableDnsRebindingProtection: true,
        allowedHosts,
        ...opts.allowedOrigins ? { allowedOrigins: opts.allowedOrigins } : {},
        onsessioninitialized: (id) => {
          transports.set(id, transport);
        }
      });
      transport.onclose = () => {
        if (transport.sessionId) transports.delete(transport.sessionId);
      };
      await factory().connect(transport);
      await transport.handleRequest(req, res, body);
    } catch (err) {
      process.stderr.write(`[cartography-mcp] HTTP request failed: ${err instanceof Error ? err.message : String(err)}\n`);
      if (!res.headersSent) res.writeHead(500, { "content-type": "application/json" }).end('{"error":"internal error"}');
    }
  });
  // A failed bind is reported through the server's 'error' event, not through
  // the listen callback. Without a listener that event is thrown as an
  // uncaught exception and this promise would never settle, so surface it as a
  // rejection. The listener is removed once listening so that later server
  // errors keep their previous (unhandled) behaviour.
  await new Promise((resolve, reject) => {
    const onStartupError = (err) => reject(err);
    httpServer.once("error", onStartupError);
    httpServer.listen(port, host, () => {
      httpServer.removeListener("error", onStartupError);
      resolve();
    });
  });
  return httpServer;
}
1997
+
1998
+ // src/semantic/hash.ts
1999
/**
 * 32-bit FNV-1a style hash over the UTF-16 code units of a string.
 *
 * @param s Input string.
 * @returns Unsigned 32-bit hash value.
 */
function fnv1a(s) {
  const PRIME = 16777619;
  let acc = 2166136261; // 32-bit FNV offset basis
  let pos = 0;
  while (pos < s.length) {
    // XOR the code unit in, then multiply with 32-bit wrap-around.
    acc = Math.imul(acc ^ s.charCodeAt(pos), PRIME);
    pos += 1;
  }
  // Reinterpret the signed 32-bit result as unsigned.
  return acc >>> 0;
}
2007
+
2008
+ // src/semantic/embeddings.ts
2009
/**
 * Try to build an embedder backed by a local `@huggingface/transformers`
 * feature-extraction pipeline.
 *
 * @param model Model identifier passed to the pipeline.
 * @returns An embedder `{ id, dimensions, embed(texts) }`, or `undefined` when
 *   importing the package or creating the pipeline throws.
 *   NOTE(review): `dimensions` is fixed at 384 regardless of `model`; confirm
 *   before using a model with a different embedding size.
 */
async function createLocalEmbedder(model = "Xenova/all-MiniLM-L6-v2") {
  let extractor;
  try {
    const tf = await import("@huggingface/transformers");
    extractor = await tf.pipeline("feature-extraction", model);
  } catch {
    // Optional dependency: any load failure means "no local embedder".
    return void 0;
  }
  // Embed one text with mean pooling + normalization and copy into a Float32Array.
  const embedOne = async (text) => {
    const tensor = await extractor(text, { pooling: "mean", normalize: true });
    return Float32Array.from(tensor.data);
  };
  return {
    id: `local:${model}`,
    dimensions: 384,
    async embed(texts) {
      const vectors = [];
      // Texts are processed one at a time, in order.
      for (const text of texts) {
        vectors.push(await embedOne(text));
      }
      return vectors;
    }
  };
}
2029
+
2030
+ // src/semantic/store.ts
2031
/**
 * Build the text that represents a node for embedding.
 *
 * Joins, in order: name, id (with `:` and `_` turned into spaces),
 * `type <type>`, domain, subDomain, metadata.category, space-joined tags and
 * metadata.description. Empty parts are dropped; parts are separated by an
 * em dash surrounded by spaces. Metadata values are used only when they are
 * strings.
 *
 * @param n Node with `name`, `id`, `type`, `tags` and optional `domain`,
 *   `subDomain`, `metadata`.
 * @returns The combined text.
 */
function nodeText(n) {
  const metaString = (key) => {
    const raw = n.metadata?.[key];
    return typeof raw === "string" ? raw : "";
  };
  const parts = [
    n.name,
    n.id.replace(/[:_]/g, " "),
    `type ${n.type}`,
    n.domain ?? "",
    n.subDomain ?? "",
    metaString("category"),
    n.tags.join(" "),
    metaString("description")
  ];
  return parts.filter((part) => Boolean(part)).join(" \u2014 ");
}
2036
+ function hash(s) {
2037
+ return fnv1a(s).toString(16);
2038
+ }
2039
/**
 * Wrap a typed array's bytes in a Node `Buffer` without copying.
 *
 * The Buffer covers exactly the view's window (`byteOffset`, `byteLength`)
 * of the underlying ArrayBuffer.
 *
 * @param v Typed array view (e.g. Float32Array).
 * @returns Buffer sharing memory with `v`.
 */
function toBuffer(v) {
  const { buffer, byteOffset, byteLength } = v;
  return Buffer.from(buffer, byteOffset, byteLength);
}
2042
/**
 * Per-session vector index stored in the catalog's SQLite database.
 *
 * `vec_index` maps (session_id, node_id) to a rowid plus a hash of the text
 * that was embedded; `vec_nodes` (a sqlite-vec `vec0` virtual table) holds the
 * embedding for that rowid; `vec_meta` records the embedder id and dimensions.
 */
var VectorStore = class {
  /**
   * @param db Catalog database exposing `rawConnection()` and `getNodes(sessionId)`.
   * @param embedder Embedder with `id`, `dimensions` and `embed(texts)`.
   */
  constructor(db, embedder) {
    this.db = db;
    this.embedder = embedder;
  }
  // Set once init() has succeeded so later calls return immediately.
  loaded = false;
  /** Load sqlite-vec and ensure the schema exists. Returns false if unavailable. */
  async init() {
    if (this.loaded) return true;
    try {
      const conn = this.db.rawConnection();
      const sqliteVec = await import("sqlite-vec");
      sqliteVec.load(conn);
      conn.exec(`
        CREATE TABLE IF NOT EXISTS vec_index (
          rowid INTEGER PRIMARY KEY AUTOINCREMENT,
          session_id TEXT NOT NULL,
          node_id TEXT NOT NULL,
          hash TEXT NOT NULL,
          UNIQUE(session_id, node_id)
        );
        CREATE TABLE IF NOT EXISTS vec_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
      `);
      const dimRow = conn.prepare("SELECT value FROM vec_meta WHERE key = 'dims'").get();
      const dims = this.embedder.dimensions;
      // A different embedding width invalidates every stored vector: rebuild.
      if (dimRow && Number(dimRow.value) !== dims) {
        conn.exec("DROP TABLE IF EXISTS vec_nodes; DELETE FROM vec_index;");
      }
      conn.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS vec_nodes USING vec0(embedding float[${dims}])`);
      conn.prepare("INSERT OR REPLACE INTO vec_meta(key, value) VALUES (?, ?)").run("dims", String(dims));
      conn.prepare("INSERT OR REPLACE INTO vec_meta(key, value) VALUES (?, ?)").run("embedder", this.embedder.id);
      this.loaded = true;
      return true;
    } catch {
      // Any failure (missing extension, SQL error) means "vector store unavailable".
      return false;
    }
  }
  /**
   * Incrementally embed and index any new/changed nodes for a session.
   *
   * Embeddings are computed first and all index writes are then applied in a
   * single transaction, so a failing embedder can no longer leave `vec_index`
   * rows whose hash looks current but which have no vector (such rows would be
   * skipped as "unchanged" on every later run).
   *
   * @param sessionId Session whose nodes should be indexed.
   * @returns `{ embedded, total }`: vectors written and nodes examined.
   * @throws If the embedder fails or returns fewer vectors than texts.
   */
  async index(sessionId) {
    if (!await this.init()) return { embedded: 0, total: 0 };
    const conn = this.db.rawConnection();
    const nodes = this.db.getNodes(sessionId);
    const getRow = conn.prepare("SELECT rowid, hash FROM vec_index WHERE session_id = ? AND node_id = ?");
    // Pass 1 (read-only): find nodes whose embedded text is new or has changed.
    const pending = [];
    for (const n of nodes) {
      const text = nodeText(n);
      // The embedder id is part of the hash so switching embedders re-embeds.
      const h = hash(`${this.embedder.id}:${text}`);
      const existing = getRow.get(sessionId, n.id);
      if (existing && existing.hash === h) continue;
      pending.push({ nodeId: n.id, hash: h, text });
    }
    if (pending.length === 0) return { embedded: 0, total: nodes.length };
    // Pass 2: embed before touching the index.
    const vectors = await this.embedder.embed(pending.map((p) => p.text));
    if (vectors.length < pending.length) {
      throw new Error(`embedder returned ${vectors.length} vectors for ${pending.length} texts`);
    }
    const insIndex = conn.prepare("INSERT INTO vec_index (session_id, node_id, hash) VALUES (?, ?, ?)");
    const updHash = conn.prepare("UPDATE vec_index SET hash = ? WHERE rowid = ?");
    const delVec = conn.prepare("DELETE FROM vec_nodes WHERE rowid = ?");
    const insVec = conn.prepare("INSERT INTO vec_nodes (rowid, embedding) VALUES (?, ?)");
    let embedded = 0;
    // Pass 3: apply every write atomically.
    const tx = conn.transaction(() => {
      pending.forEach((p, i) => {
        // Re-read the row: another index() call may have committed while this
        // one was awaiting the embedder.
        const current = getRow.get(sessionId, p.nodeId);
        let rowid;
        if (current) {
          if (current.hash === p.hash) return;
          updHash.run(p.hash, current.rowid);
          delVec.run(BigInt(current.rowid));
          rowid = BigInt(current.rowid);
        } else {
          const info = insIndex.run(sessionId, p.nodeId, p.hash);
          rowid = BigInt(info.lastInsertRowid);
        }
        insVec.run(rowid, toBuffer(vectors[i]));
        embedded += 1;
      });
    });
    tx();
    return { embedded, total: nodes.length };
  }
  /**
   * k-nearest-neighbour search within a session. Returns node ids + distances.
   *
   * Re-indexes the session first, then queries `vec_nodes` across all sessions
   * with a 5x over-fetch and keeps only rows belonging to `sessionId`.
   *
   * @param sessionId Session to search.
   * @param query Free-text query to embed.
   * @param k Maximum number of results.
   * @returns Up to `k` `{ nodeId, distance }` entries in ascending distance;
   *   empty when the store is unavailable or the query yields no vector.
   */
  async search(sessionId, query, k) {
    if (!await this.init()) return [];
    await this.index(sessionId);
    const conn = this.db.rawConnection();
    const [qv] = await this.embedder.embed([query]);
    if (!qv) return [];
    const overfetch = Math.max(k * 5, k);
    const knn = conn.prepare(
      "SELECT rowid, distance FROM vec_nodes WHERE embedding MATCH ? ORDER BY distance LIMIT ?"
    ).all(toBuffer(qv), overfetch);
    const meta = conn.prepare("SELECT node_id AS nodeId, session_id AS sessionId FROM vec_index WHERE rowid = ?");
    const out = [];
    for (const row of knn) {
      const m = meta.get(row.rowid);
      if (m && m.sessionId === sessionId) out.push({ nodeId: m.nodeId, distance: row.distance });
      if (out.length >= k) break;
    }
    return out;
  }
};
2134
+
2135
+ // src/semantic/search.ts
2136
+ var lexical = (db, sessionId, query, opts) => db.searchNodes(sessionId, query, { types: opts.types, limit: opts.limit }).map((node) => ({ node }));
2137
+ var lexicalSearch2 = () => async (d, sid, q, opts) => lexical(d, sid, q, opts);
2138
/**
 * Build the search function used by the topology tools.
 *
 * Falls back to purely lexical search when no embedder is available (none
 * passed and `createLocalEmbedder()` yields nothing) or when the vector store
 * fails to initialise. Otherwise returns a semantic search that itself falls
 * back to lexical search whenever it produces no usable hits.
 *
 * @param db Catalog database handed to the `VectorStore`.
 * @param embedder Optional embedder; a local one is created when omitted.
 * @param opts Optional `{ log }` callback receiving status messages.
 * @returns Async `(db, sessionId, query, { types?, limit })` search function
 *   yielding `{ node, score? }` entries.
 */
async function createSemanticSearch(db, embedder, opts = {}) {
  const { log } = opts;
  const provider = embedder ?? await createLocalEmbedder();
  if (!provider) {
    log?.("semantic search: embeddings unavailable (@huggingface/transformers not installed or failed to load) \u2014 using lexical search");
    return lexicalSearch2();
  }
  const store = new VectorStore(db, provider);
  if (!await store.init()) {
    log?.("semantic search: vector store unavailable (sqlite-vec not installed or failed to load) \u2014 using lexical search");
    return lexicalSearch2();
  }
  log?.("semantic search: ready");
  return async (d, sid, query, queryOpts) => {
    const hits = await store.search(sid, query, queryOpts.limit);
    if (hits.length === 0) return lexical(d, sid, query, queryOpts);
    const byId = d.getNodesByIds(sid, hits.map((hit) => hit.nodeId));
    // An absent or empty `types` list means "no type restriction".
    const typeAllowed = (node) => {
      const wanted = queryOpts.types;
      return !(wanted && wanted.length > 0) || wanted.includes(node.type);
    };
    const results = hits.flatMap((hit) => {
      const node = byId.get(hit.nodeId);
      if (!node || !typeAllowed(node)) return [];
      // Map distance to a score of 1 - distance/2, floored at 0.
      return [{ node, score: Math.max(0, 1 - hit.distance / 2) }];
    });
    return results.length > 0 ? results : lexical(d, sid, query, queryOpts);
  };
}
2166
+
2167
+ // src/store/sqlite.ts
2168
+ var SqliteStoreBackend = class {
2169
+ constructor(db) {
2170
+ this.db = db;
2171
+ }
2172
+ upsertNode(org, node, identity, contributor) {
2173
+ return this.db.upsertCentralNode(org, node, identity, contributor);
2174
+ }
2175
+ insertEdge(org, edge) {
2176
+ this.db.insertCentralEdge(org, edge);
2177
+ }
2178
+ getSummary(org) {
2179
+ return this.db.getOrgSummary(org);
2180
+ }
2181
+ getContributors(globalId2) {
2182
+ return this.db.getContributorsByGlobalId(globalId2);
2183
+ }
2184
+ /**
2185
+ * No-op: the wrapped `CartographyDB` is owned by the caller (it is shared with the
2186
+ * read-side MCP server in server-mode), so the backend never closes it. The caller
2187
+ * closes the `CartographyDB` directly.
2188
+ */
2189
+ close() {
2190
+ }
2191
+ };
2192
+
2193
+ // src/central/ingest.ts
2194
+ import { z as z3 } from "zod";
2195
+
2196
+ // src/central/merge.ts
2197
+ function computeIdentity(org, node) {
2198
+ return {
2199
+ globalId: globalId(org, node.id),
2200
+ contentHash: contentHash(node.type, node.name, keyMetaOf(node.metadata ?? {}))
2201
+ };
2202
+ }
2203
+
2204
+ // src/central/anonymization.ts
2205
+ var LOOPBACK_IP = /\b127(?:\.\d{1,3}){3}\b/g;
2206
+ var FQDN = /\b(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}\b/gi;
2207
+ var POSIX_PATH2 = /(?:^|(?<=\s|=|:|"|'|\())(\/[A-Za-z0-9._-]+(?:\/[A-Za-z0-9._-]+)+)/g;
2208
+ var WIN_PATH2 = /\b[A-Za-z]:\\[A-Za-z0-9._\\-]+/g;
2209
+ var HOME_USER = /(?:\/home\/|\/Users\/|[A-Za-z]:\\Users\\)([A-Za-z0-9._-]+)/g;
2210
+ var BARE_INTERNAL_HOST = /^[a-z0-9]+(?:-[a-z0-9]+)+$|^[a-z]+\d+$|^\d+[a-z]+$/i;
2211
+ var ANON_TOKEN = /^anon:(?:host|user|path|ip):[a-z2-7]+$/;
2212
+ function violationsInString(s, path) {
2213
+ const out = [];
2214
+ const trimmed = s.trim();
2215
+ if (trimmed === "" || ANON_TOKEN.test(trimmed)) return out;
2216
+ if (PRIVATE_IP.test(s) || LOOPBACK_IP.test(s)) out.push({ path, kind: "private-ip" });
2217
+ PRIVATE_IP.lastIndex = 0;
2218
+ LOOPBACK_IP.lastIndex = 0;
2219
+ if (HOME_USER.test(s)) out.push({ path, kind: "username" });
2220
+ HOME_USER.lastIndex = 0;
2221
+ if (WIN_PATH2.test(s) || POSIX_PATH2.test(s)) out.push({ path, kind: "absolute-path" });
2222
+ WIN_PATH2.lastIndex = 0;
2223
+ POSIX_PATH2.lastIndex = 0;
2224
+ if (FQDN.test(s)) out.push({ path, kind: "hostname" });
2225
+ FQDN.lastIndex = 0;
2226
+ if (out.length === 0 && BARE_INTERNAL_HOST.test(trimmed)) out.push({ path, kind: "hostname" });
2227
+ return out;
2228
+ }
2229
+ function findAnonViolations(value, level, path = "") {
2230
+ if (level !== "anonymized") return [];
2231
+ return collect(value, path);
2232
+ }
2233
+ function collect(value, path) {
2234
+ if (typeof value === "string") return violationsInString(value, path || "(root)");
2235
+ if (Array.isArray(value)) return value.flatMap((v, i) => collect(v, `${path}[${i}]`));
2236
+ if (value && typeof value === "object") {
2237
+ return Object.entries(value).flatMap(([k, v]) => collect(v, path ? `${path}.${k}` : k));
2238
+ }
2239
+ return [];
2240
+ }
2241
+ function scrub(value, path) {
2242
+ if (typeof value === "string") return violationsInString(value, path || "(root)").length > 0 ? "***" : value;
2243
+ if (Array.isArray(value)) return value.map((v, i) => scrub(v, `${path}[${i}]`));
2244
+ if (value && typeof value === "object") {
2245
+ const out = {};
2246
+ for (const [k, v] of Object.entries(value)) out[k] = scrub(v, path ? `${path}.${k}` : k);
2247
+ return out;
2248
+ }
2249
+ return value;
2250
+ }
2251
+ function revalidateAnonymized(node, level, mode) {
2252
+ const violations = [
2253
+ ...findAnonViolations(node.name, level, "name"),
2254
+ ...findAnonViolations(node.id, level, "id"),
2255
+ ...findAnonViolations(node.metadata ?? {}, level, "metadata"),
2256
+ ...findAnonViolations(node.tags ?? [], level, "tags"),
2257
+ ...findAnonViolations(node.domain ?? "", level, "domain"),
2258
+ ...findAnonViolations(node.subDomain ?? "", level, "subDomain")
2259
+ ];
2260
+ if (violations.length === 0 || mode === "reject") return { node, violations };
2261
+ const scrubbed = {
2262
+ ...node,
2263
+ name: scrub(node.name, "name"),
2264
+ metadata: scrub(node.metadata ?? {}, "metadata"),
2265
+ tags: (node.tags ?? []).map((t, i) => scrub(t, `tags[${i}]`)),
2266
+ ...node.domain != null ? { domain: scrub(node.domain, "domain") } : {},
2267
+ ...node.subDomain != null ? { subDomain: scrub(node.subDomain, "subDomain") } : {}
2268
+ };
2269
+ return { node: scrubbed, violations };
2270
+ }
2271
+
2272
+ // src/central/ingest.ts
2273
// Envelope schema version accepted by the ingest endpoint.
var INGEST_SCHEMA_VERSION = 1;
// Maximum number of items allowed in a single envelope.
var MAX_ITEMS = 50000;
// Who contributed the data; everything except machineId has a default.
var ContributorSchema = z3.object({
  machineId: z3.string().min(1),
  hostname: z3.string().default("unknown"),
  user: z3.string().default("unknown"),
  confidence: z3.number().min(0).max(1).default(0.5)
});
// One envelope entry: the payload stays `unknown` at this level.
var IngestEnvelopeItemSchema = z3.object({
  contentHash: z3.string(),
  kind: z3.enum(["node", "edge"]),
  payload: z3.unknown()
});
// Top-level ingest envelope.
var IngestEnvelopeSchema = z3.object({
  schemaVersion: z3.literal(INGEST_SCHEMA_VERSION),
  org: z3.string().min(1).optional(),
  items: z3.array(IngestEnvelopeItemSchema).max(MAX_ITEMS),
  // Extensions (forward-compatible; 2.11 does not yet send these).
  contributor: ContributorSchema.optional(),
  anonymizationLevel: z3.enum(["none", "anonymized", "full"]).optional()
});
2293
+ function ingestEnvelope(store, envelope, opts = {}) {
2294
+ const anonMode = opts.anonMode ?? "reject";
2295
+ const org = envelope.org ?? opts.defaultOrg ?? "local";
2296
+ const level = envelope.anonymizationLevel ?? "anonymized";
2297
+ const at = (/* @__PURE__ */ new Date()).toISOString();
2298
+ const contributor = {
2299
+ machineId: envelope.contributor?.machineId ?? "unknown",
2300
+ hostname: envelope.contributor?.hostname ?? "unknown",
2301
+ user: envelope.contributor?.user ?? "unknown",
2302
+ organization: org,
2303
+ at,
2304
+ confidence: envelope.contributor?.confidence ?? 0.5
2305
+ };
2306
+ let accepted = 0;
2307
+ let merged = 0;
2308
+ let rejected = 0;
2309
+ let edges = 0;
2310
+ let violations = 0;
2311
+ const acceptedNodeIds = /* @__PURE__ */ new Set();
2312
+ for (const item of envelope.items) {
2313
+ if (item.kind !== "node") continue;
2314
+ const parsed = NodeSchema.safeParse(item.payload);
2315
+ if (!parsed.success) {
2316
+ rejected += 1;
2317
+ logWarn("ingest: dropped malformed node payload", { org, contentHash: item.contentHash });
2318
+ continue;
2319
+ }
2320
+ const node = parsed.data;
2321
+ const check = revalidateAnonymized(node, level, anonMode);
2322
+ if (check.violations.length > 0) {
2323
+ violations += check.violations.length;
2324
+ const idViolation = check.violations.some((v) => v.path === "id");
2325
+ if (anonMode === "reject" || idViolation) {
2326
+ rejected += 1;
2327
+ logWarn("ingest: rejected node with un-anonymized fragments", {
2328
+ org,
2329
+ nodeId: node.id,
2330
+ action: "reject",
2331
+ mode: anonMode,
2332
+ idViolation,
2333
+ kinds: check.violations.map((v) => `${v.path}:${v.kind}`)
2334
+ });
2335
+ continue;
2336
+ }
2337
+ logWarn("ingest: scrubbed un-anonymized fragments from node", {
2338
+ org,
2339
+ nodeId: node.id,
2340
+ action: "strip",
2341
+ kinds: check.violations.map((v) => `${v.path}:${v.kind}`)
2342
+ });
2343
+ }
2344
+ const safe = check.node;
2345
+ const identity = computeIdentity(org, safe);
2346
+ const outcome = store.upsertNode(org, safe, identity, { ...contributor, confidence: safe.confidence });
2347
+ accepted += 1;
2348
+ if (outcome === "merged") merged += 1;
2349
+ acceptedNodeIds.add(safe.id);
2350
+ }
2351
+ for (const item of envelope.items) {
2352
+ if (item.kind !== "edge") continue;
2353
+ const parsed = EdgeSchema.safeParse(item.payload);
2354
+ if (!parsed.success) {
2355
+ logWarn("ingest: dropped malformed edge payload", { org, contentHash: item.contentHash });
2356
+ continue;
2357
+ }
2358
+ const edge = parsed.data;
2359
+ if (acceptedNodeIds.size > 0 && (!acceptedNodeIds.has(edge.sourceId) || !acceptedNodeIds.has(edge.targetId))) {
2360
+ continue;
2361
+ }
2362
+ store.insertEdge(org, edge);
2363
+ edges += 1;
2364
+ }
2365
+ logInfo("ingest", { org, accepted, merged, rejected, edges, violations, level, anonMode });
2366
+ return { org, accepted, merged, rejected, edges, violations };
2367
+ }
2368
+
2369
+ // src/central/server.ts
2370
+ function createIngestHandler(store, opts = {}) {
2371
+ return (body) => {
2372
+ const parsed = IngestEnvelopeSchema.safeParse(body);
2373
+ if (!parsed.success) {
2374
+ const issues = parsed.error.issues.map((i) => `${i.path.join(".") || "(root)"}: ${i.message}`);
2375
+ logWarn("ingest: rejected invalid envelope", { issues });
2376
+ return { status: 400, body: { error: "invalid envelope", issues } };
2377
+ }
2378
+ try {
2379
+ const result = ingestEnvelope(store, parsed.data, opts);
2380
+ return { status: 200, body: result };
2381
+ } catch (err) {
2382
+ logWarn("ingest: failed", { error: err instanceof Error ? err.message : String(err) });
2383
+ return { status: 500, body: { error: "ingest failed" } };
2384
+ }
2385
+ };
2386
+ }
2387
+
2388
+ // src/mcp/start.ts
2389
+ function parseMcpArgs(argv) {
2390
+ const opts = {};
2391
+ for (let i = 0; i < argv.length; i++) {
2392
+ const a = argv[i];
2393
+ if (a === "--http") opts.transport = "http";
2394
+ else if (a === "--stdio") opts.transport = "stdio";
2395
+ else if (a === "--no-semantic") opts.semantic = false;
2396
+ else if (a === "--port") opts.port = Number(argv[++i]);
2397
+ else if (a === "--host") opts.host = argv[++i];
2398
+ else if (a === "--allowed-hosts") opts.allowedHosts = (argv[++i] ?? "").split(",").map((h) => h.trim()).filter(Boolean);
2399
+ else if (a === "--token") opts.token = argv[++i];
2400
+ else if (a === "--db") opts.dbPath = argv[++i];
2401
+ else if (a === "--session") opts.session = argv[++i];
2402
+ else if (a === "--tenant" || a === "--org") opts.tenant = argv[++i];
2403
+ else if (a === "--plugins") opts.plugins = (argv[++i] ?? "").split(",").map((p) => p.trim()).filter(Boolean);
2404
+ else if (a === "--server-mode") opts.serverMode = true;
2405
+ else if (a === "--anon-mode") {
2406
+ const m = argv[++i];
2407
+ if (m === "reject" || m === "strip") opts.anonMode = m;
2408
+ } else if (a === "--help" || a === "-h") opts.help = true;
2409
+ }
2410
+ return opts;
2411
+ }
2412
/**
 * Start the Cartography MCP server over stdio or Streamable HTTP.
 *
 * `opts.serverMode === true` forces the HTTP transport, passes the tenant as
 * `org` to `createMcpServer`, and registers an ingest handler backed by
 * `SqliteStoreBackend`. Otherwise the transport is HTTP only when
 * `opts.transport === "http"`, and stdio in every other case.
 *
 * Fallbacks applied here: db path from `defaultConfig()`, session "latest",
 * port 3737, host "127.0.0.1", anon mode "reject", plugins from the
 * `CARTOGRAPHY_PLUGINS` environment variable (comma separated), bearer token
 * from `CARTOGRAPHY_HTTP_TOKEN` and then `CARTOGRAPHY_CENTRAL_TOKEN`.
 *
 * @param opts Options in the shape produced by `parseMcpArgs`, plus an
 *             optional `log(message)` sink (defaults to writing to stderr).
 */
async function startMcp(opts = {}) {
  const log = opts.log ?? ((line) => process.stderr.write(line + "\n"));
  const db = new CartographyDB(opts.dbPath ?? defaultConfig().dbPath);

  // Semantic search is built unless explicitly disabled; `search` stays
  // undefined in that case and is still passed through to the server.
  const search = opts.semantic === false ? void 0 : await createSemanticSearch(db, void 0, { log });

  // Explicit plugin list wins; otherwise fall back to the environment.
  let plugins = opts.plugins;
  if (plugins == null) {
    plugins = process.env["CARTOGRAPHY_PLUGINS"]
      ? process.env["CARTOGRAPHY_PLUGINS"].split(",").map((name) => name.trim()).filter(Boolean)
      : [];
  }
  const discovery = localDiscoveryFn(void 0, plugins);

  const tenant = normalizeTenant(opts.tenant);
  const serverMode = opts.serverMode === true;
  const org = serverMode ? tenant : void 0;

  // Creates a new MCP server instance on every call.
  const factory = () => {
    const serverOptions = { db, session: opts.session ?? "latest", tenant, search, discovery };
    if (org !== void 0) serverOptions.org = org;
    return createMcpServer(serverOptions);
  };

  const useHttp = serverMode || opts.transport === "http";
  if (!useHttp) {
    log(`Cartography MCP server (stdio) ready (tenant: ${tenant})`);
    await runStdio(factory());
    return;
  }

  const port = opts.port ?? 3737;
  const host = opts.host ?? "127.0.0.1";
  const token = opts.token ?? process.env["CARTOGRAPHY_HTTP_TOKEN"] ?? process.env["CARTOGRAPHY_CENTRAL_TOKEN"];
  const anonMode = opts.anonMode ?? "reject";

  // Optional keys are added only when they carry a value, so `runHttp`
  // never receives explicit `undefined` entries.
  const httpOptions = { port, host };
  if (opts.allowedHosts) httpOptions.allowedHosts = opts.allowedHosts;
  if (token) httpOptions.token = token;
  if (serverMode) {
    const store = new SqliteStoreBackend(db);
    httpOptions.onIngest = createIngestHandler(store, { anonMode, defaultOrg: tenant });
  }

  // NOTE(review): the banner below is logged after `runHttp` resolves;
  // presumably that happens once the server is listening — confirm.
  await runHttp(factory, httpOptions);

  const authNote = token ? " (auth: bearer token required)" : "";
  const modeNote = serverMode ? ` [central collector: POST /ingest enabled, anon-mode=${anonMode}]` : "";
  log(`Cartography MCP server (Streamable HTTP) on http://${host}:${port}/mcp${authNote} (tenant: ${tenant})${modeNote}`);
}
2450
+
2451
+ export {
2452
+ isPersonalHost,
2453
+ runLocalDiscovery,
2454
+ getRuleset,
2455
+ listRulesets,
2456
+ runDrift,
2457
+ loadOrgKey,
2458
+ rotateOrgKey,
2459
+ pseudonymizeString,
2460
+ pseudonymize,
2461
+ reversePseudonym,
2462
+ parseMcpArgs,
2463
+ startMcp
2464
+ };
2465
+ //# sourceMappingURL=chunk-B2AKONVW.js.map