@datasynx/agentic-ai-cartography 2.0.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/AGENTS.md +32 -0
  2. package/README.md +115 -6
  3. package/dist/api-bin.js +24 -0
  4. package/dist/api-bin.js.map +1 -0
  5. package/dist/{bookmarks-VS56KVCO.js → bookmarks-WXHE7GN7.js} +6 -3
  6. package/dist/{chunk-CJ2PITFA.js → chunk-2SZ5QHGH.js} +71 -9
  7. package/dist/chunk-2SZ5QHGH.js.map +1 -0
  8. package/dist/chunk-7QEBFMN4.js +3278 -0
  9. package/dist/chunk-7QEBFMN4.js.map +1 -0
  10. package/dist/chunk-7VZH5PFV.js +1134 -0
  11. package/dist/chunk-7VZH5PFV.js.map +1 -0
  12. package/dist/chunk-B2AKONVW.js +2465 -0
  13. package/dist/chunk-B2AKONVW.js.map +1 -0
  14. package/dist/chunk-WCR47QA2.js +277 -0
  15. package/dist/chunk-WCR47QA2.js.map +1 -0
  16. package/dist/cli.js +2367 -663
  17. package/dist/cli.js.map +1 -1
  18. package/dist/index.cjs +9405 -57913
  19. package/dist/index.cjs.map +1 -1
  20. package/dist/index.d.cts +3048 -69
  21. package/dist/index.d.ts +3048 -69
  22. package/dist/index.js +9150 -2607
  23. package/dist/index.js.map +1 -1
  24. package/dist/mcp-bin.js +17 -26
  25. package/dist/mcp-bin.js.map +1 -1
  26. package/dist/types-TJWXAQ2L.js +66 -0
  27. package/llms-full.txt +758 -0
  28. package/llms.txt +24 -0
  29. package/package.json +27 -9
  30. package/scripts/build-llms.mjs +89 -0
  31. package/scripts/build-mcpb.mjs +31 -0
  32. package/scripts/gen-api-schemas.ts +29 -0
  33. package/scripts/gen-docs.ts +123 -0
  34. package/scripts/sync-version.mjs +51 -0
  35. package/scripts/validate-server-json.mjs +54 -0
  36. package/server.json +4 -4
  37. package/dist/chunk-CJ2PITFA.js.map +0 -1
  38. package/dist/chunk-D6SRSLBF.js +0 -48
  39. package/dist/chunk-J6FDZ6HZ.js +0 -142
  40. package/dist/chunk-J6FDZ6HZ.js.map +0 -1
  41. package/dist/chunk-UGSNG3QJ.js +0 -49
  42. package/dist/chunk-UGSNG3QJ.js.map +0 -1
  43. package/dist/chunk-W7YE6AAH.js +0 -1516
  44. package/dist/chunk-W7YE6AAH.js.map +0 -1
  45. package/dist/onnxruntime_binding-6Q6HXASN.node +0 -0
  46. package/dist/onnxruntime_binding-EKZT2NRK.node +0 -0
  47. package/dist/onnxruntime_binding-P6S7V3CI.node +0 -0
  48. package/dist/onnxruntime_binding-PJNNIIUO.node +0 -0
  49. package/dist/onnxruntime_binding-UN6SPTQK.node +0 -0
  50. package/dist/sdk-A6NLO3DJ.js +0 -12294
  51. package/dist/sdk-A6NLO3DJ.js.map +0 -1
  52. package/dist/sdk-G5D4WQZ4.js +0 -12293
  53. package/dist/sdk-G5D4WQZ4.js.map +0 -1
  54. package/dist/sdk-QSTAREST.js +0 -4869
  55. package/dist/sdk-QSTAREST.js.map +0 -1
  56. package/dist/sqlite-vec-EZN67B2V.js +0 -40
  57. package/dist/sqlite-vec-EZN67B2V.js.map +0 -1
  58. package/dist/sqlite-vec-UK5YYE5T.js +0 -39
  59. package/dist/sqlite-vec-UK5YYE5T.js.map +0 -1
  60. package/dist/transformers.node-BTYUTJK5.js +0 -42884
  61. package/dist/transformers.node-BTYUTJK5.js.map +0 -1
  62. package/dist/transformers.node-J6PRTTOX.js +0 -42883
  63. package/dist/transformers.node-J6PRTTOX.js.map +0 -1
  64. package/dist/types-JG27FR3E.js +0 -29
  65. package/dist/types-JG27FR3E.js.map +0 -1
  66. package/scripts/postinstall.mjs +0 -7
  67. /package/dist/{bookmarks-VS56KVCO.js.map → bookmarks-WXHE7GN7.js.map} +0 -0
  68. /package/dist/{chunk-D6SRSLBF.js.map → types-TJWXAQ2L.js.map} +0 -0
package/dist/index.d.cts CHANGED
@@ -1,9 +1,51 @@
1
1
  import Database from 'better-sqlite3';
2
2
  import { z } from 'zod';
3
3
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
4
- import http from 'node:http';
4
+ import http, { IncomingMessage, Server } from 'node:http';
5
5
  import { McpServerConfig, HookCallback } from '@anthropic-ai/claude-agent-sdk';
6
6
 
7
+ /**
8
+ * Topology diffing — pure, deterministic comparison of two discovery snapshots.
9
+ *
10
+ * This module knows nothing about the database; it operates on plain
11
+ * node/edge arrays so it can be unit-tested in isolation and reused by the
12
+ * CLI, the MCP server, and the exporter. Drift is detected on a stable
13
+ * projection of node fields (see DRIFT_FIELDS); `confidence` is reported but
14
+ * never marks a node as changed on its own.
15
+ */
16
+
17
+ /** Deterministic JSON serialization with recursively sorted object keys. */
18
+ declare function stableStringify(value: unknown): string;
19
+ interface TopologyInput {
20
+ nodes: NodeRow[];
21
+ edges: EdgeRow[];
22
+ }
23
+ interface TopologyDelta {
24
+ nodes: {
25
+ added: NodeRow[];
26
+ removed: NodeRow[];
27
+ changed: NodeChange[];
28
+ unchanged: number;
29
+ };
30
+ edges: {
31
+ added: EdgeRow[];
32
+ removed: EdgeRow[];
33
+ unchanged: number;
34
+ };
35
+ summary: {
36
+ nodesAdded: number;
37
+ nodesRemoved: number;
38
+ nodesChanged: number;
39
+ edgesAdded: number;
40
+ edgesRemoved: number;
41
+ };
42
+ }
43
+ /**
44
+ * Compute the delta from `base` to `current`. Nodes are keyed by `id`, edges by
45
+ * (source, target, relationship). Pure: same inputs always yield the same output.
46
+ */
47
+ declare function diffTopology(base: TopologyInput, current: TopologyInput): TopologyDelta;
48
+
7
49
  declare const NODE_TYPES: readonly ["host", "database_server", "database", "table", "web_service", "api_endpoint", "cache_server", "message_broker", "queue", "topic", "container", "pod", "k8s_cluster", "config_file", "saas_tool", "unknown"];
8
50
  type NodeType = typeof NODE_TYPES[number];
9
51
  /**
@@ -21,6 +63,22 @@ declare const NODE_TYPE_GROUPS: {
21
63
  };
22
64
  declare const EDGE_RELATIONSHIPS: readonly ["connects_to", "reads_from", "writes_to", "calls", "contains", "depends_on"];
23
65
  type EdgeRelationship = typeof EDGE_RELATIONSHIPS[number];
66
+ /** Billing period the amount covers. Cross-period rollup is bucketed, never normalized (5.1). */
67
+ declare const COST_PERIODS: readonly ["hourly", "daily", "monthly", "yearly"];
68
+ type CostPeriod = typeof COST_PERIODS[number];
69
+ /** Attributed cost for one billing period (3.3). Amount is in `currency` per `period`. */
70
+ declare const CostEntrySchema: z.ZodObject<{
71
+ amount: z.ZodNumber;
72
+ currency: z.ZodString;
73
+ period: z.ZodEnum<{
74
+ hourly: "hourly";
75
+ daily: "daily";
76
+ monthly: "monthly";
77
+ yearly: "yearly";
78
+ }>;
79
+ source: z.ZodOptional<z.ZodString>;
80
+ }, z.core.$strip>;
81
+ type CostEntry = z.infer<typeof CostEntrySchema>;
24
82
  declare const NodeSchema: z.ZodObject<{
25
83
  id: z.ZodString;
26
84
  type: z.ZodEnum<{
@@ -49,6 +107,18 @@ declare const NodeSchema: z.ZodObject<{
49
107
  domain: z.ZodOptional<z.ZodString>;
50
108
  subDomain: z.ZodOptional<z.ZodString>;
51
109
  qualityScore: z.ZodOptional<z.ZodNumber>;
110
+ owner: z.ZodOptional<z.ZodString>;
111
+ cost: z.ZodOptional<z.ZodObject<{
112
+ amount: z.ZodNumber;
113
+ currency: z.ZodString;
114
+ period: z.ZodEnum<{
115
+ hourly: "hourly";
116
+ daily: "daily";
117
+ monthly: "monthly";
118
+ yearly: "yearly";
119
+ }>;
120
+ source: z.ZodOptional<z.ZodString>;
121
+ }, z.core.$strip>>;
52
122
  }, z.core.$strip>;
53
123
  type DiscoveryNode = z.infer<typeof NodeSchema>;
54
124
  declare const EdgeSchema: z.ZodObject<{
@@ -66,6 +136,32 @@ declare const EdgeSchema: z.ZodObject<{
66
136
  confidence: z.ZodDefault<z.ZodNumber>;
67
137
  }, z.core.$strip>;
68
138
  type DiscoveryEdge = z.infer<typeof EdgeSchema>;
139
+ /**
140
+ * Per-employee sharing levels, ordered most-private → least-private:
141
+ * - `none` — share nothing (the opt-in default; nothing leaves the machine).
142
+ * - `anonymized` — pseudonymize identifying fields (host/user/path/private-IP) via
143
+ * an org-keyed, admin-reversible HMAC while preserving topology shape.
144
+ * - `full` — share the raw record verbatim.
145
+ */
146
+ declare const SHARING_LEVELS: readonly ["none", "anonymized", "full"];
147
+ declare const SharingLevelSchema: z.ZodEnum<{
148
+ none: "none";
149
+ anonymized: "anonymized";
150
+ full: "full";
151
+ }>;
152
+ type SharingLevel = typeof SHARING_LEVELS[number];
153
+ /**
154
+ * A resolved sharing policy: the global `defaultLevel` (the `'*'` row) plus
155
+ * remembered pattern overrides (glob over the node id). The most-specific
156
+ * matching override wins; the default applies when nothing matches.
157
+ */
158
+ interface SharingPolicy {
159
+ defaultLevel: SharingLevel;
160
+ overrides: {
161
+ pattern: string;
162
+ level: SharingLevel;
163
+ }[];
164
+ }
69
165
  declare const DataAssetSchema: z.ZodObject<{
70
166
  id: z.ZodString;
71
167
  name: z.ZodString;
@@ -116,6 +212,13 @@ interface NodeRow extends DiscoveryNode {
116
212
  discoveredAt: string;
117
213
  depth: number;
118
214
  pathId?: string;
215
+ /**
216
+ * Org-scoped human-readable global identity (`{tenant}:{normalizedId}`); the
217
+ * same logical resource collapses to one `globalId` across machines (2.9).
218
+ */
219
+ globalId?: string;
220
+ /** Secondary dedup key (sha256 over type + name + key-meta) that catches `id` drift between machines (2.9). */
221
+ contentHash?: string;
119
222
  }
120
223
  interface EdgeRow extends DiscoveryEdge {
121
224
  id: string;
@@ -129,19 +232,681 @@ interface SessionRow {
129
232
  startedAt: string;
130
233
  completedAt?: string;
131
234
  config: string;
235
+ /** Human-friendly, deterministically-derived label (e.g. "infra+data · 42 nodes · 2026-06-11"). */
236
+ name?: string;
237
+ /** Tenant/organization partition this session belongs to. Defaults to `'local'`. */
238
+ tenant: string;
239
+ /**
240
+ * Source attribution captured at session creation (2.9). Local-only — these
241
+ * identifying fields never leave the machine; off-machine sharing (2.11) and
242
+ * anonymization/consent (2.10) are deferred.
243
+ */
244
+ hostname?: string;
245
+ user?: string;
246
+ machineId?: string;
247
+ /**
248
+ * Raw `--org` / `config.organization` value as supplied (provenance). The
249
+ * normalized form is {@link tenant} — the org-scope partition introduced by 2.8.
250
+ */
251
+ organization?: string;
252
+ /**
253
+ * ISO 8601 UTC timestamp of the last in-place rescan of this session (2.1).
254
+ * `undefined`/NULL until the session is rescanned via incremental discovery —
255
+ * the freshness signal for scheduled discovery (2.5) to build on.
256
+ */
257
+ lastScannedAt?: string;
258
+ }
259
+ /**
260
+ * One observation of a logical node from a single machine. Accumulated in the
261
+ * `node_contributors` table (keyed by `(global_id, machine_id)`); never anonymized
262
+ * in 2.9 (that is 2.10) and never transmitted off-machine in 2.9 (that is 2.11).
263
+ */
264
+ interface Contributor {
265
+ machineId: string;
266
+ hostname: string;
267
+ user: string;
268
+ /** Effective org-scope of the contribution (the session's tenant). */
269
+ organization?: string;
270
+ /** ISO 8601 UTC timestamp of the contributing observation. */
271
+ at: string;
272
+ /** Confidence of the observation that produced this contribution (0–1). */
273
+ confidence: number;
274
+ }
275
+ /**
276
+ * Node fields whose change marks a node as `changed` in a topology diff.
277
+ * `confidence` is deliberately excluded — it fluctuates between scans (noise)
278
+ * and is reported separately as `confidenceDelta` rather than triggering drift.
279
+ */
280
+ declare const DRIFT_FIELDS: readonly ["type", "name", "domain", "subDomain", "qualityScore", "metadata", "tags", "owner", "cost"];
281
+ type DriftField = typeof DRIFT_FIELDS[number];
282
+ interface NodeChange {
283
+ id: string;
284
+ before: NodeRow;
285
+ after: NodeRow;
286
+ /** Which of DRIFT_FIELDS differ between `before` and `after`. */
287
+ changedFields: DriftField[];
288
+ /** Informational confidence delta (after − before); does not itself trigger drift. */
289
+ confidenceDelta: number;
290
+ }
291
+ declare const ANOMALY_KINDS: readonly ["orphan", "shadow-it"];
292
+ type AnomalyKind = typeof ANOMALY_KINDS[number];
293
+ declare const ANOMALY_SEVERITIES: readonly ["low", "medium", "high"];
294
+ type AnomalySeverity = typeof ANOMALY_SEVERITIES[number];
295
+ /** A standing structural anomaly within a single topology snapshot. Deterministic. */
296
+ interface Anomaly {
297
+ /** The flagged node, structured id "{type}:{id}" — never raw free-text. */
298
+ nodeId: string;
299
+ kind: AnomalyKind;
300
+ severity: AnomalySeverity;
301
+ /** Stable, human-readable explanation built only from nodeId + numeric scores. */
302
+ reason: string;
303
+ }
304
+ /** Resolved anomaly thresholds (defaults in `DEFAULT_ANOMALY_THRESHOLDS` unless overridden by config). */
305
+ interface AnomalyThresholds {
306
+ /** Degree at or below which a node is a weak-link orphan candidate (0 = isolated). */
307
+ orphanWeakDegree: number;
308
+ /** Confidence (0–1) below which an undomained node is shadow-IT. */
309
+ shadowConfidence: number;
310
+ /** qualityScore (0–100) below which an undomained node is shadow-IT. */
311
+ shadowQuality: number;
312
+ }
313
+ interface AnomalyConfig extends AnomalyThresholds {
314
+ /** When false, the engine short-circuits to an empty array (rollback flag). */
315
+ enabled: boolean;
316
+ }
317
+ /**
318
+ * Default anomaly thresholds. Defined here (not in `anomaly.ts`) so `defaultConfig`
319
+ * can reference them without a runtime cycle; `anomaly.ts` re-exports this constant.
320
+ */
321
+ declare const DEFAULT_ANOMALY_THRESHOLDS: AnomalyThresholds;
322
+ interface TopologyDiff {
323
+ base: {
324
+ sessionId: string;
325
+ startedAt: string;
326
+ nodeCount: number;
327
+ edgeCount: number;
328
+ };
329
+ current: {
330
+ sessionId: string;
331
+ startedAt: string;
332
+ nodeCount: number;
333
+ edgeCount: number;
334
+ };
335
+ nodes: {
336
+ added: NodeRow[];
337
+ removed: NodeRow[];
338
+ changed: NodeChange[];
339
+ unchanged: number;
340
+ };
341
+ edges: {
342
+ added: EdgeRow[];
343
+ removed: EdgeRow[];
344
+ unchanged: number;
345
+ };
346
+ summary: {
347
+ nodesAdded: number;
348
+ nodesRemoved: number;
349
+ nodesChanged: number;
350
+ edgesAdded: number;
351
+ edgesRemoved: number;
352
+ };
353
+ /** Standing anomalies in base vs current, plus those newly appearing in current (3.6). */
354
+ anomalies: {
355
+ base: Anomaly[];
356
+ current: Anomaly[];
357
+ added: Anomaly[];
358
+ };
359
+ }
360
+ /** Severity rank, ascending. `maxSeverity` and threshold filtering rely on this order. */
361
+ declare const SEVERITIES: readonly ["info", "warning", "critical"];
362
+ type Severity$1 = typeof SEVERITIES[number];
363
+ /**
364
+ * Free-form metadata keys (case-insensitive) whose change escalates a node-changed
365
+ * item to `critical`. Security-/exposure-relevant signals live only in the
366
+ * free-form `metadata` blob (there are no first-class security node fields).
367
+ */
368
+ declare const SECURITY_METADATA_KEYS: readonly ["publicexposure", "public", "exposed", "iamrole", "role", "encryption", "encrypted", "tls", "tlsenabled", "ports", "openports", "auth", "authentication"];
369
+ type DriftItemKind = 'node-added' | 'node-removed' | 'node-changed' | 'edge-added' | 'edge-removed';
370
+ interface DriftAlertItem {
371
+ kind: DriftItemKind;
372
+ /** Node id, or "source -rel-> target" for edges. */
373
+ ref: string;
374
+ /** Human-readable node/edge name for display. */
375
+ label: string;
376
+ nodeType?: NodeType;
377
+ severity: Severity$1;
378
+ /** Present for node-changed; subset of DRIFT_FIELDS that differ. */
379
+ changedFields?: DriftField[];
380
+ /** Present for node-changed; metadata keys that triggered escalation. */
381
+ securityFields?: string[];
382
+ }
383
+ interface DriftAlert {
384
+ base: TopologyDiff['base'];
385
+ current: TopologyDiff['current'];
386
+ summary: TopologyDiff['summary'];
387
+ /** Overall severity = max severity across items (info when no items). */
388
+ severity: Severity$1;
389
+ items: DriftAlertItem[];
390
+ /** ISO-8601 UTC generation time. */
391
+ generatedAt: string;
392
+ }
393
+ /** One configured drift sink. `url` is required when `type === 'webhook'`. */
394
+ interface DriftSinkConfig {
395
+ type: 'stdout' | 'webhook';
396
+ /** Required when type === 'webhook'. */
397
+ url?: string;
398
+ /** Optional bearer token; falls back to CARTOGRAPHY_DRIFT_TOKEN. */
399
+ token?: string;
400
+ timeoutMs?: number;
401
+ }
402
+ /**
403
+ * Opt-in drift-alerting block on {@link CartographyConfig}. Absent → the runner
404
+ * defaults to a single `stdout` sink at `minSeverity: 'info'` (everything stays
405
+ * local; no outbound traffic unless a `webhook` sink is explicitly configured).
406
+ */
407
+ interface DriftConfig {
408
+ /** Items below this severity are dropped before dispatch. Default 'info'. */
409
+ minSeverity: Severity$1;
410
+ sinks: DriftSinkConfig[];
411
+ }
412
+ /** Validate an externally-supplied drift block (CLI/env/future file loader). */
413
+ declare const DriftConfigSchema: z.ZodObject<{
414
+ minSeverity: z.ZodDefault<z.ZodEnum<{
415
+ info: "info";
416
+ warning: "warning";
417
+ critical: "critical";
418
+ }>>;
419
+ sinks: z.ZodDefault<z.ZodArray<z.ZodObject<{
420
+ type: z.ZodEnum<{
421
+ stdout: "stdout";
422
+ webhook: "webhook";
423
+ }>;
424
+ url: z.ZodOptional<z.ZodString>;
425
+ token: z.ZodOptional<z.ZodString>;
426
+ timeoutMs: z.ZodOptional<z.ZodNumber>;
427
+ }, z.core.$strip>>>;
428
+ }, z.core.$strip>;
429
+ /** Machine-readable result formats shared by `discover` (#67) and `schedule`. */
430
+ declare const OUTPUT_FORMATS: readonly ["text", "json", "stream-json"];
431
+ type OutputFormat = typeof OUTPUT_FORMATS[number];
432
+ /**
433
+ * A recurring-discovery schedule, read from a JSON config file. The `cron`
434
+ * string is a 5-field expression (min hour dom month dow) validated by
435
+ * `parseCron` in `schedule.ts`; the Zod schema only enforces non-emptiness so
436
+ * the config layer stays decoupled from the cron grammar.
437
+ */
438
+ declare const ScheduleConfigSchema: z.ZodObject<{
439
+ cron: z.ZodString;
440
+ entryPoints: z.ZodOptional<z.ZodArray<z.ZodString>>;
441
+ outputFormat: z.ZodDefault<z.ZodEnum<{
442
+ text: "text";
443
+ json: "json";
444
+ "stream-json": "stream-json";
445
+ }>>;
446
+ dbPath: z.ZodOptional<z.ZodString>;
447
+ }, z.core.$strict>;
448
+ type ScheduleConfig = z.infer<typeof ScheduleConfigSchema>;
449
+ /**
450
+ * Outbound central-DB connection (2.11). The first egress path Cartography has:
451
+ * after a scan, consented, policy-transformed deltas are pushed to this ingest
452
+ * endpoint over bearer-auth HTTPS. Presence of `url` *is* the feature flag — when
453
+ * absent the entire sync pipeline short-circuits and nothing ever networks.
454
+ *
455
+ * `.strict()` so a typo'd key in `config.json` fails loudly. The `token` is an
456
+ * opaque secret (never logged, never serialized into a payload); `org` is forwarded
457
+ * as a header so the central side (2.12) can scope ingest by tenant.
458
+ */
459
+ declare const CentralDbConfigSchema: z.ZodObject<{
460
+ url: z.ZodString;
461
+ token: z.ZodString;
462
+ org: z.ZodOptional<z.ZodString>;
463
+ batchSize: z.ZodOptional<z.ZodNumber>;
464
+ }, z.core.$strict>;
465
+ type CentralDbConfig = z.infer<typeof CentralDbConfigSchema>;
466
+ /**
467
+ * Read a {@link CentralDbConfig} from environment variables
468
+ * (`CARTOGRAPHY_CENTRAL_URL`/`_TOKEN`/`_ORG`), letting CI / secret-managers inject
469
+ * the token without a file. Returns a partial — only the keys actually present —
470
+ * so it composes field-wise over a `config.json` block. Never throws.
471
+ */
472
+ declare function centralDbFromEnv(env?: NodeJS.ProcessEnv): Partial<CentralDbConfig>;
473
+ /**
474
+ * Lifecycle status of one queued share item (2.11):
475
+ * - `pending` — new/unmatched, awaiting the employee's explicit review.
476
+ * - `approved` — cleared to leave (by `sync review`, or auto by a remembered rule).
477
+ * - `shared` — successfully pushed to the central ingest endpoint.
478
+ * - `withheld` — explicitly suppressed; never leaves.
479
+ *
480
+ * The load-bearing privacy invariant: only `approved` rows are ever pushed.
481
+ */
482
+ declare const PENDING_STATUSES: readonly ["pending", "approved", "shared", "withheld"];
483
+ type PendingStatus = typeof PENDING_STATUSES[number];
484
+ /**
485
+ * One row of the `pending_shares` review queue (2.11). `payload` is the *already
486
+ * policy-transformed* (anonymized/dropped) projection from `previewShare` — never
487
+ * raw node data for `anonymized`/`none` items — so what is queued is exactly what
488
+ * may leave. Keyed by `contentHash` (a hash of that transformed payload).
489
+ */
490
+ interface PendingShareRow {
491
+ contentHash: string;
492
+ sessionId: string;
493
+ nodeId?: string;
494
+ kind: 'node' | 'edge';
495
+ /** Policy-transformed payload (the exact bytes a push would send). */
496
+ payload: unknown;
497
+ status: PendingStatus;
498
+ /** Who decided: `'user'` (interactive review) or `'rule'` (remembered policy). */
499
+ decidedBy?: 'user' | 'rule';
500
+ createdAt: string;
501
+ decidedAt?: string;
502
+ sharedAt?: string;
503
+ }
504
+ /**
505
+ * Top-level shape of a `cartography.config.json` file. `.strict()` rejects
506
+ * unknown keys so typos fail loudly rather than being silently ignored. WS 2.11
507
+ * (central-org sync) extends this same schema with a `centralDb` block.
508
+ */
509
+ declare const ConfigFileSchema: z.ZodObject<{
510
+ schedule: z.ZodOptional<z.ZodObject<{
511
+ cron: z.ZodString;
512
+ entryPoints: z.ZodOptional<z.ZodArray<z.ZodString>>;
513
+ outputFormat: z.ZodDefault<z.ZodEnum<{
514
+ text: "text";
515
+ json: "json";
516
+ "stream-json": "stream-json";
517
+ }>>;
518
+ dbPath: z.ZodOptional<z.ZodString>;
519
+ }, z.core.$strict>>;
520
+ entryPoints: z.ZodOptional<z.ZodArray<z.ZodString>>;
521
+ dbPath: z.ZodOptional<z.ZodString>;
522
+ organization: z.ZodOptional<z.ZodString>;
523
+ centralDb: z.ZodOptional<z.ZodObject<{
524
+ url: z.ZodString;
525
+ token: z.ZodString;
526
+ org: z.ZodOptional<z.ZodString>;
527
+ batchSize: z.ZodOptional<z.ZodNumber>;
528
+ }, z.core.$strict>>;
529
+ }, z.core.$strict>;
530
+ type ConfigFile = z.infer<typeof ConfigFileSchema>;
531
+ /**
532
+ * One persisted scheduled-discovery run (2.5). Records what changed between this
533
+ * run's session and the prior one, with the full {@link TopologyDelta} for audit
534
+ * and the summary counts for fast querying. `baseSessionId` is `undefined` on the
535
+ * very first run (no prior topology — everything is `added`).
536
+ */
537
+ interface DriftRunRow {
538
+ id: string;
539
+ sessionId: string;
540
+ baseSessionId?: string;
541
+ /** ISO 8601 UTC timestamp this run was recorded. */
542
+ ranAt: string;
543
+ summary: {
544
+ nodesAdded: number;
545
+ nodesRemoved: number;
546
+ nodesChanged: number;
547
+ edgesAdded: number;
548
+ edgesRemoved: number;
549
+ };
550
+ delta: TopologyDelta;
132
551
  }
552
+ /**
553
+ * Agent backend selectable via `--provider` / `CARTOGRAPHY_PROVIDER`. Defined here
554
+ * (in the dependency-free types module) and re-exported from `providers/types.ts`
555
+ * so `defaultConfig` can reference it without a runtime cycle.
556
+ */
557
+ type ProviderName = 'claude' | 'openai' | 'ollama';
133
558
  interface CartographyConfig {
134
559
  maxDepth: number;
135
560
  maxTurns: number;
136
561
  entryPoints: string[];
562
+ /** Agent backend. Defaults to `'claude'`; selected by `--provider` / `CARTOGRAPHY_PROVIDER`. */
563
+ provider: ProviderName;
564
+ /** Lead/discovery model. Back-compat alias for `models.lead` (kept in sync by defaultConfig). */
137
565
  agentModel: string;
566
+ /** Model roles: `lead` drives discovery, `fast` powers cheaper helper tasks (e.g. chat). */
567
+ models: {
568
+ lead: string;
569
+ fast: string;
570
+ };
138
571
  organization?: string;
139
572
  outputDir: string;
140
573
  dbPath: string;
141
574
  verbose: boolean;
575
+ /** Max size of a single scan-tool response returned to the agent (guards the context window). NOTE(review): previously documented as "characters" while the field name says bytes — confirm the unit. */
576
+ maxToolResponseBytes: number;
577
+ /** Explicit allowlist of scanner plugin package names to load (opt-in / consent-first). Default `[]`. */
578
+ plugins: string[];
579
+ /**
580
+ * Optional recurring-discovery schedule (2.5), populated from a config file by
581
+ * `loadConfig`. `undefined` for every existing/CLI caller — additive only.
582
+ */
583
+ schedule?: ScheduleConfig;
584
+ /**
585
+ * Optional central-DB outbound sync target (2.11). `undefined` for every caller
586
+ * unless configured via `config.json` (`centralDb` block), the
587
+ * `CARTOGRAPHY_CENTRAL_*` env vars, or an explicit override. Absent = the sync
588
+ * pipeline is fully inert (no classify, no queue, no push).
589
+ */
590
+ centralDb?: CentralDbConfig;
591
+ /**
592
+ * Optional anomaly-detection thresholds (3.6). `undefined` for every existing
593
+ * caller — `defaultConfig` populates it from `DEFAULT_ANOMALY_THRESHOLDS`, and the
594
+ * engine falls back to those defaults when absent (optional-deps-degrade).
595
+ */
596
+ anomaly?: AnomalyConfig;
597
+ /**
598
+ * Optional drift-alerting block (3.1). `undefined` for every existing/CLI caller
599
+ * (additive only); when absent the drift runner defaults to a local `stdout` sink.
600
+ * No outbound traffic unless an operator configures a `webhook` sink.
601
+ */
602
+ drift?: DriftConfig;
142
603
  }
604
+ /** Default lead (discovery) model. */
605
+ declare const DEFAULT_LEAD_MODEL = "claude-sonnet-4-5-20250929";
606
+ /** Default fast model for helper tasks (chat, summaries). */
607
+ declare const DEFAULT_FAST_MODEL = "claude-haiku-4-5-20251001";
143
608
  declare function defaultConfig(overrides?: Partial<CartographyConfig>): CartographyConfig;
144
609
 
610
+ /**
611
+ * Compliance scoring (3.4) — schemas + types.
612
+ *
613
+ * Rulesets are **declarative data** (a serializable `RuleCheck` expression tree)
614
+ * interpreted by a single trusted engine — never executable predicate code. `field`
615
+ * and `pattern` are closed enums, so a ruleset can neither reach arbitrary node
616
+ * properties nor inject a ReDoS-prone regex. Every bundled ruleset is
617
+ * `RulesetSchema.parse`d at module load (fail-fast on malformed data).
618
+ */
619
+
620
+ /** Which nodes a rule applies to. Empty scope (no groups/types) = all nodes. */
621
+ declare const RuleScopeSchema: z.ZodObject<{
622
+ groups: z.ZodOptional<z.ZodArray<z.ZodEnum<{
623
+ [x: string]: string;
624
+ }>>>;
625
+ types: z.ZodOptional<z.ZodArray<z.ZodEnum<{
626
+ host: "host";
627
+ database_server: "database_server";
628
+ database: "database";
629
+ table: "table";
630
+ web_service: "web_service";
631
+ api_endpoint: "api_endpoint";
632
+ cache_server: "cache_server";
633
+ message_broker: "message_broker";
634
+ queue: "queue";
635
+ topic: "topic";
636
+ container: "container";
637
+ pod: "pod";
638
+ k8s_cluster: "k8s_cluster";
639
+ config_file: "config_file";
640
+ saas_tool: "saas_tool";
641
+ unknown: "unknown";
642
+ }>>>;
643
+ }, z.core.$strip>;
644
+ type RuleScope = z.infer<typeof RuleScopeSchema>;
645
+ declare const ConditionSchema: z.ZodObject<{
646
+ field: z.ZodEnum<{
647
+ type: "type";
648
+ name: "name";
649
+ domain: "domain";
650
+ subDomain: "subDomain";
651
+ qualityScore: "qualityScore";
652
+ tags: "tags";
653
+ owner: "owner";
654
+ confidence: "confidence";
655
+ metadataKeys: "metadataKeys";
656
+ metadataValues: "metadataValues";
657
+ }>;
658
+ op: z.ZodEnum<{
659
+ includes: "includes";
660
+ present: "present";
661
+ absent: "absent";
662
+ lt: "lt";
663
+ lte: "lte";
664
+ gt: "gt";
665
+ gte: "gte";
666
+ eq: "eq";
667
+ matches: "matches";
668
+ }>;
669
+ value: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber]>>;
670
+ pattern: z.ZodOptional<z.ZodEnum<{
671
+ dsn_with_credentials: "dsn_with_credentials";
672
+ owner_key: "owner_key";
673
+ public_exposure: "public_exposure";
674
+ }>>;
675
+ }, z.core.$strip>;
676
+ type Condition = z.infer<typeof ConditionSchema>;
677
+ /** A serializable check: a leaf Condition or an all/any/not combinator. */
678
+ type RuleCheck = Condition | {
679
+ all: RuleCheck[];
680
+ } | {
681
+ any: RuleCheck[];
682
+ } | {
683
+ not: RuleCheck;
684
+ };
685
+ declare const RuleCheckSchema: z.ZodType<RuleCheck>;
686
+ declare const SeveritySchema: z.ZodEnum<{
687
+ low: "low";
688
+ medium: "medium";
689
+ high: "high";
690
+ critical: "critical";
691
+ }>;
692
+ type Severity = z.infer<typeof SeveritySchema>;
693
+ /** Severity → score weight (decision #5). */
694
+ declare const SEVERITY_WEIGHT: Record<Severity, number>;
695
+ declare const ComplianceRuleSchema: z.ZodObject<{
696
+ id: z.ZodString;
697
+ control: z.ZodString;
698
+ framework: z.ZodEnum<{
699
+ CIS: "CIS";
700
+ SOC2: "SOC2";
701
+ ISO27001: "ISO27001";
702
+ baseline: "baseline";
703
+ }>;
704
+ title: z.ZodString;
705
+ severity: z.ZodEnum<{
706
+ low: "low";
707
+ medium: "medium";
708
+ high: "high";
709
+ critical: "critical";
710
+ }>;
711
+ rationale: z.ZodString;
712
+ scope: z.ZodObject<{
713
+ groups: z.ZodOptional<z.ZodArray<z.ZodEnum<{
714
+ [x: string]: string;
715
+ }>>>;
716
+ types: z.ZodOptional<z.ZodArray<z.ZodEnum<{
717
+ host: "host";
718
+ database_server: "database_server";
719
+ database: "database";
720
+ table: "table";
721
+ web_service: "web_service";
722
+ api_endpoint: "api_endpoint";
723
+ cache_server: "cache_server";
724
+ message_broker: "message_broker";
725
+ queue: "queue";
726
+ topic: "topic";
727
+ container: "container";
728
+ pod: "pod";
729
+ k8s_cluster: "k8s_cluster";
730
+ config_file: "config_file";
731
+ saas_tool: "saas_tool";
732
+ unknown: "unknown";
733
+ }>>>;
734
+ }, z.core.$strip>;
735
+ applicableWhen: z.ZodOptional<z.ZodType<RuleCheck, unknown, z.core.$ZodTypeInternals<RuleCheck, unknown>>>;
736
+ check: z.ZodType<RuleCheck, unknown, z.core.$ZodTypeInternals<RuleCheck, unknown>>;
737
+ }, z.core.$strip>;
738
+ type ComplianceRule = z.infer<typeof ComplianceRuleSchema>;
739
+ declare const RulesetSchema: z.ZodObject<{
740
+ name: z.ZodString;
741
+ version: z.ZodString;
742
+ framework: z.ZodString;
743
+ description: z.ZodString;
744
+ rules: z.ZodArray<z.ZodObject<{
745
+ id: z.ZodString;
746
+ control: z.ZodString;
747
+ framework: z.ZodEnum<{
748
+ CIS: "CIS";
749
+ SOC2: "SOC2";
750
+ ISO27001: "ISO27001";
751
+ baseline: "baseline";
752
+ }>;
753
+ title: z.ZodString;
754
+ severity: z.ZodEnum<{
755
+ low: "low";
756
+ medium: "medium";
757
+ high: "high";
758
+ critical: "critical";
759
+ }>;
760
+ rationale: z.ZodString;
761
+ scope: z.ZodObject<{
762
+ groups: z.ZodOptional<z.ZodArray<z.ZodEnum<{
763
+ [x: string]: string;
764
+ }>>>;
765
+ types: z.ZodOptional<z.ZodArray<z.ZodEnum<{
766
+ host: "host";
767
+ database_server: "database_server";
768
+ database: "database";
769
+ table: "table";
770
+ web_service: "web_service";
771
+ api_endpoint: "api_endpoint";
772
+ cache_server: "cache_server";
773
+ message_broker: "message_broker";
774
+ queue: "queue";
775
+ topic: "topic";
776
+ container: "container";
777
+ pod: "pod";
778
+ k8s_cluster: "k8s_cluster";
779
+ config_file: "config_file";
780
+ saas_tool: "saas_tool";
781
+ unknown: "unknown";
782
+ }>>>;
783
+ }, z.core.$strip>;
784
+ applicableWhen: z.ZodOptional<z.ZodType<RuleCheck, unknown, z.core.$ZodTypeInternals<RuleCheck, unknown>>>;
785
+ check: z.ZodType<RuleCheck, unknown, z.core.$ZodTypeInternals<RuleCheck, unknown>>;
786
+ }, z.core.$strip>>;
787
+ }, z.core.$strip>;
788
+ type Ruleset = z.infer<typeof RulesetSchema>;
789
+ declare const ControlResultSchema: z.ZodObject<{
790
+ ruleId: z.ZodString;
791
+ control: z.ZodString;
792
+ framework: z.ZodString;
793
+ severity: z.ZodEnum<{
794
+ low: "low";
795
+ medium: "medium";
796
+ high: "high";
797
+ critical: "critical";
798
+ }>;
799
+ status: z.ZodEnum<{
800
+ pass: "pass";
801
+ fail: "fail";
802
+ not_applicable: "not_applicable";
803
+ }>;
804
+ applicableCount: z.ZodNumber;
805
+ passedCount: z.ZodNumber;
806
+ failingNodeIds: z.ZodArray<z.ZodString>;
807
+ }, z.core.$strip>;
808
+ type ControlResult = z.infer<typeof ControlResultSchema>;
809
+ declare const ComplianceReportSchema: z.ZodObject<{
810
+ rulesetName: z.ZodString;
811
+ rulesetVersion: z.ZodString;
812
+ generatedAt: z.ZodString;
813
+ score: z.ZodNullable<z.ZodNumber>;
814
+ status: z.ZodEnum<{
815
+ pass: "pass";
816
+ fail: "fail";
817
+ not_applicable: "not_applicable";
818
+ }>;
819
+ totals: z.ZodObject<{
820
+ rules: z.ZodNumber;
821
+ applicable: z.ZodNumber;
822
+ passed: z.ZodNumber;
823
+ failed: z.ZodNumber;
824
+ notApplicable: z.ZodNumber;
825
+ }, z.core.$strip>;
826
+ bySeverity: z.ZodRecord<z.ZodEnum<{
827
+ low: "low";
828
+ medium: "medium";
829
+ high: "high";
830
+ critical: "critical";
831
+ }>, z.ZodObject<{
832
+ passed: z.ZodNumber;
833
+ failed: z.ZodNumber;
834
+ }, z.core.$strip>>;
835
+ controls: z.ZodArray<z.ZodObject<{
836
+ ruleId: z.ZodString;
837
+ control: z.ZodString;
838
+ framework: z.ZodString;
839
+ severity: z.ZodEnum<{
840
+ low: "low";
841
+ medium: "medium";
842
+ high: "high";
843
+ critical: "critical";
844
+ }>;
845
+ status: z.ZodEnum<{
846
+ pass: "pass";
847
+ fail: "fail";
848
+ not_applicable: "not_applicable";
849
+ }>;
850
+ applicableCount: z.ZodNumber;
851
+ passedCount: z.ZodNumber;
852
+ failingNodeIds: z.ZodArray<z.ZodString>;
853
+ }, z.core.$strip>>;
854
+ gaps: z.ZodArray<z.ZodObject<{
855
+ ruleId: z.ZodString;
856
+ control: z.ZodString;
857
+ severity: z.ZodEnum<{
858
+ low: "low";
859
+ medium: "medium";
860
+ high: "high";
861
+ critical: "critical";
862
+ }>;
863
+ title: z.ZodString;
864
+ nodeIds: z.ZodArray<z.ZodString>;
865
+ }, z.core.$strip>>;
866
+ }, z.core.$strip>;
867
+ type ComplianceReport = z.infer<typeof ComplianceReportSchema>;
868
+
869
+ /** Attribution applied by an enrichment pass (3.3). `null` clears the field; `undefined` leaves it unchanged. */
870
+ interface NodeAttribution {
871
+ owner?: string | null;
872
+ cost?: CostEntry | null;
873
+ }
874
+
875
+ /** Default tenant for single-user / pre-migration installs. */
876
+ declare const DEFAULT_TENANT = "local";
877
+ /**
878
+ * Normalize an untrusted tenant id: strip invisible/control characters, trim,
879
+ * cap length, and enforce a conservative key charset. Falls back to DEFAULT_TENANT
880
+ * when the input is missing or invalid. The tenant is a data-scoping partition key
881
+ * (not an auth boundary — RBAC is Phase 4), so the rule is shared by the DB and MCP
882
+ * layers via this one helper.
883
+ */
884
+ declare function normalizeTenant(raw?: string | null): string;
885
+ /**
886
+ * Deterministic, pure normalization of a node id so the same logical resource
887
+ * yields the same key across machines: trim, lowercase, collapse internal
888
+ * whitespace, strip a single trailing default port (:80/:443).
889
+ */
890
+ declare function normalizeId(id: string): string;
891
+ /**
892
+ * Extract only the stable key-meta subset (`host`/`port`/`path`/`url`) from a
893
+ * metadata blob (pure). Never the whole blob, which carries machine-local noise
894
+ * such as timestamps and absolute paths.
895
+ */
896
+ declare function keyMetaOf(metadata: Record<string, unknown>): Record<string, unknown>;
897
+ /**
898
+ * Secondary dedup key: sha256 over type + normalized name + sorted key-meta.
899
+ * Catches `id` drift between machines for the same logical resource. Truncated
900
+ * to 32 hex chars (128 bits) — ample for collision resistance at org scale.
901
+ */
902
+ declare function contentHash(type: string, name: string, keyMeta: Record<string, unknown>): string;
903
+ /**
904
+ * Human-readable primary identity: org-scoped normalized id (`{org}:{normalizedId}`).
905
+ * `org` is the session's normalized tenant (2.8's partition) — the `'_'` sentinel
906
+ * isolates org-less sessions into their own namespace so two organizations never
907
+ * collapse onto one logical node even with identical infra.
908
+ */
909
+ declare function globalId(organization: string | undefined, id: string): string;
145
910
  interface ConnectionRow extends Connection {
146
911
  sessionId: string;
147
912
  createdAt: string;
@@ -162,7 +927,61 @@ interface GraphSummary {
162
927
  type: string;
163
928
  degree: number;
164
929
  }>;
930
+ /** Standing structural anomalies (orphans / shadow-IT), computed at read time (3.6). */
931
+ anomalies: Anomaly[];
932
+ /** Distinct machines that contributed to this session's nodes (2.9 attribution). */
933
+ contributors: number;
934
+ /** Cost rolled up by domain, bucketed by (currency, period) — never FX-normalized (3.3). */
935
+ costByDomain: Array<{
936
+ domain: string;
937
+ currency: string;
938
+ period: string;
939
+ total: number;
940
+ nodes: number;
941
+ }>;
942
+ /** Cost rolled up by owner, bucketed by (currency, period) (3.3). */
943
+ costByOwner: Array<{
944
+ owner: string;
945
+ currency: string;
946
+ period: string;
947
+ total: number;
948
+ nodes: number;
949
+ }>;
950
+ /** Nodes carrying a cost vs. total — attribution coverage (3.3). */
951
+ costCoverage: {
952
+ withCost: number;
953
+ total: number;
954
+ };
955
+ }
956
+ /**
957
+ * Org-wide aggregate index (2.12) — the central-collector analogue of
958
+ * {@link GraphSummary}. Scoped to a single tenant (`org`) rather than one session,
959
+ * so it merges every machine's contribution into one organization-wide topology.
960
+ */
961
+ interface OrgSummary {
962
+ org: string;
963
+ totals: {
964
+ nodes: number;
965
+ edges: number;
966
+ };
967
+ nodesByType: Record<string, number>;
968
+ nodesByDomain: Record<string, number>;
969
+ edgesByRelationship: Record<string, number>;
970
+ topConnected: Array<{
971
+ id: string;
972
+ name: string;
973
+ type: string;
974
+ degree: number;
975
+ }>;
976
+ /** Distinct machines that contributed to this org's nodes (2.9 attribution, org-wide). */
977
+ contributors: number;
165
978
  }
979
+ /**
980
+ * Derive a deterministic, human-friendly session label from its graph summary,
981
+ * e.g. `"infra+data · 42 nodes · 2026-06-11"`. Pure: same summary + timestamp
982
+ * always yields the same name. No LLM call.
983
+ */
984
+ declare function deriveSessionName(summary: GraphSummary, startedAt: string): string;
166
985
  /** Result of a recursive dependency traversal. */
167
986
  interface TraversalResult {
168
987
  root?: NodeRow;
@@ -185,6 +1004,8 @@ interface EventRow {
185
1004
  targetType?: string;
186
1005
  port?: number;
187
1006
  durationMs?: number;
1007
+ command?: string;
1008
+ resultBytes?: number;
188
1009
  }
189
1010
  interface TaskRow {
190
1011
  id: string;
@@ -210,7 +1031,12 @@ interface WorkflowRow {
210
1031
  }
211
1032
  declare class CartographyDB {
212
1033
  private db;
213
- constructor(dbPath: string);
1034
+ /** 3.6 anomaly settings; defaults apply when no `anomaly` config is supplied. */
1035
+ private readonly anomalyEnabled;
1036
+ private readonly anomalyThresholds;
1037
+ constructor(dbPath: string, opts?: {
1038
+ anomaly?: AnomalyConfig;
1039
+ });
214
1040
  private migrate;
215
1041
  close(): void;
216
1042
  /**
@@ -219,26 +1045,105 @@ declare class CartographyDB {
219
1045
  * virtual table. Prefer the typed methods above for everything else.
220
1046
  */
221
1047
  rawConnection(): Database.Database;
222
- createSession(mode: 'discover', config: CartographyConfig): string;
1048
+ /**
1049
+ * Create a discovery session, stamping its tenant from (in precedence order)
1050
+ * the explicit `tenantId` arg → `config.organization` → DEFAULT_TENANT. The
1051
+ * tenant is normalized once here; every child row written under this session
1052
+ * inherits it via {@link tenantOf}.
1053
+ */
1054
+ createSession(mode: 'discover', config: CartographyConfig, tenantId?: string): string;
1055
+ /** The tenant that owns a session (DEFAULT_TENANT if the session is unknown). */
1056
+ private tenantOf;
223
1057
  endSession(id: string): void;
224
1058
  getSession(id: string): SessionRow | undefined;
225
- getLatestSession(mode?: string): SessionRow | undefined;
226
- getSessions(): SessionRow[];
1059
+ /**
1060
+ * Resolve the newest session, optionally constrained to a `mode` and/or
1061
+ * `tenantId`. Omitting `tenantId` preserves the original (unscoped) behavior;
1062
+ * passing one returns only that tenant's sessions, which is how the tenant
1063
+ * boundary is enforced at session resolution.
1064
+ */
1065
+ getLatestSession(mode?: string, tenantId?: string): SessionRow | undefined;
1066
+ getSessions(tenantId?: string): SessionRow[];
227
1067
  private mapSession;
228
- upsertNode(sessionId: string, node: DiscoveryNode, depth?: number): void;
1068
+ /** Record that a session was (re-)scanned now (ISO 8601 UTC). */
1069
+ touchSession(id: string): void;
1070
+ /** Set (or clear) a session's human-friendly name. */
1071
+ setSessionName(id: string, name: string): void;
1072
+ /**
1073
+ * Compare two discovery sessions and report drift (added/removed/changed nodes
1074
+ * and added/removed edges). Read-only; no schema changes. Throws if either
1075
+ * session id does not exist.
1076
+ */
1077
+ diffSessions(baseId: string, currentId: string): TopologyDiff;
1078
+ /**
1079
+ * Score a session against a compliance ruleset (3.4) — a thin wrapper over the
1080
+ * pure `scoreTopology` engine (mirrors `diffSessions`). Throws only when the
1081
+ * session id is unknown; the engine never throws on data shape.
1082
+ */
1083
+ scoreSession(sessionId: string, ruleset: Ruleset, opts?: {
1084
+ now?: string;
1085
+ }): ComplianceReport;
1086
+ /**
1087
+ * The most recent session of `mode` (and optionally `tenantId`) other than
1088
+ * `excludeId`. Used by scheduled discovery to pick an unambiguous diff base
1089
+ * (newest-first by `rowid`), avoiding the `getLatestSession` just-created-session
1090
+ * ambiguity. Returns `undefined` when no such prior session exists.
1091
+ */
1092
+ getPreviousSession(excludeId: string, mode?: string, tenantId?: string): SessionRow | undefined;
1093
+ /**
1094
+ * Persist one scheduled-discovery drift run: the summary counts plus the full
1095
+ * {@link TopologyDelta} (for audit/replay). Returns the generated row id.
1096
+ * `ranAt` is stamped now (ISO 8601 UTC).
1097
+ */
1098
+ recordDriftRun(sessionId: string, baseSessionId: string | undefined, delta: TopologyDelta): string;
1099
+ /** Recent drift runs, newest-first, capped at `limit` rows (default 50). */
1100
+ getDriftRuns(limit?: number): DriftRunRow[];
1101
+ /** The most recent drift run, or `undefined` when none has been recorded. */
1102
+ getLatestDriftRun(): DriftRunRow | undefined;
1103
+ private mapDriftRun;
1104
+ upsertNode(sessionId: string, node: DiscoveryNode, depth?: number, attribution?: Contributor): void;
1105
+ /** All contributors that observed a logical node, ordered by observation time. */
1106
+ getContributors(globalId: string): Contributor[];
229
1107
  getNodes(sessionId: string, opts?: {
230
1108
  limit?: number;
231
1109
  offset?: number;
232
1110
  }): NodeRow[];
233
1111
  getNodeCount(sessionId: string): number;
234
1112
  private mapNode;
1113
+ /**
1114
+ * Update only the cost/owner of an existing node, without touching any other
1115
+ * field (unlike upsertNode's INSERT OR REPLACE) — the idempotent enrichment
1116
+ * primitive (3.3). `undefined` leaves a field unchanged; `null` clears it.
1117
+ * No-op (returns false) if the node is absent. Cost is re-validated before write.
1118
+ */
1119
+ enrichNodeAttribution(sessionId: string, nodeId: string, attr: NodeAttribution): boolean;
235
1120
  deleteNode(sessionId: string, nodeId: string): void;
236
1121
  insertEdge(sessionId: string, edge: DiscoveryEdge): void;
1122
+ /**
1123
+ * Delete every edge matching the logical key (source, target, relationship)
1124
+ * within a session. `insertEdge` writes a random PK, so logical identity is the
1125
+ * only way to prune an edge that disappeared while both endpoints survived.
1126
+ */
1127
+ deleteEdgeByKey(sessionId: string, sourceId: string, targetId: string, relationship: string): void;
1128
+ /**
1129
+ * Apply a precomputed {@link TopologyDelta} to one session in a single
1130
+ * transaction (2.1 incremental discovery): prune removed nodes (cascading their
1131
+ * edges via {@link deleteNode}), upsert added/changed nodes, delete removed edges
1132
+ * by logical key, insert added edges, and stamp `last_scanned_at`. Unchanged rows
1133
+ * are left untouched (stable `discovered_at`).
1134
+ *
1135
+ * `attribution` (2.9) is forwarded to every added/changed node's upsert so a
1136
+ * rescan keeps/appends the running machine's contributor instead of leaving it
1137
+ * out. Unchanged nodes are not re-upserted, so their existing contributors
1138
+ * survive the rescan. `NodeRow extends DiscoveryNode`, so the delta rows pass
1139
+ * straight into `upsertNode` with no mapper.
1140
+ */
1141
+ applyTopologyDelta(sessionId: string, delta: TopologyDelta, attribution?: Contributor): void;
237
1142
  getEdges(sessionId: string, opts?: {
238
1143
  limit?: number;
239
1144
  offset?: number;
240
1145
  }): EdgeRow[];
241
- insertEvent(sessionId: string, event: Pick<EventRow, 'eventType' | 'process' | 'pid' | 'target' | 'targetType' | 'port'>, taskId?: string): void;
1146
+ insertEvent(sessionId: string, event: Pick<EventRow, 'eventType' | 'process' | 'pid' | 'target' | 'targetType' | 'port'> & Partial<Pick<EventRow, 'command' | 'resultBytes'>>, taskId?: string): void;
242
1147
  getEvents(sessionId: string, since?: string): EventRow[];
243
1148
  startTask(sessionId: string, description?: string): string;
244
1149
  endCurrentTask(sessionId: string): void;
@@ -253,6 +1158,63 @@ declare class CartographyDB {
253
1158
  deleteConnection(sessionId: string, connectionId: string): void;
254
1159
  setApproval(pattern: string, action: 'save' | 'ignore' | 'auto'): void;
255
1160
  getApproval(pattern: string): string | undefined;
1161
+ /**
1162
+ * Set (or replace) the sharing level for a pattern. The `'*'` pattern is the
1163
+ * global default; any other pattern is an override (glob over the node id).
1164
+ * Validated via {@link SharingLevelSchema} before write; `created_at` is ISO UTC.
1165
+ */
1166
+ setSharingLevel(pattern: string, level: SharingLevel): void;
1167
+ /**
1168
+ * The full sharing policy: the `'*'` row resolves to `defaultLevel` (`'none'`
1169
+ * when absent — the opt-in floor), every other row becomes an override. The
1170
+ * glob-precedence resolution itself lives in `src/sharing.ts` so it is unit
1171
+ * testable in isolation; this returns the raw policy it consumes.
1172
+ */
1173
+ getSharingPolicy(): SharingPolicy;
1174
+ /** Remove a pattern override. The global default (`'*'`) cannot be cleared this way. */
1175
+ clearSharingOverride(pattern: string): void;
1176
+ /**
1177
+ * Persist the encrypted plaintext behind a pseudonym token. Idempotent: the
1178
+ * token is deterministic, so repeated writes `INSERT OR REPLACE` and never grow
1179
+ * the table. `ciphertext` is base64(iv ‖ tag ‖ AES-256-GCM(plaintext)).
1180
+ */
1181
+ saveReversal(token: string, ciphertext: string): void;
1182
+ /** Read the stored ciphertext for a pseudonym token (admin reversal path). */
1183
+ getReversal(token: string): string | undefined;
1184
+ /**
1185
+ * Enqueue one proposed share item. Idempotent via `INSERT OR IGNORE` on the
1186
+ * `content_hash` PK: re-classifying the same (transformed) item never duplicates
1187
+ * a row nor resets an existing decision. `payload` is the already-policy-
1188
+ * transformed projection (the exact bytes a push would send) — never raw node
1189
+ * data for `anonymized`/`none` items.
1190
+ */
1191
+ enqueuePending(item: {
1192
+ contentHash: string;
1193
+ sessionId: string;
1194
+ nodeId?: string;
1195
+ kind: 'node' | 'edge';
1196
+ payload: unknown;
1197
+ status: PendingStatus;
1198
+ decidedBy?: 'user' | 'rule';
1199
+ }): void;
1200
+ /** Queued share items, optionally filtered by status and/or session. */
1201
+ getPendingShares(filter?: {
1202
+ status?: PendingStatus;
1203
+ sessionId?: string;
1204
+ }): PendingShareRow[];
1205
+ /** Queue size by status (every status key present, zero-filled). */
1206
+ countPendingByStatus(): Record<PendingStatus, number>;
1207
+ /** `content_hash` values already pushed (status `shared`) — for re-share suppression. */
1208
+ getSharedHashes(): Set<string>;
1209
+ /**
1210
+ * Transition one queued item. Stamps `decided_at` on any non-`pending` status and
1211
+ * `shared_at` when moving to `shared`. `decidedBy` records the actor (`'user'` or
1212
+ * `'rule'`) for the audit trail.
1213
+ */
1214
+ setPendingStatus(contentHash: string, status: PendingStatus, decidedBy?: 'user' | 'rule'): void;
1215
+ /** Approved items cleared to push (FIFO), optionally capped by `limit`. */
1216
+ getApprovedShares(limit?: number): PendingShareRow[];
1217
+ private mapPendingShare;
256
1218
  /**
257
1219
  * Delete a session and all its associated data (nodes, edges, events, tasks, workflows, connections).
258
1220
  */
@@ -286,6 +1248,55 @@ declare class CartographyDB {
286
1248
  }): TraversalResult;
287
1249
  /** Lightweight aggregate index of the whole topology — the progressive-disclosure summary. */
288
1250
  getGraphSummary(sessionId: string): GraphSummary;
1251
+ /**
1252
+ * Resolve (creating once) the synthetic collector session that owns every
1253
+ * central node/edge for a tenant. Central nodes are merged by `(org, global_id)`,
1254
+ * not by session, so they live under a single deterministic session id
1255
+ * (`central:{org}`) — this satisfies the existing `(id, session_id)` node PK and
1256
+ * the `session_id` foreign key without a destructive schema change. Idempotent.
1257
+ */
1258
+ ensureCentralSession(org: string): string;
1259
+ /**
1260
+ * Find an existing central node within a tenant by its primary identity
1261
+ * (`global_id`), returning its stored `id` so a merge keeps a single row.
1262
+ */
1263
+ findCentralNodeIdByGlobalId(org: string, gid: string): string | undefined;
1264
+ /**
1265
+ * Secondary merge lookup: an existing central node in the tenant whose
1266
+ * `content_hash` matches (catches `id` drift between machines for the same
1267
+ * logical resource). Returns its stored `id` and `global_id`.
1268
+ */
1269
+ findCentralNodeByContentHash(org: string, ch: string): {
1270
+ id: string;
1271
+ globalId: string;
1272
+ } | undefined;
1273
+ /**
1274
+ * Merge one incoming node into the central store for a tenant and append the
1275
+ * contributor (2.12). Resolves identity by `(tenant, global_id)` primary, then
1276
+ * `(tenant, content_hash)` secondary; on a hit it keeps the existing row's id
1277
+ * (so the logical node stays a single row), unions tags, keeps the higher
1278
+ * confidence, and merges metadata (incoming values win on key conflict). The
1279
+ * incoming `globalId`/`contentHash` are precomputed by the merge core so they
1280
+ * are consistent with what the lookups used. Returns whether a new row was
1281
+ * created or an existing one was merged. Runs in one transaction.
1282
+ */
1283
+ upsertCentralNode(org: string, node: DiscoveryNode, identity: {
1284
+ globalId: string;
1285
+ contentHash: string;
1286
+ }, contributor: Contributor): 'created' | 'merged';
1287
+ /** Insert an edge into the central store for a tenant (idempotent on logical key). */
1288
+ insertCentralEdge(org: string, edge: DiscoveryEdge): void;
1289
+ /** A central node by tenant + stored id (the merge target after identity resolution). */
1290
+ getCentralNode(org: string, sessionId: string, nodeId: string): NodeRow | undefined;
1291
+ /** All contributors for a logical (global_id) node across an org. */
1292
+ getContributorsByGlobalId(gid: string): Contributor[];
1293
+ /**
1294
+ * Org-wide aggregate summary (2.12) — the central analogue of
1295
+ * {@link getGraphSummary}, scoped `WHERE tenant = ?` so it merges every machine's
1296
+ * contribution into one organization-wide view. Cross-tenant isolation is
1297
+ * structural: org A's rows never appear in org B's counts.
1298
+ */
1299
+ getOrgSummary(org: string): OrgSummary;
289
1300
  getStats(sessionId: string): {
290
1301
  nodes: number;
291
1302
  edges: number;
@@ -295,78 +1306,557 @@ declare class CartographyDB {
295
1306
  }
296
1307
 
297
1308
  /**
298
- * The Cartography MCP server — the package's primary, LLM-agnostic interface.
1309
+ * `StoreBackend` — the persistence seam for the central collector (2.12).
299
1310
  *
300
- * It exposes the discovered infrastructure topology as Model Context Protocol
301
- * **Resources** (read-only context, progressive disclosure), a small set of query
302
- * **Tools** (parameterized lookups), and reusable **Prompts**. Any MCP host —
303
- * Claude Code, Cursor, Cline, Windsurf, the Vercel AI SDK, LangGraph — can drive
304
- * it; the package never needs to know which model is in use.
1311
+ * The central collector merges consented discovery deltas from many machines into
1312
+ * one organization-wide, tenant-partitioned topology. Today there is exactly one
1313
+ * shipping implementation, {@link SqliteStoreBackend}, which wraps the existing
1314
+ * `CartographyDB`. The interface exists so a graph/Postgres backend (4.3) can be
1315
+ * dropped in without touching the ingest orchestration the ingest core
1316
+ * (`src/central/ingest.ts`) talks only to this interface, never to SQLite directly.
1317
+ *
1318
+ * The contract is deliberately minimal: an org-scoped node upsert (merge-by-identity
1319
+ * with contributor attribution), an org-scoped edge insert, an org-wide summary, and
1320
+ * a contributor read for audit/tests. Writes and the summary are tenant-scoped by `org`, and the
1321
+ * contributor read by the org-prefixed `globalId` — there is no un-scoped path, so cross-tenant isolation is structural.
305
1322
  */
306
1323
 
307
- /** A pluggable search backend; defaults to lexical search, can be upgraded to semantic. */
308
- type SearchFn = (db: CartographyDB, sessionId: string, query: string, opts: {
309
- types?: readonly string[];
310
- limit: number;
311
- }) => Promise<Array<{
312
- node: NodeRow;
313
- score?: number;
314
- }>>;
315
- /** A pluggable discovery backend invoked by the `run_discovery` tool. */
316
- type DiscoveryFn = (db: CartographyDB, sessionId: string, opts: {
317
- hint?: string;
318
- }) => Promise<{
319
- nodes: number;
320
- edges: number;
321
- }>;
322
- interface CreateMcpServerOptions {
323
- /** Database instance. If omitted, one is opened at `config.dbPath`. */
324
- db?: CartographyDB;
325
- /** Path to the SQLite catalog (used when `db` is not provided). */
326
- dbPath?: string;
327
- /** Session to serve: a session id, or `'latest'` (default) for the newest discovery. */
328
- session?: string | 'latest';
329
- /** Semantic/lexical search backend. Defaults to lexical `searchNodes`. */
330
- search?: SearchFn;
331
- /** Discovery backend for `run_discovery`/`refresh`. Optional. */
332
- discovery?: DiscoveryFn;
1324
+ /** Precomputed merge identity for an incoming node (from the merge core). */
1325
+ interface NodeIdentity {
1326
+ /** Primary merge key — org-scoped normalized id (`{org}:{normalizedId}`). */
1327
+ globalId: string;
1328
+ /** Secondary merge key — content hash that catches `id` drift between machines. */
1329
+ contentHash: string;
333
1330
  }
334
1331
  /**
335
- * Build a fully-configured Cartography MCP server. Call `.connect(transport)` to run it.
1332
+ * A provider-agnostic central store. All operations are scoped to a single tenant
1333
+ * (`org`); there is no cross-tenant read or write path.
336
1334
  */
337
- declare function createMcpServer(opts?: CreateMcpServerOptions): McpServer;
1335
+ interface StoreBackend {
1336
+ /**
1337
+ * Merge one incoming node under `org`, keyed by `identity.globalId` (primary) then
1338
+ * `identity.contentHash` (secondary), and append/update the `contributor`. Returns
1339
+ * `'created'` when this is the first observation of the logical node, `'merged'`
1340
+ * when it collapsed onto an existing one.
1341
+ */
1342
+ upsertNode(org: string, node: DiscoveryNode, identity: NodeIdentity, contributor: Contributor): 'created' | 'merged';
1343
+ /** Insert an edge under `org` (idempotent on the logical `(source, target, relationship)` key). */
1344
+ insertEdge(org: string, edge: DiscoveryEdge): void;
1345
+ /** Org-wide aggregate summary (merged counts across all contributors). */
1346
+ getSummary(org: string): OrgSummary;
1347
+ /** Contributors for a merged logical node (test/audit helper). */
1348
+ getContributors(globalId: string): Contributor[];
1349
+ /** Release any underlying resources. */
1350
+ close(): void;
1351
+ }
338
1352
 
339
1353
  /**
340
- * Transport bindings for the Cartography MCP server.
1354
+ * `SqliteStoreBackend` — the default (and currently only) {@link StoreBackend}
1355
+ * implementation for the central collector (2.12).
341
1356
  *
342
- * - **stdio**: the local-first default — zero network, every client supports it.
343
- * - **Streamable HTTP**: a single `/mcp` endpoint for team/remote use, bound to
344
- * localhost with DNS-rebinding protection. The deprecated SSE transport is not used.
1357
+ * It is a thin adapter over `CartographyDB`: the schema, migrations, and merge SQL
1358
+ * all live in `src/db.ts` (the single owner of the catalog), so this class only
1359
+ * forwards the org-scoped central operations. Constructing it adds no new state and
1360
+ * no new schema — the zero-config local SQLite path is byte-for-byte unchanged.
1361
+ *
1362
+ * A graph/Postgres backend (4.3) implements the same interface without changing the
1363
+ * ingest orchestration that consumes it.
345
1364
  */
346
1365
 
347
- /** Connect a server over stdio (resolves when the transport closes). */
348
- declare function runStdio(server: McpServer): Promise<void>;
349
- interface HttpOptions {
350
- port?: number;
351
- host?: string;
352
- /** Extra allowed Host headers (defaults to localhost:port variants). */
353
- allowedHosts?: string[];
354
- /** Allowed Origin headers (defaults to none → same-origin only). */
355
- allowedOrigins?: string[];
1366
+ declare class SqliteStoreBackend implements StoreBackend {
1367
+ private readonly db;
1368
+ constructor(db: CartographyDB);
1369
+ upsertNode(org: string, node: DiscoveryNode, identity: NodeIdentity, contributor: Contributor): 'created' | 'merged';
1370
+ insertEdge(org: string, edge: DiscoveryEdge): void;
1371
+ getSummary(org: string): OrgSummary;
1372
+ getContributors(globalId: string): Contributor[];
1373
+ /**
1374
+ * No-op: the wrapped `CartographyDB` is owned by the caller (it is shared with the
1375
+ * read-side MCP server in server-mode), so the backend never closes it. The caller
1376
+ * closes the `CartographyDB` directly.
1377
+ */
1378
+ close(): void;
356
1379
  }
1380
+
357
1381
  /**
358
- * Start a Streamable HTTP server. A fresh MCP server instance is created per
359
- * session via `factory`, so multiple clients can connect concurrently.
1382
+ * `QueryBackend` — the **read-only** query seam for the API server (4.2).
1383
+ *
1384
+ * This is deliberately distinct from {@link StoreBackend} (`src/store/backend.ts`),
1385
+ * which is the central-collector **write/ingest** seam. The two seams have opposite
1386
+ * shapes: ingest merges incoming deltas; this one answers topology questions. A
1387
+ * non-SQLite backend (4.3) implements both. Keeping them separate means the API
1388
+ * never gains a write path and the ingest core never gains a query path.
1389
+ *
1390
+ * Every method takes a {@link TenantContext}. Session resolution is tenant-scoped, so
1391
+ * a caller bound to tenant A can never read tenant B's topology — even by naming a
1392
+ * session id that belongs to B (it resolves to "not found", never B's data). This
1393
+ * mirrors the MCP server's `resolveSession` tenant guard exactly.
360
1394
  */
361
- declare function runHttp(factory: () => McpServer, opts?: HttpOptions): Promise<http.Server>;
1395
+
1396
+ /** The tenant (org-scope) a request is bound to. `'local'` (DEFAULT_TENANT) until a real org is supplied. */
1397
+ interface TenantContext {
1398
+ tenant: string;
1399
+ }
1400
+ interface NodeQuery {
1401
+ search?: string;
1402
+ types?: readonly string[];
1403
+ limit?: number;
1404
+ offset?: number;
1405
+ }
1406
+ interface DependencyQuery {
1407
+ direction?: 'downstream' | 'upstream' | 'both';
1408
+ maxDepth?: number;
1409
+ }
1410
+ interface NodesResult {
1411
+ nodes: NodeRow[];
1412
+ total: number;
1413
+ limit: number;
1414
+ offset: number;
1415
+ }
1416
+ interface HealthResult {
1417
+ store: 'sqlite';
1418
+ sessions: number;
1419
+ }
1420
+ /** A requested resource (session / diff endpoint) does not exist for this tenant → REST 404. */
1421
+ declare class NotFoundError extends Error {
1422
+ constructor(message: string);
1423
+ }
1424
+ /** Narrow, read-only view of the topology store. Tenant is required on every call. */
1425
+ interface QueryBackend {
1426
+ /** Aggregate, low-token index of the resolved session. Throws {@link NotFoundError} if no session resolves. */
1427
+ summary(ctx: TenantContext, sessionId?: string): GraphSummary;
1428
+ /** Page/search nodes of the resolved session. Throws {@link NotFoundError} if no session resolves. */
1429
+ nodes(ctx: TenantContext, q: NodeQuery, sessionId?: string): NodesResult;
1430
+ /** One node by id (or `undefined` if absent). Throws {@link NotFoundError} if no session resolves. */
1431
+ node(ctx: TenantContext, id: string, sessionId?: string): NodeRow | undefined;
1432
+ /** Dependency traversal from a node. Throws {@link NotFoundError} if no session resolves. */
1433
+ dependencies(ctx: TenantContext, id: string, q: DependencyQuery, sessionId?: string): TraversalResult;
1434
+ /** Compare two sessions (both must belong to the tenant). Throws {@link NotFoundError} on an unknown/foreign id. */
1435
+ diff(ctx: TenantContext, base: string, current: string): TopologyDiff;
1436
+ /** All sessions for this tenant, newest first. */
1437
+ sessions(ctx: TenantContext): SessionRow[];
1438
+ /** Liveness/coverage probe (never resolves a session). */
1439
+ health(ctx: TenantContext): HealthResult;
1440
+ }
1441
+ /**
1442
+ * `QueryBackend` over the local `CartographyDB`. A thin read adapter: the schema,
1443
+ * migrations, and SQL all live in `db.ts`; this only resolves the tenant-scoped
1444
+ * session and forwards. Constructing it adds no state and no schema.
1445
+ */
1446
+ declare class SqliteQueryBackend implements QueryBackend {
1447
+ private readonly db;
1448
+ private readonly defaultSession;
1449
+ constructor(db: CartographyDB, defaultSession?: string | 'latest');
1450
+ /**
1451
+ * Resolve the session id for a request, scoped to `ctx.tenant`. An explicit id must
1452
+ * belong to the tenant or it resolves to undefined (cross-tenant isolation); else the
1453
+ * newest `discover` session for the tenant. Mirrors `resolveSession` in the MCP server.
1454
+ */
1455
+ private resolveSession;
1456
+ summary(ctx: TenantContext, sessionId?: string): GraphSummary;
1457
+ nodes(ctx: TenantContext, q: NodeQuery, sessionId?: string): NodesResult;
1458
+ node(ctx: TenantContext, id: string, sessionId?: string): NodeRow | undefined;
1459
+ dependencies(ctx: TenantContext, id: string, q: DependencyQuery, sessionId?: string): TraversalResult;
1460
+ diff(ctx: TenantContext, base: string, current: string): TopologyDiff;
1461
+ sessions(ctx: TenantContext): SessionRow[];
1462
+ health(ctx: TenantContext): HealthResult;
1463
+ }
1464
+ /** Construct the default SQLite-backed read query backend. */
1465
+ declare function createSqliteQueryBackend(db: CartographyDB, defaultSession?: string | 'latest'): QueryBackend;
1466
+
1467
+ /**
1468
+ * Global-identity merge core for the central collector (2.12) — pure, no I/O.
1469
+ *
1470
+ * The merge *keys* (`normalizeId`, `contentHash`, `keyMetaOf`, `globalId`) already
1471
+ * exist in `src/db.ts` from the 2.9 identity work; this module re-exports them so
1472
+ * the central collector has one import surface, and adds {@link computeIdentity} —
1473
+ * the small adapter that turns one incoming `DiscoveryNode` into the precomputed
1474
+ * `(globalId, contentHash)` pair the {@link StoreBackend} merge consumes.
1475
+ *
1476
+ * The merge *resolution* (primary by `(org, globalId)`, secondary by
1477
+ * `(org, contentHash)`, contributor union, max-confidence) lives in the store
1478
+ * (`CartographyDB.upsertCentralNode`) because it needs the existing row — keeping
1479
+ * the SQL next to the schema. This module is the deterministic key derivation that
1480
+ * both the client (push) and the server (ingest) must agree on.
1481
+ */
1482
+
1483
+ /**
1484
+ * Derive the precomputed merge identity for one incoming node under `org`:
1485
+ * - `globalId` = `{org}:{normalizeId(node.id)}` (primary key; the org-scope ensures
1486
+ * two organizations never collapse onto one logical node).
1487
+ * - `contentHash` = sha256 over `type + normalized name + sorted key-meta` (secondary
1488
+ * key; catches `id` drift between machines for the same logical resource).
1489
+ *
1490
+ * Pure and deterministic: the same node + org always yields the same identity, on
1491
+ * any machine. This is the contract the client (2.11 push) and the server (ingest)
1492
+ * share so a content hash computed on the client matches the one the server recomputes.
1493
+ */
1494
+ declare function computeIdentity(org: string, node: DiscoveryNode): NodeIdentity;
1495
+
1496
+ /**
1497
+ * Server-side anonymization re-validation for the central collector (2.12).
1498
+ *
1499
+ * **Don't trust the client.** The client (2.10) is supposed to pseudonymize
1500
+ * identifying fragments before pushing at the `anonymized` sharing level, but the
1501
+ * collector independently re-checks every incoming payload and rejects or scrubs any
1502
+ * un-anonymized identifying fragment it finds. This is defense-in-depth: a buggy,
1503
+ * outdated, or malicious client cannot leak raw identifiers into the org-wide store.
1504
+ *
1505
+ * The walker mirrors `redactValue`/`pseudonymize` (it rewrites only leaf strings;
1506
+ * structure is invariant). What it flags at the `anonymized` level:
1507
+ * - **private-ip** — RFC-1918 / loopback IPv4 (`src/anonymize.ts:PRIVATE_IP` + 127/8).
1508
+ * - **absolute-path** — POSIX (`/home/alice/...`) or Windows (`C:\Users\alice\...`) paths.
1509
+ * - **username** — the user segment of `/home/<u>`, `/Users/<u>`, `C:\Users\<u>`.
1510
+ * - **hostname** — multi-label FQDNs (`db-01.internal.acme.lan`), AND the known
1511
+ * 2.10 residual: **bare single-label internal hostnames** (e.g. `db-prod-01`) that
1512
+ * the client's multi-label-only HOSTNAME regex never tokenizes. A token already in
1513
+ * `anon:{kind}:{base32}` form is recognized as anonymized and never flagged.
1514
+ *
1515
+ * At the `none` / `full` levels nothing is claimed to be anonymized, so re-validation
1516
+ * is a no-op (returns no violations): the employee consented to share at that level.
1517
+ */
1518
+
1519
+ /** The anonymization level claimed by an ingest envelope (mirrors `SharingLevel`). */
1520
+ type AnonymizationLevel = 'none' | 'anonymized' | 'full';
1521
+ /** A detected un-anonymized identifying fragment, with its location for logging. */
1522
+ interface AnonViolation {
1523
+ /** JSON path to the offending leaf, e.g. `metadata.host`. */
1524
+ path: string;
1525
+ kind: 'hostname' | 'username' | 'absolute-path' | 'private-ip';
1526
+ }
1527
+ /**
1528
+ * Recursively collect every anonymization violation in a value at the given level.
1529
+ * Pure. At `none`/`full` returns `[]` (no anonymization is claimed). Object keys are
1530
+ * schema, not data, so only values are inspected; `path` accumulates the location.
1531
+ */
1532
+ declare function findAnonViolations(value: unknown, level: AnonymizationLevel, path?: string): AnonViolation[];
1533
+ /**
1534
+ * Re-validate one node against its claimed anonymization level (2.12).
1535
+ *
1536
+ * - `mode: 'reject'` (default, strict) — returns the node unchanged plus the
1537
+ * violations; the caller drops a node with any violation and never persists it.
1538
+ * - `mode: 'strip'` (lenient) — returns a sanitized node (offending leaves masked to
1539
+ * `***`) plus the violations (for logging), so the topology shape survives while
1540
+ * the identifying fragments do not.
1541
+ *
1542
+ * Pure: no I/O, no logging (the caller logs). At `none`/`full` it is a pass-through.
1543
+ */
1544
+ declare function revalidateAnonymized(node: DiscoveryNode, level: AnonymizationLevel, mode: 'reject' | 'strip'): {
1545
+ node: DiscoveryNode;
1546
+ violations: AnonViolation[];
1547
+ };
1548
+
1549
+ /**
1550
+ * Ingest orchestration for the central collector (2.12).
1551
+ *
1552
+ * Consumes the 2.11 PUSH ENVELOPE verbatim —
1553
+ * `{ schemaVersion: 1, org?, items: [{ contentHash, kind: 'node'|'edge', payload }] }`
1554
+ * — and merges it into the tenant-partitioned central store. The pipeline is:
1555
+ *
1556
+ * 1. **Validate** the envelope shape (zod). A malformed envelope is rejected whole;
1557
+ * nothing is persisted (the HTTP layer turns this into a 400).
1558
+ * 2. **Re-validate anonymization** per node (`revalidateAnonymized`) — the client is
1559
+ * not trusted. In `reject` mode a node with any identifying fragment is dropped
1560
+ * and counted; in `strip` mode it is scrubbed and persisted. Either way a
1561
+ * structured WARN is logged with the violation path + action.
1562
+ * 3. **Merge** each node by `(org, globalId)` primary + `(org, contentHash)` secondary
1563
+ * with a contributor union (`store.upsertNode`), and insert edges (`store.insertEdge`)
1564
+ * only when both endpoints survived re-validation.
1565
+ * 4. **Log** one structured INFO with the per-ingest counts.
1566
+ *
1567
+ * The contributor `at` is stamped server-side (`new Date().toISOString()`) — never
1568
+ * trusted from the client. The envelope MAY carry a `contributor`/`anonymizationLevel`
1569
+ * extension (2.9/2.10); absent, the collector derives a conservative default and
1570
+ * re-validates at the `anonymized` level (the safe assumption).
1571
+ */
1572
+
1573
+ /** Wire-format version this collector accepts (the 2.11 `PUSH_SCHEMA_VERSION`). */
1574
+ declare const INGEST_SCHEMA_VERSION: 1;
1575
+ /**
1576
+ * The ingest envelope — the 2.11 push contract, plus optional `contributor` /
1577
+ * `anonymizationLevel` extension fields (read when present; never required). `org`
1578
+ * is optional in the wire format; the collector falls back to its `defaultOrg`.
1579
+ */
1580
+ declare const IngestEnvelopeSchema: z.ZodObject<{
1581
+ schemaVersion: z.ZodLiteral<1>;
1582
+ org: z.ZodOptional<z.ZodString>;
1583
+ items: z.ZodArray<z.ZodObject<{
1584
+ contentHash: z.ZodString;
1585
+ kind: z.ZodEnum<{
1586
+ node: "node";
1587
+ edge: "edge";
1588
+ }>;
1589
+ payload: z.ZodUnknown;
1590
+ }, z.core.$strip>>;
1591
+ contributor: z.ZodOptional<z.ZodObject<{
1592
+ machineId: z.ZodString;
1593
+ hostname: z.ZodDefault<z.ZodString>;
1594
+ user: z.ZodDefault<z.ZodString>;
1595
+ confidence: z.ZodDefault<z.ZodNumber>;
1596
+ }, z.core.$strip>>;
1597
+ anonymizationLevel: z.ZodOptional<z.ZodEnum<{
1598
+ none: "none";
1599
+ anonymized: "anonymized";
1600
+ full: "full";
1601
+ }>>;
1602
+ }, z.core.$strip>;
1603
+ type IngestEnvelope = z.infer<typeof IngestEnvelopeSchema>;
1604
+ /** Per-ingest outcome counts (the 200 response body). */
1605
+ interface IngestResult {
1606
+ org: string;
1607
+ /** Nodes persisted (created + merged). */
1608
+ accepted: number;
1609
+ /** Nodes that collapsed onto an existing logical node. */
1610
+ merged: number;
1611
+ /** Nodes dropped for anonymization violations (reject mode). */
1612
+ rejected: number;
1613
+ /** Edges persisted. */
1614
+ edges: number;
1615
+ /** Total anonymization violations detected across all nodes. */
1616
+ violations: number;
1617
+ }
1618
+ interface IngestOptions {
1619
+ /** `reject` (default) drops nodes with violations; `strip` scrubs and keeps them. */
1620
+ anonMode?: 'reject' | 'strip';
1621
+ /** Tenant used when the envelope omits `org`. */
1622
+ defaultOrg?: string;
1623
+ }
1624
+ /**
1625
+ * Ingest one validated envelope into the store. Returns the outcome counts.
1626
+ * The caller (HTTP handler) wraps this in try/catch; the store's per-node upsert is
1627
+ * itself transactional, so a single bad node never half-writes a row.
1628
+ */
1629
+ declare function ingestEnvelope(store: StoreBackend, envelope: IngestEnvelope, opts?: IngestOptions): IngestResult;
1630
+
1631
+ /**
1632
+ * The central-collector ingest HTTP surface (2.12).
1633
+ *
1634
+ * {@link createIngestHandler} produces the request handler that the Streamable-HTTP
1635
+ * transport mounts at `POST /ingest` when server-mode is on. It is deliberately a
1636
+ * pure `(body) => { status, body }` function with no socket of its own, so it can be
1637
+ * unit-tested directly without binding a port (the transport owns auth, the
1638
+ * non-loopback host allowlist, and the body size cap — see `src/mcp/transports.ts`).
1639
+ *
1640
+ * Validation is fail-closed: a malformed envelope yields 400 (no writes); an internal
1641
+ * error yields 500; only a valid envelope runs `ingestEnvelope` and returns 200 with
1642
+ * the {@link IngestResult}. The bearer token is enforced by the transport before this
1643
+ * handler ever runs, so the handler never sees (and never logs) the token.
1644
+ */
1645
+
1646
+ /** A transport-agnostic HTTP-ish response: a status code and a JSON-serializable body. */
1647
+ interface IngestResponse {
1648
+ status: number;
1649
+ body: unknown;
1650
+ }
1651
+ type IngestHandler = (body: unknown) => IngestResponse;
1652
+ /**
1653
+ * Build the `/ingest` handler over a {@link StoreBackend}. The handler validates the
1654
+ * 2.11 push envelope, runs ingest (re-validating anonymization first), and maps the
1655
+ * outcome to an HTTP status:
1656
+ * - 400 — envelope failed `IngestEnvelopeSchema` (no writes).
1657
+ * - 500 — ingest threw (the store's per-node transaction rolls that node back).
1658
+ * - 200 — {@link IngestResult}.
1659
+ */
1660
+ declare function createIngestHandler(store: StoreBackend, opts?: IngestOptions): IngestHandler;
1661
+
1662
+ /**
1663
+ * Org-key lifecycle for the 2.10 anonymization layer.
1664
+ *
1665
+ * The org key is the single secret that makes pseudonyms deterministic across
1666
+ * machines (so a central merge collapses the same host to one token) and
1667
+ * admin-reversible (only the key holder can invert a pseudonym). It lives on disk
1668
+ * at `~/.cartography/org-key` (mode 0600), **never** in the SQLite catalog, never
1669
+ * in any log line, and never in an export. The returned key is HKDF-namespaced by
1670
+ * `organization`, so two organizations sharing one machine derive non-colliding
1671
+ * keys (and therefore non-colliding tokens + isolated reversal maps).
1672
+ *
1673
+ * Zero new dependencies: HMAC/AES/HKDF/randomBytes all come from `node:crypto`.
1674
+ */
1675
+ interface OrgKeyOptions {
1676
+ /** Organization namespace; absent ⇒ the `'default'` namespace. */
1677
+ organization?: string;
1678
+ /** Override the on-disk key path (tests / non-default data dirs). */
1679
+ keyPath?: string;
1680
+ }
1681
+ /** Default org-key file path: `~/.cartography/org-key`. */
1682
+ declare function orgKeyPath(home?: string): string;
1683
+ /**
1684
+ * Load (or lazily create) the org key, HKDF-namespaced by `organization`. The
1685
+ * on-disk secret is a single random value; per-org keys are derived from it so
1686
+ * the same file serves multiple orgs without collision. Free of side effects
1687
+ * beyond the lazy file creation; never logs the key material.
1688
+ */
1689
+ declare function loadOrgKey(opts?: OrgKeyOptions): Buffer;
1690
+ /**
1691
+ * Rotate the org key: overwrite the on-disk secret with fresh random bytes and
1692
+ * return the new derived key. Reversal entries written under the old key become
1693
+ * unrecoverable by design — callers are warned on stderr.
1694
+ */
1695
+ declare function rotateOrgKey(opts?: OrgKeyOptions): Buffer;
1696
+ /** Subkey for HMAC pseudonym tokens (distinct domain from the reversal cipher key). */
1697
+ declare function hmacKey(orgKey: Buffer): Buffer;
1698
+ /** Subkey for AES-256-GCM reversal-map encryption (distinct domain from the HMAC key). */
1699
+ declare function reversalKey(orgKey: Buffer): Buffer;
1700
+
1701
+ /**
1702
+ * Org-keyed, admin-reversible pseudonymization (2.10 `anonymized` sharing level).
1703
+ *
1704
+ * Identifying fragments — private IPs, file paths, `user@host` pairs, and bare
1705
+ * hostnames — are replaced by a deterministic token
1706
+ * `anon:{kind}:{base32(first 12 bytes of HMAC-SHA256(hmacKey, plaintext))}`
1707
+ * so the same input + org key always yields the same token (a central merge then
1708
+ * collapses the same host across machines). The token is one-way for anyone
1709
+ * without the org key (HMAC preimage resistance + a 32-byte secret); an admin
1710
+ * holding the key can invert it via an AES-256-GCM reversal map.
1711
+ *
1712
+ * Topology shape is preserved: {@link pseudonymize} mirrors `redactValue`
1713
+ * (`src/tools.ts`) — it walks structure, rewriting only leaf strings; it never
1714
+ * adds, removes, or reorders keys/array elements.
1715
+ *
1716
+ * Zero new dependencies — all crypto is `node:crypto`.
1717
+ */
1718
+
1719
+ /** The kinds of identifying fragment we tokenize. The prefix keeps namespaces legible + non-colliding. */
1720
+ type FragmentKind = 'host' | 'user' | 'path' | 'ip';
1721
+ /**
1722
+ * RFC-1918 private IPv4 ranges: 10/8, 172.16/12, 192.168/16. Only private IPs are
1723
+ * pseudonymized — public IPs are not identifying of an employee's machine and are
1724
+ * left intact (so topology against public infra still reads).
1725
+ */
1726
+ declare const PRIVATE_IP: RegExp;
1727
+ /**
1728
+ * Deterministic token for one identifying fragment. When `db` is supplied, the
1729
+ * plaintext is AES-256-GCM-encrypted under `reversalKey(orgKey)` and persisted so
1730
+ * an admin can later invert the token. Idempotent: the token is deterministic, so
1731
+ * re-encrypting the same fragment `INSERT OR REPLACE`s the same row.
1732
+ */
1733
+ declare function pseudonymizeFragment(plaintext: string, kind: FragmentKind, orgKey: Buffer, db?: CartographyDB): string;
1734
+ /**
1735
+ * Pseudonymize the identifying fragments inside a single string — private IPs,
1736
+ * file paths, `user@host` pairs, and bare hostnames — leaving non-identifying
1737
+ * text intact. Order matters: paths and IPs are matched before hostnames so an IP
1738
+ * or a path segment is never mis-tokenized as a host.
1739
+ */
1740
+ declare function pseudonymizeString(s: string, orgKey: Buffer, db?: CartographyDB): string;
1741
+ /**
1742
+ * Recursive, structure-preserving walker — same shape as `redactValue`
1743
+ * (`src/tools.ts`): strings → {@link pseudonymizeString}, arrays → map,
1744
+ * objects → per-value recurse, primitives → unchanged. Object keys are left
1745
+ * verbatim (they are schema, not data), so topology shape is invariant.
1746
+ */
1747
+ declare function pseudonymize(value: unknown, orgKey: Buffer, db?: CartographyDB): unknown;
1748
+ /**
1749
+ * Admin reversal: decrypt the original plaintext behind a pseudonym token, or
1750
+ * `undefined` when the token is unknown or the ciphertext fails GCM authentication
1751
+ * (tampered or produced under a different / rotated org key).
1752
+ */
1753
+ declare function reversePseudonym(token: string, orgKey: Buffer, db: CartographyDB): string | undefined;
1754
+
1755
+ /**
1756
+ * Sharing classifier + pre-send preview (2.10).
1757
+ *
1758
+ * Resolves the effective sharing level for each node (a PERSONAL-host hard floor
1759
+ * over the persisted policy), applies that level (`none` drops, `anonymized`
1760
+ * pseudonymizes, `full` keeps raw), and builds {@link previewShare} — the exact,
1761
+ * topology-shape-preserving payload that *would* leave the machine. This is the
1762
+ * privacy gate 2.11/2.12 build on; nothing here performs any network I/O.
1763
+ */
1764
+
1765
+ /**
1766
+ * Resolve the policy-level for a node id: the most-specific matching override
1767
+ * wins (fewest wildcards, then longest pattern); ties and no-match fall through
1768
+ * to `defaultLevel`. The `'*'`/`'**'` rows are the global default and always lose
1769
+ * to any narrower match. Pure and deterministic.
1770
+ */
1771
+ declare function resolveSharingLevel(nodeId: string, policy: SharingPolicy): SharingLevel;
1772
+ /**
1773
+ * The effective level for a node: a PERSONAL-host hard floor (single-sourced from
1774
+ * the bookmarks never-share list) forces `'none'` regardless of policy; otherwise
1775
+ * the policy's resolved level for the node id applies.
1776
+ */
1777
+ declare function resolveEffectiveLevel(node: DiscoveryNode, policy: SharingPolicy): SharingLevel;
1778
+ /**
1779
+ * Apply a sharing level to one node:
1780
+ * - `none` → `null` (the node is dropped, never leaves).
1781
+ * - `anonymized` → a clone with `id`/`name` pseudonymized and `metadata` walked
1782
+ * (structure-preserving); identifying fragments tokenized.
1783
+ * - `full` → a structural clone, verbatim.
1784
+ *
1785
+ * The same deterministic `pseudonymizeString` is applied to the id here and by
1786
+ * {@link previewShare} when remapping edges, so endpoints always resolve.
1787
+ */
1788
+ declare function applySharingLevel(node: DiscoveryNode, level: SharingLevel, orgKey: Buffer, db?: CartographyDB): DiscoveryNode | null;
1789
+ interface SharePreviewEntry {
1790
+ /** The original (un-anonymized) node, for the operator's side-by-side disclosure. */
1791
+ node: DiscoveryNode;
1792
+ level: SharingLevel;
1793
+ /** What would actually leave: the transformed node, or `null` when dropped. */
1794
+ payload: DiscoveryNode | null;
1795
+ }
1796
+ interface SharePreview {
1797
+ nodes: SharePreviewEntry[];
1798
+ /** Edges that would leave, with endpoints remapped to surviving (transformed) ids. */
1799
+ edges: {
1800
+ sourceId: string;
1801
+ targetId: string;
1802
+ relationship: string;
1803
+ }[];
1804
+ /** Original ids of nodes dropped at level `none`. */
1805
+ droppedNodeIds: string[];
1806
+ }
1807
+ /**
1808
+ * Build the exact payload that would leave for a session: resolve each node's
1809
+ * effective level, apply it, and remap edge endpoints through the same id
1810
+ * transform. An edge survives only when both endpoints survive — so when no node
1811
+ * is dropped the node count, edge count, and relationship multiset are identical
1812
+ * before/after; when some are dropped the result is a well-defined subgraph.
1813
+ *
1814
+ * `opts.persistReversal` (default false) controls whether anonymized fragments are
1815
+ * written to the reversal map — pure preview need not persist; an actual
1816
+ * pre-send transform should, so an admin can later invert.
1817
+ */
1818
+ declare function previewShare(db: CartographyDB, sessionId: string, orgKey: Buffer, policy: SharingPolicy, opts?: {
1819
+ persistReversal?: boolean;
1820
+ }): SharePreview;
1821
+ /**
1822
+ * The seam 2.11 will use to suppress `ask_user` re-prompts: true when the node id
1823
+ * is already covered by the persisted policy — either a matching override or a
1824
+ * non-`none` default — so a matched item is never re-prompted.
1825
+ */
1826
+ declare function isRemembered(policy: SharingPolicy, nodeId: string): boolean;
362
1827
 
363
1828
  /**
364
1829
  * Cross-platform utilities for Linux, macOS, and Windows.
365
1830
  * Centralizes all OS-specific logic so scanning tools work everywhere.
366
1831
  */
367
1832
  type Platform = 'linux' | 'darwin' | 'win32';
1833
+ /** OS hostname, with a stable fallback so attribution is never empty. */
1834
+ declare function hostname(): string;
1835
+ /** Current OS username, falling back to USER/USERNAME env, then a sentinel. */
1836
+ declare function osUser(): string;
1837
+ /**
1838
+ * Stable, privacy-respecting per-install identifier: a random UUID v4 cached at
1839
+ * `~/.cartography/machine-id` (mode 0600). No hardware fingerprinting, no command
1840
+ * execution — allowlist-safe. If the file cannot be read or written (read-only
1841
+ * home, missing HOME in CI), it degrades to a process-stable ephemeral UUID with a
1842
+ * warning (optional-deps-degrade house style) rather than throwing. The id never
1843
+ * leaves the machine in 2.9; it is opaque and non-identifying.
1844
+ */
1845
+ declare function machineId(): string;
368
1846
  declare function safeEnv(): NodeJS.ProcessEnv;
369
1847
 
1848
+ /**
1849
+ * Remove orphaned temp files from previous bookmark/history scans.
1850
+ * Call at startup to prevent /tmp accumulation after crashes.
1851
+ */
1852
+ declare function cleanupTempFiles(): number;
1853
+ interface BookmarkHost {
1854
+ hostname: string;
1855
+ port: number;
1856
+ protocol: 'http' | 'https';
1857
+ source: string;
1858
+ }
1859
+
370
1860
  /**
371
1861
  * Scanner plugin contract.
372
1862
  *
@@ -389,6 +1879,19 @@ interface ScanContext {
389
1879
  timeout?: number;
390
1880
  env?: NodeJS.ProcessEnv;
391
1881
  }) => string;
1882
+ /**
1883
+ * Injectable seam: resolve a command to its path ('' if absent). Defaults to the
1884
+ * real `commandExists` when omitted. Lets scanners run deterministically in tests.
1885
+ */
1886
+ commandExists?: (cmd: string) => string;
1887
+ /** Injectable seam: raw listening-port output. Defaults to `scanListeningPorts`. */
1888
+ scanListeningPorts?: () => string;
1889
+ /** Injectable seam: raw established-connection output (3.2). Defaults to `scanEstablishedConnections`. */
1890
+ scanEstablishedConnections?: () => string;
1891
+ /** Injectable seam: cross-platform file search (3.2). Defaults to `findFiles`. */
1892
+ findFiles?: (dirs: string[], patterns: string[], maxDepth: number, limit: number) => string;
1893
+ /** Injectable seam: browser-bookmark host source. Defaults to `scanAllBookmarks`. */
1894
+ scanBookmarks?: () => Promise<BookmarkHost[]>;
392
1895
  }
393
1896
  interface ScanResult {
394
1897
  /** Deterministically classified nodes. */
@@ -416,23 +1919,597 @@ interface Scanner {
416
1919
  declare class ScannerRegistry {
417
1920
  private scanners;
418
1921
  register(scanner: Scanner): this;
1922
+ /**
1923
+ * Register a {@link Scanner} produced by an external plugin package. Validates
1924
+ * the shape (throws `ZodError` on mismatch), namespaces the id to
1925
+ * `plugin:<pkg>:<id>` (avoiding collisions with built-ins and across plugins),
1926
+ * wraps `scan()` so the scanner's `ctx.run` is gated by its declared
1927
+ * `allowedCommands` intersected with the central read-only allowlist
1928
+ * ({@link checkReadOnly}), and freezes the wrapper. Reuses the duplicate-id
1929
+ * guard in {@link register}.
1930
+ *
1931
+ * The gated runner delegates the actual execution to the host-supplied
1932
+ * `ctx.run` (the platform runner), so the global read-only floor still applies
1933
+ * and `allowedCommands` is a *second*, scanner-scoped least-privilege boundary.
1934
+ * A command runs only if its leading executable is declared AND the whole
1935
+ * command passes `checkReadOnly`; otherwise the runner returns `''` (the
1936
+ * documented "blocked → ''" contract).
1937
+ */
1938
+ registerExternal(pkg: string, scanner: Scanner): this;
419
1939
  get(id: string): Scanner | undefined;
420
1940
  list(): Scanner[];
421
1941
  /** Scanners whose `platforms` include the given platform. */
422
1942
  forPlatform(platform: Platform): Scanner[];
423
1943
  }
424
1944
 
1945
+ /**
1946
+ * Hostname substrings that indicate a personal site — never catalogued, and the
1947
+ * hard floor for the 2.10 sharing policy (a personal host is forced to `'none'`
1948
+ * regardless of policy). Single-sourced here and shared via {@link isPersonalHost}
1949
+ * so the never-share list never forks.
1950
+ */
1951
+ declare const PERSONAL: string[];
1952
+ /** True when `host` matches a PERSONAL substring (case-insensitive). */
1953
+ declare function isPersonalHost(host: string): boolean;
425
1954
  declare const bookmarksScanner: Scanner;
426
1955
 
1956
+ /**
1957
+ * The Cartography MCP server — the package's primary, LLM-agnostic interface.
1958
+ *
1959
+ * It exposes the discovered infrastructure topology as Model Context Protocol
1960
+ * **Resources** (read-only context, progressive disclosure), a small set of query
1961
+ * **Tools** (parameterized lookups), and reusable **Prompts**. Any MCP host —
1962
+ * Claude Code, Cursor, Cline, Windsurf, the Vercel AI SDK, LangGraph — can drive
1963
+ * it; the package never needs to know which model is in use.
1964
+ */
1965
+
1966
+ /** A pluggable search backend; defaults to lexical search, can be upgraded to semantic. */
1967
+ type SearchFn = (db: CartographyDB, sessionId: string, query: string, opts: {
1968
+ types?: readonly string[];
1969
+ limit: number;
1970
+ }) => Promise<Array<{
1971
+ node: NodeRow;
1972
+ score?: number;
1973
+ }>>;
1974
+ /** A pluggable discovery backend invoked by the `run_discovery` tool. */
1975
+ type DiscoveryFn = (db: CartographyDB, sessionId: string, opts: {
1976
+ hint?: string;
1977
+ mode?: 'replace' | 'update';
1978
+ }) => Promise<{
1979
+ nodes: number;
1980
+ edges: number;
1981
+ delta?: TopologyDelta;
1982
+ }>;
1983
+ interface CreateMcpServerOptions {
1984
+ /** Database instance. If omitted, one is opened at `config.dbPath`. */
1985
+ db?: CartographyDB;
1986
+ /** Path to the SQLite catalog (used when `db` is not provided). */
1987
+ dbPath?: string;
1988
+ /** Session to serve: a session id, or `'latest'` (default) for the newest discovery. */
1989
+ session?: string | 'latest';
1990
+ /**
1991
+ * Tenant/organization whose topology this server serves. Defaults to DEFAULT_TENANT
1992
+ * (`'local'`). Session resolution is scoped to this tenant, so a server bound to one
1993
+ * tenant never surfaces another tenant's sessions/nodes/edges. This is a data-scoping
1994
+ * partition, not an authorization boundary (RBAC is Phase 4).
1995
+ */
1996
+ tenant?: string;
1997
+ /** Semantic/lexical search backend. Defaults to lexical `searchNodes`. */
1998
+ search?: SearchFn;
1999
+ /** Discovery backend for `run_discovery`/`refresh`. Optional. */
2000
+ discovery?: DiscoveryFn;
2001
+ /**
2002
+ * Central-collector org (2.12). When set, `get_summary` returns the org-wide,
2003
+ * cross-machine merged summary (`db.getOrgSummary(org)`) instead of the single-
2004
+ * session view. Used by server-mode; unset preserves the local single-session
2005
+ * behaviour exactly. The org is normalized to a tenant.
2006
+ */
2007
+ org?: string;
2008
+ }
2009
+ /**
2010
+ * Build a fully-configured Cartography MCP server. Call `.connect(transport)` to run it.
2011
+ */
2012
+ declare function createMcpServer(opts?: CreateMcpServerOptions): McpServer;
2013
+
2014
+ /**
2015
+ * Transport bindings for the Cartography MCP server.
2016
+ *
2017
+ * - **stdio**: the local-first default — zero network, every client supports it.
2018
+ * - **Streamable HTTP**: a single `/mcp` endpoint for team/remote use, bound to
2019
+ * localhost with DNS-rebinding protection. The deprecated SSE transport is not used.
2020
+ */
2021
+
2022
+ /** Connect a server over stdio (resolves when the transport closes). */
2023
+ declare function runStdio(server: McpServer): Promise<void>;
2024
+ interface HttpOptions {
2025
+ port?: number;
2026
+ host?: string;
2027
+ /** Extra allowed Host headers (defaults to localhost:port variants). */
2028
+ allowedHosts?: string[];
2029
+ /** Allowed Origin headers (defaults to none → same-origin only). */
2030
+ allowedOrigins?: string[];
2031
+ /**
2032
+ * Shared secret required in the `Authorization: Bearer <token>` header.
2033
+ * Mandatory when binding a non-loopback host; optional (and enforced when
2034
+ * present) on loopback.
2035
+ */
2036
+ token?: string;
2037
+ /**
2038
+ * Central-collector ingest hook (2.12). When set, an authenticated `POST /ingest`
2039
+ * **write** route is exposed: it inherits the *same* bearer auth and non-loopback
2040
+ * host-allowlist guards as `/mcp` (the route adds no separate hardening path), size-
2041
+ * caps the body, parses JSON, and returns the hook's `{ status, body }`. When unset,
2042
+ * `/ingest` 404s exactly like any other path — the collector stays dark by default.
2043
+ */
2044
+ onIngest?: (body: unknown) => {
2045
+ status: number;
2046
+ body: unknown;
2047
+ };
2048
+ }
2049
+ /**
2050
+ * Start a Streamable HTTP server. A fresh MCP server instance is created per
2051
+ * session via `factory`, so multiple clients can connect concurrently.
2052
+ */
2053
+ declare function runHttp(factory: () => McpServer, opts?: HttpOptions): Promise<http.Server>;
2054
+
2055
+ /**
2056
+ * Shared HTTP auth + bind-hardening primitives.
2057
+ *
2058
+ * Extracted verbatim from `src/mcp/transports.ts` so the MCP transport, the REST/
2059
+ * GraphQL API server (4.2), and any future HTTP surface consume **one** provably-
2060
+ * identical implementation of the CVE-2025-66414 guards, the constant-time bearer
2061
+ * compare, and the default Host allowlist. The allowlist — not any one caller — is
2062
+ * the security boundary; centralizing it here keeps every networked surface on the
2063
+ * same posture and makes the behavior unit-testable in isolation.
2064
+ */
2065
+ /** Loopback hosts are safe to bind without an explicit Host allowlist. */
2066
+ declare const LOOPBACK_HOSTS: ReadonlySet<string>;
2067
+ /** True when `host` is a loopback address (safe to bind without an allowlist/token). */
2068
+ declare function isLoopbackHost(host: string): boolean;
2069
+ /** Constant-time comparison to avoid leaking the token via timing. */
2070
+ declare function timingSafeEqual(a: string, b: string): boolean;
2071
+ /**
2072
+ * Extract the bearer token from an Authorization header, if present. Parsed with
2073
+ * linear string ops (no regex) so a user-controlled header can never trigger
2074
+ * polynomial backtracking (ReDoS) — `^Bearer\s+(.+)$` is ambiguous between `\s+`
2075
+ * and `.+` on a long run of spaces.
2076
+ */
2077
+ declare function bearerToken(header: string | undefined): string | undefined;
2078
+ /**
2079
+ * Returns true if the request is authenticated: a request is authenticated when no
2080
+ * token is configured (open loopback dev mode) OR the `Authorization: Bearer` value
2081
+ * is present and constant-time-equal to the configured token. The caller maps a
2082
+ * `false` to a 401.
2083
+ */
2084
+ declare function checkBearer(authorizationHeader: string | undefined, token: string | undefined): boolean;
2085
+ interface BindGuardOptions {
2086
+ host: string;
2087
+ port: number;
2088
+ allowedHosts?: string[];
2089
+ token?: string;
2090
+ }
2091
+ /**
2092
+ * Enforce the CVE-2025-66414 + mandatory-token guards before binding. Throws the
2093
+ * exact errors `runHttp` raised inline, so existing transport behavior is preserved:
2094
+ * a non-loopback bind requires BOTH an explicit `allowedHosts` allowlist AND a token.
2095
+ */
2096
+ declare function assertSafeBind(opts: BindGuardOptions): void;
2097
+ /** Default Host allowlist: the bound host plus the localhost variants, all `:port`. */
2098
+ declare function defaultAllowedHosts(host: string, port: number): string[];
2099
+
2100
+ /**
2101
+ * Per-request tenant resolution for the API server (4.2).
2102
+ *
2103
+ * The tenant (org-scope) is a first-class request property: it is resolved once,
2104
+ * up front, and threaded into every {@link QueryBackend} call so isolation is
2105
+ * structural, not bolted on. A request may name a tenant via the
2106
+ * `X-Cartograph-Tenant` header or a `?tenant=` query param; absent either, it
2107
+ * defaults to the server's configured default (normally `DEFAULT_TENANT='local'`).
2108
+ *
2109
+ * Validation reuses `normalizeTenant` (the single charset-allowlisted validator,
2110
+ * `^[\w.@:+-]{1,128}$`) — but here we **reject** a malformed value with a typed
2111
+ * error (→ HTTP 400) rather than silently falling back, so a client never believes
2112
+ * it is scoped to one tenant while being served another. The raw input is never
2113
+ * reflected into a response.
2114
+ */
2115
+
2116
+ declare const TENANT_HEADER = "x-cartograph-tenant"; // Name of the request header the tenant is read from by default (see `TenantOptions.header`).
2117
+ /** The supplied tenant value did not pass the charset/length allowlist → HTTP 400. */
2118
+ declare class InvalidTenantError extends Error {
2119
+ constructor();
2120
+ }
2121
+ interface TenantOptions { // Options accepted by `resolveTenant`; both fields are optional.
2122
+ /** Default tenant when the request names none. Defaults to `DEFAULT_TENANT` ('local'). */
2123
+ defaultTenant?: string;
2124
+ /** Header to read the tenant from. Defaults to `x-cartograph-tenant`. */
2125
+ header?: string;
2126
+ }
2127
+ /**
2128
+ * Resolve the tenant from the request header or `?tenant=` query param, else the
2129
+ * configured default. A supplied-but-malformed value throws {@link InvalidTenantError}
2130
+ * (the caller maps it to a 400) instead of silently defaulting.
2131
+ */
2132
+ declare function resolveTenant(req: IncomingMessage, url: URL, opts?: TenantOptions): TenantContext;
2133
+
2134
+ /**
2135
+ * The read-only API HTTP server (4.2), on Node's built-in `http` (zero new runtime dep).
2136
+ *
2137
+ * Request flow mirrors the MCP transport (`src/mcp/transports.ts`): the CVE-2025-66414
2138
+ * bind guards run at startup (shared `assertSafeBind`); per request the Host header is
2139
+ * checked against the allowlist (DNS-rebinding), then the bearer token is verified
2140
+ * **before any backend access**, then the tenant is resolved, then the route dispatches.
2141
+ * REST handlers are pure (`rest.ts`); GraphQL is wired when enabled (`graphql.ts`). One
2142
+ * structured stderr access line per request — never the token, never query values.
2143
+ */
2144
+
2145
+ interface ApiServerOptions extends BindGuardOptions { // Options for `runApi`: the bind-guard fields plus backend, CORS, tenant, GraphQL and logging settings.
2146
+ backend: QueryBackend; // Read backend the request handlers query; the resolved tenant is threaded into its calls.
2147
+ version: string; // Version string; NOTE(review): presumably what the server reports about itself — confirm in `runApi`.
2148
+ /** CORS Origin allowlist. Default: none (same-origin only). */
2149
+ allowedOrigins?: string[];
2150
+ /** Tenant resolution options (header name / default tenant). */
2151
+ tenant?: TenantOptions;
2152
+ /** Expose `/graphql` (default true). */
2153
+ graphql?: boolean;
2154
+ /** Access logger (stderr). */
2155
+ log?: (msg: string) => void;
2156
+ }
2157
+ /** Start the read-only API server. Resolves once it is listening. */
2158
+ declare function runApi(opts: ApiServerOptions): Promise<http.Server>;
2159
+
2160
+ /**
2161
+ * OpenAPI 3.1 document generation for the read-only API (4.2).
2162
+ *
2163
+ * The document is **generated from the zod response schemas** (`schemas.ts`), never
2164
+ * hand-maintained, so it cannot drift from what the server actually returns. A
2165
+ * committed copy lives at `docs/api/openapi.json`; a test asserts the built document
2166
+ * deep-equals it (drift guard) and validates under `ajv`.
2167
+ *
2168
+ * `zodToJsonSchema` is a small, fail-closed projection covering exactly the zod
2169
+ * constructs `schemas.ts` uses (object/array/string/number/integer/boolean/enum/
2170
+ * literal/record/optional). An unsupported construct throws, so a future schema
2171
+ * change can't be silently mis-projected. (The provider tool layer has its own flat
2172
+ * converter in `src/providers/zod-schema.ts`; this one is recursive and serves the
2173
+ * API's nested response shapes.)
2174
+ */
2175
+
2176
+ /** Project a zod schema to a JSON-Schema (2020-12) fragment. Fails closed: an unsupported construct throws. */
2177
+ declare function zodToJsonSchema(schema: z.ZodTypeAny): Record<string, unknown>;
2178
+ interface OpenApiOptions { // Options accepted by `buildOpenApiDocument`.
2179
+ version: string; // Version string; NOTE(review): presumably emitted as the document's `info.version` — confirm.
2180
+ }
2181
+ /** Build the OpenAPI 3.1 document from the zod schemas + the static route table. Deterministic. */
2182
+ declare function buildOpenApiDocument(opts: OpenApiOptions): Record<string, unknown>;
2183
+
2184
+ /**
2185
+ * Hand-rolled, zero-dependency GraphQL layer for the read-only API (4.2).
2186
+ *
2187
+ * Mirrors REST over `POST /graphql` (and serves the SDL on `GET /graphql`) without
2188
+ * adding a `graphql`/`apollo` runtime dependency. Resolvers delegate to the same
2189
+ * {@link QueryBackend} and reuse the REST projections (`rest.ts`), so REST and GraphQL
2190
+ * return byte-identical shapes and the consent posture stays in one place. It is
2191
+ * strictly **read-only**: there is no `Mutation` type and a `mutation` document is
2192
+ * rejected. A small tokenizer/parser handles the query subset the schema needs
2193
+ * (fields, arguments, variables, nested selections) and a minimal `__schema`
2194
+ * introspection response keeps GraphiQL-style clients working.
2195
+ */
2196
+
2197
+ interface GraphqlDeps { // Dependencies injected into `executeGraphql`.
2198
+ backend: QueryBackend; // Query backend the resolvers delegate to.
2199
+ }
2200
+ interface GraphqlResult { // Response envelope that `executeGraphql` resolves to.
2201
+ data?: unknown; // Result payload; NOTE(review): presumably absent when the request fails — confirm.
2202
+ errors?: Array<{ // Errors raised while handling the request, if any.
2203
+ message: string; // Error description.
2204
+ }>;
2205
+ }
2206
+ declare const SDL = "# Cartograph read-only GraphQL API (4.2). Mirrors the REST surface.\nschema { query: Query }\n\ntype Query {\n summary(session: String): Summary\n nodes(search: String, types: [String!], limit: Int, offset: Int, session: String): NodeConnection\n node(id: String!, session: String): Node\n dependencies(id: String!, direction: Direction, maxDepth: Int, session: String): Dependencies\n diff(base: String!, current: String!): Diff\n sessions: [Session!]!\n}\n\nenum Direction { downstream upstream both }\n\ntype Totals { nodes: Int! edges: Int! }\ntype Count { key: String! value: Int! }\ntype TopConnected { id: String! name: String! type: String! degree: Int! }\ntype Anomaly { nodeId: String! kind: String! severity: String! reason: String! }\ntype Cost { amount: Float! currency: String! period: String! source: String }\ntype CostRollup { key: String! currency: String! period: String! total: Float! nodes: Int! }\ntype CostCoverage { withCost: Int! total: Int! }\n\ntype Node {\n id: String! type: String! name: String! confidence: Float!\n domain: String subDomain: String qualityScore: Float owner: String cost: Cost tags: [String!]!\n}\ntype DependencyNode {\n id: String! type: String! name: String! confidence: Float!\n domain: String subDomain: String qualityScore: Float owner: String cost: Cost tags: [String!]! depth: Int!\n}\ntype Edge { sourceId: String! targetId: String! relationship: String! confidence: Float! evidence: String! }\n\ntype Summary {\n sessionId: String!\n totals: Totals!\n topConnected: [TopConnected!]!\n anomalies: [Anomaly!]!\n contributors: Int!\n costByDomain: [CostRollup!]!\n costByOwner: [CostRollup!]!\n costCoverage: CostCoverage!\n}\n\ntype NodeConnection { nodes: [Node!]! total: Int! limit: Int! offset: Int! }\ntype Dependencies { root: Node direction: Direction! maxDepth: Int! nodes: [DependencyNode!]! edges: [Edge!]! }\n\ntype SessionEndpoint { sessionId: String! startedAt: String! nodeCount: Int! edgeCount: Int! 
}\ntype DiffSummary { nodesAdded: Int! nodesRemoved: Int! nodesChanged: Int! edgesAdded: Int! edgesRemoved: Int! }\ntype NodeChange { id: String! changedFields: [String!]! confidenceDelta: Float! }\ntype DiffNodes { added: [Node!]! removed: [Node!]! changed: [NodeChange!]! unchanged: Int! }\ntype DiffEdges { added: [Edge!]! removed: [Edge!]! unchanged: Int! }\ntype DiffAnomalies { added: [Anomaly!]! }\ntype Diff {\n base: SessionEndpoint! current: SessionEndpoint! summary: DiffSummary!\n nodes: DiffNodes! edges: DiffEdges! anomalies: DiffAnomalies!\n}\n\ntype Session { id: String! mode: String! startedAt: String! completedAt: String name: String tenant: String! lastScannedAt: String }\n";
2207
+ /** Execute a `{ query, variables, operationName }` request. Read-only; rejects mutations. */
2208
+ declare function executeGraphql(ctx: TenantContext, body: unknown, deps: GraphqlDeps): Promise<GraphqlResult>;
2209
+ /** `GET /graphql` → the SDL as text/plain. */
2210
+ declare function handleGraphqlGet(): {
2211
+ status: number;
2212
+ body: string;
2213
+ };
2214
+
2215
+ /**
2216
+ * Shared entry logic for the read-only API server (4.2), used by both the dedicated
2217
+ * `cartography-api` binary and the `api` CLI sub-command. Mirrors `src/mcp/start.ts`:
2218
+ * opens the catalog, builds the SQLite query backend, resolves the bearer token from
2219
+ * `--token`/`CARTOGRAPHY_HTTP_TOKEN`, and starts `runApi`. All logging is to stderr;
2220
+ * the token value is never logged (only whether one is set).
2221
+ */
2222
+
2223
+ interface StartApiOptions { // Options for `startApi`; every field is optional.
2224
+ dbPath?: string; // Path of the catalog database to open.
2225
+ session?: string | 'latest'; // Session to serve. The `'latest'` literal is absorbed into `string` by the type checker, so it is documentation only.
2226
+ port?: number; // Port to bind.
2227
+ host?: string; // Host to bind.
2228
+ allowedHosts?: string[]; // Host allowlist; NOTE(review): presumably forwarded to `runApi`'s bind guards — confirm.
2229
+ allowedOrigins?: string[]; // CORS Origin allowlist; NOTE(review): presumably forwarded to `runApi` — confirm.
2230
+ token?: string; // Bearer token (`--token` / `CARTOGRAPHY_HTTP_TOKEN`); its value is never logged.
2231
+ /** Expose `/graphql` (default true). */
2232
+ graphql?: boolean;
2233
+ /** Default tenant served when a request names none. */
2234
+ tenant?: string;
2235
+ log?: (msg: string) => void; // Logger; all logging goes to stderr.
2236
+ }
2237
+ interface ParsedApiArgs extends StartApiOptions {
2238
+ /** `--help`/`-h` was passed; the caller should print usage and exit 0. */
2239
+ help?: boolean;
2240
+ }
2241
+ /** Parse `cartography-api` argv into StartApiOptions (unit-testable, no side effects). */
2242
+ declare function parseApiArgs(argv: string[]): ParsedApiArgs;
2243
+ /** Open the catalog, build the read backend, and start the API server. Returns the server. */
2244
+ declare function startApi(opts?: StartApiOptions): Promise<Server>;
2245
+
427
2246
  declare const installedAppsScanner: Scanner;
428
2247
 
2248
+ /** Well-known listening ports → node type + service name. */
2249
+ declare const PORT_MAP: Record<number, {
2250
+ type: NodeType;
2251
+ service: string;
2252
+ }>;
429
2253
  /** Extract distinct listening port numbers from ss/lsof/PowerShell output. */
430
2254
  declare function extractListeningPorts(raw: string): number[];
431
2255
  declare const portsScanner: Scanner;
432
2256
 
2257
+ /**
2258
+ * AWS infrastructure scanner.
2259
+ *
2260
+ * Read-only AWS CLI discovery (`describe`/`list` actions, JSON output) mapped to
2261
+ * deterministic nodes. Detection is a cheap `commandExists('aws')` check, so an
2262
+ * absent CLI skips the scanner without throwing. Every command stays within the
2263
+ * read-only allowlist's `aws` gate and uses `--output json` for stable parsing.
2264
+ */
2265
+
2266
+ declare const cloudAwsScanner: Scanner;
2267
+
2268
+ /**
2269
+ * Google Cloud Platform scanner.
2270
+ *
2271
+ * Read-only `gcloud … list` discovery with `--format=json` for stable parsing,
2272
+ * mapped to deterministic nodes. Detection is `commandExists('gcloud')`; an
2273
+ * absent CLI skips the scanner. Every command stays within the read-only
2274
+ * allowlist's `gcloud` gate.
2275
+ */
2276
+
2277
+ declare const cloudGcpScanner: Scanner;
2278
+
2279
+ /**
2280
+ * Azure infrastructure scanner.
2281
+ *
2282
+ * Read-only `az … list/show` discovery with `--output json` for stable parsing,
2283
+ * mapped to deterministic nodes. Detection is `commandExists('az')`; an absent
2284
+ * CLI skips the scanner. Subscription / resource-group scoping comes from the
2285
+ * hint. Every command stays within the read-only allowlist's `az` gate.
2286
+ */
2287
+
2288
+ declare const cloudAzureScanner: Scanner;
2289
+
2290
+ /**
2291
+ * Kubernetes scanner.
2292
+ *
2293
+ * Read-only `kubectl get … -o json` discovery mapped to deterministic nodes and
2294
+ * edges. Detection is `commandExists('kubectl')`; an absent CLI skips the
2295
+ * scanner. The cluster node anchors `contains` edges to its hosts and pods, and
2296
+ * a `connects_to` edge is emitted from a service to a running pod when the pod's
2297
+ * labels match the service's selector — only when both endpoints are in the
2298
+ * result set (the driver prunes dangling edges anyway). Pod enumeration is
2299
+ * bounded to keep `run_discovery` latency predictable on large clusters.
2300
+ */
2301
+
2302
+ declare const k8sScanner: Scanner;
2303
+
2304
+ /**
2305
+ * Local database scanner.
2306
+ *
2307
+ * Probes locally-installed DB clients (read-only) and discovers SQLite files in
2308
+ * app-data directories, mapping reachable servers to deterministic nodes. Each
2309
+ * client is only probed when present (`commandExists`), so an absent CLI never
2310
+ * throws and never produces a node. The expensive home-directory / config-file
2311
+ * find is opt-in via the `deep` hint token, keeping the default deterministic
2312
+ * `run_discovery` path fast. Connection strings are credential-redacted before
2313
+ * persistence.
2314
+ */
2315
+
2316
+ declare const databasesScanner: Scanner;
2317
+
2318
+ /**
2319
+ * Established-connections scanner (3.2).
2320
+ *
2321
+ * Reads the host's live TCP connections (read-only) and infers `connects_to`
2322
+ * edges from a single local `host:localhost` node to any recognized listening
2323
+ * service (`${type}:localhost:${port}` from {@link PORT_MAP}). The server-side
2324
+ * node is only kept when `portsScanner` also mapped that port in the same run —
2325
+ * the in-batch endpoint gate in `runLocalDiscovery` drops edges to ports nothing
2326
+ * is listening on (graceful degradation, never an error).
2327
+ */
2328
+
2329
+ interface EstablishedConn { // One established TCP connection, as produced by `parseEstablished`.
2330
+ localPort: number; // Port on the local side of the connection.
2331
+ remoteHost: string; // Host on the remote side of the connection.
2332
+ remotePort: number; // Port on the remote side of the connection.
2333
+ }
2334
+ /**
2335
+ * Parse established connections from `ss -tnp`, `lsof -nP`, or PowerShell output
2336
+ * (pure, host-independent). Deduplicates and silently ignores unparseable rows.
2337
+ *
2338
+ * Recognized shapes:
2339
+ * - ss: `ESTAB 0 0 127.0.0.1:54321 127.0.0.1:5432 …`
2340
+ * - lsof: `node 1 u … TCP 127.0.0.1:54321->127.0.0.1:5432 (ESTABLISHED)`
2341
+ * - PS: `127.0.0.1:54321 -> 127.0.0.1:5432`
2342
+ */
2343
+ declare function parseEstablished(raw: string): EstablishedConn[];
2344
+ declare const connectionsScanner: Scanner;
2345
+
2346
+ /**
2347
+ * Service & reverse-proxy config scanner (3.2).
2348
+ *
2349
+ * Infers *declared* dependency edges from local config without running any service:
2350
+ * - connection-string env vars (`DATABASE_URL`, `REDIS_URL`, …) → `reads_from` /
2351
+ * `depends_on` to `${type}:localhost:${port}`, confidence `connection-string` (0.6);
2352
+ * - nginx `upstream` / `proxy_pass` host:port → `depends_on`, confidence `config-declared` (0.7);
2353
+ * - docker-compose `depends_on:` → `container:<svc> depends_on container:<dep>`, 0.7.
2354
+ *
2355
+ * **Security boundary:** connection strings routinely embed credentials and
2356
+ * `sanitizeUntrusted` (applied later in `insertEdge`) is NOT a credential filter, so
2357
+ * this scanner redacts userinfo / `password=` *before* the value reaches `evidence`.
2358
+ */
2359
+
2360
+ /**
2361
+ * Replace credentials in a connection string with `****` (the security boundary).
2362
+ * Strips the `user:pass@` userinfo and any `password=`/`pwd=` query token.
2363
+ */
2364
+ declare function redactConnectionString(url: string): string;
2365
+ /**
2366
+ * Parse nginx `upstream { server host:port; }` and `proxy_pass http://host:port`
2367
+ * directives into `{ host, port }` targets (pure, host-independent).
2368
+ */
2369
+ declare function parseNginxUpstreams(raw: string): {
2370
+ host: string;
2371
+ port: number;
2372
+ }[];
2373
+ /**
2374
+ * Parse docker-compose `depends_on:` blocks (both list and map form) into a
2375
+ * `{ service, dependsOn[] }` array (pure). Indentation-based, dependency-free.
2376
+ */
2377
+ declare function parseComposeDeps(raw: string): {
2378
+ service: string;
2379
+ dependsOn: string[];
2380
+ }[];
2381
+ /**
2382
+ * Map a connection-string env var to a candidate edge (pure). Returns the
2383
+ * relationship, the target service port, and a **credential-redacted** evidence
2384
+ * string — or null when the scheme/port is not recognized.
2385
+ */
2386
+ declare function parseConnectionString(name: string, url: string): {
2387
+ relationship: EdgeRelationship;
2388
+ service: string;
2389
+ port: number;
2390
+ evidence: string;
2391
+ } | null;
2392
+ declare const serviceConfigScanner: Scanner;
2393
+
2394
+ /**
2395
+ * Confidence rubric for inferred dependency edges (3.2).
2396
+ *
2397
+ * A single source of truth so every scanner that infers an edge draws its
2398
+ * `confidence` from the same strictly-ordered tier set, and downstream consumers
2399
+ * (compliance 3.4, anomaly 3.6) can rank evidence quality consistently. All values
2400
+ * are in `(0,1]`; confidence is never fabricated — an absent evidence source yields
2401
+ * no edge rather than a low-confidence guess.
2402
+ */
2403
+ /** Evidence tiers for inferred dependency edges, strongest first. */
2404
+ type EvidenceKind = 'established-connection' | 'config-declared' | 'connection-string' | 'co-location';
2405
+ /** Single source of truth for edge confidence. Strictly ordered, all in (0,1]. */
2406
+ declare const CONFIDENCE: Record<EvidenceKind, number>;
2407
+ /** Build a self-describing, auditable evidence string with an ISO-8601 UTC stamp. */
2408
+ declare function evidenceLine(kind: EvidenceKind, detail: string): string;
2409
+
2410
+ /**
2411
+ * Shared helpers for the cloud / k8s / database scanners.
2412
+ *
2413
+ * These keep the provider scanners (`cloud-aws`, `cloud-gcp`, `cloud-azure`,
2414
+ * `k8s`, `databases`) small and consistent: JSON parsing that never throws,
2415
+ * provider-parameter extraction from the single `ScanContext.hint` channel
2416
+ * (validated against the same strict patterns the agent tools use), and the
2417
+ * `=== KEY ===` report formatter so the structured + raw-report output shape
2418
+ * stays identical to the legacy agent-tool reports.
2419
+ */
2420
+ /**
2421
+ * Parse JSON CLI output. Returns `undefined` for empty output, our sentinel
2422
+ * placeholders (`(error or not available)`, `(skipped …)`), or malformed JSON —
2423
+ * it never throws, so a scanner degrades to "no nodes" instead of crashing the
2424
+ * discovery run.
2425
+ */
2426
+ declare function safeJson<T = unknown>(raw: string): T | undefined;
2427
+ /** Provider parameters parsed out of a {@link ScanContext.hint}. */
2428
+ interface ScanHintParams { // Result of `parseScanHint`: recognized `key=value` parameters plus the leftover free text.
2429
+ namespace?: string; // NOTE(review): presumably the Kubernetes namespace (`k8s-namespace` pattern) — confirm.
2430
+ region?: string; // NOTE(review): presumably the AWS region (`aws-region` pattern) — confirm.
2431
+ profile?: string; // NOTE(review): presumably the AWS profile (`aws-profile` pattern) — confirm.
2432
+ project?: string; // NOTE(review): presumably the GCP project (`gcp-project` pattern) — confirm.
2433
+ subscription?: string; // NOTE(review): presumably the Azure subscription (`azure-subscription` pattern) — confirm.
2434
+ resourceGroup?: string; // NOTE(review): presumably the Azure resource group (`azure-resource-group` pattern) — confirm.
2435
+ /** Leftover terms that are not `key=value` pairs (kept for backward compatibility with the installed-apps use of the hint). */
2436
+ free: string;
2437
+ }
2438
+ /**
2439
+ * Parse `key=value` provider parameters from a `ScanContext.hint`, validating
2440
+ * each value against its strict pattern via {@link assertSafeScanArg} (throws on
2441
+ * an injection payload — even a read-only one the allowlist would otherwise
2442
+ * permit). Unknown tokens flow through to {@link ScanHintParams.free} unchanged.
2443
+ */
2444
+ declare function parseScanHint(hint: string | undefined): ScanHintParams;
2445
+ /** Render `=== KEY ===\n…` report sections (the legacy agent-tool report shape). */
2446
+ declare function buildReport(sections: Array<[string, string]>): string;
2447
+
2448
+ /** The only surface a plugin's `register()` touches. */
2449
+ interface ScannerPluginApi {
2450
+ /** Register a scanner the host will namespace, command-gate, and freeze. */
2451
+ registerScanner(scanner: Scanner): void;
2452
+ }
2453
+ /** The default-exported shape a `@datasynx/scanner-*` package implements. */
2454
+ interface ScannerPlugin {
2455
+ /** Human-readable plugin name (informational; the package name is the identity). */
2456
+ name: string;
2457
+ /** Register the plugin's scanners through the host-provided API. */
2458
+ register(api: ScannerPluginApi): void;
2459
+ }
2460
+ /**
2461
+ * Identity helper a plugin author default-exports. It only narrows the type so
2462
+ * editors guide authoring; it adds no runtime behaviour.
2463
+ */
2464
+ declare function definePlugin(plugin: ScannerPlugin): ScannerPlugin;
2465
+ /**
2466
+ * Zod shape validating a {@link Scanner} produced by an external plugin.
2467
+ * `platforms` uses the concrete `Platform` literals so an invalid platform is
2468
+ * rejected at registration; `detect`/`scan` are validated as callables only
2469
+ * (their async signatures cannot be expressed in zod) — behavioural safety comes
2470
+ * from the gated `ctx.run` wrapper plus the per-scanner runtime try/catch.
2471
+ */
2472
+ declare const ScannerShape: z.ZodObject<{
2473
+ id: z.ZodString;
2474
+ title: z.ZodString;
2475
+ platforms: z.ZodUnion<readonly [z.ZodLiteral<"all">, z.ZodArray<z.ZodEnum<{
2476
+ linux: "linux";
2477
+ darwin: "darwin";
2478
+ win32: "win32";
2479
+ }>>]>;
2480
+ allowedCommands: z.ZodOptional<z.ZodArray<z.ZodString>>;
2481
+ detect: z.ZodCustom<(ctx: ScanContext) => boolean | Promise<boolean>, (ctx: ScanContext) => boolean | Promise<boolean>>;
2482
+ scan: z.ZodCustom<(ctx: ScanContext) => Promise<ScanResult>, (ctx: ScanContext) => Promise<ScanResult>>;
2483
+ }, z.core.$strip>;
2484
+ /**
2485
+ * Validate an unknown value as a {@link Scanner}; throws `ZodError` on mismatch.
2486
+ * Returns the original value (the parsed result discards the live function
2487
+ * references), so the caller keeps the real `detect`/`scan` closures.
2488
+ */
2489
+ declare function validateScanner(value: unknown): Scanner;
2490
+
433
2491
  /** A registry pre-loaded with the built-in deterministic scanners. */
434
2492
  declare function defaultRegistry(): ScannerRegistry;
435
2493
 
2494
+ /**
2495
+ * Opt-in scanner-plugin loader.
2496
+ *
2497
+ * Resolves the explicitly configured plugin package names (consent-first: no
2498
+ * filesystem auto-scan), dynamically `import()`s each in its own try/catch, and
2499
+ * registers its scanners through {@link ScannerRegistry.registerExternal}
2500
+ * (validated, namespaced, command-gated, frozen). One bad plugin — a
2501
+ * non-resolvable name, a missing/invalid default export, a `register()` that
2502
+ * throws, or a malformed scanner that fails zod validation — is logged via
2503
+ * `logWarn` and skipped. It never aborts discovery (optional-deps degrade
2504
+ * pattern, mirroring `src/semantic/search.ts`).
2505
+ */
2506
+
2507
+ /**
2508
+ * Load each configured plugin package into the given registry. Returns the
2509
+ * package names that loaded and registered successfully (for audit/reporting).
2510
+ */
2511
+ declare function loadPlugins(registry: ScannerRegistry, pkgs: readonly string[]): Promise<string[]>;
2512
+
436
2513
  /**
437
2514
  * Deterministic, LLM-free local discovery.
438
2515
  *
@@ -446,18 +2523,48 @@ declare function defaultRegistry(): ScannerRegistry;
446
2523
  interface LocalDiscoveryOptions {
447
2524
  hint?: string;
448
2525
  registry?: ScannerRegistry;
2526
+ /**
2527
+ * Opt-in scanner plugin package names to load (consent-first). Honoured only
2528
+ * when no explicit `registry` is supplied — an injected registry is used
2529
+ * verbatim, so tests and advanced callers stay in full control.
2530
+ */
2531
+ plugins?: string[];
449
2532
  /** Called after each scanner with a short progress line. */
450
2533
  onProgress?: (line: string) => void;
2534
+ /**
2535
+ * Override individual {@link ScanContext} fields. Production omits this and the
2536
+ * real platform helpers are used; tests inject deterministic sources
2537
+ * (`commandExists`, `scanListeningPorts`, `scanBookmarks`) so the built-in
2538
+ * scanners run against known inputs without depending on the host.
2539
+ */
2540
+ ctx?: Partial<ScanContext>;
2541
+ /**
2542
+ * `'replace'` (default) preserves today's append-all behavior: every scanned
2543
+ * node/edge is upserted/inserted into the session. `'update'` (2.1 incremental
2544
+ * discovery) reads the session's prior state, diffs it against the current scan,
2545
+ * and applies only the delta — upserting changed/added, pruning removed (nodes
2546
+ * and edges), and stamping `last_scanned_at`. Same `session_id` either way.
2547
+ */
2548
+ mode?: 'replace' | 'update';
451
2549
  }
452
- declare function runLocalDiscovery(db: CartographyDB, sessionId: string, opts?: LocalDiscoveryOptions): Promise<{
2550
+ interface LocalDiscoveryResult {
453
2551
  nodes: number;
454
2552
  edges: number;
455
2553
  scanners: string[];
456
- }>;
457
- /** Adapter matching the MCP `DiscoveryFn` signature. */
458
- declare function localDiscoveryFn(registry?: ScannerRegistry): (db: CartographyDB, sessionId: string, opts: {
2554
+ /** Present only in `'update'` mode: the delta applied to the session (2.1). */
2555
+ delta?: TopologyDelta;
2556
+ }
2557
+ declare function runLocalDiscovery(db: CartographyDB, sessionId: string, opts?: LocalDiscoveryOptions): Promise<LocalDiscoveryResult>;
2558
+ /**
2559
+ * Adapter matching the MCP `DiscoveryFn` signature. When no explicit `registry`
2560
+ * is provided, the opt-in `plugins` list is loaded onto the default registry on
2561
+ * each discovery run (so late-installed plugins are picked up).
2562
+ */
2563
+ declare function localDiscoveryFn(registry?: ScannerRegistry, plugins?: string[]): (db: CartographyDB, sessionId: string, opts: {
459
2564
  hint?: string;
2565
+ mode?: "replace" | "update";
460
2566
  }) => Promise<{
2567
+ delta?: TopologyDelta | undefined;
461
2568
  nodes: number;
462
2569
  edges: number;
463
2570
  }>;
@@ -520,12 +2627,175 @@ declare class VectorStore {
520
2627
  * or return nothing.
521
2628
  */
522
2629
 
2630
+ /** Options for {@link createSemanticSearch}. */
2631
+ interface SemanticSearchOptions {
2632
+ /** Logger for mode/degradation diagnostics (stderr). No-op if omitted. */
2633
+ log?: (msg: string) => void;
2634
+ }
523
2635
  /**
524
2636
  * Build a {@link SearchFn} that prefers semantic (vector) search and falls back to
525
2637
  * lexical. Pass an explicit embedder, or let it lazily load the local transformer
526
- * (returns a lexical-only function if none is available).
2638
+ * (returns a lexical-only function if none is available). Pass `opts.log` to record
2639
+ * which mode was resolved — semantic readiness or the reason it degraded to lexical.
2640
+ */
2641
+ declare function createSemanticSearch(db: CartographyDB, embedder?: EmbeddingProvider, opts?: SemanticSearchOptions): Promise<SearchFn>;
2642
+
2643
+ /**
2644
+ * Types for the `install` harness — the layer that writes Cartography's MCP
2645
+ * server entry into each host's native config file (JSON / TOML / YAML).
2646
+ *
2647
+ * A {@link ClientSpec} is a declarative description of one host: where its config
2648
+ * lives per OS and scope, what serialization format it uses, and how to splice a
2649
+ * server entry into that host's particular schema (`mcpServers`, `servers`,
2650
+ * `context_servers`, `[mcp_servers]`, `extensions`, …). The merge engine and CLI
2651
+ * are generic; all host-specific knowledge lives in specs.
2652
+ */
2653
+ type ConfigFormat = 'json' | 'toml' | 'yaml'; // Serialization format of a host's config file.
2654
+ type Scope = 'global' | 'project'; // Which config is targeted: the global one or the project-scoped one.
2655
+ type OsKind = 'mac' | 'win' | 'linux'; // Operating-system family used when resolving a config path.
2656
+ /** A transport-agnostic description of the Cartography MCP server to register. */
2657
+ interface ServerEntry { // Either a stdio entry (`command`/`args`/`env`) or an HTTP entry (`url`).
2658
+ /** stdio command (mutually exclusive with `url`). */
2659
+ command?: string;
2660
+ args?: string[]; // Arguments for `command`.
2661
+ env?: Record<string, string>; // Environment variables for the server; NOTE(review): presumably stdio-only — confirm.
2662
+ /** Streamable HTTP endpoint (mutually exclusive with `command`). */
2663
+ url?: string;
2664
+ }
2665
+ /** Everything a spec needs to resolve a config path; injectable for tests. */
2666
+ interface ResolveContext {
2667
+ scope: Scope;
2668
+ os: OsKind;
2669
+ /** User home directory. */
2670
+ home: string;
2671
+ /** Project/working directory (for project-scoped configs). */
2672
+ cwd: string;
2673
+ /** Environment variables (for `%APPDATA%` etc. on Windows). */
2674
+ env: Record<string, string | undefined>;
2675
+ }
2676
+ interface ClientSpec {
2677
+ /** Stable id used on the CLI, e.g. `claude-code`. */
2678
+ id: string;
2679
+ /** Human label, e.g. `Claude Code`. */
2680
+ label: string;
2681
+ format: ConfigFormat;
2682
+ /** Resolve the absolute config path for the given scope/OS, or undefined if unsupported. */
2683
+ path(ctx: ResolveContext): string | undefined;
2684
+ /** Pure: return a new config object with `entry` spliced in under `serverName`. */
2685
+ apply(existing: Record<string, unknown>, serverName: string, entry: ServerEntry): Record<string, unknown>;
2686
+ /** Optional caveat surfaced to the user (e.g. "uses `servers`, not `mcpServers`"). */
2687
+ note?: string;
2688
+ }
2689
+
2690
+ /**
2691
+ * Format-agnostic (de)serialization for host config files. JSON keeps 2-space
2692
+ * indentation; TOML uses smol-toml; YAML uses the `yaml` package. Empty or
2693
+ * whitespace-only input parses to an empty object so a fresh install starts clean.
2694
+ */
2695
+
2696
+ declare function parseConfig(text: string, format: ConfigFormat): Record<string, unknown>; // Parse `text` as `format`; empty or whitespace-only input yields an empty object.
2697
+ declare function serializeConfig(obj: Record<string, unknown>, format: ConfigFormat): string; // Serialize `obj` as `format`; JSON output keeps 2-space indentation.
2698
+
2699
+ /**
2700
+ * Idempotent deep-merge of plain objects. Used by client specs to splice a server
2701
+ * entry into an existing config without clobbering unrelated keys. Arrays and
2702
+ * scalars from `source` replace those in `target`; nested plain objects merge
2703
+ * recursively. `source` is never mutated; `target` is cloned.
2704
+ */
2705
+ declare function deepMerge<T extends Record<string, unknown>>(target: T, source: Record<string, unknown>): T;
2706
+
2707
+ /**
2708
+ * Shared entry-shape helpers. Most hosts accept the Claude-Desktop-style object
2709
+ * (`command`/`args`/`env` for stdio, `url`/`type` for HTTP); specs that diverge
2710
+ * (VS Code `type`, Zed `source`, Codex TOML, …) compose or override these.
2711
+ */
2712
+
2713
+ /** The common `{ command, args, env }` | `{ type:'http', url }` server object. */
2714
+ declare function mcpServerObject(entry: ServerEntry): Record<string, unknown>;
2715
+
2716
+ /**
2717
+ * The canonical Cartography MCP server entry every client spec receives. Kept in
2718
+ * one place so the npx invocation (and any future packaging change) is defined
2719
+ * exactly once and reused across all hosts.
2720
+ */
2721
+
2722
+ declare const PACKAGE_NAME = "@datasynx/agentic-ai-cartography"; // npm package name; NOTE(review): presumably the package the `npx` stdio entry runs — confirm.
2723
+ declare const MCP_BIN = "cartography-mcp"; // Name of the MCP server binary (the "bin name" that `EntryOptions.packageArgs` are appended after).
2724
+ declare const DEFAULT_SERVER_NAME = "cartography"; // NOTE(review): presumably the name used when `PlanOptions.serverName` is omitted — confirm.
2725
+ interface EntryOptions {
2726
+ /** `http` produces a `url` entry; otherwise stdio via npx. */
2727
+ transport?: 'stdio' | 'http';
2728
+ /** HTTP endpoint (used when transport === 'http'). */
2729
+ url?: string;
2730
+ /** Extra environment variables to inject. */
2731
+ env?: Record<string, string>;
2732
+ /** Extra package arguments appended after the bin name (e.g. `--db`, `--session`). */
2733
+ packageArgs?: string[];
2734
+ }
2735
+ /** Build the default server entry (stdio via `npx` unless an HTTP url is given). */
2736
+ declare function defaultServerEntry(opts?: EntryOptions): ServerEntry;
2737
+
2738
+ /**
2739
+ * Generic install planning/applying. `planInstall` is pure relative to a provided
2740
+ * context (reads the existing file, computes the merged result, never writes) so
2741
+ * it powers both `--dry-run` and the real write in `applyInstall`.
2742
+ */
2743
+
2744
+ interface InstallPlan { // Result of `planInstall`; `applyInstall` writes it to disk.
2745
+ client: string; // NOTE(review): presumably the targeted `ClientSpec.id` — confirm.
2746
+ label: string; // NOTE(review): presumably the targeted `ClientSpec.label` — confirm.
2747
+ path: string; // Path of the config file the plan applies to.
2748
+ format: ConfigFormat; // Serialization format of that config file.
2749
+ /** Existing file contents ('' when the file does not exist). */
2750
+ before: string;
2751
+ /** Contents that would be written. */
2752
+ after: string;
2753
+ fileExists: boolean; // Whether the config file already exists.
2754
+ changed: boolean; // NOTE(review): presumably true when `after` differs from `before` — confirm.
2755
+ note?: string; // Optional caveat; NOTE(review): presumably carried over from `ClientSpec.note` — confirm.
2756
+ }
2757
+ /** Detect the current OS kind. */
2758
+ declare function currentOs(): OsKind;
2759
+ /** Build a real resolve context from the running environment. */
2760
+ declare function defaultContext(scope: Scope): ResolveContext;
2761
+ interface PlanOptions { // Inputs to `planInstall` besides the client spec and resolve context.
2762
+ serverName?: string; // Name the entry is registered under; NOTE(review): presumably defaults to `DEFAULT_SERVER_NAME` — confirm.
2763
+ entry: ServerEntry; // Server entry to install.
2764
+ }
2765
+ /** Compute what installing `entry` into `spec` would change. Reads the config file but never writes. */
2766
+ declare function planInstall(spec: ClientSpec, ctx: ResolveContext, opts: PlanOptions): InstallPlan;
2767
+ /** Write a plan's result to disk, creating parent directories as needed. */
2768
+ declare function applyInstall(plan: InstallPlan): void;
2769
+ /** A minimal line-oriented diff for `--dry-run` output. */
2770
+ declare function renderDiff(before: string, after: string): string;
2771
+
2772
+ /**
2773
+ * Registry of supported MCP hosts. The merge engine and CLI are generic; every
2774
+ * host-specific detail (config path, format, schema key) lives in a `ClientSpec`
2775
+ * here. New hosts are added by appending a spec — no engine changes required.
527
2776
  */
528
- declare function createSemanticSearch(db: CartographyDB, embedder?: EmbeddingProvider): Promise<SearchFn>;
2777
+
2778
+ /** All registered clients, in display order. Extended by later milestones. */
2779
+ declare const CLIENTS: ClientSpec[];
2780
+ declare function getClient(id: string): ClientSpec | undefined; // Look up a client spec by id; `undefined` when no client matches.
2781
+ declare function listClients(): ReadonlyArray<Pick<ClientSpec, 'id' | 'label' | 'format' | 'note'>>; // Read-only summaries (id, label, format, note) of the clients.
2782
+
2783
+ /**
2784
+ * One-click install deeplinks for hosts that support them. Cursor expects a
2785
+ * **Base64**-encoded server config; VS Code expects **URL-encoded** JSON — mixing
2786
+ * the two encodings is a classic mistake, so each has its own helper.
2787
+ */
2788
+
2789
+ /** `cursor://…/mcp/install?name=<name>&config=<base64 JSON of the server config>`. */
2790
+ declare function cursorDeeplink(name: string, entry: ServerEntry): string;
2791
+ interface VscodeDeeplinkOptions {
2792
+ /** Target VS Code Insiders (`vscode-insiders://`). */
2793
+ insiders?: boolean;
2794
+ }
2795
+ /** `vscode://mcp/install?<URL-encoded JSON>` where the JSON is `{ name, ...serverConfig }`. */
2796
+ declare function vscodeDeeplink(name: string, entry: ServerEntry, opts?: VscodeDeeplinkOptions): string;
2797
+ /** A `code --add-mcp '<json>'` CLI one-liner (alternative to the deeplink). */
2798
+ declare function codeAddMcpCommand(name: string, entry: ServerEntry): string;
529
2799
 
530
2800
  /**
531
2801
  * Circuit breaker for sequential CLI scans.
@@ -542,8 +2812,66 @@ declare function createScanRunner(runFn: (cmd: string, opts?: {
542
2812
  interface CartographyToolsOptions {
543
2813
  /** Called when the agent needs a human answer. Return the user's response. */
544
2814
  onAskUser?: (question: string, context?: string) => Promise<string>;
2815
+ /** Max characters — not bytes, despite the name — of a single tool response (sanitized + truncated). Default 100 000. */
2816
+ maxResponseBytes?: number;
545
2817
  }
2818
+ /**
2819
+ * Sanitize untrusted scan output (strip invisible/control characters) and cap it
2820
+ * at `max` characters so a single verbose scan can never blow the agent's context
2821
+ * window. Appends an explicit truncation notice when clamped.
2822
+ */
2823
+ declare function clampText(raw: string, max: number): string;
546
2824
  declare function stripSensitive(target: string): string;
2825
+ /**
2826
+ * Strict patterns for the cloud/k8s scan parameters that get spliced into a
2827
+ * shell command. `run()` re-checks the final command against the read-only
2828
+ * allowlist, but values are validated here first so an injection payload never
2829
+ * reaches the shell — not even a read-only one (e.g. `; cat ~/.ssh/id_rsa`)
2830
+ * that the allowlist would otherwise permit and which could disclose files.
2831
+ */
2832
+ declare const SCAN_ARG_PATTERNS: {
2833
+ readonly 'k8s-namespace': RegExp;
2834
+ readonly 'aws-region': RegExp;
2835
+ readonly 'aws-profile': RegExp;
2836
+ readonly 'gcp-project': RegExp;
2837
+ readonly 'azure-subscription': RegExp;
2838
+ readonly 'azure-resource-group': RegExp;
2839
+ };
2840
+ type ScanArgKind = keyof typeof SCAN_ARG_PATTERNS;
2841
+ /** Throw if `value` fails the strict pattern for `kind`; otherwise return it. */
2842
+ declare function assertSafeScanArg(kind: ScanArgKind, value: string): string;
2843
+ /** Redact `user:password@` credentials embedded in any URL/DSN-like string. */
2844
+ declare function redactSecrets(value: string): string;
2845
+ /** Recursively redact secrets from arbitrary metadata before persistence. */
2846
+ declare function redactValue(value: unknown): unknown;
2847
+ /** The MCP text-content shape every tool handler returns. */
2848
+ interface ToolResult {
2849
+ content: {
2850
+ type: 'text';
2851
+ text: string;
2852
+ }[];
2853
+ }
2854
+ /**
2855
+ * Provider-neutral tool definition: a zod input shape plus a handler that returns
2856
+ * the MCP text-content shape. This is the single source of truth for the discovery
2857
+ * tools — the Claude provider wraps these via the SDK `tool()` factory, while
2858
+ * OpenAI/Ollama convert `inputShape` to JSON Schema and call `handler` directly.
2859
+ */
2860
+ interface AgentTool {
2861
+ name: string;
2862
+ description: string;
2863
+ /** Raw zod shape (ZodRawShape) for the tool input. */
2864
+ inputShape: z.ZodRawShape;
2865
+ /** Host-side gating hints (never a security boundary). */
2866
+ annotations: Record<string, boolean>;
2867
+ handler: (args: Record<string, unknown>) => Promise<ToolResult>;
2868
+ }
2869
+ /**
2870
+ * Build the discovery tool handlers with **no** SDK dependency. Returns the same
2871
+ * handler bodies used by the Claude path, exposed neutrally so any provider can
2872
+ * reuse them. `buildCartographyToolDefinitions` wraps these for the Claude SDK.
2873
+ */
2874
+ declare function buildCartographyToolHandlers(db: CartographyDB, sessionId: string, opts?: CartographyToolsOptions): Promise<AgentTool[]>;
547
2875
  declare function createCartographyTools(db: CartographyDB, sessionId: string, opts?: CartographyToolsOptions): Promise<McpServerConfig>;
548
2876
 
549
2877
  declare const safetyHook: HookCallback;
@@ -602,16 +2930,667 @@ type DiscoveryEvent = {
602
2930
  type AskUserFn = (question: string, context?: string) => Promise<string>;
603
2931
  declare function runDiscovery(config: CartographyConfig, db: CartographyDB, sessionId: string, onEvent?: (event: DiscoveryEvent) => void, onAskUser?: AskUserFn, hint?: string): Promise<void>;
604
2932
 
2933
+ /** Inputs a provider needs to run one discovery session. */
2934
+ interface AgentRunContext {
2935
+ config: CartographyConfig;
2936
+ db: CartographyDB;
2937
+ sessionId: string;
2938
+ systemPrompt: string;
2939
+ initialPrompt: string;
2940
+ onAskUser?: AskUserFn;
2941
+ /** Wall-clock deadline (epoch ms). Providers MUST stop and emit `done` past this. */
2942
+ deadlineMs: number;
2943
+ }
2944
+ /**
2945
+ * A provider-neutral agent backend. Yields the existing `DiscoveryEvent` stream.
2946
+ * Implementations MUST: enforce `config.maxTurns`, honor `ctx.deadlineMs`, route
2947
+ * every shell command through `checkReadOnly` + `run()`, and write an audit row per
2948
+ * executed tool so the audit trail is identical across providers.
2949
+ */
2950
+ interface AgentProvider {
2951
+ readonly name: ProviderName;
2952
+ /** Throw a clear Error if the optional dep / API key / CLI is missing. */
2953
+ ensureAvailable(config: CartographyConfig): Promise<void>;
2954
+ run(ctx: AgentRunContext): AsyncIterable<DiscoveryEvent>;
2955
+ }
2956
+ type ProviderFactory = () => AgentProvider;
2957
+ /** Registry of provider factories; the single source of truth for valid provider names. */
2958
+ declare class ProviderRegistry {
2959
+ private readonly factories;
2960
+ register(name: ProviderName, factory: ProviderFactory): void;
2961
+ has(name: string): name is ProviderName;
2962
+ resolve(name: ProviderName): AgentProvider;
2963
+ names(): ProviderName[];
2964
+ }
2965
+
2966
+ declare function createDefaultRegistry(): ProviderRegistry;
2967
+ declare const defaultProviderRegistry: ProviderRegistry;
2968
+
2969
+ declare function createClaudeProvider(): AgentProvider;
2970
+
2971
+ declare function createOpenAIProvider(): AgentProvider;
2972
+
2973
+ declare function createOllamaProvider(): AgentProvider;
2974
+
2975
+ interface JsonSchema {
2976
+ type: 'object';
2977
+ properties: Record<string, unknown>;
2978
+ required: string[];
2979
+ additionalProperties: boolean;
2980
+ }
2981
+ /** Convert a flat ZodRawShape used by the discovery tools to a JSON Schema object. */
2982
+ declare function shapeToJsonSchema(shape: z.ZodRawShape): JsonSchema;
2983
+
2984
+ declare function createBashTool(): AgentTool;
2985
+
2986
+ /**
2987
+ * Drift classification + dispatch (3.1).
2988
+ *
2989
+ * Layered on top of the pure topology delta (`diffTopology` → `db.diffSessions`):
2990
+ * `classifyDrift` turns a `TopologyDiff` into a severity-ranked `DriftAlert`
2991
+ * (deterministic, DB-agnostic, exhaustively unit-testable), and `runDrift` is the
2992
+ * one-shot runner — the seam scheduled discovery (2.5) calls after a scan — that
2993
+ * resolves the latest two sessions, classifies, threshold-filters, fans out to the
2994
+ * configured sinks (one sink's failure never aborts the others), and writes an
2995
+ * audit event. No new tables, no migration; read-only except the audit row.
2996
+ */
2997
+
2998
+ /** Return the highest severity among items; `info` for an empty list. */
2999
+ declare function maxSeverity(items: DriftAlertItem[]): Severity$1;
3000
+ /**
3001
+ * Names of the metadata keys whose value changed and which warrant escalation to
3002
+ * `critical`. Empty unless the node-changed touched `metadata` and a
3003
+ * security-relevant key (case-insensitive) actually differs before→after.
3004
+ */
3005
+ declare function securityRelevantChange(change: NodeChange): string[];
3006
+ /**
3007
+ * Classify a topology diff into a severity-tagged alert. Pure & deterministic
3008
+ * (only `generatedAt` depends on `now`). Rules:
3009
+ * - node-removed, edge-removed → warning (lost capability / connectivity)
3010
+ * - node-changed touching a security-relevant
3011
+ * metadata key → critical
3012
+ * - node-changed (other fields) → info
3013
+ * - node-added, edge-added → info
3014
+ * Overall severity = maxSeverity(items).
3015
+ */
3016
+ declare function classifyDrift(diff: TopologyDiff, now?: Date): DriftAlert;
3017
+ /** Drop items strictly below `min`; recompute the overall severity over what's kept. */
3018
+ declare function filterBySeverity(alert: DriftAlert, min: Severity$1): DriftAlert;
3019
+ interface RunDriftOptions {
3020
+ base?: string;
3021
+ current?: string;
3022
+ minSeverity?: Severity$1;
3023
+ }
3024
+ /**
3025
+ * Resolve base/current (newest-first `getSessions`, like the CLI/MCP surfaces),
3026
+ * classify, filter below `minSeverity`, dispatch to all configured sinks
3027
+ * (`Promise.allSettled` — one sink failure never aborts the others), and record an
3028
+ * audit event. Returns the alert, or `null` when fewer than two sessions exist
3029
+ * (graceful no-op). The seam scheduled discovery (2.5) calls post-scan.
3030
+ */
3031
+ declare function runDrift(db: CartographyDB, config: CartographyConfig, opts?: RunDriftOptions): Promise<DriftAlert | null>;
3032
+
3033
+ /**
3034
+ * Anomaly detection (3.6) — pure, deterministic flagging of standing structural risk.
3035
+ *
3036
+ * Knows nothing about the database; operates on plain `NodeRow[]` + an in-memory
3037
+ * degree map so it can be unit-tested in isolation and reused by the CLI, the MCP
3038
+ * server, and the scheduled-scan alert path (3.1). Same topology always yields the
3039
+ * same anomalies with identical `reason` strings (mirrors `deriveSessionName`). No
3040
+ * LLM, no I/O, no dependency. `reason` interpolates only the structured `nodeId` and
3041
+ * numeric scores — never raw node name/metadata (prompt-injection safe).
3042
+ */
3043
+
3044
+ /** Flag zero/weak-degree nodes. degree 0 → high; 1..orphanWeakDegree → low. */
3045
+ declare function detectOrphans(nodes: NodeRow[], degree: ReadonlyMap<string, number>, thresholds?: AnomalyThresholds): Anomaly[];
3046
+ /** Flag unmanaged-type or undomained low-confidence/quality nodes. */
3047
+ declare function detectShadowIt(nodes: NodeRow[], thresholds?: AnomalyThresholds): Anomaly[];
3048
+ /** Aggregate + stable sort (nodeId, then kind). Returns [] for an empty topology. */
3049
+ declare function detectAnomalies(nodes: NodeRow[], degree: ReadonlyMap<string, number>, thresholds?: AnomalyThresholds): Anomaly[];
3050
+ /** Anomalies present in `current` but absent in `base`, keyed by (nodeId, kind). Pure & order-stable. */
3051
+ declare function newAnomalies(base: Anomaly[], current: Anomaly[]): Anomaly[];
3052
+
3053
+ /**
3054
+ * Natural-language topology query resolver (3.5) — deterministic, LLM-free.
3055
+ *
3056
+ * Turns a plain-English question ("services that depend on the payments DB") into a
3057
+ * structured intent, then composes the existing primitives — the injected `SearchFn`
3058
+ * (semantic↔lexical degradation inherited for free) + `db.getDependencies` + a
3059
+ * post-traversal type filter — and echoes the parsed intent for explainability.
3060
+ * No LLM, no new exec path, read-only. The NL string is sanitized + length-clamped
3061
+ * (ReDoS-safe: only linear, anchored patterns).
3062
+ */
3063
+
3064
+ /** The relationship the operator asked about, in resolver-native terms. */
3065
+ type NlRelation = 'depends-on' | 'depended-on-by' | 'connected-to' | 'list';
3066
+ /** Maps a parsed relation to the `db.getDependencies` direction. Single source of truth. */
3067
+ declare const RELATION_TO_DIRECTION: {
3068
+ readonly 'depends-on': "downstream";
3069
+ readonly 'depended-on-by': "upstream";
3070
+ readonly 'connected-to': "both";
3071
+ readonly list: undefined;
3072
+ };
3073
+ /** Structured intent parsed from a natural-language question. */
3074
+ interface NlIntent {
3075
+ readonly query: string;
3076
+ readonly subjectQuery: string;
3077
+ readonly relation: NlRelation;
3078
+ readonly direction?: 'downstream' | 'upstream' | 'both';
3079
+ /** Concrete node types the *result* set is restricted to (post-traversal), if any. */
3080
+ readonly typeFilter?: readonly NodeType[];
3081
+ /** True when no relation pattern matched and we degraded to a search-only list. */
3082
+ readonly degraded: boolean;
3083
+ }
3084
+ interface NlQueryResult {
3085
+ readonly intent: NlIntent;
3086
+ readonly anchors: Array<{
3087
+ node: NodeRow;
3088
+ score?: number;
3089
+ }>;
3090
+ readonly nodes: Array<NodeRow & {
3091
+ depth?: number;
3092
+ }>;
3093
+ readonly paths: EdgeRow[];
3094
+ }
3095
+ interface NlQueryOptions {
3096
+ readonly maxDepth?: number;
3097
+ readonly anchorLimit?: number;
3098
+ }
3099
+ /** Parse a natural-language question into a structured {@link NlIntent}. Pure & deterministic. */
3100
+ declare function parseNlQuery(raw: string): NlIntent;
3101
+ /** Execute a parsed intent: anchor via `search`, traverse, then filter results by type. */
3102
+ declare function executeNlQuery(db: CartographyDB, sessionId: string, search: SearchFn, intent: NlIntent, opts?: NlQueryOptions): Promise<NlQueryResult>;
3103
+ /** Convenience: parse + execute in one call. */
3104
+ declare function resolveNlQuery(db: CartographyDB, sessionId: string, search: SearchFn, raw: string, opts?: NlQueryOptions): Promise<NlQueryResult>;
3105
+
3106
+ /**
3107
+ * Cost attribution (3.3) — turn the topology into a FinOps lens.
3108
+ *
3109
+ * A pluggable `CostSource` yields `{ owner, cost }` keyed by node id; `enrichCosts`
3110
+ * applies it to a session via targeted, idempotent UPDATEs (never `INSERT OR
3111
+ * REPLACE`, so other node fields are untouched). The first source is `CsvCostSource`
3112
+ * — deterministic, provider-agnostic, no new dependency. Live billing-API sources
3113
+ * (AWS Cost Explorer, GCP/Azure) are future implementations of the same interface.
3114
+ */
3115
+
3116
+ /** One attribution line keyed to a node. `cost`/`owner` may be partially present. */
3117
+ interface CostRecord {
3118
+ nodeId: string;
3119
+ owner?: string;
3120
+ cost?: CostEntry;
3121
+ }
3122
+ /** A pluggable provider of cost/owner attribution, keyed by node id. */
3123
+ interface CostSource {
3124
+ readonly id: string;
3125
+ /** Attribution keyed by node id. An absent/unauthorized source resolves to an empty map (degrade). */
3126
+ fetch(): Promise<Map<string, CostRecord>>;
3127
+ }
3128
+ interface EnrichResult {
3129
+ source: string;
3130
+ total: number;
3131
+ matched: number;
3132
+ unmatched: number;
3133
+ unmatchedIds: string[];
3134
+ }
3135
+ /** How a CSV row is resolved to a node id when no explicit `nodeId` column is given. */
3136
+ type MatchStrategy = 'nodeId' | 'name' | 'tag';
3137
+ interface CsvCostSourceOptions {
3138
+ filePath: string;
3139
+ match?: MatchStrategy;
3140
+ db?: CartographyDB;
3141
+ sessionId?: string;
3142
+ }
3143
+ /**
3144
+ * Parse a cost CSV (`nodeId,owner,amount,currency,period[,source]`, header required)
3145
+ * into validated `CostRecord[]`. Each row's cost fields are validated via
3146
+ * `CostEntrySchema`; a malformed row is skipped with a `logWarn` (counts only, no
3147
+ * owner PII) — the batch never aborts.
3148
+ */
3149
+ declare function parseCostCsv(text: string): CostRecord[];
3150
+ /** CSV-backed cost source. Resolves row→node ids per the chosen `MatchStrategy`. */
3151
+ declare class CsvCostSource implements CostSource {
3152
+ private readonly opts;
3153
+ readonly id: string;
3154
+ constructor(opts: CsvCostSourceOptions);
3155
+ fetch(): Promise<Map<string, CostRecord>>;
3156
+ }
3157
+ /**
3158
+ * Idempotent post-pass: apply a source's attribution to a session via targeted
3159
+ * UPDATEs. Re-running with the same source yields the same DB state. Unmatched
3160
+ * rows (no such node id) are reported, never written.
3161
+ */
3162
+ declare function enrichCosts(db: CartographyDB, sessionId: string, source: CostSource): Promise<EnrichResult>;
3163
+
605
3164
  declare function generateTopologyMermaid(nodes: NodeRow[], edges: EdgeRow[]): string;
606
3165
  declare function generateDependencyMermaid(nodes: NodeRow[], edges: EdgeRow[]): string;
3166
+ /**
3167
+ * Render a topology diff as a Mermaid graph: added nodes/edges in green, removed
3168
+ * in red, changed in amber. Endpoints of added/removed edges that are otherwise
3169
+ * unchanged are drawn as neutral "context" nodes so every edge has both ends.
3170
+ */
3171
+ declare function generateDiffMermaid(diff: TopologyDiff): string;
607
3172
  declare function exportBackstageYAML(nodes: NodeRow[], edges: EdgeRow[], org?: string): string;
608
3173
  declare function exportJSON(db: CartographyDB, sessionId: string): string;
609
3174
  declare function exportDiscoveryApp(nodes: NodeRow[], edges: EdgeRow[], options?: {
610
3175
  theme?: 'light' | 'dark';
611
3176
  }): string;
612
3177
  declare function exportJGF(nodes: NodeRow[], edges: EdgeRow[]): string;
3178
+ /**
3179
+ * Cost rolled up by domain and owner as CSV — the FinOps export. One block per
3180
+ * scope; rows are bucketed by `(currency, period)` so mixed currencies are never
3181
+ * summed into one figure.
3182
+ */
3183
+ declare function exportCostCSV(summary: GraphSummary): string;
3184
+ /** The same cost rollup as JSON for programmatic consumers. */
3185
+ declare function exportCostSummary(summary: GraphSummary): string;
3186
+ /**
3187
+ * Render a `ComplianceReport` as `json`, `markdown`, or `mermaid`. The Mermaid form
3188
+ * reuses the diff exporter's severity-coloured `classDef` pattern: one node per gap
3189
+ * coloured by severity, with a `"✓ No compliance gaps"` empty state.
3190
+ */
3191
+ declare function exportComplianceReport(report: ComplianceReport, format: 'json' | 'markdown' | 'mermaid'): string;
613
3192
  declare function exportAll(db: CartographyDB, sessionId: string, outputDir: string, formats?: string[]): void;
614
3193
 
3194
+ /**
3195
+ * Compliance scoring engine (3.4) — pure, deterministic, DB-free.
3196
+ *
3197
+ * `scoreTopology({nodes, edges}, ruleset)` mirrors `diffTopology`'s shape: plain
3198
+ * arrays in, a structured `ComplianceReport` out. The engine is the only trusted
3199
+ * evaluator of the declarative `RuleCheck` DSL — no `eval`, no ruleset-supplied
3200
+ * code or regex. Iteration order is stabilised (nodes + rules sorted by id) so the
3201
+ * report is byte-stable for a fixed `now`.
3202
+ */
3203
+
3204
+ interface ComplianceInput {
3205
+ nodes: NodeRow[];
3206
+ edges: EdgeRow[];
3207
+ }
3208
+ declare function evaluateCheck(node: NodeRow, check: RuleCheck): boolean;
3209
+ declare function evaluateRule(input: ComplianceInput, rule: ComplianceRule): ControlResult;
3210
+ /**
3211
+ * Score a topology against a ruleset. `score = round(100 × Σweight(passed applicable) /
3212
+ * Σweight(applicable))`, weighted by severity, with not-applicable controls excluded
3213
+ * from the denominator. `score = null` / `status = 'not_applicable'` when nothing applies.
3214
+ */
3215
+ declare function scoreTopology(input: ComplianceInput, ruleset: Ruleset, opts?: {
3216
+ now?: string;
3217
+ }): ComplianceReport;
3218
+ /** Validate raw ruleset data (used by the registry and any future import path). */
3219
+ declare function loadRuleset(raw: unknown): Ruleset;
3220
+
3221
+ /**
3222
+ * Bundled ruleset registry (3.4). All rulesets are plain data, validated at load.
3223
+ */
3224
+
3225
+ /** Resolve a bundled ruleset by name, or `undefined` (callers degrade, never throw). */
3226
+ declare function getRuleset(name: string): Ruleset | undefined;
3227
+ /** List the bundled rulesets (name/version/framework/rule count) for help + degrade messages. */
3228
+ declare function listRulesets(): Array<{
3229
+ name: string;
3230
+ version: string;
3231
+ framework: string;
3232
+ ruleCount: number;
3233
+ }>;
3234
+
3235
+ /**
3236
+ * Plain-text compliance report formatter (3.4). Colour-free so it lives outside
3237
+ * `cli.ts` (which owns the colour helpers and is excluded from coverage).
3238
+ */
3239
+
3240
+ /** Render a `ComplianceReport` as a plain-text summary with `✓`/`✗` markers. */
3241
+ declare function formatComplianceText(report: ComplianceReport): string;
3242
+
3243
+ /**
3244
+ * A destination for a classified drift alert. Implementations are the *only*
3245
+ * outbound surface of the drift feature; they must degrade gracefully and must
3246
+ * not throw for transient failures — log and resolve so the runner can continue
3247
+ * dispatching to the remaining sinks.
3248
+ */
3249
+ interface DriftSink {
3250
+ /** Stable identifier for logging/audit (e.g. 'stdout', 'webhook'). */
3251
+ readonly name: string;
3252
+ /** Dispatch one alert. Never throws for transient failures. */
3253
+ emit(alert: DriftAlert): Promise<void>;
3254
+ }
3255
+
3256
+ /**
3257
+ * Default sink: writes one severity-tagged, credential-redacted JSON line to
3258
+ * **stdout** (the data channel) and a one-line diagnostic to **stderr**, keeping
3259
+ * the stdout/stderr discipline the rest of the CLI follows. Fully local — no
3260
+ * network egress.
3261
+ */
3262
+ declare class StdoutSink implements DriftSink {
3263
+ readonly name = "stdout";
3264
+ emit(alert: DriftAlert): Promise<void>;
3265
+ }
3266
+
3267
+ interface WebhookSinkOptions {
3268
+ url: string;
3269
+ token?: string;
3270
+ timeoutMs?: number;
3271
+ }
3272
+ /**
3273
+ * Outbound sink: POSTs the alert as JSON to an operator-configured endpoint. The
3274
+ * first and only outbound network surface of the drift feature — off by default
3275
+ * (constructed only when a `webhook` sink is explicitly configured). Hardened:
3276
+ * - the body is **always** `redactValue(alert)`, so no `user:password@` secret
3277
+ * leaves the process;
3278
+ * - only `stripSensitive(url)` (host:port) is ever logged — never the full URL,
3279
+ * the bearer token, or the body;
3280
+ * - the target must be `https:` (SSRF / plaintext-exfil guard) — a plaintext
3281
+ * `http:` URL is refused unless the host is loopback or the documented
3282
+ * `CARTOGRAPHY_ALLOW_INSECURE_SYNC=1` escape hatch is set (mirrors `pushDeltas`);
3283
+ * - degrades to a logged no-op when `fetch` is unavailable or the URL is empty;
3284
+ * - never throws (the runner owns retry/abort policy).
3285
+ */
3286
+ declare class WebhookSink implements DriftSink {
3287
+ private readonly opts;
3288
+ readonly name = "webhook";
3289
+ constructor(opts: WebhookSinkOptions);
3290
+ emit(alert: DriftAlert): Promise<void>;
3291
+ }
3292
+
3293
+ /**
3294
+ * Construct sinks from config. Absent/empty config → `[new StdoutSink()]` (the
3295
+ * local default). A webhook sink's token falls back to `CARTOGRAPHY_DRIFT_TOKEN`
3296
+ * when not given explicitly (mirroring `CARTOGRAPHY_HTTP_TOKEN`). A webhook entry
3297
+ * without a url is skipped defensively (the schema already rejects it).
3298
+ */
3299
+ declare function buildSinks(drift?: DriftConfig): DriftSink[];
3300
+
3301
+ /** Raised when a config file cannot be read, parsed, or validated. */
3302
+ declare class ConfigError extends Error {
3303
+ constructor(message: string);
3304
+ }
3305
+ /**
3306
+ * Read and validate a JSON config file at `path`, returning a fully-resolved
3307
+ * {@link CartographyConfig}. Merges the file's `schedule`/`entryPoints`/`dbPath`/
3308
+ * `organization` into `defaultConfig` so every existing config invariant (e.g.
3309
+ * `agentModel === models.lead`) is preserved.
3310
+ *
3311
+ * Precedence for the shared `entryPoints`/`dbPath`: a value inside the `schedule`
3312
+ * block wins over the same file-level key (the schedule block is the more specific
3313
+ * intent for a scheduled run).
3314
+ *
3315
+ * @throws {ConfigError} when the file is missing/unreadable, the JSON is malformed,
3316
+ * or the content fails schema validation (including unknown keys via `.strict()`).
3317
+ */
3318
+ declare function loadConfig(path: string): CartographyConfig;
3319
+ /**
3320
+ * Lower-level reader: parse + validate a config file into its typed shape without
3321
+ * resolving it against `defaultConfig`. Useful for callers (e.g. WS 2.11) that need
3322
+ * the raw file shape rather than a merged runtime config.
3323
+ *
3324
+ * @throws {ConfigError} on missing file, malformed JSON, or schema-validation failure.
3325
+ */
3326
+ declare function readConfigFile(path: string): ConfigFile;
3327
+
3328
+ /**
3329
+ * Scheduled discovery (2.5).
3330
+ *
3331
+ * A thin, dependency-free cron driver over the existing read-only discovery and
3332
+ * drift machinery. Two pure pieces — {@link parseCron} and {@link nextRun} — own
3333
+ * a minimal 5-field cron grammar (UTC, no NPM dependency); {@link runOnce} runs a
3334
+ * single deterministic local scan and returns the topology drift relative to the
3335
+ * prior run. `runOnce` never invokes the Claude/agent loop and needs no API key.
3336
+ *
3337
+ * Reuse, not reimplementation: discovery is `runLocalDiscovery` and diffing is the
3338
+ * pure `diffTopology` engine (surfaced here via the 2.1 incremental `mode:'update'`
3339
+ * rescan, which already returns the delta). This module adds scheduling + per-run
3340
+ * persistence around them.
3341
+ */
3342
+
3343
+ /** Parsed allowed-value sets for each cron field (all UTC). */
3344
+ interface CronFields {
3345
+ /** 0–59 */
3346
+ minute: Set<number>;
3347
+ /** 0–23 */
3348
+ hour: Set<number>;
3349
+ /** 1–31 */
3350
+ dom: Set<number>;
3351
+ /** 1–12 */
3352
+ month: Set<number>;
3353
+ /** 0–6 (Sunday = 0) */
3354
+ dow: Set<number>;
3355
+ }
3356
+ /**
3357
+ * Parse a 5-field cron expression (`minute hour dom month dow`, UTC) into its
3358
+ * matching value sets. Grammar per field: star, star-slash-n, `a`, `a-b`,
3359
+ * `a-b`-slash-n, `a`-slash-n, and comma lists of those. Day-of-week `7`
3360
+ * normalizes to `0` (Sunday).
3361
+ *
3362
+ * @throws {RangeError} when the expression is not exactly 5 fields, or any field
3363
+ * is out of range / non-numeric / malformed.
3364
+ */
3365
+ declare function parseCron(expr: string): CronFields;
3366
+ /**
3367
+ * The earliest scheduled instant strictly after `after` (UTC, with seconds and milliseconds truncated).
3368
+ * Deterministic and pure: same `expr` + `after` always yields the same Date.
3369
+ *
3370
+ * @throws {RangeError} when `expr` is invalid, or no match is found within ~4 years.
3371
+ */
3372
+ declare function nextRun(expr: string, after: Date): Date;
3373
+ interface ScheduledRunResult {
3374
+ /** The session this run scanned (reused in place across runs). */
3375
+ sessionId: string;
3376
+ /** The prior session used as the diff base, or `undefined` on the first run. */
3377
+ baseSessionId?: string;
3378
+ /** Topology drift this run observed. */
3379
+ delta: TopologyDelta;
3380
+ /** Node count after the scan. */
3381
+ nodes: number;
3382
+ /** Edge count after the scan. */
3383
+ edges: number;
3384
+ /** Ids of the scanners that ran. */
3385
+ scanners: string[];
3386
+ }
3387
+ /**
3388
+ * Run one deterministic local-discovery pass and return its topology drift.
3389
+ *
3390
+ * Reuses the most recent prior `discover` session for this config's tenant and
3391
+ * rescans it in place via the 2.1 incremental `mode:'update'` path (same session
3392
+ * id, prunes vanished entities, stamps `last_scanned_at`), which already returns
3393
+ * the delta from the pure `diffTopology` engine. When no prior session exists, it
3394
+ * creates a fresh session, replace-scans it, and reports the whole topology as
3395
+ * `added` (diffed against an empty base).
3396
+ *
3397
+ * Read-only and API-key-free. Does **not** persist the drift run — the caller does
3398
+ * (so tests can inspect the delta first). All progress goes to stderr.
3399
+ */
3400
+ declare function runOnce(cfg: CartographyConfig, db: CartographyDB): Promise<ScheduledRunResult>;
3401
+
3402
+ /**
3403
+ * Share classifier (2.11) — pure, DB-agnostic.
3404
+ *
3405
+ * Buckets the items of a {@link SharePreview} (the 2.10 policy-transformed payload)
3406
+ * into `share` / `withhold` / `pending` against the persisted {@link SharingPolicy}
3407
+ * and the set of already-shared content hashes:
3408
+ *
3409
+ * - A node whose transformed payload is `null` (effective level `none`, incl. the
3410
+ * PERSONAL-host hard floor) is **withheld** — it never leaves.
3411
+ * - A node already pushed (its `shareHash` ∈ `sharedHashes`) is dropped (no bucket).
3412
+ * - A surviving node covered by the policy ({@link isRemembered}) is **share** — the
3413
+ * employee's policy is the standing consent, so it auto-approves with no re-prompt.
3414
+ * - A surviving node *not* covered by any policy rule/default is **pending** — new /
3415
+ * unmatched, queued for explicit review. Nothing in this bucket ever leaves until
3416
+ * the employee approves it (the load-bearing privacy invariant).
3417
+ *
3418
+ * Edges follow their endpoints: an edge is shareable only when both endpoints
3419
+ * survive (already guaranteed by `previewShare`, which drops dangling edges) and
3420
+ * both endpoint nodes land in `share`. Otherwise the edge is withheld.
3421
+ *
3422
+ * The transform/anonymization itself is **not** reimplemented here — it is consumed
3423
+ * from `previewShare`. This module only routes the already-transformed items.
3424
+ */
3425
+
3426
+ /** One classified, ready-to-queue item carrying its outgoing (transformed) payload. */
3427
+ interface ClassifiedItem {
3428
+ contentHash: string;
3429
+ kind: 'node' | 'edge';
3430
+ /** Original node id (for `node` items); the remapped source id for `edge` items. */
3431
+ nodeId?: string;
3432
+ /** The exact transformed bytes that would leave the machine. */
3433
+ payload: unknown;
3434
+ }
3435
+ interface ClassifyResult {
3436
+ /** Covered by policy → auto-approve (no re-prompt). */
3437
+ share: ClassifiedItem[];
3438
+ /** Effective level `none` / PERSONAL floor → suppressed; never leaves. */
3439
+ withhold: ClassifiedItem[];
3440
+ /** New / unmatched → queued for explicit human review. */
3441
+ pending: ClassifiedItem[];
3442
+ }
3443
+ interface ClassifyInput {
3444
+ preview: SharePreview;
3445
+ policy: SharingPolicy;
3446
+ /** `content_hash` values already pushed (status `shared`) — suppress re-share. */
3447
+ sharedHashes: ReadonlySet<string>;
3448
+ }
3449
+ /**
3450
+ * Route a {@link SharePreview} into share / withhold / pending buckets. Pure and
3451
+ * deterministic: same inputs always yield the same buckets. The payloads carried
3452
+ * here are the transformed ones from the preview, so anything routed to `share` is
3453
+ * already anonymized per policy.
3454
+ */
3455
+ declare function classify(input: ClassifyInput): ClassifyResult;
3456
+
3457
+ /**
3458
+ * Stable content hashing for the central-DB share queue (2.11).
3459
+ *
3460
+ * The hash is computed over the *policy-transformed* payload (what `previewShare`
3461
+ * produces — already anonymized/dropped), so two scans that yield the same outgoing
3462
+ * bytes map to the same `pending_shares` row (idempotent enqueue) and the same
3463
+ * server-side dedup key. It is deterministic via `stableStringify` (recursively
3464
+ * key-sorted JSON), so key ordering never perturbs the hash.
3465
+ *
3466
+ * Zero new dependencies — `node:crypto` + the existing `stableStringify`.
3467
+ */
3468
+ /** Discriminator for a share item: whether its outgoing (already-transformed) payload is a node or an edge. */
3469
+ type ShareKind = 'node' | 'edge';
3470
+ /**
3471
+ * sha256 (hex) over the canonical JSON of `{ kind, payload }`. `payload` is the
3472
+ * transformed projection — for `anonymized`/`none` items it is the anonymized form,
3473
+ * never the raw record — so the hash is stable across scans and identical to the
3474
+ * value the central side can dedup on.
3475
+ */
3476
+ declare function shareHash(kind: ShareKind, payload: unknown): string;
3477
+
3478
+ /**
3479
+ * Outbound push client (2.11) — Cartography's first egress path.
3480
+ *
3481
+ * Pushes consented, policy-transformed deltas to the central ingest endpoint over
3482
+ * bearer-auth HTTPS. It is the inverse of the inbound MCP auth in
3483
+ * `src/mcp/transports.ts`: instead of validating a bearer token, it *sends* one.
3484
+ *
3485
+ * Load-bearing privacy invariant: this function only ever sends the items it is
3486
+ * handed, which the caller (`sync push`) draws exclusively from `getApprovedShares()`
3487
+ * (status `approved`). The hard guards below additionally make a no-config / insecure
3488
+ * / empty send impossible. The bearer token is NEVER logged and NEVER placed in the
3489
+ * payload; error logging routes the URL through `stripSensitive`.
3490
+ *
3491
+ * Network is Node 20+ global `fetch`, injectable via `fetchImpl` so tests never hit
3492
+ * the network. Zero new dependencies.
3493
+ */
3494
+
3495
+ /** One item to push. `payload` is the already-policy-transformed (anonymized) projection. */
3496
+ interface PushItem {
3497
+ contentHash: string;
3498
+ kind: 'node' | 'edge';
3499
+ payload: unknown;
3500
+ }
3501
+ interface PushResult {
3502
+ /** Items the server acknowledged. */
3503
+ sent: number;
3504
+ /** Batches POSTed. */
3505
+ batches: number;
3506
+ /** Items in batches that ultimately failed (left for a later retry). */
3507
+ failed: number;
3508
+ /** Content hashes the server acknowledged (caller flips these to `shared`). */
3509
+ sentHashes: string[];
3510
+ }
3511
+ interface PushOptions {
3512
+ /** Injectable fetch (tests). Defaults to Node global `fetch`. */
3513
+ fetchImpl?: typeof fetch;
3514
+ /** Items per batch. Defaults to `config.centralDb.batchSize ?? 100`. */
3515
+ batchSize?: number;
3516
+ /** Max retries per batch on 5xx / network errors. Default 4. */
3517
+ maxRetries?: number;
3518
+ /** Per-request timeout (ms). Default 15000. */
3519
+ timeoutMs?: number;
3520
+ /** Preview only — never networks. */
3521
+ dryRun?: boolean;
3522
+ /** Log sink (stderr by default); the token never reaches it. */
3523
+ log?: (line: string) => void;
3524
+ /** Sleep hook (tests inject a no-op to skip real backoff). */
3525
+ sleep?: (ms: number) => Promise<void>;
3526
+ }
3527
+ /** Wire-format version of the push envelope (the contract WS 2.12 ingests). */
3528
+ declare const PUSH_SCHEMA_VERSION: 1;
3529
+ /**
3530
+ * Push approved deltas. Returns counts and the acknowledged content hashes.
3531
+ *
3532
+ * Hard guards (the no-leak guarantee):
3533
+ * - missing `centralDb.url`/`token` → throws (nothing sent, fetch never called).
3534
+ * - non-`https:` URL → throws, unless `CARTOGRAPHY_ALLOW_INSECURE_SYNC === '1'`
3535
+ * (test-only, documented as unsafe; never the default).
3536
+ * - empty `items` → returns zeros without any network call.
3537
+ */
3538
+ declare function pushDeltas(config: CartographyConfig, items: PushItem[], opts?: PushOptions): Promise<PushResult>;
3539
+
3540
+ /**
3541
+ * Central-DB sync orchestration (2.11) — the post-scan enqueue glue.
3542
+ *
3543
+ * After a (manual or scheduled) scan, {@link runSyncClassify} resolves the
3544
+ * employee's persisted sharing policy (2.10), builds the policy-transformed payload
3545
+ * via `previewShare` (already anonymized/dropped — nothing raw for `anonymized`/
3546
+ * `none`), classifies it against the already-shared set, and enqueues the result
3547
+ * into `pending_shares`:
3548
+ *
3549
+ * - `share` → enqueued `approved` with `decided_by='rule'` (the policy *is* the
3550
+ * standing consent; never re-prompted).
3551
+ * - `pending` → enqueued `pending` for explicit review (nothing leaves until approved).
3552
+ * - `withhold` → recorded `withheld` for the audit/`sync status` suppression count.
3553
+ *
3554
+ * Short-circuits to a no-op when `centralDb` is unconfigured, so an install without
3555
+ * sync configured never writes to the queue. All writes run in one transaction.
3556
+ */
3557
+
3558
+ interface SyncClassifyResult {
3559
+ enqueued: number;
3560
+ autoShared: number;
3561
+ withheld: number;
3562
+ }
3563
+ interface SyncClassifyOptions {
3564
+ /** Override the org-key path / namespace (tests). Defaults to `config.organization`. */
3565
+ orgKey?: Buffer;
3566
+ }
3567
+ /**
3568
+ * Classify a session's topology against the persisted policy and enqueue the
3569
+ * result. Returns counts for `pending` (`enqueued`), auto-approved (`autoShared`),
3570
+ * and suppressed (`withheld`) items. No-op (all zeros) when sync is unconfigured.
3571
+ */
3572
+ declare function runSyncClassify(db: CartographyDB, sessionId: string, config: CartographyConfig, opts?: SyncClassifyOptions): SyncClassifyResult;
3573
+
3574
+ /**
3575
+ * Sanitization of untrusted text before it enters the catalog or an LLM context
3576
+ * window. Discovery ingests text from sources outside our control — browser
3577
+ * bookmark titles, command output, scanner reports — which can carry hidden
3578
+ * prompt-injection payloads using invisible Unicode (zero-width spaces,
3579
+ * bidi/format controls, soft hyphens) or stray control characters.
3580
+ *
3581
+ * `sanitizeUntrusted` strips those while preserving ordinary whitespace
3582
+ * (tab/0x09, line feed/0x0A, carriage return/0x0D) and NFC-normalizes the
3583
+ * result. It is a no-op for normal ASCII/printable text.
3584
+ *
3585
+ * The set of stripped code points is defined numerically (below) rather than as
3586
+ * a regex of literal invisible characters, so the source stays pure ASCII and
3587
+ * auditable.
3588
+ */
3589
+ /** Strip invisible/control characters and NFC-normalize untrusted text. */
3590
+ declare function sanitizeUntrusted(text: string): string;
3591
+ /** Recursively apply `sanitizeUntrusted` to every string in an arbitrary value. */
3592
+ declare function sanitizeValue(value: unknown): unknown;
3593
+
615
3594
  /**
616
3595
  * Hex Grid Engine — flat-top axial coordinate system.
617
3596
  * Reference: https://www.redblobgames.com/grids/hexagons/
@@ -705,13 +3684,13 @@ declare function buildMapData(nodes: NodeRow[], edges: EdgeRow[], options?: {
705
3684
  theme?: 'light' | 'dark';
706
3685
  }): CartographyMapData;
707
3686
 
708
- declare function checkPrerequisites(): void;
709
-
710
3687
  /**
711
- * Remove orphaned temp files from previous bookmark/history scans.
712
- * Call at startup to prevent /tmp accumulation after crashes.
3688
+ * Provider-aware preflight. Defaults to `'claude'` so existing zero-arg callers are
3689
+ * unaffected. For non-Claude providers, checks only what each backend needs from the
3690
+ * environment; deeper reachability (e.g. Ollama host) is deferred to the provider's
3691
+ * `ensureAvailable`, which degrades at run time with an actionable message.
713
3692
  */
714
- declare function cleanupTempFiles(): number;
3693
+ declare function checkPrerequisites(provider?: ProviderName): void;
715
3694
 
716
3695
  /**
717
3696
  * Structured logging for enterprise observability.
@@ -731,4 +3710,4 @@ declare function logInfo(message: string, context?: Record<string, unknown>): vo
731
3710
  declare function logWarn(message: string, context?: Record<string, unknown>): void;
732
3711
  declare function logError(message: string, context?: Record<string, unknown>): void;
733
3712
 
734
- export { type CartographyConfig, CartographyDB, type CartographyMapData, type Cluster, ClusterSchema, type Connection, ConnectionSchema, type CreateMcpServerOptions, DOMAIN_COLORS, DOMAIN_PALETTE, type DataAsset, DataAssetSchema, type DiscoveryEdge, type DiscoveryEvent, type DiscoveryFn, type DiscoveryNode, EDGE_RELATIONSHIPS, type EdgeRelationship, type EdgeRow, EdgeSchema, type EmbeddingProvider, type GraphSummary, type HttpOptions, type LocalDiscoveryOptions, type LogEntry, type LogLevel, NODE_TYPES, NODE_TYPE_GROUPS, type NodeRow, NodeSchema, type NodeType, type PolicyResult, type ScanContext, type ScanResult, type Scanner, ScannerRegistry, type SearchFn, type SessionRow, type ShellKind, type TraversalResult, VectorStore, assertReadOnly, assignColors, bookmarksScanner, buildMapData, checkPrerequisites, checkReadOnly, cleanupTempFiles, computeCentroid, computeClusterBounds, createCartographyTools, createHashEmbedder, createLocalEmbedder, createMcpServer, createScanRunner, createSemanticSearch, defaultConfig, defaultRegistry, edgesToConnections, exportAll, exportBackstageYAML, exportDiscoveryApp, exportJGF, exportJSON, extractListeningPorts, generateDependencyMermaid, generateTopologyMermaid, groupByDomain, hexCorners, hexDistance, hexNeighbors, hexRing, hexSpiral, hexToPixel, installedAppsScanner, isReadOnlyCommand, layoutClusters, localDiscoveryFn, log, logDebug, logError, logInfo, logWarn, nodesToAssets, pixelToHex, portsScanner, runDiscovery, runHttp, runLocalDiscovery, runStdio, safeEnv, safetyHook, setVerbose, shadeVariant, splitSegments, stripSensitive };
3713
+ export { ANOMALY_KINDS, ANOMALY_SEVERITIES, type AgentProvider, type AgentRunContext, type AgentTool, type Anomaly, type AnomalyConfig, type AnomalyKind, type AnomalySeverity, type AnomalyThresholds, type AnonViolation, type AnonymizationLevel, type ApiServerOptions, type AskUserFn, type BindGuardOptions, CLIENTS, CONFIDENCE, COST_PERIODS, type CartographyConfig, CartographyDB, type CartographyMapData, type CentralDbConfig, CentralDbConfigSchema, type ClassifiedItem, type ClassifyInput, type ClassifyResult, type ClientSpec, type Cluster, ClusterSchema, type ComplianceInput, type ComplianceReport, ComplianceReportSchema, type ComplianceRule, ComplianceRuleSchema, type Condition, ConditionSchema, ConfigError, type ConfigFile, ConfigFileSchema, type ConfigFormat, type Connection, ConnectionSchema, type Contributor, type ControlResult, ControlResultSchema, type CostEntry, CostEntrySchema, type CostPeriod, type CostRecord, type CostSource, type CreateMcpServerOptions, type CronFields, CsvCostSource, type CsvCostSourceOptions, DEFAULT_ANOMALY_THRESHOLDS, DEFAULT_FAST_MODEL, DEFAULT_LEAD_MODEL, DEFAULT_SERVER_NAME, DEFAULT_TENANT, DOMAIN_COLORS, DOMAIN_PALETTE, DRIFT_FIELDS, type DataAsset, DataAssetSchema, type DependencyQuery, type DiscoveryEdge, type DiscoveryEvent, type DiscoveryFn, type DiscoveryNode, type DriftAlert, type DriftAlertItem, type DriftConfig, DriftConfigSchema, type DriftField, type DriftItemKind, type DriftRunRow, type DriftSink, type DriftSinkConfig, EDGE_RELATIONSHIPS, type EdgeRelationship, type EdgeRow, EdgeSchema, type EmbeddingProvider, type EnrichResult, type EntryOptions, type EstablishedConn, type EvidenceKind, type FragmentKind, type GraphSummary, type HealthResult, type HttpOptions, INGEST_SCHEMA_VERSION, type IngestEnvelope, IngestEnvelopeSchema, type IngestHandler, type IngestOptions, type IngestResponse, type IngestResult, type InstallPlan, InvalidTenantError, LOOPBACK_HOSTS, type LocalDiscoveryOptions, type LocalDiscoveryResult, type 
LogEntry, type LogLevel, MCP_BIN, type MatchStrategy, NODE_TYPES, NODE_TYPE_GROUPS, type NlIntent, type NlQueryOptions, type NlQueryResult, type NlRelation, type NodeAttribution, type NodeChange, type NodeIdentity, type NodeQuery, type NodeRow, NodeSchema, type NodeType, type NodesResult, NotFoundError, OUTPUT_FORMATS, type OrgKeyOptions, type OrgSummary, type OsKind, type OutputFormat, PACKAGE_NAME, PENDING_STATUSES, PERSONAL, PORT_MAP, PRIVATE_IP, PUSH_SCHEMA_VERSION, type ParsedApiArgs, type PendingShareRow, type PendingStatus, type PlanOptions, type PolicyResult, type ProviderFactory, type ProviderName, ProviderRegistry, type PushItem, type PushOptions, type PushResult, type QueryBackend, RELATION_TO_DIRECTION, type ResolveContext, type RuleCheck, RuleCheckSchema, type RuleScope, type Ruleset, RulesetSchema, type RunDriftOptions, SCAN_ARG_PATTERNS, SDL, SECURITY_METADATA_KEYS, SEVERITIES, SEVERITY_WEIGHT, SHARING_LEVELS, type ScanArgKind, type ScanContext, type ScanHintParams, type ScanResult, type Scanner, type ScannerPlugin, type ScannerPluginApi, ScannerRegistry, ScannerShape, type ScheduleConfig, ScheduleConfigSchema, type ScheduledRunResult, type Scope, type SearchFn, type SemanticSearchOptions, type ServerEntry, type SessionRow, type Severity, type SharePreview, type SharePreviewEntry, type SharingLevel, SharingLevelSchema, type SharingPolicy, type ShellKind, SqliteQueryBackend, SqliteStoreBackend, type StartApiOptions, StdoutSink, type StoreBackend, type SyncClassifyOptions, type SyncClassifyResult, TENANT_HEADER, type TenantContext, type TenantOptions, type ToolResult, type TopologyDelta, type TopologyDiff, type TopologyInput, type TraversalResult, VectorStore, WebhookSink, type WebhookSinkOptions, applyInstall, applySharingLevel, assertReadOnly, assertSafeBind, assertSafeScanArg, assignColors, bearerToken, bookmarksScanner, buildCartographyToolHandlers, buildMapData, buildOpenApiDocument, buildReport, buildSinks, centralDbFromEnv, checkBearer, 
checkPrerequisites, checkReadOnly, clampText, classify, classifyDrift, cleanupTempFiles, cloudAwsScanner, cloudAzureScanner, cloudGcpScanner, codeAddMcpCommand, computeCentroid, computeClusterBounds, computeIdentity, connectionsScanner, contentHash, createBashTool, createCartographyTools, createClaudeProvider, createDefaultRegistry, createHashEmbedder, createIngestHandler, createLocalEmbedder, createMcpServer, createOllamaProvider, createOpenAIProvider, createScanRunner, createSemanticSearch, createSqliteQueryBackend, currentOs, cursorDeeplink, databasesScanner, deepMerge, defaultAllowedHosts, defaultConfig, defaultContext, defaultProviderRegistry, defaultRegistry, defaultServerEntry, definePlugin, deriveSessionName, detectAnomalies, detectOrphans, detectShadowIt, diffTopology, edgesToConnections, enrichCosts, evaluateCheck, evaluateRule, evidenceLine, executeGraphql, executeNlQuery, exportAll, exportBackstageYAML, exportComplianceReport, exportCostCSV, exportCostSummary, exportDiscoveryApp, exportJGF, exportJSON, extractListeningPorts, filterBySeverity, findAnonViolations, formatComplianceText, generateDependencyMermaid, generateDiffMermaid, generateTopologyMermaid, getClient, getRuleset, globalId, groupByDomain, handleGraphqlGet, hexCorners, hexDistance, hexNeighbors, hexRing, hexSpiral, hexToPixel, hmacKey, hostname, ingestEnvelope, installedAppsScanner, isLoopbackHost, isPersonalHost, isReadOnlyCommand, isRemembered, k8sScanner, keyMetaOf, layoutClusters, listClients, listRulesets, loadConfig, loadOrgKey, loadPlugins, loadRuleset, localDiscoveryFn, log, logDebug, logError, logInfo, logWarn, machineId, maxSeverity, mcpServerObject, newAnomalies, nextRun, nodesToAssets, normalizeId, normalizeTenant, orgKeyPath, osUser, parseApiArgs, parseComposeDeps, parseConfig, parseConnectionString, parseCostCsv, parseCron, parseEstablished, parseNginxUpstreams, parseNlQuery, parseScanHint, pixelToHex, planInstall, portsScanner, previewShare, pseudonymize, 
pseudonymizeFragment, pseudonymizeString, pushDeltas, readConfigFile, redactConnectionString, redactSecrets, redactValue, renderDiff, resolveEffectiveLevel, resolveNlQuery, resolveSharingLevel, resolveTenant, revalidateAnonymized, reversalKey, reversePseudonym, rotateOrgKey, runApi, runDiscovery, runDrift, runHttp, runLocalDiscovery, runOnce, runStdio, runSyncClassify, safeEnv, safeJson, safetyHook, sanitizeUntrusted, sanitizeValue, scoreTopology, securityRelevantChange, serializeConfig, serviceConfigScanner, setVerbose, shadeVariant, shapeToJsonSchema, shareHash, splitSegments, stableStringify, startApi, stripSensitive, timingSafeEqual, validateScanner, vscodeDeeplink, zodToJsonSchema };