crosscheck-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,487 @@
1
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
2
+ import { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
3
+
4
+ /** sessions row. Its counter and `total_*` fields (and `last_at`) share their names with the delta keys accepted by `StorageWrite.accumulateSessionTotals`. */
5
+ interface SessionRow {
6
+ session_id: string;
7
+ started_at: number; // NOTE(review): presumably epoch ms — confirm
8
+ last_at: number | null;
9
+ calls: number;
10
+ wall_ms: number;
11
+ cache_hits: number;
12
+ total_prompt_tokens: number;
13
+ total_completion_tokens: number;
14
+ total_cached_tokens: number;
15
+ total_tokens: number;
16
+ total_cost_usd: number;
17
+ total_cpu_ms: number;
18
+ }
19
+ /** usage_log row. Carries the same token/cost fields as `Usage`, plus id, session, tool, timestamp and timing columns. */
20
+ interface UsageLogRow {
21
+ id: number;
22
+ session_id: string;
23
+ ts: number;
24
+ tool: string | null;
25
+ purpose: string;
26
+ provider: string;
27
+ model: string;
28
+ prompt_tokens: number;
29
+ completion_tokens: number;
30
+ cached_tokens: number;
31
+ total_tokens: number;
32
+ cost_usd: number;
33
+ estimated: number; // number here, boolean on `Usage` — presumably a 0/1 flag; confirm
34
+ wall_ms: number;
35
+ cpu_ms: number;
36
+ }
37
+ /** Insert shape for usage_log (id is auto-assigned, defaults filled by SQL). Accepted in batches by `StorageWrite.insertUsage`. */
38
+ interface UsageLogInsert {
39
+ session_id: string;
40
+ ts: number;
41
+ tool: string | null;
42
+ purpose: string;
43
+ provider: string;
44
+ model: string;
45
+ prompt_tokens?: number;
46
+ completion_tokens?: number;
47
+ cached_tokens?: number;
48
+ total_tokens?: number;
49
+ cost_usd?: number;
50
+ estimated?: number;
51
+ wall_ms?: number;
52
+ cpu_ms?: number;
53
+ }
54
+ /** claims row. citations_json is the raw JSON string as stored; the insert side (`ClaimInsert.citations`) takes a string array instead. */
55
+ interface ClaimRow {
56
+ id: number;
57
+ session_id: string;
58
+ text: string;
59
+ provider: string | null;
60
+ confidence: number | null;
61
+ citations_json: string | null;
62
+ kind: string | null;
63
+ created_at: number;
64
+ }
65
+ /** Insert shape for claims, taken by `StorageWrite.insertClaim`. Has no `id` or `created_at`; only `session_id` and `text` are required. */ interface ClaimInsert {
66
+ session_id: string;
67
+ text: string;
68
+ provider?: string | null;
69
+ confidence?: number | null;
70
+ citations?: readonly string[] | null; // NOTE(review): presumably serialized into claims.citations_json by the adapter — confirm
71
+ kind?: string | null;
72
+ }
73
+ /** Allowed `kind` values for a claim_links row: supports | attacks | derives_from | merges_with. */
74
+ type ClaimLinkKind = "supports" | "attacks" | "derives_from" | "merges_with";
75
+ /** claim_links row. `kind` is constrained to `ClaimLinkKind`. */ interface ClaimLinkRow {
76
+ id: number;
77
+ src_id: number; // NOTE(review): presumably a claims.id — confirm
78
+ dst_id: number; // NOTE(review): presumably a claims.id — confirm
79
+ kind: ClaimLinkKind;
80
+ created_at: number;
81
+ }
82
+ /** provider_stats row. NOTE(review): wins / losses / abstains presumably count the agree / disagree / abstain ballots passed to `StorageWrite.bumpProviderBallot` — confirm. */
83
+ interface ProviderStatsRow {
84
+ provider: string;
85
+ wins: number;
86
+ losses: number;
87
+ abstains: number;
88
+ last_at: number | null;
89
+ }
90
+ /** delegations row. Same columns as the object taken by `StorageWrite.insertDelegation`, plus `id`. */
91
+ interface DelegationRow {
92
+ id: number;
93
+ session_id: string | null;
94
+ requester: string | null;
95
+ tool_call: string;
96
+ via: string;
97
+ accepted: number; // `StorageWrite.insertDelegation` types this as 0 | 1
98
+ created_at: number;
99
+ }
100
+ /** Allowed `kind` values for a session_memory row: fact | open_question | decision. */
101
+ type SessionMemoryKind = "fact" | "open_question" | "decision";
102
+ /** session_memory row. `kind` is constrained to `SessionMemoryKind`. */ interface SessionMemoryRow {
103
+ id: number;
104
+ session_id: string;
105
+ kind: SessionMemoryKind;
106
+ content: string;
107
+ source_tool: string | null;
108
+ source_call_id: string | null;
109
+ confidence: number | null;
110
+ created_at: number;
111
+ stale_at: number | null; // NOTE(review): presumably set via `StorageWrite.markSessionMemoryStale` — confirm
112
+ stale_reason: string | null;
113
+ }
114
+ /** Insert shape for session_memory. `created_at` is not part of it: `StorageWrite.insertSessionMemory` takes `SessionMemoryInsert & { created_at: number }`. */ interface SessionMemoryInsert {
115
+ session_id: string;
116
+ kind: SessionMemoryKind;
117
+ content: string;
118
+ source_tool?: string | null;
119
+ source_call_id?: string | null;
120
+ confidence?: number | null;
121
+ }
122
+ /** A single FTS5 search hit, as returned by `StorageRead.recallSearch`. The adapter handles snippet/highlight
123
+ * internally, and the caller never sees raw bm25 scores — `score`
124
+ * is normalized to [0, 1] where higher = better match. */
125
+ interface SearchHit {
126
+ path: string;
127
+ session_id: string | null;
128
+ tool: string | null;
129
+ ts: number;
130
+ snippet: string;
131
+ /** Normalized [0,1]; higher is better. Adapters convert bm25 (lower-is-
132
+ * better) to this normalized form internally. */
133
+ score: number;
134
+ }
135
+ /** Optional filters for `StorageRead.recallSearch`. */ interface RecallSearchOpts {
136
+ /** Match within a single session. */
137
+ session_id?: string;
138
+ /** Match within a single tool. */
139
+ tool?: string;
140
+ /** Only matches with ts >= since_ms. */
141
+ since_ms?: number;
142
+ }
143
+ /** Options for `StorageRead.listSessionMemory`; all fields are optional. */ interface ListSessionMemoryOpts {
144
+ kinds?: readonly SessionMemoryKind[];
145
+ include_stale?: boolean; // NOTE(review): presumably also returns rows whose `stale_at` is set — confirm
146
+ limit?: number;
147
+ }
148
+ /** Options for `StorageWrite.markSessionMemoryStale`. NOTE(review): `ids` / `kinds` presumably select the rows to mark and `reason` presumably lands in `stale_reason` — confirm. */ interface MarkStaleOpts {
149
+ ids?: readonly number[];
150
+ kinds?: readonly SessionMemoryKind[];
151
+ reason?: string;
152
+ }
153
+ /** Query surface of the storage adapter; every method returns a Promise. Combined with `StorageWrite` in `Txn` and `Storage`. */ interface StorageRead {
154
+ getSession(sessionId: string): Promise<SessionRow | null>;
155
+ listSessions(opts?: {
156
+ limit?: number;
157
+ }): Promise<readonly SessionRow[]>;
158
+ listUsageForSession(sessionId: string, opts?: {
159
+ only_purpose?: readonly string[];
160
+ only_provider?: readonly string[];
161
+ limit?: number;
162
+ }): Promise<readonly UsageLogRow[]>;
163
+ listUsageGroupedByPurpose(sessionId: string): Promise<readonly {
164
+ purpose: string;
165
+ calls: number;
166
+ prompt_tokens: number;
167
+ completion_tokens: number;
168
+ total_tokens: number;
169
+ cost_usd: number;
170
+ wall_ms: number;
171
+ cpu_ms: number;
172
+ }[]>;
173
+ listUsageGroupedByProvider(purpose?: string): Promise<readonly {
174
+ provider: string;
175
+ calls: number;
176
+ total_tokens: number;
177
+ cost_usd: number;
178
+ errors: number;
179
+ }[]>;
180
+ /** Per-provider per-purpose averages for the smart router. Filters
181
+ * to ts >= sinceMs (epoch ms) when supplied. Returns provider,
182
+ * calls + token sum + averages (tokens, cost, wall_ms). Mirrors
183
+ * Python's `_router_stats` aggregation, sans the event-log error
184
+ * counts (TS doesn't write an events.jsonl; callers default
185
+ * error_rate to 0). */
186
+ listRouterStatsByPurpose(purpose: string, sinceMs?: number): Promise<readonly {
187
+ provider: string;
188
+ calls: number;
189
+ tokens_sum: number;
190
+ avg_total_tokens: number;
191
+ avg_cost_usd: number;
192
+ avg_wall_ms: number;
193
+ }[]>;
194
+ listClaimsForSession(sessionId: string): Promise<readonly ClaimRow[]>;
195
+ getClaim(claimId: number): Promise<ClaimRow | null>;
196
+ listClaimLinksForSession(sessionId: string): Promise<readonly ClaimLinkRow[]>;
197
+ listProviderStats(opts?: {
198
+ limit?: number;
199
+ }): Promise<readonly ProviderStatsRow[]>;
200
+ getProviderStats(provider: string): Promise<ProviderStatsRow | null>;
201
+ listDelegationsForSession(sessionId: string): Promise<readonly DelegationRow[]>;
202
+ countDelegationsByRequester(requester: string): Promise<number>;
203
+ countDelegationsBySession(sessionId: string): Promise<number>;
204
+ /** Count accepted-only delegations for a session. Used by the
205
+ * delegate tool's quota check (Python's
206
+ * `WHERE session_id=? AND accepted=1`). */
207
+ countAcceptedDelegationsBySession(sessionId: string): Promise<number>;
208
+ /** Count accepted-only delegations for a requester. Used by the
209
+ * delegate tool's quota check. */
210
+ countAcceptedDelegationsByRequester(requester: string): Promise<number>;
211
+ /** Aggregate delegations grouped by (requester, accepted). Used by
212
+ * scoreboard to count per-requester acceptances vs refusals. */
213
+ listDelegationAggregatesByRequester(): Promise<readonly {
214
+ requester: string;
215
+ accepted: 0 | 1;
216
+ count: number;
217
+ }[]>;
218
+ /** Return the four global row counts in one transaction. Each count
219
+ * degrades to 0 if the table is missing (matches Python's
220
+ * best-effort behavior). */
221
+ countScoreboardTotals(): Promise<{
222
+ sessions: number;
223
+ claims: number;
224
+ claim_links: number;
225
+ delegations: number;
226
+ }>;
227
+ listSessionMemory(sessionId: string, opts?: ListSessionMemoryOpts): Promise<readonly SessionMemoryRow[]>;
228
+ getFetchEgressTotals(sessionId: string): Promise<{
229
+ total_bytes: number;
230
+ unique_hosts: number;
231
+ }>;
232
+ /** True iff the (session_id, host) pair has been recorded before.
233
+ * Used by fetch's unique-hosts cap to allow continued requests to
234
+ * hosts already counted while rejecting new ones at the limit. */
235
+ hasFetchEgressHost(sessionId: string, host: string): Promise<boolean>;
236
+ recallSearch(query: string, k: number, opts?: RecallSearchOpts): Promise<readonly SearchHit[]>; // NOTE(review): `k` is presumably the maximum number of hits — confirm
237
+ }
238
+ /** Mutating surface of the storage adapter; every method returns a Promise. Combined with `StorageRead` in `Txn` and `Storage`. */ interface StorageWrite {
239
+ upsertSession(row: SessionRow): Promise<void>;
240
+ accumulateSessionTotals(sessionId: string, delta: {
241
+ calls?: number;
242
+ wall_ms?: number;
243
+ cache_hits?: number;
244
+ total_prompt_tokens?: number;
245
+ total_completion_tokens?: number;
246
+ total_cached_tokens?: number;
247
+ total_tokens?: number;
248
+ total_cost_usd?: number;
249
+ total_cpu_ms?: number;
250
+ last_at?: number;
251
+ }): Promise<void>;
252
+ insertUsage(rows: readonly UsageLogInsert[]): Promise<void>;
253
+ insertClaim(claim: ClaimInsert): Promise<number>; // NOTE(review): presumably resolves to the new claim id — confirm
254
+ insertClaimLink(srcId: number, dstId: number, kind: ClaimLinkKind): Promise<void>;
255
+ deleteClaimsForSession(sessionId: string): Promise<number>; // NOTE(review): presumably the number of rows removed — confirm
256
+ bumpProviderBallot(provider: string, ballot: "agree" | "disagree" | "abstain", at: number): Promise<void>;
257
+ insertDelegation(row: {
258
+ session_id: string | null;
259
+ requester: string | null;
260
+ tool_call: string;
261
+ via: string;
262
+ accepted: 0 | 1;
263
+ created_at: number;
264
+ }): Promise<void>;
265
+ insertSessionMemory(row: SessionMemoryInsert & {
266
+ created_at: number;
267
+ }): Promise<number>;
268
+ markSessionMemoryStale(sessionId: string, at: number, opts?: MarkStaleOpts): Promise<number>;
269
+ clearSessionMemory(sessionId: string): Promise<number>;
270
+ recordFetchEgress(sessionId: string, host: string, bytes: number, at: number): Promise<void>;
271
+ indexTranscript(row: {
272
+ path: string;
273
+ session_id: string | null;
274
+ tool: string;
275
+ ts: number;
276
+ content: string;
277
+ }): Promise<void>;
278
+ }
279
+ /** Handle passed to `Storage.txn` callbacks: the combined read + write surface. */ type Txn = StorageRead & StorageWrite;
280
+ /** Raw-SQL surface returned by `Storage.unsafe()`, which documents it as migrations-only. */ interface UnsafeStorage {
281
+ /** Execute arbitrary DDL/DML. Returns the rows-changed count.
282
+ * DO NOT call from app code. */
283
+ exec(sql: string, params?: readonly unknown[]): Promise<number>;
284
+ /** Run a query returning rows. Returns each row as a generic record. */
285
+ query(sql: string, params?: readonly unknown[]): Promise<readonly Record<string, unknown>[]>;
286
+ }
287
+ /** Full storage adapter: the read and write surfaces plus transactions, migrations, schema inspection and shutdown. */ interface Storage extends StorageRead, StorageWrite {
288
+ /** Open a transaction. The callback receives a Txn with the same
289
+ * read/write surface as Storage; nesting reuses the outer transaction via
290
+ * SAVEPOINTs. A throw inside the callback rolls back and bubbles out. */
291
+ txn<T>(fn: (txn: Txn) => Promise<T>): Promise<T>;
292
+ /** Migration runner — applies any pending migrations in order. Idempotent;
293
+ * re-running is a no-op when nothing's pending. */
294
+ migrate(): Promise<{
295
+ applied: readonly string[];
296
+ }>;
297
+ /** PRAGMA-derived canonical schema string. Used by the parity gate to
298
+ * assert a TS-init'd DB has the same schema as a Python-init'd one. */
299
+ canonicalSchema(): Promise<string>;
300
+ /** Access the unsafe surface. Migrations only. */
301
+ unsafe(): UnsafeStorage;
302
+ /** Close any underlying handles. */
303
+ close(): Promise<void>;
304
+ }
305
+
306
+ /** A live bridge to a Python crosscheck-agent child. Passed to `createServer` via `CreateServerOptions.bridge`. */
307
+ interface BridgeHandle {
308
+ /** Names of the tools exposed by the Python child (from its
309
+ * `tools/list` response). */
310
+ readonly toolNames: ReadonlySet<string>;
311
+ /** PID of the spawned Python child process, or `null` if the
312
+ * transport hasn't started yet / has already closed. Useful for
313
+ * shutdown verification and host-side diagnostics. */
314
+ readonly pid: number | null;
315
+ /** Forward a `tools/call` to the Python child and return the result
316
+ * envelope verbatim: a list of `{ type, text }` content items plus an optional `isError` flag. */
317
+ callTool(name: string, args: Record<string, unknown>): Promise<{
318
+ content: {
319
+ type: string;
320
+ text: string;
321
+ }[];
322
+ isError?: boolean;
323
+ }>;
324
+ /** Re-fetch the tool list (in case the child surfaces new tools mid-
325
+ * session). Returns the new tool names. */
326
+ refreshTools(): Promise<ReadonlySet<string>>;
327
+ /** Tear down the bridge cleanly. Idempotent — repeat calls are no-ops. */
328
+ close(): Promise<void>;
329
+ }
330
+
331
+ /** Parsed pricing.json content (see `CreateServerOptions.pricing`). */ interface PricingDoc {
332
+ /** `<provider>` -> `<model>` -> ModelRates. Typed as `unknown` here, so consumers must narrow each entry before reading rates. */
333
+ [provider: string]: unknown;
334
+ }
335
+
336
+ /** One provider call's token + cost record. Mirrors `Usage.to_dict()`
337
+ * output exactly. Field names match the token/cost columns of `UsageLogRow`, where `estimated` is a number rather than a boolean. */
338
+ interface Usage {
339
+ provider: string;
340
+ model: string;
341
+ prompt_tokens: number;
342
+ completion_tokens: number;
343
+ cached_tokens: number;
344
+ total_tokens: number;
345
+ cost_usd: number;
346
+ estimated: boolean;
347
+ purpose: string;
348
+ }
349
+
350
+ /** Inbound message shape — chat-completions style. Extra keys are allowed through the `unknown`-valued index signature. */
351
+ interface ChatMessage {
352
+ role: "system" | "user" | "assistant" | string; // the `| string` member widens this to plain `string`; the literals only document the usual roles
353
+ content: string;
354
+ [k: string]: unknown;
355
+ }
356
+ /** A successful `send()` result, i.e. the value `Provider.send` resolves to. */
357
+ interface SendResult {
358
+ /** Concatenated text from the provider response. */
359
+ text: string;
360
+ /** Number of HTTP attempts that produced this result (>= 1). */
361
+ attempts: number;
362
+ /** Parsed + cost-augmented usage record. */
363
+ usage: Usage;
364
+ }
365
+ /** Args passed into every `Provider.send()` call. Mirrors the Python
366
+ * `send(messages, max_tokens, temperature, purpose='worker')` signature
367
+ * plus an optional `signal` for cancellation and an optional `modelOverride`. */
368
+ interface SendArgs {
369
+ messages: readonly ChatMessage[];
370
+ maxTokens: number;
371
+ temperature: number;
372
+ purpose?: string; // Python's default is 'worker' (see above); NOTE(review): the TS-side default is not visible here — confirm
373
+ /** Optional AbortSignal so callers can cancel long-running calls. */
374
+ signal?: AbortSignal;
375
+ /** Optional one-call model override. Used by the cheap-mode tier
376
+ * picker (see core/retarget.ts) to route a node to a cheaper model
377
+ * without mutating the provider's default. */
378
+ modelOverride?: string;
379
+ }
380
+ /** Provider adapter: one LLM backend identified by `name`, with a default `model` and a single `send` entry point. */
381
+ interface Provider {
382
+ /** Lowercased identifier (e.g. "anthropic", "openai"). */
383
+ readonly name: string;
384
+ /** Default model for this provider (env-resolved at factory time). */
385
+ readonly model: string;
386
+ /** Make a request and return the parsed result. Throws `ProviderError`
387
+ * on classified failures (auth, rate_limit, timeout, server, parse,
388
+ * network, client). Note: `ProviderError` is neither declared nor exported in these typings. */
389
+ send(args: SendArgs): Promise<SendResult>;
390
+ }
391
+
392
+ /** Server name constant. NOTE(review): presumably the name the MCP server reports to clients — confirm. */ declare const SERVER_NAME = "crosscheck-agent";
393
+ /** Server version constant. NOTE(review): presumably kept in step with the package version — confirm. */ declare const SERVER_VERSION = "0.1.0";
394
+ /** Options for `createServer` / `connectAndServe`; every field is optional. */ interface CreateServerOptions {
395
+ /** Optional Python bridge. When supplied, the bridge's tools are
396
+ * merged into the registry as proxy entries — they forward `tools/call`
397
+ * to the Python child. In Phase 4 this is route-all mode (every
398
+ * tool name comes from Python); in Phase 5+ native TS tools
399
+ * override per-name. */
400
+ bridge?: BridgeHandle;
401
+ /** Native LLM providers, keyed by lowercased name. Threaded into
402
+ * pick / audit / confer via the tool registry. When absent, those
403
+ * tools return a clear "no providers" error (or defer to the bridge
404
+ * if one is wired). */
405
+ providers?: Readonly<Record<string, Provider>>;
406
+ /** Optional provider allowlist. NOTE(review): `null` presumably behaves like an absent value (no restriction) — confirm. */
407
+ providerAllowlist?: readonly string[] | null;
408
+ /** SQLite-backed storage adapter. Threaded into recall / scoreboard /
409
+ * session_memory / explain via the tool registry. */
410
+ storage?: Storage;
411
+ /** Directory holding transcript JSON files (used by `explain`). */
412
+ transcriptsDir?: string;
413
+ /** Repo root path (`.git/` ancestor). Used by update_crosscheck for
414
+ * git ops + cache writes, and by `fetch` for evidence-dir resolution. */
415
+ repoRoot?: string;
416
+ /** Pricing doc (pricing.json content). Used by orchestrate's
417
+ * cheap_mode tier picker. Without it, cheap_mode falls back to
418
+ * id-hash provider rotation. */
419
+ pricing?: PricingDoc;
420
+ }
421
+ /**
422
+ * Create a not-yet-connected MCP server with the current tool surface
423
+ * registered. The caller is responsible for connecting it to a Transport (`connectAndServe` does this).
424
+ *
425
+ * When `opts.bridge` is supplied, the Python tool surface is merged in
426
+ * via proxy handlers — TS-native tools win on name collisions so we can
427
+ * cut over per-tool in Phase 5 without restarting.
428
+ */
429
+ declare function createServer(opts?: CreateServerOptions): Server;
430
+ /**
431
+ * Connect a server to a transport and start serving. Pure plumbing — kept
432
+ * here so the entrypoint files stay short. Resolves to the `Server`. NOTE(review): `opts` is presumably forwarded to `createServer` — confirm.
433
+ */
434
+ declare function connectAndServe(transport: Transport, opts?: CreateServerOptions): Promise<Server>;
435
+
436
+ /** The shape every event carries. v1 is single-typed (tool_invoke)
437
+ * but the discriminant is kept on the payload so a future v2 can
438
+ * add other events (provider_call, retry, breaker_trip, ...)
439
+ * without breaking consumers. */
440
+ interface CrosscheckEvent {
441
+ event: "tool_invoke"; // discriminant; the only value in v1
442
+ /** Monotonic-time-derived id. NOT a UUID; just a short hex that's
443
+ * unique within a process run so events from one call can be
444
+ * correlated across emitters. */
445
+ id: string;
446
+ /** Tool name the host called (e.g. "confer", "audit"). */
447
+ tool: string;
448
+ /** ISO-8601 timestamp captured at handler start, in UTC. */
449
+ started_at: string;
450
+ /** Handler wall-clock duration in milliseconds. */
451
+ duration_ms: number;
452
+ /** "ok" when the handler returned without throwing; "error" when
453
+ * it threw. A returned error-envelope (with `error_code`) still
454
+ * counts as "ok" — the call completed, the tool just reported a
455
+ * domain-level failure. */
456
+ status: "ok" | "error";
457
+ /** Top-level keys present in the args bag the host passed.
458
+ * Sanitized in the sense that values are NOT included; key
459
+ * inventory is usually enough to triage a regression. */
460
+ args_keys: string[];
461
+ /** When status="ok": top-level keys present in the result
462
+ * envelope. Empty when status="error". */
463
+ result_keys: string[];
464
+ /** When the result envelope carries `error_code`, mirror it here
465
+ * for easy filtering. Absent on a clean success. */
466
+ error_code?: string;
467
+ /** When status="error" (handler threw): the exception's message.
468
+ * Truncated to 512 chars. */
469
+ error_message?: string;
470
+ /** Stringified-envelope byte length on success. Useful for
471
+ * spotting outsized debate transcripts / orchestrate DAGs. */
472
+ envelope_bytes?: number;
473
+ }
474
+ /** Pluggable sink for `CrosscheckEvent`s. This is the package's own one-method interface, not Node's `events.EventEmitter`. Implementations should NEVER throw — a bad
475
+ * emitter must not break the tool call it's observing. */
476
+ interface EventEmitter {
477
+ emit(event: CrosscheckEvent): void;
478
+ }
479
+ /** Install the emitter that receives `CrosscheckEvent`s. NOTE(review): presumably replaces a single module-level emitter — confirm. */ declare function setEventEmitter(emitter: EventEmitter): void;
480
+ /** Return the current emitter. NOTE(review): presumably the one last passed to `setEventEmitter`, or a built-in default — confirm. */ declare function getEventEmitter(): EventEmitter;
481
+ /** `EventEmitter` implementation exposing an `events` array and `clear()`. NOTE(review): presumably appends each emitted event to `events` (e.g. for tests) and `clear()` empties it — confirm. */ declare class RecordingEmitter implements EventEmitter {
482
+ readonly events: CrosscheckEvent[];
483
+ emit(event: CrosscheckEvent): void;
484
+ clear(): void;
485
+ }
486
+
487
+ export { type CreateServerOptions, type CrosscheckEvent, type EventEmitter, RecordingEmitter, SERVER_NAME, SERVER_VERSION, connectAndServe, createServer, getEventEmitter, setEventEmitter };