@tangle-network/agent-runtime 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -84
- package/dist/agent.d.ts +1 -1
- package/dist/index.d.ts +167 -862
- package/dist/index.js +157 -1395
- package/dist/index.js.map +1 -1
- package/dist/{types-CYxfw14J.d.ts → types-DmhXdAhu.d.ts} +1 -1
- package/package.json +2 -4
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { AgentEvalError, KnowledgeReadinessReport, ControlEvalResult, KnowledgeRequirement, TraceEvent } from '@tangle-network/agent-eval';
|
|
2
2
|
export { AgentEvalError, AgentEvalErrorCode, CaptureIntegrityError, ConfigError, ControlBudget, ControlDecision, ControlEvalResult, ControlRunResult, ControlStep, DataAcquisitionPlan, JudgeError, KnowledgeReadinessReport, KnowledgeRequirement, NotFoundError, ReplayError, RunRecord, UserQuestion, ValidationError, VerificationError } from '@tangle-network/agent-eval';
|
|
3
|
-
import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent,
|
|
4
|
-
export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-CYxfw14J.js';
|
|
3
|
+
import { A as AgentBackendInput, a as AgentExecutionBackend, b as AgentBackendContext, R as RuntimeStreamEvent, K as KnowledgeReadinessDecision, c as RunAgentTaskOptions, d as AgentTaskRunResult, e as RunAgentTaskStreamOptions, f as AgentTaskRunSummary, g as AgentTaskSpec, h as AgentRuntimeEvent, i as AgentTaskStatus, j as RuntimeSessionStore, k as RuntimeSession } from './types-DmhXdAhu.js';
|
|
4
|
+
export { l as AgentAdapter, m as AgentKnowledgeProvider, n as AgentRuntimeEventSink, o as AgentTaskContext } from './types-DmhXdAhu.js';
|
|
5
5
|
import { AgentProfilePrompt, AgentProfileResources, AgentSubagentProfile, AgentProfile, SandboxInstance } from '@tangle-network/sandbox';
|
|
6
6
|
|
|
7
7
|
/**
|
|
@@ -177,338 +177,55 @@ declare function sandboxAsChatTurnTarget(instance: SandboxInstance, opts?: {
|
|
|
177
177
|
}): ChatTurnSandbox;
|
|
178
178
|
|
|
179
179
|
/**
|
|
180
|
-
*
|
|
181
|
-
*
|
|
180
|
+
* Derive a stable executionId from the run identity. The same
|
|
181
|
+
* `(projectId, sessionId, turnIndex)` tuple yields the same id — so a
|
|
182
|
+
* client retry of the same turn lands on the same substrate execution
|
|
183
|
+
* and the orchestrator's buffer replays instead of starting a second
|
|
184
|
+
* prompt.
|
|
182
185
|
*
|
|
183
|
-
*
|
|
184
|
-
*
|
|
185
|
-
*
|
|
186
|
-
* the prior steps from a `DurableRunStore` and fast-replays them (returning
|
|
187
|
-
* cached values) until it reaches the first unfinished step, where execution
|
|
188
|
-
* actually resumes.
|
|
186
|
+
* Format is readable, not hashed: operators grepping orchestrator logs
|
|
187
|
+
* for `gtm-agent:thread-abc:3` find the run without translating an
|
|
188
|
+
* opaque id. Substrate executionIds are not a secrecy boundary.
|
|
189
189
|
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
*
|
|
193
|
-
*
|
|
194
|
-
*
|
|
195
|
-
*
|
|
196
|
-
*
|
|
197
|
-
* the same index = `DurableRunDivergenceError`.
|
|
198
|
-
*
|
|
199
|
-
* 3. Non-determinism (now / uuid / random) MUST flow through the
|
|
200
|
-
* `DurableContext` helpers — `ctx.now()`, `ctx.uuid()` — so the values
|
|
201
|
-
* are checkpointed and identical on replay. Bare `Date.now()` /
|
|
202
|
-
* `crypto.randomUUID()` inside a task fn breaks replay equality.
|
|
190
|
+
* Wire integration:
|
|
191
|
+
* - `@tangle-network/sandbox@0.1.x` PromptOptions does not yet expose
|
|
192
|
+
* `executionId`. The SDK auto-reconnects in-call by extracting it
|
|
193
|
+
* from the response `execution.started` event; products do nothing.
|
|
194
|
+
* - For cross-process reconnect today, bypass the SDK and POST to the
|
|
195
|
+
* orchestrator's `/agents/run/stream` directly with this id in the
|
|
196
|
+
* `X-Execution-ID` header (see tax-agent's `sessions.ts`).
|
|
203
197
|
*/
|
|
204
|
-
|
|
205
|
-
/** Caller-facing kinds. The runner uses these for telemetry + querying. */
|
|
206
|
-
type StepKind =
|
|
207
|
-
/** Logical step that ran user code (the default for ctx.step). */
|
|
208
|
-
'logic'
|
|
209
|
-
/** A wrapped LLM call. */
|
|
210
|
-
| 'llm'
|
|
211
|
-
/** A wrapped tool call. */
|
|
212
|
-
| 'tool'
|
|
213
|
-
/** A wrapped readiness probe. */
|
|
214
|
-
| 'readiness'
|
|
215
|
-
/** A deterministic clock or uuid read. */
|
|
216
|
-
| 'deterministic'
|
|
217
|
-
/** A suspend-for-event boundary. */
|
|
218
|
-
| 'event';
|
|
219
|
-
type StepStatus = 'pending' | 'running' | 'completed' | 'failed';
|
|
220
|
-
interface StepError {
|
|
221
|
-
message: string;
|
|
222
|
-
code?: string;
|
|
223
|
-
/** Optional stack — stored for diagnostics, NEVER replayed as an exception. */
|
|
224
|
-
stack?: string;
|
|
225
|
-
}
|
|
226
|
-
interface StepRecord<T = unknown> {
|
|
227
|
-
runId: string;
|
|
228
|
-
/** Monotonic 0-based index. Position is the load-bearing identifier — the
|
|
229
|
-
* same intent string at different positions is a different step. */
|
|
230
|
-
stepIndex: number;
|
|
231
|
-
/** Caller-supplied label; intended for human reading + log correlation. */
|
|
232
|
-
intent: string;
|
|
233
|
-
kind: StepKind;
|
|
234
|
-
/** sha256 of the canonical input fingerprint at begin-time. Used to detect
|
|
235
|
-
* divergence (caller changed inputs across replays). Empty for steps where
|
|
236
|
-
* the input cannot be canonicalized (e.g. ctx.now()). */
|
|
237
|
-
inputHash: string;
|
|
238
|
-
status: StepStatus;
|
|
239
|
-
/** Re-entry count. Increments each time the step begins. */
|
|
240
|
-
attempts: number;
|
|
241
|
-
/** JSON-serializable result. Present when status === 'completed'. */
|
|
242
|
-
result?: T;
|
|
243
|
-
error?: StepError;
|
|
244
|
-
startedAt?: string;
|
|
245
|
-
completedAt?: string;
|
|
246
|
-
}
|
|
247
|
-
interface EventRecord {
|
|
248
|
-
runId: string;
|
|
249
|
-
key: string;
|
|
250
|
-
payload: unknown;
|
|
251
|
-
emittedAt: string;
|
|
252
|
-
}
|
|
253
|
-
type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'suspended';
|
|
254
|
-
interface RunOutcome {
|
|
255
|
-
pass?: boolean;
|
|
256
|
-
score?: number;
|
|
257
|
-
notes?: string;
|
|
258
|
-
/** Free-form bag of run-level metrics — surfaced in OTLP / TraceStore. */
|
|
259
|
-
metadata?: Record<string, unknown>;
|
|
260
|
-
}
|
|
261
|
-
interface DurableRunManifest {
|
|
262
|
-
/** Stable per-product id (e.g. 'legal-agent', 'creative-agent'). */
|
|
198
|
+
declare function deriveExecutionId(input: {
|
|
263
199
|
projectId: string;
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
/** Input payload. Hashed into the run identity so two runs with the same
|
|
268
|
-
* runId but different inputs raise DurableRunInputMismatchError. */
|
|
269
|
-
input: Record<string, unknown>;
|
|
270
|
-
/** Free-form tags surfaced into RunRecord / OTLP. */
|
|
271
|
-
tags?: Record<string, string>;
|
|
272
|
-
}
|
|
273
|
-
interface RunRecord {
|
|
274
|
-
runId: string;
|
|
275
|
-
manifestHash: string;
|
|
276
|
-
projectId: string;
|
|
277
|
-
scenarioId?: string;
|
|
278
|
-
status: RunStatus;
|
|
279
|
-
createdAt: string;
|
|
280
|
-
updatedAt: string;
|
|
281
|
-
completedAt?: string;
|
|
282
|
-
/** Stable per-worker id holding the lease. */
|
|
283
|
-
leaseHolderId?: string;
|
|
284
|
-
leaseExpiresAt?: string;
|
|
285
|
-
outcome?: RunOutcome;
|
|
286
|
-
stepCount: number;
|
|
287
|
-
}
|
|
288
|
-
/**
|
|
289
|
-
* The durable-run substrate. Implementations: in-memory (dev), file-system
|
|
290
|
-
* (eval harness), D1 (Cloudflare prod). All stores share this exact contract
|
|
291
|
-
* — swap by changing one factory call.
|
|
292
|
-
*
|
|
293
|
-
* Concurrency model: at most one worker holds a run's lease at a time. Lease
|
|
294
|
-
* renewal happens on a heartbeat; on lease expiry, another worker can
|
|
295
|
-
* `startOrResume` and pick up. Steps committed by the prior worker survive.
|
|
296
|
-
*/
|
|
297
|
-
interface DurableRunStore {
|
|
298
|
-
/**
|
|
299
|
-
* Begin or resume a run. Returns the canonical RunRecord, all previously
|
|
300
|
-
* completed steps (in order), and the lease deadline.
|
|
301
|
-
*
|
|
302
|
-
* If the run did not exist, creates it with status='running'. If it existed
|
|
303
|
-
* with a different manifest hash, throws DurableRunInputMismatchError.
|
|
304
|
-
* If it existed with a live lease held by a different worker, throws
|
|
305
|
-
* DurableRunLeaseHeldError (caller can retry or back off).
|
|
306
|
-
*/
|
|
307
|
-
startOrResume(input: {
|
|
308
|
-
runId: string;
|
|
309
|
-
manifest: DurableRunManifest;
|
|
310
|
-
workerId: string;
|
|
311
|
-
leaseMs?: number;
|
|
312
|
-
}): Promise<{
|
|
313
|
-
run: RunRecord;
|
|
314
|
-
completedSteps: ReadonlyArray<StepRecord>;
|
|
315
|
-
leaseExpiresAt: string;
|
|
316
|
-
}>;
|
|
317
|
-
/** Renew the lease. Returns false if another worker now holds it. */
|
|
318
|
-
renewLease(input: {
|
|
319
|
-
runId: string;
|
|
320
|
-
workerId: string;
|
|
321
|
-
leaseMs?: number;
|
|
322
|
-
}): Promise<{
|
|
323
|
-
ok: boolean;
|
|
324
|
-
leaseExpiresAt?: string;
|
|
325
|
-
}>;
|
|
326
|
-
/** Load a step by position. Returns undefined if not yet begun. */
|
|
327
|
-
loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
|
|
328
|
-
/** Record step start (intent + input hash + kind). Bumps attempt count. */
|
|
329
|
-
beginStep(input: {
|
|
330
|
-
runId: string;
|
|
331
|
-
stepIndex: number;
|
|
332
|
-
intent: string;
|
|
333
|
-
kind: StepKind;
|
|
334
|
-
inputHash: string;
|
|
335
|
-
}): Promise<StepRecord>;
|
|
336
|
-
/** Mark step completed with a JSON-serializable result. */
|
|
337
|
-
completeStep(input: {
|
|
338
|
-
runId: string;
|
|
339
|
-
stepIndex: number;
|
|
340
|
-
result: unknown;
|
|
341
|
-
}): Promise<StepRecord>;
|
|
342
|
-
/** Mark step failed with a captured error. */
|
|
343
|
-
failStep(input: {
|
|
344
|
-
runId: string;
|
|
345
|
-
stepIndex: number;
|
|
346
|
-
error: StepError;
|
|
347
|
-
}): Promise<StepRecord>;
|
|
348
|
-
/** End the run; releases lease. */
|
|
349
|
-
endRun(input: {
|
|
350
|
-
runId: string;
|
|
351
|
-
workerId: string;
|
|
352
|
-
status: 'completed' | 'failed';
|
|
353
|
-
outcome?: RunOutcome;
|
|
354
|
-
}): Promise<RunRecord>;
|
|
355
|
-
/**
|
|
356
|
-
* Emit an event. First emit wins; subsequent emits return the existing
|
|
357
|
-
* record under `existing` and accepted=false. Caller can treat that as
|
|
358
|
-
* idempotency-by-design — never double-fire a downstream side effect.
|
|
359
|
-
*/
|
|
360
|
-
emitEvent(input: {
|
|
361
|
-
runId: string;
|
|
362
|
-
key: string;
|
|
363
|
-
payload: unknown;
|
|
364
|
-
}): Promise<{
|
|
365
|
-
accepted: boolean;
|
|
366
|
-
record: EventRecord;
|
|
367
|
-
}>;
|
|
368
|
-
/** Load the cached event payload if it has been emitted. */
|
|
369
|
-
loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
|
|
370
|
-
/** Cleanup hook for in-memory / fs stores; no-op for D1. Idempotent. */
|
|
371
|
-
close(): Promise<void>;
|
|
372
|
-
}
|
|
373
|
-
/** Base class for durable-run errors. */
|
|
374
|
-
declare class DurableRunError extends Error {
|
|
375
|
-
readonly code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race';
|
|
376
|
-
constructor(message: string, code: 'lease_held' | 'manifest_mismatch' | 'step_divergence' | 'step_input_mismatch' | 'await_event_timeout' | 'event_emit_race');
|
|
377
|
-
}
|
|
378
|
-
/** Thrown when another worker holds the lease for this runId. */
|
|
379
|
-
declare class DurableRunLeaseHeldError extends DurableRunError {
|
|
380
|
-
constructor(message: string);
|
|
381
|
-
}
|
|
382
|
-
/** Thrown when the manifest hash differs from a prior run with the same id. */
|
|
383
|
-
declare class DurableRunInputMismatchError extends DurableRunError {
|
|
384
|
-
constructor(message: string);
|
|
385
|
-
}
|
|
386
|
-
/** Thrown when the same stepIndex re-runs with a different intent string. */
|
|
387
|
-
declare class DurableRunDivergenceError extends DurableRunError {
|
|
388
|
-
constructor(message: string);
|
|
389
|
-
}
|
|
390
|
-
/** Thrown when `awaitEvent` times out. */
|
|
391
|
-
declare class DurableAwaitEventTimeoutError extends DurableRunError {
|
|
392
|
-
constructor(message: string);
|
|
393
|
-
}
|
|
394
|
-
|
|
395
|
-
/**
|
|
396
|
-
* `runDurableTurn` — a streaming, backend-agnostic, checkpoint+replay durable
|
|
397
|
-
* turn. The single reusable primitive every product's chat handler routes
|
|
398
|
-
* through, so per-product durability code drops to zero.
|
|
399
|
-
*
|
|
400
|
-
* A **turn** is one request→response unit: a producer yields a stream of
|
|
401
|
-
* events and, once drained, exposes the turn's final text. `runDurableTurn`
|
|
402
|
-
* wraps that with a `DurableRunStore`:
|
|
403
|
-
*
|
|
404
|
-
* - **Fresh run** — no completed step for this `(runId)`. The producer
|
|
405
|
-
* runs; its events forward live to the caller (streaming preserved)
|
|
406
|
-
* while final text accumulates; on drain the text is checkpointed.
|
|
407
|
-
*
|
|
408
|
-
* - **Replay** — a completed step already exists (the worker died after
|
|
409
|
-
* the turn finished but before the response reached the client, and the
|
|
410
|
-
* client retried the same turn). The cached text is emitted as a single
|
|
411
|
-
* synthetic event; the producer is never constructed — no LLM call, no
|
|
412
|
-
* double-billing.
|
|
413
|
-
*
|
|
414
|
-
* - **Mid-stream crash** — a turn that died *while streaming* leaves step 0
|
|
415
|
-
* in `running`/`failed`. There is no partial-stream checkpoint (the
|
|
416
|
-
* substrate checkpoints JSON values at step granularity), so the turn
|
|
417
|
-
* re-runs from the top. This is the honest durability ceiling: a
|
|
418
|
-
* *completed* turn is free to replay; an *interrupted* turn re-runs.
|
|
419
|
-
*
|
|
420
|
-
* Generic over the event type `TEvent` so a product can stream its own NDJSON
|
|
421
|
-
* shape or the runtime's `RuntimeStreamEvent` — `runDurableTurn` never
|
|
422
|
-
* inspects events, it only forwards them and reads `finalText()` after drain.
|
|
423
|
-
*
|
|
424
|
-
* Lease: a turn is a single step, fast enough that the heartbeat in
|
|
425
|
-
* `runDurable` is unnecessary — `runDurableTurn` claims the lease once via
|
|
426
|
-
* `startOrResume` and releases it on `endRun`. Concurrent workers on the same
|
|
427
|
-
* `runId` are rejected with `DurableRunLeaseHeldError` (the client retried
|
|
428
|
-
* before the first attempt finished); callers surface that as "turn already
|
|
429
|
-
* in flight."
|
|
430
|
-
*/
|
|
431
|
-
|
|
432
|
-
/** The live side of a turn — what a fresh run produces. */
|
|
433
|
-
interface DurableTurnProducer<TEvent> {
|
|
434
|
-
/** The turn's event stream. Forwarded verbatim to the caller. */
|
|
435
|
-
stream: AsyncGenerator<TEvent, void, unknown>;
|
|
436
|
-
/** The turn's final assistant text. Read once, after `stream` drains. */
|
|
437
|
-
finalText(): string;
|
|
438
|
-
}
|
|
439
|
-
interface RunDurableTurnOptions<TEvent> {
|
|
440
|
-
store: DurableRunStore;
|
|
441
|
-
/** Stable per-turn run id. Convention: `chat:<threadId>:<turnIndex>`. The
|
|
442
|
-
* same id on a retry is what enables replay. */
|
|
443
|
-
runId: string;
|
|
444
|
-
manifest: DurableRunManifest;
|
|
445
|
-
/** Stable per-isolate worker id. Defaults to a fresh `deriveWorkerId()`
|
|
446
|
-
* per call when omitted — fine for single-attempt turns. */
|
|
447
|
-
workerId: string;
|
|
448
|
-
/** Lease window in ms. Default 60_000 — a turn rarely runs longer. */
|
|
449
|
-
leaseMs?: number;
|
|
450
|
-
/** Human-readable step label. Default `turn`. */
|
|
451
|
-
intent?: string;
|
|
452
|
-
/** Builds the live producer. Called exactly once, on a fresh run; never
|
|
453
|
-
* called on the replay path. */
|
|
454
|
-
produce: () => DurableTurnProducer<TEvent>;
|
|
455
|
-
/** Synthesizes the single event emitted on the replay path from the
|
|
456
|
-
* cached final text (e.g. a product's `{ type: 'result', data: {...} }`). */
|
|
457
|
-
replayEvent: (finalText: string) => TEvent;
|
|
458
|
-
/** Optional live accumulator. When the producer's `finalText()` is only
|
|
459
|
-
* valid after drain, this lets `runDurableTurn` also observe each event
|
|
460
|
-
* to build the text — return the running text or `undefined` to ignore
|
|
461
|
-
* an event. When omitted, `producer.finalText()` is the sole source. */
|
|
462
|
-
accumulate?: (event: TEvent, current: string) => string | undefined;
|
|
463
|
-
}
|
|
464
|
-
interface DurableTurnHandle<TEvent> {
|
|
465
|
-
/** Drop-in stream. Fresh runs forward producer events live; replays emit
|
|
466
|
-
* exactly one `replayEvent(cachedText)`. */
|
|
467
|
-
stream: AsyncGenerator<TEvent, void, unknown>;
|
|
468
|
-
/** The turn's final text. Valid after `stream` drains. */
|
|
469
|
-
finalText(): string;
|
|
470
|
-
/** True iff this turn replayed a cached result (no producer ran). Valid
|
|
471
|
-
* after `stream` drains. */
|
|
472
|
-
replayed(): boolean;
|
|
473
|
-
/** The durable `RunRecord` for this turn. Valid after `stream` drains. */
|
|
474
|
-
record(): RunRecord | undefined;
|
|
475
|
-
}
|
|
476
|
-
declare function runDurableTurn<TEvent>(options: RunDurableTurnOptions<TEvent>): DurableTurnHandle<TEvent>;
|
|
200
|
+
sessionId: string;
|
|
201
|
+
turnIndex: number;
|
|
202
|
+
}): string;
|
|
477
203
|
|
|
478
204
|
/**
|
|
479
|
-
* `
|
|
480
|
-
*
|
|
481
|
-
*
|
|
482
|
-
* `
|
|
483
|
-
*
|
|
484
|
-
* product-specific is a hook the product supplies.
|
|
205
|
+
* `handleChatTurn` — framework-neutral chat-turn HTTP orchestrator.
|
|
206
|
+
* Owns the NDJSON `ChatStreamEvent` line protocol, the `session.run.*`
|
|
207
|
+
* lifecycle vocabulary, and the persist / post-process / trace-flush
|
|
208
|
+
* hook order. Returns a `ReadableStream` body the product hands to its
|
|
209
|
+
* platform `Response`.
|
|
485
210
|
*
|
|
486
|
-
*
|
|
487
|
-
*
|
|
488
|
-
*
|
|
489
|
-
*
|
|
490
|
-
* - NDJSON encoding into a `ReadableStream<Uint8Array>` (the body every
|
|
491
|
-
* product returns, React Router or Hono alike)
|
|
492
|
-
* - calling the product's persist / post-process hooks in the right order,
|
|
493
|
-
* after the stream drains, with the assembled final text
|
|
494
|
-
* - never throwing into the HTTP layer — a producer failure becomes an
|
|
495
|
-
* `error` + `session.run.failed` event pair, the stream still closes
|
|
211
|
+
* Execution durability is the substrate's concern: `box.streamPrompt`
|
|
212
|
+
* auto-reconnects in-call; cross-process reconnect via `X-Execution-ID`
|
|
213
|
+
* is the product's job. The producer this engine wraps already speaks
|
|
214
|
+
* that protocol — the engine just frames the events.
|
|
496
215
|
*
|
|
497
|
-
*
|
|
498
|
-
* - `produce`
|
|
499
|
-
*
|
|
500
|
-
* - `
|
|
501
|
-
* - `
|
|
502
|
-
* - `
|
|
503
|
-
* - `
|
|
216
|
+
* Hooks (`ChatTurnHooks`):
|
|
217
|
+
* - `produce` — build the backend event stream
|
|
218
|
+
* - `persistAssistantMessage` — write the assistant turn to the product DB
|
|
219
|
+
* - `onTurnComplete?` — post-process (proposals, citations, …)
|
|
220
|
+
* - `onEvent?` — per-event side channel (e.g. DO broadcast)
|
|
221
|
+
* - `transformFinalText?` — pre-persist transform (e.g. PII redact)
|
|
222
|
+
* - `traceFlush?` — handed to waitUntil so OTLP export lands
|
|
504
223
|
*
|
|
505
|
-
* Framework neutrality:
|
|
506
|
-
*
|
|
507
|
-
*
|
|
508
|
-
*
|
|
509
|
-
* and returns `result.body` as its platform `Response`.
|
|
224
|
+
* Framework neutrality: takes already-resolved values (`identity` tuple,
|
|
225
|
+
* a `waitUntil`), never a `Request` or a `Context`. The product's thin
|
|
226
|
+
* route adapter does auth + parse + access-control, then calls
|
|
227
|
+
* `handleChatTurn(...)` and returns `result.body` as its platform `Response`.
|
|
510
228
|
*/
|
|
511
|
-
|
|
512
229
|
/** The NDJSON line protocol every product chat client already speaks. */
|
|
513
230
|
interface ChatStreamEvent {
|
|
514
231
|
type: string;
|
|
@@ -518,76 +235,56 @@ interface ChatStreamEvent {
|
|
|
518
235
|
* scoped products and the user id for session-scoped products. */
|
|
519
236
|
interface ChatTurnIdentity {
|
|
520
237
|
tenantId: string;
|
|
521
|
-
/** Thread / session id
|
|
238
|
+
/** Thread / session id. */
|
|
522
239
|
sessionId: string;
|
|
523
240
|
userId: string;
|
|
524
241
|
/** Monotonic 0-based turn index within the session. */
|
|
525
242
|
turnIndex: number;
|
|
526
243
|
}
|
|
244
|
+
/** The live side of a turn — what the product's `produce` hook returns. */
|
|
245
|
+
interface ChatTurnProducer<TEvent extends ChatStreamEvent = ChatStreamEvent> {
|
|
246
|
+
/** The turn's event stream. Forwarded verbatim to the caller. */
|
|
247
|
+
stream: AsyncGenerator<TEvent, void, unknown>;
|
|
248
|
+
/** The turn's final assistant text. Read once, after `stream` drains. */
|
|
249
|
+
finalText(): string;
|
|
250
|
+
}
|
|
527
251
|
interface ChatTurnHooks {
|
|
528
|
-
/**
|
|
529
|
-
*
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
*/
|
|
533
|
-
produce(): DurableTurnProducer<ChatStreamEvent>;
|
|
534
|
-
/**
|
|
535
|
-
* Persist the completed assistant message to the product's own store.
|
|
536
|
-
* Called once, after the stream drains, on a fresh (non-replay) run.
|
|
537
|
-
* Receives the assembled (and `transformFinalText`-transformed) text.
|
|
538
|
-
*/
|
|
252
|
+
/** Build the backend stream. The engine forwards events verbatim and
|
|
253
|
+
* reads `finalText()` once the stream drains. */
|
|
254
|
+
produce(): ChatTurnProducer;
|
|
255
|
+
/** Persist the assistant message to the product's own store. Called
|
|
256
|
+
* once, after drain, with the assembled (transform-applied) text. */
|
|
539
257
|
persistAssistantMessage(input: {
|
|
540
258
|
identity: ChatTurnIdentity;
|
|
541
259
|
finalText: string;
|
|
542
|
-
record: RunRecord | undefined;
|
|
543
260
|
}): Promise<void>;
|
|
544
|
-
/**
|
|
545
|
-
*
|
|
546
|
-
*
|
|
547
|
-
* has no shared logic here. Errors are swallowed + logged (post-process
|
|
548
|
-
* must never fail the turn that already streamed successfully).
|
|
549
|
-
*/
|
|
261
|
+
/** Optional post-processing (proposals, citations, credit metering …).
|
|
262
|
+
* Errors are swallowed + logged — post-process must never fail a turn
|
|
263
|
+
* that already streamed successfully. */
|
|
550
264
|
onTurnComplete?(input: {
|
|
551
265
|
identity: ChatTurnIdentity;
|
|
552
266
|
finalText: string;
|
|
553
267
|
}): Promise<void>;
|
|
554
|
-
/**
|
|
555
|
-
*
|
|
556
|
-
*
|
|
557
|
-
* are swallowed — a broadcast failure must not break the chat stream.
|
|
558
|
-
*/
|
|
268
|
+
/** Optional per-event side channel (e.g. DO broadcast). Runs for every
|
|
269
|
+
* emitted event, lifecycle envelope included. Errors swallowed — a
|
|
270
|
+
* broadcast failure must not break the chat stream. */
|
|
559
271
|
onEvent?(event: ChatStreamEvent): void | Promise<void>;
|
|
560
|
-
/**
|
|
561
|
-
*
|
|
562
|
-
*
|
|
563
|
-
*/
|
|
272
|
+
/** Optional pre-persist transform of the final text (e.g. PII
|
|
273
|
+
* redaction). Affects only what is persisted; the live stream is
|
|
274
|
+
* never altered. */
|
|
564
275
|
transformFinalText?(text: string): string | Promise<string>;
|
|
565
|
-
/**
|
|
566
|
-
*
|
|
567
|
-
* hands it to `waitUntil` so the worker isolate stays alive for the POST.
|
|
568
|
-
*/
|
|
276
|
+
/** Optional trace flush — resolves when OTLP export completes. Handed
|
|
277
|
+
* to `waitUntil` so the worker isolate stays alive for the POST. */
|
|
569
278
|
traceFlush?(): Promise<void>;
|
|
570
279
|
}
|
|
571
280
|
interface RunChatTurnInput {
|
|
572
281
|
identity: ChatTurnIdentity;
|
|
573
|
-
/** The user's message for this turn. Hashed into the durable run identity. */
|
|
574
|
-
userMessage: string;
|
|
575
|
-
/** Product id for telemetry / the durable manifest (`legal-agent`, …). */
|
|
576
|
-
projectId: string;
|
|
577
|
-
/** Domain tag for the task spec (`legal`, `gtm`, …). */
|
|
578
|
-
domain: string;
|
|
579
|
-
/** Model id, when known — recorded on the manifest. */
|
|
580
|
-
model?: string;
|
|
581
|
-
store: DurableRunStore;
|
|
582
282
|
hooks: ChatTurnHooks;
|
|
583
|
-
/** Worker liveness hook
|
|
584
|
-
*
|
|
283
|
+
/** Worker liveness hook. When omitted, trace flush is awaited inline
|
|
284
|
+
* before the stream closes. */
|
|
585
285
|
waitUntil?: (p: Promise<unknown>) => void;
|
|
586
|
-
/**
|
|
587
|
-
|
|
588
|
-
/** Lease window in ms. Default 60_000. */
|
|
589
|
-
leaseMs?: number;
|
|
590
|
-
/** Optional structured logger for swallowed hook errors. */
|
|
286
|
+
/** Structured logger for swallowed hook errors. Defaults to
|
|
287
|
+
* `console.error` so failures surface without product wiring. */
|
|
591
288
|
log?: (message: string, meta?: Record<string, unknown>) => void;
|
|
592
289
|
}
|
|
593
290
|
interface ChatTurnResult {
|
|
@@ -597,496 +294,11 @@ interface ChatTurnResult {
|
|
|
597
294
|
contentType: 'application/x-ndjson';
|
|
598
295
|
}
|
|
599
296
|
/**
|
|
600
|
-
*
|
|
601
|
-
*
|
|
602
|
-
|
|
603
|
-
declare class DurableChatTurnEngine {
|
|
604
|
-
/**
|
|
605
|
-
* Run one durable chat turn. Returns immediately with a `ReadableStream`
|
|
606
|
-
* body; the turn executes as the body is pulled. Never rejects — backend
|
|
607
|
-
* failures surface as `error` + `session.run.failed` events.
|
|
608
|
-
*/
|
|
609
|
-
runTurn(input: RunChatTurnInput): ChatTurnResult;
|
|
610
|
-
}
|
|
611
|
-
/** Convenience singleton — the engine is stateless, one instance is enough. */
|
|
612
|
-
declare const durableChatTurnEngine: DurableChatTurnEngine;
|
|
613
|
-
|
|
614
|
-
/**
|
|
615
|
-
* D1DurableRunStore — the production path for Cloudflare Workers. Backed by
|
|
616
|
-
* a D1 (SQLite-compatible) database via the binding the worker already holds.
|
|
617
|
-
*
|
|
618
|
-
* Apply `./schema.sql` once before use; the store itself does not run DDL.
|
|
619
|
-
* Migration version is recorded in `durable_schema_info`; consumers can
|
|
620
|
-
* inspect `getSchemaVersion()` if they ship a migration tool.
|
|
621
|
-
*
|
|
622
|
-
* Why structural typing: agent-runtime stays Cloudflare-free at the dep
|
|
623
|
-
* level. Consumers pass their `D1Database` binding — TypeScript matches the
|
|
624
|
-
* minimal `D1DatabaseLike` surface below. Tests use the same interface with
|
|
625
|
-
* a fake.
|
|
626
|
-
*/
|
|
627
|
-
|
|
628
|
-
/**
|
|
629
|
-
* Minimal D1 surface this store uses. Compatible with Cloudflare's
|
|
630
|
-
* `D1Database` from `@cloudflare/workers-types`. Defined locally so
|
|
631
|
-
* agent-runtime does not depend on workers-types at the package level.
|
|
632
|
-
*/
|
|
633
|
-
interface D1DatabaseLike {
|
|
634
|
-
prepare(query: string): D1PreparedStatementLike;
|
|
635
|
-
batch(statements: D1PreparedStatementLike[]): Promise<unknown[]>;
|
|
636
|
-
}
|
|
637
|
-
interface D1PreparedStatementLike {
|
|
638
|
-
bind(...values: unknown[]): D1PreparedStatementLike;
|
|
639
|
-
first<T = unknown>(): Promise<T | null>;
|
|
640
|
-
all<T = unknown>(): Promise<{
|
|
641
|
-
results: T[];
|
|
642
|
-
}>;
|
|
643
|
-
run(): Promise<{
|
|
644
|
-
success: boolean;
|
|
645
|
-
meta?: {
|
|
646
|
-
changes?: number;
|
|
647
|
-
};
|
|
648
|
-
}>;
|
|
649
|
-
}
|
|
650
|
-
declare class D1DurableRunStore implements DurableRunStore {
|
|
651
|
-
private readonly db;
|
|
652
|
-
constructor(db: D1DatabaseLike);
|
|
653
|
-
/** Override for tests — defaults to Date.now(). */
|
|
654
|
-
now: () => number;
|
|
655
|
-
startOrResume(input: {
|
|
656
|
-
runId: string;
|
|
657
|
-
manifest: DurableRunManifest;
|
|
658
|
-
workerId: string;
|
|
659
|
-
leaseMs?: number;
|
|
660
|
-
}): ReturnType<DurableRunStore['startOrResume']>;
|
|
661
|
-
renewLease(input: {
|
|
662
|
-
runId: string;
|
|
663
|
-
workerId: string;
|
|
664
|
-
leaseMs?: number;
|
|
665
|
-
}): Promise<{
|
|
666
|
-
ok: boolean;
|
|
667
|
-
leaseExpiresAt?: string;
|
|
668
|
-
}>;
|
|
669
|
-
loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
|
|
670
|
-
beginStep(input: {
|
|
671
|
-
runId: string;
|
|
672
|
-
stepIndex: number;
|
|
673
|
-
intent: string;
|
|
674
|
-
kind: StepKind;
|
|
675
|
-
inputHash: string;
|
|
676
|
-
}): Promise<StepRecord>;
|
|
677
|
-
completeStep(input: {
|
|
678
|
-
runId: string;
|
|
679
|
-
stepIndex: number;
|
|
680
|
-
result: unknown;
|
|
681
|
-
}): Promise<StepRecord>;
|
|
682
|
-
failStep(input: {
|
|
683
|
-
runId: string;
|
|
684
|
-
stepIndex: number;
|
|
685
|
-
error: StepError;
|
|
686
|
-
}): Promise<StepRecord>;
|
|
687
|
-
endRun(input: {
|
|
688
|
-
runId: string;
|
|
689
|
-
workerId: string;
|
|
690
|
-
status: 'completed' | 'failed';
|
|
691
|
-
outcome?: RunOutcome;
|
|
692
|
-
}): Promise<RunRecord>;
|
|
693
|
-
emitEvent(input: {
|
|
694
|
-
runId: string;
|
|
695
|
-
key: string;
|
|
696
|
-
payload: unknown;
|
|
697
|
-
}): ReturnType<DurableRunStore['emitEvent']>;
|
|
698
|
-
loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
|
|
699
|
-
close(): Promise<void>;
|
|
700
|
-
/** Inspect the currently-applied schema version. */
|
|
701
|
-
getSchemaVersion(): Promise<number | undefined>;
|
|
702
|
-
private readSteps;
|
|
703
|
-
private bumpUpdated;
|
|
704
|
-
}
|
|
705
|
-
|
|
706
|
-
/**
|
|
707
|
-
* FileSystemDurableRunStore — durable-run substrate backed by a directory
|
|
708
|
-
* tree under a single root. One subdir per run:
|
|
709
|
-
*
|
|
710
|
-
* <root>/<runId>/
|
|
711
|
-
* run.json — RunRecord (rewritten on every mutation; the only
|
|
712
|
-
* scalar fields are status/lease, so this stays small)
|
|
713
|
-
* steps.jsonl — append-only StepRecord stream; one JSON per line
|
|
714
|
-
* events.jsonl — append-only EventRecord stream
|
|
715
|
-
* lease.json — current leaseholder + deadline (separate from
|
|
716
|
-
* run.json so renewLease writes one tiny file
|
|
717
|
-
* instead of round-tripping the whole run record)
|
|
718
|
-
*
|
|
719
|
-
* Concurrency: the eval harness is single-process — we rely on Node's
|
|
720
|
-
* append-mode semantics for atomicity of step / event writes (single-line
|
|
721
|
-
* writes < PIPE_BUF are atomic on POSIX). For run.json / lease.json we write
|
|
722
|
-
* to a `<file>.tmp` then `rename` to make replacement atomic. This is
|
|
723
|
-
* sufficient for the single-process eval harness use case. Multi-process
|
|
724
|
-
* concurrency on the SAME filesystem requires a flock-based extension;
|
|
725
|
-
* for that path use D1DurableRunStore.
|
|
726
|
-
*/
|
|
727
|
-
|
|
728
|
-
declare class FileSystemDurableRunStore implements DurableRunStore {
|
|
729
|
-
private readonly root;
|
|
730
|
-
constructor(root: string);
|
|
731
|
-
/** Override for tests — defaults to Date.now(). */
|
|
732
|
-
now: () => number;
|
|
733
|
-
startOrResume(input: {
|
|
734
|
-
runId: string;
|
|
735
|
-
manifest: DurableRunManifest;
|
|
736
|
-
workerId: string;
|
|
737
|
-
leaseMs?: number;
|
|
738
|
-
}): ReturnType<DurableRunStore['startOrResume']>;
|
|
739
|
-
renewLease(input: {
|
|
740
|
-
runId: string;
|
|
741
|
-
workerId: string;
|
|
742
|
-
leaseMs?: number;
|
|
743
|
-
}): Promise<{
|
|
744
|
-
ok: boolean;
|
|
745
|
-
leaseExpiresAt?: string;
|
|
746
|
-
}>;
|
|
747
|
-
loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
|
|
748
|
-
beginStep(input: {
|
|
749
|
-
runId: string;
|
|
750
|
-
stepIndex: number;
|
|
751
|
-
intent: string;
|
|
752
|
-
kind: StepKind;
|
|
753
|
-
inputHash: string;
|
|
754
|
-
}): Promise<StepRecord>;
|
|
755
|
-
completeStep(input: {
|
|
756
|
-
runId: string;
|
|
757
|
-
stepIndex: number;
|
|
758
|
-
result: unknown;
|
|
759
|
-
}): Promise<StepRecord>;
|
|
760
|
-
failStep(input: {
|
|
761
|
-
runId: string;
|
|
762
|
-
stepIndex: number;
|
|
763
|
-
error: StepError;
|
|
764
|
-
}): Promise<StepRecord>;
|
|
765
|
-
endRun(input: {
|
|
766
|
-
runId: string;
|
|
767
|
-
workerId: string;
|
|
768
|
-
status: 'completed' | 'failed';
|
|
769
|
-
outcome?: RunOutcome;
|
|
770
|
-
}): Promise<RunRecord>;
|
|
771
|
-
emitEvent(input: {
|
|
772
|
-
runId: string;
|
|
773
|
-
key: string;
|
|
774
|
-
payload: unknown;
|
|
775
|
-
}): ReturnType<DurableRunStore['emitEvent']>;
|
|
776
|
-
loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
|
|
777
|
-
close(): Promise<void>;
|
|
778
|
-
/** @internal — used by tests to list runs in the store. */
|
|
779
|
-
_listRunIds(): Promise<string[]>;
|
|
780
|
-
private runDir;
|
|
781
|
-
private readRun;
|
|
782
|
-
private writeRun;
|
|
783
|
-
private readLeaseSafe;
|
|
784
|
-
private writeLease;
|
|
785
|
-
private readSteps;
|
|
786
|
-
private appendStep;
|
|
787
|
-
private bumpRunUpdated;
|
|
788
|
-
}
|
|
789
|
-
|
|
790
|
-
/**
|
|
791
|
-
* Identity + canonical-hash helpers for the durable-runs substrate.
|
|
792
|
-
*
|
|
793
|
-
* Two boundary disciplines:
|
|
794
|
-
*
|
|
795
|
-
* 1. **Manifest hash** — sha256 over a sorted-key JSON of (projectId,
|
|
796
|
-
* scenarioId, task.id, task.intent, task.domain, input). Same hash =
|
|
797
|
-
* same run identity. Used to detect "same runId, different inputs."
|
|
798
|
-
*
|
|
799
|
-
* 2. **Step input hash** — sha256 over a sorted-key JSON of the step's
|
|
800
|
-
* input fingerprint. Used to detect drift across replays.
|
|
801
|
-
*
|
|
802
|
-
* Sorted-key JSON makes hashes deterministic regardless of object insertion
|
|
803
|
-
* order. NaN / Infinity / undefined / functions / symbols / class instances
|
|
804
|
-
* are rejected — pure JSON only at the boundary, so the hash matches whatever
|
|
805
|
-
* the store round-trips.
|
|
297
|
+
* Run one chat turn. Returns immediately with a `ReadableStream` body;
|
|
298
|
+
* the turn executes as the body is pulled. Never rejects — backend
|
|
299
|
+
* failures surface as `error` + `session.run.failed` events.
|
|
806
300
|
*/
|
|
807
|
-
|
|
808
|
-
/** sha256-hex over a JSON-canonicalized value. */
|
|
809
|
-
declare function canonicalHash(value: unknown): string;
|
|
810
|
-
/** Canonical JSON: object keys sorted lexicographically; arrays preserved. */
|
|
811
|
-
declare function canonicalJson(value: unknown): string;
|
|
812
|
-
/** Hash a DurableRunManifest into the run identity component. */
|
|
813
|
-
declare function manifestHash(manifest: DurableRunManifest): string;
|
|
814
|
-
/** Stable per-step identifier — hash of (runId, position, intent). */
|
|
815
|
-
declare function stepId(runId: string, stepIndex: number, intent: string): string;
|
|
816
|
-
/**
|
|
817
|
-
* Stable worker id for a single process. Format: `host:pid:rand`. Random
|
|
818
|
-
* suffix prevents collisions when the host/pid pair is short-lived (e.g.,
|
|
819
|
-
* Cloudflare isolates that recycle fast).
|
|
820
|
-
*/
|
|
821
|
-
declare function deriveWorkerId(): string;
|
|
822
|
-
|
|
823
|
-
/**
|
|
824
|
-
* In-memory DurableRunStore for dev + tests. Single-process. All state lives
|
|
825
|
-
* in maps. Lease enforcement is real (Date.now() vs lease deadline) so the
|
|
826
|
-
* crash-recovery + multi-worker race tests run identically against this and
|
|
827
|
-
* the file-system / D1 stores.
|
|
828
|
-
*/
|
|
829
|
-
|
|
830
|
-
declare class InMemoryDurableRunStore implements DurableRunStore {
|
|
831
|
-
private readonly runs;
|
|
832
|
-
/** Override for tests — defaults to Date.now(). */
|
|
833
|
-
now: () => number;
|
|
834
|
-
startOrResume(input: {
|
|
835
|
-
runId: string;
|
|
836
|
-
manifest: DurableRunManifest;
|
|
837
|
-
workerId: string;
|
|
838
|
-
leaseMs?: number;
|
|
839
|
-
}): ReturnType<DurableRunStore['startOrResume']>;
|
|
840
|
-
renewLease(input: {
|
|
841
|
-
runId: string;
|
|
842
|
-
workerId: string;
|
|
843
|
-
leaseMs?: number;
|
|
844
|
-
}): Promise<{
|
|
845
|
-
ok: boolean;
|
|
846
|
-
leaseExpiresAt?: string;
|
|
847
|
-
}>;
|
|
848
|
-
loadStep(runId: string, stepIndex: number): Promise<StepRecord | undefined>;
|
|
849
|
-
beginStep(input: {
|
|
850
|
-
runId: string;
|
|
851
|
-
stepIndex: number;
|
|
852
|
-
intent: string;
|
|
853
|
-
kind: StepKind;
|
|
854
|
-
inputHash: string;
|
|
855
|
-
}): Promise<StepRecord>;
|
|
856
|
-
completeStep(input: {
|
|
857
|
-
runId: string;
|
|
858
|
-
stepIndex: number;
|
|
859
|
-
result: unknown;
|
|
860
|
-
}): Promise<StepRecord>;
|
|
861
|
-
failStep(input: {
|
|
862
|
-
runId: string;
|
|
863
|
-
stepIndex: number;
|
|
864
|
-
error: StepError;
|
|
865
|
-
}): Promise<StepRecord>;
|
|
866
|
-
endRun(input: {
|
|
867
|
-
runId: string;
|
|
868
|
-
workerId: string;
|
|
869
|
-
status: 'completed' | 'failed';
|
|
870
|
-
outcome?: RunOutcome;
|
|
871
|
-
}): Promise<RunRecord>;
|
|
872
|
-
emitEvent(input: {
|
|
873
|
-
runId: string;
|
|
874
|
-
key: string;
|
|
875
|
-
payload: unknown;
|
|
876
|
-
}): ReturnType<DurableRunStore['emitEvent']>;
|
|
877
|
-
loadEvent(runId: string, key: string): Promise<EventRecord | undefined>;
|
|
878
|
-
close(): Promise<void>;
|
|
879
|
-
/** @internal — used by tests to inspect lease metadata. */
|
|
880
|
-
_inspect(runId: string): RunRecord | undefined;
|
|
881
|
-
/** @internal — used by tests to simulate lease expiry. */
|
|
882
|
-
_expireLease(runId: string): void;
|
|
883
|
-
private requireRun;
|
|
884
|
-
}
|
|
885
|
-
|
|
886
|
-
/**
|
|
887
|
-
* Durable runner — wraps a user-supplied async function in checkpoint /
|
|
888
|
-
* resume / lease semantics. The user writes plain async code, awaiting
|
|
889
|
-
* `ctx.step(intent, fn)` boundaries. On worker crash, the next caller with
|
|
890
|
-
* the same `runId` skips completed steps and resumes from the first unfinished
|
|
891
|
-
* one.
|
|
892
|
-
*
|
|
893
|
-
* Invariants:
|
|
894
|
-
*
|
|
895
|
-
* - Step positions are derived from a monotonic counter on the ctx. The
|
|
896
|
-
* same intent at position N is the same step across replays. If the user
|
|
897
|
-
* reorders steps, position N changes intent and we raise
|
|
898
|
-
* DurableRunDivergenceError (fail-loud).
|
|
899
|
-
*
|
|
900
|
-
* - `ctx.now()` and `ctx.uuid()` are checkpointed as zero-input logic steps
|
|
901
|
-
* with kind='deterministic'. On replay they return the recorded value.
|
|
902
|
-
*
|
|
903
|
-
* - `awaitEvent` writes an 'event' step that records the event payload on
|
|
904
|
-
* first awaited completion. On replay, the cached payload returns
|
|
905
|
-
* synchronously. If the event has not been emitted and the runner is in
|
|
906
|
-
* a fresh execution, it polls the store until timeout.
|
|
907
|
-
*
|
|
908
|
-
* - Lease renewal happens on a wall-clock interval (every leaseMs/3). If
|
|
909
|
-
* the store reports a lost lease, the runner aborts the current step
|
|
910
|
-
* execution and throws — letting whichever worker holds the lease pick
|
|
911
|
-
* up. Committed steps survive.
|
|
912
|
-
*/
|
|
913
|
-
|
|
914
|
-
interface DurableContext {
|
|
915
|
-
readonly runId: string;
|
|
916
|
-
readonly projectId: string;
|
|
917
|
-
readonly scenarioId?: string;
|
|
918
|
-
/**
|
|
919
|
-
* Execute a checkpointed step. The step is identified by its **position**
|
|
920
|
-
* (monotonic counter on this ctx); `intent` is a human-readable label that
|
|
921
|
-
* must stay stable across replays.
|
|
922
|
-
*
|
|
923
|
-
* On first execution: runs `fn`, records the result, returns it.
|
|
924
|
-
* On replay: returns the recorded result WITHOUT calling `fn`.
|
|
925
|
-
*
|
|
926
|
-
* The `inputFingerprint` (optional) lets the runner detect "same intent,
|
|
927
|
-
* different inputs" — it gets hashed and compared. If you don't supply
|
|
928
|
-
* one, drift is allowed (input not checked).
|
|
929
|
-
*/
|
|
930
|
-
step<T>(intent: string, fn: () => Promise<T>, opts?: {
|
|
931
|
-
kind?: StepKind;
|
|
932
|
-
inputFingerprint?: unknown;
|
|
933
|
-
}): Promise<T>;
|
|
934
|
-
/** Race-free first-emit-wins event wait. */
|
|
935
|
-
awaitEvent<T = unknown>(key: string, opts?: {
|
|
936
|
-
timeoutMs?: number;
|
|
937
|
-
pollMs?: number;
|
|
938
|
-
}): Promise<T>;
|
|
939
|
-
/** Emit an event. First emit wins. Subsequent emits no-op. */
|
|
940
|
-
emitEvent(key: string, payload: unknown): Promise<{
|
|
941
|
-
accepted: boolean;
|
|
942
|
-
}>;
|
|
943
|
-
/** Deterministic clock — checkpointed once per call. */
|
|
944
|
-
now(): Promise<Date>;
|
|
945
|
-
/** Deterministic uuid — checkpointed once per call. */
|
|
946
|
-
uuid(): Promise<string>;
|
|
947
|
-
}
|
|
948
|
-
interface RunDurableInput<TResult> {
|
|
949
|
-
runId: string;
|
|
950
|
-
manifest: DurableRunManifest;
|
|
951
|
-
store: DurableRunStore;
|
|
952
|
-
workerId?: string;
|
|
953
|
-
leaseMs?: number;
|
|
954
|
-
/** Abort signal that bounds the run's total time budget. Used for awaitEvent timeouts; runner
|
|
955
|
-
* itself doesn't kill long-running steps (the step fn must respect
|
|
956
|
-
* AbortSignal if it cares). */
|
|
957
|
-
signal?: AbortSignal;
|
|
958
|
-
taskFn: (ctx: DurableContext) => Promise<TResult>;
|
|
959
|
-
/** Default outcome on successful completion. */
|
|
960
|
-
defaultOutcome?: RunOutcome;
|
|
961
|
-
}
|
|
962
|
-
interface RunDurableResult<TResult> {
|
|
963
|
-
result: TResult;
|
|
964
|
-
record: RunRecord;
|
|
965
|
-
/** All steps captured this run (replayed + freshly executed). */
|
|
966
|
-
steps: ReadonlyArray<StepRecord>;
|
|
967
|
-
}
|
|
968
|
-
declare function runDurable<TResult>(input: RunDurableInput<TResult>): Promise<RunDurableResult<TResult>>;
|
|
969
|
-
|
|
970
|
-
/**
|
|
971
|
-
* The durable-runs SQL schema as a string constant. Inlined so consumers
|
|
972
|
-
* (Cloudflare Workers via D1) can apply it without bundling a `.sql` file:
|
|
973
|
-
*
|
|
974
|
-
* import { DURABLE_SCHEMA_SQL } from '@tangle-network/agent-runtime'
|
|
975
|
-
* await env.DB.exec(DURABLE_SCHEMA_SQL)
|
|
976
|
-
*
|
|
977
|
-
* The canonical source is `src/durable/schema.sql` — this string MUST stay
|
|
978
|
-
* byte-identical to it. The build does not copy `.sql` files into `dist/`,
|
|
979
|
-
* so the constant is the only path consumers have. A unit test asserts the
|
|
980
|
-
* two stay in sync (`durable-schema.test.ts`).
|
|
981
|
-
*
|
|
982
|
-
* `DURABLE_SCHEMA_VERSION` reflects the latest migration version applied by
|
|
983
|
-
* this string. Bump it on every backwards-incompatible change AND add a new
|
|
984
|
-
* migration entry to durable_schema_info instead of mutating prior rows.
|
|
985
|
-
*/
|
|
986
|
-
declare const DURABLE_SCHEMA_VERSION = 1;
|
|
987
|
-
declare const DURABLE_SCHEMA_SQL = "-- Durable-run substrate \u2014 versioned schema for D1 / SQLite.\n--\n-- Apply once per database. Subsequent migrations append; never rewrite a\n-- prior version. See `durable_schema_info` for the migration trail.\n--\n-- Concurrency notes for D1:\n-- - SQLite supports UNIQUE constraints for first-emit-wins (`durable_events`\n-- PK is (run_id, key) \u2014 duplicate insert raises, caller treats as \"already\n-- emitted\").\n-- - Lease takeover happens via a conditional UPDATE: we only claim the lease\n-- if (lease_holder_id IS NULL OR lease_expires_at < :now) \u2014 atomic under\n-- SQLite's row-level locking.\n-- - All timestamps stored as ISO-8601 TEXT for cross-platform consistency.\n-- - `result_json` / `error_json` / `outcome_json` / `payload_json` are\n-- JSON-encoded TEXT; the application enforces canonical-JSON discipline at\n-- the boundary so the store stays type-agnostic.\n\nCREATE TABLE IF NOT EXISTS durable_schema_info (\n version INTEGER PRIMARY KEY,\n applied_at TEXT NOT NULL\n);\n\nCREATE TABLE IF NOT EXISTS durable_runs (\n run_id TEXT PRIMARY KEY,\n manifest_hash TEXT NOT NULL,\n project_id TEXT NOT NULL,\n scenario_id TEXT,\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed','suspended')),\n created_at TEXT NOT NULL,\n updated_at TEXT NOT NULL,\n completed_at TEXT,\n lease_holder_id TEXT,\n lease_expires_at TEXT,\n outcome_json TEXT,\n step_count INTEGER NOT NULL DEFAULT 0\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_runs_project_status ON durable_runs(project_id, status);\nCREATE INDEX IF NOT EXISTS idx_durable_runs_lease_expires ON durable_runs(lease_expires_at);\n\nCREATE TABLE IF NOT EXISTS durable_steps (\n run_id TEXT NOT NULL,\n step_index INTEGER NOT NULL,\n intent TEXT NOT NULL,\n kind TEXT NOT NULL,\n input_hash TEXT NOT NULL DEFAULT '',\n status TEXT NOT NULL CHECK (status IN ('pending','running','completed','failed')),\n attempts INTEGER NOT NULL DEFAULT 0,\n 
result_json TEXT,\n error_json TEXT,\n started_at TEXT,\n completed_at TEXT,\n PRIMARY KEY (run_id, step_index)\n);\n\nCREATE INDEX IF NOT EXISTS idx_durable_steps_status ON durable_steps(run_id, status);\n\nCREATE TABLE IF NOT EXISTS durable_events (\n run_id TEXT NOT NULL,\n key TEXT NOT NULL,\n payload_json TEXT,\n emitted_at TEXT NOT NULL,\n PRIMARY KEY (run_id, key)\n);\n\nINSERT OR IGNORE INTO durable_schema_info (version, applied_at)\nVALUES (1, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'));\n";
|
|
988
|
-
|
|
989
|
-
/**
|
|
990
|
-
* Cloudflare Workflows integration for the durable-run substrate.
|
|
991
|
-
*
|
|
992
|
-
* Two valid deployment patterns on Cloudflare:
|
|
993
|
-
*
|
|
994
|
-
* A. **Plain Worker + D1DurableRunStore.** Each request invokes
|
|
995
|
-
* `runDurable(...)` directly against a D1 binding. Survives worker
|
|
996
|
-
* isolate restarts; lease takeover happens via D1 row-level
|
|
997
|
-
* conditional UPDATE. The default path; no Workflows binding needed.
|
|
998
|
-
*
|
|
999
|
-
* B. **Cloudflare Workflows entrypoint.** Wrap an entire `runDurable(...)`
|
|
1000
|
-
* call inside a single Workflow `step.do(...)`. Workflows gives you
|
|
1001
|
-
* retry-on-throw with platform-managed exponential backoff and
|
|
1002
|
-
* survives full Workers deploy rolls. Use it when the task can take
|
|
1003
|
-
* minutes to hours, or when you want the Workflows dashboard for
|
|
1004
|
-
* observability. Inside the step, `runDurable` still uses D1 for
|
|
1005
|
-
* step-level checkpoints — so a half-completed run resumes from
|
|
1006
|
-
* its last checkpoint on retry rather than restarting from scratch.
|
|
1007
|
-
*
|
|
1008
|
-
* This module provides the surface for pattern B: a thin helper that
|
|
1009
|
-
* converts a Workflows `WorkflowStep` into a `DurableContext`. We do not
|
|
1010
|
-
* take a runtime dep on `cloudflare:workers` — the integration is purely
|
|
1011
|
-
* structural typing.
|
|
1012
|
-
*
|
|
1013
|
-
* Example (pattern B):
|
|
1014
|
-
*
|
|
1015
|
-
* import { WorkflowEntrypoint } from 'cloudflare:workers'
|
|
1016
|
-
* import { runOnWorkflowStep } from '@tangle-network/agent-runtime'
|
|
1017
|
-
*
|
|
1018
|
-
* export class LegalChatWorkflow extends WorkflowEntrypoint<Env, ChatParams> {
|
|
1019
|
-
* async run(event, step) {
|
|
1020
|
-
* return runOnWorkflowStep(step, {
|
|
1021
|
-
* workflowName: 'legal-chat',
|
|
1022
|
-
* taskFn: async (ctx) => {
|
|
1023
|
-
* const ready = await ctx.step('readiness', () => probeKnowledge(...))
|
|
1024
|
-
* const answer = await ctx.step('llm:turn-1', () => callLlm(...))
|
|
1025
|
-
* const shipped = await ctx.awaitEvent('shipped', { timeoutMs: 5 * 60_000 })
|
|
1026
|
-
* return { answer, shipped }
|
|
1027
|
-
* },
|
|
1028
|
-
* })
|
|
1029
|
-
* }
|
|
1030
|
-
* }
|
|
1031
|
-
*
|
|
1032
|
-
* Step ordering, replay semantics, and divergence detection inside the
|
|
1033
|
-
* `taskFn` are inherited from Cloudflare's Workflows engine — we
|
|
1034
|
-
* intentionally do NOT layer a second durable store inside this path.
|
|
1035
|
-
* Pick pattern A or pattern B per agent; do not mix.
|
|
1036
|
-
*/
|
|
1037
|
-
|
|
1038
|
-
/**
|
|
1039
|
-
* Structural subset of Cloudflare's `WorkflowStep`. Mirrors the public surface
|
|
1040
|
-
* documented at https://developers.cloudflare.com/workflows/build/. Defined
|
|
1041
|
-
* here so this module imposes zero `cloudflare:workers` runtime dependency.
|
|
1042
|
-
*/
|
|
1043
|
-
interface WorkflowStepLike {
|
|
1044
|
-
do<T>(name: string, opts: WorkflowStepConfig, fn: () => Promise<T>): Promise<T>;
|
|
1045
|
-
do<T>(name: string, fn: () => Promise<T>): Promise<T>;
|
|
1046
|
-
sleep(name: string, duration: string | number): Promise<void>;
|
|
1047
|
-
waitForEvent<T = unknown>(name: string, opts: {
|
|
1048
|
-
type: string;
|
|
1049
|
-
timeout?: string;
|
|
1050
|
-
}): Promise<{
|
|
1051
|
-
payload: T;
|
|
1052
|
-
timestamp: number;
|
|
1053
|
-
type: string;
|
|
1054
|
-
}>;
|
|
1055
|
-
}
|
|
1056
|
-
interface WorkflowStepConfig {
|
|
1057
|
-
retries?: {
|
|
1058
|
-
limit: number;
|
|
1059
|
-
delay: string | number;
|
|
1060
|
-
backoff?: 'constant' | 'linear' | 'exponential';
|
|
1061
|
-
};
|
|
1062
|
-
timeout?: string | number;
|
|
1063
|
-
}
|
|
1064
|
-
interface RunOnWorkflowStepInput<TResult> {
|
|
1065
|
-
/** Logical workflow name; used as a prefix on step ids for filtering. */
|
|
1066
|
-
workflowName: string;
|
|
1067
|
-
/** User task — same shape as runDurable's taskFn. */
|
|
1068
|
-
taskFn: (ctx: DurableContext) => Promise<TResult>;
|
|
1069
|
-
/** Optional per-step retry / timeout policy applied to ctx.step calls. */
|
|
1070
|
-
stepConfig?: WorkflowStepConfig;
|
|
1071
|
-
/** Optional clock — defaults to Date.now. */
|
|
1072
|
-
now?: () => number;
|
|
1073
|
-
}
|
|
1074
|
-
/**
|
|
1075
|
-
* Adapt a Cloudflare `WorkflowStep` into a `DurableContext` and run a task.
|
|
1076
|
-
*
|
|
1077
|
-
* Every `ctx.step(intent, fn)` becomes `step.do(<name>, fn)` with stable
|
|
1078
|
-
* names — Workflows checkpoints + replays based on step name + position,
|
|
1079
|
-
* matching our model.
|
|
1080
|
-
*
|
|
1081
|
-
* `ctx.awaitEvent(key)` becomes `step.waitForEvent(key, { type: key })`.
|
|
1082
|
-
* Caller is responsible for emitting from the platform side (e.g. via the
|
|
1083
|
-
* Workflows REST API or a sibling worker that publishes events).
|
|
1084
|
-
*
|
|
1085
|
-
* `ctx.now()` and `ctx.uuid()` go through `step.do` so the values are
|
|
1086
|
-
* captured in the platform's checkpoint state and remain stable across
|
|
1087
|
-
* replay — same invariant as our own stores.
|
|
1088
|
-
*/
|
|
1089
|
-
declare function runOnWorkflowStep<TResult>(workflowStep: WorkflowStepLike, input: RunOnWorkflowStepInput<TResult>): Promise<TResult>;
|
|
301
|
+
declare function handleChatTurn(input: RunChatTurnInput): ChatTurnResult;
|
|
1090
302
|
|
|
1091
303
|
/**
|
|
1092
304
|
* @stable
|
|
@@ -1206,6 +418,99 @@ interface ClassifyIntentOptions {
|
|
|
1206
418
|
*/
|
|
1207
419
|
declare function classifyIntent(profile: AgentProfile, message: string, opts?: ClassifyIntentOptions): ClassifyIntentResult;
|
|
1208
420
|
|
|
421
|
+
/**
|
|
422
|
+
* @stable
|
|
423
|
+
*
|
|
424
|
+
* Chat-model resolution + catalog validation — the shared primitive every
|
|
425
|
+
* product chat handler needs and was, until now, hand-rolling. Lifts the
|
|
426
|
+
* router `/v1/models` fetch, the fail-closed id validation, and the
|
|
427
|
+
* precedence resolver out of four near-identical per-repo copies.
|
|
428
|
+
*
|
|
429
|
+
* Policy-free by design: callers pass their own precedence order
|
|
430
|
+
* (`resolveChatModel`) and their own known-good `allowlist`
|
|
431
|
+
* (`validateChatModelId`), so each product keeps its resolution policy while
|
|
432
|
+
* sharing the catalog fetch, the malformed-id guard, and the fail-closed
|
|
433
|
+
* admission rule. No React, no `process.env` assumption — `env` is an
|
|
434
|
+
* explicit narrow record so this runs unchanged in Node and in Workers.
|
|
435
|
+
*/
|
|
436
|
+
/**
|
|
437
|
+
* A model entry as returned by the Tangle Router `/v1/models` endpoint.
|
|
438
|
+
* Intentionally minimal — only the fields resolution + validation read.
|
|
439
|
+
*/
|
|
440
|
+
interface ModelInfo {
|
|
441
|
+
id: string;
|
|
442
|
+
name?: string;
|
|
443
|
+
description?: string;
|
|
444
|
+
/** Provider slug, when the router exposes it (`provider` or `_provider`). */
|
|
445
|
+
provider?: string;
|
|
446
|
+
_provider?: string;
|
|
447
|
+
architecture?: {
|
|
448
|
+
modality?: string;
|
|
449
|
+
input_modalities?: string[];
|
|
450
|
+
output_modalities?: string[];
|
|
451
|
+
};
|
|
452
|
+
}
|
|
453
|
+
/** Env keys the router base URL is resolved from. */
|
|
454
|
+
interface RouterEnv {
|
|
455
|
+
TANGLE_ROUTER_URL?: string;
|
|
456
|
+
TANGLE_ROUTER_BASE_URL?: string;
|
|
457
|
+
}
|
|
458
|
+
declare const DEFAULT_ROUTER_BASE_URL = "https://router.tangle.tools";
|
|
459
|
+
/** Resolve the router base URL from env, normalised — no trailing `/v1` or `/`. */
|
|
460
|
+
declare function resolveRouterBaseUrl(env?: RouterEnv): string;
|
|
461
|
+
/**
|
|
462
|
+
* Fetch the model catalog from the router's `/v1/models`. Throws on a non-2xx
|
|
463
|
+
* response — callers decide whether to fail open (empty catalog) or closed.
|
|
464
|
+
*/
|
|
465
|
+
declare function getModels(routerBaseUrl?: string): Promise<ModelInfo[]>;
|
|
466
|
+
/**
|
|
467
|
+
* Prepend synthetic catalog entries for ids the environment pins but the
|
|
468
|
+
* router may not list (e.g. a private or self-hosted chat model). Ids already
|
|
469
|
+
* present in `models` are not duplicated.
|
|
470
|
+
*/
|
|
471
|
+
declare function withConfiguredModels(models: ModelInfo[], extraIds: string[]): ModelInfo[];
|
|
472
|
+
/** Trim a candidate model id; `undefined` for non-strings and blanks. */
|
|
473
|
+
declare function cleanModelId(value: unknown): string | undefined;
|
|
474
|
+
interface ChatModelCandidate {
|
|
475
|
+
/** Stable label for telemetry — e.g. `request`, `workspace`, `env`. */
|
|
476
|
+
source: string;
|
|
477
|
+
model: string | undefined;
|
|
478
|
+
}
|
|
479
|
+
interface ResolvedChatModel {
|
|
480
|
+
source: string;
|
|
481
|
+
model: string;
|
|
482
|
+
}
|
|
483
|
+
/**
|
|
484
|
+
* Resolve a chat model by precedence: the first candidate carrying a
|
|
485
|
+
* non-blank model wins, else `fallback`. The caller owns the precedence
|
|
486
|
+
* order, so each product keeps its own policy (request → workspace → env,
|
|
487
|
+
* etc.) while the first-non-blank logic and the telemetry shape stay shared.
|
|
488
|
+
*/
|
|
489
|
+
declare function resolveChatModel(candidates: ChatModelCandidate[], fallback: ResolvedChatModel): ResolvedChatModel;
|
|
490
|
+
type ChatModelValidation = {
|
|
491
|
+
succeeded: true;
|
|
492
|
+
value: string;
|
|
493
|
+
} | {
|
|
494
|
+
succeeded: false;
|
|
495
|
+
error: string;
|
|
496
|
+
};
|
|
497
|
+
/**
|
|
498
|
+
* Validate a caller-supplied chat-model id. Rejects non-strings, malformed
|
|
499
|
+
* ids, and ids absent from both the caller's `allowlist` and the live router
|
|
500
|
+
* catalog. Fails closed: when the catalog cannot be fetched, an unverifiable
|
|
501
|
+
* id is rejected rather than admitted — a bad model never reaches the agent.
|
|
502
|
+
*/
|
|
503
|
+
declare function validateChatModelId(modelId: unknown, options?: {
|
|
504
|
+
/**
|
|
505
|
+
* Known-good ids that skip the catalog round trip — e.g. the product's
|
|
506
|
+
* default model plus any env-configured ids.
|
|
507
|
+
*/
|
|
508
|
+
allowlist?: string[];
|
|
509
|
+
routerBaseUrl?: string;
|
|
510
|
+
/** Injectable catalog loader — overridden in tests. */
|
|
511
|
+
loadModels?: (routerBaseUrl: string) => Promise<ModelInfo[]>;
|
|
512
|
+
}): Promise<ChatModelValidation>;
|
|
513
|
+
|
|
1209
514
|
/**
|
|
1210
515
|
* Validate an AgentProfile against canonical conformance rules: tool keys
|
|
1211
516
|
* must map to an MCP server entry (not be shell capabilities masquerading
|
|
@@ -1651,4 +956,4 @@ declare function createTraceBridge(options: TraceBridgeOptions): TraceBridge;
|
|
|
1651
956
|
*/
|
|
1652
957
|
declare function toAgentEvalTrace(event: RuntimeStreamEvent, options: TraceBridgeOptions): TraceEvent | undefined;
|
|
1653
958
|
|
|
1654
|
-
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, type ChatStreamEvent, ChatTurnError, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnResult, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult,
|
|
959
|
+
export { AgentBackendContext, AgentBackendInput, AgentExecutionBackend, AgentRuntimeEvent, AgentTaskRunResult, AgentTaskRunSummary, AgentTaskSpec, AgentTaskStatus, type BackendRetryPolicy, BackendTransportError, type ChatModelCandidate, type ChatModelValidation, type ChatStreamEvent, ChatTurnError, type ChatTurnHooks, type ChatTurnIdentity, type ChatTurnMessage, type ChatTurnOverlay, type ChatTurnProducer, type ChatTurnResult, type ChatTurnSandbox, type ClassifyIntentOptions, type ClassifyIntentResult, type ConformanceIssue, type ConformanceOptions, type ConformanceResult, DEFAULT_ROUTER_BASE_URL, InMemoryRuntimeSessionStore, KnowledgeReadinessDecision, type ModelInfo, type ResolvedChatModel, type RouterEnv, RunAgentTaskOptions, RunAgentTaskStreamOptions, type RunChatTurnInput, type RunChatTurnOptions, type RuntimeEventCollector, type RuntimeRunCompleteInput, type RuntimeRunCost, type RuntimeRunHandle, type RuntimeRunOptions, type RuntimeRunPersistenceAdapter, type RuntimeRunRow, RuntimeRunStateError, type RuntimeRunStatus, RuntimeSession, RuntimeSessionStore, RuntimeStreamEvent, type RuntimeStreamEventCollector, type RuntimeStreamEventSink, type RuntimeStreamEventSummary, type RuntimeTelemetryOptions, type SanitizedKnowledgeReadinessReport, type SanitizedKnowledgeRequirement, type ServerSentEventOptions, SessionMismatchError, type SubagentMatcher, type TraceBridge, type TraceBridgeOptions, assertProfileConformance, classifyIntent, cleanModelId, composeTurnProfile, createIterableBackend, createOpenAICompatibleBackend, createRuntimeEventCollector, createRuntimeStreamEventCollector, createSandboxPromptBackend, createTraceBridge, decideKnowledgeReadiness, deriveExecutionId, encodeServerSentEvent, getModels, handleChatTurn, readinessServerSentEvent, resolveChatModel, resolveRouterBaseUrl, runAgentTask, runAgentTaskStream, runChatTurn, runtimeStreamServerSentEvent, sandboxAsChatTurnTarget, sanitizeAgentRuntimeEvent, sanitizeKnowledgeReadinessReport, 
sanitizeRuntimeStreamEvent, startRuntimeRun, summarizeAgentTaskRun, toAgentEvalTrace, validateChatModelId, withConfiguredModels };
|