@devosurf/tesser-testing 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/engine.ts ADDED
@@ -0,0 +1,571 @@
1
+ // The in-process TestEngine (ADR-0008): journal-of-results semantics identical to the
2
+ // durable server engine — occurrence-disambiguated steps, real retry policies on a
3
+ // time-skipping clock, undo-in-reverse on terminal failure, connector calls captured —
4
+ // with connectors auto-mocked. Milliseconds per run; failures machine-actionable.
5
+
6
+ import { AsyncLocalStorage } from "node:async_hooks";
7
+ import type {
8
+ AutomationDef,
9
+ Ctx,
10
+ EventDefinition,
11
+ HarnessRunRequest,
12
+ HarnessRunResult,
13
+ Logger,
14
+ NormalizedModelRequest,
15
+ NormalizedModelResponse,
16
+ Schema,
17
+ StepOpts,
18
+ WebhookRequest,
19
+ } from "@devosurf/tesser-sdk";
20
+ import { isRetryableError, isTerminalError } from "@devosurf/tesser-sdk";
21
+ import {
22
+ buildConnectorClient,
23
+ buildHarnesses,
24
+ buildOperators,
25
+ decodeJournal,
26
+ encodeJournal,
27
+ isRetrySafe,
28
+ nextRetryDelayMs,
29
+ parseDuration,
30
+ resolveRetryPolicy,
31
+ validateSchema,
32
+ type JsonValue,
33
+ } from "@devosurf/tesser-sdk/internal";
34
+ import type { AnyAction, ConnectorInstance } from "@devosurf/tesser-sdk/connector";
35
+ import { createSpy, recordCall, type Spy } from "./spy.js";
36
+ import { sampleFromSchema } from "./sample.js";
37
+
/**
 * Error thrown when a test run is set up incorrectly (missing mock, side effect
 * outside a step, unscripted model turn, ...).
 *
 * The optional `hint` is kept on the instance and is also appended to the
 * message on its own line so it shows up in plain stack traces.
 */
export class TestConfigError extends Error {
  /** Actionable advice for fixing the configuration, when one was supplied. */
  readonly hint?: string;

  constructor(message: string, hint?: string) {
    // An empty hint is treated the same as no hint at all.
    const text = hint ? `${message}\n hint: ${hint}` : message;
    super(text);
    this.name = "TestConfigError";
    this.hint = hint;
  }
}
47
+
/**
 * One record in a run's journal.
 *
 * Entries are keyed by `kind`, `name` and `occurrence`, so repeated calls with the
 * same name stay distinguishable. Undo bookkeeping is not stored on the entry; it
 * lives at run level.
 */
export interface JournalEntry {
  /** What produced the entry. */
  kind: "step" | "signal" | "sleep";
  /** Step or signal name as passed by the automation. */
  name: string;
  /** 1-based count of how many times `name` has been used for this kind. */
  occurrence: number;
  /** Outcome; `"timed-out"` is used for a signal wait that hit its timeout. */
  status: "completed" | "failed" | "timed-out";
  /** Number of attempts made so far (1 for seeded and signal entries). */
  attempts: number;
  /** Journal-encoded result, present once the entry completed with a value. */
  result?: unknown;
  /** Serialized form of the most recent error raised by an attempt. */
  error?: SerializedError;
}
58
+
/** Plain-data snapshot of a thrown value, safe to put in results and reports. */
export interface SerializedError {
  /** The error's `name` (for example `"TypeError"`). */
  name: string;
  /** The error's message text. */
  message: string;
  /** Whether the SDK classifies the original error as retryable. */
  retryable: boolean;
  /** Whether the SDK classifies the original error as terminal. */
  terminal: boolean;
}
65
+
/**
 * Structured description of a failed run, returned by `TestRunResult.failure()`,
 * intended to be consumed by tooling rather than read as prose.
 */
export interface MachineFailure {
  /** Id of the automation that was run. */
  automation: string;
  /** Always `"failed"`; a completed run yields no `MachineFailure`. */
  status: "failed";
  /** The error that ended the run. */
  error: SerializedError;
  /** The failed step entry from the journal, when there is one. */
  failedStep?: { name: string; occurrence: number; attempts: number; error: SerializedError };
  /** Every step entry in the journal, in journal order. */
  steps: { name: string; occurrence: number; status: string; attempts: number }[];
  /** Connector calls captured during the run, successful or not. */
  connectorCalls: { step?: string; action?: string; input: unknown }[];
  /** Optional one-line remediation advice. */
  suggestion?: string;
}
75
+
/**
 * A mock for the connector call made inside a step: either a fixed value, or a
 * function that receives the validated call input plus the action path and
 * connection key and returns the value.
 *
 * Note: a union with `unknown` collapses to `unknown` for the type checker; the
 * function arm documents the callable form rather than constraining it.
 */
export type StepMock =
  | unknown
  | ((input: unknown, info: { action: string; connection: string }) => unknown);
/**
 * One scripted model turn: a ready-made response, or a (possibly async) function
 * that builds the response from the request and the operator key, model key and
 * 1-based turn number.
 */
export type ScriptedModelTurn =
  | NormalizedModelResponse
  | ((
      request: NormalizedModelRequest,
      info: { operatorKey: string; modelKey: string; turn: number },
    ) => NormalizedModelResponse | Promise<NormalizedModelResponse>);
/**
 * One scripted Harness run: a ready-made result, or a (possibly async) function
 * that builds the result from the run request and the harness key and 1-based
 * invocation number.
 */
export type ScriptedHarnessRun =
  | HarnessRunResult<unknown>
  | ((
      request: HarnessRunRequest<unknown>,
      info: { harnessKey: string; invocation: number },
    ) => HarnessRunResult<unknown> | Promise<HarnessRunResult<unknown>>);
83
+
/** Everything a single test run can be configured with. All fields are optional. */
export interface RunOptions {
  /** Input handed to the automation; when omitted a sample may be derived from the input schema. */
  input?: unknown;
  /**
   * Connector-call results keyed by the *name of the step* the call happens in.
   * The step's own code still executes; only the external call is substituted.
   */
  mocks?: Record<string, StepMock>;
  /**
   * Replay seeding (ADR-0002 recovery semantics). Steps listed here hand back the
   * recorded result WITHOUT running, the same way the durable engine resumes.
   * `occurrence` defaults to 1. Used by `tesser replay` fixtures.
   */
  journal?: { name: string; occurrence?: number; result: unknown }[];
  /**
   * Whole-connection mocks addressed by action path, e.g.
   * `{ slack: { chat: { postMessage: fn } } }`. Checked before `mocks`.
   */
  connections?: Record<string, unknown>;
  /** Values for declared secrets; undeclared names are ignored, missing ones get a placeholder. */
  secrets?: Record<string, string>;
  /**
   * Scripted model responses, first by Operator key and then by Model key. An array
   * is consumed one element per turn. Operators never call live models in tests.
   */
  models?: Record<string, Record<string, ScriptedModelTurn | ScriptedModelTurn[]>>;
  /**
   * Scripted Harness results by Harness key. An array is consumed one element per
   * invocation. Harnesses never run live subprocesses in tests.
   */
  harnesses?: Record<string, ScriptedHarnessRun | ScriptedHarnessRun[]>;
  /**
   * Payloads that resolve `ctx.waitForSignal`, by signal name: a single value, an
   * array indexed by occurrence, or a function of the 1-based occurrence.
   */
  signals?: Record<string, unknown | unknown[] | ((occurrence: number) => unknown)>;
  /** Webhook request exposed on the context; synthesized from `input` for webhook triggers when omitted. */
  request?: WebhookRequest;
}
104
+
/** Everything observable about one finished test run. */
export interface TestRunResult<TOutput = unknown> {
  /** Whether the automation returned or threw. */
  status: "completed" | "failed";
  /** The automation's output; only set when it is not `undefined`. */
  result?: TOutput;
  /** The error that ended the run, with the name of the failed step when one is known. */
  error?: SerializedError & { step?: string };
  /** Completed step results by name: the bare value for one occurrence, an array for repeats. */
  steps: Record<string, unknown>;
  /** The full journal, in the order entries were created. */
  journal: JournalEntry[];
  /** Spies by step name AND dotted action path; reading a never-called name is safe. */
  calls: Record<string, Spy>;
  /** Events emitted through `ctx.emit`, after payload validation. */
  emitted: { event: string; payload: unknown }[];
  /** Durations passed to `ctx.sleep` plus `retry:<step>#<occurrence>@<attempt>` markers for skipped retry waits. */
  slept: string[];
  /** Steps whose undo ran, in execution order; a throwing undo is listed as `"<name> (failed)"`. */
  undone: string[];
  /** Messages written through the context logger. */
  logs: { level: "info" | "warn" | "error"; msg: string; meta?: Record<string, unknown> }[];
  /** Machine-readable failure report, or `null` when the run completed. */
  failure(): MachineFailure | null;
}
120
+
/**
 * Convert any thrown value into a plain `SerializedError`.
 *
 * - `Error` instances contribute their own `name` and `message`.
 * - Error-like objects (anything with a string `message`, e.g. errors from another
 *   realm or hand-built `{ name, message }` values) are read the same way, with
 *   `name` falling back to `"Error"`.
 * - Other objects are rendered as JSON so the message carries their content
 *   instead of `"[object Object]"`.
 * - Everything else (strings, numbers, `undefined`, unserializable objects) uses
 *   `String(err)` under the name `"Error"`.
 *
 * `retryable` / `terminal` are always computed from the ORIGINAL thrown value.
 */
function serializeError(err: unknown): SerializedError {
  let name = "Error";
  let message: string;

  if (err instanceof Error) {
    name = err.name;
    message = err.message;
  } else if (typeof err === "object" && err !== null) {
    const like = err as { name?: unknown; message?: unknown };
    if (typeof like.message === "string") {
      message = like.message;
      if (typeof like.name === "string") name = like.name;
    } else {
      let rendered: string | undefined;
      try {
        rendered = JSON.stringify(err);
      } catch {
        // Cyclic structures, BigInt fields, throwing getters: fall back below.
        rendered = undefined;
      }
      message = rendered ?? String(err);
    }
  } else {
    message = String(err);
  }

  return {
    name,
    message,
    retryable: isRetryableError(err),
    terminal: isTerminalError(err),
  };
}
130
+
/** Per-attempt state tracked for the step that is currently executing. */
interface ActiveStep {
  /** Name of the step, as passed to `ctx.step`. */
  name: string;
  /** Set once the attempt has issued a connector call that is not retry-safe. */
  sawUnsafeWrite: boolean;
}
135
+
/**
 * Run one automation in-process and report everything that happened.
 *
 * The run never rejects because of the automation itself: an error thrown by
 * `def.run` (or by output validation) is caught, undo callbacks of completed steps
 * are executed in reverse order, and a result with `status: "failed"` is returned.
 * Input resolution happens before that guard, so an invalid `opts.input` rejects.
 *
 * @param def  The automation definition to execute.
 * @param opts Mocks, scripted responses, replay journal and input for this run.
 * @returns The run outcome plus journal, captured calls, emitted events, skipped
 *          sleeps, executed undos and logs.
 */
export async function executeAutomation<TOutput>(
  def: AutomationDef<any, TOutput, any, any, any, any, any>,
  opts: RunOptions = {},
): Promise<TestRunResult<TOutput>> {
  const journal: JournalEntry[] = [];
  const journalByKey = new Map<string, JournalEntry>();
  // Replay seeding: pre-recorded steps enter the journal as completed so ctx.step
  // returns their result without executing the step body.
  for (const seeded of opts.journal ?? []) {
    const occurrence = seeded.occurrence ?? 1;
    const entry: JournalEntry = {
      kind: "step",
      name: seeded.name,
      occurrence,
      status: "completed",
      attempts: 1,
      result: encodeJournal(seeded.result),
    };
    journalByKey.set(`step:${seeded.name}#${occurrence}`, entry);
    journal.push(entry);
  }
  const emitted: TestRunResult["emitted"] = [];
  const slept: string[] = [];
  const undone: string[] = [];
  const logs: TestRunResult["logs"] = [];
  const connectorCalls: MachineFailure["connectorCalls"] = [];
  const undoStack: Array<{ name: string; undo: () => unknown | Promise<unknown> }> = [];
  // Thrown value -> journal entry of the step that threw it. Lets the failure
  // report name the step whose error actually ended the run.
  const stepFailures = new Map<unknown, JournalEntry>();

  const spies = new Map<string, Spy>();
  const spyFor = (key: string): Spy => {
    let s = spies.get(key);
    if (!s) {
      s = createSpy(key);
      spies.set(key, s);
    }
    return s;
  };
  for (const key of Object.keys(opts.mocks ?? {})) spyFor(key);

  // Reading any string key lazily creates its spy, so assertions on names that
  // were never called do not hit `undefined`.
  const calls: Record<string, Spy> = new Proxy({} as Record<string, Spy>, {
    get: (_t, prop) => (typeof prop === "string" ? spyFor(prop) : undefined),
    has: () => true,
    ownKeys: () => [...spies.keys()],
    getOwnPropertyDescriptor: () => ({ enumerable: true, configurable: true }),
  });

  const activeStep = new AsyncLocalStorage<ActiveStep>();

  const logger: Logger = {
    info: (msg, meta) => logs.push({ level: "info", msg, ...(meta ? { meta } : {}) }),
    warn: (msg, meta) => logs.push({ level: "warn", msg, ...(meta ? { meta } : {}) }),
    error: (msg, meta) => logs.push({ level: "error", msg, ...(meta ? { meta } : {}) }),
  };

  // ---- connections: real connector defs, mocked transport ----
  const connections: Record<string, unknown> = {};
  for (const [connKey, connector] of Object.entries(
    (def.connections ?? {}) as Record<string, ConnectorInstance<any, any>>,
  )) {
    connections[connKey] = buildConnectorClient(connector, async (path, actionDef, rawInput) => {
      const step = activeStep.getStore();
      const actionPath = path.join(".");
      const fullPath = `${connKey}.${actionPath}`;
      if (!step) {
        throw new TestConfigError(
          `connector call ${fullPath} happened outside ctx.step()`,
          `side effects must live inside a step (ADR-0002) — wrap the call: ctx.step("name", () => ctx.connections.${fullPath}(...))`,
        );
      }
      const input = await validateSchema(
        (actionDef as AnyAction).input,
        rawInput ?? {},
        `${fullPath} input`,
      );

      if (!isRetrySafe(actionDef as AnyAction, connector.__connector.idempotencyHeader !== undefined)) {
        step.sawUnsafeWrite = true;
      }

      // Pick the value to return, in priority order. `validate` is true only for
      // values this engine produced itself (samples); user-supplied mocks are
      // returned untouched.
      const resolve = async (): Promise<{ value: unknown; validate: boolean }> => {
        // 1) whole-connection mock by action path
        const connMockRoot = opts.connections?.[connKey];
        if (connMockRoot !== undefined) {
          let node: unknown = connMockRoot;
          for (const seg of path) node = (node as Record<string, unknown> | undefined)?.[seg];
          if (node !== undefined) {
            return {
              value: typeof node === "function" ? await (node as (i: unknown) => unknown)(input) : node,
              validate: false,
            };
          }
        }
        // 2) step-name mock
        const stepMock = opts.mocks?.[step.name];
        if (stepMock !== undefined) {
          return {
            value:
              typeof stepMock === "function"
                ? await (stepMock as (i: unknown, info: { action: string; connection: string }) => unknown)(
                    input,
                    { action: actionPath, connection: connKey },
                  )
                : stepMock,
            validate: false,
          };
        }
        // 3) connector-declared sample
        const sample = connector.__connector.samples?.[actionPath];
        if (sample !== undefined) return { value: sample, validate: true };
        // 4) derive from the action's output schema
        const derived = await sampleFromSchema((actionDef as AnyAction).output);
        if (derived !== undefined) return { value: derived, validate: true };
        throw new TestConfigError(
          `no mock for connector call ${fullPath} in step "${step.name}"`,
          `provide mocks: { "${step.name}": () => <result> } or connections: { ${connKey}: { ${actionPath
            .split(".")
            .join(": { ")}: () => <result> ${"}".repeat(actionPath.split(".").length)} }`,
        );
      };

      try {
        const { value, validate } = await resolve();
        const result = validate
          ? await validateSchema((actionDef as AnyAction).output, value, `${fullPath} sample output`)
          : value;
        const record = { args: [input], step: step.name, action: fullPath, result };
        recordCall(spyFor(step.name), record);
        recordCall(spyFor(fullPath), record);
        connectorCalls.push({ step: step.name, action: fullPath, input });
        return result;
      } catch (err) {
        // Failed calls are captured too, so the failure report shows what was attempted.
        const record = { args: [input], step: step.name, action: fullPath, error: String(err) };
        recordCall(spyFor(step.name), record);
        recordCall(spyFor(fullPath), record);
        connectorCalls.push({ step: step.name, action: fullPath, input });
        throw err;
      }
    });
  }

  // ---- secrets ----
  const secretNames = Object.keys((def.secrets ?? {}) as Record<string, unknown>);
  const secrets: Record<string, string> = {};
  for (const name of secretNames) {
    secrets[name] = opts.secrets?.[name] ?? `test-secret-${name}`;
  }

  // ---- occurrence counters (reset per handler invocation; here: single invocation) ----
  const stepOccurrence = new Map<string, number>();
  const signalOccurrence = new Map<string, number>();
  const defaultRetry = resolveRetryPolicy(def.retry);

  const modelTurns = new Map<string, number>();
  const harnessInvocations = new Map<string, number>();

  const ctx: Ctx<any, any, any, any> = {
    async step<T>(name: string, fn: () => Promise<T> | T, stepOpts?: StepOpts<T>): Promise<T> {
      if (typeof name !== "string" || name.length === 0 || typeof fn !== "function") {
        throw new TestConfigError(`ctx.step: expected (name, fn) — got name=${JSON.stringify(name)}`);
      }
      const occ = (stepOccurrence.get(name) ?? 0) + 1;
      stepOccurrence.set(name, occ);
      const key = `step:${name}#${occ}`;
      const cached = journalByKey.get(key);
      if (cached?.status === "completed") {
        // Replayed step: return the recorded value, but still register its undo.
        const value = decodeJournal(cached.result as JsonValue) as T;
        if (stepOpts?.undo) undoStack.push({ name, undo: () => stepOpts.undo!(value) });
        return value;
      }

      const policy = resolveRetryPolicy(stepOpts?.retry, defaultRetry);
      const timeoutMs = stepOpts?.timeout !== undefined ? parseDuration(stepOpts.timeout, "step timeout") : undefined;
      const entry: JournalEntry = { kind: "step", name, occurrence: occ, status: "failed", attempts: 0 };
      journal.push(entry);
      journalByKey.set(key, entry);

      let lastError: unknown;
      for (let attempt = 1; attempt <= policy.maxAttempts; attempt++) {
        entry.attempts = attempt;
        const state: ActiveStep = { name, sawUnsafeWrite: false };
        let timer: ReturnType<typeof setTimeout> | undefined;
        try {
          let resultPromise = Promise.resolve(activeStep.run(state, () => fn()));
          if (timeoutMs !== undefined) {
            // Time-skipping: a step that *would* time out only does so if it actually
            // hangs on something unmocked — we reject after the wall-clock budget
            // (capped at 5 seconds).
            resultPromise = Promise.race([
              resultPromise,
              new Promise<never>((_resolve, reject) => {
                timer = setTimeout(
                  () => reject(new TestConfigError(`step "${name}" exceeded its ${stepOpts?.timeout} timeout`)),
                  Math.min(timeoutMs, 5_000),
                );
                timer.unref?.();
              }),
            ]);
          }
          const result = await resultPromise;
          const encoded = encodeJournal(result); // throws NotSerializableError → terminal
          entry.status = "completed";
          entry.result = encoded;
          if (stepOpts?.undo) undoStack.push({ name, undo: () => stepOpts.undo!(result) });
          return result;
        } catch (err) {
          lastError = err;
          entry.error = serializeError(err);
          if (isTerminalError(err)) break;
          if (state.sawUnsafeWrite && stepOpts?.retry === undefined) {
            // Derived retry-safety (ADR-0012): a non-idempotent write never silently
            // double-fires. Explicit per-step retry opts override.
            logger.warn(
              `step "${name}" performed a non-retry-safe write and will not auto-retry — pass StepOpts.retry to opt in`,
            );
            break;
          }
          const delay = nextRetryDelayMs(policy, attempt, isRetryableError(err) ? err.retryAfterMs : undefined);
          if (delay === null) break;
          slept.push(`retry:${name}#${occ}@${attempt}`);
          // time-skipping clock: no real wait
        } finally {
          // Drop the watchdog once the attempt has settled so finished steps do not
          // leave timers pending.
          if (timer !== undefined) clearTimeout(timer);
        }
      }
      entry.status = "failed";
      stepFailures.set(lastError, entry);
      throw lastError;
    },

    connections: connections as never,
    secrets: secrets as never,
    // Placeholders; the real operators/harnesses are attached right after ctx exists,
    // because building them needs ctx itself.
    operators: {} as never,
    harnesses: {} as never,

    async sleep(duration: string): Promise<void> {
      parseDuration(duration, "ctx.sleep"); // validate eagerly, real engine persists it
      slept.push(duration);
    },

    async waitForSignal<T>(name: string, sOpts: { schema: Schema<T>; timeout?: string }): Promise<T | null> {
      const occ = (signalOccurrence.get(name) ?? 0) + 1;
      signalOccurrence.set(name, occ);
      const provided = opts.signals?.[name];
      let value: unknown;
      if (typeof provided === "function") value = (provided as (o: number) => unknown)(occ);
      else if (Array.isArray(provided)) value = provided[occ - 1];
      else value = provided;

      const entry: JournalEntry = { kind: "signal", name, occurrence: occ, status: "completed", attempts: 1 };
      journal.push(entry);
      if (value === undefined) {
        if (sOpts.timeout !== undefined) {
          parseDuration(sOpts.timeout, "waitForSignal timeout");
          entry.status = "timed-out";
          return null; // clock skips straight to the timeout
        }
        throw new TestConfigError(
          `run is suspended waiting for signal "${name}" (occurrence ${occ}) with no timeout`,
          `provide signals: { "${name}": <payload> } in t.run(...)`,
        );
      }
      const validated = await validateSchema(sOpts.schema, value, `signal "${name}" payload`);
      entry.result = encodeJournal(validated as unknown);
      return validated;
    },

    async emit<T>(event: EventDefinition<T>, payload: T): Promise<void> {
      const validated = await validateSchema(event.schema, payload, `event "${event.name}" payload`);
      emitted.push({ event: event.name, payload: validated });
    },

    // ctx.request exists only for webhook triggers or when a request was supplied.
    ...(opts.request !== undefined || (def.trigger as { kind?: string }).kind === "webhook"
      ? {
          request:
            opts.request ??
            ({
              headers: { "content-type": "application/json" },
              query: {},
              rawBody: new TextEncoder().encode(JSON.stringify(opts.input ?? {})),
            } satisfies WebhookRequest),
        }
      : {}),

    logger,
    run: { id: `test_${Math.random().toString(36).slice(2, 10)}`, attempt: 1, automationId: def.id },
  };

  (ctx as { operators: unknown }).operators = buildOperators(def, ctx, async ({ operatorKey, modelKey, request }) => {
    const key = `${operatorKey}:${modelKey}`;
    const turn = (modelTurns.get(key) ?? 0) + 1;
    modelTurns.set(key, turn);
    const script = opts.models?.[operatorKey]?.[modelKey];
    if (script === undefined) {
      throw new TestConfigError(
        `unscripted model turn for operator "${operatorKey}" model "${modelKey}" (turn ${turn})`,
        `provide models: { ${operatorKey}: { ${modelKey}: [{ output: <typed output>, usage: { inputTokens, outputTokens } }] } }`,
      );
    }
    // An array script is consumed one element per turn; a single script answers every turn.
    const picked = Array.isArray(script) ? script[turn - 1] : script;
    if (picked === undefined) {
      throw new TestConfigError(`no scripted model response for operator "${operatorKey}" model "${modelKey}" turn ${turn}`);
    }
    const response = typeof picked === "function" ? await picked(request, { operatorKey, modelKey, turn }) : picked;
    recordCall(spyFor(`operator.${operatorKey}.model`), { args: [request], result: response });
    recordCall(spyFor(`operator.${operatorKey}.model.${turn}`), { args: [request], result: response });
    return response;
  });

  (ctx as { harnesses: unknown }).harnesses = buildHarnesses(def, ctx, async ({ harnessKey, request }) => {
    const step = activeStep.getStore();
    if (!step) {
      throw new TestConfigError(
        `harness "${harnessKey}" ran outside ctx.step()`,
        `Harnesses are durable Step runners — wrap the call: ctx.step("name", () => ctx.harnesses.${harnessKey}.run(...))`,
      );
    }
    const invocation = (harnessInvocations.get(harnessKey) ?? 0) + 1;
    harnessInvocations.set(harnessKey, invocation);
    const script = opts.harnesses?.[harnessKey];
    if (script === undefined) {
      throw new TestConfigError(
        `unscripted Harness run for "${harnessKey}"`,
        `provide harnesses: { ${harnessKey}: { output: <typed output>, status: "completed", artifacts: [], adapter: "test" } }`,
      );
    }
    const picked = Array.isArray(script) ? script[invocation - 1] : script;
    if (picked === undefined) throw new TestConfigError(`no scripted Harness result for "${harnessKey}" invocation ${invocation}`);
    const result = typeof picked === "function" ? await picked(request, { harnessKey, invocation }) : picked;
    recordCall(spyFor(`harness.${harnessKey}`), { args: [request], step: step.name, result });
    return result;
  });

  // ---- resolve + validate input ----
  let input = opts.input;
  const trigger = def.trigger as {
    kind: string;
    input?: Schema<unknown>;
    event?: EventDefinition<unknown>;
    connectorId?: string;
    triggerId?: string;
  };
  let inputSchema: Schema<unknown> | undefined =
    def.input ?? (trigger.kind === "webhook" ? trigger.input : trigger.kind === "event" ? trigger.event?.schema : undefined);

  if (trigger.kind === "connector" && trigger.connectorId !== undefined) {
    // A connector trigger's payload contract is its declaration's output schema; the
    // connector may also ship an explicit `trigger:<id>` sample.
    const connector = Object.values(
      (def.connections ?? {}) as Record<string, ConnectorInstance<any, any>>,
    ).find((c) => c.id === trigger.connectorId);
    const decl = connector?.__connector.triggers?.[trigger.triggerId ?? ""];
    if (decl) {
      inputSchema = def.input ?? (decl.output as Schema<unknown>);
      if (input === undefined) {
        input = connector?.__connector.samples?.[`trigger:${trigger.triggerId}`];
      }
    }
  }

  // Schedule triggers get no synthesized input; everything else falls back to a schema sample.
  if (input === undefined && inputSchema && trigger.kind !== "schedule") {
    input = await sampleFromSchema(inputSchema);
  }
  if (inputSchema && input !== undefined) {
    input = await validateSchema(inputSchema, input, `automation "${def.id}" input`);
  }

  // Most recent step entry that is (still) marked failed, if any.
  const lastFailedStep = (): JournalEntry | undefined => {
    for (let i = journal.length - 1; i >= 0; i--) {
      const candidate = journal[i];
      if (candidate !== undefined && candidate.kind === "step" && candidate.status === "failed") return candidate;
    }
    return undefined;
  };

  // Assemble the public result object. `failedEntry` is only meaningful for failed runs.
  const finish = (
    status: "completed" | "failed",
    result?: TOutput,
    error?: TestRunResult["error"],
    failedEntry?: JournalEntry,
  ): TestRunResult<TOutput> => {
    // Group with a Map so step names such as "constructor" or "__proto__" cannot
    // collide with Object.prototype members.
    const resultsByStep = new Map<string, unknown[]>();
    for (const e of journal) {
      if (e.kind !== "step" || e.status !== "completed") continue;
      const decoded = decodeJournal(e.result as JsonValue);
      const bucket = resultsByStep.get(e.name);
      if (bucket) bucket.push(decoded);
      else resultsByStep.set(e.name, [decoded]);
    }
    const steps: Record<string, unknown> = Object.fromEntries(
      [...resultsByStep].map(([name, results]) => [name, results.length === 1 ? results[0] : results]),
    );
    return {
      status,
      ...(result !== undefined ? { result } : {}),
      ...(error !== undefined ? { error } : {}),
      steps,
      journal,
      calls,
      emitted,
      slept,
      undone,
      logs,
      failure: () =>
        status === "completed"
          ? null
          : {
              automation: def.id,
              status: "failed",
              error: error as SerializedError,
              ...(failedEntry
                ? {
                    failedStep: {
                      name: failedEntry.name,
                      occurrence: failedEntry.occurrence,
                      attempts: failedEntry.attempts,
                      error: failedEntry.error as SerializedError,
                    },
                  }
                : {}),
              steps: journal
                .filter((e) => e.kind === "step")
                .map((e) => ({ name: e.name, occurrence: e.occurrence, status: e.status, attempts: e.attempts })),
              connectorCalls,
              // The suggestion is omitted only when the failed step's error is
              // explicitly marked non-terminal.
              ...(failedEntry && failedEntry.error?.terminal !== false
                ? { suggestion: `step "${failedEntry.name}" failed terminally — fix the input or mark the error retryable` }
                : {}),
            },
    };
  };

  try {
    let output: unknown = await def.run(input as never, ctx as never);
    if (def.output) {
      output = await validateSchema(def.output, output, `automation "${def.id}" output`);
    }
    return finish("completed", output as TOutput);
  } catch (err) {
    // Terminal failure → undo completed steps in reverse (ADR-0002), best-effort.
    for (const item of [...undoStack].reverse()) {
      try {
        await item.undo();
        undone.push(item.name);
      } catch (undoErr) {
        logger.error(`undo for step "${item.name}" failed: ${String(undoErr)}`);
        undone.push(`${item.name} (failed)`);
      }
    }
    // Prefer the step that threw exactly this error; an earlier failure the
    // automation caught and recovered from must not be blamed. If the error was
    // wrapped or did not come from a step, fall back to the latest failed step.
    const failedEntry = stepFailures.get(err) ?? lastFailedStep();
    return finish(
      "failed",
      undefined,
      {
        ...serializeError(err),
        ...(failedEntry ? { step: failedEntry.name } : {}),
      },
      failedEntry,
    );
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,48 @@
1
+ // @devosurf/tesser-testing — the agent's fast pass/fail loop (ADR-0008). In-process engine with
2
+ // journal-of-results semantics, time-skipping, auto-mocked connectors, machine-actionable
3
+ // failures. Apache-2.0; never imports the AGPL server.
4
+
5
+ import type { AutomationDef } from "@devosurf/tesser-sdk";
6
+ import { executeAutomation, type RunOptions, type TestRunResult } from "./engine.js";
7
+
/** Handle returned by `createTest`; each `run` call executes the automation once. */
export interface AutomationTest<TOutput> {
  /**
   * Execute the automation with the given options.
   * @param opts Mocks, input and scripted responses for this run; optional.
   */
  run(opts?: RunOptions): Promise<TestRunResult<TOutput>>;
}
11
+
/**
 * Build a test handle for a single automation.
 *
 * Calling `run` on the handle executes the automation in-process on the
 * time-skipping clock with connectors mocked — explicitly through
 * `mocks`/`connections`, or auto-mocked from connector samples / output schemas.
 *
 * @param opts.automation The automation definition (the default export of an automation file).
 * @throws TypeError when `automation` is missing, has no `id`, or has no `run` function.
 */
export function createTest<TOutput>(opts: {
  automation: AutomationDef<any, TOutput, any, any, any, any, any>;
}): AutomationTest<TOutput> {
  const candidate = opts?.automation;
  const looksLikeAutomation = Boolean(candidate?.id) && typeof candidate.run === "function";
  if (!looksLikeAutomation) {
    throw new TypeError("createTest: pass { automation } — the default export of an automation file");
  }
  return {
    run(runOpts?: RunOptions) {
      // Read the automation off `opts` at call time, not at handle creation.
      return executeAutomation(opts.automation, runOpts ?? {});
    },
  };
}
25
+
26
+ export {
27
+ executeAutomation,
28
+ TestConfigError,
29
+ type RunOptions,
30
+ type TestRunResult,
31
+ type JournalEntry,
32
+ type MachineFailure,
33
+ type SerializedError,
34
+ type StepMock,
35
+ type ScriptedModelTurn,
36
+ type ScriptedHarnessRun,
37
+ } from "./engine.js";
38
+ export { smokeTest, type SmokeOutcome } from "./smoke.js";
39
+ export {
40
+ sampleFromSchema,
41
+ edgeCasesFromSchema,
42
+ sampleFromJsonSchema,
43
+ toJsonSchemaAsync,
44
+ type EdgeCase,
45
+ } from "./sample.js";
46
+ export { Cassette, hashInput, type CassetteEntry } from "./cassette.js";
47
+ export { createSpy, type Spy, type CapturedCall } from "./spy.js";
48
+ export { invokeAction, fakeFetch, type InvokeActionOptions } from "./invoke-action.js";