oh-my-fable 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +215 -0
- package/dist/index.cjs +920 -0
- package/dist/index.d.cts +380 -0
- package/dist/index.d.ts +380 -0
- package/dist/index.js +871 -0
- package/package.json +69 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/** What a run is asked to achieve; `run()` accepts either this object or a plain string. */ interface Goal {
|
|
2
|
+
/** Text describing the goal. */ description: string;
|
|
3
|
+
/** "Don't do X"-style guardrails. */
|
|
4
|
+
constraints?: string[];
|
|
5
|
+
/** Completion criteria the reflector checks against. */
|
|
6
|
+
successCriteria?: string[];
|
|
7
|
+
}
|
|
8
|
+
/** The states a Step can be in (the type of Step.status). */ type StepStatus = "pending" | "running" | "done" | "failed" | "skipped";
|
|
9
|
+
/** One unit of work inside a Plan. */ interface Step {
|
|
10
|
+
/** Identifier for this step (presumably what dependsOn entries refer to — confirm). */ id: string;
|
|
11
|
+
/** What this step is trying to achieve (natural language). */
|
|
12
|
+
intent: string;
|
|
13
|
+
/** Optional list of dependencies; nextPendingStep is documented as returning only steps whose dependencies are satisfied. */ dependsOn?: string[];
|
|
14
|
+
status: StepStatus;
|
|
15
|
+
/** Attempt counter (presumably bounded by SerializableConfig.maxStepAttempts — confirm). */ attempts: number;
|
|
16
|
+
/** Short summary of what the step produced, once done. */
|
|
17
|
+
result?: string;
|
|
18
|
+
}
|
|
19
|
+
/** Overall state of a Plan; also surfaced as RunSummary.planStatus. */ type PlanStatus = "active" | "done" | "failed";
|
|
20
|
+
/** The steps for a goal plus an overall status; this is what Planner.plan and Planner.replan resolve to. */ interface Plan {
|
|
21
|
+
/** Goal as plain text (a string here, not a Goal object). */ goal: string;
|
|
22
|
+
steps: Step[];
|
|
23
|
+
status: PlanStatus;
|
|
24
|
+
/** Bumped on every replan. */
|
|
25
|
+
revision: number;
|
|
26
|
+
}
|
|
27
|
+
/** What Executor.execute resolves to; also an input to Reflector.reflect and Planner.replan. */ interface Observation {
|
|
28
|
+
/** Which step this observation is for (presumably a Step.id — confirm). */ stepId: string;
|
|
29
|
+
ok: boolean;
|
|
30
|
+
output: string;
|
|
31
|
+
toolCalls?: ToolCall[];
|
|
32
|
+
/** Optional error text (presumably present when ok is false — confirm). */ error?: string;
|
|
33
|
+
tokensUsed: number;
|
|
34
|
+
}
|
|
35
|
+
/** The verdict values a Reflection can carry in its `progress` field. */ type Progress = "on_track" | "needs_replan" | "blocked" | "goal_met";
|
|
36
|
+
/** What Reflector.reflect resolves to: a progress verdict plus notes. */ interface Reflection {
|
|
37
|
+
progress: Progress;
|
|
38
|
+
notes: string;
|
|
39
|
+
/** Optional confidence value; its scale is not specified in these declarations. */ confidence?: number;
|
|
40
|
+
}
|
|
41
|
+
/** Speaker roles allowed on a Message. */ type Role = "system" | "user" | "assistant";
|
|
42
|
+
/** A role-tagged text message; used by CompletionRequest.messages and RunContext.history. */ interface Message {
|
|
43
|
+
role: Role;
|
|
44
|
+
content: string;
|
|
45
|
+
}
|
|
46
|
+
/** Description of a tool; carried in CompletionRequest.tools and returned by ToolRegistry.schemas(). */ interface ToolSchema {
|
|
47
|
+
name: string;
|
|
48
|
+
description: string;
|
|
49
|
+
/** JSON-schema object describing the parameters. */
|
|
50
|
+
parameters: Record<string, unknown>;
|
|
51
|
+
}
|
|
52
|
+
/** A tool invocation as it appears in CompletionResult.toolCalls and Observation.toolCalls. */ interface ToolCall {
|
|
53
|
+
id: string;
|
|
54
|
+
/** Tool name (presumably matched against Tool.name / ToolRegistry.get — confirm). */ name: string;
|
|
55
|
+
/** Untyped payload; must be narrowed before use. */ input: unknown;
|
|
56
|
+
}
|
|
57
|
+
/** Outcome of running a tool; returned by Tool.handler and ToolRegistry.run. */ interface ToolResult {
|
|
58
|
+
ok: boolean;
|
|
59
|
+
output: string;
|
|
60
|
+
/** Optional error text (presumably present when ok is false — confirm). */ error?: string;
|
|
61
|
+
}
|
|
62
|
+
/** Input to Provider.complete; only `messages` is required. */ interface CompletionRequest {
|
|
63
|
+
messages: Message[];
|
|
64
|
+
tools?: ToolSchema[];
|
|
65
|
+
maxTokens?: number;
|
|
66
|
+
temperature?: number;
|
|
67
|
+
/** Requested reply format; how a provider honours "json" is not visible in these declarations. */ responseFormat?: "text" | "json";
|
|
68
|
+
}
|
|
69
|
+
/** Why a completion stopped, as reported in CompletionResult.stopReason. */ type StopReason = "end" | "tool_use" | "max_tokens" | "error";
|
|
70
|
+
/** What Provider.complete resolves to. */ interface CompletionResult {
|
|
71
|
+
content: string;
|
|
72
|
+
toolCalls?: ToolCall[];
|
|
73
|
+
/** Token count (presumably tokens sent to the model — confirm). */ tokensIn: number;
|
|
74
|
+
/** Token count (presumably tokens generated by the model — confirm). */ tokensOut: number;
|
|
75
|
+
stopReason: StopReason;
|
|
76
|
+
}
|
|
77
|
+
/** Usage counters for a run; stored on RunContext.budget. */ interface BudgetState {
|
|
78
|
+
steps: number;
|
|
79
|
+
tokens: number;
|
|
80
|
+
/** Start time in milliseconds (presumably epoch-based — confirm). */ startedAtMs: number;
|
|
81
|
+
/** Separate counter so a replan storm can't run forever. */
|
|
82
|
+
replans: number;
|
|
83
|
+
}
|
|
84
|
+
/** One entry of RunContext.digests, which holds compacted summaries of folded-away history. */ interface Digest {
|
|
85
|
+
summary: string;
|
|
86
|
+
/** ISO timestamp this digest covers up to. */
|
|
87
|
+
coversUntil: string;
|
|
88
|
+
}
|
|
89
|
+
/** The full state of a run; this is the object Store.save takes and Store.load resolves to. */ interface RunContext {
|
|
90
|
+
runId: string;
|
|
91
|
+
goal: Goal;
|
|
92
|
+
plan: Plan;
|
|
93
|
+
/** Conversation handed to the model (the thing that gets compacted). */
|
|
94
|
+
history: Message[];
|
|
95
|
+
/** Compacted summaries of folded-away history. */
|
|
96
|
+
digests: Digest[];
|
|
97
|
+
budget: BudgetState;
|
|
98
|
+
config: SerializableConfig;
|
|
99
|
+
/** Timestamp string; format not stated here (Digest.coversUntil is documented as ISO — confirm these match). */ createdAt: string;
|
|
100
|
+
updatedAt: string;
|
|
101
|
+
/** Extension slot — modules attach state without touching the core. */
|
|
102
|
+
meta: Record<string, unknown>;
|
|
103
|
+
}
|
|
104
|
+
/** The subset of config that is data (persisted in RunContext). RunConfig accepts every field here as an optional override. */
|
|
105
|
+
interface SerializableConfig {
|
|
106
|
+
maxSteps: number;
|
|
107
|
+
maxTokens: number;
|
|
108
|
+
maxWallClockMs: number;
|
|
109
|
+
maxStepAttempts: number;
|
|
110
|
+
maxReplans: number;
|
|
111
|
+
/** Presumably the threshold ContextManager.overBudget compares against — confirm. */ contextTokenLimit: number;
|
|
112
|
+
/** Presumably the "most recent K messages" that ContextManager.compact keeps — confirm. */ keepRecent: number;
|
|
113
|
+
/** Planner and Executor constructors each take a temperature; presumably this value — confirm. */ temperature: number;
|
|
114
|
+
/** Executor's constructor options carry a field of the same name; presumably this value — confirm. */ maxStepTokens: number;
|
|
115
|
+
}
|
|
116
|
+
/** Final status values reported in RunResult.status. */ type RunStatus = "done" | "halted" | "failed";
|
|
117
|
+
/** What run, resume, runWith and runLoop resolve to. */ interface RunResult {
|
|
118
|
+
status: RunStatus;
|
|
119
|
+
/** Optional explanation of the status. */ reason?: string;
|
|
120
|
+
/** The run's context as of the end of the run. */ ctx: RunContext;
|
|
121
|
+
}
|
|
122
|
+
/** Per-run listing entry; Store.list resolves to an array of these. */ interface RunSummary {
|
|
123
|
+
runId: string;
|
|
124
|
+
/** Goal as plain text. */ goal: string;
|
|
125
|
+
planStatus: PlanStatus;
|
|
126
|
+
/** A step count; whether it is total or completed steps is not visible here — confirm. */ steps: number;
|
|
127
|
+
updatedAt: string;
|
|
128
|
+
}
|
|
129
|
+
/** Model backend contract; ScriptedProvider and AnthropicProvider are declared as implementing it. */ interface Provider {
|
|
130
|
+
name: string;
|
|
131
|
+
/** Perform one completion for the given request. */ complete(req: CompletionRequest): Promise<CompletionResult>;
|
|
132
|
+
/** Cheap token estimate (chars/4 is fine) — only used to decide compaction. */
|
|
133
|
+
estimateTokens(messages: Message[]): number;
|
|
134
|
+
}
|
|
135
|
+
/** Persistence contract for RunContext; FileStore and MemoryStore are declared as implementing it. */ interface Store {
|
|
136
|
+
save(ctx: RunContext): Promise<void>;
|
|
137
|
+
/** May resolve to null (presumably when no run with this id is stored — confirm). */ load(runId: string): Promise<RunContext | null>;
|
|
138
|
+
list(): Promise<RunSummary[]>;
|
|
139
|
+
}
|
|
140
|
+
/** A callable tool: name, description, schema and a handler. */ interface Tool {
|
|
141
|
+
name: string;
|
|
142
|
+
description: string;
|
|
143
|
+
/** Note that ToolSchema carries its own name and description fields as well. */ schema: ToolSchema;
|
|
144
|
+
/** May return its result synchronously or as a promise; input arrives untyped. */ handler(input: unknown): Promise<ToolResult> | ToolResult;
|
|
145
|
+
}
|
|
146
|
+
/** Union of event shapes, discriminated by `type`; this is what RunConfig.onEvent and LoopDeps.onEvent receive. */ type RunEvent = {
|
|
147
|
+
type: "plan_created";
|
|
148
|
+
plan: Plan;
|
|
149
|
+
} | {
|
|
150
|
+
type: "step_start";
|
|
151
|
+
step: Step;
|
|
152
|
+
} | {
|
|
153
|
+
type: "step_done";
|
|
154
|
+
step: Step;
|
|
155
|
+
observation: Observation;
|
|
156
|
+
} | {
|
|
157
|
+
type: "reflection";
|
|
158
|
+
reflection: Reflection;
|
|
159
|
+
step: Step;
|
|
160
|
+
} | {
|
|
161
|
+
type: "replan";
|
|
162
|
+
/** Presumably the new Plan.revision — confirm. */ revision: number;
|
|
163
|
+
reason: string;
|
|
164
|
+
} | {
|
|
165
|
+
type: "compaction";
|
|
166
|
+
foldedMessages: number;
|
|
167
|
+
digestChars: number;
|
|
168
|
+
} | {
|
|
169
|
+
type: "checkpoint";
|
|
170
|
+
runId: string;
|
|
171
|
+
} | {
|
|
172
|
+
type: "halted";
|
|
173
|
+
reason: string;
|
|
174
|
+
} | {
|
|
175
|
+
type: "done";
|
|
176
|
+
reason: string;
|
|
177
|
+
} | {
|
|
178
|
+
type: "escalation";
|
|
179
|
+
step: Step;
|
|
180
|
+
notes: string;
|
|
181
|
+
};
|
|
182
|
+
/** Full run config: data fields (all optional, from SerializableConfig) + injected dependencies. Only `provider` is required. */
|
|
183
|
+
interface RunConfig extends Partial<SerializableConfig> {
|
|
184
|
+
provider: Provider;
|
|
185
|
+
/** Optional custom store (presumably a FileStore is used when omitted, given runsDir below — confirm). */ store?: Store;
|
|
186
|
+
tools?: Tool[];
|
|
187
|
+
/** Where the default FileStore writes. */
|
|
188
|
+
runsDir?: string;
|
|
189
|
+
/** Observe everything the loop does. */
|
|
190
|
+
onEvent?: (event: RunEvent) => void;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/** A complete SerializableConfig of defaults; the concrete values live in the implementation, not in this declaration. */ declare const DEFAULT_CONFIG: SerializableConfig;
|
|
194
|
+
/** Merge user config over defaults and pull out the serializable subset. Takes a partial config and returns a complete SerializableConfig. */
|
|
195
|
+
declare function resolveSerializable(config: Partial<SerializableConfig>): SerializableConfig;
|
|
196
|
+
|
|
197
|
+
/** A short, sortable, dependency-free id. `prefix` is optional; how it is combined with the id is not visible in this declaration. */
|
|
198
|
+
declare function genId(prefix?: string): string;
|
|
199
|
+
/** Create a fresh, fully-initialized (and serializable) RunContext. Requires a complete SerializableConfig (see resolveSerializable for filling in defaults). */
|
|
200
|
+
declare function createContext(goal: Goal, config: SerializableConfig): RunContext;
|
|
201
|
+
/** The next pending step whose dependencies are all satisfied, or null (presumably when no such step exists — confirm). */
|
|
202
|
+
declare function nextPendingStep(ctx: RunContext): Step | null;
|
|
203
|
+
|
|
204
|
+
/** What checkBudget returns. */ interface BudgetVerdict {
|
|
205
|
+
exceeded: boolean;
|
|
206
|
+
/** Optional explanation (presumably set when exceeded is true — confirm). */ reason?: string;
|
|
207
|
+
}
|
|
208
|
+
/**
|
|
209
|
+
* The three top-level guards, checked at the very top of every loop turn. The
|
|
210
|
+
* per-step (attempts) and replan guards live alongside the reflector; this is the
|
|
211
|
+
* hard ceiling that stops a runaway run from spending forever.
|
|
212
|
+
* NOTE(review): presumably the three are steps, tokens and wall-clock time (maxSteps / maxTokens / maxWallClockMs) — confirm. */
|
|
213
|
+
declare function checkBudget(ctx: RunContext): BudgetVerdict;
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* The default persistence: one JSON file per run. Zero dependencies, and the
|
|
217
|
+
* whole point of the harness — a checkpoint after every step means "resume from
|
|
218
|
+
* exactly where it died". Swap in SQLite/Redis by implementing the same Store.
|
|
219
|
+
* The directory argument is optional; its default is not visible in this declaration. */
|
|
220
|
+
declare class FileStore implements Store {
|
|
221
|
+
private readonly dir;
|
|
222
|
+
constructor(dir?: string);
|
|
223
|
+
private ensureDir;
|
|
224
|
+
private path;
|
|
225
|
+
save(ctx: RunContext): Promise<void>;
|
|
226
|
+
load(runId: string): Promise<RunContext | null>;
|
|
227
|
+
list(): Promise<RunSummary[]>;
|
|
228
|
+
}
|
|
229
|
+
/** An in-memory store — handy for tests and ephemeral runs. Takes no constructor arguments. */
|
|
230
|
+
declare class MemoryStore implements Store {
|
|
231
|
+
private map;
|
|
232
|
+
save(ctx: RunContext): Promise<void>;
|
|
233
|
+
load(runId: string): Promise<RunContext | null>;
|
|
234
|
+
list(): Promise<RunSummary[]>;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* Keeps the model's context inside its window by folding old, completed turns
|
|
239
|
+
* into a running digest. The plan is NEVER touched here — it lives outside
|
|
240
|
+
* history precisely so compaction can't blur "where we are".
|
|
241
|
+
*/
|
|
242
|
+
declare class ContextManager {
|
|
243
|
+
private readonly provider;
|
|
244
|
+
private readonly config;
|
|
245
|
+
constructor(provider: Provider, config: SerializableConfig);
|
|
246
|
+
/** Whether the context is over budget (presumably an estimate of ctx.history against contextTokenLimit — confirm). */ overBudget(ctx: RunContext): boolean;
|
|
247
|
+
/** Fold all but the most recent K messages into a digest; return the new (short) history. */
|
|
248
|
+
compact(ctx: RunContext): Promise<Message[]>;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/** A collection of tools looked up by name; can be seeded through the constructor or filled with register(). */ declare class ToolRegistry {
|
|
252
|
+
private map;
|
|
253
|
+
constructor(tools?: Tool[]);
|
|
254
|
+
/** Add a tool (behavior on a duplicate name is not visible here — confirm). */ register(tool: Tool): void;
|
|
255
|
+
/** Look up a tool by name; undefined when there is no match. */ get(name: string): Tool | undefined;
|
|
256
|
+
/** The ToolSchema list for the registry (presumably one per registered tool — confirm). */ schemas(): ToolSchema[];
|
|
257
|
+
get size(): number;
|
|
258
|
+
/** Run a tool, turning any thrown error into a ToolResult — the loop never dies on a tool. */
|
|
259
|
+
run(name: string, input: unknown): Promise<ToolResult>;
|
|
260
|
+
}
|
|
261
|
+
/** Define a tool with less ceremony: builds a Tool from a name, description, parameter schema and handler (presumably deriving Tool.schema from the first three — confirm). */
|
|
262
|
+
declare function defineTool(name: string, description: string, parameters: Record<string, unknown>, handler: (input: unknown) => Promise<ToolResult> | ToolResult): Tool;
|
|
263
|
+
|
|
264
|
+
/** Turns a Goal into a Plan, and revises an existing Plan after an Observation. */ declare class Planner {
|
|
265
|
+
private readonly provider;
|
|
266
|
+
private readonly temperature;
|
|
267
|
+
constructor(provider: Provider, temperature: number);
|
|
268
|
+
/** Produce an initial Plan for the goal. */ plan(goal: Goal): Promise<Plan>;
|
|
269
|
+
/**
|
|
270
|
+
* Accumulate, never reset: completed steps are preserved verbatim; only the
|
|
271
|
+
* remaining work is regenerated from the point of failure. This is what lets
|
|
272
|
+
* a long task make forward progress instead of restarting forever.
|
|
273
|
+
*/
|
|
274
|
+
replan(plan: Plan, obs: Observation, ctx: RunContext): Promise<Plan>;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
/** Given a provider, a tool registry and per-step options, turns a Step into an Observation. */ declare class Executor {
|
|
278
|
+
private readonly provider;
|
|
279
|
+
private readonly opts;
|
|
280
|
+
private registry;
|
|
281
|
+
constructor(provider: Provider, registry: ToolRegistry, opts: {
|
|
282
|
+
temperature: number;
|
|
283
|
+
maxStepTokens: number;
|
|
284
|
+
});
|
|
285
|
+
/** Execute one step in the given run context and resolve to what was observed. */ execute(step: Step, ctx: RunContext): Promise<Observation>;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/** Produces a Reflection from the current plan, the latest observation and the run context. */ declare class Reflector {
|
|
289
|
+
private readonly provider;
|
|
290
|
+
constructor(provider: Provider);
|
|
291
|
+
reflect(plan: Plan, obs: Observation, ctx: RunContext): Promise<Reflection>;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/** The collaborators runLoop takes, bundled into one object; every field is required. */ interface LoopDeps {
|
|
295
|
+
planner: Planner;
|
|
296
|
+
executor: Executor;
|
|
297
|
+
reflector: Reflector;
|
|
298
|
+
contextManager: ContextManager;
|
|
299
|
+
store: Store;
|
|
300
|
+
/** Required here, unlike the optional RunConfig.onEvent. */ onEvent: (e: RunEvent) => void;
|
|
301
|
+
}
|
|
302
|
+
/**
|
|
303
|
+
* The plan ↔ execute ↔ reflect loop. Its one invariant: at the end of every
|
|
304
|
+
* iteration, `ctx` equals what's on disk — so a crash anywhere resumes from the
|
|
305
|
+
* last checkpoint with zero lost progress.
|
|
306
|
+
* Takes an existing RunContext plus its collaborators (LoopDeps) and resolves to a RunResult. */
|
|
307
|
+
declare function runLoop(ctx: RunContext, deps: LoopDeps): Promise<RunResult>;
|
|
308
|
+
|
|
309
|
+
/** chars/4 — deliberately rough; only used to decide when to compact. Same signature as Provider.estimateTokens. */
|
|
310
|
+
declare function estimateTokens(messages: Message[]): number;
|
|
311
|
+
/** Options for withRetry; every field is optional and the defaults are not visible in these declarations. */ interface RetryOptions {
|
|
312
|
+
retries?: number;
|
|
313
|
+
/** Base delay in milliseconds for the backoff. */ baseDelayMs?: number;
|
|
314
|
+
/** Decide whether an error is worth retrying (429/5xx/network). */
|
|
315
|
+
isRetryable?: (err: unknown) => boolean;
|
|
316
|
+
/** Injectable delay function (presumably so tests can avoid real waiting — confirm). */ sleep?: (ms: number) => Promise<void>;
|
|
317
|
+
}
|
|
318
|
+
/** Wrap a flaky async call with exponential backoff. Lives in the provider layer, not the loop. Resolves to whatever `fn` resolves to. */
|
|
319
|
+
declare function withRetry<T>(fn: () => Promise<T>, opts?: RetryOptions): Promise<T>;
|
|
320
|
+
/** One scripted reply: either a partial CompletionResult, or a function that computes one from the incoming request. */ type ScriptedResponse = Partial<CompletionResult> | ((req: CompletionRequest) => Partial<CompletionResult>);
|
|
321
|
+
/**
|
|
322
|
+
* A provider that replays a fixed script of responses in order. The thing that
|
|
323
|
+
* makes an agent built on this harness *deterministically testable* — script the
|
|
324
|
+
* model and assert the loop's behavior, no network, no flakiness.
|
|
325
|
+
* NOTE(review): behavior once the script is exhausted is not visible in this declaration — confirm. */
|
|
326
|
+
declare class ScriptedProvider implements Provider {
|
|
327
|
+
private readonly responses;
|
|
328
|
+
readonly name = "scripted";
|
|
329
|
+
private index;
|
|
330
|
+
/** Every request the loop made — handy for assertions. */
|
|
331
|
+
readonly requests: CompletionRequest[];
|
|
332
|
+
constructor(responses: ScriptedResponse[]);
|
|
333
|
+
complete(req: CompletionRequest): Promise<CompletionResult>;
|
|
334
|
+
estimateTokens(messages: Message[]): number;
|
|
335
|
+
}
|
|
336
|
+
/** Helper functions that each build a ScriptedResponse (the element type ScriptedProvider's constructor takes). */ declare const reply: {
|
|
337
|
+
plan(steps: Array<{
|
|
338
|
+
id: string;
|
|
339
|
+
intent: string;
|
|
340
|
+
dependsOn?: string[];
|
|
341
|
+
}>): ScriptedResponse;
|
|
342
|
+
/** NOTE(review): `progress` is typed as string, which is wider than the Progress union used by Reflection — confirm this is intentional. */ reflection(progress: string, notes?: string, confidence?: number): ScriptedResponse;
|
|
343
|
+
text(content: string): ScriptedResponse;
|
|
344
|
+
toolUse(calls: ToolCall[], content?: string): ScriptedResponse;
|
|
345
|
+
};
|
|
346
|
+
|
|
347
|
+
/** Constructor options for AnthropicProvider; every field is optional and the defaults are not visible in these declarations. */ interface AnthropicOptions {
|
|
348
|
+
/** API key (fallback when omitted — e.g. an environment variable — is not visible here; confirm). */ apiKey?: string;
|
|
349
|
+
model?: string;
|
|
350
|
+
baseUrl?: string;
|
|
351
|
+
/** anthropic-version header. */
|
|
352
|
+
version?: string;
|
|
353
|
+
maxRetries?: number;
|
|
354
|
+
/** Presumably used when a CompletionRequest does not set maxTokens — confirm. */ defaultMaxTokens?: number;
|
|
355
|
+
}
|
|
356
|
+
/**
|
|
357
|
+
* The default provider — talks to the Anthropic Messages API over `fetch`, no
|
|
358
|
+
* SDK, no dependencies. Swap in any other `Provider` to go model-agnostic.
|
|
359
|
+
* The options object is optional; each private field below mirrors one AnthropicOptions field. */
|
|
360
|
+
declare class AnthropicProvider implements Provider {
|
|
361
|
+
readonly name = "anthropic";
|
|
362
|
+
private readonly apiKey;
|
|
363
|
+
private readonly model;
|
|
364
|
+
private readonly baseUrl;
|
|
365
|
+
private readonly version;
|
|
366
|
+
private readonly maxRetries;
|
|
367
|
+
private readonly defaultMaxTokens;
|
|
368
|
+
constructor(opts?: AnthropicOptions);
|
|
369
|
+
estimateTokens(messages: Message[]): number;
|
|
370
|
+
complete(req: CompletionRequest): Promise<CompletionResult>;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/** Run an agent to completion (or to a budget halt). The whole run is checkpointed every step. `goal` may be a Goal object or a plain string. */
|
|
374
|
+
declare function run(goal: Goal | string, config: RunConfig): Promise<RunResult>;
|
|
375
|
+
/** Resume a run from its last checkpoint — same plan, same progress, continues where it died. NOTE(review): Store.load can resolve to null; what happens for an unknown runId is not visible here — confirm. */
|
|
376
|
+
declare function resume(runId: string, config: RunConfig): Promise<RunResult>;
|
|
377
|
+
/** Continue a RunContext you already hold in memory (advanced; same as resume without the store load). Takes the same RunConfig as run and resume. */
|
|
378
|
+
declare function runWith(ctx: RunContext, config: RunConfig): Promise<RunResult>;
|
|
379
|
+
|
|
380
|
+
export { type AnthropicOptions, AnthropicProvider, type BudgetState, type CompletionRequest, type CompletionResult, ContextManager, DEFAULT_CONFIG, type Digest, Executor, FileStore, type Goal, type LoopDeps, MemoryStore, type Message, type Observation, type Plan, type PlanStatus, Planner, type Progress, type Provider, type Reflection, Reflector, type Role, type RunConfig, type RunContext, type RunEvent, type RunResult, type RunStatus, type RunSummary, ScriptedProvider, type ScriptedResponse, type SerializableConfig, type Step, type StepStatus, type StopReason, type Store, type Tool, type ToolCall, ToolRegistry, type ToolResult, type ToolSchema, checkBudget, createContext, defineTool, estimateTokens, genId, nextPendingStep, reply, resolveSerializable, resume, run, runLoop, runWith, withRetry };
|