hankweave 0.5.7 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -11
- package/dist/base-process-manager.d.ts +30 -0
- package/dist/budget.d.ts +315 -0
- package/dist/checkpoint-git.d.ts +98 -0
- package/dist/claude-agent-sdk-manager.d.ts +144 -0
- package/dist/claude-log-parser.d.ts +63 -0
- package/dist/claude-runtime-extractor.d.ts +73 -0
- package/dist/codex-runtime-extractor.d.ts +107 -0
- package/dist/codon-runner.d.ts +278 -0
- package/dist/config-validation/model-validator.d.ts +16 -0
- package/dist/config-validation/sentinel.schema.d.ts +6967 -0
- package/dist/config.d.ts +40815 -0
- package/dist/cost-tracker.d.ts +72 -0
- package/dist/execution-planner.d.ts +62 -0
- package/dist/execution-thread.d.ts +71 -0
- package/dist/exports/schemas.d.ts +9 -0
- package/dist/exports/schemas.js +1019 -0
- package/dist/exports/types.d.ts +15 -0
- package/dist/exports/types.js +60 -0
- package/dist/file-resolver.d.ts +33 -0
- package/dist/index.js +380 -293
- package/dist/index.js.map +33 -29
- package/dist/llm/llm-provider-registry.d.ts +207 -0
- package/dist/llm/models-dev-schema.d.ts +679 -0
- package/dist/llm/provider-config.d.ts +30 -0
- package/dist/prompt-builder.d.ts +75 -0
- package/dist/prompt-frontmatter.d.ts +61 -0
- package/dist/replay-process-manager.d.ts +82 -0
- package/dist/runtime-extractor-base.d.ts +120 -0
- package/dist/schemas/event-schemas.d.ts +8389 -0
- package/dist/schemas/websocket-log-schemas.d.ts +4502 -0
- package/dist/shim-process-manager.d.ts +98 -0
- package/dist/shim-runtime-extractor.d.ts +51 -0
- package/dist/shims/codex/README.md +129 -0
- package/dist/shims/codex/THIRDPARTY.md +18 -0
- package/dist/shims/codex/VERSION +1 -0
- package/dist/shims/codex/common/package.json +24 -0
- package/dist/shims/codex/index.js +1154 -970
- package/dist/shims/codex/package.json +46 -0
- package/dist/shims/codex/tsup.config.ts +16 -0
- package/dist/shims/gemini/README.md +59 -0
- package/dist/shims/gemini/THIRDPARTY.md +32 -0
- package/dist/shims/gemini/VERSION +1 -0
- package/dist/shims/gemini/common/package.json +24 -0
- package/dist/shims/gemini/index.js +1359 -30
- package/dist/shims/gemini/package.json +37 -0
- package/dist/shims/opencode/README.md +82 -0
- package/dist/shims/opencode/THIRDPARTY.md +32 -0
- package/dist/shims/opencode/VERSION +1 -0
- package/dist/shims/opencode/common/package.json +24 -0
- package/dist/shims/opencode/index.js +1476 -0
- package/dist/shims/opencode/package.json +38 -0
- package/dist/shims/pi/README.md +87 -0
- package/dist/shims/pi/THIRDPARTY.md +24 -0
- package/dist/shims/pi/VERSION +1 -0
- package/dist/shims/pi/common/package.json +24 -0
- package/dist/shims/pi/index.js +249832 -0
- package/dist/shims/pi/package.json +53 -0
- package/dist/state-manager.d.ts +161 -0
- package/dist/state-transition-guards.d.ts +37 -0
- package/dist/telemetry/telemetry-types.d.ts +206 -0
- package/dist/typed-event-emitter.d.ts +57 -0
- package/dist/types/branded-types.d.ts +15 -0
- package/dist/types/budget-types.d.ts +82 -0
- package/dist/types/claude-session-schema.d.ts +2430 -0
- package/dist/types/error-types.d.ts +44 -0
- package/dist/types/input-ai-types.d.ts +1070 -0
- package/dist/types/llm-call-types.d.ts +3829 -0
- package/dist/types/sentinel-types.d.ts +66 -0
- package/dist/types/state-types.d.ts +1099 -0
- package/dist/types/tool-types.d.ts +86 -0
- package/dist/types/types.d.ts +367 -0
- package/dist/types/websocket-log-types.d.ts +7 -0
- package/dist/utils.d.ts +452 -0
- package/package.json +15 -2
- package/schemas/hank.schema.json +158 -3
- package/schemas/hankweave.schema.json +17 -1
- package/shims/codex/index.js +0 -1583
- package/shims/gemini/index.js +0 -31
package/README.md
CHANGED
|
@@ -34,7 +34,8 @@ Hankweave takes care of long-running executions, while:
|
|
|
34
34
|
- **Preflight checks** catch as many problems as possible before the first token is cast - API keys, model availability, file paths, rig configs, sentinel schemas.
|
|
35
35
|
- **Sentinels** monitor the event stream in real time to catch drift, laziness, and convention violations - functioning as error detectors, narrators, and real-time evals while keeping the core agent focused.
|
|
36
36
|
- **Looping** sequences repeat complex tasks, trading compute for reliability using Agentic Dynamic Programming.
|
|
37
|
-
- **
|
|
37
|
+
- **Budgets** let hank authors and operators independently express cost, time, and token limits. The runtime resolves competing preferences, distributes budgets across codons and loops, and enforces them in real time — including budget-driven variable loop termination.
|
|
38
|
+
- **Harness abstraction** lets hanks run on Claude Code, Codex, Gemini CLI, Pi, OpenCode, or any agent that exposes the right capabilities. Test in your preferred coding agent, then freeze and ship. Swap harnesses seamlessly, or build new ones using [Clausetta](./learning/examples/clausetta/), our hank for auto-generating shims.
|
|
38
39
|
- **Rigs** provide deterministic code loading and workspace setup, so the same codon runs the same way every time.
|
|
39
40
|
- **Checkpointing and rollbacks** create git snapshots at every codon boundary. When something fails, roll back to any point and try a different approach.
|
|
40
41
|
- **Structured event journal** traces every tool call and decision back to its source, making it possible to pinpoint where a 20-hour run went wrong.
|
|
@@ -61,7 +62,7 @@ Today, Hankweave is responsible for executing all reliable AI work at Southbridg
|
|
|
61
62
|
|
|
62
63
|
## How Hankweave Works
|
|
63
64
|
|
|
64
|
-
The Hankweave runtime is a **server** that orchestrates agent harnesses - Claude Code, Gemini CLI, and others - to execute hanks reliably. Written entirely in Typescript, Hankweave is designed to be a configurable bottom-of-the-stack runtime that can run almost anywhere. Here's the full picture:
|
|
65
|
+
The Hankweave runtime is a **server** that orchestrates agent harnesses - Claude Code, Codex, Gemini CLI, Pi, OpenCode, and others - to execute hanks reliably. Written entirely in TypeScript, Hankweave is designed to be a configurable bottom-of-the-stack runtime that can run almost anywhere. Here's the full picture:
|
|
65
66
|
|
|
66
67
|
```
|
|
67
68
|
┌─────────────────────────────────┐
|
|
@@ -83,12 +84,12 @@ The Hankweave runtime is a **server** that orchestrates agent harnesses - Claude
|
|
|
83
84
|
EVENTS (WebSocket) ORCHESTRATES
|
|
84
85
|
│ │
|
|
85
86
|
▼ ▼
|
|
86
|
-
┌─────────────────────────┐
|
|
87
|
-
│ CONSUMERS │ │ Claude
|
|
88
|
-
│ │ │ Code
|
|
89
|
-
│ Basic CLI (included) │
|
|
90
|
-
│ Data pipelines │
|
|
91
|
-
│ CI systems │
|
|
87
|
+
┌─────────────────────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌──────────┐
|
|
88
|
+
│ CONSUMERS │ │ Claude │ │ Gemini │ │ Codex │ │ Pi │ │ OpenCode │
|
|
89
|
+
│ │ │ Code │ │ CLI │ │ │ │ │ │ │
|
|
90
|
+
│ Basic CLI (included) │ └───┬────┘ └───┬────┘ └───┬────┘ └───┬────┘ └────┬─────┘
|
|
91
|
+
│ Data pipelines │ │ │ │ │ │
|
|
92
|
+
│ CI systems │ └──────────┴──────────┴──────────┴───────────┘
|
|
92
93
|
│ Custom UIs │ │
|
|
93
94
|
│ │ ▼
|
|
94
95
|
└─────────────────────────┘ ┌─────────────────────────────────────────────┐
|
|
@@ -250,16 +251,16 @@ Files. One codon writes to the filesystem, the next reads from it. There's no im
|
|
|
250
251
|
|
|
251
252
|
It depends on the hank and the models you choose. A complex planning hank might cost $10-15 per run on frontier models. Simpler hanks can cost pennies.
|
|
252
253
|
|
|
253
|
-
The key insight is that as hanks mature, you can move to faster and cheaper models. Early iteration needs the best model you can get; once the prompts, rigs, and sentinels are dialed in, the structure does the heavy lifting and cheaper models perform well. Try running any hank with `-m haiku` to quickly prototype.
|
|
254
|
+
The key insight is that as hanks mature, you can move to faster and cheaper models. Early iteration needs the best model you can get; once the prompts, rigs, and sentinels are dialed in, the structure does the heavy lifting and cheaper models perform well. Try running any hank with `-m haiku` to quickly prototype, or use `--max-cost 0.50 -m haiku` for a budget-capped pilot run.
|
|
254
255
|
|
|
255
|
-
Hankweave includes per-codon [cost and token tracking](https://hankweave.southbridge.ai/reference/performance/)
|
|
256
|
+
Hankweave includes per-codon [cost and token tracking](https://hankweave.southbridge.ai/reference/performance/) and a [budget system](https://hankweave.southbridge.ai/concepts/budgets/) that lets authors allocate budgets across codons and loops, and operators cap runs with `--max-cost` and `--max-time`.
|
|
256
257
|
|
|
257
258
|
</details>
|
|
258
259
|
|
|
259
260
|
<details>
|
|
260
261
|
<summary><strong>What models and harnesses are supported?</strong></summary>
|
|
261
262
|
|
|
262
|
-
|
|
263
|
+
Five agent harnesses ship with Hankweave: **Claude Code** (in-process, via the Claude Agent SDK) and, each via a shim, **Gemini CLI**, **Codex**, **Pi** (embedded — no external CLI install needed), and **OpenCode**. You can mix harnesses in the same hank — use Claude for targeted coding, Gemini for writing, Codex for planning. And you can build new ones: if an agent exposes the required capabilities, you can run the polymorphic hank, plug in information about the agent you want supported, and Hankweave - using a hank - will build a shim to connect it.
|
|
263
264
|
|
|
264
265
|
</details>
|
|
265
266
|
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { ClaudeLogParser } from "./claude-log-parser.js";
|
|
2
|
+
import { type ProcessEvents, TypedEventEmitter } from "./typed-event-emitter.js";
|
|
3
|
+
import type { Logger } from "./utils.js";
|
|
4
|
+
/**
|
|
5
|
+
* Base class for process managers that provides shared context-exceeded detection.
|
|
6
|
+
*
|
|
7
|
+
* All process managers (ClaudeAgentSDKManager, ShimProcessManager, ReplayProcessManager)
|
|
8
|
+
* need to detect context-exceeded conditions from parsed log messages before emitting
|
|
9
|
+
* the exit event. This base class centralizes that logic.
|
|
10
|
+
*
|
|
11
|
+
* Note: No abstract method signatures — the three managers have different spawn()
|
|
12
|
+
* signatures, and CodonRunner uses instanceof narrowing. The union type in CodonRunner
|
|
13
|
+
* remains unchanged.
|
|
14
|
+
*/
|
|
15
|
+
export declare class BaseProcessManager extends TypedEventEmitter<ProcessEvents> {
|
|
16
|
+
protected logger: Logger;
|
|
17
|
+
protected logParser: ClaudeLogParser;
|
|
18
|
+
constructor(logger: Logger, logParser: ClaudeLogParser);
|
|
19
|
+
/**
|
|
20
|
+
* Flush the log parser and scan all parsed messages for context-exceeded indicators.
|
|
21
|
+
* Detects both synthetic assistant messages (output token exceeded) and
|
|
22
|
+
* result messages with is_error.
|
|
23
|
+
*/
|
|
24
|
+
protected detectContextExceeded(): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Emit the "exit" event with automatic context-exceeded detection.
|
|
27
|
+
* Ensures every exit path consistently detects context exhaustion.
|
|
28
|
+
*/
|
|
29
|
+
protected emitExit(exitCode: number): void;
|
|
30
|
+
}
|
package/dist/budget.d.ts
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified budget module.
|
|
3
|
+
*
|
|
4
|
+
* Contains:
|
|
5
|
+
* - BudgetTracker: per-codon limit enforcement (cost, duration, output tokens, context tokens)
|
|
6
|
+
* - resolveCodonBudget: allocation logic (shared, proportional, proportional-strict)
|
|
7
|
+
* - Budget: run-level facade that ties everything together
|
|
8
|
+
*
|
|
9
|
+
* Shared types (BudgetLimits, BudgetExceededInfo, etc.) live in types/budget-types.ts
|
|
10
|
+
* and are re-exported here for convenience.
|
|
11
|
+
*/
|
|
12
|
+
import type { CostTrackerEvents } from "./cost-tracker.js";
|
|
13
|
+
import type { ExecutionCodonEntry } from "./execution-planner.js";
|
|
14
|
+
import { TypedEventEmitter } from "./typed-event-emitter.js";
|
|
15
|
+
import type { CodonId } from "./types/branded-types.js";
|
|
16
|
+
import { type AllocationMode, type BudgetExceededInfo, BudgetLimits, type BudgetSummaryData, type OnExceededPolicy } from "./types/budget-types.js";
|
|
17
|
+
import { type Run } from "./types/state-types.js";
|
|
18
|
+
import type { Logger } from "./utils.js";
|
|
19
|
+
export type { AllocationMode, BudgetCurrency, BudgetExceededData, BudgetExceededInfo, BudgetLimitsOptions, BudgetSummaryData, CodonBudgetSummaryRow, OnExceededPolicy, } from "./types/budget-types.js";
|
|
20
|
+
export { BUDGET_CURRENCIES, BudgetLimits } from "./types/budget-types.js";
|
|
21
|
+
export interface BudgetTrackerEvents extends Record<string, unknown[]> {
|
|
22
|
+
exceeded: [info: BudgetExceededInfo];
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Pure logic class that tracks budget consumption against limits.
|
|
26
|
+
* Emits `exceeded` event (at most once) when any currency goes over its limit.
|
|
27
|
+
* No I/O, no runtime dependencies — designed for unit testing.
|
|
28
|
+
*/
|
|
29
|
+
export declare class BudgetTracker extends TypedEventEmitter<BudgetTrackerEvents> {
|
|
30
|
+
private readonly limits;
|
|
31
|
+
private costUsed;
|
|
32
|
+
private outputTokensUsed;
|
|
33
|
+
private contextTokensHighWaterMark;
|
|
34
|
+
private readonly startTime;
|
|
35
|
+
private exceededInfo;
|
|
36
|
+
constructor(limits: BudgetLimits);
|
|
37
|
+
/** Accumulate cost and check against limit. */
|
|
38
|
+
addCost(delta: number): void;
|
|
39
|
+
/** Accumulate output tokens and check against limit. */
|
|
40
|
+
addOutputTokens(delta: number): void;
|
|
41
|
+
/**
|
|
42
|
+
* Update the context token high-water mark and check against limit.
|
|
43
|
+
* Called with per-turn inputTokens + outputTokens (NOT cumulative sum).
|
|
44
|
+
* Tracks the peak context window fill level across all turns.
|
|
45
|
+
*/
|
|
46
|
+
updateContextTokens(inputTokensThisTurn: number, outputTokensThisTurn: number): void;
|
|
47
|
+
/** Check elapsed wall-clock time against duration limit. */
|
|
48
|
+
checkTime(): void;
|
|
49
|
+
/** Has any budget limit been exceeded? */
|
|
50
|
+
isExceeded(): boolean;
|
|
51
|
+
/** Get info about which limit was exceeded, if any. */
|
|
52
|
+
getExceededInfo(): BudgetExceededInfo | undefined;
|
|
53
|
+
/** Remaining cost budget, or undefined if no cost limit. */
|
|
54
|
+
getRemainingCost(): number | undefined;
|
|
55
|
+
/** Current consumption state. */
|
|
56
|
+
getState(): {
|
|
57
|
+
costUsed: number;
|
|
58
|
+
outputTokensUsed: number;
|
|
59
|
+
contextTokensHighWaterMark: number;
|
|
60
|
+
elapsedSeconds: number;
|
|
61
|
+
};
|
|
62
|
+
/** Whether any limits are configured. */
|
|
63
|
+
hasLimits(): boolean;
|
|
64
|
+
private setExceeded;
|
|
65
|
+
}
|
|
66
|
+
export interface CodonBudgetConfig {
|
|
67
|
+
maxDollars?: number;
|
|
68
|
+
maxTimeSeconds?: number;
|
|
69
|
+
maxOutputTokens?: number;
|
|
70
|
+
maxContextTokens?: number;
|
|
71
|
+
onExceeded?: OnExceededPolicy;
|
|
72
|
+
}
|
|
73
|
+
export type RemainingCodonInfo = Pick<CodonBudgetConfig, "maxDollars"> & {
|
|
74
|
+
/** Config-level ID, used to look up in parent's shares map. */
|
|
75
|
+
codonConfigId?: string;
|
|
76
|
+
};
|
|
77
|
+
export interface ResolveCodonBudgetParams {
|
|
78
|
+
codon: CodonBudgetConfig;
|
|
79
|
+
/** Codons that haven't started yet (excluding the current codon). */
|
|
80
|
+
remainingCodons: RemainingCodonInfo[];
|
|
81
|
+
/** Effective global budget (min of runtime and hank maxDollars). */
|
|
82
|
+
globalMaxDollars?: number;
|
|
83
|
+
/** Total cost already spent across completed/failed codons. */
|
|
84
|
+
alreadySpent: number;
|
|
85
|
+
/** Allocation mode from the parent container's budget config. Default: "shared". */
|
|
86
|
+
allocationMode?: AllocationMode;
|
|
87
|
+
/** Share map from parent's budget config: child config ID → fraction (0-1). */
|
|
88
|
+
shares?: Record<string, number>;
|
|
89
|
+
/** The codon's config-level ID (used to look up in shares map). */
|
|
90
|
+
codonConfigId?: string;
|
|
91
|
+
/**
|
|
92
|
+
* For proportional-strict mode: the amount of budget "consumed" by completed codons
|
|
93
|
+
* using strict accounting (max of share amount and actual spend per codon).
|
|
94
|
+
* If undefined, falls back to alreadySpent.
|
|
95
|
+
*/
|
|
96
|
+
strictAlreadyConsumed?: number;
|
|
97
|
+
/**
|
|
98
|
+
* Remaining wall-clock seconds from the hank-level time budget.
|
|
99
|
+
* When set, the codon's effective duration = min(codon.maxTimeSeconds, remainingTimeSeconds).
|
|
100
|
+
*/
|
|
101
|
+
remainingTimeSeconds?: number;
|
|
102
|
+
/** Container-level (hank/loop) onExceeded default. Codon-level overrides this. */
|
|
103
|
+
containerOnExceeded?: OnExceededPolicy;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Resolves effective budget limits for a single codon execution.
|
|
107
|
+
* Pure function — no I/O, no side effects.
|
|
108
|
+
*
|
|
109
|
+
* When a global budget exists, every codon gets a cost limit.
|
|
110
|
+
* Allocation mode determines how the budget is distributed among children.
|
|
111
|
+
*/
|
|
112
|
+
export declare function resolveCodonBudget(params: ResolveCodonBudgetParams): BudgetLimits;
|
|
113
|
+
/**
|
|
114
|
+
* Budget configuration from the hank-level budget config.
|
|
115
|
+
*/
|
|
116
|
+
export interface BudgetConfig {
|
|
117
|
+
maxDollars?: number;
|
|
118
|
+
maxTimeSeconds?: number;
|
|
119
|
+
allocationMode?: AllocationMode;
|
|
120
|
+
shares?: Record<string, number>;
|
|
121
|
+
onExceeded?: OnExceededPolicy;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Interface for reporting budget telemetry events.
|
|
125
|
+
* Implemented by TelemetryCollector; injected into Budget to keep it decoupled.
|
|
126
|
+
*/
|
|
127
|
+
export interface BudgetTelemetryReporter {
|
|
128
|
+
reportBudgetSet(data: {
|
|
129
|
+
codonId: string;
|
|
130
|
+
limits: BudgetLimits;
|
|
131
|
+
}): void;
|
|
132
|
+
reportBudgetExceeded(data: {
|
|
133
|
+
codonId: string;
|
|
134
|
+
info: BudgetExceededInfo;
|
|
135
|
+
}): void;
|
|
136
|
+
}
|
|
137
|
+
export interface BudgetParams {
|
|
138
|
+
config: BudgetConfig;
|
|
139
|
+
executionPlan: ExecutionCodonEntry[];
|
|
140
|
+
logger: Logger;
|
|
141
|
+
/** For continuation runs: prior runs to hydrate spending from. */
|
|
142
|
+
priorRuns?: {
|
|
143
|
+
runs: Run[];
|
|
144
|
+
currentRunId: string;
|
|
145
|
+
};
|
|
146
|
+
/** Optional telemetry reporter for budget events. */
|
|
147
|
+
telemetry?: BudgetTelemetryReporter;
|
|
148
|
+
}
|
|
149
|
+
export interface BudgetEvents extends Record<string, unknown[]> {
|
|
150
|
+
exceeded: [data: {
|
|
151
|
+
codonId: string;
|
|
152
|
+
info: BudgetExceededInfo;
|
|
153
|
+
}];
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Run-level budget facade.
|
|
157
|
+
*
|
|
158
|
+
* Encapsulates allocation logic (resolveCodonBudget), per-codon limit
|
|
159
|
+
* enforcement (BudgetTracker), run-level spending tracking, and retry
|
|
160
|
+
* cost accumulation behind a single interface.
|
|
161
|
+
*
|
|
162
|
+
* Created once per run by HankweaveRuntime. Both the runtime and
|
|
163
|
+
* CodonRunner interact with this class for all budget concerns.
|
|
164
|
+
*
|
|
165
|
+
* CostTracker and StateManager remain in CodonRunner — this class
|
|
166
|
+
* subscribes to CostTracker events for automatic limit enforcement.
|
|
167
|
+
*/
|
|
168
|
+
export declare class Budget extends TypedEventEmitter<BudgetEvents> {
|
|
169
|
+
private readonly budgetConfig;
|
|
170
|
+
private readonly logger;
|
|
171
|
+
private readonly telemetry?;
|
|
172
|
+
private executionPlan;
|
|
173
|
+
private hankStartTime;
|
|
174
|
+
private activeTrackers;
|
|
175
|
+
private activeTimers;
|
|
176
|
+
private resolvedLimits;
|
|
177
|
+
private exceededSnapshots;
|
|
178
|
+
private completedSpending;
|
|
179
|
+
private retryAccumulated;
|
|
180
|
+
private loopEffectiveBudgets;
|
|
181
|
+
private loopStartTimes;
|
|
182
|
+
private priorCodonSnapshots?;
|
|
183
|
+
constructor(params: BudgetParams);
|
|
184
|
+
/**
|
|
185
|
+
* Log a human-readable budget summary at startup so the server log captures
|
|
186
|
+
* the effective ceiling, allocation mode, and spending state.
|
|
187
|
+
*/
|
|
188
|
+
private logStartupSummary;
|
|
189
|
+
/**
|
|
190
|
+
* Hydrate spending from prior runs on resume.
|
|
191
|
+
* Called after construction when this Budget belongs to a continuation run,
|
|
192
|
+
* so that allocation calculations account for money already spent.
|
|
193
|
+
*/
|
|
194
|
+
seedCompletedSpending(priorSpending: Map<string, number>): void;
|
|
195
|
+
/**
|
|
196
|
+
* Compute and hydrate prior spending from historical runs.
|
|
197
|
+
* Iterates over all runs except the current one, aggregates codon costs,
|
|
198
|
+
* and seeds the completed spending map.
|
|
199
|
+
*/
|
|
200
|
+
hydrateFromPriorRuns(runs: Run[], currentRunId: string): void;
|
|
201
|
+
/**
|
|
202
|
+
* Compute elapsed wall-clock time from prior runs and offset hankStartTime
|
|
203
|
+
* so that time budgets account for time already consumed.
|
|
204
|
+
*/
|
|
205
|
+
hydrateTimeFromPriorRuns(runs: Run[], currentRunId: string): void;
|
|
206
|
+
/**
|
|
207
|
+
* Initialize budget tracking for a codon. Resolves effective limits,
|
|
208
|
+
* creates a BudgetTracker, and subscribes to CostTracker events for
|
|
209
|
+
* automatic limit enforcement.
|
|
210
|
+
*/
|
|
211
|
+
trackCodon(codonId: CodonId | string, codon: {
|
|
212
|
+
id: string;
|
|
213
|
+
budget?: {
|
|
214
|
+
maxDollars?: number;
|
|
215
|
+
maxTimeSeconds?: number;
|
|
216
|
+
maxOutputTokens?: number;
|
|
217
|
+
maxContextTokens?: number;
|
|
218
|
+
onExceeded?: OnExceededPolicy;
|
|
219
|
+
};
|
|
220
|
+
}, costTracker: TypedEventEmitter<CostTrackerEvents>): void;
|
|
221
|
+
/**
|
|
222
|
+
* Record that a codon completed successfully. Updates spending records
|
|
223
|
+
* for subsequent allocation calculations.
|
|
224
|
+
*/
|
|
225
|
+
completeCodon(codonId: CodonId | string, finalCost: number): void;
|
|
226
|
+
/**
|
|
227
|
+
* Record that a codon failed. Updates spending records.
|
|
228
|
+
*/
|
|
229
|
+
failCodon(codonId: CodonId | string, partialCost: number): void;
|
|
230
|
+
/**
|
|
231
|
+
* Record that a codon was skipped (0 cost).
|
|
232
|
+
*/
|
|
233
|
+
skipCodon(codonId: CodonId | string): void;
|
|
234
|
+
private clearWatchdog;
|
|
235
|
+
/**
|
|
236
|
+
* Has this codon's budget been exceeded?
|
|
237
|
+
*/
|
|
238
|
+
isExceeded(codonId: string): boolean;
|
|
239
|
+
/**
|
|
240
|
+
* Get details about the budget breach, if any.
|
|
241
|
+
*/
|
|
242
|
+
getExceededInfo(codonId: string): BudgetExceededInfo | undefined;
|
|
243
|
+
/**
|
|
244
|
+
* Get the effective budget limits resolved for a codon.
|
|
245
|
+
*/
|
|
246
|
+
getEffectiveLimits(codonId: string): BudgetLimits;
|
|
247
|
+
/**
|
|
248
|
+
* Total cost spent across all completed/failed codons in this run.
|
|
249
|
+
*/
|
|
250
|
+
getTotalSpent(): number;
|
|
251
|
+
/**
|
|
252
|
+
* Get exceeded snapshots (read-only) for all codons that hit their budget.
|
|
253
|
+
*/
|
|
254
|
+
getExceededSnapshots(): ReadonlyMap<string, BudgetExceededInfo>;
|
|
255
|
+
/**
|
|
256
|
+
* Build the end-of-run budget summary from internal state + run codons.
|
|
257
|
+
* Returns null if no budget limits were configured.
|
|
258
|
+
*/
|
|
259
|
+
getBudgetSummary(currentRun: Run): BudgetSummaryData | null;
|
|
260
|
+
/**
|
|
261
|
+
* Update the execution plan (e.g., after loop expansion adds new entries).
|
|
262
|
+
* Must be called before trackCodon() for newly expanded codons.
|
|
263
|
+
*/
|
|
264
|
+
updateExecutionPlan(plan: ExecutionCodonEntry[]): void;
|
|
265
|
+
/**
|
|
266
|
+
* Accumulate cost from a failed retry attempt.
|
|
267
|
+
*/
|
|
268
|
+
accumulateRetryCost(codonId: string, cost: number): void;
|
|
269
|
+
/**
|
|
270
|
+
* Get accumulated retry cost and clear it. Returns 0 if none.
|
|
271
|
+
*/
|
|
272
|
+
getAndClearRetryCost(codonId: string): number;
|
|
273
|
+
/**
|
|
274
|
+
* Check whether a loop's budget has been exhausted (cost or time).
|
|
275
|
+
*/
|
|
276
|
+
isLoopBudgetExceeded(loopId: string): boolean;
|
|
277
|
+
/**
|
|
278
|
+
* Total cost spent across all completed/failed codons within a specific loop.
|
|
279
|
+
*/
|
|
280
|
+
getLoopSpent(loopId: string): number;
|
|
281
|
+
/**
|
|
282
|
+
* Compute total elapsed wall-clock time for prior codons within a specific loop.
|
|
283
|
+
* Uses codon startTime/endTime from the execution plan's completed spending entries.
|
|
284
|
+
*/
|
|
285
|
+
private computePriorLoopElapsedMs;
|
|
286
|
+
/**
|
|
287
|
+
* Find a codon by ID in the prior runs data stored during hydration.
|
|
288
|
+
* Returns undefined if not found or if no prior runs exist.
|
|
289
|
+
*/
|
|
290
|
+
private findCodonInPriorRuns;
|
|
291
|
+
/**
|
|
292
|
+
* Resolve budget limits for a codon scoped to the hank level (no loop).
|
|
293
|
+
*/
|
|
294
|
+
private resolveHankScopedLimits;
|
|
295
|
+
/**
|
|
296
|
+
* Resolve budget limits for a codon scoped to its parent loop's budget.
|
|
297
|
+
*/
|
|
298
|
+
private resolveLoopScopedLimits;
|
|
299
|
+
/**
|
|
300
|
+
* Compute the loop's effective budget by capping it against the hank's allocation.
|
|
301
|
+
*/
|
|
302
|
+
private resolveAndStoreLoopEffectiveBudget;
|
|
303
|
+
/**
|
|
304
|
+
* Compute how much the hank allocated to a specific loop, based on the hank's allocation mode.
|
|
305
|
+
*/
|
|
306
|
+
private computeHankAllocationForLoop;
|
|
307
|
+
/**
|
|
308
|
+
* Compute strict consumed amount for proportional-strict mode (hank-level scope).
|
|
309
|
+
*/
|
|
310
|
+
private computeStrictConsumed;
|
|
311
|
+
/**
|
|
312
|
+
* Compute strict consumed amount for proportional-strict mode within a loop.
|
|
313
|
+
*/
|
|
314
|
+
private computeStrictConsumedInLoop;
|
|
315
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { type Logger } from "./utils.js";
|
|
2
|
+
/**
|
|
3
|
+
* Git operations for the checkpoint system.
|
|
4
|
+
* Handles the shadow git repository in .hankweave/checkpoints.
|
|
5
|
+
*/
|
|
6
|
+
export declare class CheckpointGit {
|
|
7
|
+
private executionPath;
|
|
8
|
+
private agentRootPath;
|
|
9
|
+
private checkpointPath;
|
|
10
|
+
private git;
|
|
11
|
+
private logger;
|
|
12
|
+
private trackedPatterns;
|
|
13
|
+
constructor(executionPath: string, agentRootPath: string, logger: Logger);
|
|
14
|
+
/**
|
|
15
|
+
* Migrate backup directories from legacy .git to .hankweavecheckpoints.
|
|
16
|
+
* Called conditionally when the main checkpoint needed migration.
|
|
17
|
+
*/
|
|
18
|
+
private migrateBackupDirectories;
|
|
19
|
+
/**
|
|
20
|
+
* Initialize the shadow git repository
|
|
21
|
+
* @returns The initial commit SHA (either from new repo creation or existing repo HEAD)
|
|
22
|
+
*/
|
|
23
|
+
initialize(): Promise<string | undefined>;
|
|
24
|
+
/**
|
|
25
|
+
* Check if repository is initialized
|
|
26
|
+
*/
|
|
27
|
+
isInitialized(): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Get the current branch name
|
|
30
|
+
* @returns The current branch name or undefined if not initialized
|
|
31
|
+
*/
|
|
32
|
+
getCurrentBranch(): Promise<string | undefined>;
|
|
33
|
+
/**
|
|
34
|
+
* Add patterns to track
|
|
35
|
+
*/
|
|
36
|
+
addPatterns(patterns: string[]): Promise<void>;
|
|
37
|
+
/**
|
|
38
|
+
* Clear all tracked patterns
|
|
39
|
+
*/
|
|
40
|
+
clearPatterns(): void;
|
|
41
|
+
/**
|
|
42
|
+
* Get resolved files for all tracked patterns
|
|
43
|
+
*/
|
|
44
|
+
private getCheckpointedFiles;
|
|
45
|
+
/**
|
|
46
|
+
* Create a checkpoint commit. If branch is specified, switch to that branch for the commit and then switch back to the original branch.
|
|
47
|
+
* @param message Commit message for the checkpoint
|
|
48
|
+
* @param options Optional parameters, including branch name
|
|
49
|
+
* @returns The commit SHA of the new checkpoint or null if commit failed
|
|
50
|
+
*/
|
|
51
|
+
commit(message: string, options?: {
|
|
52
|
+
branch?: string;
|
|
53
|
+
}): Promise<string | null>;
|
|
54
|
+
/**
|
|
55
|
+
* Get the checkpoint repository path
|
|
56
|
+
*/
|
|
57
|
+
getPath(): string;
|
|
58
|
+
/**
|
|
59
|
+
* Switch to a specific branch, creating it if it doesn't exist
|
|
60
|
+
*/
|
|
61
|
+
switchToBranch(branchName: string): Promise<void>;
|
|
62
|
+
/**
|
|
63
|
+
* Create a new branch from a specific SHA and switch to it.
|
|
64
|
+
* This preserves the old branch's history (unlike git reset --hard).
|
|
65
|
+
* @param branchName Name for the new branch
|
|
66
|
+
* @param sha The commit SHA to start the branch from
|
|
67
|
+
*/
|
|
68
|
+
createBranchFromSha(branchName: string, sha: string): Promise<void>;
|
|
69
|
+
/**
|
|
70
|
+
* Get all checkpoint SHAs from the repository
|
|
71
|
+
* @returns Set of all commit SHAs in the repository
|
|
72
|
+
*/
|
|
73
|
+
getAllCheckpointShas(): Promise<Set<string>>;
|
|
74
|
+
/**
|
|
75
|
+
* Check if a specific commit SHA exists in the repository
|
|
76
|
+
* @param sha Full or partial SHA to check
|
|
77
|
+
* @returns true if the commit exists, false otherwise
|
|
78
|
+
*/
|
|
79
|
+
commitExists(sha: string): Promise<boolean>;
|
|
80
|
+
/**
|
|
81
|
+
* Get all checkpoints with detailed information, ordered by time (newest first)
|
|
82
|
+
* @returns Array of checkpoint information ordered by timestamp
|
|
83
|
+
*/
|
|
84
|
+
getAllCheckpoints(): Promise<Array<{
|
|
85
|
+
sha: string;
|
|
86
|
+
message: string;
|
|
87
|
+
timestamp: string;
|
|
88
|
+
branch: string;
|
|
89
|
+
}>>;
|
|
90
|
+
/**
|
|
91
|
+
* Reset to a specific checkpoint.
|
|
92
|
+
*
|
|
93
|
+
* IMPORTANT: This uses `git checkout` to a detached HEAD state instead of
|
|
94
|
+
* `git reset --hard`. This preserves the old branch's history so you can
|
|
95
|
+
* still access old checkpoints from previous timelines.
|
|
96
|
+
*/
|
|
97
|
+
resetToCheckpoint(sha: string): Promise<void>;
|
|
98
|
+
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { BaseProcessManager } from "./base-process-manager.js";
|
|
2
|
+
import type { ClaudeLogParser } from "./claude-log-parser.js";
|
|
3
|
+
import type { Codon, ShimSelfTestResult } from "./types/types.js";
|
|
4
|
+
import type { Logger } from "./utils.js";
|
|
5
|
+
/**
|
|
6
|
+
* Error thrown when Claude executable cannot be found.
|
|
7
|
+
* This allows callers to handle this specific case.
|
|
8
|
+
*/
|
|
9
|
+
export declare class ClaudeExecutableNotFoundError extends Error {
|
|
10
|
+
constructor(message: string);
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Detect an installed Claude executable.
|
|
14
|
+
* Checks common installation locations and falls back to `which claude`.
|
|
15
|
+
*
|
|
16
|
+
* @returns Path to Claude executable, or null if not found
|
|
17
|
+
*/
|
|
18
|
+
export declare function detectClaudeExecutable(): string | null;
|
|
19
|
+
/**
|
|
20
|
+
* Manages Claude Agent SDK lifecycle, mimicking the ClaudeProcessManager API.
|
|
21
|
+
* Handles log stream creation and converts SDK messages to JSONL format.
|
|
22
|
+
*/
|
|
23
|
+
export declare class ClaudeAgentSDKManager extends BaseProcessManager {
|
|
24
|
+
private executionPath;
|
|
25
|
+
private agentRootPath;
|
|
26
|
+
private anthropicBaseUrl?;
|
|
27
|
+
private globalSystemPrompt?;
|
|
28
|
+
private defaultShimIdleTimeout?;
|
|
29
|
+
private abortController;
|
|
30
|
+
private logStream;
|
|
31
|
+
private killed;
|
|
32
|
+
private sessionId;
|
|
33
|
+
private syntheticPid;
|
|
34
|
+
private queryPromise;
|
|
35
|
+
private promptBuilder;
|
|
36
|
+
constructor(executionPath: string, agentRootPath: string, logger: Logger, logParser: ClaudeLogParser, anthropicBaseUrl?: string | undefined, globalSystemPrompt?: string | null | undefined, defaultShimIdleTimeout?: number | undefined);
|
|
37
|
+
/** Frontmatter metadata from the prompt file (if any) */
|
|
38
|
+
get promptFrontmatter(): import("./prompt-frontmatter.js").PromptFrontmatter | undefined;
|
|
39
|
+
/**
|
|
40
|
+
* Ensure Claude SDK files are available, extracting if necessary.
|
|
41
|
+
*
|
|
42
|
+
* This static method should be called at application startup before creating
|
|
43
|
+
* any ClaudeAgentSDKManager instances. It handles:
|
|
44
|
+
* - Detecting if running from compiled executable or source
|
|
45
|
+
* - Extracting embedded SDK files for compiled mode
|
|
46
|
+
* - Verifying extracted files exist
|
|
47
|
+
* - Setting CLAUDE_PATH_TO_CLAUDE_EXECUTABLE environment variable
|
|
48
|
+
*
|
|
49
|
+
* @returns Promise resolving to `{ path, version, cached }`, where `path` is the path to cli.js if compiled (and sets env var), or null if running from source
|
|
50
|
+
* @throws Error if extraction fails or extracted file doesn't exist
|
|
51
|
+
*/
|
|
52
|
+
static ensureSdkAvailable(): Promise<{
|
|
53
|
+
path: string | null;
|
|
54
|
+
version: string;
|
|
55
|
+
cached: boolean;
|
|
56
|
+
}>;
|
|
57
|
+
/**
|
|
58
|
+
* Spawn a Claude Agent SDK session for the given codon configuration.
|
|
59
|
+
* Sets up logging, environment, and message handling.
|
|
60
|
+
*
|
|
61
|
+
* This unified method handles both normal codon execution and exhaustion extensions.
|
|
62
|
+
* From Claude's perspective, both are identical: resume a session with a new prompt.
|
|
63
|
+
* The difference is only where the prompt comes from.
|
|
64
|
+
*
|
|
65
|
+
* @param codon - Codon configuration (not Loop - loops must be expanded first)
|
|
66
|
+
* @param sessionToResume - Session ID to resume (if any). When provided with exhaustionPrompt,
|
|
67
|
+
* always resumes regardless of codon.continuationMode.
|
|
68
|
+
* @param options - Optional spawn configuration
|
|
69
|
+
* @param options.logPath - Custom log file path (defaults to .hankweave/logs/)
|
|
70
|
+
* @param options.exhaustionPrompt - If provided, activates exhaustion mode: uses this prompt
|
|
71
|
+
* instead of codon config, appends to log, forces resume.
|
|
72
|
+
*/
|
|
73
|
+
spawn(codon: Codon, sessionToResume: string | null, options?: {
|
|
74
|
+
logPath?: string;
|
|
75
|
+
exhaustionPrompt?: string;
|
|
76
|
+
}): Promise<string>;
|
|
77
|
+
/**
|
|
78
|
+
* Build SDK options from codon configuration.
|
|
79
|
+
*/
|
|
80
|
+
private buildSDKOptions;
|
|
81
|
+
/**
|
|
82
|
+
* Run the query and process messages.
|
|
83
|
+
*/
|
|
84
|
+
private runQuery;
|
|
85
|
+
/**
|
|
86
|
+
* Extract detailed error information from the error and log file.
|
|
87
|
+
*/
|
|
88
|
+
private extractErrorDetails;
|
|
89
|
+
/**
|
|
90
|
+
* Convert SDK message to JSONL format matching claude-session-schema.
|
|
91
|
+
* SDK messages already have the correct structure, so we mostly just filter out
|
|
92
|
+
* unwanted message types and handle edge cases.
|
|
93
|
+
*/
|
|
94
|
+
private convertSDKMessageToJSONL;
|
|
95
|
+
/**
|
|
96
|
+
* Write a message to the log file.
|
|
97
|
+
*/
|
|
98
|
+
private writeToLog;
|
|
99
|
+
/**
|
|
100
|
+
* Kill the Claude Agent SDK session gracefully.
|
|
101
|
+
* Aborts the query (which triggers SIGTERM on the child via the SDK's abort handler),
|
|
102
|
+
* then waits up to PROCESS_KILL_GRACE_MS for the query to actually complete.
|
|
103
|
+
*/
|
|
104
|
+
kill(signal?: NodeJS.Signals): Promise<void>;
|
|
105
|
+
/**
|
|
106
|
+
* Force-kill the Claude Agent SDK session immediately.
|
|
107
|
+
* Sends abort (SIGTERM via SDK) without waiting for the child to exit.
|
|
108
|
+
* Used by forceShutdown() when the user presses q/Ctrl+C a second time.
|
|
109
|
+
*
|
|
110
|
+
* Note: We cannot send SIGKILL to the SDK's child process because the SDK
|
|
111
|
+
* does not expose the child PID. The abort sends SIGTERM; when our process
|
|
112
|
+
* exits immediately after, the SDK's process.on("exit") handler fires another
|
|
113
|
+
* SIGTERM as a belt-and-suspenders measure.
|
|
114
|
+
*/
|
|
115
|
+
forceKill(): Promise<void>;
|
|
116
|
+
/**
|
|
117
|
+
* Clean up resources.
|
|
118
|
+
*/
|
|
119
|
+
private cleanup;
|
|
120
|
+
/**
|
|
121
|
+
* Check if session is running.
|
|
122
|
+
*/
|
|
123
|
+
isRunning(): boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Get session ID.
|
|
126
|
+
*/
|
|
127
|
+
getSessionId(): string | undefined;
|
|
128
|
+
/**
|
|
129
|
+
* Get synthetic PID (for compatibility with ClaudeProcessManager API).
|
|
130
|
+
* Note: This is not a real process ID since the SDK runs in-process.
|
|
131
|
+
*/
|
|
132
|
+
getPid(): number | undefined;
|
|
133
|
+
/**
|
|
134
|
+
* Close log stream explicitly (for external cleanup).
|
|
135
|
+
*/
|
|
136
|
+
closeLogStream(): Promise<void>;
|
|
137
|
+
/**
|
|
138
|
+
* Run self-test to verify Claude Agent SDK environment setup.
|
|
139
|
+
* Checks for API authentication (API key or OAuth token) and SDK availability.
|
|
140
|
+
*
|
|
141
|
+
* @returns Promise resolving to self-test results
|
|
142
|
+
*/
|
|
143
|
+
runSelfTest(): Promise<ShimSelfTestResult>;
|
|
144
|
+
}
|