@oscharko-dev/keiko-harness 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/.tsbuildinfo +1 -0
  2. package/dist/adapters.d.ts +26 -0
  3. package/dist/adapters.d.ts.map +1 -0
  4. package/dist/adapters.js +48 -0
  5. package/dist/context.d.ts +32 -0
  6. package/dist/context.d.ts.map +1 -0
  7. package/dist/context.js +21 -0
  8. package/dist/emitter.d.ts +16 -0
  9. package/dist/emitter.d.ts.map +1 -0
  10. package/dist/emitter.js +72 -0
  11. package/dist/errors.d.ts +3 -0
  12. package/dist/errors.d.ts.map +1 -0
  13. package/dist/errors.js +4 -0
  14. package/dist/executor.d.ts +4 -0
  15. package/dist/executor.d.ts.map +1 -0
  16. package/dist/executor.js +211 -0
  17. package/dist/fingerprint.d.ts +8 -0
  18. package/dist/fingerprint.d.ts.map +1 -0
  19. package/dist/fingerprint.js +28 -0
  20. package/dist/index.d.ts +11 -0
  21. package/dist/index.d.ts.map +1 -0
  22. package/dist/index.js +14 -0
  23. package/dist/loop.d.ts +4 -0
  24. package/dist/loop.d.ts.map +1 -0
  25. package/dist/loop.js +159 -0
  26. package/dist/patcher.d.ts +5 -0
  27. package/dist/patcher.d.ts.map +1 -0
  28. package/dist/patcher.js +49 -0
  29. package/dist/planner.d.ts +4 -0
  30. package/dist/planner.d.ts.map +1 -0
  31. package/dist/planner.js +21 -0
  32. package/dist/ports.d.ts +28 -0
  33. package/dist/ports.d.ts.map +1 -0
  34. package/dist/ports.js +8 -0
  35. package/dist/session.d.ts +27 -0
  36. package/dist/session.d.ts.map +1 -0
  37. package/dist/session.js +119 -0
  38. package/dist/sinks.d.ts +31 -0
  39. package/dist/sinks.d.ts.map +1 -0
  40. package/dist/sinks.js +72 -0
  41. package/dist/tasks/explain-plan.d.ts +4 -0
  42. package/dist/tasks/explain-plan.d.ts.map +1 -0
  43. package/dist/tasks/explain-plan.js +29 -0
  44. package/dist/tasks/generate-unit-tests.d.ts +4 -0
  45. package/dist/tasks/generate-unit-tests.d.ts.map +1 -0
  46. package/dist/tasks/generate-unit-tests.js +34 -0
  47. package/dist/tasks/investigate-bug.d.ts +4 -0
  48. package/dist/tasks/investigate-bug.d.ts.map +1 -0
  49. package/dist/tasks/investigate-bug.js +31 -0
  50. package/dist/tasks/policy.d.ts +12 -0
  51. package/dist/tasks/policy.d.ts.map +1 -0
  52. package/dist/tasks/policy.js +22 -0
  53. package/dist/tasks/renderRetrievedContext.d.ts +3 -0
  54. package/dist/tasks/renderRetrievedContext.d.ts.map +1 -0
  55. package/dist/tasks/renderRetrievedContext.js +53 -0
  56. package/dist/tasks/verify.d.ts +4 -0
  57. package/dist/tasks/verify.d.ts.map +1 -0
  58. package/dist/tasks/verify.js +16 -0
  59. package/dist/types.d.ts +3 -0
  60. package/dist/types.d.ts.map +1 -0
  61. package/dist/types.js +4 -0
  62. package/dist/version.d.ts +2 -0
  63. package/dist/version.d.ts.map +1 -0
  64. package/dist/version.js +5 -0
  65. package/package.json +34 -0
@@ -0,0 +1,211 @@
1
+ // Handlers for the model-call and tool-call states. The harness — not the model — owns
2
+ // control flow: it inspects finishReason and toolCalls and decides the next state. A model
3
+ // response is never executed as an instruction (ADR-0004 D1).
4
+ import { CancelledError, GatewayError, } from "@oscharko-dev/keiko-model-gateway";
5
+ import { ToolError } from "@oscharko-dev/keiko-tools";
6
+ import { WorkspaceError } from "@oscharko-dev/keiko-workspace";
7
+ import { contextBytes } from "./context.js";
8
+ import { HARNESS_CODES, toFailure } from "./errors.js";
9
+ const RUN_COMMAND_TOOL = "run_command";
10
// Chooses the errorCode reported on a tool:call:failed event: the error's own
// `code` for ToolError / WorkspaceError instances, the generic "TOOL_ERROR" otherwise.
function toolFailureCode(error) {
    const carriesCode = error instanceof ToolError || error instanceof WorkspaceError;
    return carriesCode ? error.code : "TOOL_ERROR";
}
16
+ function buildRequest(ctx) {
17
+ const tools = ctx.plan.allowsTools ? ctx.tools.listTools() : undefined;
18
+ return tools === undefined
19
+ ? { modelId: ctx.modelId, messages: ctx.messages }
20
+ : { modelId: ctx.modelId, messages: ctx.messages, tools };
21
+ }
22
// Decides the state that follows a successful model call. A "tool_calls" finish
// reason leads to tool-call when the plan allows tools and fails the run (recording
// an INTERNAL failure) when it does not. Any other finish reason is treated as final
// content and goes to patch-proposal or reporting depending on plan.allowsPatch.
function routeAfterModel(ctx, response) {
    const wantsTools = response.finishReason === "tool_calls";
    if (wantsTools && ctx.plan.allowsTools) {
        return { to: "tool-call", reason: "model requested tool calls" };
    }
    if (wantsTools) {
        ctx.failure = toFailure(HARNESS_CODES.INTERNAL, "model requested tool calls on a read-only task type");
        return { to: "failed", reason: "tool_calls finishReason forbidden for this task type" };
    }
    return ctx.plan.allowsPatch
        ? { to: "patch-proposal", reason: "model produced final content; assembling patch" }
        : { to: "reporting", reason: "model produced final content; read-only task" };
}
35
// Translates a failed model call into the next state.
//  - Abort (signal aborted or a CancelledError): limit-exceeded when a wall-time
//    failure is already recorded on the context, cancelled otherwise. No
//    model:call:failed event is emitted on this path.
//  - Any other error: emits model:call:failed, then fails the run unless the error
//    is a GatewayError flagged retryable.
//  - Retryable error: counts a failure attempt and re-plans until
//    maxFailureAttempts is reached.
function onModelError(ctx, error) {
    const aborted = ctx.signal.aborted || error instanceof CancelledError;
    if (aborted) {
        const wallTimeRecorded = ctx.failure?.category === HARNESS_CODES.LIMIT_WALL_TIME;
        return wallTimeRecorded
            ? { to: "limit-exceeded", reason: "maxWallTimeMs exceeded during model call" }
            : { to: "cancelled", reason: "abort detected during model call" };
    }
    const errorCode = error instanceof GatewayError ? error.code : "UNKNOWN";
    const message = error instanceof Error ? error.message : "model call failed";
    ctx.emitter.emit({ type: "model:call:failed", modelId: ctx.modelId, errorCode, message });
    const mayRetry = error instanceof GatewayError && error.retryable;
    if (!mayRetry) {
        ctx.failure = toFailure(HARNESS_CODES.MODEL_ERROR, message);
        return { to: "failed", reason: "non-retryable model error" };
    }
    ctx.counters.failureAttempts += 1;
    const attemptsSpent = ctx.counters.failureAttempts >= ctx.limits.maxFailureAttempts;
    if (attemptsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_FAILURE_ATTEMPTS, "max failure attempts reached");
        return { to: "limit-exceeded", reason: "maxFailureAttempts exceeded" };
    }
    return { to: "planning", reason: "retryable model error; re-planning" };
}
57
// Handler for the model-call state. Counts the call, emits model:call:started, invokes
// the model port with the abort signal, and on success emits model:call:completed and a
// reasoning:trace, appends the assistant message to the conversation, remembers the
// response as ctx.lastResponse and returns the routed next step. A throwing model port
// is delegated to onModelError.
export async function handleModelCall(ctx) {
    ctx.counters.modelCalls += 1;
    const startedEvent = {
        type: "model:call:started",
        modelId: ctx.modelId,
        messageCount: ctx.messages.length,
        contextBytes: contextBytes(ctx.messages),
    };
    ctx.emitter.emit(startedEvent);
    let response;
    try {
        const request = buildRequest(ctx);
        response = await ctx.model.call(request, ctx.signal);
    }
    catch (error) {
        return onModelError(ctx, error);
    }
    const completedEvent = {
        type: "model:call:completed",
        modelId: ctx.modelId,
        finishReason: response.finishReason,
        toolCallCount: response.toolCalls.length,
        // Only these four usage fields are copied onto the event.
        usage: {
            requestId: response.usage.requestId,
            promptTokens: response.usage.promptTokens,
            completionTokens: response.usage.completionTokens,
            latencyMs: response.usage.latencyMs,
        },
    };
    ctx.emitter.emit(completedEvent);
    ctx.emitter.emit({
        type: "reasoning:trace",
        phase: "model-call",
        rationale: "evaluated model response and selected next state",
        modelResponse: response.content,
    });
    const reply = assistantMessage(response);
    ctx.messages = [...ctx.messages, reply];
    ctx.lastResponse = response;
    return routeAfterModel(ctx, response);
}
94
+ function assistantMessage(response) {
95
+ return response.toolCalls.length === 0
96
+ ? { role: "assistant", content: response.content }
97
+ : { role: "assistant", content: response.content, toolCalls: response.toolCalls };
98
+ }
99
+ // S-M1: emits the redacted audit event matching a tool's metadata, in addition to
100
+ // tool:call:completed, so the issue #10 ledger sees THAT a command ran / a patch applied — never
101
+ // the args, stdout, or file paths. No-op when the tool returned no metadata (read-only tools).
102
+ function emitToolMetadata(ctx, metadata, durationMs) {
103
+ if (metadata === undefined) {
104
+ return;
105
+ }
106
+ if (metadata.kind === "command") {
107
+ ctx.emitter.emit({
108
+ type: "sandbox:configured",
109
+ envAllowlist: metadata.sandbox.envAllowlist,
110
+ network: metadata.sandbox.network,
111
+ maxOutputBytes: metadata.sandbox.maxOutputBytes,
112
+ timeoutMs: metadata.sandbox.timeoutMs,
113
+ terminationGraceMs: metadata.sandbox.terminationGraceMs,
114
+ cwdRequested: metadata.sandbox.cwdRequested,
115
+ });
116
+ ctx.emitter.emit({
117
+ type: "command:executed",
118
+ executable: metadata.executable,
119
+ argCount: metadata.argCount,
120
+ exitCode: metadata.exitCode,
121
+ timedOut: metadata.timedOut,
122
+ durationMs,
123
+ });
124
+ return;
125
+ }
126
+ ctx.emitter.emit({
127
+ type: "patch:applied",
128
+ changedFiles: metadata.changedFiles,
129
+ created: metadata.created,
130
+ deleted: metadata.deleted,
131
+ });
132
+ }
133
// Builds the step for an abort seen in the tool-call state. When a wall-time failure is
// already recorded on the context the abort is reported as limit-exceeded; otherwise
// the run is cancelled with the caller-supplied reason.
function abortStep(ctx, reason) {
    const wallTimeRecorded = ctx.failure?.category === HARNESS_CODES.LIMIT_WALL_TIME;
    return wallTimeRecorded
        ? { to: "limit-exceeded", reason: "maxWallTimeMs exceeded during tool call" }
        : { to: "cancelled", reason };
}
139
// Records a LIMIT_COMMAND_EXEC failure on the context and returns the matching
// limit-exceeded step.
function commandBudgetExceeded(ctx) {
    const failure = toFailure(HARNESS_CODES.LIMIT_COMMAND_EXEC, "command-execution budget exhausted");
    ctx.failure = failure;
    return { to: "limit-exceeded", reason: "maxCommandExecutions exceeded" };
}
143
// Records a LIMIT_CONTEXT_SIZE failure whose message states the measured byte count
// and the configured maxContextBytes, then returns the limit-exceeded step.
function toolOutputBudgetExceeded(ctx, bytes) {
    const detail = `context ${String(bytes)} bytes exceeds limit ${String(ctx.limits.maxContextBytes)}`;
    ctx.failure = toFailure(HARNESS_CODES.LIMIT_CONTEXT_SIZE, detail);
    return { to: "limit-exceeded", reason: "maxContextBytes exceeded after tool output" };
}
147
// True when the value has a `to` property, i.e. it is a state step rather than a
// tool message. Reflect.has performs the same check as the `in` operator.
function isStateStep(value) {
    return Reflect.has(value, "to");
}
150
// Executes a single tool call through the tool port.
// Returns a tool message ({ role: "tool", ... }) on success, or a state step when the
// call throws: an abort step if the signal is aborted or the error is a
// CancelledError, a failed step (with a TOOL_ERROR failure recorded) otherwise.
// Everything after the port call stays inside the try block, so an exception thrown
// while emitting the completion events is handled like a tool failure.
async function runOneTool(ctx, call) {
    ctx.counters.toolCalls += 1;
    ctx.emitter.emit({ type: "tool:call:started", toolName: call.name, toolCallId: call.id });
    try {
        const request = {
            toolCallId: call.id,
            toolName: call.name,
            arguments: call.arguments,
            signal: ctx.signal,
        };
        const outcome = await ctx.tools.execute(request);
        // The command counter follows what the port reports, not the tool name.
        if (outcome.commandExecuted === true) {
            ctx.counters.commandExecutions += 1;
        }
        ctx.emitter.emit({
            type: "tool:call:completed",
            toolName: call.name,
            toolCallId: call.id,
            durationMs: outcome.durationMs,
        });
        emitToolMetadata(ctx, outcome.metadata, outcome.durationMs);
        return { role: "tool", content: outcome.output, toolCallId: call.id };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : "tool execution failed";
        const failedEvent = {
            type: "tool:call:failed",
            toolName: call.name,
            toolCallId: call.id,
            errorCode: toolFailureCode(error),
            message,
        };
        // The failure event is emitted on the abort path as well.
        ctx.emitter.emit(failedEvent);
        const aborted = ctx.signal.aborted || error instanceof CancelledError;
        if (aborted) {
            return abortStep(ctx, "abort detected during tool call");
        }
        ctx.failure = toFailure(HARNESS_CODES.TOOL_ERROR, message);
        return { to: "failed", reason: "tool execution failed" };
    }
}
188
// Handler for the tool-call state. Runs the tool calls of the last model response one
// after another, in order. Before each call it honours an abort and refuses a
// run_command call once the command-execution budget is spent; after each call it
// checks that the conversation plus all collected tool output still fits
// maxContextBytes. Tool messages are appended to ctx.messages only when every call
// succeeded; any early exit returns the corresponding step and leaves ctx.messages
// untouched.
export async function handleToolCall(ctx) {
    const pending = ctx.lastResponse?.toolCalls ?? [];
    const toolMessages = [];
    for (const call of pending) {
        if (ctx.signal.aborted) {
            return abortStep(ctx, "abort detected before tool call");
        }
        const commandBudgetSpent = ctx.counters.commandExecutions >= ctx.limits.maxCommandExecutions;
        if (call.name === RUN_COMMAND_TOOL && commandBudgetSpent) {
            return commandBudgetExceeded(ctx);
        }
        const outcome = await runOneTool(ctx, call);
        if (isStateStep(outcome)) {
            return outcome;
        }
        // toolMessages is local, so collecting the message before the size check does
        // not leak it when the check fails.
        toolMessages.push(outcome);
        const bytes = contextBytes([...ctx.messages, ...toolMessages]);
        if (bytes > ctx.limits.maxContextBytes) {
            return toolOutputBudgetExceeded(ctx, bytes);
        }
    }
    ctx.messages = [...ctx.messages, ...toolMessages];
    return { to: "model-call", reason: "tool results fed back to model" };
}
@@ -0,0 +1,8 @@
1
+ import { canonicalise } from "@oscharko-dev/keiko-security";
2
+ import type { Fingerprinter, FingerprintInput, IdSource } from "./ports.js";
3
+ export declare function configFingerprint(input: FingerprintInput): string;
4
+ export declare const defaultFingerprinter: Fingerprinter;
5
+ export declare const defaultIdSource: IdSource;
6
+ export declare function counterIdSource(): IdSource;
7
+ export { canonicalise };
8
+ //# sourceMappingURL=fingerprint.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fingerprint.d.ts","sourceRoot":"","sources":["../src/fingerprint.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,YAAY,EAAa,MAAM,8BAA8B,CAAC;AACvE,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE5E,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,gBAAgB,GAAG,MAAM,CAEjE;AAED,eAAO,MAAM,oBAAoB,EAAE,aAElC,CAAC;AAEF,eAAO,MAAM,eAAe,EAAE,QAE7B,CAAC;AAGF,wBAAgB,eAAe,IAAI,QAAQ,CAQ1C;AAID,OAAO,EAAE,YAAY,EAAE,CAAC"}
@@ -0,0 +1,28 @@
1
+ // Deterministic run-ID and configuration-fingerprint sources. Production uses
2
+ // node:crypto (randomUUID) for the random ID source and the shared security-package
3
+ // hashing primitives (canonical-JSON + SHA-256 hex) for the config fingerprint; tests
4
+ // inject a counter so IDs are fixed and runs are reproducible for replay (ADR-0004 D7).
5
+ import { randomUUID } from "node:crypto";
6
+ import { canonicalise, sha256Hex } from "@oscharko-dev/keiko-security";
7
+ export function configFingerprint(input) {
8
+ return sha256Hex(canonicalise(input));
9
+ }
10
+ export const defaultFingerprinter = {
11
+ compute: configFingerprint,
12
+ };
13
+ export const defaultIdSource = {
14
+ newRunId: () => randomUUID(),
15
+ };
16
+ // Test/replay helper: deterministic monotonically increasing run IDs.
17
+ export function counterIdSource() {
18
+ let n = 0;
19
+ return {
20
+ newRunId: () => {
21
+ n += 1;
22
+ return `run-${String(n)}`;
23
+ },
24
+ };
25
+ }
26
+ // Re-export the canonical-JSON serialiser at its historical name for any caller that still imports
27
+ // it from this module. The implementation now lives in @oscharko-dev/keiko-security/hashing.
28
+ export { canonicalise };
@@ -0,0 +1,11 @@
1
+ export { KEIKO_HARNESS_VERSION } from "./version.js";
2
+ export { createSession, HARNESS_VERSION, type AgentConfig, type AgentSession, type HarnessDeps, } from "./session.js";
3
+ export { createSession as runAgent } from "./session.js";
4
+ export { DEFAULT_LIMITS, HARNESS_CODES, TERMINAL_STATES, type ExplainPlanInput, type GenerateUnitTestsInput, type HarnessCode, type HarnessEvent, type HarnessFailure, type HarnessLimits, type HarnessStateName, type InvestigateBugInput, type ModelCallCompletedEvent, type ModelCallFailedEvent, type ModelCallStartedEvent, type PatchProposedEvent, type ReasoningTraceEvent, type RunCancelledEvent, type RunCompletedEvent, type RunCounters, type RunFailedEvent, type RunManifest, type RunOutcome, type RunResult, type RunStartedEvent, type StateTransition, type StateTransitionEvent, type TaskInput, type TaskType, type TerminalState, type ToolCallCompletedEvent, type ToolCallFailedEvent, type ToolCallStartedEvent, type VerificationResultEvent, } from "./types.js";
5
+ export { HarnessError, HarnessInternalError, HarnessModelError, HarnessToolError, LimitExceededError, toFailure, } from "./errors.js";
6
+ export type { EventSink, Fingerprinter, FingerprintInput, IdSource, ModelPort, ToolCallMetadata, ToolCallRequest, ToolCallResult, ToolPort, } from "./ports.js";
7
+ export { DryRunToolPort, GatewayModelPort, type ChatModel, type RecordedToolCall, } from "./adapters.js";
8
+ export { CliEventSink, MemoryEventSink, type EventWriter, type ManifestSeed } from "./sinks.js";
9
+ export { canonicalise, configFingerprint, counterIdSource, defaultFingerprinter, defaultIdSource, } from "./fingerprint.js";
10
+ export { resolveTaskPlan, type TaskPlan } from "./tasks/policy.js";
11
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,EACL,aAAa,EACb,eAAe,EACf,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,WAAW,GACjB,MAAM,cAAc,CAAC;AAGtB,OAAO,EAAE,aAAa,IAAI,QAAQ,EAAE,MAAM,cAAc,CAAC;AAEzD,OAAO,EACL,cAAc,EACd,aAAa,EACb,eAAe,EACf,KAAK,gBAAgB,EACrB,KAAK,sBAAsB,EAC3B,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,KAAK,oBAAoB,EACzB,KAAK,qBAAqB,EAC1B,KAAK,kBAAkB,EACvB,KAAK,mBAAmB,EACxB,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAChB,KAAK,cAAc,EACnB,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,SAAS,EACd,KAAK,eAAe,EACpB,KAAK,eAAe,EACpB,KAAK,oBAAoB,EACzB,KAAK,SAAS,EACd,KAAK,QAAQ,EACb,KAAK,aAAa,EAClB,KAAK,sBAAsB,EAC3B,KAAK,mBAAmB,EACxB,KAAK,oBAAoB,EACzB,KAAK,uBAAuB,GAC7B,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,YAAY,EACZ,oBAAoB,EACpB,iBAAiB,EACjB,gBAAgB,EAChB,kBAAkB,EAClB,SAAS,GACV,MAAM,aAAa,CAAC;AAErB,YAAY,EACV,SAAS,EACT,aAAa,EACb,gBAAgB,EAChB,QAAQ,EACR,SAAS,EACT,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,QAAQ,GACT,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,cAAc,EACd,gBAAgB,EAChB,KAAK,SAAS,EACd,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,KAAK,WAAW,EAAE,KAAK,YAAY,EAAE,MAAM,YAAY,CAAC;AAEhG,OAAO,EACL,YAAY,EACZ,iBAAiB,EACjB,eAAe,EACf,oBAAoB,EACpB,eAAe,GAChB,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,eAAe,EAAE,KAAK,QAAQ,EAAE,MAAM,mBAAmB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,14 @@
1
+ // Public barrel for the agent harness: the session API, all ports/adapters/sinks, the
2
+ // task types, the event schema, the limit/error taxonomy, and the deterministic ID and
3
+ // fingerprint sources. Downstream tools, evidence, UI, and evaluation code depend only
4
+ // on these typed seams (ADR-0004 D2).
5
+ export { KEIKO_HARNESS_VERSION } from "./version.js";
6
+ export { createSession, HARNESS_VERSION, } from "./session.js";
7
+ // runAgent is the ergonomic SDK alias of createSession; both start a bounded run.
8
+ export { createSession as runAgent } from "./session.js";
9
+ export { DEFAULT_LIMITS, HARNESS_CODES, TERMINAL_STATES, } from "./types.js";
10
+ export { HarnessError, HarnessInternalError, HarnessModelError, HarnessToolError, LimitExceededError, toFailure, } from "./errors.js";
11
+ export { DryRunToolPort, GatewayModelPort, } from "./adapters.js";
12
+ export { CliEventSink, MemoryEventSink } from "./sinks.js";
13
+ export { canonicalise, configFingerprint, counterIdSource, defaultFingerprinter, defaultIdSource, } from "./fingerprint.js";
14
+ export { resolveTaskPlan } from "./tasks/policy.js";
package/dist/loop.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ import { type RunContext } from "./context.js";
2
+ import { type RunOutcome } from "./types.js";
3
+ export declare function runLoop(ctx: RunContext): Promise<RunOutcome>;
4
+ //# sourceMappingURL=loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loop.d.ts","sourceRoot":"","sources":["../src/loop.ts"],"names":[],"mappings":"AAKA,OAAO,EAAgB,KAAK,UAAU,EAAkB,MAAM,cAAc,CAAC;AAI7E,OAAO,EAA0C,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC;AAyIrF,wBAAsB,OAAO,CAAC,GAAG,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC,CA4BlE"}
package/dist/loop.js ADDED
@@ -0,0 +1,159 @@
1
+ // The state-machine driver. The harness owns all control flow: it checks abort and limit
2
+ // guards at the top of the loop and before each port call, dispatches the current state to
3
+ // its handler, and emits a state:transition before every change (ADR-0004 D1, D3, D4).
4
+ import { HARNESS_CODES, toFailure } from "./errors.js";
5
+ import { contextBytes } from "./context.js";
6
+ import { handleModelCall, handleToolCall } from "./executor.js";
7
+ import { handlePatchProposal, handleReporting, handleVerification } from "./patcher.js";
8
+ import { handleContextSelection, handlePlanning } from "./planner.js";
9
+ import { TERMINAL_STATES } from "./types.js";
10
+ const MAX_LOOP_STEPS = 10_000; // absolute safety net; bounded states make this unreachable.
11
// Builds the step that moves the run to `cancelled` with the given reason.
function abortStep(reason) {
    const to = "cancelled";
    return { to, reason };
}
14
// Wall-time gate. Returns null while the time elapsed since ctx.startedAt (measured
// with ctx.clock) is within maxWallTimeMs; otherwise records a LIMIT_WALL_TIME failure
// on the context and returns the limit-exceeded step.
function checkWallTime(ctx) {
    const elapsedMs = ctx.clock.now() - ctx.startedAt;
    if (elapsedMs > ctx.limits.maxWallTimeMs) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_WALL_TIME, "wall-time budget exhausted");
        return { to: "limit-exceeded", reason: "maxWallTimeMs exceeded" };
    }
    return null;
}
21
// Guards used by the loop for the planning state: the wall-time gate first, then the
// iteration budget. Returns the limit-exceeded step of the first guard that trips, or
// null when both pass.
function checkLoopLimits(ctx) {
    const wallTimeStep = checkWallTime(ctx);
    if (wallTimeStep !== null) {
        return wallTimeStep;
    }
    const iterationsSpent = ctx.counters.iterations >= ctx.limits.maxIterations;
    if (iterationsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_ITERATIONS, "iteration budget exhausted");
        return { to: "limit-exceeded", reason: "maxIterations exceeded" };
    }
    return null;
}
34
// Guards applied on entry to the model-call state: the model-call budget is checked
// first, then the byte size of the current conversation against maxContextBytes.
// Returns the limit-exceeded step of the first guard that trips, or null.
function checkModelCallLimits(ctx) {
    const callsSpent = ctx.counters.modelCalls >= ctx.limits.maxModelCalls;
    if (callsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_MODEL_CALLS, "model-call budget exhausted");
        return { to: "limit-exceeded", reason: "maxModelCalls exceeded" };
    }
    const bytes = contextBytes(ctx.messages);
    if (bytes > ctx.limits.maxContextBytes) {
        const detail = `context ${String(bytes)} bytes exceeds limit ${String(ctx.limits.maxContextBytes)}`;
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_CONTEXT_SIZE, detail);
        return { to: "limit-exceeded", reason: "maxContextBytes exceeded" };
    }
    return null;
}
48
// Guards evaluated before a state is dispatched, in this order: wall time, abort,
// then the budget checks specific to the model-call or tool-call state. Returns the
// step to take instead of dispatching, or null when the state may run.
function checkEntryGuards(ctx, state) {
    const wallTimeStep = checkWallTime(ctx);
    if (wallTimeStep !== null) {
        return wallTimeStep;
    }
    if (ctx.signal.aborted) {
        return abortStep("abort detected before state entry");
    }
    switch (state) {
        case "model-call":
            return checkModelCallLimits(ctx);
        case "tool-call":
            return checkToolLimits(ctx);
        default:
            return null;
    }
}
66
// Guards applied on entry to the tool-call state. The tool-call budget accounts for
// the calls pending on the last model response, so a batch that would overshoot
// maxToolCalls is rejected before any of it runs.
// NOTE(review): the command-budget check fires whenever the budget is already spent,
// even when none of the pending calls is a command — confirm this is intended.
function checkToolLimits(ctx) {
    const pendingCalls = ctx.lastResponse?.toolCalls.length ?? 0;
    const projectedCalls = ctx.counters.toolCalls + pendingCalls;
    if (projectedCalls > ctx.limits.maxToolCalls) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_TOOL_CALLS, "tool-call budget exhausted");
        return { to: "limit-exceeded", reason: "maxToolCalls exceeded" };
    }
    const commandsSpent = ctx.counters.commandExecutions >= ctx.limits.maxCommandExecutions;
    if (commandsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_COMMAND_EXEC, "command-execution budget exhausted");
        return { to: "limit-exceeded", reason: "maxCommandExecutions exceeded" };
    }
    return null;
}
78
// Runs the handler for the given state and resolves to the step it returns. Entering
// `planning` increments the iteration counter before the handler runs. A state
// without a handler records an INTERNAL failure and fails the run.
async function dispatch(ctx, state) {
    if (state === "planning") {
        ctx.counters.iterations += 1;
        return handlePlanning(ctx);
    }
    if (state === "context-selection") {
        return handleContextSelection(ctx);
    }
    if (state === "model-call") {
        return handleModelCall(ctx);
    }
    if (state === "tool-call") {
        return handleToolCall(ctx);
    }
    if (state === "patch-proposal") {
        return handlePatchProposal(ctx);
    }
    if (state === "verification") {
        return handleVerification(ctx);
    }
    if (state === "reporting") {
        return handleReporting(ctx);
    }
    ctx.failure = toFailure(HARNESS_CODES.INTERNAL, `no handler for state ${state}`);
    return { to: "failed", reason: "internal: unhandled state" };
}
100
+ function transition(ctx, from, step) {
101
+ if (step.to === "cancelled") {
102
+ ctx.cancelledAtState = from;
103
+ }
104
+ ctx.emitter.emit({ type: "state:transition", from, to: step.to, reason: step.reason });
105
+ return step.to;
106
+ }
107
// Emits the final event for a terminal state:
//  - completed               -> run:completed (patchDiff included only when set)
//  - cancelled               -> run:cancelled (reason included only when set)
//  - failed / limit-exceeded -> run:failed; a missing failure record is replaced by an
//                               INTERNAL failure, which is also stored on the context
// Any other state emits nothing.
function emitTerminal(ctx, state) {
    switch (state) {
        case "completed": {
            const patchPart = ctx.patchDiff === undefined ? {} : { patchDiff: ctx.patchDiff };
            ctx.emitter.emit({
                type: "run:completed",
                report: ctx.report ?? "no model output",
                ...patchPart,
            });
            return;
        }
        case "cancelled": {
            const reasonPart = ctx.cancelReason === undefined ? {} : { reason: ctx.cancelReason };
            ctx.emitter.emit({
                type: "run:cancelled",
                atState: ctx.cancelledAtState ?? state,
                ...reasonPart,
            });
            return;
        }
        case "failed":
        case "limit-exceeded": {
            const failure = ctx.failure ?? toFailure(HARNESS_CODES.INTERNAL, "run failed without a failure record");
            ctx.failure = failure;
            ctx.emitter.emit({ type: "run:failed", failure, atState: state });
            return;
        }
        default:
            return;
    }
}
130
// Drives the state machine from `intake` until a terminal state is reached, emits the
// terminal event and resolves to that terminal state.
// Each pass: an abort cancels the run; otherwise the guards for the current state are
// evaluated and, if they pass, the state is dispatched. The wall-time gate is checked
// again after dispatch and takes precedence over the handler's own step. The pass
// counter is capped at MAX_LOOP_STEPS; hitting the cap fails the run with an INTERNAL
// failure.
export async function runLoop(ctx) {
    let state = transition(ctx, "intake", { to: "planning", reason: "task validated" });
    let passes = 0;
    while (passes < MAX_LOOP_STEPS && !TERMINAL_STATES.has(state)) {
        passes += 1;
        if (ctx.signal.aborted) {
            state = transition(ctx, state, abortStep("abort detected at top of loop"));
            break;
        }
        let next = state === "planning" ? checkLoopLimits(ctx) : checkEntryGuards(ctx, state);
        if (next === null) {
            const dispatched = await dispatch(ctx, state);
            next = checkWallTime(ctx) ?? dispatched;
        }
        state = transition(ctx, state, next);
    }
    if (!TERMINAL_STATES.has(state)) {
        ctx.failure = toFailure(HARNESS_CODES.INTERNAL, "state-machine safety step limit exceeded");
        const bailOut = { to: "failed", reason: "internal: state-machine step limit exceeded" };
        state = transition(ctx, state, bailOut);
    }
    emitTerminal(ctx, state);
    return state;
}
@@ -0,0 +1,5 @@
1
+ import type { RunContext, StateStep } from "./context.js";
2
+ export declare function handlePatchProposal(ctx: RunContext): StateStep;
3
+ export declare function handleVerification(ctx: RunContext): StateStep;
4
+ export declare function handleReporting(ctx: RunContext): StateStep;
5
+ //# sourceMappingURL=patcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"patcher.d.ts","sourceRoot":"","sources":["../src/patcher.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAQ1D,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAkB9D;AAID,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAgB7D;AAID,wBAAgB,eAAe,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAG1D"}
@@ -0,0 +1,49 @@
1
+ // Handlers for the patch-proposal, verification, and reporting states. The harness NEVER
2
+ // applies a patch: the diff is emitted as a patch:proposed event and carried on the run
3
+ // result. Nothing here touches the file system (ADR-0004 D8, dry-run by default).
4
+ import { HARNESS_CODES, toFailure } from "./errors.js";
5
// Shared encoder used to measure patches in bytes rather than in string length.
const encoder = new TextEncoder();
// Returns the number of bytes TextEncoder produces for the diff text.
function patchByteLength(diff) {
    const encoded = encoder.encode(diff);
    return encoded.length;
}
9
// Handler for the patch-proposal state. Treats the last model response's content as
// the diff (empty string when there is none). A diff larger than maxPatchBytes records
// a LIMIT_PATCH_SIZE failure and ends in limit-exceeded; otherwise the diff is stored
// on the context, announced with a patch:proposed event and the run moves on to
// verification.
export function handlePatchProposal(ctx) {
    const diff = ctx.lastResponse?.content ?? "";
    const patchBytes = patchByteLength(diff);
    if (patchBytes > ctx.limits.maxPatchBytes) {
        const detail = `patch ${String(patchBytes)} bytes exceeds limit ${String(ctx.limits.maxPatchBytes)}`;
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_PATCH_SIZE, detail);
        return { to: "limit-exceeded", reason: "maxPatchBytes exceeded" };
    }
    ctx.patchDiff = diff;
    const proposal = {
        type: "patch:proposed",
        targetFile: ctx.plan.targetFile,
        patchBytes,
        diff,
    };
    ctx.emitter.emit(proposal);
    return { to: "verification", reason: "patch assembled and proposed (not applied)" };
}
25
// Handler for the verification state. The check is structural: the proposed patch
// passes when it contains something other than whitespace. The outcome is always
// emitted as verification:result. A pass moves on to reporting; a failure counts a
// failure attempt and re-plans until maxFailureAttempts is reached, at which point a
// LIMIT_FAILURE_ATTEMPTS failure is recorded.
export function handleVerification(ctx) {
    const diff = ctx.patchDiff ?? "";
    const passed = diff.trim().length > 0;
    const detail = passed ? "non-empty patch produced" : "empty patch";
    ctx.emitter.emit({ type: "verification:result", passed, detail });
    if (passed) {
        return { to: "reporting", reason: "verification passed" };
    }
    ctx.counters.failureAttempts += 1;
    const attemptsSpent = ctx.counters.failureAttempts >= ctx.limits.maxFailureAttempts;
    if (attemptsSpent) {
        ctx.failure = toFailure(HARNESS_CODES.LIMIT_FAILURE_ATTEMPTS, "verification kept failing");
        return { to: "limit-exceeded", reason: "maxFailureAttempts exceeded after verification" };
    }
    return { to: "planning", reason: "verification failed; re-planning" };
}
44
+ // Records the final report on the context. The run:completed event is emitted by the loop
45
+ // once the terminal `completed` state is reached, so it is the last event in the stream.
46
+ export function handleReporting(ctx) {
47
+ ctx.report = ctx.lastResponse?.content ?? "no model output";
48
+ return { to: "completed", reason: "report generated" };
49
+ }
@@ -0,0 +1,4 @@
1
+ import { type RunContext, type StateStep } from "./context.js";
2
+ export declare function handlePlanning(ctx: RunContext): StateStep;
3
+ export declare function handleContextSelection(ctx: RunContext): StateStep;
4
+ //# sourceMappingURL=planner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../src/planner.ts"],"names":[],"mappings":"AAKA,OAAO,EAAgB,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,cAAc,CAAC;AAE7E,wBAAgB,cAAc,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAOzD;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,UAAU,GAAG,SAAS,CAUjE"}
@@ -0,0 +1,21 @@
1
+ // Handlers for the planning and context-selection states. Planning emits the task
2
+ // rationale as a reasoning:trace. Context-selection finalises the message array and
3
+ // enforces maxContextBytes before any model call (ADR-0004 D3 enforcement point).
4
+ import { HARNESS_CODES, toFailure } from "./errors.js";
5
+ import { contextBytes } from "./context.js";
6
+ export function handlePlanning(ctx) {
7
+ ctx.emitter.emit({
8
+ type: "reasoning:trace",
9
+ phase: "planning",
10
+ rationale: ctx.plan.rationale,
11
+ });
12
+ return { to: "context-selection", reason: "plan constructed" };
13
+ }
14
// Handler for the context-selection state. Measures the current conversation and
// either moves on to model-call or, when it exceeds maxContextBytes, records a
// LIMIT_CONTEXT_SIZE failure and ends in limit-exceeded.
export function handleContextSelection(ctx) {
    const bytes = contextBytes(ctx.messages);
    const withinBudget = !(bytes > ctx.limits.maxContextBytes);
    if (withinBudget) {
        return { to: "model-call", reason: "context assembled within byte budget" };
    }
    const detail = `context ${String(bytes)} bytes exceeds limit ${String(ctx.limits.maxContextBytes)}`;
    ctx.failure = toFailure(HARNESS_CODES.LIMIT_CONTEXT_SIZE, detail);
    return { to: "limit-exceeded", reason: "maxContextBytes exceeded" };
}
@@ -0,0 +1,28 @@
1
+ import type { GatewayRequest, GatewayStreamChunk, NormalizedResponse } from "@oscharko-dev/keiko-model-gateway";
2
+ import type { ToolCallMetadata, ToolCallRequest, ToolCallResult, ToolPort } from "@oscharko-dev/keiko-contracts";
3
+ import type { HarnessEvent, HarnessLimits, TaskInput, TaskType } from "./types.js";
4
+ export type { ToolCallMetadata, ToolCallRequest, ToolCallResult, ToolPort };
5
+ export interface ModelPort {
6
+ readonly call: (request: GatewayRequest, signal: AbortSignal) => Promise<NormalizedResponse>;
7
+ readonly callStream?: (request: GatewayRequest, signal: AbortSignal) => AsyncIterable<GatewayStreamChunk>;
8
+ }
9
+ export interface EventSink {
10
+ readonly emit: (event: HarnessEvent) => void;
11
+ readonly retainsRawContent?: boolean | undefined;
12
+ }
13
+ export interface IdSource {
14
+ readonly newRunId: () => string;
15
+ }
16
+ export interface FingerprintInput {
17
+ readonly taskType: TaskType;
18
+ readonly taskInput: TaskInput;
19
+ readonly limits: HarnessLimits;
20
+ readonly modelId: string;
21
+ readonly workingDirectory: string;
22
+ readonly dryRun: boolean;
23
+ readonly harnessVersion: string;
24
+ }
25
+ export interface Fingerprinter {
26
+ readonly compute: (input: FingerprintInput) => string;
27
+ }
28
+ //# sourceMappingURL=ports.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ports.d.ts","sourceRoot":"","sources":["../src/ports.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EACnB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,KAAK,EACV,gBAAgB,EAChB,eAAe,EACf,cAAc,EACd,QAAQ,EACT,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEnF,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC;AAE5E,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAK7F,QAAQ,CAAC,UAAU,CAAC,EAAE,CACpB,OAAO,EAAE,cAAc,EACvB,MAAM,EAAE,WAAW,KAChB,aAAa,CAAC,kBAAkB,CAAC,CAAC;CACxC;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,YAAY,KAAK,IAAI,CAAC;IAI7C,QAAQ,CAAC,iBAAiB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CAClD;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,QAAQ,EAAE,MAAM,MAAM,CAAC;CACjC;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IAC5B,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;IAC9B,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;CACjC;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,MAAM,CAAC;CACvD"}
package/dist/ports.js ADDED
@@ -0,0 +1,8 @@
1
+ // Hexagonal port interfaces. The harness (high-level policy) depends only on these
2
+ // abstractions, never on the concrete Gateway, file system, or terminal. Issues #6,
3
+ // #10, and #13 each plug in their own implementations without touching the harness.
4
+ //
5
+ // The tool ports (ToolPort, ToolCallRequest, ToolCallResult, ToolCallMetadata) are
6
+ // shared with the tools package via contracts. Re-export them here as part of the
7
+ // harness package surface.
8
+ export {};
@@ -0,0 +1,27 @@
1
+ import { HARNESS_VERSION } from "@oscharko-dev/keiko-contracts";
2
+ import type { Clock } from "@oscharko-dev/keiko-model-gateway";
3
+ import type { EventSink, Fingerprinter, IdSource, ModelPort, ToolPort } from "./ports.js";
4
+ import { type HarnessLimits, type RunResult, type TaskInput } from "./types.js";
5
+ export { HARNESS_VERSION };
6
+ export interface AgentConfig {
7
+ readonly model: string;
8
+ readonly workingDirectory: string;
9
+ readonly limits?: Partial<HarnessLimits> | undefined;
10
+ readonly dryRun?: boolean | undefined;
11
+ }
12
+ export interface HarnessDeps {
13
+ readonly model: ModelPort;
14
+ readonly tools: ToolPort;
15
+ readonly sink: EventSink;
16
+ readonly clock?: Clock | undefined;
17
+ readonly idSource?: IdSource | undefined;
18
+ readonly fingerprinter?: Fingerprinter | undefined;
19
+ }
20
+ export interface AgentSession {
21
+ readonly runId: string;
22
+ readonly fingerprint: string;
23
+ readonly result: Promise<RunResult>;
24
+ readonly cancel: (reason?: string) => void;
25
+ }
26
+ export declare function createSession(task: TaskInput, config: AgentConfig, deps: HarnessDeps): AgentSession;
27
+ //# sourceMappingURL=session.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"session.d.ts","sourceRoot":"","sources":["../src/session.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAChE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,mCAAmC,CAAC;AAO/D,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAG1F,OAAO,EAEL,KAAK,aAAa,EAElB,KAAK,SAAS,EACd,KAAK,SAAS,EACf,MAAM,YAAY,CAAC;AAIpB,OAAO,EAAE,eAAe,EAAE,CAAC;AAE3B,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,SAAS,CAAC;IAGrD,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACvC;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;IAC1B,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,KAAK,GAAG,SAAS,CAAC;IACnC,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IACzC,QAAQ,CAAC,aAAa,CAAC,EAAE,aAAa,GAAG,SAAS,CAAC;CACpD;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;IACpC,QAAQ,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;CAC5C;AA6FD,wBAAgB,aAAa,CAC3B,IAAI,EAAE,SAAS,EACf,MAAM,EAAE,WAAW,EACnB,IAAI,EAAE,WAAW,GAChB,YAAY,CAqCd"}