@bluecopa/harness 0.0.0-snapshot.137 → 0.0.0-snapshot.138

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,90 @@
1
- import { A as AnyTool, T as ToolProvider, a as ToolResult, M as ModelFactory, b as ToolResultArtifact } from '../shared-types-DRxnerLT.js';
2
- export { c as ActionType, B as BashOptions, d as BatchOp, e as BatchResult, G as GlobOptions, f as GrepOptions, R as ReadOptions, g as TextEditorRequest, h as ThreadStatus, i as ToolProviderCapabilities, W as WebFetchOptions } from '../shared-types-DRxnerLT.js';
1
+ import { A as AnyTool, T as ToolProvider, a as ToolResult, M as ModelFactory, b as ToolResultArtifact, c as ToolProviderCapabilities, B as BashOptions, R as ReadOptions, G as GlobOptions, d as GrepOptions, e as BatchOp, f as BatchResult } from '../shared-types-D89hqST8.js';
2
+ export { g as ActionType, h as TextEditorRequest, i as ThreadStatus, W as WebFetchOptions } from '../shared-types-D89hqST8.js';
3
+ import { HarnessTelemetry } from '../observability/otel.js';
4
+ export { MetricRecord, SpanHandle, SpanRecord } from '../observability/otel.js';
3
5
  import 'ai';
4
6
 
7
+ /** Long-running job orchestration primitives. Generic; no coding-agent assumptions. */
8
+ type JobKind = "local" | "remote";
9
+ /** Discriminated on `kind` so remote-only fields only exist on remote transports. */
10
+ type JobTransport = {
11
+ kind: "local";
12
+ } | {
13
+ kind: "remote";
14
+ host: string;
15
+ user?: string | undefined;
16
+ keyPath?: string | undefined;
17
+ };
18
+ interface JobSpec {
19
+ id: string;
20
+ label: string;
21
+ command: string;
22
+ cwd?: string | undefined;
23
+ env?: Record<string, string> | undefined;
24
+ transport: JobTransport;
25
+ startedAt: number;
26
+ startedBy?: {
27
+ tupleId?: string | undefined;
28
+ turn?: number | undefined;
29
+ } | undefined;
30
+ }
31
+ type JobState = "running" | "exited" | "failed" | "killed" | "unknown";
32
+ interface JobStatus {
33
+ spec: JobSpec;
34
+ state: JobState;
35
+ /** Local supervisor PID. */
36
+ pid?: number | undefined;
37
+ /** Remote parent PID (only set when transport.kind === "remote"). */
38
+ remotePid?: number | undefined;
39
+ exitCode?: number | undefined;
40
+ /** Mirrors spec.startedAt; kept on the status for convenience when reading status without spec unpacking. */
41
+ startedAt: number;
42
+ endedAt?: number | undefined;
43
+ lastSeenAliveAt: number;
44
+ tailLines: string[];
45
+ bytesWritten: number;
46
+ }
47
+ type JobEvent = {
48
+ type: "job_started";
49
+ status: JobStatus;
50
+ } | {
51
+ type: "job_output";
52
+ id: string;
53
+ chunk: string;
54
+ at: number;
55
+ } | {
56
+ type: "job_exited";
57
+ status: JobStatus;
58
+ } | {
59
+ type: "job_failed";
60
+ status: JobStatus;
61
+ error: string;
62
+ } | {
63
+ type: "job_killed";
64
+ status: JobStatus;
65
+ reason: string;
66
+ };
67
+ interface JobStartOptions {
68
+ label: string;
69
+ command: string;
70
+ transport?: JobTransport | undefined;
71
+ cwd?: string | undefined;
72
+ env?: Record<string, string> | undefined;
73
+ }
74
+ interface JobRegistry {
75
+ start(opts: JobStartOptions, startedBy?: JobSpec["startedBy"]): Promise<JobStatus>;
76
+ check(id: string): Promise<JobStatus | null>;
77
+ cancel(id: string, signal?: "TERM" | "KILL"): Promise<JobStatus | null>;
78
+ list(): Promise<JobStatus[]>;
79
+ tail(id: string, lines?: number): Promise<string[]>;
80
+ waitFor(id: string, signal?: AbortSignal): Promise<JobStatus>;
81
+ subscribe(handler: (e: JobEvent) => void): () => void;
82
+ /** Cheap synchronous snapshot — suitable for orchestrator context each turn. */
83
+ snapshot(): JobStatus[];
84
+ /** Remove finished jobs (exited/killed/failed) older than `olderThanMs` (default 24h). Returns count removed. */
85
+ prune(olderThanMs?: number): Promise<number>;
86
+ }
87
+
5
88
  interface ToolCallInfo {
6
89
  toolCallId: string;
7
90
  toolName: string;
@@ -40,15 +123,37 @@ interface AgentMessage {
40
123
  * (truecode). The harness only provides the Tool contract and helpers.
41
124
  */
42
125
 
126
+ type ToolExecutionMode = "serial" | "parallel";
43
127
  /** A registered tool: schema for the model, execute for the worker */
44
128
  interface Tool {
45
129
  name: string;
46
130
  /** AI SDK tool schema. Optional for ARC-internal tools (ReadEpisode, LCM_*, ScratchPad_*). */
47
131
  schema?: AnyTool | undefined;
132
+ /** Worker execution mode for batched tool calls. Defaults to serial. */
133
+ executionMode?: ToolExecutionMode | ((args: Record<string, unknown>) => ToolExecutionMode);
48
134
  /** Execute using the ToolProvider. If not set, tool is handled externally (e.g. ARC tools). */
49
135
  execute?: (provider: ToolProvider, args: Record<string, unknown>, workDir: string) => Promise<ToolResult>;
136
+ /**
137
+ * When true, this tool may yield the worker loop if the operation is incomplete.
138
+ * After execution, the worker checks if the target thread is still running.
139
+ * If it is and `waitForThread` is available on the ToolProvider, the worker
140
+ * suspends (does not count a step) until the thread completes.
141
+ * The tool args must include a `threadId` field for the worker to track.
142
+ */
143
+ yieldsOnIncomplete?: boolean | undefined;
50
144
  }
51
145
 
146
+ /**
147
+ * Serialized attachment form suitable for JSON persistence. Unlike the in-flight
148
+ * `ContentPart` (which carries a `Buffer`), the stored form keeps image bytes
149
+ * base64-encoded so messages round-trip cleanly through session snapshots.
150
+ * Converted to a real `ContentPart` only when building AgentMessage[] for the model.
151
+ */
152
+ interface StoredAttachment {
153
+ type: "image";
154
+ imageBase64: string;
155
+ mimeType: string;
156
+ }
52
157
  interface StoredMessage {
53
158
  id: string;
54
159
  conversationId: string;
@@ -57,6 +162,8 @@ interface StoredMessage {
57
162
  content: string;
58
163
  toolCalls?: ToolCallInfo[];
59
164
  toolResults?: ToolResultInfo[];
165
+ /** Multimodal image attachments. Persisted as base64; converted to ContentPart[] at model-call time. */
166
+ attachments?: StoredAttachment[];
60
167
  timestamp: number;
61
168
  }
62
169
  interface GrepResult {
@@ -232,7 +339,7 @@ interface ExpectedOutputContract {
232
339
  description?: string | undefined;
233
340
  }
234
341
  /** Worker model tier for dispatch routing. */
235
- type DispatchTier = "fast" | "strong";
342
+ type DispatchTier = "fast" | "medium" | "strong";
236
343
  /** Worker instruction tuple */
237
344
  interface Tuple {
238
345
  id: string;
@@ -243,12 +350,22 @@ interface Tuple {
243
350
  expectedOutput: ExpectedOutputContract;
244
351
  /** Tool names available to worker */
245
352
  tools: string[];
246
- /** Step budget (1-10) */
353
+ /** Step budget */
247
354
  steps: number;
248
- /** Worker model tier 'fast' for simple tasks, 'strong' (default) for complex reasoning. */
355
+ /** Worker model tier. Dispatches that omit a tier use the default step budget and the strong worker model. */
249
356
  tier?: DispatchTier | undefined;
357
+ /** Semantic role label for this worker (e.g. "Debugger", "Researcher", "Sysadmin").
358
+ * Shown in the TUI instead of the generic tier name to give the user a clear
359
+ * mental model of what the worker is doing. */
360
+ role?: string | undefined;
250
361
  /** Public orchestrator rationale that preceded this dispatch */
251
362
  orchestratorContext?: string | undefined;
363
+ /** Prior tuple this dispatch is continuing from, when ARC resumes internally. */
364
+ continuationOf?: string | undefined;
365
+ /** Why ARC created this continuation dispatch. */
366
+ continuationReason?: "step_budget" | "user_steering" | undefined;
367
+ /** Compact user-facing checkpoint summary for continuation UI. */
368
+ continuationSummary?: string | undefined;
252
369
  }
253
370
  interface DispatchRecord {
254
371
  tuple: Tuple;
@@ -260,19 +377,6 @@ interface DispatchRecord {
260
377
  /** Worker execution result (artifacts, actions, status) */
261
378
  workerResult?: WorkerResult | undefined;
262
379
  }
263
- interface OodaSnapshot {
264
- observations: string[];
265
- beliefs: string[];
266
- disprovenApproaches: string[];
267
- blockers: string[];
268
- decisionPressure: {
269
- turn: number;
270
- maxTurns: number;
271
- turnsRemaining: number;
272
- dispatchCount: number;
273
- allIncomplete: boolean;
274
- };
275
- }
276
380
  type ReadEpisodeDetail = "summary" | "trace" | "artifacts";
277
381
  interface ReadEpisodeArgs {
278
382
  id: string;
@@ -286,6 +390,16 @@ interface TraceToolCall {
286
390
  args: Record<string, unknown>;
287
391
  }
288
392
  type ArcTraceEvent = {
393
+ scope: "orchestrator";
394
+ phase: "context_assembled";
395
+ turn: number;
396
+ totalChars: number;
397
+ estimatedTokens: number;
398
+ sections: Record<string, {
399
+ chars: number;
400
+ estimatedTokens: number;
401
+ }>;
402
+ } | {
289
403
  scope: "orchestrator";
290
404
  phase: "model_input";
291
405
  turn: number;
@@ -341,6 +455,12 @@ type ArcTraceEvent = {
341
455
  tupleId: string;
342
456
  step: number;
343
457
  toolNames: string[];
458
+ } | {
459
+ scope: "worker";
460
+ phase: "textual_tool_call_rescued";
461
+ tupleId: string;
462
+ step: number;
463
+ toolNames: string[];
344
464
  } | {
345
465
  scope: "worker";
346
466
  phase: "tool_call";
@@ -374,6 +494,8 @@ interface VectorIndex {
374
494
  search(query: string, k: number): Promise<string[]>;
375
495
  load(): Promise<void>;
376
496
  save(): Promise<void>;
497
+ /** Number of documents currently indexed. Used to detect empty index on session resume. */
498
+ size(): Promise<number>;
377
499
  }
378
500
  interface ArtifactStore {
379
501
  set(id: string, artifact: Artifact): Promise<void>;
@@ -433,8 +555,6 @@ interface OrchestratorContext {
433
555
  allIncomplete: boolean;
434
556
  /** Completed dispatches in chronological order */
435
557
  dispatches: DispatchRecord[];
436
- /** Current observe/orient state rendered into the orchestrator prompt */
437
- ooda: OodaSnapshot;
438
558
  /** LCM message store (all conversations) */
439
559
  messageStore?: MessageStore | undefined;
440
560
  /** LCM summary DAG */
@@ -443,6 +563,11 @@ interface OrchestratorContext {
443
563
 
444
564
  interface ArcConfig {
445
565
  task: string;
566
+ /**
567
+ * Image attachments to inject alongside the initial task when the orchestrator
568
+ * model is multimodal. Subsequent task attachments flow through pushTask instead.
569
+ */
570
+ initialAttachments?: StoredAttachment[] | undefined;
446
571
  workDir: string;
447
572
  /** Model ID for the orchestrator */
448
573
  model: string;
@@ -450,13 +575,15 @@ interface ArcConfig {
450
575
  workerModel: string;
451
576
  /** Optional per-tier worker model overrides. Falls back to workerModel when a tier is missing. */
452
577
  workerModelMap?: Partial<Record<DispatchTier, string>> | undefined;
578
+ /** Optional per-tier worker step budgets. Falls back to ARC defaults when unset. */
579
+ workerStepBudgets?: Partial<Record<DispatchTier, number>> | undefined;
453
580
  createModel?: ModelFactory;
454
581
  toolProvider: ToolProvider;
455
582
  /** Agent-provided tool definitions (Bash, Read, Write, etc.) with schemas, execute, and artifact metadata. Harness adds ARC framework tools internally. */
456
583
  tools?: Map<string, Tool> | undefined;
457
584
  /** Max orchestrator turns before stopping (default: 12) */
458
585
  maxTurns?: number;
459
- /** Max steps per worker (default: 5, max: 10) */
586
+ /** Max steps per untiered worker (default: 30). Explicit tiers use workerStepBudgets/default tier budgets. */
460
587
  maxStepsPerWorker?: number;
461
588
  /** Rolling window size for orchestrator context (default: 10) */
462
589
  orchestratorWindowSize?: number;
@@ -470,6 +597,8 @@ interface ArcConfig {
470
597
  transcriptStore?: TranscriptStore;
471
598
  /** Injected artifact store (default: in-memory) */
472
599
  artifactStore?: ArtifactStore;
600
+ /** Custom orchestrator system prompt (appended to default) */
601
+ orchestratorSystemPromptSuffix?: string | undefined;
473
602
  /** Custom worker system prompt (appended to default) */
474
603
  workerSystemPromptSuffix?: string | undefined;
475
604
  /** Custom messages after the core task/budget block */
@@ -478,12 +607,18 @@ interface ArcConfig {
478
607
  providerOptions?: Record<string, unknown> | undefined;
479
608
  /** Optional hook runner for PreToolUse/PostToolUse/BeforeWorker/AfterWorker events */
480
609
  hookRunner?: HookRunner | undefined;
610
+ /** Optional telemetry collector for profiling spans and metrics. */
611
+ telemetry?: HarnessTelemetry | undefined;
481
612
  /** Callback for AskUser orchestrator tool. If provided, AskUser is available to the orchestrator. */
482
613
  askUser?: ((question: string, options?: string[]) => Promise<string>) | undefined;
483
614
  /** Session store for persistence across restarts. */
484
615
  sessionStore?: SessionStore | undefined;
485
616
  /** Session ID to resume. If provided with sessionStore, loop hydrates from saved state. */
486
617
  sessionId?: string | undefined;
618
+ /** Job registry for long-running process orchestration. Default: MemoryJobRegistry. */
619
+ jobRegistry?: JobRegistry | undefined;
620
+ /** Per model-call timeout in milliseconds. Applied to both orchestrator and worker generateText. Default: 180000 (3 min). */
621
+ modelCallTimeoutMs?: number | undefined;
487
622
  }
488
623
  type ArcEvent = {
489
624
  type: "orchestrator_turn";
@@ -499,6 +634,12 @@ type ArcEvent = {
499
634
  } | {
500
635
  type: "dispatch_full";
501
636
  tuple: Tuple;
637
+ } | {
638
+ type: "worker_continuation";
639
+ tupleId: string;
640
+ fromTupleId: string;
641
+ reason: "step_budget" | "user_steering";
642
+ summary: string;
502
643
  } | {
503
644
  type: "worker_progress";
504
645
  tupleId: string;
@@ -523,11 +664,20 @@ type ArcEvent = {
523
664
  type: "ask_user";
524
665
  question: string;
525
666
  options?: string[] | undefined;
667
+ } | {
668
+ type: "interrupt_status";
669
+ summary: string;
526
670
  } | {
527
671
  type: "orchestrator_usage";
528
672
  turn: number;
529
673
  inputTokens?: number;
530
674
  outputTokens?: number;
675
+ } | {
676
+ type: "model_heartbeat";
677
+ scope: "orchestrator" | "worker";
678
+ model: string;
679
+ elapsedMs: number;
680
+ tupleId?: string | undefined;
531
681
  } | {
532
682
  type: "done";
533
683
  output: string;
@@ -537,6 +687,20 @@ type ArcEvent = {
537
687
  } | {
538
688
  type: "text_delta";
539
689
  text: string;
690
+ } | {
691
+ type: "reasoning_delta";
692
+ text: string;
693
+ } | {
694
+ type: "job_event";
695
+ event: JobEvent;
696
+ } | {
697
+ type: "interjection_queued";
698
+ position: number;
699
+ text: string;
700
+ } | {
701
+ type: "interjection_delivered";
702
+ position: number;
703
+ turn: number;
540
704
  };
541
705
  type WorkerProgressEvent = {
542
706
  kind: "model_start";
@@ -573,7 +737,9 @@ type WorkerProgressEvent = {
573
737
  durationMs: number;
574
738
  outputSummary: string;
575
739
  output?: string | undefined;
740
+ artifact?: ToolResultArtifact | undefined;
576
741
  exitCode?: unknown;
742
+ metadata?: Record<string, unknown> | undefined;
577
743
  } | {
578
744
  kind: "tool_error";
579
745
  step: number;
@@ -586,6 +752,25 @@ type WorkerProgressEvent = {
586
752
  status: "complete" | "incomplete" | "failed" | "interrupted";
587
753
  stepsUsed: number;
588
754
  summary: string;
755
+ } | {
756
+ kind: "message_appended";
757
+ message: AgentMessage;
758
+ } | {
759
+ kind: "model_heartbeat";
760
+ step: number;
761
+ elapsedMs: number;
762
+ } | {
763
+ kind: "yield_start";
764
+ step: number;
765
+ toolCallId: string;
766
+ toolName: string;
767
+ threadId: string;
768
+ } | {
769
+ kind: "yield_resume";
770
+ step: number;
771
+ toolCallId: string;
772
+ toolName: string;
773
+ threadId: string;
589
774
  };
590
775
  interface RunWorkerConfig {
591
776
  /** Original top-level task */
@@ -608,6 +793,8 @@ interface RunWorkerConfig {
608
793
  signal?: AbortSignal | undefined;
609
794
  /** Extra text prefixed to worker system prompt */
610
795
  systemPromptPrefix?: string | undefined;
796
+ /** Semantic role label for TUI display (e.g. "Debugger", "Researcher"). */
797
+ role?: string | undefined;
611
798
  /** Extra text appended to worker system prompt */
612
799
  systemPromptSuffix?: string | undefined;
613
800
  /** Provider options passed to generateText (e.g. reasoning config). */
@@ -622,6 +809,18 @@ interface RunWorkerConfig {
622
809
  onTrace?: ((event: ArcTraceEvent) => void) | undefined;
623
810
  /** Optional hook runner for PreToolUse/PostToolUse events */
624
811
  hookRunner?: HookRunner | undefined;
812
+ /** Optional telemetry collector for profiling spans and metrics. */
813
+ telemetry?: HarnessTelemetry | undefined;
814
+ /** Current sub-dispatch depth (0 = top-level worker). Used to enforce max depth. */
815
+ dispatchDepth?: number | undefined;
816
+ /** Existing messages to resume from (for continuations). When set, the worker
817
+ * appends `instruction` as a user message to these messages instead of building
818
+ * a fresh initial prompt. */
819
+ resumeMessages?: AgentMessage[] | undefined;
820
+ /** Steps already consumed before this resume (for accurate step counting). */
821
+ stepsAlreadyUsed?: number | undefined;
822
+ /** Per model-call timeout in milliseconds. Default: 180000 (3 min). */
823
+ modelCallTimeoutMs?: number | undefined;
625
824
  }
626
825
  interface WorkerResult {
627
826
  transcript: AgentMessage[];
@@ -640,47 +839,109 @@ interface ArcRunResult {
640
839
  output: string;
641
840
  events: ArcEvent[];
642
841
  }
842
+ type PushResult = {
843
+ delivered: "as_task";
844
+ } | {
845
+ delivered: "as_interjection";
846
+ position: number;
847
+ } | {
848
+ delivered: "dropped";
849
+ reason: "in_ask_user";
850
+ };
851
+ interface PushTaskOptions {
852
+ /** Multimodal attachments to attach to the resulting user message. */
853
+ attachments?: StoredAttachment[];
854
+ }
643
855
  declare class ArcLoop {
644
856
  private config;
645
857
  private readonly transcriptStore;
646
858
  private readonly vectorIndex;
647
859
  private readonly scratchPad;
648
860
  private readonly artifactStore;
861
+ private readonly jobRegistry;
862
+ /** Non-output job events queued for orchestrator turn-boundary injection. */
863
+ private pendingJobEvents;
864
+ /** Events that need to flow out of the async generator stream (job events, interjection lifecycle). */
865
+ private pendingArcEvents;
866
+ private jobUnsubscribe;
867
+ private readonly telemetry;
649
868
  private messageStore;
650
869
  private summaryDAG;
651
870
  private readonly createModel;
652
871
  private readonly windowSize;
653
872
  private readonly model;
873
+ /** Full orchestrator system prompt (base + optional suffix). */
874
+ private readonly orchestratorSystem;
654
875
  /** Orchestrator tool schemas (for the model) */
655
876
  private readonly orchestratorToolSchemas;
656
877
  /** Orchestrator tool registry (for execute) — excludes dispatch/done (control flow) */
657
878
  private readonly orchestratorToolRegistry;
879
+ /** ARC framework tools that are always available to workers. */
880
+ private readonly workerArcTools;
658
881
  /** Dispatcher deps + mutable state — shared with dispatcher.ts functions */
659
882
  private readonly dispatchDeps;
660
883
  private readonly dispatchState;
661
884
  private orchestratorMessageIndex;
662
885
  private turn;
663
886
  private maxTurns;
887
+ private readonly inFlightDispatches;
888
+ private readonly historySearchesThisTask;
889
+ private readonly historyReadsThisTask;
890
+ private readonly historyExpansionsThisTask;
891
+ private readonly historyExpansionSummaryCache;
892
+ private lastSessionCheckpointAt;
664
893
  /** Per-turn abort controller — cancelled by interrupt(), refreshed each turn. */
665
894
  private turnController;
666
895
  /** Resolver for the next task — set when the loop is waiting between tasks. */
667
896
  private taskResolve;
897
+ private pendingInterjections;
898
+ /** Image attachments for the currently active task. Injected into orchestrator prompts. */
899
+ private currentTaskAttachments;
900
+ /** Consecutive dispatch rounds where every dispatch ended in failed/incomplete. Forces done at threshold. */
901
+ private consecutiveFailedDispatchRounds;
902
+ /** Turn refunds spent on env-only failed rounds within the current task. Bounded by MAX_ENV_TIMEOUT_REFUNDS. */
903
+ private envTimeoutRefundsThisTask;
904
+ private askUserPending;
905
+ private readonly modelCallTimeoutMs;
668
906
  constructor(config: ArcConfig);
907
+ /**
908
+ * Replace agent-provided worker tools for future dispatches.
909
+ * Already-running workers keep the tool set they started with.
910
+ */
911
+ refreshWorkerTools(tools: Map<string, Tool>): void;
912
+ /** Drain events from asynchronous sources (job registry, interjections) into the generator stream. */
913
+ private drainPendingArcEvents;
914
+ private drainPendingJobEvents;
915
+ private drainPendingInterjections;
916
+ /** Release subscriptions and free resources held by this loop. */
917
+ dispose(): void;
669
918
  /**
670
919
  * Interrupt the current turn — cancels in-flight model calls and workers.
671
920
  * The orchestrator loop stays alive and will prompt for user steering.
672
921
  */
673
922
  interrupt(): void;
923
+ private refreshTurnSignal;
674
924
  /** True when the loop is waiting for the next task (between done boundaries). */
675
925
  get idle(): boolean;
926
+ /** Serialize current loop state for UI-level checkpoints. */
927
+ getSessionSnapshot(): SessionSnapshot;
928
+ /** Restore loop state from a prior UI-level checkpoint. */
929
+ restoreSessionSnapshot(snapshot: SessionSnapshot | null): void;
676
930
  /**
677
- * Push a follow-up task into the loop. The orchestrator sees it as
678
- * a new user message with full conversational context from prior tasks.
931
+ * Deliver a message to the orchestrator regardless of loop state.
932
+ * - Idle (waiting for next task): resolves waitForNextTask; the message becomes the next task.
933
+ * - Running: queued as [user interjection], drained at next turn boundary.
934
+ * - Inside askUser: dropped; caller should resolve the askUser promise directly.
679
935
  */
680
- pushTask(task: string): boolean;
936
+ pushTask(msg: string, options?: PushTaskOptions): PushResult;
937
+ private wrappedAskUser;
681
938
  private waitForNextTask;
682
939
  /** Save session snapshot + update meta if a session store is configured. */
683
940
  private saveSession;
941
+ private buildSessionSnapshot;
942
+ private checkpointSession;
943
+ private checkpointAfterDispatchEvent;
944
+ private clearCompletedInFlightDispatches;
684
945
  /** Reset per-task state while keeping full conversation history. */
685
946
  private resetForNewTask;
686
947
  /**
@@ -692,17 +953,36 @@ declare class ArcLoop {
692
953
  * Breaks after the first `done` event — does not wait for follow-up tasks.
693
954
  */
694
955
  run(signal?: AbortSignal): Promise<ArcRunResult>;
956
+ private runDispatchCalls;
957
+ private continueStepBudgetDispatches;
958
+ private buildContinuationDispatchArgs;
959
+ private completeCurrentTask;
695
960
  /** Append a message to the LCM message store (single source of truth) */
696
961
  private appendOrchestratorMessage;
697
962
  private findEpisodeRecordBySummaryId;
963
+ private searchHistoryWithGuardrails;
964
+ private readHistoryWithGuardrails;
965
+ private expandHistoryWithGuardrails;
966
+ private summarizeHistoryExpansion;
698
967
  private buildContext;
699
968
  private buildOrchestratorMessages;
700
969
  private buildTaskContextText;
970
+ private buildAttachmentMessage;
701
971
  private readEpisode;
702
972
  /**
703
- * Handle a turn interrupt: prompt user for steering, inject into context.
973
+ * Handle a turn interrupt without routing every follow-up through the
974
+ * orchestrator. Status questions are answered from the saved worker record;
975
+ * steering/continue become an internal worker continuation.
704
976
  */
705
977
  private handleInterrupt;
978
+ /**
979
+ * Ask the user whether to keep retrying after MAX_ENV_TIMEOUT_REFUNDS
980
+ * consecutive model-layer failures, or stop so they can give fresh
981
+ * instructions. Returns "retry" or "stop". Falls back to "stop" when no
982
+ * askUser callback is configured (e.g. headless runs).
983
+ */
984
+ private askRetryOrStop;
985
+ private formatInterruptStatus;
706
986
  }
707
987
 
708
988
  /**
@@ -720,6 +1000,73 @@ declare function formatDispatchForPrompt(record: DispatchRecord, options?: {
720
1000
 
721
1001
  declare function cloneForTrace<T>(value: T): T;
722
1002
 
1003
+ /** Default budget for dispatches that do not request a tier. */
1004
+ declare const DEFAULT_MAX_STEPS_PER_WORKER = 30;
1005
+ /** Per-tier initial budgets. Strong is intentionally larger for implementation loops. */
1006
+ declare const DEFAULT_WORKER_STEP_BUDGETS: Record<DispatchTier, number>;
1007
+ /** Hard ceiling on worker steps after RequestMoreSteps extensions. */
1008
+ declare const ABSOLUTE_MAX_WORKER_STEPS = 60;
1009
+ /** Recommended extension size when workers need to keep their current context. */
1010
+ declare const REQUEST_MORE_STEPS_INCREMENT = 15;
1011
+
1012
+ interface Episode {
1013
+ id: string;
1014
+ taskId: string;
1015
+ sessionId: string;
1016
+ tupleId?: string;
1017
+ summary: string;
1018
+ createdAt: number;
1019
+ completedAt?: number;
1020
+ /** Arbitrary metadata for routing/display */
1021
+ metadata?: Record<string, unknown>;
1022
+ }
1023
+ interface EpisodeTrace {
1024
+ episodeId: string;
1025
+ messages: Array<{
1026
+ role: string;
1027
+ content: string;
1028
+ [key: string]: unknown;
1029
+ }>;
1030
+ createdAt: number;
1031
+ }
1032
+ interface EpisodeStore {
1033
+ addEpisode(episode: Episode): Promise<void>;
1034
+ addTrace(trace: EpisodeTrace): Promise<void>;
1035
+ getEpisode(id: string): Promise<Episode | null>;
1036
+ getTrace(episodeId: string): Promise<EpisodeTrace | null>;
1037
+ getEpisodesByTask(taskId: string): Promise<Episode[]>;
1038
+ getEpisodesBySession(sessionId: string): Promise<Episode[]>;
1039
+ getRecentEpisodes(limit: number): Promise<Episode[]>;
1040
+ evictTraces(olderThan: number): Promise<number>;
1041
+ }
1042
+ interface SessionMemo {
1043
+ id: string;
1044
+ sessionId: string;
1045
+ content: string;
1046
+ sourceEpisodeIds?: string[];
1047
+ createdAt: number;
1048
+ }
1049
+ interface SessionMemoStore {
1050
+ addMemo(memo: SessionMemo): Promise<void>;
1051
+ getMemo(id: string): Promise<SessionMemo | null>;
1052
+ getMemosBySession(sessionId: string): Promise<SessionMemo[]>;
1053
+ getRecentMemos(limit: number): Promise<SessionMemo[]>;
1054
+ }
1055
+ interface LongTermMemory {
1056
+ id: string;
1057
+ content: string;
1058
+ category: string;
1059
+ createdAt: number;
1060
+ updatedAt: number;
1061
+ }
1062
+ interface LongTermStore {
1063
+ addMemory(memory: LongTermMemory): Promise<void>;
1064
+ getMemory(id: string): Promise<LongTermMemory | null>;
1065
+ getAllMemories(): Promise<LongTermMemory[]>;
1066
+ getMemoriesByCategory(category: string): Promise<LongTermMemory[]>;
1067
+ updateMemory(id: string, updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>): Promise<void>;
1068
+ deleteMemory(id: string): Promise<void>;
1069
+ }
723
1070
  /** In-memory transcript store for testing */
724
1071
  declare class MemoryTranscriptStore implements TranscriptStore {
725
1072
  private transcripts;
@@ -735,6 +1082,7 @@ declare class MemoryVectorIndex implements VectorIndex {
735
1082
  search(query: string, k: number): Promise<string[]>;
736
1083
  load(): Promise<void>;
737
1084
  save(): Promise<void>;
1085
+ size(): Promise<number>;
738
1086
  }
739
1087
  /** In-memory scratch pad for testing */
740
1088
  declare class MemoryScratchPad implements ScratchPad {
@@ -761,6 +1109,37 @@ declare class MemorySessionStore implements SessionStore {
761
1109
  saveMeta(id: string, meta: SessionMeta): Promise<void>;
762
1110
  list(): Promise<SessionMeta[]>;
763
1111
  }
1112
+ /** In-memory episode store */
1113
+ declare class InMemoryEpisodeStore implements EpisodeStore {
1114
+ private episodes;
1115
+ private traces;
1116
+ addEpisode(episode: Episode): Promise<void>;
1117
+ addTrace(trace: EpisodeTrace): Promise<void>;
1118
+ getEpisode(id: string): Promise<Episode | null>;
1119
+ getTrace(episodeId: string): Promise<EpisodeTrace | null>;
1120
+ getEpisodesByTask(taskId: string): Promise<Episode[]>;
1121
+ getEpisodesBySession(sessionId: string): Promise<Episode[]>;
1122
+ getRecentEpisodes(limit: number): Promise<Episode[]>;
1123
+ evictTraces(_olderThan: number): Promise<number>;
1124
+ }
1125
+ /** In-memory session memo store */
1126
+ declare class InMemorySessionMemoStore implements SessionMemoStore {
1127
+ private memos;
1128
+ addMemo(memo: SessionMemo): Promise<void>;
1129
+ getMemo(id: string): Promise<SessionMemo | null>;
1130
+ getMemosBySession(sessionId: string): Promise<SessionMemo[]>;
1131
+ getRecentMemos(limit: number): Promise<SessionMemo[]>;
1132
+ }
1133
+ /** In-memory long-term store */
1134
+ declare class InMemoryLongTermStore implements LongTermStore {
1135
+ private memories;
1136
+ addMemory(memory: LongTermMemory): Promise<void>;
1137
+ getMemory(id: string): Promise<LongTermMemory | null>;
1138
+ getAllMemories(): Promise<LongTermMemory[]>;
1139
+ getMemoriesByCategory(category: string): Promise<LongTermMemory[]>;
1140
+ updateMemory(id: string, updates: Partial<Pick<LongTermMemory, 'content' | 'category' | 'updatedAt'>>): Promise<void>;
1141
+ deleteMemory(id: string): Promise<void>;
1142
+ }
764
1143
 
765
1144
  /**
766
1145
  * File-based transcript store.
@@ -793,4 +1172,192 @@ declare class FsArtifactStore implements ArtifactStore {
793
1172
  private save;
794
1173
  }
795
1174
 
796
- export { AnyTool, type ArcConfig, type ArcEvent, ArcLoop, type ArcRunResult, type ArcTraceEvent, type Artifact, type ArtifactStore, type DispatchRecord, type DispatchTier, type ExpectedArtifact, type ExpectedOutputContract, FsArtifactStore, FsTranscriptStore, MemoryArtifactStore, MemoryMessageStore, MemoryScratchPad, MemorySessionStore, MemorySummaryDAG, MemoryTranscriptStore, MemoryVectorIndex, type MessageStore, ModelFactory, type OodaSnapshot, type OrchestratorContext, type ReadEpisodeArgs, type ReadEpisodeDetail, type RunWorkerConfig, type ScratchPad, type SessionMeta, type SessionSnapshot, type SessionStore, type StoredMessage, type SummaryDAG, type SummaryNode, type Tool, ToolProvider, ToolResult, ToolResultArtifact, type TraceToolCall, type Transcript, type TranscriptStore, type Tuple, type VectorIndex, type WorkerProgressEvent, type WorkerResult, cloneForTrace, formatDispatchForPrompt };
1175
+ /**
1176
+ * CompositeToolProvider delegates to the first provider with matching capabilities.
1177
+ *
1178
+ * Useful when multiple tool providers exist (e.g., a sandbox executor + a local
1179
+ * file-system executor) and you want to route tool calls based on capability.
+ *
+ * Because the first matching provider wins, the order of the array passed to
+ * the constructor acts as the priority order.
+ *
+ * NOTE(review): how `capabilities()` combines the wrapped providers'
+ * capabilities, and what happens when no provider supports a requested
+ * operation, are not visible from this declaration.
1180
+ */
1181
+ declare class CompositeToolProvider implements ToolProvider {
1182
+ private providers;
1183
+ constructor(providers: ToolProvider[]);
1184
+ capabilities(): ToolProviderCapabilities;
1185
+ /** Pick the first provider that supports the requested capability. */
1186
+ private pick;
1187
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
1188
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
1189
+ writeFile(path: string, content: string): Promise<ToolResult>;
1190
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
1191
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
1192
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
1193
+ }
1194
+
1195
+ /**
1196
+ * Executor interface for sandboxed code execution (e.g., E2B sandbox).
1197
+ *
1198
+ * Implementations wrap sandbox APIs into a uniform shape that
1199
+ * E2BToolProvider can delegate to.
+ *
+ * `batch` is declared optional, so callers must check that it exists before
+ * invoking it. `destroy` is required on every implementation.
1200
+ */
1201
+ interface E2BExecutor {
1202
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
1203
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
1204
+ writeFile(path: string, content: string): Promise<ToolResult>;
1205
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
1206
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
1207
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
1208
+ batch?(ops: BatchOp[]): Promise<BatchResult[]>;
1209
+ destroy(): Promise<void>;
1210
+ }
1211
+ /**
1212
+ * E2BToolProvider wraps an E2BExecutor into a ToolProvider interface
1213
+ * suitable for use with the harness agent loop.
+ *
+ * The class declares `implements ToolProvider` so the compiler checks the
+ * contract described above; its public methods mirror the tool operations
+ * (`bash`, `readFile`, `writeFile`, `editFile`, `glob`, `grep`) of the
+ * wrapped executor.
+ *
+ * NOTE(review): the executor's `destroy` and optional `batch` are not
+ * re-exposed here, so sandbox teardown presumably stays with whoever created
+ * the executor — confirm against callers.
1214
+ */
1215
+ declare class E2BToolProvider implements ToolProvider {
1216
+ private executor;
1217
+ constructor(executor: E2BExecutor);
1218
+ capabilities(): ToolProviderCapabilities;
1219
+ bash(command: string, options?: BashOptions): Promise<ToolResult>;
1220
+ readFile(path: string, options?: ReadOptions): Promise<ToolResult>;
1221
+ writeFile(path: string, content: string): Promise<ToolResult>;
1222
+ editFile(path: string, oldText: string, newText: string): Promise<ToolResult>;
1223
+ glob(pattern: string, options?: GlobOptions): Promise<ToolResult>;
1224
+ grep(pattern: string, path?: string, options?: GrepOptions): Promise<ToolResult>;
1225
+ }
1226
+
1227
+ /**
+ * Options for constructing a ControlPlaneE2BExecutor.
+ *
+ * `baseUrl` and `apiKey` are required; `templateId` may be omitted.
+ *
+ * NOTE(review): ControlPlaneE2BExecutor stores `templateId` as a non-optional
+ * string, so a default is presumably applied when it is omitted — the value
+ * is not visible here.
+ */
1228
+ interface ControlPlaneE2BExecutorOptions {
1229
+ baseUrl: string;
1230
+ apiKey: string;
1231
+ templateId?: string;
1232
+ }
1233
+ /**
1234
+ * Base class for control-plane E2B sandbox executors.
1235
+ *
1236
+ * Provides the configuration surface and stubs that concrete implementations
1237
+ * (e.g., Samyx control-plane API) extend. Subclasses must override the
1238
+ * tool-execution methods with actual sandbox API calls.
+ *
+ * The configuration fields (`baseUrl`, `apiKey`, `templateId`) are
+ * `protected`, so subclasses can read them. Beyond the E2BExecutor contract
+ * this class also declares byte-level file helpers (`writeFileBytes`,
+ * `readFileBytes`) and an `activeSandboxId` getter that may be `undefined`.
+ *
+ * NOTE(review): what the un-overridden stubs do when called (throw, or
+ * return an error ToolResult) is not visible from this declaration.
1239
+ */
1240
+ declare class ControlPlaneE2BExecutor implements E2BExecutor {
1241
+ protected baseUrl: string;
1242
+ protected apiKey: string;
1243
+ protected templateId: string;
1244
+ constructor(options: ControlPlaneE2BExecutorOptions);
1245
+ bash(_command: string, _options?: BashOptions): Promise<ToolResult>;
1246
+ readFile(_path: string, _options?: ReadOptions): Promise<ToolResult>;
1247
+ writeFile(_path: string, _content: string): Promise<ToolResult>;
1248
+ editFile(_path: string, _oldText: string, _newText: string): Promise<ToolResult>;
1249
+ glob(_pattern: string, _options?: GlobOptions): Promise<ToolResult>;
1250
+ grep(_pattern: string, _path?: string, _options?: GrepOptions): Promise<ToolResult>;
1251
+ writeFileBytes(_path: string, _data: Uint8Array): Promise<void>;
1252
+ readFileBytes(_path: string): Promise<Uint8Array>;
1253
+ get activeSandboxId(): string | undefined;
1254
+ destroy(): Promise<void>;
1255
+ }
1256
+
1257
+ /**
1258
+ * Permission request sent to the resolver for each tool call.
1259
+ *
1260
+ * The `toolName` key is always present. Additional tool arguments
1261
+ * are forwarded as-is for context-sensitive permission decisions.
+ *
+ * Those additional keys are typed `unknown`, so a resolver has to narrow
+ * each value before relying on it.
1262
+ */
1263
+ interface PermissionRequest {
1264
+ toolName: string;
1265
+ [toolArg: string]: unknown;
1266
+ }
1267
+ /**
+ * Resolver function that decides whether a tool call is permitted.
+ *
+ * Receives the PermissionRequest for the call and resolves to a boolean
+ * (presumably `true` means allowed — confirm against
+ * PermissionManager.canExecute).
+ */
1268
+ type PermissionResolver = (request: PermissionRequest) => Promise<boolean>;
1269
+ /**
+ * Permission mode for the manager: exactly one of the string literals
+ * `deny_all`, `allow_all`, or `ask`.
+ */
1270
+ type PermissionMode = "deny_all" | "allow_all" | "ask";
1271
+ /**
1272
+ * PermissionManager controls whether tool calls are allowed.
1273
+ *
1274
+ * Three modes:
1275
+ * - `deny_all`: reject everything
1276
+ * - `allow_all`: accept everything
1277
+ * - `ask`: delegate to a resolver function
+ *
+ * NOTE(review): the constructor's `resolver` is optional; what `ask` mode
+ * does when no resolver was supplied is not visible from this declaration.
1278
+ */
1279
+ declare class PermissionManager {
1280
+ private mode;
1281
+ private resolver;
1282
+ constructor(mode: PermissionMode, resolver?: PermissionResolver);
1283
+ /** Check whether a tool call is permitted. */
1284
+ canExecute(request: PermissionRequest): Promise<boolean>;
1285
+ }
1286
+
1287
+ /**
+ * Context passed as the second argument to the `execute` function returned
+ * by `ResiliencePipeline.build()`.
+ *
+ * NOTE(review): field meanings are inferred from their names — `attempt` and
+ * `totalAttempts` look like retry counters, `startTime` a timestamp and
+ * `signal` a cancellation signal. The time unit and whether `attempt` is
+ * 0- or 1-based are not visible here.
+ */
+ interface ExecutionContext {
1288
+ attempt: number;
1289
+ totalAttempts: number;
1290
+ startTime: number;
1291
+ signal: AbortSignal;
1292
+ }
1293
+ /**
+ * Optional settings accepted by the ResiliencePipeline constructor. Both
+ * fields may be omitted.
+ *
+ * NOTE(review): `timeout` is presumably in milliseconds, matching
+ * `ResiliencePipeline.timeout(ms)`; defaults are not visible here.
+ */
+ interface PipelineOptions {
1294
+ timeout?: number;
1295
+ retryCount?: number;
1296
+ }
1297
+ /**
+ * Builder for an executable pipeline configured with a timeout and a retry
+ * count.
+ *
+ * `timeout` and `retries` each return a ResiliencePipeline, so calls can be
+ * chained before `build()`.
+ *
+ * NOTE(review): whether the setters mutate and return `this` or return a new
+ * instance is not visible from this declaration.
+ */
+ declare class ResiliencePipeline {
1298
+ private options;
1299
+ constructor(options?: PipelineOptions);
1300
+ /** Set a timeout in milliseconds for execution. */
1301
+ timeout(ms: number): ResiliencePipeline;
1302
+ /** Set retry count. */
1303
+ retries(count: number): ResiliencePipeline;
1304
+ /**
+ * Build the executable pipeline.
+ *
+ * The returned object exposes one generic `execute` function, which takes
+ * the async operation `fn` plus an ExecutionContext and resolves to the
+ * same type `T` that `fn` resolves to.
+ */
1305
+ build(): {
1306
+ execute: <T>(fn: () => Promise<T>, ctx: ExecutionContext) => Promise<T>;
1307
+ };
1308
+ }
1309
+ /**
+ * Create a new resilience pipeline builder.
+ *
+ * Takes no arguments and returns a ResiliencePipeline, whose setters can
+ * then be chained, e.g. `resilience().timeout(5000).retries(2).build()`.
+ */
1310
+ declare function resilience(): ResiliencePipeline;
1311
+
1312
+ /**
1313
+ * Profile types for ArcLoop thread configuration.
1314
+ *
1315
+ * Profiles control which tools a worker thread can use and which
1316
+ * model tier it runs at.
1317
+ */
1318
+ /**
+ * Declaration of a worker profile — matched by name in process context.
+ *
+ * All six fields are required.
+ */
1319
+ interface ProfileDeclaration {
1320
+ name: string;
1321
+ /** Descriptive signature for the orchestrator (e.g., "question:string -> evidence:string[]") */
1322
+ signature: string;
1323
+ /** Tool names available to workers with this profile */
1324
+ tools: string[];
1325
+ /** Worker model tier: one of "fast", "medium" or "strong". */
1326
+ model: "fast" | "medium" | "strong";
1327
+ /**
+ * Worker step budget.
+ *
+ * NOTE(review): this module also exports ABSOLUTE_MAX_WORKER_STEPS; whether
+ * `maxSteps` is clamped against it is not visible from this declaration.
+ */
1328
+ maxSteps: number;
1329
+ /** Background/injection context for the worker system prompt */
1330
+ background: string;
1331
+ }
1332
+ /**
+ * A profile bound to a process, with its declaration.
+ *
+ * NOTE(review): both `name` and `declaration.name` exist; whether they must
+ * be equal is not visible from this declaration.
+ */
1333
+ interface ProcessProfile {
1334
+ name: string;
1335
+ declaration: ProfileDeclaration;
1336
+ }
1337
+
1338
+ /**
1339
+ * In-memory reference implementation of JobRegistry.
1340
+ *
1341
+ * Jobs here never actually spawn processes — callers use `simulateOutput` and
1342
+ * `simulateExit` to drive state. Intended for tests
1343
+ * and agents that don't need filesystem-backed persistence.
+ *
+ * No separate failure hook is declared on this class; a failed job is
+ * presumably simulated by passing a non-zero `exitCode` to `simulateExit`
+ * (confirm against the implementation).
+ *
+ * `subscribe` returns a function (presumably the unsubscribe callback), and
+ * `snapshot` is the only synchronous read of job statuses.
+ *
+ * NOTE(review): `cancel` and `prune` name their parameters `_signal` and
+ * `_olderThanMs`; the underscore prefix usually marks an unused argument.
1344
+ */
1345
+ declare class MemoryJobRegistry implements JobRegistry {
1346
+ private jobs;
1347
+ private handlers;
1348
+ private seq;
1349
+ start(opts: JobStartOptions, startedBy?: JobSpec["startedBy"]): Promise<JobStatus>;
1350
+ check(id: string): Promise<JobStatus | null>;
1351
+ cancel(id: string, _signal?: "TERM" | "KILL"): Promise<JobStatus | null>;
1352
+ list(): Promise<JobStatus[]>;
1353
+ tail(id: string, lines?: number): Promise<string[]>;
1354
+ waitFor(id: string, signal?: AbortSignal): Promise<JobStatus>;
1355
+ subscribe(handler: (e: JobEvent) => void): () => void;
1356
+ snapshot(): JobStatus[];
1357
+ prune(_olderThanMs?: number): Promise<number>;
1358
+ simulateOutput(id: string, chunk: string): void;
1359
+ simulateExit(id: string, exitCode: number): void;
1360
+ private emit;
1361
+ }
1362
+
1363
+ export { ABSOLUTE_MAX_WORKER_STEPS, AnyTool, type ArcConfig, type ArcEvent, ArcLoop, type ArcRunResult, type ArcTraceEvent, type Artifact, type ArtifactStore, BashOptions, BatchOp, BatchResult, CompositeToolProvider, ControlPlaneE2BExecutor, type ControlPlaneE2BExecutorOptions, DEFAULT_MAX_STEPS_PER_WORKER, DEFAULT_WORKER_STEP_BUDGETS, type DispatchRecord, type DispatchTier, type E2BExecutor, E2BToolProvider, type Episode, type EpisodeStore, type EpisodeTrace, type ExecutionContext, type ExpectedArtifact, type ExpectedOutputContract, FsArtifactStore, FsTranscriptStore, GlobOptions, GrepOptions, HarnessTelemetry, type HookCallback, type HookContext, type HookDecision, type HookEventName, HookRunner, InMemoryEpisodeStore, InMemoryLongTermStore, InMemorySessionMemoStore, type JobEvent, type JobKind, type JobRegistry, type JobSpec, type JobStartOptions, type JobState, type JobStatus, type JobTransport, type LongTermMemory, type LongTermStore, MemoryArtifactStore, MemoryJobRegistry, MemoryMessageStore, MemoryScratchPad, MemorySessionStore, MemorySummaryDAG, MemoryTranscriptStore, MemoryVectorIndex, type MessageStore, ModelFactory, type OrchestratorContext, PermissionManager, type PermissionMode, type PermissionRequest, type PermissionResolver, type ProcessProfile, type ProfileDeclaration, type PushResult, REQUEST_MORE_STEPS_INCREMENT, type ReadEpisodeArgs, type ReadEpisodeDetail, ReadOptions, type RunWorkerConfig, type ScratchPad, type SessionMemo, type SessionMemoStore, type SessionMeta, type SessionSnapshot, type SessionStore, type StoredAttachment, type StoredMessage, type SummaryDAG, type SummaryNode, type Tool, type ToolExecutionMode, ToolProvider, ToolProviderCapabilities, ToolResult, ToolResultArtifact, type TraceToolCall, type Transcript, type TranscriptStore, type Tuple, type VectorIndex, type WorkerProgressEvent, type WorkerResult, cloneForTrace, formatDispatchForPrompt, resilience };