react-agent-harness 0.0.1__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {react_agent_harness-0.0.1/react_agent_harness.egg-info → react_agent_harness-0.0.2}/PKG-INFO +1 -1
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/README.md +93 -25
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/agents/base.py +104 -26
- react_agent_harness-0.0.2/harness/checkpoint.py +165 -0
- react_agent_harness-0.0.2/harness/hitl.py +195 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/runtime.py +175 -22
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/orchestrator/planner.py +117 -12
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/pyproject.toml +1 -1
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2/react_agent_harness.egg-info}/PKG-INFO +1 -1
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/react_agent_harness.egg-info/SOURCES.txt +2 -0
- react_agent_harness-0.0.2/tests/test_checkpoint_resume.py +1119 -0
- react_agent_harness-0.0.1/harness/hitl.py +0 -282
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/LICENSE +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/agents/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/annotation.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/events.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/executor_bridge.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/llm/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/llm/openai.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/otel.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/harness/utils.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/episodic_lance.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/manager.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/redis_store.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/stores.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/memory/working.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/orchestrator/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/react_agent_harness.egg-info/dependency_links.txt +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/react_agent_harness.egg-info/requires.txt +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/react_agent_harness.egg-info/top_level.txt +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/setup.cfg +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_agents_base.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_annotation.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_executor_bridge.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_http_fetch.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_mcp_adapter.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_memory.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_openai_llm.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_orchestrator.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_otel.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_parse_action_json.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_redis_store.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_streaming.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_vision.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tests/test_working_memory.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/builtin/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/builtin/fetch_image.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/builtin/http_fetch.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/mcp/__init__.py +0 -0
- {react_agent_harness-0.0.1 → react_agent_harness-0.0.2}/tools/mcp/adapter.py +0 -0
|
@@ -37,7 +37,8 @@ harness/runtime.py AgentRuntime — single entry point, wire once run a
|
|
|
37
37
|
harness/events.py BusEvent + EventType — canonical event vocabulary
|
|
38
38
|
harness/llm/openai.py OpenAILLM — OpenAI adapter with usage + cost tracking
|
|
39
39
|
harness/annotation.py Annotation store + AnnotationHook — RLHF trajectory capture
|
|
40
|
-
harness/hitl.py HITL approval gate — interactive CLI,
|
|
40
|
+
harness/hitl.py HITL approval gate — interactive CLI, session-allow list
|
|
41
|
+
harness/checkpoint.py CheckpointStore + _ResumeHint + maybe_resume_key — pluggable run-state persistence (file + Redis); auto-resume built into dispatch_stream / run_stream
|
|
41
42
|
harness/otel.py OTELHook — OpenTelemetry span exporter (opt-in)
|
|
42
43
|
harness/executor_bridge.py ExecutorBridge + ExecutorTool — controlled subprocess launcher with optional Docker sandboxing
|
|
43
44
|
orchestrator/planner.py Hybrid DAG orchestrator — plan, replan, synthesize
|
|
@@ -564,7 +565,7 @@ agents.register(AgentConfig(
|
|
|
564
565
|
hitl_tools=["write_file", "delete_file"], # these two require human approval
|
|
565
566
|
))
|
|
566
567
|
|
|
567
|
-
# AgentRuntime auto-creates a
|
|
568
|
+
# AgentRuntime auto-creates a FileCheckpointStore when hitl_tools are present.
|
|
568
569
|
runtime = AgentRuntime(...)
|
|
569
570
|
await runtime.run_agent("file_agent", "clean up the logs directory")
|
|
570
571
|
```
|
|
@@ -573,19 +574,19 @@ Checkpoints are written to `~/.agent-harness/checkpoints/` by default.
|
|
|
573
574
|
Override the directory:
|
|
574
575
|
|
|
575
576
|
```python
|
|
576
|
-
from harness.
|
|
577
|
+
from harness.checkpoint import FileCheckpointStore
|
|
577
578
|
|
|
578
|
-
runtime = AgentRuntime(...,
|
|
579
|
+
runtime = AgentRuntime(..., checkpoint_store=FileCheckpointStore("/var/lib/myapp/ckp"))
|
|
579
580
|
```
|
|
580
581
|
|
|
581
582
|
For Redis-backed storage (shared across processes or machines):
|
|
582
583
|
|
|
583
584
|
```python
|
|
584
585
|
import redis.asyncio as aioredis
|
|
585
|
-
from harness.
|
|
586
|
+
from harness.checkpoint import RedisCheckpointStore
|
|
586
587
|
|
|
587
588
|
client = aioredis.from_url("redis://localhost:6379", decode_responses=True)
|
|
588
|
-
runtime = AgentRuntime(...,
|
|
589
|
+
runtime = AgentRuntime(..., checkpoint_store=RedisCheckpointStore(client))
|
|
589
590
|
```
|
|
590
591
|
|
|
591
592
|
When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
@@ -597,46 +598,112 @@ When the agent calls `write_file` or `delete_file` a prompt appears:
|
|
|
597
598
|
Tool: delete_file
|
|
598
599
|
Args: {"path": "/var/log/app.log"}
|
|
599
600
|
Agent: file_agent step=2
|
|
600
|
-
Run: 3f7a1b2c
|
|
601
|
+
Run: 3f7a1b2c-...:file_agent
|
|
601
602
|
ID: a1b2-c3d4
|
|
602
603
|
────────────────────────────────────────────────────────────
|
|
603
|
-
|
|
604
|
+
y = approve once | a = allow 'delete_file' for session | n = reject | <text> = steer
|
|
605
|
+
Ctrl-C to pause. Resume: python my_script.py --resume 3f7a1b2c-...:file_agent
|
|
606
|
+
────────────────────────────────────────────────────────────
|
|
607
|
+
Approve? [y/n/a/correction]:
|
|
604
608
|
```
|
|
605
609
|
|
|
606
610
|
**Prompt semantics:**
|
|
607
611
|
|
|
608
612
|
| Input | Effect |
|
|
609
613
|
|---|---|
|
|
610
|
-
| `y` / `yes` | Tool runs |
|
|
614
|
+
| `y` / `yes` | Tool runs once |
|
|
611
615
|
| `n` / `no` | Tool skipped; agent sees a rejection observation |
|
|
616
|
+
| `a` / `allow` | Tool runs **and** added to session allow-list; no further prompts for this tool (or command prefix for shell-like tools) |
|
|
612
617
|
| any other text | Correction: tool skipped, text injected into `WorkingMemory` as a user message; LLM self-corrects on the next step |
|
|
613
618
|
|
|
619
|
+
For shell-like tools (`shell`, `bash`, `run`, `exec`), `a` allows the **first
|
|
620
|
+
word** of the command — e.g. typing `a` when approving `shell git commit ...`
|
|
621
|
+
allows all `git` commands for the session but still prompts for `shell rm ...`.
|
|
622
|
+
|
|
614
623
|
**Wall-time budget** is suspended while waiting for input — human think-time
|
|
615
624
|
does not count against `max_wall_time_seconds`.
|
|
616
625
|
|
|
626
|
+
### Step-level checkpointing
|
|
627
|
+
|
|
628
|
+
Enable periodic crash-resume independent of HITL:
|
|
629
|
+
|
|
630
|
+
```python
|
|
631
|
+
AgentConfig(
|
|
632
|
+
agent_id="long_runner",
|
|
633
|
+
...
|
|
634
|
+
checkpoint_every=3, # checkpoint before every 3rd step (0 = disabled)
|
|
635
|
+
)
|
|
636
|
+
```
|
|
637
|
+
|
|
638
|
+
The same `CheckpointStore` is used for both HITL and step checkpoints. Resume
|
|
639
|
+
works with `runtime.resume(key)` regardless of how the checkpoint was created.
|
|
640
|
+
|
|
641
|
+
### Checkpoint namespacing
|
|
642
|
+
|
|
643
|
+
Each agent writes to its own key so orchestrated runs never overwrite each other:
|
|
644
|
+
|
|
645
|
+
| Path | Checkpoint key | Stored at |
|
|
646
|
+
|---|---|---|
|
|
647
|
+
| Single-agent (`run_agent`, `run_routed`) | `<run_id>:<agent_id>` | `~/.agent-harness/checkpoints/<run_id>:<agent_id>.json` |
|
|
648
|
+
| Orchestrated (`run`, `run_stream`) | `<run_id>` (orchestrator) + `<run_id>:<agent_id>` (each agent) | one file per agent, one file for the orchestrator |
|
|
649
|
+
|
|
650
|
+
The orchestrator checkpoint stores the goal, the full plan, completed task
|
|
651
|
+
results, and the replan count. It is updated after each parallel batch
|
|
652
|
+
completes and deleted on clean `DONE`.
|
|
653
|
+
|
|
617
654
|
### Crash / Ctrl-C resume
|
|
618
655
|
|
|
619
|
-
The
|
|
620
|
-
|
|
656
|
+
The checkpoint (step number + full `WorkingMemory`) is written before every
|
|
657
|
+
HITL prompt and (if `checkpoint_every > 0`) at each periodic step.
|
|
658
|
+
|
|
659
|
+
**What the banner prints:**
|
|
621
660
|
|
|
661
|
+
- **Single-agent run**: `--resume <run_id>:<agent_id>` — restores just that agent.
|
|
662
|
+
- **Orchestrated run**: `--resume <run_id>` — restores the full orchestration.
|
|
663
|
+
|
|
664
|
+
```
|
|
665
|
+
Run interrupted — checkpoint saved.
|
|
666
|
+
Resume: python my_script.py --resume 3f7a1b2c-...
|
|
622
667
|
```
|
|
623
|
-
|
|
668
|
+
|
|
669
|
+
**Auto-resume — no script changes required.** When `checkpoint_store` is
|
|
670
|
+
configured, `dispatch_stream` and `run_stream` detect `--resume <key>` in
|
|
671
|
+
`sys.argv` automatically. Your existing script resumes transparently:
|
|
672
|
+
|
|
673
|
+
```bash
|
|
674
|
+
python my_script.py --resume 3f7a1b2c-...
|
|
624
675
|
```
|
|
625
676
|
|
|
626
|
-
|
|
677
|
+
The runtime detects the flag, loads the checkpoint, and streams events
|
|
678
|
+
identically to a fresh run. Scripts need zero resume-specific code.
|
|
679
|
+
|
|
680
|
+
For **explicit control** — streaming resume or blocking resume:
|
|
627
681
|
|
|
628
682
|
```python
|
|
629
|
-
|
|
683
|
+
# streaming (same event sequence as the original run)
|
|
684
|
+
async for event in runtime.resume_stream("3f7a1b2c-..."):
|
|
685
|
+
...
|
|
630
686
|
|
|
631
|
-
|
|
687
|
+
# blocking
|
|
688
|
+
result = await runtime.resume("3f7a1b2c-...:file_agent") # single-agent
|
|
689
|
+
result = await runtime.resume("3f7a1b2c-...") # orchestrated
|
|
632
690
|
```
|
|
633
691
|
|
|
634
|
-
`
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
692
|
+
Both `resume_stream` and `resume` auto-detect the checkpoint type (agent vs
|
|
693
|
+
orchestrator) from the stored data and call the right path.
|
|
694
|
+
|
|
695
|
+
If you need the resume key from `sys.argv` directly:
|
|
696
|
+
|
|
697
|
+
```python
|
|
698
|
+
from harness.checkpoint import maybe_resume_key
|
|
699
|
+
|
|
700
|
+
key = maybe_resume_key() # returns None if --resume is absent
|
|
701
|
+
```
|
|
638
702
|
|
|
639
|
-
|
|
703
|
+
**Orchestrated resume** skips completed tasks (injects their stored results
|
|
704
|
+
directly into the synthesis step) and re-runs only the tasks that had not yet
|
|
705
|
+
finished. If an individual agent's HITL checkpoint is still on disk, that agent
|
|
706
|
+
is resumed at its saved step rather than re-run from scratch.
|
|
640
707
|
|
|
641
708
|
### Correction steering and replanning
|
|
642
709
|
|
|
@@ -646,9 +713,10 @@ When the human types a correction instead of y/n:
|
|
|
646
713
|
`WorkingMemory`. The LLM sees it on the next think step and self-corrects
|
|
647
714
|
without replanning. Suitable for redirecting tool choice or adjusting
|
|
648
715
|
parameters.
|
|
649
|
-
- **
|
|
650
|
-
|
|
651
|
-
|
|
716
|
+
- **Orchestrated run**: the correction steers only the current agent. Because
|
|
717
|
+
the orchestrator checkpoint records task results as they complete, a full
|
|
718
|
+
`runtime.resume(run_id)` after the agent finishes will continue the remaining
|
|
719
|
+
tasks with correct upstream context.
|
|
652
720
|
|
|
653
|
-
The `annotation_store` and `
|
|
654
|
-
simultaneously for RLHF data collection with HITL review.
|
|
721
|
+
The `annotation_store` and `checkpoint_store` are independent — both can be
|
|
722
|
+
wired simultaneously for RLHF data collection with HITL review.
|
|
@@ -35,6 +35,7 @@ from dataclasses import dataclass
|
|
|
35
35
|
from datetime import datetime, timezone
|
|
36
36
|
from typing import Any, Final
|
|
37
37
|
|
|
38
|
+
from harness.checkpoint import _ResumeHint
|
|
38
39
|
from harness.events import BusEvent, EventType
|
|
39
40
|
from harness.utils import fire
|
|
40
41
|
from memory.manager import MemoryManager
|
|
@@ -61,6 +62,7 @@ class AgentConfig:
|
|
|
61
62
|
confidence_from_llm: bool = True # if False, confidence=1.0 on success
|
|
62
63
|
working_memory_max_tokens: int = 8000 # WorkingMemory eviction threshold; tune per agent
|
|
63
64
|
hitl_tools: list[str] = None # tools requiring human approval; None = no HITL
|
|
65
|
+
checkpoint_every: int = 0 # write a resumable checkpoint every N steps; 0 = disabled
|
|
64
66
|
|
|
65
67
|
def __post_init__(self):
|
|
66
68
|
if self.hitl_tools is None:
|
|
@@ -129,7 +131,7 @@ class BaseAgent:
|
|
|
129
131
|
tracer,
|
|
130
132
|
guard,
|
|
131
133
|
llm,
|
|
132
|
-
|
|
134
|
+
checkpoint_store: Any | None = None, # FileCheckpointStore / RedisCheckpointStore
|
|
133
135
|
) -> None:
|
|
134
136
|
self.config = config
|
|
135
137
|
self.role = config.role # exposed for orchestrator planner prompt
|
|
@@ -138,10 +140,14 @@ class BaseAgent:
|
|
|
138
140
|
self._tracer = tracer
|
|
139
141
|
self._guard = guard
|
|
140
142
|
self._llm = llm
|
|
141
|
-
self.
|
|
143
|
+
self._checkpoint_store = checkpoint_store
|
|
142
144
|
self._working_memory: WorkingMemory | None = None
|
|
143
145
|
self._task: str = ""
|
|
144
146
|
self._last_think_error: str | None = None
|
|
147
|
+
self._ckp_id: str = "" # f"{run_id}:{agent_id}" — unique per agent per run
|
|
148
|
+
self._resume_key: str = (
|
|
149
|
+
"" # key printed in --resume banner; set by orchestrator to outer run_id
|
|
150
|
+
)
|
|
145
151
|
|
|
146
152
|
# ── Streaming entry point (canonical) ─────────────────────────────────────
|
|
147
153
|
|
|
@@ -151,6 +157,9 @@ class BaseAgent:
|
|
|
151
157
|
run_id: str | None = None,
|
|
152
158
|
) -> AsyncGenerator[BusEvent, None]:
|
|
153
159
|
run_id = run_id or str(uuid.uuid4())
|
|
160
|
+
self._ckp_id = f"{run_id}:{self.config.agent_id}"
|
|
161
|
+
if not self._resume_key:
|
|
162
|
+
self._resume_key = self._ckp_id
|
|
154
163
|
self._task = task
|
|
155
164
|
self._working_memory = WorkingMemory(
|
|
156
165
|
llm=self._llm,
|
|
@@ -161,8 +170,17 @@ class BaseAgent:
|
|
|
161
170
|
await self._working_memory.append("system", system, pinned=True)
|
|
162
171
|
await self._working_memory.append("user", task)
|
|
163
172
|
|
|
164
|
-
async
|
|
165
|
-
|
|
173
|
+
async with _ResumeHint(
|
|
174
|
+
self._resume_key,
|
|
175
|
+
self._checkpoint_store,
|
|
176
|
+
f"Agent {self.config.agent_id}",
|
|
177
|
+
check_key=self._ckp_id,
|
|
178
|
+
) as hint:
|
|
179
|
+
async for event in self._run_stream_internal(run_id):
|
|
180
|
+
if event.type == EventType.TASK_DONE:
|
|
181
|
+
await self._clear_checkpoint(run_id)
|
|
182
|
+
hint.done = True
|
|
183
|
+
yield event
|
|
166
184
|
|
|
167
185
|
async def _resume_stream(
|
|
168
186
|
self,
|
|
@@ -177,13 +195,25 @@ class BaseAgent:
|
|
|
177
195
|
The approval prompt is shown again; once the human responds the
|
|
178
196
|
tool runs (or the correction is injected) before the loop continues.
|
|
179
197
|
"""
|
|
198
|
+
self._ckp_id = f"{run_id}:{self.config.agent_id}"
|
|
199
|
+
if not self._resume_key:
|
|
200
|
+
self._resume_key = self._ckp_id
|
|
180
201
|
if pending:
|
|
181
202
|
async for event in self._replay_pending_step(run_id, pending):
|
|
182
203
|
yield event
|
|
183
204
|
start_step = pending["step"] + 1
|
|
184
205
|
|
|
185
|
-
async
|
|
186
|
-
|
|
206
|
+
async with _ResumeHint(
|
|
207
|
+
self._resume_key,
|
|
208
|
+
self._checkpoint_store,
|
|
209
|
+
f"Agent {self.config.agent_id}",
|
|
210
|
+
check_key=self._ckp_id,
|
|
211
|
+
) as hint:
|
|
212
|
+
async for event in self._run_stream_internal(run_id, start_step=start_step):
|
|
213
|
+
if event.type == EventType.TASK_DONE:
|
|
214
|
+
await self._clear_checkpoint(run_id)
|
|
215
|
+
hint.done = True
|
|
216
|
+
yield event
|
|
187
217
|
|
|
188
218
|
async def _run_stream_internal(
|
|
189
219
|
self,
|
|
@@ -246,11 +276,31 @@ class BaseAgent:
|
|
|
246
276
|
|
|
247
277
|
# ── ReAct Loop (stream) ───────────────────────────────────────────────────
|
|
248
278
|
|
|
279
|
+
async def _write_step_checkpoint(self, run_id: str, step: int) -> None:
    """Persist a periodic (non-HITL) checkpoint for ``step``.

    The stored payload is exactly the one produced by ``_commit_checkpoint``
    (run_id, agent_id, task, step, memory — no ``pending`` field), so this
    delegates to it instead of keeping a second copy of the schema that
    could drift. ``_commit_checkpoint`` is a no-op when no checkpoint store
    is configured.
    """
    await self._commit_checkpoint(run_id, step)
|
|
292
|
+
|
|
249
293
|
async def _react_stream(
|
|
250
294
|
self, run_id: str, start_step: int = 0
|
|
251
295
|
) -> AsyncGenerator[BusEvent, None]:
|
|
252
296
|
for step in range(start_step, self.config.max_steps):
|
|
253
297
|
self._guard.check()
|
|
298
|
+
if (
|
|
299
|
+
self._checkpoint_store is not None
|
|
300
|
+
and self.config.checkpoint_every > 0
|
|
301
|
+
and step % self.config.checkpoint_every == 0
|
|
302
|
+
):
|
|
303
|
+
await self._write_step_checkpoint(run_id, step)
|
|
254
304
|
|
|
255
305
|
# Think — yields TOKEN events when the LLM client supports streaming.
|
|
256
306
|
response = None
|
|
@@ -337,7 +387,9 @@ class BaseAgent:
|
|
|
337
387
|
if approval is None or approval.approved:
|
|
338
388
|
approved.append(act)
|
|
339
389
|
elif approval.correction:
|
|
340
|
-
await self._inject_human_guidance(
|
|
390
|
+
await self._inject_human_guidance(
|
|
391
|
+
response, approval.correction, run_id, step
|
|
392
|
+
)
|
|
341
393
|
correction_injected = True
|
|
342
394
|
break
|
|
343
395
|
# else: rejected — drop from batch silently
|
|
@@ -366,7 +418,7 @@ class BaseAgent:
|
|
|
366
418
|
for act in parallel_actions
|
|
367
419
|
]
|
|
368
420
|
)
|
|
369
|
-
await self.
|
|
421
|
+
await self._commit_checkpoint(run_id, step)
|
|
370
422
|
|
|
371
423
|
combined: list[dict] = []
|
|
372
424
|
for i, (act, obs) in enumerate(zip(parallel_actions, observations, strict=False)):
|
|
@@ -611,14 +663,17 @@ class BaseAgent:
|
|
|
611
663
|
Returns ApprovalResponse if the tool is gated, None if not.
|
|
612
664
|
Writes a crash-resumable checkpoint to the store before blocking on stdin.
|
|
613
665
|
"""
|
|
614
|
-
if not (self.
|
|
666
|
+
if not (self._checkpoint_store and tool_name in self.config.hitl_tools):
|
|
615
667
|
return None
|
|
616
668
|
|
|
617
|
-
from harness.hitl import ApprovalRequest, request_approval
|
|
669
|
+
from harness.hitl import ApprovalRequest, is_session_allowed, request_approval
|
|
670
|
+
|
|
671
|
+
if is_session_allowed(tool_name, tool_args):
|
|
672
|
+
return None # fast-path: human already allowed this tool/prefix for session
|
|
618
673
|
|
|
619
674
|
approval_id = str(uuid.uuid4())
|
|
620
|
-
await self.
|
|
621
|
-
|
|
675
|
+
await self._checkpoint_store.write(
|
|
676
|
+
self._ckp_id,
|
|
622
677
|
{
|
|
623
678
|
"run_id": run_id,
|
|
624
679
|
"agent_id": self.config.agent_id,
|
|
@@ -637,14 +692,13 @@ class BaseAgent:
|
|
|
637
692
|
return await request_approval(
|
|
638
693
|
ApprovalRequest(
|
|
639
694
|
approval_id=approval_id,
|
|
640
|
-
run_id=run_id
|
|
695
|
+
run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
|
|
641
696
|
agent_id=self.config.agent_id,
|
|
642
697
|
tool=tool_name,
|
|
643
698
|
args=tool_args,
|
|
644
699
|
step=step,
|
|
645
700
|
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
646
701
|
),
|
|
647
|
-
self._approval_store,
|
|
648
702
|
self._guard,
|
|
649
703
|
)
|
|
650
704
|
|
|
@@ -666,24 +720,49 @@ class BaseAgent:
|
|
|
666
720
|
approval = await self._gate_tool(run_id, step, tool_name, tool_args, response)
|
|
667
721
|
if approval is not None:
|
|
668
722
|
if approval.correction:
|
|
669
|
-
await self._inject_human_guidance(response, approval.correction, run_id)
|
|
723
|
+
await self._inject_human_guidance(response, approval.correction, run_id, step)
|
|
670
724
|
return _HITL_CORRECTION
|
|
671
725
|
if not approval.approved:
|
|
672
|
-
await self.
|
|
726
|
+
await self._commit_checkpoint(run_id, step)
|
|
673
727
|
return f"Tool rejected by human: {approval.correction or 'no reason given'}"
|
|
674
728
|
obs = await self._execute_tool(tool_name, tool_args)
|
|
675
|
-
|
|
729
|
+
if approval is not None:
|
|
730
|
+
# HITL was involved — overwrite pending checkpoint with clean state.
|
|
731
|
+
# Non-HITL tools leave the step checkpoint intact for the next iteration.
|
|
732
|
+
await self._commit_checkpoint(run_id, step)
|
|
676
733
|
return obs
|
|
677
734
|
|
|
678
|
-
async def _inject_human_guidance(
|
|
679
|
-
|
|
735
|
+
async def _inject_human_guidance(
|
|
736
|
+
self, response: dict, correction: str, run_id: str, step: int
|
|
737
|
+
) -> None:
|
|
738
|
+
"""Append human correction to WorkingMemory and commit a clean checkpoint."""
|
|
680
739
|
await self._working_memory.append("assistant", json.dumps(response))
|
|
681
740
|
await self._working_memory.append("user", f"Human guidance: {correction}")
|
|
682
|
-
await self.
|
|
741
|
+
await self._commit_checkpoint(run_id, step)
|
|
742
|
+
|
|
743
|
+
async def _commit_checkpoint(self, run_id: str, step: int) -> None:
    """Overwrite this agent's checkpoint with its current state.

    The snapshot carries run_id, agent_id, task, step and the serialized
    working memory, and deliberately has no ``pending`` field, so a
    previously written HITL approval marker is replaced by a clean record.
    Does nothing when no checkpoint store is configured.
    """
    store = self._checkpoint_store
    if store is None:
        return

    snapshot = {
        "run_id": run_id,
        "agent_id": self.config.agent_id,
        "task": self._task,
        "step": step,
        "memory": self._working_memory.to_dict(),
    }
    # Keyed by the per-agent checkpoint id, not the bare run_id.
    await store.write(self._ckp_id, snapshot)
|
|
683
762
|
|
|
684
763
|
async def _clear_checkpoint(self, run_id: str) -> None:
|
|
685
|
-
if self.
|
|
686
|
-
await self.
|
|
764
|
+
if self._checkpoint_store:
|
|
765
|
+
await self._checkpoint_store.delete(self._ckp_id)
|
|
687
766
|
|
|
688
767
|
async def _replay_pending_step(
|
|
689
768
|
self,
|
|
@@ -701,19 +780,18 @@ class BaseAgent:
|
|
|
701
780
|
approval = await request_approval(
|
|
702
781
|
ApprovalRequest(
|
|
703
782
|
approval_id=pending["approval_id"],
|
|
704
|
-
run_id=run_id
|
|
783
|
+
run_id=self._resume_key, # standalone: ckp_id; orchestrated: outer run_id
|
|
705
784
|
agent_id=self.config.agent_id,
|
|
706
785
|
tool=tool_name,
|
|
707
786
|
args=tool_args,
|
|
708
787
|
step=step,
|
|
709
788
|
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
710
789
|
),
|
|
711
|
-
self._approval_store,
|
|
712
790
|
self._guard,
|
|
713
791
|
)
|
|
714
792
|
|
|
715
793
|
if approval.correction:
|
|
716
|
-
await self._inject_human_guidance(llm_response, approval.correction, run_id)
|
|
794
|
+
await self._inject_human_guidance(llm_response, approval.correction, run_id, step)
|
|
717
795
|
return
|
|
718
796
|
|
|
719
797
|
observation = (
|
|
@@ -740,7 +818,7 @@ class BaseAgent:
|
|
|
740
818
|
else observation
|
|
741
819
|
)
|
|
742
820
|
await self._working_memory.append("user", f"Observation: {obs_text}")
|
|
743
|
-
await self.
|
|
821
|
+
await self._commit_checkpoint(run_id, step)
|
|
744
822
|
|
|
745
823
|
|
|
746
824
|
# ── Response normalization (module-level for testability) ────────────────────
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""
|
|
2
|
+
harness/checkpoint.py — Pluggable checkpoint store for run state persistence.
|
|
3
|
+
|
|
4
|
+
Used by both the HITL approval gate (crash-resume) and periodic step
|
|
5
|
+
checkpointing (checkpoint_every on AgentConfig).
|
|
6
|
+
|
|
7
|
+
A checkpoint is a plain dict written under a checkpoint key (the bare run_id for an orchestrator, "<run_id>:<agent_id>" for an agent). The schema is
|
|
8
|
+
defined by the caller — agents write {run_id, agent_id, task, step, memory}
|
|
9
|
+
with an optional {pending: ...} field added by the HITL gate.
|
|
10
|
+
|
|
11
|
+
Two backends ship out of the box:
|
|
12
|
+
|
|
13
|
+
FileCheckpointStore — zero deps, one JSON file per run_id
|
|
14
|
+
RedisCheckpointStore — for distributed / multi-process setups
|
|
15
|
+
|
|
16
|
+
Both share the same three-method interface so callers are backend-agnostic.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import sys
|
|
24
|
+
import time
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def maybe_resume_key() -> str | None:
    """
    Extract the resume key from sys.argv, or return None.

    Both ``--resume <key>`` and ``--resume=<key>`` are accepted; the first
    occurrence of either form wins. Returns None when no resume flag is
    present.

    Called automatically by AgentRuntime.dispatch_stream / run_stream so that
    scripts resume transparently without any resume-specific code.  Also
    available for scripts that need the key explicitly.

    Raises:
        SystemExit: with exit code 1 (after printing a usage line to stderr)
            when the flag is present but carries no value.
    """
    args = sys.argv[1:]
    for idx, arg in enumerate(args):
        if arg == "--resume":
            if idx + 1 < len(args):
                return args[idx + 1]
            break  # flag is the last token: no value supplied
        if arg.startswith("--resume="):
            value = arg.partition("=")[2]
            if value:
                return value
            break  # "--resume=" with an empty value
    else:
        # Loop finished without hitting a resume flag at all.
        return None

    print("Usage: --resume <ckp_id>", file=sys.stderr)
    sys.exit(1)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class FileCheckpointStore:
    """
    Zero-dependency checkpoint store backed by JSON files.

    Each checkpoint key maps to ``<dir>/<key>.json``.

    Default directory: ~/.agent-harness/checkpoints/
    Override with the CHECKPOINT_DIR env var or by passing checkpoint_dir.
    An empty CHECKPOINT_DIR is treated as unset.
    """

    def __init__(self, checkpoint_dir: str | Path | None = None) -> None:
        # Chained `or` so that an empty-string env var falls through to the
        # default instead of resolving to the current working directory.
        self._dir = Path(
            checkpoint_dir
            or os.environ.get("CHECKPOINT_DIR")
            or Path.home() / ".agent-harness" / "checkpoints"
        )
        self._dir.mkdir(parents=True, exist_ok=True)

    def _path(self, run_id: str) -> Path:
        """Return the JSON file path used for checkpoint key ``run_id``."""
        return self._dir / f"{run_id}.json"

    async def write(self, run_id: str, data: dict) -> None:
        """Serialize ``data`` as JSON and store it under ``run_id``.

        The payload is written to a sibling temp file and moved into place
        with ``os.replace``, so an interrupted write never leaves a truncated
        checkpoint behind — the previous checkpoint (if any) stays readable.
        Values json cannot encode natively are stringified (``default=str``).
        """
        target = self._path(run_id)
        payload = json.dumps(data, default=str, indent=2)
        # Temp name does not end in ".json", so purge_old's glob ignores it.
        tmp = target.with_name(f"{target.name}.{os.getpid()}.tmp")
        try:
            tmp.write_text(payload, encoding="utf-8")
            os.replace(tmp, target)
        finally:
            # No-op after a successful replace; cleans up after a failed one.
            tmp.unlink(missing_ok=True)

    async def read(self, run_id: str) -> dict | None:
        """Return the checkpoint stored under ``run_id``, or None if absent."""
        try:
            return json.loads(self._path(run_id).read_text(encoding="utf-8"))
        except FileNotFoundError:
            return None

    async def delete(self, run_id: str) -> None:
        """Remove the checkpoint stored under ``run_id``; absent keys are ignored."""
        self._path(run_id).unlink(missing_ok=True)

    @classmethod
    def purge_old(cls, days: int = 7, checkpoint_dir: str | Path | None = None) -> int:
        """Delete checkpoint files older than `days`. Returns count removed."""
        store = cls(checkpoint_dir)
        cutoff = time.time() - days * 86_400
        removed = 0
        for p in store._dir.glob("*.json"):
            try:
                if p.stat().st_mtime < cutoff:
                    p.unlink()
                    removed += 1
            except FileNotFoundError:
                # File disappeared between glob and stat/unlink (e.g. a run
                # finished and cleared its checkpoint); nothing to purge.
                continue
        return removed
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class RedisCheckpointStore:
    """
    Redis-backed checkpoint store.

    Every checkpoint is stored as a JSON string under the key
    ``ckp:<run_id>`` and is written with an expiry of ttl_seconds
    (default 86 400 s, i.e. 24 h).

    Usage:
        import redis.asyncio as redis
        client = redis.Redis(host="localhost", decode_responses=True)
        store = RedisCheckpointStore(client)
    """

    _KEY = "ckp:{}"

    def __init__(self, client: Any, ttl_seconds: int = 86_400) -> None:
        self._r = client
        self._ttl = ttl_seconds

    async def write(self, run_id: str, data: dict) -> None:
        """Store ``data`` as JSON under ``run_id`` with the configured expiry."""
        redis_key = self._KEY.format(run_id)
        # default=str stringifies values json cannot encode natively.
        payload = json.dumps(data, default=str)
        await self._r.set(redis_key, payload, ex=self._ttl)

    async def read(self, run_id: str) -> dict | None:
        """Return the decoded checkpoint for ``run_id``, or None when nothing is stored."""
        raw = await self._r.get(self._KEY.format(run_id))
        if not raw:
            return None
        return json.loads(raw)

    async def delete(self, run_id: str) -> None:
        """Remove the checkpoint stored under ``run_id``."""
        redis_key = self._KEY.format(run_id)
        await self._r.delete(redis_key)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class _ResumeHint:
    """
    Async context manager that prints a --resume hint to stderr on interruption,
    but only when a checkpoint actually exists in the store.

    Two keys:
      check_key   — where the checkpoint lives in the store (verified on exit).
                    Defaults to resume_key when not supplied.
      resume_key  — printed in the hint; what the human passes to --resume.

    For orchestrated agents these differ: check_key is the namespaced agent key
    (f"{run_id}:{agent_id}") while resume_key is the bare orchestrator run_id.

    Set ``hint.done = True`` before leaving the managed block to suppress the
    message on clean success.

    The hint is advisory: exceptions raised inside the managed block are never
    suppressed, and a failing store lookup on exit only skips the message.
    """

    def __init__(
        self,
        resume_key: str,
        checkpoint_store: Any,
        label: str = "Run",
        *,
        check_key: str | None = None,
    ) -> None:
        self._resume_key = resume_key
        self._check_key = check_key if check_key is not None else resume_key
        self._store = checkpoint_store
        self._label = label
        self.done: bool = False

    async def __aenter__(self) -> _ResumeHint:
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Print the resume hint if the block did not finish; never swallow errors."""
        if self.done or self._store is None or not self._resume_key:
            return False

        try:
            checkpoint = await self._store.read(self._check_key)
        except Exception:
            # Best-effort lookup: if the store is unreachable we must not let
            # its error replace the exception that is already propagating out
            # of the managed block. Skip the hint instead.
            return False

        if checkpoint is not None:
            script = sys.argv[0] if sys.argv else "your_script.py"
            print(
                f"\n  {self._label} interrupted — checkpoint saved."
                f"\n  Resume: python {script} --resume {self._resume_key}\n",
                file=sys.stderr,
            )
        return False
|