godel-py 3.13.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- godel_py-3.13.2/.agents/BEADS.md +27 -0
- godel_py-3.13.2/.agents/CLI.md +206 -0
- godel_py-3.13.2/.agents/HANDOFF.md +233 -0
- godel_py-3.13.2/.agents/MONITORING.md +202 -0
- godel_py-3.13.2/.github/workflows/publish.yml +84 -0
- godel_py-3.13.2/.gitignore +26 -0
- godel_py-3.13.2/AGENTS.md +128 -0
- godel_py-3.13.2/CHANGELOG.md +680 -0
- godel_py-3.13.2/CLAUDE.md +1 -0
- godel_py-3.13.2/CODE_OF_CONDUCT.md +84 -0
- godel_py-3.13.2/CONTRIBUTING.md +25 -0
- godel_py-3.13.2/LICENSE +84 -0
- godel_py-3.13.2/PKG-INFO +82 -0
- godel_py-3.13.2/README.md +48 -0
- godel_py-3.13.2/SECURITY.md +16 -0
- godel_py-3.13.2/audits/2026-05-01-06be6db.md +369 -0
- godel_py-3.13.2/benchmarks/README.md +47 -0
- godel_py-3.13.2/benchmarks/observability.py +447 -0
- godel_py-3.13.2/benchmarks/results/.gitkeep +0 -0
- godel_py-3.13.2/benchmarks/results/2026-04-14-26bfcc7-023912-c296.json +35 -0
- godel_py-3.13.2/docs/README.md +57 -0
- godel_py-3.13.2/docs/api-reference.md +187 -0
- godel_py-3.13.2/docs/best-practices.md +351 -0
- godel_py-3.13.2/docs/cli.md +194 -0
- godel_py-3.13.2/docs/concepts.md +211 -0
- godel_py-3.13.2/docs/examples.md +49 -0
- godel_py-3.13.2/docs/getting-started.md +114 -0
- godel_py-3.13.2/docs/index.md +36 -0
- godel_py-3.13.2/docs/monitoring.md +202 -0
- godel_py-3.13.2/docs/proposals/web-gui.md +312 -0
- godel_py-3.13.2/docs/redaction.md +145 -0
- godel_py-3.13.2/docs/skills/README.md +12 -0
- godel_py-3.13.2/docs/skills/godel-engineer.md +133 -0
- godel_py-3.13.2/docs/skills/godel-runner.md +98 -0
- godel_py-3.13.2/docs/stdout-capture.md +151 -0
- godel_py-3.13.2/docs/transcript-format.md +263 -0
- godel_py-3.13.2/docs/why-godel.md +47 -0
- godel_py-3.13.2/examples/README.md +33 -0
- godel_py-3.13.2/examples/count_8.py +13 -0
- godel_py-3.13.2/examples/feature_factory.py +270 -0
- godel_py-3.13.2/examples/feature_factory_dryrun.py +157 -0
- godel_py-3.13.2/examples/haiku_chat.py +41 -0
- godel_py-3.13.2/examples/haiku_chat_copilot.py +35 -0
- godel_py-3.13.2/examples/parallel_agents.py +23 -0
- godel_py-3.13.2/examples/pr_review.py +151 -0
- godel_py-3.13.2/examples/research_chat.py +49 -0
- godel_py-3.13.2/examples/research_chat_copilot.py +46 -0
- godel_py-3.13.2/godel/__init__.py +100 -0
- godel_py-3.13.2/godel/__main__.py +3 -0
- godel_py-3.13.2/godel/_config.py +319 -0
- godel_py-3.13.2/godel/_context.py +149 -0
- godel_py-3.13.2/godel/_dag_render.py +274 -0
- godel_py-3.13.2/godel/_decorators.py +1049 -0
- godel_py-3.13.2/godel/_event_log.py +313 -0
- godel_py-3.13.2/godel/_events.py +110 -0
- godel_py-3.13.2/godel/_exceptions.py +468 -0
- godel_py-3.13.2/godel/_formatters.py +203 -0
- godel_py-3.13.2/godel/_guides/__init__.py +30 -0
- godel_py-3.13.2/godel/_guides/api-reference.md +187 -0
- godel_py-3.13.2/godel/_guides/best-practices.md +351 -0
- godel_py-3.13.2/godel/_guides/cli.md +194 -0
- godel_py-3.13.2/godel/_guides/concepts.md +211 -0
- godel_py-3.13.2/godel/_guides/engineer.md +133 -0
- godel_py-3.13.2/godel/_guides/getting-started.md +114 -0
- godel_py-3.13.2/godel/_guides/monitoring.md +202 -0
- godel_py-3.13.2/godel/_guides/runner.md +98 -0
- godel_py-3.13.2/godel/_linter.py +692 -0
- godel_py-3.13.2/godel/_pause.py +146 -0
- godel_py-3.13.2/godel/_redact.py +165 -0
- godel_py-3.13.2/godel/_replay.py +373 -0
- godel_py-3.13.2/godel/_rewind.py +389 -0
- godel_py-3.13.2/godel/_run.py +418 -0
- godel_py-3.13.2/godel/_run_summary.py +145 -0
- godel_py-3.13.2/godel/_stdout_capture.py +171 -0
- godel_py-3.13.2/godel/_strict_ast.py +113 -0
- godel_py-3.13.2/godel/_strict_audit.py +43 -0
- godel_py-3.13.2/godel/_strict_imports.py +49 -0
- godel_py-3.13.2/godel/_tail.py +986 -0
- godel_py-3.13.2/godel/_transcript.py +477 -0
- godel_py-3.13.2/godel/_watch.py +1577 -0
- godel_py-3.13.2/godel/_watch_model.py +364 -0
- godel_py-3.13.2/godel/agents/__init__.py +9 -0
- godel_py-3.13.2/godel/agents/_adapters.py +279 -0
- godel_py-3.13.2/godel/agents/_claude.py +185 -0
- godel_py-3.13.2/godel/agents/_common.py +619 -0
- godel_py-3.13.2/godel/agents/_copilot.py +216 -0
- godel_py-3.13.2/godel/agents/_stream_parser.py +282 -0
- godel_py-3.13.2/godel/cli.py +1609 -0
- godel_py-3.13.2/godel/det.py +231 -0
- godel_py-3.13.2/godel/intervention/__init__.py +47 -0
- godel_py-3.13.2/godel/intervention/_context.py +289 -0
- godel_py-3.13.2/godel/intervention/_tools.py +400 -0
- godel_py-3.13.2/godel/intervention/default_agent.py +383 -0
- godel_py-3.13.2/godel/io.py +688 -0
- godel_py-3.13.2/godel/testing.py +5 -0
- godel_py-3.13.2/plans/live-observability-v3.md +141 -0
- godel_py-3.13.2/pyproject.toml +62 -0
- godel_py-3.13.2/scripts/sync_guides.sh +18 -0
- godel_py-3.13.2/tests/__snapshots__/test_watch_render.ambr +36 -0
- godel_py-3.13.2/tests/fixtures/event_streams/simple_workflow.jsonl +10 -0
- godel_py-3.13.2/tests/fixtures/event_streams/with_rotation.jsonl +7 -0
- godel_py-3.13.2/tests/fixtures/events_old_format.jsonl +5 -0
- godel_py-3.13.2/tests/fixtures/failing_workflow.py +5 -0
- godel_py-3.13.2/tests/fixtures/good_workflow.py +5 -0
- godel_py-3.13.2/tests/fixtures/mock_intervention.py +63 -0
- godel_py-3.13.2/tests/fixtures/no_workflow.py +1 -0
- godel_py-3.13.2/tests/fixtures/parallel_rewind_wf.py +34 -0
- godel_py-3.13.2/tests/fixtures/pause_edit_wf.py +58 -0
- godel_py-3.13.2/tests/fixtures/repair_schema_typo_wf.py +38 -0
- godel_py-3.13.2/tests/fixtures/stream_json/claude_sample.jsonl +3 -0
- godel_py-3.13.2/tests/fixtures/stream_json/copilot_sample.jsonl +2 -0
- godel_py-3.13.2/tests/fixtures/stream_json/crlf_sample.jsonl +2 -0
- godel_py-3.13.2/tests/test_agent_adapters.py +678 -0
- godel_py-3.13.2/tests/test_agent_events.py +263 -0
- godel_py-3.13.2/tests/test_agent_streaming.py +347 -0
- godel_py-3.13.2/tests/test_agents.py +707 -0
- godel_py-3.13.2/tests/test_audit_log_errors.py +324 -0
- godel_py-3.13.2/tests/test_auto_checkpoint.py +320 -0
- godel_py-3.13.2/tests/test_cli.py +45 -0
- godel_py-3.13.2/tests/test_cli_args.py +398 -0
- godel_py-3.13.2/tests/test_cli_lint.py +228 -0
- godel_py-3.13.2/tests/test_cli_pause.py +194 -0
- godel_py-3.13.2/tests/test_cli_pause_resume.py +423 -0
- godel_py-3.13.2/tests/test_cli_resume.py +159 -0
- godel_py-3.13.2/tests/test_cli_rewind.py +269 -0
- godel_py-3.13.2/tests/test_cli_run_lint.py +136 -0
- godel_py-3.13.2/tests/test_cli_runs_list.py +288 -0
- godel_py-3.13.2/tests/test_cli_show.py +155 -0
- godel_py-3.13.2/tests/test_cli_strict.py +90 -0
- godel_py-3.13.2/tests/test_cli_tail.py +158 -0
- godel_py-3.13.2/tests/test_cli_watch.py +905 -0
- godel_py-3.13.2/tests/test_config.py +336 -0
- godel_py-3.13.2/tests/test_copilot_agent.py +586 -0
- godel_py-3.13.2/tests/test_dag_render.py +223 -0
- godel_py-3.13.2/tests/test_decorator_options.py +367 -0
- godel_py-3.13.2/tests/test_decorators.py +277 -0
- godel_py-3.13.2/tests/test_default_intervention_agent.py +456 -0
- godel_py-3.13.2/tests/test_det.py +202 -0
- godel_py-3.13.2/tests/test_event_log.py +126 -0
- godel_py-3.13.2/tests/test_events.py +66 -0
- godel_py-3.13.2/tests/test_exceptions.py +53 -0
- godel_py-3.13.2/tests/test_exports.py +32 -0
- godel_py-3.13.2/tests/test_exports_strict.py +17 -0
- godel_py-3.13.2/tests/test_file_io.py +621 -0
- godel_py-3.13.2/tests/test_formatters.py +402 -0
- godel_py-3.13.2/tests/test_graph_cut.py +247 -0
- godel_py-3.13.2/tests/test_guide.py +57 -0
- godel_py-3.13.2/tests/test_hash_mismatch.py +118 -0
- godel_py-3.13.2/tests/test_idempotency.py +462 -0
- godel_py-3.13.2/tests/test_integration_audit.py +156 -0
- godel_py-3.13.2/tests/test_integration_resume.py +351 -0
- godel_py-3.13.2/tests/test_intervention_context.py +193 -0
- godel_py-3.13.2/tests/test_intervention_tools.py +457 -0
- godel_py-3.13.2/tests/test_io.py +77 -0
- godel_py-3.13.2/tests/test_io_events.py +88 -0
- godel_py-3.13.2/tests/test_join_cascade.py +522 -0
- godel_py-3.13.2/tests/test_lint_rules.py +766 -0
- godel_py-3.13.2/tests/test_linter_framework.py +397 -0
- godel_py-3.13.2/tests/test_observability_integration.py +683 -0
- godel_py-3.13.2/tests/test_parallel_events.py +194 -0
- godel_py-3.13.2/tests/test_pause_resume_e2e.py +353 -0
- godel_py-3.13.2/tests/test_pause_signal.py +356 -0
- godel_py-3.13.2/tests/test_primitive_exceptions.py +247 -0
- godel_py-3.13.2/tests/test_redaction.py +524 -0
- godel_py-3.13.2/tests/test_repair_cli.py +306 -0
- godel_py-3.13.2/tests/test_repair_e2e.py +262 -0
- godel_py-3.13.2/tests/test_replay_parallel.py +181 -0
- godel_py-3.13.2/tests/test_replay_primitives.py +310 -0
- godel_py-3.13.2/tests/test_replay_walker.py +125 -0
- godel_py-3.13.2/tests/test_resume_exceptions.py +123 -0
- godel_py-3.13.2/tests/test_rewind.py +495 -0
- godel_py-3.13.2/tests/test_rewind_adversarial.py +201 -0
- godel_py-3.13.2/tests/test_rewind_e2e_m4.py +487 -0
- godel_py-3.13.2/tests/test_rewind_io_preservation.py +413 -0
- godel_py-3.13.2/tests/test_rewind_parallel.py +405 -0
- godel_py-3.13.2/tests/test_rewind_safety.py +424 -0
- godel_py-3.13.2/tests/test_rewind_signal.py +284 -0
- godel_py-3.13.2/tests/test_run.py +77 -0
- godel_py-3.13.2/tests/test_run_events.py +77 -0
- godel_py-3.13.2/tests/test_run_signal.py +363 -0
- godel_py-3.13.2/tests/test_run_streaming.py +234 -0
- godel_py-3.13.2/tests/test_show_full.py +436 -0
- godel_py-3.13.2/tests/test_source_edit_guard.py +594 -0
- godel_py-3.13.2/tests/test_stdout_capture.py +673 -0
- godel_py-3.13.2/tests/test_step_event_history.py +292 -0
- godel_py-3.13.2/tests/test_step_events.py +105 -0
- godel_py-3.13.2/tests/test_step_timeout.py +292 -0
- godel_py-3.13.2/tests/test_stream_parser.py +564 -0
- godel_py-3.13.2/tests/test_stream_path.py +292 -0
- godel_py-3.13.2/tests/test_strict_ast.py +164 -0
- godel_py-3.13.2/tests/test_strict_audit.py +69 -0
- godel_py-3.13.2/tests/test_strict_imports.py +61 -0
- godel_py-3.13.2/tests/test_strict_integration.py +151 -0
- godel_py-3.13.2/tests/test_structured_exceptions.py +362 -0
- godel_py-3.13.2/tests/test_tail.py +311 -0
- godel_py-3.13.2/tests/test_transcript_tail.py +522 -0
- godel_py-3.13.2/tests/test_transcript_writer.py +631 -0
- godel_py-3.13.2/tests/test_watch_model.py +776 -0
- godel_py-3.13.2/tests/test_watch_optional_dep.py +117 -0
- godel_py-3.13.2/tests/test_watch_render.py +1004 -0
- godel_py-3.13.2/tests/test_watch_verbosity.py +607 -0
- godel_py-3.13.2/tests/test_workflow_events.py +86 -0
- godel_py-3.13.2/tests/test_workflow_rewind.py +177 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
## Beads Issue Tracker
|
|
2
|
+
|
|
3
|
+
This project uses **bd (beads)** for issue tracking. Run `bd prime` to see full workflow context and commands.
|
|
4
|
+
|
|
5
|
+
### Quick Reference
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
bd ready # Find available work
|
|
9
|
+
bd show <id> # View issue details
|
|
10
|
+
bd update <id> --claim # Claim work
|
|
11
|
+
bd close <id> # Complete work
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
### Rules
|
|
15
|
+
|
|
16
|
+
- Use `bd` for ALL task tracking — do NOT use TodoWrite, TaskCreate, or markdown TODO lists
|
|
17
|
+
- Run `bd prime` for detailed command reference and session close protocol
|
|
18
|
+
- Never work directly on main; create a worktree for your work.
|
|
19
|
+
|
|
20
|
+
## Definition of DONE (CLOSED)
|
|
21
|
+
- A ticket can be marked as done (closed) once it has been merged.
|
|
22
|
+
|
|
23
|
+
### Preconditions
|
|
24
|
+
- All quality gates defined for the project must be green. Don't flag issues as pre-existing: you take ownership and fix any quality gate that is not green.
|
|
25
|
+
- The work should have been reviewed and approved (two-pairs-of-eyes principle) by a human or another agent with no prior context about the task.
|
|
26
|
+
- Clean: Remove leftover comments, dead code resulting from the implementation, orphan worktrees, etc. Tidy up when you finish.
|
|
27
|
+
- Handoff: If follow-up work is discovered during the implementation, you must create a beads ticket for it.
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# Godel CLI Reference
|
|
2
|
+
|
|
3
|
+
The `godel` command is the single entry point for running, resuming, linting, and inspecting Godel workflows.
|
|
4
|
+
|
|
5
|
+
## Commands
|
|
6
|
+
|
|
7
|
+
### `godel run FILE [-- ARG ...]`
|
|
8
|
+
|
|
9
|
+
Execute a `@workflow`-decorated function from `FILE`.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
godel run FILE [OPTIONS] [-- ARG ...]
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
**Options:**
|
|
16
|
+
- `--no-strict` — Disable strict mode (allow non-deterministic ops).
|
|
17
|
+
- `--no-lint` — Skip lint pre-flight check.
|
|
18
|
+
|
|
19
|
+
**Passing arguments to workflows:**
|
|
20
|
+
|
|
21
|
+
Append `--` followed by tokens to pass positional and keyword arguments to the `@workflow` function:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Positional args only
|
|
25
|
+
godel run workflow.py -- alice bob
|
|
26
|
+
|
|
27
|
+
# Keyword args only
|
|
28
|
+
godel run workflow.py -- model=opus max_steps=10
|
|
29
|
+
|
|
30
|
+
# Mixed (positional order is preserved among positional tokens)
|
|
31
|
+
godel run workflow.py -- alice model=opus
|
|
32
|
+
|
|
33
|
+
# Edge cases
|
|
34
|
+
godel run workflow.py -- q=a=b # key='q', value='a=b' (split on first '=')
|
|
35
|
+
godel run workflow.py -- x= # key='x', value=''
|
|
36
|
+
godel run workflow.py -- 1=foo # '1' is not a valid identifier → positional
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Semantics:**
|
|
40
|
+
- Tokens containing `=` with a valid Python identifier LHS become keyword args.
|
|
41
|
+
- Other tokens (including `KEY=` where KEY is not a valid identifier) become positional args.
|
|
42
|
+
- All values are passed as **strings**; the workflow function is responsible for type coercion.
|
|
43
|
+
- Duplicate kwarg keys are rejected with an error.
|
|
44
|
+
- Argument binding is validated before the run starts; arity mismatches exit with code 2 and no run ID is printed.
|
|
45
|
+
|
|
46
|
+
**Workflow function example:**
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from godel import workflow
|
|
50
|
+
|
|
51
|
+
@workflow
|
|
52
|
+
async def my_workflow(name: str, model: str = "sonnet"):
|
|
53
|
+
...
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
godel run my_workflow.py -- alice model=opus
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Exit codes:**
|
|
61
|
+
- `0` — Workflow completed or paused successfully.
|
|
62
|
+
- `1` — `WorkflowFail` raised inside the workflow.
|
|
63
|
+
- `2` — Argument error, no `@workflow` found, or unexpected exception.
|
|
64
|
+
- `130` — Interrupted by Ctrl+C (SIGINT).
|
|
65
|
+
|
|
66
|
+
**Ctrl+C / SIGINT behaviour:**
|
|
67
|
+
|
|
68
|
+
Pressing `Ctrl+C` once cancels the running workflow task. Each subprocess
|
|
69
|
+
started by `run()` is isolated in its own process group; on cancellation,
|
|
70
|
+
Godel sends `SIGTERM` to the process group and waits up to 2 seconds before
|
|
71
|
+
escalating to `SIGKILL`. No orphan agent processes survive.
|
|
72
|
+
|
|
73
|
+
A second `Ctrl+C` arriving within 1 second of the first triggers an immediate
|
|
74
|
+
`os._exit(130)` — a panic exit that bypasses any hung cleanup.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
### `godel resume RUN_ID [FILE]`
|
|
79
|
+
|
|
80
|
+
Resume a paused or interrupted workflow run from its audit log.
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
godel resume RUN_ID [FILE] [OPTIONS]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Options:**
|
|
87
|
+
- `--on-mismatch continue|invalidate|abort` — Policy for `request_hash` mismatches.
|
|
88
|
+
- `--on-source-edit warn|abort|ignore` — Policy when a cached `@step`'s source has changed.
|
|
89
|
+
- `--no-strict` — Disable strict mode.
|
|
90
|
+
- `--no-lint` — Skip lint pre-flight check.
|
|
91
|
+
|
|
92
|
+
`RUN_ID` can be a prefix (minimum 8 characters) of the full run ID.
|
|
93
|
+
|
|
94
|
+
The workflow is called with the **same positional and keyword args** that were used in the original `godel run` invocation — no need to re-supply them. The args are recovered from the `WORKFLOW_STARTED` event in the audit log.
|
|
95
|
+
|
|
96
|
+
**Non-serialisable args:** If the original run was started programmatically with non-JSON-serialisable arguments (e.g. custom Python objects), `godel resume` will refuse with:
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
[godel] resume error: This run used non-serialisable args; programmatic resume only.
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
In that case, resume the workflow directly in Python code.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
### `godel lint FILE`
|
|
107
|
+
|
|
108
|
+
Lint a workflow file for common mistakes.
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
godel lint FILE [--format text|json] [--skip RULE_IDS]
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Options:**
|
|
115
|
+
- `--format text|json` — Output format (default: `text`).
|
|
116
|
+
- `--skip RULE_IDS` — Comma-separated rule IDs to skip (e.g. `PL003,PL007`).
|
|
117
|
+
|
|
118
|
+
**Exit codes:** `1` if any errors found; `0` if warnings only or clean.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
### `godel show RUN_ID`
|
|
123
|
+
|
|
124
|
+
Display the audit log for a workflow run.
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
godel show RUN_ID [--graph] [--all]
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**Options:**
|
|
131
|
+
- `--graph` — Render the DAG as an ASCII tree.
|
|
132
|
+
- `--all` — Show failed retries and invalidated events.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
### `godel pause RUN_ID`
|
|
137
|
+
|
|
138
|
+
Request a live workflow run to pause at its next `@step` boundary.
|
|
139
|
+
|
|
140
|
+
```
|
|
141
|
+
godel pause RUN_ID [--reason TEXT]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
### `godel resume RUN_ID`
|
|
147
|
+
|
|
148
|
+
See [resume](#godel-resume-run_id-file) above.
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
### `godel rewind RUN_ID`
|
|
153
|
+
|
|
154
|
+
Rewind a workflow run to a previous checkpoint.
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
godel rewind RUN_ID --to EVENT_ID[,EVENT_ID,...] [--reason TEXT]
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
### `godel repair RUN_ID`
|
|
163
|
+
|
|
164
|
+
Drop an intervention agent into a paused or crashed run.
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
godel repair RUN_ID [--agent MODULE:FUNCTION] [--model MODEL] [--max-iterations N] [--dry-run]
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
### `godel watch RUN_ID`
|
|
173
|
+
|
|
174
|
+
Attach a live TUI renderer to a running or completed workflow.
|
|
175
|
+
|
|
176
|
+
Replays history from archived transcript files then follows the live transcript
|
|
177
|
+
until the run finishes or Ctrl+C is pressed. Requires `godel[watch]`
|
|
178
|
+
(`pip install 'godel[watch]'`).
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
godel watch RUN_ID [--runs-dir DIR] [--plain|-p]
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Options:**
|
|
185
|
+
- `--runs-dir DIR` — Directory containing per-run transcript directories (default: `./runs`).
|
|
186
|
+
- `--plain`, `-p` — Force plain line-log output instead of the Rich TUI panel display.
|
|
187
|
+
Each event is printed as a single `[godel-watch] <ts> <op> ...` line.
|
|
188
|
+
Useful for CI, pipes, or while the panel UX is being redesigned.
|
|
189
|
+
|
|
190
|
+
**Environment variables:**
|
|
191
|
+
- `GODEL_WATCH_PLAIN=1` — Equivalent to `--plain`; forces plain line-log without modifying the command invocation.
|
|
192
|
+
|
|
193
|
+
**Notes:**
|
|
194
|
+
- `RUN_ID` can be a prefix, as long as it contains enough characters to resolve to a unique run.
|
|
195
|
+
- If the run was started with `--no-stream` (streaming disabled), a discoverability hint is printed on stderr and the command exits immediately.
|
|
196
|
+
- Without `--plain` and without `GODEL_WATCH_PLAIN=1`, the Rich TUI is used on capable terminals (auto-fallback to plain on non-TTY, `TERM=dumb`, or non-UTF-8 locales).
|
|
197
|
+
|
|
198
|
+
---
|
|
199
|
+
|
|
200
|
+
### `godel tail RUN_ID`
|
|
201
|
+
|
|
202
|
+
Follow a workflow's audit log in real time.
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
godel tail RUN_ID [--format pretty|json] [--no-follow] [--no-wait]
|
|
206
|
+
```
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# Handoff: M0 Python Library Skeleton
|
|
2
|
+
|
|
3
|
+
## Goal
|
|
4
|
+
Implement the M0 milestone for the Godel Python library — the minimal package structure and primitives to run one real end-to-end workflow with a live Claude Code agent.
|
|
5
|
+
|
|
6
|
+
## What Was Done
|
|
7
|
+
|
|
8
|
+
**7 tickets completed** across 4 waves (parallelized where dependencies allowed):
|
|
9
|
+
|
|
10
|
+
| File | Purpose |
|
|
11
|
+
|------|---------|
|
|
12
|
+
| `pyproject.toml` | Package config, deps: pydantic, click, python-ulid |
|
|
13
|
+
| `godel/_context.py` | WorkflowContext dataclass + ContextVars |
|
|
14
|
+
| `godel/_decorators.py` | `@workflow`, `@step`, `WorkflowFail`, `parallel()`, `retry()` |
|
|
15
|
+
| `godel/_run.py` | `run()` async subprocess primitive with `_privileged` flag |
|
|
16
|
+
| `godel/io.py` | Async `print`/`input` shadows |
|
|
17
|
+
| `godel/agents/_claude.py` | `claude_code()` factory wrapping `claude` CLI via `run()` |
|
|
18
|
+
| `godel/cli.py` | `godel run <file>` workflow discovery and execution |
|
|
19
|
+
| `examples/pr_review.py` | Live PR review workflow (validated end-to-end) |
|
|
20
|
+
|
|
21
|
+
**30 tests** across 5 test files, all passing.
|
|
22
|
+
|
|
23
|
+
## Current State
|
|
24
|
+
- All committed and pushed to `origin/main`
|
|
25
|
+
- Tests pass, `pip install -e .` works
|
|
26
|
+
- `requires-python` is `>=3.10` (system has 3.10.12, tickets spec'd 3.11)
|
|
27
|
+
- Beads tickets all closed, dolt pushed
|
|
28
|
+
|
|
29
|
+
## Live Validation Results
|
|
30
|
+
The PR review workflow ran successfully against real infrastructure:
|
|
31
|
+
- Created `feat/version-helper` branch, committed code, pushed
|
|
32
|
+
- Opened draft PR #7 on `atscub/godel-lang`
|
|
33
|
+
- Copilot reviewed and left 4 comments on intentional code smells
|
|
34
|
+
- Engineer agent categorized feedback, fixed 3 issues, re-ran quality gates
|
|
35
|
+
- Was killed during second review poll loop (no new comments); PR closed manually
|
|
36
|
+
|
|
37
|
+
## Gotchas
|
|
38
|
+
|
|
39
|
+
1. **`claude -p` returns natural language, not JSON** — even when prompted for schema output, the agent's `result` field is a summary of what it did. The `_claude.py` extraction fallback (haiku call) handles this, but it adds latency and cost per schema call.
|
|
40
|
+
|
|
41
|
+
2. **Agents operate in the working directory** — the engineer agent during the live run picked up uncommitted `_claude.py` edits and "fixed" them as part of Copilot review feedback. Real workflows should run in an isolated worktree/clone.
|
|
42
|
+
|
|
43
|
+
3. **`wait_for_review` polls with agent calls** — each poll is a full Claude invocation (~$0.05). Deterministic operations like PR comment polling should use direct `gh api` calls instead.
|
|
44
|
+
|
|
45
|
+
4. **`handle_feedback` doesn't pass comments to the prompt** — fixed in the final version, but worth checking: the `comments` parameter must be interpolated into the engineer's prompt string.
|
|
46
|
+
|
|
47
|
+
## What's Left (M1+)
|
|
48
|
+
|
|
49
|
+
- **M1** (`awl-gwj`): Audit log + JSONL persistence — event emission for every step/run/fork/join
|
|
50
|
+
- Granular permissions for `claude_code()` (currently `skip_permissions: bool`, should support `allowedTools` lists)
|
|
51
|
+
- Workflow isolation (run in separate worktree)
|
|
52
|
+
- Review polling via deterministic API calls instead of agent calls
|
|
53
|
+
|
|
54
|
+
## Entry Points
|
|
55
|
+
- **Spec**: `docs/py-library/02-api.md` (API contract), `docs/py-library/03-runtime.md` (execution model)
|
|
56
|
+
- **Code**: Start with `godel/_decorators.py` (core primitives) and `godel/agents/_claude.py` (agent interface)
|
|
57
|
+
- **Run**: `cd py-library && pip install -e . && python -m pytest tests/ -v`
|
|
58
|
+
- **Beads**: `bd ready` for next available work, `bd show awl-gwj` for M1 epic
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
# Handoff: M1 + M2 + M3 Implementation (2026-04-12)
|
|
63
|
+
|
|
64
|
+
## Goal
|
|
65
|
+
Implement M1 (audit log + JSONL persistence), M2 (godel.strict mode), and M3 (deterministic replay + resume) across 27 beads tasks organized into 9 dependency-ordered waves.
|
|
66
|
+
|
|
67
|
+
## What Was Done
|
|
68
|
+
|
|
69
|
+
**22 of 27 tasks completed** across Waves 1-6 (plus partial Wave 7). M1 and M2 are fully complete. M3 is partially complete.
|
|
70
|
+
|
|
71
|
+
### Completed Epics
|
|
72
|
+
- **M1 (`awl-gwj`)**: Audit log + JSONL persistence — DONE
|
|
73
|
+
- **M2 (`awl-7no`)**: godel.strict mode — DONE
|
|
74
|
+
|
|
75
|
+
### New Files Created
|
|
76
|
+
|
|
77
|
+
| File | Purpose |
|
|
78
|
+
|------|---------|
|
|
79
|
+
| `godel/_events.py` | `Event` dataclass + `EventStatus` enum (STARTED/FINISHED/FAILED/INVALIDATED/SUSPENDED) |
|
|
80
|
+
| `godel/_event_log.py` | `EventLog` class — in-memory DAG + append-only JSONL writer at `./runs/<run_id>.jsonl` |
|
|
81
|
+
| `godel/_exceptions.py` | `GodelStrictError`, `StrictViolation`, `ResumeError`, `UnsafeResumeError` |
|
|
82
|
+
| `godel/_strict_ast.py` | Layer 1: AST pre-scan for banned calls/modules |
|
|
83
|
+
| `godel/_strict_imports.py` | Layer 2: `sys.meta_path` import guard |
|
|
84
|
+
| `godel/_strict_audit.py` | Layer 3: PEP 578 audit hook (permanent, uses `_privileged` contextvar bypass) |
|
|
85
|
+
| `godel/_replay.py` | `ReplayWalker` (cursor-based DAG traversal), `ReplayMatch`, `MismatchPolicy`, hash mismatch handling |
|
|
86
|
+
| `godel/_dag_render.py` | ASCII DAG renderer for `godel show --graph` |
|
|
87
|
+
|
|
88
|
+
### Modified Files
|
|
89
|
+
|
|
90
|
+
| File | Changes |
|
|
91
|
+
|------|---------|
|
|
92
|
+
| `godel/_context.py` | Added `event_log`, `replay_walker`, `_invocation_counts`, `_step_local_seq` fields to `WorkflowContext`; added `get_event_log()` helper; added `_pending_replay` contextvar |
|
|
93
|
+
| `godel/_decorators.py` | `@workflow` creates EventLog + emits WORKFLOW_STARTED/FINISHED/FAILED, stores `_last_run_id`; `@step` emits step.enter events with invocation tracking; `parallel()` emits FORK/JOIN events |
|
|
94
|
+
| `godel/_run.py` | `run()` emits two-phase events (STARTED/FINISHED/FAILED), truncates stdout/stderr to 1000 chars in log; **replay guard added** — returns cached result on replay |
|
|
95
|
+
| `godel/io.py` | `print()`/`input()` emit events; **replay guards added** — print skips on replay, input returns cached value |
|
|
96
|
+
| `godel/det.py` | Replaced stubs with real implementations recording events; **replay guards partially added** (now/random have guards, uuid4 may be partial) |
|
|
97
|
+
| `godel/agents/_claude.py` | `__call__` emits agent.call events; extracted `_execute` method |
|
|
98
|
+
| `godel/cli.py` | Added `--strict` flag, `show` command with `--graph`, run_id output after execution |
|
|
99
|
+
| `godel/__init__.py` | Exports: Event, EventStatus, EventLog, get_event_log, GodelStrictError, StrictViolation, ResumeError, UnsafeResumeError, det |
|
|
100
|
+
|
|
101
|
+
### Test Files (27 total, 154 tests passing)
|
|
102
|
+
|
|
103
|
+
New test files: `test_events.py`, `test_event_log.py`, `test_exceptions.py`, `test_strict_ast.py`, `test_strict_imports.py`, `test_strict_audit.py`, `test_cli_strict.py`, `test_cli_show.py`, `test_workflow_events.py`, `test_step_events.py`, `test_run_events.py`, `test_io_events.py`, `test_det.py`, `test_parallel_events.py`, `test_agent_events.py`, `test_exports.py`, `test_exports_strict.py`, `test_strict_integration.py`, `test_integration_audit.py`, `test_replay_walker.py`, `test_dag_render.py`, `test_hash_mismatch.py`
|
|
104
|
+
|
|
105
|
+
## Current State (2026-04-12)
|
|
106
|
+
- **193 tests passing** (`python -m pytest tests/ -v`) across 29 test files
|
|
107
|
+
- All M1, M2, M3 milestones **complete**
|
|
108
|
+
- All beads tasks closed, dolt pushed
|
|
109
|
+
|
|
110
|
+
## M3 Completion Summary (Waves 7-9)
|
|
111
|
+
|
|
112
|
+
### Wave 7 — Completed
|
|
113
|
+
- `awl-571`: Hash mismatch detection — `_replay.py` with `handle_hash_mismatch`, `_cascade_invalidate`, `MismatchPolicy`
|
|
114
|
+
- `awl-8cy`: Resume exceptions — `UnsafeResumeError` with cmd/step_path/event_id attributes, actionable fix suggestions
|
|
115
|
+
- `awl-9z9`: Replay guards in all primitives — `run()`, `print()`, `input()`, `det.now()`, `det.random()`, `det.uuid4()`
|
|
116
|
+
|
|
117
|
+
### Wave 8 — Completed
|
|
118
|
+
- `awl-qj0`: Replay-aware `parallel()` — FORK invocation tracking, branch primitives replay from cache individually
|
|
119
|
+
- `awl-xj2`: CLI `godel resume <run_id> <file>` — loads JSONL, sets up ReplayWalker via `_pending_replay` contextvar, `@workflow` reuses run_id on resume
|
|
120
|
+
|
|
121
|
+
### Wave 9 — Completed
|
|
122
|
+
- `awl-e7i`: E2E integration test — 7 tests: crash-and-resume, det value stability, print silence, UnsafeResumeError, parallel branch replay, no duplicate subprocess, event append verification
|
|
123
|
+
|
|
124
|
+
### New Test Files (M3)
|
|
125
|
+
| File | Tests |
|
|
126
|
+
|------|-------|
|
|
127
|
+
| `test_replay_primitives.py` | 8 — replay guards for all primitives |
|
|
128
|
+
| `test_resume_exceptions.py` | 15 — exception hierarchy, attributes, formatting |
|
|
129
|
+
| `test_replay_parallel.py` | 4 — FORK/JOIN replay, invocation tracking |
|
|
130
|
+
| `test_cli_resume.py` | 5 — CLI resume command, workflow decorator resume path |
|
|
131
|
+
| `test_integration_resume.py` | 7 — E2E crash-and-resume validation |
|
|
132
|
+
|
|
133
|
+
## Key Design Decisions
|
|
134
|
+
|
|
135
|
+
1. **Audit hook test isolation**: All `--strict` CLI tests and audit hook tests use `subprocess.run()` because PEP 578 hooks are permanent. Never use CliRunner for tests that install audit hooks.
|
|
136
|
+
|
|
137
|
+
2. **EventLog file writes use `_privileged`**: The EventLog wraps all file I/O in `_privileged.set(True)` to bypass the audit hook in strict mode.
|
|
138
|
+
|
|
139
|
+
3. **`urllib` → `urllib.request`**: Changed banned module from `urllib` to `urllib.request` because `urllib.parse` is used by `pathlib` (stdlib dependency).
|
|
140
|
+
|
|
141
|
+
4. **Replay index key**: `(step_path, invocation_seq, step_local_seq, op)` — NOT event_id. This makes deterministic replay work because strict mode guarantees the same logical position on re-execution.
|
|
142
|
+
|
|
143
|
+
5. **JSONL is append-only**: STARTED appears first, then FINISHED/FAILED overwrites on load (last snapshot per event_id wins).
|
|
144
|
+
|
|
145
|
+
6. **Branch replay is implicit**: `parallel()` doesn't skip execution during replay — it re-enters all branches, and each branch's leaf primitives individually consult the ReplayWalker. FORK invocations are tracked with a `__FORK__` suffix key.
|
|
146
|
+
|
|
147
|
+
7. **Resume appends to same JSONL**: On resume, `@workflow` reuses the original `run_id` and `EventLog` (open for append). New events get new `event_id`s and higher `seq` numbers.
|
|
148
|
+
|
|
149
|
+
## What's Left (M4+)
|
|
150
|
+
|
|
151
|
+
- **M4** (`awl-dyn`): Rewind — rollback to a previous checkpoint
|
|
152
|
+
- **M5** (`awl-9lf`): Structured exception hierarchy
|
|
153
|
+
- **M6** (`awl-c8t`): Workflow linter
|
|
154
|
+
- **M7** (`awl-qe6`): Intervention mode
|
|
155
|
+
- **M8** (`awl-9g1`): DSL ↔ library interop (stretch)
|
|
156
|
+
|
|
157
|
+
## Entry Points
|
|
158
|
+
|
|
159
|
+
- **Spec**: `docs/py-library/02-api.md` (API contract), `docs/py-library/03-runtime.md` (execution model)
|
|
160
|
+
- **Code**: Start with `godel/_decorators.py` (core primitives) and `godel/_replay.py` (replay engine)
|
|
161
|
+
- **Run**: `cd py-library && pip install -e . && python -m pytest tests/ -v`
|
|
162
|
+
- **Beads**: `bd ready` for next available work
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
# Handoff: M4 + M5 + M6 + M7 (2026-04-13)
|
|
167
|
+
|
|
168
|
+
## Goal
|
|
169
|
+
Land M4 (Rewind), M5 (Structured exception hierarchy), M6 (Workflow linter), and M7 (Intervention mode) — including the `godel pause`, `godel resume`, `godel rewind`, and `godel repair` CLI surface plus the default intervention agent.
|
|
170
|
+
|
|
171
|
+
## Current State
|
|
172
|
+
- **561 tests passing** (`cd py-library && uv run pytest`)
|
|
173
|
+
- Working tree clean; master @ `2a1aa3c`; all commits pushed
|
|
174
|
+
- M4, M5, M6, M7 milestones **complete** end-to-end. M7 exit criterion (a) — `godel repair` auto-fixes a schema-mismatch typo without human input — verified by E2E test `tests/test_repair_e2e.py`.
|
|
175
|
+
|
|
176
|
+
## New / Modified Files (since M3)
|
|
177
|
+
|
|
178
|
+
| File | Purpose |
|
|
179
|
+
|------|---------|
|
|
180
|
+
| `godel/_rewind.py` | `rewind()` primitive + `apply_rewind()` (graph cut, INVALIDATED cascade, REWIND intent/outcome events, safety guard) |
|
|
181
|
+
| `godel/_pause.py` | Pause-sentinel file (`./runs/<run_id>.pause`) — atomic write via `mkstemp` + `os.replace`, per-run-id orphan glob cleanup |
|
|
182
|
+
| `godel/_linter.py` | Workflow linter — `@runtime_checkable` `LintRule` Protocol, `register_rule`/`clear_rules`/`get_rules`, `LintDiagnostic` with col 0-based and severity Literal-validated |
|
|
183
|
+
| `godel/_exceptions.py` | Two disjoint hierarchies: `GodelStrictError` (engine guard) and `GodelError` (workflow-author errors: `AgentRefusal`, `SchemaValidationFailure`, `HumanTimeout`, `NonDeterministicEscape`, `RewindUnsafe`); subclasses use `**kwargs` forwarding to the base |
|
|
184
|
+
| `godel/intervention/_context.py` | `InterventionContext`, `FailureInfo`, `SourceFile` + `build_intervention_context(run_id)` — reconstructs failure, local-state snapshot, sources from the audit log |
|
|
185
|
+
| `godel/intervention/_tools.py` | `InterventionToolset` — `edit_file`, `resume`, `give_up`, `rewind` returning `RewindResult` (incl. `already_rewound_ids`) |
|
|
186
|
+
| `godel/intervention/default_agent.py` | Default LLM repair agent — closure-factory `_make_intervention_workflow` keeps `@workflow` args slim; per-iteration `@step(name=f"reason_and_call_{i}")`; `_escape_backticks` prevents prompt injection |
|
|
187
|
+
| `godel/cli.py` | New subcommands: `godel pause <run_id>`, `godel rewind <run_id> --to <ids>`, `godel repair <run_id> [--agent MOD:FN] [--dry-run]` (exit codes: 0=resume, 1=give_up, 2=usage, 3=crash) |
|
|
188
|
+
| `tests/test_repair_e2e.py` | M7 exit-criterion E2E: deterministic mock intervention auto-fixes a Pydantic schema typo via real `godel repair` CLI subprocess |
|
|
189
|
+
|
|
190
|
+
(Plus extensive new tests across `test_rewind_*.py`, `test_pause_*.py`, `test_linter_framework.py`, `test_structured_exceptions.py`, `test_intervention_*.py`, `test_repair_cli.py`, `test_step_event_history.py`.)
|
|
191
|
+
|
|
192
|
+
## Key Design Decisions (M4–M7)
|
|
193
|
+
|
|
194
|
+
1. **REWIND emits two events: `phase=intent` (from the primitive) + `phase=outcome` (from `apply_rewind`)** — the pair makes the audit log unambiguous when a rewind operation produces different "requested" vs. "actually invalidated" sets.
|
|
195
|
+
|
|
196
|
+
2. **`rewind(to=[])` raises `ValueError`** before any side effects — empty target lists were silently producing empty REWIND events with no graph cut.
|
|
197
|
+
|
|
198
|
+
3. **`already_rewound_ids` on `RewindResult`** — targets that were already INVALIDATED are returned separately so the intervention agent can distinguish "no-op rewind" from "successful invalidation."
|
|
199
|
+
|
|
200
|
+
4. **Pause sentinel uses atomic write** — `mkstemp(dir=parent, suffix=f".{run_id}.pause.tmp")` + `os.replace`; `clear_pause_request` globs scoped to `*.{run_id}.pause.tmp` so concurrent runs are not affected.
|
|
201
|
+
|
|
202
|
+
5. **Per-branch replay-suppress flag (`WorkflowContext._local_replay_suppress`)** — a sibling parallel branch reaching a non-cached step boundary used to clear the *shared* `event_log._replay_suppress` and corrupt the cached branch's `last_step_event_id()`. Each branch now snapshots the flag at fork time; `_clear_local_suppress()` relies on `asyncio.gather`'s `copy_context()` task isolation to mutate only the calling branch's context.
|
|
203
|
+
|
|
204
|
+
6. **Source-edit guard normalizes whitespace before hashing** — `inspect.getsource()` is `rstrip()`-ed and consecutive blank lines collapsed before SHA-256, so trivial reformat doesn't trip the resume edit detector. Documented limitation: triple-quoted string content is not normalized.
|
|
205
|
+
|
|
206
|
+
7. **Default intervention agent uses a closure-factory** — `@workflow`'s default `repr(args)` capture would have dumped the entire `InterventionContext` (events + sources) into the audit log. The factory pattern keeps the `@workflow`-visible signature to `(run_id: str, run_state: str)`; `ctx`/`tools` are captured via closure.
|
|
207
|
+
|
|
208
|
+
8. **`SchemaValidationFailure` exists in two namespaces** — `godel._exceptions.SchemaValidationFailure` (subclass of `GodelError`, raised by the engine on Pydantic validation) and `godel.agents.SchemaValidationFailure` (subclass of `WorkflowFail`, used by agent factories). They are intentionally distinct; do not unify without auditing all `isinstance` callers.
|
|
209
|
+
|
|
210
|
+
## Gotchas / Open Follow-ups
|
|
211
|
+
|
|
212
|
+
1. ~~**`_replay_suppress_clear_gen` counter is dead code** (filed: `awl-ddk`) — incremented in 3 places but never read. Either wire up a debug consumer or remove.~~ ✓ fixed in f70639d
|
|
213
|
+
|
|
214
|
+
2. ~~**`test_parallel_mixed_cached_race_last_step_event_id` asserts ordering** (`awl-ddk`) — `_step_event_history` is documented as non-deterministic across parallel branches; the test happens to pass today but should switch to set/sorted membership.~~ ✓ fixed in f70639d
|
|
215
|
+
|
|
216
|
+
3. **`_render_context_marker` doesn't strip whitespace-only step_path components** (`awl-uni`) — `' '` is truthy and slips through; fix should use `s.strip()` not bare `if s`.
|
|
217
|
+
|
|
218
|
+
4. **`**kwargs` forwarding in `GodelError` subclasses kills IDE param hints** (`awl-uni`) — typos surface as `TypeError` from `GodelError.__init__` instead of at the subclass call site. Consider `typing_extensions.Unpack[TypedDict]` to restore static visibility.
|
|
219
|
+
|
|
220
|
+
5. **`repair` CLI `--agent MOD:FN` requires `_is_workflow=True`** on the resolved function. Bare `async def` is rejected. The default agent and the test's mock both use the closure-factory pattern: outer function carries the `_is_workflow` marker, inner `@workflow` does the audit work.
|
|
221
|
+
|
|
222
|
+
## What's Left (M8+)
|
|
223
|
+
|
|
224
|
+
- **M8** (`awl-9g1`): DSL ↔ library interop (stretch)
|
|
225
|
+
- Deferred WARN follow-ups: `awl-uni` (GodelError hygiene); `awl-ddk` (replay-suppress dead code + test ordering) has since been fixed in f70639d
|
|
226
|
+
- Backlog (P4): `awl-ul2` parser error recovery, `awl-509` prompt success/failure tracking, `awl-a7p` workflow stdlib
|
|
227
|
+
|
|
228
|
+
## Entry Points
|
|
229
|
+
|
|
230
|
+
- **Spec**: `docs/py-library/02-api.md`, `docs/py-library/03-runtime.md`
|
|
231
|
+
- **Code**: `godel/cli.py` (`repair_cmd`), `godel/intervention/default_agent.py`, `godel/_rewind.py`
|
|
232
|
+
- **Run**: `cd py-library && uv run pytest`
|
|
233
|
+
- **Beads**: `bd ready`
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Monitoring godel runs (agent guide)
|
|
2
|
+
|
|
3
|
+
Tricks for agents that need to babysit a long workflow without burning context or losing
|
|
4
|
+
the run to a terminal mishap.
|
|
5
|
+
|
|
6
|
+
## TL;DR
|
|
7
|
+
|
|
8
|
+
- Run bare (`python -m godel run FILE`), **not** `--watch` / `--plain`. The
|
|
9
|
+
watchers can spam terminals enough to SIGHUP the workflow.
|
|
10
|
+
Until that's fixed, monitor from a separate channel.
|
|
11
|
+
- Tail `runs/<id>.jsonl` directly. It is append-only JSON-per-line and is
|
|
12
|
+
the source of truth for everything godel did.
|
|
13
|
+
- Filter aggressively. The raw stream contains tool calls, streamed agent
|
|
14
|
+
chunks, and per-step entry/exit; ~95% is noise for high-level oversight.
|
|
15
|
+
- Use the harness's event-monitor primitive (one notification per filtered
|
|
16
|
+
line) instead of polling — fewer cache misses, instant on state change.
|
|
17
|
+
|
|
18
|
+
## Files written per run
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
runs/<id>.jsonl # canonical audit log (truncated payloads)
|
|
22
|
+
runs/<id>/transcript.jsonl # full chunked stream: prompts, tool calls,
|
|
23
|
+
# streamed responses, run.start
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
`request.prompt` and `response.value` in `<id>.jsonl` are clipped to ~500
|
|
27
|
+
chars. Full content lives in `transcript.jsonl` as chunked
|
|
28
|
+
`agent.response` events grouped by `stream_path`. Reassemble by
|
|
29
|
+
concatenating `text` fields per `stream_path`.
|
|
30
|
+
|
|
31
|
+
## High-signal event shapes
|
|
32
|
+
|
|
33
|
+
For oversight, these are the only events worth surfacing by default:
|
|
34
|
+
|
|
35
|
+
| op | status | meaning |
|
|
36
|
+
| ------------------- | -------- | ------------------------------------------ |
|
|
37
|
+
| `print` | FINISHED | workflow's own narrative log lines |
|
|
38
|
+
| `input` | STARTED | checkpoint waiting on stdin |
|
|
39
|
+
| `agent.call` | FINISHED | one agent decision completed (with model + schema name) |
|
|
40
|
+
| `agent.call` | FAILED | agent error or schema validation fail |
|
|
41
|
+
| `step.enter` | FAILED | step body raised |
|
|
42
|
+
| `WORKFLOW_*` | FINISHED / FAILED | run lifecycle |
|
|
43
|
+
|
|
44
|
+
Skip `run STARTED`, `step.enter STARTED`, intermediate `agent.call STARTED`
|
|
45
|
+
unless debugging — they are very chatty in any non-trivial workflow.
|
|
46
|
+
|
|
47
|
+
## Monitor recipe (event-driven)
|
|
48
|
+
|
|
49
|
+
**Preferred: `godel tail`.** It is a native Python CLI that follows
|
|
50
|
+
`runs/<id>.jsonl` without shell pipe buffering, waits for new events,
|
|
51
|
+
and exits at run completion.
|
|
52
|
+
|
|
53
|
+
Note: `tail` streams every event raw — it does **not** filter out
|
|
54
|
+
rewind-invalidated or retry-superseded events (no `--all` flag exists
|
|
55
|
+
like on `godel show`). If you rewound the run and are tailing after
|
|
56
|
+
replay, filter invalidated events in your own consumer by tracking
|
|
57
|
+
`status == "INVALIDATED"` or using `seq`/`event_id` ordering against
|
|
58
|
+
the rewind boundary.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
godel tail <id> --format=json # one JSON object per line, stable schema
|
|
62
|
+
godel tail <id> --format=pretty # human-readable table (step_path + status + duration)
|
|
63
|
+
godel tail <id> --no-follow # drain once and exit
|
|
64
|
+
godel tail <id> --no-wait # fail if log file doesn't exist yet
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Pipe `--format=json` into a filter and wire into the harness `Monitor`:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
Monitor(persistent=true,
|
|
71
|
+
command="godel tail <id> --format=json | python -u /tmp/godel_filter.py")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Where the filter is just the per-line logic below (no seek/tell bookkeeping).
|
|
75
|
+
|
|
76
|
+
**Fallback: pure-python file seek.** If for some reason `godel tail` isn't
|
|
77
|
+
available, read the file directly — do *not* use `tail -F | python`,
|
|
78
|
+
since shell pipe buffering can delay events by minutes.
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
# /tmp/godel_monitor.py
|
|
82
|
+
import json, os, time, sys
|
|
83
|
+
|
|
84
|
+
P = sys.argv[1] # path to runs/<id>.jsonl
|
|
85
|
+
pos = os.path.getsize(P) if os.path.exists(P) else 0
|
|
86
|
+
while True:
|
|
87
|
+
try: size = os.path.getsize(P)
|
|
88
|
+
except FileNotFoundError: time.sleep(1); continue
|
|
89
|
+
if size > pos:
|
|
90
|
+
with open(P) as f:
|
|
91
|
+
f.seek(pos)
|
|
92
|
+
for line in f:
|
|
93
|
+
if not line.strip(): continue
|
|
94
|
+
try: e = json.loads(line)
|
|
95
|
+
except: continue
|
|
96
|
+
op, st = e.get("op"), e.get("status")
|
|
97
|
+
sp = "/".join(e.get("step_path") or [])
|
|
98
|
+
if op == "print" and st == "FINISHED":
|
|
99
|
+
t = (e.get("request") or {}).get("text","").strip()
|
|
100
|
+
if t: print("LOG", t, flush=True)
|
|
101
|
+
elif op == "input" and st == "STARTED":
|
|
102
|
+
print("CHECKPOINT", sp, flush=True)
|
|
103
|
+
elif op == "agent.call" and st == "FINISHED":
|
|
104
|
+
r = e.get("request") or {}
|
|
105
|
+
print(f"AGENT {r.get('model')} schema={r.get('schema_name')} @ {sp}", flush=True)
|
|
106
|
+
elif st == "FAILED":
|
|
107
|
+
err = str((e.get("response") or {}).get("error",""))[:160]
|
|
108
|
+
print(f"FAIL {op} @ {sp} :: {err}", flush=True)
|
|
109
|
+
elif op == "WORKFLOW_FINISHED" or (op=="WORKFLOW_STARTED" and st=="FINISHED"):
|
|
110
|
+
print(f"WORKFLOW {st}", flush=True)
|
|
111
|
+
pos = f.tell()
|
|
112
|
+
time.sleep(1)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Wire that to `Monitor(persistent=true, command="python -u /tmp/godel_monitor.py runs/<id>.jsonl")` (Claude Code-specific; check for equivalent patterns on other backends).
|
|
116
|
+
Each filtered line becomes one harness notification — no polling, no cache
|
|
117
|
+
churn, instant on event.
|
|
118
|
+
|
|
119
|
+
## Polling fallback (when Monitor isn't available)
|
|
120
|
+
|
|
121
|
+
If you must poll, do it cache-aware: every 250–270s (stay inside the 5-min
|
|
122
|
+
prompt cache) is the right cadence for slow agent calls. Compute a small
|
|
123
|
+
summary, never dump raw events:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
import json, collections
|
|
127
|
+
ev=[json.loads(l) for l in open(p)]
|
|
128
|
+
last=ev[-1]
|
|
129
|
+
ac=[e for e in ev if e.get("op")=="agent.call"]
|
|
130
|
+
finished=collections.Counter(
|
|
131
|
+
(e.get("step_path") or ["?"])[-1]
|
|
132
|
+
for e in ev if e.get("op")=="step.enter" and e.get("status")=="FINISHED"
|
|
133
|
+
)
|
|
134
|
+
# print: ev count, last op/status/step, agent fin/fail, finished steps
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Don't poll faster than ~120s; agent calls regularly take 1–3 min.
|
|
138
|
+
|
|
139
|
+
## Identifying the in-flight agent
|
|
140
|
+
|
|
141
|
+
A `STARTED` event with no matching `FINISHED`/`FAILED` (compare on
|
|
142
|
+
`request_hash`) is in flight:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
inflight = [e for e in ev
|
|
146
|
+
if e.get("op")=="agent.call" and e.get("status")=="STARTED"
|
|
147
|
+
and not any(x.get("request_hash")==e.get("request_hash")
|
|
148
|
+
and x.get("status") in ("FINISHED","FAILED")
|
|
149
|
+
for x in ev)]
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
This surfaces the model, schema_name, and step path — enough to tell the user "opus
|
|
153
|
+
crunching PlanReview right now."
|
|
154
|
+
|
|
155
|
+
## Reading what an agent actually said
|
|
156
|
+
|
|
157
|
+
Schema'd responses appear in `<id>.jsonl` as a truncated `response.value`
|
|
158
|
+
string repr of the pydantic model. To get the full structured output:
|
|
159
|
+
|
|
160
|
+
1. From `<id>.jsonl`, grab the event's `step_path` and `stream_path`.
|
|
161
|
+
2. In `transcript.jsonl`, collect `agent.response` events with the same
|
|
162
|
+
`step_path` and matching `stream_path` (a tuple).
|
|
163
|
+
3. Concatenate their `text` fields in seq order — that's the raw model
|
|
164
|
+
output (typically JSON).
|
|
165
|
+
4. Parse with `json.JSONDecoder().raw_decode(text.lstrip())` (the model may
|
|
166
|
+
continue after the JSON closes; raw_decode stops at the first object).
|
|
167
|
+
|
|
168
|
+
## Recovery patterns
|
|
169
|
+
|
|
170
|
+
- **Terminal died, run cancelled (`CancelledError`)**: try
|
|
171
|
+
`python -m godel resume <id>` first. If it aborts with
|
|
172
|
+
`UnsafeResumeError`, the dead step had a non-idempotent in-flight
|
|
173
|
+
side-effect call. Three options, in order of preference:
|
|
174
|
+
1. **Rewind then resume.** Identify the last `step.enter FINISHED`
|
|
175
|
+
(or `agent.call FINISHED`) in `runs/<id>.jsonl`, then
|
|
176
|
+
`godel rewind <id> --to <event_id>` followed by
|
|
177
|
+
`godel resume <id>`. The log is append-only — rewind does not
|
|
178
|
+
delete events, it marks everything past the target as invalidated
|
|
179
|
+
so resume treats them as absent and re-executes from there.
|
|
180
|
+
Useful when the failed call was unrecoverable but the step above
|
|
181
|
+
it can be re-executed.
|
|
182
|
+
2. `godel repair <id>` — drops an intervention agent into the
|
|
183
|
+
crashed run to unstick manually.
|
|
184
|
+
3. Fresh run (last resort — loses all prior agent tokens).
|
|
185
|
+
|
|
186
|
+
- **Run looks stuck**: check whether last event is `input STARTED` (waiting
|
|
187
|
+
on stdin) before assuming a hang. `input()` is `sys.stdin.readline()` —
|
|
188
|
+
pipe stdin or press enter in the controlling terminal.
|
|
189
|
+
- **Run still alive?**: `ps -ef | grep "godel run"`. No process + no
|
|
190
|
+
recent jsonl writes = dead.
|
|
191
|
+
|
|
192
|
+
## Token thrift while monitoring
|
|
193
|
+
|
|
194
|
+
- Never read the full audit log. Tail offsets, last N lines, or summary
|
|
195
|
+
counters only.
|
|
196
|
+
- Never paste raw transcript chunks into your context — reassemble the
|
|
197
|
+
small slice you need (one agent's response), then summarize.
|
|
198
|
+
- Monitor notifications are cheap; one line per event. Polling snapshots
|
|
199
|
+
are expensive; keep them under ~10 lines of formatted output.
|
|
200
|
+
- Each schedule wakeup invalidates the prompt cache if it lands past 5
|
|
201
|
+
min. Choose 270s (cache-warm) or 1200s+ (one cold fetch buys a long
|
|
202
|
+
wait); avoid the 300–600s sour spot.
|