arh-oc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arh_oc-0.1.0/PKG-INFO +69 -0
- arh_oc-0.1.0/README.md +60 -0
- arh_oc-0.1.0/pyproject.toml +23 -0
- arh_oc-0.1.0/src/arh/__init__.py +4 -0
- arh_oc-0.1.0/src/arh/cli.py +297 -0
- arh_oc-0.1.0/src/arh/core.py +237 -0
- arh_oc-0.1.0/src/arh/io.py +50 -0
- arh_oc-0.1.0/src/arh/opencode.py +362 -0
- arh_oc-0.1.0/src/arh/phases/__init__.py +1 -0
- arh_oc-0.1.0/src/arh/phases/contract.py +188 -0
- arh_oc-0.1.0/src/arh/phases/feedback.py +185 -0
- arh_oc-0.1.0/src/arh/phases/research.py +366 -0
- arh_oc-0.1.0/src/arh/phases/research_loop.py +197 -0
- arh_oc-0.1.0/src/arh/phases/setup.py +477 -0
- arh_oc-0.1.0/src/arh/prompts/__init__.py +1 -0
- arh_oc-0.1.0/src/arh/prompts/contract.md +41 -0
- arh_oc-0.1.0/src/arh/prompts/feedback.md +39 -0
- arh_oc-0.1.0/src/arh/prompts/research_loop_summary.md +18 -0
- arh_oc-0.1.0/src/arh/prompts/research_patch.md +31 -0
- arh_oc-0.1.0/src/arh/prompts/research_plan.md +27 -0
- arh_oc-0.1.0/src/arh/prompts/setup_inspect.md +34 -0
- arh_oc-0.1.0/src/arh/prompts/setup_patch.md +40 -0
- arh_oc-0.1.0/src/arh/results.py +148 -0
- arh_oc-0.1.0/src/arh/schema.py +522 -0
arh_oc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: arh-oc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: auto research harness based on opencode server
|
|
5
|
+
Requires-Dist: pydantic>=2.11,<3.0
|
|
6
|
+
Requires-Dist: typer>=0.15,<1.0
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# ARH
|
|
11
|
+
|
|
12
|
+
ARH is a small CLI for autoresearch-style deep learning iteration.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
From PyPI:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
uv tool install arh-oc
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Then run the CLI as:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
arh --help
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
From this repo:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
uv pip install -e .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Or inside another research repo:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install -e /path/to/arh
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Basic flow
|
|
41
|
+
|
|
42
|
+
Inside your training repo:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
arh contract
|
|
46
|
+
arh setup
|
|
47
|
+
arh research
|
|
48
|
+
arh feedback
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
For continuous looping:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
arh research-loop
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## What each command does
|
|
58
|
+
|
|
59
|
+
- `arh contract`: creates or updates `research.md`
|
|
60
|
+
- `arh setup`: inspects the train entrypoint, patches smoke markers, and runs setup smoke
|
|
61
|
+
- `arh research`: creates the next experiment, commits it, and launches it in `tmux`
|
|
62
|
+
- `arh feedback`: reads the latest finished run and appends feedback to `results.md`
|
|
63
|
+
- `arh research-loop`: alternates `feedback` and `research` until stopped or a stop condition is reached
|
|
64
|
+
|
|
65
|
+
## Files created in your research repo
|
|
66
|
+
|
|
67
|
+
- `research.md`: research contract
|
|
68
|
+
- `results.md`: experiment and feedback log
|
|
69
|
+
- `.autoresearch/logs/`: smoke and experiment logs
|
arh_oc-0.1.0/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# ARH
|
|
2
|
+
|
|
3
|
+
ARH is a small CLI for autoresearch-style deep learning iteration.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
From PyPI:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
uv tool install arh-oc
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Then run the CLI as:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
arh --help
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
From this repo:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uv pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Or inside another research repo:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install -e /path/to/arh
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Basic flow
|
|
32
|
+
|
|
33
|
+
Inside your training repo:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
arh contract
|
|
37
|
+
arh setup
|
|
38
|
+
arh research
|
|
39
|
+
arh feedback
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For continuous looping:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
arh research-loop
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## What each command does
|
|
49
|
+
|
|
50
|
+
- `arh contract`: creates or updates `research.md`
|
|
51
|
+
- `arh setup`: inspects the train entrypoint, patches smoke markers, and runs setup smoke
|
|
52
|
+
- `arh research`: creates the next experiment, commits it, and launches it in `tmux`
|
|
53
|
+
- `arh feedback`: reads the latest finished run and appends feedback to `results.md`
|
|
54
|
+
- `arh research-loop`: alternates `feedback` and `research` until stopped or a stop condition is reached
|
|
55
|
+
|
|
56
|
+
## Files created in your research repo
|
|
57
|
+
|
|
58
|
+
- `research.md`: research contract
|
|
59
|
+
- `results.md`: experiment and feedback log
|
|
60
|
+
- `.autoresearch/logs/`: smoke and experiment logs
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "arh-oc"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "auto research harness based on opencode server"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"pydantic>=2.11,<3.0",
|
|
9
|
+
"typer>=0.15,<1.0",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[tool.setuptools.package-data]
|
|
13
|
+
arh = ["prompts/*.md"]
|
|
14
|
+
|
|
15
|
+
[tool.uv.build-backend]
|
|
16
|
+
module-name = "arh"
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
arh = "arh.cli:main"
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["uv_build>=0.9.6,<0.10.0"]
|
|
23
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
|
|
8
|
+
from . import __version__
|
|
9
|
+
from .core import run_opencode_smoke
|
|
10
|
+
from .phases import contract as contract_phase
|
|
11
|
+
from .phases import feedback as feedback_phase
|
|
12
|
+
from .phases import research as research_phase
|
|
13
|
+
from .phases import research_loop as research_loop_phase
|
|
14
|
+
from .phases import setup as setup_phase
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
app = typer.Typer(
|
|
18
|
+
help="Autoresearch CLI for contract setup, smoke setup, research runs, and feedback."
|
|
19
|
+
)
|
|
20
|
+
opencode_app = typer.Typer(help="Low-level OpenCode connectivity helpers.")
|
|
21
|
+
app.add_typer(opencode_app, name="opencode")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def cwd() -> Path:
    """Return the current working directory as an absolute, resolved Path."""
    return Path().resolve()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _handle_common_file_not_found(exc: FileNotFoundError) -> None:
    """Report a missing-file error in red, hint at the fix for a missing
    contract, then terminate the CLI with exit code 1 (always raises)."""
    text = str(exc)
    typer.secho(text, fg=typer.colors.RED, err=True)
    if "contract file not found" in text:
        hint = "Run `arh contract` first to create `research.md`, or pass `--contract <path>`."
        typer.secho(hint, fg=typer.colors.YELLOW, err=True)
    raise typer.Exit(code=1)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@app.command(help="Interactively create or update `research.md`, the project contract.")
def contract(
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    output: str = typer.Option("research.md", help="Path to write the contract markdown."),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Show detailed internal streaming events."
    ),
) -> None:
    """Run the contract phase and report the session plus where the file landed."""
    res = contract_phase.run(
        cwd(), host=host, port=port, output_path=output, model=model, verbose=verbose
    )
    print(f"status: {res['status']}")
    print(f"session_id: {res['session_id']}")
    if "output_path" in res:
        print(f"saved_to: {res['output_path']}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@app.command(
    help="Inspect the training entrypoint, patch smoke instrumentation, and run setup smoke."
)
def setup(
    contract: str = typer.Option("research.md", help="Path to the contract markdown."),
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Show detailed internal streaming events."
    ),
) -> None:
    """Run the setup phase; a missing contract becomes a friendly CLI error."""
    try:
        res = setup_phase.run(
            cwd(), contract_path=contract, host=host, port=port, model=model, verbose=verbose
        )
    except FileNotFoundError as exc:
        _handle_common_file_not_found(exc)
    print(f"status: {res['status']}")
    print(f"session_id: {res['session_id']}")
    if "smoke_result" in res:
        smoke = res["smoke_result"]
        print(f"log_path: {smoke['log_path']}")
        print(f"command: {smoke['command']}")
    if res.get("result_marker"):
        print(f"result: {res['result_marker']}")
    if "git" in res:
        print(f"git: {res['git']}")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@app.command(
    help="Create the next experiment, patch the repo, commit it, and launch the run in tmux."
)
def research(
    contract: str = typer.Option("research.md", help="Path to the contract markdown."),
    results: str = typer.Option(
        "results.md", help="Path to the experiment results log markdown."
    ),
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Show detailed internal streaming events."
    ),
) -> None:
    """Run the research phase and echo the launch details for the new experiment."""
    try:
        res = research_phase.run(
            cwd(),
            contract_path=contract,
            results_path=results,
            host=host,
            port=port,
            model=model,
            verbose=verbose,
        )
    except FileNotFoundError as exc:
        _handle_common_file_not_found(exc)
    print(f"status: {res['status']}")
    # These scalar fields are printed in a fixed order when present.
    for key in ("exp_id", "session_id", "commit", "polling_interval_seconds"):
        if key in res:
            print(f"{key}: {res[key]}")
    if "tmux_session_name" in res and "launch" not in res:
        print(f"tmux_session_name: {res['tmux_session_name']}")
    if res.get("launch"):
        launch = res["launch"]
        print(f"tmux_session_name: {launch['tmux_session_name']}")
        print(f"log_path: {launch['log_path']}")
        print(f"command: {launch['command']}")
    if res.get("log_tail"):
        print("log_tail:")
        print(res["log_tail"])
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@app.command(
    help="Inspect the latest finished run, append feedback, and suggest the next action."
)
def feedback(
    contract: str = typer.Option("research.md", help="Path to the contract markdown."),
    results: str = typer.Option(
        "results.md", help="Path to the experiment results log markdown."
    ),
    exp_id: int | None = typer.Option(None, help="Specific experiment id to inspect."),
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Show detailed internal streaming events."
    ),
) -> None:
    """Run the feedback phase and print the run verdict / summary fields."""
    try:
        res = feedback_phase.run(
            cwd(),
            contract_path=contract,
            results_path=results,
            exp_id=exp_id,
            host=host,
            port=port,
            model=model,
            verbose=verbose,
        )
    except FileNotFoundError as exc:
        _handle_common_file_not_found(exc)
    print(f"status: {res['status']}")
    if "exp_id" in res:
        print(f"exp_id: {res['exp_id']}")
    # Extra hints are only relevant while the experiment is still running.
    if res.get("status") == "running":
        for key in ("next_phase", "suggested_sleep_sec"):
            if key in res:
                print(f"{key}: {res[key]}")
    for key in ("tmux_session_name", "log_path"):
        if key in res:
            print(f"{key}: {res[key]}")
    if res.get("result_marker"):
        print(f"result: {res['result_marker']}")
    if "summary" in res:
        s = res["summary"]
        print(f"main_metric_value: {s.main_metric}")
        print(f"sub_metric_value: {s.sub_metric}")
        print(f"status_decision: {s.status}")
        print(f"branch_action: {s.branch_action}")
        if s.accepted_commit:
            print(f"accepted_commit: {s.accepted_commit}")
        print(f"description: {s.description}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@app.command(
    "research-loop",
    help="Continuously alternate feedback and research phases until stopped or a stop condition is reached.",
)
def research_loop(
    contract: str = typer.Option("research.md", help="Path to the contract markdown."),
    results: str = typer.Option(
        "results.md", help="Path to the experiment results log markdown."
    ),
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Show detailed internal streaming events."
    ),
    max_cycles: int = typer.Option(
        0, help="Stop after N loop cycles. Use 0 for no explicit cycle limit."
    ),
) -> None:
    """Run the feedback/research loop and print why and where it stopped."""
    try:
        res = research_loop_phase.run(
            cwd(),
            contract_path=contract,
            results_path=results,
            host=host,
            port=port,
            model=model,
            verbose=verbose,
            max_cycles=max_cycles,
        )
    except FileNotFoundError as exc:
        _handle_common_file_not_found(exc)
    print(f"status: {res['status']}")
    for key in ("reason", "stop_condition", "completed_runs", "cycles", "exp_id"):
        if key in res:
            print(f"{key}: {res[key]}")
    if res.get("final_summary"):
        print(f"final_summary: {res['final_summary']}")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
@opencode_app.command(
    "smoke",
    help="Start a local OpenCode server, send one test prompt, and print the raw reply.",
)
def opencode_smoke(
    host: str = typer.Option("127.0.0.1", help="OpenCode server hostname"),
    port: int = typer.Option(4096, help="OpenCode server port"),
    prompt: str = typer.Option(
        "Say hello in one short sentence.", help="Test prompt to send."
    ),
    model: str = typer.Option(
        "openai/gpt-5.3-codex-spark", help="Model in provider/model format."
    ),
) -> None:
    """Exercise OpenCode connectivity end-to-end and dump the raw message JSON."""
    res = run_opencode_smoke(prompt=prompt, host=host, port=port, model=model)
    print(f"base_url: {res['base_url']}")
    print(f"session_id: {res['session_id']}")
    print(f"reply_text: {res['reply_text'] or '(empty)'}")
    print("raw_message:")
    print(json.dumps(res["message"], ensure_ascii=False, indent=2))
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
@app.command(help="Print the installed ARH version.")
def version() -> None:
    """Print the package version string (`arh.__version__`) to stdout."""
    print(__version__)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def main() -> None:
    """Console-script entry point (`arh = "arh.cli:main"`): dispatch to the Typer app."""
    app()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from collections import deque
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
from .opencode import (
|
|
14
|
+
create_session,
|
|
15
|
+
extract_text_reply,
|
|
16
|
+
send_session_message,
|
|
17
|
+
start_server,
|
|
18
|
+
stop_process,
|
|
19
|
+
wait_for_health,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Per-repo scratch directory plus the bookkeeping files stored inside it.
RUN_DIR = Path(".autoresearch")
STATE_FILE = "state.json"
JOURNAL_FILE = "journal.jsonl"


def now_iso() -> str:
    """Current UTC time as a second-resolution ISO-8601 string (e.g. ...+00:00)."""
    stamp = datetime.now(timezone.utc).replace(microsecond=0)
    return stamp.isoformat()


def run_path(cwd: Path) -> Path:
    """Location of the `.autoresearch` directory under *cwd*."""
    return cwd / RUN_DIR


def state_path(cwd: Path) -> Path:
    """Location of the JSON state file inside the run directory."""
    return run_path(cwd) / STATE_FILE


def journal_path(cwd: Path) -> Path:
    """Location of the append-only JSONL journal inside the run directory."""
    return run_path(cwd) / JOURNAL_FILE


def ensure_run_dir(cwd: Path) -> None:
    """Create the run directory (with parents) if it does not already exist."""
    run_path(cwd).mkdir(parents=True, exist_ok=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def init_state(cwd: Path, spec_path: str = "experiment.md") -> Dict[str, Any]:
    """Create a fresh state file and return its contents.

    Raises FileExistsError instead of clobbering an existing state.
    """
    ensure_run_dir(cwd)
    target = state_path(cwd)
    if target.exists():
        raise FileExistsError(
            "already initialized: .autoresearch/state.json already exists"
        )

    state: Dict[str, Any] = {
        "status": "initialized",
        "run_id": f"run-{int(time.time())}",
        "tick": 0,
        "created_at": now_iso(),
        "updated_at": now_iso(),
        "spec_path": spec_path,
        "last_summary": "Initialized.",
    }
    target.write_text(json.dumps(state, indent=2), encoding="utf-8")
    return state
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_state(cwd: Path) -> Dict[str, Any]:
    """Read and parse the state file; raise FileNotFoundError when uninitialized."""
    source = state_path(cwd)
    if not source.exists():
        raise FileNotFoundError(
            "not initialized: .autoresearch/state.json is missing. run `arh init` first"
        )
    return json.loads(source.read_text(encoding="utf-8"))
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def save_state(cwd: Path, state: Dict[str, Any]) -> None:
    """Stamp *state* with a fresh `updated_at` and persist it as indented JSON.

    Note: mutates the caller's dict (the timestamp is written in place).
    """
    state["updated_at"] = now_iso()
    serialized = json.dumps(state, indent=2)
    state_path(cwd).write_text(serialized, encoding="utf-8")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def append_journal(cwd: Path, event: Dict[str, Any]) -> None:
    """Append *event* (plus an `_at` UTC timestamp) as one JSON line to the journal.

    Fix: the file is opened in text mode, which already translates a newline
    character to the platform line ending on write; writing `os.linesep` here
    produced CR CR LF lines on Windows and corrupted the JSONL stream.
    """
    ensure_run_dir(cwd)
    payload = dict(event)  # shallow copy so the caller's dict is not mutated
    payload["_at"] = now_iso()
    with journal_path(cwd).open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(payload, ensure_ascii=False) + "\n")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def read_spec(cwd: Path, state: Dict[str, Any]) -> str:
    """Return the experiment spec text, or a placeholder when the file is absent."""
    spec_file = cwd / state.get("spec_path", "experiment.md")
    if spec_file.exists():
        return spec_file.read_text(encoding="utf-8", errors="ignore")
    return "(spec file not found)"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def read_recent_journal(cwd: Path, max_lines: int = 20) -> List[Dict[str, Any]]:
    """Parse up to *max_lines* trailing journal entries.

    Blank lines and lines that fail to parse as JSON are skipped silently;
    a missing journal file yields an empty list.
    """
    source = journal_path(cwd)
    if not source.exists():
        return []
    with source.open("r", encoding="utf-8") as handle:
        tail = deque(handle, maxlen=max_lines)
    entries: List[Dict[str, Any]] = []
    for raw in tail:
        raw = raw.strip()
        if not raw:
            continue
        try:
            entries.append(json.loads(raw))
        except json.JSONDecodeError:
            pass
    return entries
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def make_context(state: Dict[str, Any], cwd: Path) -> Dict[str, Any]:
    """Assemble the decision context: run metadata, a spec excerpt, recent decisions."""
    decisions = [
        entry
        for entry in read_recent_journal(cwd, max_lines=10)
        if entry.get("type") == "decision"
    ]
    return {
        "run_id": state.get("run_id"),
        "tick": state.get("tick", 0),
        "status": state.get("status"),
        "spec_path": state.get("spec_path", "experiment.md"),
        # Cap the excerpt so the context stays small.
        "spec_excerpt": read_spec(cwd, state)[:600],
        "recent_decisions": decisions,
    }
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def make_decision(context: Dict[str, Any]) -> Dict[str, Any]:
    """Produce the scripted decision record for the next tick.

    The run keeps the "running" status until tick 1000, after which it is
    marked "stopped".
    """
    tick = int(context.get("tick", 0))
    next_status = "running" if tick < 1000 else "stopped"
    return {
        "type": "decision",
        "summary": f"tick {tick + 1} executed; ready for next research step",
        "next_action": {
            "type": "request_observation",
            "detail": "run an experiment based on this context and report results via `arh note`",
        },
        "state_update": {"status": next_status},
        "memory_update": "Persist the latest evidence and continue loop-based reasoning.",
    }
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@dataclass
class TickResult:
    """Bundle returned by `run_tick`: the persisted state plus what produced it."""

    # State dict after the tick was applied and saved.
    state: Dict[str, Any]
    # Context fed to `make_decision`; empty dict when the run was already stopped.
    context: Dict[str, Any]
    # Decision record appended to the journal for this tick.
    decision: Dict[str, Any]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def run_tick(cwd: Path = Path(".")) -> TickResult:
    """Advance the run by one tick.

    A run whose state is already "stopped" is left untouched and reported as
    a no-op. Otherwise: build context, derive a decision, bump the tick
    counter, persist the state, and journal both context and decision.
    """
    state = load_state(cwd)
    if state.get("status") == "stopped":
        noop = {"type": "noop", "summary": "already stopped"}
        return TickResult(state=state, context={}, decision=noop)

    context = make_context(state, cwd)
    decision = make_decision(context)
    state["tick"] = int(state.get("tick", 0)) + 1
    new_status = decision.get("state_update", {}).get("status", state.get("status"))
    state["status"] = new_status
    state["last_summary"] = decision.get("summary", "")
    save_state(cwd, state)
    append_journal(cwd, context)
    append_journal(cwd, decision)
    return TickResult(state=state, context=context, decision=decision)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def run_loop(
    cwd: Path, max_ticks: int = 1, sleep_secs: float = 0.0
) -> List[TickResult]:
    """Run up to *max_ticks* ticks, stopping early once the state reports stopped.

    An optional sleep between ticks throttles the loop; no sleep after the
    final (stopped) tick.
    """
    collected: List[TickResult] = []
    for _ in range(max_ticks):
        outcome = run_tick(cwd)
        collected.append(outcome)
        if outcome.state.get("status") == "stopped":
            break
        if sleep_secs > 0:
            time.sleep(sleep_secs)
    return collected
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def record_note(cwd: Path, text: str) -> None:
    """Append a free-form observation to the journal."""
    event = {"type": "note", "text": text}
    append_journal(cwd, event)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def run_status(cwd: Path) -> str:
    """Render a short human-readable status report for the current run."""
    state = load_state(cwd)
    history = read_recent_journal(cwd, max_lines=5)
    report = [
        "Status summary",
        f"run_id: {state.get('run_id')}",
        f"status: {state.get('status')}",
        f"tick: {state.get('tick')}",
        f"spec: {state.get('spec_path')}",
        f"last_summary: {state.get('last_summary')}",
    ]
    if history:
        report.append(f"recent_events: {len(history)}")
        # Show at most the three newest events, numbered from 1.
        for idx, item in enumerate(history[-3:], start=1):
            report.append(
                f"  {idx}) {item.get('type')}: {item.get('summary', item.get('text', ''))}"
            )
    return "\n".join(report)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def run_opencode_smoke(
    prompt: str = "Say hello in one short sentence.",
    host: str = "127.0.0.1",
    port: int = 4096,
    model: str | None = None,
) -> Dict[str, Any]:
    """Boot a local OpenCode server, send one prompt, and return the raw exchange.

    The spawned server process is always stopped, even when any step raises.
    """
    parsed = urlparse(f"http://{host}:{port}")
    base_url = f"{parsed.scheme}://{parsed.netloc}"
    process = start_server(host=host, port=port)
    try:
        health = wait_for_health(base_url)
        session = create_session(base_url, title="arh smoke test")
        sid = session.get("id") or session.get("session", {}).get("id")
        if not (isinstance(sid, str) and sid):
            raise RuntimeError(f"failed to extract session id from response: {session}")
        message = send_session_message(base_url, sid, prompt, model=model)
        return {
            "base_url": base_url,
            "health": health,
            "session": session,
            "session_id": sid,
            "message": message,
            "reply_text": extract_text_reply(message),
        }
    finally:
        stop_process(process)
|