ralph-code 0.6.1__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ralph_code-0.6.1 → ralph_code-0.6.2}/PKG-INFO +13 -4
- {ralph_code-0.6.1 → ralph_code-0.6.2}/README.md +12 -3
- {ralph_code-0.6.1 → ralph_code-0.6.2}/pyproject.toml +1 -1
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/__init__.py +1 -1
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/app.py +1 -1
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/config.py +25 -3
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/git_manager.py +5 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/harness.py +8 -2
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/harness_runner.py +186 -40
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/prd_manager.py +19 -3
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/workflow.py +54 -17
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/PKG-INFO +13 -4
- {ralph_code-0.6.1 → ralph_code-0.6.2}/setup.py +1 -1
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_config.py +14 -2
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_harness.py +25 -6
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_harness_runner.py +105 -9
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_prd_manager.py +26 -2
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_workflow.py +117 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/LICENSE +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/MANIFEST.in +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/__main__.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/claude_runner.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/colors.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/schemas/ralph_tasks_schema.json +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/schemas/task_schema.json +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/spinner.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/storage.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/tasks.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph/user_stories.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/SOURCES.txt +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/dependency_links.txt +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/entry_points.txt +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/requires.txt +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/ralph_code.egg-info/top_level.txt +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/setup.cfg +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_app.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_app_integration.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_colors.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_git_manager.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_spinner.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_storage.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_tasks.py +0 -0
- {ralph_code-0.6.1 → ralph_code-0.6.2}/tests/test_user_stories.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ralph-code
|
|
3
|
-
Version: 0.6.1
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: Automated task implementation with Claude Code and Codex
|
|
5
5
|
Author: Ralph Coding
|
|
6
6
|
License: MIT
|
|
@@ -37,7 +37,7 @@ Dynamic: requires-python
|
|
|
37
37
|
|
|
38
38
|
# ralph-code
|
|
39
39
|
|
|
40
|
-
Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
|
|
40
|
+
Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Ralph now defaults to continuing past PRD-to-task conversion instead of pausing there, and non-interactive harness calls are bounded by timeouts and turn caps so stuck agent runs fail fast instead of hanging forever. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
|
|
41
41
|
|
|
42
42
|
Because LLMs are carrying out the work, we can specify a job of "Find all the python files in the project that directly or indirectly access sqlalchemy objects, and upgrade the code to work with sqlalchemy 2.*". This will probably result in a single-task project, but that one task might add 50 other tasks (one per file) to the backlog, which are then processed sequentially.
|
|
43
43
|
|
|
@@ -59,6 +59,15 @@ pip install ralph-code
|
|
|
59
59
|
ralph [OPTIONS] [DIRECTORY]
|
|
60
60
|
```
|
|
61
61
|
|
|
62
|
+
## Recent changes
|
|
63
|
+
|
|
64
|
+
Version `0.6.2` includes:
|
|
65
|
+
- Bounded non-interactive harness execution with timeout and turn limits
|
|
66
|
+
- Structured `tasks.json` generation for more reliable PRD conversion
|
|
67
|
+
- Automatic continuation after task generation by default
|
|
68
|
+
- `PRDs/` as the standard task directory, with legacy `PRD/` compatibility
|
|
69
|
+
- Refreshed model catalogs and current defaults
|
|
70
|
+
|
|
62
71
|
### Options
|
|
63
72
|
|
|
64
73
|
- `--debug`: Enable debug logging, logs are saved into the .ralph subdirectory of the project
|
|
@@ -66,8 +75,8 @@ ralph [OPTIONS] [DIRECTORY]
|
|
|
66
75
|
|
|
67
76
|
## Usage
|
|
68
77
|
|
|
69
|
-
First create a task
|
|
70
|
-
Then you run
|
|
78
|
+
First create a task in `PRDs/`: give the task a short name (used for the branch that commits will be added to), and then give it a description.
|
|
79
|
+
Then run `ralph`. It will produce a `.md` file of the specifications, which will be broken into small tasks and written to a `tasks.json` file. Each task will be worked on independently.
|
|
71
80
|
|
|
72
81
|
## Requirements
|
|
73
82
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ralph-code
|
|
2
2
|
|
|
3
|
-
Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
|
|
3
|
+
Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Ralph now defaults to continuing past PRD-to-task conversion instead of pausing there, and non-interactive harness calls are bounded by timeouts and turn caps so stuck agent runs fail fast instead of hanging forever. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
|
|
4
4
|
|
|
5
5
|
Because LLMs are carrying out the work, we can specify a job of "Find all the python files in the project that directly or indirectly access sqlalchemy objects, and upgrade the code to work with sqlalchemy 2.*". This will probably result in a single-task project, but that one task might add 50 other tasks (one per file) to the backlog, which are then processed sequentially.
|
|
6
6
|
|
|
@@ -22,6 +22,15 @@ pip install ralph-code
|
|
|
22
22
|
ralph [OPTIONS] [DIRECTORY]
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
+
## Recent changes
|
|
26
|
+
|
|
27
|
+
Version `0.6.2` includes:
|
|
28
|
+
- Bounded non-interactive harness execution with timeout and turn limits
|
|
29
|
+
- Structured `tasks.json` generation for more reliable PRD conversion
|
|
30
|
+
- Automatic continuation after task generation by default
|
|
31
|
+
- `PRDs/` as the standard task directory, with legacy `PRD/` compatibility
|
|
32
|
+
- Refreshed model catalogs and current defaults
|
|
33
|
+
|
|
25
34
|
### Options
|
|
26
35
|
|
|
27
36
|
- `--debug`: Enable debug logging, logs are saved into the .ralph subdirectory of the project
|
|
@@ -29,8 +38,8 @@ ralph [OPTIONS] [DIRECTORY]
|
|
|
29
38
|
|
|
30
39
|
## Usage
|
|
31
40
|
|
|
32
|
-
First create a task
|
|
33
|
-
Then you run
|
|
41
|
+
First create a task in `PRDs/`: give the task a short name (used for the branch that commits will be added to), and then give it a description.
|
|
42
|
+
Then run `ralph`. It will produce a `.md` file of the specifications, which will be broken into small tasks and written to a `tasks.json` file. Each task will be worked on independently.
|
|
34
43
|
|
|
35
44
|
## Requirements
|
|
36
45
|
|
|
@@ -559,7 +559,7 @@ class RalphApp:
|
|
|
559
559
|
choices: list[Choice] = []
|
|
560
560
|
|
|
561
561
|
for model_name, label in supported_models:
|
|
562
|
-
choices.append(Choice(title=model_name, value=model_name))
|
|
562
|
+
choices.append(Choice(title=f"{model_name} ({label})", value=model_name))
|
|
563
563
|
|
|
564
564
|
# No models available - show alert and return
|
|
565
565
|
if not choices:
|
|
@@ -14,11 +14,13 @@ DEFAULT_CONFIG = {
|
|
|
14
14
|
"harness": "claude",
|
|
15
15
|
"worker_model": "opus",
|
|
16
16
|
"summary_model": "haiku",
|
|
17
|
+
"harness_timeout_seconds": 1800,
|
|
18
|
+
"non_interactive_max_turns": 12,
|
|
17
19
|
"max_iterations": 10,
|
|
18
20
|
"max_story_attempts": 3,
|
|
19
21
|
"auto_spec_without_oversight": True,
|
|
20
22
|
"pause_after_spec": False,
|
|
21
|
-
"pause_after_tasks":
|
|
23
|
+
"pause_after_tasks": False,
|
|
22
24
|
"wait_on_rate_limit": True,
|
|
23
25
|
"pause_on_completion": True,
|
|
24
26
|
"always_build_tests": False,
|
|
@@ -63,7 +65,7 @@ class Config:
|
|
|
63
65
|
if "worker_model" not in self._config:
|
|
64
66
|
harness = self._config.get("harness", DEFAULT_CONFIG["harness"])
|
|
65
67
|
if harness == "codex":
|
|
66
|
-
self._config["worker_model"] = "gpt-5.
|
|
68
|
+
self._config["worker_model"] = "gpt-5.3-codex"
|
|
67
69
|
else:
|
|
68
70
|
self._config["worker_model"] = "opus"
|
|
69
71
|
needs_save = True
|
|
@@ -72,7 +74,7 @@ class Config:
|
|
|
72
74
|
if "summary_model" not in self._config:
|
|
73
75
|
harness = self._config.get("harness", DEFAULT_CONFIG["harness"])
|
|
74
76
|
if harness == "codex":
|
|
75
|
-
self._config["summary_model"] = "gpt-5.
|
|
77
|
+
self._config["summary_model"] = "gpt-5.1-codex-mini"
|
|
76
78
|
else:
|
|
77
79
|
self._config["summary_model"] = "haiku"
|
|
78
80
|
needs_save = True
|
|
@@ -124,6 +126,26 @@ class Config:
|
|
|
124
126
|
self._config["summary_model"] = value
|
|
125
127
|
self._save()
|
|
126
128
|
|
|
129
|
+
@property
|
|
130
|
+
def harness_timeout_seconds(self) -> int:
|
|
131
|
+
"""Maximum seconds to wait for a harness subprocess before aborting."""
|
|
132
|
+
return int(self._config.get("harness_timeout_seconds", 1800))
|
|
133
|
+
|
|
134
|
+
@harness_timeout_seconds.setter
|
|
135
|
+
def harness_timeout_seconds(self, value: int) -> None:
|
|
136
|
+
self._config["harness_timeout_seconds"] = max(1, value)
|
|
137
|
+
self._save()
|
|
138
|
+
|
|
139
|
+
@property
|
|
140
|
+
def non_interactive_max_turns(self) -> int:
|
|
141
|
+
"""Maximum agent turns for non-interactive harness runs."""
|
|
142
|
+
return int(self._config.get("non_interactive_max_turns", 12))
|
|
143
|
+
|
|
144
|
+
@non_interactive_max_turns.setter
|
|
145
|
+
def non_interactive_max_turns(self, value: int) -> None:
|
|
146
|
+
self._config["non_interactive_max_turns"] = max(1, value)
|
|
147
|
+
self._save()
|
|
148
|
+
|
|
127
149
|
@property
|
|
128
150
|
def max_iterations(self) -> int:
|
|
129
151
|
"""Maximum iterations for implementation loop."""
|
|
@@ -230,6 +230,11 @@ class GitManager:
|
|
|
230
230
|
|
|
231
231
|
return self.commit(message)
|
|
232
232
|
|
|
233
|
+
def get_staged_files(self) -> list[str]:
|
|
234
|
+
"""Get list of files currently staged for commit."""
|
|
235
|
+
result = self._run_git("diff", "--cached", "--name-only")
|
|
236
|
+
return [f for f in result.stdout.strip().split("\n") if f]
|
|
237
|
+
|
|
233
238
|
def get_unstaged_files(self) -> list[str]:
|
|
234
239
|
"""Get list of files with unstaged changes (modified + untracked).
|
|
235
240
|
|
|
@@ -46,13 +46,19 @@ HarnessType = Literal["claude", "codex", "custom"]
|
|
|
46
46
|
# These defaults are used when CLI model querying fails or isn't supported.
|
|
47
47
|
DEFAULT_MODELS: dict[HarnessType, list[tuple[str, str]]] = {
|
|
48
48
|
"claude": [
|
|
49
|
+
("default", "Standard"),
|
|
49
50
|
("haiku", "Light"),
|
|
50
51
|
("sonnet", "Standard"),
|
|
51
52
|
("opus", "Standard"),
|
|
53
|
+
("sonnet[1m]", "Extended"),
|
|
54
|
+
("opusplan", "Planning"),
|
|
52
55
|
],
|
|
53
56
|
"codex": [
|
|
54
57
|
("gpt-5.1-codex-mini", "Light"),
|
|
58
|
+
("gpt-5.3-codex-spark", "Preview"),
|
|
59
|
+
("gpt-5.3-codex", "Standard"),
|
|
55
60
|
("gpt-5.2-codex", "Standard"),
|
|
61
|
+
("gpt-5.1-codex", "Standard"),
|
|
56
62
|
("gpt-5.1-codex-max", "Standard"),
|
|
57
63
|
("gpt-5.2", "Standard"),
|
|
58
64
|
],
|
|
@@ -61,13 +67,13 @@ DEFAULT_MODELS: dict[HarnessType, list[tuple[str, str]]] = {
|
|
|
61
67
|
|
|
62
68
|
DEFAULT_WORKER_MODEL: dict[HarnessType, str] = {
|
|
63
69
|
"claude": "opus",
|
|
64
|
-
"codex": "gpt-5.
|
|
70
|
+
"codex": "gpt-5.3-codex",
|
|
65
71
|
"custom": "",
|
|
66
72
|
}
|
|
67
73
|
|
|
68
74
|
DEFAULT_SUMMARY_MODEL: dict[HarnessType, str] = {
|
|
69
75
|
"claude": "haiku",
|
|
70
|
-
"codex": "gpt-5.
|
|
76
|
+
"codex": "gpt-5.1-codex-mini",
|
|
71
77
|
"custom": "",
|
|
72
78
|
}
|
|
73
79
|
|
|
@@ -38,7 +38,7 @@ import time
|
|
|
38
38
|
from dataclasses import dataclass
|
|
39
39
|
from datetime import datetime
|
|
40
40
|
from pathlib import Path
|
|
41
|
-
from typing import Callable
|
|
41
|
+
from typing import Any, Callable
|
|
42
42
|
|
|
43
43
|
from .config import get_config
|
|
44
44
|
from .harness import Harness, HarnessType
|
|
@@ -80,7 +80,7 @@ HARNESS_MODEL_MAPPING: dict[HarnessType, dict[str, str]] = {
|
|
|
80
80
|
"codex": {
|
|
81
81
|
# Codex maps to OpenAI/Codex model names
|
|
82
82
|
"haiku": "gpt-5.1-codex-mini",
|
|
83
|
-
"sonnet": "gpt-5.
|
|
83
|
+
"sonnet": "gpt-5.3-codex",
|
|
84
84
|
"opus": "gpt-5.1-codex-max",
|
|
85
85
|
},
|
|
86
86
|
"custom": {
|
|
@@ -201,19 +201,22 @@ class HarnessRunner:
|
|
|
201
201
|
model: str | None = None,
|
|
202
202
|
print_output: bool = True,
|
|
203
203
|
allow_writes: bool = False,
|
|
204
|
+
output_format: str | None = None,
|
|
205
|
+
json_schema: dict[str, Any] | None = None,
|
|
206
|
+
max_turns: int | None = None,
|
|
204
207
|
) -> list[str]:
|
|
205
208
|
"""Build the harness CLI command with harness-specific flags.
|
|
206
209
|
|
|
207
210
|
This method handles the differences in CLI interfaces between harness types:
|
|
208
211
|
|
|
209
212
|
Claude CLI:
|
|
210
|
-
claude --model <model> --print [--dangerously-skip-permissions]
|
|
213
|
+
claude --model <model> --print [--dangerously-skip-permissions] "<prompt>"
|
|
211
214
|
|
|
212
215
|
Codex CLI (non-interactive):
|
|
213
216
|
codex exec --model <model> --sandbox <mode> "<prompt>"
|
|
214
217
|
|
|
215
218
|
Custom (defaults to Claude-like):
|
|
216
|
-
custom --model <model> --print [--dangerously-skip-permissions]
|
|
219
|
+
custom --model <model> --print [--dangerously-skip-permissions] "<prompt>"
|
|
217
220
|
|
|
218
221
|
Args:
|
|
219
222
|
prompt: The prompt text to send to the harness.
|
|
@@ -228,6 +231,7 @@ class HarnessRunner:
|
|
|
228
231
|
harness = self._get_harness()
|
|
229
232
|
model = model or self._config.worker_model
|
|
230
233
|
mapped_model = self._map_model(model)
|
|
234
|
+
effective_max_turns = max_turns if max_turns is not None else self._config.non_interactive_max_turns
|
|
231
235
|
|
|
232
236
|
cmd = [harness.path]
|
|
233
237
|
|
|
@@ -239,6 +243,14 @@ class HarnessRunner:
|
|
|
239
243
|
if print_output:
|
|
240
244
|
cmd.append("--print")
|
|
241
245
|
|
|
246
|
+
cmd.extend(["--max-turns", str(effective_max_turns)])
|
|
247
|
+
|
|
248
|
+
if output_format is not None:
|
|
249
|
+
cmd.extend(["--output-format", output_format])
|
|
250
|
+
|
|
251
|
+
if json_schema is not None:
|
|
252
|
+
cmd.extend(["--json-schema", json.dumps(json_schema)])
|
|
253
|
+
|
|
242
254
|
if allow_writes:
|
|
243
255
|
cmd.append("--dangerously-skip-permissions")
|
|
244
256
|
|
|
@@ -283,6 +295,9 @@ class HarnessRunner:
|
|
|
283
295
|
max_retries: int = 3,
|
|
284
296
|
retry_delay: float = 60.0,
|
|
285
297
|
allow_writes: bool = False,
|
|
298
|
+
output_format: str | None = None,
|
|
299
|
+
json_schema: dict[str, Any] | None = None,
|
|
300
|
+
max_turns: int | None = None,
|
|
286
301
|
) -> HarnessResponse:
|
|
287
302
|
"""
|
|
288
303
|
Run a prompt through the harness CLI.
|
|
@@ -297,7 +312,14 @@ class HarnessRunner:
|
|
|
297
312
|
Returns:
|
|
298
313
|
HarnessResponse with the result
|
|
299
314
|
"""
|
|
300
|
-
cmd = self._build_command(
|
|
315
|
+
cmd = self._build_command(
|
|
316
|
+
prompt,
|
|
317
|
+
model,
|
|
318
|
+
allow_writes=allow_writes,
|
|
319
|
+
output_format=output_format,
|
|
320
|
+
json_schema=json_schema,
|
|
321
|
+
max_turns=max_turns if max_turns is not None else self._config.non_interactive_max_turns,
|
|
322
|
+
)
|
|
301
323
|
|
|
302
324
|
self._log(f"Command: {' '.join(cmd)}")
|
|
303
325
|
self._log(f"Prompt:\n{prompt}\n")
|
|
@@ -313,12 +335,16 @@ class HarnessRunner:
|
|
|
313
335
|
cwd=self.project_dir,
|
|
314
336
|
capture_output=True,
|
|
315
337
|
text=True,
|
|
338
|
+
timeout=self._config.harness_timeout_seconds,
|
|
316
339
|
# Don't catch KeyboardInterrupt - let it propagate
|
|
317
340
|
)
|
|
318
341
|
|
|
319
342
|
output = result.stdout
|
|
320
343
|
error = result.stderr
|
|
321
344
|
|
|
345
|
+
if output_format == "json" and self._get_harness().type == "claude":
|
|
346
|
+
output, error = self._extract_claude_json_result(output, error, result.returncode)
|
|
347
|
+
|
|
322
348
|
self._log(f"Output:\n{output}\n")
|
|
323
349
|
if error:
|
|
324
350
|
self._log(f"Error:\n{error}\n")
|
|
@@ -356,6 +382,15 @@ class HarnessRunner:
|
|
|
356
382
|
output="",
|
|
357
383
|
error=error_msg,
|
|
358
384
|
)
|
|
385
|
+
except subprocess.TimeoutExpired:
|
|
386
|
+
timeout = self._config.harness_timeout_seconds
|
|
387
|
+
error_msg = f"Harness timed out after {timeout}s"
|
|
388
|
+
self._log(f"Error: {error_msg}")
|
|
389
|
+
return HarnessResponse(
|
|
390
|
+
success=False,
|
|
391
|
+
output="",
|
|
392
|
+
error=error_msg,
|
|
393
|
+
)
|
|
359
394
|
except Exception as e:
|
|
360
395
|
error_msg = str(e)
|
|
361
396
|
self._log(f"Exception: {error_msg}")
|
|
@@ -372,6 +407,77 @@ class HarnessRunner:
|
|
|
372
407
|
rate_limited=True,
|
|
373
408
|
)
|
|
374
409
|
|
|
410
|
+
def _extract_claude_json_result(
|
|
411
|
+
self, output: str, error: str, returncode: int
|
|
412
|
+
) -> tuple[str, str]:
|
|
413
|
+
"""Extract the useful result from Claude's JSON envelope."""
|
|
414
|
+
stripped = output.strip()
|
|
415
|
+
if not stripped:
|
|
416
|
+
return output, error
|
|
417
|
+
|
|
418
|
+
try:
|
|
419
|
+
payload = json.loads(stripped)
|
|
420
|
+
except json.JSONDecodeError:
|
|
421
|
+
return output, error
|
|
422
|
+
|
|
423
|
+
if isinstance(payload, dict):
|
|
424
|
+
result = payload.get("result", payload)
|
|
425
|
+
if isinstance(result, str):
|
|
426
|
+
normalized_output = result
|
|
427
|
+
else:
|
|
428
|
+
normalized_output = json.dumps(result, ensure_ascii=False)
|
|
429
|
+
|
|
430
|
+
if returncode != 0 and not error:
|
|
431
|
+
error_value = payload.get("error") or payload.get("message") or normalized_output
|
|
432
|
+
error = str(error_value)
|
|
433
|
+
|
|
434
|
+
return normalized_output, error
|
|
435
|
+
|
|
436
|
+
return output, error
|
|
437
|
+
|
|
438
|
+
def run_structured(
|
|
439
|
+
self,
|
|
440
|
+
prompt: str,
|
|
441
|
+
json_schema: dict[str, Any],
|
|
442
|
+
model: str | None = None,
|
|
443
|
+
allow_writes: bool = False,
|
|
444
|
+
) -> tuple[HarnessResponse, dict[str, Any] | None]:
|
|
445
|
+
"""Run a prompt expecting a schema-validated JSON object."""
|
|
446
|
+
harness = self._get_harness()
|
|
447
|
+
response = self.run(
|
|
448
|
+
prompt,
|
|
449
|
+
model=model,
|
|
450
|
+
allow_writes=allow_writes,
|
|
451
|
+
output_format="json" if harness.type == "claude" else None,
|
|
452
|
+
json_schema=json_schema if harness.type == "claude" else None,
|
|
453
|
+
)
|
|
454
|
+
if not response.success:
|
|
455
|
+
return response, None
|
|
456
|
+
|
|
457
|
+
try:
|
|
458
|
+
parsed = json.loads(response.output)
|
|
459
|
+
except json.JSONDecodeError as exc:
|
|
460
|
+
return (
|
|
461
|
+
HarnessResponse(
|
|
462
|
+
success=False,
|
|
463
|
+
output=response.output,
|
|
464
|
+
error=f"Structured output was not valid JSON: {exc}",
|
|
465
|
+
),
|
|
466
|
+
None,
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
if not isinstance(parsed, dict):
|
|
470
|
+
return (
|
|
471
|
+
HarnessResponse(
|
|
472
|
+
success=False,
|
|
473
|
+
output=response.output,
|
|
474
|
+
error="Structured output was not a JSON object",
|
|
475
|
+
),
|
|
476
|
+
None,
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
return response, parsed
|
|
480
|
+
|
|
375
481
|
def create_prd(self, task_description: str, learnings: str = "") -> HarnessResponse:
|
|
376
482
|
"""
|
|
377
483
|
Create a PRD (Product Requirements Document) for a task using the prd skill format.
|
|
@@ -673,25 +779,66 @@ IMPORTANT RULES:
|
|
|
673
779
|
8. All stories start with passes: false
|
|
674
780
|
9. branchName MUST start with "{branch_prefix}/" (use lowercase letters, numbers, and hyphens only)
|
|
675
781
|
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
782
|
+
Return a JSON object only. Do not wrap it in markdown fences."""
|
|
783
|
+
|
|
784
|
+
schema: dict[str, Any] = {
|
|
785
|
+
"type": "object",
|
|
786
|
+
"required": ["project", "branchName", "description", "userStories"],
|
|
787
|
+
"properties": {
|
|
788
|
+
"project": {"type": "string", "const": project_name},
|
|
789
|
+
"branchName": {
|
|
790
|
+
"type": "string",
|
|
791
|
+
"pattern": rf"^{re.escape(branch_prefix)}/[a-z0-9-]+$",
|
|
792
|
+
},
|
|
793
|
+
"description": {"type": "string", "minLength": 1},
|
|
794
|
+
"userStories": {
|
|
795
|
+
"type": "array",
|
|
796
|
+
"minItems": 1,
|
|
797
|
+
"items": {
|
|
798
|
+
"type": "object",
|
|
799
|
+
"required": [
|
|
800
|
+
"id",
|
|
801
|
+
"title",
|
|
802
|
+
"description",
|
|
803
|
+
"acceptanceCriteria",
|
|
804
|
+
"priority",
|
|
805
|
+
"passes",
|
|
806
|
+
],
|
|
807
|
+
"properties": {
|
|
808
|
+
"id": {"type": "string", "pattern": r"^US-[0-9]{3}$"},
|
|
809
|
+
"title": {"type": "string", "minLength": 1},
|
|
810
|
+
"description": {"type": "string", "minLength": 1},
|
|
811
|
+
"acceptanceCriteria": {
|
|
812
|
+
"type": "array",
|
|
813
|
+
"minItems": 1,
|
|
814
|
+
"items": {"type": "string", "minLength": 1},
|
|
815
|
+
},
|
|
816
|
+
"priority": {"type": "integer", "minimum": 1},
|
|
817
|
+
"passes": {"type": "boolean", "const": False},
|
|
818
|
+
"notes": {"type": "string"},
|
|
819
|
+
},
|
|
820
|
+
"additionalProperties": False,
|
|
821
|
+
},
|
|
822
|
+
},
|
|
823
|
+
},
|
|
824
|
+
"additionalProperties": False,
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
response, parsed = self.run_structured(
|
|
828
|
+
prompt,
|
|
829
|
+
json_schema=schema,
|
|
830
|
+
model=self._config.summary_model,
|
|
831
|
+
)
|
|
832
|
+
if not response.success or parsed is None:
|
|
833
|
+
return response
|
|
693
834
|
|
|
694
|
-
return
|
|
835
|
+
return HarnessResponse(
|
|
836
|
+
success=True,
|
|
837
|
+
output=json.dumps(parsed, indent=2, ensure_ascii=False),
|
|
838
|
+
error=response.error,
|
|
839
|
+
rate_limited=response.rate_limited,
|
|
840
|
+
cost=response.cost,
|
|
841
|
+
)
|
|
695
842
|
|
|
696
843
|
def implement_story(self, story_prompt: str, context: str = "") -> HarnessResponse:
|
|
697
844
|
"""
|
|
@@ -739,31 +886,30 @@ After implementation, verify each acceptance criterion is met."""
|
|
|
739
886
|
GIT DIFF OF CHANGES:
|
|
740
887
|
{git_diff if git_diff else "(No changes detected)"}
|
|
741
888
|
|
|
742
|
-
Check each acceptance criterion and determine if it has been met.
|
|
889
|
+
Check each acceptance criterion and determine if it has been met.
|
|
743
890
|
|
|
744
|
-
IMPORTANT: Distinguish between these scenarios:
|
|
745
|
-
1. PASSES - All acceptance criteria are verifiably met
|
|
746
|
-
2. FAILS - One or more criteria are NOT met (implementation is wrong/incomplete)
|
|
747
|
-
3. BLOCKED - Some criteria CANNOT BE VERIFIED due to external factors (permission issues,
|
|
748
|
-
|
|
891
|
+
IMPORTANT: Distinguish between these scenarios:
|
|
892
|
+
1. PASSES - All acceptance criteria are verifiably met
|
|
893
|
+
2. FAILS - One or more criteria are NOT met (implementation is wrong/incomplete)
|
|
894
|
+
3. BLOCKED - Some criteria CANNOT BE VERIFIED due to external factors (permission issues,
|
|
895
|
+
pre-existing errors unrelated to this change, environment constraints, etc.)
|
|
749
896
|
|
|
750
|
-
HANDLING
|
|
751
|
-
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
-
|
|
755
|
-
-
|
|
897
|
+
HANDLING AMBIGUOUS OR NONSENSICAL CRITERIA:
|
|
898
|
+
- Do NOT auto-pass criteria as "N/A" or "irrelevant".
|
|
899
|
+
- If a criterion is based on a bad assumption, contradictory, or impossible to satisfy as written,
|
|
900
|
+
mark it as FAILED and explain precisely why.
|
|
901
|
+
- In "To Complete", include concrete guidance to correct that criterion/story definition.
|
|
902
|
+
- Use BLOCKED only when verification is impossible due to external constraints, not because criteria are poor.
|
|
756
903
|
|
|
757
904
|
Respond in this exact format:
|
|
758
905
|
|
|
759
906
|
STATUS: PASSES or FAILS or BLOCKED
|
|
760
907
|
NOTES:
|
|
761
|
-
**What Passed:**
|
|
762
|
-
- ✅ List each criterion that was clearly met
|
|
763
|
-
- ✅ List criteria that are N/A with explanation (e.g., "N/A - file doesn't use this library")
|
|
908
|
+
**What Passed:**
|
|
909
|
+
- ✅ List each criterion that was clearly met
|
|
764
910
|
|
|
765
|
-
**What Failed:**
|
|
766
|
-
- ❌ List each criterion that was NOT met
|
|
911
|
+
**What Failed:**
|
|
912
|
+
- ❌ List each criterion that was NOT met, including malformed/incorrect criteria
|
|
767
913
|
|
|
768
914
|
**What Could Not Be Verified:**
|
|
769
915
|
- ⚠️ List criteria that cannot be verified with reasons (e.g., "Tests execution - requires permission")
|
|
@@ -38,6 +38,11 @@ def slugify(text: str) -> str:
|
|
|
38
38
|
return slug[:50] # Limit length
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def stable_prd_id(file_path: Path) -> str:
|
|
42
|
+
"""Generate a deterministic PRD ID from file path."""
|
|
43
|
+
return str(uuid.uuid5(uuid.NAMESPACE_URL, str(file_path.resolve())))
|
|
44
|
+
|
|
45
|
+
|
|
41
46
|
@dataclass
|
|
42
47
|
class PRD:
|
|
43
48
|
"""Represents a Product Requirements Document."""
|
|
@@ -58,7 +63,7 @@ class PRD:
|
|
|
58
63
|
content = file_path.read_text().strip()
|
|
59
64
|
name = file_path.stem # filename without extension
|
|
60
65
|
return cls(
|
|
61
|
-
id=
|
|
66
|
+
id=stable_prd_id(file_path),
|
|
62
67
|
name=name,
|
|
63
68
|
file_path=file_path,
|
|
64
69
|
is_specced=False,
|
|
@@ -119,7 +124,7 @@ class PRD:
|
|
|
119
124
|
status = "errored"
|
|
120
125
|
|
|
121
126
|
return cls(
|
|
122
|
-
id=
|
|
127
|
+
id=stable_prd_id(file_path),
|
|
123
128
|
name=name,
|
|
124
129
|
file_path=file_path,
|
|
125
130
|
is_specced=True,
|
|
@@ -155,11 +160,22 @@ class PRDManager:
|
|
|
155
160
|
|
|
156
161
|
def __init__(self, project_dir: Path):
|
|
157
162
|
self.project_dir = project_dir
|
|
158
|
-
self.prd_dir =
|
|
163
|
+
self.prd_dir = self._resolve_prd_dir()
|
|
159
164
|
self._ensure_prd_dir()
|
|
160
165
|
self._prds: list[PRD] = []
|
|
161
166
|
self._load()
|
|
162
167
|
|
|
168
|
+
def _resolve_prd_dir(self) -> Path:
|
|
169
|
+
"""Resolve the active PRD directory, supporting the legacy singular path."""
|
|
170
|
+
plural_dir = self.project_dir / "PRDs"
|
|
171
|
+
legacy_dir = self.project_dir / "PRD"
|
|
172
|
+
|
|
173
|
+
if plural_dir.exists():
|
|
174
|
+
return plural_dir
|
|
175
|
+
if legacy_dir.exists():
|
|
176
|
+
return legacy_dir
|
|
177
|
+
return plural_dir
|
|
178
|
+
|
|
163
179
|
def _ensure_prd_dir(self) -> None:
|
|
164
180
|
"""Ensure PRD directory exists."""
|
|
165
181
|
self.prd_dir.mkdir(parents=True, exist_ok=True)
|