ralph-code 0.6.1__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {ralph_code-0.6.1/ralph_code.egg-info → ralph_code-0.6.3}/PKG-INFO +19 -4
  2. {ralph_code-0.6.1 → ralph_code-0.6.3}/README.md +18 -3
  3. {ralph_code-0.6.1 → ralph_code-0.6.3}/pyproject.toml +1 -1
  4. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/__init__.py +1 -1
  5. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/app.py +1 -1
  6. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/config.py +25 -3
  7. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/git_manager.py +14 -4
  8. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/harness.py +8 -2
  9. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/harness_runner.py +186 -40
  10. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/prd_manager.py +19 -3
  11. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/workflow.py +54 -17
  12. {ralph_code-0.6.1 → ralph_code-0.6.3/ralph_code.egg-info}/PKG-INFO +19 -4
  13. {ralph_code-0.6.1 → ralph_code-0.6.3}/setup.py +1 -1
  14. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_config.py +14 -2
  15. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_git_manager.py +55 -0
  16. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_harness.py +25 -6
  17. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_harness_runner.py +105 -9
  18. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_prd_manager.py +26 -2
  19. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_workflow.py +117 -0
  20. {ralph_code-0.6.1 → ralph_code-0.6.3}/LICENSE +0 -0
  21. {ralph_code-0.6.1 → ralph_code-0.6.3}/MANIFEST.in +0 -0
  22. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/__main__.py +0 -0
  23. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/claude_runner.py +0 -0
  24. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/colors.py +0 -0
  25. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/schemas/ralph_tasks_schema.json +0 -0
  26. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/schemas/task_schema.json +0 -0
  27. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/spinner.py +0 -0
  28. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/storage.py +0 -0
  29. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/tasks.py +0 -0
  30. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph/user_stories.py +0 -0
  31. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph_code.egg-info/SOURCES.txt +0 -0
  32. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph_code.egg-info/dependency_links.txt +0 -0
  33. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph_code.egg-info/entry_points.txt +0 -0
  34. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph_code.egg-info/requires.txt +0 -0
  35. {ralph_code-0.6.1 → ralph_code-0.6.3}/ralph_code.egg-info/top_level.txt +0 -0
  36. {ralph_code-0.6.1 → ralph_code-0.6.3}/setup.cfg +0 -0
  37. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_app.py +0 -0
  38. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_app_integration.py +0 -0
  39. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_colors.py +0 -0
  40. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_spinner.py +0 -0
  41. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_storage.py +0 -0
  42. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_tasks.py +0 -0
  43. {ralph_code-0.6.1 → ralph_code-0.6.3}/tests/test_user_stories.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ralph-code
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Summary: Automated task implementation with Claude Code and Codex
5
5
  Author: Ralph Coding
6
6
  License: MIT
@@ -37,7 +37,7 @@ Dynamic: requires-python
37
37
 
38
38
  # ralph-code
39
39
 
40
- Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
40
+ Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Ralph now defaults to continuing past PRD-to-task conversion instead of pausing there, and non-interactive harness calls are bounded by timeouts and turn caps so stuck agent runs fail fast instead of hanging forever. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
41
41
 
42
42
  Because LLMs are carrying out the work, we can specify a job of "Find all the python files in the project that directly or indirectly access sqlalchemy objects, and upgrade the code to work with sqlalchemy 2.*". This will probably result in a single-task project, but that one task might add 50 other tasks (one per file) to the backlog, which are then processed sequentially.
43
43
 
@@ -59,6 +59,21 @@ pip install ralph-code
59
59
  ralph [OPTIONS] [DIRECTORY]
60
60
  ```
61
61
 
62
+ ## Recent changes
63
+
64
+ Version `0.6.3` includes:
65
+ - Safer git integration for staged-file commit checks
66
+ - Correct staged diff generation
67
+ - Deduplicated unstaged file reporting
68
+ - Clearer errors when `git` is not available
69
+
70
+ Version `0.6.2` includes:
71
+ - Bounded non-interactive harness execution with timeout and turn limits
72
+ - Structured `tasks.json` generation for more reliable PRD conversion
73
+ - Automatic continuation after task generation by default
74
+ - `PRDs/` as the standard task directory, with legacy `PRD/` compatibility
75
+ - Refreshed model catalogs and current defaults
76
+
62
77
  ### Options
63
78
 
64
79
  - `--debug`: Enable debug logging, logs are saved into the .ralph subdirectory of the project
@@ -66,8 +81,8 @@ ralph [OPTIONS] [DIRECTORY]
66
81
 
67
82
  ## Usage
68
83
 
69
- First create a task, give a short name for the task (used for the branch commits will be added to), and then give a description.
70
- Then you run the ralph-coder, it will produce a .md file of the specifications, which will be broken into small tasks put into a tasks.json file. Each task will be worked on independently.
84
+ First create a task in `PRDs/`, give a short name for the task (used for the branch commits will be added to), and then give a description.
85
+ Then you run `ralph`, it will produce a `.md` file of the specifications, which will be broken into small tasks put into a `tasks.json` file. Each task will be worked on independently.
71
86
 
72
87
  ## Requirements
73
88
 
@@ -1,6 +1,6 @@
1
1
  # ralph-code
2
2
 
3
- Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
3
+ Automated task implementation with Claude Code and Codex for "Ralph Coding". What is [Ralph Coding](https://ghuntley.com/ralph/)? It's a method of coding where context rot is avoided by controlling the retention of information. This method involves re-invoking claude or codex for each task, and passing information about the requirements, acceptance testing, and any progress that's made (or roadblocks/challenges faced) through files, rather than retaining all prompts + thinking + response tokens. It tends to result in more requests, some duplicated token work, but fairly consistent performance, and best of all it can largely be done unattended. Ralph now defaults to continuing past PRD-to-task conversion instead of pausing there, and non-interactive harness calls are bounded by timeouts and turn caps so stuck agent runs fail fast instead of hanging forever. Recommend Claude Max account or codex equivalent, but be aware that GPT-5 - GPT5.2's slow reasoning and response makes this ponderous, it's fine overnight.
4
4
 
5
5
  Because LLMs are carrying out the work, we can specify a job of "Find all the python files in the project that directly or indirectly access sqlalchemy objects, and upgrade the code to work with sqlalchemy 2.*". This will probably result in a single-task project, but that one task might add 50 other tasks (one per file) to the backlog, which are then processed sequentially.
6
6
 
@@ -22,6 +22,21 @@ pip install ralph-code
22
22
  ralph [OPTIONS] [DIRECTORY]
23
23
  ```
24
24
 
25
+ ## Recent changes
26
+
27
+ Version `0.6.3` includes:
28
+ - Safer git integration for staged-file commit checks
29
+ - Correct staged diff generation
30
+ - Deduplicated unstaged file reporting
31
+ - Clearer errors when `git` is not available
32
+
33
+ Version `0.6.2` includes:
34
+ - Bounded non-interactive harness execution with timeout and turn limits
35
+ - Structured `tasks.json` generation for more reliable PRD conversion
36
+ - Automatic continuation after task generation by default
37
+ - `PRDs/` as the standard task directory, with legacy `PRD/` compatibility
38
+ - Refreshed model catalogs and current defaults
39
+
25
40
  ### Options
26
41
 
27
42
  - `--debug`: Enable debug logging, logs are saved into the .ralph subdirectory of the project
@@ -29,8 +44,8 @@ ralph [OPTIONS] [DIRECTORY]
29
44
 
30
45
  ## Usage
31
46
 
32
- First create a task, give a short name for the task (used for the branch commits will be added to), and then give a description.
33
- Then you run the ralph-coder, it will produce a .md file of the specifications, which will be broken into small tasks put into a tasks.json file. Each task will be worked on independently.
47
+ First create a task in `PRDs/`, give a short name for the task (used for the branch commits will be added to), and then give a description.
48
+ Then you run `ralph`, it will produce a `.md` file of the specifications, which will be broken into small tasks put into a `tasks.json` file. Each task will be worked on independently.
34
49
 
35
50
  ## Requirements
36
51
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ralph-code"
7
- version = "0.6.1"
7
+ version = "0.6.3"
8
8
  description = "Automated task implementation with Claude Code and Codex"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,6 +1,6 @@
1
1
  """ralph-code: Automated task implementation with Claude Code and Codex."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.6.3"
4
4
  __author__ = "Ralph Coding"
5
5
 
6
6
  from .app import RalphApp, main
@@ -559,7 +559,7 @@ class RalphApp:
559
559
  choices: list[Choice] = []
560
560
 
561
561
  for model_name, label in supported_models:
562
- choices.append(Choice(title=model_name, value=model_name))
562
+ choices.append(Choice(title=f"{model_name} ({label})", value=model_name))
563
563
 
564
564
  # No models available - show alert and return
565
565
  if not choices:
@@ -14,11 +14,13 @@ DEFAULT_CONFIG = {
14
14
  "harness": "claude",
15
15
  "worker_model": "opus",
16
16
  "summary_model": "haiku",
17
+ "harness_timeout_seconds": 1800,
18
+ "non_interactive_max_turns": 12,
17
19
  "max_iterations": 10,
18
20
  "max_story_attempts": 3,
19
21
  "auto_spec_without_oversight": True,
20
22
  "pause_after_spec": False,
21
- "pause_after_tasks": True,
23
+ "pause_after_tasks": False,
22
24
  "wait_on_rate_limit": True,
23
25
  "pause_on_completion": True,
24
26
  "always_build_tests": False,
@@ -63,7 +65,7 @@ class Config:
63
65
  if "worker_model" not in self._config:
64
66
  harness = self._config.get("harness", DEFAULT_CONFIG["harness"])
65
67
  if harness == "codex":
66
- self._config["worker_model"] = "gpt-5.2-codex"
68
+ self._config["worker_model"] = "gpt-5.3-codex"
67
69
  else:
68
70
  self._config["worker_model"] = "opus"
69
71
  needs_save = True
@@ -72,7 +74,7 @@ class Config:
72
74
  if "summary_model" not in self._config:
73
75
  harness = self._config.get("harness", DEFAULT_CONFIG["harness"])
74
76
  if harness == "codex":
75
- self._config["summary_model"] = "gpt-5.2"
77
+ self._config["summary_model"] = "gpt-5.1-codex-mini"
76
78
  else:
77
79
  self._config["summary_model"] = "haiku"
78
80
  needs_save = True
@@ -124,6 +126,26 @@ class Config:
124
126
  self._config["summary_model"] = value
125
127
  self._save()
126
128
 
129
+ @property
130
+ def harness_timeout_seconds(self) -> int:
131
+ """Maximum seconds to wait for a harness subprocess before aborting."""
132
+ return int(self._config.get("harness_timeout_seconds", 1800))
133
+
134
+ @harness_timeout_seconds.setter
135
+ def harness_timeout_seconds(self, value: int) -> None:
136
+ self._config["harness_timeout_seconds"] = max(1, value)
137
+ self._save()
138
+
139
+ @property
140
+ def non_interactive_max_turns(self) -> int:
141
+ """Maximum agent turns for non-interactive harness runs."""
142
+ return int(self._config.get("non_interactive_max_turns", 12))
143
+
144
+ @non_interactive_max_turns.setter
145
+ def non_interactive_max_turns(self, value: int) -> None:
146
+ self._config["non_interactive_max_turns"] = max(1, value)
147
+ self._save()
148
+
127
149
  @property
128
150
  def max_iterations(self) -> int:
129
151
  """Maximum iterations for implementation loop."""
@@ -26,6 +26,8 @@ class GitManager:
26
26
  check=check,
27
27
  )
28
28
  return result
29
+ except FileNotFoundError as e:
30
+ raise GitError("Git executable not found on PATH") from e
29
31
  except subprocess.CalledProcessError as e:
30
32
  raise GitError(f"Git command failed: git {' '.join(args)}\n{e.stderr}")
31
33
 
@@ -230,25 +232,33 @@ class GitManager:
230
232
 
231
233
  return self.commit(message)
232
234
 
235
+ def get_staged_files(self) -> list[str]:
236
+ """Get list of files currently staged for commit."""
237
+ result = self._run_git("diff", "--cached", "--name-only")
238
+ return [f for f in result.stdout.strip().split("\n") if f]
239
+
233
240
  def get_unstaged_files(self) -> list[str]:
234
241
  """Get list of files with unstaged changes (modified + untracked).
235
242
 
236
243
  Returns files that have changes not yet added to the staging area.
237
244
  This includes both modified tracked files and new untracked files.
238
245
  """
239
- files = []
246
+ files: list[str] = []
247
+ seen: set[str] = set()
240
248
 
241
249
  # Get modified but unstaged files
242
250
  result = self._run_git("diff", "--name-only")
243
251
  for f in result.stdout.strip().split("\n"):
244
- if f:
252
+ if f and f not in seen:
245
253
  files.append(f)
254
+ seen.add(f)
246
255
 
247
256
  # Get untracked files
248
257
  result = self._run_git("ls-files", "--others", "--exclude-standard")
249
258
  for f in result.stdout.strip().split("\n"):
250
- if f:
259
+ if f and f not in seen:
251
260
  files.append(f)
261
+ seen.add(f)
252
262
 
253
263
  return files
254
264
 
@@ -259,7 +269,7 @@ class GitManager:
259
269
 
260
270
  def get_diff(self, staged: bool = False) -> str:
261
271
  """Get the current diff for stageable files only."""
262
- files = self.get_stageable_files()
272
+ files = self.get_staged_files() if staged else self.get_stageable_files()
263
273
  if not files:
264
274
  return ""
265
275
 
@@ -46,13 +46,19 @@ HarnessType = Literal["claude", "codex", "custom"]
46
46
  # These defaults are used when CLI model querying fails or isn't supported.
47
47
  DEFAULT_MODELS: dict[HarnessType, list[tuple[str, str]]] = {
48
48
  "claude": [
49
+ ("default", "Standard"),
49
50
  ("haiku", "Light"),
50
51
  ("sonnet", "Standard"),
51
52
  ("opus", "Standard"),
53
+ ("sonnet[1m]", "Extended"),
54
+ ("opusplan", "Planning"),
52
55
  ],
53
56
  "codex": [
54
57
  ("gpt-5.1-codex-mini", "Light"),
58
+ ("gpt-5.3-codex-spark", "Preview"),
59
+ ("gpt-5.3-codex", "Standard"),
55
60
  ("gpt-5.2-codex", "Standard"),
61
+ ("gpt-5.1-codex", "Standard"),
56
62
  ("gpt-5.1-codex-max", "Standard"),
57
63
  ("gpt-5.2", "Standard"),
58
64
  ],
@@ -61,13 +67,13 @@ DEFAULT_MODELS: dict[HarnessType, list[tuple[str, str]]] = {
61
67
 
62
68
  DEFAULT_WORKER_MODEL: dict[HarnessType, str] = {
63
69
  "claude": "opus",
64
- "codex": "gpt-5.2-codex",
70
+ "codex": "gpt-5.3-codex",
65
71
  "custom": "",
66
72
  }
67
73
 
68
74
  DEFAULT_SUMMARY_MODEL: dict[HarnessType, str] = {
69
75
  "claude": "haiku",
70
- "codex": "gpt-5.2",
76
+ "codex": "gpt-5.1-codex-mini",
71
77
  "custom": "",
72
78
  }
73
79
 
@@ -38,7 +38,7 @@ import time
38
38
  from dataclasses import dataclass
39
39
  from datetime import datetime
40
40
  from pathlib import Path
41
- from typing import Callable
41
+ from typing import Any, Callable
42
42
 
43
43
  from .config import get_config
44
44
  from .harness import Harness, HarnessType
@@ -80,7 +80,7 @@ HARNESS_MODEL_MAPPING: dict[HarnessType, dict[str, str]] = {
80
80
  "codex": {
81
81
  # Codex maps to OpenAI/Codex model names
82
82
  "haiku": "gpt-5.1-codex-mini",
83
- "sonnet": "gpt-5.2-codex",
83
+ "sonnet": "gpt-5.3-codex",
84
84
  "opus": "gpt-5.1-codex-max",
85
85
  },
86
86
  "custom": {
@@ -201,19 +201,22 @@ class HarnessRunner:
201
201
  model: str | None = None,
202
202
  print_output: bool = True,
203
203
  allow_writes: bool = False,
204
+ output_format: str | None = None,
205
+ json_schema: dict[str, Any] | None = None,
206
+ max_turns: int | None = None,
204
207
  ) -> list[str]:
205
208
  """Build the harness CLI command with harness-specific flags.
206
209
 
207
210
  This method handles the differences in CLI interfaces between harness types:
208
211
 
209
212
  Claude CLI:
210
- claude --model <model> --print [--dangerously-skip-permissions] -p "<prompt>"
213
+ claude --model <model> --print [--dangerously-skip-permissions] "<prompt>"
211
214
 
212
215
  Codex CLI (non-interactive):
213
216
  codex exec --model <model> --sandbox <mode> "<prompt>"
214
217
 
215
218
  Custom (defaults to Claude-like):
216
- custom --model <model> --print [--dangerously-skip-permissions] -p "<prompt>"
219
+ custom --model <model> --print [--dangerously-skip-permissions] "<prompt>"
217
220
 
218
221
  Args:
219
222
  prompt: The prompt text to send to the harness.
@@ -228,6 +231,7 @@ class HarnessRunner:
228
231
  harness = self._get_harness()
229
232
  model = model or self._config.worker_model
230
233
  mapped_model = self._map_model(model)
234
+ effective_max_turns = max_turns if max_turns is not None else self._config.non_interactive_max_turns
231
235
 
232
236
  cmd = [harness.path]
233
237
 
@@ -239,6 +243,14 @@ class HarnessRunner:
239
243
  if print_output:
240
244
  cmd.append("--print")
241
245
 
246
+ cmd.extend(["--max-turns", str(effective_max_turns)])
247
+
248
+ if output_format is not None:
249
+ cmd.extend(["--output-format", output_format])
250
+
251
+ if json_schema is not None:
252
+ cmd.extend(["--json-schema", json.dumps(json_schema)])
253
+
242
254
  if allow_writes:
243
255
  cmd.append("--dangerously-skip-permissions")
244
256
 
@@ -283,6 +295,9 @@ class HarnessRunner:
283
295
  max_retries: int = 3,
284
296
  retry_delay: float = 60.0,
285
297
  allow_writes: bool = False,
298
+ output_format: str | None = None,
299
+ json_schema: dict[str, Any] | None = None,
300
+ max_turns: int | None = None,
286
301
  ) -> HarnessResponse:
287
302
  """
288
303
  Run a prompt through the harness CLI.
@@ -297,7 +312,14 @@ class HarnessRunner:
297
312
  Returns:
298
313
  HarnessResponse with the result
299
314
  """
300
- cmd = self._build_command(prompt, model, allow_writes=allow_writes)
315
+ cmd = self._build_command(
316
+ prompt,
317
+ model,
318
+ allow_writes=allow_writes,
319
+ output_format=output_format,
320
+ json_schema=json_schema,
321
+ max_turns=max_turns if max_turns is not None else self._config.non_interactive_max_turns,
322
+ )
301
323
 
302
324
  self._log(f"Command: {' '.join(cmd)}")
303
325
  self._log(f"Prompt:\n{prompt}\n")
@@ -313,12 +335,16 @@ class HarnessRunner:
313
335
  cwd=self.project_dir,
314
336
  capture_output=True,
315
337
  text=True,
338
+ timeout=self._config.harness_timeout_seconds,
316
339
  # Don't catch KeyboardInterrupt - let it propagate
317
340
  )
318
341
 
319
342
  output = result.stdout
320
343
  error = result.stderr
321
344
 
345
+ if output_format == "json" and self._get_harness().type == "claude":
346
+ output, error = self._extract_claude_json_result(output, error, result.returncode)
347
+
322
348
  self._log(f"Output:\n{output}\n")
323
349
  if error:
324
350
  self._log(f"Error:\n{error}\n")
@@ -356,6 +382,15 @@ class HarnessRunner:
356
382
  output="",
357
383
  error=error_msg,
358
384
  )
385
+ except subprocess.TimeoutExpired:
386
+ timeout = self._config.harness_timeout_seconds
387
+ error_msg = f"Harness timed out after {timeout}s"
388
+ self._log(f"Error: {error_msg}")
389
+ return HarnessResponse(
390
+ success=False,
391
+ output="",
392
+ error=error_msg,
393
+ )
359
394
  except Exception as e:
360
395
  error_msg = str(e)
361
396
  self._log(f"Exception: {error_msg}")
@@ -372,6 +407,77 @@ class HarnessRunner:
372
407
  rate_limited=True,
373
408
  )
374
409
 
410
+ def _extract_claude_json_result(
411
+ self, output: str, error: str, returncode: int
412
+ ) -> tuple[str, str]:
413
+ """Extract the useful result from Claude's JSON envelope."""
414
+ stripped = output.strip()
415
+ if not stripped:
416
+ return output, error
417
+
418
+ try:
419
+ payload = json.loads(stripped)
420
+ except json.JSONDecodeError:
421
+ return output, error
422
+
423
+ if isinstance(payload, dict):
424
+ result = payload.get("result", payload)
425
+ if isinstance(result, str):
426
+ normalized_output = result
427
+ else:
428
+ normalized_output = json.dumps(result, ensure_ascii=False)
429
+
430
+ if returncode != 0 and not error:
431
+ error_value = payload.get("error") or payload.get("message") or normalized_output
432
+ error = str(error_value)
433
+
434
+ return normalized_output, error
435
+
436
+ return output, error
437
+
438
+ def run_structured(
439
+ self,
440
+ prompt: str,
441
+ json_schema: dict[str, Any],
442
+ model: str | None = None,
443
+ allow_writes: bool = False,
444
+ ) -> tuple[HarnessResponse, dict[str, Any] | None]:
445
+ """Run a prompt expecting a schema-validated JSON object."""
446
+ harness = self._get_harness()
447
+ response = self.run(
448
+ prompt,
449
+ model=model,
450
+ allow_writes=allow_writes,
451
+ output_format="json" if harness.type == "claude" else None,
452
+ json_schema=json_schema if harness.type == "claude" else None,
453
+ )
454
+ if not response.success:
455
+ return response, None
456
+
457
+ try:
458
+ parsed = json.loads(response.output)
459
+ except json.JSONDecodeError as exc:
460
+ return (
461
+ HarnessResponse(
462
+ success=False,
463
+ output=response.output,
464
+ error=f"Structured output was not valid JSON: {exc}",
465
+ ),
466
+ None,
467
+ )
468
+
469
+ if not isinstance(parsed, dict):
470
+ return (
471
+ HarnessResponse(
472
+ success=False,
473
+ output=response.output,
474
+ error="Structured output was not a JSON object",
475
+ ),
476
+ None,
477
+ )
478
+
479
+ return response, parsed
480
+
375
481
  def create_prd(self, task_description: str, learnings: str = "") -> HarnessResponse:
376
482
  """
377
483
  Create a PRD (Product Requirements Document) for a task using the prd skill format.
@@ -673,25 +779,66 @@ IMPORTANT RULES:
673
779
  8. All stories start with passes: false
674
780
  9. branchName MUST start with "{branch_prefix}/" (use lowercase letters, numbers, and hyphens only)
675
781
 
676
- Output ONLY valid JSON in the exact format below, nothing else:
677
- {{
678
- "project": "{project_name}",
679
- "branchName": "{branch_prefix}/feature-name-here",
680
- "description": "Feature description here",
681
- "userStories": [
682
- {{
683
- "id": "US-001",
684
- "title": "Story title",
685
- "description": "As a [user], I want [feature] so that [benefit]",
686
- "acceptanceCriteria": ["criterion 1", "criterion 2", "Typecheck passes"],
687
- "priority": 1,
688
- "passes": false,
689
- "notes": ""
690
- }}
691
- ]
692
- }}"""
782
+ Return a JSON object only. Do not wrap it in markdown fences."""
783
+
784
+ schema: dict[str, Any] = {
785
+ "type": "object",
786
+ "required": ["project", "branchName", "description", "userStories"],
787
+ "properties": {
788
+ "project": {"type": "string", "const": project_name},
789
+ "branchName": {
790
+ "type": "string",
791
+ "pattern": rf"^{re.escape(branch_prefix)}/[a-z0-9-]+$",
792
+ },
793
+ "description": {"type": "string", "minLength": 1},
794
+ "userStories": {
795
+ "type": "array",
796
+ "minItems": 1,
797
+ "items": {
798
+ "type": "object",
799
+ "required": [
800
+ "id",
801
+ "title",
802
+ "description",
803
+ "acceptanceCriteria",
804
+ "priority",
805
+ "passes",
806
+ ],
807
+ "properties": {
808
+ "id": {"type": "string", "pattern": r"^US-[0-9]{3}$"},
809
+ "title": {"type": "string", "minLength": 1},
810
+ "description": {"type": "string", "minLength": 1},
811
+ "acceptanceCriteria": {
812
+ "type": "array",
813
+ "minItems": 1,
814
+ "items": {"type": "string", "minLength": 1},
815
+ },
816
+ "priority": {"type": "integer", "minimum": 1},
817
+ "passes": {"type": "boolean", "const": False},
818
+ "notes": {"type": "string"},
819
+ },
820
+ "additionalProperties": False,
821
+ },
822
+ },
823
+ },
824
+ "additionalProperties": False,
825
+ }
826
+
827
+ response, parsed = self.run_structured(
828
+ prompt,
829
+ json_schema=schema,
830
+ model=self._config.summary_model,
831
+ )
832
+ if not response.success or parsed is None:
833
+ return response
693
834
 
694
- return self.run(prompt, model=self._config.summary_model)
835
+ return HarnessResponse(
836
+ success=True,
837
+ output=json.dumps(parsed, indent=2, ensure_ascii=False),
838
+ error=response.error,
839
+ rate_limited=response.rate_limited,
840
+ cost=response.cost,
841
+ )
695
842
 
696
843
  def implement_story(self, story_prompt: str, context: str = "") -> HarnessResponse:
697
844
  """
@@ -739,31 +886,30 @@ After implementation, verify each acceptance criterion is met."""
739
886
  GIT DIFF OF CHANGES:
740
887
  {git_diff if git_diff else "(No changes detected)"}
741
888
 
742
- Check each acceptance criterion and determine if it has been met.
889
+ Check each acceptance criterion and determine if it has been met.
743
890
 
744
- IMPORTANT: Distinguish between these scenarios:
745
- 1. PASSES - All acceptance criteria are verifiably met (or irrelevant - see below)
746
- 2. FAILS - One or more criteria are NOT met (implementation is wrong/incomplete)
747
- 3. BLOCKED - Some criteria CANNOT BE VERIFIED due to external factors (permission issues,
748
- pre-existing errors unrelated to this change, environment constraints, etc.)
891
+ IMPORTANT: Distinguish between these scenarios:
892
+ 1. PASSES - All acceptance criteria are verifiably met
893
+ 2. FAILS - One or more criteria are NOT met (implementation is wrong/incomplete)
894
+ 3. BLOCKED - Some criteria CANNOT BE VERIFIED due to external factors (permission issues,
895
+ pre-existing errors unrelated to this change, environment constraints, etc.)
749
896
 
750
- HANDLING IRRELEVANT OR NONSENSICAL CRITERIA:
751
- - If a criterion is based on an erroneous assumption (e.g., "update file X for library Y" but
752
- file X doesn't use library Y), treat it as PASSED by virtue of being irrelevant.
753
- - If a criterion seems nonsensical or impossible, question it and explain why it doesn't apply.
754
- - These criteria may have been auto-generated by earlier processes and don't reflect reality.
755
- - Mark such criteria as passed with a note explaining why they're not applicable.
897
+ HANDLING AMBIGUOUS OR NONSENSICAL CRITERIA:
898
+ - Do NOT auto-pass criteria as "N/A" or "irrelevant".
899
+ - If a criterion is based on a bad assumption, contradictory, or impossible to satisfy as written,
900
+ mark it as FAILED and explain precisely why.
901
+ - In "To Complete", include concrete guidance to correct that criterion/story definition.
902
+ - Use BLOCKED only when verification is impossible due to external constraints, not because criteria are poor.
756
903
 
757
904
  Respond in this exact format:
758
905
 
759
906
  STATUS: PASSES or FAILS or BLOCKED
760
907
  NOTES:
761
- **What Passed:**
762
- - ✅ List each criterion that was clearly met
763
- - ✅ List criteria that are N/A with explanation (e.g., "N/A - file doesn't use this library")
908
+ **What Passed:**
909
+ - ✅ List each criterion that was clearly met
764
910
 
765
- **What Failed:**
766
- - ❌ List each criterion that was NOT met (implementation issue)
911
+ **What Failed:**
912
+ - ❌ List each criterion that was NOT met, including malformed/incorrect criteria
767
913
 
768
914
  **What Could Not Be Verified:**
769
915
  - ⚠️ List criteria that cannot be verified with reasons (e.g., "Tests execution - requires permission")
@@ -38,6 +38,11 @@ def slugify(text: str) -> str:
38
38
  return slug[:50] # Limit length
39
39
 
40
40
 
41
+ def stable_prd_id(file_path: Path) -> str:
42
+ """Generate a deterministic PRD ID from file path."""
43
+ return str(uuid.uuid5(uuid.NAMESPACE_URL, str(file_path.resolve())))
44
+
45
+
41
46
  @dataclass
42
47
  class PRD:
43
48
  """Represents a Product Requirements Document."""
@@ -58,7 +63,7 @@ class PRD:
58
63
  content = file_path.read_text().strip()
59
64
  name = file_path.stem # filename without extension
60
65
  return cls(
61
- id=str(uuid.uuid4()),
66
+ id=stable_prd_id(file_path),
62
67
  name=name,
63
68
  file_path=file_path,
64
69
  is_specced=False,
@@ -119,7 +124,7 @@ class PRD:
119
124
  status = "errored"
120
125
 
121
126
  return cls(
122
- id=str(uuid.uuid4()),
127
+ id=stable_prd_id(file_path),
123
128
  name=name,
124
129
  file_path=file_path,
125
130
  is_specced=True,
@@ -155,11 +160,22 @@ class PRDManager:
155
160
 
156
161
  def __init__(self, project_dir: Path):
157
162
  self.project_dir = project_dir
158
- self.prd_dir = project_dir / "PRD"
163
+ self.prd_dir = self._resolve_prd_dir()
159
164
  self._ensure_prd_dir()
160
165
  self._prds: list[PRD] = []
161
166
  self._load()
162
167
 
168
+ def _resolve_prd_dir(self) -> Path:
169
+ """Resolve the active PRD directory, supporting the legacy singular path."""
170
+ plural_dir = self.project_dir / "PRDs"
171
+ legacy_dir = self.project_dir / "PRD"
172
+
173
+ if plural_dir.exists():
174
+ return plural_dir
175
+ if legacy_dir.exists():
176
+ return legacy_dir
177
+ return plural_dir
178
+
163
179
  def _ensure_prd_dir(self) -> None:
164
180
  """Ensure PRD directory exists."""
165
181
  self.prd_dir.mkdir(parents=True, exist_ok=True)