@lnilluv/pi-ralph-loop 0.2.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/.github/workflows/ci.yml +5 -2
  2. package/.github/workflows/release.yml +15 -43
  3. package/README.md +51 -113
  4. package/package.json +13 -5
  5. package/scripts/version-helper.ts +210 -0
  6. package/src/index.ts +1360 -275
  7. package/src/ralph-draft-context.ts +618 -0
  8. package/src/ralph-draft-llm.ts +297 -0
  9. package/src/ralph-draft.ts +33 -0
  10. package/src/ralph.ts +1457 -0
  11. package/src/runner-rpc.ts +434 -0
  12. package/src/runner-state.ts +822 -0
  13. package/src/runner.ts +957 -0
  14. package/src/secret-paths.ts +66 -0
  15. package/src/shims.d.ts +0 -3
  16. package/tests/fixtures/parity/migrate/OPEN_QUESTIONS.md +3 -0
  17. package/tests/fixtures/parity/migrate/RALPH.md +27 -0
  18. package/tests/fixtures/parity/migrate/golden/MIGRATED.md +15 -0
  19. package/tests/fixtures/parity/migrate/legacy/source.md +6 -0
  20. package/tests/fixtures/parity/migrate/legacy/source.yaml +3 -0
  21. package/tests/fixtures/parity/migrate/scripts/show-legacy.sh +10 -0
  22. package/tests/fixtures/parity/migrate/scripts/verify.sh +15 -0
  23. package/tests/fixtures/parity/research/OPEN_QUESTIONS.md +3 -0
  24. package/tests/fixtures/parity/research/RALPH.md +45 -0
  25. package/tests/fixtures/parity/research/claim-evidence-checklist.md +15 -0
  26. package/tests/fixtures/parity/research/expected-outputs.md +22 -0
  27. package/tests/fixtures/parity/research/scripts/show-snapshots.sh +13 -0
  28. package/tests/fixtures/parity/research/scripts/verify.sh +55 -0
  29. package/tests/fixtures/parity/research/snapshots/app-factory-ai-cli.md +11 -0
  30. package/tests/fixtures/parity/research/snapshots/docs-factory-ai-cli-features-missions.md +11 -0
  31. package/tests/fixtures/parity/research/snapshots/factory-ai-news-missions.md +11 -0
  32. package/tests/fixtures/parity/research/source-manifest.md +20 -0
  33. package/tests/index.test.ts +3529 -0
  34. package/tests/parity/README.md +9 -0
  35. package/tests/parity/harness.py +526 -0
  36. package/tests/parity-harness.test.ts +42 -0
  37. package/tests/parity-research-fixture.test.ts +34 -0
  38. package/tests/ralph-draft-context.test.ts +672 -0
  39. package/tests/ralph-draft-llm.test.ts +434 -0
  40. package/tests/ralph-draft.test.ts +168 -0
  41. package/tests/ralph.test.ts +1840 -0
  42. package/tests/runner-event-contract.test.ts +235 -0
  43. package/tests/runner-rpc.test.ts +358 -0
  44. package/tests/runner-state.test.ts +553 -0
  45. package/tests/runner.test.ts +1347 -0
  46. package/tests/secret-paths.test.ts +55 -0
  47. package/tests/version-helper.test.ts +75 -0
  48. package/tsconfig.json +3 -2
@@ -0,0 +1,9 @@
1
+ # Parity harness
2
+
3
+ Run from the repo root:
4
+
5
+ ```bash
6
+ python3 tests/parity/harness.py --implementation pi-ralph-loop --fixture research --fixture migrate
7
+ ```
8
+
9
+ Fixtures live under `tests/fixtures/parity/`. The harness copies each fixture into a temporary task workspace, replays it, and prints the bundle root path to stdout when it finishes.
@@ -0,0 +1,526 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import hashlib
6
+ import json
7
+ import os
8
+ import shlex
9
+ import shutil
10
+ import subprocess
11
+ import tempfile
12
+ import threading
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
# Repository root: the directory two levels above the one containing this file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Parent directory of the parity fixtures; run_fixture() appends the fixture name.
FIXTURES_ROOT = REPO_ROOT.joinpath("tests", "fixtures", "parity")

# Environment-provided defaults for the command-line options declared in main().
DEFAULT_MODEL = os.getenv("PI_RALPH_PARITY_MODEL")
DEFAULT_LOOP_RPC_COMMAND = os.getenv("PI_RALPH_PARITY_LOOP_RPC_COMMAND")
DEFAULT_RALPHIFY_RPC_COMMAND = os.getenv("PI_RALPH_PARITY_RALPHIFY_RPC_COMMAND", "")
DEFAULT_LOOP_PROMPT_TEMPLATE = os.getenv(
    "PI_RALPH_PARITY_LOOP_PROMPT_TEMPLATE",
    "/ralph --path {ralph_path}",
)
DEFAULT_RALPHIFY_PROMPT_TEMPLATE = os.getenv(
    "PI_RALPH_PARITY_RALPHIFY_PROMPT_TEMPLATE",
    "/ralph --path {ralph_path}",
)

# Values of the "status" field in status.json that end the polling loop in
# run_rpc_session().
TERMINAL_STATUSES = {
    "cancelled",
    "complete",
    "error",
    "max-iterations",
    "no-progress-exhaustion",
    "stopped",
    "timeout",
}

# Files ensure_agent_dir() copies from the source agent directory when present.
AGENT_FILE_NAMES = ["auth.json", "models.json", "agent-models.json"]
37
+
38
+
39
def utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current_utc = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", current_utc)
41
+
42
+
43
def read_text(path: Path) -> str:
    """Return the entire contents of *path*, decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()
45
+
46
+
47
def write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating missing parent directories first."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
50
+
51
+
52
def write_json(path: Path, payload: Any) -> None:
    """Serialize *payload* as JSON into *path*, creating parent directories as needed.

    The output uses a two-space indent, sorted keys and a trailing newline,
    and is written as UTF-8.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    path.write_text(f"{serialized}\n", encoding="utf-8")
55
+
56
+
57
def inventory(path: Path) -> list[dict[str, Any]]:
    """List every regular file below *path* with its size and SHA-256 digest.

    Args:
        path: Directory to walk recursively.

    Returns:
        One dict per file with keys ``path`` (POSIX-style path relative to
        *path*), ``size`` (byte count) and ``sha256`` (hex digest), in sorted
        path order. An empty list when *path* does not exist.
    """
    if not path.exists():
        return []

    def describe(file_path: Path) -> dict[str, Any]:
        blob = file_path.read_bytes()
        return {
            "path": file_path.relative_to(path).as_posix(),
            "size": len(blob),
            "sha256": hashlib.sha256(blob).hexdigest(),
        }

    return [describe(candidate) for candidate in sorted(path.rglob("*")) if candidate.is_file()]
73
+
74
+
75
def write_inventory_tsv(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write inventory *rows* to *path* as tab-separated text.

    The first line is the header ``path``, ``size``, ``sha256``; each row
    follows on its own line, and the file ends with a newline.
    """
    header = "path\tsize\tsha256"
    body = [f"{row['path']}\t{row['size']}\t{row['sha256']}" for row in rows]
    write_text(path, "\n".join([header, *body]) + "\n")
80
+
81
+
82
def read_status(task_dir: Path, error_context: list[dict[str, str]] | None = None) -> dict[str, Any] | None:
    """Load ``.ralph-runner/status.json`` from *task_dir*, if it holds a JSON object.

    Args:
        task_dir: Task workspace that may contain the status file.
        error_context: Optional list that collects read/parse failures as
            ``{"path", "error"}`` dicts. A failure identical to the last
            recorded entry is not appended again.

    Returns:
        The parsed object, or ``None`` when the file is missing, unreadable,
        not valid JSON, or holds something other than a JSON object.
    """
    status_path = task_dir / ".ralph-runner" / "status.json"
    if not status_path.exists():
        return None

    try:
        payload = json.loads(status_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        if error_context is None:
            return None
        entry = {"path": str(status_path), "error": f"{type(exc).__name__}: {exc}"}
        already_recorded = bool(error_context) and error_context[-1] == entry
        if not already_recorded:
            error_context.append(entry)
        return None

    return payload if isinstance(payload, dict) else None
97
+
98
+
99
def ensure_agent_dir(bundle_root: Path) -> dict[str, Any]:
    """Create ``<bundle_root>/agent`` and copy the known agent files into it.

    The source directory is ``$PI_CODING_AGENT_DIR`` when that variable is set
    and non-empty, otherwise ``~/.pi/agent``. Only the names listed in
    ``AGENT_FILE_NAMES`` are copied, and only those that exist in the source.

    Returns:
        A dict with ``source`` and ``destination`` (directory paths as
        strings) and ``copied_files`` (names that were copied).
    """
    agent_dir = bundle_root / "agent"
    agent_dir.mkdir(parents=True, exist_ok=True)

    configured_dir = os.environ.get("PI_CODING_AGENT_DIR")
    source = Path(configured_dir) if configured_dir else Path.home() / ".pi" / "agent"

    copied: list[str] = []
    for file_name in AGENT_FILE_NAMES:
        candidate = source / file_name
        if not candidate.exists():
            continue
        shutil.copy2(candidate, agent_dir / file_name)
        copied.append(file_name)

    return {
        "source": str(source),
        "destination": str(agent_dir),
        "copied_files": copied,
    }
121
+
122
+
123
def create_bundle_root(explicit_root: str | None) -> Path:
    """Return the directory that will hold this run's artifacts.

    Args:
        explicit_root: Directory to use; ``~`` is expanded and the directory
            is created when missing. When empty or ``None``, a new temporary
            directory prefixed ``pi-ralph-parity-`` is created instead.

    Returns:
        The resolved path of the chosen directory.
    """
    if not explicit_root:
        return Path(tempfile.mkdtemp(prefix="pi-ralph-parity-")).resolve()

    root = Path(explicit_root).expanduser().resolve()
    root.mkdir(parents=True, exist_ok=True)
    return root
129
+
130
+
131
def build_loop_rpc_command(model: str | None) -> list[str]:
    """Build the built-in ``pi`` RPC command that loads this repo's ``src/index.ts``.

    Args:
        model: Model to pin via ``--model``; the flag is omitted when this is
            empty or ``None``.

    Returns:
        The command as an argument list.
    """
    extension_entry = str(REPO_ROOT / "src" / "index.ts")
    base_command = ["pi", "--mode", "rpc", "--no-extensions", "-e", extension_entry]
    if not model:
        return base_command
    return [*base_command, "--model", model]
143
+
144
+
145
def run_git(args: list[str]) -> str:
    """Run ``git`` with *args* in the repository root and return its combined output.

    The exit status is deliberately not checked (``check=False``): when git
    reports a failure, its error text is returned like any other output so it
    can be recorded by the caller.

    Args:
        args: Arguments placed after ``git`` on the command line.

    Returns:
        Standard output followed by standard error, with surrounding
        whitespace stripped. If the ``git`` executable cannot be started, a
        one-line description of that error is returned instead.
    """
    try:
        result = subprocess.run(
            ["git", *args],
            # Query the repository checkout itself, not a copied task workspace.
            cwd=REPO_ROOT,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:
        # main() calls this only after every fixture has run; a missing or
        # unlaunchable git binary must not prevent the manifest being written.
        return f"git unavailable: {type(exc).__name__}: {exc}"
    return (result.stdout + result.stderr).strip()
157
+
158
+
159
def parse_command(text: str) -> list[str]:
    """Split a command-line string into arguments using POSIX shell quoting rules."""
    tokens = shlex.split(text)
    return tokens
161
+
162
+
163
def task_prompt(prompt_template: str, ralph_path: Path) -> str:
    """Fill *prompt_template* with values derived from *ralph_path*.

    Available placeholders:
        ``{ralph_path}``: shell-quoted path of the RALPH file.
        ``{task_dir}``: shell-quoted path of its parent directory.
        ``{fixture}``: name of the directory three levels above the file.
        ``{implementation}``: name of the directory two levels above the file.
    """
    ancestors = ralph_path.parents
    substitutions = {
        "ralph_path": shlex.quote(str(ralph_path)),
        "task_dir": shlex.quote(str(ralph_path.parent)),
        "fixture": ancestors[2].name,
        "implementation": ancestors[1].name,
    }
    return prompt_template.format(**substitutions)
170
+
171
+
172
def stream_reader(stream, file_obj, store: list[str], last_output: list[float]) -> None:
    """Copy *stream* line by line into *file_obj* and *store* until end of stream.

    Args:
        stream: Text stream to read; it is closed when reading ends.
        file_obj: Writable text file that receives each line and is flushed
            after every write.
        store: List that accumulates the lines read.
        last_output: Single-element list; element 0 is set to ``time.time()``
            whenever a line arrives.
    """
    try:
        while True:
            line = stream.readline()
            if line == "":
                break
            last_output[0] = time.time()
            store.append(line)
            file_obj.write(line)
            file_obj.flush()
    finally:
        try:
            stream.close()
        except (OSError, ValueError):
            # Best effort: the stream may already be closed or broken.
            pass
184
+
185
+
186
def _terminate_process(proc: subprocess.Popen, cleanup_errors: list[dict[str, str]]) -> None:
    """Stop *proc* if it is still running: terminate first, kill if that is not enough.

    Each failure along the way is appended to *cleanup_errors* as an
    ``{"action", "error"}`` dict instead of being raised.
    """
    if proc.poll() is not None:
        return

    try:
        proc.terminate()
    except (ProcessLookupError, OSError) as exc:
        cleanup_errors.append({"action": "terminate", "error": f"{type(exc).__name__}: {exc}"})
    try:
        proc.wait(timeout=5)
        return
    except subprocess.TimeoutExpired as exc:
        cleanup_errors.append({"action": "wait-after-terminate", "error": f"{type(exc).__name__}: {exc}"})

    try:
        proc.kill()
    except (ProcessLookupError, OSError) as kill_exc:
        cleanup_errors.append({"action": "kill", "error": f"{type(kill_exc).__name__}: {kill_exc}"})
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired as wait_exc:
        cleanup_errors.append({"action": "wait-after-kill", "error": f"{type(wait_exc).__name__}: {wait_exc}"})


def run_rpc_session(
    command: list[str],
    prompt: str,
    cwd: Path,
    env: dict[str, str],
    stdout_path: Path,
    stderr_path: Path,
    task_dir: Path,
    run_timeout_seconds: int,
    quiet_kill_seconds: float,
) -> dict[str, Any]:
    """Run one RPC process, send it a single prompt, and watch it until it is done.

    The process's stdout and stderr are streamed into *stdout_path* and
    *stderr_path* by background threads. The session ends when the process
    exits, when the status file in *task_dir* reports a terminal status, when
    the idle rule fires, or when *run_timeout_seconds* elapse. A process that
    is still running at that point is terminated.

    Args:
        command: Argument list of the process to start.
        prompt: Message sent as one JSON ``prompt`` line on the process's stdin.
        cwd: Working directory for the process.
        env: Environment for the process.
        stdout_path: File receiving the process's stdout.
        stderr_path: File receiving the process's stderr.
        task_dir: Directory polled through ``read_status``.
        run_timeout_seconds: Maximum wall-clock duration of the session.
        quiet_kill_seconds: Length of output silence after which a process not
            reporting ``initializing``/``running`` is treated as idle. The
            rule applies only after a startup grace period of
            ``max(10 * quiet_kill_seconds, 30)`` seconds; ``0`` or less
            disables it.

    Returns:
        A dict with ``returncode``, ``stdout_lines``, ``stderr_lines``,
        ``status`` (last status object read, or ``None``),
        ``termination_reason``, ``cleanup_errors``, ``status_errors``,
        ``command`` and ``prompt``.
    """
    stdout_path.parent.mkdir(parents=True, exist_ok=True)
    stderr_path.parent.mkdir(parents=True, exist_ok=True)

    stdout_lines: list[str] = []
    stderr_lines: list[str] = []
    termination_reason = "timeout"
    cleanup_errors: list[dict[str, str]] = []
    status_errors: list[dict[str, str]] = []
    status_snapshot: dict[str, Any] | None = None
    startup_grace_seconds = max(quiet_kill_seconds * 10, 30.0)

    # The log files are context-managed so they are closed on every exit path,
    # including a failure to start the process or its reader threads.
    with stdout_path.open("w", encoding="utf-8") as stdout_file, stderr_path.open("w", encoding="utf-8") as stderr_file:
        proc = subprocess.Popen(
            command,
            cwd=str(cwd),
            env=env,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,
        )
        # Single-element list so the reader threads can update the timestamp.
        last_output = [time.time()]
        threads = [
            threading.Thread(target=stream_reader, args=(proc.stdout, stdout_file, stdout_lines, last_output), daemon=True),
            threading.Thread(target=stream_reader, args=(proc.stderr, stderr_file, stderr_lines, last_output), daemon=True),
        ]

        try:
            for thread in threads:
                thread.start()

            payload = json.dumps({"type": "prompt", "id": f"parity-{int(time.time() * 1000)}", "message": prompt}) + "\n"
            if proc.stdin is not None:
                try:
                    proc.stdin.write(payload)
                    proc.stdin.flush()
                except (OSError, ValueError) as exc:
                    # A child that exits before reading its prompt breaks the
                    # pipe. Record it and let the loop below report the exit
                    # instead of aborting without cleanup.
                    cleanup_errors.append({"action": "write-prompt", "error": f"{type(exc).__name__}: {exc}"})

            started_at = time.time()
            while time.time() - started_at < run_timeout_seconds:
                if proc.poll() is not None:
                    termination_reason = "process-exited"
                    # The process may have written its final status after the
                    # previous poll; re-read so the result is not stale.
                    final_status = read_status(task_dir, status_errors)
                    if final_status is not None:
                        status_snapshot = final_status
                    break

                status_snapshot = read_status(task_dir, status_errors)
                status_value = status_snapshot.get("status") if status_snapshot else None
                if status_value in TERMINAL_STATUSES:
                    termination_reason = f"terminal-status:{status_value}"
                    # Brief pause before the process is stopped; presumably to
                    # let it finish writing its output.
                    time.sleep(1.0)
                    break

                status_is_active = isinstance(status_value, str) and status_value in {"initializing", "running"}
                if (
                    quiet_kill_seconds > 0
                    and time.time() - started_at >= startup_grace_seconds
                    and time.time() - last_output[0] > quiet_kill_seconds
                    and not status_is_active
                ):
                    termination_reason = "idle-timeout:no-status"
                    break

                time.sleep(0.25)
        finally:
            _terminate_process(proc, cleanup_errors)

            if proc.stdin is not None:
                try:
                    proc.stdin.close()
                except (OSError, ValueError) as exc:
                    cleanup_errors.append({"action": "close-stdin", "error": f"{type(exc).__name__}: {exc}"})

            # Give the readers a moment to drain the pipes before the log
            # files are closed on leaving the ``with`` block.
            for thread in threads:
                thread.join(timeout=2)

    return {
        "returncode": proc.returncode,
        "stdout_lines": stdout_lines,
        "stderr_lines": stderr_lines,
        "status": status_snapshot,
        "termination_reason": termination_reason,
        "cleanup_errors": cleanup_errors,
        "status_errors": status_errors,
        "command": command,
        "prompt": prompt,
    }
300
+
301
+
302
def run_verifier(task_dir: Path, env: dict[str, str], run_dir: Path) -> dict[str, Any]:
    """Run ``scripts/verify.sh`` from *task_dir* with bash and record the outcome.

    The command line, stdout and stderr are written to ``verify.command.txt``,
    ``verify.stdout.log`` and ``verify.stderr.log`` in *run_dir*, and the
    summary to ``verify.json``.

    Returns:
        ``{"skipped": True}`` when the script does not exist; otherwise a dict
        with ``returncode``, ``command``, ``cwd`` and ``passed`` (true when
        the return code is zero).
    """
    verify_script = task_dir / "scripts" / "verify.sh"
    if not verify_script.exists():
        return {"skipped": True}

    verify_command = ["bash", str(verify_script)]
    working_dir = str(task_dir)
    completed = subprocess.run(
        verify_command,
        cwd=working_dir,
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )

    quoted_command = " ".join(map(shlex.quote, verify_command))
    write_text(run_dir / "verify.command.txt", quoted_command + "\n")
    write_text(run_dir / "verify.stdout.log", completed.stdout)
    write_text(run_dir / "verify.stderr.log", completed.stderr)

    exit_code = completed.returncode
    payload = {
        "returncode": exit_code,
        "command": verify_command,
        "cwd": working_dir,
        "passed": exit_code == 0,
    }
    write_json(run_dir / "verify.json", payload)
    return payload
327
+
328
+
329
def implementation_plan(implementation: str, loop_command: list[str], loop_prompt_template: str, ralphify_command: list[str] | None) -> list[tuple[str, list[str], str]]:
    """Expand an ``--implementation`` choice into the list of runs to perform.

    Args:
        implementation: ``"pi-ralph-loop"``, ``"ralphify"`` or ``"both"``.
        loop_command: RPC command for pi-ralph-loop.
        loop_prompt_template: Prompt template for pi-ralph-loop.
        ralphify_command: RPC command for Ralphify; required for
            ``"ralphify"`` and ``"both"``.

    Returns:
        ``(name, command, prompt_template)`` tuples, pi-ralph-loop first.
        Ralphify entries carry ``DEFAULT_RALPHIFY_PROMPT_TEMPLATE``.

    Raises:
        SystemExit: For an unknown implementation, or when Ralphify is
            requested without a command.
    """
    loop_entry = ("pi-ralph-loop", loop_command, loop_prompt_template)
    if implementation == "pi-ralph-loop":
        return [loop_entry]

    if implementation not in ("ralphify", "both"):
        raise SystemExit(f"Unknown implementation: {implementation}")

    if not ralphify_command:
        if implementation == "ralphify":
            raise SystemExit("--implementation ralphify requires PI_RALPH_PARITY_RALPHIFY_RPC_COMMAND or --ralphify-rpc-command")
        raise SystemExit("--implementation both requires a Ralphify RPC command")

    ralphify_entry = ("ralphify", ralphify_command, DEFAULT_RALPHIFY_PROMPT_TEMPLATE)
    if implementation == "ralphify":
        return [ralphify_entry]
    return [loop_entry, ralphify_entry]
344
+
345
+
346
def run_fixture(bundle_root: Path, fixture_name: str, implementation: str, rpc_command: list[str], prompt_template: str, env: dict[str, str], run_timeout_seconds: int, quiet_kill_seconds: float) -> dict[str, Any]:
    """Replay one fixture with one implementation and record everything about the run.

    A fresh copy of the fixture is placed in
    ``<bundle_root>/runs/<fixture_name>/<implementation>/task`` (any previous
    run directory is removed first). File inventories are captured before and
    after the RPC session, the verifier is run, and the collected metadata is
    written to ``run-metadata.json`` in the run directory.

    Returns:
        The metadata dict that was written.

    Raises:
        SystemExit: When the fixture directory does not exist.
    """
    fixture_dir = FIXTURES_ROOT / fixture_name
    if not fixture_dir.exists():
        raise SystemExit(f"Missing fixture directory: {fixture_dir}")

    run_dir = bundle_root / "runs" / fixture_name / implementation
    task_dir = run_dir / "task"
    # Start from a clean run directory so earlier artifacts cannot leak in.
    if run_dir.exists():
        shutil.rmtree(run_dir)
    run_dir.mkdir(parents=True, exist_ok=True)
    shutil.copytree(fixture_dir, task_dir)

    before_rows = inventory(task_dir)
    write_inventory_tsv(run_dir / "inventory-before.tsv", before_rows)
    fixture_manifest = {
        "fixture": fixture_name,
        "implementation": implementation,
        "fixture_dir": str(fixture_dir),
        "task_dir": str(task_dir),
        "files": before_rows,
    }
    write_json(run_dir / "fixture-manifest.json", fixture_manifest)

    prompt = task_prompt(prompt_template, task_dir / "RALPH.md")
    quoted_command = " ".join(map(shlex.quote, rpc_command))
    write_text(run_dir / "command.txt", quoted_command + "\n")
    write_text(run_dir / "prompt.txt", prompt + "\n")

    started_at = utc_now()
    # The session runs with the copied task workspace as its working directory.
    session_result = run_rpc_session(
        command=rpc_command,
        prompt=prompt,
        cwd=task_dir,
        env=env,
        stdout_path=run_dir / "top-level-rpc.jsonl",
        stderr_path=run_dir / "top-level-stderr.log",
        task_dir=task_dir,
        run_timeout_seconds=run_timeout_seconds,
        quiet_kill_seconds=quiet_kill_seconds,
    )

    # The after-inventory is taken before the verifier runs.
    after_rows = inventory(task_dir)
    write_inventory_tsv(run_dir / "inventory-after.tsv", after_rows)
    verifier_result = run_verifier(task_dir, env, run_dir)
    finished_at = utc_now()

    session_summary = {
        "returncode": session_result["returncode"],
        "status": session_result["status"],
        "termination_reason": session_result["termination_reason"],
        "cleanup_errors": session_result["cleanup_errors"],
        "status_errors": session_result["status_errors"],
        "stdout_lines": len(session_result["stdout_lines"]),
        "stderr_lines": len(session_result["stderr_lines"]),
    }
    metadata = {
        "fixture": fixture_name,
        "implementation": implementation,
        "command": rpc_command,
        "prompt": prompt,
        "task_dir": str(task_dir),
        "started_at": started_at,
        "finished_at": finished_at,
        "termination_reason": session_result["termination_reason"],
        "session": session_summary,
        "verifier": verifier_result,
        "inventory": {
            "before_count": len(before_rows),
            "after_count": len(after_rows),
        },
    }
    write_json(run_dir / "run-metadata.json", metadata)
    return metadata
418
+
419
+
420
def main() -> int:
    """Parse the command line, replay the selected fixtures, and write the manifest.

    For every selected fixture each planned implementation is run through
    ``run_fixture``. Afterwards ``manifest.json`` is written to the bundle
    root and the bundle root path is printed.

    Returns:
        Always ``0``.
    """
    parser = argparse.ArgumentParser(description="Replay parity fixtures and capture provenance bundles.")
    parser.add_argument("--fixture", action="append", choices=("research", "migrate"), help="Fixture to run. May be provided more than once. Default: both fixtures.")
    parser.add_argument("--implementation", choices=("pi-ralph-loop", "ralphify", "both"), default="pi-ralph-loop", help="Which implementation to run.")
    parser.add_argument("--root", help="Reuse this artifact root instead of creating a fresh temp dir.")
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Model to pin for the built-in pi-ralph-loop command. If omitted, pi uses the currently active model.")
    parser.add_argument("--loop-rpc-command", default=DEFAULT_LOOP_RPC_COMMAND, help="Full RPC command override for pi-ralph-loop. When set, this replaces the built-in pi command entirely.")
    parser.add_argument("--ralphify-rpc-command", default=DEFAULT_RALPHIFY_RPC_COMMAND, help="RPC command used for Ralphify.")
    parser.add_argument("--loop-prompt-template", default=DEFAULT_LOOP_PROMPT_TEMPLATE, help="Prompt template used for pi-ralph-loop.")
    parser.add_argument("--ralphify-prompt-template", default=DEFAULT_RALPHIFY_PROMPT_TEMPLATE, help="Prompt template used for Ralphify.")
    parser.add_argument("--run-timeout-seconds", type=int, default=900, help="Maximum wall-clock time per run.")
    parser.add_argument("--quiet-kill-seconds", type=float, default=3.0, help="How long to wait with no output before a non-status-aware process is considered idle. Processes that report initializing/running status are allowed to stay silent.")
    args = parser.parse_args()

    fixtures = args.fixture or ["research", "migrate"]
    bundle_root = create_bundle_root(args.root or os.environ.get("PI_RALPH_PARITY_ROOT"))
    agent_info = ensure_agent_dir(bundle_root)

    # Child processes are pointed at the bundle's own agent directory.
    env = os.environ.copy()
    env["PI_CODING_AGENT_DIR"] = agent_info["destination"]

    if args.loop_rpc_command:
        loop_command = parse_command(args.loop_rpc_command)
    else:
        loop_command = build_loop_rpc_command(args.model)
    ralphify_command = parse_command(args.ralphify_rpc_command) if args.ralphify_rpc_command else None

    # The plan does not depend on the fixture, so it is computed once.
    plan = implementation_plan(args.implementation, loop_command, args.loop_prompt_template, ralphify_command)

    runs: list[dict[str, Any]] = []
    for fixture_name in fixtures:
        for impl_name, rpc_command, prompt_template in plan:
            # Ralphify runs use the template from the command line rather
            # than the one carried in the plan.
            if impl_name == "pi-ralph-loop":
                chosen_template = prompt_template
            else:
                chosen_template = args.ralphify_prompt_template
            runs.append(
                run_fixture(
                    bundle_root,
                    fixture_name,
                    impl_name,
                    rpc_command,
                    chosen_template,
                    env,
                    args.run_timeout_seconds,
                    args.quiet_kill_seconds,
                )
            )

    manifest = {
        "created_at": utc_now(),
        "repo_root": str(REPO_ROOT),
        "repo_head": run_git(["rev-parse", "HEAD"]),
        "repo_status": run_git(["status", "--short"]),
        "bundle_root": str(bundle_root),
        "agent": agent_info,
        "fixtures": fixtures,
        "implementation": args.implementation,
        "loop_rpc_command": loop_command,
        "ralphify_rpc_command": ralphify_command,
        "runs": runs,
    }
    write_json(bundle_root / "manifest.json", manifest)

    print(bundle_root)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,42 @@
1
+ import assert from "node:assert/strict";
2
+ import { execFile } from "node:child_process";
3
+ import { fileURLToPath } from "node:url";
4
+ import { promisify } from "node:util";
5
+ import test from "node:test";
6
+
7
// Promise-returning variant of child_process.execFile.
const execFileAsync = promisify(execFile);

// Directory one level above this test file's directory, derived from the module URL.
const repoRootUrl = new URL("..", import.meta.url);
const repoRoot = fileURLToPath(repoRootUrl);
9
+
10
/**
 * Return a copy of the current process environment without the parity
 * model and loop-command override variables. `process.env` is not modified.
 */
function cleanEnv(): NodeJS.ProcessEnv {
  const {
    PI_RALPH_PARITY_MODEL: _droppedModel,
    PI_RALPH_PARITY_LOOP_RPC_COMMAND: _droppedLoopCommand,
    ...remaining
  } = process.env;
  return remaining;
}
16
+
17
// Imports the harness in a Python subprocess and checks the default command line.
test("parity harness builds a pi command without pinning a model by default", async () => {
  const script =
    "from tests.parity.harness import build_loop_rpc_command; print(' '.join(build_loop_rpc_command(None)))";
  const result = await execFileAsync("python3", ["-c", script], { cwd: repoRoot, env: cleanEnv() });
  const printed = result.stdout;

  assert.match(printed.trim(), /^pi --mode rpc --no-extensions -e .*\/src\/index\.ts$/);
  assert.equal(printed.includes("--model"), false);
});
30
+
31
// Imports the harness in a Python subprocess and checks that a model argument is appended.
test("parity harness appends an explicit model when requested", async () => {
  const script =
    "from tests.parity.harness import build_loop_rpc_command; print(' '.join(build_loop_rpc_command('openai-codex/gpt-5.4-mini:high')))";
  const result = await execFileAsync("python3", ["-c", script], { cwd: repoRoot, env: cleanEnv() });
  const commandLine = result.stdout.trim();

  assert.match(commandLine, /^pi --mode rpc --no-extensions -e .*\/src\/index\.ts --model openai-codex\/gpt-5\.4-mini:high$/);
});
@@ -0,0 +1,34 @@
1
+ import assert from "node:assert/strict";
2
+ import { existsSync, readFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import test from "node:test";
5
+
6
// Research fixture directory, resolved against the current working directory.
const fixtureDir = join(process.cwd(), "tests", "fixtures", "parity", "research");

// Paths, relative to the fixture directory, that the first test requires to be absent.
const evidenceFiles = [
  "evidence/INDEX.md",
  "evidence/raw/app-factory-ai-cli.md",
  "evidence/raw/docs-factory-ai-cli-features-missions.md",
  "evidence/raw/factory-ai-news-missions.md",
];
const generatedOutputs = ["INSTALL_FLOW.md", "MISSIONS_FINDINGS.md", ...evidenceFiles];
16
+
17
// Every listed output path must be absent from the fixture directory.
test("research fixture does not include generated outputs", () => {
  generatedOutputs.forEach((rel) => {
    const present = existsSync(join(fixtureDir, rel));
    assert.equal(present, false, `${rel} should not be checked in`);
  });
});
22
+
23
// RALPH.md must contain both helper-script instructions verbatim.
test("research fixture instructions name the helper scripts explicitly", () => {
  const instructions = readFileSync(join(fixtureDir, "RALPH.md"), "utf8");
  const requiredSentences = [
    /First, inspect `\.\/scripts\/show-snapshots\.sh`\./,
    /Before you finish, run `\.\/scripts\/verify\.sh`\./,
  ];
  for (const sentence of requiredSentences) {
    assert.match(instructions, sentence);
  }
});
28
+
29
// The checklist must list each generated output as an unchecked item.
test("research checklist leaves generated outputs as pending work", () => {
  const checklistText = readFileSync(join(fixtureDir, "claim-evidence-checklist.md"), "utf8");
  const pendingItems = [
    /- \[ \] `INSTALL_FLOW\.md` must synthesize the shared installer claim across all three snapshots\./,
    /- \[ \] `MISSIONS_FINDINGS\.md` must cite each snapshot path directly\./,
    /- \[ \] `evidence\/INDEX\.md` must map each raw evidence file back to its snapshot\./,
  ];
  for (const item of pendingItems) {
    assert.match(checklistText, item);
  }
});