codd-dev 1.7.1__tar.gz → 1.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {codd_dev-1.7.1 → codd_dev-1.9.2}/PKG-INFO +17 -7
  2. {codd_dev-1.7.1 → codd_dev-1.9.2}/README.md +16 -6
  3. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/assembler.py +42 -46
  4. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/cli.py +28 -15
  5. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/fixer.py +137 -16
  6. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/generator.py +94 -1
  7. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/implementer.py +529 -217
  8. {codd_dev-1.7.1 → codd_dev-1.9.2}/pyproject.toml +1 -1
  9. {codd_dev-1.7.1 → codd_dev-1.9.2}/.gitignore +0 -0
  10. {codd_dev-1.7.1 → codd_dev-1.9.2}/LICENSE +0 -0
  11. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/__init__.py +0 -0
  12. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/bridge.py +0 -0
  13. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/clustering.py +0 -0
  14. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/config.py +0 -0
  15. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/contracts.py +0 -0
  16. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/defaults.yaml +0 -0
  17. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/e2e_runner.py +0 -0
  18. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/env_refs.py +0 -0
  19. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/extract_ai.py +0 -0
  20. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/extractor.py +0 -0
  21. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/graph.py +0 -0
  22. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/hooks/__init__.py +0 -0
  23. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/hooks/pre-commit +0 -0
  24. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/inheritance.py +0 -0
  25. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/mcp_server.py +0 -0
  26. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/measure.py +0 -0
  27. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/parsing.py +0 -0
  28. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/planner.py +0 -0
  29. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/policy.py +0 -0
  30. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/propagate.py +0 -0
  31. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/propagator.py +0 -0
  32. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/repair_slice.py +0 -0
  33. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/require.py +0 -0
  34. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/require_plugins.py +0 -0
  35. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/restore.py +0 -0
  36. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/scanner.py +0 -0
  37. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/schema_refs.py +0 -0
  38. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/synth.py +0 -0
  39. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/codd.yaml.tmpl +0 -0
  40. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/conventions.yaml.tmpl +0 -0
  41. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/data_dependencies.yaml.tmpl +0 -0
  42. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/doc_links.yaml.tmpl +0 -0
  43. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/extracted/api-contract.md.j2 +0 -0
  44. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/extracted/architecture-overview.md.j2 +0 -0
  45. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/extracted/module-detail.md.j2 +0 -0
  46. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/extracted/schema-design.md.j2 +0 -0
  47. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/extracted/system-context.md.j2 +0 -0
  48. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/gitignore.tmpl +0 -0
  49. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/templates/overrides.yaml.tmpl +0 -0
  50. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/traceability.py +0 -0
  51. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/validator.py +0 -0
  52. {codd_dev-1.7.1 → codd_dev-1.9.2}/codd/wiring.py +0 -0
  53. {codd_dev-1.7.1 → codd_dev-1.9.2}/docs/requirements/README.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codd-dev
3
- Version: 1.7.1
3
+ Version: 1.9.2
4
4
  Summary: CoDD: Coherence-Driven Development — cross-artifact change impact analysis
5
5
  Project-URL: Homepage, https://github.com/yohey-w/codd-dev
6
6
  Project-URL: Repository, https://github.com/yohey-w/codd-dev
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  pip install codd-dev
61
61
  ```
62
62
 
63
- **v1.7.0** — `init` / `scan` / `impact` are stable. `propagate` traces code changes to downstream design docs and doc-to-doc changes via CEG graph. `extract --ai` with baseline preset. Custom `node_id` prefixes via `codd.yaml`. GitHub Action for CI integration.
63
+ **v1.9.0** — `codd implement` now supports **multi-AI engine** (Claude stdout + Codex file-writing) and **automatic parallel execution** within phases via git worktree isolation. Phase milestone format (`#### M1.1`) supported. AI command timeout extended to 1 hour for heavy reasoning models. SWE-bench Verified: **73/73 = 100%** resolved.
64
64
 
65
65
  ---
66
66
 
@@ -132,10 +132,7 @@ done
132
132
  codd validate
133
133
 
134
134
  # Generate code from design docs
135
- sprints=$(codd plan --sprints)
136
- for sprint in $(seq 1 $sprints); do
137
- codd implement --sprint $sprint
138
- done
135
+ codd implement
139
136
 
140
137
  # Assemble code fragments into a buildable project
141
138
  codd assemble
@@ -421,6 +418,19 @@ codd impact
421
418
  | `codd policy` | **Alpha** | Enterprise policy checker (forbidden/required patterns in source code) |
422
419
  | `codd measure` | **Alpha** | Project health metrics (graph, coverage, quality, health score 0-100) |
423
420
  | `codd mcp-server` | **Alpha** | MCP server for AI tool integration (stdio, zero dependencies) |
421
+ | `codd fix` | **Alpha** | Auto-fix test/build failures with diagnostic reasoning and session state |
422
+
423
+ ## SWE-bench Verified
424
+
425
+ CoDD's `fix` command with diagnostic reasoning achieves **73/73 = 100%** on a curated subset of [SWE-bench Verified](https://www.swebench.com/verified.html). The diagnostic step forces root cause analysis before patching, and session state prevents repeating failed approaches across retries.
426
+
427
+ | Metric | Result |
428
+ |--------|--------|
429
+ | Instances | 73 (curated from SWE-bench Verified) |
430
+ | Resolved | **73 (100%)** |
431
+ | Key feature | Diagnostic reasoning + session state persistence |
432
+
433
+ Details: [Zenn: CoDD SWE-bench Guide](https://zenn.dev/shio_shoppaize/articles/codd-swebench-pilot?locale=en)
424
434
 
425
435
  ## OSS / Pro Split
426
436
 
@@ -428,7 +438,7 @@ CoDD v1.6.0 introduced a clean OSS/Pro boundary via a bridge pattern.
428
438
 
429
439
  **OSS (MIT, free)** — everything you need to keep docs coherent:
430
440
 
431
- `init` · `scan` · `impact` · `generate` · `restore` · `propagate` · `extract` · `require` · `plan` · `validate` · `measure` · `policy` · `mcp-server`
441
+ `init` · `scan` · `impact` · `generate` · `restore` · `propagate` · `extract` · `require` · `plan` · `validate` · `measure` · `policy` · `fix` · `mcp-server`
432
442
 
433
443
  **Pro (private, paid)** — enterprise review and verification:
434
444
 
@@ -22,7 +22,7 @@
22
22
  pip install codd-dev
23
23
  ```
24
24
 
25
- **v1.7.0** — `init` / `scan` / `impact` are stable. `propagate` traces code changes to downstream design docs and doc-to-doc changes via CEG graph. `extract --ai` with baseline preset. Custom `node_id` prefixes via `codd.yaml`. GitHub Action for CI integration.
25
+ **v1.9.0** — `codd implement` now supports **multi-AI engine** (Claude stdout + Codex file-writing) and **automatic parallel execution** within phases via git worktree isolation. Phase milestone format (`#### M1.1`) supported. AI command timeout extended to 1 hour for heavy reasoning models. SWE-bench Verified: **73/73 = 100%** resolved.
26
26
 
27
27
  ---
28
28
 
@@ -94,10 +94,7 @@ done
94
94
  codd validate
95
95
 
96
96
  # Generate code from design docs
97
- sprints=$(codd plan --sprints)
98
- for sprint in $(seq 1 $sprints); do
99
- codd implement --sprint $sprint
100
- done
97
+ codd implement
101
98
 
102
99
  # Assemble code fragments into a buildable project
103
100
  codd assemble
@@ -383,6 +380,19 @@ codd impact
383
380
  | `codd policy` | **Alpha** | Enterprise policy checker (forbidden/required patterns in source code) |
384
381
  | `codd measure` | **Alpha** | Project health metrics (graph, coverage, quality, health score 0-100) |
385
382
  | `codd mcp-server` | **Alpha** | MCP server for AI tool integration (stdio, zero dependencies) |
383
+ | `codd fix` | **Alpha** | Auto-fix test/build failures with diagnostic reasoning and session state |
384
+
385
+ ## SWE-bench Verified
386
+
387
+ CoDD's `fix` command with diagnostic reasoning achieves **73/73 = 100%** on a curated subset of [SWE-bench Verified](https://www.swebench.com/verified.html). The diagnostic step forces root cause analysis before patching, and session state prevents repeating failed approaches across retries.
388
+
389
+ | Metric | Result |
390
+ |--------|--------|
391
+ | Instances | 73 (curated from SWE-bench Verified) |
392
+ | Resolved | **73 (100%)** |
393
+ | Key feature | Diagnostic reasoning + session state persistence |
394
+
395
+ Details: [Zenn: CoDD SWE-bench Guide](https://zenn.dev/shio_shoppaize/articles/codd-swebench-pilot?locale=en)
386
396
 
387
397
  ## OSS / Pro Split
388
398
 
@@ -390,7 +400,7 @@ CoDD v1.6.0 introduced a clean OSS/Pro boundary via a bridge pattern.
390
400
 
391
401
  **OSS (MIT, free)** — everything you need to keep docs coherent:
392
402
 
393
- `init` · `scan` · `impact` · `generate` · `restore` · `propagate` · `extract` · `require` · `plan` · `validate` · `measure` · `policy` · `mcp-server`
403
+ `init` · `scan` · `impact` · `generate` · `restore` · `propagate` · `extract` · `require` · `plan` · `validate` · `measure` · `policy` · `fix` · `mcp-server`
394
404
 
395
405
  **Pro (private, paid)** — enterprise review and verification:
396
406
 
@@ -1,4 +1,4 @@
1
- """CoDD assembler — integrate generated sprint fragments into a working project."""
1
+ """CoDD assembler — integrate generated fragments into a working project."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -10,7 +10,7 @@ import warnings
10
10
 
11
11
  import codd.generator as generator_module
12
12
  from codd.generator import _load_project_config, _normalize_conventions
13
- from codd.implementer import get_task_slugs_by_sprint
13
+ from codd.implementer import get_valid_task_slugs
14
14
  from codd.scanner import _extract_frontmatter, build_document_node_path_map
15
15
 
16
16
 
@@ -51,7 +51,9 @@ def assemble_project(
51
51
  prompt = _build_assemble_prompt(config, design_docs, fragments, dest)
52
52
 
53
53
  # Invoke AI
54
- raw_output = generator_module._invoke_ai_command(resolved_ai_command, prompt)
54
+ raw_output = generator_module._invoke_ai_command(
55
+ resolved_ai_command, prompt, project_root=project_root,
56
+ )
55
57
 
56
58
  # Parse and write files
57
59
  files_written = _write_assembled_files(project_root, dest_path, raw_output)
@@ -71,7 +73,6 @@ def _collect_design_documents(project_root: Path, config: dict[str, Any]) -> lis
71
73
  full_path = project_root / rel_path
72
74
  if full_path.exists():
73
75
  content = full_path.read_text(encoding="utf-8")
74
- # Strip frontmatter for the prompt
75
76
  stripped = _strip_frontmatter(content)
76
77
  docs.append({
77
78
  "node_id": node_id,
@@ -82,10 +83,10 @@ def _collect_design_documents(project_root: Path, config: dict[str, Any]) -> lis
82
83
 
83
84
 
84
85
  def _collect_generated_fragments(project_root: Path, config: dict[str, Any]) -> list[dict[str, str]]:
85
- """Collect all generated code fragments from src/generated/sprint_N/.
86
+ """Collect all generated code fragments from src/generated/.
86
87
 
87
- Cross-references against the implementation plan to detect orphan fragments
88
- from renamed or deleted tasks. Orphans are excluded with a warning.
88
+ Supports both flat layout (src/generated/<task>/) and legacy sprint layout
89
+ (src/generated/sprint_N/<task>/). Orphan directories are excluded with a warning.
89
90
  """
90
91
  source_dirs = config.get("scan", {}).get("source_dirs", ["src/"])
91
92
  generated_base = None
@@ -101,44 +102,39 @@ def _collect_generated_fragments(project_root: Path, config: dict[str, Any]) ->
101
102
  if not generated_base.is_dir():
102
103
  return []
103
104
 
104
- # Load valid task slugs from implementation plan for orphan detection
105
- valid_slugs = get_task_slugs_by_sprint(project_root)
105
+ valid_slugs = get_valid_task_slugs(project_root)
106
106
 
107
107
  code_extensions = (".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".java", ".css")
108
108
  fragments = []
109
- for sprint_dir in sorted(generated_base.iterdir()):
110
- if not sprint_dir.is_dir() or not sprint_dir.name.startswith("sprint_"):
109
+
110
+ orphan_dirs: set[str] = set()
111
+ if valid_slugs:
112
+ for child in generated_base.iterdir():
113
+ if child.is_dir() and not child.name.startswith("sprint_") and child.name not in valid_slugs:
114
+ orphan_dirs.add(child.name)
115
+ warnings.warn(
116
+ f"Orphan fragment directory 'generated/{child.name}' "
117
+ f"does not match any task in the implementation plan. Skipping.",
118
+ stacklevel=2,
119
+ )
120
+
121
+ for code_file in sorted(generated_base.rglob("*")):
122
+ if not code_file.is_file() or code_file.suffix not in code_extensions:
111
123
  continue
112
124
 
113
- # Identify orphan task directories
114
- orphan_dirs: set[str] = set()
115
- if valid_slugs and sprint_dir.name in valid_slugs:
116
- expected = valid_slugs[sprint_dir.name]
117
- for child in sprint_dir.iterdir():
118
- if child.is_dir() and child.name not in expected:
119
- orphan_dirs.add(child.name)
120
- warnings.warn(
121
- f"Orphan fragment directory '{sprint_dir.name}/{child.name}' "
122
- f"does not match any task in the implementation plan. Skipping.",
123
- stacklevel=2,
124
- )
125
-
126
- for code_file in sorted(sprint_dir.rglob("*")):
127
- if not code_file.is_file() or code_file.suffix not in code_extensions:
128
- continue
129
-
130
- # Skip files under orphan task directories
131
- rel_to_sprint = code_file.relative_to(sprint_dir)
132
- if rel_to_sprint.parts and rel_to_sprint.parts[0] in orphan_dirs:
133
- continue
134
-
135
- rel_path = code_file.relative_to(project_root)
136
- content = code_file.read_text(encoding="utf-8")
137
- fragments.append({
138
- "sprint_dir": sprint_dir.name,
139
- "path": str(rel_path),
140
- "content": content,
141
- })
125
+ rel_to_generated = code_file.relative_to(generated_base)
126
+ if rel_to_generated.parts and rel_to_generated.parts[0] in orphan_dirs:
127
+ continue
128
+
129
+ rel_path = code_file.relative_to(project_root)
130
+ content = code_file.read_text(encoding="utf-8")
131
+
132
+ task_group = rel_to_generated.parts[0] if rel_to_generated.parts else "unknown"
133
+ fragments.append({
134
+ "task_group": task_group,
135
+ "path": str(rel_path),
136
+ "content": content,
137
+ })
142
138
 
143
139
  return fragments
144
140
 
@@ -172,13 +168,13 @@ def _build_assemble_prompt(
172
168
  ## Instructions
173
169
 
174
170
  1. Read the design documents below to understand the architecture, component tree, data model, and state management.
175
- 2. Read all generated code fragments — they contain implementation pieces organized by sprint.
171
+ 2. Read all generated code fragments — they contain implementation pieces organized by task.
176
172
  3. Produce a COMPLETE, BUILDABLE project. This includes:
177
173
  - **Project configuration files** at the project root: package.json, tsconfig.json, next.config.*, tailwind.config.*, postcss.config.*, etc. — whatever the tech stack requires to build and run.
178
174
  - **Entry point / scaffold files**: app/layout.tsx, app/page.tsx (for Next.js), index.html, main.py, etc. — the files that wire the application together.
179
175
  - **Source code** under `{output_dir}/`: components, utilities, types, styles, hooks, reducers.
180
176
  - **Style entry points**: globals.css or equivalent with framework imports (e.g. @import "tailwindcss").
181
- 4. Resolve conflicts between sprint fragments: later sprints may refine or replace earlier ones.
177
+ 4. Resolve conflicts between fragments: later tasks may refine or replace earlier ones.
182
178
  5. Ensure all imports resolve correctly between files.
183
179
  6. Do NOT add features beyond what the design documents specify.
184
180
  7. Preserve traceability comments (@generated-by, @generated-from) where practical.
@@ -211,11 +207,11 @@ Do not include explanations outside of the === FILE blocks.
211
207
 
212
208
  # Add generated fragments
213
209
  parts.append("## Generated Code Fragments\n")
214
- current_sprint = None
210
+ current_group = None
215
211
  for frag in fragments:
216
- if frag["sprint_dir"] != current_sprint:
217
- current_sprint = frag["sprint_dir"]
218
- parts.append(f"\n### {current_sprint}\n")
212
+ if frag["task_group"] != current_group:
213
+ current_group = frag["task_group"]
214
+ parts.append(f"\n### {current_group}\n")
219
215
  parts.append(f"#### {frag['path']}\n```\n{frag['content']}\n```\n")
220
216
 
221
217
  return "\n".join(parts)
@@ -491,39 +491,52 @@ def propagate(diff: str, path: str, update: bool, verify: bool, do_commit: bool,
491
491
 
492
492
 
493
493
  @main.command()
494
- @click.option("--sprint", required=True, type=click.IntRange(min=1), help="Sprint number to implement")
495
494
  @click.option("--path", default=".", help="Project root directory")
496
495
  @click.option("--task", default=None, help="Generate only one task by task ID or title match")
497
- @click.option("--clean", is_flag=True, default=False, help="Remove existing sprint output before re-generating")
496
+ @click.option("--clean", is_flag=True, default=False, help="Remove existing generated output before re-generating")
498
497
  @click.option(
499
498
  "--ai-cmd",
500
499
  default=None,
501
500
  help="Override AI CLI command (defaults to codd.yaml ai_command or merged CoDD defaults)",
502
501
  )
503
- def implement(sprint: int, path: str, task: str | None, clean: bool, ai_cmd: str | None):
504
- """Generate implementation code for a specific sprint."""
505
- from codd.implementer import implement_sprint
502
+ def implement(path: str, task: str | None, clean: bool, ai_cmd: str | None):
503
+ """Generate implementation code from the implementation plan."""
504
+ from codd.implementer import implement_tasks
506
505
 
507
506
  project_root = Path(path).resolve()
508
507
  codd_dir = _require_codd_dir(project_root)
509
508
 
510
509
  if clean:
511
- click.echo(f"Cleaning src/generated/sprint_{sprint}/ ...")
510
+ click.echo("Cleaning src/generated/ ...")
512
511
 
513
512
  try:
514
- results = implement_sprint(project_root, sprint, task=task, ai_command=ai_cmd, clean=clean)
513
+ results = implement_tasks(project_root, task=task, ai_command=ai_cmd, clean=clean)
515
514
  except (FileNotFoundError, ValueError) as exc:
516
515
  click.echo(f"Error: {exc}")
517
516
  raise SystemExit(1)
518
517
 
519
518
  generated_files = 0
519
+ failed_tasks = []
520
520
  for result in results:
521
+ if result.error:
522
+ failed_tasks.append(result)
523
+ continue
521
524
  for generated_file in result.generated_files:
522
525
  rel_path = generated_file.relative_to(project_root)
523
526
  click.echo(f"Generated: {rel_path} ({result.task_id})")
524
527
  generated_files += 1
525
528
 
526
- click.echo(f"Sprint {sprint}: {generated_files} files generated across {len(results)} task(s)")
529
+ succeeded = len(results) - len(failed_tasks)
530
+ click.echo(f"{generated_files} files generated across {succeeded} task(s)")
531
+
532
+ if failed_tasks:
533
+ click.echo(click.style(
534
+ f"\nFAILED: {len(failed_tasks)} task(s) produced no files:",
535
+ fg="red", bold=True,
536
+ ))
537
+ for ft in failed_tasks:
538
+ click.echo(click.style(f" ✗ {ft.task_id} ({ft.task_title}): {ft.error}", fg="red"))
539
+ raise SystemExit(1)
527
540
 
528
541
 
529
542
  @main.command()
@@ -552,7 +565,7 @@ def assemble(path: str, output_dir: str | None, ai_cmd: str | None):
552
565
 
553
566
  @main.command()
554
567
  @click.option("--path", default=".", help="Project root directory")
555
- @click.option("--sprint", default=None, type=click.IntRange(min=1), help="Sprint number to verify")
568
+ @click.option("--sprint", default=None, type=click.IntRange(min=1), help="(deprecated, ignored) Sprint number", hidden=True)
556
569
  @click.option("--e2e", is_flag=True, default=False, help="Run E2E tests (CI-safe, excludes @cdp-only)")
557
570
  @click.option("--deploy", is_flag=True, default=False, help="Run deploy/CDP-only E2E tests against deployed URL")
558
571
  @click.option("--base-url", default=None, help="Override BASE_URL for E2E tests")
@@ -902,13 +915,13 @@ def policy(path: str):
902
915
  @click.option("--init", "initialize", is_flag=True, help="Generate wave_config from requirement docs")
903
916
  @click.option("--force", is_flag=True, help="Overwrite existing wave_config during --init")
904
917
  @click.option("--waves", is_flag=True, help="Output only the total wave count (for shell scripting)")
905
- @click.option("--sprints", is_flag=True, help="Output only the total sprint count (for shell scripting)")
918
+ @click.option("--tasks", is_flag=True, help="Output only the total task count (for shell scripting)")
906
919
  @click.option(
907
920
  "--ai-cmd",
908
921
  default=None,
909
922
  help="Override AI CLI command for --init (defaults to codd.yaml ai_command or 'claude --print')",
910
923
  )
911
- def plan(path: str, as_json: bool, initialize: bool, force: bool, waves: bool, sprints: bool, ai_cmd: str | None):
924
+ def plan(path: str, as_json: bool, initialize: bool, force: bool, waves: bool, tasks: bool, ai_cmd: str | None):
912
925
  """Show wave execution status from configured artifacts."""
913
926
  from codd.planner import build_plan, plan_init, plan_to_dict, render_plan_text
914
927
 
@@ -946,7 +959,7 @@ def plan(path: str, as_json: bool, initialize: bool, force: bool, waves: bool, s
946
959
 
947
960
  if force:
948
961
  raise click.BadOptionUsage("force", "--force requires --init")
949
- if ai_cmd is not None and not waves and not sprints:
962
+ if ai_cmd is not None and not waves and not tasks:
950
963
  raise click.BadOptionUsage("ai_cmd", "--ai-cmd requires --init")
951
964
 
952
965
  if waves:
@@ -956,9 +969,9 @@ def plan(path: str, as_json: bool, initialize: bool, force: bool, waves: bool, s
956
969
  click.echo(len(wave_config))
957
970
  return
958
971
 
959
- if sprints:
960
- from codd.implementer import count_sprints
961
- click.echo(count_sprints(project_root))
972
+ if tasks:
973
+ from codd.implementer import get_valid_task_slugs
974
+ click.echo(len(get_valid_task_slugs(project_root)))
962
975
  return
963
976
 
964
977
  try:
@@ -38,6 +38,7 @@ class FixAttempt:
38
38
  failures: list[FailureInfo]
39
39
  fixed: bool
40
40
  ai_output: str = ""
41
+ diagnosis: str = "" # root cause diagnosis from this attempt
41
42
 
42
43
 
43
44
  @dataclass
@@ -110,27 +111,35 @@ def run_fix(
110
111
  fixed=False,
111
112
  )
112
113
 
113
- # Step 2: Fix loop
114
+ # Step 2: Fix loop with diagnostic reasoning and session state
114
115
  attempts: list[FixAttempt] = []
116
+ session_state = _SessionState()
117
+
115
118
  for attempt_num in range(1, max_attempts + 1):
116
119
  # Map failures to design context
117
120
  context = _build_fix_context(project_root, config, failures)
118
121
 
119
- # Build prompt and invoke AI (fix mode: returns fixed source, writes to files)
120
- prompt = _build_fix_prompt(project_root, failures, context, config)
122
+ # Build prompt with diagnosis step and session state from prior attempts
123
+ prompt = _build_fix_prompt(
124
+ project_root, failures, context, config,
125
+ session_state=session_state,
126
+ )
121
127
  ai_output = _invoke_fix_ai(resolved_ai, prompt, project_root)
122
128
 
129
+ # Extract diagnosis from AI output for session state
130
+ diagnosis = _extract_diagnosis(ai_output)
131
+
123
132
  # Re-run tests to verify
124
133
  new_failures = _run_local_tests(project_root, config)
125
134
 
126
135
  if new_failures is None:
127
- # Tests could not run — mark as unverified, not fixed
128
136
  logger.warning("Local tests could not run. Fix is unverified.")
129
137
  attempts.append(FixAttempt(
130
138
  attempt=attempt_num,
131
139
  failures=failures,
132
140
  fixed=False,
133
141
  ai_output=ai_output,
142
+ diagnosis=diagnosis,
134
143
  ))
135
144
  break
136
145
 
@@ -141,11 +150,21 @@ def run_fix(
141
150
  failures=failures,
142
151
  fixed=fixed,
143
152
  ai_output=ai_output,
153
+ diagnosis=diagnosis,
144
154
  ))
145
155
 
146
156
  if fixed:
147
157
  break
148
158
 
159
+ # Accumulate session state for next retry
160
+ session_state.record_attempt(
161
+ attempt=attempt_num,
162
+ diagnosis=diagnosis,
163
+ failures=failures,
164
+ new_failures=new_failures,
165
+ ai_output=ai_output,
166
+ )
167
+
149
168
  # Next iteration uses new failures
150
169
  failures = new_failures
151
170
 
@@ -168,6 +187,84 @@ def run_fix(
168
187
  )
169
188
 
170
189
 
190
+ # ---------------------------------------------------------------------------
191
+ # Session state for cross-retry diagnostic context
192
+ # ---------------------------------------------------------------------------
193
+
194
+
195
+ class _SessionState:
196
+ """Accumulates diagnostic context across retry attempts.
197
+
198
+ Inspired by SWE-bench diagnose experiment (73/73 = 100%):
199
+ passing prior attempt history — what was tried, what failed, and why —
200
+ dramatically reduces wasted retries.
201
+ """
202
+
203
+ def __init__(self) -> None:
204
+ self.prior_attempts: list[dict[str, str]] = []
205
+
206
+ def record_attempt(
207
+ self,
208
+ attempt: int,
209
+ diagnosis: str,
210
+ failures: list[FailureInfo],
211
+ new_failures: list[FailureInfo],
212
+ ai_output: str,
213
+ ) -> None:
214
+ summary = {
215
+ "attempt": str(attempt),
216
+ "diagnosis": diagnosis[:500],
217
+ "original_errors": "; ".join(f.summary for f in failures)[:300],
218
+ "result_after_fix": (
219
+ "all tests passed" if not new_failures
220
+ else "; ".join(f.summary for f in new_failures)[:300]
221
+ ),
222
+ "approach_summary": _summarize_approach(ai_output)[:500],
223
+ }
224
+ self.prior_attempts.append(summary)
225
+
226
+ def format_for_prompt(self) -> str:
227
+ if not self.prior_attempts:
228
+ return ""
229
+ lines = ["## Prior attempts (DO NOT repeat these — try a different approach)\n"]
230
+ for pa in self.prior_attempts:
231
+ lines.append(f"### Attempt {pa['attempt']}")
232
+ lines.append(f"- Diagnosis: {pa['diagnosis']}")
233
+ lines.append(f"- Approach: {pa['approach_summary']}")
234
+ lines.append(f"- Result: {pa['result_after_fix']}")
235
+ lines.append("")
236
+ return "\n".join(lines)
237
+
238
+
239
+ def _summarize_approach(ai_output: str) -> str:
240
+ """Extract a brief summary of what the AI changed from its output."""
241
+ # Look for explanation text after the last code block
242
+ parts = ai_output.rsplit("```", 1)
243
+ if len(parts) > 1:
244
+ explanation = parts[1].strip()
245
+ if explanation:
246
+ return explanation[:500]
247
+ # Fallback: first 200 chars
248
+ return ai_output[:200]
249
+
250
+
251
+ def _extract_diagnosis(ai_output: str) -> str:
252
+ """Extract the diagnosis section from AI output."""
253
+ # Look for ## Diagnosis or ## Root Cause sections
254
+ for marker in ("## Diagnosis", "## Root Cause", "**Diagnosis:**", "**Root Cause:**"):
255
+ idx = ai_output.find(marker)
256
+ if idx >= 0:
257
+ # Extract until next ## or code block
258
+ rest = ai_output[idx + len(marker):]
259
+ end = len(rest)
260
+ for stop in ("\n## ", "\n```"):
261
+ pos = rest.find(stop)
262
+ if pos >= 0 and pos < end:
263
+ end = pos
264
+ return rest[:end].strip()[:500]
265
+ return ""
266
+
267
+
171
268
  # ---------------------------------------------------------------------------
172
269
  # AI invocation for fix (source-in → fixed-source-out → write back)
173
270
  # ---------------------------------------------------------------------------
@@ -613,12 +710,14 @@ def _build_fix_prompt(
613
710
  failures: list[FailureInfo],
614
711
  design_context: str,
615
712
  config: dict[str, Any],
713
+ *,
714
+ session_state: _SessionState | None = None,
616
715
  ) -> str:
617
716
  """Build the prompt for AI to fix failures.
618
717
 
619
- The prompt includes: error logs, design docs, AND the current source
620
- of files mentioned in failures. The AI returns the complete fixed
621
- source for each file in fenced code blocks tagged with file paths.
718
+ The prompt includes: error logs, design docs, current source of files
719
+ mentioned in failures, and (on retries) session state from prior attempts.
720
+ Requires the AI to diagnose root cause BEFORE writing any fix.
622
721
  """
623
722
  project_name = config.get("project", {}).get("name", project_root.name)
624
723
  language = config.get("project", {}).get("language", "unknown")
@@ -635,9 +734,21 @@ def _build_fix_prompt(
635
734
  # Collect current source of files mentioned in failures
636
735
  source_section = _collect_source_files(project_root, failures)
637
736
 
737
+ # Session state from prior attempts (if retrying)
738
+ session_section = ""
739
+ if session_state and session_state.prior_attempts:
740
+ session_section = session_state.format_for_prompt()
741
+
638
742
  lines = [
639
743
  f"You are fixing failures in the project '{project_name}' ({language}).",
640
744
  "",
745
+ ]
746
+
747
+ # Insert session state before failures (so AI sees what NOT to repeat)
748
+ if session_section:
749
+ lines.extend([session_section, ""])
750
+
751
+ lines.extend([
641
752
  "## Failures to fix",
642
753
  "",
643
754
  *failure_section,
@@ -651,9 +762,16 @@ def _build_fix_prompt(
651
762
  "",
652
763
  "## Instructions",
653
764
  "",
654
- "1. Read the failing test/build output carefully.",
655
- "2. Use the design documents to understand the INTENDED behavior.",
656
- "3. Fix the IMPLEMENTATION code to match the design, not the other way around.",
765
+ "### Step 1: Diagnose (MANDATORY do this BEFORE writing any fix)",
766
+ "",
767
+ "Write a `## Diagnosis` section that answers:",
768
+ "1. What is the root cause of each failure?",
769
+ "2. Which file(s) and line(s) are responsible?",
770
+ "3. What is the correct behavior according to the design docs?",
771
+ "",
772
+ "### Step 2: Fix",
773
+ "",
774
+ "1. Fix the IMPLEMENTATION code to match the design, not the other way around.",
657
775
  " - If tests fail, fix the source code so tests pass.",
658
776
  " - If a test expects an endpoint/method/feature that doesn't exist in code,",
659
777
  " ADD the missing implementation as described in the design documents.",
@@ -661,15 +779,18 @@ def _build_fix_prompt(
661
779
  " - If build fails (type errors, import errors), fix the source code.",
662
780
  " - If lint fails, fix the lint issues in the source code.",
663
781
  " - If a tool prompted interactively in CI (missing config), create the required config file.",
664
- "4. Do NOT modify test files unless the test itself has a bug (e.g., wrong import path).",
665
- "5. Do NOT modify design documents.",
666
- "6. Make minimal, focused changes. Don't refactor unrelated code.",
667
- "7. Follow the target framework's lint rules and naming conventions.",
782
+ "2. Do NOT modify test files unless the test itself has a bug (e.g., wrong import path).",
783
+ "3. Do NOT modify design documents.",
784
+ "4. Make minimal, focused changes. Don't refactor unrelated code.",
785
+ "5. Follow the target framework's lint rules and naming conventions.",
668
786
  " Avoid using global/reserved names (module, exports, require, etc.) as local variables.",
669
787
  "",
670
788
  "## Output format (CRITICAL)",
671
789
  "",
672
- "For each file you fix or create, output the COMPLETE file content in a fenced",
790
+ "## Diagnosis",
791
+ "(your root cause analysis here)",
792
+ "",
793
+ "Then for each file you fix or create, output the COMPLETE file content in a fenced",
673
794
  "code block tagged with the language and the file path (relative to project root):",
674
795
  "",
675
796
  "```<language> <relative/path/to/file>",
@@ -683,7 +804,7 @@ def _build_fix_prompt(
683
804
  "```",
684
805
  "",
685
806
  "After all code blocks, briefly explain what you fixed and why.",
686
- ]
807
+ ])
687
808
 
688
809
  return "\n".join(lines)
689
810