python-harness 0.0.12__tar.gz → 0.0.13__tar.gz

This diff shows the content of publicly available package versions as released to a supported registry; it is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (42)
  1. {python_harness-0.0.12/python_harness.egg-info → python_harness-0.0.13}/PKG-INFO +1 -1
  2. {python_harness-0.0.12 → python_harness-0.0.13}/pyproject.toml +1 -1
  3. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/cli.py +21 -45
  4. python_harness-0.0.13/python_harness/llm_client.py +32 -0
  5. python_harness-0.0.13/python_harness/refine_apply.py +177 -0
  6. python_harness-0.0.13/python_harness/refine_checks.py +29 -0
  7. python_harness-0.0.13/python_harness/refine_engine.py +41 -0
  8. python_harness-0.0.13/python_harness/refine_execution.py +114 -0
  9. python_harness-0.0.13/python_harness/refine_models.py +40 -0
  10. python_harness-0.0.13/python_harness/refine_rounds.py +373 -0
  11. python_harness-0.0.13/python_harness/refine_scoring.py +95 -0
  12. python_harness-0.0.13/python_harness/refine_workspace.py +57 -0
  13. python_harness-0.0.13/python_harness/soft_eval_report.py +30 -0
  14. python_harness-0.0.13/python_harness/soft_eval_report_messages.py +57 -0
  15. python_harness-0.0.13/python_harness/soft_eval_report_metrics.py +53 -0
  16. python_harness-0.0.13/python_harness/soft_eval_report_mock.py +45 -0
  17. python_harness-0.0.13/python_harness/soft_eval_report_shared.py +2 -0
  18. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/soft_evaluator.py +69 -18
  19. {python_harness-0.0.12 → python_harness-0.0.13/python_harness.egg-info}/PKG-INFO +1 -1
  20. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness.egg-info/SOURCES.txt +17 -0
  21. {python_harness-0.0.12 → python_harness-0.0.13}/tests/test_cli.py +118 -37
  22. python_harness-0.0.13/tests/test_refine_apply.py +184 -0
  23. python_harness-0.0.13/tests/test_refine_engine.py +982 -0
  24. python_harness-0.0.13/tests/test_refine_scoring.py +177 -0
  25. python_harness-0.0.13/tests/test_refine_workspace.py +69 -0
  26. {python_harness-0.0.12 → python_harness-0.0.13}/tests/test_soft_evaluator.py +116 -1
  27. python_harness-0.0.12/python_harness/soft_eval_report.py +0 -154
  28. {python_harness-0.0.12 → python_harness-0.0.13}/LICENSE +0 -0
  29. {python_harness-0.0.12 → python_harness-0.0.13}/README.md +0 -0
  30. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/__init__.py +0 -0
  31. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/evaluator.py +0 -0
  32. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/hard_evaluator.py +0 -0
  33. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/python_file_inventory.py +0 -0
  34. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness/qc_evaluator.py +0 -0
  35. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness.egg-info/dependency_links.txt +0 -0
  36. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness.egg-info/entry_points.txt +0 -0
  37. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness.egg-info/requires.txt +0 -0
  38. {python_harness-0.0.12 → python_harness-0.0.13}/python_harness.egg-info/top_level.txt +0 -0
  39. {python_harness-0.0.12 → python_harness-0.0.13}/setup.cfg +0 -0
  40. {python_harness-0.0.12 → python_harness-0.0.13}/tests/test_evaluator.py +0 -0
  41. {python_harness-0.0.12 → python_harness-0.0.13}/tests/test_hard_evaluator.py +0 -0
  42. {python_harness-0.0.12 → python_harness-0.0.13}/tests/test_qc_evaluator.py +0 -0
```diff
--- python_harness-0.0.12/python_harness.egg-info/PKG-INFO
+++ python_harness-0.0.13/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-harness
-Version: 0.0.12
+Version: 0.0.13
 Summary: An agentic codebase evaluation and evolution tool for Python projects.
 Author-email: Mingli Yuan <mingli.yuan@gmail.com>
 License: MIT
```
```diff
--- python_harness-0.0.12/pyproject.toml
+++ python_harness-0.0.13/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "python-harness"
-version = "0.0.12"
+version = "0.0.13"
 description = "An agentic codebase evaluation and evolution tool for Python projects."
 requires-python = ">=3.10"
 readme = "README.md"
```
```diff
--- python_harness-0.0.12/python_harness/cli.py
+++ python_harness-0.0.13/python_harness/cli.py
@@ -4,6 +4,7 @@ Command-line interface for python-harness.
 
 import os
 import sys
+from pathlib import Path
 from typing import Any
 
 import typer
@@ -11,6 +12,7 @@ from dotenv import load_dotenv
 from rich.console import Console
 
 from python_harness.evaluator import Evaluator
+from python_harness.refine_engine import run_refine
 
 # Try to find .env file explicitly before anything else executes
 env_path = os.path.join(os.getcwd(), '.env')
@@ -231,7 +233,7 @@ def _print_final_report(final_report: dict[str, Any]) -> None:
     suggestions = final_report.get("suggestions", [])
     if suggestions:
         console.print("[bold cyan]Top 3 Improvement Suggestions:[/bold cyan]")
-        for i, sug in enumerate(suggestions, 1):
+        for i, sug in enumerate(suggestions[:3], 1):
             console.print(
                 f" {i}. [bold]{sug.get('title', 'Suggestion')}[/bold] "
                 f"(Target: [yellow]{sug.get('target_file', 'unknown')}[/yellow])"
```
```diff
@@ -242,56 +244,30 @@ def _print_final_report(final_report: dict[str, Any]) -> None:
 @app.command()
 def refine(
     path: str = typer.Argument(".", help="The path to evaluate and evolve"),
-    steps: int = typer.Option(1, help="Number of evolution steps to perform"),
-    max_retries: int = typer.Option(3, help="Maximum retries per variant if tests fail")
+    max_retries: int = typer.Option(3, help="Maximum retries per candidate"),
+    loop: bool = typer.Option(False, help="Keep refining winners across rounds"),
+    max_rounds: int = typer.Option(3, help="Maximum refine rounds when looping"),
 ) -> None:
     """
-    Refine the codebase through an agentic Edit-Test-Improve loop.
-    Generates variants based on suggestions, tests them, and picks the best.
+    Refine the codebase through a fixed two-level search and optional loop.
     """
     console.print(
-        f"[bold magenta]Starting evolution loop for path:[/bold magenta] {path} "
-        f"[dim](steps={steps}, max_retries={max_retries})[/dim]"
+        f"[bold magenta]Starting refine for path:[/bold magenta] {path} "
+        f"[dim](loop={loop}, max_rounds={max_rounds}, "
+        f"max_retries={max_retries})[/dim]"
     )
-
-    # 1. First, run a baseline evaluation to get suggestions
-    evaluator = Evaluator(path)
-    console.print("[cyan]Running baseline evaluation...[/cyan]")
-    hard_results = evaluator.hard_evaluator.evaluate()
-    soft_results = evaluator.soft_evaluator.evaluate()
-    baseline_report = evaluator.soft_evaluator.generate_final_report(
-        hard_results, {"all_passed": True, "failures": []}, soft_results
-    )
-
-    suggestions = baseline_report.get("suggestions", [])
-    if not suggestions:
-        console.print("[yellow]No suggestions found to evolve. Exiting.[/yellow]")
-        return
-
-    console.print(
-        f"[green]Found {len(suggestions)} suggestions. "
-        f"Starting evolution branches...[/green]"
-    )
-
-    # TODO: Implement the Git branching and Agent modification logic here.
-    # The loop will be:
-    # for step in range(steps):
-    #     for suggestion in suggestions:
-    #         checkout new branch variant-X
-    #         for retry in range(max_retries):
-    #             ask LLM to apply suggestion to code
-    #             run pytest
-    #             if pytest passes:
-    #                 run harness . to get new score
-    #                 break
-    #             else:
-    #                 feed error back to LLM for retry
-    #     compare all variants and checkout the best one
-
-    console.print(
-        "[yellow]Evolution engine skeleton ready. "
-        "Actual git mutation logic pending.[/yellow]"
+    target_path = Path(path).resolve()
+
+    result = run_refine(
+        target_path=target_path,
+        max_retries=max_retries,
+        loop=loop,
+        max_rounds=max_rounds,
+        progress_callback=lambda message: console.print(f"[dim]{message}[/dim]"),
     )
+    console.print(f"[green]winner_id:[/green] {result['winner_id']}")
+    console.print(f"[cyan]rounds_completed:[/cyan] {result['rounds_completed']}")
+    console.print(f"[yellow]stop_reason:[/yellow] {result['stop_reason']}")
 @app.command()
 def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> None:
     """
```
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/llm_client.py
@@ -0,0 +1,32 @@
+import os
+from dataclasses import dataclass
+from typing import Any
+
+from openai import OpenAI
+
+
+@dataclass(frozen=True)
+class LLMSettings:
+    api_key: str | None
+    base_url: str
+    model_name: str
+    mini_model_name: str
+    request_timeout_seconds: float
+
+
+def load_llm_settings() -> LLMSettings:
+    return LLMSettings(
+        api_key=os.environ.get("LLM_API_KEY"),
+        base_url=os.environ.get("LLM_BASE_URL", "https://api.deepseek.com/v1"),
+        model_name=os.environ.get("LLM_MODEL_NAME", "deepseek-reasoner"),
+        mini_model_name=os.environ.get("LLM_MINI_MODEL_NAME", "deepseek-chat"),
+        request_timeout_seconds=float(
+            os.environ.get("LLM_REQUEST_TIMEOUT_SECONDS", "60")
+        ),
+    )
+
+
+def build_llm_client(settings: LLMSettings) -> Any | None:
+    if not settings.api_key:
+        return None
+    return OpenAI(api_key=settings.api_key, base_url=settings.base_url)
```
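`build_llm_client` deliberately returns `None` when no key is configured, so callers can degrade gracefully instead of raising at import time. A minimal usage sketch, assuming `LLM_API_KEY` may or may not be set in the environment:

```python
# Sketch: settings come entirely from environment variables, with the
# DeepSeek endpoints above as the defaults baked into load_llm_settings().
from python_harness.llm_client import build_llm_client, load_llm_settings

settings = load_llm_settings()
client = build_llm_client(settings)
if client is None:
    print("LLM_API_KEY not set; appliers will report a failure_reason instead")
else:
    print(f"chat model={settings.model_name}, mini model={settings.mini_model_name}")
```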
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/refine_apply.py
@@ -0,0 +1,177 @@
+import json
+from pathlib import Path
+from typing import Any, cast
+
+from python_harness.llm_client import build_llm_client, load_llm_settings
+from python_harness.python_file_inventory import collect_python_files
+
+
+class NullSuggestionApplier:
+    def apply(
+        self,
+        workspace: Path,
+        suggestion: dict[str, str],
+        failure_feedback: str = "",
+    ) -> dict[str, Any]:
+        return {
+            "ok": True,
+            "touched_files": [],
+            "failure_reason": "",
+            "suggestion_title": suggestion.get("title", ""),
+            "failure_feedback": failure_feedback,
+            "workspace": str(workspace),
+        }
+
+
+class LLMSuggestionApplier:
+    def __init__(
+        self,
+        client: Any | None = None,
+        model_name: str | None = None,
+    ) -> None:
+        settings = load_llm_settings()
+        self.client = client if client is not None else build_llm_client(settings)
+        self.model_name = model_name or settings.mini_model_name
+        self.request_timeout_seconds = settings.request_timeout_seconds
+
+    def _select_files(self, workspace: Path, suggestion: dict[str, str]) -> list[Path]:
+        target_file = suggestion.get("target_file", "").strip()
+        if target_file and target_file != "all":
+            target_path = workspace / target_file
+            if target_path.is_file():
+                return [target_path]
+            if target_path.is_dir():
+                return sorted(target_path.rglob("*.py"))[:3]
+        return collect_python_files(workspace)[:3]
+
+    def _build_messages(
+        self,
+        workspace: Path,
+        suggestion: dict[str, str],
+        failure_feedback: str,
+        files: list[Path],
+    ) -> list[dict[str, str]]:
+        inventory = "\n".join(
+            f"- {file_path.relative_to(workspace)}"
+            for file_path in collect_python_files(workspace)
+        )
+        file_blocks = "\n\n".join(
+            (
+                f"FILE: {file_path.relative_to(workspace)}\n"
+                f"```python\n{file_path.read_text(encoding='utf-8')}\n```"
+            )
+            for file_path in files
+        )
+        system_prompt = (
+            "You apply a single repository improvement suggestion. "
+            "Return only valid JSON with schema "
+            '{"updates":[{"path":"relative/path.py","content":"full file content"}]}. '
+            "Make the smallest possible change that satisfies the suggestion "
+            "and preserves behavior. "
+            "Never write files outside the workspace."
+        )
+        user_prompt = (
+            f"Suggestion title: {suggestion.get('title', '')}\n"
+            f"Suggestion description: {suggestion.get('description', '')}\n"
+            f"Suggestion target_file: {suggestion.get('target_file', 'all')}\n"
+            f"Failure feedback from previous attempt: {failure_feedback or 'None'}\n\n"
+            f"Workspace python inventory:\n{inventory}\n\n"
+            f"Editable file contents:\n{file_blocks}"
+        )
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+
+    def _parse_updates(self, raw_content: str) -> list[dict[str, str]]:
+        payload = json.loads(raw_content)
+        updates = payload.get("updates", [])
+        if not isinstance(updates, list):
+            raise ValueError("LLM updates payload must contain a list")
+        parsed: list[dict[str, str]] = []
+        for update in updates:
+            if not isinstance(update, dict):
+                continue
+            path = update.get("path")
+            content = update.get("content")
+            if isinstance(path, str) and isinstance(content, str):
+                parsed.append({"path": path, "content": content})
+        if not parsed:
+            raise ValueError("LLM returned no file updates")
+        return parsed
+
+    def apply(
+        self,
+        workspace: Path,
+        suggestion: dict[str, str],
+        failure_feedback: str = "",
+    ) -> dict[str, Any]:
+        if self.client is None:
+            return {
+                "ok": False,
+                "touched_files": [],
+                "failure_reason": "LLM_API_KEY not configured",
+            }
+        files = self._select_files(workspace, suggestion)
+        if not files:
+            return {
+                "ok": False,
+                "touched_files": [],
+                "failure_reason": "No editable files selected for suggestion",
+            }
+
+        client = cast(Any, self.client)
+        try:
+            completion = client.chat.completions.create(
+                model=self.model_name,
+                messages=self._build_messages(
+                    workspace,
+                    suggestion,
+                    failure_feedback,
+                    files,
+                ),
+                response_format={"type": "json_object"},
+                timeout=self.request_timeout_seconds,
+            )
+        except Exception as exc:
+            return {
+                "ok": False,
+                "touched_files": [],
+                "failure_reason": str(exc),
+                "retryable": False,
+            }
+        content = completion.choices[0].message.content
+        if not content:
+            return {
+                "ok": False,
+                "touched_files": [],
+                "failure_reason": "LLM returned empty response",
+                "retryable": False,
+            }
+
+        try:
+            updates = self._parse_updates(content)
+            touched_files: list[str] = []
+            for update in updates:
+                destination = (workspace / update["path"]).resolve()
+                if not destination.is_relative_to(workspace.resolve()):
+                    raise ValueError("LLM update path is outside workspace")
+                destination.parent.mkdir(parents=True, exist_ok=True)
+                destination.write_text(update["content"], encoding="utf-8")
+                touched_files.append(str(destination.relative_to(workspace)))
+        except Exception as exc:
+            return {
+                "ok": False,
+                "touched_files": [],
+                "failure_reason": str(exc),
+                "retryable": False,
+            }
+
+        return {
+            "ok": True,
+            "touched_files": touched_files,
+            "failure_reason": "",
+            "suggestion_title": suggestion.get("title", ""),
+            "failure_feedback": failure_feedback,
+            "workspace": str(workspace),
+        }
```
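Both appliers return the same dictionary contract (`ok`, `touched_files`, `failure_reason`, plus `retryable` on hard failures), which is what lets `NullSuggestionApplier` stand in for the LLM in tests. A short sketch with a hypothetical workspace path:

```python
# Sketch: the null applier reports success without touching any files.
from pathlib import Path

from python_harness.refine_apply import NullSuggestionApplier

applier = NullSuggestionApplier()
result = applier.apply(
    Path("/tmp/workspace"),  # hypothetical workspace
    {"title": "Add docstrings", "target_file": "all"},
)
assert result["ok"] is True and result["touched_files"] == []
```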
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/refine_checks.py
@@ -0,0 +1,29 @@
+import subprocess
+import sys
+from pathlib import Path
+
+
+def run_command(path: Path, args: list[str]) -> tuple[bool, str]:
+    command_cwd = path if path.is_dir() else path.parent
+    completed = subprocess.run(
+        args,
+        cwd=command_cwd,
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    output = (completed.stdout + completed.stderr).strip()
+    return completed.returncode == 0, output
+
+
+def default_self_check_runner(path: Path) -> tuple[bool, str]:
+    checks = [
+        [sys.executable, "-m", "ruff", "check", str(path)],
+        [sys.executable, "-m", "mypy", str(path)],
+        [sys.executable, "-m", "pytest", str(path)],
+    ]
+    for args in checks:
+        ok, output = run_command(path, args)
+        if not ok:
+            return False, output
+    return True, ""
```
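The runner executes ruff, mypy, and pytest in order and stops at the first failure, returning that tool's combined stdout/stderr as feedback. A minimal sketch of calling it directly:

```python
# Sketch: guardrail 1 short-circuits on the first failing tool.
from pathlib import Path

from python_harness.refine_checks import default_self_check_runner

ok, output = default_self_check_runner(Path("."))
if not ok:
    print("self-check failed:\n" + output)  # this text becomes retry feedback
```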
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/refine_engine.py
@@ -0,0 +1,41 @@
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+
+from python_harness.refine_checks import default_self_check_runner
+from python_harness.refine_execution import (
+    execute_candidate as _execute_candidate,
+)
+from python_harness.refine_rounds import (
+    default_evaluator_runner,
+    default_workspace_root,
+    suggestions_from,
+    validate_workspace_root,
+)
+from python_harness.refine_rounds import (
+    run_refine as _run_refine,
+)
+from python_harness.refine_rounds import (
+    run_refine_round as _run_refine_round,
+)
+
+SelfCheckRunner = Callable[[Path], tuple[bool, str]]
+EvaluatorRunner = Callable[[Path], dict[str, Any]]
+
+_default_evaluator_runner = default_evaluator_runner
+_default_self_check_runner = default_self_check_runner
+_default_workspace_root = default_workspace_root
+_suggestions_from = suggestions_from
+_validate_workspace_root = validate_workspace_root
+
+
+def execute_candidate(*args: Any, **kwargs: Any) -> Any:
+    return _execute_candidate(*args, **kwargs)
+
+
+def run_refine_round(*args: Any, **kwargs: Any) -> Any:
+    return _run_refine_round(*args, **kwargs)
+
+
+def run_refine(*args: Any, **kwargs: Any) -> Any:
+    return _run_refine(*args, **kwargs)
```
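The module is now a thin facade over `refine_rounds` and `refine_execution`; the underscore aliases presumably keep the old private names importable (e.g. for monkeypatching in tests). Calling it directly mirrors the CLI call site above (a sketch; the keyword arguments and result keys are those used in `cli.refine`):

```python
# Sketch: programmatic equivalent of the refine command with looping disabled.
from pathlib import Path

from python_harness.refine_engine import run_refine

result = run_refine(
    target_path=Path(".").resolve(),
    max_retries=3,
    loop=False,
    max_rounds=3,
    progress_callback=print,
)
print(result["winner_id"], result["rounds_completed"], result["stop_reason"])
```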
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/refine_execution.py
@@ -0,0 +1,114 @@
+from pathlib import Path
+from typing import Any
+
+from python_harness.refine_models import Candidate, SuggestionApplier
+from python_harness.refine_workspace import create_candidate_workspace
+
+
+def _emit(progress_callback: Any, message: str) -> None:
+    if progress_callback is not None:
+        progress_callback(message)
+
+
+def execute_candidate(
+    *,
+    parent: Candidate,
+    candidate_id: str,
+    suggestion: dict[str, str],
+    workspace_root: Path,
+    applier: SuggestionApplier,
+    self_check_runner: Any,
+    evaluator_runner: Any,
+    max_retries: int,
+    progress_callback: Any = None,
+) -> Candidate:
+    workspace = create_candidate_workspace(
+        parent.workspace,
+        workspace_root,
+        candidate_id,
+    )
+    feedback = ""
+    retries = 0
+    suggestion_title = suggestion.get("title", candidate_id)
+
+    while True:
+        apply_result: dict[str, Any] | None = None
+        _emit(
+            progress_callback,
+            f"{candidate_id} apply started: {suggestion_title}",
+        )
+        try:
+            apply_result = applier.apply(
+                workspace,
+                suggestion,
+                failure_feedback=feedback,
+            )
+            if not bool(apply_result.get("ok", False)):
+                feedback = str(
+                    apply_result.get("failure_reason") or "suggestion apply failed"
+                )
+                raise RuntimeError(feedback)
+            _emit(progress_callback, f"{candidate_id} apply passed")
+        except Exception as exc:
+            feedback = str(exc)
+            retryable = True
+            if apply_result is not None:
+                retryable = bool(apply_result.get("retryable", True))
+            _emit(progress_callback, f"{candidate_id} apply failed: {feedback}")
+            if not retryable:
+                return Candidate(
+                    id=candidate_id,
+                    parent_id=parent.id,
+                    depth=parent.depth + 1,
+                    workspace=workspace,
+                    suggestion_trace=parent.suggestion_trace + (suggestion_title,),
+                    status="failed",
+                    retry_count=retries,
+                    selection_reason=feedback,
+                )
+            retries += 1
+            if retries > max_retries:
+                return Candidate(
+                    id=candidate_id,
+                    parent_id=parent.id,
+                    depth=parent.depth + 1,
+                    workspace=workspace,
+                    suggestion_trace=parent.suggestion_trace + (suggestion_title,),
+                    status="failed",
+                    retry_count=retries - 1,
+                    selection_reason=feedback,
+                )
+            continue
+
+        _emit(progress_callback, f"{candidate_id} guardrail 1 started")
+        is_ok, feedback = self_check_runner(workspace)
+        if is_ok:
+            _emit(progress_callback, f"{candidate_id} guardrail 1 passed")
+            _emit(progress_callback, f"{candidate_id} guardrail 2 started")
+            evaluation = evaluator_runner(workspace)
+            _emit(progress_callback, f"{candidate_id} guardrail 2 passed")
+            return Candidate(
+                id=candidate_id,
+                parent_id=parent.id,
+                depth=parent.depth + 1,
+                workspace=workspace,
+                suggestion_trace=parent.suggestion_trace + (suggestion_title,),
+                evaluation=evaluation,
+                status="measured",
+                retry_count=retries,
+            )
+
+        _emit(progress_callback, f"{candidate_id} guardrail 1 failed")
+        _emit(progress_callback, feedback)
+        retries += 1
+        if retries > max_retries:
+            return Candidate(
+                id=candidate_id,
+                parent_id=parent.id,
+                depth=parent.depth + 1,
+                workspace=workspace,
+                suggestion_trace=parent.suggestion_trace + (suggestion_title,),
+                status="failed",
+                retry_count=retries - 1,
+                selection_reason=str(feedback),
+            )
```
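`execute_candidate` retries the apply step and guardrail 1 up to `max_retries` times, feeding failure output back into the next apply, and only runs the evaluator (guardrail 2) once the self-checks pass. A sketch that drives it with stub collaborators; the paths and the evaluation payload here are hypothetical, and a real call needs the parent workspace to exist so `create_candidate_workspace` can copy it:

```python
# Sketch: stubbed run of execute_candidate; with a passing self-check stub
# and the null applier, the child comes back with status "measured".
from pathlib import Path

from python_harness.refine_apply import NullSuggestionApplier
from python_harness.refine_execution import execute_candidate
from python_harness.refine_models import Candidate

baseline = Candidate(
    id="baseline",
    parent_id=None,
    depth=0,
    workspace=Path("/tmp/baseline"),  # hypothetical source workspace
    suggestion_trace=(),
)
child = execute_candidate(
    parent=baseline,
    candidate_id="candidate-1",
    suggestion={"title": "Tidy imports"},
    workspace_root=Path("/tmp/candidates"),  # hypothetical scratch root
    applier=NullSuggestionApplier(),
    self_check_runner=lambda path: (True, ""),           # stub guardrail 1
    evaluator_runner=lambda path: {"total_score": 0.0},  # stub guardrail 2
    max_retries=1,
)
print(child.status, child.suggestion_trace)  # measured ('Tidy imports',)
```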
```diff
--- /dev/null
+++ python_harness-0.0.13/python_harness/refine_models.py
@@ -0,0 +1,40 @@
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Protocol
+
+
+@dataclass(slots=True)
+class Candidate:
+    id: str
+    parent_id: str | None
+    depth: int
+    workspace: Path
+    suggestion_trace: tuple[str, ...]
+    evaluation: dict[str, Any] | None = None
+    status: str = "pending"
+    retry_count: int = 0
+    selection_reason: str = ""
+
+
+@dataclass(slots=True)
+class SelectionResult:
+    winner: Candidate
+    ordered_ids: list[str]
+    reason: str
+
+
+@dataclass(slots=True)
+class RefineRoundResult:
+    baseline: Candidate
+    candidates: list[Candidate] = field(default_factory=list)
+    winner: Candidate | None = None
+    stop_reason: str = ""
+
+
+class SuggestionApplier(Protocol):
+    def apply(
+        self,
+        workspace: Path,
+        suggestion: dict[str, str],
+        failure_feedback: str = "",
+    ) -> dict[str, Any]: ...
```
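`SuggestionApplier` is a structural `Protocol`, so any object with a matching `apply` method qualifies without inheriting from anything. A hypothetical recording applier, useful as a test double:

```python
# Sketch: a hypothetical class satisfying the SuggestionApplier protocol.
from pathlib import Path
from typing import Any


class RecordingApplier:
    """Records suggestions instead of editing files (test double)."""

    def __init__(self) -> None:
        self.seen: list[dict[str, str]] = []

    def apply(
        self,
        workspace: Path,
        suggestion: dict[str, str],
        failure_feedback: str = "",
    ) -> dict[str, Any]:
        self.seen.append(suggestion)
        return {"ok": True, "touched_files": [], "failure_reason": ""}
```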