pdd-cli 0.0.90__py3-none-any.whl → 0.0.121__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (151)
  1. pdd/__init__.py +38 -6
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +506 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +537 -0
  6. pdd/agentic_common.py +533 -770
  7. pdd/agentic_crash.py +2 -1
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +582 -0
  10. pdd/agentic_fix.py +118 -3
  11. pdd/agentic_update.py +27 -9
  12. pdd/agentic_verify.py +3 -2
  13. pdd/architecture_sync.py +565 -0
  14. pdd/auth_service.py +210 -0
  15. pdd/auto_deps_main.py +63 -53
  16. pdd/auto_include.py +236 -3
  17. pdd/auto_update.py +125 -47
  18. pdd/bug_main.py +195 -23
  19. pdd/cmd_test_main.py +345 -197
  20. pdd/code_generator.py +4 -2
  21. pdd/code_generator_main.py +118 -32
  22. pdd/commands/__init__.py +6 -0
  23. pdd/commands/analysis.py +113 -48
  24. pdd/commands/auth.py +309 -0
  25. pdd/commands/connect.py +358 -0
  26. pdd/commands/fix.py +155 -114
  27. pdd/commands/generate.py +5 -0
  28. pdd/commands/maintenance.py +3 -2
  29. pdd/commands/misc.py +8 -0
  30. pdd/commands/modify.py +225 -163
  31. pdd/commands/sessions.py +284 -0
  32. pdd/commands/utility.py +12 -7
  33. pdd/construct_paths.py +334 -32
  34. pdd/context_generator_main.py +167 -170
  35. pdd/continue_generation.py +6 -3
  36. pdd/core/__init__.py +33 -0
  37. pdd/core/cli.py +44 -7
  38. pdd/core/cloud.py +237 -0
  39. pdd/core/dump.py +68 -20
  40. pdd/core/errors.py +4 -0
  41. pdd/core/remote_session.py +61 -0
  42. pdd/crash_main.py +219 -23
  43. pdd/data/llm_model.csv +4 -4
  44. pdd/docs/prompting_guide.md +864 -0
  45. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  46. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  47. pdd/fix_code_loop.py +208 -34
  48. pdd/fix_code_module_errors.py +6 -2
  49. pdd/fix_error_loop.py +291 -38
  50. pdd/fix_main.py +208 -6
  51. pdd/fix_verification_errors_loop.py +235 -26
  52. pdd/fix_verification_main.py +269 -83
  53. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  54. pdd/frontend/dist/assets/index-CUWd8al1.js +450 -0
  55. pdd/frontend/dist/index.html +376 -0
  56. pdd/frontend/dist/logo.svg +33 -0
  57. pdd/generate_output_paths.py +46 -5
  58. pdd/generate_test.py +212 -151
  59. pdd/get_comment.py +19 -44
  60. pdd/get_extension.py +8 -9
  61. pdd/get_jwt_token.py +309 -20
  62. pdd/get_language.py +8 -7
  63. pdd/get_run_command.py +7 -5
  64. pdd/insert_includes.py +2 -1
  65. pdd/llm_invoke.py +531 -97
  66. pdd/load_prompt_template.py +15 -34
  67. pdd/operation_log.py +342 -0
  68. pdd/path_resolution.py +140 -0
  69. pdd/postprocess.py +122 -97
  70. pdd/preprocess.py +68 -12
  71. pdd/preprocess_main.py +33 -1
  72. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  73. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  74. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  75. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  76. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  77. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  78. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  79. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  80. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  81. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  82. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  83. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  84. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +140 -0
  85. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  86. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  87. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  88. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  89. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  90. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  91. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  92. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  93. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  94. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  95. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  96. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  97. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  98. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  99. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  100. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  101. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  102. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  103. pdd/prompts/agentic_fix_primary_LLM.prompt +2 -2
  104. pdd/prompts/agentic_update_LLM.prompt +192 -338
  105. pdd/prompts/auto_include_LLM.prompt +22 -0
  106. pdd/prompts/change_LLM.prompt +3093 -1
  107. pdd/prompts/detect_change_LLM.prompt +571 -14
  108. pdd/prompts/fix_code_module_errors_LLM.prompt +8 -0
  109. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +1 -0
  110. pdd/prompts/generate_test_LLM.prompt +19 -1
  111. pdd/prompts/generate_test_from_example_LLM.prompt +366 -0
  112. pdd/prompts/insert_includes_LLM.prompt +262 -252
  113. pdd/prompts/prompt_code_diff_LLM.prompt +123 -0
  114. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  115. pdd/remote_session.py +876 -0
  116. pdd/server/__init__.py +52 -0
  117. pdd/server/app.py +335 -0
  118. pdd/server/click_executor.py +587 -0
  119. pdd/server/executor.py +338 -0
  120. pdd/server/jobs.py +661 -0
  121. pdd/server/models.py +241 -0
  122. pdd/server/routes/__init__.py +31 -0
  123. pdd/server/routes/architecture.py +451 -0
  124. pdd/server/routes/auth.py +364 -0
  125. pdd/server/routes/commands.py +929 -0
  126. pdd/server/routes/config.py +42 -0
  127. pdd/server/routes/files.py +603 -0
  128. pdd/server/routes/prompts.py +1347 -0
  129. pdd/server/routes/websocket.py +473 -0
  130. pdd/server/security.py +243 -0
  131. pdd/server/terminal_spawner.py +217 -0
  132. pdd/server/token_counter.py +222 -0
  133. pdd/summarize_directory.py +236 -237
  134. pdd/sync_animation.py +8 -4
  135. pdd/sync_determine_operation.py +329 -47
  136. pdd/sync_main.py +272 -28
  137. pdd/sync_orchestration.py +289 -211
  138. pdd/sync_order.py +304 -0
  139. pdd/template_expander.py +161 -0
  140. pdd/templates/architecture/architecture_json.prompt +41 -46
  141. pdd/trace.py +1 -1
  142. pdd/track_cost.py +0 -13
  143. pdd/unfinished_prompt.py +2 -1
  144. pdd/update_main.py +68 -26
  145. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/METADATA +15 -10
  146. pdd_cli-0.0.121.dist-info/RECORD +229 -0
  147. pdd_cli-0.0.90.dist-info/RECORD +0 -153
  148. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/WHEEL +0 -0
  149. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/entry_points.txt +0 -0
  150. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/licenses/LICENSE +0 -0
  151. {pdd_cli-0.0.90.dist-info → pdd_cli-0.0.121.dist-info}/top_level.txt +0 -0
pdd/agentic_common.py CHANGED
@@ -1,863 +1,626 @@
-# pdd/agentic_common.py
 from __future__ import annotations

-import json
 import os
-import secrets
+import sys
+import json
 import shutil
 import subprocess
-from dataclasses import dataclass
+import tempfile
+import time
+import uuid
+import re
 from pathlib import Path
-from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Tuple
+from typing import List, Optional, Tuple, Dict, Any, Union
+from dataclasses import dataclass

 from rich.console import Console

-from .llm_invoke import LLM_MODEL_CSV_PATH, _load_model_data
-
-console = Console()
+try:
+    from pdd.llm_invoke import _load_model_data
+except ImportError:
+    def _load_model_data(*args, **kwargs):
+        return None

+# Constants
 AGENT_PROVIDER_PREFERENCE: List[str] = ["anthropic", "google", "openai"]
-
-# CLI command mapping for each provider
-CLI_COMMANDS: Dict[str, str] = {
-    "anthropic": "claude",
-    "google": "gemini",
-    "openai": "codex",
-}
-
-# Timeouts
 DEFAULT_TIMEOUT_SECONDS: float = 240.0
-TIMEOUT_ENV_VAR: str = "PDD_AGENTIC_TIMEOUT"
-
-
-@dataclass(frozen=True)
-class TokenPricing:
-    """
-    Simple per-token pricing descriptor.
+MIN_VALID_OUTPUT_LENGTH: int = 50
+DEFAULT_MAX_RETRIES: int = 3
+DEFAULT_RETRY_DELAY: float = 5.0

-    Prices are expressed in USD per 1,000,000 tokens.
-    cached_input_multiplier is the fraction of full input price charged
-    for cached tokens (e.g. 0.25 == 75% discount).
-    """
+# GitHub State Markers
+GITHUB_STATE_MARKER_START = "<!-- PDD_WORKFLOW_STATE:"
+GITHUB_STATE_MARKER_END = "-->"

+@dataclass
+class Pricing:
     input_per_million: float
     output_per_million: float
     cached_input_multiplier: float = 1.0

-
-# Approximate Gemini pricing by model family.
-# These values can be refined if needed; they are only used when the
-# provider returns token counts instead of a direct USD cost.
-GEMINI_PRICING_BY_FAMILY: Dict[str, TokenPricing] = {
-    "flash": TokenPricing(input_per_million=0.35, output_per_million=1.05, cached_input_multiplier=0.5),
-    "pro": TokenPricing(input_per_million=3.50, output_per_million=10.50, cached_input_multiplier=0.5),
-    "default": TokenPricing(input_per_million=0.35, output_per_million=1.05, cached_input_multiplier=0.5),
+# Pricing Configuration
+# Gemini: Based on test expectations (Flash: $0.35/$1.05, Cached 50%)
+GEMINI_PRICING_BY_FAMILY = {
+    "flash": Pricing(0.35, 1.05, 0.5),
+    "pro": Pricing(3.50, 10.50, 0.5),  # Placeholder for Pro
 }

-# Codex/OpenAI pricing (explicitly provided in prompt)
-CODEX_PRICING: TokenPricing = TokenPricing(
-    input_per_million=1.50,
-    output_per_million=6.00,
-    cached_input_multiplier=0.25,  # 75% discount for cached tokens
-)
-
-
-# ---------------------------------------------------------------------------
-# Logging utilities (Rich-based, respect verbose/quiet flags)
-# ---------------------------------------------------------------------------
-
-
-def _format_label(label: str) -> str:
-    return f"[{label}] " if label else ""
-
-
-def log_info(message: str, *, verbose: bool, quiet: bool, label: str = "") -> None:
-    """
-    Log an informational message.
-
-    Skips output when quiet=True.
-    """
-    if quiet:
-        return
-    prefix = _format_label(label)
-    console.print(f"{prefix}{message}")
-
-
-def log_debug(message: str, *, verbose: bool, quiet: bool, label: str = "") -> None:
-    """
-    Log a debug message.
-
-    Only emits output when verbose=True and quiet=False.
-    """
-    if quiet or not verbose:
-        return
-    prefix = _format_label(label)
-    console.log(f"{prefix}{message}")
-
-
-def log_error(message: str, *, verbose: bool, quiet: bool, label: str = "") -> None:
-    """
-    Log an error message.
-
-    Errors are always printed, even in quiet mode.
-    """
-    prefix = _format_label(label)
-    console.print(f"[red]{prefix}{message}[/red]")
-
-
-# ---------------------------------------------------------------------------
-# Internal helpers
-# ---------------------------------------------------------------------------
-
-
-def _safe_load_model_data() -> Any | None:
-    """
-    Best-effort wrapper around _load_model_data.
-
-    This is used as part of provider availability checks so that we
-    respect whatever configuration llm_invoke is using (including
-    any API-key related metadata encoded in the model CSV).
-    """
-    try:
-        return _load_model_data(LLM_MODEL_CSV_PATH)
-    except Exception:
-        return None
+# Codex: Based on test expectations ($1.50/$6.00, Cached 25%)
+CODEX_PRICING = Pricing(1.50, 6.00, 0.25)

+console = Console()

-def _provider_has_api_key(provider: str, model_data: Any | None) -> bool:
+def get_available_agents() -> List[str]:
     """
-    Determine whether the given provider has an API key or CLI auth configured.
-
-    This function:
-    - For Anthropic: Also checks if Claude CLI is available (subscription auth)
-      which doesn't require an API key.
-    - Attempts to infer API-key environment variable names from the
-      llm_invoke model data (if it is a DataFrame-like object).
-    - Falls back to well-known default environment variable names.
-
-    The actual presence of API keys is checked via os.environ.
+    Returns list of available provider names based on CLI existence and API key configuration.
     """
-    env = os.environ
+    available = []

-    # For Anthropic: Check if Claude CLI is available for subscription auth
-    # This is more robust as it uses the user's Claude subscription instead of API credits
-    if provider == "anthropic":
-        if shutil.which("claude"):
-            # Claude CLI is available - we can use subscription auth
-            # even without an API key
-            return True
-
-    # Try to extract env var hints from model_data, if it looks like a DataFrame.
-    inferred_env_vars: List[str] = []
-    if model_data is not None:
-        try:
-            columns = list(getattr(model_data, "columns", []))
-            if "provider" in columns:
-                # DataFrame-like path
-                try:
-                    df = model_data  # type: ignore[assignment]
-                    # Filter rows matching provider name (case-insensitive)
-                    provider_mask = df["provider"].str.lower() == provider.lower()  # type: ignore[index]
-                    provider_rows = df[provider_mask]
-                    # Look for any column that might specify an API-key env var
-                    candidate_cols = [
-                        c
-                        for c in columns
-                        if "api" in c.lower() and "key" in c.lower() or "env" in c.lower()
-                    ]
-                    for _, row in provider_rows.iterrows():  # type: ignore[attr-defined]
-                        for col in candidate_cols:
-                            value = str(row.get(col, "")).strip()
-                            # Heuristic: looks like an env var name (upper & contains underscore)
-                            if value and value.upper() == value and "_" in value:
-                                inferred_env_vars.append(value)
-                except Exception:
-                    # If anything above fails, we silently fall back to defaults.
-                    pass
-        except Exception:
-            pass
-
-    default_env_map: Dict[str, List[str]] = {
-        "anthropic": ["ANTHROPIC_API_KEY"],
-        "google": ["GEMINI_API_KEY", "GOOGLE_API_KEY"],
-        "openai": ["OPENAI_API_KEY"],
-    }
-
-    env_candidates = inferred_env_vars or default_env_map.get(provider, [])
-    return any(env.get(name) for name in env_candidates)
-
-
-def _get_agent_timeout() -> float:
-    """
-    Resolve the agentic subprocess timeout from environment, with a sane default.
-    """
-    raw = os.getenv(TIMEOUT_ENV_VAR)
-    if not raw:
-        return DEFAULT_TIMEOUT_SECONDS
-    try:
-        value = float(raw)
-        if value <= 0:
-            raise ValueError
-        return value
-    except ValueError:
-        return DEFAULT_TIMEOUT_SECONDS
+    # 1. Anthropic (Claude)
+    # Available if 'claude' CLI exists. API key not strictly required (subscription auth).
+    if shutil.which("claude"):
+        available.append("anthropic")

+    # 2. Google (Gemini)
+    # Available if 'gemini' CLI exists AND (API key is set OR Vertex AI auth is configured)
+    has_gemini_cli = shutil.which("gemini") is not None
+    has_google_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
+    has_vertex_auth = (
+        os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") and
+        os.environ.get("GOOGLE_GENAI_USE_VERTEXAI") == "true"
+    )
+
+    if has_gemini_cli and (has_google_key or has_vertex_auth):
+        available.append("google")

-def _build_subprocess_env(
-    base: Optional[Mapping[str, str]] = None,
-    *,
-    use_cli_auth: bool = False,
-) -> Dict[str, str]:
-    """
-    Build a sanitized environment for non-interactive subprocess execution.
-
-    Ensures:
-    - TERM=dumb
-    - NO_COLOR=1
-    - CI=1
-    while preserving existing variables (including API keys).
-
-    Args:
-        base: Optional base environment mapping (defaults to os.environ).
-        use_cli_auth: If True, remove ANTHROPIC_API_KEY to force Claude CLI
-                      subscription auth instead of API key auth. This is more
-                      robust as it uses the user's Claude subscription.
-    """
-    env: Dict[str, str] = dict(base or os.environ)
-    env.setdefault("TERM", "dumb")
-    env.setdefault("NO_COLOR", "1")
-    env.setdefault("CI", "1")
+    # 3. OpenAI (Codex)
+    # Available if 'codex' CLI exists AND OPENAI_API_KEY is set
+    if shutil.which("codex") and os.environ.get("OPENAI_API_KEY"):
+        available.append("openai")

-    if use_cli_auth:
-        # Remove API key to force Claude CLI subscription auth
-        env.pop("ANTHROPIC_API_KEY", None)
+    return available

-    return env
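As a quick illustration of the rewritten availability rules above, a small sketch (the environment described is hypothetical, not part of the diff):

    # Suppose only the `claude` CLI is on PATH and no Google/OpenAI credentials are set.
    # Then get_available_agents() returns ["anthropic"]:
    #   - anthropic needs only the CLI (subscription auth can stand in for an API key)
    #   - google needs the gemini CLI plus an API key or Vertex AI auth
    #   - openai needs the codex CLI plus OPENAI_API_KEY
    providers = get_available_agents()
    print(providers)  # ["anthropic"] under the assumed environment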
+def _calculate_gemini_cost(stats: Dict[str, Any]) -> float:
+    """Calculates cost for Gemini based on token stats."""
+    total_cost = 0.0
+    models = stats.get("models", {})
+
+    for model_name, data in models.items():
+        tokens = data.get("tokens", {})
+        prompt = tokens.get("prompt", 0)
+        candidates = tokens.get("candidates", 0)
+        cached = tokens.get("cached", 0)
+
+        # Determine pricing family
+        family = "flash" if "flash" in model_name.lower() else "pro"
+        pricing = GEMINI_PRICING_BY_FAMILY.get(family, GEMINI_PRICING_BY_FAMILY["flash"])
+
+        # Logic: new_input = max(0, prompt - cached)
+        # Assuming 'prompt' is total input tokens
+        new_input = max(0, prompt - cached)
+
+        input_cost = (new_input / 1_000_000) * pricing.input_per_million
+        cached_cost = (cached / 1_000_000) * pricing.input_per_million * pricing.cached_input_multiplier
+        output_cost = (candidates / 1_000_000) * pricing.output_per_million
+
+        total_cost += input_cost + cached_cost + output_cost
+
+    return total_cost

+def _calculate_codex_cost(usage: Dict[str, Any]) -> float:
+    """Calculates cost for Codex based on usage stats."""
+    input_tokens = usage.get("input_tokens", 0)
+    output_tokens = usage.get("output_tokens", 0)
+    cached_tokens = usage.get("cached_input_tokens", 0)
+
+    pricing = CODEX_PRICING
+
+    # Logic: new_input = max(0, input - cached)
+    new_input = max(0, input_tokens - cached_tokens)
+
+    input_cost = (new_input / 1_000_000) * pricing.input_per_million
+    cached_cost = (cached_tokens / 1_000_000) * pricing.input_per_million * pricing.cached_input_multiplier
+    output_cost = (output_tokens / 1_000_000) * pricing.output_per_million
+
+    return input_cost + cached_cost + output_cost
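To make the new pricing arithmetic concrete, a worked example against the two helpers above (token counts invented; both helpers bill cached input at the discounted multiplier):

    stats = {"models": {"gemini-2.0-flash": {"tokens": {
        "prompt": 1_000_000, "candidates": 50_000, "cached": 200_000}}}}
    # 800k new input at $0.35/M + 200k cached at $0.35/M * 0.5 + 50k output at $1.05/M
    # = 0.28 + 0.035 + 0.0525 = $0.3675
    assert abs(_calculate_gemini_cost(stats) - 0.3675) < 1e-9

    usage = {"input_tokens": 100_000, "output_tokens": 20_000, "cached_input_tokens": 40_000}
    # 60k new at $1.50/M + 40k cached at $1.50/M * 0.25 + 20k output at $6.00/M
    # = 0.09 + 0.015 + 0.12 = $0.225
    assert abs(_calculate_codex_cost(usage) - 0.225) < 1e-9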

-def _build_provider_command(
-    provider: str,
+def run_agentic_task(
     instruction: str,
+    cwd: Path,
     *,
-    use_interactive_mode: bool = False,
-) -> List[str]:
+    verbose: bool = False,
+    quiet: bool = False,
+    label: str = "",
+    timeout: Optional[float] = None,
+    max_retries: int = 1,
+    retry_delay: float = DEFAULT_RETRY_DELAY
+) -> Tuple[bool, str, float, str]:
     """
-    Build the CLI command line for the given provider.
-
-    Provider commands:
-
-    - Anthropic (Claude Code):
-        Normal: ["claude", "-p", <instruction>, "--dangerously-skip-permissions", "--output-format", "json"]
-        Interactive (more robust, uses subscription auth):
-            ["claude", "--dangerously-skip-permissions", "--output-format", "json", <instruction>]
-
-    - Google (Gemini CLI):
-        Normal: ["gemini", "-p", <instruction>, "--yolo", "--output-format", "json"]
-        Interactive: ["gemini", "--yolo", "--output-format", "json", <instruction>]
-
-    - OpenAI (Codex CLI):
-        ["codex", "exec", "--full-auto", "--json", <instruction>]
+    Runs an agentic task using available providers in preference order.

     Args:
-        provider: The provider name ("anthropic", "google", "openai").
-        instruction: The instruction to pass to the CLI.
-        use_interactive_mode: If True, use interactive mode instead of -p flag.
-                              This is more robust for Anthropic as it uses
-                              subscription auth and allows full file access.
-    """
-    if provider == "anthropic":
-        if use_interactive_mode:
-            # Interactive mode: no -p flag, uses subscription auth
-            # This allows full file access and is more robust
-            return [
-                "claude",
-                "--dangerously-skip-permissions",
-                "--output-format",
-                "json",
-                instruction,
-            ]
-        else:
-            return [
-                "claude",
-                "-p",
-                instruction,
-                "--dangerously-skip-permissions",
-                "--output-format",
-                "json",
-            ]
-    if provider == "google":
-        if use_interactive_mode:
-            # Interactive mode for Gemini
-            return [
-                "gemini",
-                "--yolo",
-                "--output-format",
-                "json",
-                instruction,
-            ]
-        else:
-            return [
-                "gemini",
-                "-p",
-                instruction,
-                "--yolo",
-                "--output-format",
-                "json",
-            ]
-    if provider == "openai":
-        return [
-            "codex",
-            "exec",
-            "--full-auto",
-            "--json",
-            instruction,
-        ]
-    raise ValueError(f"Unknown provider: {provider}")
-
-
-def _classify_gemini_model(model_name: str) -> str:
-    """
-    Classify a Gemini model name into a pricing family: 'flash', 'pro', or 'default'.
-    """
-    lower = model_name.lower()
-    if "flash" in lower:
-        return "flash"
-    if "pro" in lower:
-        return "pro"
-    return "default"
-
-
-def _safe_int(value: Any) -> int:
-    try:
-        return int(value)
-    except (TypeError, ValueError):
-        return 0
-
-
-def _calculate_gemini_cost(stats: Mapping[str, Any]) -> float:
-    """
-    Compute total Gemini cost from stats.models[model]["tokens"] entries.
-
-    Each model entry should have:
-        tokens = { "prompt": int, "candidates": int, "cached": int, ... }
-
-    Pricing is determined by the model family (flash/pro/default).
-    Cached tokens are charged at a discounted rate.
-    """
-    models = stats.get("models") or {}
-    if not isinstance(models, Mapping):
-        return 0.0
-
-    total_cost = 0.0
-    for model_name, model_data in models.items():
-        if not isinstance(model_data, Mapping):
-            continue
-        tokens = model_data.get("tokens") or {}
-        if not isinstance(tokens, Mapping):
-            continue
-
-        prompt_tokens = _safe_int(tokens.get("prompt"))
-        output_tokens = _safe_int(tokens.get("candidates"))
-        cached_tokens = _safe_int(tokens.get("cached"))
-
-        family = _classify_gemini_model(str(model_name))
-        pricing = GEMINI_PRICING_BY_FAMILY.get(family, GEMINI_PRICING_BY_FAMILY["default"])
-
-        # Assume prompt_tokens includes cached_tokens; charge non-cached at full price,
-        # cached at a discounted rate.
-        new_prompt_tokens = max(prompt_tokens - cached_tokens, 0)
-        effective_cached_tokens = min(cached_tokens, prompt_tokens)
-
-        cost_input_new = new_prompt_tokens * pricing.input_per_million / 1_000_000
-        cost_input_cached = (
-            effective_cached_tokens
-            * pricing.input_per_million
-            * pricing.cached_input_multiplier
-            / 1_000_000
-        )
-        cost_output = output_tokens * pricing.output_per_million / 1_000_000
-
-        total_cost += cost_input_new + cost_input_cached + cost_output
-
-    return total_cost
-
+        instruction: The task instruction
+        cwd: Working directory
+        verbose: Show detailed output
+        quiet: Suppress all non-error output
+        label: Task label for logging
+        timeout: Optional timeout override
+        max_retries: Number of attempts per provider before fallback (default: 1 = no retries)
+        retry_delay: Base delay in seconds for exponential backoff (default: DEFAULT_RETRY_DELAY)

-def _calculate_codex_cost(usage: Mapping[str, Any]) -> float:
+    Returns:
+        (success, output_text, cost_usd, provider_used)
     """
-    Compute Codex/OpenAI cost from a `usage` dict with:
+    agents = get_available_agents()

-    - input_tokens
-    - output_tokens
-    - cached_input_tokens
+    # Filter agents based on preference order
+    candidates = [p for p in AGENT_PROVIDER_PREFERENCE if p in agents]

-    Cached tokens are charged at a 75% discount (i.e. 25% of full price).
-    """
-    input_tokens = _safe_int(usage.get("input_tokens"))
-    output_tokens = _safe_int(usage.get("output_tokens"))
-    cached_input_tokens = _safe_int(usage.get("cached_input_tokens"))
+    if not candidates:
+        msg = "No agent providers are available (check CLI installation and API keys)"
+        if not quiet:
+            console.print(f"[bold red]{msg}[/bold red]")
+        return False, msg, 0.0, ""

-    new_input_tokens = max(input_tokens - cached_input_tokens, 0)
-    effective_cached_tokens = min(cached_input_tokens, input_tokens)
+    effective_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT_SECONDS

-    pricing = CODEX_PRICING
+    # Create a unique temp file for the prompt
+    prompt_filename = f".agentic_prompt_{uuid.uuid4().hex[:8]}.txt"
+    prompt_path = cwd / prompt_filename

-    cost_input_new = new_input_tokens * pricing.input_per_million / 1_000_000
-    cost_input_cached = (
-        effective_cached_tokens
-        * pricing.input_per_million
-        * pricing.cached_input_multiplier
-        / 1_000_000
+    full_instruction = (
+        f"{instruction}\n\n"
+        f"Read the file {prompt_filename} for instructions. "
+        "You have full file access to explore and modify files as needed."
     )
-    cost_output = output_tokens * pricing.output_per_million / 1_000_000
-
-    return cost_input_new + cost_input_cached + cost_output
-
-
-def _parse_anthropic_result(data: Mapping[str, Any]) -> Tuple[bool, str, float]:
-    """
-    Parse Claude Code (Anthropic) JSON result.
-
-    Expected:
-    - data["response"]: main content
-    - data["error"]: optional error block
-    - data["total_cost_usd"]: total cost in USD (if available)
-    """
-    error_info = data.get("error")
-    has_error = bool(error_info)
-
-    if isinstance(error_info, Mapping):
-        error_msg = str(error_info.get("message") or error_info)
-    elif error_info is not None:
-        error_msg = str(error_info)
-    else:
-        error_msg = ""
-
-    response_text = str(data.get("response") or "")
-    if not response_text and error_msg:
-        response_text = error_msg

-    cost_raw = data.get("total_cost_usd")
     try:
-        cost = float(cost_raw)
-    except (TypeError, ValueError):
-        cost = 0.0
-
-    return (not has_error, response_text, cost)
-
-
-def _parse_gemini_result(data: Mapping[str, Any]) -> Tuple[bool, str, float]:
-    """
-    Parse Gemini CLI JSON result.
-
-    Expected high-level structure:
-        {
-            "response": "string",
-            "stats": { ... per-model token usage ... },
-            "error": { ... }  # optional
-        }
-    """
-    error_info = data.get("error")
-    has_error = bool(error_info)
-
-    if isinstance(error_info, Mapping):
-        error_msg = str(error_info.get("message") or error_info)
-    elif error_info is not None:
-        error_msg = str(error_info)
-    else:
-        error_msg = ""
-
-    response_text = str(data.get("response") or "")
-    if not response_text and error_msg:
-        response_text = error_msg
+        # Write prompt to file
+        with open(prompt_path, "w", encoding="utf-8") as f:
+            f.write(full_instruction)

-    stats = data.get("stats") or {}
-    cost = 0.0
-    if isinstance(stats, Mapping):
-        try:
-            cost = _calculate_gemini_cost(stats)
-        except Exception:
-            cost = 0.0
+        for provider in candidates:
+            if verbose:
+                console.print(f"[dim]Attempting provider: {provider} for task '{label}'[/dim]")

-    return (not has_error, response_text, cost)
+            last_output = ""
+            for attempt in range(1, max_retries + 1):
+                if verbose and attempt > 1:
+                    console.print(f"[dim]Retry {attempt}/{max_retries} for {provider} (task: {label})[/dim]")

+                success, output, cost = _run_with_provider(
+                    provider, prompt_path, cwd, effective_timeout, verbose, quiet
+                )
+                last_output = output

-def _extract_codex_usage(stdout: str) -> Optional[Mapping[str, Any]]:
-    """
-    Extract the latest `usage` object from Codex JSONL output.
+                # False Positive Detection
+                if success:
+                    is_false_positive = (cost == 0.0 and len(output.strip()) < MIN_VALID_OUTPUT_LENGTH)

-    The `codex exec --json` command emits newline-delimited JSON events.
-    We scan all lines and keep the most recent event containing a `usage` key.
-    """
-    last_usage: Optional[Mapping[str, Any]] = None
-    for line in stdout.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            event = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-        usage = event.get("usage")
-        if isinstance(usage, Mapping):
-            last_usage = usage
-    return last_usage
+                    if is_false_positive:
+                        if not quiet:
+                            console.print(f"[yellow]Provider '{provider}' returned false positive (attempt {attempt})[/yellow]")
+                        # Treat as failure, retry
+                    else:
+                        # Check for suspicious files (C, E, T)
+                        suspicious = []
+                        for name in ["C", "E", "T"]:
+                            if (cwd / name).exists():
+                                suspicious.append(name)

+                        if suspicious:
+                            console.print(f"[bold red]SUSPICIOUS FILES DETECTED: {', '.join(['- ' + s for s in suspicious])}[/bold red]")

-def _extract_codex_output(stdout: str) -> str:
-    """
-    Extract assistant-visible output text from Codex JSONL output.
+                        # Real success
+                        return True, output, cost, provider

-    Heuristic:
-    - Collect content from events with type == "message" and role == "assistant"
-    - Fallback to raw stdout if nothing is found
-    """
-    assistant_messages: List[str] = []
-    for line in stdout.splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            event = json.loads(line)
-        except json.JSONDecodeError:
-            continue
-
-        if event.get("type") == "message" and event.get("role") == "assistant":
-            content = event.get("content")
-            if isinstance(content, str):
-                assistant_messages.append(content)
-            elif isinstance(content, list):
-                # Sometimes content may be a list of segments; concatenate any text fields.
-                parts: List[str] = []
-                for part in content:
-                    if isinstance(part, Mapping) and "text" in part:
-                        parts.append(str(part["text"]))
-                    else:
-                        parts.append(str(part))
-                assistant_messages.append("".join(parts))
+                # Failed - retry with backoff if attempts remain
+                if attempt < max_retries:
+                    backoff = retry_delay * attempt
+                    if verbose:
+                        console.print(f"[dim]Waiting {backoff}s before retry...[/dim]")
+                    time.sleep(backoff)

-    if assistant_messages:
-        return "\n".join(assistant_messages)
+            # All retries exhausted for this provider
+            if verbose:
+                console.print(f"[yellow]Provider {provider} failed after {max_retries} attempts: {last_output}[/yellow]")

-    return stdout.strip()
+        return False, "All agent providers failed", 0.0, ""

+    finally:
+        # Cleanup prompt file
+        if prompt_path.exists():
+            try:
+                os.remove(prompt_path)
+            except OSError:
+                pass
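A caller's-eye sketch of the new entry point (the project path and instruction are invented; keyword names follow the signature above):

    from pathlib import Path

    success, output, cost_usd, provider = run_agentic_task(
        "Rename the helper `foo` to `bar` and update its call sites.",
        Path("/path/to/project"),
        verbose=True,
        max_retries=3,     # up to 3 attempts per provider before falling back
        retry_delay=5.0,   # backoff grows with each attempt: 5s, 10s, ...
    )
    if success:
        print(f"{provider} finished; estimated cost ${cost_usd:.4f}")
    else:
        print(f"All providers failed: {output}")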

 def _run_with_provider(
-    provider: str,
-    agentic_instruction: str,
-    cwd: Path,
-    *,
-    verbose: bool,
-    quiet: bool,
-    label: str = "",
+    provider: str,
+    prompt_path: Path,
+    cwd: Path,
+    timeout: float = DEFAULT_TIMEOUT_SECONDS,
+    verbose: bool = False,
+    quiet: bool = False
 ) -> Tuple[bool, str, float]:
     """
-    Invoke the given provider's CLI in headless JSON mode.
-
-    For Anthropic (Claude), uses subscription auth (removes API key from env)
-    and interactive mode (no -p flag) for more robust authentication that
-    doesn't require API credits.
-
-    Returns:
-        (success, message, cost)
-
-        - success: True if the CLI completed successfully without reported errors
-        - message: natural-language output on success, or error description on failure
-        - cost: estimated USD cost for this attempt
+    Internal helper to run a specific provider's CLI.
+    Returns (success, output_or_error, cost).
     """
-    # Use interactive mode and CLI auth for Anthropic (more robust, uses subscription)
-    use_interactive = provider == "anthropic"
-    use_cli_auth = provider == "anthropic"
-
-    cmd = _build_provider_command(
-        provider,
-        agentic_instruction,
-        use_interactive_mode=use_interactive,
-    )
-    timeout = _get_agent_timeout()
-    env = _build_subprocess_env(use_cli_auth=use_cli_auth)
-
-    log_debug(
-        f"Invoking provider '{provider}' with timeout {timeout:.1f}s",
-        verbose=verbose,
-        quiet=quiet,
-        label=label,
-    )
-    log_debug(
-        f"Command: {' '.join(cmd)}",
-        verbose=verbose,
-        quiet=quiet,
-        label=label,
-    )
+
+    # Prepare Environment
+    env = os.environ.copy()
+    env["TERM"] = "dumb"
+    env["NO_COLOR"] = "1"
+    env["CI"] = "1"
+
+    cmd: List[str] = []
+
+    # Construct Command
+    if provider == "anthropic":
+        # Remove API key to force subscription auth if configured that way
+        env.pop("ANTHROPIC_API_KEY", None)
+        # Note: Tests expect NO -p flag for Anthropic, and prompt path as last arg
+        cmd = [
+            "claude",
+            "--dangerously-skip-permissions",
+            "--output-format", "json",
+            str(prompt_path)
+        ]
+    elif provider == "google":
+        cmd = [
+            "gemini",
+            "-p", str(prompt_path),
+            "--yolo",
+            "--output-format", "json"
+        ]
+    elif provider == "openai":
+        cmd = [
+            "codex",
+            "exec",
+            "--full-auto",
+            "--json",
+            str(prompt_path)
+        ]
+    else:
+        return False, f"Unknown provider {provider}", 0.0

     try:
-        completed = subprocess.run(
+        result = subprocess.run(
             cmd,
-            cwd=str(cwd),
+            cwd=cwd,
             env=env,
             capture_output=True,
             text=True,
-            timeout=timeout,
-            check=False,
+            timeout=timeout
         )
-    except FileNotFoundError:
-        message = f"CLI command for provider '{provider}' was not found."
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0
     except subprocess.TimeoutExpired:
-        message = f"Provider '{provider}' CLI timed out after {timeout:.1f} seconds."
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0
-    except Exception as exc:
-        message = f"Error invoking provider '{provider}': {exc}"
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0
-
-    stdout = completed.stdout or ""
-    stderr = completed.stderr or ""
-    if verbose and stdout:
-        log_debug(f"{provider} stdout:\n{stdout}", verbose=verbose, quiet=quiet, label=label)
-    if verbose and stderr:
-        log_debug(f"{provider} stderr:\n{stderr}", verbose=verbose, quiet=quiet, label=label)
-
-    # Default assumptions
-    success = completed.returncode == 0
-    cost = 0.0
-    message: str
+        return False, "Timeout expired", 0.0
+    except Exception as e:
+        return False, str(e), 0.0

-    # Provider-specific JSON parsing and cost extraction
-    if provider in ("anthropic", "google"):
-        raw_json = stdout.strip() or stderr.strip()
-        if not raw_json:
-            message = f"Provider '{provider}' produced no JSON output."
-            log_error(message, verbose=verbose, quiet=quiet, label=label)
-            return False, message, 0.0
-
-        try:
-            data = json.loads(raw_json)
-        except json.JSONDecodeError as exc:
-            # Include raw output in the error message to aid debugging
-            # (e.g. if the provider printed a plain text error instead of JSON)
-            message = f"Failed to parse JSON from provider '{provider}': {exc}\nOutput: {raw_json}"
-            log_error(message, verbose=verbose, quiet=quiet, label=label)
-            return False, message, 0.0
-
-        if not isinstance(data, Mapping):
-            message = f"Unexpected JSON structure from provider '{provider}'."
-            log_error(message, verbose=verbose, quiet=quiet, label=label)
-            return False, message, 0.0
-
-        if provider == "anthropic":
-            parsed_success, response_text, cost = _parse_anthropic_result(data)
-        else:  # google / Gemini
-            parsed_success, response_text, cost = _parse_gemini_result(data)
-
-        # Combine CLI exit code with JSON-level success flag
-        if not success or not parsed_success:
-            success = False
-        message = response_text or stderr.strip() or stdout.strip() or "No response from provider."
-
-        if not success and completed.returncode != 0 and stderr:
-            message = f"{message}\n\nCLI stderr:\n{stderr.strip()}"
-        return success, message, cost
-
-    # OpenAI / Codex: JSONL stream on stdout
-    if provider == "openai":
-        usage = _extract_codex_usage(stdout)
-        if usage is not None:
-            try:
-                cost = _calculate_codex_cost(usage)
-            except Exception:
-                cost = 0.0
-
-        message = _extract_codex_output(stdout)
-        if not success:
-            if stderr.strip():
-                message = (
-                    f"{message}\n\nCLI stderr:\n{stderr.strip()}"
-                    if message
-                    else f"Codex CLI failed with exit code {completed.returncode}.\n\nstderr:\n{stderr.strip()}"
-                )
-            elif not message:
-                message = f"Codex CLI failed with exit code {completed.returncode}."
-
-        return success, message or "No response from provider.", cost
-
-    # Should not reach here because _build_provider_command validates provider
-    message = f"Unsupported provider '{provider}'."
-    log_error(message, verbose=verbose, quiet=quiet, label=label)
-    return False, message, 0.0
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
+    if result.returncode != 0:
+        return False, f"Exit code {result.returncode}: {result.stderr}", 0.0

+    # Parse JSON Output
+    try:
+        # Handle JSONL output (Codex sometimes streams)
+        output_str = result.stdout.strip()
+        data = {}
+
+        if provider == "openai" and "\n" in output_str:
+            # Parse JSONL, look for result type
+            lines = output_str.splitlines()
+            for line in lines:
+                try:
+                    item = json.loads(line)
+                    if item.get("type") == "result":
+                        data = item
+                        break
+                except json.JSONDecodeError:
+                    continue
+            # If no result block found, try parsing last line
+            if not data and lines:
+                try:
+                    data = json.loads(lines[-1])
+                except:
+                    pass
+        else:
+            data = json.loads(output_str)
+
+        return _parse_provider_json(provider, data)
+    except json.JSONDecodeError:
+        # Fallback if CLI didn't output valid JSON (sometimes happens on crash)
+        return False, f"Invalid JSON output: {result.stdout[:200]}...", 0.0

-def get_available_agents() -> List[str]:
+def _parse_provider_json(provider: str, data: Dict[str, Any]) -> Tuple[bool, str, float]:
     """
-    Return a list of available agent providers, e.g. ["anthropic", "google"].
-
-    A provider is considered available if:
-    - Its CLI binary exists on PATH (checked via shutil.which)
-    - Its API key appears configured (using llm_invoke's model data plus
-      well-known environment variables)
+    Extracts (success, text_response, cost_usd) from provider JSON.
     """
-    model_data = _safe_load_model_data()
-    available: List[str] = []
-
-    for provider in AGENT_PROVIDER_PREFERENCE:
-        cli = CLI_COMMANDS.get(provider)
-        if not cli:
-            continue
-        if shutil.which(cli) is None:
-            continue
-        if not _provider_has_api_key(provider, model_data):
-            continue
-        available.append(provider)
+    cost = 0.0
+    output_text = ""

-    return available
+    try:
+        if provider == "anthropic":
+            # Anthropic usually provides direct cost
+            cost = float(data.get("total_cost_usd", 0.0))
+            # Result might be in 'result' or 'response'
+            output_text = data.get("result") or data.get("response") or ""
+
+        elif provider == "google":
+            stats = data.get("stats", {})
+            cost = _calculate_gemini_cost(stats)
+            output_text = data.get("result") or data.get("response") or data.get("output") or ""

+        elif provider == "openai":
+            usage = data.get("usage", {})
+            cost = _calculate_codex_cost(usage)
+            output_text = data.get("result") or data.get("output") or ""

-def run_agentic_task(
-    instruction: str,
-    cwd: Path,
-    *,
-    verbose: bool = False,
-    quiet: bool = False,
-    label: str = "",
-) -> Tuple[bool, str, float, str]:
-    """
-    Run an agentic task using the first available provider in preference order.
+        return True, str(output_text), cost

-    The task is executed in headless mode with JSON output for structured
-    parsing and real cost tracking.
+    except Exception as e:
+        return False, f"Error parsing {provider} JSON: {e}", 0.0
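For reference, minimal payload shapes that _parse_provider_json accepts, one per branch above (field values invented):

    anthropic_payload = {"result": "done", "total_cost_usd": 0.0123}
    google_payload = {"response": "done", "stats": {"models": {
        "gemini-2.0-flash": {"tokens": {"prompt": 1000, "candidates": 200, "cached": 0}}}}}
    openai_payload = {"result": "done", "usage": {
        "input_tokens": 1000, "output_tokens": 200, "cached_input_tokens": 0}}

    for provider, payload in [("anthropic", anthropic_payload),
                              ("google", google_payload),
                              ("openai", openai_payload)]:
        ok, text, cost = _parse_provider_json(provider, payload)
        assert ok and text == "done"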

-    Process:
-    1. Write `instruction` into a unique temp file named
-       `.agentic_prompt_<random>.txt` under `cwd`.
-    2. Build agentic meta-instruction:

-        "Read the file {prompt_file} for instructions. You have full file
-        access to explore and modify files as needed."
+# --- GitHub State Persistence ---

-    3. Try providers in `AGENT_PROVIDER_PREFERENCE` order, but only those
-       returned by `get_available_agents()`.
-    4. For each provider:
-       - Invoke its CLI in headless JSON mode with file-write permissions.
-       - Parse JSON to extract response text and cost.
-       - On success, stop and return.
-       - On failure, proceed to next provider.
-    5. Clean up the temp prompt file.
+def _build_state_marker(workflow_type: str, issue_number: int) -> str:
+    return f"{GITHUB_STATE_MARKER_START}{workflow_type}:issue-{issue_number}"

-    Args:
-        instruction: Natural-language instruction describing the task.
-        cwd: Project root where the agent should operate.
-        verbose: Enable verbose logging (debug output).
-        quiet: Suppress non-error logging.
-        label: Optional label prefix for log messages (e.g. "agentic-fix").
+def _serialize_state_comment(workflow_type: str, issue_number: int, state: Dict) -> str:
+    marker = _build_state_marker(workflow_type, issue_number)
+    json_str = json.dumps(state, indent=2)
+    return f"{marker}\n{json_str}\n{GITHUB_STATE_MARKER_END}"

-    Returns:
-        Tuple[bool, str, float, str]:
-            - success: Whether the task completed successfully.
-            - output: On success, the agent's main response text.
-                      On failure, a human-readable error message.
-            - cost: Total estimated USD cost across all provider attempts.
-            - provider_used: Name of the successful provider
-                             ("anthropic", "google", or "openai"),
-                             or "" if no provider succeeded.
-    """
-    if not instruction or not instruction.strip():
-        message = "Agentic instruction must be a non-empty string."
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0, ""
-
-    if not cwd.exists() or not cwd.is_dir():
-        message = f"Working directory does not exist or is not a directory: {cwd}"
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0, ""
-
-    available = get_available_agents()
-    if not available:
-        message = "No agent providers are available. Ensure CLI tools and API keys are configured."
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0, ""
-
-    log_info(
-        f"Available providers (in preference order): {', '.join(available)}",
-        verbose=verbose,
-        quiet=quiet,
-        label=label,
-    )
+def _parse_state_from_comment(body: str, workflow_type: str, issue_number: int) -> Optional[Dict]:
+    marker = _build_state_marker(workflow_type, issue_number)
+    if marker not in body:
+        return None
+
+    try:
+        # Extract content between marker and end marker
+        start_idx = body.find(marker) + len(marker)
+        end_idx = body.find(GITHUB_STATE_MARKER_END, start_idx)
+
+        if end_idx == -1:
+            return None
+
+        json_str = body[start_idx:end_idx].strip()
+        return json.loads(json_str)
+    except (json.JSONDecodeError, ValueError):
+        return None
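The three helpers above define a round-trippable comment format: a typed marker line, pretty-printed JSON, and the HTML-comment terminator, so the state stays invisible in the rendered issue. A sketch (workflow type, issue number, and state invented):

    state = {"step": 4, "status": "in_progress"}
    body = _serialize_state_comment("bug", 123, state)
    # body == '<!-- PDD_WORKFLOW_STATE:bug:issue-123\n{\n  "step": 4,\n  "status": "in_progress"\n}\n-->'
    assert _parse_state_from_comment(body, "bug", 123) == state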

-    # 1. Write user instruction into a unique prompt file under cwd
-    prompt_token = secrets.token_hex(8)
-    prompt_file = cwd / f".agentic_prompt_{prompt_token}.txt"
+def _find_state_comment(
+    repo_owner: str,
+    repo_name: str,
+    issue_number: int,
+    workflow_type: str,
+    cwd: Path
+) -> Optional[Tuple[int, Dict]]:
+    """
+    Returns (comment_id, state_dict) if found, else None.
+    """
+    if not shutil.which("gh"):
+        return None

     try:
-        prompt_file.write_text(instruction, encoding="utf-8")
-    except OSError as exc:
-        message = f"Failed to write prompt file '{prompt_file}': {exc}"
-        log_error(message, verbose=verbose, quiet=quiet, label=label)
-        return False, message, 0.0, ""
-
-    agentic_instruction = (
-        f"Read the file {prompt_file} for instructions. "
-        "You have full file access to explore and modify files as needed."
-    )
+        # List comments
+        cmd = [
+            "gh", "api",
+            f"repos/{repo_owner}/{repo_name}/issues/{issue_number}/comments",
+            "--method", "GET"
+        ]
+        result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
+        if result.returncode != 0:
+            return None
+
+        comments = json.loads(result.stdout)
+        marker = _build_state_marker(workflow_type, issue_number)
+
+        for comment in comments:
+            body = comment.get("body", "")
+            if marker in body:
+                state = _parse_state_from_comment(body, workflow_type, issue_number)
+                if state:
+                    return comment["id"], state
+
+        return None
+    except Exception:
+        return None

-    total_cost = 0.0
-    provider_errors: List[str] = []
+def github_save_state(
+    repo_owner: str,
+    repo_name: str,
+    issue_number: int,
+    workflow_type: str,
+    state: Dict,
+    cwd: Path,
+    comment_id: Optional[int] = None
+) -> Optional[int]:
+    """
+    Creates or updates a GitHub comment with the state. Returns new/existing comment_id.
+    """
+    if not shutil.which("gh"):
+        return None

+    body = _serialize_state_comment(workflow_type, issue_number, state)
+
     try:
-        for provider in AGENT_PROVIDER_PREFERENCE:
-            if provider not in available:
-                continue
-
-            log_info(
-                f"Trying provider '{provider}'...",
-                verbose=verbose,
-                quiet=quiet,
-                label=label,
-            )
-
-            success, message, cost = _run_with_provider(
-                provider,
-                agentic_instruction,
-                cwd,
-                verbose=verbose,
-                quiet=quiet,
-                label=label,
-            )
-            total_cost += cost
-
-            if success:
-                log_info(
-                    f"Provider '{provider}' completed successfully. "
-                    f"Estimated cost: ${cost:.6f}",
-                    verbose=verbose,
-                    quiet=quiet,
-                    label=label,
-                )
-                return True, message, total_cost, provider
-
-            provider_errors.append(f"{provider}: {message}")
-            log_error(
-                f"Provider '{provider}' failed: {message}",
-                verbose=verbose,
-                quiet=quiet,
-                label=label,
-            )
+        if comment_id:
+            # PATCH existing
+            cmd = [
+                "gh", "api",
+                f"repos/{repo_owner}/{repo_name}/issues/comments/{comment_id}",
+                "-X", "PATCH",
+                "-f", f"body={body}"
+            ]
+            res = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
+            if res.returncode == 0:
+                return comment_id
+        else:
+            # POST new
+            cmd = [
+                "gh", "api",
+                f"repos/{repo_owner}/{repo_name}/issues/{issue_number}/comments",
+                "-X", "POST",
+                "-f", f"body={body}"
+            ]
+            res = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
+            if res.returncode == 0:
+                data = json.loads(res.stdout)
+                return data.get("id")
+
+        return None
+    except Exception:
+        return None

-    # If we reach here, all providers failed
-    combined_error = "All agent providers failed. " + " | ".join(provider_errors)
-    log_error(combined_error, verbose=verbose, quiet=quiet, label=label)
-    return False, combined_error, total_cost, ""
+def github_load_state(
+    repo_owner: str,
+    repo_name: str,
+    issue_number: int,
+    workflow_type: str,
+    cwd: Path
+) -> Tuple[Optional[Dict], Optional[int]]:
+    """
+    Wrapper to find state. Returns (state, comment_id).
+    """
+    result = _find_state_comment(repo_owner, repo_name, issue_number, workflow_type, cwd)
+    if result:
+        return result[1], result[0]
+    return None, None
+
+def github_clear_state(
+    repo_owner: str,
+    repo_name: str,
+    issue_number: int,
+    workflow_type: str,
+    cwd: Path
+) -> bool:
+    """
+    Deletes the state comment if it exists.
+    """
+    result = _find_state_comment(repo_owner, repo_name, issue_number, workflow_type, cwd)
+    if not result:
+        return True  # Already clear
+
+    comment_id = result[0]
+    try:
+        cmd = [
+            "gh", "api",
+            f"repos/{repo_owner}/{repo_name}/issues/comments/{comment_id}",
+            "-X", "DELETE"
+        ]
+        subprocess.run(cmd, cwd=cwd, capture_output=True)
+        return True
+    except Exception:
+        return False
+
+def _should_use_github_state(use_github_state: bool) -> bool:
+    if not use_github_state:
+        return False
+    if os.environ.get("PDD_NO_GITHUB_STATE") == "1":
+        return False
+    return True
+
+# --- High Level State Wrappers ---
+
+def load_workflow_state(
+    cwd: Path,
+    issue_number: int,
+    workflow_type: str,
+    state_dir: Path,
+    repo_owner: str,
+    repo_name: str,
+    use_github_state: bool = True
+) -> Tuple[Optional[Dict], Optional[int]]:
+    """
+    Loads state from GitHub (priority) or local file.
+    Returns (state_dict, github_comment_id).
+    """
+    local_file = state_dir / f"{workflow_type}_state_{issue_number}.json"
+
+    # Try GitHub first
+    if _should_use_github_state(use_github_state):
+        gh_state, gh_id = github_load_state(repo_owner, repo_name, issue_number, workflow_type, cwd)
+        if gh_state:
+            # Cache locally
+            try:
+                state_dir.mkdir(parents=True, exist_ok=True)
+                with open(local_file, "w") as f:
+                    json.dump(gh_state, f, indent=2)
+            except Exception:
+                pass  # Ignore local cache errors
+            return gh_state, gh_id

-    finally:
-        # 5. Clean up prompt file
+    # Fallback to local
+    if local_file.exists():
         try:
-            if prompt_file.exists():
-                prompt_file.unlink()
-        except OSError:
-            # Best-effort cleanup; ignore errors.
-            pass
+            with open(local_file, "r") as f:
+                return json.load(f), None
+        except Exception:
+            pass
+
+    return None, None
+
+def save_workflow_state(
+    cwd: Path,
+    issue_number: int,
+    workflow_type: str,
+    state: Dict,
+    state_dir: Path,
+    repo_owner: str,
+    repo_name: str,
+    use_github_state: bool = True,
+    github_comment_id: Optional[int] = None
+) -> Optional[int]:
+    """
+    Saves state to local file and GitHub.
+    Returns updated github_comment_id.
+    """
+    local_file = state_dir / f"{workflow_type}_state_{issue_number}.json"
+
+    # 1. Save Local
+    try:
+        state_dir.mkdir(parents=True, exist_ok=True)
+        with open(local_file, "w") as f:
+            json.dump(state, f, indent=2)
+    except Exception as e:
+        console.print(f"[yellow]Warning: Failed to save local state: {e}[/yellow]")
+
+    # 2. Save GitHub
+    if _should_use_github_state(use_github_state):
+        new_id = github_save_state(
+            repo_owner, repo_name, issue_number, workflow_type, state, cwd, github_comment_id
+        )
+        if new_id:
+            return new_id
+        else:
+            console.print("[dim]Warning: Failed to sync state to GitHub[/dim]")
+
+    return github_comment_id
+
+def clear_workflow_state(
+    cwd: Path,
+    issue_number: int,
+    workflow_type: str,
+    state_dir: Path,
+    repo_owner: str,
+    repo_name: str,
+    use_github_state: bool = True
+) -> None:
+    """
+    Clears local and GitHub state.
+    """
+    local_file = state_dir / f"{workflow_type}_state_{issue_number}.json"
+
+    # Clear Local
+    if local_file.exists():
+        try:
+            os.remove(local_file)
+        except Exception:
+            pass
+
+    # Clear GitHub
+    if _should_use_github_state(use_github_state):
+        github_clear_state(repo_owner, repo_name, issue_number, workflow_type, cwd)
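Taken together, the high-level wrappers give the agentic orchestrators a resumable checkpoint: GitHub is the source of truth and the local JSON file acts as a cache and offline fallback. A usage sketch (repository, issue number, and paths invented):

    from pathlib import Path

    cwd = Path("/path/to/repo")
    state_dir = cwd / ".pdd" / "state"

    state, comment_id = load_workflow_state(cwd, 123, "bug", state_dir, "acme", "widgets")
    if state is None:
        state = {"step": 1}

    state["step"] += 1
    comment_id = save_workflow_state(
        cwd, 123, "bug", state, state_dir, "acme", "widgets",
        github_comment_id=comment_id,
    )

    # Once the workflow completes:
    clear_workflow_state(cwd, 123, "bug", state_dir, "acme", "widgets")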