pdd-cli 0.0.45__py3-none-any.whl → 0.0.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. pdd/__init__.py +40 -8
  2. pdd/agentic_bug.py +323 -0
  3. pdd/agentic_bug_orchestrator.py +497 -0
  4. pdd/agentic_change.py +231 -0
  5. pdd/agentic_change_orchestrator.py +526 -0
  6. pdd/agentic_common.py +598 -0
  7. pdd/agentic_crash.py +534 -0
  8. pdd/agentic_e2e_fix.py +319 -0
  9. pdd/agentic_e2e_fix_orchestrator.py +426 -0
  10. pdd/agentic_fix.py +1294 -0
  11. pdd/agentic_langtest.py +162 -0
  12. pdd/agentic_update.py +387 -0
  13. pdd/agentic_verify.py +183 -0
  14. pdd/architecture_sync.py +565 -0
  15. pdd/auth_service.py +210 -0
  16. pdd/auto_deps_main.py +71 -51
  17. pdd/auto_include.py +245 -5
  18. pdd/auto_update.py +125 -47
  19. pdd/bug_main.py +196 -23
  20. pdd/bug_to_unit_test.py +2 -0
  21. pdd/change_main.py +11 -4
  22. pdd/cli.py +22 -1181
  23. pdd/cmd_test_main.py +350 -150
  24. pdd/code_generator.py +60 -18
  25. pdd/code_generator_main.py +790 -57
  26. pdd/commands/__init__.py +48 -0
  27. pdd/commands/analysis.py +306 -0
  28. pdd/commands/auth.py +309 -0
  29. pdd/commands/connect.py +290 -0
  30. pdd/commands/fix.py +163 -0
  31. pdd/commands/generate.py +257 -0
  32. pdd/commands/maintenance.py +175 -0
  33. pdd/commands/misc.py +87 -0
  34. pdd/commands/modify.py +256 -0
  35. pdd/commands/report.py +144 -0
  36. pdd/commands/sessions.py +284 -0
  37. pdd/commands/templates.py +215 -0
  38. pdd/commands/utility.py +110 -0
  39. pdd/config_resolution.py +58 -0
  40. pdd/conflicts_main.py +8 -3
  41. pdd/construct_paths.py +589 -111
  42. pdd/context_generator.py +10 -2
  43. pdd/context_generator_main.py +175 -76
  44. pdd/continue_generation.py +53 -10
  45. pdd/core/__init__.py +33 -0
  46. pdd/core/cli.py +527 -0
  47. pdd/core/cloud.py +237 -0
  48. pdd/core/dump.py +554 -0
  49. pdd/core/errors.py +67 -0
  50. pdd/core/remote_session.py +61 -0
  51. pdd/core/utils.py +90 -0
  52. pdd/crash_main.py +262 -33
  53. pdd/data/language_format.csv +71 -63
  54. pdd/data/llm_model.csv +20 -18
  55. pdd/detect_change_main.py +5 -4
  56. pdd/docs/prompting_guide.md +864 -0
  57. pdd/docs/whitepaper_with_benchmarks/data_and_functions/benchmark_analysis.py +495 -0
  58. pdd/docs/whitepaper_with_benchmarks/data_and_functions/creation_compare.py +528 -0
  59. pdd/fix_code_loop.py +523 -95
  60. pdd/fix_code_module_errors.py +6 -2
  61. pdd/fix_error_loop.py +491 -92
  62. pdd/fix_errors_from_unit_tests.py +4 -3
  63. pdd/fix_main.py +278 -21
  64. pdd/fix_verification_errors.py +12 -100
  65. pdd/fix_verification_errors_loop.py +529 -286
  66. pdd/fix_verification_main.py +294 -89
  67. pdd/frontend/dist/assets/index-B5DZHykP.css +1 -0
  68. pdd/frontend/dist/assets/index-DQ3wkeQ2.js +449 -0
  69. pdd/frontend/dist/index.html +376 -0
  70. pdd/frontend/dist/logo.svg +33 -0
  71. pdd/generate_output_paths.py +139 -15
  72. pdd/generate_test.py +218 -146
  73. pdd/get_comment.py +19 -44
  74. pdd/get_extension.py +8 -9
  75. pdd/get_jwt_token.py +318 -22
  76. pdd/get_language.py +8 -7
  77. pdd/get_run_command.py +75 -0
  78. pdd/get_test_command.py +68 -0
  79. pdd/git_update.py +70 -19
  80. pdd/incremental_code_generator.py +2 -2
  81. pdd/insert_includes.py +13 -4
  82. pdd/llm_invoke.py +1711 -181
  83. pdd/load_prompt_template.py +19 -12
  84. pdd/path_resolution.py +140 -0
  85. pdd/pdd_completion.fish +25 -2
  86. pdd/pdd_completion.sh +30 -4
  87. pdd/pdd_completion.zsh +79 -4
  88. pdd/postprocess.py +14 -4
  89. pdd/preprocess.py +293 -24
  90. pdd/preprocess_main.py +41 -6
  91. pdd/prompts/agentic_bug_step10_pr_LLM.prompt +182 -0
  92. pdd/prompts/agentic_bug_step1_duplicate_LLM.prompt +73 -0
  93. pdd/prompts/agentic_bug_step2_docs_LLM.prompt +129 -0
  94. pdd/prompts/agentic_bug_step3_triage_LLM.prompt +95 -0
  95. pdd/prompts/agentic_bug_step4_reproduce_LLM.prompt +97 -0
  96. pdd/prompts/agentic_bug_step5_root_cause_LLM.prompt +123 -0
  97. pdd/prompts/agentic_bug_step6_test_plan_LLM.prompt +107 -0
  98. pdd/prompts/agentic_bug_step7_generate_LLM.prompt +172 -0
  99. pdd/prompts/agentic_bug_step8_verify_LLM.prompt +119 -0
  100. pdd/prompts/agentic_bug_step9_e2e_test_LLM.prompt +289 -0
  101. pdd/prompts/agentic_change_step10_identify_issues_LLM.prompt +1006 -0
  102. pdd/prompts/agentic_change_step11_fix_issues_LLM.prompt +984 -0
  103. pdd/prompts/agentic_change_step12_create_pr_LLM.prompt +131 -0
  104. pdd/prompts/agentic_change_step1_duplicate_LLM.prompt +73 -0
  105. pdd/prompts/agentic_change_step2_docs_LLM.prompt +101 -0
  106. pdd/prompts/agentic_change_step3_research_LLM.prompt +126 -0
  107. pdd/prompts/agentic_change_step4_clarify_LLM.prompt +164 -0
  108. pdd/prompts/agentic_change_step5_docs_change_LLM.prompt +981 -0
  109. pdd/prompts/agentic_change_step6_devunits_LLM.prompt +1005 -0
  110. pdd/prompts/agentic_change_step7_architecture_LLM.prompt +1044 -0
  111. pdd/prompts/agentic_change_step8_analyze_LLM.prompt +1027 -0
  112. pdd/prompts/agentic_change_step9_implement_LLM.prompt +1077 -0
  113. pdd/prompts/agentic_crash_explore_LLM.prompt +49 -0
  114. pdd/prompts/agentic_e2e_fix_step1_unit_tests_LLM.prompt +90 -0
  115. pdd/prompts/agentic_e2e_fix_step2_e2e_tests_LLM.prompt +91 -0
  116. pdd/prompts/agentic_e2e_fix_step3_root_cause_LLM.prompt +89 -0
  117. pdd/prompts/agentic_e2e_fix_step4_fix_e2e_tests_LLM.prompt +96 -0
  118. pdd/prompts/agentic_e2e_fix_step5_identify_devunits_LLM.prompt +91 -0
  119. pdd/prompts/agentic_e2e_fix_step6_create_unit_tests_LLM.prompt +106 -0
  120. pdd/prompts/agentic_e2e_fix_step7_verify_tests_LLM.prompt +116 -0
  121. pdd/prompts/agentic_e2e_fix_step8_run_pdd_fix_LLM.prompt +120 -0
  122. pdd/prompts/agentic_e2e_fix_step9_verify_all_LLM.prompt +146 -0
  123. pdd/prompts/agentic_fix_explore_LLM.prompt +45 -0
  124. pdd/prompts/agentic_fix_harvest_only_LLM.prompt +48 -0
  125. pdd/prompts/agentic_fix_primary_LLM.prompt +85 -0
  126. pdd/prompts/agentic_update_LLM.prompt +925 -0
  127. pdd/prompts/agentic_verify_explore_LLM.prompt +45 -0
  128. pdd/prompts/auto_include_LLM.prompt +122 -905
  129. pdd/prompts/change_LLM.prompt +3093 -1
  130. pdd/prompts/detect_change_LLM.prompt +686 -27
  131. pdd/prompts/example_generator_LLM.prompt +22 -1
  132. pdd/prompts/extract_code_LLM.prompt +5 -1
  133. pdd/prompts/extract_program_code_fix_LLM.prompt +7 -1
  134. pdd/prompts/extract_prompt_update_LLM.prompt +7 -8
  135. pdd/prompts/extract_promptline_LLM.prompt +17 -11
  136. pdd/prompts/find_verification_errors_LLM.prompt +6 -0
  137. pdd/prompts/fix_code_module_errors_LLM.prompt +12 -2
  138. pdd/prompts/fix_errors_from_unit_tests_LLM.prompt +9 -0
  139. pdd/prompts/fix_verification_errors_LLM.prompt +22 -0
  140. pdd/prompts/generate_test_LLM.prompt +41 -7
  141. pdd/prompts/generate_test_from_example_LLM.prompt +115 -0
  142. pdd/prompts/increase_tests_LLM.prompt +1 -5
  143. pdd/prompts/insert_includes_LLM.prompt +316 -186
  144. pdd/prompts/prompt_code_diff_LLM.prompt +119 -0
  145. pdd/prompts/prompt_diff_LLM.prompt +82 -0
  146. pdd/prompts/trace_LLM.prompt +25 -22
  147. pdd/prompts/unfinished_prompt_LLM.prompt +85 -1
  148. pdd/prompts/update_prompt_LLM.prompt +22 -1
  149. pdd/pytest_output.py +127 -12
  150. pdd/remote_session.py +876 -0
  151. pdd/render_mermaid.py +236 -0
  152. pdd/server/__init__.py +52 -0
  153. pdd/server/app.py +335 -0
  154. pdd/server/click_executor.py +587 -0
  155. pdd/server/executor.py +338 -0
  156. pdd/server/jobs.py +661 -0
  157. pdd/server/models.py +241 -0
  158. pdd/server/routes/__init__.py +31 -0
  159. pdd/server/routes/architecture.py +451 -0
  160. pdd/server/routes/auth.py +364 -0
  161. pdd/server/routes/commands.py +929 -0
  162. pdd/server/routes/config.py +42 -0
  163. pdd/server/routes/files.py +603 -0
  164. pdd/server/routes/prompts.py +1322 -0
  165. pdd/server/routes/websocket.py +473 -0
  166. pdd/server/security.py +243 -0
  167. pdd/server/terminal_spawner.py +209 -0
  168. pdd/server/token_counter.py +222 -0
  169. pdd/setup_tool.py +648 -0
  170. pdd/simple_math.py +2 -0
  171. pdd/split_main.py +3 -2
  172. pdd/summarize_directory.py +237 -195
  173. pdd/sync_animation.py +8 -4
  174. pdd/sync_determine_operation.py +839 -112
  175. pdd/sync_main.py +351 -57
  176. pdd/sync_orchestration.py +1400 -756
  177. pdd/sync_tui.py +848 -0
  178. pdd/template_expander.py +161 -0
  179. pdd/template_registry.py +264 -0
  180. pdd/templates/architecture/architecture_json.prompt +237 -0
  181. pdd/templates/generic/generate_prompt.prompt +174 -0
  182. pdd/trace.py +168 -12
  183. pdd/trace_main.py +4 -3
  184. pdd/track_cost.py +140 -63
  185. pdd/unfinished_prompt.py +51 -4
  186. pdd/update_main.py +567 -67
  187. pdd/update_model_costs.py +2 -2
  188. pdd/update_prompt.py +19 -4
  189. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/METADATA +29 -11
  190. pdd_cli-0.0.118.dist-info/RECORD +227 -0
  191. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/licenses/LICENSE +1 -1
  192. pdd_cli-0.0.45.dist-info/RECORD +0 -116
  193. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/WHEEL +0 -0
  194. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/entry_points.txt +0 -0
  195. {pdd_cli-0.0.45.dist-info → pdd_cli-0.0.118.dist-info}/top_level.txt +0 -0
pdd/server/routes/prompts.py (new file)
@@ -0,0 +1,1322 @@
+ """
+ REST API endpoints for prompt analysis and preprocessing.
+
+ Provides endpoints for preprocessing prompts and calculating token metrics
+ without executing commands.
+ """
+
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import List, Optional
+
+ from fastapi import APIRouter, Depends, HTTPException
+ from pydantic import BaseModel, Field
+
+ try:
+     from rich.console import Console
+     console = Console()
+ except ImportError:
+     class Console:
+         def print(self, *args, **kwargs):
+             import builtins
+             builtins.print(*args)
+     console = Console()
+
+ from ..security import PathValidator, SecurityError
+ from ..token_counter import get_token_metrics
+ from pdd.load_prompt_template import load_prompt_template
+
+
+ # Request/Response Models
+
+ class CostEstimateResponse(BaseModel):
+     """Cost estimation result."""
+     input_cost: float = Field(..., description="Estimated input cost in USD")
+     model: str = Field(..., description="Model used for estimation")
+     tokens: int = Field(..., description="Number of tokens")
+     cost_per_million: float = Field(..., description="Cost per million tokens")
+     currency: str = Field("USD", description="Currency code")
+
+
+ class TokenMetricsResponse(BaseModel):
+     """Token metrics result."""
+     token_count: int = Field(..., description="Number of tokens")
+     context_limit: int = Field(..., description="Model context limit")
+     context_usage_percent: float = Field(..., description="Percentage of context used")
+     cost_estimate: Optional[CostEstimateResponse] = Field(None, description="Cost estimate if pricing available")
+
+
+ class PromptAnalyzeRequest(BaseModel):
+     """Request to analyze a prompt file."""
+     path: str = Field(..., description="Path to prompt file (relative to project root)")
+     model: str = Field("claude-sonnet-4-20250514", description="Model to use for token estimation")
+     preprocess: bool = Field(True, description="Whether to preprocess the prompt")
+     content: Optional[str] = Field(None, description="Optional content to analyze instead of reading from file")
+
+
+ class PromptAnalyzeResponse(BaseModel):
+     """Response from prompt analysis."""
+     raw_content: str = Field(..., description="Original prompt content")
+     processed_content: Optional[str] = Field(None, description="Preprocessed content (if requested)")
+     raw_metrics: TokenMetricsResponse = Field(..., description="Token metrics for raw content")
+     processed_metrics: Optional[TokenMetricsResponse] = Field(None, description="Token metrics for processed content")
+     preprocessing_succeeded: bool = Field(True, description="Whether preprocessing succeeded")
+     preprocessing_error: Optional[str] = Field(None, description="Preprocessing error if any")
+
+
+ class SyncStatusResponse(BaseModel):
+     """Response from sync status check."""
+     status: str = Field(..., description="Sync status: in_sync, prompt_changed, code_changed, conflict, never_synced")
+     last_sync_timestamp: Optional[str] = Field(None, description="ISO timestamp of last sync")
+     last_sync_command: Optional[str] = Field(None, description="Last sync command executed")
+     prompt_modified: bool = Field(False, description="Whether prompt was modified since last sync")
+     code_modified: bool = Field(False, description="Whether code was modified since last sync")
+     fingerprint_exists: bool = Field(False, description="Whether a fingerprint exists")
+     prompt_exists: bool = Field(False, description="Whether the prompt file exists")
+     code_exists: bool = Field(False, description="Whether the code file exists")
+
+
+ class ModelInfo(BaseModel):
+     """Information about an available LLM model."""
+     model: str = Field(..., description="Full model identifier (e.g., gpt-5.1-codex-mini)")
+     provider: str = Field(..., description="Model provider (e.g., OpenAI, Anthropic)")
+     input_cost: float = Field(..., description="Input cost per million tokens (USD)")
+     output_cost: float = Field(..., description="Output cost per million tokens (USD)")
+     elo: int = Field(..., description="Coding arena ELO rating")
+     context_limit: int = Field(..., description="Maximum context window size in tokens")
+     max_thinking_tokens: int = Field(0, description="Maximum thinking/reasoning tokens (0 if not supported)")
+     reasoning_type: str = Field("none", description="Reasoning type: none, effort, or budget")
+     structured_output: bool = Field(True, description="Whether the model supports structured output")
+
+
+ class ModelsResponse(BaseModel):
+     """Response containing available models."""
+     models: list[ModelInfo] = Field(..., description="List of available models")
+     default_model: str = Field(..., description="Default model name")
+
+
+ class MatchCheckRequest(BaseModel):
+     """Request to check prompt-code match."""
+     prompt_content: str = Field(..., description="Prompt/requirements content")
+     code_content: str = Field(..., description="Code content to evaluate")
+     strength: float = Field(0.5, description="Model strength (0-1)")
+
+
+ class MatchCheckResult(BaseModel):
+     """Result from LLM match evaluation."""
+     match_score: int = Field(..., description="Match score (0-100)")
+     summary: str = Field(..., description="Summary of match analysis")
+     missing: list[str] = Field(default_factory=list, description="Missing requirements")
+     extra: list[str] = Field(default_factory=list, description="Extra code not in prompt")
+     suggestions: list[str] = Field(default_factory=list, description="Improvement suggestions")
+
+
+ class MatchCheckResponse(BaseModel):
+     """Response from match check endpoint."""
+     result: MatchCheckResult = Field(..., description="Match evaluation result")
+     cost: float = Field(..., description="LLM invocation cost in USD")
+     model: str = Field(..., description="Model used for evaluation")
+
+
+ # Diff Analysis Models (for detailed prompt-code diff visualization)
+
+ class PromptRange(BaseModel):
+     """Line range in the prompt content."""
+     startLine: int = Field(..., description="Starting line number (1-indexed)")
+     endLine: int = Field(..., description="Ending line number (1-indexed)")
+     text: str = Field(..., description="Text excerpt from this range")
+
+
+ class CodeRange(BaseModel):
+     """Line range in the code content."""
+     startLine: int = Field(..., description="Starting line number (1-indexed)")
+     endLine: int = Field(..., description="Ending line number (1-indexed)")
+     text: str = Field(..., description="Text excerpt from this range")
+
+
+ class DiffSection(BaseModel):
+     """A section representing a semantic unit (requirement/feature) and its mapping."""
+     id: str = Field(..., description="Unique identifier for this section")
+     promptRange: PromptRange = Field(..., description="Line range in the prompt")
+     codeRanges: list[CodeRange] = Field(default_factory=list, description="Corresponding code ranges (can be multiple or empty)")
+     status: str = Field(..., description="Match status: matched, partial, missing, or extra")
+     matchConfidence: int = Field(..., description="Confidence score 0-100")
+     semanticLabel: str = Field(..., description="Semantic label (e.g., 'Error Handling', 'API Endpoint')")
+     notes: Optional[str] = Field("", description="Explanation of WHY this status exists")
+
+
+ class LineMapping(BaseModel):
+     """Fine-grained line-level mapping between prompt and code."""
+     promptLine: int = Field(..., description="Line number in prompt (1-indexed)")
+     codeLines: list[int] = Field(default_factory=list, description="Corresponding line numbers in code")
+     matchType: str = Field(..., description="Match type: exact, semantic, partial, or none")
+
+
+ class HiddenKnowledgeLocation(BaseModel):
+     """Location of hidden knowledge in code."""
+     startLine: int = Field(..., description="Starting line number (1-indexed)")
+     endLine: int = Field(..., description="Ending line number (1-indexed)")
+
+
+ class HiddenKnowledge(BaseModel):
+     """Undocumented code knowledge that would be lost on regeneration."""
+     type: str = Field(..., description="Type: magic_value, algorithm_choice, edge_case, error_handling, api_contract, optimization, business_logic, assumption")
+     location: HiddenKnowledgeLocation = Field(..., description="Location in code")
+     description: str = Field(..., description="What the code knows that the prompt doesn't say")
+     regenerationImpact: str = Field(..., description="Impact: would_differ, would_fail, or might_work")
+     suggestedPromptAddition: str = Field(..., description="What to add to the prompt to capture this")
+
+
+ class DiffStats(BaseModel):
+     """Aggregated statistics for the diff analysis."""
+     totalRequirements: int = Field(..., description="Total number of requirements identified in prompt")
+     matchedRequirements: int = Field(..., description="Number of fully matched requirements")
+     missingRequirements: int = Field(..., description="Number of requirements not implemented in code")
+     totalCodeFeatures: int = Field(0, description="Total number of code features/sections identified")
+     documentedFeatures: int = Field(0, description="Number of code features documented in prompt")
+     undocumentedFeatures: int = Field(0, description="Number of code features not in prompt")
+     promptToCodeCoverage: float = Field(..., description="How much of the prompt is implemented in code (0-100)")
+     codeToPromptCoverage: float = Field(0.0, description="How much of the code is documented in prompt (0-100)")
+     hiddenKnowledgeCount: int = Field(0, description="Number of hidden knowledge items found")
+     criticalGaps: int = Field(0, description="Number of critical gaps that would cause regeneration failure")
+
+
+ class DiffAnalysisResult(BaseModel):
+     """Detailed diff analysis result focused on regeneration capability."""
+     overallScore: int = Field(..., description="Overall regeneration capability score 0-100")
+     canRegenerate: bool = Field(False, description="Conservative assessment: could this prompt produce working code?")
+     regenerationRisk: str = Field("high", description="Risk level: low, medium, high, or critical")
+     promptToCodeScore: int = Field(0, description="How well code implements prompt (0-100)")
+     codeToPromptScore: int = Field(0, description="How well prompt describes code (0-100)")
+     summary: str = Field(..., description="Summary of regeneration viability")
+     sections: list[DiffSection] = Field(default_factory=list, description="Prompt requirement sections with code mappings")
+     codeSections: list[DiffSection] = Field(default_factory=list, description="Code feature sections with prompt mappings")
+     hiddenKnowledge: list[HiddenKnowledge] = Field(default_factory=list, description="Undocumented code knowledge that would be lost")
+     lineMappings: list[LineMapping] = Field(default_factory=list, description="Line-level mappings")
+     stats: DiffStats = Field(..., description="Aggregated statistics")
+     missing: list[str] = Field(default_factory=list, description="Requirements in prompt but not in code")
+     extra: list[str] = Field(default_factory=list, description="Code features that would be LOST on regeneration")
+     suggestions: list[str] = Field(default_factory=list, description="Specific additions to enable regeneration")
+
+
+ class DiffAnalysisRequest(BaseModel):
+     """Request for detailed prompt-code diff analysis."""
+     prompt_content: str = Field(..., description="Prompt/requirements content")
+     code_content: str = Field(..., description="Code content to analyze")
+     strength: float = Field(0.5, description="Model strength (0-1)")
+     mode: str = Field("detailed", description="Analysis mode: 'quick' or 'detailed'")
+     include_tests: bool = Field(True, description="Include test content in analysis")
+     prompt_path: Optional[str] = Field(None, description="Prompt path for auto-detecting tests")
+     code_path: Optional[str] = Field(None, description="Code path for finding associated tests")
+
+
+ class DiffAnalysisResponse(BaseModel):
+     """Response from diff analysis endpoint."""
+     result: DiffAnalysisResult = Field(..., description="Detailed diff analysis result")
+     cost: float = Field(..., description="LLM invocation cost in USD")
+     model: str = Field(..., description="Model used for analysis")
+     analysisMode: str = Field(..., description="Analysis mode used")
+     cached: bool = Field(False, description="Whether result was from cache")
+     tests_included: bool = Field(False, description="Whether tests were included in analysis")
+     test_files: List[str] = Field(default_factory=list, description="Test files included in analysis")
+
+
+ # Router setup
+ router = APIRouter(prefix="/api/v1/prompts", tags=["prompts"])
+
+ # Dependency injection placeholder
+ _path_validator: Optional[PathValidator] = None
+
+
+ def get_path_validator() -> PathValidator:
+     """Dependency to get the PathValidator instance."""
+     if _path_validator is None:
+         raise RuntimeError("PathValidator not configured")
+     return _path_validator
+
+
+ def set_path_validator(validator: PathValidator) -> None:
+     """Configure the PathValidator instance."""
+     global _path_validator
+     _path_validator = validator
+
+
+ @router.post("/analyze", response_model=PromptAnalyzeResponse)
+ async def analyze_prompt(
+     request: PromptAnalyzeRequest,
+     validator: PathValidator = Depends(get_path_validator),
+ ):
+     """
+     Analyze a prompt file: preprocess it and calculate token metrics.
+
+     Returns both raw and processed content with their respective token counts,
+     context usage percentages, and cost estimates.
+
+     This endpoint does NOT execute any commands - it's purely for preview
+     and cost estimation before running expensive operations.
+     """
+     try:
+         abs_path = validator.validate(request.path)
+
+         # Use provided content if available, otherwise read from file
+         if request.content is not None:
+             raw_content = request.content
+             # Check content size (limit to 500KB)
+             if len(raw_content.encode('utf-8')) > 500 * 1024:
+                 raise HTTPException(
+                     status_code=400,
+                     detail=f"Content too large for analysis (max 500KB)"
+                 )
+         else:
+             # Read from file
+             if not abs_path.exists():
+                 raise HTTPException(status_code=404, detail=f"File not found: {request.path}")
+
+             if abs_path.is_dir():
+                 raise HTTPException(status_code=400, detail=f"Cannot analyze directory: {request.path}")
+
+             # Check file size (limit to 500KB for preprocessing)
+             file_size = abs_path.stat().st_size
+             if file_size > 500 * 1024:
+                 raise HTTPException(
+                     status_code=400,
+                     detail=f"File too large for analysis: {file_size} bytes (max 500KB)"
+                 )
+
+             # Read raw content
+             try:
+                 raw_content = abs_path.read_text(encoding='utf-8')
+             except UnicodeDecodeError:
+                 raise HTTPException(status_code=400, detail="File is not a valid text file")
+
+         # Calculate raw metrics
+         pricing_csv = validator.project_root / ".pdd" / "llm_model.csv"
+         raw_metrics = get_token_metrics(
+             raw_content,
+             model=request.model,
+             pricing_csv=pricing_csv if pricing_csv.exists() else None
+         )
+
+         # Preprocess if requested
+         processed_content = None
+         processed_metrics = None
+         preprocessing_succeeded = True
+         preprocessing_error = None
+
+         if request.preprocess:
+             try:
+                 # Import here to avoid circular imports
+                 from pdd.preprocess import preprocess
+
+                 # Change to project root for relative includes to work
+                 original_cwd = os.getcwd()
+                 try:
+                     os.chdir(validator.project_root)
+                     processed_content = preprocess(
+                         raw_content,
+                         recursive=True,
+                         double_curly_brackets=True
+                     )
+                 finally:
+                     os.chdir(original_cwd)
+
+                 processed_metrics_obj = get_token_metrics(
+                     processed_content,
+                     model=request.model,
+                     pricing_csv=pricing_csv if pricing_csv.exists() else None
+                 )
+                 processed_metrics = TokenMetricsResponse(
+                     token_count=processed_metrics_obj.token_count,
+                     context_limit=processed_metrics_obj.context_limit,
+                     context_usage_percent=processed_metrics_obj.context_usage_percent,
+                     cost_estimate=CostEstimateResponse(**processed_metrics_obj.cost_estimate.to_dict())
+                     if processed_metrics_obj.cost_estimate else None
+                 )
+             except Exception as e:
+                 preprocessing_succeeded = False
+                 preprocessing_error = str(e)
+                 console.print(f"[yellow]Preprocessing warning: {e}[/yellow]")
+
+         # Convert raw metrics to response model
+         raw_metrics_response = TokenMetricsResponse(
+             token_count=raw_metrics.token_count,
+             context_limit=raw_metrics.context_limit,
+             context_usage_percent=raw_metrics.context_usage_percent,
+             cost_estimate=CostEstimateResponse(**raw_metrics.cost_estimate.to_dict())
+             if raw_metrics.cost_estimate else None
+         )
+
+         return PromptAnalyzeResponse(
+             raw_content=raw_content,
+             processed_content=processed_content,
+             raw_metrics=raw_metrics_response,
+             processed_metrics=processed_metrics,
+             preprocessing_succeeded=preprocessing_succeeded,
+             preprocessing_error=preprocessing_error,
+         )
+
+     except SecurityError as e:
+         raise HTTPException(status_code=403, detail=e.message)
+
+
+ @router.get("/sync-status", response_model=SyncStatusResponse)
+ async def get_sync_status(
+     basename: str,
+     language: str,
+     validator: PathValidator = Depends(get_path_validator),
+ ):
+     """
+     Get the sync status for a prompt/code pair.
+
+     Compares current file hashes with the stored fingerprint to determine
+     if the prompt and code are in sync, or if either has been modified.
+
+     Query parameters:
+         basename: The basename of the module (e.g., "calculator", "core/utils")
+         language: The programming language (e.g., "python", "typescript")
+
+     Returns:
+         SyncStatusResponse with status and modification details
+     """
+     try:
+         # Import sync utilities - these handle all the fingerprint logic
+         from pdd.sync_determine_operation import (
+             read_fingerprint,
+             get_pdd_file_paths,
+             calculate_sha256,
+         )
+
+         # Change to project root for proper path resolution
+         original_cwd = os.getcwd()
+         try:
+             os.chdir(validator.project_root)
+
+             # Get file paths for this module
+             paths = get_pdd_file_paths(basename, language)
+
+             # Check if files exist
+             prompt_exists = paths['prompt'].exists()
+             code_exists = paths['code'].exists()
+
+             # Read fingerprint (stored hash state)
+             fingerprint = read_fingerprint(basename, language)
+
+             if not fingerprint:
+                 # No fingerprint - never synced
+                 return SyncStatusResponse(
+                     status="never_synced",
+                     fingerprint_exists=False,
+                     prompt_exists=prompt_exists,
+                     code_exists=code_exists,
+                 )
+
+             # Calculate current hashes
+             current_prompt_hash = calculate_sha256(paths['prompt']) if prompt_exists else None
+             current_code_hash = calculate_sha256(paths['code']) if code_exists else None
+
+             # Compare with fingerprint
+             prompt_modified = (
+                 current_prompt_hash is not None and
+                 fingerprint.prompt_hash is not None and
+                 current_prompt_hash != fingerprint.prompt_hash
+             )
+             code_modified = (
+                 current_code_hash is not None and
+                 fingerprint.code_hash is not None and
+                 current_code_hash != fingerprint.code_hash
+             )
+
+             # Determine status
+             if prompt_modified and code_modified:
+                 status = "conflict"
+             elif prompt_modified:
+                 status = "prompt_changed"
+             elif code_modified:
+                 status = "code_changed"
+             else:
+                 status = "in_sync"
+
+             return SyncStatusResponse(
+                 status=status,
+                 last_sync_timestamp=fingerprint.timestamp,
+                 last_sync_command=fingerprint.command,
+                 prompt_modified=prompt_modified,
+                 code_modified=code_modified,
+                 fingerprint_exists=True,
+                 prompt_exists=prompt_exists,
+                 code_exists=code_exists,
+             )
+
+         finally:
+             os.chdir(original_cwd)
+
+     except SecurityError as e:
+         raise HTTPException(status_code=403, detail=e.message)
+     except Exception as e:
+         console.print(f"[red]Error getting sync status: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error getting sync status: {str(e)}")
+
+
+ @router.get("/models", response_model=ModelsResponse)
+ async def get_available_models():
+     """
+     Get a list of available LLM models with their capabilities.
+
+     Returns model information including:
+     - Context limits
+     - Thinking/reasoning token capacity
+     - Pricing (input/output cost per million tokens)
+     - ELO ratings
+     """
+     try:
+         # Import here to avoid circular imports
+         from pdd.llm_invoke import _load_model_data, LLM_MODEL_CSV_PATH, DEFAULT_BASE_MODEL
+         from ..token_counter import MODEL_CONTEXT_LIMITS
+
+         # Load model data from CSV
+         model_df = _load_model_data(LLM_MODEL_CSV_PATH)
+
+         # Helper to determine context limit for a model
+         def get_context_limit(model_name: str) -> int:
+             """Get context limit based on model name."""
+             model_lower = model_name.lower()
+             for prefix, limit in MODEL_CONTEXT_LIMITS.items():
+                 if prefix in model_lower:
+                     return limit
+             return MODEL_CONTEXT_LIMITS.get("default", 128000)
+
+         # Convert DataFrame to list of ModelInfo
+         models = []
+         for _, row in model_df.iterrows():
+             model_name = str(row.get('model', ''))
+             if not model_name:
+                 continue
+
+             models.append(ModelInfo(
+                 model=model_name,
+                 provider=str(row.get('provider', 'Unknown')),
+                 input_cost=float(row.get('input', 0)),
+                 output_cost=float(row.get('output', 0)),
+                 elo=int(row.get('coding_arena_elo', 0)),
+                 context_limit=get_context_limit(model_name),
+                 max_thinking_tokens=int(row.get('max_reasoning_tokens', 0)),
+                 reasoning_type=str(row.get('reasoning_type', 'none')),
+                 structured_output=bool(row.get('structured_output', True)),
+             ))
+
+         # Sort by ELO descending (best models first)
+         models.sort(key=lambda m: m.elo, reverse=True)
+
+         return ModelsResponse(
+             models=models,
+             default_model=DEFAULT_BASE_MODEL,
+         )
+
+     except Exception as e:
+         console.print(f"[red]Error getting available models: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error getting available models: {str(e)}")
+
+
+ @router.post("/check-match", response_model=MatchCheckResponse)
+ async def check_match(request: MatchCheckRequest):
+     """
+     Check how well code implements the requirements in a prompt using LLM judge.
+
+     Uses llm_invoke to evaluate the match between prompt requirements and code,
+     returning a score, summary, missing requirements, and suggestions.
+     """
+     try:
+         from pdd.llm_invoke import llm_invoke
+
+         judge_prompt = """You are a code review expert. Analyze how well the following code implements the requirements in the prompt.
+
+ PROMPT/REQUIREMENTS:
+ {prompt}
+
+ CODE:
+ {code}
+
+ Evaluate the code against the prompt requirements and respond with a JSON object containing:
+ - match_score: integer from 0-100 indicating how well the code matches the prompt
+ - summary: 1-2 sentence summary of your analysis
+ - missing: array of requirements from the prompt that are NOT implemented in the code
+ - extra: array of code features that are NOT specified in the prompt
+ - suggestions: array of improvement suggestions"""
+
+         result = llm_invoke(
+             prompt=judge_prompt,
+             input_json={"prompt": request.prompt_content, "code": request.code_content},
+             strength=request.strength,
+             temperature=0.1,
+             output_schema={
+                 "type": "object",
+                 "properties": {
+                     "match_score": {"type": "integer", "minimum": 0, "maximum": 100},
+                     "summary": {"type": "string"},
+                     "missing": {"type": "array", "items": {"type": "string"}},
+                     "extra": {"type": "array", "items": {"type": "string"}},
+                     "suggestions": {"type": "array", "items": {"type": "string"}}
+                 },
+                 "required": ["match_score", "summary"]
+             },
+             use_cloud=False,
+         )
+
+         # Parse result - it might be a string or dict depending on model
+         llm_result = result.get('result', {})
+         if isinstance(llm_result, str):
+             import json
+             llm_result = json.loads(llm_result)
+
+         return MatchCheckResponse(
+             result=MatchCheckResult(
+                 match_score=llm_result.get('match_score', 0),
+                 summary=llm_result.get('summary', ''),
+                 missing=llm_result.get('missing', []),
+                 extra=llm_result.get('extra', []),
+                 suggestions=llm_result.get('suggestions', []),
+             ),
+             cost=result.get('cost', 0.0),
+             model=result.get('model_name', 'unknown'),
+         )
+
+     except Exception as e:
+         console.print(f"[red]Error checking match: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error checking match: {str(e)}")
+
+
+ # Simple in-memory cache for diff analysis results
+ _diff_cache: dict[str, tuple[DiffAnalysisResponse, float]] = {}
+ _CACHE_TTL_SECONDS = 600  # 10 minutes
+
+
+ def _get_cache_key(
+     prompt_content: str,
+     code_content: str,
+     mode: str,
+     include_tests: bool = False,
+     test_content: str = ""
+ ) -> str:
+     """Generate cache key from content hash."""
+     import hashlib
+     content = f"{prompt_content}|||{code_content}|||{mode}|||{include_tests}|||{test_content}"
+     return hashlib.sha256(content.encode()).hexdigest()
+
+
+ def _get_cached_result(key: str) -> Optional[DiffAnalysisResponse]:
+     """Get cached result if not expired."""
+     import time
+     if key in _diff_cache:
+         result, timestamp = _diff_cache[key]
+         if time.time() - timestamp < _CACHE_TTL_SECONDS:
+             # Return cached result with cached flag set
+             return DiffAnalysisResponse(
+                 result=result.result,
+                 cost=result.cost,
+                 model=result.model,
+                 analysisMode=result.analysisMode,
+                 cached=True,
+                 tests_included=result.tests_included,
+                 test_files=result.test_files,
+             )
+         else:
+             del _diff_cache[key]
+     return None
+
+
+ def _cache_result(key: str, result: DiffAnalysisResponse) -> None:
+     """Cache a result."""
+     import time
+     _diff_cache[key] = (result, time.time())
+
+
+ @router.post("/diff-analysis", response_model=DiffAnalysisResponse)
+ async def analyze_diff(request: DiffAnalysisRequest):
+     """
+     Perform detailed diff analysis between prompt requirements and code.
+
+     Returns semantic sections with line-level mappings, showing how each
+     requirement in the prompt corresponds to code implementation.
+
+     Supports two modes:
+     - 'quick': Faster, lower-cost analysis with basic section mapping
+     - 'detailed': Full line-level mapping with higher accuracy
+
+     Results are cached for 10 minutes based on content hash.
+
+     If include_tests is True and prompt_path/code_path are provided,
+     test content will be appended to the prompt for more accurate analysis.
+     """
+     try:
+         from pdd.llm_invoke import llm_invoke
+         from pdd.construct_paths import _find_pddrc_file, _load_pddrc_config
+         from pdd.code_generator_main import _find_default_test_files
+         import json
+         import pathlib
+
+         # Process tests if requested
+         analysis_prompt = request.prompt_content
+         test_files_used: List[str] = []
+         test_content = ""
+
+         if request.include_tests and (request.prompt_path or request.code_path):
+             # Get tests_dir from .pddrc config
+             tests_dir = "tests"  # Default
+             pddrc_path = _find_pddrc_file()
+             if pddrc_path:
+                 try:
+                     config = _load_pddrc_config(pddrc_path)
+                     tests_dir = config.get("tests_dir", "tests")
+                 except Exception:
+                     pass
+
+             # Find test files based on code path
+             code_path = request.code_path
+             if not code_path and request.prompt_path:
+                 # Try to infer code path from prompt path
+                 # e.g., prompts/hello_python.prompt -> pdd/hello.py
+                 prompt_path = pathlib.Path(request.prompt_path)
+                 prompt_name = prompt_path.stem
+                 # Remove language suffix (e.g., _python, _Python, _typescript)
+                 # Case-insensitive matching
+                 prompt_name_lower = prompt_name.lower()
+                 for suffix in ['_python', '_typescript', '_javascript', '_rust', '_go', '_java', '_cpp', '_c', '_csharp', '_ruby', '_swift', '_kotlin']:
+                     if prompt_name_lower.endswith(suffix):
+                         prompt_name = prompt_name[:-len(suffix)]
+                         break
+                 # This is a best-effort heuristic; may not always work
+
+             if code_path:
+                 found_tests = _find_default_test_files(tests_dir, code_path)
+                 if found_tests:
+                     test_content = "\n\n<unit_test_content>\n"
+                     test_content += "The following is the unit test content that the generated code must pass:\n"
+                     for tf in found_tests:
+                         try:
+                             with open(tf, 'r', encoding='utf-8') as f:
+                                 content = f.read()
+                             test_content += f"\nFile: {pathlib.Path(tf).name}\n```python\n{content}\n```\n"
+                             test_files_used.append(tf)
+                         except Exception:
+                             pass
+                     test_content += "</unit_test_content>\n"
+                     analysis_prompt = request.prompt_content + test_content
+
+         # Check cache (includes test content in key)
+         cache_key = _get_cache_key(
+             request.prompt_content,
+             request.code_content,
+             request.mode,
+             request.include_tests,
+             test_content,
+         )
+         cached = _get_cached_result(cache_key)
+         if cached:
+             return cached
+
+         # Adjust strength based on mode
+         strength = request.strength
+         if request.mode == "quick":
+             strength = min(strength, 0.25)
+
+         # Load the LLM prompt template for bidirectional diff analysis
+         diff_prompt = load_prompt_template("prompt_code_diff_LLM")
+         if not diff_prompt:
+             raise HTTPException(
+                 status_code=500,
+                 detail="Failed to load prompt_code_diff_LLM.prompt template"
+             )
+
+         # Add line numbers to content (use analysis_prompt which includes tests)
+         prompt_lines = analysis_prompt.split('\n')
+         code_lines = request.code_content.split('\n')
+
+         prompt_numbered = '\n'.join(
+             f"{i+1}: {line}" for i, line in enumerate(prompt_lines)
+         )
+         code_numbered = '\n'.join(
+             f"{i+1}: {line}" for i, line in enumerate(code_lines)
+         )
+
+         # Define the output schema for bidirectional structured output
+         section_schema = {
+             "type": "object",
+             "properties": {
+                 "id": {"type": "string"},
+                 "promptRange": {
+                     "type": "object",
+                     "properties": {
+                         "startLine": {"type": "integer"},
+                         "endLine": {"type": "integer"},
+                         "text": {"type": "string"}
+                     },
+                     "required": ["startLine", "endLine", "text"]
+                 },
+                 "codeRanges": {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             "startLine": {"type": "integer"},
+                             "endLine": {"type": "integer"},
+                             "text": {"type": "string"}
+                         },
+                         "required": ["startLine", "endLine", "text"]
+                     }
+                 },
+                 "status": {"type": "string", "enum": ["matched", "partial", "missing", "extra"]},
+                 "matchConfidence": {"type": "integer", "minimum": 0, "maximum": 100},
+                 "semanticLabel": {"type": "string"},
+                 "notes": {"type": "string", "description": "Required explanation of WHY this status exists"}
+             },
+             "required": ["id", "promptRange", "status", "matchConfidence", "semanticLabel", "notes"]
+         }
+
+         output_schema = {
+             "type": "object",
+             "properties": {
+                 "overallScore": {"type": "integer", "minimum": 0, "maximum": 100},
+                 "promptToCodeScore": {"type": "integer", "minimum": 0, "maximum": 100},
+                 "codeToPromptScore": {"type": "integer", "minimum": 0, "maximum": 100},
+                 "summary": {"type": "string"},
+                 "sections": {"type": "array", "items": section_schema},
+                 "codeSections": {"type": "array", "items": section_schema},
+                 "lineMappings": {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             "promptLine": {"type": "integer"},
+                             "codeLines": {"type": "array", "items": {"type": "integer"}},
+                             "matchType": {"type": "string", "enum": ["exact", "semantic", "partial", "none"]}
+                         },
+                         "required": ["promptLine", "codeLines", "matchType"]
+                     }
+                 },
+                 "stats": {
+                     "type": "object",
+                     "properties": {
+                         "totalRequirements": {"type": "integer"},
+                         "matchedRequirements": {"type": "integer"},
+                         "missingRequirements": {"type": "integer"},
+                         "totalCodeFeatures": {"type": "integer"},
+                         "documentedFeatures": {"type": "integer"},
+                         "undocumentedFeatures": {"type": "integer"},
+                         "promptToCodeCoverage": {"type": "number"},
+                         "codeToPromptCoverage": {"type": "number"}
+                     },
+                     "required": ["totalRequirements", "matchedRequirements", "missingRequirements", "promptToCodeCoverage"]
+                 },
+                 "missing": {"type": "array", "items": {"type": "string"}},
+                 "extra": {"type": "array", "items": {"type": "string"}},
+                 "suggestions": {"type": "array", "items": {"type": "string"}}
+             },
+             "required": ["overallScore", "promptToCodeScore", "codeToPromptScore", "summary", "sections", "codeSections", "stats"]
+         }
+
+         result = llm_invoke(
+             prompt=diff_prompt,
+             input_json={
+                 "prompt_numbered": prompt_numbered,
+                 "code_numbered": code_numbered,
+             },
+             strength=strength,
+             temperature=0.1,
+             output_schema=output_schema,
+             use_cloud=False,
+         )
+
+         # Parse result
+         llm_result = result.get('result', {})
+         if isinstance(llm_result, str):
+             llm_result = json.loads(llm_result)
+
+         def parse_section(sec: dict) -> DiffSection:
+             """Helper to parse a section from LLM output."""
+             prompt_range = sec.get('promptRange', {})
+             code_ranges = [
+                 CodeRange(
+                     startLine=cr.get('startLine', 1),
+                     endLine=cr.get('endLine', 1),
+                     text=cr.get('text', ''),
+                 )
+                 for cr in sec.get('codeRanges', [])
+             ]
+             return DiffSection(
+                 id=sec.get('id', ''),
+                 promptRange=PromptRange(
+                     startLine=prompt_range.get('startLine', 1),
+                     endLine=prompt_range.get('endLine', 1),
+                     text=prompt_range.get('text', ''),
+                 ),
+                 codeRanges=code_ranges,
+                 status=sec.get('status', 'missing'),
+                 matchConfidence=sec.get('matchConfidence', 0),
+                 semanticLabel=sec.get('semanticLabel', ''),
+                 notes=sec.get('notes'),
+             )
+
+         # Build prompt → code sections
+         sections = [parse_section(sec) for sec in llm_result.get('sections', [])]
+
+         # Build code → prompt sections
+         code_sections = [parse_section(sec) for sec in llm_result.get('codeSections', [])]
+
+         # Build line mappings
+         line_mappings = []
+         for lm in llm_result.get('lineMappings', []):
+             line_mappings.append(LineMapping(
+                 promptLine=lm.get('promptLine', 1),
+                 codeLines=lm.get('codeLines', []),
+                 matchType=lm.get('matchType', 'none'),
+             ))
+
+         # Build stats with bidirectional coverage
+         stats_data = llm_result.get('stats', {})
+         stats = DiffStats(
+             totalRequirements=stats_data.get('totalRequirements', 0),
+             matchedRequirements=stats_data.get('matchedRequirements', 0),
+             missingRequirements=stats_data.get('missingRequirements', 0),
+             totalCodeFeatures=stats_data.get('totalCodeFeatures', 0),
+             documentedFeatures=stats_data.get('documentedFeatures', 0),
+             undocumentedFeatures=stats_data.get('undocumentedFeatures', 0),
+             promptToCodeCoverage=stats_data.get('promptToCodeCoverage', 0.0),
+             codeToPromptCoverage=stats_data.get('codeToPromptCoverage', 0.0),
+         )
+
+         # Build response with bidirectional scores
+         response = DiffAnalysisResponse(
+             result=DiffAnalysisResult(
+                 overallScore=llm_result.get('overallScore', 0),
+                 promptToCodeScore=llm_result.get('promptToCodeScore', 0),
+                 codeToPromptScore=llm_result.get('codeToPromptScore', 0),
+                 summary=llm_result.get('summary', ''),
+                 sections=sections,
+                 codeSections=code_sections,
+                 lineMappings=line_mappings,
+                 stats=stats,
+                 missing=llm_result.get('missing', []),
+                 extra=llm_result.get('extra', []),
+                 suggestions=llm_result.get('suggestions', []),
+             ),
+             cost=result.get('cost', 0.0),
+             model=result.get('model_name', 'unknown'),
+             analysisMode=request.mode,
+             cached=False,
+             tests_included=len(test_files_used) > 0,
+             test_files=test_files_used,
+         )
+
+         # Cache the result
+         _cache_result(cache_key, response)
+
+         return response
+
+     except Exception as e:
+         console.print(f"[red]Error analyzing diff: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error analyzing diff: {str(e)}")
+
+
+ # =============================================================================
+ # Prompt Version History and Diff Analysis (Feature 1)
+ # =============================================================================
+
+ class PromptVersionInfo(BaseModel):
+     """Information about a prompt version from git history."""
+     commit_hash: str = Field(..., description="Git commit hash")
+     commit_date: str = Field(..., description="Commit date in ISO format")
+     commit_message: str = Field(..., description="Commit message")
+     author: str = Field(..., description="Author name")
+     prompt_content: str = Field(..., description="Prompt content at this version")
+
+
+ class PromptHistoryRequest(BaseModel):
+     """Request for prompt git history."""
+     prompt_path: str = Field(..., description="Path to the prompt file")
+     limit: int = Field(10, description="Maximum number of versions to retrieve")
+
+
+ class PromptHistoryResponse(BaseModel):
+     """Response with prompt versions from git history."""
+     versions: List[PromptVersionInfo] = Field(default_factory=list)
+     current_content: str = Field(..., description="Current working directory content")
+     has_uncommitted_changes: bool = Field(False, description="Whether there are uncommitted changes")
+
+
+ class LinguisticChange(BaseModel):
+     """A semantic/linguistic change between prompt versions."""
+     change_type: str = Field(..., description="Type: added, removed, modified")
+     category: str = Field(..., description="Category: requirement, constraint, behavior, format")
+     description: str = Field(..., description="Description of the change")
+     old_text: Optional[str] = Field(None, description="Old text (for modified/removed)")
+     new_text: Optional[str] = Field(None, description="New text (for added/modified)")
+     impact: str = Field(..., description="Impact: breaking, enhancement, clarification")
+
+
+ class PromptDiffRequest(BaseModel):
+     """Request to diff two prompt versions."""
+     prompt_path: str = Field(..., description="Path to the prompt file")
+     version_a: str = Field(..., description="First version: commit hash, 'HEAD', or 'working'")
+     version_b: str = Field(..., description="Second version: commit hash, 'HEAD', or 'working'")
+     code_path: Optional[str] = Field(None, description="Optional code path for related code diff")
+     strength: float = Field(0.5, description="Model strength (0-1) for analysis quality")
+
+
+ class PromptDiffResponse(BaseModel):
+     """Response with prompt version diff analysis."""
+     prompt_a_content: str = Field(..., description="Content of version A (older)")
+     prompt_b_content: str = Field(..., description="Content of version B (newer)")
+     text_diff: str = Field(..., description="Unified text diff (old → new)")
+     linguistic_changes: List[LinguisticChange] = Field(default_factory=list)
+     code_diff: Optional[str] = Field(None, description="Code diff if code_path provided")
+     summary: str = Field(..., description="LLM-generated summary of semantic changes")
+     cost: float = Field(0.0, description="LLM analysis cost")
+     model: str = Field("", description="Model used for analysis")
+     version_a_label: str = Field("", description="Label for version A (the older version)")
+     version_b_label: str = Field("", description="Label for version B (the newer version)")
+     versions_swapped: bool = Field(False, description="Whether versions were swapped to ensure old→new order")
+
+
+ def _get_git_file_at_commit(file_path: str, commit: str) -> Optional[str]:
+     """Get file content at a specific git commit."""
+     import subprocess
+     try:
+         result = subprocess.run(
+             ['git', 'show', f'{commit}:{file_path}'],
+             capture_output=True,
+             text=True,
+             cwd=Path(file_path).parent if Path(file_path).is_absolute() else None,
+         )
+         if result.returncode == 0:
+             return result.stdout
+         return None
+     except Exception:
+         return None
+
+
+ def _get_git_log(file_path: str, limit: int = 10) -> List[dict]:
+     """Get git log for a file."""
+     import subprocess
+     try:
+         # Format: hash|date|message|author
+         result = subprocess.run(
+             ['git', 'log', f'-n{limit}', '--format=%H|%aI|%s|%an', '--', file_path],
+             capture_output=True,
+             text=True,
+             cwd=Path(file_path).parent if Path(file_path).is_absolute() else None,
+         )
+         if result.returncode != 0:
+             return []
+
+         commits = []
+         for line in result.stdout.strip().split('\n'):
+             if not line:
+                 continue
+             parts = line.split('|', 3)
+             if len(parts) >= 4:
+                 commits.append({
+                     'commit_hash': parts[0],
+                     'commit_date': parts[1],
+                     'commit_message': parts[2],
+                     'author': parts[3],
+                 })
+         return commits
+     except Exception:
+         return []
+
+
+ def _has_uncommitted_changes(file_path: str) -> bool:
+     """Check if file has uncommitted changes."""
+     import subprocess
+     try:
+         result = subprocess.run(
+             ['git', 'status', '--porcelain', '--', file_path],
+             capture_output=True,
+             text=True,
+             cwd=Path(file_path).parent if Path(file_path).is_absolute() else None,
+         )
+         return bool(result.stdout.strip())
+     except Exception:
+         return False
+
+
+ def _get_text_diff(content_a: str, content_b: str) -> str:
+     """Generate unified diff between two strings."""
+     import difflib
+     diff = difflib.unified_diff(
+         content_a.splitlines(keepends=True),
+         content_b.splitlines(keepends=True),
+         fromfile='version_a',
+         tofile='version_b',
+     )
+     return ''.join(diff)
+
+
+ def _get_commit_timestamp(file_path: str, version: str) -> Optional[str]:
+     """
+     Get ISO timestamp for a version identifier.
+
+     Returns:
+         ISO timestamp string, or None if not determinable.
+         'working' returns current time (newest).
+         'HEAD' returns HEAD commit time.
+         Commit hashes return their commit time.
+     """
+     import subprocess
+     from datetime import datetime
+
+     if version == 'working':
+         # Working directory is always the "newest" - return current time
+         return datetime.now().isoformat()
+
+     try:
+         # Get commit timestamp
+         commit = 'HEAD' if version == 'HEAD' else version
+         result = subprocess.run(
+             ['git', 'log', '-1', '--format=%aI', commit],
+             capture_output=True,
+             text=True,
+             cwd=Path(file_path).parent if Path(file_path).is_absolute() else None,
+         )
+         if result.returncode == 0 and result.stdout.strip():
+             return result.stdout.strip()
+         return None
+     except Exception:
+         return None
+
+
+ def _is_version_newer(file_path: str, version_a: str, version_b: str) -> bool:
+     """
+     Determine if version_a is newer than version_b.
+
+     Returns True if version_a is newer, False otherwise.
+     """
+     ts_a = _get_commit_timestamp(file_path, version_a)
+     ts_b = _get_commit_timestamp(file_path, version_b)
+
+     if ts_a is None or ts_b is None:
+         # If we can't determine, assume the order is correct
+         return False
+
+     return ts_a > ts_b
+
+
+ @router.post("/git-history", response_model=PromptHistoryResponse)
+ async def get_prompt_git_history(request: PromptHistoryRequest):
+     """
+     Get git history for a prompt file.
+
+     Returns a list of versions with their content, commit info, and
+     whether there are uncommitted changes in the working directory.
+     """
+     try:
+         prompt_path = request.prompt_path
+         file_path = Path(prompt_path)
+
+         # Get current content
+         current_content = ""
+         if file_path.exists():
+             current_content = file_path.read_text(encoding='utf-8')
+
+         # Get git log
+         commits = _get_git_log(prompt_path, request.limit)
+
+         # Build version list with content
+         versions = []
+         for commit in commits:
+             content = _get_git_file_at_commit(prompt_path, commit['commit_hash'])
+             if content is not None:
+                 versions.append(PromptVersionInfo(
+                     commit_hash=commit['commit_hash'],
+                     commit_date=commit['commit_date'],
+                     commit_message=commit['commit_message'],
+                     author=commit['author'],
+                     prompt_content=content,
+                 ))
+
+         # Check for uncommitted changes
+         has_changes = _has_uncommitted_changes(prompt_path)
+
+         return PromptHistoryResponse(
+             versions=versions,
+             current_content=current_content,
+             has_uncommitted_changes=has_changes,
+         )
+
+     except Exception as e:
+         console.print(f"[red]Error getting git history: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error getting git history: {str(e)}")
+
+
+ @router.post("/prompt-diff", response_model=PromptDiffResponse)
+ async def get_prompt_diff(request: PromptDiffRequest):
+     """
+     Compare two prompt versions with LLM-powered linguistic analysis.
+
+     Analyzes semantic differences between prompt versions and categorizes
+     changes by type (requirement, constraint, behavior, format) and
+     impact (breaking, enhancement, clarification).
+
+     Note: Versions are automatically ordered so version_a is older and
+     version_b is newer. This ensures "added" means new content and
+     "removed" means deleted content.
+     """
+     try:
+         from pdd.llm_invoke import llm_invoke
+         import json
+
+         prompt_path = request.prompt_path
+         file_path = Path(prompt_path)
+
+         # Determine version order - we want older → newer
+         version_a = request.version_a
+         version_b = request.version_b
+         versions_swapped = False
+
+         # Check if we need to swap to ensure old → new order
+         if _is_version_newer(prompt_path, version_a, version_b):
+             # version_a is newer, so swap them
+             version_a, version_b = version_b, version_a
+             versions_swapped = True
+
+         # Create human-readable labels
+         def _version_label(v: str) -> str:
+             if v == 'working':
+                 return 'Working Directory'
+             elif v == 'HEAD':
+                 return 'HEAD'
+             else:
+                 return v[:7]  # Short commit hash
+
+         version_a_label = _version_label(version_a)
+         version_b_label = _version_label(version_b)
+
+         # Get content for version A (older)
+         if version_a == 'working':
+             content_a = file_path.read_text(encoding='utf-8') if file_path.exists() else ""
+         elif version_a == 'HEAD':
+             content_a = _get_git_file_at_commit(prompt_path, 'HEAD') or ""
+         else:
+             content_a = _get_git_file_at_commit(prompt_path, version_a) or ""
+
+         # Get content for version B (newer)
+         if version_b == 'working':
+             content_b = file_path.read_text(encoding='utf-8') if file_path.exists() else ""
+         elif version_b == 'HEAD':
+             content_b = _get_git_file_at_commit(prompt_path, 'HEAD') or ""
+         else:
+             content_b = _get_git_file_at_commit(prompt_path, version_b) or ""
+
+         # Generate text diff (old → new)
+         text_diff = _get_text_diff(content_a, content_b)
+
+         # Get code diff if requested
+         code_diff = None
+         if request.code_path:
+             code_path = Path(request.code_path)
+             if version_a == 'working':
+                 code_a = code_path.read_text(encoding='utf-8') if code_path.exists() else ""
+             elif version_a == 'HEAD':
+                 code_a = _get_git_file_at_commit(request.code_path, 'HEAD') or ""
+             else:
+                 code_a = _get_git_file_at_commit(request.code_path, version_a) or ""
+
+             if version_b == 'working':
+                 code_b = code_path.read_text(encoding='utf-8') if code_path.exists() else ""
+             elif version_b == 'HEAD':
+                 code_b = _get_git_file_at_commit(request.code_path, 'HEAD') or ""
+             else:
+                 code_b = _get_git_file_at_commit(request.code_path, version_b) or ""
+
+             code_diff = _get_text_diff(code_a, code_b)
+
+         # LLM analysis for linguistic changes
+         diff_prompt = load_prompt_template("prompt_diff_LLM")
+         if not diff_prompt:
+             # Fallback: return without LLM analysis
+             return PromptDiffResponse(
+                 prompt_a_content=content_a,
+                 prompt_b_content=content_b,
+                 text_diff=text_diff,
+                 linguistic_changes=[],
+                 code_diff=code_diff,
+                 summary="LLM analysis unavailable (template not found)",
+                 cost=0.0,
+                 model="",
+                 version_a_label=version_a_label,
+                 version_b_label=version_b_label,
+                 versions_swapped=versions_swapped,
+             )
+
+         # Define output schema for linguistic analysis
+         output_schema = {
+             "type": "object",
+             "properties": {
+                 "summary": {"type": "string"},
+                 "changes": {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             "change_type": {"type": "string", "enum": ["added", "removed", "modified"]},
+                             "category": {"type": "string", "enum": ["requirement", "constraint", "behavior", "format"]},
+                             "description": {"type": "string"},
+                             "old_text": {"type": "string"},
+                             "new_text": {"type": "string"},
+                             "impact": {"type": "string", "enum": ["breaking", "enhancement", "clarification"]},
+                         },
+                         "required": ["change_type", "category", "description", "impact"]
+                     }
+                 }
+             },
+             "required": ["summary", "changes"]
+         }
+
+         result = llm_invoke(
+             prompt=diff_prompt,
+             input_json={
+                 "prompt_a": content_a,
+                 "prompt_b": content_b,
+                 "text_diff": text_diff,
+             },
+             strength=request.strength,
+             temperature=0.1,
+             output_schema=output_schema,
+             use_cloud=False,
+         )
+
+         llm_result = result.get('result', {})
+         if isinstance(llm_result, str):
+             llm_result = json.loads(llm_result)
+
+         # Parse linguistic changes
+         linguistic_changes = []
+         for change in llm_result.get('changes', []):
+             linguistic_changes.append(LinguisticChange(
+                 change_type=change.get('change_type', 'modified'),
+                 category=change.get('category', 'requirement'),
+                 description=change.get('description', ''),
+                 old_text=change.get('old_text'),
+                 new_text=change.get('new_text'),
+                 impact=change.get('impact', 'clarification'),
+             ))
+
+         return PromptDiffResponse(
+             prompt_a_content=content_a,
+             prompt_b_content=content_b,
+             text_diff=text_diff,
+             linguistic_changes=linguistic_changes,
+             code_diff=code_diff,
+             summary=llm_result.get('summary', ''),
+             cost=result.get('cost', 0.0),
+             model=result.get('model_name', 'unknown'),
+             version_a_label=version_a_label,
+             version_b_label=version_b_label,
+             versions_swapped=versions_swapped,
+         )
+
+     except Exception as e:
+         console.print(f"[red]Error analyzing prompt diff: {e}[/red]")
+         raise HTTPException(status_code=500, detail=f"Error analyzing prompt diff: {str(e)}")
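
For orientation, below is a minimal client sketch against the prompts routes added in this release. It is an illustration only: the route prefix /api/v1/prompts and the request and response fields come from the code above, while the host, port, absence of authentication, and the file and module names in the payloads are assumptions.

# Hypothetical client sketch (assumes a local PDD server on port 8000 with no
# auth; endpoint paths and field names are taken from the router code above).
import requests

BASE = "http://localhost:8000/api/v1/prompts"  # host/port are assumptions

# Preview token metrics and preprocessing for a prompt file (runs no commands).
analysis = requests.post(f"{BASE}/analyze", json={
    "path": "prompts/calculator_python.prompt",  # hypothetical project file
    "preprocess": True,
}).json()
print(analysis["raw_metrics"]["token_count"], analysis["preprocessing_succeeded"])

# Fingerprint-based sync status for a prompt/code pair.
status = requests.get(f"{BASE}/sync-status", params={
    "basename": "calculator",  # hypothetical module basename
    "language": "python",
}).json()
print(status["status"])

# LLM-judged match between a prompt and its code.
match = requests.post(f"{BASE}/check-match", json={
    "prompt_content": "Write a function add(a, b) that returns a + b.",
    "code_content": "def add(a, b):\n    return a + b\n",
}).json()
print(match["result"]["match_score"], match["cost"])

The /diff-analysis, /git-history, and /prompt-diff routes follow the same pattern. Note that /analyze and /sync-status depend on the server's PathValidator, which must be configured via set_path_validator() before the router is used, otherwise get_path_validator() raises RuntimeError.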