pdd-cli 0.0.41__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,574 +1,1039 @@
1
- # pdd/sync_determine_operation.py
1
+ """
2
+ sync_determine_operation.py
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
+
5
+ Core decision-making logic for the `pdd sync` command.
6
+ Implements fingerprint-based state analysis and deterministic operation selection.
7
+ """
2
8
 
3
9
  import os
4
10
  import sys
5
11
  import json
6
12
  import hashlib
7
13
  import subprocess
8
- import threading
9
- from dataclasses import dataclass, asdict, field
10
- from datetime import datetime, timezone
11
14
  from pathlib import Path
12
- from typing import Optional, Dict, Any, List
15
+ from dataclasses import dataclass, field
16
+ from typing import Dict, List, Optional, Any
17
+ from datetime import datetime
18
+ import psutil
13
19
 
14
- # --- Dependencies ---
15
- # This implementation requires the 'psutil' library for robust PID checking.
16
- # It can be installed with: pip install psutil
20
+ # Platform-specific imports for file locking
17
21
  try:
18
- import psutil
22
+ import fcntl
23
+ HAS_FCNTL = True
19
24
  except ImportError:
20
- print("Error: 'psutil' library not found. Please install it using 'pip install psutil'", file=sys.stderr)
21
- sys.exit(1)
25
+ HAS_FCNTL = False
22
26
 
23
- # Platform-specific locking
24
- if sys.platform == 'win32':
27
+ try:
25
28
  import msvcrt
26
- else:
27
- import fcntl
29
+ HAS_MSVCRT = True
30
+ except ImportError:
31
+ HAS_MSVCRT = False
32
+
33
+ # Import PDD internal modules
34
+ from pdd.construct_paths import construct_paths
35
+ from pdd.load_prompt_template import load_prompt_template
36
+ from pdd.llm_invoke import llm_invoke
37
+ from pdd.get_language import get_language
38
+
39
+ # Constants - Use functions for dynamic path resolution
40
+ def get_pdd_dir():
41
+ """Get the .pdd directory relative to current working directory."""
42
+ return Path.cwd() / '.pdd'
28
43
 
29
- # --- Constants for Directory Structure ---
30
- PDD_DIR = Path(".pdd")
31
- META_DIR = PDD_DIR / "meta"
32
- LOCKS_DIR = PDD_DIR / "locks"
44
+ def get_meta_dir():
45
+ """Get the metadata directory."""
46
+ return get_pdd_dir() / 'meta'
33
47
 
34
- PROMPTS_ROOT_DIR = Path("prompts")
35
- CODE_ROOT_DIR = Path("src")
36
- EXAMPLES_ROOT_DIR = Path("examples")
37
- TESTS_ROOT_DIR = Path("tests")
48
+ def get_locks_dir():
49
+ """Get the locks directory."""
50
+ return get_pdd_dir() / 'locks'
38
51
 
52
+ # For backward compatibility (resolved once at import time, using the working directory at that moment)
53
+ PDD_DIR = get_pdd_dir()
54
+ META_DIR = get_meta_dir()
55
+ LOCKS_DIR = get_locks_dir()
56
+
57
+ # Export constants for other modules
58
+ __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
59
+ 'sync_determine_operation', 'analyze_conflict_with_llm']
39
60
 
40
- # --- Data Structures ---
41
61
 
42
62
  @dataclass
43
63
  class Fingerprint:
44
64
  """Represents the last known good state of a PDD unit."""
45
65
  pdd_version: str
46
66
  timestamp: str # ISO 8601 format
47
- command: str
48
- prompt_hash: Optional[str] = None
49
- code_hash: Optional[str] = None
50
- example_hash: Optional[str] = None
51
- test_hash: Optional[str] = None
67
+ command: str # e.g., "generate", "fix"
68
+ prompt_hash: Optional[str]
69
+ code_hash: Optional[str]
70
+ example_hash: Optional[str]
71
+ test_hash: Optional[str]
72
+
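Note: this fingerprint is persisted as JSON at .pdd/meta/<basename>_<language>.json (see read_fingerprint below). A hypothetical example for a freshly generated unit, with illustrative values, might look like:

    {
      "pdd_version": "0.0.42",
      "timestamp": "2024-01-01T00:00:00+00:00",
      "command": "generate",
      "prompt_hash": "<sha256 hex digest of the prompt file>",
      "code_hash": "<sha256 hex digest of the generated code>",
      "example_hash": null,
      "test_hash": null
    }

Hashes are hex digests from calculate_sha256 below; a null hash means the corresponding file did not exist when the fingerprint was written.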
52
73
 
53
74
  @dataclass
54
75
  class RunReport:
55
- """Represents the results of the last test or execution run."""
76
+ """Represents the results from the last test run."""
56
77
  timestamp: str
57
78
  exit_code: int
58
79
  tests_passed: int
59
80
  tests_failed: int
60
81
  coverage: float
61
82
 
62
- @dataclass
63
- class LLMConflictResolutionOutput:
64
- """Represents the structured output from the LLM for conflict resolution."""
65
- next_operation: str
66
- reason: str
67
- confidence: float
68
83
 
69
84
  @dataclass
70
85
  class SyncDecision:
71
- """Represents the recommended operation to run next."""
72
- operation: str
86
+ """Represents a decision about what PDD operation to run next."""
87
+ operation: str # 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'all_synced', 'fail_and_request_manual_merge', 'nothing'
73
88
  reason: str
74
89
  details: Dict[str, Any] = field(default_factory=dict)
90
+ estimated_cost: float = 0.0
91
+ confidence: float = 1.0
92
+ prerequisites: List[str] = field(default_factory=list)
75
93
 
76
- # --- Mock Internal PDD Modules ---
77
- # These are placeholders for the internal pdd library functions.
78
-
79
- def load_prompt_template(prompt_name: str) -> Optional[str]:
80
- """
81
- (MOCK) Loads a prompt template from the pdd library.
82
- In a real scenario, this would load from a package resource.
83
- """
84
- templates = {
85
- "sync_analysis_LLM.prompt": """
86
- You are an expert software development assistant. Your task is to resolve a synchronization conflict in a PDD unit.
87
- Both the user and the PDD tool have made changes, and you must decide the best course of action.
88
-
89
- Analyze the following information:
90
-
91
- **Last Known Good State (Fingerprint):**
92
- ```json
93
- {fingerprint}
94
- ```
95
-
96
- **Files Changed Since Last Sync:**
97
- - {changed_files_list}
98
-
99
- **Diffs:**
100
-
101
- --- PROMPT DIFF ---
102
- {prompt_diff}
103
- --- END PROMPT DIFF ---
104
-
105
- --- CODE DIFF ---
106
- {code_diff}
107
- --- END CODE DIFF ---
108
-
109
- --- TEST DIFF ---
110
- {test_diff}
111
- --- END TEST DIFF ---
112
-
113
- --- EXAMPLE DIFF ---
114
- {example_diff}
115
- --- END EXAMPLE DIFF ---
116
-
117
- Based on the diffs, determine the user's intent and the nature of the conflict.
118
- Respond with a JSON object recommending the next operation. The possible operations are:
119
- - "generate": The prompt changes are significant; regenerate the code.
120
- - "update": The code changes are valuable; update the prompt to reflect them.
121
- - "fix": The test changes seem to be fixing a bug; try to fix the code.
122
- - "merge_manually": The conflict is too complex. Ask the user to merge changes.
123
-
124
- Your JSON response must have the following format:
125
- {{
126
- "next_operation": "your_recommendation",
127
- "reason": "A clear, concise explanation of why you chose this operation.",
128
- "confidence": 0.9
129
- }}
130
- """
131
- }
132
- return templates.get(prompt_name)
133
-
134
- def llm_invoke(prompt: str, **kwargs) -> Dict[str, Any]:
135
- """
136
- (MOCK) Invokes the LLM with a given prompt.
137
- This mock version provides a deterministic response for demonstration.
138
- """
139
- print("--- (MOCK) LLM Invocation ---")
140
- print(f"Prompt sent to LLM:\n{prompt[:500]}...")
141
- # In a real scenario, this would call an actual LLM API.
142
- # Here, we return a canned response with low confidence to test the failure path.
143
- response_obj = LLMConflictResolutionOutput(
144
- next_operation="update",
145
- reason="Mock LLM analysis determined that the manual code changes are significant but confidence is low.",
146
- confidence=0.70
147
- )
148
- return {
149
- "result": response_obj,
150
- "cost": 0.001,
151
- "model_name": "mock-gpt-4"
152
- }
153
-
154
-
155
- # --- Directory and Locking Mechanism ---
156
-
157
- def _ensure_pdd_dirs_exist():
158
- """Ensures that the .pdd metadata and lock directories exist."""
159
- META_DIR.mkdir(parents=True, exist_ok=True)
160
- LOCKS_DIR.mkdir(parents=True, exist_ok=True)
161
-
162
- _lock_registry = threading.local()
163
94
 
164
95
  class SyncLock:
165
- """
166
- A robust, re-entrant, PID-aware file lock for synchronizing operations.
167
- Ensures only one process can operate on a PDD unit at a time.
168
- """
96
+ """Context manager for handling file-descriptor based locking."""
97
+
169
98
  def __init__(self, basename: str, language: str):
170
- _ensure_pdd_dirs_exist() # Ensure directories exist before creating lock file
171
- self.lock_dir = LOCKS_DIR
172
- self.lock_path = self.lock_dir / f"{basename}_{language}.lock"
173
- self._lock_fd = None
174
- self._is_reentrant_acquisition = False
175
- self.lock_key = str(self.lock_path)
176
- # The file descriptor is only stored on the instance that actually acquires the lock
177
- self._is_lock_owner = False
178
-
179
- @property
180
- def lock_file_path(self):
181
- return self.lock_path
182
-
183
- def _get_lock_count(self) -> int:
184
- if not hasattr(_lock_registry, 'counts'):
185
- _lock_registry.counts = {}
186
- return _lock_registry.counts.get(self.lock_key, 0)
187
-
188
- def _increment_lock_count(self):
189
- if not hasattr(_lock_registry, 'counts'):
190
- _lock_registry.counts = {}
191
- count = _lock_registry.counts.get(self.lock_key, 0)
192
- _lock_registry.counts[self.lock_key] = count + 1
193
-
194
- def _decrement_lock_count(self) -> int:
195
- if not hasattr(_lock_registry, 'counts'):
196
- _lock_registry.counts = {}
197
- count = _lock_registry.counts.get(self.lock_key, 0)
198
- if count > 0:
199
- _lock_registry.counts[self.lock_key] = count - 1
200
- return _lock_registry.counts.get(self.lock_key, 0)
201
-
202
- def acquire(self):
203
- """
204
- Acquires an exclusive lock, handling stale locks from crashed processes.
205
- Raises TimeoutError if the lock is held by another active process.
206
- """
207
- lock_count = self._get_lock_count()
208
- if lock_count > 0: # Re-entrancy
209
- self._is_reentrant_acquisition = True
210
- self._increment_lock_count()
211
- return
212
-
213
- # First time acquiring in this thread. Perform the actual lock.
214
- if self.lock_path.exists():
215
- try:
216
- pid_str = self.lock_path.read_text().strip()
217
- if pid_str:
218
- pid = int(pid_str)
219
- if psutil.pid_exists(pid):
220
- raise TimeoutError(f"is locked by another process (PID: {pid})")
221
- else:
222
- self.lock_path.unlink()
223
- except (ValueError, FileNotFoundError):
224
- # Corrupted or unreadable lock file, treat as stale
225
- self.lock_path.unlink(missing_ok=True)
226
-
227
- # Use O_TRUNC to ensure we overwrite any previous (e.g., corrupted) content
228
- self._lock_fd = os.open(self.lock_path, os.O_CREAT | os.O_WRONLY | os.O_TRUNC)
229
- self._is_lock_owner = True
230
-
231
- try:
232
- if sys.platform == 'win32':
233
- msvcrt.locking(self._lock_fd, msvcrt.LK_NBLCK, 1)
234
- else:
235
- fcntl.flock(self._lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
236
- except (IOError, BlockingIOError):
237
- os.close(self._lock_fd)
238
- self._lock_fd = None
239
- self._is_lock_owner = False
240
- raise TimeoutError("Failed to acquire lock; another process may have just started.")
241
-
242
- os.write(self._lock_fd, str(os.getpid()).encode())
243
- os.fsync(self._lock_fd)
244
- self._increment_lock_count()
245
-
246
- def release(self):
247
- """Releases the lock and cleans up the lock file."""
248
- new_count = self._decrement_lock_count()
249
-
250
- if new_count == 0 and self._is_lock_owner:
251
- # This was the last lock holder in this thread, so release the file lock.
252
- if self._lock_fd:
253
- if sys.platform != 'win32':
254
- fcntl.flock(self._lock_fd, fcntl.LOCK_UN)
255
- os.close(self._lock_fd)
256
- self._lock_fd = None
257
-
258
- try:
259
- if self.lock_path.exists():
260
- # Safety check: only delete if we are still the owner
261
- pid_str = self.lock_path.read_text().strip()
262
- if not pid_str or int(pid_str) == os.getpid():
263
- self.lock_path.unlink()
264
- except (OSError, ValueError, FileNotFoundError):
265
- pass # Ignore errors on cleanup
266
-
99
+ self.basename = basename
100
+ self.language = language
101
+ self.lock_file = get_locks_dir() / f"{basename}_{language}.lock"
102
+ self.fd = None
103
+ self.current_pid = os.getpid()
104
+
267
105
  def __enter__(self):
268
106
  self.acquire()
269
107
  return self
270
-
108
+
271
109
  def __exit__(self, exc_type, exc_val, exc_tb):
272
110
  self.release()
111
+
112
+ def acquire(self):
113
+ """Acquire the lock, handling stale locks and re-entrancy."""
114
+ # Ensure lock directory exists
115
+ self.lock_file.parent.mkdir(parents=True, exist_ok=True)
116
+
117
+ try:
118
+ # Check if lock file exists
119
+ if self.lock_file.exists():
120
+ try:
121
+ # Read PID from lock file
122
+ stored_pid = int(self.lock_file.read_text().strip())
123
+
124
+ # Check if this is the same process (re-entrancy)
125
+ if stored_pid == self.current_pid:
126
+ return
127
+
128
+ # Check if the process is still running
129
+ if psutil.pid_exists(stored_pid):
130
+ raise TimeoutError(f"Lock held by running process {stored_pid}")
131
+
132
+ # Stale lock - remove it
133
+ self.lock_file.unlink(missing_ok=True)
134
+
135
+ except (ValueError, FileNotFoundError):
136
+ # Invalid lock file - remove it
137
+ self.lock_file.unlink(missing_ok=True)
138
+
139
+ # Create lock file and acquire file descriptor lock
140
+ self.lock_file.touch()
141
+ self.fd = open(self.lock_file, 'w')
142
+
143
+ if HAS_FCNTL:
144
+ # POSIX systems
145
+ fcntl.flock(self.fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
146
+ elif HAS_MSVCRT:
147
+ # Windows systems
148
+ msvcrt.locking(self.fd.fileno(), msvcrt.LK_NBLCK, 1)
149
+
150
+ # Write current PID to lock file
151
+ self.fd.write(str(self.current_pid))
152
+ self.fd.flush()
153
+
154
+ except (IOError, OSError) as e:
155
+ if self.fd:
156
+ self.fd.close()
157
+ self.fd = None
158
+ raise TimeoutError(f"Failed to acquire lock: {e}")
159
+
160
+ def release(self):
161
+ """Release the lock and clean up."""
162
+ if self.fd:
163
+ try:
164
+ if HAS_FCNTL:
165
+ fcntl.flock(self.fd.fileno(), fcntl.LOCK_UN)
166
+ elif HAS_MSVCRT:
167
+ msvcrt.locking(self.fd.fileno(), msvcrt.LK_UNLCK, 1)
168
+
169
+ self.fd.close()
170
+ self.fd = None
171
+
172
+ # Remove lock file
173
+ self.lock_file.unlink(missing_ok=True)
174
+
175
+ except (IOError, OSError):
176
+ # Best effort cleanup
177
+ pass
178
+
179
+
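A minimal usage sketch for the lock (the basename/language values are hypothetical): acquire() is re-entrant for the same PID, removes stale lock files left by dead processes, and raises TimeoutError when another live process holds the lock.

    from pdd.sync_determine_operation import SyncLock

    try:
        with SyncLock("calculator", "python"):
            # safe to read or modify the unit's files here
            ...
    except TimeoutError as err:
        print(f"sync skipped: {err}")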
180
+ def get_extension(language: str) -> str:
181
+ """Get file extension for a programming language."""
182
+ extensions = {
183
+ 'python': 'py',
184
+ 'javascript': 'js',
185
+ 'typescript': 'ts',
186
+ 'java': 'java',
187
+ 'cpp': 'cpp',
188
+ 'c': 'c',
189
+ 'ruby': 'rb',
190
+ 'go': 'go',
191
+ 'rust': 'rs',
192
+ 'php': 'php',
193
+ 'swift': 'swift',
194
+ 'kotlin': 'kt',
195
+ 'scala': 'scala',
196
+ 'csharp': 'cs',
197
+ 'css': 'css',
198
+ 'html': 'html',
199
+ 'sql': 'sql',
200
+ 'shell': 'sh',
201
+ 'bash': 'sh',
202
+ 'powershell': 'ps1',
203
+ 'r': 'r',
204
+ 'matlab': 'm',
205
+ 'lua': 'lua',
206
+ 'perl': 'pl',
207
+ }
208
+ return extensions.get(language.lower(), language.lower())
273
209
 
274
210
 
275
- # --- State Analysis Functions ---
276
-
277
- LANGUAGE_EXTENSIONS = {
278
- "python": "py",
279
- "javascript": "js",
280
- "typescript": "ts",
281
- "rust": "rs",
282
- "go": "go",
283
- }
284
-
285
- def get_language_extension(language: str) -> str:
286
- """Gets the file extension for a given language."""
287
- if language not in LANGUAGE_EXTENSIONS:
288
- raise ValueError(f"Unsupported language: {language}")
289
- return LANGUAGE_EXTENSIONS[language]
211
+ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
212
+ """Returns a dictionary mapping file types to their expected Path objects."""
213
+ try:
214
+ # Use construct_paths to get configuration-aware paths
215
+ prompt_filename = f"{basename}_{language}.prompt"
216
+ prompt_path = str(Path(prompts_dir) / prompt_filename)
217
+
218
+ # Check if prompt file exists - if not, we can't proceed with construct_paths
219
+ if not Path(prompt_path).exists():
220
+ # Fall back to default path construction if prompt doesn't exist
221
+ extension = get_extension(language)
222
+ return {
223
+ 'prompt': Path(prompt_path),
224
+ 'code': Path(f"{basename}.{extension}"),
225
+ 'example': Path(f"{basename}_example.{extension}"),
226
+ 'test': Path(f"test_{basename}.{extension}")
227
+ }
228
+
229
+ input_file_paths = {
230
+ "prompt_file": prompt_path
231
+ }
232
+
233
+ # Only call construct_paths if the prompt file exists
234
+ resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
235
+ input_file_paths=input_file_paths,
236
+ force=True, # Use force=True to avoid interactive prompts during sync
237
+ quiet=True,
238
+ command="generate",
239
+ command_options={}
240
+ )
241
+
242
+ # Extract paths from config as specified in the spec
243
+ # The spec shows: return { 'prompt': Path(config['prompt_file']), ... }
244
+ # But we need to map the output_file_paths keys to our expected structure
245
+
246
+ # For generate command, construct_paths returns these in output_file_paths:
247
+ # - 'output' or 'code_file' for the generated code
248
+ # For other commands, we need to construct the full set of paths
249
+
250
+ # Get the code file path from output_file_paths
251
+ code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
252
+ if not code_path:
253
+ # Fallback to constructing from basename
254
+ extension = get_extension(language)
255
+ code_path = f"{basename}.{extension}"
256
+
257
+ # Get configured paths for example and test files using construct_paths
258
+ # Note: construct_paths requires files to exist, so we need to handle the case
259
+ # where code file doesn't exist yet (during initial sync startup)
260
+ try:
261
+ # Create a temporary empty code file if it doesn't exist for path resolution
262
+ code_path_obj = Path(code_path)
263
+ temp_code_created = False
264
+ if not code_path_obj.exists():
265
+ code_path_obj.parent.mkdir(parents=True, exist_ok=True)
266
+ code_path_obj.touch()
267
+ temp_code_created = True
268
+
269
+ try:
270
+ # Get example path using example command
271
+ _, _, example_output_paths, _ = construct_paths(
272
+ input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
273
+ force=True, quiet=True, command="example", command_options={}
274
+ )
275
+ example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
276
+
277
+ # Get test path using test command
278
+ _, _, test_output_paths, _ = construct_paths(
279
+ input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
280
+ force=True, quiet=True, command="test", command_options={}
281
+ )
282
+ test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
283
+
284
+ finally:
285
+ # Clean up temporary file if we created it
286
+ if temp_code_created and code_path_obj.exists() and code_path_obj.stat().st_size == 0:
287
+ code_path_obj.unlink()
288
+
289
+ except Exception as e:
290
+ # Log the specific exception that's causing fallback to wrong paths
291
+ import logging
292
+ logger = logging.getLogger(__name__)
293
+ logger.warning(f"construct_paths failed in get_pdd_file_paths: {type(e).__name__}: {e}")
294
+ logger.warning(f"Falling back to .pddrc-aware path construction")
295
+ logger.warning(f"prompt_path: {prompt_path}, code_path: {code_path}")
296
+
297
+ # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
298
+ try:
299
+ # Get configured directories by using construct_paths with just the prompt file
300
+ _, _, example_output_paths, _ = construct_paths(
301
+ input_file_paths={"prompt_file": prompt_path},
302
+ force=True, quiet=True, command="example", command_options={}
303
+ )
304
+ example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
305
+
306
+ _, _, test_output_paths, _ = construct_paths(
307
+ input_file_paths={"prompt_file": prompt_path},
308
+ force=True, quiet=True, command="test", command_options={}
309
+ )
310
+ test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
311
+
312
+ except Exception:
313
+ # Final fallback to deriving from code path if all else fails
314
+ code_path_obj = Path(code_path)
315
+ code_dir = code_path_obj.parent
316
+ code_stem = code_path_obj.stem
317
+ code_ext = code_path_obj.suffix
318
+ example_path = code_dir / f"{code_stem}_example{code_ext}"
319
+ test_path = code_dir / f"test_{code_stem}{code_ext}"
320
+
321
+ return {
322
+ 'prompt': Path(prompt_path),
323
+ 'code': Path(code_path),
324
+ 'example': example_path,
325
+ 'test': test_path
326
+ }
327
+
328
+ except Exception as e:
329
+ # Fallback to simple naming if construct_paths fails
330
+ extension = get_extension(language)
331
+ return {
332
+ 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
333
+ 'code': Path(f"{basename}.{extension}"),
334
+ 'example': Path(f"{basename}_example.{extension}"),
335
+ 'test': Path(f"test_{basename}.{extension}")
336
+ }
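When the prompt file does not exist yet or construct_paths fails entirely, the fallback above yields a flat naming scheme. For a hypothetical unit with basename="calculator", language="python", prompts_dir="prompts" that is:

    prompt:  prompts/calculator_python.prompt
    code:    calculator.py
    example: calculator_example.py
    test:    test_calculator.py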
290
337
 
291
- def get_pdd_file_paths(basename: str, language: str) -> Dict[str, Path]:
292
- """Returns a dictionary mapping file types to their expected paths."""
293
- ext = get_language_extension(language)
294
- return {
295
- 'prompt': PROMPTS_ROOT_DIR / f"{basename}_{language}.prompt",
296
- 'code': CODE_ROOT_DIR / f"{basename}.{ext}",
297
- 'example': EXAMPLES_ROOT_DIR / f"{basename}_example.{ext}",
298
- 'test': TESTS_ROOT_DIR / f"test_{basename}.{ext}",
299
- }
300
338
 
301
339
  def calculate_sha256(file_path: Path) -> Optional[str]:
302
- """Calculates the SHA256 hash of a file if it exists, otherwise returns None."""
303
- if not file_path.is_file():
340
+ """Calculates the SHA256 hash of a file if it exists."""
341
+ if not file_path.exists():
304
342
  return None
305
343
 
306
- sha256_hash = hashlib.sha256()
307
- with open(file_path, "rb") as f:
308
- for byte_block in iter(lambda: f.read(4096), b""):
309
- sha256_hash.update(byte_block)
310
- return sha256_hash.hexdigest()
344
+ try:
345
+ hasher = hashlib.sha256()
346
+ with open(file_path, 'rb') as f:
347
+ for chunk in iter(lambda: f.read(4096), b""):
348
+ hasher.update(chunk)
349
+ return hasher.hexdigest()
350
+ except (IOError, OSError):
351
+ return None
352
+
311
353
 
312
- def _read_json_file(file_path: Path, data_class) -> Optional[Any]:
313
- """Generic JSON file reader and validator."""
314
- if not file_path.is_file():
354
+ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
355
+ """Reads and validates the JSON fingerprint file."""
356
+ meta_dir = get_meta_dir()
357
+ meta_dir.mkdir(parents=True, exist_ok=True)
358
+ fingerprint_file = meta_dir / f"{basename}_{language}.json"
359
+
360
+ if not fingerprint_file.exists():
315
361
  return None
362
+
316
363
  try:
317
- with open(file_path, 'r') as f:
364
+ with open(fingerprint_file, 'r') as f:
318
365
  data = json.load(f)
319
- return data_class(**data)
320
- except (json.JSONDecodeError, TypeError):
321
- # Catches corrupted file, or if data doesn't match dataclass fields
366
+
367
+ return Fingerprint(
368
+ pdd_version=data['pdd_version'],
369
+ timestamp=data['timestamp'],
370
+ command=data['command'],
371
+ prompt_hash=data.get('prompt_hash'),
372
+ code_hash=data.get('code_hash'),
373
+ example_hash=data.get('example_hash'),
374
+ test_hash=data.get('test_hash')
375
+ )
376
+ except (json.JSONDecodeError, KeyError, IOError):
322
377
  return None
323
378
 
324
- def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
325
- """Reads and validates the JSON fingerprint file."""
326
- fingerprint_path = META_DIR / f"{basename}_{language}.json"
327
- return _read_json_file(fingerprint_path, Fingerprint)
328
379
 
329
380
  def read_run_report(basename: str, language: str) -> Optional[RunReport]:
330
381
  """Reads and validates the JSON run report file."""
331
- report_path = META_DIR / f"{basename}_{language}_run.json"
332
- return _read_json_file(report_path, RunReport)
382
+ meta_dir = get_meta_dir()
383
+ meta_dir.mkdir(parents=True, exist_ok=True)
384
+ run_report_file = meta_dir / f"{basename}_{language}_run.json"
385
+
386
+ if not run_report_file.exists():
387
+ return None
388
+
389
+ try:
390
+ with open(run_report_file, 'r') as f:
391
+ data = json.load(f)
392
+
393
+ return RunReport(
394
+ timestamp=data['timestamp'],
395
+ exit_code=data['exit_code'],
396
+ tests_passed=data['tests_passed'],
397
+ tests_failed=data['tests_failed'],
398
+ coverage=data['coverage']
399
+ )
400
+ except (json.JSONDecodeError, KeyError, IOError):
401
+ return None
402
+
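The run report lives next to the fingerprint at .pdd/meta/<basename>_<language>_run.json. An illustrative (hypothetical) report after a partially failing test run:

    {
      "timestamp": "2024-01-01T00:05:00+00:00",
      "exit_code": 0,
      "tests_passed": 12,
      "tests_failed": 1,
      "coverage": 78.5
    }

In the decision tree below, tests_failed > 0 takes priority and yields 'fix'; otherwise a non-zero exit_code yields 'crash' (or 'verify' after a crash fix), and coverage below target_coverage yields 'test'.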
333
403
 
334
404
  def calculate_current_hashes(paths: Dict[str, Path]) -> Dict[str, Optional[str]]:
335
405
  """Computes the hashes for all current files on disk."""
406
+ # Return hash keys that match what the fingerprint expects
336
407
  return {
337
- f"{file_type}_hash": calculate_sha256(path)
338
- for file_type, path in paths.items()
408
+ f"{file_type}_hash": calculate_sha256(file_path)
409
+ for file_type, file_path in paths.items()
339
410
  }
340
411
 
341
- # --- LLM-based Conflict Analysis ---
342
412
 
343
413
  def get_git_diff(file_path: Path) -> str:
414
+ """Get git diff for a file against HEAD."""
415
+ try:
416
+ result = subprocess.run(
417
+ ['git', 'diff', 'HEAD', str(file_path)],
418
+ capture_output=True,
419
+ text=True,
420
+ cwd=file_path.parent if file_path.parent.exists() else Path.cwd()
421
+ )
422
+
423
+ if result.returncode == 0:
424
+ return result.stdout
425
+ else:
426
+ return ""
427
+ except (subprocess.SubprocessError, FileNotFoundError):
428
+ return ""
429
+
430
+
431
+ def validate_expected_files(fingerprint: Optional[Fingerprint], paths: Dict[str, Path]) -> Dict[str, bool]:
344
432
  """
345
- Gets the git diff of a file against its last committed version (HEAD).
346
- Returns the full content for untracked files.
433
+ Validate that files expected to exist based on fingerprint actually exist.
434
+
435
+ Args:
436
+ fingerprint: The last known good state fingerprint
437
+ paths: Dict mapping file types to their expected Path objects
438
+
439
+ Returns:
440
+ Dict mapping file types to existence status
347
441
  """
348
- if not file_path.exists():
349
- return ""
442
+ validation = {}
350
443
 
351
- # Try to use a relative path if possible, as git's output is cleaner.
352
- # This is safe because test fixtures chdir into the repo root.
353
- try:
354
- path_for_git = file_path.relative_to(Path.cwd())
355
- except ValueError:
356
- # Not relative to CWD, use the original absolute path.
357
- path_for_git = file_path
358
-
359
- # Use 'git status' to check if the file is tracked
360
- try:
361
- status_result = subprocess.run(
362
- ['git', 'status', '--porcelain', str(path_for_git)],
363
- capture_output=True, text=True, check=True, encoding='utf-8'
364
- )
365
- is_untracked = status_result.stdout.strip().startswith('??')
366
- except (subprocess.CalledProcessError, FileNotFoundError):
367
- # Not a git repo, git not found, or file not in repo. Fallback to content.
368
- return file_path.read_text(encoding='utf-8')
369
-
370
- command = ['git', 'diff']
371
- if is_untracked:
372
- # Diff against nothing to show the whole file as an addition
373
- # Use /dev/null for POSIX and NUL for Windows
374
- null_device = "NUL" if sys.platform == "win32" else "/dev/null"
375
- command.extend(['--no-index', null_device, str(path_for_git)])
376
- else:
377
- # Diff against the last commit
378
- command.extend(['HEAD', '--', str(path_for_git)])
444
+ if not fingerprint:
445
+ return validation
446
+
447
+ # Check each file type that has a hash in the fingerprint
448
+ if fingerprint.code_hash:
449
+ validation['code'] = paths['code'].exists()
450
+ if fingerprint.example_hash:
451
+ validation['example'] = paths['example'].exists()
452
+ if fingerprint.test_hash:
453
+ validation['test'] = paths['test'].exists()
379
454
 
380
- try:
381
- # The `git diff` command returns exit code 1 if there are differences,
382
- # which `check=True` would interpret as an error. We must not use it.
383
- diff_result = subprocess.run(
384
- command, capture_output=True, text=True, encoding='utf-8'
385
- )
386
- return diff_result.stdout
387
- except FileNotFoundError:
388
- # Fallback if git command is not found
389
- return file_path.read_text(encoding='utf-8')
390
-
391
- def analyze_conflict_with_llm(
392
- basename: str,
393
- language: str,
455
+ return validation
456
+
457
+
458
+ def _handle_missing_expected_files(
459
+ missing_files: List[str],
460
+ paths: Dict[str, Path],
394
461
  fingerprint: Fingerprint,
395
- changed_files: List[str]
462
+ basename: str,
463
+ language: str,
464
+ prompts_dir: str,
465
+ skip_tests: bool = False,
466
+ skip_verify: bool = False
396
467
  ) -> SyncDecision:
397
468
  """
398
- Uses an LLM to analyze a complex sync conflict and recommend an operation.
469
+ Handle the case where expected files are missing.
470
+ Determine the appropriate recovery operation.
471
+
472
+ Args:
473
+ missing_files: List of file types that are missing
474
+ paths: Dict mapping file types to their expected Path objects
475
+ fingerprint: The last known good state fingerprint
476
+ basename: The base name for the PDD unit
477
+ language: The programming language
478
+ prompts_dir: Directory containing prompt files
479
+ skip_tests: If True, skip test generation
480
+ skip_verify: If True, skip verification operations
481
+
482
+ Returns:
483
+ SyncDecision object with the appropriate recovery operation
399
484
  """
400
- try:
401
- prompt_template = load_prompt_template("sync_analysis_LLM.prompt")
402
- if not prompt_template:
485
+
486
+ # Priority: regenerate from the earliest missing component
487
+ if 'code' in missing_files:
488
+ # Code file missing - start from the beginning
489
+ if paths['prompt'].exists():
490
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
491
+ if check_for_dependencies(prompt_content):
492
+ return SyncDecision(
493
+ operation='auto-deps',
494
+ reason='Code file missing, prompt has dependencies - regenerate from auto-deps',
495
+ details={'missing_files': missing_files, 'prompt_path': str(paths['prompt'])},
496
+ estimated_cost=0.5,
497
+ confidence=0.85
498
+ )
499
+ else:
500
+ return SyncDecision(
501
+ operation='generate',
502
+ reason='Code file missing - regenerate from prompt',
503
+ details={'missing_files': missing_files, 'prompt_path': str(paths['prompt'])},
504
+ estimated_cost=1.0,
505
+ confidence=0.90
506
+ )
507
+
508
+ elif 'example' in missing_files and paths['code'].exists():
509
+ # Code exists but example missing
510
+ return SyncDecision(
511
+ operation='example',
512
+ reason='Example file missing - regenerate example',
513
+ details={'missing_files': missing_files, 'code_path': str(paths['code'])},
514
+ estimated_cost=0.5,
515
+ confidence=0.85
516
+ )
517
+
518
+ elif 'test' in missing_files and paths['code'].exists() and paths['example'].exists():
519
+ # Code and example exist but test missing
520
+ if skip_tests:
521
+ # Skip test generation if --skip-tests flag is used
403
522
  return SyncDecision(
404
- operation="fail_and_request_manual_merge",
405
- reason="Failed to load LLM analysis prompt template 'sync_analysis_LLM.prompt'."
523
+ operation='nothing',
524
+ reason='Test file missing but --skip-tests specified - workflow complete',
525
+ details={'missing_files': missing_files, 'skip_tests': True},
526
+ estimated_cost=0.0,
527
+ confidence=1.0
406
528
  )
407
-
408
- paths = get_pdd_file_paths(basename, language)
409
- diffs = {ftype: "" for ftype in ['prompt', 'code', 'test', 'example']}
410
-
411
- for file_type in changed_files:
412
- if file_type in paths:
413
- diffs[file_type] = get_git_diff(paths[file_type])
414
-
415
- # Format the prompt for the LLM
416
- formatted_prompt = prompt_template.format(
417
- fingerprint=json.dumps(asdict(fingerprint), indent=2),
418
- changed_files_list=", ".join(changed_files),
419
- prompt_diff=diffs['prompt'],
420
- code_diff=diffs['code'],
421
- test_diff=diffs['test'],
422
- example_diff=diffs['example']
423
- )
424
-
425
- # Invoke the LLM
426
- llm_response = llm_invoke(prompt=formatted_prompt)
427
- response_obj = llm_response.get('result')
428
-
429
- # Validate the response object
430
- if not isinstance(response_obj, LLMConflictResolutionOutput):
529
+ else:
431
530
  return SyncDecision(
432
- operation="fail_and_request_manual_merge",
433
- reason=f"LLM did not return the expected Pydantic object. Got type: {type(response_obj).__name__}",
434
- details={"raw_response": str(response_obj)}
531
+ operation='test',
532
+ reason='Test file missing - regenerate tests',
533
+ details={'missing_files': missing_files, 'code_path': str(paths['code'])},
534
+ estimated_cost=1.0,
535
+ confidence=0.85
435
536
  )
537
+
538
+ # Fallback - regenerate everything
539
+ return SyncDecision(
540
+ operation='generate',
541
+ reason='Multiple files missing - regenerate from prompt',
542
+ details={'missing_files': missing_files},
543
+ estimated_cost=2.0,
544
+ confidence=0.80
545
+ )
436
546
 
437
- next_op = response_obj.next_operation
438
- reason = response_obj.reason
439
- confidence = response_obj.confidence
440
547
 
441
- if confidence < 0.75:
442
- return SyncDecision(
443
- operation="fail_and_request_manual_merge",
444
- reason=f"LLM analysis confidence ({confidence:.2f}) is below threshold. "
445
- f"LLM suggestion was: '{next_op}' - {reason}",
446
- details=asdict(response_obj)
447
- )
548
+ def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False) -> bool:
549
+ """
550
+ Check if workflow is complete considering skip flags.
551
+
552
+ Args:
553
+ paths: Dict mapping file types to their expected Path objects
554
+ skip_tests: If True, test files are not required for completion
555
+ skip_verify: If True, verification operations are not required
556
+
557
+ Returns:
558
+ True if all required files exist for the current workflow configuration
559
+ """
560
+ required_files = ['code', 'example']
561
+
562
+ if not skip_tests:
563
+ required_files.append('test')
448
564
 
449
- return SyncDecision(
450
- operation=next_op,
451
- reason=f"LLM analysis: {reason}",
452
- details=asdict(response_obj)
453
- )
565
+ return all(paths[f].exists() for f in required_files)
454
566
 
455
- except Exception as e:
456
- return SyncDecision(
457
- operation="fail_and_request_manual_merge",
458
- reason=f"LLM conflict analysis failed: {e}",
459
- details={"raw_response": str(locals().get('llm_response', {}).get('result'))}
460
- )
461
567
 
568
+ def check_for_dependencies(prompt_content: str) -> bool:
569
+ """Check if prompt contains actual dependency indicators that need auto-deps processing."""
570
+ # Only check for specific XML tags that indicate actual dependencies
571
+ xml_dependency_indicators = [
572
+ '<include>',
573
+ '<web>',
574
+ '<shell>'
575
+ ]
576
+
577
+ # Check for explicit dependency management mentions
578
+ explicit_dependency_indicators = [
579
+ 'auto-deps',
580
+ 'auto_deps',
581
+ 'dependencies needed',
582
+ 'requires dependencies',
583
+ 'include dependencies'
584
+ ]
585
+
586
+ prompt_lower = prompt_content.lower()
587
+
588
+ # Check for XML tags (case-sensitive for proper XML)
589
+ has_xml_deps = any(indicator in prompt_content for indicator in xml_dependency_indicators)
590
+
591
+ # Check for explicit dependency mentions
592
+ has_explicit_deps = any(indicator in prompt_lower for indicator in explicit_dependency_indicators)
593
+
594
+ return has_xml_deps or has_explicit_deps
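As a hypothetical illustration, a prompt containing a tag such as

    <include>path/to/helper_module.py</include>

(or one of the explicit phrases listed above, e.g. "requires dependencies") makes check_for_dependencies return True, which routes the unit through auto-deps before generate.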
462
595
 
463
- # --- Main Decision Function ---
464
596
 
465
- def determine_sync_operation(
466
- basename: str,
467
- language: str,
468
- target_coverage: float = 80.0
469
- ) -> SyncDecision:
597
+ def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
470
598
  """
471
- Analyzes a PDD unit's state and determines the next operation.
599
+ Core decision-making function for sync operations with skip flag awareness.
600
+
601
+ Args:
602
+ basename: The base name for the PDD unit
603
+ language: The programming language
604
+ target_coverage: Desired test coverage percentage
605
+ budget: Maximum budget for operations
606
+ log_mode: If True, skip locking entirely for read-only analysis
607
+ prompts_dir: Directory containing prompt files
608
+ skip_tests: If True, skip test generation and execution
609
+ skip_verify: If True, skip verification operations
610
+
611
+ Returns:
612
+ SyncDecision object with the recommended operation
613
+ """
614
+
615
+ if log_mode:
616
+ # Skip locking for read-only analysis
617
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
618
+ else:
619
+ # Normal exclusive locking for actual operations
620
+ with SyncLock(basename, language) as lock:
621
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
472
622
 
473
- This function is the core of the `pdd sync` command, providing a deterministic,
474
- reliable, and safe decision based on runtime signals and file fingerprints.
475
623
 
624
+ def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
625
+ """
626
+ Perform the sync state analysis without locking concerns.
627
+
476
628
  Args:
477
- basename: The base name of the PDD unit (e.g., 'calculator').
478
- language: The programming language of the unit (e.g., 'python').
479
- target_coverage: The desired test coverage percentage.
480
-
629
+ basename: The base name for the PDD unit
630
+ language: The programming language
631
+ target_coverage: Desired test coverage percentage
632
+ budget: Maximum budget for operations
633
+ prompts_dir: Directory containing prompt files
634
+ skip_tests: If True, skip test generation and execution
635
+ skip_verify: If True, skip verification operations
636
+
481
637
  Returns:
482
- A SyncDecision object with the recommended operation and reason.
638
+ SyncDecision object with the recommended operation
483
639
  """
484
- with SyncLock(basename, language):
485
- # 1. Check Runtime Signals First (highest priority)
486
- run_report = read_run_report(basename, language)
487
- if run_report:
488
- if run_report.exit_code != 0:
640
+ # 1. Check Runtime Signals First (Highest Priority)
641
+ # Workflow Order (from whitepaper):
642
+ # 1. auto-deps (find context/dependencies)
643
+ # 2. generate (create code module)
644
+ # 3. example (create usage example)
645
+ # 4. crash (resolve crashes if code doesn't run)
646
+ # 5. verify (verify example runs correctly after crash fix)
647
+ # 6. test (generate unit tests)
648
+ # 7. fix (resolve bugs found by tests)
649
+ # 8. update (sync changes back to prompt)
650
+
651
+ # Read fingerprint early since we need it for crash verification
652
+ fingerprint = read_fingerprint(basename, language)
653
+
654
+ run_report = read_run_report(basename, language)
655
+ if run_report:
656
+ # Check test failures first (higher priority than exit code)
657
+ if run_report.tests_failed > 0:
658
+ return SyncDecision(
659
+ operation='fix',
660
+ reason=f'Test failures detected: {run_report.tests_failed} failed tests',
661
+ details={'tests_failed': run_report.tests_failed},
662
+ estimated_cost=1.5,
663
+ confidence=0.90
664
+ )
665
+
666
+ # Then check for runtime crashes (only if no test failures)
667
+ if run_report.exit_code != 0:
668
+ # Check if this was from a crash fix that needs verification
669
+ if fingerprint and fingerprint.command == 'crash':
670
+ return SyncDecision(
671
+ operation='verify',
672
+ reason='Previous crash was fixed - verify example runs correctly',
673
+ details={'previous_command': 'crash', 'previous_exit_code': run_report.exit_code},
674
+ estimated_cost=0.7,
675
+ confidence=0.90
676
+ )
677
+ else:
489
678
  return SyncDecision(
490
679
  operation='crash',
491
- reason=f"The last run exited with a non-zero code ({run_report.exit_code}). "
492
- "This indicates a crash that must be fixed.",
493
- details=asdict(run_report)
680
+ reason='Runtime error detected in last run',
681
+ details={'exit_code': run_report.exit_code},
682
+ estimated_cost=2.0,
683
+ confidence=0.95
494
684
  )
495
- if run_report.tests_failed > 0:
685
+
686
+ if run_report.coverage < target_coverage:
687
+ if skip_tests:
688
+ # When tests are skipped but coverage is low, consider workflow complete
689
+ # since we can't improve coverage without running tests
496
690
  return SyncDecision(
497
- operation='fix',
498
- reason=f"The last test run had {run_report.tests_failed} failing tests. "
499
- "These must be fixed.",
500
- details=asdict(run_report)
691
+ operation='all_synced',
692
+ reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% but tests skipped',
693
+ details={'current_coverage': run_report.coverage, 'target_coverage': target_coverage, 'tests_skipped': True},
694
+ estimated_cost=0.0,
695
+ confidence=0.90
501
696
  )
502
- if run_report.coverage < target_coverage:
697
+ else:
503
698
  return SyncDecision(
504
699
  operation='test',
505
- reason=f"Current test coverage ({run_report.coverage}%) is below the "
506
- f"target ({target_coverage}%). More tests are needed.",
507
- details=asdict(run_report)
700
+ reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}%',
701
+ details={'current_coverage': run_report.coverage, 'target_coverage': target_coverage},
702
+ estimated_cost=1.0,
703
+ confidence=0.85
508
704
  )
509
-
510
- # 2. Analyze File State
511
- paths = get_pdd_file_paths(basename, language)
512
- fingerprint = read_fingerprint(basename, language)
513
- current_hashes = calculate_current_hashes(paths)
705
+
706
+ # 2. Analyze File State
707
+ paths = get_pdd_file_paths(basename, language, prompts_dir)
708
+ current_hashes = calculate_current_hashes(paths)
709
+
710
+ # 3. Implement the Decision Tree
711
+ if not fingerprint:
712
+ # No Fingerprint (New or Untracked Unit)
713
+ if paths['prompt'].exists():
714
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
715
+ if check_for_dependencies(prompt_content):
716
+ return SyncDecision(
717
+ operation='auto-deps',
718
+ reason='New prompt with dependencies detected',
719
+ details={'prompt_path': str(paths['prompt'])},
720
+ estimated_cost=0.5,
721
+ confidence=0.80
722
+ )
723
+ else:
724
+ return SyncDecision(
725
+ operation='generate',
726
+ reason='New prompt ready for code generation',
727
+ details={'prompt_path': str(paths['prompt'])},
728
+ estimated_cost=1.0,
729
+ confidence=0.90
730
+ )
731
+ else:
732
+ return SyncDecision(
733
+ operation='nothing',
734
+ reason='No prompt file and no history - nothing to do',
735
+ details={},
736
+ estimated_cost=0.0,
737
+ confidence=1.0
738
+ )
739
+
740
+ # CRITICAL FIX: Validate expected files exist before hash comparison
741
+ if fingerprint:
742
+ file_validation = validate_expected_files(fingerprint, paths)
743
+ missing_expected_files = [
744
+ file_type for file_type, exists in file_validation.items()
745
+ if not exists
746
+ ]
514
747
 
515
- # 3. Implement the Decision Tree
748
+ if missing_expected_files:
749
+ # Files are missing that should exist - need to regenerate
750
+ # This prevents the incorrect analyze_conflict decision
751
+ return _handle_missing_expected_files(
752
+ missing_expected_files, paths, fingerprint, basename, language, prompts_dir, skip_tests, skip_verify
753
+ )
754
+
755
+ # Compare hashes only for files that actually exist (prevents None != "hash" false positives)
756
+ changes = []
757
+ if fingerprint:
758
+ if current_hashes.get('prompt_hash') != fingerprint.prompt_hash:
759
+ changes.append('prompt')
760
+ # Only compare hashes for files that exist
761
+ if paths['code'].exists() and current_hashes.get('code_hash') != fingerprint.code_hash:
762
+ changes.append('code')
763
+ if paths['example'].exists() and current_hashes.get('example_hash') != fingerprint.example_hash:
764
+ changes.append('example')
765
+ if paths['test'].exists() and current_hashes.get('test_hash') != fingerprint.test_hash:
766
+ changes.append('test')
767
+
768
+ if not changes:
769
+ # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
770
+ if _is_workflow_complete(paths, skip_tests, skip_verify):
771
+ return SyncDecision(
772
+ operation='nothing',
773
+ reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
774
+ details={'skip_tests': skip_tests, 'skip_verify': skip_verify},
775
+ estimated_cost=0.0,
776
+ confidence=1.0
777
+ )
778
+
779
+ # Progress workflow considering skip flags
780
+ if paths['code'].exists() and not paths['example'].exists():
781
+ return SyncDecision(
782
+ operation='example',
783
+ reason='Code exists but example missing - progress workflow',
784
+ details={'code_path': str(paths['code'])},
785
+ estimated_cost=0.5,
786
+ confidence=0.85
787
+ )
516
788
 
517
- # Case: No Fingerprint (new or untracked unit)
518
- if not fingerprint:
789
+ if (paths['code'].exists() and paths['example'].exists() and
790
+ not skip_tests and not paths['test'].exists()):
791
+ return SyncDecision(
792
+ operation='test',
793
+ reason='Code and example exist but test missing - progress workflow',
794
+ details={'code_path': str(paths['code']), 'example_path': str(paths['example'])},
795
+ estimated_cost=1.0,
796
+ confidence=0.85
797
+ )
798
+
799
+ # Some files are missing but no changes detected
800
+ if not paths['code'].exists():
519
801
  if paths['prompt'].exists():
802
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
803
+ if check_for_dependencies(prompt_content):
804
+ return SyncDecision(
805
+ operation='auto-deps',
806
+ reason='Missing code file, prompt has dependencies',
807
+ details={'prompt_path': str(paths['prompt'])},
808
+ estimated_cost=0.5,
809
+ confidence=0.80
810
+ )
811
+ else:
812
+ return SyncDecision(
813
+ operation='generate',
814
+ reason='Missing code file - generate from prompt',
815
+ details={'prompt_path': str(paths['prompt'])},
816
+ estimated_cost=1.0,
817
+ confidence=0.90
818
+ )
819
+
820
+ elif len(changes) == 1:
821
+ # Simple Changes (Single File Modified)
822
+ change = changes[0]
823
+
824
+ if change == 'prompt':
825
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
826
+ if check_for_dependencies(prompt_content):
520
827
  return SyncDecision(
521
- operation='generate',
522
- reason="No fingerprint file found, but a prompt exists. This appears to be a new PDD unit."
828
+ operation='auto-deps',
829
+ reason='Prompt changed and dependencies need updating',
830
+ details={'changed_file': 'prompt'},
831
+ estimated_cost=0.5,
832
+ confidence=0.85
523
833
  )
524
834
  else:
525
835
  return SyncDecision(
526
- operation='nothing',
527
- reason="No PDD fingerprint and no prompt file found. Nothing to do."
836
+ operation='generate',
837
+ reason='Prompt changed - regenerate code',
838
+ details={'changed_file': 'prompt'},
839
+ estimated_cost=1.0,
840
+ confidence=0.90
528
841
  )
529
-
530
- # Compare current hashes with fingerprint
531
- fingerprint_hashes = {
532
- 'prompt_hash': fingerprint.prompt_hash,
533
- 'code_hash': fingerprint.code_hash,
534
- 'example_hash': fingerprint.example_hash,
535
- 'test_hash': fingerprint.test_hash,
536
- }
537
842
 
538
- changed_files = [
539
- file_type.replace('_hash', '')
540
- for file_type, f_hash in fingerprint_hashes.items()
541
- if current_hashes.get(file_type) != f_hash
542
- ]
843
+ elif change == 'code':
844
+ return SyncDecision(
845
+ operation='update',
846
+ reason='Code changed - update prompt to reflect changes',
847
+ details={'changed_file': 'code'},
848
+ estimated_cost=0.8,
849
+ confidence=0.85
850
+ )
543
851
 
544
- # Case: No Changes
545
- if not changed_files:
852
+ elif change == 'test':
546
853
  return SyncDecision(
547
- operation='nothing',
548
- reason="All files are synchronized with the last known good state."
854
+ operation='test',
855
+ reason='Test changed - run new tests',
856
+ details={'changed_file': 'test'},
857
+ estimated_cost=0.5,
858
+ confidence=0.80
549
859
  )
860
+
861
+ elif change == 'example':
862
+ return SyncDecision(
863
+ operation='verify',
864
+ reason='Example changed - verify new example',
865
+ details={'changed_file': 'example'},
866
+ estimated_cost=0.7,
867
+ confidence=0.80
868
+ )
869
+
870
+ else:
871
+ # Complex Changes (Multiple Files Modified / Conflicts)
872
+ return SyncDecision(
873
+ operation='analyze_conflict',
874
+ reason='Multiple files changed - requires conflict analysis',
875
+ details={'changed_files': changes},
876
+ estimated_cost=2.0,
877
+ confidence=0.70
878
+ )
879
+
880
+ # Fallback - should not reach here normally
881
+ return SyncDecision(
882
+ operation='nothing',
883
+ reason='No clear operation determined',
884
+ details={'fingerprint_exists': fingerprint is not None, 'changes': changes},
885
+ estimated_cost=0.0,
886
+ confidence=0.50
887
+ )
888
+
550
889
 
551
- details = {"changed_files": changed_files}
552
- # Case: Simple Changes (Single File Modified)
553
- if len(changed_files) == 1:
554
- change = changed_files[0]
555
- if change == 'prompt':
556
- return SyncDecision('generate', "The prompt has been modified. Code should be regenerated.", details)
557
- if change == 'code':
558
- return SyncDecision('update', "The code has been modified manually. The prompt should be updated.", details)
559
- if change == 'test':
560
- return SyncDecision('test', "The test file has been modified. The new tests should be run.", details)
561
- if change == 'example':
562
- # 'verify' is a pdd command to run the example file
563
- return SyncDecision('verify', "The example file has been modified. It should be verified.", details)
564
-
565
- # Case: Complex Changes (Multiple Files Modified / Conflicts)
566
- if len(changed_files) > 1:
890
+ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
891
+ """
892
+ Resolve complex sync conflicts using an LLM.
893
+
894
+ Args:
895
+ basename: The base name for the PDD unit
896
+ language: The programming language
897
+ fingerprint: The last known good state
898
+ changed_files: List of files that have changed
899
+ prompts_dir: Directory containing prompt files
900
+
901
+ Returns:
902
+ SyncDecision object with LLM-recommended operation
903
+ """
904
+
905
+ try:
906
+ # 1. Load LLM Prompt
907
+ prompt_template = load_prompt_template("sync_analysis_LLM")
908
+ if not prompt_template:
909
+ # Fallback if template not found
567
910
  return SyncDecision(
568
- operation='analyze_conflict',
569
- reason=f"Multiple files have been modified since the last sync: {', '.join(changed_files)}.",
570
- details=details
911
+ operation='fail_and_request_manual_merge',
912
+ reason='LLM analysis template not found - manual merge required',
913
+ details={'error': 'Template not available'},
914
+ estimated_cost=0.0,
915
+ confidence=0.0
571
916
  )
917
+
918
+ # 2. Gather file paths and diffs
919
+ paths = get_pdd_file_paths(basename, language, prompts_dir)
920
+
921
+ # Generate diffs for changed files
922
+ diffs = {}
923
+ for file_type in changed_files:
924
+ if file_type in paths and paths[file_type].exists():
925
+ diffs[f"{file_type}_diff"] = get_git_diff(paths[file_type])
926
+ diffs[f"{file_type}_path"] = str(paths[file_type])
927
+ else:
928
+ diffs[f"{file_type}_diff"] = ""
929
+ diffs[f"{file_type}_path"] = str(paths.get(file_type, ''))
930
+
931
+ # 3. Format the prompt
932
+ formatted_prompt = prompt_template.format(
933
+ fingerprint=json.dumps({
934
+ 'pdd_version': fingerprint.pdd_version,
935
+ 'timestamp': fingerprint.timestamp,
936
+ 'command': fingerprint.command,
937
+ 'prompt_hash': fingerprint.prompt_hash,
938
+ 'code_hash': fingerprint.code_hash,
939
+ 'example_hash': fingerprint.example_hash,
940
+ 'test_hash': fingerprint.test_hash
941
+ }, indent=2),
942
+ changed_files_list=', '.join(changed_files),
943
+ prompt_diff=diffs.get('prompt_diff', ''),
944
+ code_diff=diffs.get('code_diff', ''),
945
+ example_diff=diffs.get('example_diff', ''),
946
+ test_diff=diffs.get('test_diff', ''),
947
+ prompt_path=diffs.get('prompt_path', ''),
948
+ code_path=diffs.get('code_path', ''),
949
+ example_path=diffs.get('example_path', ''),
950
+ test_path=diffs.get('test_path', '')
951
+ )
952
+
953
+ # 4. Invoke LLM with caching for determinism
954
+ response = llm_invoke(
955
+ prompt=formatted_prompt,
956
+ input_json={},
957
+ strength=0.7, # Use a consistent strength for determinism
958
+ temperature=0.0, # Use temperature 0 for deterministic output
959
+ verbose=False
960
+ )
961
+
962
+ # 5. Parse and validate response
963
+ try:
964
+ llm_result = json.loads(response['result'])
572
965
 
573
- # Fallback, should not be reached
574
- return SyncDecision('nothing', 'Analysis complete, no operation required.')
966
+ # Validate required keys
967
+ required_keys = ['next_operation', 'reason', 'confidence']
968
+ if not all(key in llm_result for key in required_keys):
969
+ raise ValueError("Missing required keys in LLM response")
970
+
971
+ # Check confidence threshold
972
+ confidence = float(llm_result.get('confidence', 0.0))
973
+ if confidence < 0.75:
974
+ return SyncDecision(
975
+ operation='fail_and_request_manual_merge',
976
+ reason=f'LLM confidence too low ({confidence:.2f}) - manual merge required',
977
+ details={'llm_response': llm_result, 'changed_files': changed_files},
978
+ estimated_cost=response.get('cost', 0.0),
979
+ confidence=confidence
980
+ )
981
+
982
+ # Extract operation and details
983
+ operation = llm_result['next_operation']
984
+ reason = llm_result['reason']
985
+ merge_strategy = llm_result.get('merge_strategy', {})
986
+ follow_up_operations = llm_result.get('follow_up_operations', [])
987
+
988
+ return SyncDecision(
989
+ operation=operation,
990
+ reason=f"LLM analysis: {reason}",
991
+ details={
992
+ 'llm_response': llm_result,
993
+ 'changed_files': changed_files,
994
+ 'merge_strategy': merge_strategy,
995
+ 'follow_up_operations': follow_up_operations
996
+ },
997
+ estimated_cost=response.get('cost', 0.0),
998
+ confidence=confidence,
999
+ prerequisites=follow_up_operations
1000
+ )
1001
+
1002
+ except (json.JSONDecodeError, ValueError, KeyError) as e:
1003
+ # Invalid LLM response - fallback to manual merge
1004
+ return SyncDecision(
1005
+ operation='fail_and_request_manual_merge',
1006
+ reason=f'Invalid LLM response: {e} - manual merge required',
1007
+ details={'error': str(e), 'raw_response': response.get('result', ''), 'changed_files': changed_files},
1008
+ estimated_cost=response.get('cost', 0.0),
1009
+ confidence=0.0
1010
+ )
1011
+
1012
+ except Exception as e:
1013
+ # Any other error - fallback to manual merge
1014
+ return SyncDecision(
1015
+ operation='fail_and_request_manual_merge',
1016
+ reason=f'Error during LLM analysis: {e} - manual merge required',
1017
+ details={'error': str(e), 'changed_files': changed_files},
1018
+ estimated_cost=0.0,
1019
+ confidence=0.0
1020
+ )
1021
+
1022
+
1023
+ if __name__ == "__main__":
1024
+ # Example usage
1025
+ if len(sys.argv) != 3:
1026
+ print("Usage: python sync_determine_operation.py <basename> <language>")
1027
+ sys.exit(1)
1028
+
1029
+ basename = sys.argv[1]
1030
+ language = sys.argv[2]
1031
+
1032
+ decision = sync_determine_operation(basename, language, target_coverage=90.0)
1033
+
1034
+ print(f"Operation: {decision.operation}")
1035
+ print(f"Reason: {decision.reason}")
1036
+ print(f"Estimated Cost: ${decision.estimated_cost:.2f}")
1037
+ print(f"Confidence: {decision.confidence:.2f}")
1038
+ if decision.details:
1039
+ print(f"Details: {json.dumps(decision.details, indent=2)}")