pdd-cli 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pdd/__init__.py +1 -1
  2. pdd/auto_deps_main.py +1 -1
  3. pdd/auto_update.py +73 -78
  4. pdd/bug_main.py +3 -3
  5. pdd/bug_to_unit_test.py +46 -38
  6. pdd/change.py +20 -13
  7. pdd/change_main.py +223 -163
  8. pdd/cli.py +192 -95
  9. pdd/cmd_test_main.py +51 -36
  10. pdd/code_generator_main.py +3 -2
  11. pdd/conflicts_main.py +1 -1
  12. pdd/construct_paths.py +221 -19
  13. pdd/context_generator_main.py +27 -12
  14. pdd/crash_main.py +44 -50
  15. pdd/data/llm_model.csv +1 -1
  16. pdd/detect_change_main.py +1 -1
  17. pdd/fix_code_module_errors.py +12 -0
  18. pdd/fix_main.py +2 -2
  19. pdd/fix_verification_errors.py +13 -0
  20. pdd/fix_verification_main.py +3 -3
  21. pdd/generate_output_paths.py +113 -21
  22. pdd/generate_test.py +53 -16
  23. pdd/llm_invoke.py +162 -0
  24. pdd/logo_animation.py +455 -0
  25. pdd/preprocess_main.py +1 -1
  26. pdd/process_csv_change.py +1 -1
  27. pdd/prompts/extract_program_code_fix_LLM.prompt +2 -1
  28. pdd/prompts/sync_analysis_LLM.prompt +82 -0
  29. pdd/split_main.py +1 -1
  30. pdd/sync_animation.py +643 -0
  31. pdd/sync_determine_operation.py +1039 -0
  32. pdd/sync_main.py +333 -0
  33. pdd/sync_orchestration.py +639 -0
  34. pdd/trace_main.py +1 -1
  35. pdd/update_main.py +7 -2
  36. pdd/xml_tagger.py +15 -6
  37. pdd_cli-0.0.42.dist-info/METADATA +307 -0
  38. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/RECORD +42 -36
  39. pdd_cli-0.0.40.dist-info/METADATA +0 -269
  40. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/WHEEL +0 -0
  41. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/entry_points.txt +0 -0
  42. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/licenses/LICENSE +0 -0
  43. {pdd_cli-0.0.40.dist-info → pdd_cli-0.0.42.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1039 @@
1
+ """
2
+ sync_determine_operation.py
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
4
+
5
+ Core decision-making logic for the `pdd sync` command.
6
+ Implements fingerprint-based state analysis and deterministic operation selection.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ import json
12
+ import hashlib
13
+ import subprocess
14
+ from pathlib import Path
15
+ from dataclasses import dataclass, field
16
+ from typing import Dict, List, Optional, Any
18
+ import psutil
19
+
20
+ # Platform-specific imports for file locking
21
+ try:
22
+ import fcntl
23
+ HAS_FCNTL = True
24
+ except ImportError:
25
+ HAS_FCNTL = False
26
+
27
+ try:
28
+ import msvcrt
29
+ HAS_MSVCRT = True
30
+ except ImportError:
31
+ HAS_MSVCRT = False
32
+
33
+ # Import PDD internal modules
34
+ from pdd.construct_paths import construct_paths
35
+ from pdd.load_prompt_template import load_prompt_template
36
+ from pdd.llm_invoke import llm_invoke
38
+
39
+ # Constants - Use functions for dynamic path resolution
40
+ def get_pdd_dir():
41
+ """Get the .pdd directory relative to current working directory."""
42
+ return Path.cwd() / '.pdd'
43
+
44
+ def get_meta_dir():
45
+ """Get the metadata directory."""
46
+ return get_pdd_dir() / 'meta'
47
+
48
+ def get_locks_dir():
49
+ """Get the locks directory."""
50
+ return get_pdd_dir() / 'locks'
51
+
52
+ # For backward compatibility (note: these are resolved once at import time; prefer the functions above when the working directory may change)
53
+ PDD_DIR = get_pdd_dir()
54
+ META_DIR = get_meta_dir()
55
+ LOCKS_DIR = get_locks_dir()
56
+
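+ # Illustrative on-disk layout implied by the helpers above (a sketch based on
+ # the file names used by SyncLock, read_fingerprint, and read_run_report below;
+ # directories are created lazily, not at import time):
+ #
+ #   .pdd/
+ #       meta/
+ #           <basename>_<language>.json        # fingerprint (last known good state)
+ #           <basename>_<language>_run.json    # run report from the last test run
+ #       locks/
+ #           <basename>_<language>.lock        # PID-bearing lock file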
57
+ # Export constants for other modules
58
+ __all__ = ['PDD_DIR', 'META_DIR', 'LOCKS_DIR', 'Fingerprint', 'RunReport', 'SyncDecision',
59
+ 'sync_determine_operation', 'analyze_conflict_with_llm']
60
+
61
+
62
+ @dataclass
63
+ class Fingerprint:
64
+ """Represents the last known good state of a PDD unit."""
65
+ pdd_version: str
66
+ timestamp: str # ISO 8601 format
67
+ command: str # e.g., "generate", "fix"
68
+ prompt_hash: Optional[str]
69
+ code_hash: Optional[str]
70
+ example_hash: Optional[str]
71
+ test_hash: Optional[str]
72
+
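+ # A minimal example of the persisted fingerprint JSON (illustrative values;
+ # the field set mirrors the dataclass above and read_fingerprint() below):
+ #
+ # {
+ #     "pdd_version": "0.0.42",
+ #     "timestamp": "2025-01-01T12:00:00+00:00",
+ #     "command": "generate",
+ #     "prompt_hash": "3a7bd3e2...",
+ #     "code_hash": "9f86d081...",
+ #     "example_hash": null,
+ #     "test_hash": null
+ # }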
73
+
74
+ @dataclass
75
+ class RunReport:
76
+ """Represents the results from the last test run."""
77
+ timestamp: str
78
+ exit_code: int
79
+ tests_passed: int
80
+ tests_failed: int
81
+ coverage: float
82
+
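+ # Example run report JSON (illustrative values), as parsed by read_run_report():
+ #
+ # {"timestamp": "2025-01-01T12:05:00+00:00", "exit_code": 0,
+ #  "tests_passed": 12, "tests_failed": 0, "coverage": 87.5}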
83
+
84
+ @dataclass
85
+ class SyncDecision:
86
+ """Represents a decision about what PDD operation to run next."""
87
+ operation: str # e.g., 'auto-deps', 'generate', 'example', 'crash', 'verify', 'test', 'fix', 'update', 'analyze_conflict', 'all_synced', 'fail_and_request_manual_merge', 'nothing'
88
+ reason: str
89
+ details: Dict[str, Any] = field(default_factory=dict)
90
+ estimated_cost: float = 0.0
91
+ confidence: float = 1.0
92
+ prerequisites: List[str] = field(default_factory=list)
93
+
94
+
95
+ class SyncLock:
96
+ """Context manager for handling file-descriptor based locking."""
97
+
98
+ def __init__(self, basename: str, language: str):
99
+ self.basename = basename
100
+ self.language = language
101
+ self.lock_file = get_locks_dir() / f"{basename}_{language}.lock"
102
+ self.fd = None
103
+ self.current_pid = os.getpid()
104
+
105
+ def __enter__(self):
106
+ self.acquire()
107
+ return self
108
+
109
+ def __exit__(self, exc_type, exc_val, exc_tb):
110
+ self.release()
111
+
112
+ def acquire(self):
113
+ """Acquire the lock, handling stale locks and re-entrancy."""
114
+ # Ensure lock directory exists
115
+ self.lock_file.parent.mkdir(parents=True, exist_ok=True)
116
+
117
+ try:
118
+ # Check if lock file exists
119
+ if self.lock_file.exists():
120
+ try:
121
+ # Read PID from lock file
122
+ stored_pid = int(self.lock_file.read_text().strip())
123
+
124
+ # Same process already holds the lock; treat as a successful re-entrant acquire (no fd is taken, so this instance's release() is a no-op)
125
+ if stored_pid == self.current_pid:
126
+ return
127
+
128
+ # Check if the process is still running
129
+ if psutil.pid_exists(stored_pid):
130
+ raise TimeoutError(f"Lock held by running process {stored_pid}")
131
+
132
+ # Stale lock - remove it
133
+ self.lock_file.unlink(missing_ok=True)
134
+
135
+ except (ValueError, FileNotFoundError):
136
+ # Invalid lock file - remove it
137
+ self.lock_file.unlink(missing_ok=True)
138
+
139
+ # Create lock file and acquire file descriptor lock
140
+ self.lock_file.touch()
141
+ self.fd = open(self.lock_file, 'w')
142
+
143
+ if HAS_FCNTL:
144
+ # POSIX systems
145
+ fcntl.flock(self.fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
146
+ elif HAS_MSVCRT:
147
+ # Windows systems
148
+ msvcrt.locking(self.fd.fileno(), msvcrt.LK_NBLCK, 1)
149
+
150
+ # Write current PID to lock file
151
+ self.fd.write(str(self.current_pid))
152
+ self.fd.flush()
153
+
154
+ except (IOError, OSError) as e:
155
+ if self.fd:
156
+ self.fd.close()
157
+ self.fd = None
158
+ raise TimeoutError(f"Failed to acquire lock: {e}")
159
+
160
+ def release(self):
161
+ """Release the lock and clean up."""
162
+ if self.fd:
163
+ try:
164
+ if HAS_FCNTL:
165
+ fcntl.flock(self.fd.fileno(), fcntl.LOCK_UN)
166
+ elif HAS_MSVCRT:
167
+ msvcrt.locking(self.fd.fileno(), msvcrt.LK_UNLCK, 1)
168
+
169
+ self.fd.close()
170
+ self.fd = None
171
+
172
+ # Remove lock file
173
+ self.lock_file.unlink(missing_ok=True)
174
+
175
+ except (IOError, OSError):
176
+ # Best effort cleanup
177
+ pass
178
+
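+ # Minimal usage sketch (illustrative basename/language): the lock serializes
+ # sync runs for one unit and raises TimeoutError if another live process
+ # already holds it.
+ #
+ #     with SyncLock("calculator", "python"):
+ #         ...  # exclusive sync work for the calculator/python unit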
179
+
180
+ def get_extension(language: str) -> str:
181
+ """Get file extension for a programming language."""
182
+ extensions = {
183
+ 'python': 'py',
184
+ 'javascript': 'js',
185
+ 'typescript': 'ts',
186
+ 'java': 'java',
187
+ 'cpp': 'cpp',
188
+ 'c': 'c',
189
+ 'ruby': 'rb',
190
+ 'go': 'go',
191
+ 'rust': 'rs',
192
+ 'php': 'php',
193
+ 'swift': 'swift',
194
+ 'kotlin': 'kt',
195
+ 'scala': 'scala',
196
+ 'csharp': 'cs',
197
+ 'css': 'css',
198
+ 'html': 'html',
199
+ 'sql': 'sql',
200
+ 'shell': 'sh',
201
+ 'bash': 'sh',
202
+ 'powershell': 'ps1',
203
+ 'r': 'r',
204
+ 'matlab': 'm',
205
+ 'lua': 'lua',
206
+ 'perl': 'pl',
207
+ }
208
+ return extensions.get(language.lower(), language.lower())
209
+
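+ # e.g. get_extension('Python') -> 'py'; an unknown language such as 'zig'
+ # falls back to the lower-cased language name itself ('zig').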
210
+
211
+ def get_pdd_file_paths(basename: str, language: str, prompts_dir: str = "prompts") -> Dict[str, Path]:
212
+ """Returns a dictionary mapping file types to their expected Path objects."""
213
+ try:
214
+ # Use construct_paths to get configuration-aware paths
215
+ prompt_filename = f"{basename}_{language}.prompt"
216
+ prompt_path = str(Path(prompts_dir) / prompt_filename)
217
+
218
+ # Check if prompt file exists - if not, we can't proceed with construct_paths
219
+ if not Path(prompt_path).exists():
220
+ # Fall back to default path construction if prompt doesn't exist
221
+ extension = get_extension(language)
222
+ return {
223
+ 'prompt': Path(prompt_path),
224
+ 'code': Path(f"{basename}.{extension}"),
225
+ 'example': Path(f"{basename}_example.{extension}"),
226
+ 'test': Path(f"test_{basename}.{extension}")
227
+ }
228
+
229
+ input_file_paths = {
230
+ "prompt_file": prompt_path
231
+ }
232
+
233
+ # Only call construct_paths if the prompt file exists
234
+ resolved_config, input_strings, output_file_paths, detected_language = construct_paths(
235
+ input_file_paths=input_file_paths,
236
+ force=True, # Use force=True to avoid interactive prompts during sync
237
+ quiet=True,
238
+ command="generate",
239
+ command_options={}
240
+ )
241
+
242
+ # Extract paths from config as specified in the spec
243
+ # The spec shows: return { 'prompt': Path(config['prompt_file']), ... }
244
+ # But we need to map the output_file_paths keys to our expected structure
245
+
246
+ # For generate command, construct_paths returns these in output_file_paths:
247
+ # - 'output' or 'code_file' for the generated code
248
+ # For other commands, we need to construct the full set of paths
249
+
250
+ # Get the code file path from output_file_paths
251
+ code_path = output_file_paths.get('output', output_file_paths.get('code_file', ''))
252
+ if not code_path:
253
+ # Fallback to constructing from basename
254
+ extension = get_extension(language)
255
+ code_path = f"{basename}.{extension}"
256
+
257
+ # Get configured paths for example and test files using construct_paths
258
+ # Note: construct_paths requires files to exist, so we need to handle the case
259
+ # where code file doesn't exist yet (during initial sync startup)
260
+ try:
261
+ # Create a temporary empty code file if it doesn't exist for path resolution
262
+ code_path_obj = Path(code_path)
263
+ temp_code_created = False
264
+ if not code_path_obj.exists():
265
+ code_path_obj.parent.mkdir(parents=True, exist_ok=True)
266
+ code_path_obj.touch()
267
+ temp_code_created = True
268
+
269
+ try:
270
+ # Get example path using example command
271
+ _, _, example_output_paths, _ = construct_paths(
272
+ input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
273
+ force=True, quiet=True, command="example", command_options={}
274
+ )
275
+ example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
276
+
277
+ # Get test path using test command
278
+ _, _, test_output_paths, _ = construct_paths(
279
+ input_file_paths={"prompt_file": prompt_path, "code_file": code_path},
280
+ force=True, quiet=True, command="test", command_options={}
281
+ )
282
+ test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
283
+
284
+ finally:
285
+ # Clean up temporary file if we created it
286
+ if temp_code_created and code_path_obj.exists() and code_path_obj.stat().st_size == 0:
287
+ code_path_obj.unlink()
288
+
289
+ except Exception as e:
290
+ # Log the specific exception that's causing fallback to wrong paths
291
+ import logging
292
+ logger = logging.getLogger(__name__)
293
+ logger.warning(f"construct_paths failed in get_pdd_file_paths: {type(e).__name__}: {e}")
294
+ logger.warning(f"Falling back to .pddrc-aware path construction")
295
+ logger.warning(f"prompt_path: {prompt_path}, code_path: {code_path}")
296
+
297
+ # Improved fallback: try to use construct_paths with just prompt_file to get proper directory configs
298
+ try:
299
+ # Get configured directories by using construct_paths with just the prompt file
300
+ _, _, example_output_paths, _ = construct_paths(
301
+ input_file_paths={"prompt_file": prompt_path},
302
+ force=True, quiet=True, command="example", command_options={}
303
+ )
304
+ example_path = Path(example_output_paths.get('output', f"{basename}_example.{get_extension(language)}"))
305
+
306
+ _, _, test_output_paths, _ = construct_paths(
307
+ input_file_paths={"prompt_file": prompt_path},
308
+ force=True, quiet=True, command="test", command_options={}
309
+ )
310
+ test_path = Path(test_output_paths.get('output', f"test_{basename}.{get_extension(language)}"))
311
+
312
+ except Exception:
313
+ # Final fallback to deriving from code path if all else fails
314
+ code_path_obj = Path(code_path)
315
+ code_dir = code_path_obj.parent
316
+ code_stem = code_path_obj.stem
317
+ code_ext = code_path_obj.suffix
318
+ example_path = code_dir / f"{code_stem}_example{code_ext}"
319
+ test_path = code_dir / f"test_{code_stem}{code_ext}"
320
+
321
+ return {
322
+ 'prompt': Path(prompt_path),
323
+ 'code': Path(code_path),
324
+ 'example': example_path,
325
+ 'test': test_path
326
+ }
327
+
328
+ except Exception:
329
+ # Fallback to simple naming if construct_paths fails
330
+ extension = get_extension(language)
331
+ return {
332
+ 'prompt': Path(prompts_dir) / f"{basename}_{language}.prompt",
333
+ 'code': Path(f"{basename}.{extension}"),
334
+ 'example': Path(f"{basename}_example.{extension}"),
335
+ 'test': Path(f"test_{basename}.{extension}")
336
+ }
337
+
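+ # Assuming default naming (actual paths depend on .pddrc and construct_paths),
+ # get_pdd_file_paths("calculator", "python") would yield something like:
+ #     {'prompt': Path('prompts/calculator_python.prompt'),
+ #      'code': Path('calculator.py'),
+ #      'example': Path('calculator_example.py'),
+ #      'test': Path('test_calculator.py')}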
338
+
339
+ def calculate_sha256(file_path: Path) -> Optional[str]:
340
+ """Calculates the SHA256 hash of a file if it exists."""
341
+ if not file_path.exists():
342
+ return None
343
+
344
+ try:
345
+ hasher = hashlib.sha256()
346
+ with open(file_path, 'rb') as f:
347
+ for chunk in iter(lambda: f.read(4096), b""):
348
+ hasher.update(chunk)
349
+ return hasher.hexdigest()
350
+ except (IOError, OSError):
351
+ return None
352
+
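+ # Functionally equivalent to hashlib.sha256(file_path.read_bytes()).hexdigest(),
+ # but streamed in 4 KiB chunks so large files do not have to fit in memory.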
353
+
354
+ def read_fingerprint(basename: str, language: str) -> Optional[Fingerprint]:
355
+ """Reads and validates the JSON fingerprint file."""
356
+ meta_dir = get_meta_dir()
357
+ meta_dir.mkdir(parents=True, exist_ok=True)
358
+ fingerprint_file = meta_dir / f"{basename}_{language}.json"
359
+
360
+ if not fingerprint_file.exists():
361
+ return None
362
+
363
+ try:
364
+ with open(fingerprint_file, 'r') as f:
365
+ data = json.load(f)
366
+
367
+ return Fingerprint(
368
+ pdd_version=data['pdd_version'],
369
+ timestamp=data['timestamp'],
370
+ command=data['command'],
371
+ prompt_hash=data.get('prompt_hash'),
372
+ code_hash=data.get('code_hash'),
373
+ example_hash=data.get('example_hash'),
374
+ test_hash=data.get('test_hash')
375
+ )
376
+ except (json.JSONDecodeError, KeyError, IOError):
377
+ return None
378
+
379
+
380
+ def read_run_report(basename: str, language: str) -> Optional[RunReport]:
381
+ """Reads and validates the JSON run report file."""
382
+ meta_dir = get_meta_dir()
383
+ meta_dir.mkdir(parents=True, exist_ok=True)
384
+ run_report_file = meta_dir / f"{basename}_{language}_run.json"
385
+
386
+ if not run_report_file.exists():
387
+ return None
388
+
389
+ try:
390
+ with open(run_report_file, 'r') as f:
391
+ data = json.load(f)
392
+
393
+ return RunReport(
394
+ timestamp=data['timestamp'],
395
+ exit_code=data['exit_code'],
396
+ tests_passed=data['tests_passed'],
397
+ tests_failed=data['tests_failed'],
398
+ coverage=data['coverage']
399
+ )
400
+ except (json.JSONDecodeError, KeyError, IOError):
401
+ return None
402
+
403
+
404
+ def calculate_current_hashes(paths: Dict[str, Path]) -> Dict[str, Optional[str]]:
405
+ """Computes the hashes for all current files on disk."""
406
+ # Return hash keys that match what the fingerprint expects
407
+ return {
408
+ f"{file_type}_hash": calculate_sha256(file_path)
409
+ for file_type, file_path in paths.items()
410
+ }
411
+
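+ # Example result for a unit whose code file does not exist yet (missing files
+ # hash to None): {'prompt_hash': 'ab12...', 'code_hash': None,
+ # 'example_hash': None, 'test_hash': None}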
412
+
413
+ def get_git_diff(file_path: Path) -> str:
414
+ """Get git diff for a file against HEAD."""
415
+ try:
416
+ result = subprocess.run(
417
+ # Resolve to an absolute path so the target stays valid when cwd is changed below
+ ['git', 'diff', 'HEAD', str(file_path.resolve())],
418
+ capture_output=True,
419
+ text=True,
420
+ cwd=file_path.parent if file_path.parent.exists() else Path.cwd()
421
+ )
422
+
423
+ if result.returncode == 0:
424
+ return result.stdout
425
+ else:
426
+ return ""
427
+ except (subprocess.SubprocessError, FileNotFoundError):
428
+ return ""
429
+
430
+
431
+ def validate_expected_files(fingerprint: Optional[Fingerprint], paths: Dict[str, Path]) -> Dict[str, bool]:
432
+ """
433
+ Validate that files expected to exist based on fingerprint actually exist.
434
+
435
+ Args:
436
+ fingerprint: The last known good state fingerprint
437
+ paths: Dict mapping file types to their expected Path objects
438
+
439
+ Returns:
440
+ Dict mapping file types to existence status
441
+ """
442
+ validation = {}
443
+
444
+ if not fingerprint:
445
+ return validation
446
+
447
+ # Check each file type that has a hash in the fingerprint
448
+ if fingerprint.code_hash:
449
+ validation['code'] = paths['code'].exists()
450
+ if fingerprint.example_hash:
451
+ validation['example'] = paths['example'].exists()
452
+ if fingerprint.test_hash:
453
+ validation['test'] = paths['test'].exists()
454
+
455
+ return validation
456
+
457
+
458
+ def _handle_missing_expected_files(
459
+ missing_files: List[str],
460
+ paths: Dict[str, Path],
461
+ fingerprint: Fingerprint,
462
+ basename: str,
463
+ language: str,
464
+ prompts_dir: str,
465
+ skip_tests: bool = False,
466
+ skip_verify: bool = False
467
+ ) -> SyncDecision:
468
+ """
469
+ Handle the case where expected files are missing.
470
+ Determine the appropriate recovery operation.
471
+
472
+ Args:
473
+ missing_files: List of file types that are missing
474
+ paths: Dict mapping file types to their expected Path objects
475
+ fingerprint: The last known good state fingerprint
476
+ basename: The base name for the PDD unit
477
+ language: The programming language
478
+ prompts_dir: Directory containing prompt files
479
+ skip_tests: If True, skip test generation
480
+ skip_verify: If True, skip verification operations
481
+
482
+ Returns:
483
+ SyncDecision object with the appropriate recovery operation
484
+ """
485
+
486
+ # Priority: regenerate from the earliest missing component
487
+ if 'code' in missing_files:
488
+ # Code file missing - start from the beginning
489
+ if paths['prompt'].exists():
490
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
491
+ if check_for_dependencies(prompt_content):
492
+ return SyncDecision(
493
+ operation='auto-deps',
494
+ reason='Code file missing, prompt has dependencies - regenerate from auto-deps',
495
+ details={'missing_files': missing_files, 'prompt_path': str(paths['prompt'])},
496
+ estimated_cost=0.5,
497
+ confidence=0.85
498
+ )
499
+ else:
500
+ return SyncDecision(
501
+ operation='generate',
502
+ reason='Code file missing - regenerate from prompt',
503
+ details={'missing_files': missing_files, 'prompt_path': str(paths['prompt'])},
504
+ estimated_cost=1.0,
505
+ confidence=0.90
506
+ )
507
+
508
+ elif 'example' in missing_files and paths['code'].exists():
509
+ # Code exists but example missing
510
+ return SyncDecision(
511
+ operation='example',
512
+ reason='Example file missing - regenerate example',
513
+ details={'missing_files': missing_files, 'code_path': str(paths['code'])},
514
+ estimated_cost=0.5,
515
+ confidence=0.85
516
+ )
517
+
518
+ elif 'test' in missing_files and paths['code'].exists() and paths['example'].exists():
519
+ # Code and example exist but test missing
520
+ if skip_tests:
521
+ # Skip test generation if --skip-tests flag is used
522
+ return SyncDecision(
523
+ operation='nothing',
524
+ reason='Test file missing but --skip-tests specified - workflow complete',
525
+ details={'missing_files': missing_files, 'skip_tests': True},
526
+ estimated_cost=0.0,
527
+ confidence=1.0
528
+ )
529
+ else:
530
+ return SyncDecision(
531
+ operation='test',
532
+ reason='Test file missing - regenerate tests',
533
+ details={'missing_files': missing_files, 'code_path': str(paths['code'])},
534
+ estimated_cost=1.0,
535
+ confidence=0.85
536
+ )
537
+
538
+ # Fallback - regenerate everything
539
+ return SyncDecision(
540
+ operation='generate',
541
+ reason='Multiple files missing - regenerate from prompt',
542
+ details={'missing_files': missing_files},
543
+ estimated_cost=2.0,
544
+ confidence=0.80
545
+ )
546
+
547
+
548
+ def _is_workflow_complete(paths: Dict[str, Path], skip_tests: bool = False, skip_verify: bool = False) -> bool:
549
+ """
550
+ Check if workflow is complete considering skip flags.
551
+
552
+ Args:
553
+ paths: Dict mapping file types to their expected Path objects
554
+ skip_tests: If True, test files are not required for completion
555
+ skip_verify: If True, verification operations are not required
556
+
557
+ Returns:
558
+ True if all required files exist for the current workflow configuration
559
+ """
560
+ required_files = ['code', 'example']
561
+
562
+ if not skip_tests:
563
+ required_files.append('test')
564
+
565
+ return all(paths[f].exists() for f in required_files)
566
+
567
+
568
+ def check_for_dependencies(prompt_content: str) -> bool:
569
+ """Check if prompt contains actual dependency indicators that need auto-deps processing."""
570
+ # Only check for specific XML tags that indicate actual dependencies
571
+ xml_dependency_indicators = [
572
+ '<include>',
573
+ '<web>',
574
+ '<shell>'
575
+ ]
576
+
577
+ # Check for explicit dependency management mentions
578
+ explicit_dependency_indicators = [
579
+ 'auto-deps',
580
+ 'auto_deps',
581
+ 'dependencies needed',
582
+ 'requires dependencies',
583
+ 'include dependencies'
584
+ ]
585
+
586
+ prompt_lower = prompt_content.lower()
587
+
588
+ # Check for XML tags (case-sensitive for proper XML)
589
+ has_xml_deps = any(indicator in prompt_content for indicator in xml_dependency_indicators)
590
+
591
+ # Check for explicit dependency mentions
592
+ has_explicit_deps = any(indicator in prompt_lower for indicator in explicit_dependency_indicators)
593
+
594
+ return has_xml_deps or has_explicit_deps
595
+
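+ # Behavior implied by the indicator lists above, for instance:
+ #     check_for_dependencies("<include>utils.py</include>")         -> True
+ #     check_for_dependencies("This module requires dependencies")   -> True
+ #     check_for_dependencies("Write a Fibonacci function")          -> False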
596
+
597
+ def sync_determine_operation(basename: str, language: str, target_coverage: float, budget: float = 10.0, log_mode: bool = False, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
598
+ """
599
+ Core decision-making function for sync operations with skip flag awareness.
600
+
601
+ Args:
602
+ basename: The base name for the PDD unit
603
+ language: The programming language
604
+ target_coverage: Desired test coverage percentage
605
+ budget: Maximum budget for operations
606
+ log_mode: If True, skip locking entirely for read-only analysis
607
+ prompts_dir: Directory containing prompt files
608
+ skip_tests: If True, skip test generation and execution
609
+ skip_verify: If True, skip verification operations
610
+
611
+ Returns:
612
+ SyncDecision object with the recommended operation
613
+ """
614
+
615
+ if log_mode:
616
+ # Skip locking for read-only analysis
617
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
618
+ else:
619
+ # Normal exclusive locking for actual operations
620
+ with SyncLock(basename, language):
621
+ return _perform_sync_analysis(basename, language, target_coverage, budget, prompts_dir, skip_tests, skip_verify)
622
+
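+ # For example, a read-only dry run that skips the unit lock (illustrative call):
+ #     decision = sync_determine_operation("calculator", "python",
+ #                                         target_coverage=90.0, log_mode=True)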
623
+
624
+ def _perform_sync_analysis(basename: str, language: str, target_coverage: float, budget: float, prompts_dir: str = "prompts", skip_tests: bool = False, skip_verify: bool = False) -> SyncDecision:
625
+ """
626
+ Perform the sync state analysis without locking concerns.
627
+
628
+ Args:
629
+ basename: The base name for the PDD unit
630
+ language: The programming language
631
+ target_coverage: Desired test coverage percentage
632
+ budget: Maximum budget for operations
633
+ prompts_dir: Directory containing prompt files
634
+ skip_tests: If True, skip test generation and execution
635
+ skip_verify: If True, skip verification operations
636
+
637
+ Returns:
638
+ SyncDecision object with the recommended operation
639
+ """
640
+ # 1. Check Runtime Signals First (Highest Priority)
641
+ # Workflow Order (from whitepaper):
642
+ # 1. auto-deps (find context/dependencies)
643
+ # 2. generate (create code module)
644
+ # 3. example (create usage example)
645
+ # 4. crash (resolve crashes if code doesn't run)
646
+ # 5. verify (verify example runs correctly after crash fix)
647
+ # 6. test (generate unit tests)
648
+ # 7. fix (resolve bugs found by tests)
649
+ # 8. update (sync changes back to prompt)
650
+
651
+ # Read fingerprint early since we need it for crash verification
652
+ fingerprint = read_fingerprint(basename, language)
653
+
654
+ run_report = read_run_report(basename, language)
655
+ if run_report:
656
+ # Check test failures first (higher priority than exit code)
657
+ if run_report.tests_failed > 0:
658
+ return SyncDecision(
659
+ operation='fix',
660
+ reason=f'Test failures detected: {run_report.tests_failed} failed tests',
661
+ details={'tests_failed': run_report.tests_failed},
662
+ estimated_cost=1.5,
663
+ confidence=0.90
664
+ )
665
+
666
+ # Then check for runtime crashes (only if no test failures)
667
+ if run_report.exit_code != 0:
668
+ # Check if this was from a crash fix that needs verification
669
+ if fingerprint and fingerprint.command == 'crash':
670
+ return SyncDecision(
671
+ operation='verify',
672
+ reason='Previous crash was fixed - verify example runs correctly',
673
+ details={'previous_command': 'crash', 'previous_exit_code': run_report.exit_code},
674
+ estimated_cost=0.7,
675
+ confidence=0.90
676
+ )
677
+ else:
678
+ return SyncDecision(
679
+ operation='crash',
680
+ reason='Runtime error detected in last run',
681
+ details={'exit_code': run_report.exit_code},
682
+ estimated_cost=2.0,
683
+ confidence=0.95
684
+ )
685
+
686
+ if run_report.coverage < target_coverage:
687
+ if skip_tests:
688
+ # When tests are skipped but coverage is low, consider workflow complete
689
+ # since we can't improve coverage without running tests
690
+ return SyncDecision(
691
+ operation='all_synced',
692
+ reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}% but tests skipped',
693
+ details={'current_coverage': run_report.coverage, 'target_coverage': target_coverage, 'tests_skipped': True},
694
+ estimated_cost=0.0,
695
+ confidence=0.90
696
+ )
697
+ else:
698
+ return SyncDecision(
699
+ operation='test',
700
+ reason=f'Coverage {run_report.coverage:.1f}% below target {target_coverage:.1f}%',
701
+ details={'current_coverage': run_report.coverage, 'target_coverage': target_coverage},
702
+ estimated_cost=1.0,
703
+ confidence=0.85
704
+ )
705
+
706
+ # 2. Analyze File State
707
+ paths = get_pdd_file_paths(basename, language, prompts_dir)
708
+ current_hashes = calculate_current_hashes(paths)
709
+
710
+ # 3. Implement the Decision Tree
711
+ if not fingerprint:
712
+ # No Fingerprint (New or Untracked Unit)
713
+ if paths['prompt'].exists():
714
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
715
+ if check_for_dependencies(prompt_content):
716
+ return SyncDecision(
717
+ operation='auto-deps',
718
+ reason='New prompt with dependencies detected',
719
+ details={'prompt_path': str(paths['prompt'])},
720
+ estimated_cost=0.5,
721
+ confidence=0.80
722
+ )
723
+ else:
724
+ return SyncDecision(
725
+ operation='generate',
726
+ reason='New prompt ready for code generation',
727
+ details={'prompt_path': str(paths['prompt'])},
728
+ estimated_cost=1.0,
729
+ confidence=0.90
730
+ )
731
+ else:
732
+ return SyncDecision(
733
+ operation='nothing',
734
+ reason='No prompt file and no history - nothing to do',
735
+ details={},
736
+ estimated_cost=0.0,
737
+ confidence=1.0
738
+ )
739
+
740
+ # Validate that files the fingerprint expects still exist before comparing hashes; this avoids a spurious analyze_conflict decision when files were deleted
741
+ if fingerprint:
742
+ file_validation = validate_expected_files(fingerprint, paths)
743
+ missing_expected_files = [
744
+ file_type for file_type, exists in file_validation.items()
745
+ if not exists
746
+ ]
747
+
748
+ if missing_expected_files:
749
+ # Files are missing that should exist - need to regenerate
750
+ # This prevents the incorrect analyze_conflict decision
751
+ return _handle_missing_expected_files(
752
+ missing_expected_files, paths, fingerprint, basename, language, prompts_dir, skip_tests, skip_verify
753
+ )
754
+
755
+ # Compare hashes only for files that actually exist (prevents None != "hash" false positives)
756
+ changes = []
757
+ if fingerprint:
758
+ if current_hashes.get('prompt_hash') != fingerprint.prompt_hash:
759
+ changes.append('prompt')
760
+ # Only compare hashes for files that exist
761
+ if paths['code'].exists() and current_hashes.get('code_hash') != fingerprint.code_hash:
762
+ changes.append('code')
763
+ if paths['example'].exists() and current_hashes.get('example_hash') != fingerprint.example_hash:
764
+ changes.append('example')
765
+ if paths['test'].exists() and current_hashes.get('test_hash') != fingerprint.test_hash:
766
+ changes.append('test')
767
+
768
+ if not changes:
769
+ # No Changes (Hashes Match Fingerprint) - Progress workflow with skip awareness
770
+ if _is_workflow_complete(paths, skip_tests, skip_verify):
771
+ return SyncDecision(
772
+ operation='nothing',
773
+ reason=f'All required files synchronized (skip_tests={skip_tests}, skip_verify={skip_verify})',
774
+ details={'skip_tests': skip_tests, 'skip_verify': skip_verify},
775
+ estimated_cost=0.0,
776
+ confidence=1.0
777
+ )
778
+
779
+ # Progress workflow considering skip flags
780
+ if paths['code'].exists() and not paths['example'].exists():
781
+ return SyncDecision(
782
+ operation='example',
783
+ reason='Code exists but example missing - progress workflow',
784
+ details={'code_path': str(paths['code'])},
785
+ estimated_cost=0.5,
786
+ confidence=0.85
787
+ )
788
+
789
+ if (paths['code'].exists() and paths['example'].exists() and
790
+ not skip_tests and not paths['test'].exists()):
791
+ return SyncDecision(
792
+ operation='test',
793
+ reason='Code and example exist but test missing - progress workflow',
794
+ details={'code_path': str(paths['code']), 'example_path': str(paths['example'])},
795
+ estimated_cost=1.0,
796
+ confidence=0.85
797
+ )
798
+
799
+ # Some files are missing but no changes detected
800
+ if not paths['code'].exists():
801
+ if paths['prompt'].exists():
802
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
803
+ if check_for_dependencies(prompt_content):
804
+ return SyncDecision(
805
+ operation='auto-deps',
806
+ reason='Missing code file, prompt has dependencies',
807
+ details={'prompt_path': str(paths['prompt'])},
808
+ estimated_cost=0.5,
809
+ confidence=0.80
810
+ )
811
+ else:
812
+ return SyncDecision(
813
+ operation='generate',
814
+ reason='Missing code file - generate from prompt',
815
+ details={'prompt_path': str(paths['prompt'])},
816
+ estimated_cost=1.0,
817
+ confidence=0.90
818
+ )
819
+
820
+ elif len(changes) == 1:
821
+ # Simple Changes (Single File Modified)
822
+ change = changes[0]
823
+
824
+ if change == 'prompt':
825
+ prompt_content = paths['prompt'].read_text(encoding='utf-8', errors='ignore')
826
+ if check_for_dependencies(prompt_content):
827
+ return SyncDecision(
828
+ operation='auto-deps',
829
+ reason='Prompt changed and dependencies need updating',
830
+ details={'changed_file': 'prompt'},
831
+ estimated_cost=0.5,
832
+ confidence=0.85
833
+ )
834
+ else:
835
+ return SyncDecision(
836
+ operation='generate',
837
+ reason='Prompt changed - regenerate code',
838
+ details={'changed_file': 'prompt'},
839
+ estimated_cost=1.0,
840
+ confidence=0.90
841
+ )
842
+
843
+ elif change == 'code':
844
+ return SyncDecision(
845
+ operation='update',
846
+ reason='Code changed - update prompt to reflect changes',
847
+ details={'changed_file': 'code'},
848
+ estimated_cost=0.8,
849
+ confidence=0.85
850
+ )
851
+
852
+ elif change == 'test':
853
+ return SyncDecision(
854
+ operation='test',
855
+ reason='Test changed - run new tests',
856
+ details={'changed_file': 'test'},
857
+ estimated_cost=0.5,
858
+ confidence=0.80
859
+ )
860
+
861
+ elif change == 'example':
862
+ return SyncDecision(
863
+ operation='verify',
864
+ reason='Example changed - verify new example',
865
+ details={'changed_file': 'example'},
866
+ estimated_cost=0.7,
867
+ confidence=0.80
868
+ )
869
+
870
+ else:
871
+ # Complex Changes (Multiple Files Modified / Conflicts)
872
+ return SyncDecision(
873
+ operation='analyze_conflict',
874
+ reason='Multiple files changed - requires conflict analysis',
875
+ details={'changed_files': changes},
876
+ estimated_cost=2.0,
877
+ confidence=0.70
878
+ )
879
+
880
+ # Fallback - should not reach here normally
881
+ return SyncDecision(
882
+ operation='nothing',
883
+ reason='No clear operation determined',
884
+ details={'fingerprint_exists': fingerprint is not None, 'changes': changes},
885
+ estimated_cost=0.0,
886
+ confidence=0.50
887
+ )
888
+
889
+
890
+ def analyze_conflict_with_llm(basename: str, language: str, fingerprint: Fingerprint, changed_files: List[str], prompts_dir: str = "prompts") -> SyncDecision:
891
+ """
892
+ Resolve complex sync conflicts using an LLM.
893
+
894
+ Args:
895
+ basename: The base name for the PDD unit
896
+ language: The programming language
897
+ fingerprint: The last known good state
898
+ changed_files: List of files that have changed
899
+ prompts_dir: Directory containing prompt files
900
+
901
+ Returns:
902
+ SyncDecision object with LLM-recommended operation
903
+ """
904
+
905
+ try:
906
+ # 1. Load LLM Prompt
907
+ prompt_template = load_prompt_template("sync_analysis_LLM")
908
+ if not prompt_template:
909
+ # Fallback if template not found
910
+ return SyncDecision(
911
+ operation='fail_and_request_manual_merge',
912
+ reason='LLM analysis template not found - manual merge required',
913
+ details={'error': 'Template not available'},
914
+ estimated_cost=0.0,
915
+ confidence=0.0
916
+ )
917
+
918
+ # 2. Gather file paths and diffs
919
+ paths = get_pdd_file_paths(basename, language, prompts_dir)
920
+
921
+ # Generate diffs for changed files
922
+ diffs = {}
923
+ for file_type in changed_files:
924
+ if file_type in paths and paths[file_type].exists():
925
+ diffs[f"{file_type}_diff"] = get_git_diff(paths[file_type])
926
+ diffs[f"{file_type}_path"] = str(paths[file_type])
927
+ else:
928
+ diffs[f"{file_type}_diff"] = ""
929
+ diffs[f"{file_type}_path"] = str(paths.get(file_type, ''))
930
+
931
+ # 3. Format the prompt
932
+ formatted_prompt = prompt_template.format(
933
+ fingerprint=json.dumps({
934
+ 'pdd_version': fingerprint.pdd_version,
935
+ 'timestamp': fingerprint.timestamp,
936
+ 'command': fingerprint.command,
937
+ 'prompt_hash': fingerprint.prompt_hash,
938
+ 'code_hash': fingerprint.code_hash,
939
+ 'example_hash': fingerprint.example_hash,
940
+ 'test_hash': fingerprint.test_hash
941
+ }, indent=2),
942
+ changed_files_list=', '.join(changed_files),
943
+ prompt_diff=diffs.get('prompt_diff', ''),
944
+ code_diff=diffs.get('code_diff', ''),
945
+ example_diff=diffs.get('example_diff', ''),
946
+ test_diff=diffs.get('test_diff', ''),
947
+ prompt_path=diffs.get('prompt_path', ''),
948
+ code_path=diffs.get('code_path', ''),
949
+ example_path=diffs.get('example_path', ''),
950
+ test_path=diffs.get('test_path', '')
951
+ )
952
+
953
+ # 4. Invoke LLM with deterministic settings (temperature 0, fixed strength)
954
+ response = llm_invoke(
955
+ prompt=formatted_prompt,
956
+ input_json={},
957
+ strength=0.7, # Use a consistent strength for determinism
958
+ temperature=0.0, # Use temperature 0 for deterministic output
959
+ verbose=False
960
+ )
961
+
962
+ # 5. Parse and validate response
963
+ try:
964
+ llm_result = json.loads(response['result'])
965
+
966
+ # Validate required keys
967
+ required_keys = ['next_operation', 'reason', 'confidence']
968
+ if not all(key in llm_result for key in required_keys):
969
+ raise ValueError("Missing required keys in LLM response")
970
+
971
+ # Check confidence threshold
972
+ confidence = float(llm_result.get('confidence', 0.0))
973
+ if confidence < 0.75:
974
+ return SyncDecision(
975
+ operation='fail_and_request_manual_merge',
976
+ reason=f'LLM confidence too low ({confidence:.2f}) - manual merge required',
977
+ details={'llm_response': llm_result, 'changed_files': changed_files},
978
+ estimated_cost=response.get('cost', 0.0),
979
+ confidence=confidence
980
+ )
981
+
982
+ # Extract operation and details
983
+ operation = llm_result['next_operation']
984
+ reason = llm_result['reason']
985
+ merge_strategy = llm_result.get('merge_strategy', {})
986
+ follow_up_operations = llm_result.get('follow_up_operations', [])
987
+
988
+ return SyncDecision(
989
+ operation=operation,
990
+ reason=f"LLM analysis: {reason}",
991
+ details={
992
+ 'llm_response': llm_result,
993
+ 'changed_files': changed_files,
994
+ 'merge_strategy': merge_strategy,
995
+ 'follow_up_operations': follow_up_operations
996
+ },
997
+ estimated_cost=response.get('cost', 0.0),
998
+ confidence=confidence,
999
+ prerequisites=follow_up_operations
1000
+ )
1001
+
1002
+ except (json.JSONDecodeError, ValueError, KeyError) as e:
1003
+ # Invalid LLM response - fallback to manual merge
1004
+ return SyncDecision(
1005
+ operation='fail_and_request_manual_merge',
1006
+ reason=f'Invalid LLM response: {e} - manual merge required',
1007
+ details={'error': str(e), 'raw_response': response.get('result', ''), 'changed_files': changed_files},
1008
+ estimated_cost=response.get('cost', 0.0),
1009
+ confidence=0.0
1010
+ )
1011
+
1012
+ except Exception as e:
1013
+ # Any other error - fallback to manual merge
1014
+ return SyncDecision(
1015
+ operation='fail_and_request_manual_merge',
1016
+ reason=f'Error during LLM analysis: {e} - manual merge required',
1017
+ details={'error': str(e), 'changed_files': changed_files},
1018
+ estimated_cost=0.0,
1019
+ confidence=0.0
1020
+ )
1021
+
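+ # Shape of a well-formed LLM reply (illustrative; only next_operation, reason,
+ # and confidence are required by the validation above, and confidence must be
+ # at least 0.75 to avoid the manual-merge fallback):
+ #
+ # {
+ #     "next_operation": "update",
+ #     "reason": "Code changes are additive; fold them back into the prompt",
+ #     "confidence": 0.82,
+ #     "merge_strategy": {"keep": "code", "regenerate": ["example"]},
+ #     "follow_up_operations": ["test"]
+ # }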
1022
+
1023
+ if __name__ == "__main__":
1024
+ # Example usage
1025
+ if len(sys.argv) != 3:
1026
+ print("Usage: python sync_determine_operation.py <basename> <language>")
1027
+ sys.exit(1)
1028
+
1029
+ basename = sys.argv[1]
1030
+ language = sys.argv[2]
1031
+
1032
+ decision = sync_determine_operation(basename, language, target_coverage=90.0)
1033
+
1034
+ print(f"Operation: {decision.operation}")
1035
+ print(f"Reason: {decision.reason}")
1036
+ print(f"Estimated Cost: ${decision.estimated_cost:.2f}")
1037
+ print(f"Confidence: {decision.confidence:.2f}")
1038
+ if decision.details:
1039
+ print(f"Details: {json.dumps(decision.details, indent=2)}")