galangal-orchestrate 0.13.0 (galangal_orchestrate-0.13.0-py3-none-any.whl)

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (79)
  1. galangal/__init__.py +36 -0
  2. galangal/__main__.py +6 -0
  3. galangal/ai/__init__.py +167 -0
  4. galangal/ai/base.py +159 -0
  5. galangal/ai/claude.py +352 -0
  6. galangal/ai/codex.py +370 -0
  7. galangal/ai/gemini.py +43 -0
  8. galangal/ai/subprocess.py +254 -0
  9. galangal/cli.py +371 -0
  10. galangal/commands/__init__.py +27 -0
  11. galangal/commands/complete.py +367 -0
  12. galangal/commands/github.py +355 -0
  13. galangal/commands/init.py +177 -0
  14. galangal/commands/init_wizard.py +762 -0
  15. galangal/commands/list.py +20 -0
  16. galangal/commands/pause.py +34 -0
  17. galangal/commands/prompts.py +89 -0
  18. galangal/commands/reset.py +41 -0
  19. galangal/commands/resume.py +30 -0
  20. galangal/commands/skip.py +62 -0
  21. galangal/commands/start.py +530 -0
  22. galangal/commands/status.py +44 -0
  23. galangal/commands/switch.py +28 -0
  24. galangal/config/__init__.py +15 -0
  25. galangal/config/defaults.py +183 -0
  26. galangal/config/loader.py +163 -0
  27. galangal/config/schema.py +330 -0
  28. galangal/core/__init__.py +33 -0
  29. galangal/core/artifacts.py +136 -0
  30. galangal/core/state.py +1097 -0
  31. galangal/core/tasks.py +454 -0
  32. galangal/core/utils.py +116 -0
  33. galangal/core/workflow/__init__.py +68 -0
  34. galangal/core/workflow/core.py +789 -0
  35. galangal/core/workflow/engine.py +781 -0
  36. galangal/core/workflow/pause.py +35 -0
  37. galangal/core/workflow/tui_runner.py +1322 -0
  38. galangal/exceptions.py +36 -0
  39. galangal/github/__init__.py +31 -0
  40. galangal/github/client.py +427 -0
  41. galangal/github/images.py +324 -0
  42. galangal/github/issues.py +298 -0
  43. galangal/logging.py +364 -0
  44. galangal/prompts/__init__.py +5 -0
  45. galangal/prompts/builder.py +527 -0
  46. galangal/prompts/defaults/benchmark.md +34 -0
  47. galangal/prompts/defaults/contract.md +35 -0
  48. galangal/prompts/defaults/design.md +54 -0
  49. galangal/prompts/defaults/dev.md +89 -0
  50. galangal/prompts/defaults/docs.md +104 -0
  51. galangal/prompts/defaults/migration.md +59 -0
  52. galangal/prompts/defaults/pm.md +110 -0
  53. galangal/prompts/defaults/pm_questions.md +53 -0
  54. galangal/prompts/defaults/preflight.md +32 -0
  55. galangal/prompts/defaults/qa.md +65 -0
  56. galangal/prompts/defaults/review.md +90 -0
  57. galangal/prompts/defaults/review_codex.md +99 -0
  58. galangal/prompts/defaults/security.md +84 -0
  59. galangal/prompts/defaults/test.md +91 -0
  60. galangal/results.py +176 -0
  61. galangal/ui/__init__.py +5 -0
  62. galangal/ui/console.py +126 -0
  63. galangal/ui/tui/__init__.py +56 -0
  64. galangal/ui/tui/adapters.py +168 -0
  65. galangal/ui/tui/app.py +902 -0
  66. galangal/ui/tui/entry.py +24 -0
  67. galangal/ui/tui/mixins.py +196 -0
  68. galangal/ui/tui/modals.py +339 -0
  69. galangal/ui/tui/styles/app.tcss +86 -0
  70. galangal/ui/tui/styles/modals.tcss +197 -0
  71. galangal/ui/tui/types.py +107 -0
  72. galangal/ui/tui/widgets.py +263 -0
  73. galangal/validation/__init__.py +5 -0
  74. galangal/validation/runner.py +1072 -0
  75. galangal_orchestrate-0.13.0.dist-info/METADATA +985 -0
  76. galangal_orchestrate-0.13.0.dist-info/RECORD +79 -0
  77. galangal_orchestrate-0.13.0.dist-info/WHEEL +4 -0
  78. galangal_orchestrate-0.13.0.dist-info/entry_points.txt +2 -0
  79. galangal_orchestrate-0.13.0.dist-info/licenses/LICENSE +674 -0
galangal/validation/runner.py
@@ -0,0 +1,1072 @@
+ """
+ Config-driven validation runner.
+ """
+
+ import fnmatch
+ import subprocess
+ from dataclasses import dataclass
+ from typing import Any
+
+ from galangal.config.loader import get_config, get_project_root
+ from galangal.config.schema import PreflightCheck, SkipCondition, StageValidation, ValidationCommand
+ from galangal.core.artifacts import (
+     artifact_exists,
+     read_artifact,
+     write_artifact,
+     write_skip_artifact,
+ )
+ from galangal.core.utils import now_iso, truncate_text
+
+
+ def read_decision_file(stage: str, task_name: str) -> str | None:
+     """
+     Read a stage decision file and return its normalized content.
+
+     Decision files contain exactly one word indicating the stage result.
+     Valid decision values are defined in STAGE_METADATA (state.py).
+
+     Args:
+         stage: Stage name (e.g., "SECURITY", "QA", "REVIEW").
+         task_name: Name of the task.
+
+     Returns:
+         The decision word (uppercase, stripped) or None if file doesn't exist
+         or contains invalid content.
+     """
+     from galangal.core.state import Stage, get_decision_file_name
+
+     try:
+         stage_enum = Stage.from_str(stage.upper())
+         decision_file = get_decision_file_name(stage_enum)
+     except ValueError:
+         # Fallback for unknown stages
+         decision_file = f"{stage.upper()}_DECISION"
+
+     if not decision_file or not artifact_exists(decision_file, task_name):
+         return None
+
+     content = read_artifact(decision_file, task_name)
+     if not content:
+         return None
+
+     # Strip and normalize - should be exactly one word
+     decision = content.strip().upper()
+
+     # Validate it's a single word (no spaces, newlines, etc.)
+     if " " in decision or "\n" in decision or len(decision) > 20:
+         return None
+
+     return decision
+
+
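As a quick illustration of the contract above, a caller only ever sees a single normalized word or None. The sketch below is hypothetical (the task name and artifact contents are invented):

# Hypothetical usage; assumes a QA decision artifact was written for "my-task".
decision = read_decision_file("QA", "my-task")
if decision == "PASS":
    print("QA approved")
elif decision is None:
    print("no usable decision file (missing, empty, or not a single word)")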
+ # Decision configurations are now centralized in STAGE_METADATA (state.py)
+ # Use get_decision_config(stage) to get decision values for a stage
+
+
+ @dataclass
+ class ValidationResult:
+     """
+     Result of a validation check.
+
+     Attributes:
+         success: Whether the validation passed.
+         message: Human-readable description of the result.
+         output: Optional detailed output (e.g., test results, command stdout).
+         rollback_to: If validation failed, the stage to roll back to (e.g., "DEV").
+         skipped: True if the stage was skipped due to skip_if conditions.
+         is_fast_track: If True, this is a minor rollback that should skip
+             stages that already passed (REQUEST_MINOR_CHANGES).
+     """
+
+     success: bool
+     message: str
+     output: str | None = None
+     rollback_to: str | None = None  # Stage to rollback to on failure
+     skipped: bool = False  # True if stage was skipped due to conditions
+     needs_user_decision: bool = False  # True if decision file missing/unclear
+     is_fast_track: bool = False  # True for minor rollbacks (skip passed stages)
+
+
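For orientation, the two result shapes the rest of this module produces most often look roughly like this (values are illustrative only):

ok = ValidationResult(True, "QA validation passed")
retry = ValidationResult(
    False,
    "Tests failed - needs DEV fix",
    output="2 failed, 40 passed in 12.3s",
    rollback_to="DEV",
)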
+ def validate_stage_decision(
+     stage: str,
+     task_name: str,
+     artifact_name: str,
+     missing_artifact_msg: str | None = None,
+     skip_artifact: str | None = None,
+ ) -> ValidationResult:
+     """Generic decision file validation for stages.
+
+     This helper consolidates the repeated pattern of:
+     1. Check skip artifact
+     2. Check decision file for known values
+     3. Check if report artifact exists
+     4. Request user decision if unclear
+
+     Args:
+         stage: Stage name (e.g., "SECURITY", "QA", "REVIEW").
+         task_name: Name of the task being validated.
+         artifact_name: Name of the report artifact (e.g., "QA_REPORT.md").
+         missing_artifact_msg: Custom message if artifact is missing.
+         skip_artifact: Optional skip artifact name (e.g., "SECURITY_SKIP.md").
+
+     Returns:
+         ValidationResult based on decision file or artifact status.
+     """
+     from galangal.core.state import Stage, get_decision_config
+
+     stage_upper = stage.upper()
+
+     # Check for skip artifact first
+     if skip_artifact and artifact_exists(skip_artifact, task_name):
+         return ValidationResult(True, f"{stage_upper} skipped")
+
+     # Check for decision file using centralized config from STAGE_METADATA
+     decision = read_decision_file(stage_upper, task_name)
+     try:
+         stage_enum = Stage.from_str(stage_upper)
+         decision_config = get_decision_config(stage_enum) or {}
+     except ValueError:
+         decision_config = {}
+
+     if decision and decision in decision_config:
+         success, message, rollback_to, is_fast_track = decision_config[decision]
+         return ValidationResult(
+             success, message, rollback_to=rollback_to, is_fast_track=is_fast_track
+         )
+
+     # Decision file missing or unclear - check if artifact exists
+     if not artifact_exists(artifact_name, task_name):
+         msg = missing_artifact_msg or f"{artifact_name} not found"
+         return ValidationResult(False, msg, rollback_to="DEV")
+
+     # Artifact exists but no valid decision file - need user to decide
+     content = read_artifact(artifact_name, task_name) or ""
+     return ValidationResult(
+         False,
+         f"{stage_upper}_DECISION file missing or unclear - user confirmation required",
+         output=truncate_text(content, 2000),
+         needs_user_decision=True,
+     )
+
+
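A minimal sketch of how a stage might call this helper; the task name is hypothetical, while the artifact names match those used later in this file:

result = validate_stage_decision(
    "SECURITY",
    "add-login-endpoint",
    "SECURITY_CHECKLIST.md",
    skip_artifact="SECURITY_SKIP.md",
)
if result.needs_user_decision:
    # surface result.output (the truncated report) and ask the operator to decide
    ...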
+ class ValidationRunner:
+     """
+     Config-driven validation runner for workflow stages.
+
+     This class validates stage outputs based on configuration in `.galangal/config.yaml`.
+     Each stage can define:
+     - `checks`: Preflight checks (path existence, command execution)
+     - `commands`: Shell commands to run (e.g., tests, linting)
+     - `artifact`/`pass_marker`/`fail_marker`: Artifact content validation
+     - `skip_if`: Conditions to skip the stage
+     - `artifacts_required`: List of artifacts that must exist
+
+     If no config exists for a stage, default validation logic is used.
+     """
+
+     def __init__(self) -> None:
+         self.config = get_config()
+         self.project_root = get_project_root()
+
+     def validate_stage(
+         self,
+         stage: str,
+         task_name: str,
+     ) -> ValidationResult:
+         """
+         Validate a workflow stage based on config.
+
+         Executes the validation pipeline for a stage:
+         1. Check skip conditions (no_files_match, manual skip artifacts)
+         2. Run preflight checks (for PREFLIGHT stage)
+         3. Run validation commands (all commands run, outputs aggregated)
+         4. Check artifact markers (APPROVED, PASS, etc.)
+         5. Verify required artifacts exist
+
+         Validation command outputs are aggregated into VALIDATION_REPORT.md
+         for easier debugging when failures occur.
+
+         Special handling for:
+         - PREFLIGHT: Runs environment checks, generates PREFLIGHT_REPORT.md
+         - SECURITY: Checks SECURITY_CHECKLIST.md for APPROVED/REJECTED
+         - QA: Checks QA_REPORT.md for Status: PASS/FAIL
+
+         Args:
+             stage: The stage name (e.g., "PM", "DEV", "QA").
+             task_name: Name of the task being validated.
+
+         Returns:
+             ValidationResult indicating success/failure with optional rollback target.
+         """
+         stage_lower = stage.lower()
+
+         # Get stage validation config
+         validation_config = self.config.validation
+         stage_config: StageValidation | None = getattr(validation_config, stage_lower, None)
+
+         if stage_config is None:
+             # No config for this stage - use defaults
+             return self._validate_with_defaults(stage, task_name)
+
+         # NOTE: Skip conditions are checked in get_next_stage() which is the single
+         # source of truth for skip logic. By the time we reach validate_stage(),
+         # the stage has already been determined to not be skipped.
+
+         # SECURITY stage: use generic decision validation
+         if stage_lower == "security":
+             return validate_stage_decision(
+                 "SECURITY",
+                 task_name,
+                 "SECURITY_CHECKLIST.md",
+                 skip_artifact="SECURITY_SKIP.md",
+             )
+
+         # Run preflight checks (for PREFLIGHT stage)
+         if stage_config.checks:
+             result = self._run_preflight_checks(stage_config.checks, task_name)
+             if not result.success:
+                 return result
+
+         # Run validation commands and aggregate outputs
+         command_results = self._run_all_commands(stage_config, task_name)
+         if command_results["has_failure"]:
+             # Write validation report with all outputs for debugging
+             self._write_validation_report(stage, task_name, command_results)
+             return ValidationResult(
+                 False,
+                 command_results["first_failure_message"],
+                 output=command_results["aggregated_output"],
+                 rollback_to="DEV",
+             )
+
+         # Check for pass/fail markers in artifacts (for AI-driven stages)
+         if stage_config.artifact and stage_config.pass_marker:
+             result = self._check_artifact_markers(stage_config, task_name)
+             if not result.success:
+                 return result
+
+         # QA stage: always check QA_DECISION file first
+         if stage_lower == "qa":
+             result = self._check_qa_report(task_name)
+             if not result.success:
+                 return result
+
+         # REVIEW stage: check REVIEW_DECISION file first (for Codex/independent reviews)
+         if stage_lower == "review":
+             result = validate_stage_decision("REVIEW", task_name, "REVIEW_NOTES.md")
+             if result.success or result.rollback_to:
+                 # Either passed or has a clear rollback target - return this result
+                 return result
+             # Fall through to artifact marker check if decision file missing/unclear
+
+         # Check required artifacts
+         for artifact_name in stage_config.artifacts_required:
+             if not artifact_exists(artifact_name, task_name):
+                 return ValidationResult(
+                     False,
+                     f"{artifact_name} not found",
+                     rollback_to="DEV",
+                 )
+
+         return ValidationResult(True, f"{stage} validation passed")
+
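Seen from the caller's side, the method above is intended to be branched on rather than raised from. A hedged sketch (task name invented, handling elided):

runner = ValidationRunner()
result = runner.validate_stage("QA", "add-login-endpoint")
if result.success or result.skipped:
    ...  # advance to the next stage
elif result.needs_user_decision:
    ...  # show result.output and wait for a manual decision
elif result.rollback_to:
    ...  # roll back, honoring result.is_fast_track for minor-change loops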
+     def _get_all_changed_files(self) -> set[str]:
+         """
+         Get all changed files from commits, staging area, and working tree.
+
+         Collects files from multiple sources to ensure skip detection works
+         correctly even in dirty working trees:
+
+         1. Committed changes: `git diff --name-only base_branch...HEAD`
+         2. Working tree changes: `git status --porcelain` (staged, unstaged, untracked)
+
+         Returns:
+             Set of file paths that have been changed, staged, or are untracked.
+             Empty set on error.
+         """
+         changed: set[str] = set()
+
+         try:
+             # 1. Committed changes vs base branch
+             base_branch = self.config.pr.base_branch
+             result = subprocess.run(
+                 ["git", "diff", "--name-only", f"{base_branch}...HEAD"],
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=10,
+             )
+             if result.returncode == 0 and result.stdout.strip():
+                 changed.update(f for f in result.stdout.strip().split("\n") if f)
+
+             # 2. Working tree changes (staged, unstaged, untracked)
+             # Porcelain format: "XY filename" or "XY old -> new" for renames
+             # X = staging area status, Y = working tree status
+             # ?? = untracked, M = modified, A = added, D = deleted, R = renamed
+             result = subprocess.run(
+                 ["git", "status", "--porcelain"],
+                 cwd=self.project_root,
+                 capture_output=True,
+                 text=True,
+                 timeout=10,
+             )
+             if result.returncode == 0 and result.stdout.strip():
+                 for line in result.stdout.strip().split("\n"):
+                     if line and len(line) >= 3:
+                         # Extract filename (handle renames: "R old -> new")
+                         file_part = line[3:]
+                         if " -> " in file_part:
+                             # For renames, include both old and new paths
+                             old, new = file_part.split(" -> ", 1)
+                             changed.add(old)
+                             changed.add(new)
+                         else:
+                             changed.add(file_part)
+
+         except Exception:
+             pass  # Return whatever we collected so far
+
+         return changed
+
+     def _should_skip(self, skip_condition: SkipCondition, task_name: str) -> bool:
+         """
+         Check if a stage's skip condition is met.
+
+         Supports `no_files_match` condition which checks if any changed files
+         match the given glob patterns. Changed files include:
+         - Committed changes vs base branch
+         - Staged changes (git add)
+         - Unstaged changes (modified tracked files)
+         - Untracked files (new files not yet added)
+
+         This ensures conditional stages are not incorrectly skipped when
+         relevant files exist in the working tree but haven't been committed.
+
+         Args:
+             skip_condition: Config object with skip criteria (e.g., no_files_match).
+             task_name: Name of the task (unused, for future conditions).
+
+         Returns:
+             True if the stage should be skipped, False otherwise.
+         """
+         if skip_condition.no_files_match:
+             try:
+                 changed_files = self._get_all_changed_files()
+
+                 # If we couldn't get any file info, don't skip (safe default)
+                 if not changed_files:
+                     return False
+
+                 # Support both single pattern and list of patterns
+                 patterns = skip_condition.no_files_match
+                 if isinstance(patterns, str):
+                     patterns = [patterns]
+
+                 for f in changed_files:
+                     for pattern in patterns:
+                         if fnmatch.fnmatch(f, pattern) or fnmatch.fnmatch(
+                             f.lower(), pattern.lower()
+                         ):
+                             return False  # Found a match, don't skip
+
+                 return True  # No matches, skip
+             except Exception:
+                 return False  # On error, don't skip
+
+         return False
+
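The matching rule reduces to plain fnmatch over every changed path, with a lower-cased fallback. A self-contained sketch with made-up paths and patterns:

import fnmatch

changed = {"src/api/models.py", "README.md"}
patterns = ["*.sql", "migrations/*"]
skip = not any(
    fnmatch.fnmatch(f, p) or fnmatch.fnmatch(f.lower(), p.lower())
    for f in changed
    for p in patterns
)
# No changed file matches a migration-related pattern, so skip is True here.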
+     def _write_skip_artifact(self, stage: str, task_name: str, reason: str) -> None:
+         """Write a skip marker artifact."""
+         write_skip_artifact(stage, reason, task_name)
+
+     def should_skip_stage(self, stage: str, task_name: str) -> bool:
+         """
+         Check if a stage should be skipped based on skip_if conditions only.
+
+         This method checks ONLY the skip_if condition configured for a stage,
+         without running any validation commands. Use this when you need to
+         determine whether to skip a conditional stage before execution.
+
+         Args:
+             stage: The stage name (e.g., "MIGRATION", "CONTRACT", "BENCHMARK").
+             task_name: Name of the task (for future conditions).
+
+         Returns:
+             True if the stage should be skipped, False otherwise.
+         """
+         stage_lower = stage.lower()
+         validation_config = self.config.validation
+         stage_config: StageValidation | None = getattr(validation_config, stage_lower, None)
+
+         if stage_config is None:
+             return False
+
+         if stage_config.skip_if:
+             return self._should_skip(stage_config.skip_if, task_name)
+
+         return False
+
+     def _run_preflight_checks(
+         self, checks: list[PreflightCheck], task_name: str
+     ) -> ValidationResult:
+         """
+         Run preflight environment checks and generate PREFLIGHT_REPORT.md.
+
+         Preflight checks verify the development environment is ready:
+         - Path existence checks (e.g., config files, virtual envs)
+         - Command execution checks (e.g., git status, tool versions)
+
+         Each check can be:
+         - Required: Failure stops the workflow
+         - warn_only: Failure logs a warning but continues
+
+         The function generates PREFLIGHT_REPORT.md with detailed results
+         for each check.
+
+         Args:
+             checks: List of PreflightCheck configs to run.
+             task_name: Task name for writing the report artifact.
+
+         Returns:
+             ValidationResult with success=True if all required checks pass.
+             Output contains the generated report content.
+         """
+         results: dict[str, dict[str, str]] = {}
+         all_ok = True
+
+         for check in checks:
+             if check.path_exists:
+                 path = self.project_root / check.path_exists
+                 exists = path.exists()
+                 results[check.name] = {"status": "OK" if exists else "Missing"}
+                 if not exists and not check.warn_only:
+                     all_ok = False
+
+             elif check.command:
+                 try:
+                     # Support both string (shell) and list (direct) commands
+                     if isinstance(check.command, list):
+                         result = subprocess.run(
+                             check.command,
+                             shell=False,
+                             cwd=self.project_root,
+                             capture_output=True,
+                             text=True,
+                             timeout=30,
+                         )
+                     else:
+                         result = subprocess.run(
+                             check.command,
+                             shell=True,
+                             cwd=self.project_root,
+                             capture_output=True,
+                             text=True,
+                             timeout=30,
+                         )
+                     output = result.stdout.strip()
+
+                     if check.expect_empty:
+                         # Filter out task-related files for git status
+                         if output:
+                             filtered = self._filter_task_files(output, task_name)
+                             ok = not filtered
+                         else:
+                             ok = True
+                     else:
+                         ok = result.returncode == 0
+
+                     status = "OK" if ok else ("Warning" if check.warn_only else "Failed")
+                     results[check.name] = {
+                         "status": status,
+                         "output": output[:200] if output else "",
+                     }
+                     if not ok and not check.warn_only:
+                         all_ok = False
+
+                 except Exception as e:
+                     results[check.name] = {"status": "Error", "error": str(e)}
+                     if not check.warn_only:
+                         all_ok = False
+
+         # Generate report (uses now_iso imported at module level)
+         status = "READY" if all_ok else "NOT_READY"
+         report = f"""# Preflight Report
+
+ ## Summary
+ - **Status:** {status}
+ - **Date:** {now_iso()}
+
+ ## Checks
+ """
+         for name, check_result in results.items():
+             status_val = check_result.get("status", "Unknown")
+             if status_val == "OK":
+                 status_icon = "✓"
+             elif status_val == "Warning":
+                 status_icon = "⚠"
+             else:
+                 status_icon = "✗"
+             report += f"\n### {status_icon} {name}\n"
+             report += f"- Status: {check_result.get('status', 'Unknown')}\n"
+             if check_result.get("output"):
+                 report += f"- Output: {check_result['output']}\n"
+             if check_result.get("error"):
+                 report += f"- Error: {check_result['error']}\n"
+
+         write_artifact("PREFLIGHT_REPORT.md", report, task_name)
+
+         if all_ok:
+             return ValidationResult(True, "Preflight checks passed", output=report)
+         return ValidationResult(
+             False,
+             "Preflight checks failed - fix environment issues",
+             output=report,
+         )
+
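PreflightCheck is defined in galangal.config.schema and its constructor is not part of this diff, so the keyword arguments below are assumptions inferred from the attributes read above (name, path_exists, command, expect_empty, warn_only); treat this as a sketch, not the schema:

checks = [
    PreflightCheck(name="venv exists", path_exists=".venv"),
    PreflightCheck(name="clean tree", command="git status --porcelain", expect_empty=True),
    PreflightCheck(name="node version", command=["node", "--version"], warn_only=True),
]
result = ValidationRunner()._run_preflight_checks(checks, "add-login-endpoint")
print(result.success)  # a PREFLIGHT_REPORT.md artifact is written either way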
+     def _filter_task_files(self, git_status: str, task_name: str) -> str:
+         """Filter out task-related files from git status output."""
+         config = get_config()
+         tasks_dir = config.tasks_dir
+
+         filtered_lines = []
+         for line in git_status.split("\n"):
+             file_path = line[3:] if len(line) > 3 else line
+             # Skip task artifacts directory
+             if file_path.startswith(f"{tasks_dir}/"):
+                 continue
+             filtered_lines.append(line)
+
+         return "\n".join(filtered_lines)
+
+     def _get_placeholders(self, task_name: str) -> dict[str, str]:
+         """
+         Build placeholder substitution dictionary for commands.
+
+         Returns:
+             Dict mapping placeholder names to their values:
+             - {task_dir}: Full path to task directory
+             - {project_root}: Full path to project root
+             - {base_branch}: Configured base branch name
+         """
+         config = get_config()
+         return {
+             "{task_dir}": str(self.project_root / config.tasks_dir / task_name),
+             "{project_root}": str(self.project_root),
+             "{base_branch}": config.pr.base_branch,
+         }
+
+     def _substitute_placeholders(self, text: str, placeholders: dict[str, str]) -> str:
+         """Substitute all placeholders in a string."""
+         for key, value in placeholders.items():
+             text = text.replace(key, value)
+         return text
+
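Substitution is plain string replacement, applied the same way to string commands and to each element of list commands. An illustrative run (paths invented):

placeholders = {
    "{task_dir}": "/repo/.galangal/tasks/add-login-endpoint",
    "{project_root}": "/repo",
    "{base_branch}": "main",
}
command = "pytest {project_root}/tests --junitxml={task_dir}/junit.xml"
for key, value in placeholders.items():
    command = command.replace(key, value)
# -> pytest /repo/tests --junitxml=/repo/.galangal/tasks/add-login-endpoint/junit.xml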
+     def _run_command(
+         self, cmd_config: ValidationCommand, task_name: str, default_timeout: int
+     ) -> ValidationResult:
+         """
+         Execute a validation command and return the result.
+
+         Commands can be specified as:
+         - String: Executed via shell (supports &&, |, etc.)
+         - List: Executed directly without shell (safer for paths with spaces)
+
+         Supported placeholders: {task_dir}, {project_root}, {base_branch}
+
+         Args:
+             cmd_config: Command configuration with name, command (str or list),
+                 timeout, and optional/allow_failure flags.
+             task_name: Task name for placeholder substitution.
+             default_timeout: Timeout to use if not specified in config.
+
+         Returns:
+             ValidationResult with success based on exit code.
+             Failure results include rollback_to="DEV".
+         """
+         placeholders = self._get_placeholders(task_name)
+         timeout = cmd_config.timeout if cmd_config.timeout is not None else default_timeout
+
+         try:
+             if isinstance(cmd_config.command, list):
+                 # List form: substitute placeholders in each element, run without shell
+                 cmd = [
+                     self._substitute_placeholders(arg, placeholders) for arg in cmd_config.command
+                 ]
+                 result = subprocess.run(
+                     cmd,
+                     shell=False,
+                     cwd=self.project_root,
+                     capture_output=True,
+                     text=True,
+                     timeout=timeout,
+                 )
+             else:
+                 # String form: substitute and run via shell (backwards compatible)
+                 command = self._substitute_placeholders(cmd_config.command, placeholders)
+                 result = subprocess.run(
+                     command,
+                     shell=True,
+                     cwd=self.project_root,
+                     capture_output=True,
+                     text=True,
+                     timeout=timeout,
+                 )
+
+             if result.returncode == 0:
+                 return ValidationResult(
+                     True,
+                     f"{cmd_config.name}: passed",
+                     output=result.stdout,
+                 )
+             else:
+                 return ValidationResult(
+                     False,
+                     f"{cmd_config.name}: failed",
+                     output=result.stdout + result.stderr,
+                     rollback_to="DEV",
+                 )
+
+         except subprocess.TimeoutExpired:
+             return ValidationResult(
+                 False,
+                 f"{cmd_config.name}: timed out",
+                 rollback_to="DEV",
+             )
+         except Exception as e:
+             return ValidationResult(
+                 False,
+                 f"{cmd_config.name}: error - {e}",
+                 rollback_to="DEV",
+             )
+
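ValidationCommand also lives in galangal.config.schema; its exact constructor is not shown in this diff, so the keyword form below is an assumption based on the fields accessed above (name, command, timeout, optional, allow_failure):

lint = ValidationCommand(name="lint", command=["ruff", "check", "{project_root}"])
tests = ValidationCommand(name="tests", command="pytest -q", timeout=900)

runner = ValidationRunner()
print(runner._run_command(lint, "add-login-endpoint", default_timeout=300).message)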
+     def _run_all_commands(self, stage_config: StageValidation, task_name: str) -> dict[str, Any]:
+         """
+         Run all validation commands and aggregate their outputs.
+
+         Unlike early-return behavior, this runs ALL commands to collect
+         complete debugging information when failures occur.
+
+         Args:
+             stage_config: Stage validation configuration with commands list.
+             task_name: Task name for placeholder substitution.
+
+         Returns:
+             Dict with:
+             - has_failure: True if any non-optional command failed
+             - first_failure_message: Message from first failing command
+             - aggregated_output: Combined output from all commands
+             - results: List of (name, success, output) tuples
+         """
+         results: list[tuple[str, bool, str]] = []
+         has_failure = False
+         first_failure_message = ""
+
+         for cmd_config in stage_config.commands:
+             result = self._run_command(cmd_config, task_name, stage_config.timeout)
+             output = result.output or ""
+             results.append((cmd_config.name, result.success, output))
+
+             if not result.success:
+                 if cmd_config.optional:
+                     continue
+                 if cmd_config.allow_failure:
+                     continue
+                 if not has_failure:
+                     has_failure = True
+                     first_failure_message = result.message
+
+         # Build aggregated output for debugging
+         aggregated_parts = []
+         for name, success, output in results:
+             status = "✓ PASSED" if success else "✗ FAILED"
+             aggregated_parts.append(f"=== {name}: {status} ===")
+             if output:
+                 aggregated_parts.append(output.strip())
+             aggregated_parts.append("")
+
+         return {
+             "has_failure": has_failure,
+             "first_failure_message": first_failure_message,
+             "aggregated_output": "\n".join(aggregated_parts),
+             "results": results,
+         }
+
+     def _write_validation_report(
+         self, stage: str, task_name: str, command_results: dict[str, Any]
+     ) -> None:
+         """
+         Write VALIDATION_REPORT.md with aggregated command outputs.
+
+         Creates a structured report showing all validation command results,
+         making it easier to debug failures without re-running commands.
+
+         Args:
+             stage: Stage name (e.g., "TEST", "QA").
+             task_name: Task name for artifact path.
+             command_results: Results from _run_all_commands().
+         """
+         from datetime import datetime
+
+         lines = [
+             f"# {stage} Validation Report",
+             "",
+             f"**Generated:** {datetime.now().isoformat()}",
+             "",
+             "## Summary",
+             "",
+         ]
+
+         passed = sum(1 for _, success, _ in command_results["results"] if success)
+         failed = len(command_results["results"]) - passed
+         lines.append(f"- **Passed:** {passed}")
+         lines.append(f"- **Failed:** {failed}")
+         lines.append("")
+         lines.append("## Command Results")
+         lines.append("")
+
+         for name, success, output in command_results["results"]:
+             status = "✓ PASSED" if success else "✗ FAILED"
+             lines.append(f"### {name}: {status}")
+             lines.append("")
+             if output:
+                 # Truncate very long outputs
+                 truncated = output[:5000]
+                 if len(output) > 5000:
+                     truncated += "\n\n... (output truncated)"
+                 lines.append("```")
+                 lines.append(truncated.strip())
+                 lines.append("```")
+             else:
+                 lines.append("_(no output)_")
+             lines.append("")
+
+         report_content = "\n".join(lines)
+         write_artifact("VALIDATION_REPORT.md", report_content, task_name)
+
+         # For TEST stage, also write a concise summary for downstream prompts
+         if stage.upper() == "TEST":
+             self._write_test_summary(task_name, command_results)
+
+     def _write_test_summary(self, task_name: str, command_results: dict[str, Any]) -> None:
+         """
+         Write TEST_SUMMARY.md with concise test results for downstream prompts.
+
+         Parses test output to extract key information without verbose logs.
+         Supports pytest output format primarily.
+
+         Args:
+             task_name: Task name for artifact path.
+             command_results: Results from _run_all_commands().
+         """
+         from datetime import datetime
+
+         # Find test command output (look for pytest, jest, etc.)
+         test_output = ""
+         test_cmd_name = ""
+         for name, success, output in command_results["results"]:
+             name_lower = name.lower()
+             if any(kw in name_lower for kw in ["test", "pytest", "jest", "mocha", "unittest"]):
+                 test_output = output
+                 test_cmd_name = name
+                 break
+
+         # If no specific test command found, use all output
+         if not test_output:
+             test_output = command_results.get("aggregated_output", "")
+             test_cmd_name = "tests"
+
+         # Parse test results
+         summary = self._parse_test_output(test_output)
+
+         # Determine overall status
+         has_failure = command_results.get("has_failure", False)
+         status = "FAIL" if has_failure else "PASS"
+
+         lines = [
+             "# Test Summary",
+             "",
+             f"**Status:** {status}",
+             f"**Command:** {test_cmd_name}",
+             f"**Generated:** {datetime.now().isoformat()}",
+             "",
+         ]
+
+         # Add counts if parsed
+         if summary["counts"]:
+             lines.append(summary["counts"])
+             lines.append("")
+
+         # Add duration if found
+         if summary["duration"]:
+             lines.append(f"**Duration:** {summary['duration']}")
+             lines.append("")
+
+         # Add failed tests
+         if summary["failed_tests"]:
+             lines.append("## Failed Tests")
+             lines.append("")
+             for test_info in summary["failed_tests"][:20]:  # Limit to 20 failures
+                 lines.append(f"- {test_info}")
+             if len(summary["failed_tests"]) > 20:
+                 lines.append(f"- ... and {len(summary['failed_tests']) - 20} more failures")
+             lines.append("")
+
+         # Add coverage if found
+         if summary["coverage"]:
+             lines.append("## Coverage")
+             lines.append("")
+             lines.append(summary["coverage"])
+             lines.append("")
+
+         # Add warnings/errors summary (not full output)
+         if summary["warnings"]:
+             lines.append("## Warnings")
+             lines.append("")
+             for warning in summary["warnings"][:10]:
+                 lines.append(f"- {warning}")
+             lines.append("")
+
+         write_artifact("TEST_SUMMARY.md", "\n".join(lines), task_name)
+
+     def _parse_test_output(self, output: str) -> dict[str, Any]:
+         """
+         Parse test output to extract summary information.
+
+         Supports pytest output format. Returns structured summary data.
+
+         Args:
+             output: Raw test command output.
+
+         Returns:
+             Dict with counts, duration, failed_tests, coverage, warnings.
+         """
+         import re
+
+         result: dict[str, Any] = {
+             "counts": "",
+             "duration": "",
+             "failed_tests": [],
+             "coverage": "",
+             "warnings": [],
+         }
+
+         if not output:
+             return result
+
+         lines = output.split("\n")
+
+         # Parse pytest-style summary line: "5 passed, 2 failed, 1 skipped in 3.45s"
+         for line in lines:
+             # Match pytest summary
+             summary_match = re.search(
+                 r"(\d+)\s+passed.*?(\d+)\s+failed|(\d+)\s+passed",
+                 line,
+                 re.IGNORECASE,
+             )
+             if summary_match:
+                 result["counts"] = line.strip()
+
+             # Match duration
+             duration_match = re.search(r"in\s+([\d.]+)s", line)
+             if duration_match:
+                 result["duration"] = f"{duration_match.group(1)}s"
+
+             # Match pytest short summary (=== short test summary info ===)
+             # or individual FAILED lines
+             if "FAILED" in line:
+                 # Extract test name and brief error
+                 failed_match = re.match(r"FAILED\s+(\S+)(?:\s+-\s+(.+))?", line.strip())
+                 if failed_match:
+                     test_name = failed_match.group(1)
+                     error_brief = failed_match.group(2) or ""
+                     if error_brief:
+                         result["failed_tests"].append(f"`{test_name}` - {error_brief[:100]}")
+                     else:
+                         result["failed_tests"].append(f"`{test_name}`")
+
+             # Match coverage summary
+             if "TOTAL" in line and "%" in line:
+                 result["coverage"] = line.strip()
+             elif re.match(r"^(Lines|Branches|Coverage):\s*\d+%", line.strip()):
+                 result["coverage"] += line.strip() + "\n"
+
+             # Collect warnings (pytest warnings summary)
+             if "warning" in line.lower() and "PytestWarning" not in line:
+                 warning_text = line.strip()[:150]
+                 if warning_text and warning_text not in result["warnings"]:
+                     result["warnings"].append(warning_text)
+
+         # Also look for assertion errors in failed test output
+         if not result["failed_tests"]:
+             # Try to find test failures from assertion errors
+             for i, line in enumerate(lines):
+                 if "AssertionError" in line or "Error:" in line:
+                     # Look backwards for test name
+                     for j in range(max(0, i - 5), i):
+                         test_match = re.search(r"(test_\w+)", lines[j])
+                         if test_match:
+                             error_brief = line.strip()[:80]
+                             test_entry = f"`{test_match.group(1)}` - {error_brief}"
+                             if test_entry not in result["failed_tests"]:
+                                 result["failed_tests"].append(test_entry)
+                             break
+
+         return result
+
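Run against typical pytest output (the text below is fabricated), the parser picks out the summary line, duration, failed tests, and coverage roughly as follows:

raw = """
FAILED tests/test_auth.py::test_login - AssertionError: expected 200, got 401
=========== 1 failed, 41 passed in 12.34s ===========
TOTAL    512     37    93%
"""
summary = ValidationRunner()._parse_test_output(raw)
# summary["counts"]       == "=========== 1 failed, 41 passed in 12.34s ==========="
# summary["duration"]     == "12.34s"
# summary["failed_tests"] == ["`tests/test_auth.py::test_login` - AssertionError: expected 200, got 401"]
# summary["coverage"]     == "TOTAL    512     37    93%"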
+     def _check_artifact_markers(
+         self, stage_config: StageValidation, task_name: str
+     ) -> ValidationResult:
+         """Check for pass/fail markers in an artifact."""
+         artifact_name = stage_config.artifact
+         if not artifact_name:
+             return ValidationResult(True, "No artifact to check")
+
+         content = read_artifact(artifact_name, task_name)
+         if not content:
+             return ValidationResult(
+                 False,
+                 f"{artifact_name} not found or empty",
+                 rollback_to="DEV",
+             )
+
+         content_upper = content.upper()
+
+         if stage_config.pass_marker and stage_config.pass_marker in content_upper:
+             return ValidationResult(True, f"{artifact_name}: approved")
+
+         if stage_config.fail_marker and stage_config.fail_marker in content_upper:
+             return ValidationResult(
+                 False,
+                 f"{artifact_name}: changes requested",
+                 rollback_to="DEV",
+             )
+
+         # Markers unclear - prompt user to decide instead of retry loop
+         return ValidationResult(
+             False,
+             f"{artifact_name}: unclear result - must contain {stage_config.pass_marker} or {stage_config.fail_marker}",
+             output=truncate_text(content, 2000),
+             needs_user_decision=True,
+         )
+
+     def _check_qa_report(self, task_name: str) -> ValidationResult:
+         """Check QA_DECISION file first, then fall back to QA_REPORT.md parsing."""
+         return validate_stage_decision("QA", task_name, "QA_REPORT.md")
+
+     def _validate_with_defaults(self, stage: str, task_name: str) -> ValidationResult:
+         """
+         Validate a stage using built-in default logic.
+
+         Used when no validation config exists for a stage. Implements
+         sensible defaults for each stage:
+         - PM: Requires SPEC.md and PLAN.md
+         - DESIGN: Requires DESIGN.md or DESIGN_SKIP.md
+         - DEV: Always passes (QA will validate)
+         - TEST: Requires TEST_PLAN.md
+         - QA: Checks QA_REPORT.md for PASS/FAIL
+         - SECURITY: Checks SECURITY_CHECKLIST.md for APPROVED/REJECTED
+         - REVIEW: Checks REVIEW_NOTES.md for APPROVE/REQUEST_CHANGES
+         - DOCS: Requires DOCS_REPORT.md
+
+         Args:
+             stage: The stage name (case-insensitive).
+             task_name: Task name for artifact lookups.
+
+         Returns:
+             ValidationResult based on stage-specific defaults.
+         """
+         stage_upper = stage.upper()
+
+         # PM stage - check for SPEC.md and PLAN.md
+         if stage_upper == "PM":
+             if not artifact_exists("SPEC.md", task_name):
+                 return ValidationResult(False, "SPEC.md not found")
+             if not artifact_exists("PLAN.md", task_name):
+                 return ValidationResult(False, "PLAN.md not found")
+             return ValidationResult(True, "PM stage validated")
+
+         # DESIGN stage - check for DESIGN.md or skip marker
+         if stage_upper == "DESIGN":
+             if artifact_exists("DESIGN_SKIP.md", task_name):
+                 return ValidationResult(True, "Design skipped")
+             if not artifact_exists("DESIGN.md", task_name):
+                 return ValidationResult(False, "DESIGN.md not found")
+             return ValidationResult(True, "Design stage validated")
+
+         # DEV stage - just check Claude completed
+         if stage_upper == "DEV":
+             return ValidationResult(True, "DEV stage completed - QA will validate")
+
+         # TEST stage - check TEST_DECISION file for pass/fail/blocked
+         if stage_upper == "TEST":
+             from galangal.core.state import Stage, get_decision_config
+
+             if not artifact_exists("TEST_PLAN.md", task_name):
+                 return ValidationResult(False, "TEST_PLAN.md not found")
+
+             # Check for decision file first using centralized config
+             decision = read_decision_file("TEST", task_name)
+             test_decision_config = get_decision_config(Stage.TEST) or {}
+             if decision and decision in test_decision_config:
+                 success, message, rollback_to, is_fast_track = test_decision_config[decision]
+                 return ValidationResult(
+                     success, message, rollback_to=rollback_to, is_fast_track=is_fast_track
+                 )
+
+             # No decision file - check TEST_PLAN.md content for markers
+             report = read_artifact("TEST_PLAN.md", task_name) or ""
+             report_upper = report.upper()
+
+             # Check for explicit BLOCKED marker (implementation bugs)
+             if "##BLOCKED##" in report or "## BLOCKED" in report_upper:
+                 return ValidationResult(
+                     False,
+                     "Tests blocked by implementation issues - needs DEV fix",
+                     rollback_to="DEV",
+                 )
+
+             # Check for FAIL status in the report
+             if "**STATUS:** FAIL" in report or "STATUS: FAIL" in report_upper:
+                 return ValidationResult(
+                     False,
+                     "Tests failed - needs DEV fix",
+                     rollback_to="DEV",
+                 )
+
+             # Check for PASS status
+             if "**STATUS:** PASS" in report or "STATUS: PASS" in report_upper:
+                 return ValidationResult(True, "Tests passed")
+
+             # No clear status - require user decision
+             return ValidationResult(
+                 False,
+                 "TEST_DECISION file missing - confirm test results",
+                 output=truncate_text(report, 2000),
+                 needs_user_decision=True,
+             )
+
+         # QA stage - use generic decision validation
+         if stage_upper == "QA":
+             return validate_stage_decision("QA", task_name, "QA_REPORT.md")
+
+         # SECURITY stage - use generic decision validation
+         if stage_upper == "SECURITY":
+             return validate_stage_decision(
+                 "SECURITY",
+                 task_name,
+                 "SECURITY_CHECKLIST.md",
+                 skip_artifact="SECURITY_SKIP.md",
+             )
+
+         # REVIEW stage - use generic decision validation
+         if stage_upper == "REVIEW":
+             return validate_stage_decision("REVIEW", task_name, "REVIEW_NOTES.md")
+
+         # DOCS stage - check for DOCS_REPORT.md
+         if stage_upper == "DOCS":
+             if not artifact_exists("DOCS_REPORT.md", task_name):
+                 return ValidationResult(False, "DOCS_REPORT.md not found")
+             return ValidationResult(True, "Docs stage validated")
+
+         # Default: pass
+         return ValidationResult(True, f"{stage} completed")
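Putting it together: assuming a stage has no entry under validation in .galangal/config.yaml, validate_stage() falls back to the defaults above. A final hedged sketch (task name invented):

runner = ValidationRunner()
result = runner.validate_stage("PM", "add-login-endpoint")
# With no PM validation config, this reduces to _validate_with_defaults("PM", ...):
# success only if both SPEC.md and PLAN.md exist for the task, otherwise
# ValidationResult(False, "SPEC.md not found") or (False, "PLAN.md not found").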