prizmkit 1.1.69 → 1.1.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/bundled/VERSION.json +3 -3
  2. package/bundled/dev-pipeline/lib/common.sh +427 -0
  3. package/bundled/dev-pipeline/lib/heartbeat.sh +36 -0
  4. package/bundled/dev-pipeline/run-feature.sh +109 -29
  5. package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +11 -12
  6. package/bundled/dev-pipeline/scripts/parse-stream-progress.py +160 -3
  7. package/bundled/dev-pipeline/scripts/update-feature-status.py +27 -3
  8. package/bundled/dev-pipeline/templates/agent-prompts/dev-implement.md +36 -22
  9. package/bundled/dev-pipeline/templates/agent-prompts/reviewer-review.md +1 -1
  10. package/bundled/dev-pipeline/templates/bugfix-bootstrap-prompt.md +24 -21
  11. package/bundled/dev-pipeline/templates/refactor-bootstrap-prompt.md +13 -26
  12. package/bundled/dev-pipeline/templates/sections/ac-verification-checklist.md +4 -10
  13. package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +1 -0
  14. package/bundled/dev-pipeline/templates/sections/feature-context.md +16 -11
  15. package/bundled/dev-pipeline/templates/sections/phase-browser-verification-auto.md +17 -26
  16. package/bundled/dev-pipeline/templates/sections/phase-browser-verification-opencli.md +1 -1
  17. package/bundled/dev-pipeline/templates/sections/phase-browser-verification.md +1 -1
  18. package/bundled/dev-pipeline/templates/sections/phase-commit-full.md +11 -0
  19. package/bundled/dev-pipeline/templates/sections/phase-commit.md +11 -0
  20. package/bundled/dev-pipeline/templates/sections/phase-context-snapshot-base.md +1 -1
  21. package/bundled/dev-pipeline/templates/sections/phase-implement-agent.md +2 -9
  22. package/bundled/dev-pipeline/templates/sections/phase-implement-full.md +2 -9
  23. package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +8 -17
  24. package/bundled/dev-pipeline/templates/sections/phase-plan-lite.md +1 -1
  25. package/bundled/dev-pipeline/templates/sections/phase-review-full.md +1 -1
  26. package/bundled/dev-pipeline/templates/sections/phase-specify-plan-full.md +1 -1
  27. package/bundled/dev-pipeline/templates/sections/task-contract.md +34 -0
  28. package/bundled/dev-pipeline/templates/sections/test-failure-recovery-agent.md +27 -46
  29. package/bundled/dev-pipeline/templates/sections/test-failure-recovery-lite.md +27 -37
  30. package/bundled/dev-pipeline/tests/test_generate_bootstrap_prompt.py +13 -0
  31. package/bundled/dev-pipeline-windows/lib/common.ps1 +61 -1
  32. package/bundled/dev-pipeline-windows/lib/pipeline.ps1 +299 -14
  33. package/bundled/dev-pipeline-windows/scripts/generate-bootstrap-prompt.py +11 -12
  34. package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +160 -3
  35. package/bundled/dev-pipeline-windows/scripts/update-feature-status.py +27 -3
  36. package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-implement.md +36 -22
  37. package/bundled/dev-pipeline-windows/templates/agent-prompts/reviewer-review.md +1 -1
  38. package/bundled/dev-pipeline-windows/templates/bugfix-bootstrap-prompt.md +24 -21
  39. package/bundled/dev-pipeline-windows/templates/refactor-bootstrap-prompt.md +13 -26
  40. package/bundled/dev-pipeline-windows/templates/sections/ac-verification-checklist.md +4 -10
  41. package/bundled/dev-pipeline-windows/templates/sections/context-budget-rules.md +1 -0
  42. package/bundled/dev-pipeline-windows/templates/sections/feature-context.md +16 -11
  43. package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification-auto.md +22 -10
  44. package/bundled/dev-pipeline-windows/templates/sections/phase-commit-full.md +11 -0
  45. package/bundled/dev-pipeline-windows/templates/sections/phase-commit.md +11 -0
  46. package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-base.md +1 -1
  47. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-agent.md +2 -9
  48. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-full.md +2 -9
  49. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-lite.md +8 -19
  50. package/bundled/dev-pipeline-windows/templates/sections/phase-plan-lite.md +1 -1
  51. package/bundled/dev-pipeline-windows/templates/sections/phase-review-full.md +1 -1
  52. package/bundled/dev-pipeline-windows/templates/sections/phase-specify-plan-full.md +1 -1
  53. package/bundled/dev-pipeline-windows/templates/sections/task-contract.md +34 -0
  54. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-agent.md +27 -46
  55. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-lite.md +27 -37
  56. package/bundled/skills/_metadata.json +1 -1
  57. package/package.json +1 -1
@@ -335,6 +335,242 @@ function Invoke-PrizmPipeline {
335
335
  return $LASTEXITCODE -eq 0
336
336
  }
337
337
 
338
+ function Get-PrizmFeatureSlugFromList {
339
+ param([string]$ListPath, [string]$FeatureId)
340
+ if (-not (Test-Path $ListPath)) { return '' }
341
+ try { $data = Get-Content $ListPath -Raw | ConvertFrom-Json } catch { return '' }
342
+ foreach ($feature in @($data.features)) {
343
+ if ($feature.id -eq $FeatureId) {
344
+ $number = ([string]$feature.id).Replace('F-', '').Replace('f-', '').PadLeft(3, '0')
345
+ $title = ([string]$feature.title).ToLowerInvariant()
346
+ $title = [regex]::Replace($title, '[^a-z0-9\s-]', '')
347
+ $title = [regex]::Replace($title.Trim(), '[\s]+', '-')
348
+ $title = [regex]::Replace($title, '-+', '-').Trim('-')
349
+ if ($title) { return "$number-$title" }
350
+ return $number
351
+ }
352
+ }
353
+ return ''
354
+ }
355
+
356
+ function Test-PrizmCheckpointComplete {
357
+ param([string]$CheckpointPath)
358
+ if (-not (Test-Path $CheckpointPath)) { return $false }
359
+ try { $checkpoint = Get-Content $CheckpointPath -Raw | ConvertFrom-Json } catch { return $false }
360
+ if (-not $checkpoint.steps) { return $false }
361
+ foreach ($step in @($checkpoint.steps)) {
362
+ if ($step.status -notin @('completed', 'skipped')) { return $false }
363
+ }
364
+ return $true
365
+ }
366
+
367
+ function Get-PrizmFeatureTitleFromList {
368
+ param([string]$ListPath, [string]$FeatureId)
369
+ if (-not (Test-Path $ListPath)) { return '' }
370
+ try { $data = Get-Content $ListPath -Raw | ConvertFrom-Json } catch { return '' }
371
+ foreach ($feature in @($data.features)) {
372
+ if ($feature.id -eq $FeatureId) { return [string]$feature.title }
373
+ }
374
+ return ''
375
+ }
376
+
377
+ function Get-PrizmTitleWords {
378
+ param([string]$Text)
379
+ $matches = [regex]::Matches(([string]$Text).ToLowerInvariant(), '[a-z0-9]{3,}')
380
+ $words = @()
381
+ foreach ($match in $matches) { $words += [string]$match.Value }
382
+ return @($words)
383
+ }
384
+
385
+ function Test-PrizmCommitMatchesFeatureTitle {
386
+ param([string]$Subject, [string]$FeatureTitle)
387
+ $titleWords = @(Get-PrizmTitleWords $FeatureTitle)
388
+ if ($titleWords.Count -eq 0) { return $false }
389
+ $subjectWords = @(Get-PrizmTitleWords $Subject)
390
+ $subjectSet = @{}
391
+ foreach ($word in $subjectWords) { $subjectSet[$word] = $true }
392
+ $required = if ($titleWords.Count -le 3) { $titleWords.Count } else { [Math]::Max(3, [int][Math]::Ceiling($titleWords.Count * 0.75)) }
393
+ $matched = 0
394
+ foreach ($word in $titleWords) {
395
+ if ($subjectSet.ContainsKey($word)) { $matched++ }
396
+ }
397
+ return $matched -ge $required
398
+ }
399
+
400
+ function Get-PrizmFeatureCommit {
401
+ param([string]$ProjectRoot, [string]$BaseCommit, [string]$FeatureId, [bool]$AllowFallback = $false, [string]$FeatureTitle = '')
402
+ $range = if ($BaseCommit) { "$BaseCommit..HEAD" } else { 'HEAD' }
403
+ $lines = & git -C $ProjectRoot log $range '--format=%H%x09%s' 2>$null
404
+ if ($LASTEXITCODE -ne 0) { return '' }
405
+ foreach ($line in @($lines)) {
406
+ $parts = ([string]$line).Split("`t", 2)
407
+ if ($parts.Count -lt 2) { continue }
408
+ $subject = $parts[1]
409
+ if ($subject.Contains($FeatureId) -and $subject -notmatch '^wip(\(|:)') { return $parts[0] }
410
+ }
411
+ if ($AllowFallback -and $FeatureTitle) {
412
+ foreach ($line in @($lines)) {
413
+ $parts = ([string]$line).Split("`t", 2)
414
+ if ($parts.Count -lt 2) { continue }
415
+ $subject = $parts[1]
416
+ if ($subject -notmatch '^wip(\(|:)' -and (Test-PrizmCommitMatchesFeatureTitle $subject $FeatureTitle)) { return $parts[0] }
417
+ }
418
+ }
419
+ return ''
420
+ }
421
+
422
+ function Get-PrizmFeatureSemanticCompletion {
423
+ param([string]$ProjectRoot, [string]$ListPath, [string]$FeatureId, [string]$BaseCommit, [string]$PrizmkitDir)
424
+ $slug = Get-PrizmFeatureSlugFromList $ListPath $FeatureId
425
+ if (-not $slug) { return $null }
426
+ $checkpointPath = Join-Path $PrizmkitDir "specs\$slug\workflow-checkpoint.json"
427
+ if (-not (Test-PrizmCheckpointComplete $checkpointPath)) { return $null }
428
+ $featureTitle = Get-PrizmFeatureTitleFromList $ListPath $FeatureId
429
+ $commitSha = Get-PrizmFeatureCommit $ProjectRoot $BaseCommit $FeatureId $true $featureTitle
430
+ if (-not $commitSha) { return $null }
431
+ return [pscustomobject]@{ Slug = $slug; CommitSha = $commitSha; CheckpointPath = $checkpointPath }
432
+ }
433
+
434
+ function Save-PrizmPostCompletionDirtyArtifacts {
435
+ param([string]$ProjectRoot, [string]$ArtifactDir, [string]$ItemId, [string]$SessionId)
436
+ $status = & git -C $ProjectRoot status --porcelain --untracked-files=all 2>$null
437
+ if ([string]::IsNullOrWhiteSpace(($status -join "`n"))) { return $true }
438
+
439
+ New-Item -ItemType Directory -Force -Path $ArtifactDir | Out-Null
440
+ ($status -join "`n") | Set-Content -Path (Join-Path $ArtifactDir 'post-completion-status.txt') -Encoding UTF8
441
+ & git -C $ProjectRoot diff --binary | Set-Content -Path (Join-Path $ArtifactDir 'post-completion-dirty.patch') -Encoding UTF8
442
+ if ($LASTEXITCODE -ne 0) { return $false }
443
+ & git -C $ProjectRoot diff --cached --binary | Set-Content -Path (Join-Path $ArtifactDir 'post-completion-staged.patch') -Encoding UTF8
444
+ if ($LASTEXITCODE -ne 0) { return $false }
445
+
446
+ $untracked = & git -C $ProjectRoot ls-files --others --exclude-standard 2>$null
447
+ $manifest = Join-Path $ArtifactDir 'post-completion-untracked.txt'
448
+ @($untracked) | Set-Content -Path $manifest -Encoding UTF8
449
+ $untrackedDir = Join-Path $ArtifactDir 'untracked'
450
+ foreach ($rel in @($untracked)) {
451
+ if (-not $rel) { continue }
452
+ $source = Join-Path $ProjectRoot $rel
453
+ $dest = Join-Path $untrackedDir $rel
454
+ $destParent = Split-Path $dest -Parent
455
+ if ($destParent) { New-Item -ItemType Directory -Force -Path $destParent | Out-Null }
456
+ if (Test-Path $source -PathType Leaf) { Copy-Item -LiteralPath $source -Destination $dest -Force }
457
+ elseif (Test-Path $source -PathType Container) { New-Item -ItemType Directory -Force -Path $dest | Out-Null }
458
+ }
459
+
460
+ @(
461
+ '# Post-completion dirty changes preserved',
462
+ '',
463
+ "- Feature: $ItemId",
464
+ "- Session: $SessionId",
465
+ '- Reason: workflow checkpoint and feature commit were already complete, but delayed post-commit activity left the working tree dirty.',
466
+ '',
467
+ '## Recovery guidance',
468
+ '',
469
+ 'The finalized feature commit was kept unchanged for merge. Review these follow-up artifacts separately; do not assume they were merged:',
470
+ '',
471
+ '- `post-completion-status.txt` — original dirty working tree status',
472
+ '- `post-completion-dirty.patch` — unstaged tracked changes',
473
+ '- `post-completion-staged.patch` — staged changes',
474
+ '- `post-completion-untracked.txt` and `untracked/` — untracked files copied before cleanup'
475
+ ) | Set-Content -Path (Join-Path $ArtifactDir 'post-completion-findings.md') -Encoding UTF8
476
+
477
+ & git -C $ProjectRoot reset --hard *> $null
478
+ if ($LASTEXITCODE -ne 0) { return $false }
479
+ foreach ($rel in @($untracked)) {
480
+ if (-not $rel -or $rel -like '.prizmkit/*') { continue }
481
+ $target = Join-Path $ProjectRoot $rel
482
+ if (Test-Path $target) { Remove-Item -LiteralPath $target -Recurse -Force -ErrorAction SilentlyContinue }
483
+ }
484
+ $remaining = & git -C $ProjectRoot status --porcelain --untracked-files=all 2>$null | Where-Object { $_ -notmatch '^\?\? \.prizmkit/' }
485
+ return [string]::IsNullOrWhiteSpace(($remaining -join "`n"))
486
+ }
487
+
488
+ function Write-PrizmRuntimeFailureLog {
489
+ param(
490
+ [string]$FailureLog,
491
+ [string]$FeatureId,
492
+ [string]$SessionId,
493
+ [string]$SessionStatus,
494
+ [int]$ExitCode,
495
+ [string]$StaleKillMarker,
496
+ [string]$ProgressJson,
497
+ [string]$CheckpointPath,
498
+ [string]$ProjectRoot,
499
+ [string]$BaseCommit
500
+ )
501
+ if (-not $FailureLog -or (Test-Path $FailureLog)) { return }
502
+ $dir = Split-Path $FailureLog -Parent
503
+ if ($dir) { New-Item -ItemType Directory -Force -Path $dir | Out-Null }
504
+ $stale = if (Test-Path $StaleKillMarker) { Get-Content $StaleKillMarker -Raw } else { 'No stale-kill marker.' }
505
+ $progressLines = @('Progress data unavailable.')
506
+ if (Test-Path $ProgressJson) {
507
+ try {
508
+ $progress = Get-Content $ProgressJson -Raw | ConvertFrom-Json
509
+ $progressLines = @()
510
+ foreach ($key in @('fatal_error_code','api_error_status','api_error_code','current_phase','current_tool','last_text_snippet','terminal_result_text')) {
511
+ if ($progress.PSObject.Properties[$key] -and $progress.$key) { $progressLines += "- ${key}: $($progress.$key)" }
512
+ }
513
+ if ($progressLines.Count -eq 0) { $progressLines = @('Progress data contained no terminal fields.') }
514
+ } catch { $progressLines = @("Progress parse error: $($_.Exception.Message)") }
515
+ }
516
+ $checkpointLines = @('No checkpoint file found.')
517
+ if (Test-Path $CheckpointPath) {
518
+ try {
519
+ $checkpoint = Get-Content $CheckpointPath -Raw | ConvertFrom-Json
520
+ $steps = @($checkpoint.steps)
521
+ $complete = @($steps | Where-Object { $_.status -in @('completed','skipped') }).Count
522
+ $checkpointLines = @("$complete/$($steps.Count) steps completed_or_skipped")
523
+ foreach ($step in $steps) {
524
+ if ($step.status -notin @('completed','skipped')) { $checkpointLines += "- incomplete: $($step.id) $($step.skill) = $($step.status)" }
525
+ }
526
+ } catch { $checkpointLines = @("Checkpoint parse error: $($_.Exception.Message)") }
527
+ }
528
+ $latestCommit = (& git -C $ProjectRoot rev-parse --short HEAD 2>$null | Select-Object -First 1)
529
+ if (-not $latestCommit) { $latestCommit = 'unavailable' }
530
+ $featureCommit = if (Get-PrizmFeatureCommit $ProjectRoot $BaseCommit $FeatureId $false) { 'yes' } else { 'no' }
531
+ $dirty = & git -C $ProjectRoot status --short 2>$null
532
+ if ([string]::IsNullOrWhiteSpace(($dirty -join "`n"))) { $dirty = @('clean') }
533
+ @(
534
+ '# Runtime-synthesized failure log',
535
+ '',
536
+ '## Session',
537
+ '',
538
+ "- feature_id: $FeatureId",
539
+ "- session_id: $SessionId",
540
+ "- session_status: $SessionStatus",
541
+ "- exit_code: $ExitCode",
542
+ '',
543
+ '## Stale kill marker',
544
+ '',
545
+ '```json',
546
+ $stale,
547
+ '```',
548
+ '',
549
+ '## Progress',
550
+ '',
551
+ $progressLines,
552
+ '',
553
+ '## Checkpoint',
554
+ '',
555
+ $checkpointLines,
556
+ '',
557
+ '## Git state',
558
+ '',
559
+ "- feature_commit_exists: $featureCommit",
560
+ "- latest_commit: $latestCommit",
561
+ '',
562
+ '```text',
563
+ $dirty,
564
+ '```',
565
+ '',
566
+ '## Recommended recovery action',
567
+ '',
568
+ '- If this is an AI runtime/provider error before checkpoint completion, retry the session with a fresh context.',
569
+ '- If checkpoint completion and a feature commit both exist, inspect post-completion artifacts and finalize manually rather than rebuilding from scratch.',
570
+ '- If the working tree is dirty, preserve or review those changes before any reset or merge.'
571
+ ) | Set-Content -Path $FailureLog -Encoding UTF8
572
+ }
573
+
338
574
  function New-PrizmDefaultDevBranchName {
339
575
  param([string]$Kind, [string]$CurrentItemId)
340
576
  $timestamp = Get-Date -Format 'yyyyMMddHHmm'
@@ -588,6 +824,17 @@ function Invoke-PrizmPipeline {
588
824
  }
589
825
 
590
826
  $effectiveStaleKillThreshold = Get-PrizmEffectiveStaleKillThreshold -ProgressFile $progressJson -BaseThreshold $staleKillThreshold
827
+ $fatalErrorCode = Get-PrizmProgressFatalErrorCode -ProgressFile $progressJson
828
+ if ($fatalErrorCode) {
829
+ $wasStaleKilled = $true
830
+ Write-PrizmWarn "Session hit fatal AI runtime error: $fatalErrorCode"
831
+ $fatalErrorMarker = Join-Path $logsDir 'fatal-error.json'
832
+ Write-PrizmFatalErrorMarker $fatalErrorMarker $fatalErrorCode $staleSeconds $effectiveStaleKillThreshold
833
+ Write-PrizmFatalErrorMarker $staleKillMarker $fatalErrorCode $staleSeconds $effectiveStaleKillThreshold
834
+ Stop-PrizmSessionProcess $pidPath
835
+ if ($staleKillGraceSeconds -gt 0) { Start-Sleep -Seconds $staleKillGraceSeconds }
836
+ break
837
+ }
591
838
  if ($effectiveStaleKillThreshold -gt 0 -and $staleSeconds -ge $effectiveStaleKillThreshold) {
592
839
  $wasStaleKilled = $true
593
840
  Write-PrizmWarn "Session stale-killed (no progress for ${effectiveStaleKillThreshold}s)"
@@ -619,9 +866,23 @@ function Invoke-PrizmPipeline {
619
866
  Stop-PrizmProgressParser $parserProcess
620
867
 
621
868
  $wasInfraError = ($exitCode -ne 0 -and (Test-PrizmInfraError -SessionLog $sessionLog -ProgressJson $progressJson))
869
+ $wasAiRuntimeError = Test-PrizmAiRuntimeError -SessionLog $sessionLog -ProgressJson $progressJson
870
+ $semanticCompletion = if ($Kind -eq 'feature' -and $isGitRepository) {
871
+ Get-PrizmFeatureSemanticCompletion $paths.ProjectRoot $listPath $CurrentItemId $baseCommit $paths.PrizmkitDir
872
+ } else { $null }
622
873
 
623
874
  $status = 'crashed'
624
- if ($wasTimedOut) {
875
+ if ($semanticCompletion) {
876
+ $status = 'success'
877
+ if ($exitCode -ne 0 -or $wasStaleKilled -or $wasTimedOut -or $wasAiRuntimeError) {
878
+ Write-PrizmWarn "Session ended with a failure signal after semantic completion; treating as finalized success"
879
+ Write-PrizmWarn "Semantic completion commit: $($semanticCompletion.CommitSha)"
880
+ }
881
+ } elseif ($wasAiRuntimeError) {
882
+ $status = 'infra_error'
883
+ Write-PrizmWarn "AI session failed due to structured AI runtime/context error"
884
+ Write-PrizmWarn "AI runtime errors are retried without consuming code retry budget"
885
+ } elseif ($wasTimedOut) {
625
886
  $status = 'timed_out'
626
887
  Write-PrizmWarn "AI session timed out after $timeoutSeconds seconds"
627
888
  } elseif ($wasInfraError) {
@@ -653,13 +914,17 @@ function Invoke-PrizmPipeline {
653
914
  $mergeSucceeded = $true
654
915
  $itemListStatus = ''
655
916
  if ($status -eq 'success') {
656
- $updateResult = Invoke-PrizmPythonJson $python (@((Join-Path $paths.ScriptsDir $updateScript), $listOption, $listPath, '--state-dir', $stateDir, '--action', 'update', $idOption, $CurrentItemId, '--session-id', $sessionId, '--session-status', $status) + $maxRetryArgs)
657
- if ($updateResult -and $updateResult.PSObject.Properties['new_status']) {
658
- $itemListStatus = [string]$updateResult.new_status
659
- }
660
-
661
917
  if (Test-PrizmGitDirty $paths.ProjectRoot) {
662
- if ($hadDirtyBaseline) {
918
+ if ($semanticCompletion) {
919
+ $artifactDir = Join-Path $paths.PrizmkitDir "specs\$($semanticCompletion.Slug)"
920
+ if (Save-PrizmPostCompletionDirtyArtifacts $paths.ProjectRoot $artifactDir $CurrentItemId $sessionId) {
921
+ Write-PrizmWarn "Post-completion dirty changes preserved under $artifactDir"
922
+ Write-PrizmWarn "They were not included in the finalized feature commit."
923
+ } else {
924
+ Write-PrizmWarn "Could not safely preserve post-completion dirty changes; preserving dev branch for manual finalization"
925
+ $status = 'finalization_needed'
926
+ }
927
+ } elseif ($hadDirtyBaseline) {
663
928
  Write-PrizmInfo "Auto-committing pipeline bookkeeping artifacts only."
664
929
  Invoke-PrizmGitIncludeBookkeepingArtifacts $paths.ProjectRoot $stateDir $listPath
665
930
  } else {
@@ -668,13 +933,20 @@ function Invoke-PrizmPipeline {
668
933
  }
669
934
  }
670
935
 
671
- if ($isGitRepository -and $devBranchName) {
672
- if (Merge-PrizmDevBranch $paths.ProjectRoot $devBranchName $originalBranch $autoPush) {
673
- $devBranchName = ''
674
- } else {
675
- $mergeSucceeded = $false
676
- $status = 'merge_conflict'
677
- Write-PrizmWarn "Auto-merge failed - dev branch preserved for inspection"
936
+ if ($status -eq 'success') {
937
+ $updateResult = Invoke-PrizmPythonJson $python (@((Join-Path $paths.ScriptsDir $updateScript), $listOption, $listPath, '--state-dir', $stateDir, '--action', 'update', $idOption, $CurrentItemId, '--session-id', $sessionId, '--session-status', $status) + $maxRetryArgs)
938
+ if ($updateResult -and $updateResult.PSObject.Properties['new_status']) {
939
+ $itemListStatus = [string]$updateResult.new_status
940
+ }
941
+
942
+ if ($isGitRepository -and $devBranchName) {
943
+ if (Merge-PrizmDevBranch $paths.ProjectRoot $devBranchName $originalBranch $autoPush) {
944
+ $devBranchName = ''
945
+ } else {
946
+ $mergeSucceeded = $false
947
+ $status = 'merge_conflict'
948
+ Write-PrizmWarn "Auto-merge failed - dev branch preserved for inspection"
949
+ }
678
950
  }
679
951
  }
680
952
  } elseif ($isGitRepository -and $devBranchName) {
@@ -685,7 +957,20 @@ function Invoke-PrizmPipeline {
685
957
  Restore-PrizmOriginalBranch $paths.ProjectRoot $originalBranch $devBranchName | Out-Null
686
958
  }
687
959
 
960
+ if ($status -eq 'success' -and $mergeSucceeded -and $isGitRepository) {
961
+ Invoke-PrizmGitCommitPath $paths.ProjectRoot $listPath "chore($CurrentItemId): update $idName status" | Out-Null
962
+ }
963
+
688
964
  if ($status -ne 'success') {
965
+ if ($Kind -eq 'feature') {
966
+ $failureSlug = if ($semanticCompletion) { [string]$semanticCompletion.Slug } else { Get-PrizmFeatureSlugFromList $listPath $CurrentItemId }
967
+ if ($failureSlug) {
968
+ $featureArtifactDir = Join-Path $paths.PrizmkitDir "specs\$failureSlug"
969
+ $failureLog = Join-Path $featureArtifactDir 'failure-log.md'
970
+ $checkpointPath = Join-Path $featureArtifactDir 'workflow-checkpoint.json'
971
+ Write-PrizmRuntimeFailureLog $failureLog $CurrentItemId $sessionId $status $exitCode $staleKillMarker $progressJson $checkpointPath $paths.ProjectRoot $baseCommit
972
+ }
973
+ }
689
974
  $updateResult = Invoke-PrizmPythonJson $python (@((Join-Path $paths.ScriptsDir $updateScript), $listOption, $listPath, '--state-dir', $stateDir, '--action', 'update', $idOption, $CurrentItemId, '--session-id', $sessionId, '--session-status', $status) + $maxRetryArgs)
690
975
  if ($updateResult -and $updateResult.PSObject.Properties['new_status']) {
691
976
  $itemListStatus = [string]$updateResult.new_status
@@ -286,7 +286,7 @@ def extract_baseline_failures(test_commands, project_root):
286
286
  def format_ac_checklist(acceptance_criteria):
287
287
  """Format acceptance criteria as a markdown checkbox list."""
288
288
  if not acceptance_criteria:
289
- return "- [ ] (no acceptance criteria specified)"
289
+ return "- (no Verification Gates specified)"
290
290
  lines = []
291
291
  for item in acceptance_criteria:
292
292
  lines.append("- [ ] {}".format(item))
@@ -323,10 +323,10 @@ def format_user_context(user_context):
323
323
  if not items:
324
324
  return ""
325
325
  lines = [
326
- "### User-Provided Context (HIGHEST PRIORITY)",
327
- "",
328
- "> The following materials were provided by the user. "
329
- "They take precedence over AI inference.",
326
+ "> These materials were provided by the user and are authoritative "
327
+ "when they clarify or constrain this feature. They do not expand "
328
+ "the current scope by themselves; use the Task Contract to decide "
329
+ "what belongs to this session.",
330
330
  "",
331
331
  ]
332
332
  for item in items:
@@ -970,6 +970,10 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
970
970
  mission += "\n\n" + tier_desc
971
971
  sections.append(("mission", mission))
972
972
 
973
+ # --- Task Contract: single source of current scope and gates ---
974
+ sections.append(("task-contract",
975
+ load_section(sections_dir, "task-contract.md")))
976
+
973
977
  # --- Feature Context (XML-wrapped, optimization 3) ---
974
978
  sections.append(("feature-context",
975
979
  load_section(sections_dir, "feature-context.md")))
@@ -1079,13 +1083,8 @@ def assemble_sections(pipeline_mode, sections_dir, init_done, is_resume,
1079
1083
  load_section(sections_dir,
1080
1084
  "test-failure-recovery-agent.md")))
1081
1085
 
1082
- # --- AC Verification Checklist (all tiers) ---
1083
- ac_checklist_path = os.path.join(sections_dir, "ac-verification-checklist.md")
1084
- if os.path.isfile(ac_checklist_path):
1085
- sections.append(("ac-verification-checklist",
1086
- load_section(sections_dir,
1087
- "ac-verification-checklist.md")))
1088
-
1086
+ # Verification Gates are included in Task Contract. Keep AC in one place so
1087
+ # background context and implementation prompts cannot redefine scope.
1089
1088
  # --- Review (only for agent tiers) ---
1090
1089
  if pipeline_mode == "full":
1091
1090
  sections.append(("phase-review",
@@ -17,6 +17,7 @@ The script runs until:
17
17
  import argparse
18
18
  import json
19
19
  import os
20
+ import re
20
21
  import signal
21
22
  import sys
22
23
  import tempfile
@@ -59,6 +60,58 @@ PHASE_KEYWORDS = {
59
60
  },
60
61
  }
61
62
 
63
+ CONTEXT_ERROR_PATTERNS = [
64
+ re.compile(pattern, re.IGNORECASE)
65
+ for pattern in (
66
+ r"context_too_large",
67
+ r"model_context_window_exceeded",
68
+ r"Your input exceeds the context window",
69
+ r"input exceeds the context window",
70
+ r"context window of this model",
71
+ r"context window exceeded",
72
+ r"invalid_request_error.*context window",
73
+ r"context window.*invalid_request_error",
74
+ )
75
+ ]
76
+
77
+ ERROR_CONTEXT_PATTERNS = [
78
+ re.compile(pattern, re.IGNORECASE)
79
+ for pattern in (
80
+ r"\bapi error\b",
81
+ r"invalid_request_error",
82
+ r"\bstatus\s*[:=]?\s*(400|413)\b",
83
+ r"\bapi_error_status\b",
84
+ r"\bapi_error_code\b",
85
+ r"\blast_result_is_error\b\s*[\"':=]*\s*true\b",
86
+ r"\bis_error\b\s*[\"':=]*\s*true\b",
87
+ )
88
+ ]
89
+
90
+
91
+ def _has_error_context(text):
92
+ """Return true when free text looks like a runtime/provider error."""
93
+ if not text:
94
+ return False
95
+ return any(pattern.search(text) for pattern in ERROR_CONTEXT_PATTERNS)
96
+
97
+
98
+ def detect_api_error_code(text, require_error_context=False):
99
+ """Return a normalized fatal/runtime error code from terminal text.
100
+
101
+ Structured terminal result/error events and raw stderr can be matched
102
+ directly. Ordinary assistant prose is noisier: it may mention the phrase
103
+ "input exceeds the context window" while explaining a test or recovery
104
+ rule, so callers can require additional error-like context there.
105
+ """
106
+ if not text:
107
+ return ""
108
+ if require_error_context and not _has_error_context(text):
109
+ return ""
110
+ for pattern in CONTEXT_ERROR_PATTERNS:
111
+ if pattern.search(text):
112
+ return "context_too_large"
113
+ return ""
114
+
62
115
 
63
116
  class ProgressTracker:
64
117
  """Tracks progress state from stream-json events."""
@@ -73,6 +126,12 @@ class ProgressTracker:
73
126
  self.tool_call_counts = Counter()
74
127
  self.total_tool_calls = 0
75
128
  self.last_text_snippet = ""
129
+ self.last_result_is_error = False
130
+ self.api_error_status = None
131
+ self.api_error_code = ""
132
+ self.terminal_result_text = ""
133
+ self.terminal_success_at = ""
134
+ self.fatal_error_code = ""
76
135
  self.is_active = True
77
136
  self.errors = []
78
137
  self.event_format = ""
@@ -164,11 +223,13 @@ class ProgressTracker:
164
223
  elif event_type == "turn.failed":
165
224
  error = event.get("error") or event.get("message") or "Codex turn failed"
166
225
  self.errors.append(str(error))
226
+ self._detect_terminal_error(str(error))
167
227
  self.current_tool = None
168
228
 
169
229
  elif event_type == "error":
170
230
  error = event.get("error") or event.get("message") or "Unknown error"
171
231
  self.errors.append(str(error))
232
+ self._detect_terminal_error(str(error))
172
233
 
173
234
  return
174
235
 
@@ -196,6 +257,7 @@ class ProgressTracker:
196
257
  if text.strip():
197
258
  self.last_text_snippet = text.strip()[:120]
198
259
  self._detect_phase(text)
260
+ self._detect_terminal_error(text, require_error_context=True)
199
261
 
200
262
  elif event_type == "tool_result" or event_type == "user":
201
263
  # tool_result contains output from tool execution
@@ -274,6 +336,28 @@ class ProgressTracker:
274
336
  state.setdefault("subagent_type", "")
275
337
  self._update_claude_subagent_status_counts()
276
338
 
339
+ elif event_type == "result":
340
+ self.event_format = self.event_format or "stream-json"
341
+ self.is_active = False
342
+ result_text = event.get("result") or event.get("message") or ""
343
+ error_obj = event.get("error")
344
+ if isinstance(error_obj, dict):
345
+ error_text = " ".join(
346
+ str(error_obj.get(key) or "")
347
+ for key in ("type", "code", "message")
348
+ if error_obj.get(key)
349
+ )
350
+ result_text = " ".join(part for part in (str(result_text), error_text) if part)
351
+ api_error_code = event.get("api_error_code") or event.get("error_code") or ""
352
+ if isinstance(error_obj, dict) and not api_error_code:
353
+ api_error_code = error_obj.get("code") or error_obj.get("type") or ""
354
+ self._record_terminal_result(
355
+ text=str(result_text or ""),
356
+ is_error=bool(event.get("is_error")),
357
+ api_error_status=event.get("api_error_status"),
358
+ api_error_code=str(api_error_code or ""),
359
+ )
360
+
277
361
  # ── Claude API raw stream format ────────────────────────────
278
362
  elif event_type == "message_start":
279
363
  self.event_format = self.event_format or "stream-json"
@@ -316,6 +400,7 @@ class ProgressTracker:
316
400
  self.last_text_snippet = stripped[:120]
317
401
  # Try to detect phase from text
318
402
  self._detect_phase(text)
403
+ self._detect_terminal_error(text, require_error_context=True)
319
404
 
320
405
  elif delta_type == "input_json_delta":
321
406
  partial = delta.get("partial_json", "")
@@ -331,21 +416,73 @@ class ProgressTracker:
331
416
  self._extract_tool_summary(full_input)
332
417
  self._detect_phase(full_input)
333
418
  else:
334
- # Text block finished - detect phase from accumulated text
419
+ # Text block finished - detect phase and terminal errors from accumulated text
335
420
  if self._text_buffer:
336
421
  self._detect_phase(self._text_buffer)
422
+ self._detect_terminal_error(
423
+ self._text_buffer,
424
+ require_error_context=True,
425
+ )
337
426
  self._in_tool_use = False
338
427
  self._current_tool_input_parts = []
339
428
 
340
429
  elif event_type == "error":
341
430
  error_msg = event.get("error", {}).get("message", "Unknown error")
342
431
  self.errors.append(error_msg)
432
+ self._detect_terminal_error(str(error_msg))
343
433
 
344
434
  # Check for subagent indicator
345
435
  if event.get("parent_tool_use_id"):
346
436
  # This is a sub-agent event; tool name is still tracked normally
347
437
  pass
348
438
 
439
+ def _record_terminal_result(self, text="", is_error=False, api_error_status=None, api_error_code=""):
440
+ """Record a Claude Code terminal result event."""
441
+ terminal_text = str(text or "")
442
+ self.last_result_is_error = bool(is_error)
443
+ if api_error_status not in (None, ""):
444
+ try:
445
+ self.api_error_status = int(api_error_status)
446
+ except (TypeError, ValueError):
447
+ self.api_error_status = api_error_status
448
+ error_like_result = (
449
+ self.last_result_is_error
450
+ or api_error_status not in (None, "")
451
+ or bool(api_error_code)
452
+ or _has_error_context(terminal_text)
453
+ )
454
+ normalized_code = detect_api_error_code(
455
+ " ".join([str(api_error_code or ""), terminal_text]),
456
+ require_error_context=not error_like_result,
457
+ )
458
+ if normalized_code:
459
+ self.api_error_code = normalized_code
460
+ self.fatal_error_code = normalized_code
461
+ elif api_error_code:
462
+ self.api_error_code = str(api_error_code)
463
+ self.terminal_result_text = terminal_text[:1000]
464
+ if terminal_text.strip():
465
+ self.last_text_snippet = terminal_text.strip()[:120]
466
+ if not self.last_result_is_error and not self.fatal_error_code:
467
+ self.terminal_success_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
468
+ elif self.fatal_error_code:
469
+ self.errors.append(self.fatal_error_code)
470
+
471
+ def _detect_terminal_error(self, text, require_error_context=False):
472
+ """Detect fatal context-window errors from unstructured text."""
473
+ code = detect_api_error_code(
474
+ str(text or ""),
475
+ require_error_context=require_error_context,
476
+ )
477
+ if not code:
478
+ return
479
+ self.last_result_is_error = True
480
+ self.api_error_code = code
481
+ self.fatal_error_code = code
482
+ self.terminal_result_text = str(text or "")[:1000]
483
+ if text:
484
+ self.last_text_snippet = str(text).strip()[:120]
485
+
349
486
  def _detect_phase(self, text):
350
487
  """Detect pipeline phase from text content.
351
488
 
@@ -692,6 +829,12 @@ class ProgressTracker:
692
829
  "child_activity_signature": self.child_activity_signature,
693
830
  "last_child_activity_at": self.last_child_activity_at,
694
831
  "last_text_snippet": self.last_text_snippet,
832
+ "last_result_is_error": self.last_result_is_error,
833
+ "api_error_status": self.api_error_status,
834
+ "api_error_code": self.api_error_code,
835
+ "terminal_result_text": self.terminal_result_text,
836
+ "terminal_success_at": self.terminal_success_at,
837
+ "fatal_error_code": self.fatal_error_code,
695
838
  "is_active": self.is_active,
696
839
  "errors": self.errors[-10:], # Keep last 10 errors
697
840
  }
@@ -728,6 +871,12 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
728
871
  state["current_phase"],
729
872
  state["total_tool_calls"],
730
873
  state.get("child_activity_signature", ""),
874
+ state.get("last_result_is_error"),
875
+ state.get("api_error_status"),
876
+ state.get("api_error_code", ""),
877
+ state.get("fatal_error_code", ""),
878
+ state.get("terminal_result_text", ""),
879
+ tuple(state.get("errors", [])),
731
880
  )
732
881
 
733
882
  # Wait for log file to appear
@@ -752,11 +901,19 @@ def tail_and_parse(session_log, progress_file, poll_interval=0.5):
752
901
  event = json.loads(line)
753
902
  tracker.process_event(event)
754
903
  except json.JSONDecodeError:
755
- # Not a JSON line (could be stderr mixed in)
756
- # Use it as a text snippet if meaningful
904
+ # Not a JSON line (could be stderr mixed in). Use it as a
905
+ # text snippet and only treat it as terminal when it has a
906
+ # strong API/runtime error marker; ordinary assistant prose
907
+ # can discuss context limits without being fatal.
757
908
  stripped = line.strip()
758
909
  if stripped and len(stripped) > 5:
759
910
  tracker.last_text_snippet = stripped[:120]
911
+ tracker._detect_terminal_error(stripped, require_error_context=True)
912
+ current_state = tracker.to_dict()
913
+ current_state_key = state_key(current_state)
914
+ if current_state_key != last_write_state:
915
+ atomic_write_json(current_state, progress_file)
916
+ last_write_state = current_state_key
760
917
  continue
761
918
 
762
919
  # Write progress if state changed