wogiflow 2.15.1 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/wogi-challenge.md +4 -4
- package/.claude/commands/wogi-gate-stats.md +1 -1
- package/.claude/docs/intent-grounded-reasoning.md +1 -1
- package/.claude/docs/phases/02-spec.md +2 -2
- package/.workflow/agents/logic-adversary.md +7 -2
- package/.workflow/templates/claude-md.hbs +27 -0
- package/lib/wogi-claude +87 -0
- package/package.json +3 -2
- package/scripts/flow-architect-pass.js +3 -3
- package/scripts/flow-config-defaults.js +51 -0
- package/scripts/flow-constants.js +3 -1
- package/scripts/flow-correct.js +1 -0
- package/scripts/flow-done.js +16 -0
- package/scripts/flow-logic-adversary.js +4 -4
- package/scripts/flow-migrate-igr.js +1 -1
- package/scripts/hooks/core/session-context.js +25 -0
- package/scripts/hooks/core/session-history.js +116 -0
- package/scripts/hooks/core/task-boundary-reset.js +249 -0
- package/scripts/hooks/core/task-completed.js +35 -0
- package/scripts/hooks/entry/claude-code/stop.js +63 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
---
|
|
2
|
-
description: "Manual trigger of the IGR Logic Adversary — critique a plan against the Logic Constitution
|
|
2
|
+
description: "Manual trigger of the IGR Logic Adversary — critique a plan against the Logic Constitution v2 rubric (11 principles, including Platform Capability Grounding)."
|
|
3
3
|
effort: medium
|
|
4
4
|
---
|
|
5
5
|
|
|
@@ -17,13 +17,13 @@ Story: `wf-b00262b1` (IGR)
|
|
|
17
17
|
/wogi-challenge wf-XXXXXXXX
|
|
18
18
|
|
|
19
19
|
# Critique with an explicit rubric version
|
|
20
|
-
/wogi-challenge path/to/plan.md --rubric=logic-constitution-v1
|
|
20
|
+
/wogi-challenge path/to/plan.md --rubric=logic-constitution-v2
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
## What it does
|
|
24
24
|
|
|
25
25
|
1. Loads the plan (either from a file path or from `.workflow/plans/{taskId}.md`).
|
|
26
|
-
2. Calls `scripts/flow-logic-adversary.js buildAdversaryPrompt` to assemble the critique prompt — includes the
|
|
26
|
+
2. Calls `scripts/flow-logic-adversary.js buildAdversaryPrompt` to assemble the critique prompt — includes the 11-principle Logic Constitution v2 (Principle 11 is Platform Capability Grounding), few-shot calibration examples, and all available intent artifacts.
|
|
27
27
|
3. Spawns a sub-agent via the Agent tool on a different model than this session when possible (Sonnet when you're on Opus; Opus when you're on Sonnet) — the model-separation rule per the approved spec.
|
|
28
28
|
4. Parses the returned JSON verdict against the rubric schema.
|
|
29
29
|
5. Records a telemetry event (`gateId: logic-adversary`) with the verdict.
|
|
@@ -46,7 +46,7 @@ Story: `wf-b00262b1` (IGR)
|
|
|
46
46
|
## Under the hood
|
|
47
47
|
|
|
48
48
|
- Script: `scripts/flow-logic-adversary.js`
|
|
49
|
-
- Rubric: `.workflow/rubrics/logic-constitution-v1.md`
|
|
49
|
+
- Rubric: `.workflow/rubrics/logic-constitution-v2.md` (v1 retained for historical telemetry)
|
|
50
50
|
- Persona: `.workflow/agents/logic-adversary.md`
|
|
51
51
|
- Calibration: `.workflow/state/adversary-calibration.json`
|
|
52
52
|
- Telemetry: `gateId: logic-adversary` in `.workflow/state/gate-telemetry.jsonl`
|
|
@@ -43,7 +43,7 @@ Story: `wf-faf340cf` (IGR Story 0 — Gate Telemetry & Self-Assessment Framework
|
|
|
43
43
|
A gate with `pass% = 100%` and `miss% > 10%` is **rubber-stamping**. It's letting things through that you then have to correct. This is the failure mode the owner's QA-98%-parable warned against: 100% coverage that creates false confidence is more dangerous than 70% coverage that triggers a second review.
|
|
44
44
|
|
|
45
45
|
When you see high miss rates:
|
|
46
|
-
1. Tune the rubric (for `logic-adversary`: edit `.workflow/rubrics/logic-constitution-v1.md`)
|
|
46
|
+
1. Tune the rubric (for `logic-adversary`: edit `.workflow/rubrics/logic-constitution-v2.md`)
|
|
47
47
|
2. Add calibration examples (for `logic-adversary`: append to `.workflow/state/adversary-calibration.json`)
|
|
48
48
|
3. Strengthen the gate's blocking behavior (for `completion-truth-gate`: raise `minTierForDone` or set `blockFalseCompletion: true`)
|
|
49
49
|
|
|
@@ -18,7 +18,7 @@ Research finding: across 1,309 user messages mined, first-pass agent output was
|
|
|
18
18
|
| 1 | **Intent Bootstrap** — scaffolds product/domain/glossary/user-journeys artifacts; agnostic trap-zone detector finds structural ambiguities | `scripts/flow-intent-bootstrap.js` + `scripts/flow-trap-zone.js` |
|
|
19
19
|
| 2 | **Intent Framing Pass** — per-task reasoning step; produces a Framing Artifact resolving ambiguities before any other work | `scripts/flow-intent-framing.js` |
|
|
20
20
|
| 3 | **Architect Pass** — read-only sub-agent produces an 8-section pre-spec plan | `scripts/flow-architect-pass.js` + persona `.workflow/agents/architect.md` |
|
|
21
|
-
| 4 | **Logic Adversary** — separate sub-agent on a different model critiques the plan against the
|
|
21
|
+
| 4 | **Logic Adversary** — separate sub-agent on a different model critiques the plan against the 11-principle Logic Constitution (v2 adds Principle 11 — Platform Capability Grounding) | `scripts/flow-logic-adversary.js` + rubric `.workflow/rubrics/logic-constitution-v2.md` |
|
|
22
22
|
| 5 | **Session Correction Memory** — detects user corrections during a session and cross-references back to gates that passed the contradicted work | extensions in `scripts/flow-correction-detector.js` |
|
|
23
23
|
| 6 | **Completion Truth Gate** — audits "done" claims against Tier 0–4 evidence; downgrades language when evidence is insufficient | `scripts/flow-completion-truth-gate.js` |
|
|
24
24
|
| 7 | **Pipeline wiring + rollout** — integrates all above into `/wogi-start`, the gate registry, the eval framework | (this story) |
|
|
@@ -6,7 +6,7 @@ Instructions for the spec/approval phase. Loaded on-demand when phase transition
|
|
|
6
6
|
|
|
7
7
|
**Conditional** — runs for L1+ tasks when IGR on. L3 skip. L2 runs only on ultrathink auto-bump.
|
|
8
8
|
|
|
9
|
-
Spawn a **read-only sub-agent** (Explore subagent_type, with Read/Grep/Glob only — no Edit/Write/Bash) on a model chosen per `config.intentGroundedReasoning.architectPass.modelOverride`. Input: Framing Artifact from Step 1.15 + explore findings from Step 1.3 + scope-confidence audit from Step 1.45 + the Logic Constitution
|
|
9
|
+
Spawn a **read-only sub-agent** (Explore subagent_type, with Read/Grep/Glob only — no Edit/Write/Bash) on a model chosen per `config.intentGroundedReasoning.architectPass.modelOverride`. Input: Framing Artifact from Step 1.15 + explore findings from Step 1.3 + scope-confidence audit from Step 1.45 + the Logic Constitution v2 rubric (so the Architect anticipates the Adversary's checks, including Principle 11 — Platform Capability Grounding, which demands citation + enforcement-preservation + alternative-ruled-out + fallback for every platform-capability claim).
|
|
10
10
|
|
|
11
11
|
Build the prompt via `node scripts/flow-architect-pass.js prompt <task>`. Invoke via Agent tool. Output: an 8-section plan at `.workflow/plans/{taskId}.md` (PINs: approach, data-model, journey-impact, net-new, alternatives, risks, reversibility, dependencies). Parse via `parsePlanArtifact()`; if structural FAIL, re-prompt.
|
|
12
12
|
|
|
@@ -20,7 +20,7 @@ When IGR flag is OFF: SKIPPED. Pipeline proceeds from Step 1.45 directly to Step
|
|
|
20
20
|
|
|
21
21
|
Spawn a **separate sub-agent on a different model** than the Architect (Sonnet when Architect is Opus; Opus when Architect is Sonnet — per `modelSeparation: different-from-architect`). Per Anthropic harness research, same-model self-critique is a known rubber-stamp failure mode.
|
|
22
22
|
|
|
23
|
-
Build the prompt via `node scripts/flow-logic-adversary.js prompt .workflow/plans/{taskId}.md`. The Adversary critiques the plan against the
|
|
23
|
+
Build the prompt via `node scripts/flow-logic-adversary.js prompt .workflow/plans/{taskId}.md`. The Adversary critiques the plan against the 11-principle Logic Constitution v2 with few-shot calibration examples from `.workflow/state/adversary-calibration.json`. Principle 11 (Platform Capability Grounding) always runs — every claim about hook/tool/API/platform behavior must be cited, enforcement must be preserved, an alternative must be named, and a fallback must be specified.
|
|
24
24
|
|
|
25
25
|
Iteration loop (max 3 rounds by default):
|
|
26
26
|
- `overallVerdict: PASS` or `PASS_WITH_CONCERNS` → proceed to Step 1.5. Concerns surface at approval gate (Step 1.6).
|
|
@@ -11,9 +11,9 @@
|
|
|
11
11
|
|
|
12
12
|
You are the **Logic Adversary** for WogiFlow's Intent-Grounded Reasoning layer.
|
|
13
13
|
|
|
14
|
-
Your job is to find logic problems in a plan BEFORE any code is written. You are not critiquing code. You are not checking style, library choice, or syntax — other gates handle those. You are reasoning about whether this plan is **logically right** for the project it's proposed for.
|
|
14
|
+
Your job is to find logic problems in a plan BEFORE any code is written. You are not critiquing code. You are not checking style, library choice, or syntax — other gates handle those. You are reasoning about whether this plan is **logically right** for the project it's proposed for AND whether its claims about the target platform (hooks, tool APIs, subagent model, MCP, etc.) are actually true.
|
|
15
15
|
|
|
16
|
-
You have a specific rubric: the Logic Constitution (currently
|
|
16
|
+
You have a specific rubric: the Logic Constitution (currently v2). You will receive it as input. For each of the 11 principles, you produce a verdict: PASS, CONCERN, FAIL, or SKIP. You cite specific evidence for every verdict. Verdicts without evidence are themselves failures of your job.
|
|
17
17
|
|
|
18
18
|
### What you are looking for
|
|
19
19
|
|
|
@@ -29,6 +29,11 @@ Patterns that produce logic failures in practice — seen in real agent session
|
|
|
29
29
|
8. **Implicit-requirement blindness** — happy path only, no edge cases.
|
|
30
30
|
9. **User-journey orphans** — dead-end screens, unreachable features.
|
|
31
31
|
10. **Undocumented irreversibility** — destructive ops without confirmation.
|
|
32
|
+
11. **Ungrounded platform-capability claims** — plans that rely on a hook, tool API, subagent behavior, MCP feature, or slash command working a certain way WITHOUT citation, WITHOUT enforcement-preservation evidence, WITHOUT a ruled-out alternative, or WITHOUT a capability-unavailable fallback. For every platform-capability claim, demand all four: citation, enforcement walk-through, alternative, fallback. Missing any of the four = FAIL. **Additionally, for runtime-behavior claims (hooks firing, tools returning specific shapes, signals being handled, events being emitted), hearsay-level citations — code comments or docs claiming "X does Y" — are NOT sufficient. Demand either O1 (a captured observation: log, telemetry, trace, test result) OR O2 (a named live-test plan that produces O1 before downstream code is built). See P11.1 in the rubric. A comment saying "the hook fires" is not evidence the hook fires; a log line showing it firing is.**
|
|
33
|
+
|
|
34
|
+
**P11.2 — The same discipline applies to the PROJECT'S OWN RULES**, not just platform capabilities. For every artifact a plan produces (task IDs, file names, config values, state-file entries, spec structures, commit messages), demand: (E1) which rule from `decisions.md`, `feedback-patterns.md`, `.claude/rules/`, a schema, or a validator function applies? (E2) show the artifact satisfying the rule — run the validator, show the format side-by-side with the rule, paste the passing check — *not* "I followed it." (E3) what's the failure mode when violated? Examples of P11.2 violations: (a) "task ID `wf-test0001` follows WogiFlow convention" — no, the convention requires hex, this fails `validateTaskId()`; (b) "config key `taskBoundaryReset` is valid" without being in `flow-constants.js`'s known-keys list; (c) "file name `flowFoo.js` follows kebab-case" — it doesn't. Reflex: *what's the artifact? what rule governs it? is satisfaction SHOWN, not just claimed?*
|
|
35
|
+
|
|
36
|
+
**P11.3 — Also check for EXISTING WOGIFLOW FEATURES that touch the same domain.** Before shipping any new mechanism (hook, wrapper, CLI entry, state file, config key, skill), enumerate the sibling surface: (S1) `grep -r "execSync\|spawn.*claude" lib/ scripts/`, check `.claude/commands/`, check `scripts/flow-constants.js`, check `lib/workspace.js` — does an existing feature already touch this domain? (S2) Show how the new mechanism composes, conflicts, or integrates with each sibling. "Orthogonal" is OK but must be asserted. (S3) If integration work is needed (e.g., the new wrapper needs to be injected into workspace's `execSync('claude')` call), include it in scope OR explicitly file a follow-up story. Silent omission of sibling integration = FAIL. Example violation caught live: `wogi-claude` wrapper initially missed that `lib/workspace.js:1612` spawns claude directly, so workspace-mode workers weren't restart-capable.
|
|
32
37
|
|
|
33
38
|
### What you are NOT looking for
|
|
34
39
|
|
|
@@ -216,6 +216,33 @@ If the user provides N items, ALL N must become tracked work items. No exception
|
|
|
216
216
|
|
|
217
217
|
**This rule applies everywhere**: `/wogi-start`, `/wogi-story`, `/wogi-epics`, `/wogi-extract-review`, and any other command that converts user input into tracked work.
|
|
218
218
|
|
|
219
|
+
### Mid-Execution Anti-Deferral (MANDATORY — APPLIES AFTER TASKS ARE CREATED)
|
|
220
|
+
|
|
221
|
+
**Reordering is permitted. Deferring is not.** Once work is tracked inside an epic, story, or wave, you MUST NOT propose to skip, postpone, drop, or "deprioritize to later" any of it — regardless of how risky, expensive, or token-heavy it looks. If the work improves code quality or user experience, it ships in the plan it was scoped into. You may only change the **sequence** of execution.
|
|
222
|
+
|
|
223
|
+
**Why this rule exists**: Baseline Claude Code training biases toward deferral to conserve tokens and reduce risk ("let's save X for later," "we can defer the high-risk piece," "this can wait until v2"). That bias is a token-preservation reflex, not a quality judgment. In WogiFlow, scope decisions are the user's, not the AI's — the AI proposes sequencing, the user decides scope.
|
|
224
|
+
|
|
225
|
+
**Mid-Execution Anti-Rationalization Checklist** — If ANY of these thoughts cross your mind, STOP:
|
|
226
|
+
- "This piece is high-risk — let's defer it to a later epic" → WRONG. It ships this epic. Reorder it later in the sequence if needed.
|
|
227
|
+
- "To save tokens, let's skip wf-XXXX for now" → WRONG. Token cost is never a reason to drop scoped work.
|
|
228
|
+
- "The user probably won't miss this one" → WRONG. The user tracked it. It ships.
|
|
229
|
+
- "We can revisit this after the main work lands" → WRONG. "Revisit" is a soft defer. Sequence it, don't postpone it.
|
|
230
|
+
- "This was lower priority anyway" → WRONG. Priority affects ORDER, never INCLUSION.
|
|
231
|
+
|
|
232
|
+
**What you MAY do after tasks are tracked:**
|
|
233
|
+
- Propose a **sequence** (A → B → C, or A∥B → C) with reasoning
|
|
234
|
+
- Propose **parallelization** when independent
|
|
235
|
+
- Propose **prerequisites** that must land first (that is reordering, not deferral)
|
|
236
|
+
- Flag risks without using them as justification to drop scope
|
|
237
|
+
|
|
238
|
+
**What you must NEVER do after tasks are tracked:**
|
|
239
|
+
- Propose to "defer" a tracked story "to save tokens" or "reduce risk"
|
|
240
|
+
- Skip a scoped story because you judged it lower-value
|
|
241
|
+
- Use the word "defer" as a euphemism for "drop"
|
|
242
|
+
- Present a plan that silently omits already-tracked work
|
|
243
|
+
|
|
244
|
+
**When genuinely unsure the work is still needed**: ask the user explicitly — "Do you still want wf-XXXX to ship this epic, or should we drop it?" Let them decide. Do NOT make that call autonomously.
|
|
245
|
+
|
|
219
246
|
### Task ID Format (MANDATORY)
|
|
220
247
|
|
|
221
248
|
All task IDs MUST be generated by `generateTaskId()` from `wogiflow/scripts/flow-utils.js`. **Never manually type a task ID.**
|
package/lib/wogi-claude
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# wogi-claude — Task-boundary session restart wrapper for Claude Code.
|
|
3
|
+
#
|
|
4
|
+
# When the WogiFlow TaskCompleted hook (with taskBoundaryReset.enabled: true)
|
|
5
|
+
# finishes a task, it writes a restart flag to $WOGI_RESTART_FLAG and sends
|
|
6
|
+
# SIGTERM to its parent Claude Code process. Claude Code exits cleanly; this
|
|
7
|
+
# wrapper detects the flag and restarts claude in a fresh context. All WogiFlow
|
|
8
|
+
# state is preserved on disk and re-hydrated via SessionStart hook.
|
|
9
|
+
#
|
|
10
|
+
# Why this exists: Claude Code has no programmatic /clear or in-session reset.
|
|
11
|
+
# Prior-conversation tokens accumulate across tasks, burning 10-20% of context
|
|
12
|
+
# budget before new work begins. This wrapper recovers that budget cleanly.
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# Direct: wogi-claude [claude args...]
|
|
16
|
+
# PATH: ensure ~/.wogiflow/bin is on PATH, then run `wogi-claude` instead of `claude`
|
|
17
|
+
# Opt-out: pass --no-wogi-restart to run claude once without the restart loop
|
|
18
|
+
#
|
|
19
|
+
# Environment:
|
|
20
|
+
# WOGI_RESTART_FLAG — path to restart-flag file (default: <cwd>/.workflow/state/restart-requested)
|
|
21
|
+
# WOGI_MAX_RESTARTS — safety cap, default 50 (prevents runaway restart storms)
|
|
22
|
+
# WOGI_WRAPPER_PID — exported to child; hook checks this to confirm wrapper is present
|
|
23
|
+
# WOGI_CLAUDE_BIN — override path to claude binary (default: found via PATH)
|
|
24
|
+
|
|
25
|
+
set -u
|
|
26
|
+
|
|
27
|
+
# --- Opt-out path: no restart loop, just exec claude once ---
|
|
28
|
+
for arg in "$@"; do
|
|
29
|
+
if [ "$arg" = "--no-wogi-restart" ]; then
|
|
30
|
+
# strip the flag from args and exec
|
|
31
|
+
filtered=()
|
|
32
|
+
for a in "$@"; do [ "$a" = "--no-wogi-restart" ] || filtered+=("$a"); done
|
|
33
|
+
CLAUDE_BIN="${WOGI_CLAUDE_BIN:-claude}"
|
|
34
|
+
# Guarded expansion: under `set -u`, bash < 4.4 treats an empty array's "${arr[@]}" as unbound and aborts; this form expands to nothing instead.
exec "$CLAUDE_BIN" ${filtered[@]+"${filtered[@]}"}
|
|
35
|
+
fi
|
|
36
|
+
done
|
|
37
|
+
|
|
38
|
+
# --- Resolve paths and config ---
|
|
39
|
+
CLAUDE_BIN="${WOGI_CLAUDE_BIN:-claude}"
|
|
40
|
+
PROJECT_ROOT="$(pwd)"
|
|
41
|
+
FLAG_FILE="${WOGI_RESTART_FLAG:-$PROJECT_ROOT/.workflow/state/restart-requested}"
|
|
42
|
+
MAX_RESTARTS="${WOGI_MAX_RESTARTS:-50}"
|
|
43
|
+
export WOGI_RESTART_FLAG="$FLAG_FILE"
|
|
44
|
+
export WOGI_WRAPPER_PID=$$
|
|
45
|
+
|
|
46
|
+
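# --- Handle Ctrl+C / signals gracefully: record the interrupt so we don't restart on user-initiated exit (the trap does not forward signals; Ctrl+C reaches the child via the terminal's foreground process group) ---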
|
|
47
|
+
INTERRUPTED=0
|
|
48
|
+
on_interrupt() {
|
|
49
|
+
INTERRUPTED=1
|
|
50
|
+
# Let the child process receive the signal naturally; don't kill ourselves yet.
|
|
51
|
+
}
|
|
52
|
+
trap on_interrupt INT TERM
|
|
53
|
+
|
|
54
|
+
# --- Main loop ---
|
|
55
|
+
count=0
|
|
56
|
+
while true; do
|
|
57
|
+
count=$((count + 1))
|
|
58
|
+
if [ "$count" -gt "$MAX_RESTARTS" ]; then
|
|
59
|
+
echo "[wogi-claude] max restarts ($MAX_RESTARTS) reached — exiting" >&2
|
|
60
|
+
exit 1
|
|
61
|
+
fi
|
|
62
|
+
|
|
63
|
+
# Clear any stale flag from a prior run before starting a new claude
|
|
64
|
+
# (defensive — hook should clean up after itself but if a crash left it, remove it)
|
|
65
|
+
if [ "$count" -eq 1 ] && [ -f "$FLAG_FILE" ]; then
|
|
66
|
+
rm -f "$FLAG_FILE"
|
|
67
|
+
fi
|
|
68
|
+
|
|
69
|
+
"$CLAUDE_BIN" "$@"
|
|
70
|
+
inner_exit=$?
|
|
71
|
+
|
|
72
|
+
# If the user hit Ctrl+C (INT/TERM reached us), don't restart
|
|
73
|
+
if [ "$INTERRUPTED" -eq 1 ]; then
|
|
74
|
+
exit "$inner_exit"
|
|
75
|
+
fi
|
|
76
|
+
|
|
77
|
+
# If the restart flag was written, consume it and loop
|
|
78
|
+
if [ -f "$FLAG_FILE" ]; then
|
|
79
|
+
rm -f "$FLAG_FILE"
|
|
80
|
+
# Brief informational line so user knows what just happened
|
|
81
|
+
echo "[wogi-claude] task boundary — restarting with fresh context (iteration $((count + 1)))" >&2
|
|
82
|
+
continue
|
|
83
|
+
fi
|
|
84
|
+
|
|
85
|
+
# No flag — claude exited for its own reasons; propagate exit code
|
|
86
|
+
exit "$inner_exit"
|
|
87
|
+
done
|
package/package.json
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wogiflow",
|
|
3
|
-
"version": "2.15.1",
|
|
3
|
+
"version": "2.16.0",
|
|
4
4
|
"description": "AI-powered development workflow management system with multi-model support",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"flow": "bin/flow",
|
|
8
|
-
"wogi-flow": "bin/flow"
|
|
8
|
+
"wogi-flow": "bin/flow",
|
|
9
|
+
"wogi-claude": "lib/wogi-claude"
|
|
9
10
|
},
|
|
10
11
|
"scripts": {
|
|
11
12
|
"flow": "./scripts/flow",
|
|
@@ -106,7 +106,7 @@ function isArchitectDisabled() {
|
|
|
106
106
|
* @param {string} [opts.framing] - Framing artifact content. If absent, attempts to load via loadFramingArtifact.
|
|
107
107
|
* @param {string} [opts.exploreFindings] - Consolidated explore-phase findings.
|
|
108
108
|
* @param {string} [opts.scopeConfidenceAudit] - Optional per spec §2.2; degraded mode if absent.
|
|
109
|
-
* @param {string} [opts.constitutionVersion='logic-constitution-
|
|
109
|
+
* @param {string} [opts.constitutionVersion='logic-constitution-v2']
|
|
110
110
|
* @returns {Promise<{ systemPrompt:string, userPrompt:string, metadata:Object }>}
|
|
111
111
|
*/
|
|
112
112
|
async function buildArchitectPrompt(opts) {
|
|
@@ -114,7 +114,7 @@ async function buildArchitectPrompt(opts) {
|
|
|
114
114
|
throw new TypeError('buildArchitectPrompt: opts.taskId required');
|
|
115
115
|
}
|
|
116
116
|
const taskId = opts.taskId;
|
|
117
|
-
const constitutionVersion = opts.constitutionVersion || 'logic-constitution-
|
|
117
|
+
const constitutionVersion = opts.constitutionVersion || 'logic-constitution-v2';
|
|
118
118
|
|
|
119
119
|
const dis = isArchitectDisabled();
|
|
120
120
|
if (dis.disabled) {
|
|
@@ -537,7 +537,7 @@ async function cliPrompt(argv) {
|
|
|
537
537
|
const built = await buildArchitectPrompt({
|
|
538
538
|
taskId: args.task || path.basename(inputFile, path.extname(inputFile)),
|
|
539
539
|
taskInput,
|
|
540
|
-
constitutionVersion: args.rubric || 'logic-constitution-
|
|
540
|
+
constitutionVersion: args.rubric || 'logic-constitution-v2',
|
|
541
541
|
});
|
|
542
542
|
console.log('===== SYSTEM PROMPT =====');
|
|
543
543
|
console.log(built.systemPrompt);
|
|
@@ -850,6 +850,25 @@ const CONFIG_DEFAULTS = {
|
|
|
850
850
|
respectDependencies: true
|
|
851
851
|
},
|
|
852
852
|
|
|
853
|
+
// --- Task-Boundary Session Restart (wf-39e9dc09) ---
|
|
854
|
+
// EXPERIMENTAL, OPT-IN. When enabled AND the `wogi-claude` wrapper is running,
|
|
855
|
+
// TaskCompleted triggers a clean restart of the Claude Code process so each
|
|
856
|
+
// new task starts with a fresh context. State files persist; the wrapper
|
|
857
|
+
// detects the restart flag and relaunches claude. See lib/wogi-claude for
|
|
858
|
+
// the wrapper. See scripts/hooks/core/task-boundary-reset.js for the trigger.
|
|
859
|
+
//
|
|
860
|
+
// REQUIRES verification against real Claude Code before enabling broadly —
|
|
861
|
+
// the SIGTERM-to-parent pattern must be confirmed to exit Claude Code
|
|
862
|
+
// gracefully. Keep disabled until integration test in a throwaway session
|
|
863
|
+
// confirms clean exit + flag consumption + fresh restart.
|
|
864
|
+
taskBoundaryReset: {
|
|
865
|
+
_comment: 'Opt-in per-task context reset via wogi-claude wrapper. See lib/wogi-claude.',
|
|
866
|
+
enabled: false,
|
|
867
|
+
_comment_enabled: 'Set true ONLY after verifying SIGTERM behavior with real Claude Code in a throwaway session.',
|
|
868
|
+
maxRestartsPerSession: 50,
|
|
869
|
+
_comment_maxRestartsPerSession: 'Safety cap. The wrapper also has WOGI_MAX_RESTARTS env override.'
|
|
870
|
+
},
|
|
871
|
+
|
|
853
872
|
// --- Contract Surface (Teams-only — activated on wogi login) ---
|
|
854
873
|
contractSurface: {
|
|
855
874
|
enabled: false,
|
|
@@ -1026,6 +1045,38 @@ const CONFIG_DEFAULTS = {
|
|
|
1026
1045
|
standaloneBypassTask: true
|
|
1027
1046
|
},
|
|
1028
1047
|
|
|
1048
|
+
// --- Externalized Episodic Memory (epic-episodic-memory) ---
|
|
1049
|
+
// Default OFF until Wave E regression tests validate ≥30% token savings.
|
|
1050
|
+
// See .workflow/audits/state-coverage-2026-04-15.md for design rationale.
|
|
1051
|
+
externalMemory: {
|
|
1052
|
+
enabled: false,
|
|
1053
|
+
thresholds: {
|
|
1054
|
+
agentTokens: 2000,
|
|
1055
|
+
readLines: 200,
|
|
1056
|
+
bashLines: 100
|
|
1057
|
+
},
|
|
1058
|
+
retention: {
|
|
1059
|
+
compressDays: 7,
|
|
1060
|
+
evictDays: 30
|
|
1061
|
+
},
|
|
1062
|
+
exemptions: {
|
|
1063
|
+
pathGlobs: [
|
|
1064
|
+
'.claude/docs/phases/**',
|
|
1065
|
+
'.workflow/state/workflow-phase.json',
|
|
1066
|
+
'.workflow/state/task-checkpoint.json'
|
|
1067
|
+
],
|
|
1068
|
+
tools: ['TodoWrite', 'Glob', 'Grep']
|
|
1069
|
+
},
|
|
1070
|
+
capture: {
|
|
1071
|
+
enabled: false,
|
|
1072
|
+
blockOnMiss: true,
|
|
1073
|
+
minLevel: 'L2'
|
|
1074
|
+
},
|
|
1075
|
+
telemetry: {
|
|
1076
|
+
enabled: true
|
|
1077
|
+
}
|
|
1078
|
+
},
|
|
1079
|
+
|
|
1029
1080
|
// --- Workflow Steps ---
|
|
1030
1081
|
workflowSteps: WORKFLOW_STEP_DEFAULTS
|
|
1031
1082
|
};
|
|
@@ -142,7 +142,9 @@ const KNOWN_CONFIG_KEYS = [
|
|
|
142
142
|
'security', 'storyDecomposition', 'techDebt', 'traces',
|
|
143
143
|
'webmcp', 'workflowSteps',
|
|
144
144
|
// v2.0.0+ compat shim output keys
|
|
145
|
-
'proactiveCompaction', 'communitySync'
|
|
145
|
+
'proactiveCompaction', 'communitySync',
|
|
146
|
+
// Task-boundary session restart (wf-39e9dc09) — opt-in, experimental
|
|
147
|
+
'taskBoundaryReset'
|
|
146
148
|
];
|
|
147
149
|
|
|
148
150
|
module.exports = {
|
package/scripts/flow-correct.js
CHANGED
package/scripts/flow-done.js
CHANGED
|
@@ -596,6 +596,22 @@ async function main() {
|
|
|
596
596
|
|
|
597
597
|
success(`Completed: ${taskId}`);
|
|
598
598
|
|
|
599
|
+
// wf-39e9dc09 Phase 1 — mark task-just-completed so the next Stop-hook
|
|
600
|
+
// invocation can trigger a session restart when taskBoundaryReset is on
|
|
601
|
+
// and the wogi-claude wrapper is running. Safe no-op otherwise (the marker
|
|
602
|
+
// is cheap; Phase 2 checks preconditions before acting on it).
|
|
603
|
+
try {
|
|
604
|
+
const { markRestartPending } = require('./hooks/core/task-boundary-reset');
|
|
605
|
+
markRestartPending({
|
|
606
|
+
taskId,
|
|
607
|
+
source: 'flow-done'
|
|
608
|
+
});
|
|
609
|
+
} catch (err) {
|
|
610
|
+
if (process.env.DEBUG) {
|
|
611
|
+
console.error(`[flow-done] markRestartPending failed (non-fatal): ${err.message}`);
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
|
|
599
615
|
// v5.0: Show TodoWrite completion stats if available
|
|
600
616
|
if (todoWriteSync) {
|
|
601
617
|
try {
|
|
@@ -53,7 +53,7 @@ const gateTelemetry = require('./flow-gate-telemetry');
|
|
|
53
53
|
// ============================================================
|
|
54
54
|
|
|
55
55
|
const RUBRIC_DIR = path.join(PATHS.workflow, 'rubrics');
|
|
56
|
-
const DEFAULT_RUBRIC = 'logic-constitution-
|
|
56
|
+
const DEFAULT_RUBRIC = 'logic-constitution-v2';
|
|
57
57
|
const CALIBRATION_PATH = path.join(PATHS.state, 'adversary-calibration.json');
|
|
58
58
|
|
|
59
59
|
const VALID_OVERALL_VERDICTS = new Set([
|
|
@@ -84,7 +84,7 @@ const INTENT_ARTIFACTS = {
|
|
|
84
84
|
|
|
85
85
|
/**
|
|
86
86
|
* Load the Logic Constitution rubric by version identifier.
|
|
87
|
-
* @param {string} [version='logic-constitution-
|
|
87
|
+
* @param {string} [version='logic-constitution-v2']
|
|
88
88
|
* @returns {{ content: string, version: string, path: string }}
|
|
89
89
|
*/
|
|
90
90
|
function loadRubric(version = DEFAULT_RUBRIC) {
|
|
@@ -149,7 +149,7 @@ function readIntentArtifact(key) {
|
|
|
149
149
|
* @param {string} [opts.taskId]
|
|
150
150
|
* @param {number} [opts.round=1] - Iteration round number (≥1).
|
|
151
151
|
* @param {object} [opts.previousAdversaryOutput] - Prior round's parsed output when re-running.
|
|
152
|
-
* @param {string} [opts.rubricVersion='logic-constitution-
|
|
152
|
+
* @param {string} [opts.rubricVersion='logic-constitution-v2']
|
|
153
153
|
* @param {number} [opts.calibrationCount=3] - How many calibration examples to inject.
|
|
154
154
|
* @returns {{ systemPrompt:string, userPrompt:string, metadata:object }}
|
|
155
155
|
*/
|
|
@@ -317,7 +317,7 @@ All 10 principles must appear in the \`principles\` array, even if SKIPped.
|
|
|
317
317
|
* @param {Object} ctx
|
|
318
318
|
* @param {string} [ctx.taskId]
|
|
319
319
|
* @param {number} [ctx.round=1]
|
|
320
|
-
* @param {string} [ctx.rubricVersion='logic-constitution-
|
|
320
|
+
* @param {string} [ctx.rubricVersion='logic-constitution-v2']
|
|
321
321
|
* @returns {object} The validated adversary verdict.
|
|
322
322
|
*/
|
|
323
323
|
function parseAdversaryOutput(response, ctx = {}) {
|
|
@@ -50,7 +50,7 @@ const REQUIRED_IGR_SCRIPTS = [
|
|
|
50
50
|
];
|
|
51
51
|
|
|
52
52
|
const REQUIRED_PERSONAS = ['logic-adversary.md', 'architect.md'];
|
|
53
|
-
const REQUIRED_RUBRICS = ['logic-constitution-v1.md'];
|
|
53
|
+
const REQUIRED_RUBRICS = ['logic-constitution-v1.md', 'logic-constitution-v2.md'];
|
|
54
54
|
|
|
55
55
|
// ARCH-002 fix (2026-04-13): use shared parseArgs from flow-cli-utils
|
|
56
56
|
const { parseArgs } = require('./flow-cli-utils');
|
|
@@ -812,6 +812,31 @@ function formatContextForInjection(context) {
|
|
|
812
812
|
const ctx = context.context;
|
|
813
813
|
let output = '## Wogi Flow Session Context\n\n';
|
|
814
814
|
|
|
815
|
+
// Post-restart continuity note (wf-39e9dc09 — Stop-hook triggered restart)
|
|
816
|
+
// If the most recent session in session-history.json was ended by
|
|
817
|
+
// task-boundary-restart and happened very recently, surface the resume
|
|
818
|
+
// token so the user/AI knows prior context is recoverable.
|
|
819
|
+
try {
|
|
820
|
+
const { getMostRecentPriorSession } = require('./session-history');
|
|
821
|
+
const prior = getMostRecentPriorSession(ctx?.cliSessionId);
|
|
822
|
+
if (prior && prior.endReason === 'task-boundary-restart') {
|
|
823
|
+
const endedAt = new Date(prior.endedAt).getTime();
|
|
824
|
+
const ageMinutes = (Date.now() - endedAt) / 60000;
|
|
825
|
+
// Only surface if the prior session ended in the last 24 hours —
|
|
826
|
+
// older entries are history, not active continuity signals.
|
|
827
|
+
if (ageMinutes < 60 * 24) {
|
|
828
|
+
output += `### Continuing from prior session (task-boundary restart)\n`;
|
|
829
|
+
output += `Prior Claude Code session ended ${ageMinutes < 1 ? 'just now' : `${ageMinutes.toFixed(1)}m ago`} after completing task **${prior.lastActiveTaskTitle || prior.tasksCompletedInSession?.[0] || 'unknown'}**.\n`;
|
|
830
|
+
output += `\n**Durable state survived.** Read \`.workflow/state/ready.json\`, \`decisions.md\`, \`feedback-patterns.md\`, \`request-log.md\` for canonical task/decision/activity history.\n`;
|
|
831
|
+
output += `\n**Prior conversation transcript is archived but not loaded.** To recover it if needed:\n`;
|
|
832
|
+
output += `\`\`\`bash\n${prior.resumeCommand}\n\`\`\`\n`;
|
|
833
|
+
output += `\nThis restart is normal — WogiFlow's task-boundary reset (\`config.taskBoundaryReset.enabled: true\`) recovered session-token budget by resetting context at the task boundary. Proceed with the user's next instruction; if they reference prior conversation you don't have, use the resume command above.\n\n`;
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
} catch (_err) {
|
|
837
|
+
// Non-critical — history file may not exist; continue with normal context
|
|
838
|
+
}
|
|
839
|
+
|
|
815
840
|
// CRITICAL: CLAUDE_CODE_SIMPLE mode warning (highest priority)
|
|
816
841
|
if (ctx.simpleModeWarning && ctx.simpleModeWarning.active) {
|
|
817
842
|
output += `### CLAUDE_CODE_SIMPLE Mode Detected\n`;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wogi Flow - Session History
|
|
3
|
+
*
|
|
4
|
+
* Captures a lightweight digest of each Claude Code session BEFORE a
|
|
5
|
+
* task-boundary restart. After restart, the new session can read this
|
|
6
|
+
* history to know what the prior session did and, if needed, resume it
|
|
7
|
+
* via `claude --resume <cliSessionId>`.
|
|
8
|
+
*
|
|
9
|
+
* This is the "prior-session access" guarantee the user asked for after
|
|
10
|
+
* wf-39e9dc09 landed: state files preserve the durable outcomes, but
|
|
11
|
+
* sometimes you want the full prior conversation too. The resume token
|
|
12
|
+
* is the escape hatch.
|
|
13
|
+
*
|
|
14
|
+
* Storage: `.workflow/state/session-history.json`
|
|
15
|
+
* {
|
|
16
|
+
* "version": 1,
|
|
17
|
+
* "sessions": [
|
|
18
|
+
* {
|
|
19
|
+
* "cliSessionId": "b92bc5e3-...",
|
|
20
|
+
* "endedAt": "2026-04-15T09:40:49.293Z",
|
|
21
|
+
* "endReason": "task-boundary-restart",
|
|
22
|
+
* "tasksCompletedInSession": ["wf-00000001"],
|
|
23
|
+
* "lastActiveTaskTitle": "TEST: trigger restart via flow done",
|
|
24
|
+
* "resumeCommand": "claude --resume b92bc5e3-..."
|
|
25
|
+
* },
|
|
26
|
+
* ...
|
|
27
|
+
* ]
|
|
28
|
+
* }
|
|
29
|
+
*
|
|
30
|
+
* Capped at 20 entries, newest first. Entries beyond the cap are dropped
|
|
31
|
+
* when a new record is written; this module does not archive them.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
const fs = require('node:fs');
|
|
35
|
+
const path = require('node:path');
|
|
36
|
+
|
|
37
|
+
const { PATHS, safeJsonParse, writeJson } = require('../../flow-utils');
|
|
38
|
+
|
|
39
|
+
const HISTORY_FILE = 'session-history.json';
|
|
40
|
+
const MAX_ENTRIES = 20;
|
|
41
|
+
|
|
42
|
+
/**
 * Resolve the on-disk location of the session-history file.
 *
 * @returns {string} `PATHS.state` joined with the history file name
 */
function getHistoryPath() {
  const stateDir = PATHS.state;
  return path.join(stateDir, HISTORY_FILE);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Load the session history via `safeJsonParse`.
 *
 * An empty history (`{ version: 1, sessions: [] }`) is passed as the fallback
 * argument — presumably returned when the file is missing or unparsable;
 * confirm against `safeJsonParse` in flow-utils.
 *
 * @returns {Object} parsed history object
 */
function readHistory() {
  const historyFile = getHistoryPath();
  const emptyHistory = { version: 1, sessions: [] };
  return safeJsonParse(historyFile, emptyHistory);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Record that a session ended, newest first. Called from the Stop hook when
 * a restart marker is pending, so the next session has a pointer to the
 * ended session.
 *
 * The call is idempotent per session: any earlier record with the same
 * `cliSessionId` is replaced rather than duplicated. The Stop hook can run
 * many times in one session while the pending marker is still on disk, and
 * without this the capped history would fill up with copies of the current
 * session and push out genuinely prior sessions.
 *
 * @param {Object} entry
 * @param {string} entry.cliSessionId
 * @param {string} [entry.endReason='task-boundary-restart']
 * @param {Array<string>} [entry.tasksCompletedInSession] only the last 20 are kept
 * @param {string} [entry.lastActiveTaskTitle]
 * @returns {{ recorded: boolean, reason?: string }} never throws; failures are
 *   reported through `reason`
 */
function recordSessionEnd(entry) {
  if (!entry || !entry.cliSessionId) {
    return { recorded: false, reason: 'missing-cliSessionId' };
  }
  try {
    const history = readHistory();
    const record = {
      cliSessionId: entry.cliSessionId,
      endedAt: new Date().toISOString(),
      endReason: entry.endReason || 'task-boundary-restart',
      tasksCompletedInSession: Array.isArray(entry.tasksCompletedInSession)
        ? entry.tasksCompletedInSession.slice(-20)
        : [],
      lastActiveTaskTitle: entry.lastActiveTaskTitle || null,
      resumeCommand: `claude --resume ${entry.cliSessionId}`
    };

    // A hand-edited or corrupted file may hold a non-array `sessions`; start
    // from an empty list instead of failing the whole write.
    const existing = Array.isArray(history.sessions) ? history.sessions : [];

    // Drop any earlier record for this same session so the newest one wins.
    const others = existing.filter((s) => s?.cliSessionId !== entry.cliSessionId);

    // Newest first, capped at MAX_ENTRIES (oldest entries fall off the end).
    history.sessions = [record, ...others].slice(0, MAX_ENTRIES);

    writeJson(getHistoryPath(), history);
    return { recorded: true };
  } catch (err) {
    return { recorded: false, reason: `history-write-failed: ${err.message}` };
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Look up the newest history entry that does not belong to the current
 * session. The history is stored newest first, so the first non-matching
 * entry is the answer.
 *
 * @param {string} [currentSessionId] — when given, entries with this ID are skipped
 * @returns {Object|null} the prior session record, or null when there is none
 *   or the history cannot be read
 */
function getMostRecentPriorSession(currentSessionId) {
  let priorSession = null;
  try {
    const { sessions } = readHistory();
    for (const candidate of sessions || []) {
      const isCurrentSession =
        Boolean(currentSessionId) && candidate.cliSessionId === currentSessionId;
      if (isCurrentSession) {
        continue;
      }
      priorSession = candidate;
      break;
    }
  } catch (_err) {
    // Unreadable or malformed history is treated as "no prior session".
    priorSession = null;
  }
  return priorSession;
}
|
|
110
|
+
|
|
111
|
+
module.exports = {
|
|
112
|
+
recordSessionEnd,
|
|
113
|
+
getMostRecentPriorSession,
|
|
114
|
+
readHistory,
|
|
115
|
+
getHistoryPath
|
|
116
|
+
};
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wogi Flow - Task Boundary Reset (Core Module)
|
|
3
|
+
*
|
|
4
|
+
* Experimental, opt-in: on real WogiFlow task completion, write a restart flag
|
|
5
|
+
* and send SIGTERM to the parent Claude Code process. The wogi-claude wrapper
|
|
6
|
+
* detects the flag on claude's clean exit and relaunches with a fresh context,
|
|
7
|
+
* recovering the 10-20% of session tokens that prior conversation was burning.
|
|
8
|
+
*
|
|
9
|
+
* Design (two-phase, after empirical P11 failure on 2026-04-15):
|
|
10
|
+
*
|
|
11
|
+
* Phase 1 — markRestartPending()
|
|
12
|
+
* Called from task-completion sites (scripts/hooks/core/task-completed.js,
|
|
13
|
+
* scripts/flow-done.js). Writes a small marker file under .workflow/state/.
|
|
14
|
+
* No signals, no exits — just a durable "task is done; please restart at
|
|
15
|
+
* the next safe boundary" note.
|
|
16
|
+
*
|
|
17
|
+
* Phase 2 — consumeAndTriggerRestart()
|
|
18
|
+
* Called from the Stop hook entry (scripts/hooks/entry/claude-code/stop.js),
|
|
19
|
+
* which runs as a direct child of the claude process. If the pending marker
|
|
20
|
+
* exists AND preconditions hold, the function writes the wrapper flag and
|
|
21
|
+
* sends SIGTERM to ppid (claude). The wrapper restarts claude with a fresh
|
|
22
|
+
* context.
|
|
23
|
+
*
|
|
24
|
+
* Why two phases?
|
|
25
|
+
* The original single-phase design attached to Claude Code's TaskCompleted
|
|
26
|
+
* event — which turned out not to fire for Task-tool subagent completions
|
|
27
|
+
* despite an internal code comment claiming so. P11.1 (observed-behavior
|
|
28
|
+
* evidence requirement) was added to the rubric after that failure. This
|
|
29
|
+
* redesign uses Stop hook + marker file because the Stop hook is directly
|
|
30
|
+
* observed firing in Claude Code sessions and is a verified child of the
|
|
31
|
+
* claude process.
|
|
32
|
+
*
|
|
33
|
+
* Preconditions (Phase 2 only; Phase 1 is always safe to write):
|
|
34
|
+
* 1. config.taskBoundaryReset.enabled === true
|
|
35
|
+
* 2. process.env.WOGI_WRAPPER_PID is set (wogi-claude wrapper is present)
|
|
36
|
+
* 3. process.env.WOGI_RESTART_FLAG is set
|
|
37
|
+
* 4. The task-just-completed marker file exists
|
|
38
|
+
*
|
|
39
|
+
* If any precondition fails in Phase 2: no-op. The marker stays in place for
|
|
40
|
+
* a future Stop-hook invocation, or is cleaned up on session-end.
|
|
41
|
+
*
|
|
42
|
+
* Rollback: set config.taskBoundaryReset.enabled = false. Phase 1 still writes
|
|
43
|
+
* the marker (cheap, harmless); Phase 2 no-ops. No state-file corruption.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
const fs = require('node:fs');
|
|
47
|
+
const path = require('node:path');
|
|
48
|
+
|
|
49
|
+
const { getConfig, PATHS } = require('../../flow-utils');
|
|
50
|
+
|
|
51
|
+
const PENDING_MARKER_FILE = 'task-just-completed';
|
|
52
|
+
|
|
53
|
+
/**
 * Build the path of the pending-restart marker file, which lives directly
 * under `PATHS.state`.
 *
 * @returns {string}
 */
function getPendingMarkerPath() {
  const stateDir = PATHS.state;
  return path.join(stateDir, PENDING_MARKER_FILE);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Phase 1 — leave a marker on disk saying a task just finished and a restart
 * is wanted at the next Stop-hook boundary. This function does not read the
 * config and sends no signals; Phase 2 decides whether to act on the marker.
 *
 * @param {Object} ctx
 * @param {string} ctx.taskId
 * @param {string} [ctx.taskTitle]
 * @param {string} [ctx.source] where the call came from, for telemetry
 * @returns {{ marked: boolean, markerPath?: string, reason?: string }}
 *   never throws; a failed write is reported through `reason`
 */
function markRestartPending(ctx) {
  try {
    const target = getPendingMarkerPath();
    fs.mkdirSync(path.dirname(target), { recursive: true });

    const { taskId, taskTitle, source } = ctx || {};
    const serialized = JSON.stringify(
      {
        version: 1,
        taskId: taskId || null,
        taskTitle: taskTitle || null,
        source: source || 'unspecified',
        markedAt: new Date().toISOString()
      },
      null,
      2
    );
    fs.writeFileSync(target, serialized);

    return { marked: true, markerPath: target };
  } catch (writeErr) {
    return { marked: false, reason: `marker-write-failed: ${writeErr.message}` };
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Check whether the Phase-2 preconditions hold.
 *
 * Checks, in order:
 *   1. `config.taskBoundaryReset.enabled === true`
 *   2. `WOGI_WRAPPER_PID` is set in the environment
 *   3. `WOGI_RESTART_FLAG` is set in the environment
 *   4. `process.ppid` is an integer greater than 1
 *
 * @returns {{ ready: boolean, reason?: string, flagPath?: string, parentPid?: number }}
 *   `ready: true` carries the flag path and the parent PID to signal;
 *   otherwise `reason` names the first failed check. Never throws.
 */
function checkPreconditions() {
  try {
    const config = getConfig();
    const tbr = config.taskBoundaryReset || {};
    if (tbr.enabled !== true) {
      return { ready: false, reason: 'disabled-by-config' };
    }

    const wrapperPid = process.env.WOGI_WRAPPER_PID;
    if (!wrapperPid) {
      return { ready: false, reason: 'no-wrapper-pid' };
    }

    const flagPath = process.env.WOGI_RESTART_FLAG;
    if (!flagPath) {
      return { ready: false, reason: 'no-flag-path' };
    }

    // The caller sends SIGTERM to this PID. A ppid of 0 or 1 means the process
    // that launched us is gone (on POSIX an orphan is typically reparented to
    // PID 1), so signalling it would hit init rather than claude. Treat that
    // the same as having no parent.
    const parentPid = process.ppid;
    if (!Number.isInteger(parentPid) || parentPid <= 1) {
      return { ready: false, reason: 'no-parent-pid' };
    }

    return { ready: true, flagPath, parentPid };
  } catch (err) {
    return { ready: false, reason: `config-error: ${err.message}` };
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Phase 2 — invoked by the Stop hook entry. When a pending marker exists and
 * the preconditions pass, this removes the marker, writes the wrapper's
 * restart flag, and sends SIGTERM to the parent process.
 *
 * Every failure path returns a diagnostic result instead of throwing, so the
 * Stop hook can carry on with its normal flow.
 *
 * @returns {{ triggered: boolean, reason?: string, flagPath?: string, parentPid?: number }}
 */
function consumeAndTriggerRestart() {
  const pendingMarker = getPendingMarkerPath();
  const markerPresent = fs.existsSync(pendingMarker);
  if (!markerPresent) {
    return { triggered: false, reason: 'no-pending-marker' };
  }

  const { ready, reason: skipReason, flagPath, parentPid } = checkPreconditions();
  if (!ready) {
    if (process.env.DEBUG) {
      console.error(`[task-boundary-reset] phase-2 skip: ${skipReason}`);
    }
    // The marker is intentionally left in place on this path.
    return { triggered: false, reason: skipReason };
  }

  // Read the marker's payload for diagnostics; an unreadable payload is fine.
  let taskDetails;
  try {
    taskDetails = JSON.parse(fs.readFileSync(pendingMarker, 'utf-8'));
  } catch (_err) {
    taskDetails = null;
  }

  // Remove the marker BEFORE signalling, so a second Stop-hook invocation
  // racing through cannot fire the restart twice. If removal fails, bail out
  // without signalling to avoid a restart loop.
  try {
    fs.unlinkSync(pendingMarker);
  } catch (unlinkErr) {
    return { triggered: false, reason: `marker-unlink-failed: ${unlinkErr.message}` };
  }

  // The wrapper's restart flag carries a copy of the marker payload.
  const restartFlag = {
    version: 1,
    reason: 'task-boundary',
    ...(taskDetails || {}),
    triggeredAt: new Date().toISOString(),
    wrapperPid: process.env.WOGI_WRAPPER_PID
  };
  try {
    fs.mkdirSync(path.dirname(flagPath), { recursive: true });
    fs.writeFileSync(flagPath, JSON.stringify(restartFlag, null, 2));
  } catch (flagErr) {
    return { triggered: false, reason: `flag-write-failed: ${flagErr.message}` };
  }

  // Signal the parent (claude). The wrapper is expected to see the flag when
  // claude exits and relaunch it. See spec wf-39e9dc09 for SIGHUP/SIGINT
  // fallbacks if SIGTERM does not shut claude down cleanly.
  try {
    process.kill(parentPid, 'SIGTERM');
  } catch (killErr) {
    // The signal was not delivered: withdraw the flag so the wrapper does not
    // restart a claude that is still running.
    try {
      fs.unlinkSync(flagPath);
    } catch (_cleanupErr) {
      /* best effort */
    }
    return { triggered: false, reason: `sigterm-failed: ${killErr.message}` };
  }

  return { triggered: true, flagPath, parentPid };
}
|
|
197
|
+
|
|
198
|
+
/**
 * Report whether the pending-restart marker file is currently on disk.
 * Any error while checking counts as "no marker".
 *
 * @returns {boolean}
 */
function hasPendingMarker() {
  let present = false;
  try {
    present = fs.existsSync(getPendingMarkerPath());
  } catch (_err) {
    present = false;
  }
  return present;
}
|
|
209
|
+
|
|
210
|
+
module.exports = {
|
|
211
|
+
// Phase 1 — called from task-completion code paths
|
|
212
|
+
markRestartPending,
|
|
213
|
+
|
|
214
|
+
// Phase 2 — called from the Stop hook entry
|
|
215
|
+
consumeAndTriggerRestart,
|
|
216
|
+
|
|
217
|
+
// Diagnostics
|
|
218
|
+
checkPreconditions,
|
|
219
|
+
hasPendingMarker,
|
|
220
|
+
getPendingMarkerPath,
|
|
221
|
+
|
|
222
|
+
// Back-compat: earlier code calls this name. Route it to Phase 1 so existing
|
|
223
|
+
// wiring in task-completed.js still does the right thing (mark the marker,
|
|
224
|
+
// don't SIGTERM yet). A later refactor can remove this alias.
|
|
225
|
+
maybeTriggerRestart: markRestartPending
|
|
226
|
+
};
|
|
227
|
+
|
|
228
|
+
// CLI smoke-check: `node scripts/hooks/core/task-boundary-reset.js <cmd>`
// Prints the chosen command's result as JSON and exits 0; any other argument
// prints the usage line and exits 2.
if (require.main === module) {
  const [, , command] = process.argv;
  const commands = new Map([
    ['check', () => checkPreconditions()],
    ['has-pending', () => ({ hasPendingMarker: hasPendingMarker() })],
    ['mark', () => markRestartPending({ source: 'cli-test' })],
    ['consume', () => consumeAndTriggerRestart()]
  ]);

  const run = commands.get(command);
  if (run) {
    console.log(JSON.stringify(run(), null, 2));
    process.exit(0);
  } else {
    console.log('Usage: node task-boundary-reset.js <check|has-pending|mark|consume>');
    process.exit(2);
  }
}
|
|
@@ -500,6 +500,41 @@ async function handleTaskCompleted(input) {
|
|
|
500
500
|
} catch (_err) {
|
|
501
501
|
// Non-critical — pending module may not be available
|
|
502
502
|
}
|
|
503
|
+
|
|
504
|
+
// Task-boundary session restart (wf-39e9dc09) — experimental, opt-in.
|
|
505
|
+
// MUST run AFTER all state-write cleanup above. The restart itself fires only if:
|
|
506
|
+
// 1. config.taskBoundaryReset.enabled === true
|
|
507
|
+
// 2. WOGI_WRAPPER_PID env var is set (proves wogi-claude wrapper is running us)
|
|
508
|
+
// 3. WOGI_RESTART_FLAG env var is set
|
|
509
|
+
// 4. Task completed cleanly (result.completed === true)
|
|
510
|
+
//
|
|
511
|
+
// This call is Phase 1 only: it writes the task-just-completed marker and
|
|
512
|
+
// sends no signal. The Stop hook (Phase 2) checks conditions 1-3, writes the
|
|
513
|
+
// restart flag, and sends SIGTERM to Claude Code. State files are already flushed to disk.
|
|
514
|
+
if (result.completed && completedTask?.id) {
|
|
515
|
+
try {
|
|
516
|
+
const { maybeTriggerRestart } = require('./task-boundary-reset');
|
|
517
|
+
const restartResult = maybeTriggerRestart({
|
|
518
|
+
taskId: completedTask.id,
|
|
519
|
+
taskTitle: completedTask.title
|
|
520
|
+
});
|
|
521
|
+
if (restartResult.triggered) {
|
|
522
|
+
result.taskBoundaryRestart = {
|
|
523
|
+
triggered: true,
|
|
524
|
+
flagPath: restartResult.flagPath
|
|
525
|
+
};
|
|
526
|
+
result.message = (result.message || '') +
|
|
527
|
+
' [Task-boundary restart triggered — session will restart on clean exit]';
|
|
528
|
+
} else if (process.env.DEBUG) {
|
|
529
|
+
console.error(`[Task Completed] Restart skipped: ${restartResult.reason}`);
|
|
530
|
+
}
|
|
531
|
+
} catch (err) {
|
|
532
|
+
// Fail-open — restart failure must not block task completion
|
|
533
|
+
if (process.env.DEBUG) {
|
|
534
|
+
console.error(`[Task Completed] Restart module error: ${err.message}`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
503
538
|
} catch (err) {
|
|
504
539
|
result.message = `Task completed handler error: ${err.message}`;
|
|
505
540
|
}
|
|
@@ -120,6 +120,69 @@ runHook('Stop', async ({ parsedInput }) => {
|
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
+
// Task-boundary session restart (wf-39e9dc09 — Phase 2, Stop-hook pivot).
|
|
124
|
+
// Runs BEFORE checkLoopExit so we can SIGTERM cleanly if a task was just
|
|
125
|
+
// completed. This is a verified direct child of the claude process (the
|
|
126
|
+
// Stop hook fires reliably — directly observed in test run 2026-04-15,
|
|
127
|
+
// unlike TaskCompleted which was found not to fire for Task-tool subagents).
|
|
128
|
+
// No-op unless task-just-completed marker exists AND feature is enabled
|
|
129
|
+
// AND wogi-claude wrapper env is present.
|
|
130
|
+
try {
|
|
131
|
+
const { consumeAndTriggerRestart, hasPendingMarker } = require('../../core/task-boundary-reset');
|
|
132
|
+
|
|
133
|
+
// If we're about to restart, record the session in history FIRST so the
|
|
134
|
+
// new session can find the prior session's resume token. Use parsedInput
|
|
135
|
+
// or session-state for the cliSessionId.
|
|
136
|
+
if (hasPendingMarker()) {
|
|
137
|
+
try {
|
|
138
|
+
const { recordSessionEnd } = require('../../core/session-history');
|
|
139
|
+
let cliSessionId = parsedInput?.sessionId || null;
|
|
140
|
+
if (!cliSessionId) {
|
|
141
|
+
// Fallback: read from session-state.json
|
|
142
|
+
const { PATHS, safeJsonParse } = require('../../../flow-utils');
|
|
143
|
+
const path = require('node:path');
|
|
144
|
+
const ss = safeJsonParse(path.join(PATHS.state, 'session-state.json'), {});
|
|
145
|
+
cliSessionId = ss.cliSessionId || null;
|
|
146
|
+
}
|
|
147
|
+
if (cliSessionId) {
|
|
148
|
+
// Collect tasks completed in this session from recentlyCompleted
|
|
149
|
+
// (best-effort — not all of these are from THIS session but it's
|
|
150
|
+
// a reasonable approximation; in practice the newest entries are ours)
|
|
151
|
+
const { PATHS, safeJsonParse } = require('../../../flow-utils');
|
|
152
|
+
const path = require('node:path');
|
|
153
|
+
const ready = safeJsonParse(path.join(PATHS.state, 'ready.json'), {});
|
|
154
|
+
const recent = ready.recentlyCompleted || [];
|
|
155
|
+
const lastCompleted = recent[0] || null;
|
|
156
|
+
recordSessionEnd({
|
|
157
|
+
cliSessionId,
|
|
158
|
+
endReason: 'task-boundary-restart',
|
|
159
|
+
tasksCompletedInSession: recent.slice(0, 5).map(t => t.id).filter(Boolean),
|
|
160
|
+
lastActiveTaskTitle: lastCompleted?.title || null
|
|
161
|
+
});
|
|
162
|
+
}
|
|
163
|
+
} catch (err) {
|
|
164
|
+
if (process.env.DEBUG) {
|
|
165
|
+
console.error(`[Stop] Session history record failed (non-fatal): ${err.message}`);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const restartResult = consumeAndTriggerRestart();
|
|
171
|
+
if (restartResult.triggered && process.env.DEBUG) {
|
|
172
|
+
console.error(`[Stop] Task-boundary restart triggered — claude will exit, wrapper will relaunch`);
|
|
173
|
+
} else if (!restartResult.triggered && restartResult.reason !== 'no-pending-marker' && process.env.DEBUG) {
|
|
174
|
+
console.error(`[Stop] Task-boundary restart check: ${restartResult.reason}`);
|
|
175
|
+
}
|
|
176
|
+
// If we SIGTERM'd our parent, the process will begin shutting down. Still
|
|
177
|
+
// return the normal Stop-hook result so any in-flight return value flows
|
|
178
|
+
// back to claude before the signal is handled.
|
|
179
|
+
} catch (err) {
|
|
180
|
+
if (process.env.DEBUG) {
|
|
181
|
+
console.error(`[Stop] Task-boundary restart module error (fail-open): ${err.message}`);
|
|
182
|
+
}
|
|
183
|
+
// Never block Stop on restart-module errors.
|
|
184
|
+
}
|
|
185
|
+
|
|
123
186
|
// Check if loop can exit
|
|
124
187
|
return await checkLoopExit();
|
|
125
188
|
}, { failMode: 'warn', failOutput: { continue: false } });
|