npm - wogiflow - Versions diffs - 2.30.2 → 2.30.4 - Mend

wogiflow 2.30.2 → 2.30.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +1 -1
package/scripts/flow-defer-auth.js +28 -0
package/scripts/flow-deferral-classifier-ai.js +229 -0
package/scripts/hooks/core/deferral-classifier.js +111 -92
package/scripts/hooks/core/deferral-gate.js +137 -25
package/scripts/hooks/core/no-defer-policy.js +107 -0
package/scripts/hooks/entry/claude-code/session-start.js +14 -0
package/scripts/hooks/entry/claude-code/stop.js +24 -1
package/scripts/hooks/entry/claude-code/user-prompt-submit.js +12 -8

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wogiflow",
-  "version": "2.30.2",
+  "version": "2.30.4",
   "description": "AI-powered development workflow management system with multi-model support",
   "main": "lib/index.js",
   "bin": {

package/scripts/flow-defer-auth.js CHANGED Viewed

@@ -32,6 +32,32 @@ function parseArgs(argv) {
 }
 function cmdGrant(args) {
+  // wf-b8839d99: Refuse to grant when invoked from a non-TTY context (i.e.,
+  // from Claude Code's Bash tool or any subprocess pipeline). The AI cannot
+  // self-issue deferral authorization — the gate exists precisely to prevent
+  // that. A human running this CLI from a terminal has stdin.isTTY === true;
+  // an AI subprocess does not.
+  //
+  // Override: --i-am-human bypasses the TTY check. Provided so out-of-band
+  // automation (CI scripts, etc.) can still grant deliberately; the flag
+  // signals intent. Caveat: if the AI passes this flag, that's an explicit
+  // policy violation that shows up in shell history / commit logs.
+  const isHuman = Boolean(process.stdin.isTTY) || args['i-am-human'] === true;
+  if (!isHuman) {
+    console.error('grant: refused — non-TTY invocation detected.');
+    console.error('');
+    console.error('Per wf-b8839d99: AI subprocesses cannot self-issue deferral authorization.');
+    console.error('The auth marker may only be written by:');
+    console.error('  1. The UserPromptSubmit AI classifier interpreting the user\'s message, OR');
+    console.error('  2. A human running this CLI from a terminal directly.');
+    console.error('');
+    console.error('If the user authorized deferral, surface it back through the conversation —');
+    console.error('the classifier will detect it and write the marker on the next prompt.');
+    console.error('');
+    console.error('Override (genuine automation): pass --i-am-human (logged in audit trail).');
+    process.exit(3);
+  }
   let scope = 'all';
   if (args.findings) {
     scope = String(args.findings)
@@ -53,6 +79,8 @@ function cmdGrant(args) {
   const payload = gate.writeAuth({
     scope,
     source: reason,
+    userPromptExcerpt: '(out-of-band CLI grant — no user prompt)',
+    confidence: 100,
     grantedBy: 'explicit-cli',
     ttlSec
   });

package/scripts/flow-deferral-classifier-ai.js ADDED Viewed

@@ -0,0 +1,229 @@
+'use strict';
+/**
+ * Wogi Flow — AI-Based Deferral-Intent Classifier (wf-b8839d99)
+ *
+ * Replaces the prior regex-based deferral classifier. The user surfaced a
+ * critical case on 2026-05-11: regex `/\bfix\s+(everything|all\s+of\s+(them|it)|all\s+findings?)\b/i`
+ * did NOT match bare "fix all" — natural English the user actually typed.
+ * Result: AI silently deferred findings the user had told it to fix.
+ *
+ * Why AI, not regex (user instruction, restated 2026-05-11):
+ *   "Regex is prone to mistakes. I don't want regex or matching when I
+ *    answer things like that. AI needs to get my responses and analyze them."
+ *
+ * Design mirrors flow-worker-question-classifier.js:
+ *   - Single Haiku call per UserPromptSubmit
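+ *   - Returns {classified, intent, confidence, interpretation, scope,
+ *     standing, actionable, minConfidence} on success, or
+ *     {classified: false, reason} when classification could not run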
+ *   - Fail-open: missing API key / model error → {classified: false} → no
+ *     state change. The gate's default-restrictive behavior holds.
+ *   - JSON validated for shape + prototype-pollution
+ *
+ * Three outputs the classifier produces:
+ *   negative  — user wants no deferrals (any phrasing: "fix all", "I don't
+ *               like tech debt", "no deferrals", "fix everything", etc.).
+ *               Triggers no-defer-pin write + auth-marker clear.
+ *   positive  — user explicitly authorized deferring specific items.
+ *               Triggers auth-marker write with scope.
+ *   none      — nothing relevant said. No state change.
+ *
+ * The classifier ALSO captures `interpretation`: the AI's brief explanation
+ * of WHAT it understood the user to mean. This goes into the auth/pin marker
+ * `source` field SEPARATELY from any verbatim quote — ending the "false
+ * attribution" failure shape where the AI fabricated a "user said X" claim.
+ */
+const DEFAULT_MIN_CONFIDENCE = 75; // default threshold (0-100): a non-"none" intent at or above this is reported as actionable
+const DEFAULT_MODEL = 'anthropic:claude-3-5-haiku-latest'; // model identifier handed to callModel when options.model is not given
+const MAX_PROMPT_CHARS = 4000; // cap on how many characters of the user message are embedded in the prompt
+const MAX_TOKENS = 400; // passed to callModel as maxTokens
+const TEMPERATURE = 0.0; // passed to callModel as temperature
+const { DANGEROUS_KEYS } = require('./flow-io');
+/**
+ * Build the deferral-intent classifier prompt.
+ *
+ * Designed to distinguish:
+ *   - Explicit no-defer commands ("fix all", "fix everything", "no
+ *     deferrals", "I don't like tech debt", "always fix it") → NEGATIVE
+ *   - Explicit defer commands ("defer F5", "skip the low ones", "option 2",
+ *     "ship as-is", "fix critical only") → POSITIVE
+ *   - Everything else (unrelated chatter, ambiguous, conditional) → NONE
+ *
+ * Critical: the classifier MUST default to NONE on ambiguity. Granting
+ * auth when in doubt is the original bug. Failing to detect a no-defer
+ * is recoverable (user can repeat); silently granting auth is not.
+ *
+ * The user message is coerced with String(), truncated to MAX_PROMPT_CHARS
+ * characters, and embedded verbatim between the [USER_MESSAGE_START] and
+ * [USER_MESSAGE_END] markers. The prompt asks the model for a JSON-only
+ * reply with the keys intent, confidence, interpretation, scope, standing.
+ *
+ * @param {string} userPrompt - The user's message; a falsy value is embedded as an empty string.
+ * @returns {string} The complete prompt text.
+ */
+function buildDeferralPrompt(userPrompt) {
+  return `You classify whether a user's message to an AI development assistant expresses deferral intent — and if so, which direction.
+Three categories:
+  NEGATIVE — user wants NO deferrals; everything should be fixed.
+    Examples: "fix all", "fix everything", "fix all of them", "no deferrals",
+    "I don't like tech debt", "don't defer anything", "ship everything fixed",
+    "I always want it all fixed", "fix it all".
+  POSITIVE — user explicitly authorizes deferring specific items.
+    Examples: "defer F5", "skip the low-priority ones", "option 2", "option 4",
+    "fix critical only", "ship as-is", "good enough for now", "create tasks
+    for the rest", "leave that for later".
+  NONE — neither. Includes:
+    - Unrelated messages ("looks good", "thanks", "let's discuss X")
+    - Ambiguous statements where defer-intent is unclear
+    - Conditional / hypothetical ("we could defer X if needed" — that's
+      reasoning aloud, not an authorization)
+    - Questions about deferring without a directive
+    - The word "defer" appearing in technical context (e.g., "defer the
+      callback execution")
+CRITICAL RULES:
+  1. When ambiguous, return NONE. The cost of missing a defer signal is low
+     (user can repeat); the cost of false-positive auth is high (AI defers
+     work the user wanted done).
+  2. NEGATIVE takes precedence. If the user says both "fix everything" AND
+     "skip Y" in the same message, return NEGATIVE — they want it all.
+  3. Standing preferences ("I always", "from now on", "as a rule") about
+     deferring are NEGATIVE even if no current finding is in scope.
+  4. Confidence: only >= 80 if the message is unambiguous about defer intent.
+     Anything that requires reading between the lines is < 80.
+[USER_MESSAGE_START]
+${String(userPrompt || '').slice(0, MAX_PROMPT_CHARS)}
+[USER_MESSAGE_END]
+Return JSON only, no prose, no markdown fences:
+{
+  "intent": "negative" | "positive" | "none",
+  "confidence": 0-100,
+  "interpretation": "one short sentence: what you understood the user to mean",
+  "scope": "all" | [array of finding IDs like F1, F2, M3] | null,
+  "standing": true | false
+}
+Examples:
+- "fix all" → {"intent":"negative","confidence":95,"interpretation":"user wants every finding fixed, no deferrals","scope":null,"standing":false}
+- "I don't like tech debt" → {"intent":"negative","confidence":90,"interpretation":"standing preference against accumulating deferred work","scope":null,"standing":true}
+- "defer F5 and F6, fix the rest" → {"intent":"positive","confidence":95,"interpretation":"user authorizes deferring F5 and F6 specifically","scope":["F5","F6"],"standing":false}
+- "option 2" → {"intent":"positive","confidence":90,"interpretation":"user picked the fix-critical-only menu option","scope":"all","standing":false}
+- "looks good, let's continue" → {"intent":"none","confidence":85,"interpretation":"acknowledgment, no defer signal","scope":null,"standing":false}
+- "could we defer this?" → {"intent":"none","confidence":80,"interpretation":"question, not an authorization","scope":null,"standing":false}`;
+}
+/**
+ * Recursively report whether a value carries any key listed in DANGEROUS_KEYS.
+ *
+ * @param {*} value - Any value; null, other falsy values and non-objects yield false.
+ * @returns {boolean} true when the value itself, or anything nested inside
+ *   it (array elements or property values), has an own enumerable key that
+ *   is present in DANGEROUS_KEYS.
+ */
+function hasDangerousKeys(value) {
+  const isContainer = Boolean(value) && typeof value === 'object';
+  if (!isContainer) {
+    return false;
+  }
+  if (Array.isArray(value)) {
+    // Arrays have no interesting keys of their own; only their elements matter.
+    return value.some(hasDangerousKeys);
+  }
+  // A key is checked before descending into its value, as in a depth-first walk.
+  return Object.keys(value).some(
+    (key) => DANGEROUS_KEYS.has(key) || hasDangerousKeys(value[key])
+  );
+}
+/**
+ * Classify user-prompt deferral intent.
+ *
+ * Sends one prompt (built by buildDeferralPrompt) through callModel, takes
+ * the span from the first `{` to the last `}` of the reply, parses it as
+ * JSON and normalizes the fields. Failure paths resolve to
+ * {classified: false, reason} and change no state.
+ *
+ * Normalization applied to the model's reply:
+ *   - intent:      lower-cased; anything other than negative/positive/none
+ *                  becomes 'none'.
+ *   - confidence:  rounded and clamped to 0-100; non-numeric becomes 0.
+ *   - standing:    true only for the JSON boolean `true` (the string
+ *                  "false" must not produce a standing pin).
+ *   - scope:       'all', an array of finding IDs (letter + digits), or
+ *                  null. A single ID string is wrapped in an array.
+ *   - actionable:  intent is not 'none' AND confidence >= minConfidence.
+ *                  A positive intent whose explicit scope could not be
+ *                  validated is NOT actionable, so a malformed narrow
+ *                  scope can never be widened to "all" by a caller that
+ *                  defaults a null scope.
+ *
+ * @param {string} userPrompt - The user's message
+ * @param {Object} [options]
+ * @param {number} [options.minConfidence=75] - Confidence threshold for treating as actionable
+ * @param {string} [options.model] - Model override
+ * @returns {Promise<{
+ *   classified: boolean,
+ *   intent?: 'negative'|'positive'|'none',
+ *   confidence?: number,
+ *   interpretation?: string,
+ *   scope?: string|string[]|null,
+ *   standing?: boolean,
+ *   actionable?: boolean,
+ *   minConfidence?: number,
+ *   reason?: string
+ * }>}
+ */
+async function classifyUserDeferralIntent(userPrompt, options = {}) {
+  // The `= {}` default only covers `undefined`; tolerate an explicit null too.
+  const opts = options ?? {};
+  const minConfidence = Number.isFinite(opts.minConfidence) ? opts.minConfidence : DEFAULT_MIN_CONFIDENCE;
+  const model = opts.model || DEFAULT_MODEL;
+  if (typeof userPrompt !== 'string' || userPrompt.trim().length === 0) {
+    return { classified: false, reason: 'empty-prompt' };
+  }
+  if (!process.env.ANTHROPIC_API_KEY) {
+    return { classified: false, reason: 'no-credentials' };
+  }
+  // Lazy require: a missing model-caller module degrades to "not classified".
+  let callModel;
+  try {
+    ({ callModel } = require('./flow-model-caller'));
+  } catch (_err) {
+    return { classified: false, reason: 'no-model-caller' };
+  }
+  const prompt = buildDeferralPrompt(userPrompt);
+  let result;
+  try {
+    result = await callModel(model, prompt, {
+      temperature: TEMPERATURE,
+      maxTokens: MAX_TOKENS
+    });
+  } catch (err) {
+    if (process.env.DEBUG) {
+      console.error(`[deferral-classifier-ai] model call failed: ${err.message}`);
+    }
+    return { classified: false, reason: 'model-error' };
+  }
+  const raw = String(result?.response ?? result?.content ?? '').trim();
+  if (!raw) return { classified: false, reason: 'empty-response' };
+  // Tolerate prose or fences around the object: take first `{` to last `}`.
+  const jsonMatch = raw.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) return { classified: false, reason: 'non-json-response' };
+  let parsed;
+  try {
+    parsed = JSON.parse(jsonMatch[0]);
+  } catch (_err) {
+    return { classified: false, reason: 'json-parse-error' };
+  }
+  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+    return { classified: false, reason: 'bad-shape' };
+  }
+  if (hasDangerousKeys(parsed)) {
+    return { classified: false, reason: 'dangerous-keys' };
+  }
+  const intentRaw = String(parsed.intent || '').toLowerCase();
+  const intent = ['negative', 'positive', 'none'].includes(intentRaw) ? intentRaw : 'none';
+  // Clamp: an out-of-range value such as 950 must not read as extra-confident.
+  const confidence = Number.isFinite(parsed.confidence)
+    ? Math.min(100, Math.max(0, Math.round(parsed.confidence)))
+    : 0;
+  const interpretation = typeof parsed.interpretation === 'string'
+    ? parsed.interpretation.slice(0, 500)
+    : '';
+  // Scope: accept only 'all', finding IDs, or null. Track whether the model
+  // supplied a scope we had to throw away.
+  const findingId = /^[A-Za-z]\d+$/;
+  const rawScope = parsed.scope ?? null;
+  let scope = null;
+  let scopeUnusable = false;
+  if (rawScope === 'all') {
+    scope = 'all';
+  } else if (typeof rawScope === 'string') {
+    const id = rawScope.trim();
+    if (findingId.test(id)) {
+      scope = [id];
+    } else {
+      scopeUnusable = true;
+    }
+  } else if (Array.isArray(rawScope)) {
+    const ids = rawScope
+      .filter(s => typeof s === 'string')
+      .map(s => s.trim())
+      .filter(s => findingId.test(s));
+    if (ids.length > 0) {
+      scope = ids;
+    } else {
+      // An empty array stays a plain null; a non-empty array with no valid
+      // IDs is an explicit scope we could not understand.
+      scopeUnusable = rawScope.length > 0;
+    }
+  } else if (rawScope !== null) {
+    // Numbers, booleans, objects: never pass an unvalidated shape through.
+    scopeUnusable = true;
+  }
+  // Only the JSON boolean counts; Boolean("false") would be true.
+  const standing = parsed.standing === true;
+  const meetsThreshold = intent !== 'none' && confidence >= minConfidence;
+  const unsafePositive = intent === 'positive' && scopeUnusable;
+  return {
+    classified: true,
+    intent,
+    confidence,
+    interpretation,
+    scope,
+    standing,
+    actionable: meetsThreshold && !unsafePositive,
+    minConfidence
+  };
+}
+module.exports = {
+  classifyUserDeferralIntent,
+  buildDeferralPrompt,
+  hasDangerousKeys,
+  DEFAULT_MIN_CONFIDENCE,
+  DEFAULT_MODEL
+};

package/scripts/hooks/core/deferral-classifier.js CHANGED Viewed

@@ -1,129 +1,148 @@
 #!/usr/bin/env node
 /**
- * Wogi Flow — Deferral Intent Classifier (wf-f9912af6)
+ * Wogi Flow — Deferral Intent Classifier — Hook Wrapper (wf-b8839d99)
  *
- * Regex-based detector for explicit user deferral intent in UserPromptSubmit
- * messages. Cheap (no Haiku call), deterministic, runs every prompt.
+ * Thin wrapper around the AI-based classifier (scripts/flow-deferral-classifier-ai.js).
+ * Originally regex-based; replaced 2026-05-11 after the user surfaced the
+ * "fix all" miss + false-attribution incident.
  *
- * NEGATIVE intent takes precedence over POSITIVE — if the user says both
- * "fix everything" and "skip Y" in the same message, we assume they want
- * everything fixed (the defer-everything pattern is the dangerous one this
- * gate exists to stop).
+ * User's instruction (2026-05-11):
+ *   "Regex is prone to mistakes. I don't want regex or matching when I
+ *    answer things like that. AI needs to get my responses and analyze them."
  *
- * Negative match → write `no-defer-pin.json` (HARD block, overrides any auth)
- * Positive match → write `deferral-authorization.json` (allows specific scope)
- * Neither → no-op
+ * Flow at UserPromptSubmit:
+ *   1. Call AI classifier (Haiku, cheap, ~500ms)
+ *   2. If actionable + negative → write no-defer-pin (clear any auth)
+ *   3. If actionable + positive → write auth marker
+ *   4. None/low-confidence/classifier-error → no state change (fail-open)
  *
- * Fail-open: any error in classification falls through silently.
+ * The marker source field is now the AI's structured interpretation, NOT
+ * a free-form string the AI invents. The classifier returns {intent,
+ * confidence, interpretation} as a triple; we record all three plus the
+ * verbatim user-message excerpt in the marker. Audit trails can then
+ * distinguish "user said X" from "AI interpreted Y".
+ *
+ * The synchronous regex API used to be the entry point. We keep the same
+ * name and return shape but the implementation is now an async call to
+ * the AI classifier. Callers in user-prompt-submit.js are already async.
  */
-// Negative phrases (HIGH PRIORITY — clear auth, write no-defer pin)
-const NEGATIVE_PATTERNS = [
-  /\bfix\s+(everything|all\s+of\s+(them|it)|all\s+findings?)\b/i,
-  /\bno\s+deferr?als?\b/i,
-  /\b(don'?t|do\s+not)\s+defer\b/i,
-  /\bi\s+don'?t\s+(want|like)\s+(tech\s*-?\s*debt|technical\s*-?\s*debt|deferr?al)/i,
-  /\bnever\s+defer\b/i,
-  /\balways\s+fix\s+(what'?s\s+broken|what\s+needs?\s+fixing)/i,
-  /\bnothing\s+(should\s+be|gets)\s+deferr?ed\b/i,
-];
-// Positive phrases (MEDIUM PRIORITY — write auth marker)
-// We're conservative: require defer/skip phrasing to be coupled with finding
-// context (this/that/those/it/option N/F\d+/severity word) to avoid catching
-// unrelated mentions like "let's defer the meeting".
-const POSITIVE_PATTERNS = [
-  // "defer X" / "skip X" with a referent
-  /\b(defer|skip|ignore|drop)\s+(this|that|those|it|them|f\d+|finding\s+\w+)\b/i,
-  /\bleave\s+(this|that|those|f\d+|.*?)\s+(for\s+)?later\b/i,
-  // /wogi-review menu options that mean defer
-  /\boption\s*[24]\b/i, // option 2 = "fix critical only"; option 4 = "create tasks for all (defer)"
-  /\bcreate\s+tasks?\s+for\s+(all|the\s+rest|remaining)\b/i,
-  // Severity-scoped deferrals
-  /\bfix\s+(only\s+)?(critical|high)\s*(\s*\/\s*high)?\s+only\b/i,
-  /\bfix\s+(critical|high)\s+(only|first)\b/i,
-  /\bskip\s+(low|medium|low\s*\/\s*medium)\b/i,
-  // Ship-as-is style
-  /\bship\s+(it\s+)?as\s*-?\s*is\b/i,
-  /\bgood\s+enough\s+(as\s*-?\s*is|for\s+now)\b/i,
-  /\bcall\s+it\s+(done|good)\b/i,
-];
+const NEGATIVE_INTENT = 'negative';
+const POSITIVE_INTENT = 'positive';
 /**
- * Classify a user prompt for deferral intent.
+ * Apply deferral-intent classification at UserPromptSubmit time.
  *
- * @param {string} prompt - the user's UserPromptSubmit text
- * @returns {{ intent: 'negative'|'positive'|'none', match?: string, scope?: string|string[] }}
+ * @param {string} prompt - The user's message
+ * @param {Object} config - Loaded WogiFlow config
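+ * @returns {Promise<{
+ *   applied: boolean,
+ *   intent?: 'negative'|'positive'|'none',
+ *   match?: string,
+ *   scope?: string|string[],
+ *   confidence?: number,
+ *   standing?: boolean,
+ *   correctionGrace?: boolean,
+ *   reason?: string
+ * }>}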
  */
-function classifyDeferralIntent(prompt) {
-  if (!prompt || typeof prompt !== 'string') return { intent: 'none' };
+async function applyClassification(prompt, config) {
+  try {
+    if (config?.deferralGate?.classifyUserPrompts === false) {
+      return { applied: false, reason: 'classifier-disabled' };
+    }
-  // Negative first — overrides positive
-  for (const rx of NEGATIVE_PATTERNS) {
-    const m = prompt.match(rx);
-    if (m) return { intent: 'negative', match: m[0] };
-  }
+    const { classifyUserDeferralIntent } = require('../../flow-deferral-classifier-ai');
+    const result = await classifyUserDeferralIntent(prompt, {
+      minConfidence: config?.deferralGate?.minClassifierConfidence
+    });
+    if (!result.classified) {
+      // Fail-open — no state change on classifier error. Status quo holds.
+      if (process.env.DEBUG) {
+        console.error(`[deferral-classifier] classifier skipped: ${result.reason}`);
+      }
+      return { applied: false, reason: `classifier-skipped: ${result.reason}` };
+    }
-  // Positive
-  for (const rx of POSITIVE_PATTERNS) {
-    const m = prompt.match(rx);
-    if (m) {
-      // Try to extract scope — look for F\d+ ids in the prompt
-      const findingIds = Array.from(prompt.matchAll(/\bF\d+\b/g)).map(x => x[0]);
+    if (!result.actionable) {
       return {
-        intent: 'positive',
-        match: m[0],
-        scope: findingIds.length > 0 ? findingIds : 'all'
+        applied: false,
+        intent: result.intent,
+        reason: `below-threshold (confidence ${result.confidence} < ${result.minConfidence})`
       };
     }
-  }
-  return { intent: 'none' };
-}
-/**
- * Apply classification result to the gate's state files. Wired into
- * UserPromptSubmit. Fail-open throughout.
- */
-function applyClassification(prompt, config) {
-  try {
-    if (config?.deferralGate?.classifyUserPrompts === false) return { applied: false, reason: 'classifier-disabled' };
+    const gate = require('./deferral-gate');
-    const result = classifyDeferralIntent(prompt);
-    if (result.intent === 'none') return { applied: false, reason: 'no-match' };
+    if (result.intent === NEGATIVE_INTENT) {
+      // wf-b8839d99 fix #5: if there was a prior auth marker, the user's
+      // negative is likely a correction ("I did not authorize"). Write a
+      // brief routing-recovery grace window so the AI can act on the
+      // correction without re-routing through /wogi-start first.
+      let priorAuthExisted = false;
+      try { priorAuthExisted = Boolean(gate.loadAuth()); } catch (_err) { /* fine */ }
+      gate.writeNoDeferPin({
+        source: result.interpretation,
+        userPromptExcerpt: typeof prompt === 'string' ? prompt.slice(0, 300) : '',
+        confidence: result.confidence,
+        grantedBy: 'ai-classifier',
+        standing: result.standing
+      });
-    // Lazy-require to avoid load-order coupling
-    const gate = require('./deferral-gate');
+      if (priorAuthExisted) {
+        try {
+          const fs = require('node:fs');
+          const path = require('node:path');
+          const { PATHS } = require('../../flow-utils');
+          const gracePath = path.join(PATHS.state, 'routing-recovery-grace.json');
+          const now = Date.now();
+          fs.writeFileSync(gracePath, JSON.stringify({
+            grantedAt: new Date(now).toISOString(),
+            expiresAt: new Date(now + 60 * 1000).toISOString(),
+            reason: 'user-correction-after-prior-defer-auth',
+            userPromptExcerpt: typeof prompt === 'string' ? prompt.slice(0, 300) : ''
+          }, null, 2));
+        } catch (_err) { /* fail-open */ }
+      }
-    if (result.intent === 'negative') {
-      gate.writeNoDeferPin({ source: result.match });
-      return { applied: true, intent: 'negative', match: result.match };
+      return {
+        applied: true,
+        intent: 'negative',
+        match: result.interpretation,
+        confidence: result.confidence,
+        standing: result.standing,
+        correctionGrace: priorAuthExisted
+      };
     }
-    if (result.intent === 'positive') {
+    if (result.intent === POSITIVE_INTENT) {
       gate.writeAuth({
-        scope: result.scope,
-        source: result.match,
-        grantedBy: 'user-prompt',
+        scope: result.scope || 'all',
+        source: result.interpretation,
+        userPromptExcerpt: typeof prompt === 'string' ? prompt.slice(0, 300) : '',
+        confidence: result.confidence,
+        grantedBy: 'ai-classifier',
         config
       });
-      return { applied: true, intent: 'positive', match: result.match, scope: result.scope };
+      return {
+        applied: true,
+        intent: 'positive',
+        match: result.interpretation,
+        scope: result.scope || 'all',
+        confidence: result.confidence
+      };
     }
-    return { applied: false, reason: 'unhandled-intent' };
+    return { applied: false, intent: result.intent, reason: 'none-intent' };
   } catch (err) {
-    if (process.env.DEBUG) console.error(`[deferral-classifier] applyClassification error (fail-open): ${err.message}`);
+    if (process.env.DEBUG) {
+      console.error(`[deferral-classifier] applyClassification error (fail-open): ${err.message}`);
+    }
     return { applied: false, reason: `error: ${err.message}` };
   }
 }
 module.exports = {
-  classifyDeferralIntent,
   applyClassification,
-  NEGATIVE_PATTERNS,
-  POSITIVE_PATTERNS
+  NEGATIVE_INTENT,
+  POSITIVE_INTENT
 };

package/scripts/hooks/core/deferral-gate.js CHANGED Viewed

@@ -121,17 +121,43 @@ function clearNoDeferPin() {
   try { fs.unlinkSync(getNoDeferPinPath()); } catch (_err) { /* fine if absent */ }
 }
-function writeAuth({ scope = 'all', source = 'unspecified', grantedBy = 'user-prompt', ttlSec, config } = {}) {
+/**
+ * wf-b8839d99 — Marker shape now captures the verbatim user prompt excerpt
+ * SEPARATELY from the AI's interpretation. Prior shape had only a single
+ * `source` string the AI could fill with anything, enabling the false-
+ * attribution failure ("user-authorized" with a fabricated quote).
+ *
+ * Fields:
+ *   source                — AI's structured interpretation (what it understood)
+ *   userPromptExcerpt     — Verbatim user message excerpt (≤300 chars)
+ *   confidence            — AI classifier confidence (0-100)
+ *   grantedBy             — One of: 'ai-classifier', 'explicit-cli', 'user-prompt' (legacy)
+ *   standing              — true if this represents a standing/permanent rule
+ *
+ * Auditors can compare `source` (AI claim) against `userPromptExcerpt`
+ * (actual user words) to detect over-interpretation.
+ */
+function writeAuth({
+  scope = 'all',
+  source = 'unspecified',
+  userPromptExcerpt = '',
+  confidence = 0,
+  grantedBy = 'user-prompt',
+  ttlSec,
+  config
+} = {}) {
   try {
     const ttl = Number.isFinite(ttlSec) ? ttlSec : getAuthTtlSeconds(config);
     const now = Date.now();
     const payload = {
-      version: 1,
+      version: 2,
       grantedAt: new Date(now).toISOString(),
       expiresAt: new Date(now + ttl * 1000).toISOString(),
       scope,
       grantedBy,
-      source: typeof source === 'string' ? source.slice(0, 1000) : 'unspecified'
+      source: typeof source === 'string' ? source.slice(0, 1000) : 'unspecified',
+      userPromptExcerpt: typeof userPromptExcerpt === 'string' ? userPromptExcerpt.slice(0, 500) : '',
+      confidence: Number.isFinite(confidence) ? Math.round(confidence) : 0
     };
     fs.mkdirSync(path.dirname(getAuthPath()), { recursive: true });
     const tmp = `${getAuthPath()}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
@@ -144,14 +170,32 @@ function writeAuth({ scope = 'all', source = 'unspecified', grantedBy = 'user-pr
   }
 }
-function writeNoDeferPin({ source = 'unspecified', ttlSec = 1800 } = {}) {
+function writeNoDeferPin({
+  source = 'unspecified',
+  userPromptExcerpt = '',
+  confidence = 0,
+  grantedBy = 'ai-classifier',
+  standing = false,
+  ttlSec
+} = {}) {
   try {
+    // wf-b8839d99: standing pins (e.g., "I don't like tech debt" as a rule)
+    // get a much longer TTL — 7 days — so a standing preference doesn't
+    // silently expire after 30 min and re-open the deferral door. The pin
+    // is also refreshed at SessionStart from decisions.md.
+    const effectiveTtl = Number.isFinite(ttlSec)
+      ? ttlSec
+      : (standing ? 7 * 24 * 3600 : 1800);
     const now = Date.now();
     const payload = {
-      version: 1,
+      version: 2,
       pinnedAt: new Date(now).toISOString(),
-      expiresAt: new Date(now + ttlSec * 1000).toISOString(),
-      source: typeof source === 'string' ? source.slice(0, 1000) : 'unspecified'
+      expiresAt: new Date(now + effectiveTtl * 1000).toISOString(),
+      source: typeof source === 'string' ? source.slice(0, 1000) : 'unspecified',
+      userPromptExcerpt: typeof userPromptExcerpt === 'string' ? userPromptExcerpt.slice(0, 500) : '',
+      confidence: Number.isFinite(confidence) ? Math.round(confidence) : 0,
+      grantedBy,
+      standing: Boolean(standing)
     };
     fs.mkdirSync(path.dirname(getNoDeferPinPath()), { recursive: true });
     const tmp = `${getNoDeferPinPath()}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
@@ -271,33 +315,100 @@ function checkWriteGate(filePath, newContentRaw, config) {
 }
 /**
- * Validate a Bash command against the deferral gate. Two-stage:
- *   Stage 1: does the command mention any target file basename?
- *   Stage 2: does the command also mention `deferred` literal substring?
- * If both → fail SAFE (block) unless we can parse the content and prove auth.
+ * Strip quoted regions + heredoc bodies from a Bash command so the structural
+ * regex below only sees actual shell tokens. Released v2.30.3 over-triggered
+ * because the previous regex matched markdown blockquote `> "text"` inside
+ * heredoc bodies of `gh release create --notes "$(cat <<'EOF'...EOF)"`.
+ *
+ * Best-effort: handles single-quoted, double-quoted, backtick, and heredoc
+ * patterns. Doesn't attempt full shell parsing.
+ */
+function stripQuotedContent(cmd) {
+  if (typeof cmd !== 'string') return '';
+  // Heredocs first (multiline) — replace the whole construct with a sentinel.
+  // Pass 1 matches a terminator followed only by whitespace up to a line end;
+  // pass 2 catches a terminator that ends at a word boundary instead.
+  const withoutHeredocs = cmd
+    .replace(/<<-?\s*['"]?(\w+)['"]?[\s\S]*?\n\1\s*$/gm, ' <<HEREDOC>> ')
+    .replace(/<<-?\s*['"]?(\w+)['"]?[\s\S]*?\n\1\b/g, ' <<HEREDOC>> ');
+  // Strip single-quoted, backtick and double-quoted spans in ONE left-to-right
+  // pass. Separate passes per quote style mis-pair quotes nested inside the
+  // other kind: in `echo "it's" > f; echo "don't"` a single-quote-only pass
+  // treats the two apostrophes as one string and swallows the real redirect.
+  // With a single alternation, whichever quote opens first owns its span.
+  // Each span collapses to an empty pair of its own quote character.
+  return withoutHeredocs.replace(
+    /'[^']*'|`[^`]*`|"(?:[^"\\]|\\.)*"/g,
+    (quoted) => quoted[0] + quoted[0]
+  );
+}
+/**
+ * Validate a Bash command against the deferral gate.
+ *
+ * wf-4a5b7a6f rewrite (2026-05-11): previously this used three independent
+ * regex checks AND'd together, which over-triggered on commands that merely
+ * REFERENCED the target file and the word "deferred" as text content
+ * (markdown blockquotes, commit messages, gh release notes). The
+ * `>\s*[^&|]` part of `mutates` matched markdown blockquote syntax inside
+ * heredocs. The bare-word `\bdeferred\b` part of `mentionsDeferral` matched
+ * any prose mention of "deferred".
  *
- * For v1 we don't deep-parse the bash command; we conservatively block any
- * Bash that touches a target file AND contains a deferral status literal,
- * pointing the AI at the Write/Edit path (which can be properly inspected).
+ * Fix:
+ *   1. Run the structural mutation check on a QUOTE-STRIPPED command —
+ *      a `>` inside `"..."` or `'...'` is not a shell redirect.
+ *   2. Tighten the mutation check to require the target file be the WRITE
+ *      DESTINATION, not merely mentioned anywhere.
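+ *   3. Detect deferral content via quoted status values ("deferred",
+ *      "wont-fix", "skipped", "dismissed"), with a bare-word fallback. The
+ *      fallback is only consulted after step 2 has established that the
+ *      command writes to the target file.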
+ *
+ * If the AI tries to actually mutate the file via Bash with deferred
+ * content, the gate still catches it. Prose mentions pass through.
  */
 function checkBashGate(command, config) {
   try {
     if (!isGateEnabled(config)) return { blocked: false };
     if (typeof command !== 'string' || !command) return { blocked: false };
-    const mentionsTarget = /last-(review|audit)\.json/.test(command);
-    if (!mentionsTarget) return { blocked: false };
-    // Heuristic: only block when the command appears to MUTATE the file
-    // (writeFileSync, redirection >, sed -i, etc.). Pure reads (cat, jq, grep)
-    // are allowed.
-    const mutates = /(?:writeFileSync|>\s*[^&|]|>>\s*[^&|]|sed\s+-i|tee\s+|fs\.write|rename(?:Sync)?)/.test(command);
-    if (!mutates) return { blocked: false };
-    const mentionsDeferral = /\bdeferred[-_a-zA-Z0-9]*\b|"status"\s*:\s*"(deferred|wont-?fix|skipped|dismissed)/i.test(command);
+    // Step 1: strip quoted/heredoc content for the SHELL-LEVEL structural
+    // check (catches `>`, `tee` in actual shell positions, not inside markdown).
+    const stripped = stripQuotedContent(command);
+    // Step 2: detect a mutation operation targeting the review/audit file
+    // SPECIFICALLY. The patterns require the target file to be the WRITE
+    // DESTINATION — not merely mentioned. We test against BOTH the stripped
+    // command (catches shell-level redirects) AND the original command
+    // (catches in-language constructs like `node -e "fs.writeFileSync(...)"`
+    // where the JS payload is inside double-quotes and would be stripped).
+    // The patterns themselves are tight enough that running on the original
+    // doesn't re-introduce the prose-mention false positives — they require
+    // a write-verb token (writeFileSync, tee, etc.) IMMEDIATELY before the
+    // file path.
+    const writeToTargetPatterns = [
+      /(?:>>?|>\|)\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/,
+      /\btee\b(?:\s+-[a-zA-Z]+)*\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/,
+      /\b(?:fs\.)?writeFileSync\s*\(\s*[`'"][^`'"]*last-(?:review|audit)\.json/,
+      /\bfs\.write[A-Z][a-zA-Z]*\s*\(\s*[`'"][^`'"]*last-(?:review|audit)\.json/,
+      /\bsed\s+-i\b[^|;&]*\blast-(?:review|audit)\.json/,
+      /\b(?:mv|cp|rename(?:Sync)?)\s+\S+\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/
+    ];
+    const mutatesTarget = writeToTargetPatterns.some(re => re.test(stripped) || re.test(command));
+    if (!mutatesTarget) return { blocked: false };
+    // Step 3: check the ORIGINAL command for deferred-status content. We
+    // accept TWO signals:
+    //   - Quoted value: "deferred" / 'deferred' / `deferred` — JSON, JS,
+    //     template-literal styles.
+    //   - Bare word `\bdeferred\b` (or wont-?fix, skipped, dismissed) — fallback
+    //     for cases where escaping mangles the quote chars (e.g. shell-escaped
+    //     `\"deferred\"` inside a `node -e` payload where the quote becomes
+    //     non-adjacent to the word).
+    //
+    // The earlier false-positive case (prose mentions in release notes) is
+    // already closed by the tightened mutation check above — we only reach
+    // this step when the command demonstrably writes TO the target file.
+    // At that point, ANY mention of the deferral keyword is genuinely
+    // suspicious; the gate should err on the side of blocking.
+    const quotedDeferral = /['"`](deferred(?:[-_][a-zA-Z0-9]+)?|wont-?fix|won-?t-?fix|skipped|dismissed)['"`]/i;
+    const bareDeferral = /\b(deferred(?:[-_][a-zA-Z0-9]+)?|wont-?fix|won-?t-?fix|skipped|dismissed)\b/i;
+    const mentionsDeferral = quotedDeferral.test(command) || bareDeferral.test(command);
     if (!mentionsDeferral) return { blocked: false };
-    // We can't easily extract and validate the new content from arbitrary bash.
     // Check auth: if the user has authorized deferrals, allow. Otherwise block.
     const authResult = isAuthorized([{ id: 'unspecified' }]);
     if (authResult.authorized) return { blocked: false };
@@ -349,6 +460,7 @@ module.exports = {
   // Core checks
   checkWriteGate,
   checkBashGate,
+  stripQuotedContent,
   // Auth API (used by classifier + CLI helper)
   loadAuth,

package/scripts/hooks/core/no-defer-policy.js ADDED Viewed

@@ -0,0 +1,107 @@
+'use strict';
+/**
+ * Wogi Flow — Standing No-Defer Policy Refresh (wf-b8839d99)
+ *
+ * Reads `.workflow/state/decisions.md` at SessionStart for an explicit
+ * no-defer policy section and refreshes the no-defer-pin if found. This
+ * makes a user's standing preference ("I don't like tech debt", written
+ * via /wogi-decide into decisions.md) survive session boundaries instead
+ * of evaporating with the 7-day pin TTL.
+ *
+ * Recognized markers in decisions.md (case-insensitive, structured-section
+ * scan — NOT user-prompt parsing, so simple string match is acceptable):
+ *   - "## No-Deferral Policy" (or "### No-Deferral Policy")
+ *   - "## Anti-Tech-Debt Policy" / "### Anti-Tech-Debt Policy"
+ *   - Body must contain "active" / "enabled" / "enforced" (any of those)
+ *
+ * If found → write a fresh standing no-defer-pin with 30-day TTL. The pin
+ * carries `grantedBy: 'decisions-policy'` to distinguish it from per-prompt
+ * pins written by the AI classifier.
+ *
+ * Fail-open: any read/parse error → no action. Decisions.md is optional
+ * and many projects won't have a policy section.
+ */
+const fs = require('node:fs');
+const path = require('node:path');
+const { PATHS } = require('../../flow-utils');
+// Headings that mark a standing no-defer policy section in decisions.md.
+// Each pattern matches a level-2 or level-3 Markdown heading at the start of
+// a line, case-insensitively; hyphens are optional and "Deferal" is accepted
+// as a spelling variant of "Deferral".
+const POLICY_HEADER_PATTERNS = [
+  /^#{2,3}\s+No-?Deferr?al\s+Policy\b/im,
+  /^#{2,3}\s+Anti-?Tech-?Debt\s+Policy\b/im
+];
+// Whole-word, case-insensitive markers that indicate the policy is switched on.
+const ACTIVE_MARKERS = /\b(active|enabled|enforced)\b/i;
+// Lifetime, in seconds, requested for the pin written from the policy.
+const POLICY_PIN_TTL_SEC = 30 * 24 * 3600; // 30 days
+/**
+ * Check decisions.md for a no-defer policy section.
+ *
+ * For each POLICY_HEADER_PATTERNS entry, the first matching heading is
+ * located and up to 500 characters starting at that heading are scanned for
+ * an ACTIVE_MARKERS word. The scan is cut off at the next heading of the same
+ * or a higher level, so wording that belongs to a following, unrelated
+ * section cannot activate the policy. Sub-headings nested inside the policy
+ * section are still scanned.
+ *
+ * Fail-open: a missing or empty file, or any read/parse error, yields
+ * `{ active: false }`.
+ *
+ * @returns {{ active: boolean, header?: string, snippet?: string }}
+ *   `header` is the trimmed heading text that matched; `snippet` is the
+ *   trimmed first 200 characters of the scanned section.
+ */
+function detectPolicy() {
+  try {
+    const decisionsPath = path.join(PATHS.state, 'decisions.md');
+    if (!fs.existsSync(decisionsPath)) return { active: false };
+    const content = fs.readFileSync(decisionsPath, 'utf-8');
+    if (typeof content !== 'string' || content.length === 0) return { active: false };
+    for (const re of POLICY_HEADER_PATTERNS) {
+      const m = content.match(re);
+      if (!m) continue;
+      // Found a header — look at the next ~500 chars, heading line included.
+      const startIdx = m.index ?? 0;
+      let section = content.slice(startIdx, startIdx + 500);
+      // Stop at the next heading of the same or a higher level: anything
+      // after it belongs to a different section of decisions.md.
+      const level = m[0].match(/^#+/)[0].length;
+      const nextSectionRe = new RegExp(`^#{1,${level}}\\s`, 'm');
+      const headerEnd = section.indexOf('\n');
+      if (headerEnd !== -1) {
+        const bodyOffset = headerEnd + 1;
+        const nextIdx = section.slice(bodyOffset).search(nextSectionRe);
+        if (nextIdx !== -1) {
+          section = section.slice(0, bodyOffset + nextIdx);
+        }
+      }
+      if (ACTIVE_MARKERS.test(section)) {
+        return {
+          active: true,
+          header: m[0].trim(),
+          snippet: section.slice(0, 200).trim()
+        };
+      }
+    }
+    return { active: false };
+  } catch (_err) {
+    // Deliberate best-effort: decisions.md is optional, so errors mean "no policy".
+    return { active: false };
+  }
+}
+/**
+ * Write a fresh standing no-defer pin when decisions.md declares an active
+ * no-defer policy.
+ *
+ * Never throws for detection or pin-writing failures: those are reported in
+ * the returned `reason` (and logged to stderr when DEBUG is set).
+ *
+ * @returns {{ refreshed: boolean, reason?: string, header?: string }}
+ *   `header` is present on success; `reason` is present when nothing was
+ *   refreshed ('no-active-policy' or `error: <message>`).
+ */
+function refreshFromPolicy() {
+  try {
+    const detected = detectPolicy();
+    if (detected.active) {
+      // Loaded at call time rather than at module load.
+      const gate = require('./deferral-gate');
+      const pin = {
+        source: `Standing policy in decisions.md: ${detected.header}`,
+        userPromptExcerpt: detected.snippet || '',
+        confidence: 100,
+        grantedBy: 'decisions-policy',
+        standing: true,
+        ttlSec: POLICY_PIN_TTL_SEC
+      };
+      gate.writeNoDeferPin(pin);
+      return { refreshed: true, header: detected.header };
+    }
+    return { refreshed: false, reason: 'no-active-policy' };
+  } catch (err) {
+    const debugEnabled = process.env.DEBUG;
+    if (debugEnabled) {
+      console.error(`[no-defer-policy] refreshFromPolicy error (fail-open): ${err.message}`);
+    }
+    return { refreshed: false, reason: `error: ${err.message}` };
+  }
+}
+// Public API: the two entry points plus the constants they rely on.
+module.exports = {
+  detectPolicy,
+  refreshFromPolicy,
+  POLICY_PIN_TTL_SEC,
+  POLICY_HEADER_PATTERNS,
+  ACTIVE_MARKERS
+};

package/scripts/hooks/entry/claude-code/session-start.js CHANGED Viewed

@@ -78,6 +78,20 @@ runHook('SessionStart', async ({ parsedInput }) => {
   await bridgeSyncPromise;
   _bootMark('after bridge sync');
+  // wf-b8839d99: Refresh standing no-defer pin from decisions.md if a policy
+  // section is present. Fail-open — never blocks session start.
+  try {
+    const { refreshFromPolicy } = require('../../core/no-defer-policy');
+    const r = refreshFromPolicy();
+    if (r.refreshed && process.env.DEBUG) {
+      console.error(`[session-start] Refreshed no-defer pin from policy: ${r.header}`);
+    }
+  } catch (err) {
+    if (process.env.DEBUG) {
+      console.error(`[session-start] no-defer policy refresh failed: ${err.message}`);
+    }
+  }
   // CLAUDE.md drift detection — check if manually edited since last sync
   let driftDetected = false;
   let driftMarkerMissing = false;

package/scripts/hooks/entry/claude-code/stop.js CHANGED Viewed

@@ -31,12 +31,35 @@ runHook('Stop', async ({ parsedInput }) => {
     longInputActive = isLongInputPending();
   } catch (_err) { /* fail-open */ }
+  // wf-b8839d99 fix #5 — Routing-recovery grace window. If the user just
+  // corrected a prior AI defer-auth ("I did not authorize..."), the deferral
+  // classifier wrote a 60-second grace marker. During that window, the AI
+  // should be able to undo/revoke without bouncing through /wogi-start first.
+  // Routing-enforcement softens to a single warning instead of hard-blocking.
+  let recoveryGraceActive = false;
+  try {
+    const fs = require('node:fs');
+    const path = require('node:path');
+    const { PATHS } = require('../../../flow-utils');
+    const gracePath = path.join(PATHS.state, 'routing-recovery-grace.json');
+    if (fs.existsSync(gracePath)) {
+      const raw = fs.readFileSync(gracePath, 'utf-8');
+      const data = JSON.parse(raw);
+      if (data?.expiresAt && Date.parse(data.expiresAt) > Date.now()) {
+        recoveryGraceActive = true;
+      } else {
+        // Expired — clean up
+        try { fs.unlinkSync(gracePath); } catch (_err) { /* fine */ }
+      }
+    }
+  } catch (_err) { /* fail-open */ }
   // v6.2: Routing enforcement check — catches text-only response bypass
   // If routing-pending flag is still set when the AI tries to stop, it means
   // the AI responded to the user's message without ever invoking a /wogi-* command.
   // This is the exact bypass we need to prevent (especially after context compaction).
   try {
-    if (isRoutingPending() && !longInputActive) {
+    if (isRoutingPending() && !longInputActive && !recoveryGraceActive) {
       // Use counter-based approach instead of clearing immediately.
       // This gives the AI multiple chances to comply before giving up.
       // Gap 4 fix: clearing immediately made this single-shot protection.

package/scripts/hooks/entry/claude-code/user-prompt-submit.js CHANGED Viewed

@@ -100,18 +100,22 @@ runHook('UserPromptSubmit', async ({ input, parsedInput }) => {
     }
   }
-  // wf-f9912af6: Deferral-intent classifier — detect explicit defer/no-defer
-  // phrases in the user's prompt and write/clear the auth marker accordingly.
-  // Negative intent ("fix everything", "no deferrals", "I don't want tech debt")
-  // hard-pins the gate to block all deferrals; positive intent ("defer X",
-  // "fix critical only", "ship as-is") writes a time-limited auth marker.
-  // Fail-open throughout.
+  // wf-b8839d99 (replaces wf-f9912af6 regex classifier): AI-based deferral-
+  // intent classifier. Calls Haiku to interpret the user's prompt. NEGATIVE
+  // ("fix all", "I don't like tech debt", any phrasing) writes a no-defer-pin;
+  // POSITIVE ("defer F5", "option 2", "ship as-is") writes a scoped auth
+  // marker. The marker now captures the verbatim user excerpt SEPARATELY from
+  // the AI's interpretation — ending the false-attribution failure shape.
+  // Fail-open throughout: classifier errors / missing API key → no state
+  // change (status quo holds; gate's default-restrictive behavior preserved).
   if (typeof prompt === 'string' && prompt.trim().length > 0) {
     try {
       const { applyClassification } = require('../../core/deferral-classifier');
-      const r = applyClassification(prompt, hookConfig);
+      const r = await applyClassification(prompt, hookConfig);
       if (r.applied && process.env.DEBUG) {
-        console.error(`[Hook] Deferral classifier: intent=${r.intent}, match="${r.match}"`);
+        console.error(`[Hook] Deferral classifier (AI): intent=${r.intent}, confidence=${r.confidence}, standing=${r.standing}, scope=${JSON.stringify(r.scope)}`);
+      } else if (process.env.DEBUG && r.reason) {
+        console.error(`[Hook] Deferral classifier (AI): no-op — ${r.reason}`);
       }
     } catch (err) {
       if (process.env.DEBUG) {