npm - wogiflow - Versions diffs - 2.31.2 → 2.32.0 - Mend

wogiflow 2.31.2 → 2.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.claude/commands/wogi-start.md +4 -1
package/package.json +1 -1
package/scripts/hooks/adapters/claude-code.js +3 -0
package/scripts/hooks/core/research-required-classifier.js +73 -17
package/scripts/hooks/core/research-required-gate.js +16 -6
package/scripts/hooks/core/user-prompt-orchestrator.js +10 -3

package/.claude/commands/wogi-start.md CHANGED Viewed

@@ -135,10 +135,13 @@ When a local `/wogi-*` CLI command fails (error in output, "Unknown skill", comm
 | Tier | Marker phrases | What you do |
 |------|---------------|-------------|
-| **Tier 1 — Factual** | "what is", "how many", "show me", "list all", "which file", "where does" | Answer directly from code/docs. No gate. |
+| **Tier 1a — Generic factual** | "what is a/an &lt;concept&gt;", "what does &lt;general term&gt; mean", "how many &lt;X&gt; in a &lt;Y&gt;" — general knowledge, NOT about this project | Answer directly. No gate. |
+| **Tier 1b — Project-specific factual / locational** | "where is X (configured/stored/saved/defined)", "which file/module/function handles Y", "how does the &lt;this project's X&gt; work", "show me the &lt;project content&gt;", "list all the &lt;project things&gt;" | **MUST run Read/Grep/Glob against the actual codebase FIRST. Your answer MUST cite the file:line(s) you read. NO "Tier 1 → answer directly" shortcut.** Grep if you don't know where to look. Enforced mechanically by `research-required-gate` at Stop hook + an upfront nudge at UserPromptSubmit. wf-1bcc67d5. |
 | **Tier 2 — Domain** (default for ambiguous) | "what should", "how should", "recommend", "which approach", "what do you think about", "is it better to" | **Surface assumptions, then WAIT.** |
 | **Tier 3 — Architecture** | "should we restructure", "what's the right architecture", "design a schema", "how to migrate", "should we split / merge / replace" | Tier 2 flow + spawn adversary on a different model after recommendation. |
+> **Why Tier 1b exists** (wf-1bcc67d5): a confident model treats "answer directly from code/docs" as license to answer from its *prior* — pattern-matching "where do secrets go" to "use a .env file" — and never opens a file. In the wogiflow-cli incident (2026-05-12) the model did exactly this, doubled down twice under pushback, and only grepped on the third correction — by which point it had contradicted committed work and proposed a storage location the CLI doesn't even read. The fix: locational/project-factual questions are gated like diagnostic ones. **There is no path where you assert "X lives at Y" in this project without having opened a file that proves it.**
 **Tier 2 flow — the user is the adversary**:
 1. Before any analysis, identify the domain-model assumptions your answer will depend on (typically 2–5).
 2. Present them in a fenced block and STOP:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wogiflow",
-  "version": "2.31.2",
+  "version": "2.32.0",
   "description": "AI-powered development workflow management system with multi-model support",
   "main": "lib/index.js",
   "bin": {

package/scripts/hooks/adapters/claude-code.js CHANGED Viewed

@@ -446,6 +446,9 @@ Run: /wogi-start ${coreResult.nextTaskId}`;
       else if (coreResult.message) pieces.push(coreResult.message);
     }
     // Info pieces always pass through.
+    // wf-1bcc67d5: research-required nudge — placed FIRST among info pieces
+    // because it's a "do this BEFORE you answer" instruction, not background.
+    if (coreResult.researchRequiredNudge) pieces.push(coreResult.researchRequiredNudge);
     if (coreResult.phasePrompt) pieces.push(coreResult.phasePrompt);
     if (coreResult.overduePrompt) pieces.push(coreResult.overduePrompt);

package/scripts/hooks/core/research-required-classifier.js CHANGED Viewed

@@ -56,14 +56,39 @@ const DIAGNOSTIC_PATTERNS = [
   /\bdo\s+you\s+(think|recommend|suggest)\b/i,
 ];
-// Factual markers — Tier 1 (no marker, AI can answer from code/docs)
+// Generic-factual markers — Tier 1a (no marker; answerable from general
+// knowledge). NARROWED from the prior FACTUAL_PATTERNS: "where is X",
+// "which file", "show me", "list all" all MOVED to LOCATIONAL_PATTERNS
+// below because in a project context they are almost always asking about
+// THIS codebase and require a Read first. See wf-1bcc67d5 (the wogiflow-cli
+// "where do API keys get saved" incident — model answered from prior,
+// doubled down twice, before finally grepping).
 const FACTUAL_PATTERNS = [
-  /^\s*what\s+is\b/i,
-  /^\s*where\s+(is|does|are)\b/i,
-  /^\s*how\s+many\b/i,
-  /^\s*show\s+me\b/i,
-  /^\s*list\s+(all|the)\b/i,
-  /^\s*which\s+file\b/i,
+  /^\s*what\s+is\s+(a|an)\b/i,            // "what is a closure" — conceptual
+  /^\s*what\s+does\s+\w+\s+mean\b/i,      // "what does idempotent mean"
+  /^\s*how\s+many\s+\w+\s+(are\s+in|in)\s+a\b/i, // "how many days in a year" — generic
+];
+// Locational / project-specific-factual markers — Tier 1b (WRITES the
+// evidence marker, same as diagnostic). "Where is X configured?", "which
+// file handles Y?", "how does the deferral gate work?" — these are
+// answerable ONLY by reading this codebase. The model MUST Read/Grep/Glob
+// first and cite what it read. No "Tier 1 → answer directly" shortcut.
+// wf-1bcc67d5.
+const LOCATIONAL_PATTERNS = [
+  /\bwhere\s+(is|are|do|does|did|should|would|can)\b/i,           // where is X / where are the keys / where does X get saved
+  /\bwhich\s+(file|module|function|component|class|method|hook|gate|script|test|directory|folder|package)\b/i,
+  /\bwhat\s+(file|module|function|class|hook|gate|script|component)\s+(handles?|does|is|contains?|defines?)\b/i,
+  /\bwhat\s+is\s+(responsible\s+for|the\s+\w+\s+(file|module|gate|hook)\s+for)\b/i,
+  // "how does the deferral gate work" — allow multiple words between the
+  // determiner and the action verb (lazy [\s\w-]*?). Covers 1+ noun phrases.
+  /\bhow\s+(does|do|did)\s+(the|this|our|its?|wogiflow|a|an)\b[\s\w-]*?\s+(work|works|worked|happen|behave|operate|function|run|fire|trigger|get\s+\w+)\b/i,
+  // "how is the routing flag configured" — same lazy gap
+  /\bhow\s+(is|are|was|were|does|do)\s+[\s\w-]*?\b(configured|wired|stored|set\s+up|implemented|handled|loaded|registered|defined|saved|kept|read|written|persisted|injected)\b/i,
+  /^\s*(show\s+me\s+(the|all|how|where)|list\s+(all|the))\b/i,    // show me the routes / list all the gates — project enumeration
+  /\bis\s+there\s+(a|an|any)\s+\w+\s+(in\s+(this|the)\s+(project|codebase|repo|code)|here)\b/i,
+  /\b(in\s+this\s+(project|codebase|repo|code))\b.*\b(where|how|which|what)\b/i,
+  /\b(where|how|which|what)\b.*\b(in\s+this\s+(project|codebase|repo|code))\b/i,
 ];
 // Command markers — task IDs, imperatives, follow-ups
@@ -96,12 +121,21 @@ function getMaxAttempts(config) {
 }
 /**
- * Classify a user prompt into command / factual / diagnostic.
+ * Classify a user prompt into command / factual / locational / diagnostic.
+ *
+ * Order matters: override > command > generic-factual > locational > diagnostic > default(none).
  *
- * Order matters: override > command > factual > diagnostic > default(none).
+ * `locational` and `diagnostic` BOTH write the evidence marker — the Stop
+ * hook then requires Read/Grep/Glob calls before the answer is accepted.
+ * `command` and `factual` and `none` do NOT write the marker.
+ *
+ * The generic-factual check is intentionally NARROW (only "what is a/an
+ * <concept>", "what does X mean", "how many X in a Y"). Anything that
+ * smells project-specific — "where is X", "which file/module", "how does
+ * the X work" — falls through to `locational` and is gated. See wf-1bcc67d5.
  *
  * @param {string} prompt
- * @returns {{ category: 'command'|'factual'|'diagnostic'|'none', match?: string, overridden?: boolean }}
+ * @returns {{ category: 'command'|'factual'|'locational'|'diagnostic'|'none', match?: string, overridden?: boolean }}
  */
 function classifyPrompt(prompt) {
   if (typeof prompt !== 'string') return { category: 'none' };
@@ -119,13 +153,19 @@ function classifyPrompt(prompt) {
     if (m) return { category: 'command', match: m[0] };
   }
-  // Factual next
+  // Generic-factual next (NARROW — only truly-conceptual questions)
   for (const rx of FACTUAL_PATTERNS) {
     const m = trimmed.match(rx);
     if (m) return { category: 'factual', match: m[0] };
   }
-  // Diagnostic last
+  // Locational / project-specific-factual — gated (writes marker)
+  for (const rx of LOCATIONAL_PATTERNS) {
+    const m = trimmed.match(rx);
+    if (m) return { category: 'locational', match: m[0] };
+  }
+  // Diagnostic — gated (writes marker)
   for (const rx of DIAGNOSTIC_PATTERNS) {
     const m = trimmed.match(rx);
     if (m) return { category: 'diagnostic', match: m[0] };
@@ -134,23 +174,32 @@ function classifyPrompt(prompt) {
   return { category: 'none' };
 }
+// Both 'diagnostic' AND 'locational' write the evidence marker. wf-1bcc67d5.
+const GATED_CATEGORIES = new Set(['diagnostic', 'locational']);
 /**
  * Apply classification — write or skip the marker. Fail-open.
+ *
+ * @returns {{ applied: boolean, category?: string, match?: string,
+ *             requiredEvidence?: number, nudge?: string, reason?: string }}
+ *   On a gated category, `nudge` is a short upfront reminder string the
+ *   UserPromptSubmit orchestrator can surface as additionalContext so the
+ *   model is told to Read BEFORE answering — not just re-prompted at Stop.
  */
 function applyClassification(prompt, config) {
   try {
     if (!isClassifierEnabled(config)) return { applied: false, reason: 'classifier-disabled' };
     const result = classifyPrompt(prompt);
-    if (result.category !== 'diagnostic') {
-      return { applied: false, category: result.category, reason: 'not-diagnostic' };
+    if (!GATED_CATEGORIES.has(result.category)) {
+      return { applied: false, category: result.category, reason: 'not-gated' };
     }
     const requiredEvidence = getRequiredEvidence(config);
     const payload = {
       version: 1,
       classifiedAt: new Date().toISOString(),
-      category: 'diagnostic',
+      category: result.category,
       match: result.match,
       requiredEvidence,
       attemptCount: 0
@@ -159,7 +208,12 @@ function applyClassification(prompt, config) {
     const tmp = `${getMarkerPath()}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
     fs.writeFileSync(tmp, JSON.stringify(payload, null, 2));
     fs.renameSync(tmp, getMarkerPath());
-    return { applied: true, category: 'diagnostic', match: result.match, requiredEvidence };
+    const nudge = result.category === 'locational'
+      ? `[research-required] This is a project-specific locational question (matched "${result.match}"). Before answering, run Read/Grep/Glob against the actual codebase — do NOT answer from prior knowledge or industry defaults. Your answer MUST cite the file:line(s) you read. (wf-1bcc67d5: a confident model answering "where does X live" from memory, doubling down, is the exact failure this gate exists to stop.)`
+      : `[research-required] This is a diagnostic question (matched "${result.match}"). Read at least ${requiredEvidence} relevant evidence files before answering; cite them.`;
+    return { applied: true, category: result.category, match: result.match, requiredEvidence, nudge };
   } catch (err) {
     if (process.env.DEBUG) {
       console.error(`[research-required-classifier] applyClassification error (fail-open): ${err.message}`);
@@ -205,5 +259,7 @@ module.exports = {
   OVERRIDE_PREFIX,
   DIAGNOSTIC_PATTERNS,
   FACTUAL_PATTERNS,
-  COMMAND_PATTERNS
+  LOCATIONAL_PATTERNS,
+  COMMAND_PATTERNS,
+  GATED_CATEGORIES
 };

package/scripts/hooks/core/research-required-gate.js CHANGED Viewed

@@ -196,23 +196,33 @@ function checkResearchRequiredGate(opts = {}) {
       };
     }
+    // wf-1bcc67d5: the marker now carries category 'diagnostic' OR 'locational'.
+    // For locational ("where does X live", "which file handles Y"), the message
+    // is sharper — answering from prior knowledge is the precise failure shape.
+    const isLocational = marker.category === 'locational';
+    const kind = isLocational ? 'project-specific locational question' : 'diagnostic question';
     return {
       blocked: true,
       hardStop: false,
       evidenceCount,
       requiredEvidence,
       message:
-        `RESEARCH-REQUIRED VIOLATION: the user asked a diagnostic question (matched "${marker.match}") ` +
+        `RESEARCH-REQUIRED VIOLATION: the user asked a ${kind} (matched "${marker.match}") ` +
         `but you produced an answer with only ${evidenceCount} evidence read${evidenceCount === 1 ? '' : 's'} ` +
         `(minimum required: ${requiredEvidence}).\n\n` +
+        (isLocational
+          ? `You answered "${kind === 'project-specific locational question' ? 'where/which/how X works in this project' : '...'}" WITHOUT opening a file. That is the exact failure wf-1bcc67d5 exists to stop: a confident model pattern-matching to industry defaults instead of checking THIS codebase, then doubling down. Do NOT answer from prior knowledge.\n\n`
+          : '') +
         `Re-do this turn:\n` +
-        `  1. Identify the relevant code/state files for the question.\n` +
-        `  2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg ` +
+        `  1. Identify the relevant code/state files for the question (grep first if you're not sure where).\n` +
+        `  2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg/Glob/Grep ` +
         `against evidence paths also counts.\n` +
-        `  3. THEN answer with citations (file:line where appropriate).\n\n` +
+        `  3. THEN answer — and the answer MUST cite the file:line(s) you actually read. An uncited answer to a ` +
+        `${kind} is not acceptable.\n\n` +
         `Evidence prefixes that count: .workflow/state/, .workflow/changes/, lib/, scripts/, src/, tests/, app/.\n\n` +
-        `If you genuinely don't know which files to read, ask the user via \`flow ask "<question>"\`.\n` +
-        `If you believe this is a factual question that doesn't need research, the user can prefix their ` +
+        `If you genuinely cannot find the relevant files after grepping, say so explicitly and ask the user via ` +
+        `\`flow ask "<question>"\` — do NOT guess.\n` +
+        `If this is genuinely a generic-knowledge question (not about this project), the user can prefix their ` +
         `next prompt with \`!\` to skip the gate.\n\n` +
         `Attempt ${next.attemptCount}/${maxAttempts}.`
     };

package/scripts/hooks/core/user-prompt-orchestrator.js CHANGED Viewed

@@ -70,13 +70,19 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
     }
   }
-  // wf-5cd71b1f: Research-required classifier
+  // wf-5cd71b1f + wf-1bcc67d5: Research-required classifier. Now catches
+  // 'locational' (project-specific "where is X / which file / how does the
+  // X work") in addition to 'diagnostic'. Both write the evidence marker
+  // (Stop-hook backstop) AND surface an upfront nudge so the model is told
+  // to Read BEFORE answering — not just re-prompted after.
+  let researchRequiredNudge = null;
   if (typeof prompt === 'string' && prompt.trim().length > 0) {
     try {
       const { applyClassification: applyResearchClassification } = require('./research-required-classifier');
       const r = applyResearchClassification(prompt, hookConfig);
-      if (r.applied && process.env.DEBUG) {
-        console.error(`[Hook] Research-required classifier: category=${r.category}, match="${r.match}"`);
+      if (r.applied) {
+        if (r.nudge) researchRequiredNudge = r.nudge;
+        if (process.env.DEBUG) console.error(`[Hook] Research-required classifier: category=${r.category}, match="${r.match}"`);
       }
     } catch (err) {
       if (process.env.DEBUG) console.error(`[Hook] Research-required classifier failed: ${err.message}`);
@@ -168,6 +174,7 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
   }
   if (phasePrompt) coreResult = { ...coreResult, phasePrompt };
+  if (researchRequiredNudge) coreResult = { ...coreResult, researchRequiredNudge };
   // wf-d3e67abe — overdue workspace dispatches
   try {