wogiflow 2.31.2 → 2.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -135,10 +135,13 @@ When a local `/wogi-*` CLI command fails (error in output, "Unknown skill", comm
135
135
 
136
136
  | Tier | Marker phrases | What you do |
137
137
  |------|---------------|-------------|
138
- | **Tier 1 — Factual** | "what is", "how many", "show me", "list all", "which file", "where does" | Answer directly from code/docs. No gate. |
138
+ | **Tier 1a — Generic factual** | "what is a/an <concept>", "what does <general term> mean", "how many <X> in a <Y>" — general knowledge, NOT about this project | Answer directly. No gate. |
139
+ | **Tier 1b — Project-specific factual / locational** | "where is X (configured/stored/saved/defined)", "which file/module/function handles Y", "how does the <this project's X> work", "show me the <project content>", "list all the <project things>" | **MUST run Read/Grep/Glob against the actual codebase FIRST. Your answer MUST cite the file:line(s) you read. NO "Tier 1 → answer directly" shortcut.** Grep if you don't know where to look. Enforced mechanically by `research-required-gate` at Stop hook + an upfront nudge at UserPromptSubmit. wf-1bcc67d5. |
139
140
  | **Tier 2 — Domain** (default for ambiguous) | "what should", "how should", "recommend", "which approach", "what do you think about", "is it better to" | **Surface assumptions, then WAIT.** |
140
141
  | **Tier 3 — Architecture** | "should we restructure", "what's the right architecture", "design a schema", "how to migrate", "should we split / merge / replace" | Tier 2 flow + spawn adversary on a different model after recommendation. |
141
142
 
143
+ > **Why Tier 1b exists** (wf-1bcc67d5): a confident model treats "answer directly from code/docs" as license to answer from its *prior* — pattern-matching "where do secrets go" to "use a .env file" — and never opens a file. In the wogiflow-cli incident (2026-05-12) the model did exactly this, doubled down twice under pushback, and only grepped on the third correction — by which point it had contradicted committed work and proposed a storage location the CLI doesn't even read. The fix: locational/project-factual questions are gated like diagnostic ones. **There is no path where you assert "X lives at Y" in this project without having opened a file that proves it.**
144
+
142
145
  **Tier 2 flow — the user is the adversary**:
143
146
  1. Before any analysis, identify the domain-model assumptions your answer will depend on (typically 2–5).
144
147
  2. Present them in a fenced block and STOP:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wogiflow",
3
- "version": "2.31.2",
3
+ "version": "2.32.0",
4
4
  "description": "AI-powered development workflow management system with multi-model support",
5
5
  "main": "lib/index.js",
6
6
  "bin": {
@@ -446,6 +446,9 @@ Run: /wogi-start ${coreResult.nextTaskId}`;
446
446
  else if (coreResult.message) pieces.push(coreResult.message);
447
447
  }
448
448
  // Info pieces always pass through.
449
+ // wf-1bcc67d5: research-required nudge — placed FIRST among info pieces
450
+ // because it's a "do this BEFORE you answer" instruction, not background.
451
+ if (coreResult.researchRequiredNudge) pieces.push(coreResult.researchRequiredNudge);
449
452
  if (coreResult.phasePrompt) pieces.push(coreResult.phasePrompt);
450
453
  if (coreResult.overduePrompt) pieces.push(coreResult.overduePrompt);
451
454
 
@@ -56,14 +56,39 @@ const DIAGNOSTIC_PATTERNS = [
56
56
  /\bdo\s+you\s+(think|recommend|suggest)\b/i,
57
57
  ];
58
58
 
59
- // Factual markers — Tier 1 (no marker, AI can answer from code/docs)
59
+ // Generic-factual markers — Tier 1a (no marker; answerable from general
60
+ // knowledge). NARROWED from the prior FACTUAL_PATTERNS: "where is X",
61
+ // "which file", "show me", "list all" all MOVED to LOCATIONAL_PATTERNS
62
+ // below because in a project context they are almost always asking about
63
+ // THIS codebase and require a Read first. See wf-1bcc67d5 (the wogiflow-cli
64
+ // "where do API keys get saved" incident — model answered from prior,
65
+ // doubled down twice, before finally grepping).
60
66
  const FACTUAL_PATTERNS = [
61
- /^\s*what\s+is\b/i,
62
- /^\s*where\s+(is|does|are)\b/i,
63
- /^\s*how\s+many\b/i,
64
- /^\s*show\s+me\b/i,
65
- /^\s*list\s+(all|the)\b/i,
66
- /^\s*which\s+file\b/i,
67
+ /^\s*what\s+is\s+(a|an)\b/i, // "what is a closure" — conceptual
68
+ /^\s*what\s+does\s+\w+\s+mean\b/i, // "what does idempotent mean"
69
+ /^\s*how\s+many\s+\w+\s+(are\s+in|in)\s+a\b/i, // "how many days in a year" — generic
70
+ ];
71
+
72
+ // Locational / project-specific-factual markers — Tier 1b (WRITES the
73
+ // evidence marker, same as diagnostic). "Where is X configured?", "which
74
+ // file handles Y?", "how does the deferral gate work?" — these are
75
+ // answerable ONLY by reading this codebase. The model MUST Read/Grep/Glob
76
+ // first and cite what it read. No "Tier 1 → answer directly" shortcut.
77
+ // wf-1bcc67d5.
78
+ const LOCATIONAL_PATTERNS = [
79
+ /\bwhere\s+(is|are|do|does|did|should|would|can)\b/i, // where is X / where are the keys / where does X get saved
80
+ /\bwhich\s+(file|module|function|component|class|method|hook|gate|script|test|directory|folder|package)\b/i,
81
+ /\bwhat\s+(file|module|function|class|hook|gate|script|component)\s+(handles?|does|is|contains?|defines?)\b/i,
82
+ /\bwhat\s+is\s+(responsible\s+for|the\s+\w+\s+(file|module|gate|hook)\s+for)\b/i,
83
+ // "how does the deferral gate work" — allow multiple words between the
84
+ // determiner and the action verb (lazy [\s\w-]*?). Covers 1+ noun phrases.
85
+ /\bhow\s+(does|do|did)\s+(the|this|our|its?|wogiflow|a|an)\b[\s\w-]*?\s+(work|works|worked|happen|behave|operate|function|run|fire|trigger|get\s+\w+)\b/i,
86
+ // "how is the routing flag configured" — same lazy gap
87
+ /\bhow\s+(is|are|was|were|does|do)\s+[\s\w-]*?\b(configured|wired|stored|set\s+up|implemented|handled|loaded|registered|defined|saved|kept|read|written|persisted|injected)\b/i,
88
+ /^\s*(show\s+me\s+(the|all|how|where)|list\s+(all|the))\b/i, // show me the routes / list all the gates — project enumeration
89
+ /\bis\s+there\s+(a|an|any)\s+\w+\s+(in\s+(this|the)\s+(project|codebase|repo|code)|here)\b/i,
90
+ /\b(in\s+this\s+(project|codebase|repo|code))\b.*\b(where|how|which|what)\b/i,
91
+ /\b(where|how|which|what)\b.*\b(in\s+this\s+(project|codebase|repo|code))\b/i,
67
92
  ];
68
93
 
69
94
  // Command markers — task IDs, imperatives, follow-ups
@@ -96,12 +121,21 @@ function getMaxAttempts(config) {
96
121
  }
97
122
 
98
123
  /**
99
- * Classify a user prompt into command / factual / diagnostic.
124
+ * Classify a user prompt into command / factual / locational / diagnostic.
125
+ *
126
+ * Order matters: override > command > generic-factual > locational > diagnostic > default(none).
100
127
  *
101
- * Order matters: override > command > factual > diagnostic > default(none).
128
+ * `locational` and `diagnostic` BOTH write the evidence marker the Stop
129
+ * hook then requires Read/Grep/Glob calls before the answer is accepted.
130
+ * `command` and `factual` and `none` do NOT write the marker.
131
+ *
132
+ * The generic-factual check is intentionally NARROW (only "what is a/an
133
+ * <concept>", "what does X mean", "how many X in a Y"). Anything that
134
+ * smells project-specific — "where is X", "which file/module", "how does
135
+ * the X work" — falls through to `locational` and is gated. See wf-1bcc67d5.
102
136
  *
103
137
  * @param {string} prompt
104
- * @returns {{ category: 'command'|'factual'|'diagnostic'|'none', match?: string, overridden?: boolean }}
138
+ * @returns {{ category: 'command'|'factual'|'locational'|'diagnostic'|'none', match?: string, overridden?: boolean }}
105
139
  */
106
140
  function classifyPrompt(prompt) {
107
141
  if (typeof prompt !== 'string') return { category: 'none' };
@@ -119,13 +153,19 @@ function classifyPrompt(prompt) {
119
153
  if (m) return { category: 'command', match: m[0] };
120
154
  }
121
155
 
122
- // Factual next
156
+ // Generic-factual next (NARROW — only truly-conceptual questions)
123
157
  for (const rx of FACTUAL_PATTERNS) {
124
158
  const m = trimmed.match(rx);
125
159
  if (m) return { category: 'factual', match: m[0] };
126
160
  }
127
161
 
128
- // Diagnostic last
162
+ // Locational / project-specific-factual — gated (writes marker)
163
+ for (const rx of LOCATIONAL_PATTERNS) {
164
+ const m = trimmed.match(rx);
165
+ if (m) return { category: 'locational', match: m[0] };
166
+ }
167
+
168
+ // Diagnostic — gated (writes marker)
129
169
  for (const rx of DIAGNOSTIC_PATTERNS) {
130
170
  const m = trimmed.match(rx);
131
171
  if (m) return { category: 'diagnostic', match: m[0] };
@@ -134,23 +174,32 @@ function classifyPrompt(prompt) {
134
174
  return { category: 'none' };
135
175
  }
136
176
 
177
+ // Both 'diagnostic' AND 'locational' write the evidence marker. wf-1bcc67d5.
178
+ const GATED_CATEGORIES = new Set(['diagnostic', 'locational']);
179
+
137
180
  /**
138
181
  * Apply classification — write or skip the marker. Fail-open.
182
+ *
183
+ * @returns {{ applied: boolean, category?: string, match?: string,
184
+ * requiredEvidence?: number, nudge?: string, reason?: string }}
185
+ * On a gated category, `nudge` is a short upfront reminder string the
186
+ * UserPromptSubmit orchestrator can surface as additionalContext so the
187
+ * model is told to Read BEFORE answering — not just re-prompted at Stop.
139
188
  */
140
189
  function applyClassification(prompt, config) {
141
190
  try {
142
191
  if (!isClassifierEnabled(config)) return { applied: false, reason: 'classifier-disabled' };
143
192
 
144
193
  const result = classifyPrompt(prompt);
145
- if (result.category !== 'diagnostic') {
146
- return { applied: false, category: result.category, reason: 'not-diagnostic' };
194
+ if (!GATED_CATEGORIES.has(result.category)) {
195
+ return { applied: false, category: result.category, reason: 'not-gated' };
147
196
  }
148
197
 
149
198
  const requiredEvidence = getRequiredEvidence(config);
150
199
  const payload = {
151
200
  version: 1,
152
201
  classifiedAt: new Date().toISOString(),
153
- category: 'diagnostic',
202
+ category: result.category,
154
203
  match: result.match,
155
204
  requiredEvidence,
156
205
  attemptCount: 0
@@ -159,7 +208,12 @@ function applyClassification(prompt, config) {
159
208
  const tmp = `${getMarkerPath()}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
160
209
  fs.writeFileSync(tmp, JSON.stringify(payload, null, 2));
161
210
  fs.renameSync(tmp, getMarkerPath());
162
- return { applied: true, category: 'diagnostic', match: result.match, requiredEvidence };
211
+
212
+ const nudge = result.category === 'locational'
213
+ ? `[research-required] This is a project-specific locational question (matched "${result.match}"). Before answering, run Read/Grep/Glob against the actual codebase — do NOT answer from prior knowledge or industry defaults. Your answer MUST cite the file:line(s) you read. (wf-1bcc67d5: a confident model answering "where does X live" from memory, doubling down, is the exact failure this gate exists to stop.)`
214
+ : `[research-required] This is a diagnostic question (matched "${result.match}"). Read at least ${requiredEvidence} relevant evidence files before answering; cite them.`;
215
+
216
+ return { applied: true, category: result.category, match: result.match, requiredEvidence, nudge };
163
217
  } catch (err) {
164
218
  if (process.env.DEBUG) {
165
219
  console.error(`[research-required-classifier] applyClassification error (fail-open): ${err.message}`);
@@ -205,5 +259,7 @@ module.exports = {
205
259
  OVERRIDE_PREFIX,
206
260
  DIAGNOSTIC_PATTERNS,
207
261
  FACTUAL_PATTERNS,
208
- COMMAND_PATTERNS
262
+ LOCATIONAL_PATTERNS,
263
+ COMMAND_PATTERNS,
264
+ GATED_CATEGORIES
209
265
  };
@@ -196,23 +196,33 @@ function checkResearchRequiredGate(opts = {}) {
196
196
  };
197
197
  }
198
198
 
199
+ // wf-1bcc67d5: the marker now carries category 'diagnostic' OR 'locational'.
200
+ // For locational ("where does X live", "which file handles Y"), the message
201
+ // is sharper — answering from prior knowledge is the precise failure shape.
202
+ const isLocational = marker.category === 'locational';
203
+ const kind = isLocational ? 'project-specific locational question' : 'diagnostic question';
199
204
  return {
200
205
  blocked: true,
201
206
  hardStop: false,
202
207
  evidenceCount,
203
208
  requiredEvidence,
204
209
  message:
205
- `RESEARCH-REQUIRED VIOLATION: the user asked a diagnostic question (matched "${marker.match}") ` +
210
+ `RESEARCH-REQUIRED VIOLATION: the user asked a ${kind} (matched "${marker.match}") ` +
206
211
  `but you produced an answer with only ${evidenceCount} evidence read${evidenceCount === 1 ? '' : 's'} ` +
207
212
  `(minimum required: ${requiredEvidence}).\n\n` +
213
+ (isLocational
214
+ ? `You answered "${kind === 'project-specific locational question' ? 'where/which/how X works in this project' : '...'}" WITHOUT opening a file. That is the exact failure wf-1bcc67d5 exists to stop: a confident model pattern-matching to industry defaults instead of checking THIS codebase, then doubling down. Do NOT answer from prior knowledge.\n\n`
215
+ : '') +
208
216
  `Re-do this turn:\n` +
209
- ` 1. Identify the relevant code/state files for the question.\n` +
210
- ` 2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg ` +
217
+ ` 1. Identify the relevant code/state files for the question (grep first if you're not sure where).\n` +
218
+ ` 2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg/Glob/Grep ` +
211
219
  `against evidence paths also counts.\n` +
212
- ` 3. THEN answer with citations (file:line where appropriate).\n\n` +
220
+ ` 3. THEN answer and the answer MUST cite the file:line(s) you actually read. An uncited answer to a ` +
221
+ `${kind} is not acceptable.\n\n` +
213
222
  `Evidence prefixes that count: .workflow/state/, .workflow/changes/, lib/, scripts/, src/, tests/, app/.\n\n` +
214
- `If you genuinely don't know which files to read, ask the user via \`flow ask "<question>"\`.\n` +
215
- `If you believe this is a factual question that doesn't need research, the user can prefix their ` +
223
+ `If you genuinely cannot find the relevant files after grepping, say so explicitly and ask the user via ` +
224
+ `\`flow ask "<question>"\` do NOT guess.\n` +
225
+ `If this is genuinely a generic-knowledge question (not about this project), the user can prefix their ` +
216
226
  `next prompt with \`!\` to skip the gate.\n\n` +
217
227
  `Attempt ${next.attemptCount}/${maxAttempts}.`
218
228
  };
@@ -70,13 +70,19 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
70
70
  }
71
71
  }
72
72
 
73
- // wf-5cd71b1f: Research-required classifier
73
+ // wf-5cd71b1f + wf-1bcc67d5: Research-required classifier. Now catches
74
+ // 'locational' (project-specific "where is X / which file / how does the
75
+ // X work") in addition to 'diagnostic'. Both write the evidence marker
76
+ // (Stop-hook backstop) AND surface an upfront nudge so the model is told
77
+ // to Read BEFORE answering — not just re-prompted after.
78
+ let researchRequiredNudge = null;
74
79
  if (typeof prompt === 'string' && prompt.trim().length > 0) {
75
80
  try {
76
81
  const { applyClassification: applyResearchClassification } = require('./research-required-classifier');
77
82
  const r = applyResearchClassification(prompt, hookConfig);
78
- if (r.applied && process.env.DEBUG) {
79
- console.error(`[Hook] Research-required classifier: category=${r.category}, match="${r.match}"`);
83
+ if (r.applied) {
84
+ if (r.nudge) researchRequiredNudge = r.nudge;
85
+ if (process.env.DEBUG) console.error(`[Hook] Research-required classifier: category=${r.category}, match="${r.match}"`);
80
86
  }
81
87
  } catch (err) {
82
88
  if (process.env.DEBUG) console.error(`[Hook] Research-required classifier failed: ${err.message}`);
@@ -168,6 +174,7 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
168
174
  }
169
175
 
170
176
  if (phasePrompt) coreResult = { ...coreResult, phasePrompt };
177
+ if (researchRequiredNudge) coreResult = { ...coreResult, researchRequiredNudge };
171
178
 
172
179
  // wf-d3e67abe — overdue workspace dispatches
173
180
  try {