wogiflow 2.31.2 → 2.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/wogi-start.md +4 -1
- package/.claude/docs/claude-code-compatibility.md +51 -0
- package/.claude/docs/scheduled-mode.md +213 -0
- package/.claude/docs/skill-portability.md +190 -0
- package/.claude/rules/alternative-hook-args-exec-form.md +6 -0
- package/.claude/settings.json +2 -1
- package/.claude/skills/_template/skill.md +1 -0
- package/.claude/skills/conventional-commit/knowledge/examples.md +65 -0
- package/.claude/skills/conventional-commit/skill.md +76 -0
- package/bin/flow +16 -0
- package/lib/scheduled-mode.js +377 -0
- package/lib/skill-export-agentskills.js +211 -0
- package/lib/skill-export-claude-plugin.js +143 -0
- package/lib/skill-portability.js +324 -0
- package/lib/skill-registry.js +32 -2
- package/package.json +2 -2
- package/scripts/flow +8 -0
- package/scripts/flow-config-defaults.js +20 -0
- package/scripts/flow-schedule.js +469 -0
- package/scripts/flow-scheduled-runner.js +614 -0
- package/scripts/flow-skill-export.js +334 -0
- package/scripts/hooks/adapters/claude-code.js +15 -1
- package/scripts/hooks/core/git-safety-gate.js +92 -20
- package/scripts/hooks/core/long-input-enforcement.js +139 -4
- package/scripts/hooks/core/research-required-classifier.js +73 -17
- package/scripts/hooks/core/research-required-gate.js +16 -6
- package/scripts/hooks/core/user-prompt-orchestrator.js +10 -3
|
@@ -71,6 +71,83 @@ const SOURCE_LINK_PATTERNS = [
|
|
|
71
71
|
/\bwf-[a-f0-9]{8}\b/i // bare wf-ID reference
|
|
72
72
|
];
|
|
73
73
|
|
|
74
|
+
/**
 * Strip quoted/pasted content from a prompt so item + line counts reflect
 * what the USER is actually requesting, not what they're illustrating.
 *
 * Removes:
 *   - Fenced code blocks (``` … ```), including empty ones — pasted code or
 *     transcript output. A fence that is never closed is left untouched so it
 *     cannot swallow the rest of the prompt.
 *   - Lines starting with `⏺` — pasted Claude Code transcript bullet
 *   - Lines starting with `⎿` — pasted Claude Code tool-result indent
 *   - Lines starting with `>` (markdown blockquote, indented or not) — quoted source
 *   - Lines indented by 4+ spaces that are not markdown list items
 *     (informal code-block convention — git diff output, REPL traces, etc.)
 *
 * Stripping is unconditional: the function always returns the text with the
 * quoted parts removed. Any comparison against the raw text is the caller's
 * responsibility.
 *
 * Motivating case: a prompt made of a short narrative plus a ~70-line PASTED
 * transcript inside a fenced block. The raw line count crossed the long-input
 * threshold and the imperatives inside the transcript ("fix", "add", "rm")
 * crossed the task-signal threshold, so the gate fired — even though the
 * transcript was pasted to ILLUSTRATE a bug, not to deliver work items.
 *
 * @param {string} text
 * @returns {string} stripped text (always a string; '' if input wasn't)
 */
function stripQuotedContent(text) {
  if (typeof text !== 'string') return '';

  // 1. Strip fenced code blocks. The block body is matched lazily AND is
  //    optional (`??`), so each opening fence pairs with the NEAREST closing
  //    fence. Without the optional body, an empty block ("```" directly
  //    followed by "```") could not match on its own and its opening fence
  //    would pair with the closing fence of a later block, deleting the
  //    user's own text in between.
  const withoutFences = text.replace(/^```[^\n]*\n(?:[\s\S]*?\n)??```\s*$/gm, '');

  // 2. Strip pasted-transcript / blockquote / code-by-indentation lines.
  return withoutFences
    .split('\n')
    .filter((line) => {
      // ⏺ — Claude Code transcript bullet
      if (/^\s*⏺/.test(line)) return false;
      // ⎿ — Claude Code tool-result continuation marker
      if (/^\s*⎿/.test(line)) return false;
      // > — markdown blockquote (any indent level)
      if (/^\s*>/.test(line)) return false;
      // 4+ leading spaces count as pasted code unless the line is a nested
      // markdown list item (`- ` / `* ` / `N. ` / `N) ` after the indent).
      const isIndentedCode = /^ {4,}\S/.test(line);
      const isListItem = /^\s*(?:[-*]|\d+[.)])\s+/.test(line);
      return !(isIndentedCode && !isListItem);
    })
    .join('\n');
}
|
|
124
|
+
|
|
125
|
+
/**
 * Decide whether a prompt is a Claude Code skill body echoed back as a
 * "user message". When the AI calls `Skill(...)`, the harness surfaces the
 * full skill prompt + args through UserPromptSubmit. That text is
 * AI-composed, not user-typed; firing the gate on it creates a deadlock
 * (the AI can't dismiss its own skill args, and extract-review needs Bash
 * which is also gated).
 *
 * Detection: the prompt is at least 500 characters long and contains at
 * least 2 of the structural markers listed in SKILL_BODY_MARKERS (heading
 * hierarchies, the "ARGUMENTS: {args}" template, etc.), which are
 * exceedingly unlikely to appear in user-typed prose.
 *
 * @param {string} text
 * @returns {boolean} true when the text looks like a skill-body echo;
 *   false for non-strings, short text, or fewer than 2 marker hits
 */
function isSkillBodyEcho(text) {
  const MIN_ECHO_LENGTH = 500;
  const REQUIRED_MARKER_HITS = 2;

  if (typeof text !== 'string') return false;
  if (text.length < MIN_ECHO_LENGTH) return false;

  // Stop scanning as soon as enough distinct markers have been seen.
  let matched = 0;
  return SKILL_BODY_MARKERS.some((marker) => {
    if (text.includes(marker)) matched += 1;
    return matched >= REQUIRED_MARKER_HITS;
  });
}
|
|
150
|
+
|
|
74
151
|
function countDiscreteItems(text) {
|
|
75
152
|
if (typeof text !== 'string') return 0;
|
|
76
153
|
let count = 0;
|
|
@@ -83,9 +160,12 @@ function countDiscreteItems(text) {
|
|
|
83
160
|
|
|
84
161
|
function detectLongFormPrompt(text) {
|
|
85
162
|
if (typeof text !== 'string' || !text.trim()) return false;
|
|
86
|
-
|
|
163
|
+
// Strip quoted/pasted content before counting — only the USER's own words
|
|
164
|
+
// contribute to thresholds (otherwise the gate fires on illustrative pastes).
|
|
165
|
+
const stripped = stripQuotedContent(text);
|
|
166
|
+
const lineCount = stripped.split('\n').filter(l => l.trim()).length;
|
|
87
167
|
if (lineCount > LONG_LINE_THRESHOLD) return true;
|
|
88
|
-
if (countDiscreteItems(
|
|
168
|
+
if (countDiscreteItems(stripped) >= LONG_ITEM_THRESHOLD) return true;
|
|
89
169
|
return false;
|
|
90
170
|
}
|
|
91
171
|
|
|
@@ -116,6 +196,27 @@ const SYSTEM_CONTENT_PREFIXES = [
|
|
|
116
196
|
'<bash-stderr>'
|
|
117
197
|
];
|
|
118
198
|
|
|
199
|
+
// Skill-body markers that indicate the prompt is a Claude Code skill body
|
|
200
|
+
// being echoed back to the model after an AI Skill(...) invocation. When
|
|
201
|
+
// the AI calls `Skill(skill="wogi-start", args="...long...")`, Claude Code
|
|
202
|
+
// surfaces the full skill prompt + args as the next "user message" — going
|
|
203
|
+
// through UserPromptSubmit. The args are AI-composed, not user-typed, so
|
|
204
|
+
// the gate must NOT fire on them. We detect this by the structural markers
|
|
205
|
+
// that only ever appear in skill bodies (not in regular user prose).
|
|
206
|
+
// Treating it as a user prompt was the deadlock shape from the wogiflow-cli
|
|
207
|
+
// 2026-05-13 incident — see the bug report transcript in this commit's body.
|
|
208
|
+
const SKILL_BODY_MARKERS = [
|
|
209
|
+
'**UNIVERSAL ENTRY POINT**',
|
|
210
|
+
'## Request Triage (AI-Driven Routing',
|
|
211
|
+
'### Command Catalog',
|
|
212
|
+
'### Pre-Routing Checks (Automatic)',
|
|
213
|
+
'Routing order: Task ID',
|
|
214
|
+
'## Phase Execution (MANDATORY)',
|
|
215
|
+
'## Mandatory Rules',
|
|
216
|
+
'ARGUMENTS: {args}',
|
|
217
|
+
'## How It Works (MANDATORY',
|
|
218
|
+
];
|
|
219
|
+
|
|
119
220
|
/**
|
|
120
221
|
* Detect content that originates from the system (tool results, sub-agent
|
|
121
222
|
* notifications, slash-command framings) rather than user typing. These
|
|
@@ -137,9 +238,14 @@ function isSystemOriginatedContent(text) {
|
|
|
137
238
|
|
|
138
239
|
function hasTaskSignals(text) {
|
|
139
240
|
if (typeof text !== 'string') return false;
|
|
241
|
+
// Imperatives inside pasted code/transcript/blockquotes are illustrative,
|
|
242
|
+
// not the user's own work-creating instructions. Count only on the USER's
|
|
243
|
+
// own words. (Without this, pasted error logs containing "fix" / "add"
|
|
244
|
+
// / "remove" trip the gate as if the user were ordering 5 tasks.)
|
|
245
|
+
const stripped = stripQuotedContent(text);
|
|
140
246
|
let imperativeHits = 0;
|
|
141
247
|
for (const re of TASK_IMPERATIVES) {
|
|
142
|
-
const m =
|
|
248
|
+
const m = stripped.match(new RegExp(re.source, 'gi'));
|
|
143
249
|
if (m) imperativeHits += m.length;
|
|
144
250
|
}
|
|
145
251
|
return imperativeHits >= 2;
|
|
@@ -176,6 +282,13 @@ function shouldForceExtractReview({ text, source, env = process.env } = {}) {
|
|
|
176
282
|
if (isSystemOriginatedContent(text)) {
|
|
177
283
|
return { forced: false, level: 'pass', reason: 'system-originated-content' };
|
|
178
284
|
}
|
|
285
|
+
// Deadlock fix (2026-05-13): AI-composed Skill args get surfaced back as
|
|
286
|
+
// a "user message" by the harness. Detect the skill-body echo signature
|
|
287
|
+
// and skip the gate — the args are AI-decomposed, not user-typed, so
|
|
288
|
+
// item-reconciliation has no source to reconcile against.
|
|
289
|
+
if (isSkillBodyEcho(text)) {
|
|
290
|
+
return { forced: false, level: 'pass', reason: 'skill-body-echo' };
|
|
291
|
+
}
|
|
179
292
|
if (!detectLongFormPrompt(text)) {
|
|
180
293
|
return { forced: false, level: 'pass', reason: 'below-long-input-threshold' };
|
|
181
294
|
}
|
|
@@ -308,6 +421,20 @@ function checkLongInputPendingGate(toolName, toolInput) {
|
|
|
308
421
|
if (/flow\s+extract-zero-loss/.test(cmd)) return { blocked: false };
|
|
309
422
|
if (/flow\s+long-input/.test(cmd)) return { blocked: false };
|
|
310
423
|
if (/flow-source-fidelity\.js/.test(cmd)) return { blocked: false };
|
|
424
|
+
// EMERGENCY ESCAPE (2026-05-13 deadlock fix): when the `flow` CLI is
|
|
425
|
+
// unavailable (e.g., target project has no node_modules/wogiflow on PATH,
|
|
426
|
+
// or the CLI itself is broken), allow the user to manually clear the
|
|
427
|
+
// marker file via `rm`. Scoped narrowly to the exact marker path so it
|
|
428
|
+
// can't be used as a general-purpose Bash escape.
|
|
429
|
+
if (/^\s*rm\s+(?:-[a-zA-Z]+\s+)?(?:["']?)\.workflow\/state\/long-input-pending\.json(?:["']?)\s*$/.test(cmd)) {
|
|
430
|
+
return { blocked: false };
|
|
431
|
+
}
|
|
432
|
+
// Also allow the node-script equivalent (for sessions where `rm` is
|
|
433
|
+
// unavailable, e.g. some Windows shells). Matches both `fs.unlinkSync(...)`
|
|
434
|
+
// and `require('fs').unlinkSync(...)` forms.
|
|
435
|
+
if (/unlinkSync\s*\(\s*['"]\.workflow\/state\/long-input-pending\.json['"]\s*\)/.test(cmd)) {
|
|
436
|
+
return { blocked: false };
|
|
437
|
+
}
|
|
311
438
|
// Falls through to block for everything else
|
|
312
439
|
}
|
|
313
440
|
|
|
@@ -334,6 +461,11 @@ function checkLongInputPendingGate(toolName, toolInput) {
|
|
|
334
461
|
' 2. (ESCAPE HATCH) If this prompt genuinely does NOT create work',
|
|
335
462
|
' (e.g., it\'s a log dump or pure question), dismiss with:',
|
|
336
463
|
' `flow long-input-pending dismiss --reason="<concrete reason>"`',
|
|
464
|
+
' 3. (EMERGENCY) If both paths above fail (e.g., `flow` CLI missing',
|
|
465
|
+
' or broken), manually clear the marker file:',
|
|
466
|
+
' `rm .workflow/state/long-input-pending.json`',
|
|
467
|
+
' (This Bash command is explicitly allowed by the gate as a',
|
|
468
|
+
' deadlock escape.)',
|
|
337
469
|
'',
|
|
338
470
|
'Read/Glob/Grep tools remain available for investigation.'
|
|
339
471
|
].join('\n')
|
|
@@ -345,10 +477,12 @@ module.exports = {
|
|
|
345
477
|
LONG_LINE_THRESHOLD,
|
|
346
478
|
LONG_ITEM_THRESHOLD,
|
|
347
479
|
SYSTEM_CONTENT_PREFIXES,
|
|
480
|
+
SKILL_BODY_MARKERS,
|
|
348
481
|
detectLongFormPrompt,
|
|
349
482
|
hasSourceLink,
|
|
350
483
|
hasTaskSignals,
|
|
351
484
|
isSystemOriginatedContent,
|
|
485
|
+
isSkillBodyEcho,
|
|
352
486
|
isChannelDispatchInWorker,
|
|
353
487
|
shouldForceExtractReview,
|
|
354
488
|
buildEnforcementMessage,
|
|
@@ -357,5 +491,6 @@ module.exports = {
|
|
|
357
491
|
isLongInputPending,
|
|
358
492
|
readLongInputPending,
|
|
359
493
|
checkLongInputPendingGate,
|
|
360
|
-
countDiscreteItems
|
|
494
|
+
countDiscreteItems,
|
|
495
|
+
stripQuotedContent
|
|
361
496
|
};
|
|
@@ -56,14 +56,39 @@ const DIAGNOSTIC_PATTERNS = [
|
|
|
56
56
|
/\bdo\s+you\s+(think|recommend|suggest)\b/i,
|
|
57
57
|
];
|
|
58
58
|
|
|
59
|
-
//
|
|
59
|
+
// Generic-factual markers — Tier 1a (no marker; answerable from general
|
|
60
|
+
// knowledge). NARROWED from the prior FACTUAL_PATTERNS: "where is X",
|
|
61
|
+
// "which file", "show me", "list all" all MOVED to LOCATIONAL_PATTERNS
|
|
62
|
+
// below because in a project context they are almost always asking about
|
|
63
|
+
// THIS codebase and require a Read first. See wf-1bcc67d5 (the wogiflow-cli
|
|
64
|
+
// "where do API keys get saved" incident — model answered from prior,
|
|
65
|
+
// doubled down twice, before finally grepping).
|
|
60
66
|
const FACTUAL_PATTERNS = [
|
|
61
|
-
/^\s*what\s+is\b/i,
|
|
62
|
-
/^\s*
|
|
63
|
-
/^\s*how\s+many\b/i,
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
+
/^\s*what\s+is\s+(a|an)\b/i, // "what is a closure" — conceptual
|
|
68
|
+
/^\s*what\s+does\s+\w+\s+mean\b/i, // "what does idempotent mean"
|
|
69
|
+
/^\s*how\s+many\s+\w+\s+(are\s+in|in)\s+a\b/i, // "how many days in a year" — generic
|
|
70
|
+
];
|
|
71
|
+
|
|
72
|
+
// Locational / project-specific-factual markers — Tier 1b (WRITES the
|
|
73
|
+
// evidence marker, same as diagnostic). "Where is X configured?", "which
|
|
74
|
+
// file handles Y?", "how does the deferral gate work?" — these are
|
|
75
|
+
// answerable ONLY by reading this codebase. The model MUST Read/Grep/Glob
|
|
76
|
+
// first and cite what it read. No "Tier 1 → answer directly" shortcut.
|
|
77
|
+
// wf-1bcc67d5.
|
|
78
|
+
const LOCATIONAL_PATTERNS = [
|
|
79
|
+
/\bwhere\s+(is|are|do|does|did|should|would|can)\b/i, // where is X / where are the keys / where does X get saved
|
|
80
|
+
/\bwhich\s+(file|module|function|component|class|method|hook|gate|script|test|directory|folder|package)\b/i,
|
|
81
|
+
/\bwhat\s+(file|module|function|class|hook|gate|script|component)\s+(handles?|does|is|contains?|defines?)\b/i,
|
|
82
|
+
/\bwhat\s+is\s+(responsible\s+for|the\s+\w+\s+(file|module|gate|hook)\s+for)\b/i,
|
|
83
|
+
// "how does the deferral gate work" — allow multiple words between the
|
|
84
|
+
// determiner and the action verb (lazy [\s\w-]*?). Covers 1+ noun phrases.
|
|
85
|
+
/\bhow\s+(does|do|did)\s+(the|this|our|its?|wogiflow|a|an)\b[\s\w-]*?\s+(work|works|worked|happen|behave|operate|function|run|fire|trigger|get\s+\w+)\b/i,
|
|
86
|
+
// "how is the routing flag configured" — same lazy gap
|
|
87
|
+
/\bhow\s+(is|are|was|were|does|do)\s+[\s\w-]*?\b(configured|wired|stored|set\s+up|implemented|handled|loaded|registered|defined|saved|kept|read|written|persisted|injected)\b/i,
|
|
88
|
+
/^\s*(show\s+me\s+(the|all|how|where)|list\s+(all|the))\b/i, // show me the routes / list all the gates — project enumeration
|
|
89
|
+
/\bis\s+there\s+(a|an|any)\s+\w+\s+(in\s+(this|the)\s+(project|codebase|repo|code)|here)\b/i,
|
|
90
|
+
/\b(in\s+this\s+(project|codebase|repo|code))\b.*\b(where|how|which|what)\b/i,
|
|
91
|
+
/\b(where|how|which|what)\b.*\b(in\s+this\s+(project|codebase|repo|code))\b/i,
|
|
67
92
|
];
|
|
68
93
|
|
|
69
94
|
// Command markers — task IDs, imperatives, follow-ups
|
|
@@ -96,12 +121,21 @@ function getMaxAttempts(config) {
|
|
|
96
121
|
}
|
|
97
122
|
|
|
98
123
|
/**
|
|
99
|
-
* Classify a user prompt into command / factual / diagnostic.
|
|
124
|
+
* Classify a user prompt into command / factual / locational / diagnostic.
|
|
125
|
+
*
|
|
126
|
+
* Order matters: override > command > generic-factual > locational > diagnostic > default(none).
|
|
100
127
|
*
|
|
101
|
-
*
|
|
128
|
+
* `locational` and `diagnostic` BOTH write the evidence marker — the Stop
|
|
129
|
+
* hook then requires Read/Grep/Glob calls before the answer is accepted.
|
|
130
|
+
* `command`, `factual`, and `none` do NOT write the marker.
|
|
131
|
+
*
|
|
132
|
+
* The generic-factual check is intentionally NARROW (only "what is a/an
|
|
133
|
+
* <concept>", "what does X mean", "how many X in a Y"). Anything that
|
|
134
|
+
* smells project-specific — "where is X", "which file/module", "how does
|
|
135
|
+
* the X work" — falls through to `locational` and is gated. See wf-1bcc67d5.
|
|
102
136
|
*
|
|
103
137
|
* @param {string} prompt
|
|
104
|
-
* @returns {{ category: 'command'|'factual'|'diagnostic'|'none', match?: string, overridden?: boolean }}
|
|
138
|
+
* @returns {{ category: 'command'|'factual'|'locational'|'diagnostic'|'none', match?: string, overridden?: boolean }}
|
|
105
139
|
*/
|
|
106
140
|
function classifyPrompt(prompt) {
|
|
107
141
|
if (typeof prompt !== 'string') return { category: 'none' };
|
|
@@ -119,13 +153,19 @@ function classifyPrompt(prompt) {
|
|
|
119
153
|
if (m) return { category: 'command', match: m[0] };
|
|
120
154
|
}
|
|
121
155
|
|
|
122
|
-
//
|
|
156
|
+
// Generic-factual next (NARROW — only truly-conceptual questions)
|
|
123
157
|
for (const rx of FACTUAL_PATTERNS) {
|
|
124
158
|
const m = trimmed.match(rx);
|
|
125
159
|
if (m) return { category: 'factual', match: m[0] };
|
|
126
160
|
}
|
|
127
161
|
|
|
128
|
-
//
|
|
162
|
+
// Locational / project-specific-factual — gated (writes marker)
|
|
163
|
+
for (const rx of LOCATIONAL_PATTERNS) {
|
|
164
|
+
const m = trimmed.match(rx);
|
|
165
|
+
if (m) return { category: 'locational', match: m[0] };
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// Diagnostic — gated (writes marker)
|
|
129
169
|
for (const rx of DIAGNOSTIC_PATTERNS) {
|
|
130
170
|
const m = trimmed.match(rx);
|
|
131
171
|
if (m) return { category: 'diagnostic', match: m[0] };
|
|
@@ -134,23 +174,32 @@ function classifyPrompt(prompt) {
|
|
|
134
174
|
return { category: 'none' };
|
|
135
175
|
}
|
|
136
176
|
|
|
177
|
+
// Both 'diagnostic' AND 'locational' write the evidence marker. wf-1bcc67d5.
|
|
178
|
+
const GATED_CATEGORIES = new Set(['diagnostic', 'locational']);
|
|
179
|
+
|
|
137
180
|
/**
|
|
138
181
|
* Apply classification — write or skip the marker. Fail-open.
|
|
182
|
+
*
|
|
183
|
+
* @returns {{ applied: boolean, category?: string, match?: string,
|
|
184
|
+
* requiredEvidence?: number, nudge?: string, reason?: string }}
|
|
185
|
+
* On a gated category, `nudge` is a short upfront reminder string the
|
|
186
|
+
* UserPromptSubmit orchestrator can surface as additionalContext so the
|
|
187
|
+
* model is told to Read BEFORE answering — not just re-prompted at Stop.
|
|
139
188
|
*/
|
|
140
189
|
function applyClassification(prompt, config) {
|
|
141
190
|
try {
|
|
142
191
|
if (!isClassifierEnabled(config)) return { applied: false, reason: 'classifier-disabled' };
|
|
143
192
|
|
|
144
193
|
const result = classifyPrompt(prompt);
|
|
145
|
-
if (result.category
|
|
146
|
-
return { applied: false, category: result.category, reason: 'not-
|
|
194
|
+
if (!GATED_CATEGORIES.has(result.category)) {
|
|
195
|
+
return { applied: false, category: result.category, reason: 'not-gated' };
|
|
147
196
|
}
|
|
148
197
|
|
|
149
198
|
const requiredEvidence = getRequiredEvidence(config);
|
|
150
199
|
const payload = {
|
|
151
200
|
version: 1,
|
|
152
201
|
classifiedAt: new Date().toISOString(),
|
|
153
|
-
category:
|
|
202
|
+
category: result.category,
|
|
154
203
|
match: result.match,
|
|
155
204
|
requiredEvidence,
|
|
156
205
|
attemptCount: 0
|
|
@@ -159,7 +208,12 @@ function applyClassification(prompt, config) {
|
|
|
159
208
|
const tmp = `${getMarkerPath()}.tmp.${process.pid}.${Math.random().toString(36).slice(2, 8)}`;
|
|
160
209
|
fs.writeFileSync(tmp, JSON.stringify(payload, null, 2));
|
|
161
210
|
fs.renameSync(tmp, getMarkerPath());
|
|
162
|
-
|
|
211
|
+
|
|
212
|
+
const nudge = result.category === 'locational'
|
|
213
|
+
? `[research-required] This is a project-specific locational question (matched "${result.match}"). Before answering, run Read/Grep/Glob against the actual codebase — do NOT answer from prior knowledge or industry defaults. Your answer MUST cite the file:line(s) you read. (wf-1bcc67d5: a confident model answering "where does X live" from memory, doubling down, is the exact failure this gate exists to stop.)`
|
|
214
|
+
: `[research-required] This is a diagnostic question (matched "${result.match}"). Read at least ${requiredEvidence} relevant evidence files before answering; cite them.`;
|
|
215
|
+
|
|
216
|
+
return { applied: true, category: result.category, match: result.match, requiredEvidence, nudge };
|
|
163
217
|
} catch (err) {
|
|
164
218
|
if (process.env.DEBUG) {
|
|
165
219
|
console.error(`[research-required-classifier] applyClassification error (fail-open): ${err.message}`);
|
|
@@ -205,5 +259,7 @@ module.exports = {
|
|
|
205
259
|
OVERRIDE_PREFIX,
|
|
206
260
|
DIAGNOSTIC_PATTERNS,
|
|
207
261
|
FACTUAL_PATTERNS,
|
|
208
|
-
|
|
262
|
+
LOCATIONAL_PATTERNS,
|
|
263
|
+
COMMAND_PATTERNS,
|
|
264
|
+
GATED_CATEGORIES
|
|
209
265
|
};
|
|
@@ -196,23 +196,33 @@ function checkResearchRequiredGate(opts = {}) {
|
|
|
196
196
|
};
|
|
197
197
|
}
|
|
198
198
|
|
|
199
|
+
// wf-1bcc67d5: the marker now carries category 'diagnostic' OR 'locational'.
|
|
200
|
+
// For locational ("where does X live", "which file handles Y"), the message
|
|
201
|
+
// is sharper — answering from prior knowledge is the precise failure shape.
|
|
202
|
+
const isLocational = marker.category === 'locational';
|
|
203
|
+
const kind = isLocational ? 'project-specific locational question' : 'diagnostic question';
|
|
199
204
|
return {
|
|
200
205
|
blocked: true,
|
|
201
206
|
hardStop: false,
|
|
202
207
|
evidenceCount,
|
|
203
208
|
requiredEvidence,
|
|
204
209
|
message:
|
|
205
|
-
`RESEARCH-REQUIRED VIOLATION: the user asked a
|
|
210
|
+
`RESEARCH-REQUIRED VIOLATION: the user asked a ${kind} (matched "${marker.match}") ` +
|
|
206
211
|
`but you produced an answer with only ${evidenceCount} evidence read${evidenceCount === 1 ? '' : 's'} ` +
|
|
207
212
|
`(minimum required: ${requiredEvidence}).\n\n` +
|
|
213
|
+
(isLocational
|
|
214
|
+
? `You answered "${kind === 'project-specific locational question' ? 'where/which/how X works in this project' : '...'}" WITHOUT opening a file. That is the exact failure wf-1bcc67d5 exists to stop: a confident model pattern-matching to industry defaults instead of checking THIS codebase, then doubling down. Do NOT answer from prior knowledge.\n\n`
|
|
215
|
+
: '') +
|
|
208
216
|
`Re-do this turn:\n` +
|
|
209
|
-
` 1. Identify the relevant code/state files for the question.\n` +
|
|
210
|
-
` 2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg ` +
|
|
217
|
+
` 1. Identify the relevant code/state files for the question (grep first if you're not sure where).\n` +
|
|
218
|
+
` 2. Read at least ${requiredEvidence} of them via the Read tool. Bash with cat/head/grep/rg/Glob/Grep ` +
|
|
211
219
|
`against evidence paths also counts.\n` +
|
|
212
|
-
` 3. THEN answer
|
|
220
|
+
` 3. THEN answer — and the answer MUST cite the file:line(s) you actually read. An uncited answer to a ` +
|
|
221
|
+
`${kind} is not acceptable.\n\n` +
|
|
213
222
|
`Evidence prefixes that count: .workflow/state/, .workflow/changes/, lib/, scripts/, src/, tests/, app/.\n\n` +
|
|
214
|
-
`If you genuinely
|
|
215
|
-
|
|
223
|
+
`If you genuinely cannot find the relevant files after grepping, say so explicitly and ask the user via ` +
|
|
224
|
+
`\`flow ask "<question>"\` — do NOT guess.\n` +
|
|
225
|
+
`If this is genuinely a generic-knowledge question (not about this project), the user can prefix their ` +
|
|
216
226
|
`next prompt with \`!\` to skip the gate.\n\n` +
|
|
217
227
|
`Attempt ${next.attemptCount}/${maxAttempts}.`
|
|
218
228
|
};
|
|
@@ -70,13 +70,19 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
|
|
|
70
70
|
}
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
// wf-5cd71b1f: Research-required classifier
|
|
73
|
+
// wf-5cd71b1f + wf-1bcc67d5: Research-required classifier. Now catches
|
|
74
|
+
// 'locational' (project-specific "where is X / which file / how does the
|
|
75
|
+
// X work") in addition to 'diagnostic'. Both write the evidence marker
|
|
76
|
+
// (Stop-hook backstop) AND surface an upfront nudge so the model is told
|
|
77
|
+
// to Read BEFORE answering — not just re-prompted after.
|
|
78
|
+
let researchRequiredNudge = null;
|
|
74
79
|
if (typeof prompt === 'string' && prompt.trim().length > 0) {
|
|
75
80
|
try {
|
|
76
81
|
const { applyClassification: applyResearchClassification } = require('./research-required-classifier');
|
|
77
82
|
const r = applyResearchClassification(prompt, hookConfig);
|
|
78
|
-
if (r.applied
|
|
79
|
-
|
|
83
|
+
if (r.applied) {
|
|
84
|
+
if (r.nudge) researchRequiredNudge = r.nudge;
|
|
85
|
+
if (process.env.DEBUG) console.error(`[Hook] Research-required classifier: category=${r.category}, match="${r.match}"`);
|
|
80
86
|
}
|
|
81
87
|
} catch (err) {
|
|
82
88
|
if (process.env.DEBUG) console.error(`[Hook] Research-required classifier failed: ${err.message}`);
|
|
@@ -168,6 +174,7 @@ async function orchestrateUserPromptSubmit({ input, parsedInput }) {
|
|
|
168
174
|
}
|
|
169
175
|
|
|
170
176
|
if (phasePrompt) coreResult = { ...coreResult, phasePrompt };
|
|
177
|
+
if (researchRequiredNudge) coreResult = { ...coreResult, researchRequiredNudge };
|
|
171
178
|
|
|
172
179
|
// wf-d3e67abe — overdue workspace dispatches
|
|
173
180
|
try {
|