wogiflow 2.30.3 → 2.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/wogi-self-adversary.md +130 -0
- package/package.json +2 -2
- package/scripts/flow-impl-question-classifier.js +176 -0
- package/scripts/flow-self-adversary-loop.js +360 -0
- package/scripts/hooks/core/deferral-gate.js +86 -18
- package/scripts/hooks/core/pre-tool-deps.js +11 -0
- package/scripts/hooks/core/pre-tool-orchestrator.js +21 -0
- package/scripts/hooks/core/self-adversary-gate.js +292 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# /wogi-self-adversary — Self-adversary decision loop
|
|
2
|
+
|
|
3
|
+
Iterate a generator and adversary on different models until you reach ≥95% confidence on an implementation-class decision. Only escalate to the user if confidence stays low after the loop.
|
|
4
|
+
|
|
5
|
+
**Triggers**: invoked by the AI itself when blocked by the self-adversary PreToolUse gate (wf-e399bd8d), OR by the user directly.
|
|
6
|
+
|
|
7
|
+
## Usage
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
/wogi-self-adversary "<question + brief context>"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
The argument should be the question the AI was about to ask the user, optionally followed by relevant context (files, prior decisions, constraints). Both will be passed to the loop.
|
|
14
|
+
|
|
15
|
+
## How it works
|
|
16
|
+
|
|
17
|
+
For Claude inside this skill — read carefully, then execute.
|
|
18
|
+
|
|
19
|
+
### Step 1: Parse the argument
|
|
20
|
+
|
|
21
|
+
The ARGUMENTS string contains the question + context. Split on a sensible boundary (first newline, or `--context:` separator if present). If no clear split, treat the entire argument as the question and leave context empty.
|
|
22
|
+
|
|
23
|
+
### Step 2: Run the loop
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
const { runSelfAdversaryLoop } = require('wogiflow/scripts/flow-self-adversary-loop');
|
|
27
|
+
const result = await runSelfAdversaryLoop({
|
|
28
|
+
question: questionText,
|
|
29
|
+
context: contextText,
|
|
30
|
+
maxIterations: 8,
|
|
31
|
+
targetConfidence: 95
|
|
32
|
+
});
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Alternatively, run the loop via Bash if a CLI wrapper exists; otherwise invoke it inline through Node.
|
|
36
|
+
|
|
37
|
+
### Step 3: Handle the result
|
|
38
|
+
|
|
39
|
+
**Three possible outcomes:**
|
|
40
|
+
|
|
41
|
+
**A. `escalate: false`** — confident decision reached.
|
|
42
|
+
|
|
43
|
+
1. Display the decision + confidence + iteration count to the user as a summary.
|
|
44
|
+
2. Write the completion marker so the next `AskUserQuestion` (if any) is allowed:
|
|
45
|
+
```js
|
|
46
|
+
const gate = require('wogiflow/scripts/hooks/core/self-adversary-gate');
|
|
47
|
+
gate.writeCompletionMarker({
|
|
48
|
+
question: questionText,
|
|
49
|
+
decision: result.decision,
|
|
50
|
+
confidence: result.confidence,
|
|
51
|
+
iterationCount: result.iterationCount
|
|
52
|
+
});
|
|
53
|
+
```
|
|
54
|
+
3. ACT on the decision in your subsequent tool calls — no more asking, no hedging.
|
|
55
|
+
|
|
56
|
+
**B. `escalate: true` (reason: `low-confidence` / `max-iterations-exhausted`)** — loop ran but couldn't converge.
|
|
57
|
+
|
|
58
|
+
1. Write the escalation marker (allows the next `AskUserQuestion` to pass without re-blocking):
|
|
59
|
+
```js
|
|
60
|
+
gate.writeEscalationMarker({
|
|
61
|
+
question: questionText,
|
|
62
|
+
decision: result.decision,
|
|
63
|
+
confidence: result.confidence,
|
|
64
|
+
iterationCount: result.iterationCount,
|
|
65
|
+
reason: result.reason
|
|
66
|
+
});
|
|
67
|
+
```
|
|
68
|
+
2. Surface to the user with: the question, what the loop concluded (best decision + confidence), why iteration couldn't push past the threshold, and what specific resolution you need from them.
|
|
69
|
+
3. Call `AskUserQuestion` (which now passes the gate).
|
|
70
|
+
|
|
71
|
+
**C. `escalate: true` (reason: `no-credentials` / `model-error` / etc.)** — loop couldn't run.
|
|
72
|
+
|
|
73
|
+
1. Note the failure mode briefly to the user.
|
|
74
|
+
2. Write the escalation marker and surface the original question.
|
|
75
|
+
|
|
76
|
+
### Step 4: Audit trail
|
|
77
|
+
|
|
78
|
+
Append a one-line summary to `.workflow/state/self-adversary-log.json` (entries are only ever appended, never edited; the file is ring-buffered to the most recent 100 entries):
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"timestamp": "...",
|
|
83
|
+
"questionHash": "...",
|
|
84
|
+
"iterations": N,
|
|
85
|
+
"finalConfidence": X,
|
|
86
|
+
"outcome": "decided" | "escalated",
|
|
87
|
+
"reason": "..."
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
This lets the user audit how often the loop converges vs escalates. Helps tune `targetConfidence` and `maxIterations` over time.
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
`.workflow/config.json`:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"selfAdversaryGate": {
|
|
100
|
+
"enabled": true,
|
|
101
|
+
"targetConfidence": 95,
|
|
102
|
+
"maxIterations": 8,
|
|
103
|
+
"generatorModel": "anthropic:claude-sonnet-4-6",
|
|
104
|
+
"adversaryModel": "anthropic:claude-3-5-haiku-latest"
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
- `enabled: false` — disables the PreToolUse gate AND prevents the skill from running, reverting to "always ask the user".
|
|
110
|
+
- `targetConfidence` — clamped to [50, 99]; default 95.
|
|
111
|
+
- `maxIterations` — clamped to [1, 12]; default 8.
|
|
112
|
+
|
|
113
|
+
## Files
|
|
114
|
+
|
|
115
|
+
| File | Purpose |
|
|
116
|
+
|---|---|
|
|
117
|
+
| `scripts/flow-self-adversary-loop.js` | Core loop (generator ↔ adversary, iteration memory in-process). |
|
|
118
|
+
| `scripts/flow-impl-question-classifier.js` | Haiku classifier — implementation vs product/architecture/sensitive. |
|
|
119
|
+
| `scripts/hooks/core/self-adversary-gate.js` | PreToolUse intercept + markers. |
|
|
120
|
+
| `.workflow/state/self-adversary-complete.json` | Single-use marker, allows next AskUserQuestion. |
|
|
121
|
+
| `.workflow/state/self-adversary-escalation.json` | Single-use marker, allows next AskUserQuestion after loop concluded "needs-user". |
|
|
122
|
+
| `.workflow/state/self-adversary-log.json` | Append-only audit trail. |
|
|
123
|
+
|
|
124
|
+
## Why this exists
|
|
125
|
+
|
|
126
|
+
User directive 2026-05-11 (wf-e399bd8d):
|
|
127
|
+
|
|
128
|
+
> "Always do highest standards, best approach, don't compromise on quality for token savings. Challenge yourself a few times and most of the times you get to a point where you already know what to do with very high confidence, 90 or 95+ percent. When you have doubt that you'll be able to challenge yourself, use adversary research. And do it in a few iterations until you're confident. And only if you're still not confident, then ask the user."
|
|
129
|
+
|
|
130
|
+
The pattern maps to Self-Refine (Madaan et al. 2023) + Reflexion (Shinn et al. 2023) + Multi-Agent Reflexion (different-model adversary escapes local optima). WogiFlow already runs an Architect+Adversary loop at the PLAN level (IGR Step 1.55/1.57). This skill is the finer-grained, implementation-decision analogue: it runs during coding rather than during spec_review.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wogiflow",
|
|
3
|
-
"version": "2.30.3",
|
|
3
|
+
"version": "2.31.0",
|
|
4
4
|
"description": "AI-powered development workflow management system with multi-model support",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
},
|
|
11
11
|
"scripts": {
|
|
12
12
|
"flow": "./scripts/flow",
|
|
13
|
-
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js tests/flow-standards-forbidden-patterns.test.js tests/flow-hooks-architect-required-gate.test.js tests/flow-architect-runs.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
13
|
+
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js tests/flow-standards-forbidden-patterns.test.js tests/flow-hooks-architect-required-gate.test.js tests/flow-architect-runs.test.js tests/flow-installer-forbidden-patterns.test.js tests/flow-deferral-classifier-ai.test.js tests/flow-no-defer-policy.test.js tests/flow-self-adversary-loop.test.js tests/flow-impl-question-classifier.test.js tests/flow-hooks-self-adversary-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
14
14
|
"test:syntax": "find scripts/ lib/ -name '*.js' -not -path '*/node_modules/*' -exec node --check {} +",
|
|
15
15
|
"lint": "eslint scripts/ lib/ tests/",
|
|
16
16
|
"lint:ci": "eslint scripts/ lib/ tests/ --max-warnings 0",
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow — Implementation-Question Classifier (wf-e399bd8d)
|
|
5
|
+
*
|
|
6
|
+
* Classifies an "AI is about to ask the user" question to decide whether
|
|
7
|
+
* the self-adversary loop should run first or the question should reach
|
|
8
|
+
* the user directly.
|
|
9
|
+
*
|
|
10
|
+
* Four categories:
|
|
11
|
+
* implementation — code structure, library/algorithm choice, naming,
|
|
12
|
+
* refactor mechanics, testing approach. The AI should
|
|
13
|
+
* self-adversary (likely high enough confidence).
|
|
14
|
+
* product — domain semantics, user-facing behavior, what to
|
|
15
|
+
* SHOW the user, what counts as "done" for the
|
|
16
|
+
* business. The AI cannot self-adversary; ask user.
|
|
17
|
+
* architecture — system-design tradeoffs (DB choice, deployment
|
|
18
|
+
* topology, public API shape). Tier-3: existing
|
|
19
|
+
* researchReasoningGate handles this with adversary;
|
|
20
|
+
* the new loop can also handle it but caller decides.
|
|
21
|
+
* sensitive — destructive operations (delete, force-push, drop),
|
|
22
|
+
* cross-boundary commitments (notify users, send
|
|
23
|
+
* emails). Always ask.
|
|
24
|
+
*
|
|
25
|
+
* The classifier is a small Haiku call. Fail-open: any error → ask
|
|
26
|
+
* (treat as if classification said "product"), preserving prior
|
|
27
|
+
* behavior. This avoids the failure shape from wf-b8839d99 (regex
|
|
28
|
+
* silently misclassifying).
|
|
29
|
+
*
|
|
30
|
+
* Note: this is interpretation of an AI-AUTHORED question (the question
|
|
31
|
+
* the AI is about to ask the user). It is NOT user-input parsing — so
|
|
32
|
+
* the "no regex on user answers" rule from wf-b8839d99 doesn't constrain
|
|
33
|
+
* us. We still use AI here because hedging vocabulary for implementation
|
|
34
|
+
* vs product is unbounded.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
// Threshold applied when options.minConfidence is not a finite number; the
// loop is only recommended when the classifier's confidence reaches it.
const DEFAULT_MIN_CONFIDENCE = 75;

// Model used when options.model is not supplied.
const DEFAULT_MODEL = 'anthropic:claude-3-5-haiku-latest';

// The question text is cut to this many characters before it is embedded in
// the classifier prompt.
const MAX_QUESTION_CHARS = 3000;

// Passed to callModel as `maxTokens` and `temperature` respectively.
const MAX_TOKENS = 300;
const TEMPERATURE = 0;
|
|
42
|
+
|
|
43
|
+
const { DANGEROUS_KEYS } = require('./flow-io');
|
|
44
|
+
|
|
45
|
+
/**
 * Report whether `value`, or anything nested inside it, carries an own
 * enumerable key that is listed in DANGEROUS_KEYS (imported from ./flow-io).
 *
 * @param {*} value - Any value; null and non-objects never match.
 * @returns {boolean} true as soon as a listed key is found at any depth.
 */
function hasDangerousKeys(value) {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  if (Array.isArray(value)) {
    return value.some(hasDangerousKeys);
  }
  // Check each key itself first, then descend into the value stored under it.
  return Object.keys(value).some(
    (key) => DANGEROUS_KEYS.has(key) || hasDangerousKeys(value[key])
  );
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build the prompt sent to the classifier model.
 *
 * The question is coerced to a string (null/undefined become the empty
 * string), cut to MAX_QUESTION_CHARS characters and placed between the
 * [QUESTION_START] / [QUESTION_END] markers.
 *
 * @param {*} questionText - The question the AI is about to ask the user.
 * @returns {string} The complete classifier prompt.
 */
function buildClassifierPrompt(questionText) {
  const excerpt = String(questionText || '').slice(0, MAX_QUESTION_CHARS);

  return `You classify the type of question an AI development assistant is about to ask the user. The user has instructed the AI to STOP asking implementation-class questions — instead, the AI should iterate generator↔adversary on a different model until ≥95% confidence. Product, architecture, or sensitive questions still reach the user normally.

[QUESTION_START]
${excerpt}
[QUESTION_END]

Four categories:

IMPLEMENTATION — code structure, library or algorithm choice, naming,
refactor mechanics, test framework picks, error-handling shape, code
organization, idiom selection. The AI can reason this out with research.

PRODUCT — domain semantics, user-facing behavior decisions, feature
scope, what counts as "done" for the business, copy/tone, UX flow
decisions. The AI cannot reason its way to these without the owner.

ARCHITECTURE — system-design tradeoffs (database choice, deployment
topology, public API shape, multi-tenant boundaries). High-stakes;
self-adversary alone may not be enough but more iteration helps.

SENSITIVE — destructive operations (delete data, force-push, drop
table), cross-boundary commitments (notify users, send emails),
legal/compliance gates. Always ask the user.

CRITICAL RULES:
1. When ambiguous, return PRODUCT — the cost of mis-asking is low, the
cost of mis-acting is high.
2. Even if the question phrasing is technical, ask whether the ANSWER
depends on user-only knowledge. "Which date format do users
prefer?" — phrasing is technical, answer is product.
3. Confidence: only ≥80 if the category is unambiguous.

Return JSON only, no prose, no markdown fences:
{
"category": "implementation" | "product" | "architecture" | "sensitive",
"confidence": 0-100,
"reason": "one short sentence"
}

Examples:
- "Should this be a map() or for-loop?" → {"category":"implementation","confidence":95,"reason":"pure code-style choice"}
- "Which date format do users prefer?" → {"category":"product","confidence":90,"reason":"answer depends on user preference"}
- "Should we use Postgres or MongoDB?" → {"category":"architecture","confidence":85,"reason":"system-design tradeoff"}
- "OK to delete the migration table?" → {"category":"sensitive","confidence":95,"reason":"destructive operation"}
- "Should I add error handling here?" → {"category":"implementation","confidence":85,"reason":"code-quality choice the AI can research"}`;
}
|
|
102
|
+
|
|
103
|
+
async function classifyImplementationQuestion(questionText, options = {}) {
|
|
104
|
+
const minConfidence = Number.isFinite(options.minConfidence) ? options.minConfidence : DEFAULT_MIN_CONFIDENCE;
|
|
105
|
+
const model = options.model || DEFAULT_MODEL;
|
|
106
|
+
|
|
107
|
+
if (typeof questionText !== 'string' || questionText.trim().length === 0) {
|
|
108
|
+
return { classified: false, reason: 'empty-question' };
|
|
109
|
+
}
|
|
110
|
+
if (!process.env.ANTHROPIC_API_KEY) {
|
|
111
|
+
return { classified: false, reason: 'no-credentials' };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let callModel;
|
|
115
|
+
try {
|
|
116
|
+
({ callModel } = require('./flow-model-caller'));
|
|
117
|
+
} catch (_err) {
|
|
118
|
+
return { classified: false, reason: 'no-model-caller' };
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
let result;
|
|
122
|
+
try {
|
|
123
|
+
result = await callModel(model, buildClassifierPrompt(questionText), {
|
|
124
|
+
temperature: TEMPERATURE,
|
|
125
|
+
maxTokens: MAX_TOKENS
|
|
126
|
+
});
|
|
127
|
+
} catch (err) {
|
|
128
|
+
if (process.env.DEBUG) {
|
|
129
|
+
console.error(`[impl-question-classifier] model call failed: ${err.message}`);
|
|
130
|
+
}
|
|
131
|
+
return { classified: false, reason: 'model-error' };
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const raw = String(result?.response ?? result?.content ?? '').trim();
|
|
135
|
+
if (!raw) return { classified: false, reason: 'empty-response' };
|
|
136
|
+
|
|
137
|
+
const jsonMatch = raw.match(/\{[\s\S]*\}/);
|
|
138
|
+
if (!jsonMatch) return { classified: false, reason: 'non-json-response' };
|
|
139
|
+
|
|
140
|
+
let parsed;
|
|
141
|
+
try {
|
|
142
|
+
parsed = JSON.parse(jsonMatch[0]);
|
|
143
|
+
} catch (_err) {
|
|
144
|
+
return { classified: false, reason: 'json-parse-error' };
|
|
145
|
+
}
|
|
146
|
+
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
147
|
+
return { classified: false, reason: 'bad-shape' };
|
|
148
|
+
}
|
|
149
|
+
if (hasDangerousKeys(parsed)) {
|
|
150
|
+
return { classified: false, reason: 'dangerous-keys' };
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const categoryRaw = String(parsed.category || '').toLowerCase();
|
|
154
|
+
const category = ['implementation', 'product', 'architecture', 'sensitive'].includes(categoryRaw)
|
|
155
|
+
? categoryRaw
|
|
156
|
+
: 'product'; // fail-safe default
|
|
157
|
+
const confidence = Number.isFinite(parsed.confidence) ? Math.round(parsed.confidence) : 0;
|
|
158
|
+
const reason = typeof parsed.reason === 'string' ? parsed.reason.slice(0, 240) : '';
|
|
159
|
+
|
|
160
|
+
return {
|
|
161
|
+
classified: true,
|
|
162
|
+
category,
|
|
163
|
+
confidence,
|
|
164
|
+
reason,
|
|
165
|
+
shouldRunLoop: category === 'implementation' && confidence >= minConfidence,
|
|
166
|
+
minConfidence
|
|
167
|
+
};
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
module.exports = {
|
|
171
|
+
classifyImplementationQuestion,
|
|
172
|
+
buildClassifierPrompt,
|
|
173
|
+
hasDangerousKeys,
|
|
174
|
+
DEFAULT_MIN_CONFIDENCE,
|
|
175
|
+
DEFAULT_MODEL
|
|
176
|
+
};
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow — Self-Adversary Decision Loop (wf-e399bd8d)
|
|
5
|
+
*
|
|
6
|
+
* Implements the Self-Refine + Reflexion pattern for implementation-class
|
|
7
|
+
* decision-making. When the AI hits an "implementation/approach" question
|
|
8
|
+
* mid-task that it would otherwise ask the user about, it should instead
|
|
9
|
+
* iterate generator ↔ adversary on different models until confidence ≥ 95%
|
|
10
|
+
* (or max iterations). Only then, if still uncertain, escalate to user.
|
|
11
|
+
*
|
|
12
|
+
* User directive (2026-05-11, wf-e399bd8d original prompt):
|
|
13
|
+
* "Always do highest standards, best approach, don't compromise on quality
|
|
14
|
+
* for token savings. Challenge yourself a few times and most of the times
|
|
15
|
+
* you get to a point where you already know what to do with very high
|
|
16
|
+
* confidence, 90 or 95+ percent. When you have doubt that you'll be able
|
|
17
|
+
* to challenge yourself, use adversary research. And do it in a few
|
|
18
|
+
* iterations until you're confident. And only if you're still not
|
|
19
|
+
* confident, then ask the user."
|
|
20
|
+
*
|
|
21
|
+
* Pattern references:
|
|
22
|
+
* - Self-Refine (Madaan et al. 2023, arxiv 2303.17651): same LLM
|
|
23
|
+
* generates → critiques → refines. ~20% absolute task gains.
|
|
24
|
+
* - Reflexion (Shinn et al. 2023, arxiv 2303.11366): verbal self-
|
|
25
|
+
* reflection stored in iteration memory, ~25-50% production gains.
|
|
26
|
+
* - Socratic Self-Refine (SSR, 2025): step-level confidence with
|
|
27
|
+
* sub-question decomposition.
|
|
28
|
+
* - WogiFlow IGR Architect+Adversary (existing): different-model
|
|
29
|
+
* adversary at the PLAN level. This module is the IMPLEMENTATION-
|
|
30
|
+
* DECISION analogue.
|
|
31
|
+
*
|
|
32
|
+
* Architecture:
|
|
33
|
+
* 1. Generator (default: Sonnet) produces initial decision + confidence
|
|
34
|
+
* + rationale + sub-confidences (which parts are weakest).
|
|
35
|
+
* 2. Adversary (default: Haiku, different model to escape local optima)
|
|
36
|
+
* critiques: weakest claims, counterexamples, alternatives the
|
|
37
|
+
* generator missed.
|
|
38
|
+
* 3. Generator refines, taking adversary feedback into account. Memory
|
|
39
|
+
* of prior iterations is appended (Reflexion pattern) — in-process
|
|
40
|
+
* only, NEVER persisted to disk (avoid memory-injection attacks per
|
|
41
|
+
* International AI Safety Report 2026).
|
|
42
|
+
* 4. Loop terminates when: confidence ≥ threshold, OR max iterations
|
|
43
|
+
* reached, OR adversary fails-open.
|
|
44
|
+
* 5. AskUserQuestion is structurally unavailable to sub-agents inside
|
|
45
|
+
* this loop (the prompts tell the models it is forbidden). If the model insists
|
|
46
|
+
* on asking, that signals genuine ambiguity → escalate.
|
|
47
|
+
*
|
|
48
|
+
* Failure modes — all fail SAFE (escalate to user):
|
|
49
|
+
* - No API key: return { escalate: true, reason: 'no-credentials' }
|
|
50
|
+
* - Model call error: return { escalate: true, reason: 'model-error' }
|
|
51
|
+
* - Malformed JSON: skip that iteration, retry
|
|
52
|
+
* - Max iterations + confidence < threshold: return { escalate: true,
|
|
53
|
+
* reason: 'low-confidence', confidence, decision }
|
|
54
|
+
*
|
|
55
|
+
* Fail-safe direction: escalating to user is SAFER than acting on a
|
|
56
|
+
* low-confidence self-adversary decision. The user's instruction was
|
|
57
|
+
* "only if you're still not confident, then ask the user" — so escalation
|
|
58
|
+
* IS the contract when uncertainty remains.
|
|
59
|
+
*/
|
|
60
|
+
|
|
61
|
+
// Iteration budget used when opts.maxIterations is not a positive finite number.
const DEFAULT_MAX_ITERATIONS = 8;

// Default confidence threshold, in percent (documented default of
// opts.targetConfidence).
const DEFAULT_TARGET_CONFIDENCE = 95;

// Default models for the two roles. They are deliberately different models.
const DEFAULT_GENERATOR_MODEL = 'anthropic:claude-sonnet-4-6';
const DEFAULT_ADVERSARY_MODEL = 'anthropic:claude-3-5-haiku-latest';

// Prompt budget: the question and the surrounding context are each cut to
// half of this many characters.
const MAX_CONTEXT_CHARS = 8000;

// NOTE(review): presumably the output-token caps and sampling temperature
// passed to the generator / adversary model calls — confirm at the call sites.
const MAX_TOKENS_GEN = 1200;
const MAX_TOKENS_ADV = 800;
const TEMPERATURE = 0;
|
|
69
|
+
|
|
70
|
+
const { DANGEROUS_KEYS } = require('./flow-io');
|
|
71
|
+
|
|
72
|
+
/**
 * Recursively look for an own enumerable key listed in DANGEROUS_KEYS
 * (imported from ./flow-io) anywhere inside `value`.
 *
 * @param {*} value - Value to inspect; primitives and null are never dangerous.
 * @returns {boolean} true if any nested object has a listed key.
 */
function hasDangerousKeys(value) {
  const isContainer = Boolean(value) && typeof value === 'object';
  if (!isContainer) {
    return false;
  }

  if (Array.isArray(value)) {
    for (const element of value) {
      if (hasDangerousKeys(element)) {
        return true;
      }
    }
    return false;
  }

  // Plain object: the key is tested before the value under it is visited.
  return Object.keys(value).some(
    (name) => DANGEROUS_KEYS.has(name) || hasDangerousKeys(value[name])
  );
}
|
|
81
|
+
|
|
82
|
+
/**
 * Build the GENERATOR prompt for one round of the loop.
 *
 * The question and the context are each coerced to a string and cut to half
 * of MAX_CONTEXT_CHARS. Prior rounds are rendered into an "Iteration memory"
 * section; a missing, null or non-array `iterationMemory` is treated as "no
 * prior iterations" instead of throwing.
 *
 * @param {Object} [args]
 * @param {string} [args.question] - The decision question.
 * @param {string} [args.context] - Surrounding context.
 * @param {Array<{decision: *, confidence: *, adversaryCritique?: *}>} [args.iterationMemory]
 *   Earlier rounds of this loop, oldest first.
 * @returns {string} The complete generator prompt.
 */
function buildGeneratorPrompt({ question, context, iterationMemory } = {}) {
  const rounds = Array.isArray(iterationMemory) ? iterationMemory : [];
  const memoryBlock = rounds.length === 0
    ? '(no prior iterations)'
    : rounds.map((it, i) =>
      `## Iteration ${i + 1}\nDecision: ${it.decision}\nConfidence: ${it.confidence}%\nWeak points (per adversary): ${it.adversaryCritique || '(no critique yet)'}`
    ).join('\n\n');

  return `You are the GENERATOR in a Self-Refine + Reflexion loop for an implementation-class decision.

The user has asked WogiFlow to handle implementation-approach decisions WITHOUT asking the user every time — instead, you iterate with an adversary on a DIFFERENT model until you reach ≥95% confidence, then act. Asking the user is reserved for product/domain questions and genuine ambiguity that survives the loop.

## Decision question
${String(question || '').slice(0, MAX_CONTEXT_CHARS / 2)}

## Surrounding context
${String(context || '').slice(0, MAX_CONTEXT_CHARS / 2)}

## Iteration memory (prior rounds in THIS loop)
${memoryBlock}

## Your task

1. State the decision you would make right now.
2. Give brief rationale (≤4 sentences) — anchored to the context and any adversary critiques in the memory.
3. Score your own confidence 0-100 — be calibrated, not optimistic. If a key sub-claim is shaky, the overall confidence cannot be higher than the weakest sub-claim.
4. List your weakest sub-claims (what an adversary would attack).

Return JSON only, no prose, no markdown fences:
{
"decision": "one-sentence final answer",
"rationale": "≤4 sentences, in plain text",
"confidence": 0-100,
"weakSubClaims": ["...", "..."]
}

Calibration rules:
- If you have not considered ≥2 alternatives, confidence ≤ 70.
- If a domain-specific fact is uncertain, confidence ≤ 80.
- Confidence ≥ 95 means: you've reasoned through alternatives, the rationale withstands obvious counterarguments, and the implementation is well-defined.
- You CANNOT ask the user — that path is structurally unavailable inside this loop.`;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Build the ADVERSARY prompt that critiques one candidate decision.
 *
 * The question and the context are each coerced to a string and cut to half
 * of MAX_CONTEXT_CHARS. `candidate.weakSubClaims` comes from model output, so
 * its shape is not trusted: an array is joined with "; ", a string is used
 * as-is, and anything else (or an empty result) is rendered as "(none)".
 *
 * @param {Object} args
 * @param {string} [args.question] - The decision question.
 * @param {string} [args.context] - Surrounding context.
 * @param {{decision: *, rationale: *, confidence: *, weakSubClaims?: *}} args.candidate
 *   The generator's candidate for this round.
 * @returns {string} The complete adversary prompt.
 */
function buildAdversaryPrompt({ question, context, candidate }) {
  const rawClaims = candidate.weakSubClaims;
  let weakClaims = '';
  if (Array.isArray(rawClaims)) {
    weakClaims = rawClaims.join('; ');
  } else if (typeof rawClaims === 'string') {
    // A loosely formatted reply may give one string instead of a list.
    weakClaims = rawClaims;
  }

  return `You are the ADVERSARY in a Self-Refine + Reflexion loop. A GENERATOR (different model) just produced a candidate decision. Your job: find the weakest spots.

## Decision question
${String(question || '').slice(0, MAX_CONTEXT_CHARS / 2)}

## Surrounding context
${String(context || '').slice(0, MAX_CONTEXT_CHARS / 2)}

## Candidate decision
Decision: ${candidate.decision}
Rationale: ${candidate.rationale}
Self-confidence: ${candidate.confidence}%
Weak sub-claims (self-reported): ${weakClaims || '(none)'}

## Your task

Be a sharp, specific critic. Don't restate the candidate — attack it.
1. Strongest counterargument or missed alternative (≤2 sentences).
2. Any sub-claim that the generator over-confidenced (≤2 sentences).
3. Adjusted-confidence estimate — what would YOU score it at, after considering the above?

Return JSON only, no prose, no markdown fences:
{
"critique": "the counterargument / missed alternative",
"overconfidentClaims": "the sub-claim issue, or 'none' if calibration is fair",
"adjustedConfidence": 0-100,
"verdict": "accept" | "revise" | "needs-user"
}

Verdict rules:
- "accept" — candidate is sound, confidence is calibrated, no significant weak points.
- "revise" — candidate has fixable issues; generator should refine.
- "needs-user" — genuine ambiguity / domain question that no amount of iteration resolves. Use sparingly.`;
}
|
|
159
|
+
|
|
160
|
+
/**
 * Pull a JSON object out of a raw model reply.
 *
 * The span from the first `{` to the last `}` is parsed as JSON. Anything
 * that is not a string, has no such span, does not parse, is not a plain
 * (non-array) object, or is flagged by hasDangerousKeys yields null.
 *
 * @param {*} raw - Raw model output.
 * @returns {Object|null} The parsed object, or null when unusable.
 */
function extractJson(raw) {
  if (typeof raw !== 'string') return null;

  // Greedy on purpose: first opening brace through the last closing brace.
  const braceSpan = /\{[\s\S]*\}/.exec(raw);
  if (braceSpan === null) return null;

  try {
    const value = JSON.parse(braceSpan[0]);
    const isPlainObject = value !== null && typeof value === 'object' && !Array.isArray(value);
    if (!isPlainObject || hasDangerousKeys(value)) {
      return null;
    }
    return value;
  } catch (_err) {
    // Unparseable text (or a failing key check) is treated as "no JSON".
    return null;
  }
}
|
|
173
|
+
|
|
174
|
+
/**
 * Run the self-adversary loop.
 *
 * Each iteration asks the generator model for a candidate decision and then
 * asks the adversary model to critique it. The loop stops as soon as the
 * adversary-adjusted confidence reaches the target, the adversary answers
 * "needs-user", a model call fails, or the iteration budget is spent.
 * Iteration history is kept in memory only and returned to the caller.
 *
 * @param {Object} opts
 * @param {string} opts.question - The implementation-class question
 * @param {string} [opts.context] - Surrounding context (files, decisions, etc.)
 * @param {number} [opts.maxIterations=8] - Capped at 12; non-positive or
 *   non-finite values fall back to DEFAULT_MAX_ITERATIONS.
 * @param {number} [opts.targetConfidence=95] - Clamped to the range 50..99.
 * @param {string} [opts.generatorModel]
 * @param {string} [opts.adversaryModel]
 * @returns {Promise<{
 *   classified: boolean,
 *   escalate: boolean,
 *   reason?: string,
 *   decision?: string,
 *   rationale?: string,
 *   confidence?: number,
 *   iterations?: Array,
 *   iterationCount?: number,
 *   targetConfidence?: number
 * }>}
 */
async function runSelfAdversaryLoop(opts = {}) {
  const question = typeof opts.question === 'string' ? opts.question.trim() : '';
  if (!question) {
    return { classified: false, escalate: true, reason: 'empty-question' };
  }

  const context = typeof opts.context === 'string' ? opts.context : '';
  const maxIterations = Number.isFinite(opts.maxIterations) && opts.maxIterations > 0
    ? Math.min(opts.maxIterations, 12)
    : DEFAULT_MAX_ITERATIONS;
  const targetConfidence = Number.isFinite(opts.targetConfidence)
    ? Math.max(50, Math.min(99, opts.targetConfidence))
    : DEFAULT_TARGET_CONFIDENCE;
  const generatorModel = opts.generatorModel || DEFAULT_GENERATOR_MODEL;
  const adversaryModel = opts.adversaryModel || DEFAULT_ADVERSARY_MODEL;

  if (!process.env.ANTHROPIC_API_KEY) {
    return { classified: false, escalate: true, reason: 'no-credentials' };
  }

  // Loaded lazily so a missing model caller escalates instead of crashing.
  let callModel;
  try {
    ({ callModel } = require('./flow-model-caller'));
  } catch (_err) {
    return { classified: false, escalate: true, reason: 'no-model-caller' };
  }

  // In-process iteration memory ONLY (NEVER persist to disk — prevents
  // the memory-injection attack vector noted in International AI Safety
  // Report 2026).
  const iterationMemory = [];

  for (let i = 0; i < maxIterations; i++) {
    // Generator pass
    let genRaw;
    try {
      const r = await callModel(generatorModel, buildGeneratorPrompt({ question, context, iterationMemory }), {
        temperature: TEMPERATURE,
        maxTokens: MAX_TOKENS_GEN
      });
      genRaw = String(r?.response ?? r?.content ?? '').trim();
    } catch (err) {
      if (process.env.DEBUG) {
        console.error(`[self-adversary-loop] generator iter ${i + 1} model error: ${err.message}`);
      }
      return { classified: false, escalate: true, reason: 'generator-error' };
    }

    const candidate = extractJson(genRaw);
    if (!candidate || typeof candidate.decision !== 'string' || !Number.isFinite(candidate.confidence)) {
      // Bad iteration — record skip and retry (this still consumes one
      // iteration of the budget).
      iterationMemory.push({
        decision: '(malformed generator output)',
        confidence: 0,
        adversaryCritique: null,
        skipped: true
      });
      continue;
    }
    candidate.confidence = Math.max(0, Math.min(100, Math.round(candidate.confidence)));

    // Adversary pass — on a DIFFERENT model
    let advRaw;
    try {
      const r = await callModel(adversaryModel, buildAdversaryPrompt({ question, context, candidate }), {
        temperature: TEMPERATURE,
        maxTokens: MAX_TOKENS_ADV
      });
      advRaw = String(r?.response ?? r?.content ?? '').trim();
    } catch (err) {
      if (process.env.DEBUG) {
        console.error(`[self-adversary-loop] adversary iter ${i + 1} model error: ${err.message}`);
      }
      // Adversary error: accept candidate as final WITHOUT adversary boost.
      // If generator already says ≥ targetConfidence, take it; else escalate.
      iterationMemory.push({
        decision: candidate.decision,
        rationale: candidate.rationale,
        confidence: candidate.confidence,
        adversaryCritique: null,
        adversaryError: true
      });
      if (candidate.confidence >= targetConfidence) {
        return buildSuccess(candidate, iterationMemory, targetConfidence);
      }
      return buildEscalate(candidate, iterationMemory, targetConfidence, 'adversary-error');
    }

    // A malformed critique falls back to the generator's own confidence and
    // a 'revise' verdict.
    const critique = extractJson(advRaw);
    const adjustedConfidence = critique && Number.isFinite(critique.adjustedConfidence)
      ? Math.max(0, Math.min(100, Math.round(critique.adjustedConfidence)))
      : candidate.confidence;
    const verdict = critique?.verdict || 'revise';

    iterationMemory.push({
      decision: candidate.decision,
      rationale: candidate.rationale,
      confidence: candidate.confidence,
      adjustedConfidence,
      adversaryCritique: critique?.critique || '(adversary returned malformed JSON)',
      overconfidentClaims: critique?.overconfidentClaims || 'unknown',
      verdict
    });

    // Termination checks
    if (verdict === 'needs-user') {
      return buildEscalate(candidate, iterationMemory, targetConfidence, 'adversary-says-needs-user');
    }
    // Reaching the target ends the loop whatever the (non-"needs-user")
    // verdict was; the previous separate "accept" branch returned the same
    // value and has been folded into this one.
    if (adjustedConfidence >= targetConfidence) {
      return buildSuccess({ ...candidate, confidence: adjustedConfidence }, iterationMemory, targetConfidence);
    }
    // Otherwise loop again with the critique in memory
  }

  // Max iterations exhausted without reaching threshold
  const last = iterationMemory[iterationMemory.length - 1] || {};
  // `??` rather than `||`: an adversary-adjusted confidence of 0 is a real
  // score and must not be replaced by the generator's self-reported value.
  const finalConfidence = last.adjustedConfidence ?? last.confidence ?? 0;
  return buildEscalate(
    { decision: last.decision, rationale: last.rationale, confidence: finalConfidence },
    iterationMemory,
    targetConfidence,
    'max-iterations-exhausted'
  );
}
|
|
322
|
+
|
|
323
|
+
/**
 * Shape the result returned when the loop settles on a decision.
 *
 * @param {Object} candidate - Source of `decision`, `rationale`, `confidence`.
 * @param {Array} iterationMemory - Per-iteration records, returned as-is.
 * @param {number} targetConfidence - Threshold the loop was run against.
 * @returns {Object} Result with `classified: true` and `escalate: false`.
 */
function buildSuccess(candidate, iterationMemory, targetConfidence) {
  const { decision, rationale, confidence } = candidate;
  const iterationCount = iterationMemory.length;
  return {
    classified: true,
    escalate: false,
    decision,
    rationale,
    confidence,
    iterations: iterationMemory,
    iterationCount,
    targetConfidence
  };
}
|
|
335
|
+
|
|
336
|
+
/**
 * Shape the result returned when the loop hands the question back to the user.
 *
 * Missing or falsy candidate fields are normalized: `decision` and
 * `rationale` become null, `confidence` becomes 0.
 *
 * @param {Object} candidate - Best candidate so far (fields may be absent).
 * @param {Array} iterationMemory - Per-iteration records, returned as-is.
 * @param {number} targetConfidence - Threshold the loop was run against.
 * @param {string} reason - Why the loop is escalating.
 * @returns {Object} Result with `classified: true` and `escalate: true`.
 */
function buildEscalate(candidate, iterationMemory, targetConfidence, reason) {
  const { decision, rationale, confidence } = candidate;
  const outcome = { classified: true, escalate: true, reason };
  outcome.decision = decision || null;
  outcome.rationale = rationale || null;
  outcome.confidence = confidence || 0;
  outcome.iterations = iterationMemory;
  outcome.iterationCount = iterationMemory.length;
  outcome.targetConfidence = targetConfidence;
  return outcome;
}
|
|
349
|
+
|
|
350
|
+
module.exports = {
|
|
351
|
+
runSelfAdversaryLoop,
|
|
352
|
+
buildGeneratorPrompt,
|
|
353
|
+
buildAdversaryPrompt,
|
|
354
|
+
extractJson,
|
|
355
|
+
hasDangerousKeys,
|
|
356
|
+
DEFAULT_MAX_ITERATIONS,
|
|
357
|
+
DEFAULT_TARGET_CONFIDENCE,
|
|
358
|
+
DEFAULT_GENERATOR_MODEL,
|
|
359
|
+
DEFAULT_ADVERSARY_MODEL
|
|
360
|
+
};
|
|
@@ -315,33 +315,100 @@ function checkWriteGate(filePath, newContentRaw, config) {
|
|
|
315
315
|
}
|
|
316
316
|
|
|
317
317
|
/**
 * Strip quoted regions + heredoc bodies from a Bash command so the structural
 * regex below only sees actual shell tokens. Released v2.30.3 over-triggered
 * because the previous regex matched markdown blockquote `> "text"` inside
 * heredoc bodies of `gh release create --notes "$(cat <<'EOF'...EOF)"`.
 *
 * Best-effort: handles single-quoted, double-quoted, backtick, and heredoc
 * patterns. Doesn't attempt full shell parsing.
 *
 * Quoted regions are collapsed in ONE left-to-right pass, so whichever quote
 * opens first owns everything up to its own closer. Separate passes per quote
 * type mis-paired quotes in inputs such as `echo "it's" > f 'x'`, where the
 * apostrophe inside the double-quoted string paired with a later single quote
 * and swallowed the real redirect between them.
 *
 * @param {*} cmd - The Bash command text.
 * @returns {string} The command with heredoc bodies replaced by a
 *   ` <<HEREDOC>> ` sentinel and each quoted region reduced to an empty pair
 *   of its own quote characters; '' when `cmd` is not a string.
 */
function stripQuotedContent(cmd) {
  if (typeof cmd !== 'string') return '';
  let stripped = cmd;
  // Heredocs first (multiline) — replace body with a sentinel. The first
  // pattern wants the terminator to end its line; the second accepts a
  // terminator followed by a word boundary.
  stripped = stripped.replace(/<<-?\s*['"]?(\w+)['"]?[\s\S]*?\n\1\s*$/gm, ' <<HEREDOC>> ');
  stripped = stripped.replace(/<<-?\s*['"]?(\w+)['"]?[\s\S]*?\n\1\b/g, ' <<HEREDOC>> ');
  // Single-quoted, backtick, and double-quoted (escaped quotes allowed)
  // regions, matched together so nesting is resolved by position.
  return stripped.replace(
    /'[^']*'|`[^`]*`|"(?:[^"\\]|\\.)*"/g,
    (quoted) => quoted[0] + quoted[0]
  );
}
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Validate a Bash command against the deferral gate.
|
|
343
|
+
*
|
|
344
|
+
* wf-4a5b7a6f rewrite (2026-05-11): previously this used three independent
|
|
345
|
+
* regex checks AND'd together, which over-triggered on commands that merely
|
|
346
|
+
* REFERENCED the target file and the word "deferred" as text content
|
|
347
|
+
* (markdown blockquotes, commit messages, gh release notes). The
|
|
348
|
+
* `>\s*[^&|]` part of `mutates` matched markdown blockquote syntax inside
|
|
349
|
+
* heredocs. The bare-word `\bdeferred\b` part of `mentionsDeferral` matched
|
|
350
|
+
* any prose mention of "deferred".
|
|
351
|
+
*
|
|
352
|
+
* Fix:
|
|
353
|
+
* 1. Run the structural mutation check on a QUOTE-STRIPPED command —
|
|
354
|
+
* a `>` inside `"..."` or `'...'` is not a shell redirect.
|
|
355
|
+
* 2. Tighten the mutation check to require the target file be the WRITE
|
|
356
|
+
* DESTINATION, not merely mentioned anywhere.
|
|
357
|
+
* 3. Look for deferral keywords only AFTER the write-destination check has
* passed; at that point both the quoted form and the bare word count.
|
|
359
|
+
*
|
|
360
|
+
* If the AI tries to actually mutate the file via Bash with deferred
|
|
361
|
+
* content, the gate still catches it. Prose mentions pass through.
|
|
326
362
|
*/
|
|
327
363
|
function checkBashGate(command, config) {
|
|
328
364
|
try {
|
|
329
365
|
if (!isGateEnabled(config)) return { blocked: false };
|
|
330
366
|
if (typeof command !== 'string' || !command) return { blocked: false };
|
|
331
367
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
//
|
|
337
|
-
//
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
368
|
+
// Step 1: strip quoted/heredoc content for the SHELL-LEVEL structural
|
|
369
|
+
// check (catches `>`, `tee` in actual shell positions, not inside markdown).
|
|
370
|
+
const stripped = stripQuotedContent(command);
|
|
371
|
+
|
|
372
|
+
// Step 2: detect a mutation operation targeting the review/audit file
|
|
373
|
+
// SPECIFICALLY. The patterns require the target file to be the WRITE
|
|
374
|
+
// DESTINATION — not merely mentioned. We test against BOTH the stripped
|
|
375
|
+
// command (catches shell-level redirects) AND the original command
|
|
376
|
+
// (catches in-language constructs like `node -e "fs.writeFileSync(...)"`
|
|
377
|
+
// where the JS payload is inside double-quotes and would be stripped).
|
|
378
|
+
// The patterns themselves are tight enough that running on the original
|
|
379
|
+
// doesn't re-introduce the prose-mention false positives — they require
|
|
380
|
+
// a write-verb token (writeFileSync, tee, etc.) IMMEDIATELY before the
|
|
381
|
+
// file path.
|
|
382
|
+
const writeToTargetPatterns = [
|
|
383
|
+
/(?:>>?|>\|)\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/,
|
|
384
|
+
/\btee\b(?:\s+-[a-zA-Z]+)*\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/,
|
|
385
|
+
/\b(?:fs\.)?writeFileSync\s*\(\s*[`'"][^`'"]*last-(?:review|audit)\.json/,
|
|
386
|
+
/\bfs\.write[A-Z][a-zA-Z]*\s*\(\s*[`'"][^`'"]*last-(?:review|audit)\.json/,
|
|
387
|
+
/\bsed\s+-i\b[^|;&]*\blast-(?:review|audit)\.json/,
|
|
388
|
+
/\b(?:mv|cp|rename(?:Sync)?)\s+\S+\s+['"]?[^\s'"`|&;]*last-(?:review|audit)\.json/
|
|
389
|
+
];
|
|
390
|
+
const mutatesTarget = writeToTargetPatterns.some(re => re.test(stripped) || re.test(command));
|
|
391
|
+
if (!mutatesTarget) return { blocked: false };
|
|
392
|
+
|
|
393
|
+
// Step 3: check the ORIGINAL command for deferred-status content. We
|
|
394
|
+
// accept TWO signals:
|
|
395
|
+
// - Quoted value: "deferred" / 'deferred' / `deferred` — JSON, JS,
|
|
396
|
+
// template-literal styles.
|
|
397
|
+
// - Bare word `\bdeferred\b` (or wont-?fix, skipped, dismissed) — fallback
|
|
398
|
+
// for cases where escaping mangles the quote chars (e.g. shell-escaped
|
|
399
|
+
// `\"deferred\"` inside a `node -e` payload where the quote becomes
|
|
400
|
+
// non-adjacent to the word).
|
|
401
|
+
//
|
|
402
|
+
// The earlier false-positive case (prose mentions in release notes) is
|
|
403
|
+
// already closed by the tightened mutation check above — we only reach
|
|
404
|
+
// this step when the command demonstrably writes TO the target file.
|
|
405
|
+
// At that point, ANY mention of the deferral keyword is genuinely
|
|
406
|
+
// suspicious; the gate should err on the side of blocking.
|
|
407
|
+
const quotedDeferral = /['"`](deferred(?:[-_][a-zA-Z0-9]+)?|wont-?fix|won-?t-?fix|skipped|dismissed)['"`]/i;
|
|
408
|
+
const bareDeferral = /\b(deferred(?:[-_][a-zA-Z0-9]+)?|wont-?fix|won-?t-?fix|skipped|dismissed)\b/i;
|
|
409
|
+
const mentionsDeferral = quotedDeferral.test(command) || bareDeferral.test(command);
|
|
342
410
|
if (!mentionsDeferral) return { blocked: false };
|
|
343
411
|
|
|
344
|
-
// We can't easily extract and validate the new content from arbitrary bash.
|
|
345
412
|
// Check auth: if the user has authorized deferrals, allow. Otherwise block.
|
|
346
413
|
const authResult = isAuthorized([{ id: 'unspecified' }]);
|
|
347
414
|
if (authResult.authorized) return { blocked: false };
|
|
@@ -393,6 +460,7 @@ module.exports = {
|
|
|
393
460
|
// Core checks
|
|
394
461
|
checkWriteGate,
|
|
395
462
|
checkBashGate,
|
|
463
|
+
stripQuotedContent,
|
|
396
464
|
|
|
397
465
|
// Auth API (used by classifier + CLI helper)
|
|
398
466
|
loadAuth,
|
|
@@ -149,6 +149,16 @@ function loadGateDeps() {
|
|
|
149
149
|
if (process.env.DEBUG) console.error(`[Hook] Long-input-pending gate not loaded: ${_err.message}`);
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
+
// wf-e399bd8d — Self-adversary gate. Intercepts AskUserQuestion for
|
|
153
|
+
// implementation-class questions, requires the AI to run a self-adversary
|
|
154
|
+
// loop first. Fail-open via _noop if module fails to load.
|
|
155
|
+
let checkSelfAdversaryGate = _noop;
|
|
156
|
+
try {
|
|
157
|
+
checkSelfAdversaryGate = require('./self-adversary-gate').checkSelfAdversaryGate;
|
|
158
|
+
} catch (_err) {
|
|
159
|
+
if (process.env.DEBUG) console.error(`[Hook] Self-adversary gate not loaded: ${_err.message}`);
|
|
160
|
+
}
|
|
161
|
+
|
|
152
162
|
// CLI-agnostic helpers (not gates per se but consumed by the orchestrator)
|
|
153
163
|
const { markSkillPending } = require('../../flow-durable-session');
|
|
154
164
|
const { getConfig } = require('../../flow-utils');
|
|
@@ -183,6 +193,7 @@ function loadGateDeps() {
|
|
|
183
193
|
checkStrikeGate, checkBugfixScope, checkScopeMutation,
|
|
184
194
|
checkGitSafety, checkManagerBoundary, checkWorkerBoundary, checkPathDiscipline,
|
|
185
195
|
checkLongInputPendingGate,
|
|
196
|
+
checkSelfAdversaryGate,
|
|
186
197
|
// Side-effect helpers
|
|
187
198
|
markSkillPending,
|
|
188
199
|
// Config + runtime
|
|
@@ -347,6 +347,27 @@ function runPreToolGates(ctx, deps) {
|
|
|
347
347
|
}
|
|
348
348
|
}
|
|
349
349
|
|
|
350
|
+
// wf-e399bd8d — Self-adversary gate. If the AI is about to invoke
|
|
351
|
+
// AskUserQuestion with an implementation-class question, block it
|
|
352
|
+
// and require the self-adversary loop to run first. Product /
|
|
353
|
+
// architecture / sensitive questions pass through. Fail-open: any
|
|
354
|
+
// error allows the call.
|
|
355
|
+
if (toolName === 'AskUserQuestion' && typeof deps.checkSelfAdversaryGate === 'function') {
|
|
356
|
+
try {
|
|
357
|
+
const saResult = deps.checkSelfAdversaryGate(toolName, toolInput, config);
|
|
358
|
+
if (saResult.blocked) {
|
|
359
|
+
return {
|
|
360
|
+
allowed: false,
|
|
361
|
+
blocked: true,
|
|
362
|
+
reason: saResult.reason,
|
|
363
|
+
message: saResult.message,
|
|
364
|
+
};
|
|
365
|
+
}
|
|
366
|
+
} catch (err) {
|
|
367
|
+
if (process.env.DEBUG) console.error(`[Hook] Self-adversary gate error (fail-open): ${err.message}`);
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
350
371
|
// Long-input-pending gate (P11.6 mechanical layer): if the prior
|
|
351
372
|
// UserPromptSubmit hook flagged this prompt as long-form-without-source-link
|
|
352
373
|
// and wrote the pending marker, block any mutating tool until extract-review
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow — Self-Adversary PreToolUse Gate (wf-e399bd8d)
|
|
5
|
+
*
|
|
6
|
+
* Intercepts AskUserQuestion tool calls. If the question classifier
|
|
7
|
+
* returns IMPLEMENTATION with high confidence AND no recent self-
|
|
8
|
+
* adversary loop completion marker exists, BLOCK the call with
|
|
9
|
+
* instructions to run the loop first.
|
|
10
|
+
*
|
|
11
|
+
* State markers used:
|
|
12
|
+
* .workflow/state/self-adversary-complete.json
|
|
13
|
+
* Written by the loop when it produces a confident decision.
|
|
14
|
+
* Single-use; cleared on consumption.
|
|
15
|
+
* Shape:
|
|
16
|
+
* {
|
|
17
|
+
* completedAt: ISO timestamp,
|
|
18
|
+
* questionHash: SHA-256-hex (first 16) of the original question,
|
|
19
|
+
* decision, confidence, iterationCount,
|
|
20
|
+
* expiresAt: ISO timestamp (5 min TTL)
|
|
21
|
+
* }
|
|
22
|
+
*
|
|
23
|
+
* .workflow/state/self-adversary-escalation.json
|
|
24
|
+
* Written by the loop when iteration exhausts. Indicates the AI
|
|
25
|
+
* DID iterate but still needs the user. Allows AskUserQuestion to
|
|
26
|
+
* pass through without re-running the loop. Single-use, 5 min TTL.
|
|
27
|
+
*
|
|
28
|
+
* Note: the classifier is async (Haiku call). PreToolUse hooks must
|
|
29
|
+
* return promptly. Two options:
|
|
30
|
+
* A) Block all AskUserQuestion calls if classifier hasn't pre-run,
|
|
31
|
+
* requiring the AI to explicitly invoke the loop first.
|
|
32
|
+
* B) Run classifier inline (async) and block based on result.
|
|
33
|
+
*
|
|
34
|
+
* Approach: (A) primary path, with a synchronous heuristic fallback
|
|
35
|
+
* that catches obvious implementation phrasings. The synchronous
|
|
36
|
+
* heuristic uses keyword presence (NOT user-input parsing — this is
|
|
37
|
+
* AI-authored question text, the "no regex on user answers" rule
|
|
38
|
+
* doesn't apply). The classifier itself is invoked from the user-
|
|
39
|
+
* prompt-submit hook (where async is fine) OR by the AI explicitly
|
|
40
|
+
* via the wogi-self-adversary skill.
|
|
41
|
+
*
|
|
42
|
+
* Fail-open: any error → allow the AskUserQuestion through.
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
const fs = require('node:fs');
|
|
46
|
+
const path = require('node:path');
|
|
47
|
+
const crypto = require('node:crypto');
|
|
48
|
+
|
|
49
|
+
const { PATHS } = require('../../flow-utils');
|
|
50
|
+
const { safeJsonParse } = require('../../flow-io');
|
|
51
|
+
|
|
52
|
+
const COMPLETE_FILE = 'self-adversary-complete.json';
|
|
53
|
+
const ESCALATION_FILE = 'self-adversary-escalation.json';
|
|
54
|
+
const DEFAULT_TTL_SECONDS = 300; // 5 min
|
|
55
|
+
|
|
56
|
+
/** Path of the completion marker file (COMPLETE_FILE) under PATHS.state. */
function getCompletePath() {
  const stateDir = PATHS.state;
  return path.join(stateDir, COMPLETE_FILE);
}
|
|
57
|
+
/** Path of the escalation marker file (ESCALATION_FILE) under PATHS.state. */
function getEscalationPath() {
  const stateDir = PATHS.state;
  return path.join(stateDir, ESCALATION_FILE);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Short fingerprint of a question, used to match markers to questions.
 *
 * @param {*} text - The question text.
 * @returns {string} First 16 hex characters of the SHA-256 digest of `text`,
 *   or '' when `text` is not a string.
 */
function hashQuestion(text) {
  if (typeof text !== 'string') {
    return '';
  }
  const digestHex = crypto.createHash('sha256').update(text).digest('hex');
  return digestHex.substring(0, 16);
}
|
|
63
|
+
|
|
64
|
+
/**
 * Decide whether the self-adversary gate is switched on.
 *
 * The gate is on unless `config.selfAdversaryGate` is exactly `false`, or is
 * an object whose `enabled` property is exactly `false`.
 *
 * @param {Object} [config] - Configuration object; may be absent.
 * @returns {boolean} True when the gate should run.
 */
function isGateEnabled(config) {
  const setting = config?.selfAdversaryGate;
  const disabledOutright = setting === false;
  const disabledViaObject = Boolean(setting) && typeof setting === 'object' && setting.enabled === false;
  return !(disabledOutright || disabledViaObject);
}
|
|
70
|
+
|
|
71
|
+
/**
 * Read a marker via safeJsonParse and drop it if it has expired.
 *
 * @param {string} filePath - Marker location handed to safeJsonParse.
 * @returns {Object|null} The marker data, or null when nothing usable was
 *   read or when `expiresAt` parses to a time earlier than now. A marker with
 *   no `expiresAt`, or one that does not parse as a date, is returned as-is.
 */
function loadMarker(filePath) {
  const marker = safeJsonParse(filePath, null);
  if (marker === null || typeof marker !== 'object') {
    return null;
  }
  if (!marker.expiresAt) {
    return marker;
  }
  const expiryMs = Date.parse(marker.expiresAt);
  const hasExpired = Number.isFinite(expiryMs) && expiryMs < Date.now();
  return hasExpired ? null : marker;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Delete a marker file so it cannot be used again.
 *
 * @param {string} filePath - Marker file to remove.
 * @returns {void} Never throws; any unlink failure is ignored.
 */
function consumeMarker(filePath) {
  try {
    fs.unlinkSync(filePath);
  } catch (_err) {
    /* fine */
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Write the completion marker file at getCompletePath().
 *
 * @param {Object} args
 * @param {string} args.question - Hashed with hashQuestion into `questionHash`.
 * @param {string} args.decision - Stored truncated to 500 characters
 *   ('' when not a string).
 * @param {number} args.confidence - Stored rounded (0 when not finite).
 * @param {number} args.iterationCount - Stored as-is (0 when not finite).
 * @param {number} [args.ttlSec] - Lifetime in seconds; DEFAULT_TTL_SECONDS
 *   when not a finite number.
 * @returns {Object|null} The payload that was written, or null on any error.
 */
function writeCompletionMarker({ question, decision, confidence, iterationCount, ttlSec }) {
  try {
    const ttlSeconds = Number.isFinite(ttlSec) ? ttlSec : DEFAULT_TTL_SECONDS;
    const writtenAtMs = Date.now();
    const expiresAtMs = writtenAtMs + ttlSeconds * 1000;

    const storedDecision = typeof decision === 'string' ? decision.slice(0, 500) : '';
    const storedConfidence = Number.isFinite(confidence) ? Math.round(confidence) : 0;
    const storedIterations = Number.isFinite(iterationCount) ? iterationCount : 0;

    const payload = {
      version: 1,
      completedAt: new Date(writtenAtMs).toISOString(),
      expiresAt: new Date(expiresAtMs).toISOString(),
      questionHash: hashQuestion(question),
      decision: storedDecision,
      confidence: storedConfidence,
      iterationCount: storedIterations
    };

    // Make sure the state directory exists before writing the marker.
    const markerDir = path.dirname(getCompletePath());
    fs.mkdirSync(markerDir, { recursive: true });
    fs.writeFileSync(getCompletePath(), JSON.stringify(payload, null, 2));
    return payload;
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`[self-adversary-gate] writeCompletionMarker failed: ${err.message}`);
    }
    return null;
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Write the escalation marker file at getEscalationPath().
 *
 * @param {Object} args
 * @param {string} args.question - Hashed with hashQuestion into `questionHash`.
 * @param {string} args.decision - Stored as `bestDecision`, truncated to 500
 *   characters ('' when not a string).
 * @param {number} args.confidence - Stored as `finalConfidence`, rounded
 *   (0 when not finite).
 * @param {number} args.iterationCount - Stored as-is (0 when not finite).
 * @param {string} args.reason - Stored as-is ('unknown' when not a string).
 * @param {number} [args.ttlSec] - Lifetime in seconds; DEFAULT_TTL_SECONDS
 *   when not a finite number.
 * @returns {Object|null} The payload that was written, or null on any error.
 */
function writeEscalationMarker({ question, decision, confidence, iterationCount, reason, ttlSec }) {
  try {
    const ttlSeconds = Number.isFinite(ttlSec) ? ttlSec : DEFAULT_TTL_SECONDS;
    const writtenAtMs = Date.now();
    const expiresAtMs = writtenAtMs + ttlSeconds * 1000;

    const storedReason = typeof reason === 'string' ? reason : 'unknown';
    const storedDecision = typeof decision === 'string' ? decision.slice(0, 500) : '';
    const storedConfidence = Number.isFinite(confidence) ? Math.round(confidence) : 0;
    const storedIterations = Number.isFinite(iterationCount) ? iterationCount : 0;

    const payload = {
      version: 1,
      escalatedAt: new Date(writtenAtMs).toISOString(),
      expiresAt: new Date(expiresAtMs).toISOString(),
      questionHash: hashQuestion(question),
      reason: storedReason,
      bestDecision: storedDecision,
      finalConfidence: storedConfidence,
      iterationCount: storedIterations
    };

    // Make sure the state directory exists before writing the marker.
    const markerDir = path.dirname(getEscalationPath());
    fs.mkdirSync(markerDir, { recursive: true });
    fs.writeFileSync(getEscalationPath(), JSON.stringify(payload, null, 2));
    return payload;
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`[self-adversary-gate] writeEscalationMarker failed: ${err.message}`);
    }
    return null;
  }
}
|
|
133
|
+
|
|
134
|
+
// Phrasings that mark a question as implementation-class for the synchronous
// heuristic below.
const IMPLEMENTATION_HEURISTIC_KEYWORDS = [
  /\bmap\(\)\s+(?:or|vs)\s+for(?:-loop)?/i,
  /\bwhich\s+(?:library|framework|algorithm|approach|pattern)\b/i,
  /\bshould\s+(?:i|we)\s+use\s+\w+\s+or\s+\w+/i,
  /\b(?:naming|name)\s+(?:convention|this|the\s+\w+)/i,
  /\b(?:refactor|extract|inline)\s+(?:this|the)\b/i,
  /\btest\s+(?:framework|library)\b/i,
  /\berror\s+handling\s+(?:approach|pattern|style)/i,
  /\bcode\s+(?:style|organization|structure)\b/i
];

// Phrasings that mark a question as product-class. These are consulted before
// the implementation list, so they win when both lists match.
const PRODUCT_HEURISTIC_KEYWORDS = [
  /\bwhat\s+(?:should|do)\s+(?:users?|customers?)\b/i,
  /\b(?:business|product)\s+(?:rule|decision|requirement)\b/i,
  /\bcounts?\s+as\s+(?:done|complete|valid)\b/i,
  /\bwhich\s+(?:behavior|outcome)\s+(?:do\s+you|should)\s+(?:want|prefer)\b/i,
  /\b(?:delete|drop|truncate|remove|destroy)\b.*\b(?:data|table|migration|user)/i
];

/**
 * Synchronous heuristic: does this question text LOOK implementation-class?
 * Used as a fallback when the async classifier hasn't run. Conservative:
 * anything that matches neither list is 'unknown', which the gate lets
 * through. The text is AI-authored question text (not user input), so keyword
 * matching is acceptable here.
 *
 * @param {*} questionText - The question text to inspect.
 * @returns {'product'|'implementation'|'unknown'} 'unknown' for non-strings
 *   and for text matching neither pattern list.
 */
function heuristicCategory(questionText) {
  if (typeof questionText !== 'string') return 'unknown';

  const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(questionText));

  if (matchesAny(PRODUCT_HEURISTIC_KEYWORDS)) return 'product';
  if (matchesAny(IMPLEMENTATION_HEURISTIC_KEYWORDS)) return 'implementation';
  return 'unknown';
}
|
|
171
|
+
|
|
172
|
+
/**
 * PreToolUse intercept on AskUserQuestion.
 *
 * Decision tree:
 *   1. Gate disabled → allow.
 *   2. Tool is not AskUserQuestion → allow.
 *   3. No question text could be extracted → allow.
 *   4. Escalation marker matches this question → consume it and allow.
 *   5. Completion marker matches this question → consume it and allow.
 *   6. Sync heuristic says 'implementation' → block with loop-first instructions.
 *   7. Otherwise → allow.
 *
 * Only the synchronous keyword heuristic is consulted here; no model call is
 * made from this function. Any exception results in "allow" (fail-open).
 *
 * @param {string} toolName - Name of the tool about to run.
 * @param {Object} toolInput - Tool input; `questions[].question` or `prompt`
 *   supply the question text.
 * @param {Object} [config] - Passed to isGateEnabled.
 * @returns {{blocked: boolean, reason?: string, message?: string}}
 */
function checkSelfAdversaryGate(toolName, toolInput, config) {
  try {
    const gateApplies = isGateEnabled(config) && toolName === 'AskUserQuestion';
    if (!gateApplies) return { blocked: false };

    // Extract question text from the tool input shape: a non-empty
    // `questions` array takes priority, with `prompt` as the fallback.
    const questionList = toolInput && Array.isArray(toolInput.questions) ? toolInput.questions : [];
    let questionText = '';
    if (questionList.length > 0) {
      questionText = questionList
        .filter((entry) => entry && typeof entry.question === 'string')
        .map((entry) => entry.question)
        .join('\n');
    } else if (toolInput && typeof toolInput.prompt === 'string') {
      questionText = toolInput.prompt;
    }
    if (!questionText.trim()) return { blocked: false };

    const qHash = hashQuestion(questionText);

    // Markers are single-use: a match is consumed and lets the call through.
    // Escalation is checked first (the loop ran and still needs the user),
    // then completion (the loop decided and this is a follow-up question).
    const markerSources = [
      { locate: getEscalationPath, reason: 'escalation-marker-consumed' },
      { locate: getCompletePath, reason: 'completion-marker-consumed' }
    ];
    for (const { locate, reason } of markerSources) {
      const marker = loadMarker(locate());
      if (marker && marker.questionHash === qHash) {
        consumeMarker(locate());
        return { blocked: false, reason };
      }
    }

    // Sync heuristic: only an 'implementation' verdict blocks.
    const category = heuristicCategory(questionText);
    if (category === 'implementation') {
      return {
        blocked: true,
        reason: 'implementation-heuristic',
        message: buildBlockMessage(questionText, qHash)
      };
    }
    return { blocked: false, reason: `heuristic-${category}` };
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`[self-adversary-gate] checkSelfAdversaryGate error (fail-open): ${err.message}`);
    }
    return { blocked: false };
  }
}
|
|
243
|
+
|
|
244
|
+
/**
 * Compose the multi-line message shown when AskUserQuestion is blocked.
 *
 * @param {string} questionText - The intercepted question; at most its first
 *   240 characters are quoted, followed by '…' when the text is longer.
 * @param {string} qHash - Question hash printed in the message.
 * @returns {string} The message, lines joined with '\n'.
 */
function buildBlockMessage(questionText, qHash) {
  const PREVIEW_LIMIT = 240;
  const ellipsis = questionText.length > PREVIEW_LIMIT ? '…' : '';
  const quoted = `${questionText.slice(0, PREVIEW_LIMIT)}${ellipsis}`;

  // Each inner array is one paragraph; paragraphs are separated by a single
  // blank line in the final text.
  const paragraphs = [
    ['BLOCKED: AskUserQuestion looks like an implementation-class question.'],
    [
      'WogiFlow user directive (wf-e399bd8d): when you have doubt about an',
      'implementation decision (code structure, library choice, naming,',
      'refactor mechanics, etc.), self-adversary FIRST — iterate generator',
      'and adversary on different models until ≥95% confidence. Only escalate',
      'to the user if confidence stays low after the loop.'
    ],
    [
      `Question intercepted: "${quoted}"`,
      `Question hash: ${qHash}`
    ],
    [
      'How to proceed:',
      ' 1. RECOMMENDED — invoke the self-adversary skill:',
      ' Skill(skill="wogi-self-adversary", args="<the question + brief context>")',
      ' The skill runs the loop, writes a completion or escalation marker,',
      ' then either acts on the high-confidence decision or re-issues the',
      ' AskUserQuestion (which will now pass).'
    ],
    [
      ' 2. ESCAPE HATCH — if this is genuinely product / architecture /',
      ' sensitive, the heuristic is wrong. Re-phrase the question to make',
      ' the product-domain nature explicit (e.g., reference the user as',
      ' decision-maker, name business constraints), and try again.'
    ],
    [' 3. OVERRIDE — set the question metadata to bypass (advanced only).'],
    ['See: scripts/hooks/core/self-adversary-gate.js, wf-e399bd8d.']
  ];

  return paragraphs.map((lines) => lines.join('\n')).join('\n\n');
}
|
|
275
|
+
|
|
276
|
+
module.exports = {
|
|
277
|
+
checkSelfAdversaryGate,
|
|
278
|
+
hashQuestion,
|
|
279
|
+
isGateEnabled,
|
|
280
|
+
loadMarker,
|
|
281
|
+
consumeMarker,
|
|
282
|
+
writeCompletionMarker,
|
|
283
|
+
writeEscalationMarker,
|
|
284
|
+
heuristicCategory,
|
|
285
|
+
getCompletePath,
|
|
286
|
+
getEscalationPath,
|
|
287
|
+
COMPLETE_FILE,
|
|
288
|
+
ESCALATION_FILE,
|
|
289
|
+
DEFAULT_TTL_SECONDS,
|
|
290
|
+
IMPLEMENTATION_HEURISTIC_KEYWORDS,
|
|
291
|
+
PRODUCT_HEURISTIC_KEYWORDS
|
|
292
|
+
};
|