wogiflow 2.30.4 → 2.31.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/wogi-self-adversary.md +130 -0
- package/.claude/docs/config-schema.md +219 -0
- package/package.json +2 -2
- package/scripts/flow-defer-auth.js +41 -10
- package/scripts/flow-deferral-classifier-ai.js +3 -1
- package/scripts/flow-impl-question-classifier.js +178 -0
- package/scripts/flow-self-adversary-loop.js +422 -0
- package/scripts/flow-standards-gate.js +3 -1
- package/scripts/hooks/core/deferral-classifier.js +3 -0
- package/scripts/hooks/core/deferral-gate.js +6 -3
- package/scripts/hooks/core/gate-orchestrator.js +26 -1
- package/scripts/hooks/core/pre-tool-deps.js +11 -0
- package/scripts/hooks/core/pre-tool-orchestrator.js +21 -0
- package/scripts/hooks/core/self-adversary-gate.js +295 -0
- package/scripts/hooks/core/session-start-orchestrator.js +269 -0
- package/scripts/hooks/core/stop-orchestrator.js +123 -0
- package/scripts/hooks/core/task-boundary-restart-coordinator.js +84 -0
- package/scripts/hooks/core/user-prompt-orchestrator.js +201 -0
- package/scripts/hooks/core/workspace-stop-gates.js +133 -0
- package/scripts/hooks/core/workspace-stop-notify.js +76 -0
- package/scripts/hooks/entry/claude-code/session-start.js +19 -352
- package/scripts/hooks/entry/claude-code/stop.js +10 -485
- package/scripts/hooks/entry/claude-code/user-prompt-submit.js +9 -277
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# /wogi-self-adversary — Self-adversary decision loop
|
|
2
|
+
|
|
3
|
+
Iterate a generator and adversary on different models until you reach ≥95% confidence on an implementation-class decision. Only escalate to the user if confidence stays low after the loop.
|
|
4
|
+
|
|
5
|
+
**Triggers**: invoked by the AI itself when blocked by the self-adversary PreToolUse gate (wf-e399bd8d), OR by the user directly.
|
|
6
|
+
|
|
7
|
+
## Usage
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
/wogi-self-adversary "<question + brief context>"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
The argument should be the question the AI was about to ask the user, optionally followed by relevant context (files, prior decisions, constraints). Both will be passed to the loop.
|
|
14
|
+
|
|
15
|
+
## How it works
|
|
16
|
+
|
|
17
|
+
For Claude inside this skill — read carefully, then execute.
|
|
18
|
+
|
|
19
|
+
### Step 1: Parse the argument
|
|
20
|
+
|
|
21
|
+
The ARGUMENTS string contains the question + context. Split on a sensible boundary (first newline, or `--context:` separator if present). If no clear split, treat the entire argument as the question and leave context empty.
|
|
22
|
+
|
|
23
|
+
### Step 2: Run the loop
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
const { runSelfAdversaryLoop } = require('wogiflow/scripts/flow-self-adversary-loop');
|
|
27
|
+
const result = await runSelfAdversaryLoop({
|
|
28
|
+
question: questionText,
|
|
29
|
+
context: contextText,
|
|
30
|
+
maxIterations: 8,
|
|
31
|
+
targetConfidence: 95
|
|
32
|
+
});
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Alternatively, run the loop via Bash if a CLI wrapper exists; otherwise invoke it inline through Node.
|
|
36
|
+
|
|
37
|
+
### Step 3: Handle the result
|
|
38
|
+
|
|
39
|
+
**Three possible outcomes:**
|
|
40
|
+
|
|
41
|
+
**A. `escalate: false`** — confident decision reached.
|
|
42
|
+
|
|
43
|
+
1. Display the decision + confidence + iteration count to the user as a summary.
|
|
44
|
+
2. Write the completion marker so the next `AskUserQuestion` (if any) is allowed:
|
|
45
|
+
```js
|
|
46
|
+
const gate = require('wogiflow/scripts/hooks/core/self-adversary-gate');
|
|
47
|
+
gate.writeCompletionMarker({
|
|
48
|
+
question: questionText,
|
|
49
|
+
decision: result.decision,
|
|
50
|
+
confidence: result.confidence,
|
|
51
|
+
iterationCount: result.iterationCount
|
|
52
|
+
});
|
|
53
|
+
```
|
|
54
|
+
3. ACT on the decision in your subsequent tool calls — no more asking, no hedging.
|
|
55
|
+
|
|
56
|
+
**B. `escalate: true` (reason: `low-confidence` / `max-iterations-exhausted`)** — loop ran but couldn't converge.
|
|
57
|
+
|
|
58
|
+
1. Write the escalation marker (allows the next `AskUserQuestion` to pass without re-blocking):
|
|
59
|
+
```js
|
|
60
|
+
gate.writeEscalationMarker({
|
|
61
|
+
question: questionText,
|
|
62
|
+
decision: result.decision,
|
|
63
|
+
confidence: result.confidence,
|
|
64
|
+
iterationCount: result.iterationCount,
|
|
65
|
+
reason: result.reason
|
|
66
|
+
});
|
|
67
|
+
```
|
|
68
|
+
2. Surface to the user with: the question, what the loop concluded (best decision + confidence), why iteration couldn't push past the threshold, and what specific resolution you need from them.
|
|
69
|
+
3. Call `AskUserQuestion` (which now passes the gate).
|
|
70
|
+
|
|
71
|
+
**C. `escalate: true` (reason: `no-credentials` / `model-error` / etc.)** — loop couldn't run.
|
|
72
|
+
|
|
73
|
+
1. Note the failure mode briefly to the user.
|
|
74
|
+
2. Write the escalation marker and surface the original question.
|
|
75
|
+
|
|
76
|
+
### Step 4: Audit trail
|
|
77
|
+
|
|
78
|
+
Append a one-line summary entry to `.workflow/state/self-adversary-log.json` (entries are only ever appended, and the oldest are dropped once the log exceeds 100 entries):
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"timestamp": "...",
|
|
83
|
+
"questionHash": "...",
|
|
84
|
+
"iterations": N,
|
|
85
|
+
"finalConfidence": X,
|
|
86
|
+
"outcome": "decided" | "escalated",
|
|
87
|
+
"reason": "..."
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
This lets the user audit how often the loop converges vs escalates. Helps tune `targetConfidence` and `maxIterations` over time.
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
`.workflow/config.json`:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"selfAdversaryGate": {
|
|
100
|
+
"enabled": true,
|
|
101
|
+
"targetConfidence": 95,
|
|
102
|
+
"maxIterations": 8,
|
|
103
|
+
"generatorModel": "anthropic:claude-sonnet-4-6",
|
|
104
|
+
"adversaryModel": "anthropic:claude-3-5-haiku-latest"
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
- `enabled: false` — disables the PreToolUse gate AND prevents the skill from running. Reverts to "always ask the user".
|
|
110
|
+
- `targetConfidence` — clamped to [50, 99]; default 95.
|
|
111
|
+
- `maxIterations` — clamped to [1, 12]; default 8.
|
|
112
|
+
|
|
113
|
+
## Files
|
|
114
|
+
|
|
115
|
+
| File | Purpose |
|
|
116
|
+
|---|---|
|
|
117
|
+
| `scripts/flow-self-adversary-loop.js` | Core loop (generator ↔ adversary, iteration memory in-process). |
|
|
118
|
+
| `scripts/flow-impl-question-classifier.js` | Haiku classifier — implementation vs product/architecture/sensitive. |
|
|
119
|
+
| `scripts/hooks/core/self-adversary-gate.js` | PreToolUse intercept + markers. |
|
|
120
|
+
| `.workflow/state/self-adversary-complete.json` | Single-use marker, allows next AskUserQuestion. |
|
|
121
|
+
| `.workflow/state/self-adversary-escalation.json` | Single-use marker, allows next AskUserQuestion after loop concluded "needs-user". |
|
|
122
|
+
| `.workflow/state/self-adversary-log.json` | Audit trail (entries appended per run, capped at the most recent 100). |
|
|
123
|
+
|
|
124
|
+
## Why this exists
|
|
125
|
+
|
|
126
|
+
User directive 2026-05-11 (wf-e399bd8d):
|
|
127
|
+
|
|
128
|
+
> "Always do highest standards, best approach, don't compromise on quality for token savings. Challenge yourself a few times and most of the times you get to a point where you already know what to do with very high confidence, 90 or 95+ percent. When you have doubt that you'll be able to challenge yourself, use adversary research. And do it in a few iterations until you're confident. And only if you're still not confident, then ask the user."
|
|
129
|
+
|
|
130
|
+
The pattern maps to Self-Refine (Madaan et al. 2023) + Reflexion (Shinn et al. 2023) + Multi-Agent Reflexion (different-model adversary escapes local optima). WogiFlow already runs an Architect+Adversary loop at the PLAN level (IGR Step 1.55/1.57). This skill is the implementation-decision analogue, finer-grained, runs during coding rather than spec_review.
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# WogiFlow Config Schema Reference
|
|
2
|
+
|
|
3
|
+
Authoritative reference for `.workflow/config.json` keys. Defaults live in `scripts/flow-config-defaults.js`.
|
|
4
|
+
|
|
5
|
+
Created: 2026-05-11 (wf-6e31850e A-5)
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Gates
|
|
10
|
+
|
|
11
|
+
### `deferralGate` (wf-f9912af6, wf-b8839d99)
|
|
12
|
+
|
|
13
|
+
Prevents AI from silently writing `status: deferred*` to review/audit findings without user authorization.
|
|
14
|
+
|
|
15
|
+
| Key | Type | Default | Description |
|
|
16
|
+
|---|---|---|---|
|
|
17
|
+
| `enabled` | bool | `true` | Master switch |
|
|
18
|
+
| `authTtlSeconds` | int | `600` | Auth marker lifetime (10 min) |
|
|
19
|
+
| `classifyUserPrompts` | bool | `true` | Run AI classifier at UserPromptSubmit |
|
|
20
|
+
| `minClassifierConfidence` | int | `75` | Confidence threshold for treating intent as actionable |
|
|
21
|
+
|
|
22
|
+
### `selfAdversaryGate` (wf-e399bd8d)
|
|
23
|
+
|
|
24
|
+
Intercepts AskUserQuestion for implementation-class questions, requires self-adversary loop first.
|
|
25
|
+
|
|
26
|
+
| Key | Type | Default | Description |
|
|
27
|
+
|---|---|---|---|
|
|
28
|
+
| `enabled` | bool | `true` | Master switch |
|
|
29
|
+
| `targetConfidence` | int | `95` | Loop terminates when confidence ≥ this. Range [50, 99]. |
|
|
30
|
+
| `maxIterations` | int | `8` | Loop iteration cap. Range [1, 12]. |
|
|
31
|
+
| `generatorModel` | string | `anthropic:claude-sonnet-4-6` | Model for the GENERATOR pass |
|
|
32
|
+
| `adversaryModel` | string | `anthropic:claude-3-5-haiku-latest` | Model for the ADVERSARY pass (MUST differ from generator) |
|
|
33
|
+
|
|
34
|
+
### `longInputGate` (P11.5 mechanical enforcement)
|
|
35
|
+
|
|
36
|
+
Forces long-form prompts without source-link through `/wogi-extract-review`.
|
|
37
|
+
|
|
38
|
+
| Key | Type | Default | Description |
|
|
39
|
+
|---|---|---|---|
|
|
40
|
+
| `enabled` | bool | `true` | Master switch |
|
|
41
|
+
| `lineThreshold` | int | `40` | Lines above which prompt is considered long-form |
|
|
42
|
+
| `itemThreshold` | int | `5` | Discrete-item count above which prompt is considered long-form |
|
|
43
|
+
|
|
44
|
+
### `researchRequiredGate` (wf-5cd71b1f)
|
|
45
|
+
|
|
46
|
+
Forces evidence-reading before answering diagnostic prompts.
|
|
47
|
+
|
|
48
|
+
| Key | Type | Default | Description |
|
|
49
|
+
|---|---|---|---|
|
|
50
|
+
| `enabled` | bool | `true` | Master switch |
|
|
51
|
+
| `requiredEvidence` | int | `2` | Minimum Read calls against evidence prefixes |
|
|
52
|
+
| `maxAttempts` | int | `3` | Soft re-prompt attempts before hard-stop |
|
|
53
|
+
|
|
54
|
+
### `phaseGate`
|
|
55
|
+
|
|
56
|
+
Controls Edit/Write/Bash blocking based on workflow phase.
|
|
57
|
+
|
|
58
|
+
| Key | Type | Default | Description |
|
|
59
|
+
|---|---|---|---|
|
|
60
|
+
| `hooks.rules.phaseGate.enabled` | bool | `false` | Strict; only blocks when `true`. State writing happens regardless (wf-88a08fd4). |
|
|
61
|
+
| `hooks.rules.phaseReadGate.enabled` | bool | `true` | Block Edit/Write/Bash until current phase's docs file is read |
|
|
62
|
+
|
|
63
|
+
### `taskGate`
|
|
64
|
+
|
|
65
|
+
Controls whether Edit/Write/Bash require an active task.
|
|
66
|
+
|
|
67
|
+
| Key | Type | Default | Description |
|
|
68
|
+
|---|---|---|---|
|
|
69
|
+
| `enforcement.taskGating.enabled` | bool | `true` | Master switch |
|
|
70
|
+
| `enforcement.taskGating.blockWithoutTask` | bool | `true` | Block edits without active task |
|
|
71
|
+
| `enforcement.taskGating.autoCreateTask` | bool | `false` | Auto-create quick task for ad-hoc edits |
|
|
72
|
+
| `enforcement.strictMode` | bool | `true` | Strict-mode shortcut |
|
|
73
|
+
| `enforcement.requireTaskForImplementation` | bool | `true` | Requires task for implementation edits |
|
|
74
|
+
| `enforcement.blockAutoTask` | bool | `false` | Block edits even when auto-task was created |
|
|
75
|
+
|
|
76
|
+
## Review system
|
|
77
|
+
|
|
78
|
+
### `review.framingPass` (IGR v6.0 Phase 0)
|
|
79
|
+
|
|
80
|
+
| Key | Type | Default |
|
|
81
|
+
|---|---|---|
|
|
82
|
+
| `enabled` | bool | `true` |
|
|
83
|
+
| `itemReconciliation` | bool | `true` |
|
|
84
|
+
| `adversaryInExploratory` | bool | `false` |
|
|
85
|
+
|
|
86
|
+
### `review.evidenceTiers` (IGR v6.0)
|
|
87
|
+
|
|
88
|
+
| Key | Type | Default |
|
|
89
|
+
|---|---|---|
|
|
90
|
+
| `enabled` | bool | `true` |
|
|
91
|
+
| `capByTier` | bool | `true` |
|
|
92
|
+
|
|
93
|
+
### `review.confidenceTiers` (IGR v6.0)
|
|
94
|
+
|
|
95
|
+
| Key | Type | Default |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| `enabled` | bool | `true` |
|
|
98
|
+
|
|
99
|
+
### `review.adversaryPass` (IGR v6.0 Phase 2.8)
|
|
100
|
+
|
|
101
|
+
| Key | Type | Default |
|
|
102
|
+
|---|---|---|
|
|
103
|
+
| `enabled` | bool | `true` |
|
|
104
|
+
| `adversaryModel` | object | mapping: agents-on-X → adversary-on-Y |
|
|
105
|
+
| `applySeverityAdjustments` | bool | `true` |
|
|
106
|
+
| `applyScopeDrift` | bool | `true` |
|
|
107
|
+
| `blockOnBlockVerdict` | bool | `true` |
|
|
108
|
+
|
|
109
|
+
### `review.completionTruthGate`
|
|
110
|
+
|
|
111
|
+
| Key | Type | Default |
|
|
112
|
+
|---|---|---|
|
|
113
|
+
| `enabled` | bool | `true` |
|
|
114
|
+
| `requireInteractiveForFixed` | bool | `true` |
|
|
115
|
+
|
|
116
|
+
### `review.gitVerifiedClaims`
|
|
117
|
+
|
|
118
|
+
| Key | Type | Default |
|
|
119
|
+
|---|---|---|
|
|
120
|
+
| `enabled` | bool | `true` |
|
|
121
|
+
| `verifyFileCreation` | bool | `true` |
|
|
122
|
+
| `verifyContentMatch` | bool | `true` |
|
|
123
|
+
| `blockOnMismatch` | bool | `true` |
|
|
124
|
+
|
|
125
|
+
### `review.agents`
|
|
126
|
+
|
|
127
|
+
| Key | Type | Default |
|
|
128
|
+
|---|---|---|
|
|
129
|
+
| `core` | array | `["code-logic", "security", "architecture"]` |
|
|
130
|
+
| `optional` | array | `["performance"]` |
|
|
131
|
+
| `projectRules` | bool | `true` |
|
|
132
|
+
| `projectRulesSource` | string | `"decisions.md"` |
|
|
133
|
+
| `maxParallelAgents` | int | `6` |
|
|
134
|
+
|
|
135
|
+
### `review.minFindings` / `review.requireJustificationIfClean`
|
|
136
|
+
|
|
137
|
+
| Key | Type | Default |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| `minFindings` | int | `3` |
|
|
140
|
+
| `requireJustificationIfClean` | bool | `true` |
|
|
141
|
+
|
|
142
|
+
## IGR (Intent-Grounded Reasoning)
|
|
143
|
+
|
|
144
|
+
### `intentGroundedReasoning`
|
|
145
|
+
|
|
146
|
+
| Key | Type | Default |
|
|
147
|
+
|---|---|---|
|
|
148
|
+
| `enabled` | bool | `true` |
|
|
149
|
+
|
|
150
|
+
### `architectRequired` (wf-037f8d66)
|
|
151
|
+
|
|
152
|
+
| Key | Type | Default |
|
|
153
|
+
|---|---|---|
|
|
154
|
+
| `enabled` | bool | `true` |
|
|
155
|
+
|
|
156
|
+
## Workspace mode
|
|
157
|
+
|
|
158
|
+
### `workspace`
|
|
159
|
+
|
|
160
|
+
| Key | Type | Default |
|
|
161
|
+
|---|---|---|
|
|
162
|
+
| `toolFirstTurnGate.enabled` | bool | `true` |
|
|
163
|
+
| `toolFirstTurnGate.strict` | bool | `true` |
|
|
164
|
+
| `aiWorkerQuestionClassifier.enabled` | bool | `true` |
|
|
165
|
+
| `aiWorkerQuestionClassifier.minConfidence` | int | `70` |
|
|
166
|
+
| `aiWorkerQuestionClassifier.model` | string | `claude-3-5-haiku-latest` |
|
|
167
|
+
| `blockAskUserQuestionInWorker` | bool | `true` |
|
|
168
|
+
| `autoPickupChannelDispatches` | bool | `true` |
|
|
169
|
+
|
|
170
|
+
## Autonomous mode
|
|
171
|
+
|
|
172
|
+
### `autonomousMode`
|
|
173
|
+
|
|
174
|
+
| Key | Type | Default |
|
|
175
|
+
|---|---|---|
|
|
176
|
+
| `cascadeStrategy` | string | `"auto"` |
|
|
177
|
+
| `maxAdversaryInvocations` | int | `30` |
|
|
178
|
+
| `stalenessThresholdMs` | int | `3600000` |
|
|
179
|
+
|
|
180
|
+
## Sprint reset
|
|
181
|
+
|
|
182
|
+
### `sprintReset`
|
|
183
|
+
|
|
184
|
+
| Key | Type | Default |
|
|
185
|
+
|---|---|---|
|
|
186
|
+
| `enabled` | bool | `true` |
|
|
187
|
+
| `criteriaPerSprint` | int | `3` |
|
|
188
|
+
| `minTaskCriteria` | int | `5` |
|
|
189
|
+
|
|
190
|
+
## Misc
|
|
191
|
+
|
|
192
|
+
### `mainModeQuestionClassifier`
|
|
193
|
+
|
|
194
|
+
| Key | Type | Default |
|
|
195
|
+
|---|---|---|
|
|
196
|
+
| `enabled` | bool | `true` |
|
|
197
|
+
| `minConfidence` | int | `70` |
|
|
198
|
+
| `model` | string | `claude-3-5-haiku-latest` |
|
|
199
|
+
|
|
200
|
+
### `taskBoundaryReset`
|
|
201
|
+
|
|
202
|
+
| Key | Type | Default |
|
|
203
|
+
|---|---|---|
|
|
204
|
+
| `enabled` | bool | varies |
|
|
205
|
+
| `autoPickupNextTask` | bool | `true` |
|
|
206
|
+
|
|
207
|
+
### `bulkOrchestrator`
|
|
208
|
+
|
|
209
|
+
| Key | Type | Default |
|
|
210
|
+
|---|---|---|
|
|
211
|
+
| `enabled` | bool | `true` |
|
|
212
|
+
| `parallelLimit` | int | `3` |
|
|
213
|
+
| `useWorktrees` | bool | `true` |
|
|
214
|
+
| `onFailure` | string | `"stop-dependent"` |
|
|
215
|
+
| `summaryDepth` | string | `"standard"` |
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
This is a hand-curated reference. The authoritative source is `scripts/flow-config-defaults.js` — when in doubt, read that file.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wogiflow",
|
|
3
|
-
"version": "2.30.4",
|
|
3
|
+
"version": "2.31.1",
|
|
4
4
|
"description": "AI-powered development workflow management system with multi-model support",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
},
|
|
11
11
|
"scripts": {
|
|
12
12
|
"flow": "./scripts/flow",
|
|
13
|
-
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js tests/flow-standards-forbidden-patterns.test.js tests/flow-hooks-architect-required-gate.test.js tests/flow-architect-runs.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
13
|
+
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js tests/flow-standards-forbidden-patterns.test.js tests/flow-hooks-architect-required-gate.test.js tests/flow-architect-runs.test.js tests/flow-installer-forbidden-patterns.test.js tests/flow-deferral-classifier-ai.test.js tests/flow-no-defer-policy.test.js tests/flow-self-adversary-loop.test.js tests/flow-impl-question-classifier.test.js tests/flow-hooks-self-adversary-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
14
14
|
"test:syntax": "find scripts/ lib/ -name '*.js' -not -path '*/node_modules/*' -exec node --check {} +",
|
|
15
15
|
"lint": "eslint scripts/ lib/ tests/",
|
|
16
16
|
"lint:ci": "eslint scripts/ lib/ tests/ --max-warnings 0",
|
|
@@ -32,17 +32,48 @@ function parseArgs(argv) {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
function cmdGrant(args) {
|
|
35
|
-
// wf-b8839d99: Refuse to grant when invoked from a non-TTY context
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
35
|
+
// wf-b8839d99: Refuse to grant when invoked from a non-TTY context.
|
|
36
|
+
// wf-6e31850e (S-5): Defense-in-depth — also check parent process name.
|
|
37
|
+
// PTY allocation can fake TTY; checking parent process binds the gate to
|
|
38
|
+
// an actual shell. Falls back gracefully if /proc isn't queryable (macOS,
|
|
39
|
+
// restricted environments) — keeps the TTY check as primary signal.
|
|
40
40
|
//
|
|
41
|
-
// Override: --i-am-human bypasses
|
|
42
|
-
//
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
// Override: --i-am-human bypasses both checks. Logged to shell history;
|
|
42
|
+
// CI pipelines that need to grant must explicitly opt in.
|
|
43
|
+
function detectParentShell() {
|
|
44
|
+
try {
|
|
45
|
+
const ppid = process.ppid;
|
|
46
|
+
if (!ppid) return null;
|
|
47
|
+
// Linux: /proc/<ppid>/comm contains the parent process name
|
|
48
|
+
const fs = require('node:fs');
|
|
49
|
+
try {
|
|
50
|
+
const comm = fs.readFileSync(`/proc/${ppid}/comm`, 'utf-8').trim();
|
|
51
|
+
if (/^(bash|zsh|fish|sh|ksh|dash|tcsh)$/.test(comm)) return comm;
|
|
52
|
+
return `not-a-shell:${comm}`;
|
|
53
|
+
} catch (_err) {
|
|
54
|
+
// macOS / Windows / restricted: fall back to ps
|
|
55
|
+
const { execSync } = require('node:child_process');
|
|
56
|
+
try {
|
|
57
|
+
const out = execSync(`ps -p ${ppid} -o comm=`, { encoding: 'utf-8', timeout: 1000 }).trim();
|
|
58
|
+
const base = require('node:path').basename(out);
|
|
59
|
+
if (/^(-?bash|-?zsh|-?fish|-?sh|-?ksh|-?dash|-?tcsh)$/.test(base)) return base;
|
|
60
|
+
return `not-a-shell:${base}`;
|
|
61
|
+
} catch (_err2) {
|
|
62
|
+
return null; // ps unavailable — fall back to TTY check only
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
} catch (_err) {
|
|
66
|
+
return null;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const ttySignal = Boolean(process.stdin.isTTY);
|
|
71
|
+
const parentShell = detectParentShell();
|
|
72
|
+
const parentIsShell = parentShell && !parentShell.startsWith('not-a-shell:');
|
|
73
|
+
const parentSignal = parentShell === null ? null : parentIsShell; // null = couldn't detect
|
|
74
|
+
// Human if: explicit --i-am-human OR (TTY AND (parent is shell OR parent undetectable))
|
|
75
|
+
const isHuman = args['i-am-human'] === true ||
|
|
76
|
+
(ttySignal && parentSignal !== false);
|
|
46
77
|
if (!isHuman) {
|
|
47
78
|
console.error('grant: refused — non-TTY invocation detected.');
|
|
48
79
|
console.error('');
|
|
@@ -169,7 +169,9 @@ async function classifyUserDeferralIntent(userPrompt, options = {}) {
|
|
|
169
169
|
});
|
|
170
170
|
} catch (err) {
|
|
171
171
|
if (process.env.DEBUG) {
|
|
172
|
-
|
|
172
|
+
// wf-6e31850e (S-2): sanitize potential API-key leakage in error messages.
|
|
173
|
+
const safe = String(err.message || '').replace(/sk-[A-Za-z0-9_-]{10,}/g, 'sk-***');
|
|
174
|
+
console.error(`[deferral-classifier-ai] model call failed: ${safe}`);
|
|
173
175
|
}
|
|
174
176
|
return { classified: false, reason: 'model-error' };
|
|
175
177
|
}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow — Implementation-Question Classifier (wf-e399bd8d)
|
|
5
|
+
*
|
|
6
|
+
* Classifies an "AI is about to ask the user" question to decide whether
|
|
7
|
+
* the self-adversary loop should run first or the question should reach
|
|
8
|
+
* the user directly.
|
|
9
|
+
*
|
|
10
|
+
* Four categories:
|
|
11
|
+
* implementation — code structure, library/algorithm choice, naming,
|
|
12
|
+
* refactor mechanics, testing approach. The AI should
|
|
13
|
+
* self-adversary (likely high enough confidence).
|
|
14
|
+
* product — domain semantics, user-facing behavior, what to
|
|
15
|
+
* SHOW the user, what counts as "done" for the
|
|
16
|
+
* business. The AI cannot self-adversary; ask user.
|
|
17
|
+
* architecture — system-design tradeoffs (DB choice, deployment
|
|
18
|
+
* topology, public API shape). Tier-3: existing
|
|
19
|
+
* researchReasoningGate handles this with adversary;
|
|
20
|
+
* the new loop can also handle it but caller decides.
|
|
21
|
+
* sensitive — destructive operations (delete, force-push, drop),
|
|
22
|
+
* cross-boundary commitments (notify users, send
|
|
23
|
+
* emails). Always ask.
|
|
24
|
+
*
|
|
25
|
+
* The classifier is a small Haiku call. Fail-open: any error → ask
|
|
26
|
+
* (treat as if classification said "product"), preserving prior
|
|
27
|
+
* behavior. This avoids the failure shape from wf-b8839d99 (regex
|
|
28
|
+
* silently misclassifying).
|
|
29
|
+
*
|
|
30
|
+
* Note: this is interpretation of an AI-AUTHORED question (the question
|
|
31
|
+
* the AI is about to ask the user). It is NOT user-input parsing — so
|
|
32
|
+
* the "no regex on user answers" rule from wf-b8839d99 doesn't constrain
|
|
33
|
+
* us. We still use AI here because hedging vocabulary for implementation
|
|
34
|
+
* vs product is unbounded.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
const DEFAULT_MIN_CONFIDENCE = 75;
|
|
38
|
+
const DEFAULT_MODEL = 'anthropic:claude-3-5-haiku-latest';
|
|
39
|
+
const MAX_QUESTION_CHARS = 3000;
|
|
40
|
+
const MAX_TOKENS = 300;
|
|
41
|
+
const TEMPERATURE = 0.0;
|
|
42
|
+
|
|
43
|
+
const { DANGEROUS_KEYS } = require('./flow-io');
|
|
44
|
+
|
|
45
|
+
function hasDangerousKeys(value) {
|
|
46
|
+
if (!value || typeof value !== 'object') return false;
|
|
47
|
+
if (Array.isArray(value)) return value.some(hasDangerousKeys);
|
|
48
|
+
for (const key of Object.keys(value)) {
|
|
49
|
+
if (DANGEROUS_KEYS.has(key)) return true;
|
|
50
|
+
if (hasDangerousKeys(value[key])) return true;
|
|
51
|
+
}
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
 * Builds the prompt sent to the classifier model.
 *
 * The question is coerced to a string (falsy values become the empty string),
 * cut to at most MAX_QUESTION_CHARS characters and placed between the
 * [QUESTION_START] / [QUESTION_END] markers. The rest of the prompt is fixed
 * text: category definitions, rules, the required JSON shape and examples.
 *
 * @param {*} questionText - The question the AI is about to ask the user.
 * @returns {string} The complete classifier prompt.
 */
function buildClassifierPrompt(questionText) {
  const clippedQuestion = String(questionText || '').slice(0, MAX_QUESTION_CHARS);

  return `You classify the type of question an AI development assistant is about to ask the user. The user has instructed the AI to STOP asking implementation-class questions — instead, the AI should iterate generator↔adversary on a different model until ≥95% confidence. Product, architecture, or sensitive questions still reach the user normally.

[QUESTION_START]
${clippedQuestion}
[QUESTION_END]

Four categories:

IMPLEMENTATION — code structure, library or algorithm choice, naming,
refactor mechanics, test framework picks, error-handling shape, code
organization, idiom selection. The AI can reason this out with research.

PRODUCT — domain semantics, user-facing behavior decisions, feature
scope, what counts as "done" for the business, copy/tone, UX flow
decisions. The AI cannot reason its way to these without the owner.

ARCHITECTURE — system-design tradeoffs (database choice, deployment
topology, public API shape, multi-tenant boundaries). High-stakes;
self-adversary alone may not be enough but more iteration helps.

SENSITIVE — destructive operations (delete data, force-push, drop
table), cross-boundary commitments (notify users, send emails),
legal/compliance gates. Always ask the user.

CRITICAL RULES:
1. When ambiguous, return PRODUCT — the cost of mis-asking is low, the
cost of mis-acting is high.
2. Even if the question phrasing is technical, ask whether the ANSWER
depends on user-only knowledge. "Which date format do users
prefer?" — phrasing is technical, answer is product.
3. Confidence: only ≥80 if the category is unambiguous.

Return JSON only, no prose, no markdown fences:
{
"category": "implementation" | "product" | "architecture" | "sensitive",
"confidence": 0-100,
"reason": "one short sentence"
}

Examples:
- "Should this be a map() or for-loop?" → {"category":"implementation","confidence":95,"reason":"pure code-style choice"}
- "Which date format do users prefer?" → {"category":"product","confidence":90,"reason":"answer depends on user preference"}
- "Should we use Postgres or MongoDB?" → {"category":"architecture","confidence":85,"reason":"system-design tradeoff"}
- "OK to delete the migration table?" → {"category":"sensitive","confidence":95,"reason":"destructive operation"}
- "Should I add error handling here?" → {"category":"implementation","confidence":85,"reason":"code-quality choice the AI can research"}`;
}
|
|
102
|
+
|
|
103
|
+
async function classifyImplementationQuestion(questionText, options = {}) {
|
|
104
|
+
const minConfidence = Number.isFinite(options.minConfidence) ? options.minConfidence : DEFAULT_MIN_CONFIDENCE;
|
|
105
|
+
const model = options.model || DEFAULT_MODEL;
|
|
106
|
+
|
|
107
|
+
if (typeof questionText !== 'string' || questionText.trim().length === 0) {
|
|
108
|
+
return { classified: false, reason: 'empty-question' };
|
|
109
|
+
}
|
|
110
|
+
if (!process.env.ANTHROPIC_API_KEY) {
|
|
111
|
+
return { classified: false, reason: 'no-credentials' };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
let callModel;
|
|
115
|
+
try {
|
|
116
|
+
({ callModel } = require('./flow-model-caller'));
|
|
117
|
+
} catch (_err) {
|
|
118
|
+
return { classified: false, reason: 'no-model-caller' };
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
let result;
|
|
122
|
+
try {
|
|
123
|
+
result = await callModel(model, buildClassifierPrompt(questionText), {
|
|
124
|
+
temperature: TEMPERATURE,
|
|
125
|
+
maxTokens: MAX_TOKENS
|
|
126
|
+
});
|
|
127
|
+
} catch (err) {
|
|
128
|
+
if (process.env.DEBUG) {
|
|
129
|
+
// wf-6e31850e (S-2): sanitize potential API-key leakage in error messages.
|
|
130
|
+
const safe = String(err.message || '').replace(/sk-[A-Za-z0-9_-]{10,}/g, 'sk-***');
|
|
131
|
+
console.error(`[impl-question-classifier] model call failed: ${safe}`);
|
|
132
|
+
}
|
|
133
|
+
return { classified: false, reason: 'model-error' };
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const raw = String(result?.response ?? result?.content ?? '').trim();
|
|
137
|
+
if (!raw) return { classified: false, reason: 'empty-response' };
|
|
138
|
+
|
|
139
|
+
const jsonMatch = raw.match(/\{[\s\S]*\}/);
|
|
140
|
+
if (!jsonMatch) return { classified: false, reason: 'non-json-response' };
|
|
141
|
+
|
|
142
|
+
let parsed;
|
|
143
|
+
try {
|
|
144
|
+
parsed = JSON.parse(jsonMatch[0]);
|
|
145
|
+
} catch (_err) {
|
|
146
|
+
return { classified: false, reason: 'json-parse-error' };
|
|
147
|
+
}
|
|
148
|
+
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
149
|
+
return { classified: false, reason: 'bad-shape' };
|
|
150
|
+
}
|
|
151
|
+
if (hasDangerousKeys(parsed)) {
|
|
152
|
+
return { classified: false, reason: 'dangerous-keys' };
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const categoryRaw = String(parsed.category || '').toLowerCase();
|
|
156
|
+
const category = ['implementation', 'product', 'architecture', 'sensitive'].includes(categoryRaw)
|
|
157
|
+
? categoryRaw
|
|
158
|
+
: 'product'; // fail-safe default
|
|
159
|
+
const confidence = Number.isFinite(parsed.confidence) ? Math.round(parsed.confidence) : 0;
|
|
160
|
+
const reason = typeof parsed.reason === 'string' ? parsed.reason.slice(0, 240) : '';
|
|
161
|
+
|
|
162
|
+
return {
|
|
163
|
+
classified: true,
|
|
164
|
+
category,
|
|
165
|
+
confidence,
|
|
166
|
+
reason,
|
|
167
|
+
shouldRunLoop: category === 'implementation' && confidence >= minConfidence,
|
|
168
|
+
minConfidence
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
module.exports = { // public surface of this module
|
|
173
|
+
classifyImplementationQuestion, // async entry point; resolves to a result object on every path
|
|
174
|
+
buildClassifierPrompt, // builds the prompt string for a given question
|
|
175
|
+
hasDangerousKeys, // recursive DANGEROUS_KEYS check used on parsed model output
|
|
176
|
+
DEFAULT_MIN_CONFIDENCE, // default threshold applied when options.minConfidence is absent
|
|
177
|
+
DEFAULT_MODEL // default model id applied when options.model is absent
|
|
178
|
+
};
|