wogiflow 2.29.8 → 2.29.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wogiflow",
3
- "version": "2.29.8",
3
+ "version": "2.29.9",
4
4
  "description": "AI-powered development workflow management system with multi-model support",
5
5
  "main": "lib/index.js",
6
6
  "bin": {
@@ -10,7 +10,7 @@
10
10
  },
11
11
  "scripts": {
12
12
  "flow": "./scripts/flow",
13
- "test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js 
tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
13
+ "test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
14
14
  "test:syntax": "find scripts/ lib/ -name '*.js' -not -path '*/node_modules/*' -exec node --check {} +",
15
15
  "lint": "eslint scripts/ lib/ tests/",
16
16
  "lint:ci": "eslint scripts/ lib/ tests/ --max-warnings 0",
@@ -34,6 +34,7 @@ const fs = require('node:fs');
34
34
  const path = require('node:path');
35
35
 
36
36
  const { PATHS, safeJsonParse } = require('./flow-utils');
37
+ const { safeJsonParseString } = require('./flow-io');
37
38
 
38
39
  // ============================================================
39
40
  // Score Cap Thresholds
@@ -641,6 +642,114 @@ function compareTrend(currentResults, previousAudit) {
641
642
  // Main: Run All Gates
642
643
  // ============================================================
643
644
 
645
/**
 * Gate: Feature Output Health (wf-6c58953a)
 *
 * Inspects DATA produced by features, not just the CODE that produces it.
 * Catches the "silent feature no-op": a feature runs without errors and
 * persists data, but the persisted records have all-null structured fields.
 * That class of failure is invisible to code review/lint/typecheck/tests.
 *
 * Rules are an explicit per-file registry, NOT a generic walker (a blanket
 * "all-null is a bug" rule produces too many false positives):
 *
 *   1. `pending-corrections-null-fields` — at least 50% of the entries in
 *      `.workflow/state/pending-corrections.json` are objects whose
 *      `whatWasWrong` AND `whatUserWants` are both null/undefined.
 *      Severity is `high` when every entry is affected, otherwise `medium`.
 *   2. `prompt-history-vs-corrections-mismatch` — at least 3 entries of
 *      `.workflow/state/prompt-history.json` match the frustration regex
 *      while `.workflow/corrections/` contains zero `.md` files (`high`).
 *
 * All file access is fail-open: a read that throws contributes no finding.
 * In particular, when the corrections directory exists but cannot be listed
 * its file count is treated as unknown and rule 2 is skipped, rather than
 * reporting the directory as empty.
 *
 * @param {string} [projectRoot=PATHS.root] — project to inspect (default: current)
 * @returns {{gate: string, exists: boolean, passed: boolean, findings: Object[], severity: string, scoreCap: number, message: string}}
 *   gate result; `severity` is 'high', 'medium' or 'pass'
 */
function checkFeatureOutputHealth(projectRoot = PATHS.root) {
  const findings = [];
  const stateDir = path.join(projectRoot, '.workflow', 'state');
  const corrDir = path.join(projectRoot, '.workflow', 'corrections');

  // Fail-open reader. Both state files are (typically) top-level arrays, so
  // the string-based parser is used with `[]` as its fallback value; any
  // exception while reading or parsing also yields `[]`.
  const readJsonOrEmpty = (filePath) => {
    try {
      return safeJsonParseString(fs.readFileSync(filePath, 'utf-8'), []);
    } catch (_err) {
      return [];
    }
  };

  // ---- Rule 1: pending-corrections.json null-fields ratio ----
  const pcPath = path.join(stateDir, 'pending-corrections.json');
  if (fs.existsSync(pcPath)) {
    const parsed = readJsonOrEmpty(pcPath);
    const arr = Array.isArray(parsed) ? parsed : [];
    if (arr.length > 0) {
      // Non-object entries stay in the denominator but never count as null.
      const nullCount = arr.filter((r) =>
        r && typeof r === 'object' &&
        r.whatWasWrong == null &&
        r.whatUserWants == null
      ).length;
      const ratio = nullCount / arr.length;
      if (ratio >= 0.5) {
        findings.push({
          rule: 'pending-corrections-null-fields',
          severity: ratio === 1 ? 'high' : 'medium',
          message: `${nullCount}/${arr.length} (${Math.round(ratio * 100)}%) pending-corrections records have null structured fields. Likely correction-detector extraction failure. Run \`flow-correction-backfill\` or restore via Layer 2 enrichment.`,
          evidence: `${path.relative(projectRoot, pcPath)}: ${arr.length} records analyzed; ${nullCount} fully null`
        });
      }
    }
  }

  // ---- Rule 2: prompt-history × corrections cross-reference ----
  const phPath = path.join(stateDir, 'prompt-history.json');
  if (fs.existsSync(phPath)) {
    const ph = readJsonOrEmpty(phPath);
    // Accept either a bare array or an object wrapping it as `{ prompts: [...] }`.
    const phArr = Array.isArray(ph) ? ph : (ph && Array.isArray(ph.prompts) ? ph.prompts : []);

    // Frustration markers (regex per known-pattern set). No `g` flag, so
    // repeated `.test()` calls carry no `lastIndex` state between entries.
    const frustrationRe = /\b(don'?t|stop|wait|actually|why did|why is|you keep|you always|fucking|seriously)\b/i;
    let frustrationCount = 0;
    for (const entry of phArr) {
      if (!entry || typeof entry !== 'object') continue;
      const text = entry.prompt || entry.text || entry.userMessage || '';
      if (typeof text === 'string' && frustrationRe.test(text)) frustrationCount++;
    }

    // 0 = directory missing or genuinely without .md files;
    // null = directory present but listing failed (count unknown).
    let corrCount = 0;
    if (fs.existsSync(corrDir)) {
      try {
        corrCount = fs.readdirSync(corrDir).filter((f) => f.endsWith('.md')).length;
      } catch (_err) {
        // fail-open: an unknown count must not be reported as "empty"
        corrCount = null;
      }
    }

    if (frustrationCount >= 3 && corrCount === 0) {
      findings.push({
        rule: 'prompt-history-vs-corrections-mismatch',
        severity: 'high',
        message: `prompt-history.json has ${frustrationCount} frustration markers but corrections/ is empty. Correction-extractor pipeline appears non-functional (captures input, fails to materialize records).`,
        evidence: `prompt-history: ${frustrationCount} matches across ${phArr.length} entries; corrections/: ${corrCount} files`
      });
    }
  }

  // Overall gate severity is the worst severity among the findings.
  const hasHigh = findings.some((f) => f.severity === 'high');
  const hasMed = findings.some((f) => f.severity === 'medium');
  const severity = hasHigh ? 'high' : hasMed ? 'medium' : 'pass';

  return {
    gate: 'feature-output-health',
    exists: true,
    passed: findings.length === 0,
    findings,
    severity,
    scoreCap: 100, // doesn't cap score directly; surfaces as audit findings
    message: findings.length === 0
      ? 'Feature output health: no issues detected'
      : `Feature output health: ${findings.length} finding(s) — ${findings.map((f) => f.rule).join(', ')}`
  };
}
752
+
644
753
  /**
645
754
  * Run all Gate 0 checks and return consolidated results.
646
755
  * @returns {Object} gate results with score cap
@@ -654,6 +763,7 @@ function runAllGates() {
654
763
  gates.push(checkLintConfigIntegrity());
655
764
  gates.push(checkTests());
656
765
  gates.push(checkScriptCompleteness());
766
+ gates.push(checkFeatureOutputHealth());
657
767
 
658
768
  const cap = calculateScoreCap(gates);
659
769
  const framework = detectFramework();
@@ -716,6 +826,14 @@ function main() {
716
826
  console.log(JSON.stringify(checkScriptCompleteness(), null, 2));
717
827
  break;
718
828
 
829
+ case 'feature-output-health': {
830
+ // Optional --project=<path> argument for cross-project audit
831
+ const projArg = process.argv.find(a => a.startsWith('--project='));
832
+ const projectRoot = projArg ? projArg.slice('--project='.length) : PATHS.root;
833
+ console.log(JSON.stringify(checkFeatureOutputHealth(projectRoot), null, 2));
834
+ break;
835
+ }
836
+
719
837
  case 'eslint-disable':
720
838
  console.log(JSON.stringify(countEslintDisables(), null, 2));
721
839
  break;
@@ -827,6 +945,7 @@ module.exports = {
827
945
  checkTests,
828
946
  parseTestErrorCount, // wf-e111d850: exposed for unit testing
829
947
  checkScriptCompleteness,
948
+ checkFeatureOutputHealth, // wf-6c58953a: feature output health gate
830
949
 
831
950
  // Extended checks
832
951
  countEslintDisables,
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Wogi Flow — Pending-Corrections Backfill (wf-6c58953a)
5
+ *
6
+ * Backfills records in `.workflow/state/pending-corrections.json` that have
7
+ * null `whatWasWrong` / `whatUserWants` fields. The fix lands at code level
8
+ * (flow-correction-detector.js Layer 1+2 reconciliation), but historical
9
+ * records persisted before the fix already have null fields. This tool
10
+ * applies the same deterministic-fallback extraction retroactively.
11
+ *
12
+ * Strategy:
13
+ * - Read pending-corrections.json
14
+ * - For each record where userMessage is populated AND
15
+ * (whatWasWrong is null OR whatUserWants is null)
16
+ * - Apply deterministic extraction: whatWasWrong = first 200 chars of
17
+ * userMessage; whatUserWants stays null (intent inference is an LLM job
18
+ * — honest null > wrong guess; live extractor will populate going forward)
19
+ * - Mark `enrichmentSource: "backfill-<date>"` so consumers can distinguish
20
+ * backfilled from live extractions
21
+ * - Atomic write: write-temp + rename
22
+ *
23
+ * Usage:
24
+ * node scripts/flow-correction-backfill.js # current project
25
+ * node scripts/flow-correction-backfill.js --project=<path> # explicit project
26
+ * node scripts/flow-correction-backfill.js --dry-run # report only
27
+ */
28
+
29
+ 'use strict';
30
+
31
+ const fs = require('node:fs');
32
+ const path = require('node:path');
33
+
34
+ const { PATHS } = require('./flow-utils');
35
+ const { safeJsonParseString } = require('./flow-io');
36
+ const { deterministicWhatWasWrong } = require('./flow-correction-detector');
37
+
38
+ const BACKFILL_DATE = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
39
+
40
/**
 * Backfill a single project's pending-corrections.json.
 *
 * A record is backfilled when it has a non-blank string `userMessage` and its
 * `whatWasWrong` is null/undefined. Only `whatWasWrong` is filled (from the
 * deterministic excerpt of `userMessage`); `whatUserWants` is never touched,
 * so a record that already carries `whatUserWants` but lacks `whatWasWrong`
 * is repaired too instead of being counted as already populated. Backfilled
 * records are stamped with `enrichmentSource: "backfill-<date>"`.
 *
 * Records without a usable `userMessage`, and non-object entries, are skipped
 * and counted in neither `backfilled` nor `alreadyPopulated`.
 *
 * The file is rewritten only when something changed and `dryRun` is false,
 * via write-temp + rename. If that write fails, the temp file is removed
 * (best effort) and the original error is rethrown.
 *
 * @param {string} projectRoot — project directory containing .workflow/
 * @param {Object} [opts]
 * @param {boolean} [opts.dryRun=false] — if true, return what WOULD change without writing
 * @returns {{ found: number, backfilled: number, alreadyPopulated: number, written: boolean, path: string|null, dryRun: boolean }}
 * @throws {Error} when the file exists but cannot be read, when its parsed
 *   content is not an array, or when the write/rename fails
 */
function backfillPendingCorrections(projectRoot, opts = {}) {
  const { dryRun = false } = opts;
  const pcPath = path.join(projectRoot, '.workflow', 'state', 'pending-corrections.json');

  const result = {
    found: 0,
    backfilled: 0,
    alreadyPopulated: 0,
    written: false,
    path: pcPath,
    dryRun
  };

  // Nothing to backfill when the project has no pending-corrections file.
  if (!fs.existsSync(pcPath)) {
    return result;
  }

  let content;
  try {
    content = fs.readFileSync(pcPath, 'utf-8');
  } catch (err) {
    throw new Error(`Cannot read pending-corrections at ${pcPath}: ${err.message}`);
  }

  const records = safeJsonParseString(content, []);
  if (!Array.isArray(records)) {
    throw new Error(`Expected array at ${pcPath}; got ${typeof records}`);
  }

  result.found = records.length;

  let changed = false;
  for (const r of records) {
    if (!r || typeof r !== 'object') continue;
    const userMsg = r.userMessage;
    // Without the original message there is nothing to derive an excerpt from.
    if (typeof userMsg !== 'string' || !userMsg.trim()) continue;

    // Only `whatWasWrong` can be filled deterministically, so it alone
    // decides whether the record needs work.
    if (r.whatWasWrong != null) {
      result.alreadyPopulated += 1;
      continue;
    }

    // whatUserWants is left as-is; intent inference is the live extractor's
    // job going forward.
    r.whatWasWrong = deterministicWhatWasWrong(userMsg);
    r.enrichmentSource = `backfill-${BACKFILL_DATE}`;
    result.backfilled += 1;
    changed = true;
  }

  if (changed && !dryRun) {
    // Atomic write: write-temp + rename
    const tmpPath = `${pcPath}.tmp-${process.pid}`;
    try {
      fs.writeFileSync(tmpPath, `${JSON.stringify(records, null, 2)}\n`);
      fs.renameSync(tmpPath, pcPath);
    } catch (err) {
      // Do not leave a stray temp file next to the state file.
      try {
        fs.rmSync(tmpPath, { force: true });
      } catch (_cleanupErr) { /* best effort; surface the original error */ }
      throw err;
    }
    result.written = true;
  }

  return result;
}
111
+
112
+ // ============================================================
113
+ // CLI
114
+ // ============================================================
115
+
116
/**
 * CLI entry point for the pending-corrections backfill.
 *
 * Recognized arguments:
 *   --project=<path>  project root to operate on (defaults to PATHS.root)
 *   --dry-run         report what would change without writing
 *
 * Prints a JSON summary of the backfill result to stdout. If the backfill
 * throws, prints `Error: <message>` to stderr and exits with status 1.
 */
function main() {
  const PROJECT_FLAG = '--project=';
  const cliArgs = process.argv.slice(2);

  const dryRun = cliArgs.includes('--dry-run');
  const projectFlag = cliArgs.find((arg) => arg.startsWith(PROJECT_FLAG));
  const projectRoot = projectFlag ? projectFlag.slice(PROJECT_FLAG.length) : PATHS.root;

  let outcome;
  try {
    outcome = backfillPendingCorrections(projectRoot, { dryRun });
  } catch (err) {
    console.error(`Error: ${err.message}`);
    process.exit(1);
  }

  // Key order below is the order in which the summary is printed.
  const summary = {
    project: projectRoot,
    pendingCorrectionsPath: outcome.path,
    found: outcome.found,
    backfilled: outcome.backfilled,
    alreadyPopulated: outcome.alreadyPopulated,
    written: outcome.written,
    dryRun: outcome.dryRun
  };
  console.log(JSON.stringify(summary, null, 2));
}
141
+
142
+ module.exports = {
143
+ backfillPendingCorrections
144
+ };
145
+
146
+ if (require.main === module) {
147
+ main();
148
+ }
@@ -329,14 +329,106 @@ function recordHybridTelemetry(verdict, runCtx = {}) {
329
329
  }
330
330
  }
331
331
 
332
+ // ============================================================================
333
+ // Layer 1 + Layer 2 Reconciliation (wf-6c58953a)
334
+ // ============================================================================
335
+
336
/**
 * Deterministic fallback for `whatWasWrong` — preserves the user's literal
 * text when LLM extraction is unavailable or fails. Better than null: an
 * excerpt is honest signal; null is data loss.
 *
 * The excerpt is the trimmed message cut to at most 200 UTF-16 code units.
 * If the cut would land between the two halves of a surrogate pair (e.g. an
 * emoji straddling position 200), the dangling high surrogate is dropped so
 * the result never ends in a lone surrogate; the excerpt is then 199 units.
 *
 * @param {string} message — user message
 * @returns {string|null} the excerpt, or null when `message` is not a string
 *   or is blank after trimming
 */
function deterministicWhatWasWrong(message) {
  const MAX_EXCERPT_LENGTH = 200;

  if (typeof message !== 'string') return null;
  const trimmed = message.trim();
  if (!trimmed) return null;
  if (trimmed.length <= MAX_EXCERPT_LENGTH) return trimmed;

  // The message is longer than the limit, so a high surrogate in the last
  // kept position means its low-surrogate partner is being cut off.
  const lastUnit = trimmed.charCodeAt(MAX_EXCERPT_LENGTH - 1);
  const endsInHighSurrogate = lastUnit >= 0xD800 && lastUnit <= 0xDBFF;
  return trimmed.slice(0, endsInHighSurrogate ? MAX_EXCERPT_LENGTH - 1 : MAX_EXCERPT_LENGTH);
}
350
+
351
/**
 * Merge the Layer 1 (keyword classifier) and Layer 2 (LLM) results into the
 * single record returned for a user message.
 *
 * Outcomes, by which layers produced a result:
 *   - Layer 1 + Layer 2: it IS a correction (the keyword match decides the
 *     binary verdict). Confidence and matched pattern come from Layer 1;
 *     `whatWasWrong` is Layer 2's value when truthy, otherwise the
 *     deterministic excerpt of `trimmed`; `whatUserWants` is Layer 2's value
 *     or null. `llmDisagreed` is true when Layer 2 reported
 *     `isCorrection: false`.
 *   - Layer 1 only: it IS a correction; `whatWasWrong` is the deterministic
 *     excerpt and `whatUserWants` stays null.
 *   - Layer 2 only: a record is returned only when Layer 2 says
 *     `isCorrection`; its fields are passed through with falsy values
 *     normalized to null.
 *   - Neither (or Layer 2 alone saying "not a correction"): null.
 *
 * Performs no I/O; it only reads its arguments and calls
 * deterministicWhatWasWrong().
 *
 * @param {Object|null} layer1 — Layer 1 result {isCorrection, confidence, correctionType, method, matchedPattern}
 * @param {Object|null} layer2 — Layer 2 (LLM) result {isCorrection, confidence, correctionType, whatWasWrong, whatUserWants}
 * @param {string} trimmed — trimmed user message (for deterministic fallback)
 * @returns {Object|null} reconciled record OR null if not a correction
 */
function reconcileExtraction(layer1, layer2, trimmed) {
  // No keyword hit: Layer 2 is the sole classifier.
  if (!layer1) {
    if (!layer2 || !layer2.isCorrection) {
      return null;
    }
    return {
      isCorrection: true,
      confidence: layer2.confidence,
      correctionType: layer2.correctionType || null,
      whatWasWrong: layer2.whatWasWrong || null,
      whatUserWants: layer2.whatUserWants || null,
      method: 'ai',
      enrichmentSource: 'haiku',
    };
  }

  // Keyword hit but no Layer 2 result (no API key, network error, etc.).
  if (!layer2) {
    return {
      isCorrection: true,
      confidence: layer1.confidence,
      correctionType: layer1.correctionType || 'behavior',
      whatWasWrong: deterministicWhatWasWrong(trimmed),
      whatUserWants: null,
      method: layer1.method,
      matchedPattern: layer1.matchedPattern,
      enrichmentSource: 'deterministic-fallback',
    };
  }

  // Both layers produced a result.
  const llmWhatWasWrong = layer2.whatWasWrong;
  const llmProvidedExcerpt = Boolean(llmWhatWasWrong);
  return {
    isCorrection: true, // Layer 1 high-precision keyword match wins binary
    confidence: layer1.confidence,
    correctionType: layer1.correctionType || layer2.correctionType || 'behavior',
    whatWasWrong: llmProvidedExcerpt ? llmWhatWasWrong : deterministicWhatWasWrong(trimmed),
    whatUserWants: layer2.whatUserWants || null,
    method: 'keyword+ai',
    matchedPattern: layer1.matchedPattern,
    enrichmentSource: llmProvidedExcerpt ? 'haiku' : 'deterministic-fallback',
    llmDisagreed: layer2.isCorrection === false,
  };
}
421
+
332
422
  // ============================================================================
333
423
  // AI-Based Detection (Haiku — language-agnostic)
334
424
  // ============================================================================
335
425
 
336
426
  /**
337
427
  * Detect if a message is a correction using Claude Haiku.
338
- * This is the ONLY detection method no regex fallback.
339
- * Works in any language.
428
+ * Hybrid: Layer 1 keyword classifier (fast) + Layer 2 Haiku enrichment.
429
+ *
430
+ * wf-6c58953a (2026-05-09): Layer 1 hit no longer short-circuits structured
431
+ * extraction. See reconcileExtraction() for the post-fix design rationale.
340
432
  *
341
433
  * @param {string} userMessage - The user's message
342
434
  * @param {string} previousContext - Summary of what the AI was doing
@@ -354,8 +446,12 @@ async function detectCorrection(userMessage, previousContext = '') {
354
446
  return { isCorrection: false, confidence: 0, method: 'skipped', reason: 'length-filter' };
355
447
  }
356
448
 
357
- // Layer 1 (wf-e6d65edf) — keyword pre-classifier. Skips Haiku entirely on a hit.
449
+ // Layer 1 (wf-e6d65edf) — keyword pre-classifier.
450
+ // wf-6c58953a: NO longer short-circuits structured extraction. Layer 1's
451
+ // classification is captured; reconcile with Layer 2 (or deterministic
452
+ // fallback when Layer 2 unavailable) at end.
358
453
  const hybridCfg = getHybridConfig();
454
+ let layer1Result = null;
359
455
  if (hybridCfg.hybridEnabled) {
360
456
  const matched = findKeywordMatch(trimmed);
361
457
  if (matched) {
@@ -368,12 +464,10 @@ async function detectCorrection(userMessage, previousContext = '') {
368
464
  confidence: conf,
369
465
  durationMs: Date.now() - start,
370
466
  });
371
- return {
467
+ layer1Result = {
372
468
  isCorrection: true,
373
469
  confidence: conf,
374
470
  correctionType: 'behavior',
375
- whatWasWrong: null,
376
- whatUserWants: null,
377
471
  method: 'keyword',
378
472
  matchedPattern: matched.phrase,
379
473
  };
@@ -383,6 +477,10 @@ async function detectCorrection(userMessage, previousContext = '') {
383
477
  // Check if API key is available
384
478
  const apiKey = process.env.ANTHROPIC_API_KEY;
385
479
  if (!apiKey) {
480
+ // wf-6c58953a: Layer 1 hit + no API key → deterministic fallback (not null)
481
+ if (layer1Result) {
482
+ return reconcileExtraction(layer1Result, null, trimmed);
483
+ }
386
484
  return { isCorrection: false, confidence: 0, method: 'skipped', reason: 'no-api-key' };
387
485
  }
388
486
 
@@ -492,11 +590,19 @@ Respond with JSON only (no markdown, no explanation):
492
590
  durationMs: Date.now() - start,
493
591
  });
494
592
 
593
+ // wf-6c58953a: reconcile Layer 1 + Layer 2 (or just Layer 2 if Layer 1 missed)
594
+ const reconciled = reconcileExtraction(layer1Result, aiResult, trimmed);
595
+ if (reconciled) return reconciled;
596
+ // Both layers say no-correction
495
597
  return aiResult;
496
598
  } catch (err) {
497
599
  if (process.env.DEBUG) {
498
600
  console.error(`[DEBUG] AI correction detection failed: ${err.message}`);
499
601
  }
602
+ // wf-6c58953a: Layer 2 failure with Layer 1 hit → deterministic fallback
603
+ if (layer1Result) {
604
+ return reconcileExtraction(layer1Result, null, trimmed);
605
+ }
500
606
  return { isCorrection: false, confidence: 0, method: 'ai', reason: err.message };
501
607
  }
502
608
  }
@@ -1295,11 +1401,15 @@ function correlateWithPriorGates(correction) {
1295
1401
  // ============================================================================
1296
1402
 
1297
1403
  module.exports = {
1298
- // Detection (AI-only)
1404
+ // Detection (hybrid Layer 1 + Layer 2)
1299
1405
  detectCorrection,
1300
1406
  batchAnalyzePrompts,
1301
1407
  spawnBackgroundDetection,
1302
1408
 
1409
+ // wf-6c58953a: reconciliation helpers exposed for unit testing + backfill
1410
+ reconcileExtraction,
1411
+ deterministicWhatWasWrong,
1412
+
1303
1413
  // Queue management
1304
1414
  loadPendingCorrections,
1305
1415
  queuePendingCorrection,