wogiflow 2.29.8 → 2.29.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wogiflow",
|
|
3
|
-
"version": "2.29.
|
|
3
|
+
"version": "2.29.9",
|
|
4
4
|
"description": "AI-powered development workflow management system with multi-model support",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
},
|
|
11
11
|
"scripts": {
|
|
12
12
|
"flow": "./scripts/flow",
|
|
13
|
-
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js 
tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
13
|
+
"test": "NODE_ENV=test node --test tests/auto-compact-prompt.test.js tests/flow-paths.test.js tests/flow-io.test.js tests/flow-audit-gates.test.js tests/flow-standards-hook-three-layer.test.js tests/flow-correction-detector-reconcile.test.js tests/flow-correction-backfill.test.js tests/flow-audit-gates-feature-output-health.test.js tests/flow-config-loader.test.js tests/flow-damage-control.test.js tests/flow-output.test.js tests/flow-constants.test.js tests/flow-session-state.test.js tests/flow-hooks-integration.test.js tests/flow-utils.test.js tests/flow-security.test.js tests/flow-memory-db.test.js tests/flow-durable-session.test.js tests/flow-skill-matcher.test.js tests/flow-bridge.test.js tests/flow-proactive-compact.test.js tests/flow-cascade-completion.test.js tests/flow-capture-gate.test.js tests/flow-correction-detector-hybrid.test.js tests/flow-promote.test.js tests/flow-archive-runs.test.js tests/flow-memory.test.js tests/flow-hooks-pre-tool-helpers.test.js tests/flow-hooks-bugfix-scope-gate.test.js tests/flow-hooks-routing-gate.test.js tests/flow-hooks-phase-read-gate.test.js tests/flow-hooks-commit-log-gate.test.js tests/flow-hooks-deploy-gate.test.js tests/flow-hooks-todowrite-gate.test.js tests/flow-hooks-git-safety-gate.test.js tests/flow-hooks-scope-mutation-gate.test.js tests/flow-hooks-strike-gate.test.js tests/flow-hooks-component-check.test.js tests/flow-hooks-scope-gate.test.js tests/flow-hooks-implementation-gate.test.js tests/flow-hooks-research-gate.test.js tests/flow-hooks-loop-check.test.js tests/flow-hooks-manager-boundary-gate.test.js tests/flow-hooks-phase-gate.test.js tests/flow-hooks-pre-tool-orchestrator.test.js tests/flow-hooks-observation-capture.test.js tests/flow-hooks-task-gate.test.js tests/flow-durable-session-suspension.test.js tests/flow-health-mcp-scopes.test.js tests/flow-lean-config.test.js tests/flow-workspace-autopickup.test.js tests/flow-worker-boundary-gate.test.js tests/flow-worker-question-classifier.test.js 
tests/flow-completion-truth-gate-contradictions.test.js tests/flow-structure-sensor.test.js tests/flow-workspace-dispatch-tracking.test.js tests/workspace-ipc-sqlite.test.js tests/workspace-ipc-multi-worker.test.js tests/flow-story-gates.test.js tests/flow-workspace-restart-handoff.test.js tests/flow-wogi-claude-wrapper.test.js tests/flow-wave1-integrations.test.js tests/flow-wave2-integrations.test.js tests/flow-wave3-integrations.test.js tests/flow-commit-claims-gate.test.js tests/auto-review.test.js tests/gate-telemetry-surface.test.js tests/agents-md-alias.test.js tests/flow-skill-manage.test.js tests/fuzzy-patch.test.js tests/mode-schema.test.js tests/flow-feature-dossier.test.js tests/flow-autonomous-mode.test.js tests/flow-epic-cascade.test.js tests/flow-workspace-summary.test.js tests/flow-hooks-research-evidence-gate.test.js tests/flow-worker-mcp-strip.test.js tests/flow-orchestrate-corrections.test.js tests/flow-source-fidelity.test.js tests/flow-hooks-long-input-enforcement.test.js tests/workspace-channel-tracking.test.js tests/flow-hooks-deletion-log.test.js tests/flow-task-boundary-reset.test.js tests/flow-deferral-gate.test.js tests/flow-research-required-gate.test.js && NODE_ENV=test node tests/run-quality-gates.test.js",
|
|
14
14
|
"test:syntax": "find scripts/ lib/ -name '*.js' -not -path '*/node_modules/*' -exec node --check {} +",
|
|
15
15
|
"lint": "eslint scripts/ lib/ tests/",
|
|
16
16
|
"lint:ci": "eslint scripts/ lib/ tests/ --max-warnings 0",
|
|
@@ -34,6 +34,7 @@ const fs = require('node:fs');
|
|
|
34
34
|
const path = require('node:path');
|
|
35
35
|
|
|
36
36
|
const { PATHS, safeJsonParse } = require('./flow-utils');
|
|
37
|
+
const { safeJsonParseString } = require('./flow-io');
|
|
37
38
|
|
|
38
39
|
// ============================================================
|
|
39
40
|
// Score Cap Thresholds
|
|
@@ -641,6 +642,114 @@ function compareTrend(currentResults, previousAudit) {
|
|
|
641
642
|
// Main: Run All Gates
|
|
642
643
|
// ============================================================
|
|
643
644
|
|
|
645
|
+
/**
 * Gate: Feature Output Health (wf-6c58953a)
 *
 * Inspects the DATA that features persist, not just the CODE that produces it.
 * Targets the "silent feature no-op" class: a feature runs without errors and
 * persists records, but the structured fields of those records are all null.
 * Code review, lint, typecheck and tests miss this because they look at code,
 * not at output.
 *
 * Rules are an explicit per-file registry, NOT a generic walker (a blanket
 * "all-null is a bug" heuristic would be false-positive prone):
 *
 *   1. `pending-corrections-null-fields` — fires when at least 50% of the
 *      entries in `.workflow/state/pending-corrections.json` have both
 *      `whatWasWrong` and `whatUserWants` null/undefined. Severity is `high`
 *      when every entry is affected, otherwise `medium`.
 *   2. `prompt-history-vs-corrections-mismatch` — fires (severity `high`)
 *      when `.workflow/state/prompt-history.json` holds 3+ entries matching
 *      the frustration-marker regex while `.workflow/corrections/` contains
 *      no `.md` files (or does not exist).
 *
 * File access is fail-open: a missing or unreadable input produces no
 * finding. That includes a `corrections/` directory that exists but cannot
 * be listed — its file count is then unknown, so rule 2 is skipped instead of
 * reporting the directory as empty.
 *
 * @param {string} [projectRoot=PATHS.root] — project to inspect (default: current)
 * @returns {Object} gate result `{ gate, exists, passed, findings, severity,
 *   scoreCap, message }`; `severity` is 'high', 'medium' or 'pass'.
 */
function checkFeatureOutputHealth(projectRoot = PATHS.root) {
  const findings = [];
  const stateDir = path.join(projectRoot, '.workflow', 'state');
  const corrDir = path.join(projectRoot, '.workflow', 'corrections');

  // Both state files are (typically) top-level ARRAYS, which safeJsonParse
  // rejects, so read the file and hand the text to safeJsonParseString.
  // Missing or unreadable files degrade to an empty list (fail-open).
  const readParsedFailOpen = (filePath) => {
    if (!fs.existsSync(filePath)) return [];
    try {
      return safeJsonParseString(fs.readFileSync(filePath, 'utf-8'), []);
    } catch (_err) {
      return [];
    }
  };

  // ---- Rule 1: pending-corrections.json null-fields ratio ----
  const pcPath = path.join(stateDir, 'pending-corrections.json');
  const pending = readParsedFailOpen(pcPath);
  const arr = Array.isArray(pending) ? pending : [];
  if (arr.length > 0) {
    const nullCount = arr.filter(r =>
      r && typeof r === 'object' &&
      (r.whatWasWrong == null) &&
      (r.whatUserWants == null)
    ).length;
    const ratio = nullCount / arr.length;
    if (ratio >= 0.5) {
      findings.push({
        rule: 'pending-corrections-null-fields',
        severity: ratio === 1 ? 'high' : 'medium',
        message: `${nullCount}/${arr.length} (${Math.round(ratio * 100)}%) pending-corrections records have null structured fields. Likely correction-detector extraction failure. Run \`flow-correction-backfill\` or restore via Layer 2 enrichment.`,
        evidence: `${path.relative(projectRoot, pcPath)}: ${arr.length} records analyzed; ${nullCount} fully null`
      });
    }
  }

  // ---- Rule 2: prompt-history × corrections cross-reference ----
  // prompt-history.json is usually a top-level array; an object wrapping the
  // list under `prompts` is accepted as well.
  const phPath = path.join(stateDir, 'prompt-history.json');
  const ph = readParsedFailOpen(phPath);
  const phArr = Array.isArray(ph) ? ph : (ph && Array.isArray(ph.prompts) ? ph.prompts : []);

  // Frustration markers (regex per known-pattern set)
  const frustrationRe = /\b(don'?t|stop|wait|actually|why did|why is|you keep|you always|fucking|seriously)\b/i;
  let frustrationCount = 0;
  for (const entry of phArr) {
    if (!entry || typeof entry !== 'object') continue;
    const text = entry.prompt || entry.text || entry.userMessage || '';
    if (typeof text === 'string' && frustrationRe.test(text)) frustrationCount++;
  }

  if (frustrationCount >= 3) {
    // A missing corrections/ directory means nothing was materialized (0).
    // A directory that exists but cannot be listed means the count is
    // UNKNOWN: stay fail-open and do not claim it is empty.
    let corrCount = 0;
    let corrCountKnown = true;
    if (fs.existsSync(corrDir)) {
      try {
        corrCount = fs.readdirSync(corrDir).filter(f => f.endsWith('.md')).length;
      } catch (_err) {
        corrCountKnown = false;
      }
    }

    if (corrCountKnown && corrCount === 0) {
      findings.push({
        rule: 'prompt-history-vs-corrections-mismatch',
        severity: 'high',
        message: `prompt-history.json has ${frustrationCount} frustration markers but corrections/ is empty. Correction-extractor pipeline appears non-functional (captures input, fails to materialize records).`,
        evidence: `prompt-history: ${frustrationCount} matches across ${phArr.length} entries; corrections/: ${corrCount} files`
      });
    }
  }

  // Overall gate severity is the worst severity among the findings.
  const hasHigh = findings.some(f => f.severity === 'high');
  const hasMed = findings.some(f => f.severity === 'medium');
  const severity = hasHigh ? 'high' : hasMed ? 'medium' : 'pass';

  return {
    gate: 'feature-output-health',
    exists: true,
    passed: findings.length === 0,
    findings,
    severity,
    scoreCap: 100, // doesn't cap score directly; surfaces as audit findings
    message: findings.length === 0
      ? 'Feature output health: no issues detected'
      : `Feature output health: ${findings.length} finding(s) — ${findings.map(f => f.rule).join(', ')}`
  };
}
|
|
752
|
+
|
|
644
753
|
/**
|
|
645
754
|
* Run all Gate 0 checks and return consolidated results.
|
|
646
755
|
* @returns {Object} gate results with score cap
|
|
@@ -654,6 +763,7 @@ function runAllGates() {
|
|
|
654
763
|
gates.push(checkLintConfigIntegrity());
|
|
655
764
|
gates.push(checkTests());
|
|
656
765
|
gates.push(checkScriptCompleteness());
|
|
766
|
+
gates.push(checkFeatureOutputHealth());
|
|
657
767
|
|
|
658
768
|
const cap = calculateScoreCap(gates);
|
|
659
769
|
const framework = detectFramework();
|
|
@@ -716,6 +826,14 @@ function main() {
|
|
|
716
826
|
console.log(JSON.stringify(checkScriptCompleteness(), null, 2));
|
|
717
827
|
break;
|
|
718
828
|
|
|
829
|
+
case 'feature-output-health': {
|
|
830
|
+
// Optional --project=<path> argument for cross-project audit
|
|
831
|
+
const projArg = process.argv.find(a => a.startsWith('--project='));
|
|
832
|
+
const projectRoot = projArg ? projArg.slice('--project='.length) : PATHS.root;
|
|
833
|
+
console.log(JSON.stringify(checkFeatureOutputHealth(projectRoot), null, 2));
|
|
834
|
+
break;
|
|
835
|
+
}
|
|
836
|
+
|
|
719
837
|
case 'eslint-disable':
|
|
720
838
|
console.log(JSON.stringify(countEslintDisables(), null, 2));
|
|
721
839
|
break;
|
|
@@ -827,6 +945,7 @@ module.exports = {
|
|
|
827
945
|
checkTests,
|
|
828
946
|
parseTestErrorCount, // wf-e111d850: exposed for unit testing
|
|
829
947
|
checkScriptCompleteness,
|
|
948
|
+
checkFeatureOutputHealth, // wf-6c58953a: feature output health gate
|
|
830
949
|
|
|
831
950
|
// Extended checks
|
|
832
951
|
countEslintDisables,
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Wogi Flow — Pending-Corrections Backfill (wf-6c58953a)
|
|
5
|
+
*
|
|
6
|
+
* Backfills records in `.workflow/state/pending-corrections.json` that have
|
|
7
|
+
* null `whatWasWrong` / `whatUserWants` fields. The fix lands at code level
|
|
8
|
+
* (flow-correction-detector.js Layer 1+2 reconciliation), but historical
|
|
9
|
+
* records persisted before the fix already have null fields. This tool
|
|
10
|
+
* applies the same deterministic-fallback extraction retroactively.
|
|
11
|
+
*
|
|
12
|
+
* Strategy:
|
|
13
|
+
* - Read pending-corrections.json
|
|
14
|
+
* - For each record where userMessage is populated AND
|
|
15
|
+
* (whatWasWrong is null AND whatUserWants is null)
|
|
16
|
+
* - Apply deterministic extraction: whatWasWrong = first 200 chars of
|
|
17
|
+
* userMessage; whatUserWants stays null (intent inference is an LLM job
|
|
18
|
+
* — honest null > wrong guess; live extractor will populate going forward)
|
|
19
|
+
* - Mark `enrichmentSource: "backfill-<date>"` so consumers can distinguish
|
|
20
|
+
* backfilled from live extractions
|
|
21
|
+
* - Atomic write: write-temp + rename
|
|
22
|
+
*
|
|
23
|
+
* Usage:
|
|
24
|
+
* node scripts/flow-correction-backfill.js # current project
|
|
25
|
+
* node scripts/flow-correction-backfill.js --project=<path> # explicit project
|
|
26
|
+
* node scripts/flow-correction-backfill.js --dry-run # report only
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
'use strict';
|
|
30
|
+
|
|
31
|
+
const fs = require('node:fs');
|
|
32
|
+
const path = require('node:path');
|
|
33
|
+
|
|
34
|
+
const { PATHS } = require('./flow-utils');
|
|
35
|
+
const { safeJsonParseString } = require('./flow-io');
|
|
36
|
+
const { deterministicWhatWasWrong } = require('./flow-correction-detector');
|
|
37
|
+
|
|
38
|
+
const BACKFILL_DATE = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
|
|
39
|
+
|
|
40
|
+
/**
 * Backfill a single project's pending-corrections.json.
 *
 * For every record that has a non-blank string `userMessage` and whose
 * `whatWasWrong` AND `whatUserWants` are both null/undefined, sets
 * `whatWasWrong` via `deterministicWhatWasWrong(userMessage)` and tags the
 * record with `enrichmentSource: "backfill-<date>"`. `whatUserWants` is left
 * untouched. Records with a usable `userMessage` that already carry either
 * field are counted as `alreadyPopulated`; non-object entries and records
 * without a usable `userMessage` are skipped and not counted.
 *
 * The file is rewritten only when something changed and `dryRun` is off, via
 * write-temp + rename. If the write or the rename fails, the temp file is
 * removed (best effort) before the error is rethrown, so a failed run does
 * not leave `pending-corrections.json.tmp-<pid>` behind.
 *
 * @param {string} projectRoot — project directory containing .workflow/
 * @param {Object} [opts]
 * @param {boolean} [opts.dryRun=false] — if true, return what WOULD change without writing
 * @returns {{ found: number, backfilled: number, alreadyPopulated: number, written: boolean, path: string|null, dryRun: boolean }}
 * @throws {Error} when the file cannot be read, does not parse to a top-level
 *   array, or cannot be written back.
 */
function backfillPendingCorrections(projectRoot, opts = {}) {
  const { dryRun = false } = opts;
  const pcPath = path.join(projectRoot, '.workflow', 'state', 'pending-corrections.json');

  const result = {
    found: 0,
    backfilled: 0,
    alreadyPopulated: 0,
    written: false,
    path: null,
    dryRun
  };

  // Nothing to backfill: report the path that was probed and stop.
  if (!fs.existsSync(pcPath)) {
    result.path = pcPath;
    return result;
  }

  let content;
  try {
    content = fs.readFileSync(pcPath, 'utf-8');
  } catch (err) {
    throw new Error(`Cannot read pending-corrections at ${pcPath}: ${err.message}`);
  }

  const records = safeJsonParseString(content, []);
  if (!Array.isArray(records)) {
    throw new Error(`Expected array at ${pcPath}; got ${typeof records}`);
  }

  result.found = records.length;
  result.path = pcPath;

  let changed = false;
  for (const r of records) {
    if (!r || typeof r !== 'object') continue;
    const userMsg = r.userMessage;
    if (typeof userMsg !== 'string' || !userMsg.trim()) continue;

    // Only fully-null records are touched, so data from a live extraction is
    // never overwritten.
    const needsFill = (r.whatWasWrong == null) && (r.whatUserWants == null);
    if (!needsFill) {
      result.alreadyPopulated += 1;
      continue;
    }

    // Apply deterministic extraction (whatWasWrong only — whatUserWants
    // stays null; intent inference is the live extractor's job going forward)
    r.whatWasWrong = deterministicWhatWasWrong(userMsg);
    r.enrichmentSource = `backfill-${BACKFILL_DATE}`;
    result.backfilled += 1;
    changed = true;
  }

  if (changed && !dryRun) {
    // Atomic write: write-temp + rename
    const tmpPath = `${pcPath}.tmp-${process.pid}`;
    try {
      fs.writeFileSync(tmpPath, JSON.stringify(records, null, 2) + '\n');
      fs.renameSync(tmpPath, pcPath);
    } catch (err) {
      // Do not leak the temp file; the original file is still intact.
      try {
        fs.unlinkSync(tmpPath);
      } catch (_cleanupErr) { /* temp file was never created or is already gone */ }
      throw err;
    }
    result.written = true;
  }

  return result;
}
|
|
111
|
+
|
|
112
|
+
// ============================================================
|
|
113
|
+
// CLI
|
|
114
|
+
// ============================================================
|
|
115
|
+
|
|
116
|
+
/**
 * CLI entry point for the pending-corrections backfill.
 *
 * Recognized arguments:
 *   --project=<path>  project root to operate on (defaults to PATHS.root)
 *   --dry-run         report what would change without writing
 *
 * Prints a JSON report to stdout. If the backfill throws, prints
 * `Error: <message>` to stderr and exits with status 1.
 */
function main() {
  const projectFlag = '--project=';
  const cliArgs = process.argv.slice(2);

  const projectFlagArg = cliArgs.find((arg) => arg.startsWith(projectFlag));
  const projectRoot = projectFlagArg
    ? projectFlagArg.slice(projectFlag.length)
    : PATHS.root;
  const dryRun = cliArgs.includes('--dry-run');

  let outcome;
  try {
    outcome = backfillPendingCorrections(projectRoot, { dryRun });
  } catch (err) {
    console.error(`Error: ${err.message}`);
    process.exit(1);
  }

  // Key order below is the order of the printed report.
  const report = {
    project: projectRoot,
    pendingCorrectionsPath: outcome.path,
    found: outcome.found,
    backfilled: outcome.backfilled,
    alreadyPopulated: outcome.alreadyPopulated,
    written: outcome.written,
    dryRun: outcome.dryRun
  };
  console.log(JSON.stringify(report, null, 2));
}
|
|
141
|
+
|
|
142
|
+
module.exports = {
|
|
143
|
+
backfillPendingCorrections
|
|
144
|
+
};
|
|
145
|
+
|
|
146
|
+
if (require.main === module) {
|
|
147
|
+
main();
|
|
148
|
+
}
|
|
@@ -329,14 +329,106 @@ function recordHybridTelemetry(verdict, runCtx = {}) {
|
|
|
329
329
|
}
|
|
330
330
|
}
|
|
331
331
|
|
|
332
|
+
// ============================================================================
|
|
333
|
+
// Layer 1 + Layer 2 Reconciliation (wf-6c58953a)
|
|
334
|
+
// ============================================================================
|
|
335
|
+
|
|
336
|
+
/**
 * Deterministic fallback for `whatWasWrong` — preserves the user's literal
 * frustration text when LLM extraction is unavailable or fails. Better than
 * null: a short excerpt is honest signal; null is data loss.
 *
 * The excerpt is the trimmed message capped at 200 UTF-16 code units. If the
 * cap would fall between the two halves of a surrogate pair (e.g. an emoji
 * straddling the boundary), the excerpt is shortened by one unit so it never
 * ends in a dangling high surrogate.
 *
 * @param {string} message — user message
 * @returns {string|null} excerpt of at most 200 code units, or null when
 *   `message` is not a string or is blank after trimming
 */
function deterministicWhatWasWrong(message) {
  if (typeof message !== 'string') return null;
  const trimmed = message.trim();
  if (!trimmed) return null;

  const maxLen = 200;
  if (trimmed.length <= maxLen) return trimmed;

  // slice() counts UTF-16 code units, so a hard cut can split a surrogate
  // pair. A high surrogate (0xD800–0xDBFF) in the last kept position means
  // its low half was cut off — drop the orphaned half as well.
  const lastUnit = trimmed.charCodeAt(maxLen - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return trimmed.slice(0, endsOnHighSurrogate ? maxLen - 1 : maxLen);
}
|
|
350
|
+
|
|
351
|
+
/**
 * Reconcile Layer 1 (keyword classifier) + Layer 2 (Haiku LLM) results.
 *
 * Pre-fix bug: Layer 1 returned `{whatWasWrong: null, whatUserWants: null}`
 * when a keyword matched, never calling Layer 2. The user's actual
 * frustration was captured but structured fields stayed null — a silent
 * feature no-op.
 *
 * Design:
 *   - Layer 1 hit + Layer 2 result: trust Layer 1's classification (high-
 *     precision keyword match) and its confidence; use Layer 2's strings when
 *     present, else the deterministic fallback for `whatWasWrong`.
 *     `llmDisagreed` records whether Layer 2 said `isCorrection: false`.
 *   - Layer 1 hit only (Layer 2 failed or was skipped): deterministic
 *     fallback for `whatWasWrong`; `whatUserWants` stays null (intent
 *     inference is an LLM job; honest null > wrong guess).
 *   - Layer 2 hit only: Layer 2 is the classifier. If it flagged a correction
 *     but supplied no `whatWasWrong`, the deterministic fallback is used here
 *     too, so no branch that reports a correction drops the user's text.
 *   - Neither layer flags a correction: returns null.
 *
 * `enrichmentSource` is 'haiku' when `whatWasWrong` came from Layer 2 and
 * 'deterministic-fallback' otherwise.
 *
 * Pure function — testable in isolation, no LLM mock needed.
 *
 * @param {Object|null} layer1 — Layer 1 result {isCorrection, confidence, correctionType, method, matchedPattern}
 * @param {Object|null} layer2 — Layer 2 (LLM) result {isCorrection, confidence, correctionType, whatWasWrong, whatUserWants}
 * @param {string} trimmed — trimmed user message (for deterministic fallback)
 * @returns {Object|null} reconciled record OR null if not a correction
 */
function reconcileExtraction(layer1, layer2, trimmed) {
  // Both layers ran
  if (layer1 && layer2) {
    const what = layer2.whatWasWrong || deterministicWhatWasWrong(trimmed);
    const wants = layer2.whatUserWants || null;
    return {
      isCorrection: true, // Layer 1 high-precision keyword match wins binary
      confidence: layer1.confidence,
      correctionType: layer1.correctionType || layer2.correctionType || 'behavior',
      whatWasWrong: what,
      whatUserWants: wants,
      method: 'keyword+ai',
      matchedPattern: layer1.matchedPattern,
      enrichmentSource: layer2.whatWasWrong ? 'haiku' : 'deterministic-fallback',
      llmDisagreed: layer2.isCorrection === false,
    };
  }

  // Layer 1 only (Layer 2 unavailable: no API key, network error, etc.)
  if (layer1) {
    return {
      isCorrection: true,
      confidence: layer1.confidence,
      correctionType: layer1.correctionType || 'behavior',
      whatWasWrong: deterministicWhatWasWrong(trimmed),
      whatUserWants: null,
      method: layer1.method,
      matchedPattern: layer1.matchedPattern,
      enrichmentSource: 'deterministic-fallback',
    };
  }

  // Layer 2 only (Layer 1 missed)
  if (layer2 && layer2.isCorrection) {
    // Same rule as the branches above: a flagged correction never persists a
    // null whatWasWrong while the user's literal text is available.
    const llmWhat = layer2.whatWasWrong || null;
    return {
      isCorrection: true,
      confidence: layer2.confidence,
      correctionType: layer2.correctionType || null,
      whatWasWrong: llmWhat || deterministicWhatWasWrong(trimmed),
      whatUserWants: layer2.whatUserWants || null,
      method: 'ai',
      enrichmentSource: llmWhat ? 'haiku' : 'deterministic-fallback',
    };
  }

  // Both missed → not a correction
  return null;
}
|
|
421
|
+
|
|
332
422
|
// ============================================================================
|
|
333
423
|
// AI-Based Detection (Haiku — language-agnostic)
|
|
334
424
|
// ============================================================================
|
|
335
425
|
|
|
336
426
|
/**
|
|
337
427
|
* Detect if a message is a correction using Claude Haiku.
|
|
338
|
-
*
|
|
339
|
-
*
|
|
428
|
+
* Hybrid: Layer 1 keyword classifier (fast) + Layer 2 Haiku enrichment.
|
|
429
|
+
*
|
|
430
|
+
* wf-6c58953a (2026-05-09): Layer 1 hit no longer short-circuits structured
|
|
431
|
+
* extraction. See reconcileExtraction() for the post-fix design rationale.
|
|
340
432
|
*
|
|
341
433
|
* @param {string} userMessage - The user's message
|
|
342
434
|
* @param {string} previousContext - Summary of what the AI was doing
|
|
@@ -354,8 +446,12 @@ async function detectCorrection(userMessage, previousContext = '') {
|
|
|
354
446
|
return { isCorrection: false, confidence: 0, method: 'skipped', reason: 'length-filter' };
|
|
355
447
|
}
|
|
356
448
|
|
|
357
|
-
// Layer 1 (wf-e6d65edf) — keyword pre-classifier.
|
|
449
|
+
// Layer 1 (wf-e6d65edf) — keyword pre-classifier.
|
|
450
|
+
// wf-6c58953a: NO longer short-circuits structured extraction. Layer 1's
|
|
451
|
+
// classification is captured; reconcile with Layer 2 (or deterministic
|
|
452
|
+
// fallback when Layer 2 unavailable) at end.
|
|
358
453
|
const hybridCfg = getHybridConfig();
|
|
454
|
+
let layer1Result = null;
|
|
359
455
|
if (hybridCfg.hybridEnabled) {
|
|
360
456
|
const matched = findKeywordMatch(trimmed);
|
|
361
457
|
if (matched) {
|
|
@@ -368,12 +464,10 @@ async function detectCorrection(userMessage, previousContext = '') {
|
|
|
368
464
|
confidence: conf,
|
|
369
465
|
durationMs: Date.now() - start,
|
|
370
466
|
});
|
|
371
|
-
|
|
467
|
+
layer1Result = {
|
|
372
468
|
isCorrection: true,
|
|
373
469
|
confidence: conf,
|
|
374
470
|
correctionType: 'behavior',
|
|
375
|
-
whatWasWrong: null,
|
|
376
|
-
whatUserWants: null,
|
|
377
471
|
method: 'keyword',
|
|
378
472
|
matchedPattern: matched.phrase,
|
|
379
473
|
};
|
|
@@ -383,6 +477,10 @@ async function detectCorrection(userMessage, previousContext = '') {
|
|
|
383
477
|
// Check if API key is available
|
|
384
478
|
const apiKey = process.env.ANTHROPIC_API_KEY;
|
|
385
479
|
if (!apiKey) {
|
|
480
|
+
// wf-6c58953a: Layer 1 hit + no API key → deterministic fallback (not null)
|
|
481
|
+
if (layer1Result) {
|
|
482
|
+
return reconcileExtraction(layer1Result, null, trimmed);
|
|
483
|
+
}
|
|
386
484
|
return { isCorrection: false, confidence: 0, method: 'skipped', reason: 'no-api-key' };
|
|
387
485
|
}
|
|
388
486
|
|
|
@@ -492,11 +590,19 @@ Respond with JSON only (no markdown, no explanation):
|
|
|
492
590
|
durationMs: Date.now() - start,
|
|
493
591
|
});
|
|
494
592
|
|
|
593
|
+
// wf-6c58953a: reconcile Layer 1 + Layer 2 (or just Layer 2 if Layer 1 missed)
|
|
594
|
+
const reconciled = reconcileExtraction(layer1Result, aiResult, trimmed);
|
|
595
|
+
if (reconciled) return reconciled;
|
|
596
|
+
// Both layers say no-correction
|
|
495
597
|
return aiResult;
|
|
496
598
|
} catch (err) {
|
|
497
599
|
if (process.env.DEBUG) {
|
|
498
600
|
console.error(`[DEBUG] AI correction detection failed: ${err.message}`);
|
|
499
601
|
}
|
|
602
|
+
// wf-6c58953a: Layer 2 failure with Layer 1 hit → deterministic fallback
|
|
603
|
+
if (layer1Result) {
|
|
604
|
+
return reconcileExtraction(layer1Result, null, trimmed);
|
|
605
|
+
}
|
|
500
606
|
return { isCorrection: false, confidence: 0, method: 'ai', reason: err.message };
|
|
501
607
|
}
|
|
502
608
|
}
|
|
@@ -1295,11 +1401,15 @@ function correlateWithPriorGates(correction) {
|
|
|
1295
1401
|
// ============================================================================
|
|
1296
1402
|
|
|
1297
1403
|
module.exports = {
|
|
1298
|
-
// Detection (
|
|
1404
|
+
// Detection (hybrid Layer 1 + Layer 2)
|
|
1299
1405
|
detectCorrection,
|
|
1300
1406
|
batchAnalyzePrompts,
|
|
1301
1407
|
spawnBackgroundDetection,
|
|
1302
1408
|
|
|
1409
|
+
// wf-6c58953a: reconciliation helpers exposed for unit testing + backfill
|
|
1410
|
+
reconcileExtraction,
|
|
1411
|
+
deterministicWhatWasWrong,
|
|
1412
|
+
|
|
1303
1413
|
// Queue management
|
|
1304
1414
|
loadPendingCorrections,
|
|
1305
1415
|
queuePendingCorrection,
|