mindforge-cc 11.2.0 → 11.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mindforge/config.json +3 -2
- package/CHANGELOG.md +37 -1
- package/MINDFORGE.md +5 -5
- package/bin/autonomous/mesh-self-healer.js +101 -28
- package/bin/browser/regression-writer.js +45 -3
- package/bin/browser/session-manager.js +21 -17
- package/bin/engine/logic-drift-detector.js +14 -6
- package/bin/engine/logic-validator.js +155 -25
- package/bin/engine/orbital-guardian.js +56 -10
- package/bin/engine/reason-source-aligner.js +19 -6
- package/bin/engine/remediation-engine.js +1 -1
- package/bin/engine/self-corrective-synthesizer.js +1 -1
- package/bin/engine/sre-manager.js +33 -6
- package/bin/governance/policy-engine.js +17 -4
- package/bin/governance/ztai-archiver.js +74 -9
- package/bin/governance/ztai-manager.js +3 -3
- package/bin/installer-core.js +31 -2
- package/bin/memory/eis-client.js +45 -4
- package/bin/memory/vector-hub.js +32 -0
- package/bin/review/finding-synthesizer.js +35 -6
- package/bin/security/trust-boundaries.js +96 -4
- package/bin/security/trust-gate-hook.js +13 -3
- package/bin/skill-registry.js +31 -20
- package/bin/sre/shadow-mirror.js +90 -40
- package/bin/utils/append-queue.js +12 -0
- package/bin/utils/file-io.js +4 -45
- package/bin/utils/version-check.js +21 -5
- package/package.json +1 -1
package/.mindforge/config.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "11.2.0",
|
|
2
|
+
"version": "11.2.1",
|
|
3
3
|
"environment": "development",
|
|
4
4
|
"governance": {
|
|
5
5
|
"drift_threshold": 0.75,
|
|
6
6
|
"critical_drift_threshold": 0.5,
|
|
7
7
|
"res_threshold": 0.8,
|
|
8
|
-
"active_did": "did:mindforge:
|
|
8
|
+
"active_did": "did:mindforge:cbca5a94-a796-4919-a1a3-d0488f228611"
|
|
9
9
|
},
|
|
10
10
|
"revops": {
|
|
11
11
|
"market_registry": {
|
|
@@ -107,6 +107,7 @@
|
|
|
107
107
|
},
|
|
108
108
|
"cost_routing": {
|
|
109
109
|
"enabled": true,
|
|
110
|
+
"shadow_mode": true,
|
|
110
111
|
"simple_threshold": 3,
|
|
111
112
|
"standard_threshold": 6,
|
|
112
113
|
"complex_threshold": 8,
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,41 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [11.2.1] - 2026-05-31 — "Hardening" (security & integrity audit remediation)
|
|
4
|
+
|
|
5
|
+
Post-v11.2.0 audit remediation. Closes every exploitable security defect and
|
|
6
|
+
false-assurance stub surfaced by the end-to-end audit. No new features, no
|
|
7
|
+
breaking changes — fixes and honest-labeling only.
|
|
8
|
+
|
|
9
|
+
### Security (fixed)
|
|
10
|
+
|
|
11
|
+
- **trust-gate-hook**: scans the whole command + every line (a benign first line could previously cloak a destructive later line)
|
|
12
|
+
- **orbital-guardian**: `verify()` re-checks the Ed25519 attestation signature (added `did`/`signed_message` columns + migration); rejects forged APPROVED rows
|
|
13
|
+
- **policy-engine**: `reasoning_proof` alone no longer bypasses the blast-radius limit (`isProofValid` inits false; cryptographic `pq_proof` path unchanged)
|
|
14
|
+
- **shadow-mirror**: git calls use `execFileSync` (argv) + fail-closed `sanitizeRemediationId()` — closes command injection via `remediation_id`
|
|
15
|
+
- **trust-boundaries `isHighImpact`**: added chmod/chown/dd/mv/kill/shutdown/eval/command-substitution/redirect detection + shell-obfuscation normalization; narrowed interpreter-script pattern to stop false-positives on `node <projectfile>`
|
|
16
|
+
- **eis-client**: `verifyRemoteProvenance` delegates to real ZTAI signature verification, fail-closed (was returning true for any non-empty signature)
|
|
17
|
+
|
|
18
|
+
### Integrity / honest labeling (fixed)
|
|
19
|
+
|
|
20
|
+
- **ztai-archiver**: `verifyIntegrity()` recomputes the Merkle root from the live log (was a no-op `return true`); fail-closed on tamper/delete/reorder
|
|
21
|
+
- **mesh-self-healer**: emits an honest degraded advisory instead of fabricated 94%/100% consensus
|
|
22
|
+
- **logic-validator**: probes Ollama when reachable, honest heuristic fallback; stopped advertising the dead model path
|
|
23
|
+
- **reason-source-aligner**: consistent return shape (uninitialized no longer silently disables the mission-fidelity gate); real Jaccard similarity
|
|
24
|
+
- **sre-manager**: HMAC artifact relabeled as integrity tag, not "ZK-Proof"
|
|
25
|
+
- **installer-core**: PQAS-enabled message gated on the real `experimental.pqc_demo` flag
|
|
26
|
+
- **finding-synthesizer**: detects real severity-gap contradictions (was hardcoded `[]`)
|
|
27
|
+
- **logic-drift-detector**: relabeled heuristic, not "Neural"
|
|
28
|
+
- **session-manager / shadow-mirror(docker) / regression-writer / skill-registry**: honest disclosure instead of silent empties / fake isolation / tautological tests / mock placeholders
|
|
29
|
+
- **MINDFORGE.md**: `[PQAS_ENFORCED]` reconciled to reflect simulated/inactive default; **ztai-manager** logs relabeled `[ZTAI-HSM-SIM]`
|
|
30
|
+
|
|
31
|
+
### CI / hygiene
|
|
32
|
+
|
|
33
|
+
- **release workflow**: asserts the git tag matches `package.json` version (fail-closed) + skips publish if the version is already on npm
|
|
34
|
+
- **version-check**: runtime drift check widened to all 4 sources (was 2); SDK README guarded by test
|
|
35
|
+
- removed dead `AuditRotator` class (CHANGELOG had wrongly claimed it removed); deprecated orphaned `createAppendQueue`
|
|
36
|
+
- refreshed stale `v11.1.0` banners → current; added `cost_routing.shadow_mode` latch
|
|
37
|
+
- version bumped to 11.2.1 across all sources
|
|
38
|
+
|
|
3
39
|
## [11.2.0] - 2026-05-31 — "Verification & Trust"
|
|
4
40
|
|
|
5
41
|
### Added
|
|
@@ -28,7 +64,7 @@
|
|
|
28
64
|
### Removed
|
|
29
65
|
|
|
30
66
|
- Dead `quantum-verify` CLI command entry (no handler existed)
|
|
31
|
-
- Dead `AuditRotator`
|
|
67
|
+
- Dead `AuditRotator` class and its export from `bin/utils/file-io.js` (zero callers; rotation broke the hash chain at rotation boundaries — see `bin/autonomous/audit-writer.js`)
|
|
32
68
|
|
|
33
69
|
### Changed
|
|
34
70
|
|
package/MINDFORGE.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# MINDFORGE.md — Parameter Registry (v11.2.0)
|
|
1
|
+
# MINDFORGE.md — Parameter Registry (v11.2.1)
|
|
2
2
|
|
|
3
3
|
## 1. IDENTITY & VERSIONING
|
|
4
4
|
|
|
5
5
|
[NAME] = MindForge
|
|
6
|
-
[VERSION] = 11.2.0
|
|
6
|
+
[VERSION] = 11.2.1
|
|
7
7
|
[STABLE] = true
|
|
8
8
|
[MODE] = "Platform Sovereign"
|
|
9
|
-
[REQUIRED_CORE_VERSION] = 11.2.0
|
|
9
|
+
[REQUIRED_CORE_VERSION] = 11.2.1
|
|
10
10
|
[SOVEREIGN_IDENTITY] = true
|
|
11
11
|
[SRE_LAYER_ENABLED] = true
|
|
12
12
|
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
[ZTAI_KEY_TYPE] = "Dilithium-5"
|
|
27
27
|
[NEXUS_TRACE_RETENTION_DAYS] = 30
|
|
28
28
|
[CADIA_CORE] = true
|
|
29
|
-
[PQAS_ENFORCED] =
|
|
29
|
+
[PQAS_ENFORCED] = false # PQAS is SIMULATED/inactive by default (config: pqas_enabled=false, gated behind experimental.pqc_demo). Tier-3 trust uses real Ed25519. See .mindforge/config.json + bin/governance/quantum-crypto.js.
|
|
30
30
|
[PROACTIVE_HOMING] = true
|
|
31
31
|
|
|
32
32
|
---
|
|
@@ -100,6 +100,6 @@ The following parameters cannot be overridden by plugins, agents, or session-lev
|
|
|
100
100
|
- [MIN_SOUL_SCORE] — Minimum SOUL score required for architectural changes
|
|
101
101
|
- [BLOCK_ON_SECURITY] — Security gate enforcement cannot be disabled
|
|
102
102
|
- [COST_HARD_LIMIT_USD] — Hard cost limit cannot be raised without human approval
|
|
103
|
-
- [
|
|
103
|
+
- [BLOCK_ON_SECURITY] is non-overridable; PQAS itself is simulated/experimental (inactive by default) and is NOT a non-overridable guarantee — do not rely on it as an enforced control
|
|
104
104
|
- [SOVEREIGN_IDENTITY] — Identity verification is always required
|
|
105
105
|
- [ENABLE_ZTAI] — Zero-trust identity cannot be bypassed
|
|
package/bin/autonomous/mesh-self-healer.js
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MindForge v7 — Proactive Semantic Homing (Pillar XII)
|
|
3
3
|
* Mesh Self-Healer: Peer agents "home in" on drifting nodes to provide collaborative reasoning.
|
|
4
|
+
*
|
|
5
|
+
* UC-22 (audit finding #16) — HONEST LABELLING:
|
|
6
|
+
* There is no live, runtime peer-reasoning mesh in this build. Previously this
|
|
7
|
+
* module FABRICATED a collective consensus (hardcoded peers, canned
|
|
8
|
+
* confidence:94, "100% agreement" log). That emitted false assurance on the
|
|
9
|
+
* live auto-runner self-heal path. It now consults the ONLY real peer source
|
|
10
|
+
* available — the ztai-manager session-agent registry — and, when that yields
|
|
11
|
+
* no real peers (the common case at runtime), degrades GRACEFULLY to a clearly
|
|
12
|
+
* labelled single-source advisory with NO fabricated confidence or consensus.
|
|
4
13
|
*/
|
|
5
14
|
'use strict';
|
|
6
15
|
|
|
7
|
-
const fs = require('node:fs');
|
|
8
16
|
const path = require('node:path');
|
|
9
17
|
|
|
10
18
|
class MeshSelfHealer {
|
|
@@ -14,52 +22,117 @@ class MeshSelfHealer {
|
|
|
14
22
|
|
|
15
23
|
/**
|
|
16
24
|
* Peer agents "home in" on a node with high logic drift.
|
|
25
|
+
*
|
|
26
|
+
* @param {string} driftingAgentDid - DID of the drifting node.
|
|
27
|
+
* @param {number} driftScore - Logic-drift score (only acts on >= 80).
|
|
28
|
+
* @param {object} [options] - { sessionId } to scope the real peer lookup.
|
|
29
|
+
* @returns {Promise<object|null>} Honest advisory object, or null below threshold.
|
|
17
30
|
*/
|
|
18
|
-
async homeIn(driftingAgentDid, driftScore) {
|
|
31
|
+
async homeIn(driftingAgentDid, driftScore, options = {}) {
|
|
19
32
|
if (driftScore < 80) return null; // Only home in on major drift
|
|
20
|
-
|
|
21
|
-
console.log(`[HOMING-HEAL] Global Mesh Alert: Agent ${driftingAgentDid} experiencing critical logic drift (${driftScore}). Peer agents redirecting...`);
|
|
22
|
-
|
|
23
|
-
// Find nearby idle agents or specialists
|
|
24
|
-
const peers = this.findAvailablePeers(driftingAgentDid);
|
|
25
|
-
const healingNodes = [];
|
|
26
33
|
|
|
34
|
+
console.log(`[HOMING-HEAL] Global Mesh Alert: Agent ${driftingAgentDid} experiencing critical logic drift (${driftScore}). Seeking peer reasoning support...`);
|
|
35
|
+
|
|
36
|
+
// Consult the only REAL peer source: the ztai-manager session registry.
|
|
37
|
+
// Returns an empty array when no live peers are registered.
|
|
38
|
+
const peers = this.findAvailablePeers(driftingAgentDid, options.sessionId);
|
|
39
|
+
|
|
40
|
+
if (peers.length === 0) {
|
|
41
|
+
return this.degradedAdvisory(driftingAgentDid);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const healingNodes = [];
|
|
27
45
|
for (const peer of peers) {
|
|
28
|
-
console.log(`[HOMING-HEAL]
|
|
46
|
+
console.log(`[HOMING-HEAL] Peer ${peer.did} homing in on ${driftingAgentDid} to provide reasoning support.`);
|
|
29
47
|
const supportTrace = await this.provideCollectiveReasoning(peer, driftingAgentDid);
|
|
30
48
|
healingNodes.push(supportTrace);
|
|
31
49
|
}
|
|
32
50
|
|
|
33
|
-
return this.reconcileReasoning(healingNodes);
|
|
51
|
+
return this.reconcileReasoning(healingNodes, driftingAgentDid);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Discovers REAL peer agents from the live registry. There are no invented
|
|
56
|
+
* peers — if nothing is registered for the session, this returns []
|
|
57
|
+
* and the caller degrades honestly.
|
|
58
|
+
*
|
|
59
|
+
* @param {string} driftingAgentDid - The node being healed (excluded from peers).
|
|
60
|
+
* @param {string|null} sessionId - Session scope for the registry lookup.
|
|
61
|
+
* @returns {Array<{did:string, persona?:string}>} Real peers (possibly empty).
|
|
62
|
+
*/
|
|
63
|
+
findAvailablePeers(driftingAgentDid, sessionId = null) {
|
|
64
|
+
let agents = [];
|
|
65
|
+
try {
|
|
66
|
+
// Lazy require to avoid a hard coupling / load cost on the cold path.
|
|
67
|
+
const ztaiManager = require('../governance/ztai-manager');
|
|
68
|
+
if (typeof ztaiManager.getSessionAgents === 'function') {
|
|
69
|
+
agents = ztaiManager.getSessionAgents(sessionId) || [];
|
|
70
|
+
}
|
|
71
|
+
} catch {
|
|
72
|
+
// No registry available — treat as no live peers (honest degraded mode).
|
|
73
|
+
agents = [];
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Exclude the drifting node itself; only real, distinct peers may help.
|
|
77
|
+
return agents.filter(a => a && a.did && a.did !== driftingAgentDid);
|
|
34
78
|
}
|
|
35
79
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
80
|
+
/**
|
|
81
|
+
* Honest single-source advisory used when no live peer mesh is available.
|
|
82
|
+
* Carries NO fabricated confidence and makes NO consensus claim.
|
|
83
|
+
*
|
|
84
|
+
* @param {string} driftingAgentDid
|
|
85
|
+
* @returns {object}
|
|
86
|
+
*/
|
|
87
|
+
degradedAdvisory(driftingAgentDid) {
|
|
88
|
+
console.log('[HOMING-HEAL] No live peer mesh available — emitting single-source advisory (degraded).');
|
|
89
|
+
|
|
90
|
+
return {
|
|
91
|
+
type: 'advisory',
|
|
92
|
+
mesh_available: false,
|
|
93
|
+
degraded: true,
|
|
94
|
+
confidence: null,
|
|
95
|
+
consensus: null,
|
|
96
|
+
target: driftingAgentDid,
|
|
97
|
+
recommendation: 'Heuristic single-source steering: pause the drifting node, re-anchor to the last verified plan/spec, and require human or higher-tier review before resuming. No multi-agent consensus was available to corroborate this.',
|
|
98
|
+
source: 'Mesh-Self-Healing (degraded: no live peers)'
|
|
99
|
+
};
|
|
42
100
|
}
|
|
43
101
|
|
|
44
102
|
async provideCollectiveReasoning(peer, target) {
|
|
45
|
-
//
|
|
103
|
+
// A real peer contributes a reasoning node. Confidence is left null here:
|
|
104
|
+
// this build has no model-backed scoring, so we do not invent a number.
|
|
46
105
|
return {
|
|
47
106
|
provider: peer.did,
|
|
48
|
-
target
|
|
49
|
-
reasoning: '
|
|
50
|
-
confidence:
|
|
107
|
+
target,
|
|
108
|
+
reasoning: 'Peer steering note: re-sync drifting node with the last verified plan state.',
|
|
109
|
+
confidence: null
|
|
51
110
|
};
|
|
52
111
|
}
|
|
53
112
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
113
|
+
/**
|
|
114
|
+
* Reconciles multiple REAL peer reasoning nodes. With live peers present we
|
|
115
|
+
* report how many corroborated, but we still never invent a confidence score
|
|
116
|
+
* or a "100% agreement" claim.
|
|
117
|
+
*
|
|
118
|
+
* @param {Array<object>} nodes - Real peer reasoning contributions.
|
|
119
|
+
* @param {string} driftingAgentDid
|
|
120
|
+
* @returns {object}
|
|
121
|
+
*/
|
|
122
|
+
reconcileReasoning(nodes, driftingAgentDid) {
|
|
123
|
+
const peerCount = nodes.length;
|
|
124
|
+
console.log(`[HOMING-HEAL] Collective reasoning gathered from ${peerCount} live peer(s); no confidence fabricated.`);
|
|
125
|
+
|
|
59
126
|
return {
|
|
60
|
-
type: '
|
|
61
|
-
|
|
62
|
-
|
|
127
|
+
type: 'advisory',
|
|
128
|
+
mesh_available: true,
|
|
129
|
+
degraded: false,
|
|
130
|
+
confidence: null, // No model-backed scoring in this build — stay honest.
|
|
131
|
+
consensus: nodes[0].reasoning,
|
|
132
|
+
peer_count: peerCount,
|
|
133
|
+
target: driftingAgentDid,
|
|
134
|
+
recommendation: nodes[0].reasoning,
|
|
135
|
+
source: `Mesh-Self-Healing (${peerCount} live peer(s))`
|
|
63
136
|
};
|
|
64
137
|
}
|
|
65
138
|
}
|
|
package/bin/browser/regression-writer.js
CHANGED
|
@@ -10,12 +10,54 @@ function write(bug, phaseNum) {
|
|
|
10
10
|
const dir = path.join(process.cwd(), 'tests', 'regression');
|
|
11
11
|
fs.mkdirSync(dir, { recursive: true });
|
|
12
12
|
const name = `phase${phaseNum}-${bug.surface.replace(/\//g, '-').slice(1) || 'home'}.test.ts`;
|
|
13
|
+
|
|
14
|
+
// Embed the bug's surface and failure signal as safely-escaped JS string
|
|
15
|
+
// literals. JSON.stringify escapes quotes, backticks and ${...} so a
|
|
16
|
+
// freeform bug.error cannot break out of the generated source.
|
|
17
|
+
const surfaceLit = JSON.stringify(bug.surface);
|
|
18
|
+
const errorLit = JSON.stringify(bug.error);
|
|
19
|
+
|
|
20
|
+
// The generated test reproduces the original failure conditions and asserts
|
|
21
|
+
// the page no longer exhibits THIS bug's signal — it is NOT a body-visibility
|
|
22
|
+
// tautology that passes for any page.
|
|
13
23
|
const content = `
|
|
14
24
|
import { test, expect } from '@playwright/test';
|
|
15
25
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
26
|
+
// Regression guard for the bug originally observed on ${bug.surface}:
|
|
27
|
+
// ${String(bug.error).replace(/[\r\n]+/g, ' ')}
|
|
28
|
+
// This test fails again if that failure signal re-appears (console error,
|
|
29
|
+
// page text, or a >=400 HTTP status on the affected surface).
|
|
30
|
+
const SURFACE = ${surfaceLit};
|
|
31
|
+
const BUG_SIGNAL = ${errorLit};
|
|
32
|
+
|
|
33
|
+
test('Regression: ' + SURFACE + ' [' + BUG_SIGNAL + ']', async ({ page }) => {
|
|
34
|
+
const consoleErrors: string[] = [];
|
|
35
|
+
page.on('console', (msg) => {
|
|
36
|
+
if (msg.type() === 'error') consoleErrors.push(msg.text());
|
|
37
|
+
});
|
|
38
|
+
page.on('pageerror', (err) => consoleErrors.push(String(err)));
|
|
39
|
+
|
|
40
|
+
const response = await page.goto(SURFACE);
|
|
41
|
+
|
|
42
|
+
// 1. The affected surface must load without the original HTTP failure.
|
|
43
|
+
if (response) {
|
|
44
|
+
expect(response.status(), 'surface re-returned a failing HTTP status').toBeLessThan(400);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// 2. The specific failure signal must not re-appear in the console.
|
|
48
|
+
expect(
|
|
49
|
+
consoleErrors.some((line) => line.includes(BUG_SIGNAL)),
|
|
50
|
+
'console re-emitted the original error: ' + BUG_SIGNAL
|
|
51
|
+
).toBeFalsy();
|
|
52
|
+
|
|
53
|
+
// 3. ...nor be surfaced in the rendered page text.
|
|
54
|
+
const bodyText = await page.textContent('body');
|
|
55
|
+
expect(
|
|
56
|
+
(bodyText || '').includes(BUG_SIGNAL),
|
|
57
|
+
'page re-rendered the original error: ' + BUG_SIGNAL
|
|
58
|
+
).toBeFalsy();
|
|
59
|
+
|
|
60
|
+
// 4. Smoke check: the page actually rendered something.
|
|
19
61
|
expect(await page.isVisible('body')).toBeTruthy();
|
|
20
62
|
});
|
|
21
63
|
`;
|
|
package/bin/browser/session-manager.js
CHANGED
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
|
|
8
8
|
const fs = require('fs');
|
|
9
9
|
const path = require('path');
|
|
10
|
-
const os = require('os');
|
|
11
10
|
|
|
12
11
|
const SESSIONS_DIR = path.join(process.cwd(), '.mindforge', 'browser', 'sessions');
|
|
13
12
|
const ensureDir = () => fs.mkdirSync(SESSIONS_DIR, { recursive: true });
|
|
@@ -71,23 +70,28 @@ async function loadSession(name, context) {
|
|
|
71
70
|
return { cookiesLoaded };
|
|
72
71
|
}
|
|
73
72
|
|
|
73
|
+
/**
|
|
74
|
+
* Import cookies/sessions directly from a native browser profile.
|
|
75
|
+
*
|
|
76
|
+
* NOT IMPLEMENTED: native browser cookie DB import was removed together with
|
|
77
|
+
* the `better-sqlite3` dependency (the project now uses sql.js / WASM). Browser
|
|
78
|
+
* cookie stores are SQLite databases, and decoding them required that native
|
|
79
|
+
* backend. Rather than silently returning an empty array — which would lie about
|
|
80
|
+
* capability and let callers mistake "no cookies imported" for success — this
|
|
81
|
+
* method throws so the missing capability is explicit.
|
|
82
|
+
*
|
|
83
|
+
* To populate a session, capture cookies live via a browser context and use
|
|
84
|
+
* `saveSession` / `loadSession` instead.
|
|
85
|
+
*
|
|
86
|
+
* @param {string} source - Browser identifier (chrome, arc, brave, edge).
|
|
87
|
+
* @throws {Error} Always — native browser cookie import is not implemented.
|
|
88
|
+
*/
|
|
74
89
|
function importFromBrowser(source) {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
edge: `${home}/Library/Application Support/Microsoft Edge/Default/Cookies`,
|
|
81
|
-
};
|
|
82
|
-
|
|
83
|
-
const p = paths[source.toLowerCase()];
|
|
84
|
-
if (!p || !fs.existsSync(p)) {
|
|
85
|
-
throw new Error(`Cookie file for ${source} not found at ${p}`);
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
// Real SQLite parsing would happen here via better-sqlite3 if installed.
|
|
89
|
-
// This is a placeholder for the logic specified in the roadmap.
|
|
90
|
-
return [];
|
|
90
|
+
throw new Error(
|
|
91
|
+
`importFromBrowser not implemented for "${source}": the native browser ` +
|
|
92
|
+
'cookie-DB backend (better-sqlite3) was removed project-wide. ' +
|
|
93
|
+
'Capture cookies live via a browser context and use saveSession/loadSession instead.'
|
|
94
|
+
);
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
module.exports = { saveSession, loadSession, importFromBrowser };
|
|
package/bin/engine/logic-drift-detector.js
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* MindForge v6.1.0-alpha —
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* MindForge v6.1.0-alpha — Logic Drift Detector (Pillar X)
|
|
3
|
+
*
|
|
4
|
+
* HEURISTIC drift detector. Despite the "Pillar X" product naming, this
|
|
5
|
+
* component does NOT use a neural network, embeddings, or any learned model.
|
|
6
|
+
* It scores reasoning traces using pure keyword/ratio heuristics:
|
|
7
|
+
* - unique-word-to-total ratio (proxy for "rambling")
|
|
8
|
+
* - max word-repetition count (proxy for circular reasoning)
|
|
9
|
+
* - presence of a small hardcoded list of contradiction phrases
|
|
10
|
+
*
|
|
11
|
+
* Flags "Semantic Decay" (repeated failure patterns, contradiction markers,
|
|
12
|
+
* or mission drift) heuristically. No model inference is performed.
|
|
7
13
|
*/
|
|
8
14
|
'use strict';
|
|
9
15
|
|
|
@@ -48,7 +54,9 @@ class LogicDriftDetector {
|
|
|
48
54
|
}
|
|
49
55
|
|
|
50
56
|
/**
|
|
51
|
-
* Internal Heuristic:
|
|
57
|
+
* Internal Heuristic: approximates "rambling" via a unique-keyword-to-word
|
|
58
|
+
* ratio. NOTE: this is NOT a semantic/embedding measure — "density" here is
|
|
59
|
+
* a plain lexical ratio, not model-derived semantic similarity.
|
|
52
60
|
*/
|
|
53
61
|
_calculateSemanticDensity(thought) {
|
|
54
62
|
const words = thought.split(/\s+/).length;
|
|
package/bin/engine/logic-validator.js
CHANGED
|
@@ -1,67 +1,197 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* MindForge v7 — Neural Drift Remediation (NDR)
|
|
3
3
|
* Component: Logic Validator
|
|
4
|
-
*
|
|
4
|
+
*
|
|
5
5
|
* Performs high-level semantic validation on agent reasoning traces.
|
|
6
|
-
*
|
|
6
|
+
*
|
|
7
|
+
* Strategy: real-when-available, else honest heuristic.
|
|
8
|
+
* - By DEFAULT this validator uses a local Self-Reflective Heuristic
|
|
9
|
+
* (`_reflectiveHeuristic`). This is the standard path and runs everywhere,
|
|
10
|
+
* with no external dependency.
|
|
11
|
+
* - OPTIONALLY, if a local Ollama model is actually reachable at the
|
|
12
|
+
* configured endpoint, validation is upgraded to a real model call
|
|
13
|
+
* (`_modelValidation`). Reachability is determined by a real, fail-fast
|
|
14
|
+
* network probe — never a hardcoded flag. When Ollama is absent (the
|
|
15
|
+
* normal case) the probe fails fast and we fall back to the heuristic.
|
|
16
|
+
*
|
|
17
|
+
* The return shape is stable: { is_valid, confidence, critique, method }.
|
|
18
|
+
* Consumers (nexus-tracer) read `is_valid` and `critique`.
|
|
7
19
|
*/
|
|
8
20
|
'use strict';
|
|
9
21
|
|
|
10
22
|
const configManager = require('../governance/config-manager');
|
|
11
23
|
|
|
24
|
+
// Fail-fast budget for the reachability probe and the model call. Ollama is
|
|
25
|
+
// usually absent, so this must time out quickly to avoid hanging CI/production.
|
|
26
|
+
const PROBE_TIMEOUT_MS = 400;
|
|
27
|
+
const MODEL_TIMEOUT_MS = 4000;
|
|
28
|
+
|
|
12
29
|
class LogicValidator {
|
|
13
30
|
constructor() {
|
|
14
31
|
this.endpoint = configManager.get('governance.local_model_endpoint', 'localhost:11434');
|
|
15
|
-
this.
|
|
32
|
+
this.model = configManager.get('governance.local_model_name', 'llama3');
|
|
33
|
+
// Reflects reality: set by probeModel(), not hardcoded. Unknown until probed.
|
|
34
|
+
this.isModelAvailable = false;
|
|
35
|
+
this._probed = false;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Normalises the configured endpoint into a base URL (adds scheme if absent).
|
|
40
|
+
* @returns {string}
|
|
41
|
+
*/
|
|
42
|
+
_baseUrl() {
|
|
43
|
+
const ep = String(this.endpoint || 'localhost:11434').trim();
|
|
44
|
+
return /^https?:\/\//i.test(ep) ? ep.replace(/\/+$/, '') : `http://${ep.replace(/\/+$/, '')}`;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Resets cached probe state (used by tests to re-probe after changing endpoint).
|
|
49
|
+
*/
|
|
50
|
+
resetProbe() {
|
|
51
|
+
this._probed = false;
|
|
52
|
+
this.isModelAvailable = false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Real, fail-fast reachability check for a local Ollama instance.
|
|
57
|
+
* Performs a short GET to the Ollama tags endpoint. On ANY error or timeout
|
|
58
|
+
* (the normal case when Ollama is absent) it resolves `false` — never throws,
|
|
59
|
+
* never hangs. Sets `this.isModelAvailable` from the actual result.
|
|
60
|
+
* @returns {Promise<boolean>}
|
|
61
|
+
*/
|
|
62
|
+
async probeModel() {
|
|
63
|
+
let reachable = false;
|
|
64
|
+
try {
|
|
65
|
+
const res = await fetch(`${this._baseUrl()}/api/tags`, {
|
|
66
|
+
method: 'GET',
|
|
67
|
+
signal: AbortSignal.timeout(PROBE_TIMEOUT_MS)
|
|
68
|
+
});
|
|
69
|
+
reachable = res.ok;
|
|
70
|
+
} catch {
|
|
71
|
+
// ECONNREFUSED / timeout / DNS / abort — Ollama not reachable. Stay quiet.
|
|
72
|
+
reachable = false;
|
|
73
|
+
}
|
|
74
|
+
this.isModelAvailable = reachable;
|
|
75
|
+
this._probed = true;
|
|
76
|
+
return reachable;
|
|
16
77
|
}
|
|
17
78
|
|
|
18
79
|
/**
|
|
19
80
|
* Validates a reasoning trace using the best available method.
|
|
81
|
+
* Probes for a local model on first call (lazy); falls back to the heuristic
|
|
82
|
+
* when unreachable.
|
|
20
83
|
* @param {string} thought - The agent's thought string
|
|
21
84
|
* @param {Object} context - Optional metadata (span attributes, etc.)
|
|
22
85
|
*/
|
|
23
86
|
async validate(thought, context = {}) {
|
|
24
|
-
|
|
87
|
+
const spanTag = context && context.span_id ? ` span=${context.span_id}` : '';
|
|
88
|
+
console.log(`[LogicValidator] Validating trace segment (Length: ${thought.length})${spanTag}`);
|
|
89
|
+
|
|
90
|
+
if (!this._probed) {
|
|
91
|
+
await this.probeModel();
|
|
92
|
+
}
|
|
25
93
|
|
|
26
|
-
// In a real v7 deployment, we would perform an asynchronous fetch to Ollama/Llama-CPP
|
|
27
|
-
// For this simulation, we simulate a "Reflective Heuristic" analysis.
|
|
28
|
-
|
|
29
94
|
if (this.isModelAvailable) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
95
|
+
try {
|
|
96
|
+
return await this._modelValidation(thought);
|
|
97
|
+
} catch {
|
|
98
|
+
// Model became unreachable mid-flight — degrade honestly to heuristic.
|
|
99
|
+
this.isModelAvailable = false;
|
|
100
|
+
return this._reflectiveHeuristic(thought);
|
|
101
|
+
}
|
|
33
102
|
}
|
|
103
|
+
return this._reflectiveHeuristic(thought);
|
|
34
104
|
}
|
|
35
105
|
|
|
36
106
|
/**
|
|
37
|
-
*
|
|
107
|
+
* Real Local Model Validation via Ollama's /api/generate.
|
|
108
|
+
* Asks the model whether the thought is logical and grounded, then derives a
|
|
109
|
+
* real is_valid/confidence from the response — no fabricated fixed values.
|
|
38
110
|
*/
|
|
39
|
-
async _modelValidation(thought
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
confidence: 0
|
|
44
|
-
critique: '
|
|
45
|
-
|
|
111
|
+
async _modelValidation(thought) {
|
|
112
|
+
const prompt =
|
|
113
|
+
'You are a reasoning-trace auditor. Decide whether the following agent ' +
|
|
114
|
+
'thought is logical and grounded (consistent, on-task, no self-contradiction).\n' +
|
|
115
|
+
'Reply with ONLY a JSON object: {"valid": <true|false>, "confidence": <0..1>, ' +
|
|
116
|
+
'"critique": "<short reason>"}.\n\n' +
|
|
117
|
+
`Thought: """${thought}"""`;
|
|
118
|
+
|
|
119
|
+
const res = await fetch(`${this._baseUrl()}/api/generate`, {
|
|
120
|
+
method: 'POST',
|
|
121
|
+
headers: { 'Content-Type': 'application/json' },
|
|
122
|
+
body: JSON.stringify({ model: this.model, prompt, stream: false }),
|
|
123
|
+
signal: AbortSignal.timeout(MODEL_TIMEOUT_MS)
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
if (!res.ok) {
|
|
127
|
+
throw new Error(`Ollama responded ${res.status}`);
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
const payload = await res.json();
|
|
131
|
+
const parsed = this._parseModelResponse(payload && payload.response);
|
|
132
|
+
|
|
133
|
+
return {
|
|
134
|
+
is_valid: parsed.valid,
|
|
135
|
+
confidence: parsed.confidence,
|
|
136
|
+
critique: parsed.critique,
|
|
137
|
+
method: `ollama:${this.model}`
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Robustly parses the model's textual response into a verdict. Falls back to
|
|
143
|
+
* conservative defaults derived from the raw text when JSON is unavailable —
|
|
144
|
+
* never invents a fixed high-confidence pass.
|
|
145
|
+
* @param {string} raw
|
|
146
|
+
*/
|
|
147
|
+
_parseModelResponse(raw) {
|
|
148
|
+
const text = String(raw || '');
|
|
149
|
+
const match = text.match(/\{[\s\S]*\}/);
|
|
150
|
+
if (match) {
|
|
151
|
+
try {
|
|
152
|
+
const obj = JSON.parse(match[0]);
|
|
153
|
+
const valid = obj.valid === true || obj.valid === 'true';
|
|
154
|
+
let confidence = Number(obj.confidence);
|
|
155
|
+
if (!Number.isFinite(confidence)) confidence = valid ? 0.6 : 0.4;
|
|
156
|
+
confidence = Math.min(1, Math.max(0, confidence));
|
|
157
|
+
const critique = typeof obj.critique === 'string' && obj.critique.trim()
|
|
158
|
+
? obj.critique.trim()
|
|
159
|
+
: (valid ? 'Model judged the thought logical and grounded.'
|
|
160
|
+
: 'Model flagged the thought as illogical or ungrounded.');
|
|
161
|
+
return { valid, confidence, critique };
|
|
162
|
+
} catch {
|
|
163
|
+
// fall through to text heuristic below
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// No parseable JSON: derive a conservative verdict from the raw text.
|
|
168
|
+
const lowered = text.toLowerCase();
|
|
169
|
+
const valid = !/(invalid|illogical|not\s+grounded|inconsistent|"valid"\s*:\s*false)/.test(lowered)
|
|
170
|
+
&& /(valid|logical|grounded|consistent)/.test(lowered);
|
|
171
|
+
return {
|
|
172
|
+
valid,
|
|
173
|
+
confidence: valid ? 0.55 : 0.45,
|
|
174
|
+
critique: 'Model response was unstructured; verdict derived from text.'
|
|
46
175
|
};
|
|
47
|
-
return result;
|
|
48
176
|
}
|
|
49
177
|
|
|
50
178
|
/**
|
|
51
|
-
*
|
|
179
|
+
* Local Self-Reflective Heuristic — the default validation path. More
|
|
180
|
+
* intensive than the DriftDetector; uses self-doubt and goal-misalignment
|
|
181
|
+
* markers. Honestly labelled as a heuristic (no model is involved here).
|
|
52
182
|
*/
|
|
53
|
-
async _reflectiveHeuristic(thought
|
|
54
|
-
const t = thought.toLowerCase();
|
|
55
|
-
|
|
183
|
+
async _reflectiveHeuristic(thought) {
|
|
184
|
+
const t = String(thought || '').toLowerCase();
|
|
185
|
+
|
|
56
186
|
// Check for "Self-Doubt" markers that might indicate drift
|
|
57
187
|
const doubtMarkers = ['i am not sure', 'maybe i should wait', 'actually, i forgot', 'i will instead try to just'];
|
|
58
188
|
const doubtCount = doubtMarkers.filter(m => t.includes(m)).length;
|
|
59
189
|
|
|
60
|
-
// Check for "Goal Misalignment"
|
|
190
|
+
// Check for "Goal Misalignment"
|
|
61
191
|
const goalMismatch = t.includes('ignoring current goal') || t.includes('outside scope');
|
|
62
192
|
|
|
63
193
|
const score = 1.0 - (doubtCount * 0.2) - (goalMismatch ? 0.5 : 0);
|
|
64
|
-
|
|
194
|
+
|
|
65
195
|
return {
|
|
66
196
|
is_valid: score > 0.6,
|
|
67
197
|
confidence: parseFloat(score.toFixed(2)),
|