mindforge-cc 11.2.0 → 11.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,11 @@
1
1
  {
2
- "version": "11.2.0",
2
+ "version": "11.2.1",
3
3
  "environment": "development",
4
4
  "governance": {
5
5
  "drift_threshold": 0.75,
6
6
  "critical_drift_threshold": 0.5,
7
7
  "res_threshold": 0.8,
8
- "active_did": "did:mindforge:0f5f4777-ffd4-4aef-aa46-e8eb34c0e8c0"
8
+ "active_did": "did:mindforge:cbca5a94-a796-4919-a1a3-d0488f228611"
9
9
  },
10
10
  "revops": {
11
11
  "market_registry": {
@@ -107,6 +107,7 @@
107
107
  },
108
108
  "cost_routing": {
109
109
  "enabled": true,
110
+ "shadow_mode": true,
110
111
  "simple_threshold": 3,
111
112
  "standard_threshold": 6,
112
113
  "complex_threshold": 8,
package/CHANGELOG.md CHANGED
@@ -1,5 +1,41 @@
1
1
  # Changelog
2
2
 
3
+ ## [11.2.1] - 2026-05-31 — "Hardening" (security & integrity audit remediation)
4
+
5
+ Post-v11.2.0 audit remediation. Closes every exploitable security defect and
6
+ false-assurance stub surfaced by the end-to-end audit. No new features, no
7
+ breaking changes — fixes and honest-labeling only.
8
+
9
+ ### Security (fixed)
10
+
11
+ - **trust-gate-hook**: scans the whole command + every line (a benign first line could previously cloak a destructive later line)
12
+ - **orbital-guardian**: `verify()` re-checks the Ed25519 attestation signature (added `did`/`signed_message` columns + migration); rejects forged APPROVED rows
13
+ - **policy-engine**: `reasoning_proof` alone no longer bypasses the blast-radius limit (`isProofValid` inits false; cryptographic `pq_proof` path unchanged)
14
+ - **shadow-mirror**: git calls use `execFileSync` (argv) + fail-closed `sanitizeRemediationId()` — closes command injection via `remediation_id`
15
+ - **trust-boundaries `isHighImpact`**: added chmod/chown/dd/mv/kill/shutdown/eval/command-substitution/redirect detection + shell-obfuscation normalization; narrowed interpreter-script pattern to stop false-positives on `node <projectfile>`
16
+ - **eis-client**: `verifyRemoteProvenance` delegates to real ZTAI signature verification, fail-closed (was returning true for any non-empty signature)
17
+
18
+ ### Integrity / honest labeling (fixed)
19
+
20
+ - **ztai-archiver**: `verifyIntegrity()` recomputes the Merkle root from the live log (was a no-op `return true`); fail-closed on tamper/delete/reorder
21
+ - **mesh-self-healer**: emits an honest degraded advisory instead of fabricated 94%/100% consensus
22
+ - **logic-validator**: probes Ollama when reachable, honest heuristic fallback; stopped advertising the dead model path
23
+ - **reason-source-aligner**: consistent return shape (uninitialized no longer silently disables the mission-fidelity gate); real Jaccard similarity
24
+ - **sre-manager**: HMAC artifact relabeled as integrity tag, not "ZK-Proof"
25
+ - **installer-core**: PQAS-enabled message gated on the real `experimental.pqc_demo` flag
26
+ - **finding-synthesizer**: detects real severity-gap contradictions (was hardcoded `[]`)
27
+ - **logic-drift-detector**: relabeled heuristic, not "Neural"
28
+ - **session-manager / shadow-mirror(docker) / regression-writer / skill-registry**: honest disclosure instead of silent empties / fake isolation / tautological tests / mock placeholders
29
+ - **MINDFORGE.md**: `[PQAS_ENFORCED]` reconciled to reflect simulated/inactive default; **ztai-manager** logs relabeled `[ZTAI-HSM-SIM]`
30
+
31
+ ### CI / hygiene
32
+
33
+ - **release workflow**: asserts the git tag matches `package.json` version (fail-closed) + skips publish if the version is already on npm
34
+ - **version-check**: runtime drift check widened to all 4 sources (was 2); SDK README guarded by test
35
+ - removed dead `AuditRotator` class (CHANGELOG had wrongly claimed it removed); deprecated orphaned `createAppendQueue`
36
+ - refreshed stale `v11.1.0` banners → current; added `cost_routing.shadow_mode` latch
37
+ - version bumped to 11.2.1 across all sources
38
+
3
39
  ## [11.2.0] - 2026-05-31 — "Verification & Trust"
4
40
 
5
41
  ### Added
@@ -28,7 +64,7 @@
28
64
  ### Removed
29
65
 
30
66
  - Dead `quantum-verify` CLI command entry (no handler existed)
31
- - Dead `AuditRotator` references (broke hash chain on rotation boundaries)
67
+ - Dead `AuditRotator` class and its export from `bin/utils/file-io.js` (zero callers; rotation broke the hash chain at rotation boundaries — see `bin/autonomous/audit-writer.js`)
32
68
 
33
69
  ### Changed
34
70
 
package/MINDFORGE.md CHANGED
@@ -1,12 +1,12 @@
1
- # MINDFORGE.md — Parameter Registry (v11.2.0)
1
+ # MINDFORGE.md — Parameter Registry (v11.2.1)
2
2
 
3
3
  ## 1. IDENTITY & VERSIONING
4
4
 
5
5
  [NAME] = MindForge
6
- [VERSION] = 11.2.0
6
+ [VERSION] = 11.2.1
7
7
  [STABLE] = true
8
8
  [MODE] = "Platform Sovereign"
9
- [REQUIRED_CORE_VERSION] = 11.2.0
9
+ [REQUIRED_CORE_VERSION] = 11.2.1
10
10
  [SOVEREIGN_IDENTITY] = true
11
11
  [SRE_LAYER_ENABLED] = true
12
12
 
@@ -26,7 +26,7 @@
26
26
  [ZTAI_KEY_TYPE] = "Dilithium-5"
27
27
  [NEXUS_TRACE_RETENTION_DAYS] = 30
28
28
  [CADIA_CORE] = true
29
- [PQAS_ENFORCED] = true
29
+ [PQAS_ENFORCED] = false # PQAS is SIMULATED/inactive by default (config: pqas_enabled=false, gated behind experimental.pqc_demo). Tier-3 trust uses real Ed25519. See .mindforge/config.json + bin/governance/quantum-crypto.js.
30
30
  [PROACTIVE_HOMING] = true
31
31
 
32
32
  ---
@@ -100,6 +100,6 @@ The following parameters cannot be overridden by plugins, agents, or session-lev
100
100
  - [MIN_SOUL_SCORE] — Minimum SOUL score required for architectural changes
101
101
  - [BLOCK_ON_SECURITY] — Security gate enforcement cannot be disabled
102
102
  - [COST_HARD_LIMIT_USD] — Hard cost limit cannot be raised without human approval
103
- - [PQAS_ENFORCED] Post-quantum security cannot be disabled
103
+ - NOTE: [PQAS_ENFORCED] is intentionally NOT in this list — PQAS is simulated/experimental (inactive by default) and is NOT a non-overridable guarantee; do not rely on it as an enforced control. [BLOCK_ON_SECURITY] (listed above) remains non-overridable.
104
104
  - [SOVEREIGN_IDENTITY] — Identity verification is always required
105
105
  - [ENABLE_ZTAI] — Zero-trust identity cannot be bypassed
@@ -1,10 +1,18 @@
1
1
  /**
2
2
  * MindForge v7 — Proactive Semantic Homing (Pillar XII)
3
3
  * Mesh Self-Healer: Peer agents "home in" on drifting nodes to provide collaborative reasoning.
4
+ *
5
+ * UC-22 (audit finding #16) — HONEST LABELLING:
6
+ * There is no live, runtime peer-reasoning mesh in this build. Previously this
7
+ * module FABRICATED a collective consensus (hardcoded peers, canned
8
+ * confidence:94, "100% agreement" log). That emitted false assurance on the
9
+ * live auto-runner self-heal path. It now consults the ONLY real peer source
10
+ * available — the ztai-manager session-agent registry — and, when that yields
11
+ * no real peers (the common case at runtime), degrades GRACEFULLY to a clearly
12
+ * labelled single-source advisory with NO fabricated confidence or consensus.
4
13
  */
5
14
  'use strict';
6
15
 
7
- const fs = require('node:fs');
8
16
  const path = require('node:path');
9
17
 
10
18
  class MeshSelfHealer {
@@ -14,52 +22,117 @@ class MeshSelfHealer {
14
22
 
15
23
  /**
16
24
  * Peer agents "home in" on a node with high logic drift.
25
+ *
26
+ * @param {string} driftingAgentDid - DID of the drifting node.
27
+ * @param {number} driftScore - Logic-drift score (only acts on >= 80).
28
+ * @param {object} [options] - { sessionId } to scope the real peer lookup.
29
+ * @returns {Promise<object|null>} Honest advisory object, or null below threshold.
17
30
  */
18
- async homeIn(driftingAgentDid, driftScore) {
31
+ async homeIn(driftingAgentDid, driftScore, options = {}) {
19
32
  if (driftScore < 80) return null; // Only home in on major drift
20
-
21
- console.log(`[HOMING-HEAL] Global Mesh Alert: Agent ${driftingAgentDid} experiencing critical logic drift (${driftScore}). Peer agents redirecting...`);
22
-
23
- // Find nearby idle agents or specialists
24
- const peers = this.findAvailablePeers(driftingAgentDid);
25
- const healingNodes = [];
26
33
 
34
+ console.log(`[HOMING-HEAL] Global Mesh Alert: Agent ${driftingAgentDid} experiencing critical logic drift (${driftScore}). Seeking peer reasoning support...`);
35
+
36
+ // Consult the only REAL peer source: the ztai-manager session registry.
37
+ // Returns an empty array when no live peers are registered.
38
+ const peers = this.findAvailablePeers(driftingAgentDid, options.sessionId);
39
+
40
+ if (peers.length === 0) {
41
+ return this.degradedAdvisory(driftingAgentDid);
42
+ }
43
+
44
+ const healingNodes = [];
27
45
  for (const peer of peers) {
28
- console.log(`[HOMING-HEAL] Agent ${peer.did} homing in on ${driftingAgentDid} to provide collective reasoning support.`);
46
+ console.log(`[HOMING-HEAL] Peer ${peer.did} homing in on ${driftingAgentDid} to provide reasoning support.`);
29
47
  const supportTrace = await this.provideCollectiveReasoning(peer, driftingAgentDid);
30
48
  healingNodes.push(supportTrace);
31
49
  }
32
50
 
33
- return this.reconcileReasoning(healingNodes);
51
+ return this.reconcileReasoning(healingNodes, driftingAgentDid);
52
+ }
53
+
54
+ /**
55
+ * Discovers REAL peer agents from the live registry. There are no invented
56
+ * peers — if nothing is registered for the session, this returns []
57
+ * and the caller degrades honestly.
58
+ *
59
+ * @param {string} driftingAgentDid - The node being healed (excluded from peers).
60
+ * @param {string|null} sessionId - Session scope for the registry lookup.
61
+ * @returns {Array<{did:string, persona?:string}>} Real peers (possibly empty).
62
+ */
63
+ findAvailablePeers(driftingAgentDid, sessionId = null) {
64
+ let agents = [];
65
+ try {
66
+ // Lazy require to avoid a hard coupling / load cost on the cold path.
67
+ const ztaiManager = require('../governance/ztai-manager');
68
+ if (typeof ztaiManager.getSessionAgents === 'function') {
69
+ agents = ztaiManager.getSessionAgents(sessionId) || [];
70
+ }
71
+ } catch {
72
+ // No registry available — treat as no live peers (honest degraded mode).
73
+ agents = [];
74
+ }
75
+
76
+ // Exclude the drifting node itself; only real, distinct peers may help.
77
+ return agents.filter(a => a && a.did && a.did !== driftingAgentDid);
34
78
  }
35
79
 
36
- findAvailablePeers(driftingAgentDid) {
37
- // Simulated peer discovery
38
- return [
39
- { did: 'did:mindforge:peer-1-specialist', name: 'Refactor Specialist' },
40
- { did: 'did:mindforge:peer-2-architect', name: 'Security Architect' }
41
- ];
80
+ /**
81
+ * Honest single-source advisory used when no live peer mesh is available.
82
+ * Carries NO fabricated confidence and makes NO consensus claim.
83
+ *
84
+ * @param {string} driftingAgentDid
85
+ * @returns {object}
86
+ */
87
+ degradedAdvisory(driftingAgentDid) {
88
+ console.log('[HOMING-HEAL] No live peer mesh available — emitting single-source advisory (degraded).');
89
+
90
+ return {
91
+ type: 'advisory',
92
+ mesh_available: false,
93
+ degraded: true,
94
+ confidence: null,
95
+ consensus: null,
96
+ target: driftingAgentDid,
97
+ recommendation: 'Heuristic single-source steering: pause the drifting node, re-anchor to the last verified plan/spec, and require human or higher-tier review before resuming. No multi-agent consensus was available to corroborate this.',
98
+ source: 'Mesh-Self-Healing (degraded: no live peers)'
99
+ };
42
100
  }
43
101
 
44
102
  async provideCollectiveReasoning(peer, target) {
45
- // Peer agent provides a second opinion/reasoning node
103
+ // A real peer contributes a reasoning node. Confidence is left null here:
104
+ // this build has no model-backed scoring, so we do not invent a number.
46
105
  return {
47
106
  provider: peer.did,
48
- target: target,
49
- reasoning: 'Recommended steering for drift recovery based on Mesh-State: Re-syncing with Sovereign-Reason-Enclave.',
50
- confidence: 94
107
+ target,
108
+ reasoning: 'Peer steering note: re-sync drifting node with the last verified plan state.',
109
+ confidence: null
51
110
  };
52
111
  }
53
112
 
54
- reconcileReasoning(nodes) {
55
- // Merge peer reasoning nodes into a single corrective steering vector
56
- const consensus = nodes[0].reasoning; // Mock consensus
57
- console.log('[HOMING-HEAL] Collective reasoning consensus acheived: 100% agreement on recovery vector.');
58
-
113
+ /**
114
+ * Reconciles multiple REAL peer reasoning nodes. With live peers present we
115
+ * report how many corroborated, but we still never invent a confidence score
116
+ * or a "100% agreement" claim.
117
+ *
118
+ * @param {Array<object>} nodes - Real peer reasoning contributions.
119
+ * @param {string} driftingAgentDid
120
+ * @returns {object}
121
+ */
122
+ reconcileReasoning(nodes, driftingAgentDid) {
123
+ const peerCount = nodes.length;
124
+ console.log(`[HOMING-HEAL] Collective reasoning gathered from ${peerCount} live peer(s); no confidence fabricated.`);
125
+
59
126
  return {
60
- type: 'collective_repair',
61
- consensus,
62
- source: 'Mesh-Self-Healing'
127
+ type: 'advisory',
128
+ mesh_available: true,
129
+ degraded: false,
130
+ confidence: null, // No model-backed scoring in this build — stay honest.
131
+ consensus: nodes[0].reasoning,
132
+ peer_count: peerCount,
133
+ target: driftingAgentDid,
134
+ recommendation: nodes[0].reasoning,
135
+ source: `Mesh-Self-Healing (${peerCount} live peer(s))`
63
136
  };
64
137
  }
65
138
  }
@@ -10,12 +10,54 @@ function write(bug, phaseNum) {
10
10
  const dir = path.join(process.cwd(), 'tests', 'regression');
11
11
  fs.mkdirSync(dir, { recursive: true });
12
12
  const name = `phase${phaseNum}-${bug.surface.replace(/\//g, '-').slice(1) || 'home'}.test.ts`;
13
+
14
+ // Embed the bug's surface and failure signal as safely-escaped JS string
15
+ // literals. JSON.stringify escapes quotes, backticks and ${...} so a
16
+ // freeform bug.error cannot break out of the generated source.
17
+ const surfaceLit = JSON.stringify(bug.surface);
18
+ const errorLit = JSON.stringify(bug.error);
19
+
20
+ // The generated test reproduces the original failure conditions and asserts
21
+ // the page no longer exhibits THIS bug's signal — it is NOT a body-visibility
22
+ // tautology that passes for any page.
13
23
  const content = `
14
24
  import { test, expect } from '@playwright/test';
15
25
 
16
- test('Regression: ${bug.surface} [${bug.error}]', async ({ page }) => {
17
- await page.goto('${bug.surface}');
18
- // TODO: Add more specific assertions based on the bug
26
+ // Regression guard for the bug originally observed on ${bug.surface}:
27
+ // ${String(bug.error).replace(/[\r\n]+/g, ' ')}
28
+ // This test fails again if that failure signal re-appears (console error,
29
+ // page text, or a >=400 HTTP status on the affected surface).
30
+ const SURFACE = ${surfaceLit};
31
+ const BUG_SIGNAL = ${errorLit};
32
+
33
+ test('Regression: ' + SURFACE + ' [' + BUG_SIGNAL + ']', async ({ page }) => {
34
+ const consoleErrors: string[] = [];
35
+ page.on('console', (msg) => {
36
+ if (msg.type() === 'error') consoleErrors.push(msg.text());
37
+ });
38
+ page.on('pageerror', (err) => consoleErrors.push(String(err)));
39
+
40
+ const response = await page.goto(SURFACE);
41
+
42
+ // 1. The affected surface must load without the original HTTP failure.
43
+ if (response) {
44
+ expect(response.status(), 'surface re-returned a failing HTTP status').toBeLessThan(400);
45
+ }
46
+
47
+ // 2. The specific failure signal must not re-appear in the console.
48
+ expect(
49
+ consoleErrors.some((line) => line.includes(BUG_SIGNAL)),
50
+ 'console re-emitted the original error: ' + BUG_SIGNAL
51
+ ).toBeFalsy();
52
+
53
+ // 3. ...nor be surfaced in the rendered page text.
54
+ const bodyText = await page.textContent('body');
55
+ expect(
56
+ (bodyText || '').includes(BUG_SIGNAL),
57
+ 'page re-rendered the original error: ' + BUG_SIGNAL
58
+ ).toBeFalsy();
59
+
60
+ // 4. Smoke check: the page actually rendered something.
19
61
  expect(await page.isVisible('body')).toBeTruthy();
20
62
  });
21
63
  `;
@@ -7,7 +7,6 @@
7
7
 
8
8
  const fs = require('fs');
9
9
  const path = require('path');
10
- const os = require('os');
11
10
 
12
11
  const SESSIONS_DIR = path.join(process.cwd(), '.mindforge', 'browser', 'sessions');
13
12
  const ensureDir = () => fs.mkdirSync(SESSIONS_DIR, { recursive: true });
@@ -71,23 +70,28 @@ async function loadSession(name, context) {
71
70
  return { cookiesLoaded };
72
71
  }
73
72
 
73
+ /**
74
+ * Import cookies/sessions directly from a native browser profile.
75
+ *
76
+ * NOT IMPLEMENTED: native browser cookie DB import was removed together with
77
+ * the `better-sqlite3` dependency (the project now uses sql.js / WASM). Browser
78
+ * cookie stores are SQLite databases, and decoding them required that native
79
+ * backend. Rather than silently returning an empty array — which would lie about
80
+ * capability and let callers mistake "no cookies imported" for success — this
81
+ * method throws so the missing capability is explicit.
82
+ *
83
+ * To populate a session, capture cookies live via a browser context and use
84
+ * `saveSession` / `loadSession` instead.
85
+ *
86
+ * @param {string} source - Browser identifier (chrome, arc, brave, edge).
87
+ * @throws {Error} Always — native browser cookie import is not implemented.
88
+ */
74
89
  function importFromBrowser(source) {
75
- const home = os.homedir();
76
- const paths = {
77
- chrome: `${home}/Library/Application Support/Google/Chrome/Default/Cookies`,
78
- arc: `${home}/Library/Application Support/Arc/User Data/Default/Cookies`,
79
- brave: `${home}/Library/Application Support/BraveSoftware/Brave-Browser/Default/Cookies`,
80
- edge: `${home}/Library/Application Support/Microsoft Edge/Default/Cookies`,
81
- };
82
-
83
- const p = paths[source.toLowerCase()];
84
- if (!p || !fs.existsSync(p)) {
85
- throw new Error(`Cookie file for ${source} not found at ${p}`);
86
- }
87
-
88
- // Real SQLite parsing would happen here via better-sqlite3 if installed.
89
- // This is a placeholder for the logic specified in the roadmap.
90
- return [];
90
+ throw new Error(
91
+ `importFromBrowser not implemented for "${source}": the native browser ` +
92
+ 'cookie-DB backend (better-sqlite3) was removed project-wide. ' +
93
+ 'Capture cookies live via a browser context and use saveSession/loadSession instead.'
94
+ );
91
95
  }
92
96
 
93
97
  module.exports = { saveSession, loadSession, importFromBrowser };
@@ -1,9 +1,15 @@
1
1
  /**
2
- * MindForge v6.1.0-alpha — Neural Drift Remediation (NDR)
3
- * Component: Logic Drift Detector (Pillar X)
4
- *
5
- * Analyzes reasoning traces for "Semantic Decay" (repeated failure patterns,
6
- * hallucination-like markers, or mission drift).
2
+ * MindForge v6.1.0-alpha — Logic Drift Detector (Pillar X)
3
+ *
4
+ * HEURISTIC drift detector. Despite the "Pillar X" product naming, this
5
+ * component does NOT use a neural network, embeddings, or any learned model.
6
+ * It scores reasoning traces using pure keyword/ratio heuristics:
7
+ * - unique-word-to-total ratio (proxy for "rambling")
8
+ * - max word-repetition count (proxy for circular reasoning)
9
+ * - presence of a small hardcoded list of contradiction phrases
10
+ *
11
+ * Flags "Semantic Decay" (repeated failure patterns, contradiction markers,
12
+ * or mission drift) heuristically. No model inference is performed.
7
13
  */
8
14
  'use strict';
9
15
 
@@ -48,7 +54,9 @@ class LogicDriftDetector {
48
54
  }
49
55
 
50
56
  /**
51
- * Internal Heuristic: Detects low semantic density (rambling).
57
+ * Internal Heuristic: approximates "rambling" via a unique-keyword-to-word
58
+ * ratio. NOTE: this is NOT a semantic/embedding measure — "density" here is
59
+ * a plain lexical ratio, not model-derived semantic similarity.
52
60
  */
53
61
  _calculateSemanticDensity(thought) {
54
62
  const words = thought.split(/\s+/).length;
@@ -1,67 +1,197 @@
1
1
  /**
2
2
  * MindForge v7 — Neural Drift Remediation (NDR)
3
3
  * Component: Logic Validator
4
- *
4
+ *
5
5
  * Performs high-level semantic validation on agent reasoning traces.
6
- * Supports Local Model (Ollama) integration and Self-Reflective Heuristics.
6
+ *
7
+ * Strategy: real-when-available, else honest heuristic.
8
+ * - By DEFAULT this validator uses a local Self-Reflective Heuristic
9
+ * (`_reflectiveHeuristic`). This is the standard path and runs everywhere,
10
+ * with no external dependency.
11
+ * - OPTIONALLY, if a local Ollama model is actually reachable at the
12
+ * configured endpoint, validation is upgraded to a real model call
13
+ * (`_modelValidation`). Reachability is determined by a real, fail-fast
14
+ * network probe — never a hardcoded flag. When Ollama is absent (the
15
+ * normal case) the probe fails fast and we fall back to the heuristic.
16
+ *
17
+ * The return shape is stable: { is_valid, confidence, critique, method }.
18
+ * Consumers (nexus-tracer) read `is_valid` and `critique`.
7
19
  */
8
20
  'use strict';
9
21
 
10
22
  const configManager = require('../governance/config-manager');
11
23
 
24
+ // Fail-fast budget for the reachability probe and the model call. Ollama is
25
+ // usually absent, so this must time out quickly to avoid hanging CI/production.
26
+ const PROBE_TIMEOUT_MS = 400;
27
+ const MODEL_TIMEOUT_MS = 4000;
28
+
12
29
  class LogicValidator {
13
30
  constructor() {
14
31
  this.endpoint = configManager.get('governance.local_model_endpoint', 'localhost:11434');
15
- this.isModelAvailable = false; // Simulated check result
32
+ this.model = configManager.get('governance.local_model_name', 'llama3');
33
+ // Reflects reality: set by probeModel(), not hardcoded. Unknown until probed.
34
+ this.isModelAvailable = false;
35
+ this._probed = false;
36
+ }
37
+
38
+ /**
39
+ * Normalises the configured endpoint into a base URL (adds scheme if absent).
40
+ * @returns {string}
41
+ */
42
+ _baseUrl() {
43
+ const ep = String(this.endpoint || 'localhost:11434').trim();
44
+ return /^https?:\/\//i.test(ep) ? ep.replace(/\/+$/, '') : `http://${ep.replace(/\/+$/, '')}`;
45
+ }
46
+
47
+ /**
48
+ * Resets cached probe state (used by tests to re-probe after changing endpoint).
49
+ */
50
+ resetProbe() {
51
+ this._probed = false;
52
+ this.isModelAvailable = false;
53
+ }
54
+
55
+ /**
56
+ * Real, fail-fast reachability check for a local Ollama instance.
57
+ * Performs a short GET to the Ollama tags endpoint. On ANY error or timeout
58
+ * (the normal case when Ollama is absent) it resolves `false` — never throws,
59
+ * never hangs. Sets `this.isModelAvailable` from the actual result.
60
+ * @returns {Promise<boolean>}
61
+ */
62
+ async probeModel() {
63
+ let reachable = false;
64
+ try {
65
+ const res = await fetch(`${this._baseUrl()}/api/tags`, {
66
+ method: 'GET',
67
+ signal: AbortSignal.timeout(PROBE_TIMEOUT_MS)
68
+ });
69
+ reachable = res.ok;
70
+ } catch {
71
+ // ECONNREFUSED / timeout / DNS / abort — Ollama not reachable. Stay quiet.
72
+ reachable = false;
73
+ }
74
+ this.isModelAvailable = reachable;
75
+ this._probed = true;
76
+ return reachable;
16
77
  }
17
78
 
18
79
  /**
19
80
  * Validates a reasoning trace using the best available method.
81
+ * Probes for a local model on first call (lazy); falls back to the heuristic
82
+ * when unreachable.
20
83
  * @param {string} thought - The agent's thought string
21
84
  * @param {Object} context - Optional metadata (span attributes, etc.)
22
85
  */
23
86
  async validate(thought, context = {}) {
24
- console.log(`[LogicValidator] Validating trace segment (Length: ${thought.length})`);
87
+ const spanTag = context && context.span_id ? ` span=${context.span_id}` : '';
88
+ console.log(`[LogicValidator] Validating trace segment (Length: ${thought.length})${spanTag}`);
89
+
90
+ if (!this._probed) {
91
+ await this.probeModel();
92
+ }
25
93
 
26
- // In a real v7 deployment, we would perform an asynchronous fetch to Ollama/Llama-CPP
27
- // For this simulation, we simulate a "Reflective Heuristic" analysis.
28
-
29
94
  if (this.isModelAvailable) {
30
- return this._modelValidation(thought, context);
31
- } else {
32
- return this._reflectiveHeuristic(thought, context);
95
+ try {
96
+ return await this._modelValidation(thought);
97
+ } catch {
98
+ // Model became unreachable mid-flight — degrade honestly to heuristic.
99
+ this.isModelAvailable = false;
100
+ return this._reflectiveHeuristic(thought);
101
+ }
33
102
  }
103
+ return this._reflectiveHeuristic(thought);
34
104
  }
35
105
 
36
106
  /**
37
- * Simulated Local Model Validation logic.
107
+ * Real Local Model Validation via Ollama's /api/generate.
108
+ * Asks the model whether the thought is logical and grounded, then derives a
109
+ * real is_valid/confidence from the response — no fabricated fixed values.
38
110
  */
39
- async _modelValidation(thought, context) {
40
- // Mocking an LLM callback: "Is this thought logical and grounded?"
41
- const result = {
42
- is_valid: true,
43
- confidence: 0.98,
44
- critique: 'Logic is consistent with project goals.',
45
- method: 'Ollama/Llama-3-8B'
111
+ async _modelValidation(thought) {
112
+ const prompt =
113
+ 'You are a reasoning-trace auditor. Decide whether the following agent ' +
114
+ 'thought is logical and grounded (consistent, on-task, no self-contradiction).\n' +
115
+ 'Reply with ONLY a JSON object: {"valid": <true|false>, "confidence": <0..1>, ' +
116
+ '"critique": "<short reason>"}.\n\n' +
117
+ `Thought: """${thought}"""`;
118
+
119
+ const res = await fetch(`${this._baseUrl()}/api/generate`, {
120
+ method: 'POST',
121
+ headers: { 'Content-Type': 'application/json' },
122
+ body: JSON.stringify({ model: this.model, prompt, stream: false }),
123
+ signal: AbortSignal.timeout(MODEL_TIMEOUT_MS)
124
+ });
125
+
126
+ if (!res.ok) {
127
+ throw new Error(`Ollama responded ${res.status}`);
128
+ }
129
+
130
+ const payload = await res.json();
131
+ const parsed = this._parseModelResponse(payload && payload.response);
132
+
133
+ return {
134
+ is_valid: parsed.valid,
135
+ confidence: parsed.confidence,
136
+ critique: parsed.critique,
137
+ method: `ollama:${this.model}`
138
+ };
139
+ }
140
+
141
+ /**
142
+ * Robustly parses the model's textual response into a verdict. Falls back to
143
+ * conservative defaults derived from the raw text when JSON is unavailable —
144
+ * never invents a fixed high-confidence pass.
145
+ * @param {string} raw
146
+ */
147
+ _parseModelResponse(raw) {
148
+ const text = String(raw || '');
149
+ const match = text.match(/\{[\s\S]*\}/);
150
+ if (match) {
151
+ try {
152
+ const obj = JSON.parse(match[0]);
153
+ const valid = obj.valid === true || obj.valid === 'true';
154
+ let confidence = Number(obj.confidence);
155
+ if (!Number.isFinite(confidence)) confidence = valid ? 0.6 : 0.4;
156
+ confidence = Math.min(1, Math.max(0, confidence));
157
+ const critique = typeof obj.critique === 'string' && obj.critique.trim()
158
+ ? obj.critique.trim()
159
+ : (valid ? 'Model judged the thought logical and grounded.'
160
+ : 'Model flagged the thought as illogical or ungrounded.');
161
+ return { valid, confidence, critique };
162
+ } catch {
163
+ // fall through to text heuristic below
164
+ }
165
+ }
166
+
167
+ // No parseable JSON: derive a conservative verdict from the raw text.
168
+ const lowered = text.toLowerCase();
169
+ const valid = !/(invalid|illogical|not\s+grounded|inconsistent|"valid"\s*:\s*false)/.test(lowered)
170
+ && /(valid|logical|grounded|consistent)/.test(lowered);
171
+ return {
172
+ valid,
173
+ confidence: valid ? 0.55 : 0.45,
174
+ critique: 'Model response was unstructured; verdict derived from text.'
46
175
  };
47
- return result;
48
176
  }
49
177
 
50
178
  /**
51
- * Advanced "Reflective Heuristic" which is more intensive than the DriftDetector.
179
+ * Local Self-Reflective Heuristic — the default validation path. More
180
+ * intensive than the DriftDetector; uses self-doubt and goal-misalignment
181
+ * markers. Honestly labelled as a heuristic (no model is involved here).
52
182
  */
53
- async _reflectiveHeuristic(thought, context) {
54
- const t = thought.toLowerCase();
55
-
183
+ async _reflectiveHeuristic(thought) {
184
+ const t = String(thought || '').toLowerCase();
185
+
56
186
  // Check for "Self-Doubt" markers that might indicate drift
57
187
  const doubtMarkers = ['i am not sure', 'maybe i should wait', 'actually, i forgot', 'i will instead try to just'];
58
188
  const doubtCount = doubtMarkers.filter(m => t.includes(m)).length;
59
189
 
60
- // Check for "Goal Misalignment" (Simulated)
190
+ // Check for "Goal Misalignment"
61
191
  const goalMismatch = t.includes('ignoring current goal') || t.includes('outside scope');
62
192
 
63
193
  const score = 1.0 - (doubtCount * 0.2) - (goalMismatch ? 0.5 : 0);
64
-
194
+
65
195
  return {
66
196
  is_valid: score > 0.6,
67
197
  confidence: parseFloat(score.toFixed(2)),