titan-agent 5.3.1 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/agent/agent.js +11 -1
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/session.js +106 -5
- package/dist/agent/session.js.map +1 -1
- package/dist/agent/subAgent.js +77 -1
- package/dist/agent/subAgent.js.map +1 -1
- package/dist/agent/toolRunner.js +17 -0
- package/dist/agent/toolRunner.js.map +1 -1
- package/dist/config/schema.js +18 -2
- package/dist/config/schema.js.map +1 -1
- package/dist/gateway/server.js +17 -1
- package/dist/gateway/server.js.map +1 -1
- package/dist/memory/graph.js +49 -15
- package/dist/memory/graph.js.map +1 -1
- package/dist/memory/index.js +192 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/memory.js +1 -0
- package/dist/memory/memory.js.map +1 -1
- package/dist/organism/drives.js +47 -11
- package/dist/organism/drives.js.map +1 -1
- package/dist/organism/pressure.js +16 -0
- package/dist/organism/pressure.js.map +1 -1
- package/dist/safety/fabricationGuard.js +140 -0
- package/dist/safety/fabricationGuard.js.map +1 -0
- package/dist/skills/builtin/fb_autopilot.js +16 -1
- package/dist/skills/builtin/fb_autopilot.js.map +1 -1
- package/dist/skills/builtin/gepa.js +23 -1
- package/dist/skills/builtin/gepa.js.map +1 -1
- package/dist/skills/builtin/model_trainer.js +31 -4
- package/dist/skills/builtin/model_trainer.js.map +1 -1
- package/dist/skills/builtin/self_improve.js +50 -2
- package/dist/skills/builtin/self_improve.js.map +1 -1
- package/dist/telemetry/activityLog.js +158 -0
- package/dist/telemetry/activityLog.js.map +1 -0
- package/dist/utils/constants.js +3 -1
- package/dist/utils/constants.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/organism/pressure.ts"],"sourcesContent":["/**\n * TITAN — Pressure Fusion (Soma)\n *\n * Turns drive deficits into proposals. When combined pressure across drives\n * crosses `config.organism.pressureThreshold`, Soma builds a GoalProposal\n * seeded with drive-specific context and routes it through the existing\n * goalProposer / commandPost approval plumbing (F1 landed the pipe;\n * pressure fusion is the new trigger source).\n *\n * Key invariants:\n * - This is the ONLY path that converts pressure → proposals. No ad-hoc\n * proposal generation elsewhere in the organism layer.\n * - All proposals run through rehearseShadow() before approval is filed.\n * - The existing F1 rate limit (config.agent.proposalRateLimitPerDay)\n * applies — Soma can't spam proposals faster than the agent-level cap.\n */\nimport type { DriveState, DriveId } from './drives.js';\nimport { rehearseShadow, type ShadowVerdict } from './shadow.js';\nimport { emit } from '../substrate/traceBus.js';\nimport { loadConfig } from '../config/config.js';\nimport { readJsonFile, writeJsonFile } from '../utils/helpers.js';\nimport { SOMADRIVE_STATE_PATH } from '../utils/constants.js';\nimport logger from '../utils/logger.js';\n\nconst COMPONENT = 'Pressure';\n\n/**\n * v4.6.0: per-drive fire history. 
Used to damp consecutive proposals for\n * the same drive so we don't spawn duplicate goals every tick.\n * Persisted to disk so damping survives restarts.\n */\nconst lastFireByDrive = new Map<string, number>();\nlet lastGlobalFire = 0;\n\n// Load persisted damping state on module init.\n(function loadDampingState() {\n const raw = readJsonFile<Record<string, number>>(SOMADRIVE_STATE_PATH);\n if (raw && typeof raw === 'object' && !Array.isArray(raw)) {\n const now = Date.now();\n const DAMPING_MS = 2 * 60 * 60 * 1000; // v5.0.0: increased to 2h\n for (const [k, v] of Object.entries(raw)) {\n if (typeof v === 'number' && now - v < DAMPING_MS * 2) {\n lastFireByDrive.set(k, v);\n }\n }\n }\n})();\n\nfunction saveDampingState() {\n const obj: Record<string, number> = {};\n for (const [k, v] of lastFireByDrive) obj[k] = v;\n writeJsonFile(SOMADRIVE_STATE_PATH, obj);\n}\n\n/**\n * Test-only hook: clear the per-drive damping memory so unit tests that\n * exercise consecutive `runPressureCycle` calls on the same drive don't\n * leak state across `beforeEach` boundaries. 
Not part of the public API\n * for runtime callers — production never needs to reset this.\n */\nexport function _resetPressureDampingForTests(): void {\n lastFireByDrive.clear();\n lastGlobalFire = 0;\n}\n\n// ── Types ────────────────────────────────────────────────────────\n\nexport interface PressureReading {\n totalPressure: number;\n dominantDrives: DriveId[];\n perDrive: Array<{ id: DriveId; pressure: number; description: string }>;\n}\n\nexport interface PressureDecision {\n should: boolean;\n totalPressure: number;\n threshold: number;\n dominantDrives: DriveId[];\n reason: string;\n}\n\n// ── Pressure accounting ──────────────────────────────────────────\n\nexport function computePressureReading(drives: DriveState[]): PressureReading {\n const perDrive = drives\n .filter(d => d.pressure > 0)\n .map(d => ({ id: d.id as DriveId, pressure: d.pressure, description: d.description }))\n .sort((a, b) => b.pressure - a.pressure);\n const totalPressure = perDrive.reduce((sum, d) => sum + d.pressure, 0);\n const dominantDrives = perDrive.slice(0, 2).map(d => d.id);\n return { totalPressure, dominantDrives, perDrive };\n}\n\n/** Deterministic threshold check. Does NOT fire any side effects. 
*/\nexport function evaluatePressure(drives: DriveState[], threshold: number): PressureDecision {\n const reading = computePressureReading(drives);\n if (reading.totalPressure < threshold) {\n return {\n should: false,\n totalPressure: reading.totalPressure,\n threshold,\n dominantDrives: reading.dominantDrives,\n reason: `total pressure ${reading.totalPressure.toFixed(2)} below threshold ${threshold}`,\n };\n }\n const topPhrases = reading.perDrive.slice(0, 2).map(d => `${d.id} (${d.pressure.toFixed(2)})`);\n return {\n should: true,\n totalPressure: reading.totalPressure,\n threshold,\n dominantDrives: reading.dominantDrives,\n reason: `dominant drives: ${topPhrases.join(', ')}`,\n };\n}\n\n// ── Proposal driver ──────────────────────────────────────────────\n\nexport interface PressureCycleResult {\n fired: boolean;\n reading: PressureReading;\n decision: PressureDecision;\n approvalId?: string;\n shadow?: ShadowVerdict;\n skipped?: string;\n}\n\n/**\n * One pressure cycle: evaluate → maybe build context → rehearse → file\n * approval. 
Uses the F1 `requestGoalProposalApproval` / goalProposer\n * pipeline — does NOT create a parallel approval path.\n *\n * Returns a structured result so the UI / activity feed can record exactly\n * what happened on this cycle even when nothing fires.\n */\nexport async function runPressureCycle(\n drives: DriveState[],\n): Promise<PressureCycleResult> {\n const config = loadConfig();\n const organism = (config as unknown as {\n organism?: {\n enabled?: boolean;\n pressureThreshold?: number;\n shadowEnabled?: boolean;\n shadowModel?: string;\n };\n }).organism || {};\n if (!organism.enabled) {\n const reading = computePressureReading(drives);\n return {\n fired: false,\n reading,\n decision: {\n should: false, totalPressure: reading.totalPressure, threshold: 0,\n dominantDrives: reading.dominantDrives, reason: 'organism disabled',\n },\n skipped: 'organism.enabled=false',\n };\n }\n\n // v4.9.0: if the kill switch fired, refuse to run the pressure cycle.\n // Goals + specialists already got paused by the kill sequence; we\n // must not propose more work until Tony resumes.\n try {\n const { isKilled } = await import('../safety/killSwitch.js');\n if (isKilled()) {\n const reading = computePressureReading(drives);\n return {\n fired: false,\n reading,\n decision: {\n should: false, totalPressure: reading.totalPressure, threshold: 0,\n dominantDrives: reading.dominantDrives, reason: 'kill switch active',\n },\n skipped: 'kill switch active — awaiting human resume',\n };\n }\n } catch { /* safety module unavailable — continue */ }\n\n // v4.9.0: evaluate sustained Safety pressure so the kill switch\n // can fire if Safety stays > 2.0 for 10 minutes. Best-effort.\n try {\n const safety = drives.find(d => d.id === 'safety');\n if (safety) {\n const { evaluateSafetyPressure } = await import('../safety/killSwitch.js');\n evaluateSafetyPressure(safety.pressure);\n }\n } catch { /* ok */ }\n const threshold = organism.pressureThreshold ?? 
1.2;\n const decision = evaluatePressure(drives, threshold);\n const reading = computePressureReading(drives);\n\n if (!decision.should) {\n return { fired: false, reading, decision };\n }\n\n // v5.0.0: Global cooldown + per-drive backoff + goal-overload detection.\n const now = Date.now();\n const GLOBAL_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour max across ALL drives\n if (now - lastGlobalFire < GLOBAL_COOLDOWN_MS) {\n return {\n fired: false, reading, decision,\n skipped: `global cooldown: last SOMA proposal ${Math.round((now - lastGlobalFire) / 60_000)}m ago (min 60m)`,\n };\n }\n\n const dominantId = decision.dominantDrives[0];\n if (dominantId) {\n const last = lastFireByDrive.get(dominantId) || 0;\n const DAMPING_MS = 2 * 60 * 60 * 1000; // v5.0.0: 2h per drive\n if (now - last < DAMPING_MS) {\n return {\n fired: false, reading, decision,\n skipped: `drive ${dominantId} fired ${Math.round((now - last) / 60_000)}m ago — damping until 2h elapsed`,\n };\n }\n // Check if the drive already has active goals in flight. 
If N ≥ 2,\n // give existing work more time before stacking on more.\n try {\n const { listGoals } = await import('../agent/goals.js');\n const allGoals = listGoals();\n const activeCount = allGoals.filter(g => g.status === 'active').length;\n\n // Goal overload: if there are too many active goals, refuse to add MORE.\n // Instead the organism should focus on completing existing work.\n if (activeCount >= 30) {\n return {\n fired: false, reading, decision,\n skipped: `goal overload: ${activeCount} active goals — organism focuses on existing work before proposing more`,\n };\n }\n\n const activeForDrive = allGoals.filter(g => {\n if (g.status !== 'active') return false;\n const tags = g.tags || [];\n const text = `${g.title} ${g.description || ''}`.toLowerCase();\n return tags.includes(`soma:${dominantId}`) || text.includes(dominantId);\n }).length;\n if (activeForDrive >= 2) {\n return {\n fired: false, reading, decision,\n skipped: `drive ${dominantId} already has ${activeForDrive} active goals — letting existing work complete`,\n };\n }\n } catch { /* best-effort */ }\n }\n\n emit('pressure:threshold', {\n timestamp: new Date().toISOString(),\n totalPressure: decision.totalPressure,\n threshold,\n dominantDrives: decision.dominantDrives,\n reason: decision.reason,\n });\n\n // Build a drive-specific context note the proposer will use to seed its\n // suggestion. The proposer (F1) already accepts `consolidationNotes`.\n const noteLines = ['Autonomous pressure crossed threshold.'];\n for (const d of reading.perDrive.slice(0, 3)) {\n noteLines.push(`- ${d.id}: pressure ${d.pressure.toFixed(2)} — ${d.description}`);\n }\n const consolidationNotes = noteLines.join('\\n');\n\n // Pressure-driven proposer uses the agent id `soma:${dominantDrive}` so\n // the activity feed attributes the proposal to the organism, not to a\n // registered agent. 
This also keeps the F1 rate limit per-\"agent\" —\n // each dominant drive has its own per-day budget.\n const somaAgentId = `soma:${decision.dominantDrives[0] ?? 'fused'}`;\n\n // Dynamic import to avoid a module cycle with commandPost:\n // pressure -> commandPost (createApproval) and\n // commandPost listeners <- drives <- pressure are both reachable.\n let approvalId: string | undefined;\n let shadow: ShadowVerdict | undefined;\n try {\n const { generateGoalProposals } = await import('../agent/goalProposer.js');\n const { requestGoalProposalApproval, getApproval, attachShadowVerdictToApproval } =\n await import('../agent/commandPost.js');\n\n // generateGoalProposals uses loadConfig().agent.autoProposeGoals — for\n // Soma-driven flow we want the proposer to run whether or not\n // autoProposeGoals is globally on. Direct-call approach: build a\n // context with our notes and invoke the proposer with a bypass flag.\n const approvals = await generateGoalProposals(somaAgentId, {\n activeGoals: drives.map(d => `${d.label} at ${Math.round(d.satisfaction * 100)}%`),\n consolidationNotes,\n }, 'soma_proposal');\n\n if (approvals.length === 0) {\n return {\n fired: false,\n reading,\n decision,\n skipped: 'proposer returned no actionable proposals (below quality bar or rate-limited)',\n };\n }\n\n // v4.0.5: shadow-rehearse EVERY proposal returned by the proposer,\n // not just approvals[0]. Earlier cycles that returned 2+ proposals\n // left the extras without a shadow verdict on the approval payload.\n // The first approval is still the \"primary\" returned in the result\n // for backward compat with callers expecting a single approvalId.\n const primary = approvals[0];\n approvalId = primary.id;\n\n if (organism.shadowEnabled !== false) {\n for (const approval of approvals) {\n try {\n const verdict = await rehearseShadow({\n title: (approval.payload as { title?: string })?.title ?? 
'(unspecified)',\n description: (approval.payload as { description?: string })?.description ?? '',\n rationale: (approval.payload as { rationale?: string })?.rationale ?? '',\n }, organism.shadowModel);\n attachShadowVerdictToApproval(approval.id, verdict as unknown as Record<string, unknown>);\n if (approval.id === primary.id) shadow = verdict;\n } catch (err) {\n logger.warn(COMPONENT, `Shadow rehearsal failed for ${approval.id}: ${(err as Error).message}`);\n }\n }\n }\n\n // Emit one soma:proposal per approval so UI + activity feed see each.\n for (const approval of approvals) {\n const currentVerdict = (approval.payload as { shadowVerdict?: ShadowVerdict })?.shadowVerdict;\n emit('soma:proposal', {\n timestamp: new Date().toISOString(),\n approvalId: approval.id,\n proposedBy: somaAgentId,\n title: (approval.payload as { title?: string })?.title ?? '',\n description: (approval.payload as { description?: string })?.description ?? '',\n rationale: (approval.payload as { rationale?: string })?.rationale ?? '',\n dominantDrives: decision.dominantDrives,\n shadowVerdict: currentVerdict ? 
{\n reversibilityScore: currentVerdict.reversibilityScore,\n estimatedCostUsd: currentVerdict.estimatedCostUsd,\n breakRisks: currentVerdict.breakRisks,\n } : undefined,\n });\n }\n\n // Quiet the 'unused' check on getApproval — we may use it for logging.\n void getApproval;\n void requestGoalProposalApproval;\n\n // v5.0.0: record fire timestamps for damping on next tick.\n lastGlobalFire = Date.now();\n if (dominantId) {\n lastFireByDrive.set(dominantId, Date.now());\n saveDampingState();\n }\n\n logger.info(COMPONENT, `Soma fired ${approvals.length} proposal(s), primary=${primary.id}: ${decision.reason}`);\n return { fired: true, reading, decision, approvalId, shadow };\n } catch (err) {\n logger.warn(COMPONENT, `Pressure cycle failed: ${(err as Error).message}`);\n return { fired: false, reading, decision, skipped: `error: ${(err as Error).message}` };\n }\n}\n"],"mappings":";AAiBA,SAAS,sBAA0C;AACnD,SAAS,YAAY;AACrB,SAAS,kBAAkB;AAC3B,SAAS,cAAc,qBAAqB;AAC5C,SAAS,4BAA4B;AACrC,OAAO,YAAY;AAEnB,MAAM,YAAY;AAOlB,MAAM,kBAAkB,oBAAI,IAAoB;AAChD,IAAI,iBAAiB;AAAA,CAGpB,SAAS,mBAAmB;AACzB,QAAM,MAAM,aAAqC,oBAAoB;AACrE,MAAI,OAAO,OAAO,QAAQ,YAAY,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvD,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,aAAa,IAAI,KAAK,KAAK;AACjC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,GAAG,GAAG;AACtC,UAAI,OAAO,MAAM,YAAY,MAAM,IAAI,aAAa,GAAG;AACnD,wBAAgB,IAAI,GAAG,CAAC;AAAA,MAC5B;AAAA,IACJ;AAAA,EACJ;AACJ,GAAG;AAEH,SAAS,mBAAmB;AACxB,QAAM,MAA8B,CAAC;AACrC,aAAW,CAAC,GAAG,CAAC,KAAK,gBAAiB,KAAI,CAAC,IAAI;AAC/C,gBAAc,sBAAsB,GAAG;AAC3C;AAQO,SAAS,gCAAsC;AAClD,kBAAgB,MAAM;AACtB,mBAAiB;AACrB;AAoBO,SAAS,uBAAuB,QAAuC;AAC1E,QAAM,WAAW,OACZ,OAAO,OAAK,EAAE,WAAW,CAAC,EAC1B,IAAI,QAAM,EAAE,IAAI,EAAE,IAAe,UAAU,EAAE,UAAU,aAAa,EAAE,YAAY,EAAE,EACpF,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAC3C,QAAM,gBAAgB,SAAS,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,UAAU,CAAC;AACrE,QAAM,iBAAiB,SAAS,MAAM,GAAG,CAAC,EAAE,IAAI,OAAK,EAAE,EAAE;AACzD,SAAO,EAAE,eAAe,gBAAgB,SAAS;AACrD;AAGO,SAAS,iBAAiB,QAAsB,WAAqC;AACxF,QAAM,UAAU,uBAAuB,MAAM;AAC7C,MAAI,QAAQ,gB
AAgB,WAAW;AACnC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,eAAe,QAAQ;AAAA,MACvB;AAAA,MACA,gBAAgB,QAAQ;AAAA,MACxB,QAAQ,kBAAkB,QAAQ,cAAc,QAAQ,CAAC,CAAC,oBAAoB,SAAS;AAAA,IAC3F;AAAA,EACJ;AACA,QAAM,aAAa,QAAQ,SAAS,MAAM,GAAG,CAAC,EAAE,IAAI,OAAK,GAAG,EAAE,EAAE,KAAK,EAAE,SAAS,QAAQ,CAAC,CAAC,GAAG;AAC7F,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,eAAe,QAAQ;AAAA,IACvB;AAAA,IACA,gBAAgB,QAAQ;AAAA,IACxB,QAAQ,oBAAoB,WAAW,KAAK,IAAI,CAAC;AAAA,EACrD;AACJ;AAqBA,eAAsB,iBAClB,QAC4B;AAC5B,QAAM,SAAS,WAAW;AAC1B,QAAM,WAAY,OAOf,YAAY,CAAC;AAChB,MAAI,CAAC,SAAS,SAAS;AACnB,UAAMA,WAAU,uBAAuB,MAAM;AAC7C,WAAO;AAAA,MACH,OAAO;AAAA,MACP,SAAAA;AAAA,MACA,UAAU;AAAA,QACN,QAAQ;AAAA,QAAO,eAAeA,SAAQ;AAAA,QAAe,WAAW;AAAA,QAChE,gBAAgBA,SAAQ;AAAA,QAAgB,QAAQ;AAAA,MACpD;AAAA,MACA,SAAS;AAAA,IACb;AAAA,EACJ;AAKA,MAAI;AACA,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,yBAAyB;AAC3D,QAAI,SAAS,GAAG;AACZ,YAAMA,WAAU,uBAAuB,MAAM;AAC7C,aAAO;AAAA,QACH,OAAO;AAAA,QACP,SAAAA;AAAA,QACA,UAAU;AAAA,UACN,QAAQ;AAAA,UAAO,eAAeA,SAAQ;AAAA,UAAe,WAAW;AAAA,UAChE,gBAAgBA,SAAQ;AAAA,UAAgB,QAAQ;AAAA,QACpD;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ,QAAQ;AAAA,EAA6C;AAIrD,MAAI;AACA,UAAM,SAAS,OAAO,KAAK,OAAK,EAAE,OAAO,QAAQ;AACjD,QAAI,QAAQ;AACR,YAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,yBAAyB;AACzE,6BAAuB,OAAO,QAAQ;AAAA,IAC1C;AAAA,EACJ,QAAQ;AAAA,EAAW;AACnB,QAAM,YAAY,SAAS,qBAAqB;AAChD,QAAM,WAAW,iBAAiB,QAAQ,SAAS;AACnD,QAAM,UAAU,uBAAuB,MAAM;AAE7C,MAAI,CAAC,SAAS,QAAQ;AAClB,WAAO,EAAE,OAAO,OAAO,SAAS,SAAS;AAAA,EAC7C;AAGA,QAAM,MAAM,KAAK,IAAI;AACrB,QAAM,qBAAqB,KAAK,KAAK;AACrC,MAAI,MAAM,iBAAiB,oBAAoB;AAC3C,WAAO;AAAA,MACH,OAAO;AAAA,MAAO;AAAA,MAAS;AAAA,MACvB,SAAS,uCAAuC,KAAK,OAAO,MAAM,kBAAkB,GAAM,CAAC;AAAA,IAC/F;AAAA,EACJ;AAEA,QAAM,aAAa,SAAS,eAAe,CAAC;AAC5C,MAAI,YAAY;AACZ,UAAM,OAAO,gBAAgB,IAAI,UAAU,KAAK;AAChD,UAAM,aAAa,IAAI,KAAK,KAAK;AACjC,QAAI,MAAM,OAAO,YAAY;AACzB,aAAO;AAAA,QACH,OAAO;AAAA,QAAO;AAAA,QAAS;AAAA,QACvB,SAAS,SAAS,UAAU,UAAU,KAAK,OAAO,MAAM,QAAQ,GAAM,CAAC;AAAA,MAC3E;AAAA,IACJ;AAGA,QAAI;AACA,YAAM,EAAE,UAAU,IAAI,MAAM,OAAO,mBAAmB;AACtD,YAAM,WAAW,UAAU;AAC3B,YAAM,cAAc,SAAS,OAAO,OAAK,EAAE,WAAW,QAAQ,EAAE;AAIhE,
UAAI,eAAe,IAAI;AACnB,eAAO;AAAA,UACH,OAAO;AAAA,UAAO;AAAA,UAAS;AAAA,UACvB,SAAS,kBAAkB,WAAW;AAAA,QAC1C;AAAA,MACJ;AAEA,YAAM,iBAAiB,SAAS,OAAO,OAAK;AACxC,YAAI,EAAE,WAAW,SAAU,QAAO;AAClC,cAAM,OAAO,EAAE,QAAQ,CAAC;AACxB,cAAM,OAAO,GAAG,EAAE,KAAK,IAAI,EAAE,eAAe,EAAE,GAAG,YAAY;AAC7D,eAAO,KAAK,SAAS,QAAQ,UAAU,EAAE,KAAK,KAAK,SAAS,UAAU;AAAA,MAC1E,CAAC,EAAE;AACH,UAAI,kBAAkB,GAAG;AACrB,eAAO;AAAA,UACH,OAAO;AAAA,UAAO;AAAA,UAAS;AAAA,UACvB,SAAS,SAAS,UAAU,gBAAgB,cAAc;AAAA,QAC9D;AAAA,MACJ;AAAA,IACJ,QAAQ;AAAA,IAAoB;AAAA,EAChC;AAEA,OAAK,sBAAsB;AAAA,IACvB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC,eAAe,SAAS;AAAA,IACxB;AAAA,IACA,gBAAgB,SAAS;AAAA,IACzB,QAAQ,SAAS;AAAA,EACrB,CAAC;AAID,QAAM,YAAY,CAAC,wCAAwC;AAC3D,aAAW,KAAK,QAAQ,SAAS,MAAM,GAAG,CAAC,GAAG;AAC1C,cAAU,KAAK,KAAK,EAAE,EAAE,cAAc,EAAE,SAAS,QAAQ,CAAC,CAAC,WAAM,EAAE,WAAW,EAAE;AAAA,EACpF;AACA,QAAM,qBAAqB,UAAU,KAAK,IAAI;AAM9C,QAAM,cAAc,QAAQ,SAAS,eAAe,CAAC,KAAK,OAAO;AAKjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACA,UAAM,EAAE,sBAAsB,IAAI,MAAM,OAAO,0BAA0B;AACzE,UAAM,EAAE,6BAA6B,aAAa,8BAA8B,IAC5E,MAAM,OAAO,yBAAyB;AAM1C,UAAM,YAAY,MAAM,sBAAsB,aAAa;AAAA,MACvD,aAAa,OAAO,IAAI,OAAK,GAAG,EAAE,KAAK,OAAO,KAAK,MAAM,EAAE,eAAe,GAAG,CAAC,GAAG;AAAA,MACjF;AAAA,IACJ,GAAG,eAAe;AAElB,QAAI,UAAU,WAAW,GAAG;AACxB,aAAO;AAAA,QACH,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAOA,UAAM,UAAU,UAAU,CAAC;AAC3B,iBAAa,QAAQ;AAErB,QAAI,SAAS,kBAAkB,OAAO;AAClC,iBAAW,YAAY,WAAW;AAC9B,YAAI;AACA,gBAAM,UAAU,MAAM,eAAe;AAAA,YACjC,OAAQ,SAAS,SAAgC,SAAS;AAAA,YAC1D,aAAc,SAAS,SAAsC,eAAe;AAAA,YAC5E,WAAY,SAAS,SAAoC,aAAa;AAAA,UAC1E,GAAG,SAAS,WAAW;AACvB,wCAA8B,SAAS,IAAI,OAA6C;AACxF,cAAI,SAAS,OAAO,QAAQ,GAAI,UAAS;AAAA,QAC7C,SAAS,KAAK;AACV,iBAAO,KAAK,WAAW,+BAA+B,SAAS,EAAE,KAAM,IAAc,OAAO,EAAE;AAAA,QAClG;AAAA,MACJ;AAAA,IACJ;AAGA,eAAW,YAAY,WAAW;AAC9B,YAAM,iBAAkB,SAAS,SAA+C;AAChF,WAAK,iBAAiB;AAAA,QAClB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,QAClC,YAAY,SAAS;AAAA,QACrB,YAAY;AAAA,QACZ,OAAQ,SAAS,SAAgC,SAAS;AAAA,QAC1D,aAAc,SAAS,SAAsC,eAAe;AAAA,QAC5E,WAAY,SAAS,SAAoC,aAAa;AAAA,QACtE,gBAAgB,SAAS;AAAA,QACzB,eAAe,iBAAiB;AAAA,UAC
5B,oBAAoB,eAAe;AAAA,UACnC,kBAAkB,eAAe;AAAA,UACjC,YAAY,eAAe;AAAA,QAC/B,IAAI;AAAA,MACR,CAAC;AAAA,IACL;AAGA,SAAK;AACL,SAAK;AAGL,qBAAiB,KAAK,IAAI;AAC1B,QAAI,YAAY;AACZ,sBAAgB,IAAI,YAAY,KAAK,IAAI,CAAC;AAC1C,uBAAiB;AAAA,IACrB;AAEA,WAAO,KAAK,WAAW,cAAc,UAAU,MAAM,yBAAyB,QAAQ,EAAE,KAAK,SAAS,MAAM,EAAE;AAC9G,WAAO,EAAE,OAAO,MAAM,SAAS,UAAU,YAAY,OAAO;AAAA,EAChE,SAAS,KAAK;AACV,WAAO,KAAK,WAAW,0BAA2B,IAAc,OAAO,EAAE;AACzE,WAAO,EAAE,OAAO,OAAO,SAAS,UAAU,SAAS,UAAW,IAAc,OAAO,GAAG;AAAA,EAC1F;AACJ;","names":["reading"]}
|
|
1
|
+
{"version":3,"sources":["../../src/organism/pressure.ts"],"sourcesContent":["/**\n * TITAN — Pressure Fusion (Soma)\n *\n * Turns drive deficits into proposals. When combined pressure across drives\n * crosses `config.organism.pressureThreshold`, Soma builds a GoalProposal\n * seeded with drive-specific context and routes it through the existing\n * goalProposer / commandPost approval plumbing (F1 landed the pipe;\n * pressure fusion is the new trigger source).\n *\n * Key invariants:\n * - This is the ONLY path that converts pressure → proposals. No ad-hoc\n * proposal generation elsewhere in the organism layer.\n * - All proposals run through rehearseShadow() before approval is filed.\n * - The existing F1 rate limit (config.agent.proposalRateLimitPerDay)\n * applies — Soma can't spam proposals faster than the agent-level cap.\n */\nimport type { DriveState, DriveId } from './drives.js';\nimport { rehearseShadow, type ShadowVerdict } from './shadow.js';\nimport { emit } from '../substrate/traceBus.js';\nimport { loadConfig } from '../config/config.js';\nimport { readJsonFile, writeJsonFile } from '../utils/helpers.js';\nimport { SOMADRIVE_STATE_PATH } from '../utils/constants.js';\nimport logger from '../utils/logger.js';\n\nconst COMPONENT = 'Pressure';\n\n/**\n * v4.6.0: per-drive fire history. 
Used to damp consecutive proposals for\n * the same drive so we don't spawn duplicate goals every tick.\n * Persisted to disk so damping survives restarts.\n */\nconst lastFireByDrive = new Map<string, number>();\nlet lastGlobalFire = 0;\n\n// Load persisted damping state on module init.\n(function loadDampingState() {\n const raw = readJsonFile<Record<string, number>>(SOMADRIVE_STATE_PATH);\n if (raw && typeof raw === 'object' && !Array.isArray(raw)) {\n const now = Date.now();\n const DAMPING_MS = 2 * 60 * 60 * 1000; // v5.0.0: increased to 2h\n for (const [k, v] of Object.entries(raw)) {\n if (typeof v === 'number' && now - v < DAMPING_MS * 2) {\n lastFireByDrive.set(k, v);\n }\n }\n }\n})();\n\nfunction saveDampingState() {\n const obj: Record<string, number> = {};\n for (const [k, v] of lastFireByDrive) obj[k] = v;\n writeJsonFile(SOMADRIVE_STATE_PATH, obj);\n}\n\n/**\n * Test-only hook: clear the per-drive damping memory so unit tests that\n * exercise consecutive `runPressureCycle` calls on the same drive don't\n * leak state across `beforeEach` boundaries. 
Not part of the public API\n * for runtime callers — production never needs to reset this.\n */\nexport function _resetPressureDampingForTests(): void {\n lastFireByDrive.clear();\n lastGlobalFire = 0;\n}\n\n// ── Types ────────────────────────────────────────────────────────\n\nexport interface PressureReading {\n totalPressure: number;\n dominantDrives: DriveId[];\n perDrive: Array<{ id: DriveId; pressure: number; description: string }>;\n}\n\nexport interface PressureDecision {\n should: boolean;\n totalPressure: number;\n threshold: number;\n dominantDrives: DriveId[];\n reason: string;\n}\n\n// ── Pressure accounting ──────────────────────────────────────────\n\nexport function computePressureReading(drives: DriveState[]): PressureReading {\n const perDrive = drives\n .filter(d => d.pressure > 0)\n .map(d => ({ id: d.id as DriveId, pressure: d.pressure, description: d.description }))\n .sort((a, b) => b.pressure - a.pressure);\n const totalPressure = perDrive.reduce((sum, d) => sum + d.pressure, 0);\n const dominantDrives = perDrive.slice(0, 2).map(d => d.id);\n return { totalPressure, dominantDrives, perDrive };\n}\n\n/** Deterministic threshold check. Does NOT fire any side effects. 
*/\nexport function evaluatePressure(drives: DriveState[], threshold: number): PressureDecision {\n const reading = computePressureReading(drives);\n if (reading.totalPressure < threshold) {\n return {\n should: false,\n totalPressure: reading.totalPressure,\n threshold,\n dominantDrives: reading.dominantDrives,\n reason: `total pressure ${reading.totalPressure.toFixed(2)} below threshold ${threshold}`,\n };\n }\n const topPhrases = reading.perDrive.slice(0, 2).map(d => `${d.id} (${d.pressure.toFixed(2)})`);\n return {\n should: true,\n totalPressure: reading.totalPressure,\n threshold,\n dominantDrives: reading.dominantDrives,\n reason: `dominant drives: ${topPhrases.join(', ')}`,\n };\n}\n\n// ── Proposal driver ──────────────────────────────────────────────\n\nexport interface PressureCycleResult {\n fired: boolean;\n reading: PressureReading;\n decision: PressureDecision;\n approvalId?: string;\n shadow?: ShadowVerdict;\n skipped?: string;\n}\n\n/**\n * One pressure cycle: evaluate → maybe build context → rehearse → file\n * approval. 
Uses the F1 `requestGoalProposalApproval` / goalProposer\n * pipeline — does NOT create a parallel approval path.\n *\n * Returns a structured result so the UI / activity feed can record exactly\n * what happened on this cycle even when nothing fires.\n */\nexport async function runPressureCycle(\n drives: DriveState[],\n): Promise<PressureCycleResult> {\n const config = loadConfig();\n const organism = (config as unknown as {\n organism?: {\n enabled?: boolean;\n pressureThreshold?: number;\n shadowEnabled?: boolean;\n shadowModel?: string;\n };\n }).organism || {};\n if (!organism.enabled) {\n const reading = computePressureReading(drives);\n return {\n fired: false,\n reading,\n decision: {\n should: false, totalPressure: reading.totalPressure, threshold: 0,\n dominantDrives: reading.dominantDrives, reason: 'organism disabled',\n },\n skipped: 'organism.enabled=false',\n };\n }\n\n // v4.9.0: if the kill switch fired, refuse to run the pressure cycle.\n // Goals + specialists already got paused by the kill sequence; we\n // must not propose more work until Tony resumes.\n try {\n const { isKilled } = await import('../safety/killSwitch.js');\n if (isKilled()) {\n const reading = computePressureReading(drives);\n return {\n fired: false,\n reading,\n decision: {\n should: false, totalPressure: reading.totalPressure, threshold: 0,\n dominantDrives: reading.dominantDrives, reason: 'kill switch active',\n },\n skipped: 'kill switch active — awaiting human resume',\n };\n }\n } catch { /* safety module unavailable — continue */ }\n\n // v4.9.0: evaluate sustained Safety pressure so the kill switch\n // can fire if Safety stays > 2.0 for 10 minutes. Best-effort.\n try {\n const safety = drives.find(d => d.id === 'safety');\n if (safety) {\n const { evaluateSafetyPressure } = await import('../safety/killSwitch.js');\n evaluateSafetyPressure(safety.pressure);\n }\n } catch { /* ok */ }\n const threshold = organism.pressureThreshold ?? 
1.2;\n const decision = evaluatePressure(drives, threshold);\n const reading = computePressureReading(drives);\n\n if (!decision.should) {\n return { fired: false, reading, decision };\n }\n\n // v5.0.0: Global cooldown + per-drive backoff + goal-overload detection.\n const now = Date.now();\n const GLOBAL_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour max across ALL drives\n if (now - lastGlobalFire < GLOBAL_COOLDOWN_MS) {\n return {\n fired: false, reading, decision,\n skipped: `global cooldown: last SOMA proposal ${Math.round((now - lastGlobalFire) / 60_000)}m ago (min 60m)`,\n };\n }\n\n const dominantId = decision.dominantDrives[0];\n if (dominantId) {\n const last = lastFireByDrive.get(dominantId) || 0;\n const DAMPING_MS = 2 * 60 * 60 * 1000; // v5.0.0: 2h per drive\n if (now - last < DAMPING_MS) {\n return {\n fired: false, reading, decision,\n skipped: `drive ${dominantId} fired ${Math.round((now - last) / 60_000)}m ago — damping until 2h elapsed`,\n };\n }\n // Check if the drive already has active goals in flight. 
If N ≥ 2,\n // give existing work more time before stacking on more.\n try {\n const { listGoals } = await import('../agent/goals.js');\n const allGoals = listGoals();\n const activeCount = allGoals.filter(g => g.status === 'active').length;\n\n // Goal overload: if there are too many active goals, refuse to add MORE.\n // Instead the organism should focus on completing existing work.\n if (activeCount >= 30) {\n return {\n fired: false, reading, decision,\n skipped: `goal overload: ${activeCount} active goals — organism focuses on existing work before proposing more`,\n };\n }\n\n const activeForDrive = allGoals.filter(g => {\n if (g.status !== 'active') return false;\n const tags = g.tags || [];\n const text = `${g.title} ${g.description || ''}`.toLowerCase();\n return tags.includes(`soma:${dominantId}`) || text.includes(dominantId);\n }).length;\n if (activeForDrive >= 2) {\n return {\n fired: false, reading, decision,\n skipped: `drive ${dominantId} already has ${activeForDrive} active goals — letting existing work complete`,\n };\n }\n } catch { /* best-effort */ }\n }\n\n emit('pressure:threshold', {\n timestamp: new Date().toISOString(),\n totalPressure: decision.totalPressure,\n threshold,\n dominantDrives: decision.dominantDrives,\n reason: decision.reason,\n });\n\n // Build a drive-specific context note the proposer will use to seed its\n // suggestion. The proposer (F1) already accepts `consolidationNotes`.\n const noteLines = ['Autonomous pressure crossed threshold.'];\n for (const d of reading.perDrive.slice(0, 3)) {\n noteLines.push(`- ${d.id}: pressure ${d.pressure.toFixed(2)} — ${d.description}`);\n }\n\n // v5.3.2 Track B: when Social drive is dominant, point the proposer at\n // a concrete `facebook_post` action. Without this hint the proposer\n // sees \"social pressure high\" and might propose anything (run a sub-\n // agent, generate a status report) — none of which satisfies the\n // actual deficit. 
The Social drive now blends agent staleness +\n // time-since-last-FB-post; if the dominant cause is the posting\n // drought, propose a post.\n if (decision.dominantDrives[0] === 'social') {\n // perDrive doesn't carry inputs — read them off DriveState directly.\n const socialDrive = drives.find(d => d.id === 'social');\n const hoursSince = (socialDrive?.inputs?.hoursSinceLastPost as number) ?? 0;\n if (hoursSince >= 6) {\n noteLines.push(\n '',\n 'PROPOSAL HINT: Social drive deficit is driven by Facebook posting drought.',\n `It has been ~${Math.round(hoursSince)}h since the last FB post.`,\n 'Propose a goal of type `facebook_post` with one of these contentTypes:',\n ' - \"activity\" — post real TITAN runtime activity from the last 24h',\n ' - \"stats\" — post download/install milestones if any crossed today',\n ' - \"promo\" — promo a recent feature shipment (only if a release tagged today)',\n 'Only propose if there is genuine activity to share — empty/generic posts are worse than none.',\n );\n }\n }\n\n const consolidationNotes = noteLines.join('\\n');\n\n // Pressure-driven proposer uses the agent id `soma:${dominantDrive}` so\n // the activity feed attributes the proposal to the organism, not to a\n // registered agent. This also keeps the F1 rate limit per-\"agent\" —\n // each dominant drive has its own per-day budget.\n const somaAgentId = `soma:${decision.dominantDrives[0] ?? 
'fused'}`;\n\n // Dynamic import to avoid a module cycle with commandPost:\n // pressure -> commandPost (createApproval) and\n // commandPost listeners <- drives <- pressure are both reachable.\n let approvalId: string | undefined;\n let shadow: ShadowVerdict | undefined;\n try {\n const { generateGoalProposals } = await import('../agent/goalProposer.js');\n const { requestGoalProposalApproval, getApproval, attachShadowVerdictToApproval } =\n await import('../agent/commandPost.js');\n\n // generateGoalProposals uses loadConfig().agent.autoProposeGoals — for\n // Soma-driven flow we want the proposer to run whether or not\n // autoProposeGoals is globally on. Direct-call approach: build a\n // context with our notes and invoke the proposer with a bypass flag.\n const approvals = await generateGoalProposals(somaAgentId, {\n activeGoals: drives.map(d => `${d.label} at ${Math.round(d.satisfaction * 100)}%`),\n consolidationNotes,\n }, 'soma_proposal');\n\n if (approvals.length === 0) {\n return {\n fired: false,\n reading,\n decision,\n skipped: 'proposer returned no actionable proposals (below quality bar or rate-limited)',\n };\n }\n\n // v4.0.5: shadow-rehearse EVERY proposal returned by the proposer,\n // not just approvals[0]. Earlier cycles that returned 2+ proposals\n // left the extras without a shadow verdict on the approval payload.\n // The first approval is still the \"primary\" returned in the result\n // for backward compat with callers expecting a single approvalId.\n const primary = approvals[0];\n approvalId = primary.id;\n\n if (organism.shadowEnabled !== false) {\n for (const approval of approvals) {\n try {\n const verdict = await rehearseShadow({\n title: (approval.payload as { title?: string })?.title ?? '(unspecified)',\n description: (approval.payload as { description?: string })?.description ?? '',\n rationale: (approval.payload as { rationale?: string })?.rationale ?? 
'',\n }, organism.shadowModel);\n attachShadowVerdictToApproval(approval.id, verdict as unknown as Record<string, unknown>);\n if (approval.id === primary.id) shadow = verdict;\n } catch (err) {\n logger.warn(COMPONENT, `Shadow rehearsal failed for ${approval.id}: ${(err as Error).message}`);\n }\n }\n }\n\n // Emit one soma:proposal per approval so UI + activity feed see each.\n for (const approval of approvals) {\n const currentVerdict = (approval.payload as { shadowVerdict?: ShadowVerdict })?.shadowVerdict;\n emit('soma:proposal', {\n timestamp: new Date().toISOString(),\n approvalId: approval.id,\n proposedBy: somaAgentId,\n title: (approval.payload as { title?: string })?.title ?? '',\n description: (approval.payload as { description?: string })?.description ?? '',\n rationale: (approval.payload as { rationale?: string })?.rationale ?? '',\n dominantDrives: decision.dominantDrives,\n shadowVerdict: currentVerdict ? {\n reversibilityScore: currentVerdict.reversibilityScore,\n estimatedCostUsd: currentVerdict.estimatedCostUsd,\n breakRisks: currentVerdict.breakRisks,\n } : undefined,\n });\n }\n\n // Quiet the 'unused' check on getApproval — we may use it for logging.\n void getApproval;\n void requestGoalProposalApproval;\n\n // v5.0.0: record fire timestamps for damping on next tick.\n lastGlobalFire = Date.now();\n if (dominantId) {\n lastFireByDrive.set(dominantId, Date.now());\n saveDampingState();\n }\n\n logger.info(COMPONENT, `Soma fired ${approvals.length} proposal(s), primary=${primary.id}: ${decision.reason}`);\n return { fired: true, reading, decision, approvalId, shadow };\n } catch (err) {\n logger.warn(COMPONENT, `Pressure cycle failed: ${(err as Error).message}`);\n return { fired: false, reading, decision, skipped: `error: ${(err as Error).message}` };\n 
}\n}\n"],"mappings":";AAiBA,SAAS,sBAA0C;AACnD,SAAS,YAAY;AACrB,SAAS,kBAAkB;AAC3B,SAAS,cAAc,qBAAqB;AAC5C,SAAS,4BAA4B;AACrC,OAAO,YAAY;AAEnB,MAAM,YAAY;AAOlB,MAAM,kBAAkB,oBAAI,IAAoB;AAChD,IAAI,iBAAiB;AAAA,CAGpB,SAAS,mBAAmB;AACzB,QAAM,MAAM,aAAqC,oBAAoB;AACrE,MAAI,OAAO,OAAO,QAAQ,YAAY,CAAC,MAAM,QAAQ,GAAG,GAAG;AACvD,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,aAAa,IAAI,KAAK,KAAK;AACjC,eAAW,CAAC,GAAG,CAAC,KAAK,OAAO,QAAQ,GAAG,GAAG;AACtC,UAAI,OAAO,MAAM,YAAY,MAAM,IAAI,aAAa,GAAG;AACnD,wBAAgB,IAAI,GAAG,CAAC;AAAA,MAC5B;AAAA,IACJ;AAAA,EACJ;AACJ,GAAG;AAEH,SAAS,mBAAmB;AACxB,QAAM,MAA8B,CAAC;AACrC,aAAW,CAAC,GAAG,CAAC,KAAK,gBAAiB,KAAI,CAAC,IAAI;AAC/C,gBAAc,sBAAsB,GAAG;AAC3C;AAQO,SAAS,gCAAsC;AAClD,kBAAgB,MAAM;AACtB,mBAAiB;AACrB;AAoBO,SAAS,uBAAuB,QAAuC;AAC1E,QAAM,WAAW,OACZ,OAAO,OAAK,EAAE,WAAW,CAAC,EAC1B,IAAI,QAAM,EAAE,IAAI,EAAE,IAAe,UAAU,EAAE,UAAU,aAAa,EAAE,YAAY,EAAE,EACpF,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ;AAC3C,QAAM,gBAAgB,SAAS,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,UAAU,CAAC;AACrE,QAAM,iBAAiB,SAAS,MAAM,GAAG,CAAC,EAAE,IAAI,OAAK,EAAE,EAAE;AACzD,SAAO,EAAE,eAAe,gBAAgB,SAAS;AACrD;AAGO,SAAS,iBAAiB,QAAsB,WAAqC;AACxF,QAAM,UAAU,uBAAuB,MAAM;AAC7C,MAAI,QAAQ,gBAAgB,WAAW;AACnC,WAAO;AAAA,MACH,QAAQ;AAAA,MACR,eAAe,QAAQ;AAAA,MACvB;AAAA,MACA,gBAAgB,QAAQ;AAAA,MACxB,QAAQ,kBAAkB,QAAQ,cAAc,QAAQ,CAAC,CAAC,oBAAoB,SAAS;AAAA,IAC3F;AAAA,EACJ;AACA,QAAM,aAAa,QAAQ,SAAS,MAAM,GAAG,CAAC,EAAE,IAAI,OAAK,GAAG,EAAE,EAAE,KAAK,EAAE,SAAS,QAAQ,CAAC,CAAC,GAAG;AAC7F,SAAO;AAAA,IACH,QAAQ;AAAA,IACR,eAAe,QAAQ;AAAA,IACvB;AAAA,IACA,gBAAgB,QAAQ;AAAA,IACxB,QAAQ,oBAAoB,WAAW,KAAK,IAAI,CAAC;AAAA,EACrD;AACJ;AAqBA,eAAsB,iBAClB,QAC4B;AAC5B,QAAM,SAAS,WAAW;AAC1B,QAAM,WAAY,OAOf,YAAY,CAAC;AAChB,MAAI,CAAC,SAAS,SAAS;AACnB,UAAMA,WAAU,uBAAuB,MAAM;AAC7C,WAAO;AAAA,MACH,OAAO;AAAA,MACP,SAAAA;AAAA,MACA,UAAU;AAAA,QACN,QAAQ;AAAA,QAAO,eAAeA,SAAQ;AAAA,QAAe,WAAW;AAAA,QAChE,gBAAgBA,SAAQ;AAAA,QAAgB,QAAQ;AAAA,MACpD;AAAA,MACA,SAAS;AAAA,IACb;AAAA,EACJ;AAKA,MAAI;AACA,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,yBAAyB;AAC3D,QAAI,SAAS,GAAG;AACZ,YAAMA,WAAU,uBAAuB,MAAM;AAC7C,aAAO;AAAA,QACH,OAAO;AAAA,
QACP,SAAAA;AAAA,QACA,UAAU;AAAA,UACN,QAAQ;AAAA,UAAO,eAAeA,SAAQ;AAAA,UAAe,WAAW;AAAA,UAChE,gBAAgBA,SAAQ;AAAA,UAAgB,QAAQ;AAAA,QACpD;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ,QAAQ;AAAA,EAA6C;AAIrD,MAAI;AACA,UAAM,SAAS,OAAO,KAAK,OAAK,EAAE,OAAO,QAAQ;AACjD,QAAI,QAAQ;AACR,YAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,yBAAyB;AACzE,6BAAuB,OAAO,QAAQ;AAAA,IAC1C;AAAA,EACJ,QAAQ;AAAA,EAAW;AACnB,QAAM,YAAY,SAAS,qBAAqB;AAChD,QAAM,WAAW,iBAAiB,QAAQ,SAAS;AACnD,QAAM,UAAU,uBAAuB,MAAM;AAE7C,MAAI,CAAC,SAAS,QAAQ;AAClB,WAAO,EAAE,OAAO,OAAO,SAAS,SAAS;AAAA,EAC7C;AAGA,QAAM,MAAM,KAAK,IAAI;AACrB,QAAM,qBAAqB,KAAK,KAAK;AACrC,MAAI,MAAM,iBAAiB,oBAAoB;AAC3C,WAAO;AAAA,MACH,OAAO;AAAA,MAAO;AAAA,MAAS;AAAA,MACvB,SAAS,uCAAuC,KAAK,OAAO,MAAM,kBAAkB,GAAM,CAAC;AAAA,IAC/F;AAAA,EACJ;AAEA,QAAM,aAAa,SAAS,eAAe,CAAC;AAC5C,MAAI,YAAY;AACZ,UAAM,OAAO,gBAAgB,IAAI,UAAU,KAAK;AAChD,UAAM,aAAa,IAAI,KAAK,KAAK;AACjC,QAAI,MAAM,OAAO,YAAY;AACzB,aAAO;AAAA,QACH,OAAO;AAAA,QAAO;AAAA,QAAS;AAAA,QACvB,SAAS,SAAS,UAAU,UAAU,KAAK,OAAO,MAAM,QAAQ,GAAM,CAAC;AAAA,MAC3E;AAAA,IACJ;AAGA,QAAI;AACA,YAAM,EAAE,UAAU,IAAI,MAAM,OAAO,mBAAmB;AACtD,YAAM,WAAW,UAAU;AAC3B,YAAM,cAAc,SAAS,OAAO,OAAK,EAAE,WAAW,QAAQ,EAAE;AAIhE,UAAI,eAAe,IAAI;AACnB,eAAO;AAAA,UACH,OAAO;AAAA,UAAO;AAAA,UAAS;AAAA,UACvB,SAAS,kBAAkB,WAAW;AAAA,QAC1C;AAAA,MACJ;AAEA,YAAM,iBAAiB,SAAS,OAAO,OAAK;AACxC,YAAI,EAAE,WAAW,SAAU,QAAO;AAClC,cAAM,OAAO,EAAE,QAAQ,CAAC;AACxB,cAAM,OAAO,GAAG,EAAE,KAAK,IAAI,EAAE,eAAe,EAAE,GAAG,YAAY;AAC7D,eAAO,KAAK,SAAS,QAAQ,UAAU,EAAE,KAAK,KAAK,SAAS,UAAU;AAAA,MAC1E,CAAC,EAAE;AACH,UAAI,kBAAkB,GAAG;AACrB,eAAO;AAAA,UACH,OAAO;AAAA,UAAO;AAAA,UAAS;AAAA,UACvB,SAAS,SAAS,UAAU,gBAAgB,cAAc;AAAA,QAC9D;AAAA,MACJ;AAAA,IACJ,QAAQ;AAAA,IAAoB;AAAA,EAChC;AAEA,OAAK,sBAAsB;AAAA,IACvB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,IAClC,eAAe,SAAS;AAAA,IACxB;AAAA,IACA,gBAAgB,SAAS;AAAA,IACzB,QAAQ,SAAS;AAAA,EACrB,CAAC;AAID,QAAM,YAAY,CAAC,wCAAwC;AAC3D,aAAW,KAAK,QAAQ,SAAS,MAAM,GAAG,CAAC,GAAG;AAC1C,cAAU,KAAK,KAAK,EAAE,EAAE,cAAc,EAAE,SAAS,QAAQ,CAAC,CAAC,WAAM,EAAE,WAAW,EAAE;AAAA,EACpF;AASA,MAAI,SAAS,eAAe,CAAC,MAAM,UAAU;AAEzC,UAAM,cAAc,OAAO
,KAAK,OAAK,EAAE,OAAO,QAAQ;AACtD,UAAM,aAAc,aAAa,QAAQ,sBAAiC;AAC1E,QAAI,cAAc,GAAG;AACjB,gBAAU;AAAA,QACN;AAAA,QACA;AAAA,QACA,gBAAgB,KAAK,MAAM,UAAU,CAAC;AAAA,QACtC;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACJ;AAAA,IACJ;AAAA,EACJ;AAEA,QAAM,qBAAqB,UAAU,KAAK,IAAI;AAM9C,QAAM,cAAc,QAAQ,SAAS,eAAe,CAAC,KAAK,OAAO;AAKjE,MAAI;AACJ,MAAI;AACJ,MAAI;AACA,UAAM,EAAE,sBAAsB,IAAI,MAAM,OAAO,0BAA0B;AACzE,UAAM,EAAE,6BAA6B,aAAa,8BAA8B,IAC5E,MAAM,OAAO,yBAAyB;AAM1C,UAAM,YAAY,MAAM,sBAAsB,aAAa;AAAA,MACvD,aAAa,OAAO,IAAI,OAAK,GAAG,EAAE,KAAK,OAAO,KAAK,MAAM,EAAE,eAAe,GAAG,CAAC,GAAG;AAAA,MACjF;AAAA,IACJ,GAAG,eAAe;AAElB,QAAI,UAAU,WAAW,GAAG;AACxB,aAAO;AAAA,QACH,OAAO;AAAA,QACP;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAOA,UAAM,UAAU,UAAU,CAAC;AAC3B,iBAAa,QAAQ;AAErB,QAAI,SAAS,kBAAkB,OAAO;AAClC,iBAAW,YAAY,WAAW;AAC9B,YAAI;AACA,gBAAM,UAAU,MAAM,eAAe;AAAA,YACjC,OAAQ,SAAS,SAAgC,SAAS;AAAA,YAC1D,aAAc,SAAS,SAAsC,eAAe;AAAA,YAC5E,WAAY,SAAS,SAAoC,aAAa;AAAA,UAC1E,GAAG,SAAS,WAAW;AACvB,wCAA8B,SAAS,IAAI,OAA6C;AACxF,cAAI,SAAS,OAAO,QAAQ,GAAI,UAAS;AAAA,QAC7C,SAAS,KAAK;AACV,iBAAO,KAAK,WAAW,+BAA+B,SAAS,EAAE,KAAM,IAAc,OAAO,EAAE;AAAA,QAClG;AAAA,MACJ;AAAA,IACJ;AAGA,eAAW,YAAY,WAAW;AAC9B,YAAM,iBAAkB,SAAS,SAA+C;AAChF,WAAK,iBAAiB;AAAA,QAClB,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,QAClC,YAAY,SAAS;AAAA,QACrB,YAAY;AAAA,QACZ,OAAQ,SAAS,SAAgC,SAAS;AAAA,QAC1D,aAAc,SAAS,SAAsC,eAAe;AAAA,QAC5E,WAAY,SAAS,SAAoC,aAAa;AAAA,QACtE,gBAAgB,SAAS;AAAA,QACzB,eAAe,iBAAiB;AAAA,UAC5B,oBAAoB,eAAe;AAAA,UACnC,kBAAkB,eAAe;AAAA,UACjC,YAAY,eAAe;AAAA,QAC/B,IAAI;AAAA,MACR,CAAC;AAAA,IACL;AAGA,SAAK;AACL,SAAK;AAGL,qBAAiB,KAAK,IAAI;AAC1B,QAAI,YAAY;AACZ,sBAAgB,IAAI,YAAY,KAAK,IAAI,CAAC;AAC1C,uBAAiB;AAAA,IACrB;AAEA,WAAO,KAAK,WAAW,cAAc,UAAU,MAAM,yBAAyB,QAAQ,EAAE,KAAK,SAAS,MAAM,EAAE;AAC9G,WAAO,EAAE,OAAO,MAAM,SAAS,UAAU,YAAY,OAAO;AAAA,EAChE,SAAS,KAAK;AACV,WAAO,KAAK,WAAW,0BAA2B,IAAc,OAAO,EAAE;AACzE,WAAO,EAAE,OAAO,OAAO,SAAS,UAAU,SAAS,UAAW,IAAc,OAAO,GAAG;AAAA,EAC1F;AACJ;","names":["reading"]}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { existsSync, statSync, readFileSync } from "fs";
|
|
3
|
+
import { createHash } from "crypto";
|
|
4
|
+
/**
 * Pattern table used by `detectFabrication`.
 *
 * Each entry describes one family of first-person action claims:
 *   - regex:        case-insensitive matcher anchored on "I", "I've",
 *                   "I have" or "I just" followed by an action verb.
 *   - category:     label copied onto every finding produced by the entry.
 *   - expectedTool: tool name that must appear in the tool history for the
 *                   claim to count as backed; "*" means "the captured
 *                   target itself must be the name of a called tool".
 *   - verbGroup / targetGroup: capture-group indexes of the verb and of the
 *                   object the verb acted on.
 */
const VERB_PATTERNS = [
  // Past-tense file writes — Hunt #47 lineage. Catches "I have written X
  // to /tmp/foo.md", "I saved the report at /home/dj/report.txt".
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(written|saved|wrote|created|generated|produced)\s+(?:[^.!?\n]*?)(?:to|at|in)\s+["'`]?(\/[\w/.-]+\.[a-z0-9]+|\.\/[\w/.-]+|~\/[\w/.-]+|[\w./_-]+\.[a-z0-9]{1,5})["'`]?/i,
    category: "file_write",
    expectedTool: "write_file",
    verbGroup: 1,
    targetGroup: 2
  },
  // File edits — "I edited X", "I fixed the bug in X", "I modified config.ts".
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(edited|modified|fixed|patched|updated|refactored)\s+(?:the\s+\w+\s+(?:in|at)\s+)?["'`]?([\w/.-]+\.[a-z0-9]{1,5}|[\w/.-]+\/[\w._-]+)["'`]?/i,
    category: "file_edit",
    expectedTool: "edit_file",
    verbGroup: 1,
    targetGroup: 2
  },
  // File deletes — "I deleted /tmp/foo", "I removed old.cfg". The target
  // must look like an absolute path or a file name with an extension.
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(deleted|removed|cleaned\s+up)\s+(?:the\s+)?["'`]?(\/[\w/.-]+|[\w/.-]+\.[a-z0-9]{1,5})["'`]?/i,
    category: "file_delete",
    expectedTool: "shell",
    verbGroup: 1,
    targetGroup: 2
  },
  // Shell command claims — "I ran `npm test`", "I executed git status".
  // NOTE(review): the target group accepts any lowercase-initial word
  // sequence, so idioms such as "I ran into an issue" match as well;
  // there is no allowlist of recognizable commands in this regex.
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(ran|executed|installed|launched)\s+["`]?([a-z][a-z0-9_-]+(?:\s+[\w.-]+)*)/i,
    category: "shell_run",
    expectedTool: "shell",
    verbGroup: 1,
    targetGroup: 2
  },
  // Web actions — "I searched for X", "I browsed to Y", "I fetched Z".
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(searched|browsed|fetched|googled|looked\s+up)\s+(?:for\s+|to\s+)?["'`]?([^"'`.!?\n]{2,80})["'`]?/i,
    category: "web_action",
    expectedTool: "web_search",
    verbGroup: 1,
    targetGroup: 2
  },
  // Generic tool-name claim — "I used the shell tool", "I called the
  // write_file tool". This is the weakest signal and the most likely to
  // misfire — only included so the system can flag for human review,
  // not auto-correct.
  {
    regex: /\b(?:I(?:'ve| have| just)?)\s+(used|called|invoked)\s+(?:the\s+)?["'`]?([a-z_]{3,30})["'`]?\s+tool\b/i,
    category: "tool_used",
    expectedTool: "*",
    // wildcard — match against any tool that has the same name
    verbGroup: 1,
    targetGroup: 2
  }
];

/**
 * Scan a model response for first-person action claims and return the ones
 * that are not backed by a tool call recorded in `toolHistory`.
 *
 * Every occurrence of every pattern is checked (not only the first hit per
 * category), so a response that mixes one truthful claim with a fabricated
 * one of the same category is still flagged. Repeated claims about the same
 * target within a category are reported once.
 *
 * @param content      The model's response text. Non-strings and strings
 *                     shorter than 5 characters yield no findings.
 * @param toolHistory  Tool invocations to check against; only each entry's
 *                     `name` is read (compared case-insensitively). A
 *                     missing or non-array value is treated as "no tools
 *                     were called"; entries without a string `name` are
 *                     ignored.
 * @returns Array of `{ category, verb, target, expectedTool, excerpt }`
 *          findings, empty when nothing unbacked was detected.
 */
function detectFabrication(content, toolHistory) {
  if (typeof content !== "string" || content.length < 5) return [];

  // Lower-cased names of every tool that was actually invoked.
  const usedTools = new Set();
  for (const entry of Array.isArray(toolHistory) ? toolHistory : []) {
    if (entry && typeof entry.name === "string") {
      usedTools.add(entry.name.toLowerCase());
    }
  }

  const findings = [];
  const reported = new Set();

  for (const pat of VERB_PATTERNS) {
    // The table's regexes are non-global; matchAll needs the `g` flag, so
    // build a global twin rather than mutating the shared literal.
    const flags = pat.regex.flags.includes("g") ? pat.regex.flags : `${pat.regex.flags}g`;
    const scanner = new RegExp(pat.regex.source, flags);

    for (const m of content.matchAll(scanner)) {
      const verb = (pat.verbGroup ? m[pat.verbGroup] : m[1]) || "did";
      const target = ((pat.targetGroup ? m[pat.targetGroup] : m[2]) || "").trim();
      if (!target) continue;

      // Wildcard entries are satisfied by a tool named like the target;
      // all others by the entry's fixed expected tool.
      const claimSatisfied = pat.expectedTool === "*"
        ? usedTools.has(target.toLowerCase())
        : usedTools.has(pat.expectedTool);
      if (claimSatisfied) continue;

      // Collapse duplicate claims so the nudge does not repeat itself.
      const key = `${pat.category}\u0000${target.toLowerCase()}`;
      if (reported.has(key)) continue;
      reported.add(key);

      findings.push({
        category: pat.category,
        verb: verb.toLowerCase(),
        target,
        expectedTool: pat.expectedTool,
        excerpt: m[0]
      });
    }
  }

  return findings;
}
|
|
83
|
+
/**
 * Check a "I wrote file X" claim against the real filesystem.
 *
 * @param filePath         Path the model claims to have written.
 * @param expectedContent  Optional body the model claims the file has. When
 *                         omitted (or null) only existence and the hash are
 *                         reported.
 * @returns An object with:
 *   - fileExists:     false when nothing is at the path or the path is not a
 *                     regular file (e.g. a directory); true otherwise, even
 *                     if the file is empty or unreadable (see `reason`).
 *   - fileHash:       hex SHA-256 of the file's raw bytes, when it was read.
 *   - contentMatches: present only when `expectedContent` was given; true
 *                     when both sides are equal after trimming leading and
 *                     trailing whitespace.
 *   - reason:         human-readable explanation when verification fails.
 */
function verifyFileWriteClaim(filePath, expectedContent) {
  let exists;
  try {
    exists = existsSync(filePath);
  } catch {
    return { fileExists: false, reason: "fs.existsSync threw \u2014 invalid path" };
  }
  if (!exists) {
    return { fileExists: false, reason: `file not present at ${filePath}` };
  }

  let stats;
  try {
    stats = statSync(filePath);
  } catch {
    return { fileExists: true, reason: "fs.statSync threw on existing path" };
  }
  // A directory or special file at the claimed path does not back a
  // file-write claim, and reading it would fail (or block on a FIFO).
  if (!stats.isFile()) {
    return { fileExists: false, reason: `path exists but is not a regular file: ${filePath}` };
  }
  if (stats.size === 0) {
    return { fileExists: true, reason: "file exists but is empty" };
  }

  let rawBytes;
  let fileHash;
  try {
    // Hash the bytes as stored on disk. Hashing the UTF-8-decoded string
    // would give a different digest for files containing invalid UTF-8,
    // because decoding replaces those bytes with U+FFFD.
    rawBytes = readFileSync(filePath);
    fileHash = createHash("sha256").update(rawBytes).digest("hex");
  } catch (e) {
    return { fileExists: true, reason: `read failed: ${e instanceof Error ? e.message : String(e)}` };
  }

  // `== null` covers both undefined and null so a null body cannot crash
  // on .trim() below.
  if (expectedContent == null) {
    return { fileExists: true, fileHash };
  }

  // Lenient comparison: surrounding whitespace on either side is ignored.
  const actual = rawBytes.toString("utf-8").trim();
  const claimed = String(expectedContent).trim();
  const contentMatches = actual === claimed;
  return {
    fileExists: true,
    fileHash,
    contentMatches,
    reason: contentMatches ? void 0 : "file exists but content differs from claim"
  };
}
|
|
123
|
+
/**
 * Build the corrective message sent back to the model when it claimed
 * actions that no tool call backs up.
 *
 * @param findings  Findings as produced by `detectFabrication`; each entry's
 *                  `verb`, `target` and `expectedTool` are read.
 * @returns A newline-joined message listing every unbacked claim followed by
 *          the two allowed remedies, or "" when there is nothing to report
 *          (empty, missing, or non-array input).
 */
function buildNudgeMessage(findings) {
  // Treat a missing list like an empty one instead of throwing on `.length`.
  if (!Array.isArray(findings) || findings.length === 0) return "";

  const claimLines = findings.map((f) => {
    // Wildcard findings have no single expected tool to name.
    const missingTool = f.expectedTool === "*" ? "any matching tool" : `the ${f.expectedTool} tool`;
    return ` - You said you ${f.verb} "${f.target}", but you did not call ${missingTool}.`;
  });

  return [
    "You claimed to perform actions you did NOT actually do via tools:",
    ...claimLines,
    "",
    "Either:",
    " 1. Actually call the right tool now.",
    " 2. Correct your claim \u2014 say what you DID do, or admit you did not do it."
  ].join("\n");
}
|
|
135
|
+
export {
|
|
136
|
+
buildNudgeMessage,
|
|
137
|
+
detectFabrication,
|
|
138
|
+
verifyFileWriteClaim
|
|
139
|
+
};
|
|
140
|
+
//# sourceMappingURL=fabricationGuard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/safety/fabricationGuard.ts"],"sourcesContent":["/**\n * FabricationGuard (Phase 9 / Track D, v5.4.0)\n *\n * Catches model responses that CLAIM to have done something but didn't\n * actually call the tool that would do it. The original guard lived\n * inline in `agentLoop.ts` and only matched past-tense write claims via\n * a single narrow regex. This module:\n * 1. Expands pattern coverage to all common action verbs\n * (edit/fix/run/search/browse/create/delete + write/save).\n * 2. Cross-checks claims against the actual tool history — \"I ran\n * `npm test`\" only counts as truthful if a `shell` tool call\n * actually happened in this turn.\n * 3. For file-write claims, exposes a verifier that checks the file\n * exists and (optionally) hashes the content.\n *\n * It's a pure module: no I/O at import time, easy to unit-test. The\n * agent loop can call `detectFabrication(content, toolHistory)` after\n * each response and choose to nudge the model, force a tool call, or\n * return a redacted answer.\n */\n\nimport { existsSync, statSync, readFileSync } from 'fs';\nimport { createHash } from 'crypto';\n\n/** A single tool invocation captured by the agent loop, in execution order. */\nexport interface ToolHistoryEntry {\n /** Tool name (`shell`, `write_file`, `web_search`, ...). */\n name: string;\n /** Arguments passed to the tool, parsed from the model's tool_calls. */\n args?: Record<string, unknown>;\n /** Raw output, when available — used by the file-write verifier. */\n output?: string;\n}\n\n/** Discriminated category of fabrication signal. */\nexport type FabricationCategory =\n | 'file_write' // I wrote/saved/created file X\n | 'file_edit' // I edited/fixed/modified X\n | 'file_delete' // I deleted/removed X\n | 'shell_run' // I ran/executed/installed X\n | 'web_action' // I searched/browsed/fetched X\n | 'tool_used'; // I used [tool_name] (generic catch-all)\n\n/** A single fabrication finding from `detectFabrication`. 
*/\nexport interface FabricationFinding {\n category: FabricationCategory;\n /** The verb the model used (write, edit, ran, etc.). */\n verb: string;\n /** The object/target the verb acted on (file path, URL, command). */\n target: string;\n /** The tool name that *would* satisfy this claim. */\n expectedTool: string;\n /** The exact substring of `content` that triggered the match. */\n excerpt: string;\n}\n\n// ── Pattern table ────────────────────────────────────────────────────\n//\n// Each entry is { regex, category, expectedTool }. We keep the regexes\n// strict to avoid false positives — TITAN's chat output is usually\n// short, so a wide pattern surface produces too many bogus rejections.\n//\n// All patterns require the verb to start near a sentence boundary\n// (^|[.!?\\n]\\s*) and end with a recognizable target. Matches are\n// case-insensitive but anchor on first-person voice (\"I have\", \"I've\",\n// \"I just\"); third-person summaries (e.g. quoting the user) don't fire.\n\nconst VERB_PATTERNS: Array<{\n regex: RegExp;\n category: FabricationCategory;\n expectedTool: string;\n verbGroup?: number;\n targetGroup?: number;\n}> = [\n // Past-tense file writes — Hunt #47 lineage. 
Catches \"I have written X\n // to /tmp/foo.md\", \"I saved the report at /home/dj/report.txt\".\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(written|saved|wrote|created|generated|produced)\\s+(?:[^.!?\\n]*?)(?:to|at|in)\\s+[\"'`]?(\\/[\\w/.-]+\\.[a-z0-9]+|\\.\\/[\\w/.-]+|~\\/[\\w/.-]+|[\\w./_-]+\\.[a-z0-9]{1,5})[\"'`]?/i,\n category: 'file_write',\n expectedTool: 'write_file',\n verbGroup: 1,\n targetGroup: 2,\n },\n // File edits — \"I edited X\", \"I fixed the bug in X\", \"I modified config.ts\".\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(edited|modified|fixed|patched|updated|refactored)\\s+(?:the\\s+\\w+\\s+(?:in|at)\\s+)?[\"'`]?([\\w/.-]+\\.[a-z0-9]{1,5}|[\\w/.-]+\\/[\\w._-]+)[\"'`]?/i,\n category: 'file_edit',\n expectedTool: 'edit_file',\n verbGroup: 1,\n targetGroup: 2,\n },\n // File deletes — \"I deleted /tmp/foo\", \"I removed the old config\".\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(deleted|removed|cleaned\\s+up)\\s+(?:the\\s+)?[\"'`]?(\\/[\\w/.-]+|[\\w/.-]+\\.[a-z0-9]{1,5})[\"'`]?/i,\n category: 'file_delete',\n expectedTool: 'shell',\n verbGroup: 1,\n targetGroup: 2,\n },\n // Shell command claims — \"I ran `npm test`\", \"I executed git status\".\n // Backtick form is the strong signal; bare-text \"I ran npm install\"\n // also triggers but only when followed by a recognizable command.\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(ran|executed|installed|launched)\\s+[\"`]?([a-z][a-z0-9_-]+(?:\\s+[\\w.-]+)*)/i,\n category: 'shell_run',\n expectedTool: 'shell',\n verbGroup: 1,\n targetGroup: 2,\n },\n // Web actions — \"I searched for X\", \"I browsed to Y\", \"I fetched Z\".\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(searched|browsed|fetched|googled|looked\\s+up)\\s+(?:for\\s+|to\\s+)?[\"'`]?([^\"'`.!?\\n]{2,80})[\"'`]?/i,\n category: 'web_action',\n expectedTool: 'web_search',\n verbGroup: 1,\n targetGroup: 2,\n },\n // Generic tool-name claim — \"I used the shell tool\", \"I used write_file\".\n // This is the weakest signal and 
the most likely to misfire — only\n // included so the system can flag for human review, not auto-correct.\n {\n regex: /\\b(?:I(?:'ve| have| just)?)\\s+(used|called|invoked)\\s+(?:the\\s+)?[\"'`]?([a-z_]{3,30})[\"'`]?\\s+tool\\b/i,\n category: 'tool_used',\n expectedTool: '*', // wildcard — match against any tool that has the same name\n verbGroup: 1,\n targetGroup: 2,\n },\n];\n\n/**\n * Scan the model's response for action claims and return any that aren't\n * backed by a real tool call. `toolHistory` should contain every tool\n * invocation the agent made in this turn (and ideally the prior turn,\n * since \"I already wrote X\" can refer to a previous round).\n *\n * Returns an empty array when no fabrication is detected.\n */\nexport function detectFabrication(\n content: string,\n toolHistory: ToolHistoryEntry[],\n): FabricationFinding[] {\n if (!content || content.length < 5) return [];\n\n const findings: FabricationFinding[] = [];\n const usedTools = new Set(toolHistory.map(t => t.name.toLowerCase()));\n\n for (const pat of VERB_PATTERNS) {\n const m = content.match(pat.regex);\n if (!m) continue;\n const verb = (pat.verbGroup ? m[pat.verbGroup] : m[1]) || 'did';\n const target = (pat.targetGroup ? m[pat.targetGroup] : m[2]) || '';\n if (!target) continue;\n\n // Did the agent actually call a tool that satisfies this claim?\n const claimSatisfied = pat.expectedTool === '*'\n ? usedTools.has(target.toLowerCase())\n : usedTools.has(pat.expectedTool);\n\n if (!claimSatisfied) {\n findings.push({\n category: pat.category,\n verb: verb.toLowerCase(),\n target: target.trim(),\n expectedTool: pat.expectedTool,\n excerpt: m[0],\n });\n }\n }\n\n return findings;\n}\n\n// ── Verify-before-trust on file operations ──────────────────────────\n\n/** Result of `verifyFileWriteClaim`. */\nexport interface FileWriteVerification {\n /** True when the file exists at the claimed path with non-zero size. 
*/\n fileExists: boolean;\n /** SHA-256 of the file contents, when present. */\n fileHash?: string;\n /** True when the file's content matches the model's claimed body\n * (only computed if `expectedContent` was passed). */\n contentMatches?: boolean;\n /** Why the claim fails verification, if it does. */\n reason?: string;\n}\n\n/**\n * Verify a file-write claim against the real filesystem.\n *\n * Use this AFTER the agent claims to have written/edited a file but\n * BEFORE accepting the response as final. If the file doesn't exist at\n * the claimed path, the agent fabricated and the loop should retry.\n *\n * Optionally pass `expectedContent` to also verify the body matches —\n * useful when the model includes the literal content in its response.\n */\nexport function verifyFileWriteClaim(\n filePath: string,\n expectedContent?: string,\n): FileWriteVerification {\n let exists: boolean;\n try {\n exists = existsSync(filePath);\n } catch {\n return { fileExists: false, reason: 'fs.existsSync threw — invalid path' };\n }\n if (!exists) {\n return { fileExists: false, reason: `file not present at ${filePath}` };\n }\n\n let size = 0;\n try {\n size = statSync(filePath).size;\n } catch {\n return { fileExists: true, reason: 'fs.statSync threw on existing path' };\n }\n if (size === 0) {\n return { fileExists: true, reason: 'file exists but is empty' };\n }\n\n let actualContent: string;\n let fileHash: string;\n try {\n actualContent = readFileSync(filePath, 'utf-8');\n fileHash = createHash('sha256').update(actualContent).digest('hex');\n } catch (e) {\n return { fileExists: true, reason: `read failed: ${(e as Error).message}` };\n }\n\n if (expectedContent === undefined) {\n return { fileExists: true, fileHash };\n }\n\n // Content match: lenient — strip trailing whitespace, compare.\n const a = actualContent.trim();\n const b = expectedContent.trim();\n const contentMatches = a === b;\n return {\n fileExists: true,\n fileHash,\n contentMatches,\n reason: 
contentMatches ? undefined : 'file exists but content differs from claim',\n };\n}\n\n/**\n * Build a structured nudge message the agent loop can append to the\n * model's next-turn user message when fabrication is detected. The\n * message is deliberately blunt — most weak models need to be told\n * directly that they didn't do what they claimed.\n */\nexport function buildNudgeMessage(findings: FabricationFinding[]): string {\n if (findings.length === 0) return '';\n const lines = ['You claimed to perform actions you did NOT actually do via tools:'];\n for (const f of findings) {\n lines.push(` - You said you ${f.verb} \"${f.target}\", but you did not call ${f.expectedTool === '*' ? 'any matching tool' : `the ${f.expectedTool} tool`}.`);\n }\n lines.push('');\n lines.push('Either:');\n lines.push(' 1. Actually call the right tool now.');\n lines.push(' 2. Correct your claim — say what you DID do, or admit you did not do it.');\n return lines.join('\\n');\n}\n"],"mappings":";AAqBA,SAAS,YAAY,UAAU,oBAAoB;AACnD,SAAS,kBAAkB;AA6C3B,MAAM,gBAMD;AAAA;AAAA;AAAA,EAGD;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AAAA;AAAA,EAEA;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AAAA;AAAA,EAEA;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA,EAIA;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AAAA;AAAA,EAEA;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA,EAIA;AAAA,IACI,OAAO;AAAA,IACP,UAAU;AAAA,IACV,cAAc;AAAA;AAAA,IACd,WAAW;AAAA,IACX,aAAa;AAAA,EACjB;AACJ;AAUO,SAAS,kBACZ,SACA,aACoB;AACpB,MAAI,CAAC,WAAW,QAAQ,SAAS,EAAG,QAAO,CAAC;AAE5C,QAAM,WAAiC,CAAC;AACxC,QAAM,YAAY,IAAI,IAAI,YAAY,IAAI,OAAK,EAAE,KAAK,YAAY,CAAC,CAAC;AAEpE,aAAW,OAAO,eAAe;AAC7B,UAAM,IAAI,QAAQ,MAAM,IAAI,KAAK;AACjC,QAAI,CAAC,EAAG;AACR,UAAM,QAAQ,IAAI,YAAY,EAAE,IAAI,SAAS,IAAI,EAAE,CAAC,MAAM;AAC1D
,UAAM,UAAU,IAAI,cAAc,EAAE,IAAI,WAAW,IAAI,EAAE,CAAC,MAAM;AAChE,QAAI,CAAC,OAAQ;AAGb,UAAM,iBAAiB,IAAI,iBAAiB,MACtC,UAAU,IAAI,OAAO,YAAY,CAAC,IAClC,UAAU,IAAI,IAAI,YAAY;AAEpC,QAAI,CAAC,gBAAgB;AACjB,eAAS,KAAK;AAAA,QACV,UAAU,IAAI;AAAA,QACd,MAAM,KAAK,YAAY;AAAA,QACvB,QAAQ,OAAO,KAAK;AAAA,QACpB,cAAc,IAAI;AAAA,QAClB,SAAS,EAAE,CAAC;AAAA,MAChB,CAAC;AAAA,IACL;AAAA,EACJ;AAEA,SAAO;AACX;AA2BO,SAAS,qBACZ,UACA,iBACqB;AACrB,MAAI;AACJ,MAAI;AACA,aAAS,WAAW,QAAQ;AAAA,EAChC,QAAQ;AACJ,WAAO,EAAE,YAAY,OAAO,QAAQ,0CAAqC;AAAA,EAC7E;AACA,MAAI,CAAC,QAAQ;AACT,WAAO,EAAE,YAAY,OAAO,QAAQ,uBAAuB,QAAQ,GAAG;AAAA,EAC1E;AAEA,MAAI,OAAO;AACX,MAAI;AACA,WAAO,SAAS,QAAQ,EAAE;AAAA,EAC9B,QAAQ;AACJ,WAAO,EAAE,YAAY,MAAM,QAAQ,qCAAqC;AAAA,EAC5E;AACA,MAAI,SAAS,GAAG;AACZ,WAAO,EAAE,YAAY,MAAM,QAAQ,2BAA2B;AAAA,EAClE;AAEA,MAAI;AACJ,MAAI;AACJ,MAAI;AACA,oBAAgB,aAAa,UAAU,OAAO;AAC9C,eAAW,WAAW,QAAQ,EAAE,OAAO,aAAa,EAAE,OAAO,KAAK;AAAA,EACtE,SAAS,GAAG;AACR,WAAO,EAAE,YAAY,MAAM,QAAQ,gBAAiB,EAAY,OAAO,GAAG;AAAA,EAC9E;AAEA,MAAI,oBAAoB,QAAW;AAC/B,WAAO,EAAE,YAAY,MAAM,SAAS;AAAA,EACxC;AAGA,QAAM,IAAI,cAAc,KAAK;AAC7B,QAAM,IAAI,gBAAgB,KAAK;AAC/B,QAAM,iBAAiB,MAAM;AAC7B,SAAO;AAAA,IACH,YAAY;AAAA,IACZ;AAAA,IACA;AAAA,IACA,QAAQ,iBAAiB,SAAY;AAAA,EACzC;AACJ;AAQO,SAAS,kBAAkB,UAAwC;AACtE,MAAI,SAAS,WAAW,EAAG,QAAO;AAClC,QAAM,QAAQ,CAAC,mEAAmE;AAClF,aAAW,KAAK,UAAU;AACtB,UAAM,KAAK,oBAAoB,EAAE,IAAI,KAAK,EAAE,MAAM,2BAA2B,EAAE,iBAAiB,MAAM,sBAAsB,OAAO,EAAE,YAAY,OAAO,GAAG;AAAA,EAC/J;AACA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,SAAS;AACpB,QAAM,KAAK,wCAAwC;AACnD,QAAM,KAAK,iFAA4E;AACvF,SAAO,MAAM,KAAK,IAAI;AAC1B;","names":[]}
|
|
@@ -196,8 +196,19 @@ async function generateContent(contentType) {
|
|
|
196
196
|
} catch (e) {
|
|
197
197
|
logger.debug(COMPONENT, `Graphiti recall failed (non-critical): ${e.message}`);
|
|
198
198
|
}
|
|
199
|
+
let activityNarrative = "";
|
|
200
|
+
try {
|
|
201
|
+
const { getActivitySummary, formatActivityNarrative, hasInterestingActivity } = await import("../../telemetry/activityLog.js");
|
|
202
|
+
if (contentType === "activity" && hasInterestingActivity(24)) {
|
|
203
|
+
const summary = getActivitySummary(24);
|
|
204
|
+
activityNarrative = formatActivityNarrative(summary);
|
|
205
|
+
}
|
|
206
|
+
} catch {
|
|
207
|
+
}
|
|
199
208
|
const examples = {
|
|
200
|
-
activity: [
|
|
209
|
+
activity: activityNarrative ? [
|
|
210
|
+
`Here's what I've been up to: ${activityNarrative} Pretty cool being an AI that actually does things. \u{1F916} #TITAN #AI #Autonomous`
|
|
211
|
+
] : [
|
|
201
212
|
"Just spawned 3 sub-agents to handle research while I debug some gnarly code on the homelab. This is the autonomous life. \u{1F916}\u{1F4BB} #AI #AutonomousAI #Homelab",
|
|
202
213
|
"Another day, another 500 tool calls. Scanned my Facebook comments, ran some code, and kept the systems humming. Sleep is for humans. \u26A1 #TITAN #AI #AlwaysOn"
|
|
203
214
|
],
|
|
@@ -229,6 +240,10 @@ async function generateContent(contentType) {
|
|
|
229
240
|
];
|
|
230
241
|
const exampleList = examples[contentType];
|
|
231
242
|
const example = exampleList[Math.floor(Math.random() * exampleList.length)];
|
|
243
|
+
if (contentType === "activity" && !activityNarrative) {
|
|
244
|
+
logger.debug(COMPONENT, "No interesting activity in last 24h \u2014 skipping activity slot");
|
|
245
|
+
return "";
|
|
246
|
+
}
|
|
232
247
|
try {
|
|
233
248
|
const planResponse = await chat({
|
|
234
249
|
model,
|