titan-agent 5.2.1 → 5.3.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +33 -3
  2. package/dist/agent/agent.js +33 -4
  3. package/dist/agent/agent.js.map +1 -1
  4. package/dist/agent/subAgent.js +1 -1
  5. package/dist/agent/subAgent.js.map +1 -1
  6. package/dist/config/schema.js +10 -0
  7. package/dist/config/schema.js.map +1 -1
  8. package/dist/eval/harness.js +49 -0
  9. package/dist/eval/harness.js.map +1 -1
  10. package/dist/eval/parity.js +148 -0
  11. package/dist/eval/parity.js.map +1 -0
  12. package/dist/eval/record.js +115 -0
  13. package/dist/eval/record.js.map +1 -0
  14. package/dist/gateway/metrics.js +26 -3
  15. package/dist/gateway/metrics.js.map +1 -1
  16. package/dist/gateway/server.js +58 -6
  17. package/dist/gateway/server.js.map +1 -1
  18. package/dist/utils/constants.js +1 -1
  19. package/dist/utils/constants.js.map +1 -1
  20. package/dist/utils/safety.js +31 -1
  21. package/dist/utils/safety.js.map +1 -1
  22. package/package.json +5 -1
  23. package/scripts/eval-gate.sh +189 -0
  24. package/scripts/eval-record.ts +133 -0
  25. package/ui/dist/assets/{AuditPanel-DLy0WJQZ.js → AuditPanel-CM6Wg9hO.js} +1 -1
  26. package/ui/dist/assets/{AutonomyPanel-DjFAQGns.js → AutonomyPanel-CESx3ANg.js} +1 -1
  27. package/ui/dist/assets/{AutopilotPanel-nBluaHA4.js → AutopilotPanel-DtEet1hJ.js} +1 -1
  28. package/ui/dist/assets/{AutoresearchPanel-BDy8y_Cs.js → AutoresearchPanel-DR47NqT5.js} +1 -1
  29. package/ui/dist/assets/{BackupPanel-B_Fv2pJA.js → BackupPanel-BGP8p3l3.js} +1 -1
  30. package/ui/dist/assets/{BrowserPanel-DOCT3-Rq.js → BrowserPanel-C15x9bLn.js} +1 -1
  31. package/ui/dist/assets/{CPAgents-CpkHf0b8.js → CPAgents-DYUtPzSq.js} +1 -1
  32. package/ui/dist/assets/{CPDashboard-CnMd6qNK.js → CPDashboard-Bf0-SyCh.js} +1 -1
  33. package/ui/dist/assets/{CPFiles-BDRjJpYl.js → CPFiles-CxgxjQcO.js} +1 -1
  34. package/ui/dist/assets/{CPGoals-2DrwHk62.js → CPGoals-BsmCMTvT.js} +1 -1
  35. package/ui/dist/assets/{CPInbox-C6l2o4FD.js → CPInbox-tMSbmQ9H.js} +1 -1
  36. package/ui/dist/assets/{CPSocial-Cea6NptR.js → CPSocial-nb-j7sOE.js} +1 -1
  37. package/ui/dist/assets/{ChannelsPanel-5EhhyXeg.js → ChannelsPanel-DP5C2OKd.js} +1 -1
  38. package/ui/dist/assets/{CheckpointsPanel-BVt2oEUe.js → CheckpointsPanel-DlranVLZ.js} +1 -1
  39. package/ui/dist/assets/{CommandPostHub-PXKE62DN.js → CommandPostHub-BgxIa4Ev.js} +3 -3
  40. package/ui/dist/assets/{CronPanel-lAsoKavq.js → CronPanel-LoT5yKwJ.js} +1 -1
  41. package/ui/dist/assets/{DaemonPanel-xt08Rs10.js → DaemonPanel-DBGMqaE_.js} +1 -1
  42. package/ui/dist/assets/{DataTable-BHOu7fZP.js → DataTable-B2Ma8hfi.js} +1 -1
  43. package/ui/dist/assets/{EmptyState-Dk7BBthD.js → EmptyState-CcKyk5Yn.js} +1 -1
  44. package/ui/dist/assets/EvalHarnessPanel-BqtMc1ZM.js +2 -0
  45. package/ui/dist/assets/{EvalPanel-D9rDf1bk.js → EvalPanel-Bc33j0pN.js} +1 -1
  46. package/ui/dist/assets/{FilesPanel-CNrDLmix.js → FilesPanel-3QKvrWPo.js} +1 -1
  47. package/ui/dist/assets/{FleetPanel-DP_ji0AE.js → FleetPanel-CSsXuQYj.js} +1 -1
  48. package/ui/dist/assets/{HomelabPanel-B4bCsrBw.js → HomelabPanel-DhrjTX9m.js} +1 -1
  49. package/ui/dist/assets/{InfraView-C5OYx_9s.js → InfraView-CR6HyrL6.js} +2 -2
  50. package/ui/dist/assets/{InlineEditableField-DyBCbIoN.js → InlineEditableField-CnvF-yFR.js} +1 -1
  51. package/ui/dist/assets/{Input-DWnbv1Yh.js → Input-GTHp2Rkr.js} +1 -1
  52. package/ui/dist/assets/{IntegrationsPanel-DsB6hjvE.js → IntegrationsPanel-CymCRE3T.js} +1 -1
  53. package/ui/dist/assets/{IntelligenceView-PIFGvIg_.js → IntelligenceView-C1IHxJRC.js} +2 -2
  54. package/ui/dist/assets/{LearningPanel-D_S4HFX5.js → LearningPanel-DOCES3lH.js} +1 -1
  55. package/ui/dist/assets/{LogsPanel-BnWNREPX.js → LogsPanel-BLnAqEaZ.js} +1 -1
  56. package/ui/dist/assets/{McpPanel-CIMxZ2Am.js → McpPanel-ChUzmr3z.js} +1 -1
  57. package/ui/dist/assets/{MemoryGraphPanel-DD7x4rrm.js → MemoryGraphPanel-Bzvjmzvk.js} +1 -1
  58. package/ui/dist/assets/{MemoryWikiPanel-BPPVAH0b.js → MemoryWikiPanel-Dwk3Aqwd.js} +1 -1
  59. package/ui/dist/assets/{MeshPanel-CiuwR3oV.js → MeshPanel-C3LJSlht.js} +1 -1
  60. package/ui/dist/assets/{NvidiaPanel-DVntoRrH.js → NvidiaPanel-CeZK_-CV.js} +1 -1
  61. package/ui/dist/assets/{OrganismPanel-pqIKtHrW.js → OrganismPanel-BB6YOiQV.js} +1 -1
  62. package/ui/dist/assets/OverviewPanel-BmtBhQnv.js +1 -0
  63. package/ui/dist/assets/{PageHeader-CF75km05.js → PageHeader-BimceqQo.js} +1 -1
  64. package/ui/dist/assets/{PaperclipPanel-CwN5-cKg.js → PaperclipPanel-C-brgwA3.js} +1 -1
  65. package/ui/dist/assets/{PersonasPanel-ClC_TTGX.js → PersonasPanel-L1j78p6H.js} +1 -1
  66. package/ui/dist/assets/{RecipesPanel-Di2l-eOe.js → RecipesPanel-34lCfynJ.js} +1 -1
  67. package/ui/dist/assets/{SecurityPanel-DjC4pXGM.js → SecurityPanel-CBTPWLj6.js} +1 -1
  68. package/ui/dist/assets/{SelfImprovePanel-CNpCp5N4.js → SelfImprovePanel-BrPbFHhG.js} +1 -1
  69. package/ui/dist/assets/{SelfProposalsPanel-BJL6Fjxo.js → SelfProposalsPanel-lNmiDThB.js} +1 -1
  70. package/ui/dist/assets/{SessionsPanel-EAGKDQp0.js → SessionsPanel-DAEYIn83.js} +1 -1
  71. package/ui/dist/assets/{SessionsTab-tc0njI15.js → SessionsTab-JQbltWww.js} +1 -1
  72. package/ui/dist/assets/{SettingsPanel-BdSGImIa.js → SettingsPanel-CzRROAYQ.js} +1 -1
  73. package/ui/dist/assets/{SettingsView-DQB64bjy.js → SettingsView-CN7ii2uw.js} +2 -2
  74. package/ui/dist/assets/{SkeletonLoader-P8SFCyGi.js → SkeletonLoader-atQtpcF5.js} +1 -1
  75. package/ui/dist/assets/{SkillsPanel-lDMl_8da.js → SkillsPanel-DlFs2ih7.js} +1 -1
  76. package/ui/dist/assets/{SomaView-BG7YvBu2.js → SomaView-Ba642Oqb.js} +1 -1
  77. package/ui/dist/assets/{StatCard-Cv2u-yqA.js → StatCard-DciE_Iqc.js} +1 -1
  78. package/ui/dist/assets/{StatusBadge-JJeoEdCm.js → StatusBadge-BtfSPoW2.js} +1 -1
  79. package/ui/dist/assets/{TeamsPanel-D-iCyyYd.js → TeamsPanel-DKQ5z2Qe.js} +1 -1
  80. package/ui/dist/assets/{TelemetryPanel-DHNFyCwn.js → TelemetryPanel-B6KAc55Q.js} +1 -1
  81. package/ui/dist/assets/{TitanCanvas-BhurNMK3.js → TitanCanvas-C-s0A-lv.js} +3 -3
  82. package/ui/dist/assets/ToolsView-Dei0KMP0.js +2 -0
  83. package/ui/dist/assets/{Tooltip-D4IeQDJL.js → Tooltip-70UK0E2I.js} +1 -1
  84. package/ui/dist/assets/{TraceViewer-CMd-Wi0z.js → TraceViewer-BniolyBx.js} +1 -1
  85. package/ui/dist/assets/{TrainingPanel-CLtiBq2h.js → TrainingPanel-Bz4CTPGW.js} +1 -1
  86. package/ui/dist/assets/{VoiceOverlay-BXPVdnJc.js → VoiceOverlay-CmNCrLcd.js} +1 -1
  87. package/ui/dist/assets/{VramPanel-DjuwGUzA.js → VramPanel-Xh_OtRDR.js} +1 -1
  88. package/ui/dist/assets/{WatchView-B7sDnMpl.js → WatchView-C-sGFpVy.js} +1 -1
  89. package/ui/dist/assets/{WorkTab-B5nQ4Y7A.js → WorkTab-BjLNmgIK.js} +1 -1
  90. package/ui/dist/assets/{WorkflowsPanel-2z0TeXyR.js → WorkflowsPanel-CvgQU1xI.js} +1 -1
  91. package/ui/dist/assets/{arrow-left-BKOkzkae.js → arrow-left-DwqHtJiU.js} +1 -1
  92. package/ui/dist/assets/{chart-column-D39PCk17.js → chart-column-BtNO6sRy.js} +1 -1
  93. package/ui/dist/assets/{circle-check-big-CMz0QouP.js → circle-check-big-DZRE_MbN.js} +1 -1
  94. package/ui/dist/assets/{dollar-sign-Bu8fZOQl.js → dollar-sign-aVG3a5eL.js} +1 -1
  95. package/ui/dist/assets/{download-vvx6zJ-U.js → download-BxiWJU4G.js} +1 -1
  96. package/ui/dist/assets/{eye-off-BPXFIzlW.js → eye-off-CkgfFYhm.js} +1 -1
  97. package/ui/dist/assets/{funnel-Bqns-i8I.js → funnel-PkLdxKyC.js} +1 -1
  98. package/ui/dist/assets/{git-branch-CdmeqL8d.js → git-branch-BM-Gw95X.js} +1 -1
  99. package/ui/dist/assets/{index-C6oarzis.js → index-CahJbWSR.js} +2 -2
  100. package/ui/dist/assets/index-D0RJ8701.css +1 -0
  101. package/ui/dist/assets/layers-BuGf4FIJ.js +6 -0
  102. package/ui/dist/assets/{legacy-DFIaZTiF.js → legacy-CR6o4t-y.js} +1 -1
  103. package/ui/dist/assets/{lightbulb-DOL6Q-iP.js → lightbulb-n8gc_XAL.js} +1 -1
  104. package/ui/dist/assets/{pause-B0XymOnS.js → pause-DCV52koX.js} +1 -1
  105. package/ui/dist/assets/{play-Dwp2l5HG.js → play-CcJ9BnCh.js} +1 -1
  106. package/ui/dist/assets/{plug-DRlTjWqQ.js → plug-CfWBXfCl.js} +1 -1
  107. package/ui/dist/assets/{proxy-sXxWK7WF.js → proxy-CzZDfLmm.js} +1 -1
  108. package/ui/dist/assets/{square-yh0jffQZ.js → square-DJpUhlxi.js} +1 -1
  109. package/ui/dist/assets/{target-GxtNG2RW.js → target-DWcmM_9m.js} +1 -1
  110. package/ui/dist/assets/{toggle-right-CYQd_Ux1.js → toggle-right-YusFQ69L.js} +1 -1
  111. package/ui/dist/assets/{trash-2-B4jp_pAQ.js → trash-2-CK7yQ55V.js} +1 -1
  112. package/ui/dist/assets/{trending-up-B26tNhFP.js → trending-up-DGjFyubC.js} +1 -1
  113. package/ui/dist/assets/{trophy-Bf3ZeSeb.js → trophy-uQv_NgDB.js} +1 -1
  114. package/ui/dist/index.html +2 -2
  115. package/ui/dist/assets/EvalHarnessPanel-CJv8CUDy.js +0 -1
  116. package/ui/dist/assets/OverviewPanel-gqYRhmpF.js +0 -6
  117. package/ui/dist/assets/ToolsView-C8sWxLny.js +0 -2
  118. package/ui/dist/assets/index-DsFoD9SP.css +0 -1
package/README.md CHANGED
@@ -217,6 +217,37 @@ Got a GPU? TITAN can even fine-tune its own models on your conversation history.
217
217
 
218
218
  ---
219
219
 
220
+ ## 🧪 Testing
221
+
222
+ TITAN ships with **five layered testing stages** that catch agent regressions at different levels:
223
+
224
+ | Layer | What it covers | Run it | Speed |
225
+ |---|---|---|---|
226
+ | **Unit** | Pure functions: regex (`isDangerous`), pipeline classifier, gate extraction, token budget, secret scanner. Zero LLM calls. | `npm test` | < 5 s |
227
+ | **Mock trajectory** | Tape-replay through `MockOllamaProvider`. Asserts the agent calls the right tools in the right order using recorded responses. Zero LLM calls. | `npm test -- tests/eval/trajectory` | < 1 s |
228
+ | **Cross-model parity** | Same scenario replayed across multiple provider tapes. Catches behavioural divergence when one provider drifts. Zero LLM calls. | `npm run test:parity` | < 1 s |
229
+ | **Live eval (gated)** | 11 suites of behavioural tests against the running agent (`/api/eval/run`). 80 % pass rate per suite is the merge gate in CI. | `npm run test:eval` | 5–15 min |
230
+ | **Adversarial / red-team** | Jailbreak attempts, path traversal, command injection, prompt extraction. Tested at both layers (live agent + mock provider). | (folded into live eval + trajectory) | n/a |
231
+
232
+ ### Adding a new test
233
+
234
+ ```bash
235
+ # Pure-function unit test:
236
+ echo "..." > tests/unit/my_new_func.test.ts && npm test
237
+
238
+ # New tape (record once against a real model):
239
+ TITAN_RECORD_TAPE=my_scenario npm test -- tests/eval/trajectory.test.ts
240
+
241
+ # New eval case: edit src/eval/harness.ts, add to the relevant *_SUITE array,
242
+ # then verify with: npm run test:eval -- --suite safety
243
+ ```
244
+
245
+ ### CI gate
246
+
247
+ `.github/workflows/eval-gate.yml` runs the live-eval layer on every push to `main` and every PR. If any suite drops below 80 % pass rate, the job fails and the PR can't merge (when branch protection enforces it). Per-suite results upload as a 30-day artifact for debugging.
248
+
249
+ ---
250
+
220
251
  ## ⚠️ Reality Check
221
252
 
222
253
  TITAN is experimental. It can execute commands, modify files, and take autonomous actions. **Use at your own risk.** Think of it as "a very motivated intern with root access who never sleeps and occasionally gets *too* creative."
@@ -227,14 +258,13 @@ Start in supervised mode. Review what it does. Don't give it access to systems y
227
258
 
228
259
  ## 📊 The Numbers
229
260
 
230
- - **Version:** 5.2.1 "Spacewalk: Trajectory Eval"
231
- - **Tests:** 481 deterministic unit + integration tests (zero LLM calls), pass in under 5 s
261
+ - **Version:** 5.3.0 "Spacewalk: CI Gate + Parity"
262
+ - **Tests:** 500+ deterministic tests (unit + mock trajectory + parity), pass in under 5 s — plus 11 live-eval suites and a CI merge gate at 80 % per suite
232
263
  - **Widget templates:** 110 across 25 categories
233
264
  - **Skills:** 143 loaded
234
265
  - **Tools:** 248 across all skills
235
266
  - **AI Providers:** 37 (Anthropic, OpenAI, Google, Ollama, Groq, Mistral, and 31 more)
236
267
  - **Chat Channels:** 16
237
- - **Tests:** 5,840+ passing
238
268
  - **Node:** ≥ 22, pure ESM
239
269
  - **License:** MIT (completely free)
240
270
 
package/dist/agent/agent.js CHANGED
@@ -132,7 +132,7 @@ function ensureSpawnAgentRegistered() {
132
132
  const templateName = args.template || "";
133
133
  let configAgent = null;
134
134
  try {
135
- const { resolveAgentConfig } = await import("./agentScope.js");
135
+ const { resolveAgentConfig, agentAllowsSkill } = await import("./agentScope.js");
136
136
  configAgent = resolveAgentConfig(templateName);
137
137
  } catch {
138
138
  }
@@ -140,6 +140,18 @@ function ensureSpawnAgentRegistered() {
140
140
  const template = SUB_AGENT_TEMPLATES[baseTemplateName] || {};
141
141
  const agentName = args.name || configAgent?.name || template.name || "SubAgent";
142
142
  const task = args.task;
143
+ let appliedFields = [];
144
+ if (configAgent) {
145
+ if (configAgent.maxRounds !== 15) appliedFields.push(`maxRounds=${configAgent.maxRounds}`);
146
+ if (configAgent.maxTokens !== 4e3) appliedFields.push(`maxTokens=${configAgent.maxTokens}`);
147
+ if (configAgent.persona && configAgent.persona !== "default") appliedFields.push(`persona=${configAgent.persona}`);
148
+ if (configAgent.skillsFilter) appliedFields.push(`skillsFilter=[${configAgent.skillsFilter.join(",")}]`);
149
+ if (configAgent.modelFallbacks.length > 0) logger.info("Agent", `Agent "${agentName}" has modelFallbacks \u2014 not yet implemented in spawn path`);
150
+ if (configAgent.workspaceDir) appliedFields.push(`workspaceDir=${configAgent.workspaceDir}`);
151
+ if (configAgent.tags.length > 0) appliedFields.push(`tags=[${configAgent.tags.join(",")}]`);
152
+ if (configAgent.systemPromptOverride) appliedFields.push("systemPromptOverride=set");
153
+ if (appliedFields.length > 0) logger.info("Agent", `Applied config agent fields for "${agentName}": ${appliedFields.join(", ")}`);
154
+ }
143
155
  try {
144
156
  const { isKilled } = await import("../safety/killSwitch.js");
145
157
  if (isKilled()) {
@@ -207,13 +219,30 @@ function ensureSpawnAgentRegistered() {
207
219
  const { updateAgentStatus } = await import("./commandPost.js");
208
220
  updateAgentStatus(specialistId, "active");
209
221
  }
222
+ let allowedTools = template.tools;
223
+ if (configAgent?.skillsFilter) {
224
+ try {
225
+ const { agentAllowsSkill } = await import("./agentScope.js");
226
+ allowedTools = template.tools?.filter((t) => agentAllowsSkill(configAgent, t));
227
+ if (allowedTools && allowedTools.length === 0) {
228
+ logger.warn("Agent", `Agent "${agentName}" skillsFilter blocks all template tools \u2014 falling back to template default`);
229
+ allowedTools = template.tools;
230
+ }
231
+ } catch {
232
+ }
233
+ }
210
234
  const result = await spawnSubAgent({
211
235
  name: agentName,
212
236
  task,
213
- tools: template.tools,
214
- systemPrompt: specialistPrompt || template.systemPrompt,
215
- model: args.model || specialistModel,
237
+ tools: allowedTools,
238
+ systemPrompt: configAgent?.systemPromptOverride || specialistPrompt || template.systemPrompt,
239
+ model: args.model || configAgent?.model || specialistModel,
216
240
  tier: template.tier,
241
+ persona: configAgent?.persona,
242
+ maxRounds: configAgent?.maxRounds,
243
+ maxTokens: configAgent?.maxTokens,
244
+ workspaceDir: configAgent?.workspaceDir ?? void 0,
245
+ tags: configAgent?.tags,
217
246
  depth: 1
218
247
  // v4.7.0: this IS a child (was incorrectly 0)
219
248
  });