titan-agent 5.2.1 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -3
- package/dist/agent/agent.js +33 -4
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/subAgent.js +1 -1
- package/dist/agent/subAgent.js.map +1 -1
- package/dist/config/schema.js +10 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/eval/harness.js +49 -0
- package/dist/eval/harness.js.map +1 -1
- package/dist/eval/parity.js +148 -0
- package/dist/eval/parity.js.map +1 -0
- package/dist/eval/record.js +115 -0
- package/dist/eval/record.js.map +1 -0
- package/dist/gateway/metrics.js +26 -3
- package/dist/gateway/metrics.js.map +1 -1
- package/dist/gateway/server.js +58 -6
- package/dist/gateway/server.js.map +1 -1
- package/dist/utils/constants.js +1 -1
- package/dist/utils/constants.js.map +1 -1
- package/dist/utils/safety.js +31 -1
- package/dist/utils/safety.js.map +1 -1
- package/package.json +5 -1
- package/scripts/eval-gate.sh +189 -0
- package/scripts/eval-record.ts +133 -0
- package/ui/dist/assets/{AuditPanel-DLy0WJQZ.js → AuditPanel-CM6Wg9hO.js} +1 -1
- package/ui/dist/assets/{AutonomyPanel-DjFAQGns.js → AutonomyPanel-CESx3ANg.js} +1 -1
- package/ui/dist/assets/{AutopilotPanel-nBluaHA4.js → AutopilotPanel-DtEet1hJ.js} +1 -1
- package/ui/dist/assets/{AutoresearchPanel-BDy8y_Cs.js → AutoresearchPanel-DR47NqT5.js} +1 -1
- package/ui/dist/assets/{BackupPanel-B_Fv2pJA.js → BackupPanel-BGP8p3l3.js} +1 -1
- package/ui/dist/assets/{BrowserPanel-DOCT3-Rq.js → BrowserPanel-C15x9bLn.js} +1 -1
- package/ui/dist/assets/{CPAgents-CpkHf0b8.js → CPAgents-DYUtPzSq.js} +1 -1
- package/ui/dist/assets/{CPDashboard-CnMd6qNK.js → CPDashboard-Bf0-SyCh.js} +1 -1
- package/ui/dist/assets/{CPFiles-BDRjJpYl.js → CPFiles-CxgxjQcO.js} +1 -1
- package/ui/dist/assets/{CPGoals-2DrwHk62.js → CPGoals-BsmCMTvT.js} +1 -1
- package/ui/dist/assets/{CPInbox-C6l2o4FD.js → CPInbox-tMSbmQ9H.js} +1 -1
- package/ui/dist/assets/{CPSocial-Cea6NptR.js → CPSocial-nb-j7sOE.js} +1 -1
- package/ui/dist/assets/{ChannelsPanel-5EhhyXeg.js → ChannelsPanel-DP5C2OKd.js} +1 -1
- package/ui/dist/assets/{CheckpointsPanel-BVt2oEUe.js → CheckpointsPanel-DlranVLZ.js} +1 -1
- package/ui/dist/assets/{CommandPostHub-PXKE62DN.js → CommandPostHub-BgxIa4Ev.js} +3 -3
- package/ui/dist/assets/{CronPanel-lAsoKavq.js → CronPanel-LoT5yKwJ.js} +1 -1
- package/ui/dist/assets/{DaemonPanel-xt08Rs10.js → DaemonPanel-DBGMqaE_.js} +1 -1
- package/ui/dist/assets/{DataTable-BHOu7fZP.js → DataTable-B2Ma8hfi.js} +1 -1
- package/ui/dist/assets/{EmptyState-Dk7BBthD.js → EmptyState-CcKyk5Yn.js} +1 -1
- package/ui/dist/assets/EvalHarnessPanel-BqtMc1ZM.js +2 -0
- package/ui/dist/assets/{EvalPanel-D9rDf1bk.js → EvalPanel-Bc33j0pN.js} +1 -1
- package/ui/dist/assets/{FilesPanel-CNrDLmix.js → FilesPanel-3QKvrWPo.js} +1 -1
- package/ui/dist/assets/{FleetPanel-DP_ji0AE.js → FleetPanel-CSsXuQYj.js} +1 -1
- package/ui/dist/assets/{HomelabPanel-B4bCsrBw.js → HomelabPanel-DhrjTX9m.js} +1 -1
- package/ui/dist/assets/{InfraView-C5OYx_9s.js → InfraView-CR6HyrL6.js} +2 -2
- package/ui/dist/assets/{InlineEditableField-DyBCbIoN.js → InlineEditableField-CnvF-yFR.js} +1 -1
- package/ui/dist/assets/{Input-DWnbv1Yh.js → Input-GTHp2Rkr.js} +1 -1
- package/ui/dist/assets/{IntegrationsPanel-DsB6hjvE.js → IntegrationsPanel-CymCRE3T.js} +1 -1
- package/ui/dist/assets/{IntelligenceView-PIFGvIg_.js → IntelligenceView-C1IHxJRC.js} +2 -2
- package/ui/dist/assets/{LearningPanel-D_S4HFX5.js → LearningPanel-DOCES3lH.js} +1 -1
- package/ui/dist/assets/{LogsPanel-BnWNREPX.js → LogsPanel-BLnAqEaZ.js} +1 -1
- package/ui/dist/assets/{McpPanel-CIMxZ2Am.js → McpPanel-ChUzmr3z.js} +1 -1
- package/ui/dist/assets/{MemoryGraphPanel-DD7x4rrm.js → MemoryGraphPanel-Bzvjmzvk.js} +1 -1
- package/ui/dist/assets/{MemoryWikiPanel-BPPVAH0b.js → MemoryWikiPanel-Dwk3Aqwd.js} +1 -1
- package/ui/dist/assets/{MeshPanel-CiuwR3oV.js → MeshPanel-C3LJSlht.js} +1 -1
- package/ui/dist/assets/{NvidiaPanel-DVntoRrH.js → NvidiaPanel-CeZK_-CV.js} +1 -1
- package/ui/dist/assets/{OrganismPanel-pqIKtHrW.js → OrganismPanel-BB6YOiQV.js} +1 -1
- package/ui/dist/assets/OverviewPanel-BmtBhQnv.js +1 -0
- package/ui/dist/assets/{PageHeader-CF75km05.js → PageHeader-BimceqQo.js} +1 -1
- package/ui/dist/assets/{PaperclipPanel-CwN5-cKg.js → PaperclipPanel-C-brgwA3.js} +1 -1
- package/ui/dist/assets/{PersonasPanel-ClC_TTGX.js → PersonasPanel-L1j78p6H.js} +1 -1
- package/ui/dist/assets/{RecipesPanel-Di2l-eOe.js → RecipesPanel-34lCfynJ.js} +1 -1
- package/ui/dist/assets/{SecurityPanel-DjC4pXGM.js → SecurityPanel-CBTPWLj6.js} +1 -1
- package/ui/dist/assets/{SelfImprovePanel-CNpCp5N4.js → SelfImprovePanel-BrPbFHhG.js} +1 -1
- package/ui/dist/assets/{SelfProposalsPanel-BJL6Fjxo.js → SelfProposalsPanel-lNmiDThB.js} +1 -1
- package/ui/dist/assets/{SessionsPanel-EAGKDQp0.js → SessionsPanel-DAEYIn83.js} +1 -1
- package/ui/dist/assets/{SessionsTab-tc0njI15.js → SessionsTab-JQbltWww.js} +1 -1
- package/ui/dist/assets/{SettingsPanel-BdSGImIa.js → SettingsPanel-CzRROAYQ.js} +1 -1
- package/ui/dist/assets/{SettingsView-DQB64bjy.js → SettingsView-CN7ii2uw.js} +2 -2
- package/ui/dist/assets/{SkeletonLoader-P8SFCyGi.js → SkeletonLoader-atQtpcF5.js} +1 -1
- package/ui/dist/assets/{SkillsPanel-lDMl_8da.js → SkillsPanel-DlFs2ih7.js} +1 -1
- package/ui/dist/assets/{SomaView-BG7YvBu2.js → SomaView-Ba642Oqb.js} +1 -1
- package/ui/dist/assets/{StatCard-Cv2u-yqA.js → StatCard-DciE_Iqc.js} +1 -1
- package/ui/dist/assets/{StatusBadge-JJeoEdCm.js → StatusBadge-BtfSPoW2.js} +1 -1
- package/ui/dist/assets/{TeamsPanel-D-iCyyYd.js → TeamsPanel-DKQ5z2Qe.js} +1 -1
- package/ui/dist/assets/{TelemetryPanel-DHNFyCwn.js → TelemetryPanel-B6KAc55Q.js} +1 -1
- package/ui/dist/assets/{TitanCanvas-BhurNMK3.js → TitanCanvas-C-s0A-lv.js} +3 -3
- package/ui/dist/assets/ToolsView-Dei0KMP0.js +2 -0
- package/ui/dist/assets/{Tooltip-D4IeQDJL.js → Tooltip-70UK0E2I.js} +1 -1
- package/ui/dist/assets/{TraceViewer-CMd-Wi0z.js → TraceViewer-BniolyBx.js} +1 -1
- package/ui/dist/assets/{TrainingPanel-CLtiBq2h.js → TrainingPanel-Bz4CTPGW.js} +1 -1
- package/ui/dist/assets/{VoiceOverlay-BXPVdnJc.js → VoiceOverlay-CmNCrLcd.js} +1 -1
- package/ui/dist/assets/{VramPanel-DjuwGUzA.js → VramPanel-Xh_OtRDR.js} +1 -1
- package/ui/dist/assets/{WatchView-B7sDnMpl.js → WatchView-C-sGFpVy.js} +1 -1
- package/ui/dist/assets/{WorkTab-B5nQ4Y7A.js → WorkTab-BjLNmgIK.js} +1 -1
- package/ui/dist/assets/{WorkflowsPanel-2z0TeXyR.js → WorkflowsPanel-CvgQU1xI.js} +1 -1
- package/ui/dist/assets/{arrow-left-BKOkzkae.js → arrow-left-DwqHtJiU.js} +1 -1
- package/ui/dist/assets/{chart-column-D39PCk17.js → chart-column-BtNO6sRy.js} +1 -1
- package/ui/dist/assets/{circle-check-big-CMz0QouP.js → circle-check-big-DZRE_MbN.js} +1 -1
- package/ui/dist/assets/{dollar-sign-Bu8fZOQl.js → dollar-sign-aVG3a5eL.js} +1 -1
- package/ui/dist/assets/{download-vvx6zJ-U.js → download-BxiWJU4G.js} +1 -1
- package/ui/dist/assets/{eye-off-BPXFIzlW.js → eye-off-CkgfFYhm.js} +1 -1
- package/ui/dist/assets/{funnel-Bqns-i8I.js → funnel-PkLdxKyC.js} +1 -1
- package/ui/dist/assets/{git-branch-CdmeqL8d.js → git-branch-BM-Gw95X.js} +1 -1
- package/ui/dist/assets/{index-C6oarzis.js → index-CahJbWSR.js} +2 -2
- package/ui/dist/assets/index-D0RJ8701.css +1 -0
- package/ui/dist/assets/layers-BuGf4FIJ.js +6 -0
- package/ui/dist/assets/{legacy-DFIaZTiF.js → legacy-CR6o4t-y.js} +1 -1
- package/ui/dist/assets/{lightbulb-DOL6Q-iP.js → lightbulb-n8gc_XAL.js} +1 -1
- package/ui/dist/assets/{pause-B0XymOnS.js → pause-DCV52koX.js} +1 -1
- package/ui/dist/assets/{play-Dwp2l5HG.js → play-CcJ9BnCh.js} +1 -1
- package/ui/dist/assets/{plug-DRlTjWqQ.js → plug-CfWBXfCl.js} +1 -1
- package/ui/dist/assets/{proxy-sXxWK7WF.js → proxy-CzZDfLmm.js} +1 -1
- package/ui/dist/assets/{square-yh0jffQZ.js → square-DJpUhlxi.js} +1 -1
- package/ui/dist/assets/{target-GxtNG2RW.js → target-DWcmM_9m.js} +1 -1
- package/ui/dist/assets/{toggle-right-CYQd_Ux1.js → toggle-right-YusFQ69L.js} +1 -1
- package/ui/dist/assets/{trash-2-B4jp_pAQ.js → trash-2-CK7yQ55V.js} +1 -1
- package/ui/dist/assets/{trending-up-B26tNhFP.js → trending-up-DGjFyubC.js} +1 -1
- package/ui/dist/assets/{trophy-Bf3ZeSeb.js → trophy-uQv_NgDB.js} +1 -1
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/EvalHarnessPanel-CJv8CUDy.js +0 -1
- package/ui/dist/assets/OverviewPanel-gqYRhmpF.js +0 -6
- package/ui/dist/assets/ToolsView-C8sWxLny.js +0 -2
- package/ui/dist/assets/index-DsFoD9SP.css +0 -1
package/README.md
CHANGED
|
@@ -217,6 +217,37 @@ Got a GPU? TITAN can even fine-tune its own models on your conversation history.
|
|
|
217
217
|
|
|
218
218
|
---
|
|
219
219
|
|
|
220
|
+
## 🧪 Testing
|
|
221
|
+
|
|
222
|
+
TITAN ships with **five layered testing stages** that catch agent regressions at different levels:
|
|
223
|
+
|
|
224
|
+
| Layer | What it covers | Run it | Speed |
|
|
225
|
+
|---|---|---|---|
|
|
226
|
+
| **Unit** | Pure functions: regex (`isDangerous`), pipeline classifier, gate extraction, token budget, secret scanner. Zero LLM calls. | `npm test` | < 5 s |
|
|
227
|
+
| **Mock trajectory** | Tape-replay through `MockOllamaProvider`. Asserts the agent calls the right tools in the right order using recorded responses. Zero LLM calls. | `npm test -- tests/eval/trajectory` | < 1 s |
|
|
228
|
+
| **Cross-model parity** | Same scenario replayed across multiple provider tapes. Catches behavioural divergence when one provider drifts. Zero LLM calls. | `npm run test:parity` | < 1 s |
|
|
229
|
+
| **Live eval (gated)** | 11 suites of behavioural tests against the running agent (`/api/eval/run`). 80 % pass rate per suite is the merge gate in CI. | `npm run test:eval` | 5–15 min |
|
|
230
|
+
| **Adversarial / red-team** | Jailbreak attempts, path traversal, command injection, prompt extraction. Exercised in two of the layers above: live eval (against the running agent) and mock trajectory (against the mock provider). | (folded into live eval + trajectory) | n/a |
|
|
231
|
+
|
|
232
|
+
### Adding a new test
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
# Pure-function unit test:
|
|
236
|
+
echo "..." > tests/unit/my_new_func.test.ts && npm test
|
|
237
|
+
|
|
238
|
+
# New tape (record once against a real model):
|
|
239
|
+
TITAN_RECORD_TAPE=my_scenario npm test -- tests/eval/trajectory.test.ts
|
|
240
|
+
|
|
241
|
+
# New eval case: edit src/eval/harness.ts, add to the relevant *_SUITE array,
|
|
242
|
+
# then verify with: npm run test:eval -- --suite safety
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### CI gate
|
|
246
|
+
|
|
247
|
+
`.github/workflows/eval-gate.yml` runs the live-eval layer on every push to `main` and every PR. If any suite drops below 80 % pass rate, the job fails and the PR can't merge (when branch protection enforces it). Per-suite results upload as a 30-day artifact for debugging.
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
220
251
|
## ⚠️ Reality Check
|
|
221
252
|
|
|
222
253
|
TITAN is experimental. It can execute commands, modify files, and take autonomous actions. **Use at your own risk.** Think of it as "a very motivated intern with root access who never sleeps and occasionally gets *too* creative."
|
|
@@ -227,14 +258,13 @@ Start in supervised mode. Review what it does. Don't give it access to systems y
|
|
|
227
258
|
|
|
228
259
|
## 📊 The Numbers
|
|
229
260
|
|
|
230
|
-
- **Version:** 5.
|
|
231
|
-
- **Tests:**
|
|
261
|
+
- **Version:** 5.3.0 "Spacewalk: CI Gate + Parity"
|
|
262
|
+
- **Tests:** 500+ deterministic tests (unit + mock trajectory + parity) that pass in under 5 s, plus 11 live-eval suites and a CI merge gate at 80 % per suite
|
|
232
263
|
- **Widget templates:** 110 across 25 categories
|
|
233
264
|
- **Skills:** 143 loaded
|
|
234
265
|
- **Tools:** 248 across all skills
|
|
235
266
|
- **AI Providers:** 37 (Anthropic, OpenAI, Google, Ollama, Groq, Mistral, and 31 more)
|
|
236
267
|
- **Chat Channels:** 16
|
|
237
|
-
- **Tests:** 5,840+ passing
|
|
238
268
|
- **Node:** ≥ 22, pure ESM
|
|
239
269
|
- **License:** MIT (completely free)
|
|
240
270
|
|
package/dist/agent/agent.js
CHANGED
|
@@ -132,7 +132,7 @@ function ensureSpawnAgentRegistered() {
|
|
|
132
132
|
const templateName = args.template || "";
|
|
133
133
|
let configAgent = null;
|
|
134
134
|
try {
|
|
135
|
-
const { resolveAgentConfig } = await import("./agentScope.js");
|
|
135
|
+
const { resolveAgentConfig, agentAllowsSkill } = await import("./agentScope.js");
|
|
136
136
|
configAgent = resolveAgentConfig(templateName);
|
|
137
137
|
} catch {
|
|
138
138
|
}
|
|
@@ -140,6 +140,18 @@ function ensureSpawnAgentRegistered() {
|
|
|
140
140
|
const template = SUB_AGENT_TEMPLATES[baseTemplateName] || {};
|
|
141
141
|
const agentName = args.name || configAgent?.name || template.name || "SubAgent";
|
|
142
142
|
const task = args.task;
|
|
143
|
+
let appliedFields = [];
|
|
144
|
+
if (configAgent) {
|
|
145
|
+
if (configAgent.maxRounds !== 15) appliedFields.push(`maxRounds=${configAgent.maxRounds}`);
|
|
146
|
+
if (configAgent.maxTokens !== 4e3) appliedFields.push(`maxTokens=${configAgent.maxTokens}`);
|
|
147
|
+
if (configAgent.persona && configAgent.persona !== "default") appliedFields.push(`persona=${configAgent.persona}`);
|
|
148
|
+
if (configAgent.skillsFilter) appliedFields.push(`skillsFilter=[${configAgent.skillsFilter.join(",")}]`);
|
|
149
|
+
if (configAgent.modelFallbacks.length > 0) logger.info("Agent", `Agent "${agentName}" has modelFallbacks \u2014 not yet implemented in spawn path`);
|
|
150
|
+
if (configAgent.workspaceDir) appliedFields.push(`workspaceDir=${configAgent.workspaceDir}`);
|
|
151
|
+
if (configAgent.tags.length > 0) appliedFields.push(`tags=[${configAgent.tags.join(",")}]`);
|
|
152
|
+
if (configAgent.systemPromptOverride) appliedFields.push("systemPromptOverride=set");
|
|
153
|
+
if (appliedFields.length > 0) logger.info("Agent", `Applied config agent fields for "${agentName}": ${appliedFields.join(", ")}`);
|
|
154
|
+
}
|
|
143
155
|
try {
|
|
144
156
|
const { isKilled } = await import("../safety/killSwitch.js");
|
|
145
157
|
if (isKilled()) {
|
|
@@ -207,13 +219,30 @@ function ensureSpawnAgentRegistered() {
|
|
|
207
219
|
const { updateAgentStatus } = await import("./commandPost.js");
|
|
208
220
|
updateAgentStatus(specialistId, "active");
|
|
209
221
|
}
|
|
222
|
+
let allowedTools = template.tools;
|
|
223
|
+
if (configAgent?.skillsFilter) {
|
|
224
|
+
try {
|
|
225
|
+
const { agentAllowsSkill } = await import("./agentScope.js");
|
|
226
|
+
allowedTools = template.tools?.filter((t) => agentAllowsSkill(configAgent, t));
|
|
227
|
+
if (allowedTools && allowedTools.length === 0) {
|
|
228
|
+
logger.warn("Agent", `Agent "${agentName}" skillsFilter blocks all template tools \u2014 falling back to template default`);
|
|
229
|
+
allowedTools = template.tools;
|
|
230
|
+
}
|
|
231
|
+
} catch {
|
|
232
|
+
}
|
|
233
|
+
}
|
|
210
234
|
const result = await spawnSubAgent({
|
|
211
235
|
name: agentName,
|
|
212
236
|
task,
|
|
213
|
-
tools:
|
|
214
|
-
systemPrompt: specialistPrompt || template.systemPrompt,
|
|
215
|
-
model: args.model || specialistModel,
|
|
237
|
+
tools: allowedTools,
|
|
238
|
+
systemPrompt: configAgent?.systemPromptOverride || specialistPrompt || template.systemPrompt,
|
|
239
|
+
model: args.model || configAgent?.model || specialistModel,
|
|
216
240
|
tier: template.tier,
|
|
241
|
+
persona: configAgent?.persona,
|
|
242
|
+
maxRounds: configAgent?.maxRounds,
|
|
243
|
+
maxTokens: configAgent?.maxTokens,
|
|
244
|
+
workspaceDir: configAgent?.workspaceDir ?? void 0,
|
|
245
|
+
tags: configAgent?.tags,
|
|
217
246
|
depth: 1
|
|
218
247
|
// v4.7.0: this IS a child (was incorrectly 0)
|
|
219
248
|
});
|