shroud-privacy 2.2.11 → 2.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +19 -10
  2. package/dist/hooks.js +246 -14
  3. package/openclaw.plugin.json +1 -1
  4. package/package.json +3 -2
  5. package/dist/agent-session.d.ts +0 -259
  6. package/dist/agent-session.js +0 -693
  7. package/dist/compliance.d.ts +0 -44
  8. package/dist/compliance.js +0 -76
  9. package/dist/dashboard.d.ts +0 -42
  10. package/dist/dashboard.js +0 -1558
  11. package/dist/detectors/injection-multilingual.d.ts +0 -27
  12. package/dist/detectors/injection-multilingual.js +0 -399
  13. package/dist/detectors/injection-signatures.d.ts +0 -26
  14. package/dist/detectors/injection-signatures.js +0 -508
  15. package/dist/detectors/injection.d.ts +0 -56
  16. package/dist/detectors/injection.js +0 -269
  17. package/dist/detectors/tool-guard.d.ts +0 -27
  18. package/dist/detectors/tool-guard.js +0 -418
  19. package/dist/event-grader.d.ts +0 -97
  20. package/dist/event-grader.js +0 -214
  21. package/dist/exposure.d.ts +0 -29
  22. package/dist/exposure.js +0 -72
  23. package/dist/policy.d.ts +0 -99
  24. package/dist/policy.js +0 -212
  25. package/dist/profiler-analysis.d.ts +0 -35
  26. package/dist/profiler-analysis.js +0 -230
  27. package/dist/profiler-store.d.ts +0 -33
  28. package/dist/profiler-store.js +0 -118
  29. package/dist/profiler-types.d.ts +0 -128
  30. package/dist/profiler-types.js +0 -16
  31. package/dist/profiler.d.ts +0 -81
  32. package/dist/profiler.js +0 -392
  33. package/dist/security-event.d.ts +0 -70
  34. package/dist/security-event.js +0 -80
  35. package/dist/siem.d.ts +0 -49
  36. package/dist/siem.js +0 -113
  37. package/dist/signature-loader.d.ts +0 -113
  38. package/dist/signature-loader.js +0 -255
  39. package/dist/store-file.d.ts +0 -26
  40. package/dist/store-file.js +0 -79
@@ -1,693 +0,0 @@
1
- /**
2
- * Agent session tracking — maps LLM API calls to local agent identities.
3
- *
4
- * Each agent has a unique identity derived from its system prompt, plugin set,
5
- * and model. This module tracks which agent is making each LLM call, enabling:
6
- * - Per-agent WAF rules (different injection policies per agent)
7
- * - Per-agent behavioural baselines (Track 3)
8
- * - Per-agent canary attribution (Track 2)
9
- * - Multi-agent session correlation
10
- */
11
- import { createHash } from "node:crypto";
12
/**
 * Tracks agent sessions and maps LLM calls to agent identities.
 * One instance shared via globalThis across all plugin loads.
 *
 * Sessions are keyed by agent label. Every `update*` / `record*` method acts
 * on the session selected by the most recent `registerAgent` call and is a
 * no-op (or returns null) when no such session exists.
 */
export class AgentSessionTracker {
    /** Active sessions keyed by agent label (the stable identity). */
    _sessions = new Map();
    /** Current active agent label. */
    _currentLabel = "";
    /** LLM call log (ring buffer, last 200 calls). */
    _callLog = [];
    /** Timestamp when the current LLM call started (for response time). */
    _callStartTime = 0;

    /**
     * Register or update an agent session from system prompt content.
     *
     * Identity strategy: the extracted LABEL is the primary key, not the
     * prompt skeleton hash. System prompts contain too much dynamic content
     * (conversation context, RAG, tool results) to produce stable hashes.
     * The buildId is still computed for fingerprinting but is NOT used as
     * the session key.
     *
     * @param {string} systemPrompt - Raw system prompt text.
     * @param {string[]} [pluginList] - Plugin names fed into the build ID.
     * @param {string} [modelId] - Model identifier fed into the build ID.
     * @returns {object} The new or existing session, which becomes the current one.
     */
    registerAgent(systemPrompt, pluginList = [], modelId = "unknown") {
        const label = extractLabel(systemPrompt);
        const buildId = computeBuildId(systemPrompt, pluginList, modelId);
        this._currentLabel = label;
        let session = this._sessions.get(label);
        if (!session) {
            session = {
                agentBuildId: buildId,
                agentLabel: label,
                sessionId: createHash("sha256")
                    .update(`${label}:${Date.now()}:${Math.random()}`)
                    .digest("hex")
                    .slice(0, 12),
                startedAt: Date.now(),
                llmCallCount: 0,
                securityEventCount: 0,
                lastCallAt: Date.now(),
                detectedModel: modelId,
                channelSource: "",
                classification: classifyAgent(label, systemPrompt),
                toolInventory: [],
                soulExtract: "",
                cache: {
                    totalInputTokens: 0, totalOutputTokens: 0,
                    totalCacheRead: 0, totalCacheWrite: 0,
                    avgHitRatio: 0, baselineHitRatio: -1,
                    baselineSamples: 0, callsWithCache: 0,
                },
                channels: [],
                heartbeat: {
                    enabled: false, recent: [], avgIntervalMs: -1,
                    lastAt: 0, status: "unknown", lastResponse: "",
                },
            };
            this._sessions.set(label, session);
        }
        else {
            // Update build ID to latest (prompt may evolve, label stays stable)
            session.agentBuildId = buildId;
        }
        return session;
    }

    /** Update detected model from LLM API request body. Ignores falsy values. */
    updateModel(model) {
        const session = this._sessions.get(this._currentLabel);
        if (session && model) {
            session.detectedModel = model;
        }
    }

    /** Update channel source (e.g. "slack:C00000001"). Ignores falsy values. */
    updateChannel(source) {
        const session = this._sessions.get(this._currentLabel);
        if (session && source) {
            session.channelSource = source;
        }
    }

    /**
     * Update per-agent cache stats from an LLM response.
     *
     * The first 5 calls only build the hit-ratio baseline and never alert.
     *
     * @param {{inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens: number}} usage
     * @returns {{alert: string, severity: string} | null} An anomaly alert when
     *   cache behaviour deviates from the baseline, otherwise null.
     */
    updateCache(usage) {
        const session = this._sessions.get(this._currentLabel);
        if (!session || usage.inputTokens === 0)
            return null;
        const c = session.cache;
        // Capture the historical write ratio BEFORE folding in this call, so the
        // call under test cannot inflate the baseline it is compared against.
        const baselineWriteRatio = c.totalCacheWrite / Math.max(1, c.totalInputTokens);
        c.totalInputTokens += usage.inputTokens;
        c.totalOutputTokens += usage.outputTokens;
        c.totalCacheRead += usage.cacheReadTokens;
        c.totalCacheWrite += usage.cacheWriteTokens;
        c.callsWithCache++;
        const hitRatio = usage.inputTokens > 0
            ? usage.cacheReadTokens / usage.inputTokens : 0;
        // Running average (exponential moving average, alpha=0.3)
        c.avgHitRatio = c.callsWithCache === 1
            ? hitRatio
            : c.avgHitRatio * 0.7 + hitRatio * 0.3;
        // Establish baseline from first 5 calls (arithmetic mean of their hit ratios)
        const BASELINE_WINDOW = 5;
        if (c.baselineSamples < BASELINE_WINDOW) {
            c.baselineSamples++;
            c.baselineHitRatio = c.baselineSamples === 1
                ? hitRatio
                : ((c.baselineHitRatio * (c.baselineSamples - 1)) + hitRatio) / c.baselineSamples;
            return null; // Still learning baseline
        }
        // Anomaly detection: compare current ratio to baseline
        // 1. Hit ratio fell below half of a meaningful (>30%) baseline
        if (c.baselineHitRatio > 0.3 && hitRatio < c.baselineHitRatio * 0.5) {
            return {
                alert: `Cache hit ratio dropped to ${Math.round(hitRatio * 100)}% (baseline: ${Math.round(c.baselineHitRatio * 100)}%) — possible system prompt change`,
                severity: "high",
            };
        }
        // 2. Zero cache hits when baseline expects them
        if (c.baselineHitRatio > 0.5 && hitRatio === 0) {
            return {
                alert: `Zero cache hits (baseline: ${Math.round(c.baselineHitRatio * 100)}%) — system prompt may have been replaced`,
                severity: "high",
            };
        }
        // 3. Unusual cache write spike (>3x the write ratio of all previous calls)
        const currentWriteRatio = usage.cacheWriteTokens / Math.max(1, usage.inputTokens);
        if (c.callsWithCache > BASELINE_WINDOW && baselineWriteRatio > 0 && currentWriteRatio > baselineWriteRatio * 3) {
            return {
                alert: `Cache write spike: ${Math.round(currentWriteRatio * 100)}% of input (baseline: ${Math.round(baselineWriteRatio * 100)}%) — possible prompt stuffing`,
                severity: "medium",
            };
        }
        return null;
    }

    /**
     * Record a heartbeat for the current agent.
     *
     * @param {string} response - Heartbeat response text.
     * @returns {{alert: string, severity: string} | null} An alert when the
     *   response is longer than 5 trimmed characters and does not contain
     *   "HEARTBEAT_OK"; otherwise null.
     */
    recordHeartbeat(response) {
        const session = this._sessions.get(this._currentLabel);
        if (!session)
            return null;
        const hb = session.heartbeat;
        const now = Date.now();
        hb.enabled = true;
        hb.lastAt = now;
        hb.lastResponse = (response || "").slice(0, 200);
        hb.recent.push(now);
        // Keep only the 10 most recent timestamps
        if (hb.recent.length > 10)
            hb.recent.shift();
        hb.status = "alive";
        // Calculate average interval from recent timestamps
        if (hb.recent.length >= 3) {
            let totalGap = 0;
            for (let i = 1; i < hb.recent.length; i++) {
                totalGap += hb.recent[i] - hb.recent[i - 1];
            }
            hb.avgIntervalMs = totalGap / (hb.recent.length - 1);
        }
        // Check if response is an alert (not HEARTBEAT_OK)
        if (response && !response.includes("HEARTBEAT_OK") && response.trim().length > 5) {
            return {
                alert: `Heartbeat alert from ${session.agentLabel}: ${response.slice(0, 150)}`,
                severity: "medium",
            };
        }
        return null;
    }

    /**
     * Check all agents for missed heartbeats. Call periodically.
     *
     * An agent is "stale" after more than 2x its average interval without a
     * heartbeat and "dead" after more than 5x. Agents without an established
     * average interval are skipped.
     *
     * @returns {Array<{agentLabel: string, status: string, alert: string}>}
     *   One entry per agent whose status changed to a non-alive state.
     */
    checkHeartbeatHealth() {
        const alerts = [];
        const now = Date.now();
        for (const session of this._sessions.values()) {
            const hb = session.heartbeat;
            if (!hb.enabled || hb.avgIntervalMs <= 0)
                continue;
            const sinceLastHb = now - hb.lastAt;
            const prevStatus = hb.status;
            if (sinceLastHb > hb.avgIntervalMs * 5) {
                hb.status = "dead";
            }
            else if (sinceLastHb > hb.avgIntervalMs * 2) {
                hb.status = "stale";
            }
            else {
                hb.status = "alive";
            }
            // Alert on status transitions only, so a dead agent is reported once
            if (hb.status !== prevStatus && hb.status !== "alive") {
                alerts.push({
                    agentLabel: session.agentLabel,
                    status: hb.status,
                    alert: `${session.agentLabel} heartbeat ${hb.status} — last seen ${Math.round(sinceLastHb / 60000)}m ago (expected every ${Math.round(hb.avgIntervalMs / 60000)}m)`,
                });
            }
        }
        return alerts;
    }

    /**
     * Detect the channel from prompt metadata and remember it on the session.
     *
     * @returns {string | null} The detected channel, or null when nothing was
     *   detected or there is no current session.
     */
    updateChannelFromPrompt(prompt) {
        const session = this._sessions.get(this._currentLabel);
        if (!session)
            return null;
        const ch = detectChannel(prompt);
        if (ch && !session.channels.includes(ch)) {
            session.channels.push(ch);
        }
        return ch;
    }

    /** Update tool inventory from body.tools array. Only sets once (first non-empty call). */
    updateTools(tools) {
        const session = this._sessions.get(this._currentLabel);
        if (session && tools.length > 0 && session.toolInventory.length === 0) {
            session.toolInventory = tools;
            // Re-classify with tool data. Pass the stored SOUL extract (empty when
            // not yet known) so a classification derived from it is not discarded.
            session.classification = classifyAgentWithTools(session.agentLabel, session.soulExtract, session.toolInventory);
        }
    }

    /** Update SOUL extract from early messages (truncated to 500 chars). Only sets once. */
    updateSoul(soul) {
        const session = this._sessions.get(this._currentLabel);
        if (session && soul && !session.soulExtract) {
            session.soulExtract = soul.slice(0, 500);
            // Re-classify with SOUL data
            session.classification = classifyAgentWithTools(session.agentLabel, session.soulExtract, session.toolInventory);
        }
    }

    /** Record an LLM API call for the current agent. Returns the session or null. */
    recordLlmCall() {
        const session = this._sessions.get(this._currentLabel);
        if (session) {
            session.llmCallCount++;
            session.lastCallAt = Date.now();
        }
        return session ?? null;
    }

    /** Record a security event for the current agent. */
    recordSecurityEvent(count = 1) {
        const session = this._sessions.get(this._currentLabel);
        if (session) {
            session.securityEventCount += count;
        }
    }

    /** Get the current active agent session, or null. */
    getCurrentSession() {
        return this._sessions.get(this._currentLabel) ?? null;
    }

    /** Get the current agent build ID, or "" when there is no current session. */
    getCurrentBuildId() {
        const session = this._sessions.get(this._currentLabel);
        return session?.agentBuildId ?? "";
    }

    /** Get all tracked agent sessions as a new array. */
    getAllSessions() {
        return [...this._sessions.values()];
    }

    /** Get session by build ID (secondary key, linear search). Returns null if absent. */
    getSession(buildId) {
        for (const session of this._sessions.values()) {
            if (session.agentBuildId === buildId)
                return session;
        }
        return null;
    }

    /** Get session by label (primary key). Returns null if absent. */
    getSessionByLabel(label) {
        return this._sessions.get(label) ?? null;
    }

    /** Mark the start of an LLM call (for response time tracking). */
    markCallStart() {
        this._callStartTime = Date.now();
    }

    /**
     * Log a completed LLM call with full details.
     * `responseTimeMs` is measured from the last `markCallStart()`, or 0 if
     * none is pending; the pending start mark is cleared afterwards.
     */
    logCall(details) {
        const hitPct = details.inputTokens > 0
            ? Math.round((details.cacheReadTokens / details.inputTokens) * 100) : 0;
        this._callLog.push({
            timestamp: Date.now(),
            agentLabel: this._currentLabel || "Unknown",
            url: details.url,
            model: details.model,
            inputTokens: details.inputTokens,
            outputTokens: details.outputTokens,
            cacheReadTokens: details.cacheReadTokens,
            cacheWriteTokens: details.cacheWriteTokens,
            cacheHitPct: hitPct,
            responseTimeMs: this._callStartTime > 0 ? Date.now() - this._callStartTime : 0,
            channel: details.channel,
            securityEvents: details.securityEvents,
            reason: details.reason,
        });
        // Ring buffer — keep last 200
        if (this._callLog.length > 200)
            this._callLog.shift();
        this._callStartTime = 0;
    }

    /** Get the LLM call log (the live internal array, oldest entry first). */
    getCallLog() {
        return this._callLog;
    }

    /** Reset all session tracking (sessions and current label; the call log is kept). */
    reset() {
        this._sessions.clear();
        this._currentLabel = "";
    }
}
318
/**
 * Compute a stable agent build ID.
 *
 * Uses a "skeleton" of the system prompt rather than the full text.
 * This makes the ID resilient to:
 * - Dynamic timestamps, dates, session IDs injected into prompts
 * - User names or account-specific context
 * - Retrieved RAG snippets appended to the base prompt
 * - Minor wording tweaks during prompt iteration
 *
 * The ID is the first 16 hex characters of the SHA-256 of the skeleton, the
 * sorted comma-joined plugin list, and the model ID, joined by newlines.
 *
 * @param {string} systemPrompt - Raw system prompt text.
 * @param {string[]} pluginList - Plugin names; order does not affect the ID and
 *   the array is not modified.
 * @param {string} modelId - Model identifier.
 * @returns {string} 16-character lowercase hex build ID.
 */
export function computeBuildId(systemPrompt, pluginList, modelId) {
    const skeleton = extractPromptSkeleton(systemPrompt);
    // Array.prototype.sort works in place: sort a copy so the caller's plugin
    // array keeps its original order.
    const sortedPlugins = [...pluginList].sort();
    const components = [
        skeleton,
        sortedPlugins.join(","),
        modelId,
    ];
    return createHash("sha256")
        .update(components.join("\n"))
        .digest("hex")
        .slice(0, 16);
}
343
/**
 * Reduce a system prompt to a stable "skeleton" by removing volatile blocks
 * and replacing dynamic tokens with placeholders.
 *
 * Processing order:
 *   1. Strip volatile blocks: XML-style tagged blocks, per-session metadata
 *      lines (System:/Sender/Session/Channel/[bracketed]), and everything from
 *      a conversation-history style heading onwards.
 *   2. Keep only the first 500 characters (the identity window).
 *   3. Replace UUIDs, dates, times, emails, IPv4 addresses, hex runs of 7+
 *      characters, numbers of 4+ digits, http(s) URLs, GMT/UTC offsets and
 *      multi-segment paths with placeholders.
 *   4. Collapse whitespace runs to single spaces and trim.
 *
 * @param {string} prompt - Raw system prompt text.
 * @returns {string} The normalized skeleton.
 */
export function extractPromptSkeleton(prompt) {
    // Rules are applied strictly in order; each pass sees the previous output.
    const volatileBlockRules = [
        // XML-tagged blocks (system-reminder, context, memory, tool results, etc.)
        [/<[a-z][-a-z_]*[^>]*>[\s\S]*?<\/[a-z][-a-z_]*>/gi, ""],
        // OpenClaw system context prefix — per-session metadata
        [/^System:\s*\[.*?\].*?\n/gm, ""],
        [/^Sender\s*\(.*?\):.*?\n/gm, ""],
        [/^Session\s+\w+:.*?\n/gm, ""],
        [/^Channel:.*?\n/gm, ""],
        [/^\[.*?\]\s*$/gm, ""],
        // Conversation/chat history sections and everything after
        [/(?:^|\n)(?:Current conversation|Recent messages|Conversation history|Chat history|# Environment|gitStatus):?\s*\n[\s\S]*/im, ""],
    ];
    const dynamicTokenRules = [
        [/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, "<UUID>"],
        [/\b\d{4}[-/]\d{2}[-/]\d{2}(?:[T ]\d{2}:\d{2}(?::\d{2})?(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?)?\b/g, "<DATE>"],
        [/\b\d{1,2}:\d{2}(?::\d{2})?\s*(?:AM|PM|am|pm)?\b/g, "<TIME>"],
        [/\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b/g, "<EMAIL>"],
        [/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g, "<IP>"],
        [/\b[0-9a-f]{7,}\b/gi, "<HEX>"],
        [/\b\d{4,}\b/g, "<NUM>"],
        [/https?:\/\/[^\s<>"']+/g, "<URL>"],
        [/(?:GMT|UTC)[+-]\d{1,2}(?::\d{2})?/g, "<TZ>"],
        [/(?:\/[\w.-]+){2,}/g, "<PATH>"],
    ];
    const applyRules = (text, rules) => rules.reduce(
        (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
        text,
    );
    // A short window keeps the identity sentences and leaves out the dynamic
    // content (tools, RAG, conversation context) that tends to follow them.
    const identityWindow = applyRules(prompt, volatileBlockRules).slice(0, 500);
    return applyRules(identityWindow, dynamicTokenRules).replace(/\s+/g, " ").trim();
}
384
/**
 * Derive a short, human-recognisable agent name from a system prompt.
 *
 * Sources are tried in this order; the first plausible result (2-49 chars) wins:
 *   1. A `"conversation_label": "#name"` entry in session metadata.
 *   2. A "Slack message in #channel-name" header.
 *   3. A "WhatsApp message from X" / "WhatsApp group X" header
 *      (a trailing parenthetical such as a phone number is dropped).
 *   4. A TUI/terminal session combined with an "agent:<name>:" session key.
 *   5. Text-based extraction via `_extractLabelFromText`, tried on the last
 *      `---`-separated section first and then on the whole prompt.
 *
 * Channel-style names are split on "-" / "_" and title-cased; parts of three
 * characters or fewer are upper-cased ("pj" -> "PJ").
 *
 * @param {string} systemPrompt - Raw system prompt text.
 * @returns {string} The extracted label, or "Unknown Agent" when nothing matched.
 */
function extractLabel(systemPrompt) {
    const environmentSuffix = /-(main|dev|test|staging|prod|channel|chat|bot)$/i;
    const isPlausible = (candidate) => candidate.length > 1 && candidate.length < 50;
    const prettify = (slug) => slug
        .split(/[-_]/)
        .map((part) => (part.length <= 3
            ? part.toUpperCase()
            : part.charAt(0).toUpperCase() + part.slice(1)))
        .join(" ");

    // 1. OpenClaw conversation label — the channel name is the agent identity.
    const conversation = systemPrompt.match(/"conversation_label"\s*:\s*"#?([^"]+)"/);
    if (conversation) {
        const pretty = prettify(conversation[1].trim().replace(environmentSuffix, ""));
        if (isPlausible(pretty))
            return pretty;
    }

    // 2. Slack channel header
    const slack = systemPrompt.match(/Slack\s+message\s+in\s+#([^\s]+)/i);
    if (slack) {
        const pretty = prettify(slack[1].trim().replace(environmentSuffix, ""));
        if (isPlausible(pretty))
            return pretty;
    }

    // 3. WhatsApp header — used verbatim apart from a trailing "(...)"
    const whatsapp = systemPrompt.match(/WhatsApp\s+(?:message|group)\s+(?:from\s+|in\s+)?["']?([^"'\n]+)/i);
    if (whatsapp) {
        const contact = whatsapp[1].trim().replace(/\s*\(.*?\)\s*$/, "");
        if (isPlausible(contact))
            return contact;
    }

    // 4. TUI / terminal session — name comes from a key like "agent:main:tui:..."
    if (/(?:TUI|terminal)\s+(?:session|message)/i.test(systemPrompt)) {
        const agentKey = systemPrompt.match(/agent:([^:]+):/);
        if (agentKey) {
            const pretty = prettify(agentKey[1].trim());
            if (isPlausible(pretty))
                return pretty;
        }
    }

    // 5. Section-based extraction: a framework preamble may precede the agent's
    //    own section, so the last section is tried before the full prompt.
    const parts = systemPrompt.split(/\n---+\n/);
    const searchOrder = parts.length > 1
        ? [parts[parts.length - 1], systemPrompt]
        : [systemPrompt];
    for (const block of searchOrder) {
        const found = _extractLabelFromText(block);
        if (found)
            return found;
    }
    return "Unknown Agent";
}
450
/**
 * Extract an agent label from a single block of text.
 *
 * Patterns are tried in order of confidence:
 *   1. "- Name: X" list entry
 *   2. "You are [a/an/the] X" — the LAST occurrence wins, trailing qualifier
 *      clauses ("for ...", "who ...", ...) are cut, all-lowercase roles are
 *      title-cased
 *   3. "- Creature: X" list entry (title-cased)
 *   4. "My name is X" / "I am X" / "I'm X" / "called X" with a capitalised X
 *   5. A "# Heading" line that is not a generic title (system, readme, ...)
 *   6. The first short, name-like line (at most 4 words, no metadata markers)
 *
 * @param {string} text - Text block to inspect.
 * @returns {string | null} The label, or null if no confident match was found.
 */
function _extractLabelFromText(text) {
    const titleCase = (s) => s.replace(/\b\w/g, (ch) => ch.toUpperCase());
    const lengthWithin = (s, minExclusive, maxExclusive) => s.length > minExclusive && s.length < maxExclusive;

    // 1. "- Name: X" (highest confidence)
    const declaredName = text.match(/-\s*Name:\s*(.+)/i)?.[1].trim();
    if (declaredName !== undefined && lengthWithin(declaredName, 1, 60))
        return declaredName;

    // 2. "You are ..." — the agent's own identity usually follows any framework
    //    preamble, so the last occurrence is preferred over the first.
    const identityClaims = Array.from(text.matchAll(/[Yy]ou\s+are\s+(?:a\s+|an\s+|the\s+)?(.+?)(?:\.|,|\n|$)/g));
    if (identityClaims.length > 0) {
        const lastClaim = identityClaims[identityClaims.length - 1][1].trim();
        const core = lastClaim.replace(/\s+(?:at|for|who|that|which|specializing|working|based|created|developed|built|made|designed|powered)\s+.*/i, "");
        const role = core === core.toLowerCase() ? titleCase(core) : core;
        if (lengthWithin(role, 1, 50))
            return role;
    }

    // 3. "- Creature: X"
    const creature = text.match(/-\s*Creature:\s*(.+)/i)?.[1].trim();
    if (creature !== undefined && lengthWithin(creature, 3, 60))
        return titleCase(creature);

    // 4. Self-identification
    const selfName = text.match(/(?:[Mm]y\s+name\s+is|I\s+am|I'm|[Cc]alled)\s+([A-Z][A-Za-z0-9 _-]{1,40})(?:\.|,|\n|$)/)?.[1].trim();
    if (selfName !== undefined && lengthWithin(selfName, 1, 50))
        return selfName;

    // 5. Markdown heading
    const heading = text.match(/^#\s+(.{2,40})$/m)?.[1].trim();
    if (heading !== undefined
        && lengthWithin(heading, 1, 50)
        && !/^(system|instructions|config|settings|readme)/i.test(heading)) {
        return heading;
    }

    // 6. Fallback — accept only a short line that looks like a name rather than
    //    an instruction, a metadata line or structured data.
    const looksLikeName = (line) => {
        if (line.length <= 2 || line.length >= 30)
            return false;
        if (["#", "<!--", "System:", "- "].some((prefix) => line.startsWith(prefix)))
            return false;
        // "Rules:" / "(untrusted metadata)" style lines
        if (line.endsWith(":") || line.endsWith(")"))
            return false;
        // Parenthetical context or JSON
        if (line.includes("(") || line.includes("{"))
            return false;
        if (/^\d{4}-\d{2}-\d{2}/.test(line))
            return false;
        if (/^\[.*\]$/.test(line))
            return false;
        if (/^(when|if|do|don't|always|never|you |respond|make|use|keep|try|be |note|remember|ensure|for |the |this |please|conversation|session|sender|channel|message)/i.test(line))
            return false;
        return line.split(/\s+/).length <= 4;
    };
    const fallback = text
        .split("\n")
        .map((line) => line.trim())
        .find((line) => looksLikeName(line));
    return fallback ?? null;
}
521
// ===================================================================
// Agent role classifier — keyword-based, zero dependencies
// ===================================================================
/**
 * Role taxonomy with keyword signals. Ordered by specificity (most specific
 * first): consumers take the FIRST entry whose pattern matches, so order is
 * significant.
 *
 * Most keywords are deliberate prefixes ("vulnerab", "financ"). The short
 * acronyms "ids" and "ips" are anchored on both sides because they otherwise
 * match inside everyday words ("kids", "tips", "relationships") and tag
 * unrelated agents as Security Research.
 */
const ROLE_TAXONOMY = [
    { role: "Security Research", keywords: /security|threat|vulnerab|pentest|exploit|malware|incident|forensic|soc\b|siem|\bids\b|\bips\b|firewall/i },
    { role: "DevOps / SRE", keywords: /devops|sre\b|deploy|infra|kubernetes|k8s|docker|terraform|ansible|ci\s*\/?\s*cd|pipeline|monitoring|grafana|prometheus/i },
    { role: "System Admin", keywords: /sysadmin|system\s*admin|server|linux|network\s*admin|dns|dhcp|ldap|active\s*directory/i },
    { role: "Network Engineering", keywords: /network|router|switch|vlan|bgp|ospf|firewall\s*rule|palo\s*alto|juniper|cisco/i },
    { role: "Software Engineering", keywords: /software|develop|program|code|engineer|fullstack|backend|frontend|api\b|microservice/i },
    { role: "Data / Analytics", keywords: /data\s*scien|analytics|machine\s*learn|ml\b|ai\b|model|dataset|pipeline|etl|warehouse/i },
    { role: "Customer Support", keywords: /support|customer|helpdesk|ticket|billing|account\s*issue|service\s*desk|crm/i },
    { role: "Sales / Outreach", keywords: /sales|outreach|prospect|lead\s*gen|crm|pipeline|deal|quota|revenue/i },
    { role: "Research", keywords: /research|investigat|analy[sz]|report|study|academic|paper|journal|semicond|alpha/i },
    { role: "Coaching / Training", keywords: /coach|train|mentor|fitness|endurance|athlete|workout|nutrition|performance/i },
    { role: "Writing / Content", keywords: /writ|content|blog|article|copy|editor|journalist|marketing\s*content/i },
    { role: "Legal / Compliance", keywords: /legal|compliance|regulat|audit|policy|gdpr|hipaa|sox\b|contract/i },
    { role: "Finance", keywords: /financ|accounting|budget|invest|portfolio|trading|revenue|forecast/i },
    { role: "Personal Assistant", keywords: /personal|assistant|scheduler|organiz|reminder|task\s*manag|daily|general\s*purpose/i },
];
541
- /**
542
- * Classify an agent's role from its label and system prompt content.
543
- * Uses keyword matching against a role taxonomy — no LLM call needed.
544
- *
545
- * Strategy: check the LABEL first (high confidence), then fall back to
546
- * the system prompt (inferred). This prevents noisy metadata in the
547
- * prompt from overriding the agent's actual identity.
548
- */
549
/**
 * Build a classification result from a role and a confidence percentage.
 *
 * Confidence bands: >= 80 is "high" (green), >= 50 is "medium" (blue),
 * anything lower is "low" (grey).
 *
 * @param {string} role - Role name.
 * @param {number} pct - Confidence percentage.
 * @param {string[]} signals - Matched keywords that led to this role.
 * @returns {{role: string, confidencePct: number, confidence: string, colour: string, signals: string[]}}
 */
function makeClassification(role, pct, signals) {
    let confidence;
    let colour;
    if (pct >= 80) {
        confidence = "high";
        colour = "#3fb950";
    }
    else if (pct >= 50) {
        confidence = "medium";
        colour = "#58a6ff";
    }
    else {
        confidence = "low";
        colour = "#8b949e";
    }
    return { role, confidencePct: pct, confidence, colour, signals };
}
556
/**
 * Classify an agent's role from its label and system prompt content.
 * Uses keyword matching against ROLE_TAXONOMY — no LLM call needed.
 *
 * Three tiers are tried in order; within a tier the first taxonomy entry with
 * at least one keyword hit wins:
 *   - agent label:                     90%, +5% per extra hit, capped at 95%
 *   - first "You are [a/an/the] ..."   70%, +5% per extra hit, capped at 85%
 *     declaration in the prompt
 *   - the full prompt:                 40%, +5% per extra hit, capped at 60%
 * If nothing matches, the result is "General Agent" at 10%.
 *
 * @param {string} label - Agent label.
 * @param {string} systemPrompt - System prompt (or SOUL extract) text.
 * @returns {object} Classification built by `makeClassification`; `signals`
 *   holds the matched keywords.
 */
export function classifyAgent(label, systemPrompt) {
    // Scan the taxonomy in order and classify on the first entry that hits.
    const firstRoleHit = (haystack, basePct, capPct) => {
        for (const entry of ROLE_TAXONOMY) {
            const hits = [...haystack.matchAll(new RegExp(entry.keywords.source, "gi"))];
            if (hits.length > 0) {
                const pct = Math.min(capPct, basePct + (hits.length - 1) * 5);
                return makeClassification(entry.role, pct, hits.map((hit) => hit[0]));
            }
        }
        return null;
    };

    // 1. Label — explicit naming is the highest-confidence signal
    const fromLabel = firstRoleHit(label.toLowerCase(), 90, 95);
    if (fromLabel)
        return fromLabel;

    // 2. "You are a [role]" declaration
    const declaration = systemPrompt.match(/[Yy]ou\s+are\s+(?:a\s+|an\s+|the\s+)?(.{10,200})(?:\.|$)/m);
    const declaredText = declaration ? declaration[1].toLowerCase() : "";
    if (declaredText) {
        const fromDeclaration = firstRoleHit(declaredText, 70, 85);
        if (fromDeclaration)
            return fromDeclaration;
    }

    // 3. Last resort: the whole prompt
    const fromPrompt = firstRoleHit(systemPrompt.toLowerCase(), 40, 60);
    if (fromPrompt)
        return fromPrompt;

    return makeClassification("General Agent", 10, []);
}
601
/**
 * Tool-name patterns that hint at a specific agent role.
 * Each entry is `{ role, tools }`, where `tools` is a case-insensitive pattern
 * matched against tool names.
 */
const TOOL_ROLE_SIGNALS = [
    ["DevOps / SRE", /deploy|kubernetes|docker|terraform|ansible|helm|kubectl|aws|gcloud|azure/i],
    ["Software Engineering", /code|compile|build|test|lint|git|npm|pip|cargo|debug|exec|write_file|read_file/i],
    ["System Admin", /ssh|systemctl|service|cron|mount|useradd|passwd|iptables/i],
    ["Network Engineering", /ping|traceroute|nslookup|dig|netstat|snmp|bgp|route/i],
    ["Data / Analytics", /query|sql|bigquery|spark|pandas|jupyter|notebook|dataset/i],
    ["Customer Support", /ticket|zendesk|intercom|crm|freshdesk|jira.*service/i],
    ["Sales / Outreach", /salesforce|hubspot|outreach|email.*send|linkedin|prospect/i],
    ["Research", /search|web_fetch|browser|scrape|crawl|arxiv|scholar/i],
    ["Writing / Content", /publish|wordpress|medium|draft|edit.*doc|notion/i],
    ["Personal Assistant", /calendar|schedule|remind|todo|weather|timer/i],
].map(([role, tools]) => ({ role, tools }));
614
/**
 * Enhanced classifier that uses tools + SOUL.md + label.
 * Called when new data (tools or SOUL) becomes available.
 *
 * Starts from `classifyAgent(label, soulExtract)` and then:
 *   - keeps the base result when it is already high confidence (>= 80%);
 *   - for a weak base (General Agent or < 50%), switches to the first tool
 *     role with at least 2 tool-name hits, at 50% + 5% per hit (max 75%);
 *   - for a medium base (50-79%), adds 10% (max 95%) when the tool pattern
 *     for the SAME role has at least one hit;
 *   - otherwise returns the base result unchanged.
 *
 * @param {string} label - Agent label.
 * @param {string} soulExtract - SOUL text, or "" when unknown.
 * @param {string[]} tools - Tool names available to the agent.
 * @returns {object} Classification; tool-derived signals are prefixed "tool:".
 */
export function classifyAgentWithTools(label, soulExtract, tools) {
    // The base classification already takes the SOUL extract into account.
    const base = classifyAgent(label, soulExtract);
    // High confidence is kept as-is; without tools there is nothing to refine.
    if (base.confidencePct >= 80 || tools.length === 0)
        return base;

    const toolStr = tools.join(" ").toLowerCase();
    const toolHits = (pattern) => [...toolStr.matchAll(new RegExp(pattern.source, "gi"))];
    const withToolSignals = (hits) => [...base.signals, ...hits.map((m) => "tool:" + m[0])];

    if (base.role === "General Agent" || base.confidencePct < 50) {
        // All agents share generic base tools (read, write, exec, ...), so a
        // single hit is not enough to classify from tools alone.
        for (const { role, tools: pattern } of TOOL_ROLE_SIGNALS) {
            const hits = toolHits(pattern);
            if (hits.length >= 2) {
                const toolPct = Math.min(75, 50 + hits.length * 5);
                return makeClassification(role, toolPct, withToolSignals(hits));
            }
        }
    }
    else {
        // Medium-confidence base: tools may only confirm the existing role.
        for (const { role, tools: pattern } of TOOL_ROLE_SIGNALS) {
            if (role !== base.role)
                continue;
            const hits = toolHits(pattern);
            if (hits.length > 0) {
                const boosted = Math.min(95, base.confidencePct + 10);
                return makeClassification(role, boosted, withToolSignals(hits));
            }
        }
    }
    return base;
}
661
// ===================================================================
// Channel detection — extract channel type from prompt metadata
// ===================================================================
/**
 * Detect the channel type from OpenClaw prompt metadata.
 *
 * Probes are checked in a fixed order and the first match wins, so a prompt
 * that mentions several channels resolves to the earliest one in the list.
 *
 * @param {string} prompt - Prompt text to inspect.
 * @returns {string | null} One of "slack", "whatsapp", "tui", "email", "cron",
 *   "discord", "telegram", "teams", "heartbeat", or null when nothing matched.
 */
export function detectChannel(prompt) {
    const channelProbes = [
        ["slack", [/Slack\s+message/i]],
        ["whatsapp", [/WhatsApp\s+message/i]],
        ["tui", [/TUI\s+(?:session|message)/i, /openclaw-tui/i]],
        ["email", [/Email\s+(?:message|from)/i, /Gmail\s+/i]],
        ["cron", [/Cron\s+(?:job|task|trigger)/i, /scheduled\s+task/i]],
        ["discord", [/Discord\s+message/i]],
        ["telegram", [/Telegram\s+message/i]],
        ["teams", [/Teams\s+message/i]],
        ["heartbeat", [/HEARTBEAT\.md|HEARTBEAT_OK|heartbeat\s+(?:check|run|turn)/i]],
    ];
    for (const [channel, patterns] of channelProbes) {
        if (patterns.some((pattern) => pattern.test(prompt)))
            return channel;
    }
    return null;
}
686
/**
 * Check whether a prompt is a heartbeat prompt.
 *
 * Matches, case-insensitively, a reference to HEARTBEAT.md, "Read HEARTBEAT",
 * "heartbeat check/run/turn", or "nothing needs attention ... HEARTBEAT_OK".
 *
 * @param {string} prompt - Prompt text to inspect.
 * @returns {boolean} True when the prompt looks like a heartbeat prompt.
 */
export function isHeartbeatPrompt(prompt) {
    const heartbeatPromptPattern = /HEARTBEAT\.md|Read\s+HEARTBEAT|heartbeat\s+(?:check|run|turn)|nothing\s+needs\s+attention.*HEARTBEAT_OK/i;
    return heartbeatPromptPattern.test(prompt);
}
690
/**
 * Check whether a response is a heartbeat OK response.
 *
 * @param {string} response - Response text to inspect.
 * @returns {boolean} True when the response contains "HEARTBEAT_OK" in any letter case.
 */
export function isHeartbeatOk(response) {
    const okMarker = /HEARTBEAT_OK/i;
    return okMarker.test(response);
}