@byte5ai/palaia 2.0.13 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hooks.ts DELETED
@@ -1,2012 +0,0 @@
1
- /**
2
- * Lifecycle hooks for the Palaia OpenClaw plugin.
3
- *
4
- * - before_prompt_build: Query-based contextual recall (Issue #65).
5
- * Returns appendSystemContext with 🧠 instruction when memory is used.
6
- * - agent_end: Auto-capture of significant exchanges (Issue #64).
7
- * Now with LLM-based extraction via OpenClaw's runEmbeddedPiAgent,
8
- * falling back to rule-based extraction if the LLM is unavailable.
9
- * - message_received: Captures inbound message ID for emoji reactions.
10
- * - palaia-recovery service: Replays WAL on startup.
11
- * - /palaia command: Show memory status.
12
- */
13
-
14
- import fs from "node:fs/promises";
15
- import path from "node:path";
16
- import os from "node:os";
17
- import { run, runJson, recover, type RunnerOpts, getEmbedServerManager } from "./runner.js";
18
- import type { PalaiaPluginConfig, RecallTypeWeights } from "./config.js";
19
-
20
- // ============================================================================
21
- // Plugin State Persistence (Issue #87: Recall counter for nudges)
22
- // ============================================================================
23
-
24
/** Counters and one-shot flags persisted in `.palaia/plugin-state.json`. */
interface PluginState {
  successfulRecalls: number;
  satisfactionNudged: boolean;
  transparencyNudged: boolean;
  firstRecallTimestamp: string | null;
}

/** Values used for any field missing from disk, or when the file cannot be read. */
const DEFAULT_PLUGIN_STATE: PluginState = {
  successfulRecalls: 0,
  satisfactionNudged: false,
  transparencyNudged: false,
  firstRecallTimestamp: null,
};

/**
 * Read the plugin state for a workspace.
 *
 * The file is read without any locking: it only holds non-critical counters
 * and nudge flags, so a lost update between concurrent agents merely shifts a
 * nudge by one recall.
 *
 * @param workspace Directory containing `.palaia`; defaults to `process.cwd()`.
 * @returns The persisted state layered over the defaults, or a fresh copy of
 *          the defaults when the file is missing, unreadable, or not valid JSON.
 */
async function loadPluginState(workspace?: string): Promise<PluginState> {
  const statePath = path.join(workspace || process.cwd(), ".palaia", "plugin-state.json");

  let persisted: unknown;
  try {
    persisted = JSON.parse(await fs.readFile(statePath, "utf-8"));
  } catch {
    return { ...DEFAULT_PLUGIN_STATE };
  }
  // Persisted keys win over defaults; keys absent on disk keep their default.
  return { ...DEFAULT_PLUGIN_STATE, ...(persisted as object) };
}
57
-
58
/**
 * Write the plugin state to `<workspace>/.palaia/plugin-state.json` as
 * two-space indented JSON.
 *
 * Best effort: any failure while serializing or writing is swallowed, so the
 * returned promise never rejects.
 *
 * @param state     State object to persist.
 * @param workspace Directory containing `.palaia`; defaults to `process.cwd()`.
 */
async function savePluginState(state: PluginState, workspace?: string): Promise<void> {
  const baseDir = workspace || process.cwd();
  const target = path.join(baseDir, ".palaia", "plugin-state.json");
  try {
    const serialized = JSON.stringify(state, null, 2);
    await fs.writeFile(target, serialized);
  } catch {
    // Non-fatal
  }
}
67
-
68
- // ============================================================================
69
- // Session-isolated Turn State (Issue #87: Emoji Reactions)
70
- // ============================================================================
71
-
72
/** Per-session record of what happened during the current turn, shared across hooks. */
interface TurnState {
  recallOccurred: boolean;
  lastInboundMessageId: string | null;
  lastInboundChannelId: string | null;
  channelProvider: string | null;
  capturedInThisTurn: boolean;
  /** Creation time in epoch milliseconds; used for TTL-based pruning. */
  createdAt: number;
}

/**
 * Build a blank turn state: all flags false, all IDs null, stamped with the
 * current time.
 */
function createDefaultTurnState(): TurnState {
  const blank: TurnState = {
    recallOccurred: false,
    lastInboundMessageId: null,
    lastInboundChannelId: null,
    channelProvider: null,
    capturedInThisTurn: false,
    createdAt: Date.now(),
  };
  return blank;
}
93
-
94
/** Turn state entries older than this (5 minutes) are pruned. */
const TURN_STATE_TTL_MS = 5 * 60 * 1000;
/** Inbound message entries older than this (5 minutes) are pruned. */
const INBOUND_MESSAGE_TTL_MS = 5 * 60 * 1000;

/**
 * Drop expired entries from `turnStateBySession` and
 * `lastInboundMessageByChannel`.
 *
 * Intended to run at the start of before_prompt_build so that sessions which
 * were killed or crashed without firing agent_end do not leak memory.
 */
export function pruneStaleEntries(): void {
  const now = Date.now();

  turnStateBySession.forEach((state, sessionKey) => {
    const age = now - state.createdAt;
    if (age > TURN_STATE_TTL_MS) turnStateBySession.delete(sessionKey);
  });

  lastInboundMessageByChannel.forEach((entry, channelKey) => {
    const age = now - entry.timestamp;
    if (age > INBOUND_MESSAGE_TTL_MS) lastInboundMessageByChannel.delete(channelKey);
  });
}
117
-
118
- /**
119
- * Session-isolated turn state map. Keyed by sessionKey.
120
- * Set in before_prompt_build / message_received, consumed + deleted in agent_end.
121
- * NEVER use global variables for turn data — race condition with multi-agent.
122
- */
123
- const turnStateBySession = new Map<string, TurnState>();
124
-
125
- // ============================================================================
126
- // Inbound Message ID Store (for emoji reactions)
127
- // ============================================================================
128
-
129
- /**
130
- * Stores the most recent inbound message ID per channel.
131
- * Keyed by channelId (e.g. "C0AKE2G15HV"), value is the message ts.
132
- * Written by message_received, consumed by agent_end.
133
- * Entries are short-lived and cleaned up after agent_end.
134
- */
135
- const lastInboundMessageByChannel = new Map<string, { messageId: string; provider: string; timestamp: number }>();
136
-
137
- /** Channels that support emoji reactions. */
138
- const REACTION_SUPPORTED_PROVIDERS = new Set(["slack", "discord"]);
139
-
140
- // ============================================================================
141
- // Logger (Issue: api.logger integration)
142
- // ============================================================================
143
-
144
- /** Module-level logger — defaults to console, replaced by api.logger in registerHooks. */
145
- let logger: { info: (...args: any[]) => void; warn: (...args: any[]) => void } = {
146
- info: (...args: any[]) => console.log(...args),
147
- warn: (...args: any[]) => console.warn(...args),
148
- };
149
-
150
- // ============================================================================
151
- // Scope Validation (Issue #90)
152
- // ============================================================================
153
-
154
/** Scope names accepted verbatim. */
const VALID_SCOPES = ["private", "team", "public"];

/**
 * Tell whether a scope string is acceptable for a palaia write.
 *
 * @param s Candidate scope.
 * @returns `true` for "private", "team", "public", or anything that starts
 *          with "shared:"; `false` otherwise.
 */
export function isValidScope(s: string): boolean {
  if (VALID_SCOPES.includes(s)) return true;
  return s.startsWith("shared:");
}
163
-
164
/**
 * Normalize a scope value, for example one suggested by the LLM.
 *
 * @param rawScope    Candidate scope; may be null, undefined, or empty.
 * @param fallback    Returned whenever the candidate is rejected (default "team").
 * @param allowPublic When false (default), "public" is rejected as well.
 * @returns `rawScope` when it is non-empty, valid per `isValidScope`, and not
 *          a disallowed "public"; otherwise `fallback`.
 */
export function sanitizeScope(rawScope: string | null | undefined, fallback = "team", allowPublic = false): string {
  const usable = Boolean(rawScope) && isValidScope(rawScope as string);
  if (!usable) return fallback;

  // "public" must be enabled explicitly by the caller.
  const publicBlocked = rawScope === "public" && !allowPublic;
  return publicBlocked ? fallback : (rawScope as string);
}
174
-
175
- // ============================================================================
176
- // Session Key Helpers
177
- // ============================================================================
178
-
179
/**
 * Derive the "<kind>:<ID>" target from a colon-separated session key.
 *
 * The first segment equal to "channel", "dm" or "group" that is followed by
 * another segment wins; the following segment is upper-cased.
 * e.g. "agent:main:slack:channel:c0ake2g15hv" → "channel:C0AKE2G15HV"
 *
 * @returns The target, or `undefined` when no such segment pair exists.
 */
export function extractTargetFromSessionKey(sessionKey: string): string | undefined {
  const targetKinds = ["channel", "dm", "group"];
  const segments = sessionKey.split(":");
  const lastUsable = segments.length - 1; // the final segment has no successor

  const at = segments.findIndex((segment, i) => i < lastUsable && targetKinds.includes(segment));
  if (at === -1) return undefined;
  return `${segments[at]}:${segments[at + 1].toUpperCase()}`;
}
192
-
193
/**
 * Read the channel provider out of a session key.
 * e.g. "agent:main:slack:channel:c0ake2g15hv" → "slack"
 *
 * @returns The third colon-separated segment when the key has at least five
 *          segments and begins with "agent"; otherwise `undefined`.
 */
export function extractChannelFromSessionKey(sessionKey: string): string | undefined {
  const segments = sessionKey.split(":");
  const [prefix, , provider] = segments;
  const looksLikeAgentKey = segments.length >= 5 && prefix === "agent";
  return looksLikeAgentKey ? provider : undefined;
}
204
-
205
- // ============================================================================
206
- // Emoji Reaction Helpers (Issue #87: Reactions)
207
- // ============================================================================
208
-
209
/**
 * Extract the upper-cased conversation ID from a session key.
 * e.g. "agent:main:slack:channel:c0ake2g15hv" → "C0AKE2G15HV"
 *
 * The first "channel", "dm" or "group" segment that is followed by another
 * segment identifies the conversation. "group" is accepted here so this
 * helper agrees with extractTargetFromSessionKey and extractChannelIdFromEvent,
 * which both recognize group conversations; without it a group session key
 * could never be mapped back to its ID.
 *
 * @param sessionKey Colon-separated session key.
 * @returns The upper-cased ID, or `undefined` when the key has no such segment.
 */
export function extractSlackChannelIdFromSessionKey(sessionKey: string): string | undefined {
  const parts = sessionKey.split(":");
  // Stop one short of the end: the marker segment needs a successor.
  for (let i = 0; i < parts.length - 1; i++) {
    const kind = parts[i];
    if (kind === "channel" || kind === "dm" || kind === "group") {
      return parts[i + 1].toUpperCase();
    }
  }
  return undefined;
}
222
-
223
/**
 * Pull the conversation ID out of event metadata or the hook context.
 *
 * The first of these that is neither null nor undefined is used:
 *   1. event.metadata.to
 *   2. event.metadata.originatingTo
 *   3. ctx.conversationId
 * Its string form must be exactly "<channel|dm|group>:<alphanumeric ID>"
 * (case-insensitive).
 *
 * Per the original author's notes, ctx.channelId holds the provider name
 * ("slack") rather than the channel ID, and ctx.sessionKey is null during
 * message_received, which is why neither is consulted here.
 *
 * @returns The upper-cased ID, or `undefined` when the value does not match.
 */
export function extractChannelIdFromEvent(event: any, ctx: any): string | undefined {
  const metadata = event?.metadata;
  const candidate = metadata?.to ?? metadata?.originatingTo ?? ctx?.conversationId ?? "";

  const parsed = /^(?:channel|dm|group):([A-Z0-9]+)$/i.exec(String(candidate));
  if (!parsed) return undefined;
  return parsed[1].toUpperCase();
}
241
-
242
/**
 * Pick the session identifier for the current turn from a hook context.
 *
 * @returns The trimmed `ctx.sessionKey` when non-empty, else the trimmed
 *          `ctx.sessionId` when non-empty, else `undefined`.
 */
function resolveSessionKeyFromCtx(ctx: any): string | undefined {
  for (const field of ["sessionKey", "sessionId"]) {
    // `trim?.()` tolerates values that have no trim method.
    const value = ctx?.[field]?.trim?.();
    if (value) return value;
  }
  return undefined;
}
252
-
253
/**
 * Look up the turn state for a session, registering a fresh default state
 * in `turnStateBySession` when none exists yet.
 *
 * @returns The existing or newly created state object (the stored instance,
 *          not a copy).
 */
export function getOrCreateTurnState(sessionKey: string): TurnState {
  const existing = turnStateBySession.get(sessionKey);
  if (existing) return existing;

  const fresh = createDefaultTurnState();
  turnStateBySession.set(sessionKey, fresh);
  return fresh;
}

/**
 * Forget the turn state of a session (cleanup after agent_end).
 * Does nothing when the session has no entry.
 */
export function deleteTurnState(sessionKey: string): void {
  if (turnStateBySession.has(sessionKey)) {
    turnStateBySession.delete(sessionKey);
  }
}
271
-
272
/**
 * React to a message with an emoji on the given provider.
 *
 * Resolves without doing anything when any of channelId / messageId / emoji
 * is empty, or when the provider is not in REACTION_SUPPORTED_PROVIDERS.
 * Only "slack" is actually dispatched (to sendSlackReaction); other supported
 * providers are currently a no-op.
 *
 * @param channelId Conversation that contains the message.
 * @param messageId Identifier of the message to react to.
 * @param emoji     Emoji name.
 * @param provider  Channel provider name, e.g. "slack".
 */
export async function sendReaction(
  channelId: string,
  messageId: string,
  emoji: string,
  provider: string,
): Promise<void> {
  const hasAllArgs = Boolean(channelId) && Boolean(messageId) && Boolean(emoji);
  if (!hasAllArgs || !REACTION_SUPPORTED_PROVIDERS.has(provider)) return;

  switch (provider) {
    case "slack":
      await sendSlackReaction(channelId, messageId, emoji);
      break;
    default:
      // Discord: future implementation
      break;
  }
}
293
-
294
/**
 * Memoized Slack bot token.
 * `undefined` = not resolved yet, `null` = resolved and nothing found.
 */
let _cachedSlackToken: string | null | undefined;

/**
 * Find the Slack bot token and cache the outcome (including "not found")
 * until resetSlackTokenCache() is called.
 *
 * Lookup order:
 *   1. SLACK_BOT_TOKEN environment variable (trimmed, non-empty)
 *   2. For each config file — $OPENCLAW_CONFIG if set, then
 *      ~/.openclaw/openclaw.json:
 *        a. channels.slack.botToken
 *        b. channels.slack.accounts.default.botToken
 *
 * A config file that cannot be read or parsed is skipped and the next one is
 * tried.
 *
 * @returns The token, or `null` when no source provides one.
 */
async function resolveSlackBotToken(): Promise<string | null> {
  if (_cachedSlackToken !== undefined) return _cachedSlackToken;

  const fromEnv = process.env.SLACK_BOT_TOKEN?.trim();
  if (fromEnv) {
    return (_cachedSlackToken = fromEnv);
  }

  // An explicit OPENCLAW_CONFIG is consulted before the default location.
  const candidateFiles: string[] = [];
  if (process.env.OPENCLAW_CONFIG) candidateFiles.push(process.env.OPENCLAW_CONFIG);
  candidateFiles.push(path.join(os.homedir(), ".openclaw", "openclaw.json"));

  for (const file of candidateFiles) {
    try {
      const slack = JSON.parse(await fs.readFile(file, "utf-8"))?.channels?.slack;
      // Single-account layout first, then the multi-account "default" entry.
      const token = slack?.botToken?.trim() || slack?.accounts?.default?.botToken?.trim();
      if (token) {
        return (_cachedSlackToken = token);
      }
    } catch {
      // Try next path
    }
  }

  return (_cachedSlackToken = null);
}

/** Clear the memoized token so the next lookup starts over (for testing). */
export function resetSlackTokenCache(): void {
  _cachedSlackToken = undefined;
}
355
-
356
/**
 * POST a reaction to Slack's `reactions.add` endpoint.
 *
 * - Logs a warning and returns when no bot token can be resolved.
 * - Strips one leading and one trailing ":" from the emoji name.
 * - Aborts the request after 5 seconds; an abort is ignored silently.
 * - An "already_reacted" response is not treated as a failure; any other
 *   non-ok response or thrown error is logged as a warning.
 *
 * Never rejects because of a request failure.
 */
async function sendSlackReaction(
  channelId: string,
  messageId: string,
  emoji: string,
): Promise<void> {
  const token = await resolveSlackBotToken();
  if (!token) {
    logger.warn("[palaia] Cannot send Slack reaction: no bot token found");
    return;
  }

  const normalizedEmoji = emoji.replace(/^:/, "").replace(/:$/, "");

  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), 5000);

  try {
    const payload = { channel: channelId, timestamp: messageId, name: normalizedEmoji };
    const response = await fetch("https://slack.com/api/reactions.add", {
      method: "POST",
      headers: {
        "Content-Type": "application/json; charset=utf-8",
        Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify(payload),
      signal: aborter.signal,
    });

    const result = (await response.json()) as { ok: boolean; error?: string };
    const acceptable = result.ok || result.error === "already_reacted";
    if (!acceptable) {
      logger.warn(`[palaia] Slack reaction failed: ${result.error} (${normalizedEmoji} on ${channelId})`);
    }
  } catch (err) {
    // A timeout-triggered abort is expected and not worth a log line.
    if ((err as Error).name === "AbortError") return;
    logger.warn(`[palaia] Slack reaction error (${normalizedEmoji}): ${err}`);
  } finally {
    clearTimeout(timer);
  }
}
398
-
399
- // ============================================================================
400
- // Footnote Helpers (Issue #87)
401
- // ============================================================================
402
-
403
/**
 * Format an ISO date string as a short date: "Mar 16", "Feb 10".
 *
 * Date-only inputs ("2024-03-16", "2024-03", "2024") are parsed by `Date` as
 * UTC midnight, so they are formatted with the UTC getters; using local-time
 * getters would shift them to the previous day in timezones west of UTC.
 * Inputs that carry a time component are formatted in local time, as before.
 *
 * @param isoDate ISO 8601 date or date-time string.
 * @returns "<Mon> <day>", or "" when the input cannot be parsed.
 */
export function formatShortDate(isoDate: string): string {
  const months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
  try {
    const d = new Date(isoDate);
    if (isNaN(d.getTime())) return "";

    const isDateOnly = /^\d{4}(?:-\d{2}){0,2}$/.test(isoDate);
    const month = isDateOnly ? d.getUTCMonth() : d.getMonth();
    const day = isDateOnly ? d.getUTCDate() : d.getDate();
    return `${months[month]} ${day}`;
  } catch {
    return "";
  }
}
417
-
418
/**
 * Decide by keyword overlap whether an entry title relates to a response.
 *
 * The title is lower-cased and split on whitespace, "-", "_" and "/"; words
 * shorter than three characters are dropped. Each remaining word counts as a
 * hit when it occurs anywhere in the lower-cased response.
 *
 * @returns `true` when at least two words hit (one suffices if the title has
 *          only one usable word); `false` when the title has no usable words.
 */
export function isEntryRelevant(title: string, responseText: string): boolean {
  const haystack = responseText.toLowerCase();
  const keywords = title
    .toLowerCase()
    .split(/[\s\-_/]+/)
    .filter((word) => word.length >= 3);

  if (keywords.length === 0) return false;

  const required = keywords.length === 1 ? 1 : 2;
  const hits = keywords.filter((word) => haystack.includes(word)).length;
  return hits >= required;
}
440
-
441
/**
 * Render a memory-source footnote listing the given entries.
 *
 * At most `maxEntries` entries are shown, in input order, each as
 * `"title" (Mon D)` — or just `"title"` when the date cannot be formatted.
 * The response text is accepted for interface compatibility but is not
 * inspected.
 *
 * @param entries       Entries to cite.
 * @param _responseText Unused.
 * @param maxEntries    Maximum number of entries to list (default 3).
 * @returns The footnote (prefixed by a blank line), or `null` when `entries`
 *          is empty.
 */
export function buildFootnote(
  entries: Array<{ title: string; date: string }>,
  _responseText: string,
  maxEntries = 3,
): string | null {
  if (entries.length === 0) return null;

  const citations: string[] = [];
  for (const entry of entries.slice(0, maxEntries)) {
    const shortDate = formatShortDate(entry.date);
    citations.push(shortDate ? `"${entry.title}" (${shortDate})` : `"${entry.title}"`);
  }
  return `\n\n📎 Palaia: ${citations.join(", ")}`;
}
459
-
460
- // ============================================================================
461
- // Satisfaction / Transparency Nudge Helpers (Issue #87)
462
- // ============================================================================
463
-
464
/** Successful recalls required before the satisfaction nudge fires. */
const SATISFACTION_THRESHOLD = 10;
/** Successful recalls after which the transparency nudge fires. */
const TRANSPARENCY_RECALL_THRESHOLD = 50;
/** Days since the first recall after which the transparency nudge fires. */
const TRANSPARENCY_DAYS_THRESHOLD = 7;

const SATISFACTION_NUDGE_TEXT = [
  "Your user has been using Palaia for a while now. ",
  "Ask them casually if they're happy with the memory system. ",
  "If there are issues, suggest `palaia doctor`.",
].join("");

const TRANSPARENCY_NUDGE_TEXT = [
  "Your user has been seeing memory Footnotes and capture confirmations for several days. ",
  "Ask them once: 'Would you like to keep seeing memory source references and capture ",
  "confirmations, or should I hide them? You can change this anytime.' ",
  "Based on their answer: `palaia config set showMemorySources true/false` and ",
  "`palaia config set showCaptureConfirm true/false`",
].join("");

/**
 * Work out which nudges are due and mark them as delivered.
 *
 * - Satisfaction: not yet nudged and at least SATISFACTION_THRESHOLD recalls.
 * - Transparency: not yet nudged, a first-recall timestamp exists, and either
 *   the recall threshold or the days-since-first-recall threshold is reached.
 *
 * Mutates `state`: the corresponding `*Nudged` flag is set for each nudge
 * returned.
 *
 * @returns The nudge texts (satisfaction first) and whether `state` changed.
 */
export function checkNudges(state: PluginState): { nudges: string[]; updated: boolean } {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const nudges: string[] = [];

  const satisfactionDue = !state.satisfactionNudged && state.successfulRecalls >= SATISFACTION_THRESHOLD;
  if (satisfactionDue) {
    state.satisfactionNudged = true;
    nudges.push(SATISFACTION_NUDGE_TEXT);
  }

  if (!state.transparencyNudged && state.firstRecallTimestamp) {
    const elapsedMs = Date.now() - new Date(state.firstRecallTimestamp).getTime();
    const daysSinceFirst = elapsedMs / MS_PER_DAY;
    const enoughRecalls = state.successfulRecalls >= TRANSPARENCY_RECALL_THRESHOLD;
    const enoughDays = daysSinceFirst >= TRANSPARENCY_DAYS_THRESHOLD;
    if (enoughRecalls || enoughDays) {
      state.transparencyNudged = true;
      nudges.push(TRANSPARENCY_NUDGE_TEXT);
    }
  }

  // Every nudge emitted flips a flag, so "any nudge" is exactly "state changed".
  return { nudges, updated: nudges.length > 0 };
}
505
-
506
- // ============================================================================
507
- // Capture Hints (Issue #81)
508
- // ============================================================================
509
-
510
/** Parsed palaia-hint tag attributes */
export interface PalaiaHint {
  project?: string;
  scope?: string;
  type?: string;
  tags?: string[];
}

/**
 * Parse self-closing `<palaia-hint ... />` tags out of a text.
 *
 * Recognized attributes (double-quoted, case-insensitive names): `project`,
 * `scope`, `type`, and `tags` (comma-separated; items are trimmed and empty
 * items dropped). Unknown attributes are ignored. Attribute values may
 * contain "/" (e.g. `project="org/repo"`); the tag ends at the first "/>".
 *
 * @param text Text that may contain hint tags.
 * @returns One hint object per tag, in order of appearance, plus the text
 *          with all tags removed and outer whitespace trimmed.
 */
export function parsePalaiaHints(text: string): { hints: PalaiaHint[]; cleanedText: string } {
  // Lazy attribute capture up to the first "/>", so "/" inside a value is fine.
  const tagPattern = /<palaia-hint\s+([^>]*?)\s*\/>/gi;

  // Read one double-quoted attribute; \b keeps e.g. `subtype=` from matching `type`.
  const readAttr = (attrs: string, name: string): string | undefined =>
    new RegExp(`\\b${name}\\s*=\\s*"([^"]*)"`, "i").exec(attrs)?.[1];

  const hints: PalaiaHint[] = [];

  // Single pass: collect each tag's attributes while stripping it from the text.
  const cleanedText = text
    .replace(tagPattern, (_tag: string, attrs: string) => {
      const hint: PalaiaHint = {};

      const project = readAttr(attrs, "project");
      if (project !== undefined) hint.project = project;

      const scope = readAttr(attrs, "scope");
      if (scope !== undefined) hint.scope = scope;

      const type = readAttr(attrs, "type");
      if (type !== undefined) hint.type = type;

      const tags = readAttr(attrs, "tags");
      if (tags !== undefined) {
        hint.tags = tags.split(",").map((t) => t.trim()).filter(Boolean);
      }

      hints.push(hint);
      return "";
    })
    .trim();

  return { hints, cleanedText };
}
549
-
550
- // ============================================================================
551
- // Project Cache (Issue #81)
552
- // ============================================================================
553
-
554
/** Name and optional description of a known project. */
interface CachedProject {
  name: string;
  description?: string;
}

/** Last successfully loaded project list, or null when nothing is cached. */
let _cachedProjects: CachedProject[] | null = null;
/** `Date.now()` value taken when `_cachedProjects` was last refreshed. */
let _projectCacheTime = 0;
/** How long a cached project list is served without re-querying (60 s). */
const PROJECT_CACHE_TTL_MS = 60_000;

/** Drop the cached project list (for testing). */
export function resetProjectCache(): void {
  _projectCacheTime = 0;
  _cachedProjects = null;
}

/**
 * Return the known projects, querying `project list` via runJson at most once
 * per PROJECT_CACHE_TTL_MS.
 *
 * If the query fails, the previously cached list is returned (an empty list
 * when there is none) and the cache timestamp is left untouched, so the next
 * call queries again.
 */
async function loadProjects(opts: import("./runner.js").RunnerOpts): Promise<CachedProject[]> {
  const now = Date.now();
  const age = now - _projectCacheTime;
  if (_cachedProjects && age < PROJECT_CACHE_TTL_MS) {
    return _cachedProjects;
  }

  try {
    const response = await runJson<{ projects: Array<{ name: string; description?: string }> }>(
      ["project", "list"],
      opts,
    );
    // Keep only the two fields this module uses.
    const refreshed = (response.projects || []).map(({ name, description }) => ({ name, description }));
    _cachedProjects = refreshed;
    _projectCacheTime = now;
    return refreshed;
  } catch {
    return _cachedProjects || [];
  }
}
593
-
594
- // ============================================================================
595
- // Types
596
- // ============================================================================
597
-
598
- /** Shape returned by `palaia query --json` or `palaia list --json` */
599
- interface QueryResult {
600
- results: Array<{
601
- id: string;
602
- body?: string;
603
- content?: string;
604
- score: number;
605
- tier: string;
606
- scope: string;
607
- title?: string;
608
- type?: string;
609
- tags?: string[];
610
- }>;
611
- }
612
-
613
- /** Message shape from OpenClaw event.messages */
614
- interface Message {
615
- role?: string;
616
- content?: string | Array<{ type?: string; text?: string }>;
617
- }
618
-
619
- // ============================================================================
620
- // LLM-based Extraction (Issue #64 upgrade)
621
- // ============================================================================
622
-
623
- /** Result from LLM-based knowledge extraction */
624
- export interface ExtractionResult {
625
- content: string;
626
- type: "memory" | "process" | "task";
627
- tags: string[];
628
- significance: number;
629
- project?: string | null;
630
- scope?: string | null;
631
- }
632
-
633
- type RunEmbeddedPiAgentFn = (params: Record<string, unknown>) => Promise<unknown>;
634
-
635
- let _embeddedPiAgentLoader: Promise<RunEmbeddedPiAgentFn> | null = null;
636
- /** Whether the LLM import failure has already been logged (to avoid spam). */
637
- let _llmImportFailureLogged = false;
638
-
639
/**
 * Locate OpenClaw's `extensionAPI.js` on disk.
 *
 * Strategies are tried in order and the first hit wins:
 *   1. `require.resolve("openclaw/dist/extensionAPI.js")`
 *   2. `extensionAPI.js` next to the resolved `openclaw` main entry
 *   3. `openclaw/dist/extensionAPI.js` in this file's directory or one of its
 *      ancestors (at most six levels are inspected)
 *   4. A fixed list of well-known global install locations
 *
 * Any error inside a strategy is swallowed and the next strategy is tried.
 *
 * @returns Path of the module, or `null` when no strategy finds it.
 */
function resolveExtensionAPIPath(): string | null {
  // Looked up lazily inside each try block so a missing `require` is caught there.
  const fileExists = (candidate: string): boolean => require("node:fs").existsSync(candidate);

  // Strategy 1: standard module resolution of the dist file.
  try {
    return require.resolve("openclaw/dist/extensionAPI.js");
  } catch {
    // Not resolvable via standard module resolution
  }

  // Strategy 2: sibling of the openclaw main entry.
  try {
    const mainEntry = require.resolve("openclaw");
    const sibling = path.join(path.dirname(mainEntry), "extensionAPI.js");
    if (fileExists(sibling)) return sibling;
  } catch {
    // openclaw not resolvable at all
  }

  // Strategy 3: walk up from this file looking for an adjacent openclaw install.
  try {
    let current = typeof __dirname !== "undefined" ? __dirname : path.dirname(new URL(import.meta.url).pathname);
    let remaining = 6;
    while (remaining-- > 0) {
      const nearby = path.join(current, "openclaw", "dist", "extensionAPI.js");
      if (fileExists(nearby)) return nearby;
      const up = path.dirname(current);
      if (up === current) break; // reached the filesystem root
      current = up;
    }
  } catch {
    // Traversal failed
  }

  // Strategy 4: well-known global install paths.
  const wellKnown = [
    path.join(os.homedir(), ".openclaw", "node_modules", "openclaw", "dist", "extensionAPI.js"),
    "/home/linuxbrew/.linuxbrew/lib/node_modules/openclaw/dist/extensionAPI.js",
    "/usr/local/lib/node_modules/openclaw/dist/extensionAPI.js",
    "/usr/lib/node_modules/openclaw/dist/extensionAPI.js",
  ];
  for (const location of wellKnown) {
    try {
      if (fileExists(location)) return location;
    } catch {
      // skip
    }
  }

  return null;
}
693
-
694
/**
 * Dynamically import OpenClaw's extension API and return its
 * `runEmbeddedPiAgent` export.
 *
 * @throws Error when the module cannot be located, or when it does not export
 *         a function named `runEmbeddedPiAgent`.
 */
async function loadRunEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> {
  const modulePath = resolveExtensionAPIPath();
  if (!modulePath) {
    throw new Error("Could not locate openclaw/dist/extensionAPI.js — tried module resolution, sibling lookup, and global paths");
  }

  const loaded = (await import(modulePath)) as { runEmbeddedPiAgent?: unknown };
  const candidate = loaded.runEmbeddedPiAgent;
  if (typeof candidate === "function") {
    return candidate as RunEmbeddedPiAgentFn;
  }
  throw new Error(`runEmbeddedPiAgent not exported from ${modulePath}`);
}
707
-
708
/**
 * Return the shared loader promise for `runEmbeddedPiAgent`, starting the
 * load on first use. The same promise is handed out on every later call —
 * including when it has rejected — until the loader is reset or replaced.
 */
export function getEmbeddedPiAgent(): Promise<RunEmbeddedPiAgentFn> {
  const pending = _embeddedPiAgentLoader || loadRunEmbeddedPiAgent();
  _embeddedPiAgentLoader = pending;
  return pending;
}

/** Forget the cached loader and re-arm the import-failure log flag (for testing). */
export function resetEmbeddedPiAgentLoader(): void {
  _llmImportFailureLogged = false;
  _embeddedPiAgentLoader = null;
}

/** Replace the cached loader with a caller-supplied promise, or clear it with null (for testing). */
export function setEmbeddedPiAgentLoader(loader: Promise<RunEmbeddedPiAgentFn> | null): void {
  _embeddedPiAgentLoader = loader;
}
725
-
726
- const EXTRACTION_SYSTEM_PROMPT_BASE = `You are a knowledge extraction engine. Analyze the following conversation exchange and identify information worth remembering long-term.
727
-
728
- For each piece of knowledge, return a JSON array of objects:
729
- - "content": concise summary of the knowledge (1-3 sentences)
730
- - "type": "memory" (facts, decisions, preferences), "process" (workflows, procedures, steps), or "task" (action items, todos, commitments)
731
- - "tags": array of significance tags from: ["decision", "lesson", "surprise", "commitment", "correction", "preference", "fact"]
732
- - "significance": 0.0-1.0 how important this is for long-term recall
733
- - "project": which project this belongs to (from known projects list, or null if unclear)
734
- - "scope": "private" (personal preference, agent-specific), "team" (shared knowledge), or "public" (documentation)
735
-
736
- Only extract genuinely significant knowledge. Skip small talk, acknowledgments, routine exchanges.
737
- Do NOT extract if similar knowledge was likely captured in a recent exchange. Prefer quality over quantity. Skip routine status updates and acknowledgments.
738
- Return empty array [] if nothing is worth remembering.
739
- Return ONLY valid JSON, no markdown fences.`;
740
-
741
/**
 * Build the extraction system prompt.
 *
 * @param projects Known projects to advertise to the model.
 * @returns The base prompt unchanged when there are no projects; otherwise
 *          the base prompt followed by a "Known projects:" line listing each
 *          project as `name` or `name (description)`.
 */
function buildExtractionPrompt(projects: CachedProject[]): string {
  if (projects.length === 0) return EXTRACTION_SYSTEM_PROMPT_BASE;

  const labels: string[] = [];
  for (const project of projects) {
    const suffix = project.description ? ` (${project.description})` : "";
    labels.push(`${project.name}${suffix}`);
  }
  return `${EXTRACTION_SYSTEM_PROMPT_BASE}\n\nKnown projects: ${labels.join(", ")}`;
}
748
-
749
/** Whether the captureModel fallback warning has already been logged (to avoid spam). */
let _captureModelFallbackWarned = false;

/** Whether the captureModel→primary model fallback warning has been logged (max 1x per gateway lifetime). */
let _captureModelFailoverWarned = false;

/** Reset both captureModel warning flags (for testing). */
export function resetCaptureModelFallbackWarning(): void {
  _captureModelFallbackWarned = false;
  _captureModelFailoverWarned = false;
}

/**
 * Read the primary model ID from `config.agents.defaults.model`, which may be
 * a plain string or an object carrying a `primary` field.
 * Returns the trimmed ID, or "" when none is configured. A non-string
 * `primary` is stringified rather than causing an exception.
 */
function _primaryModelFromConfig(config: any): string {
  const defaultsModel = config?.agents?.defaults?.model;
  if (typeof defaultsModel === "string") return defaultsModel.trim();
  if (typeof defaultsModel === "object" && defaultsModel !== null) {
    return String(defaultsModel.primary ?? "").trim();
  }
  return "";
}

/**
 * Resolve the model to use for LLM-based capture extraction.
 *
 * Strategy (no static model mapping — user config is the source of truth):
 *   1. `captureModel` given and not "cheap":
 *      - "provider/model..." is split at the first "/";
 *      - a bare model name borrows the provider of the configured primary model.
 *   2. Otherwise (unset, "cheap", or no provider could be borrowed): use the
 *      primary model from the user config when it has the "provider/model"
 *      form, logging a one-time recommendation to configure a cheaper
 *      captureModel.
 *   3. Never fall back to hard-coded model IDs.
 *
 * @param config       OpenClaw configuration object (shape not validated).
 * @param captureModel Optional model ID from the plugin config.
 * @returns Provider and model, or `undefined` when nothing usable is configured.
 */
export function resolveCaptureModel(
  config: any,
  captureModel?: string,
): { provider: string; model: string } | undefined {
  const primary = _primaryModelFromConfig(config);

  // Case 1: explicit model ID provided (not "cheap")
  if (captureModel && captureModel !== "cheap") {
    const [provider, ...modelParts] = captureModel.split("/");
    if (modelParts.length >= 1) {
      return { provider, model: modelParts.join("/") };
    }
    // No slash — treat as model name with provider from primary config
    const defaultProvider = primary.split("/")[0];
    if (defaultProvider) {
      return { provider: defaultProvider, model: captureModel };
    }
  }

  // Case 2: "cheap" or unset — use primary model from user config
  if (primary) {
    const [provider, ...modelParts] = primary.split("/");
    if (modelParts.length >= 1) {
      if (!_captureModelFallbackWarned) {
        _captureModelFallbackWarned = true;
        logger.warn(`[palaia] No captureModel configured — using primary model. Set captureModel in plugin config for cost savings.`);
      }
      return { provider, model: modelParts.join("/") };
    }
  }

  return undefined;
}
813
-
814
/**
 * Remove a surrounding Markdown code fence (``` or ```json) from a string.
 *
 * The input is trimmed first. When the whole trimmed string is one fenced
 * block, the trimmed inner content is returned; otherwise the trimmed input
 * is returned unchanged.
 */
function stripCodeFences(s: string): string {
  const body = s.trim();
  const fenced = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(body);
  return fenced === null ? body : (fenced[1] ?? "").trim();
}
820
-
821
/**
 * Join the text of all non-error payloads into one newline-separated string.
 *
 * Payloads flagged `isError` or lacking a string `text` are skipped. The
 * joined result is trimmed; an undefined payload list yields "".
 */
function collectText(payloads: Array<{ text?: string; isError?: boolean }> | undefined): string {
  const pieces: string[] = [];
  for (const payload of payloads ?? []) {
    if (payload.isError || typeof payload.text !== "string") continue;
    pieces.push(payload.text);
  }
  return pieces.join("\n").trim();
}
828
-
829
/**
 * Reduce a message list to a recent window suitable for LLM extraction.
 *
 * Two limits are applied in order:
 *  1. Turn limit — only `user` and `assistant` messages are considered. Walking
 *     from newest to oldest, a pair is counted each time a real user message
 *     (provenance `external_user`, or no provenance at all) is reached after an
 *     assistant message. Once more than `maxPairs` pairs have been counted,
 *     everything up to and including that user message is dropped.
 *  2. Size limit — oldest messages are removed until the estimated size
 *     (text length + role length + 5 per message) fits `maxChars`. At least
 *     one message is always kept.
 *
 * @param texts    Messages in chronological order.
 * @param maxPairs Maximum number of user/assistant pairs to keep.
 * @param maxChars Character budget for the kept messages.
 * @returns A new array holding the retained messages, oldest first.
 */
export function trimToRecentExchanges(
  texts: Array<{ role: string; text: string; provenance?: string }>,
  maxPairs = 5,
  maxChars = 10_000,
): Array<{ role: string; text: string; provenance?: string }> {
  // Tool, toolResult, system and other roles are not part of the dialogue.
  const dialogue = texts.filter(({ role }) => role === "user" || role === "assistant");

  let keepFrom = 0; // index of the oldest message to keep; 0 keeps everything
  let completedPairs = 0;
  let newerRole = ""; // role of the message just after the current one
  for (let pos = dialogue.length - 1; pos >= 0; pos -= 1) {
    const { role, provenance } = dialogue[pos];
    // System-injected user messages (inter_session, internal_system) are not
    // conversation turns; a missing provenance counts as a real user.
    const countsAsHumanTurn = role === "user" && (!provenance || provenance === "external_user");
    if (countsAsHumanTurn && newerRole === "assistant") {
      completedPairs += 1;
      if (completedPairs > maxPairs) {
        keepFrom = pos + 1;
        break;
      }
    }
    newerRole = role;
  }
  const windowed = dialogue.slice(keepFrom);

  // Hard cap: discard from the front (oldest) until the budget is met.
  const costOf = (entry: { role: string; text: string }): number =>
    entry.text.length + entry.role.length + 5;
  let budgetUsed = 0;
  for (const entry of windowed) {
    budgetUsed += costOf(entry);
  }
  let dropCount = 0;
  while (budgetUsed > maxChars && windowed.length - dropCount > 1) {
    budgetUsed -= costOf(windowed[dropCount]);
    dropCount += 1;
  }

  return dropCount === 0 ? windowed : windowed.slice(dropCount);
}
880
-
881
/**
 * Extract structured knowledge items from a conversation using an LLM.
 *
 * The recent user/assistant exchanges are rendered as text, appended to the
 * extraction prompt and sent through the embedded agent runner with tools
 * disabled. The reply must be a JSON array (optionally wrapped in a code
 * fence); each element is validated and normalised before being returned.
 *
 * @param messages      Raw conversation messages.
 * @param config        Host configuration, forwarded to the agent runner and
 *                      used to resolve the model and workspace directory.
 * @param pluginConfig  Optional plugin settings; only `captureModel` is read.
 * @param knownProjects Projects passed to the prompt builder (defaults to none).
 * @returns Normalised extraction results; empty when there is no conversation
 *          text or the model returned no text.
 * @throws Error when no model can be resolved, when the reply is not valid
 *         JSON, or when the parsed reply is not an array. Errors from the
 *         agent runner propagate unchanged.
 */
export async function extractWithLLM(
  messages: unknown[],
  config: any,
  pluginConfig?: { captureModel?: string },
  knownProjects?: CachedProject[],
): Promise<ExtractionResult[]> {
  const runEmbeddedPiAgent = await getEmbeddedPiAgent();

  const resolved = resolveCaptureModel(config, pluginConfig?.captureModel);
  if (!resolved) {
    throw new Error("No model available for LLM extraction");
  }

  const allTexts = extractMessageTexts(messages);
  // Strip Palaia-injected recall context from user messages; otherwise the
  // model would re-extract memories that were just recalled (feedback loop).
  const cleanedTexts = allTexts.map((t) =>
    t.role === "user"
      ? { ...t, text: stripPalaiaInjectedContext(t.text) }
      : t
  );
  // Only extract from recent exchanges — full history causes LLM timeouts
  // and dilutes extraction quality.
  const recentTexts = trimToRecentExchanges(cleanedTexts);
  const exchangeText = recentTexts
    .map((t) => `[${t.role}]: ${t.text}`)
    .join("\n");

  if (!exchangeText.trim()) {
    return [];
  }

  const systemPrompt = buildExtractionPrompt(knownProjects || []);
  const prompt = `${systemPrompt}\n\n--- CONVERSATION ---\n${exchangeText}\n--- END ---`;

  // Validation vocabularies are loop-invariant, so build them once per call
  // rather than once per parsed item.
  const validTypes = new Set(["memory", "process", "task"]);
  const validTags = new Set([
    "decision", "lesson", "surprise", "commitment",
    "correction", "preference", "fact",
  ]);

  let tmpDir: string | null = null;
  try {
    // The runner needs a session file path; use a throwaway directory that is
    // removed in the finally block whatever the outcome.
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "palaia-extract-"));
    const sessionId = `palaia-extract-${Date.now()}`;
    const sessionFile = path.join(tmpDir, "session.json");

    const result = await runEmbeddedPiAgent({
      sessionId,
      sessionFile,
      workspaceDir: config?.agents?.defaults?.workspace ?? process.cwd(),
      config,
      prompt,
      timeoutMs: 15_000,
      runId: `palaia-extract-${Date.now()}`,
      provider: resolved.provider,
      model: resolved.model,
      disableTools: true,
      streamParams: { maxTokens: 2048 },
    });

    const text = collectText((result as any).payloads);
    if (!text) return [];

    const raw = stripCodeFences(text);
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      throw new Error(`LLM returned invalid JSON: ${raw.slice(0, 200)}`);
    }

    if (!Array.isArray(parsed)) {
      throw new Error(`LLM returned non-array: ${typeof parsed}`);
    }

    const results: ExtractionResult[] = [];
    for (const item of parsed) {
      // Skip anything that is not an object with non-empty string content.
      if (!item || typeof item !== "object") continue;
      const content = typeof item.content === "string" ? item.content.trim() : "";
      if (!content) continue;

      // Unknown types fall back to "memory"; unknown tags are dropped.
      const type = validTypes.has(item.type) ? item.type : "memory";
      const tags = Array.isArray(item.tags)
        ? item.tags.filter((t: unknown) => typeof t === "string" && validTags.has(t))
        : [];

      // Clamp significance to [0, 1]; default to the midpoint when absent.
      const significance = typeof item.significance === "number"
        ? Math.max(0, Math.min(1, item.significance))
        : 0.5;

      const project = typeof item.project === "string" && item.project.trim()
        ? item.project.trim()
        : null;

      const scope = typeof item.scope === "string" && isValidScope(item.scope)
        ? item.scope
        : null;

      results.push({ content, type, tags, significance, project, scope });
    }

    return results;
  } finally {
    if (tmpDir) {
      // Best-effort cleanup: a failed removal must not mask the real outcome.
      try { await fs.rm(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ }
    }
  }
}
989
-
990
- // ============================================================================
991
- // Auto-Capture: Rule-based extraction (Issue #64)
992
- // ============================================================================
993
-
994
/** Lower-case one-word replies (English/German, plus emoji) that carry no capturable content. */
const TRIVIAL_RESPONSES: Set<string> = new Set([
  "ok", "ja", "nein", "yes", "no", "sure", "klar", "danke", "thanks",
  "thx", "k", "👍", "👎", "ack", "nope", "yep", "yup", "alright",
  "fine", "gut", "passt", "okay", "hmm", "hm", "ah", "aha",
]);

/** One rule of the rule-based significance detector. */
type SignificanceRule = {
  /** Phrase pattern that signals significance. */
  pattern: RegExp;
  /** Tag attached to the captured entry when the pattern matches. */
  tag: string;
  /** Entry type suggested by this rule. */
  type: "memory" | "process" | "task";
};

/** Phrase patterns (English and German) mapped to a significance tag and entry type. */
const SIGNIFICANCE_RULES: Array<SignificanceRule> = [
  // Decisions
  { pattern: /(?:we decided|entschieden|decision:|beschlossen|let'?s go with|wir nehmen|agreed on)/i, tag: "decision", type: "memory" },
  { pattern: /(?:will use|werden nutzen|going forward|ab jetzt|from now on)/i, tag: "decision", type: "memory" },
  // Lessons
  { pattern: /(?:learned|gelernt|lesson:|erkenntnis|takeaway|insight|turns out|it seems)/i, tag: "lesson", type: "memory" },
  { pattern: /(?:mistake was|fehler war|should have|hätten sollen|next time)/i, tag: "lesson", type: "memory" },
  // Surprises
  { pattern: /(?:surprising|überraschend|unexpected|unerwartet|didn'?t expect|nicht erwartet|plot twist)/i, tag: "surprise", type: "memory" },
  // Commitments
  { pattern: /(?:i will|ich werde|todo:|action item|must do|muss noch|need to|commit to|verspreche)/i, tag: "commitment", type: "task" },
  { pattern: /(?:deadline|frist|due date|bis zum|by end of|spätestens)/i, tag: "commitment", type: "task" },
  // Processes
  { pattern: /(?:the process is|der prozess|steps?:|workflow:|how to|anleitung|recipe:|checklist)/i, tag: "process", type: "process" },
  { pattern: /(?:first,?\s.*then|schritt \d|step \d|1\.\s.*2\.\s)/i, tag: "process", type: "process" },
];
1015
-
1016
/** Signatures of machine output: test-runner reports, stack traces, bare paths, build-tool chatter. */
const NOISE_PATTERNS: RegExp[] = [
  /(?:PASSED|FAILED|ERROR)\s+\[?\d+%\]?/i,
  /(?:test_\w+|tests?\/\w+\.(?:py|ts|js))\s*::/,
  /(?:pytest|vitest|jest|mocha)\s+(?:run|--)/i,
  /\d+ passed,?\s*\d* (?:failed|error|warning)/i,
  /^(?:=+\s*(?:test session|ERRORS|FAILURES|short test summary))/m,
  /(?:Traceback \(most recent call last\)|^\s+File ".*", line \d+)/m,
  /^\s+at\s+\S+\s+\(.*:\d+:\d+\)/m,
  /^(?:\/[\w/.-]+){3,}\s*$/m,
  /(?:npm\s+(?:ERR|WARN)|pip\s+install|cargo\s+build)/i,
  /^(?:warning|error)\[?\w*\]?:\s/m,
];

/**
 * Decide whether a text is tool/log noise rather than conversation.
 *
 * The text is noise when either
 *  - at least two distinct noise patterns match it, or
 *  - it has more than three non-blank lines and over half of them start with
 *    an absolute-path-like token.
 */
export function isNoiseContent(text: string): boolean {
  // The patterns carry no global/sticky flag, so .test() is stateless here.
  const signatureHits = NOISE_PATTERNS.filter((signature) => signature.test(text)).length;
  if (signatureHits >= 2) return true;

  const nonBlank = text.split("\n").filter((line) => line.trim().length > 0);
  if (nonBlank.length <= 3) return false;

  const pathLikeCount = nonBlank.filter((line) => /^\s*(?:\/[\w/.-]+){2,}/.test(line.trim())).length;
  return pathLikeCount / nonBlank.length > 0.5;
}
1046
-
1047
/**
 * Cheap pre-filter deciding whether an exchange is worth running capture on.
 *
 * Returns false when the trimmed text is shorter than `minChars`, consists of
 * at most three trivial reply words, contains a `<relevant-memories>` block,
 * looks like markup (starts with "<" and contains "</"), or is classified as
 * noise by `isNoiseContent`. Returns true otherwise.
 *
 * @param exchangeText Text of the exchange to examine.
 * @param minChars     Minimum trimmed length required (default 100).
 */
export function shouldAttemptCapture(
  exchangeText: string,
  minChars = 100,
): boolean {
  const body = exchangeText.trim();
  if (body.length < minChars) return false;

  const tokens = body.toLowerCase().split(/\s+/);
  const onlyTrivialWords = tokens.length <= 3 && tokens.every((token) => TRIVIAL_RESPONSES.has(token));
  if (onlyTrivialWords) return false;

  // Injected recall blocks and markup-shaped payloads are never captured.
  const carriesRecallBlock = body.includes("<relevant-memories>");
  const looksLikeMarkup = body.startsWith("<") && body.includes("</");
  if (carriesRecallBlock || looksLikeMarkup) return false;

  return !isNoiseContent(body);
}
1067
-
1068
/**
 * Rule-based significance detection for an exchange.
 *
 * Every entry of `SIGNIFICANCE_RULES` is tested against the text. A result is
 * produced only when at least two different tags matched. The entry type is
 * the highest-priority type among the matches (task > process > memory). The
 * summary joins up to three sentences (20–500 characters each), preferring
 * sentences that themselves match a rule, and is capped at 500 characters.
 *
 * @returns Tags in first-match order, the chosen type and a summary, or null
 *          when fewer than two tags match or no usable sentence exists.
 */
export function extractSignificance(
  exchangeText: string,
): { tags: string[]; type: "memory" | "process" | "task"; summary: string } | null {
  type EntryType = "memory" | "process" | "task";
  const rank: Record<string, number> = { task: 3, process: 2, memory: 1 };

  const seenTags = new Set<string>();
  let strongestType: EntryType = "memory";
  for (const { pattern, tag, type } of SIGNIFICANCE_RULES) {
    if (!pattern.test(exchangeText)) continue;
    seenTags.add(tag);
    if (rank[type] > rank[strongestType]) {
      strongestType = type;
    }
  }

  // A single kind of signal is too weak for rule-based capture.
  if (seenTags.size < 2) return null;

  const candidates: string[] = [];
  for (const fragment of exchangeText.split(/[.!?\n]+/)) {
    const sentence = fragment.trim();
    if (sentence.length > 20 && sentence.length < 500) {
      candidates.push(sentence);
    }
  }

  const signalBearing = candidates.filter((sentence) =>
    SIGNIFICANCE_RULES.some((rule) => rule.pattern.test(sentence)),
  );
  const chosen = signalBearing.length > 0 ? signalBearing : candidates;
  const summary = chosen.slice(0, 3).join(". ").slice(0, 500);

  if (!summary) return null;

  return { tags: [...seenTags], type: strongestType, summary };
}
1111
-
1112
/**
 * Remove the Palaia recall block from a message text.
 *
 * before_prompt_build prepends recalled memories to the user message, and
 * OpenClaw merges them into it, so agent_end sees them as user content. If
 * they were left in place, auto-capture would store the recalled memories
 * again and create a feedback loop.
 *
 * The injected block is recognised by its fixed frame:
 *  - it opens with the heading "## Active Memory (Palaia)",
 *  - it closes with the first following line segment starting at "[palaia]"
 *    (the usage nudge), including any newlines right after that line.
 *
 * Only the first such block is removed. The result is trimmed.
 */
export function stripPalaiaInjectedContext(text: string): string {
  const injectedBlock = /## Active Memory \(Palaia\)[\s\S]*?\[palaia\][^\n]*\n*/;
  const found = injectedBlock.exec(text);
  if (found === null) {
    return text.trim();
  }
  const before = text.slice(0, found.index);
  const after = text.slice(found.index + found[0].length);
  return (before + after).trim();
}
1129
-
1130
/**
 * Flatten raw messages into `{ role, text, provenance }` records.
 *
 * - Entries that are not objects, or whose `role` is missing or not a string,
 *   are ignored.
 * - String content yields one record (trimmed) when it is non-blank.
 * - Array content yields one record per `type: "text"` block with non-blank
 *   text; other block types are ignored.
 * - `provenance` is taken from `provenance.kind` when present, otherwise from
 *   `provenance` itself, and is kept only when it is a string.
 */
export function extractMessageTexts(messages: unknown[]): Array<{ role: string; text: string; provenance?: string }> {
  const collected: Array<{ role: string; text: string; provenance?: string }> = [];

  for (const candidate of messages) {
    if (typeof candidate !== "object" || !candidate) continue;
    const message = candidate as Message;

    const role = message.role;
    if (typeof role !== "string" || !role) continue;

    // Provenance may be a plain string or an object carrying a `kind` field.
    const provenanceField = (message as any).provenance;
    const provenanceKind = provenanceField?.kind ?? provenanceField;
    const provenance = typeof provenanceKind === "string" ? provenanceKind : undefined;

    const content = message.content;
    if (typeof content === "string") {
      const body = content.trim();
      if (body) {
        collected.push({ role, text: body, provenance });
      }
      continue;
    }

    if (!Array.isArray(content)) continue;
    for (const part of content) {
      if (!part || typeof part !== "object") continue;
      if (part.type !== "text" || typeof part.text !== "string") continue;
      const body = part.text.trim();
      if (body) {
        collected.push({ role, text: body, provenance });
      }
    }
  }

  return collected;
}
1165
-
1166
/**
 * Return the text of the most recent user message.
 *
 * The newest user message with provenance `external_user` (real human input)
 * wins. If none exists, the newest user message of any provenance is used,
 * which keeps hosts that do not set provenance working.
 *
 * @returns The message text, or null when there is no user message.
 */
export function getLastUserMessage(messages: unknown[]): string | null {
  const turns = extractMessageTexts(messages);

  // Single backwards pass: remember the newest user text as the fallback and
  // return immediately on the first external_user hit.
  let newestUserText: string | null = null;
  for (let pos = turns.length - 1; pos >= 0; pos -= 1) {
    const turn = turns[pos];
    if (turn.role !== "user") continue;
    if (turn.provenance === "external_user") {
      return turn.text;
    }
    if (newestUserText === null) {
      newestUserText = turn.text;
    }
  }
  return newestUserText;
}
1179
-
1180
- // ============================================================================
1181
- // Channel Envelope Stripping (v2.0.6)
1182
- // ============================================================================
1183
-
1184
/**
 * Channel envelope handling.
 *
 * OpenClaw prefixes inbound channel messages with a bracketed header such as
 * "[TIMESTAMP]" or "[CHANNEL TIMESTAMP]". The helpers below detect that header
 * and remove it. Based on OpenClaw's internal stripEnvelope() logic.
 */

/** A leading "[...]" header plus any whitespace after it; group 1 is the header text. */
const ENVELOPE_PREFIX_RE = /^\[([^\]]+)\]\s*/;

/** Channel names that may open an envelope header. */
const ENVELOPE_CHANNELS = [
  "WebChat", "WhatsApp", "Telegram", "Signal", "Slack",
  "Discord", "Google Chat", "iMessage", "Teams", "Matrix",
  "Zalo", "Zalo Personal", "BlueBubbles",
];

/**
 * Tell whether bracket content looks like an envelope header: it contains an
 * ISO ("YYYY-MM-DDTHH:MMZ") or space-separated ("YYYY-MM-DD HH:MM") timestamp,
 * or starts with a known channel name followed by a space.
 */
function looksLikeEnvelopeHeader(header: string): boolean {
  const hasIsoStamp = /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}Z\b/.test(header);
  const hasSpacedStamp = /\d{4}-\d{2}-\d{2} \d{2}:\d{2}\b/.test(header);
  if (hasIsoStamp || hasSpacedStamp) return true;

  for (const channel of ENVELOPE_CHANNELS) {
    if (header.startsWith(`${channel} `)) return true;
  }
  return false;
}

/**
 * Remove a leading channel envelope from a message.
 *
 * Text without a bracketed prefix, or whose prefix does not look like an
 * envelope header, is returned unchanged.
 */
export function stripChannelEnvelope(text: string): string {
  const envelope = ENVELOPE_PREFIX_RE.exec(text);
  if (envelope === null || !looksLikeEnvelopeHeader(envelope[1] ?? "")) {
    return text;
  }
  return text.slice(envelope[0].length);
}
1212
-
1213
/**
 * Matches the "System: [timestamp] <Channel> message in #channel from User: "
 * prefix that OpenClaw puts in front of inbound channel messages.
 *
 * The tail "... from <user>:" is one optional group whose lazy `.*?` sits
 * INSIDE the group. Written as `.*?(?:from \w+:\s*)?` the lazy quantifier and
 * the optional group both match the empty string, so the sender part is never
 * consumed and stays in the text.
 */
const SYSTEM_PREFIX_RE = /^System:\s*\[\d{4}-\d{2}-\d{2}[^\]]*\]\s*(?:Slack message|Telegram message|Discord message|WhatsApp message|Signal message|message)(?:.*?from \w+:\s*)?/i;

/**
 * Strip the "System: [timestamp] Channel message in #channel from User: "
 * prefix from a message.
 *
 * When the line names a sender ("from <user>:"), everything up to and
 * including that marker and the whitespace after it is removed. When it does
 * not, only the "System: [timestamp] ... message" head is removed. Text
 * without the prefix is returned unchanged.
 */
export function stripSystemPrefix(text: string): string {
  const match = SYSTEM_PREFIX_RE.exec(text);
  return match ? text.slice(match[0].length) : text;
}
1224
-
1225
- // ============================================================================
1226
- // Recall Query Builder (v2.0.6: envelope-aware, provenance-based)
1227
- // ============================================================================
1228
-
1229
/**
 * Detect messages that consist of system content only (no user text), such as
 * edit/delete notifications, queued or inter-session markers and "[auto]"
 * events. Empty text and text shorter than three characters also count.
 */
function isSystemOnlyContent(text: string): boolean {
  if (!text || text.length < 3) return true;

  const systemMarkers = ["System:", "[Queued", "[Inter-session"];
  if (systemMarkers.some((marker) => text.startsWith(marker))) return true;

  return /^Slack message (edited|deleted)/.test(text) || /^\[auto\]/.test(text);
}
1243
-
1244
/**
 * Build a recall query from message history.
 *
 * v2.0.6: Strips OpenClaw channel envelopes (System: [...] Slack message from ...:)
 * and inter-session prefixes before building the query. This prevents envelope
 * metadata from polluting semantic search and causing timeouts / false-high scores.
 *
 * Steps:
 *  - Palaia-injected recall context is removed from user messages.
 *  - User messages with provenance `inter_session` or `internal_system` are
 *    excluded; if that leaves nothing, all user messages are used instead.
 *  - Channel envelopes and system prefixes are stripped from the text.
 *  - Walking from newest to oldest, the first message with real content
 *    (not system-only) becomes the query.
 *  - If that message is shorter than 30 characters, the preceding user
 *    message is prepended for context, unless it is empty or system-only.
 *  - The result is hard-capped at 500 characters.
 *
 * @returns The query text, or "" when no user message carries real content.
 */
export function buildRecallQuery(messages: unknown[]): string {
  const texts = extractMessageTexts(messages).map((t) =>
    t.role === "user"
      ? { ...t, text: stripPalaiaInjectedContext(t.text) }
      : t
  );

  // Step 1: drop system-originated user messages (sub-agent results, sessions_send).
  const candidates = texts.filter(
    (t) => t.role === "user" && t.provenance !== "inter_session" && t.provenance !== "internal_system"
  );

  // Fallback: if every user message was filtered out, use all user messages.
  const userMsgs = candidates.length > 0
    ? candidates
    : texts.filter((t) => t.role === "user");

  if (userMsgs.length === 0) return "";

  // Step 2: envelope-free text of the user message at a given position.
  const cleanedAt = (position: number): string =>
    stripSystemPrefix(stripChannelEnvelope(userMsgs[position].text.trim()));

  // Walk backwards to the newest message with actual content. lastText is only
  // assigned on success, so a history made up solely of system-only messages
  // yields "" rather than leaking the last system text into the query.
  let lastText = "";
  let idx = userMsgs.length - 1;
  for (; idx >= 0; idx--) {
    const cleaned = cleanedAt(idx);
    if (cleaned && !isSystemOnlyContent(cleaned)) {
      lastText = cleaned;
      break;
    }
  }

  if (!lastText) return "";

  // Step 3: short messages → include the previous one for context.
  if (lastText.length < 30 && idx > 0) {
    const prevText = cleanedAt(idx - 1);
    if (prevText && !isSystemOnlyContent(prevText)) {
      return `${prevText} ${lastText}`.slice(0, 500);
    }
  }

  return lastText.slice(0, 500);
}
1302
-
1303
- // ============================================================================
1304
- // Query-based Recall: Type-weighted reranking (Issue #65)
1305
- // ============================================================================
1306
-
1307
/** A query hit normalised for prompt injection, with its type-weighted score. */
interface RankedEntry {
  id: string;
  body: string;
  title: string;
  scope: string;
  tier: string;
  type: string;
  /** Score as returned by the query. */
  score: number;
  /** `score` multiplied by the weight configured for `type`. */
  weightedScore: number;
}

/**
 * Re-rank query results by multiplying each score with a per-type weight.
 *
 * Missing fields are defaulted: type "memory", title "(untitled)", and body
 * taken from `content`, then `body`, then "". Types without a configured
 * weight use 1.0.
 *
 * @param results Hits returned by the query.
 * @param weights Weight per entry type.
 * @returns A new array sorted by weighted score, highest first.
 */
export function rerankByTypeWeight(
  results: QueryResult["results"],
  weights: RecallTypeWeights,
): RankedEntry[] {
  const ranked: RankedEntry[] = [];

  for (const hit of results) {
    const type = hit.type || "memory";
    const multiplier = weights[type] ?? 1.0;
    ranked.push({
      id: hit.id,
      body: hit.content || hit.body || "",
      title: hit.title || "(untitled)",
      scope: hit.scope,
      tier: hit.tier,
      type,
      score: hit.score,
      weightedScore: hit.score * multiplier,
    });
  }

  ranked.sort((left, right) => right.weightedScore - left.weightedScore);
  return ranked;
}
1339
-
1340
- // ============================================================================
1341
- // Hook helpers
1342
- // ============================================================================
1343
-
1344
/** Pick the CLI runner settings (binary path, workspace, timeout) out of the plugin config. */
function buildRunnerOpts(config: PalaiaPluginConfig): RunnerOpts {
  const { binaryPath, workspace, timeoutMs } = config;
  return { binaryPath, workspace, timeoutMs };
}
1351
-
1352
- // ============================================================================
1353
- // /palaia status command — Format helpers
1354
- // ============================================================================
1355
-
1356
/**
 * Render the text shown by the status command.
 *
 * @param state  Persisted plugin state (recall counter, first recall timestamp).
 * @param stats  Parsed `status --json` output; both snake_case and camelCase
 *               keys are accepted, and missing values are shown as "?".
 * @param config Plugin configuration.
 * @returns A multi-line status report.
 */
function formatStatusResponse(
  state: PluginState,
  stats: Record<string, unknown>,
  config: PalaiaPluginConfig,
): string {
  const firstRecall = state.firstRecallTimestamp;
  const since = firstRecall ? formatShortDate(firstRecall) : "n/a";

  // Store counters as reported by the CLI.
  const total = stats.total_entries ?? stats.totalEntries ?? "?";
  const hot = stats.hot ?? stats.hotEntries ?? "?";
  const warm = stats.warm ?? stats.warmEntries ?? "?";

  const indicator = config.showMemorySources ? "ON" : "OFF";
  const scope = config.captureScope || "team";

  return [
    "Palaia Memory Status",
    "",
    `Recalls: ${state.successfulRecalls} successful (since ${since})`,
    `Store: ${total} entries (${hot} hot, ${warm} warm)`,
    `Recall indicator: ${indicator}`,
    `Config: autoCapture=${config.autoCapture}, captureScope=${scope}`,
  ].join("\n");
}
1383
-
1384
- // ============================================================================
1385
- // Legacy exports kept for tests
1386
- // ============================================================================
1387
-
1388
- /** Reset all turn state, inbound message store, and cached tokens (for testing and cleanup). */
1389
// Resets module-level state in three steps: empties turnStateBySession, empties
// lastInboundMessageByChannel, then calls resetSlackTokenCache().
// NOTE(review): both collections are declared outside this chunk — presumably
// Maps keyed by session key and by upper-cased channel ID; confirm at their
// declarations. Takes no arguments and returns nothing.
- export function resetTurnState(): void {
1390
- turnStateBySession.clear();
1391
- lastInboundMessageByChannel.clear();
1392
- resetSlackTokenCache();
1393
- }
1394
-
1395
- // ============================================================================
1396
- // Hook registration
1397
- // ============================================================================
1398
-
1399
- /**
1400
- * Register lifecycle hooks on the plugin API.
1401
- */
1402
- export function registerHooks(api: any, config: PalaiaPluginConfig): void {
1403
- // Store api.logger for module-wide use (integrates into OpenClaw log system)
1404
- if (api.logger && typeof api.logger.info === "function") {
1405
- logger = api.logger;
1406
- }
1407
-
1408
- const opts = buildRunnerOpts(config);
1409
-
1410
- // ── Startup checks (H-2, H-3, captureModel validation) ────────
1411
- (async () => {
1412
- // H-2: Warn if no agent is configured
1413
- if (!process.env.PALAIA_AGENT) {
1414
- try {
1415
- const statusOut = await run(["config", "get", "agent"], { ...opts, timeoutMs: 3000 });
1416
- if (!statusOut.trim()) {
1417
- logger.warn(
1418
- "[palaia] No agent configured. Set PALAIA_AGENT env var or run 'palaia init --agent <name>'. " +
1419
- "Auto-captured entries will have no agent attribution."
1420
- );
1421
- }
1422
- } catch {
1423
- logger.warn(
1424
- "[palaia] No agent configured. Set PALAIA_AGENT env var or run 'palaia init --agent <name>'. " +
1425
- "Auto-captured entries will have no agent attribution."
1426
- );
1427
- }
1428
- }
1429
-
1430
- // H-3: Warn if no embedding provider beyond BM25
1431
- try {
1432
- const statusJson = await run(["status", "--json"], { ...opts, timeoutMs: 5000 });
1433
- if (statusJson && statusJson.trim()) {
1434
- const status = JSON.parse(statusJson);
1435
- // embedding_chain can be at top level OR nested under config
1436
- const chain = status.embedding_chain
1437
- || status.embeddingChain
1438
- || status.config?.embedding_chain
1439
- || status.config?.embeddingChain
1440
- || [];
1441
- const hasSemanticProvider = Array.isArray(chain)
1442
- ? chain.some((p: string) => p !== "bm25")
1443
- : false;
1444
- // Also check embedding_provider as a fallback signal
1445
- const hasProviderConfig = !!(
1446
- status.embedding_provider
1447
- || status.config?.embedding_provider
1448
- );
1449
- if (!hasSemanticProvider && !hasProviderConfig) {
1450
- logger.warn(
1451
- "[palaia] No embedding provider configured. Semantic search is inactive (BM25 keyword-only). " +
1452
- "Run 'pip install palaia[fastembed]' and 'palaia doctor --fix' for better recall quality."
1453
- );
1454
- }
1455
- }
1456
- // If statusJson is empty/null, skip warning (CLI may not be available)
1457
- } catch {
1458
- // Non-fatal — status check failed, skip warning (avoid false positive)
1459
- }
1460
-
1461
- // Validate captureModel auth at plugin startup via modelAuth API
1462
- if (config.captureModel && api.runtime?.modelAuth) {
1463
- try {
1464
- const resolved = resolveCaptureModel(api.config, config.captureModel);
1465
- if (resolved?.provider) {
1466
- const key = await api.runtime.modelAuth.resolveApiKeyForProvider({ provider: resolved.provider, cfg: api.config });
1467
- if (!key) {
1468
- logger.warn(`[palaia] captureModel provider "${resolved.provider}" has no API key — auto-capture LLM extraction will fail`);
1469
- }
1470
- }
1471
- } catch { /* non-fatal */ }
1472
- }
1473
- })();
1474
-
1475
- // ── /palaia status command ─────────────────────────────────────
1476
- api.registerCommand({
1477
- name: "palaia-status",
1478
- description: "Show Palaia memory status",
1479
- async handler(_args: string) {
1480
- try {
1481
- const state = await loadPluginState(config.workspace);
1482
-
1483
- let stats: Record<string, unknown> = {};
1484
- try {
1485
- const statsOutput = await run(["status", "--json"], opts);
1486
- stats = JSON.parse(statsOutput || "{}");
1487
- } catch {
1488
- // Non-fatal
1489
- }
1490
-
1491
- return { text: formatStatusResponse(state, stats, config) };
1492
- } catch (error) {
1493
- return { text: `Palaia status error: ${error}` };
1494
- }
1495
- },
1496
- });
1497
-
1498
- // ── message_received (capture inbound message ID for reactions) ─
1499
- api.on("message_received", (event: any, ctx: any) => {
1500
- try {
1501
- const messageId = event?.metadata?.messageId;
1502
- const provider = event?.metadata?.provider;
1503
-
1504
- // ctx.channelId returns the provider name ("slack"), NOT the actual channel ID.
1505
- // ctx.sessionKey is null during message_received.
1506
- // Extract the real channel ID from event.metadata.to / ctx.conversationId.
1507
- const channelId = extractChannelIdFromEvent(event, ctx)
1508
- ?? (resolveSessionKeyFromCtx(ctx) ? extractSlackChannelIdFromSessionKey(resolveSessionKeyFromCtx(ctx)!) : undefined);
1509
- const sessionKey = resolveSessionKeyFromCtx(ctx);
1510
-
1511
-
1512
- if (messageId && channelId && provider && REACTION_SUPPORTED_PROVIDERS.has(provider)) {
1513
- // Normalize channelId to UPPERCASE for consistent lookups
1514
- // (extractSlackChannelIdFromSessionKey returns uppercase)
1515
- const normalizedChannelId = String(channelId).toUpperCase();
1516
- lastInboundMessageByChannel.set(normalizedChannelId, {
1517
- messageId: String(messageId),
1518
- provider,
1519
- timestamp: Date.now(),
1520
- });
1521
-
1522
- // Also populate turnState if sessionKey is available
1523
- if (sessionKey) {
1524
- const turnState = getOrCreateTurnState(sessionKey);
1525
- turnState.lastInboundMessageId = String(messageId);
1526
- turnState.lastInboundChannelId = normalizedChannelId;
1527
- turnState.channelProvider = provider;
1528
- }
1529
- }
1530
- } catch {
1531
- // Non-fatal — never block message flow
1532
- }
1533
- });
1534
-
1535
- // ── before_prompt_build (Issue #65: Query-based Recall) ────────
1536
- if (config.memoryInject) {
1537
- api.on("before_prompt_build", async (event: any, ctx: any) => {
1538
- // Prune stale entries to prevent memory leaks from crashed sessions (C-2)
1539
- pruneStaleEntries();
1540
-
1541
- try {
1542
- const maxChars = config.maxInjectedChars || 4000;
1543
- const limit = Math.min(config.maxResults || 10, 20);
1544
- let entries: QueryResult["results"] = [];
1545
-
1546
- if (config.recallMode === "query") {
1547
- const userMessage = event.messages
1548
- ? buildRecallQuery(event.messages)
1549
- : (event.prompt || null);
1550
-
1551
- if (userMessage && userMessage.length >= 5) {
1552
- // Try embed server first (fast path: ~0.5s), then CLI fallback (~3-14s)
1553
- let serverQueried = false;
1554
- if (config.embeddingServer) {
1555
- try {
1556
- const mgr = getEmbedServerManager(opts);
1557
- const resp = await mgr.query({
1558
- text: userMessage,
1559
- top_k: limit,
1560
- include_cold: config.tier === "all",
1561
- }, config.timeoutMs || 3000);
1562
- if (resp?.result?.results && Array.isArray(resp.result.results)) {
1563
- entries = resp.result.results;
1564
- serverQueried = true;
1565
- }
1566
- } catch (serverError) {
1567
- logger.warn(`[palaia] Embed server query failed, falling back to CLI: ${serverError}`);
1568
- }
1569
- }
1570
-
1571
- // CLI fallback
1572
- if (!serverQueried) {
1573
- try {
1574
- const queryArgs: string[] = ["query", userMessage, "--limit", String(limit)];
1575
- if (config.tier === "all") {
1576
- queryArgs.push("--all");
1577
- }
1578
- const result = await runJson<QueryResult>(queryArgs, { ...opts, timeoutMs: 15000 });
1579
- if (result && Array.isArray(result.results)) {
1580
- entries = result.results;
1581
- }
1582
- } catch (queryError) {
1583
- logger.warn(`[palaia] Query recall failed, falling back to list: ${queryError}`);
1584
- }
1585
- }
1586
- }
1587
- }
1588
-
1589
- // Fallback: list mode (no emoji — list-based recall is not query-relevant)
1590
- let isListFallback = false;
1591
- if (entries.length === 0) {
1592
- isListFallback = true;
1593
- try {
1594
- const listArgs: string[] = ["list"];
1595
- if (config.tier === "all") {
1596
- listArgs.push("--all");
1597
- } else {
1598
- listArgs.push("--tier", config.tier || "hot");
1599
- }
1600
- const result = await runJson<QueryResult>(listArgs, opts);
1601
- if (result && Array.isArray(result.results)) {
1602
- entries = result.results;
1603
- }
1604
- } catch {
1605
- return;
1606
- }
1607
- }
1608
-
1609
- if (entries.length === 0) return;
1610
-
1611
- // Apply type-weighted reranking
1612
- const ranked = rerankByTypeWeight(entries, config.recallTypeWeight);
1613
-
1614
- // Build context string with char budget (compact format for token efficiency)
1615
- const SCOPE_SHORT: Record<string, string> = { team: "t", private: "p", public: "pub" };
1616
- const TYPE_SHORT: Record<string, string> = { memory: "m", process: "pr", task: "tk" };
1617
-
1618
- let text = "## Active Memory (Palaia)\n\n";
1619
- let chars = text.length;
1620
-
1621
- for (const entry of ranked) {
1622
- const scopeKey = SCOPE_SHORT[entry.scope] || entry.scope;
1623
- const typeKey = TYPE_SHORT[entry.type] || entry.type;
1624
- const prefix = `[${scopeKey}/${typeKey}]`;
1625
-
1626
- // If body starts with title (common), skip title to save tokens
1627
- let line: string;
1628
- if (entry.body.toLowerCase().startsWith(entry.title.toLowerCase())) {
1629
- line = `${prefix} ${entry.body}\n\n`;
1630
- } else {
1631
- line = `${prefix} ${entry.title}\n${entry.body}\n\n`;
1632
- }
1633
-
1634
- if (chars + line.length > maxChars) break;
1635
- text += line;
1636
- chars += line.length;
1637
- }
1638
-
1639
- // Persistent usage nudge — compact guidance for the agent
1640
- const USAGE_NUDGE = "[palaia] auto-capture=on. Manual write: --type process (SOPs/checklists) or --type task (todos with assignee/deadline) only. Conversation knowledge is auto-captured — do not duplicate with manual writes.";
1641
- text += USAGE_NUDGE + "\n\n";
1642
-
1643
- // Update recall counter for satisfaction/transparency nudges (Issue #87)
1644
- let nudgeContext = "";
1645
- try {
1646
- const pluginState = await loadPluginState(config.workspace);
1647
- pluginState.successfulRecalls++;
1648
- if (!pluginState.firstRecallTimestamp) {
1649
- pluginState.firstRecallTimestamp = new Date().toISOString();
1650
- }
1651
- const { nudges } = checkNudges(pluginState);
1652
- if (nudges.length > 0) {
1653
- nudgeContext = "\n\n## Agent Nudge (Palaia)\n\n" + nudges.join("\n\n");
1654
- }
1655
- await savePluginState(pluginState, config.workspace);
1656
- } catch {
1657
- // Non-fatal
1658
- }
1659
-
1660
- // Track recall in session-isolated turn state for emoji reactions
1661
- // Only flag recall as meaningful if at least one result scores above threshold
1662
- // List-fallback never triggers brain emoji (not query-relevant)
1663
- const hasRelevantRecall = !isListFallback && entries.some(
1664
- (e) => typeof e.score === "number" && e.score >= config.recallMinScore,
1665
- );
1666
- const sessionKey = resolveSessionKeyFromCtx(ctx);
1667
- if (sessionKey && hasRelevantRecall) {
1668
- const turnState = getOrCreateTurnState(sessionKey);
1669
- turnState.recallOccurred = true;
1670
-
1671
- // Populate channel info — prefer event metadata, fall back to sessionKey
1672
- const provider = extractChannelFromSessionKey(sessionKey);
1673
- if (provider) turnState.channelProvider = provider;
1674
- const slackChannel = extractChannelIdFromEvent(event, ctx)
1675
- ?? extractSlackChannelIdFromSessionKey(sessionKey);
1676
- if (slackChannel) turnState.lastInboundChannelId = slackChannel;
1677
-
1678
- // Try to get the inbound message ID from the message_received store
1679
- if (slackChannel) {
1680
- const inbound = lastInboundMessageByChannel.get(slackChannel);
1681
- if (inbound && (Date.now() - inbound.timestamp) < 30_000) {
1682
- turnState.lastInboundMessageId = inbound.messageId;
1683
- }
1684
- }
1685
- }
1686
-
1687
- // Return prependContext + appendSystemContext for recall emoji
1688
- return {
1689
- prependContext: text + nudgeContext,
1690
- appendSystemContext: config.showMemorySources
1691
- ? "You used Palaia memory in this turn. Add 🧠 at the very end of your response (after everything else, on its own line)."
1692
- : undefined,
1693
- };
1694
- } catch (error) {
1695
- logger.warn(`[palaia] Memory injection failed: ${error}`);
1696
- }
1697
- });
1698
- }
1699
-
1700
- // ── message_sending (Issue #81: Hint stripping) ──────────────────
1701
// Outgoing-message filter: when the text about to be sent contains Palaia
// hint markup, hand back a replacement payload with the hints removed.
// Returning undefined leaves the message untouched.
api.on("message_sending", (_event: any, _ctx: any) => {
  const outgoing = _event?.content;

  if (typeof outgoing === "string") {
    const parsed = parsePalaiaHints(outgoing);
    // Only override the content when something was actually stripped.
    if (parsed.hints.length > 0) {
      return { content: parsed.cleanedText };
    }
  }

  return undefined;
});
1710
-
1711
- // ── agent_end (Issue #64 + #81: Auto-Capture with Metadata + Reactions) ───
1712
if (config.autoCapture) {
  /**
   * agent_end handler (auto-capture enabled).
   *
   * Runs in two phases:
   *   1. Capture  — extract knowledge from the recent exchange (LLM first,
   *                 rule-based fallback) and write it via the CLI.
   *   2. Reaction — send the capture / recall emoji reactions recorded in
   *                 the per-session turn state, then drop that state.
   *
   * Phase 2 runs on every exit path of phase 1. Skipping capture (failed
   * run, too few turns, nothing worth capturing) must not suppress the
   * recall reaction or leave stale turn state behind for the next turn.
   */
  api.on("agent_end", async (event: any, ctx: any) => {
    // Both phases look up the per-session turn state with this key.
    const sessionKey = resolveSessionKeyFromCtx(ctx);

    /**
     * Phase 1: attempt auto-capture for this turn.
     *
     * @returns true when at least one entry was written, false when
     *          capture was skipped or produced nothing worth storing.
     * @throws  propagates errors from project loading and from the
     *          rule-based write; LLM extraction errors are handled here.
     */
    const captureTurn = async (): Promise<boolean> => {
      if (!event?.success || !event.messages || event.messages.length === 0) {
        return false;
      }

      const agentName = process.env.PALAIA_AGENT || undefined;
      const allTexts = extractMessageTexts(event.messages);

      const userTurns = allTexts.filter((t) => t.role === "user").length;
      if (userTurns < config.captureMinTurns) {
        return false;
      }

      // Parse capture hints from all messages (Issue #81)
      const collectedHints: PalaiaHint[] = [];
      for (const t of allTexts) {
        const { hints } = parsePalaiaHints(t.text);
        collectedHints.push(...hints);
      }
      // The first hint carrying a project / scope wins. These lookups do not
      // depend on the individual extraction result, so resolve them once.
      const hintForProject = collectedHints.find((h) => h.project);
      const hintForScope = collectedHints.find((h) => h.scope);

      // Strip Palaia-injected recall context from user messages to prevent a
      // feedback loop: the recall block is prepended to user messages by
      // before_prompt_build, and without stripping, auto-capture would
      // re-capture previously recalled memories.
      const cleanedTexts = allTexts.map((t) =>
        t.role === "user"
          ? { ...t, text: stripPalaiaInjectedContext(t.text) }
          : t
      );

      // Only extract from recent exchanges — full history causes LLM
      // timeouts and dilutes extraction quality.
      const recentTexts = trimToRecentExchanges(cleanedTexts);

      // Build exchange text from the recent window only
      const exchangeParts: string[] = [];
      for (const t of recentTexts) {
        const { cleanedText } = parsePalaiaHints(t.text);
        exchangeParts.push(`[${t.role}]: ${cleanedText}`);
      }
      const exchangeText = exchangeParts.join("\n");

      if (!shouldAttemptCapture(exchangeText)) {
        return false;
      }

      const knownProjects = await loadProjects(opts);

      // Number of entries actually written during this turn. Drives the
      // capture confirmation, so "LLM ran but stored nothing" is not
      // reported as a capture.
      let writtenCount = 0;

      // Helper: build CLI args with metadata
      const buildWriteArgs = (
        content: string,
        type: string,
        tags: string[],
        itemProject?: string | null,
        itemScope?: string | null,
      ): string[] => {
        const args: string[] = [
          "write",
          content,
          "--type", type,
          "--tags", tags.join(",") || "auto-capture",
        ];

        // Scope guardrail: config.captureScope overrides everything;
        // otherwise max team (no public)
        const scope = config.captureScope
          ? sanitizeScope(config.captureScope, "team", true)
          : sanitizeScope(itemScope, "team", false);
        args.push("--scope", scope);

        const project = config.captureProject || itemProject;
        if (project) {
          args.push("--project", project);
        }

        if (agentName) {
          args.push("--agent", agentName);
        }

        return args;
      };

      // Helper: store LLM extraction results that meet the significance bar
      const storeLLMResults = async (results: ExtractionResult[]): Promise<void> => {
        for (const r of results) {
          // Negated ">=" (rather than "<") so a missing / NaN significance
          // is skipped, exactly as a plain ">=" guard would do.
          if (!(r.significance >= config.captureMinSignificance)) continue;

          const effectiveScope = hintForScope?.scope || r.scope;

          // Project validation: reject unknown projects
          let validatedProject = hintForProject?.project || r.project;
          if (validatedProject && knownProjects.length > 0) {
            const wanted = validatedProject.toLowerCase();
            const isKnown = knownProjects.some(
              (p) => p.name.toLowerCase() === wanted,
            );
            if (!isKnown) {
              logger.info(`[palaia] Auto-capture: unknown project "${validatedProject}" ignored`);
              validatedProject = null;
            }
          }

          // Always include auto-capture tag for GC identification
          const tags = [...r.tags];
          if (!tags.includes("auto-capture")) tags.push("auto-capture");

          const args = buildWriteArgs(
            r.content,
            r.type,
            tags,
            validatedProject,
            effectiveScope,
          );
          await run(args, { ...opts, timeoutMs: 10_000 });
          writtenCount++;
          logger.info(
            `[palaia] LLM auto-captured: type=${r.type}, significance=${r.significance}, tags=${tags.join(",")}, project=${validatedProject || "none"}, scope=${effectiveScope || "team"}`
          );
        }
      };

      // LLM-based extraction (primary)
      let llmHandled = false;
      try {
        const results = await extractWithLLM(event.messages, api.config, {
          captureModel: config.captureModel,
        }, knownProjects);

        await storeLLMResults(results);
        llmHandled = true;
      } catch (llmError) {
        // Check if this is a model-availability error (not a generic import failure)
        const errStr = String(llmError);
        const isModelError = /FailoverError|Unknown model|unknown model|401|403|model.*not found|not_found|model_not_found/i.test(errStr);

        if (isModelError && config.captureModel) {
          // captureModel is broken — try primary model as fallback
          if (!_captureModelFailoverWarned) {
            _captureModelFailoverWarned = true;
            logger.warn(`[palaia] WARNING: captureModel failed (${errStr}). Using primary model as fallback. Please update captureModel in your config.`);
          }
          try {
            // Retry without captureModel → resolveCaptureModel will use primary model
            const fallbackResults = await extractWithLLM(event.messages, api.config, {
              captureModel: undefined,
            }, knownProjects);
            await storeLLMResults(fallbackResults);
            llmHandled = true;
          } catch (fallbackError) {
            if (!_llmImportFailureLogged) {
              logger.warn(`[palaia] LLM extraction failed (primary model fallback also failed): ${fallbackError}`);
              _llmImportFailureLogged = true;
            }
          }
        } else if (!_llmImportFailureLogged) {
          logger.warn(`[palaia] LLM extraction failed, using rule-based fallback: ${llmError}`);
          _llmImportFailureLogged = true;
        }
      }

      // Rule-based fallback (max 1 per turn)
      if (!llmHandled) {
        let captureData: { tags: string[]; type: string; summary: string };

        if (config.captureFrequency === "significant") {
          const significance = extractSignificance(exchangeText);
          if (!significance) {
            return writtenCount > 0;
          }
          captureData = significance;
        } else {
          const summary = exchangeParts
            .slice(-4)
            .map((p) => p.slice(0, 200))
            .join(" | ")
            .slice(0, 500);
          captureData = { tags: ["auto-capture"], type: "memory", summary };
        }

        // Always include auto-capture tag for GC identification. Work on a
        // copy so the array returned by extractSignificance is not mutated.
        const tags = captureData.tags.includes("auto-capture")
          ? captureData.tags
          : [...captureData.tags, "auto-capture"];

        const args = buildWriteArgs(
          captureData.summary,
          captureData.type,
          tags,
          hintForProject?.project,
          hintForScope?.scope,
        );

        await run(args, { ...opts, timeoutMs: 10_000 });
        writtenCount++;
        logger.info(
          `[palaia] Rule-based auto-captured: type=${captureData.type}, tags=${tags.join(",")}`
        );
      }

      return writtenCount > 0;
    };

    // ── Phase 1: capture ────────────────────────────────────────
    let captured = false;
    try {
      captured = await captureTurn();
    } catch (error) {
      logger.warn(`[palaia] Auto-capture failed: ${error}`);
    }

    // ── Phase 2: Emoji Reactions (Issue #87) ────────────────────
    // Send reactions AFTER capture completes, using turn state.
    if (!sessionKey) return;

    try {
      // Mark that capture occurred in this turn
      if (captured) {
        getOrCreateTurnState(sessionKey).capturedInThisTurn = true;
      }

      const turnState = turnStateBySession.get(sessionKey);
      if (turnState) {
        const provider = turnState.channelProvider
          || extractChannelFromSessionKey(sessionKey)
          || (ctx?.channelId as string | undefined);
        const channelId = turnState.lastInboundChannelId
          || extractChannelIdFromEvent(event, ctx)
          || extractSlackChannelIdFromSessionKey(sessionKey);
        const messageId = turnState.lastInboundMessageId;

        if (provider && REACTION_SUPPORTED_PROVIDERS.has(provider) && channelId && messageId) {
          // Capture confirmation: 💾
          if (turnState.capturedInThisTurn && config.showCaptureConfirm) {
            await sendReaction(channelId, messageId, "floppy_disk", provider);
          }

          // Recall indicator: 🧠
          if (turnState.recallOccurred && config.showMemorySources) {
            await sendReaction(channelId, messageId, "brain", provider);
          }
        }
      }
    } catch (reactionError) {
      logger.warn(`[palaia] Reaction sending failed: ${reactionError}`);
    } finally {
      // Always clean up turn state so flags never leak into the next turn
      deleteTurnState(sessionKey);
    }
  });
}
1970
-
1971
- // ── agent_end: Recall-only reactions (when autoCapture is off) ─
1972
if (!config.autoCapture && config.showMemorySources) {
  // With auto-capture disabled there is nothing to store at agent_end; this
  // handler only posts the recall indicator (🧠) for turns flagged in the
  // turn state, and then discards that state.
  api.on("agent_end", async (_event: any, ctx: any) => {
    const sessionKey = resolveSessionKeyFromCtx(ctx);
    if (!sessionKey) return;

    try {
      const state = turnStateBySession.get(sessionKey);
      // Early exit still passes through `finally`, so the state is cleared.
      if (!state?.recallOccurred) return;

      // Prefer values recorded in the turn state, then derive the rest
      // from the event / session key.
      const provider = state.channelProvider
        || extractChannelFromSessionKey(sessionKey);
      const channelId = state.lastInboundChannelId
        || extractChannelIdFromEvent(_event, ctx)
        || extractSlackChannelIdFromSessionKey(sessionKey);
      const messageId = state.lastInboundMessageId;

      // A reaction needs a supported provider plus a concrete target message.
      if (!provider || !REACTION_SUPPORTED_PROVIDERS.has(provider) || !channelId || !messageId) {
        return;
      }

      await sendReaction(channelId, messageId, "brain", provider);
    } catch (err) {
      logger.warn(`[palaia] Recall reaction failed: ${err}`);
    } finally {
      deleteTurnState(sessionKey);
    }
  });
}
1998
-
1999
- // ── Startup Recovery Service ───────────────────────────────────
2000
// Registers the "palaia-recovery" service. On start it awaits recover(opts)
// and reports the outcome: replayed entries at info level, errors at warn
// level. Nothing is logged when both counters are zero.
api.registerService({
  id: "palaia-recovery",
  async start() {
    const outcome = await recover(opts);

    const replayedCount = outcome.replayed;
    if (replayedCount > 0) {
      logger.info(`[palaia] WAL recovery: replayed ${replayedCount} entries`);
    }

    const errorCount = outcome.errors;
    if (errorCount > 0) {
      logger.warn(`[palaia] WAL recovery completed with ${errorCount} error(s)`);
    }
  },
});
2012
- }