teleton 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/bootstrap-DDFVEMYI.js +128 -0
  2. package/dist/{server-3FHI2SEB.js → chunk-2ERTYRHA.js} +26 -372
  3. package/dist/{chunk-5FNWBZ5K.js → chunk-33Z47EXI.js} +264 -274
  4. package/dist/{chunk-3S4GGLLR.js → chunk-35MX4ZUI.js} +23 -104
  5. package/dist/chunk-3UFPFWYP.js +12 -0
  6. package/dist/chunk-5SEMA47R.js +75 -0
  7. package/dist/{chunk-PHSAHTK4.js → chunk-6OOHHJ4N.js} +3 -108
  8. package/dist/{chunk-CGOXE4WP.js → chunk-7MWKT67G.js} +467 -914
  9. package/dist/chunk-AEHTQI3H.js +142 -0
  10. package/dist/{chunk-S6PHGKOC.js → chunk-AERHOXGC.js} +88 -322
  11. package/dist/chunk-ALKAAG4O.js +487 -0
  12. package/dist/{chunk-UP55PXFH.js → chunk-C4NKJT2Z.js} +8 -0
  13. package/dist/chunk-CUE4UZXR.js +129 -0
  14. package/dist/chunk-FUNF6H4W.js +251 -0
  15. package/dist/{chunk-7U7BOHCL.js → chunk-GHMXWAXI.js} +147 -63
  16. package/dist/{chunk-QBHRXLZS.js → chunk-H7MFXJZK.js} +2 -2
  17. package/dist/{chunk-QV2GLOTK.js → chunk-LC4TV3KL.js} +1 -1
  18. package/dist/{chunk-AYWEJCDB.js → chunk-LVTKJQ7O.js} +12 -10
  19. package/dist/{chunk-RCMD3U65.js → chunk-NQ6FZKCE.js} +13 -0
  20. package/dist/chunk-NVKBBTI6.js +128 -0
  21. package/dist/{setup-server-32XGDPE6.js → chunk-OIMAE24Q.js} +55 -216
  22. package/dist/{chunk-OJCLKU5Z.js → chunk-WFTC3JJW.js} +16 -0
  23. package/dist/chunk-WTDAICGT.js +175 -0
  24. package/dist/{chunk-KVXV7EF7.js → chunk-XDZDOKIF.js} +2 -2
  25. package/dist/cli/index.js +91 -27
  26. package/dist/{client-MPHPIZB6.js → client-5KD25NOP.js} +5 -4
  27. package/dist/{get-my-gifts-CC6HAVWB.js → get-my-gifts-Y7EN7RK4.js} +3 -3
  28. package/dist/index.js +19 -13
  29. package/dist/local-IHKJFQJS.js +9 -0
  30. package/dist/{memory-UBHM7ILG.js → memory-QMJRM3XJ.js} +9 -5
  31. package/dist/memory-hook-VUNWZ3NY.js +19 -0
  32. package/dist/{migrate-UBBEJ5BL.js → migrate-5VBAP52B.js} +5 -4
  33. package/dist/server-JF6FX772.js +813 -0
  34. package/dist/server-N4T7E25M.js +396 -0
  35. package/dist/setup-server-IX3BFPPH.js +217 -0
  36. package/dist/{store-M5IMUQCL.js → store-BY7S6IFN.js} +6 -5
  37. package/dist/{task-dependency-resolver-RR2O5S7B.js → task-dependency-resolver-L6UUMTHK.js} +2 -2
  38. package/dist/{task-executor-6W5HRX5C.js → task-executor-XBNJLUCS.js} +2 -2
  39. package/dist/{tool-adapter-IH5VGBOO.js → tool-adapter-IVX2XQJE.js} +1 -1
  40. package/dist/{tool-index-PMAOXWUA.js → tool-index-FTERJSZK.js} +4 -3
  41. package/dist/{transcript-NGDPSNIH.js → transcript-IM7G25OS.js} +2 -2
  42. package/package.json +4 -2
  43. package/dist/chunk-XBE4JB7C.js +0 -8
@@ -0,0 +1,487 @@
1
+ import {
2
+ getErrorMessage
3
+ } from "./chunk-3UFPFWYP.js";
4
+ import {
5
+ ADAPTIVE_CHUNK_RATIO_BASE,
6
+ ADAPTIVE_CHUNK_RATIO_MIN,
7
+ ADAPTIVE_CHUNK_RATIO_TRIGGER,
8
+ CHARS_PER_TOKEN_ESTIMATE,
9
+ DEFAULT_CONTEXT_WINDOW,
10
+ DEFAULT_MAX_SUMMARY_TOKENS,
11
+ DEFAULT_SUMMARY_FALLBACK_TOKENS,
12
+ OVERSIZED_MESSAGE_RATIO,
13
+ SESSION_SLUG_MAX_TOKENS,
14
+ SESSION_SLUG_RECENT_MESSAGES,
15
+ TOKEN_ESTIMATE_SAFETY_MARGIN
16
+ } from "./chunk-C4NKJT2Z.js";
17
+ import {
18
+ getUtilityModel
19
+ } from "./chunk-LVTKJQ7O.js";
20
+ import {
21
+ createLogger
22
+ } from "./chunk-NQ6FZKCE.js";
23
+
24
+ // src/session/memory-hook.ts
25
+ import { writeFile, mkdir, readdir, readFile, unlink } from "fs/promises";
26
+ import { join } from "path";
27
+ import { complete as complete2 } from "@mariozechner/pi-ai";
28
+
29
+ // src/memory/ai-summarization.ts
30
+ import {
31
+ complete
32
+ } from "@mariozechner/pi-ai";
33
var log = createLogger("Memory");
/**
 * Estimate the token count of a message body from its character length.
 * Heuristic only (no tokenizer): chars / CHARS_PER_TOKEN_ESTIMATE, scaled by
 * TOKEN_ESTIMATE_SAFETY_MARGIN and rounded up.
 *
 * @param {string} content - Raw message text.
 * @returns {number} Estimated token count (non-negative integer).
 */
function estimateMessageTokens(content) {
  return Math.ceil(content.length / CHARS_PER_TOKEN_ESTIMATE * TOKEN_ESTIMATE_SAFETY_MARGIN);
}
37
/**
 * Greedily partition messages into chunks whose estimated token totals stay
 * at or below maxChunkTokens. A single message that alone exceeds the limit
 * still gets emitted, as a chunk of its own.
 *
 * @param {Array} messages - Conversation messages.
 * @param {number} maxChunkTokens - Soft cap per chunk (estimated tokens).
 * @returns {Array<Array>} Chunks in original message order.
 */
function splitMessagesByTokens(messages, maxChunkTokens) {
  if (messages.length === 0) {
    return [];
  }
  const chunks = [];
  let batch = [];
  let batchTokens = 0;
  for (const message of messages) {
    const tokens = estimateMessageTokens(extractMessageContent(message));
    // Flush the running batch before adding a message that would overflow it.
    if (batch.length > 0 && batchTokens + tokens > maxChunkTokens) {
      chunks.push(batch);
      batch = [];
      batchTokens = 0;
    }
    batch.push(message);
    batchTokens += tokens;
    // An oversized message is emitted immediately as a singleton chunk.
    if (tokens > maxChunkTokens && batch.length === 1) {
      chunks.push(batch);
      batch = [];
      batchTokens = 0;
    }
  }
  if (batch.length > 0) {
    chunks.push(batch);
  }
  return chunks;
}
65
/**
 * Pull plain text out of a message for token estimation and summarization.
 * User messages yield their string content (or a placeholder for structured
 * content); assistant messages yield their text blocks joined by newlines;
 * every other role yields "".
 *
 * @param {object} message - Message with a `role` and `content`.
 * @returns {string} Extracted text (possibly empty).
 */
function extractMessageContent(message) {
  switch (message.role) {
    case "user":
      if (typeof message.content === "string") {
        return message.content;
      }
      return "[complex content]";
    case "assistant": {
      const parts = [];
      for (const block of message.content) {
        if (block.type === "text") {
          parts.push(block.text);
        }
      }
      return parts.join("\n");
    }
    default:
      return "";
  }
}
73
/**
 * Render messages as a plain-text transcript for the summarization prompt.
 * User lines drop any "[...] " envelope before the body (first "] " match);
 * assistant messages contribute a text line and/or a tool-usage line; tool
 * results become bracketed placeholders. Entries are joined by blank lines.
 *
 * @param {Array} messages - Conversation messages.
 * @returns {string} Formatted transcript (may be empty).
 */
function formatMessagesForSummary(messages) {
  const lines = [];
  for (const msg of messages) {
    switch (msg.role) {
      case "user": {
        const raw = typeof msg.content === "string" ? msg.content : "[complex]";
        // Strip everything up to and including the first "] " (e.g. a
        // timestamp/sender prefix); keep the full text when absent.
        const match = raw.match(/\] (.+)/s);
        lines.push(`User: ${match ? match[1] : raw}`);
        break;
      }
      case "assistant": {
        const texts = [];
        const tools = [];
        for (const block of msg.content) {
          if (block.type === "text") {
            texts.push(block.text);
          } else if (block.type === "toolCall") {
            tools.push(block.name);
          }
        }
        if (texts.length > 0) {
          lines.push(`Assistant: ${texts.join("\n")}`);
        }
        if (tools.length > 0) {
          lines.push(`[Used tools: ${tools.join(", ")}]`);
        }
        break;
      }
      case "toolResult":
        lines.push(`[Tool result: ${msg.toolName}]`);
        break;
    }
  }
  return lines.join("\n\n");
}
98
/**
 * Decide whether a message is too large to include in a summarization pass:
 * true when its estimated tokens exceed OVERSIZED_MESSAGE_RATIO of the
 * model's context window.
 *
 * @param {object} message - Message to inspect.
 * @param {number} contextWindow - Model context window in tokens.
 * @returns {boolean} True if the message should be skipped.
 */
function isOversizedForSummary(message, contextWindow) {
  const estimated = estimateMessageTokens(extractMessageContent(message));
  return estimated > contextWindow * OVERSIZED_MESSAGE_RATIO;
}
103
/**
 * Choose a chunk-size ratio for summarization, shrinking it when the average
 * message is large relative to the context window. Returns the base ratio for
 * empty input or small messages; otherwise reduces the base by up to
 * (base - min), proportional to the average message/window ratio.
 *
 * @param {Array} messages - Messages to be summarized.
 * @param {number} contextWindow - Model context window in tokens.
 * @returns {number} Chunk ratio in [ADAPTIVE_CHUNK_RATIO_MIN, ADAPTIVE_CHUNK_RATIO_BASE].
 */
function computeAdaptiveChunkRatio(messages, contextWindow) {
  const base = ADAPTIVE_CHUNK_RATIO_BASE;
  const floor = ADAPTIVE_CHUNK_RATIO_MIN;
  if (messages.length === 0) {
    return base;
  }
  const totalTokens = messages.reduce(
    (sum, msg) => sum + estimateMessageTokens(extractMessageContent(msg)),
    0
  );
  // Average message size as a fraction of the context window.
  const avgRatio = totalTokens / messages.length / contextWindow;
  if (avgRatio <= ADAPTIVE_CHUNK_RATIO_TRIGGER) {
    return base;
  }
  const reduction = Math.min(avgRatio * 2, base - floor);
  return Math.max(floor, base - reduction);
}
122
/**
 * Summarize a batch of messages with a single utility-model completion.
 *
 * @param {object} params - { messages, apiKey, maxSummaryTokens?,
 *   customInstructions?, provider?, utilityModel? }
 * @returns {Promise<string>} The trimmed summary text, or a fixed placeholder
 *   when there is no content / the model returned no text.
 * @throws {Error} "Summarization failed: ..." when the completion call fails.
 */
async function summarizeViaClaude(params) {
  const provider = params.provider || "anthropic";
  const model = getUtilityModel(provider, params.utilityModel);
  const maxTokens = params.maxSummaryTokens ?? DEFAULT_SUMMARY_FALLBACK_TOKENS;
  const formatted = formatMessagesForSummary(params.messages);
  // Nothing extractable (e.g. only tool traffic with no text) — skip the API call.
  if (!formatted.trim()) {
    return "No conversation content to summarize.";
  }
  const defaultInstructions = `Summarize this conversation concisely. Focus on:
- Key decisions made
- Action items and TODOs
- Open questions
- Important context and constraints
- Technical details that matter

Be specific but concise. Preserve critical information.`;
  // Caller-supplied instructions are appended to (not replacing) the defaults.
  const instructions = params.customInstructions ? `${defaultInstructions}

Additional focus:
${params.customInstructions}` : defaultInstructions;
  try {
    const context = {
      messages: [
        {
          role: "user",
          content: `${instructions}

Conversation:
${formatted}`,
          timestamp: Date.now()
        }
      ]
    };
    const response = await complete(model, context, {
      apiKey: params.apiKey,
      maxTokens
    });
    // Only the first text block of the response is used.
    const textContent = response.content.find((block) => block.type === "text");
    const summary = textContent?.type === "text" ? textContent.text : "";
    return summary.trim() || "Unable to generate summary.";
  } catch (error) {
    log.error({ err: error }, "Summarization error");
    throw new Error(`Summarization failed: ${getErrorMessage(error)}`);
  }
}
167
/**
 * Summarize messages that may exceed a single prompt: split into token-bounded
 * chunks, summarize each chunk (at half the summary budget), then merge the
 * partial summaries with a final completion. A single chunk short-circuits to
 * one summarization call.
 *
 * @param {object} params - { messages, apiKey, maxChunkTokens,
 *   maxSummaryTokens?, customInstructions?, provider?, utilityModel? }
 * @returns {Promise<{summary: string, tokensUsed: number, chunksProcessed: number}>}
 * @throws {Error} Propagates failures from summarizeViaClaude / complete.
 */
async function summarizeInChunks(params) {
  if (params.messages.length === 0) {
    return {
      summary: "No messages to summarize.",
      tokensUsed: 0,
      chunksProcessed: 0
    };
  }
  const chunks = splitMessagesByTokens(params.messages, params.maxChunkTokens);
  log.info(`Splitting into ${chunks.length} chunks for summarization`);
  // Fast path: everything fits in one chunk, no merge step needed.
  if (chunks.length === 1) {
    const summary = await summarizeViaClaude({
      messages: chunks[0],
      apiKey: params.apiKey,
      maxSummaryTokens: params.maxSummaryTokens,
      customInstructions: params.customInstructions,
      provider: params.provider,
      utilityModel: params.utilityModel
    });
    return {
      summary,
      tokensUsed: estimateMessageTokens(summary),
      chunksProcessed: 1
    };
  }
  // Summarize each chunk sequentially (preserves order; avoids parallel rate spikes).
  const partialSummaries = [];
  for (let i = 0; i < chunks.length; i++) {
    log.info(`Summarizing chunk ${i + 1}/${chunks.length} (${chunks[i].length} messages)`);
    const partial = await summarizeViaClaude({
      messages: chunks[i],
      apiKey: params.apiKey,
      // Half budget per partial, so the merged result can stay within the full budget.
      maxSummaryTokens: Math.floor(
        (params.maxSummaryTokens ?? DEFAULT_SUMMARY_FALLBACK_TOKENS) / 2
      ),
      customInstructions: params.customInstructions,
      provider: params.provider,
      utilityModel: params.utilityModel
    });
    partialSummaries.push(partial);
  }
  log.info(`Merging ${partialSummaries.length} partial summaries`);
  const provider = params.provider || "anthropic";
  const model = getUtilityModel(provider, params.utilityModel);
  const mergeContext = {
    messages: [
      {
        role: "user",
        content: `Merge these partial conversation summaries into one cohesive summary.
Preserve all key decisions, action items, open questions, and important context.
Do not add new information - only synthesize what's provided.

Partial summaries:

${partialSummaries.map((s, i) => `Part ${i + 1}:
${s}`).join("\n\n---\n\n")}`,
        timestamp: Date.now()
      }
    ]
  };
  const mergeResponse = await complete(model, mergeContext, {
    apiKey: params.apiKey,
    maxTokens: params.maxSummaryTokens ?? DEFAULT_SUMMARY_FALLBACK_TOKENS
  });
  const textContent = mergeResponse.content.find((block) => block.type === "text");
  const merged = textContent?.type === "text" ? textContent.text : "";
  return {
    summary: merged.trim() || "Unable to merge summaries.",
    tokensUsed: estimateMessageTokens(merged),
    chunksProcessed: chunks.length
  };
}
238
/**
 * Summarize a conversation with layered fallbacks so callers always receive a
 * result object and never an exception:
 *   1. Chunked summarization of all messages (chunk size adapts to message size).
 *   2. On failure, retry with oversized messages dropped; the omissions are
 *      appended to the summary as bracketed notes.
 *   3. If that also fails, return a descriptive note instead of a summary.
 *
 * @param {object} params - { messages, apiKey, contextWindow,
 *   maxSummaryTokens?, customInstructions?, provider?, utilityModel? }
 * @returns {Promise<{summary: string, tokensUsed: number, chunksProcessed: number}>}
 */
async function summarizeWithFallback(params) {
  if (params.messages.length === 0) {
    return {
      summary: "No messages to summarize.",
      tokensUsed: 0,
      chunksProcessed: 0
    };
  }
  // Shrink chunk size when the average message is large relative to the window.
  const chunkRatio = computeAdaptiveChunkRatio(params.messages, params.contextWindow);
  const maxChunkTokens = Math.floor(params.contextWindow * chunkRatio);
  log.info(
    `AI Summarization: ${params.messages.length} messages, chunk ratio: ${(chunkRatio * 100).toFixed(0)}%`
  );
  // Attempt 1: summarize everything.
  try {
    return await summarizeInChunks({
      messages: params.messages,
      apiKey: params.apiKey,
      maxChunkTokens,
      maxSummaryTokens: params.maxSummaryTokens,
      customInstructions: params.customInstructions,
      provider: params.provider,
      utilityModel: params.utilityModel
    });
  } catch (fullError) {
    // Shared helper keeps error formatting consistent with summarizeViaClaude.
    log.warn(`Full summarization failed: ${getErrorMessage(fullError)}`);
  }
  // Attempt 2: drop oversized messages and summarize the remainder.
  const smallMessages = [];
  const oversizedNotes = [];
  for (const msg of params.messages) {
    if (isOversizedForSummary(msg, params.contextWindow)) {
      const content = extractMessageContent(msg);
      const tokens = estimateMessageTokens(content);
      oversizedNotes.push(
        `[Large ${msg.role} message (~${Math.round(tokens / 1e3)}K tokens) omitted from summary]`
      );
    } else {
      smallMessages.push(msg);
    }
  }
  log.info(
    `Fallback: Processing ${smallMessages.length} messages, skipping ${oversizedNotes.length} oversized`
  );
  if (smallMessages.length > 0) {
    try {
      const result = await summarizeInChunks({
        messages: smallMessages,
        apiKey: params.apiKey,
        maxChunkTokens,
        maxSummaryTokens: params.maxSummaryTokens,
        customInstructions: params.customInstructions,
        provider: params.provider,
        utilityModel: params.utilityModel
      });
      // Append placeholders for what was omitted so the summary stays honest.
      const notes = oversizedNotes.length > 0 ? `

${oversizedNotes.join("\n")}` : "";
      return {
        summary: result.summary + notes,
        tokensUsed: result.tokensUsed,
        chunksProcessed: result.chunksProcessed
      };
    } catch (partialError) {
      log.warn(`Partial summarization also failed: ${getErrorMessage(partialError)}`);
    }
  }
  // Attempt 3: summarization is impossible — return an explanatory note.
  const note = `Context contained ${params.messages.length} messages (${oversizedNotes.length} were oversized). AI summarization unavailable due to size constraints. Recent conversation history was preserved.`;
  return {
    summary: note,
    tokensUsed: estimateMessageTokens(note),
    chunksProcessed: 0
  };
}
314
+
315
+ // src/session/memory-hook.ts
316
var log2 = createLogger("Session");
/**
 * Ask the utility model for a short kebab-case slug describing the recent
 * conversation (last SESSION_SLUG_RECENT_MESSAGES messages). The raw model
 * output is sanitized to [a-z0-9-], capped at 50 chars. On any failure a
 * time-based fallback slug ("session-HHMM", local time) is returned — this
 * function never throws.
 *
 * @param {object} params - { messages, apiKey, provider?, utilityModel? }
 * @returns {Promise<string>} A non-empty slug.
 */
async function generateSlugViaClaude(params) {
  const provider = params.provider || "anthropic";
  const model = getUtilityModel(provider, params.utilityModel);
  const formatted = formatMessagesForSummary(params.messages.slice(-SESSION_SLUG_RECENT_MESSAGES));
  if (!formatted.trim()) {
    return "empty-session";
  }
  try {
    const context = {
      messages: [
        {
          role: "user",
          content: `Generate a short, descriptive slug (2-4 words, kebab-case) for this conversation.
Examples: "gift-transfer-fix", "context-overflow-debug", "telegram-integration"

Conversation:
${formatted}

Slug:`,
          timestamp: Date.now()
        }
      ]
    };
    const response = await complete2(model, context, {
      apiKey: params.apiKey,
      maxTokens: SESSION_SLUG_MAX_TOKENS
    });
    const textContent = response.content.find((block) => block.type === "text");
    const slug = textContent?.type === "text" ? textContent.text.trim() : "";
    // Normalize: lowercase, strip non-slug chars, collapse whitespace/dashes, cap length.
    return slug.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").slice(0, 50) || "session";
  } catch (error) {
    log2.warn({ err: error }, "Slug generation failed, using fallback");
    const now = /* @__PURE__ */ new Date();
    return `session-${now.getHours().toString().padStart(2, "0")}${now.getMinutes().toString().padStart(2, "0")}`;
  }
}
353
/**
 * Persist a markdown memory file for a compacted session under
 * TELETON_ROOT/memory/<date>-<slug>.md. Generates a semantic slug and an AI
 * summary; if summarization fails, writes a placeholder summary instead.
 * All errors are caught and logged — this hook never throws.
 *
 * @param {object} params - { context, oldSessionId, newSessionId, chatId,
 *   apiKey, provider?, utilityModel? }
 * @returns {Promise<void>}
 */
async function saveSessionMemory(params) {
  try {
    // Dynamic import keeps the paths chunk out of this chunk's static graph.
    const { TELETON_ROOT } = await import("./paths-XA2RJH4S.js");
    const memoryDir = join(TELETON_ROOT, "memory");
    await mkdir(memoryDir, { recursive: true });
    const now = /* @__PURE__ */ new Date();
    const dateStr = now.toISOString().split("T")[0];
    log2.info("Generating semantic slug for session memory...");
    const slug = await generateSlugViaClaude({
      messages: params.context.messages,
      apiKey: params.apiKey,
      provider: params.provider,
      utilityModel: params.utilityModel
    });
    const filename = `${dateStr}-${slug}.md`;
    const filepath = join(memoryDir, filename);
    // HH:MM:SS (UTC) from the ISO timestamp.
    const timeStr = now.toISOString().split("T")[1].split(".")[0];
    log2.info("Generating session summary...");
    let summary;
    try {
      summary = await summarizeViaClaude({
        messages: params.context.messages,
        apiKey: params.apiKey,
        maxSummaryTokens: DEFAULT_MAX_SUMMARY_TOKENS,
        customInstructions: "Summarize this session comprehensively. Include key topics, decisions made, problems solved, and important context.",
        provider: params.provider,
        utilityModel: params.utilityModel
      });
    } catch (error) {
      // Degrade gracefully: still write the memory file with a stub summary.
      log2.warn({ err: error }, "Session summary generation failed");
      summary = `Session contained ${params.context.messages.length} messages. Summary generation failed.`;
    }
    const content = `# Session Memory: ${dateStr} ${timeStr} UTC

## Metadata

- **Old Session ID**: \`${params.oldSessionId}\`
- **New Session ID**: \`${params.newSessionId}\`
- **Chat ID**: \`${params.chatId}\`
- **Timestamp**: ${now.toISOString()}
- **Message Count**: ${params.context.messages.length}

## Session Summary

${summary}

## Context

This session was compacted and migrated to a new session ID. The summary above preserves key information for continuity.

---

*Generated automatically by Teleton-AI session memory hook*
`;
    await writeFile(filepath, content, "utf-8");
    // Log a shortened, home-relative path for readability.
    const relPath = filepath.replace(TELETON_ROOT, "~/.teleton");
    log2.info(`Session memory saved: ${relPath}`);
  } catch (error) {
    log2.error({ err: error }, "Failed to save session memory");
  }
}
414
// Consolidation kicks in once this many per-session files exist...
var CONSOLIDATION_THRESHOLD = 20;
// ...and merges the oldest batch of this size into one consolidated file.
var CONSOLIDATION_BATCH = 10;
/**
 * Merge the oldest session-memory files into a single consolidated markdown
 * file to keep the memory directory bounded. No-op (consolidated: 0) when the
 * directory is missing or below the threshold; originals are deleted only
 * after the consolidated file is written. Never throws.
 *
 * @param {object} params - { apiKey, provider?, utilityModel? }
 * @returns {Promise<{consolidated: number}>} Count of files merged (0 on skip/failure).
 */
async function consolidateOldMemoryFiles(params) {
  try {
    const { TELETON_ROOT } = await import("./paths-XA2RJH4S.js");
    const memoryDir = join(TELETON_ROOT, "memory");
    let entries;
    try {
      entries = await readdir(memoryDir);
    } catch {
      // Memory directory doesn't exist yet — nothing to consolidate.
      return { consolidated: 0 };
    }
    // Date-prefixed session files only; lexicographic sort == chronological.
    const sessionFiles = entries.filter((f) => /^\d{4}-\d{2}-\d{2}-.+\.md$/.test(f) && !f.startsWith("consolidated-")).sort();
    if (sessionFiles.length < CONSOLIDATION_THRESHOLD) {
      return { consolidated: 0 };
    }
    const batch = sessionFiles.slice(0, CONSOLIDATION_BATCH);
    log2.info(`Consolidating ${batch.length} old session memory files...`);
    const contents = [];
    for (const file of batch) {
      const text = await readFile(join(memoryDir, file), "utf-8");
      contents.push(`--- ${file} ---
${text}`);
    }
    const combined = contents.join("\n\n");
    let summary;
    try {
      const result = await summarizeWithFallback({
        messages: [{ role: "user", content: combined, timestamp: Date.now() }],
        apiKey: params.apiKey,
        contextWindow: DEFAULT_CONTEXT_WINDOW,
        maxSummaryTokens: DEFAULT_MAX_SUMMARY_TOKENS,
        customInstructions: "Consolidate these session memories into a single comprehensive summary. Preserve key facts, decisions, patterns, and important context. Remove redundancy. Organize by topic.",
        provider: params.provider,
        utilityModel: params.utilityModel
      });
      summary = result.summary;
    } catch (error) {
      // Keep the originals if we couldn't produce a summary.
      log2.warn({ err: error }, "Consolidation summary failed, skipping");
      return { consolidated: 0 };
    }
    // File names start with YYYY-MM-DD; first/last of the sorted batch bound the period.
    const dateOf = (f) => f.slice(0, 10);
    const dateRange = `${dateOf(batch[0])}_to_${dateOf(batch[batch.length - 1])}`;
    const outFile = `consolidated-${dateRange}.md`;
    const outContent = `# Consolidated Session Memories

## Period
${batch[0]} \u2192 ${batch[batch.length - 1]}

## Summary

${summary}

---

*Consolidated from ${batch.length} session files by Teleton memory consolidation*
`;
    await writeFile(join(memoryDir, outFile), outContent, "utf-8");
    // Delete originals only after the consolidated file is safely written.
    for (const file of batch) {
      await unlink(join(memoryDir, file));
    }
    log2.info(`Consolidated ${batch.length} files \u2192 ${outFile}`);
    return { consolidated: batch.length };
  } catch (error) {
    log2.error({ err: error }, "Memory consolidation failed");
    return { consolidated: 0 };
  }
}
482
+
483
// Public surface of this chunk, consumed by other dist chunks.
export {
  summarizeWithFallback,
  saveSessionMemory,
  consolidateOldMemoryFiles
};
@@ -23,6 +23,9 @@ var CONTEXT_MAX_RECENT_MESSAGES = 10;
23
23
  var CONTEXT_MAX_RELEVANT_CHUNKS = 5;
24
24
  var FEED_MESSAGE_MAX_CHARS = 2e3;
25
25
  var HYBRID_SEARCH_MIN_SCORE = 0.15;
26
+ var RECENCY_DECAY_FACTOR = 0.05;
27
+ var RECENCY_WEIGHT = 0.15;
28
+ var EMBEDDING_QUERY_MAX_CHARS = 1e3;
26
29
  var CONTEXT_OVERFLOW_SUMMARY_MESSAGES = 15;
27
30
  var RATE_LIMIT_MAX_RETRIES = 3;
28
31
  var SERVER_ERROR_MAX_RETRIES = 3;
@@ -56,6 +59,7 @@ var RESULT_TRUNCATION_KEEP_CHARS = 500;
56
59
  var EMBEDDING_CACHE_EVICTION_RATIO = 0.1;
57
60
  var WEB_FETCH_MAX_TEXT_LENGTH = 2e4;
58
61
  var WEB_SEARCH_MAX_RESULTS = 10;
62
+ var TOOL_CONCURRENCY_LIMIT = 2;
59
63
  var TOOL_RAG_MIN_SCORE = 0.1;
60
64
  var TOOL_RAG_VECTOR_WEIGHT = 0.6;
61
65
  var TOOL_RAG_KEYWORD_WEIGHT = 0.4;
@@ -85,6 +89,9 @@ export {
85
89
  CONTEXT_MAX_RELEVANT_CHUNKS,
86
90
  FEED_MESSAGE_MAX_CHARS,
87
91
  HYBRID_SEARCH_MIN_SCORE,
92
+ RECENCY_DECAY_FACTOR,
93
+ RECENCY_WEIGHT,
94
+ EMBEDDING_QUERY_MAX_CHARS,
88
95
  CONTEXT_OVERFLOW_SUMMARY_MESSAGES,
89
96
  RATE_LIMIT_MAX_RETRIES,
90
97
  SERVER_ERROR_MAX_RETRIES,
@@ -118,6 +125,7 @@ export {
118
125
  EMBEDDING_CACHE_EVICTION_RATIO,
119
126
  WEB_FETCH_MAX_TEXT_LENGTH,
120
127
  WEB_SEARCH_MAX_RESULTS,
128
+ TOOL_CONCURRENCY_LIMIT,
121
129
  TOOL_RAG_MIN_SCORE,
122
130
  TOOL_RAG_VECTOR_WEIGHT,
123
131
  TOOL_RAG_KEYWORD_WEIGHT
@@ -0,0 +1,129 @@
1
+ import {
2
+ TELETON_ROOT
3
+ } from "./chunk-EYWNOHMJ.js";
4
+ import {
5
+ createLogger
6
+ } from "./chunk-NQ6FZKCE.js";
7
+
8
+ // src/memory/embeddings/local.ts
9
+ import { pipeline, env } from "@huggingface/transformers";
10
+ import { join, dirname } from "path";
11
+ import { mkdirSync, writeFileSync, renameSync, statSync, unlinkSync } from "fs";
12
var log = createLogger("Memory");
// Cache downloaded models under TELETON_ROOT/models.
var modelCacheDir = join(TELETON_ROOT, "models");
// Best-effort creation; failures surface later when the cache is actually used.
try {
  mkdirSync(modelCacheDir, { recursive: true });
} catch {
}
// Point transformers.js at our cache directory (module-level side effect).
env.cacheDir = modelCacheDir;
19
// Minimum plausible sizes for cached model files; anything smaller is treated
// as a truncated/failed download. Files not listed here just need >= 1 byte.
var MIN_FILE_SIZES = { "onnx/model.onnx": 1e6 };
/**
 * Check whether a cached model file exists and meets its minimum size.
 *
 * @param {string} filePath - Absolute path of the cached file.
 * @param {string} fileName - Repo-relative name used to look up the size floor.
 * @returns {boolean} True when the file exists and is large enough.
 */
function isCacheFileValid(filePath, fileName) {
  const minSize = MIN_FILE_SIZES[fileName] ?? 1;
  try {
    const { size } = statSync(filePath);
    return size >= minSize;
  } catch {
    // Missing file (or stat failure) counts as invalid.
    return false;
  }
}
27
/**
 * Ensure the files a transformers.js model needs are present in the local
 * cache, downloading any missing or undersized ones from the Hugging Face
 * Hub. Downloads go to a ".tmp" file and are renamed into place so a crash
 * mid-download never leaves a truncated file that passes the size check.
 *
 * @param {string} model - Hub model id, e.g. "Xenova/all-MiniLM-L6-v2".
 * @throws {Error} When a file cannot be fetched (non-OK HTTP response).
 */
async function ensureModelCached(model) {
  const files = ["config.json", "tokenizer_config.json", "tokenizer.json", "onnx/model.onnx"];
  const baseUrl = `https://huggingface.co/${model}/resolve/main`;
  for (const file of files) {
    const localPath = join(modelCacheDir, model, file);
    // Already cached with a plausible size — skip.
    if (isCacheFileValid(localPath, file)) continue;
    // Remove a stale/partial leftover; ignore "file not found".
    try {
      unlinkSync(localPath);
    } catch {
    }
    log.info(`Downloading ${model}/${file}...`);
    mkdirSync(dirname(localPath), { recursive: true });
    const res = await fetch(`${baseUrl}/${file}`, { redirect: "follow" });
    if (!res.ok) {
      throw new Error(`Failed to download ${model}/${file}: ${res.status} ${res.statusText}`);
    }
    // Whole file is buffered in memory before writing (model.onnx is ~tens of MB).
    const buffer = Buffer.from(await res.arrayBuffer());
    const tmpPath = localPath + ".tmp";
    writeFileSync(tmpPath, buffer);
    renameSync(tmpPath, localPath);
  }
}
49
// Memoized pipeline promise; cleared on load failure so a later call can retry.
var extractorPromise = null;
/**
 * Lazily create (and memoize) the feature-extraction pipeline.
 * NOTE(review): the promise is cached regardless of `model`, so the first
 * model requested wins for the process lifetime — confirm all callers pass
 * the same model.
 *
 * @param {string} model - Hub model id to load.
 * @returns {Promise<Function>} The ready extractor pipeline.
 */
function getExtractor(model) {
  if (!extractorPromise) {
    log.info(`Loading local embedding model: ${model} (cache: ${modelCacheDir})`);
    extractorPromise = pipeline("feature-extraction", model, {
      dtype: "fp32",
      // Explicit cache_dir to avoid any env race condition
      cache_dir: modelCacheDir,
      // Prevent pthread_setaffinity_np EINVAL on VPS/containers with restricted CPU sets.
      // ONNX Runtime skips thread affinity when thread counts are explicit.
      session_options: { intraOpNumThreads: 1, interOpNumThreads: 1 }
    }).then((ext) => {
      log.info(`Local embedding model ready`);
      return ext;
    }).catch((err) => {
      log.error(`Failed to load embedding model: ${err.message}`);
      // Drop the memoized promise so the next call retries instead of
      // returning a permanently rejected promise.
      extractorPromise = null;
      throw err;
    });
  }
  return extractorPromise;
}
71
/**
 * Embedding provider backed by a locally cached transformers.js model.
 * After a failed warmup() the provider disables itself and both embed
 * methods return empty arrays (callers fall back to FTS5-only search).
 */
var LocalEmbeddingProvider = class {
  id = "local";
  // Hub model id; defaults to Xenova/all-MiniLM-L6-v2 in the constructor.
  model;
  // Embedding vector length; fixed at 384 in the constructor.
  dimensions;
  // Flipped to true when warmup() gives up; embed* then short-circuit to [].
  _disabled = false;
  constructor(config) {
    this.model = config.model || "Xenova/all-MiniLM-L6-v2";
    this.dimensions = 384;
  }
  /**
   * Pre-download and load the model at startup.
   * If loading fails, retries once then marks provider as disabled (FTS5-only).
   * Call this once during app init — avoids retry spam on every message.
   */
  async warmup() {
    for (let attempt = 1; attempt <= 2; attempt++) {
      try {
        await ensureModelCached(this.model);
        await getExtractor(this.model);
        return true;
      } catch {
        if (attempt === 1) {
          log.warn(`Embedding model load failed (attempt 1), retrying...`);
          await new Promise((r) => setTimeout(r, 1e3));
        } else {
          log.warn(
            `Local embedding model unavailable \u2014 falling back to FTS5-only search (no vector embeddings)`
          );
          this._disabled = true;
          return false;
        }
      }
    }
    return false;
  }
  // Embed one query string; returns [] when the provider is disabled.
  async embedQuery(text) {
    if (this._disabled) return [];
    const extractor = await getExtractor(this.model);
    const output = await extractor(text, { pooling: "mean", normalize: true });
    return Array.from(output.data);
  }
  // Embed a batch of strings; returns one vector per input text
  // (or [] when disabled / input is empty).
  async embedBatch(texts) {
    if (this._disabled) return [];
    if (texts.length === 0) return [];
    const extractor = await getExtractor(this.model);
    const output = await extractor(texts, { pooling: "mean", normalize: true });
    // assumes output.data is a flat buffer of length texts.length * dimensions — TODO confirm
    const data = output.data;
    const dims = this.dimensions;
    const results = [];
    for (let i = 0; i < texts.length; i++) {
      results.push(Array.from(data.slice(i * dims, (i + 1) * dims)));
    }
    return results;
  }
};
126
+
127
// Public surface of this chunk, consumed by other dist chunks.
export {
  LocalEmbeddingProvider
};