npm - companionbot - Versions diffs - 0.11.1 → 0.12.1 - Mend

companionbot 0.11.1 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/ai/claude.js +79 -65
package/dist/cli/main.js +16 -1
package/dist/config/constants.js +93 -2
package/dist/health/index.js +3 -1
package/dist/memory/benchmark.js +114 -0
package/dist/memory/embeddings.js +67 -5
package/dist/memory/ftsIndex.js +288 -68
package/dist/memory/ftsIndex.optimized.js +368 -0
package/dist/memory/ftsIndex.original.js +148 -0
package/dist/memory/hybridSearch.js +147 -41
package/dist/memory/hybridSearch.optimized.js +209 -0
package/dist/memory/hybridSearch.original.js +146 -0
package/dist/memory/index.js +2 -2
package/dist/memory/vectorStore.js +333 -160
package/dist/memory/vectorStore.optimized.js +492 -0
package/dist/memory/vectorStore.original.js +350 -0
package/dist/telegram/bot.js +42 -12
package/dist/telegram/handlers/commands.js +12 -0
package/dist/telegram/handlers/messages.js +241 -65
package/dist/telegram/utils/cache.js +35 -0
package/dist/telegram/utils/index.js +1 -1
package/dist/telegram/utils/prompt.js +126 -189
package/dist/tools/compress.js +141 -0
package/dist/tools/definitions.js +313 -0
package/dist/tools/index.js +11 -2
package/dist/tools/timeout.js +78 -0
package/dist/utils/index.js +1 -0
package/dist/utils/retry.js +288 -0
package/dist/warmup.js +120 -0
package/package.json +1 -1
package/templates/AGENTS.md +27 -184

package/dist/ai/claude.js CHANGED Viewed

@@ -1,48 +1,21 @@
 import Anthropic, { APIError } from "@anthropic-ai/sdk";
 import { tools, executeTool } from "../tools/index.js";
-import { sleep } from "../utils/time.js";
-import { MAX_RETRIES, BASE_RETRY_DELAY_MS, MAX_TOOL_ITERATIONS, TOOL_RESULT_MAX_LENGTH, TOOL_INPUT_SUMMARY_LENGTH, TOOL_OUTPUT_SUMMARY_LENGTH, } from "../utils/constants.js";
-async function withRetry(fn, retries = MAX_RETRIES) {
-    let lastError = null;
-    for (let attempt = 0; attempt < retries; attempt++) {
-        try {
-            return await fn();
-        }
-        catch (error) {
-            // APIError 타입 체크
-            if (error instanceof APIError) {
-                lastError = error;
-                // Rate limit (429)
-                if (error.status === 429) {
-                    const retryAfter = error.headers?.["retry-after"];
-                    const delay = retryAfter
-                        ? parseInt(retryAfter) * 1000
-                        : BASE_RETRY_DELAY_MS * Math.pow(2, attempt);
-                    console.log(`[RateLimit] 429 received, waiting ${delay}ms (attempt ${attempt + 1}/${retries})`);
-                    await sleep(delay);
-                    continue;
-                }
-                // 서버 에러 (500+)
-                if (error.status >= 500) {
-                    const delay = BASE_RETRY_DELAY_MS * Math.pow(2, attempt);
-                    console.log(`[ServerError] ${error.status}, waiting ${delay}ms (attempt ${attempt + 1}/${retries})`);
-                    await sleep(delay);
-                    continue;
-                }
-            }
-            // 일반 Error 처리
-            if (error instanceof Error) {
-                lastError = error;
-            }
-            else {
-                lastError = new Error(String(error));
-            }
-            // 다른 에러는 바로 throw
-            throw error;
-        }
-    }
-    throw lastError;
-}
+import { MAX_TOOL_ITERATIONS, TOOL_INPUT_SUMMARY_LENGTH, TOOL_OUTPUT_SUMMARY_LENGTH, } from "../utils/constants.js";
+import { withRetry, withTimeout, } from "../utils/retry.js";
+import { getToolTimeout } from "../tools/timeout.js";
+import { compressToolResult } from "../tools/compress.js";
+// API call timeout (2 minutes) - allows for long Claude response times
+const API_TIMEOUT_MS = 120000; // NOTE(review): config/constants.js in this same diff adds API.TIMEOUT_MS with the same value; presumably one should reference the other - confirm
+// Retry settings, passed as the second argument to withRetry() around each client.messages.create call
+const API_RETRY_OPTIONS = {
+    maxRetries: 3,
+    initialDelayMs: 1000,
+    maxDelayMs: 30000,
+    onRetry: (attempt, error, delay) => { // logs each retry; the error text is capped at 100 characters
+        const errMsg = error instanceof Error ? error.message : String(error);
+        console.log(`[API Retry] Attempt ${attempt}, waiting ${delay}ms: ${errMsg.slice(0, 100)}`);
+    },
+};
 let anthropic = null;
 function getClient() {
     if (!anthropic) {
@@ -163,31 +136,57 @@ export async function chat(messages, systemPrompt, modelId = "sonnet", thinkingL
         return params;
     };
     let response;
-    response = await withRetry(() => client.messages.create(buildRequestParams()));
+    response = await withRetry(() => withTimeout(() => client.messages.create(buildRequestParams()), API_TIMEOUT_MS, "API 응답 시간 초과"), API_RETRY_OPTIONS);
     // Tool use 루프 - Claude가 도구 사용을 멈출 때까지 반복
     let iterations = 0;
     while (response.stop_reason === "tool_use" && iterations < MAX_TOOL_ITERATIONS) {
         iterations++;
         const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
-        // 도구 실행 결과 수집
-        const toolResults = [];
-        for (const toolUse of toolUseBlocks) {
-            console.log(`[Tool] ${toolUse.name}:`, JSON.stringify(toolUse.input));
-            const result = await executeTool(toolUse.name, toolUse.input);
-            // 결과가 너무 길면 자르기
-            const truncatedResult = result.length > TOOL_RESULT_MAX_LENGTH
-                ? result.slice(0, TOOL_RESULT_MAX_LENGTH) + "\n... (truncated)"
-                : result;
-            toolResults.push({
-                type: "tool_result",
-                tool_use_id: toolUse.id,
-                content: truncatedResult,
-            });
-            // 도구 사용 기록 (히스토리 참조용)
+        // 도구 병렬 실행 (성능 최적화)
+        console.log(`[Tool] Executing ${toolUseBlocks.length} tool(s) in parallel`);
+        const toolExecutions = await Promise.all(toolUseBlocks.map(async (toolUse) => {
+            const startTime = Date.now();
+            console.log(`[Tool] ${toolUse.name}:`, JSON.stringify(toolUse.input).slice(0, 200));
+            try {
+                // 도구별 타임아웃 적용
+                const timeout = getToolTimeout(toolUse.name);
+                const result = await Promise.race([
+                    executeTool(toolUse.name, toolUse.input),
+                    new Promise((_, reject) => setTimeout(() => reject(new Error(`Tool ${toolUse.name} timed out after ${timeout}ms`)), timeout)),
+                ]);
+                const elapsed = Date.now() - startTime;
+                console.log(`[Tool] ${toolUse.name} completed in ${elapsed}ms`);
+                // 스마트 결과 압축
+                const compressedResult = compressToolResult(toolUse.name, result);
+                return {
+                    toolUse,
+                    result: compressedResult,
+                    success: true,
+                };
+            }
+            catch (error) {
+                const elapsed = Date.now() - startTime;
+                const errorMsg = error instanceof Error ? error.message : String(error);
+                console.error(`[Tool] ${toolUse.name} failed after ${elapsed}ms:`, errorMsg);
+                return {
+                    toolUse,
+                    result: `Error: ${errorMsg}`,
+                    success: false,
+                };
+            }
+        }));
+        // 결과 수집
+        const toolResults = toolExecutions.map((exec) => ({
+            type: "tool_result",
+            tool_use_id: exec.toolUse.id,
+            content: exec.result,
+        }));
+        // 도구 사용 기록
+        for (const exec of toolExecutions) {
             toolsUsed.push({
-                name: toolUse.name,
-                input: JSON.stringify(toolUse.input).slice(0, TOOL_INPUT_SUMMARY_LENGTH),
-                output: truncatedResult.slice(0, TOOL_OUTPUT_SUMMARY_LENGTH),
+                name: exec.toolUse.name,
+                input: JSON.stringify(exec.toolUse.input).slice(0, TOOL_INPUT_SUMMARY_LENGTH),
+                output: exec.result.slice(0, TOOL_OUTPUT_SUMMARY_LENGTH),
             });
         }
         // 어시스턴트 메시지와 도구 결과 추가
@@ -200,7 +199,7 @@ export async function chat(messages, systemPrompt, modelId = "sonnet", thinkingL
             content: toolResults,
         });
         // 다음 응답 요청 (도구 루프에서도 thinking 유지)
-        response = await withRetry(() => client.messages.create(buildRequestParams()));
+        response = await withRetry(() => withTimeout(() => client.messages.create(buildRequestParams()), API_TIMEOUT_MS, "API 응답 시간 초과"), API_RETRY_OPTIONS);
     }
     // 반복 횟수 초과 시 경고
     if (iterations >= MAX_TOOL_ITERATIONS) {
@@ -226,8 +225,12 @@ export async function chat(messages, systemPrompt, modelId = "sonnet", thinkingL
  * 스트리밍 중 에러 발생 시 적절한 에러 메시지를 반환하거나 예외를 전파함
  */
 export async function chatSmart(messages, systemPrompt, modelId, thinkingLevel = "medium", onChunk) {
+    // 콜백 정규화
+    const callbacks = typeof onChunk === 'function'
+        ? { onChunk }
+        : (onChunk ?? {});
     // 스트리밍 콜백이 없으면 그냥 일반 chat 사용
-    if (!onChunk) {
+    if (!callbacks.onChunk) {
         const result = await chat(messages, systemPrompt, modelId, thinkingLevel);
         return { text: result.text, usedTools: result.toolsUsed.length > 0, toolsUsed: result.toolsUsed };
     }
@@ -279,7 +282,7 @@ export async function chatSmart(messages, systemPrompt, modelId, thinkingLevel =
             streamingStarted = true;
             accumulated += text;
             try {
-                await onChunk(text, accumulated);
+                await callbacks.onChunk(text, accumulated);
             }
             catch (err) {
                 // editMessageText 실패 등은 무시하고 계속
@@ -293,6 +296,17 @@ export async function chatSmart(messages, systemPrompt, modelId, thinkingLevel =
         // 주의: chat()은 내부에서 withRetry를 사용하므로 여기서 추가 재시도 불필요
         if (stopReason === "tool_use") {
             console.log("[Stream] Tool use detected, falling back to chat()");
+            // 도구 이름 추출하여 콜백 호출
+            const toolUseBlocks = finalMessage.content.filter((block) => block.type === "tool_use");
+            const toolNames = toolUseBlocks.map(t => t.name);
+            if (callbacks.onToolStart && toolNames.length > 0) {
+                try {
+                    await callbacks.onToolStart(toolNames);
+                }
+                catch (err) {
+                    console.warn("[Stream] Tool start callback error (ignored):", err);
+                }
+            }
             const result = await chat(messages, systemPrompt, modelId, thinkingLevel);
             return { text: result.text, usedTools: true, toolsUsed: result.toolsUsed };
         }

package/dist/cli/main.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { createBot } from "../telegram/bot.js";
 import { cleanupHeartbeats } from "../heartbeat/index.js";
 import { cleanupBriefings } from "../briefing/index.js";
 import { cleanupReminders } from "../reminders/index.js";
+import { preloadEmbeddingModel, preloadVectorStore } from "../memory/index.js";
 function createPrompt() {
     return readline.createInterface({
         input: process.stdin,
@@ -233,7 +234,21 @@ async function main() {
     }
     // 4. 환경변수 설정
     process.env.ANTHROPIC_API_KEY = apiKey;
-    // 5. 봇 시작
+    // 5. 🚀 사전 로딩 (첫 응답 속도 개선)
+    console.log(`
+╔═══════════════════════════════════════════════════════════════╗
+║                   ⏳ 시스템 사전 로딩...                       ║
+╚═══════════════════════════════════════════════════════════════╝
+`);
+    const preloadStart = Date.now();
+    // 임베딩 모델 + 벡터 저장소 병렬 로딩
+    await Promise.all([
+        preloadEmbeddingModel(),
+        preloadVectorStore(),
+    ]);
+    console.log(`   ✓ 사전 로딩 완료 (${Date.now() - preloadStart}ms)
+`);
+    // 6. 봇 시작
     console.log(`
 ╔═══════════════════════════════════════════════════════════════╗
 ║                      🚀 봇 시작!                              ║

package/dist/config/constants.js CHANGED Viewed

@@ -70,14 +70,81 @@ export const MEMORY = {
 // 텔레그램/UI 관련 설정
 // ============================================
 export const TELEGRAM = {
-    /** 스트리밍 업데이트 간격 (밀리초) */
-    STREAM_UPDATE_INTERVAL_MS: 500,
+    /** 스트리밍 업데이트 간격 - 적응형 (밀리초) */
+    STREAM_UPDATE_INTERVAL_MS: 500, // 레거시 호환용
+    /** 스트리밍 적응형 간격 설정 */
+    STREAM_INTERVAL: {
+        /** 첫 번째 업데이트 (즉시) */
+        FIRST_MS: 0,
+        /** 초기 빠른 업데이트 (처음 5회) */
+        FAST_MS: 200,
+        /** 이후 일반 간격 */
+        NORMAL_MS: 400,
+        /** 빠른 업데이트 횟수 */
+        FAST_COUNT: 5,
+    },
+    /** 텔레그램 메시지 최대 길이 */
+    MAX_MESSAGE_LENGTH: 4096,
     /** 최대 이미지 크기 (바이트) - 10MB */
     MAX_IMAGE_SIZE: 10 * 1024 * 1024,
     /** URL 처리 최대 개수 */
     MAX_URL_FETCH: 3,
     /** 캘린더 미리보기 이벤트 수 */
     CALENDAR_PREVIEW_COUNT: 3,
+    /** 스트리밍 UI 아이콘 */
+    STREAM_ICONS: {
+        THINKING: "💭",
+        TYPING: "▌",
+        TOOL: "🔧",
+        DONE: "",
+    },
+    /** Typing indicator 자동 갱신 간격 (밀리초) - 텔레그램은 5초 후 만료 */
+    TYPING_REFRESH_MS: 4000,
+    /** 도구별 친화적 상태 메시지 */
+    TOOL_STATUS_MESSAGES: {
+        // 검색/정보 조회
+        web_search: { icon: "🔍", text: "웹에서 검색하는 중", estimate: "5-10초" },
+        web_fetch: { icon: "📄", text: "웹페이지 읽는 중", estimate: "3-5초" },
+        get_weather: { icon: "🌤️", text: "날씨 확인 중", estimate: "2-3초" },
+        memory_search: { icon: "🧠", text: "기억 검색 중", estimate: "1-2초" },
+        memory_reindex: { icon: "🧠", text: "기억 재색인 중", estimate: "10-30초" },
+        // 파일 작업
+        read_file: { icon: "📖", text: "파일 읽는 중", estimate: "1초" },
+        write_file: { icon: "✍️", text: "파일 쓰는 중", estimate: "1초" },
+        edit_file: { icon: "✏️", text: "파일 수정 중", estimate: "1초" },
+        list_directory: { icon: "📁", text: "폴더 살펴보는 중", estimate: "1초" },
+        // 명령어 실행
+        run_command: { icon: "⚡", text: "명령어 실행 중", estimate: "변동" },
+        list_sessions: { icon: "📋", text: "세션 목록 확인 중", estimate: "1초" },
+        get_session_log: { icon: "📜", text: "로그 가져오는 중", estimate: "1초" },
+        kill_session: { icon: "🛑", text: "세션 종료 중", estimate: "1초" },
+        // 일정/리마인더
+        get_calendar_events: { icon: "📅", text: "일정 확인 중", estimate: "2-3초" },
+        add_calendar_event: { icon: "📅", text: "일정 추가 중", estimate: "2-3초" },
+        delete_calendar_event: { icon: "📅", text: "일정 삭제 중", estimate: "2초" },
+        set_reminder: { icon: "⏰", text: "알림 설정 중", estimate: "1초" },
+        list_reminders: { icon: "⏰", text: "알림 목록 확인 중", estimate: "1초" },
+        cancel_reminder: { icon: "⏰", text: "알림 취소 중", estimate: "1초" },
+        // 브리핑/하트비트
+        control_briefing: { icon: "☀️", text: "브리핑 설정 중", estimate: "1초" },
+        send_briefing_now: { icon: "☀️", text: "브리핑 준비 중", estimate: "5-10초" },
+        control_heartbeat: { icon: "💓", text: "하트비트 설정 중", estimate: "1초" },
+        run_heartbeat_check: { icon: "💓", text: "체크 실행 중", estimate: "3-5초" },
+        // 서브에이전트
+        spawn_agent: { icon: "🤖", text: "서브에이전트 생성 중", estimate: "2-3초" },
+        list_agents: { icon: "🤖", text: "에이전트 목록 확인 중", estimate: "1초" },
+        cancel_agent: { icon: "🤖", text: "에이전트 취소 중", estimate: "1초" },
+        // Cron
+        add_cron: { icon: "🕐", text: "예약 작업 추가 중", estimate: "1초" },
+        list_crons: { icon: "🕐", text: "예약 작업 확인 중", estimate: "1초" },
+        remove_cron: { icon: "🕐", text: "예약 작업 삭제 중", estimate: "1초" },
+        toggle_cron: { icon: "🕐", text: "예약 작업 설정 중", estimate: "1초" },
+        run_cron: { icon: "🕐", text: "예약 작업 실행 중", estimate: "변동" },
+        // 기타
+        change_model: { icon: "🔄", text: "모델 변경 중", estimate: "1초" },
+        save_memory: { icon: "💾", text: "기억 저장 중", estimate: "1초" },
+        save_persona: { icon: "✨", text: "페르소나 저장 중", estimate: "2초" },
+    },
 };
 // ============================================
 // 보안/토큰 관련 설정
@@ -86,3 +153,27 @@ export const SECURITY = {
     /** 리셋 토큰 만료 시간 (밀리초) - 1분 */
     RESET_TOKEN_TTL_MS: 60000,
 };
+// ============================================
+// API / network settings
+// ============================================
+export const API = {
+    /** Claude API timeout (milliseconds) - 2 minutes */
+    TIMEOUT_MS: 120000,
+    /** Maximum number of retries */
+    MAX_RETRIES: 3,
+    /** Initial wait before a retry (milliseconds) */
+    INITIAL_RETRY_DELAY_MS: 1000,
+    /** Maximum wait before a retry (milliseconds) */
+    MAX_RETRY_DELAY_MS: 30000,
+    /** Retry backoff multiplier */
+    BACKOFF_MULTIPLIER: 2,
+};
+// ============================================
+// Memory search timeout settings
+// ============================================
+export const SEARCH = {
+    /** Overall search timeout (milliseconds) */
+    TIMEOUT_MS: 5000,
+    /** Embedding generation timeout (milliseconds) */
+    EMBED_TIMEOUT_MS: 3000,
+};

package/dist/health/index.js CHANGED Viewed

@@ -3,6 +3,7 @@
  * 봇의 상태를 추적하고 모니터링합니다.
  * @module health
  */
+import { getWarmupStatus } from "../warmup.js";
 let startTime = Date.now();
 let lastActivity = Date.now();
 let messageCount = 0;
@@ -36,7 +37,8 @@ export function getHealthStatus() {
         lastActivity,
         messageCount,
         errorCount,
-        isHealthy
+        isHealthy,
+        warmup: getWarmupStatus(),
     };
 }
 /**

package/dist/memory/benchmark.js ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * 메모리 검색 벤치마크
+ *
+ * 사용법: npx tsx src/memory/benchmark.ts
+ */
+import { performance } from "perf_hooks";
+// 현재 구현
+import * as currentVector from "./vectorStore.js";
+import * as currentFts from "./ftsIndex.js";
+import * as currentHybrid from "./hybridSearch.js";
+// 최적화 구현 (주석 해제하여 비교)
+// import * as optimizedVector from "./vectorStore.optimized.js";
+// import * as optimizedFts from "./ftsIndex.optimized.js";
+// import * as optimizedHybrid from "./hybridSearch.optimized.js";
+const TEST_QUERIES = [ // sample queries; main() reports the full count but only benchmarks the first three per search type
+    "오늘 무슨 일이 있었어?",
+    "지난주 회의 내용",
+    "프로젝트 마감일",
+    "API 키 설정",
+    "에러 해결 방법",
+    "CompanionBot feature",
+    "일정 확인",
+    "메모리 검색 최적화",
+    "테스트 코드 작성",
+    "버그 수정",
+];
+async function benchmark(name, fn, iterations = 10) { // Times fn over the given number of awaited calls and returns summary statistics in milliseconds
+    const times = [];
+    // Warm-up: two untimed calls before measuring
+    await fn();
+    await fn();
+    for (let i = 0; i < iterations; i++) {
+        const start = performance.now();
+        await fn();
+        const end = performance.now();
+        times.push(end - start);
+    }
+    times.sort((a, b) => a - b); // ascending, so times[0] is the minimum and the last element is the maximum
+    const avgMs = times.reduce((a, b) => a + b, 0) / times.length; // NOTE(review): iterations <= 0 leaves times empty, which makes every statistic below NaN
+    const p95Idx = Math.floor(times.length * 0.95); // with the default 10 samples this is index 9, i.e. the same element as maxMs
+    return {
+        name,
+        avgMs: Math.round(avgMs * 100) / 100, // all durations are rounded to two decimal places
+        minMs: Math.round(times[0] * 100) / 100,
+        maxMs: Math.round(times[times.length - 1] * 100) / 100,
+        p95Ms: Math.round(times[p95Idx] * 100) / 100,
+        ops: Math.round(1000 / avgMs), // calls per second implied by the average duration
+    };
+}
+function printResult(result) { // Logs the name and timing fields of one benchmark() result as a multi-line report
+    console.log(`
+${result.name}:
+  Average: ${result.avgMs}ms
+  Min: ${result.minMs}ms
+  Max: ${result.maxMs}ms
+  P95: ${result.p95Ms}ms
+  Ops/sec: ${result.ops}
+`);
+}
+async function main() { // Runs the vector, FTS, hybrid and chunk-loading benchmarks in order and prints each result
+    console.log("=".repeat(60));
+    console.log("Memory Search Benchmark");
+    console.log("=".repeat(60));
+    console.log(`\nTest queries: ${TEST_QUERIES.length}`);
+    console.log(`Iterations per query: 10\n`); // NOTE(review): hard-coded text; the cold chunk-loading benchmark below runs 5 iterations
+    // 1. Vector search benchmark
+    console.log("\n--- Vector Search ---");
+    for (const query of TEST_QUERIES.slice(0, 3)) {
+        const result = await benchmark(`Vector search: "${query.slice(0, 20)}..."`, async () => {
+            return currentHybrid.searchVector(query, 5, 0.3);
+        }, 10);
+        printResult(result);
+    }
+    // 2. FTS search benchmark
+    console.log("\n--- FTS Keyword Search ---");
+    for (const query of TEST_QUERIES.slice(0, 3)) {
+        const result = await benchmark(`FTS search: "${query.slice(0, 20)}..."`, async () => {
+            return currentFts.searchKeyword(query, 10);
+        }, 10);
+        printResult(result);
+    }
+    // 3. Hybrid search benchmark
+    console.log("\n--- Hybrid Search ---");
+    for (const query of TEST_QUERIES.slice(0, 3)) {
+        const result = await benchmark(`Hybrid search: "${query.slice(0, 20)}..."`, async () => {
+            return currentHybrid.hybridSearch(query, { topK: 5 });
+        }, 10);
+        printResult(result);
+    }
+    // 4. Chunk loading benchmark
+    console.log("\n--- Chunk Loading ---");
+    // Load time after invalidating the cache
+    currentVector.invalidateCache();
+    const loadResult = await benchmark("Load all chunks (cold)", async () => {
+        currentVector.invalidateCache();
+        return currentVector.loadAllMemoryChunks();
+    }, 5);
+    printResult(loadResult);
+    // Load time with the cache populated
+    const cachedLoadResult = await benchmark("Load all chunks (cached)", async () => {
+        return currentVector.loadAllMemoryChunks();
+    }, 10);
+    printResult(cachedLoadResult);
+    // 5. Statistics
+    console.log("\n--- Statistics ---");
+    const chunks = await currentVector.loadAllMemoryChunks();
+    const ftsCount = currentFts.getDocumentCount();
+    console.log(`Total chunks in vector store: ${chunks.length}`);
+    console.log(`Total documents in FTS: ${ftsCount}`);
+    console.log("\n" + "=".repeat(60));
+    console.log("Benchmark complete");
+    console.log("=".repeat(60));
+}
+main().catch(console.error); // NOTE(review): runs at module top level, so loading this file always starts the benchmark

package/dist/memory/embeddings.js CHANGED Viewed

@@ -8,6 +8,24 @@ let embeddingPipeline = null;
 // 모델 로딩 중인지 추적
 let isLoading = false;
 let loadingPromise = null;
+// ============== Query embedding LRU cache ==============
+// Avoids recomputing the embedding when the same search query is repeated
+const QUERY_CACHE_MAX_SIZE = 100;
+const queryEmbeddingCache = new Map(); // keyed by the cleaned query text; values are { embedding, lastUsed } as stored by embed()
+/**
+ * Prunes the query embedding cache in LRU fashion.
+ * Does nothing while the cache holds at most QUERY_CACHE_MAX_SIZE entries;
+ * otherwise deletes the entries with the smallest lastUsed values until
+ * exactly QUERY_CACHE_MAX_SIZE remain.
+ */
+function pruneQueryCache() {
+    if (queryEmbeddingCache.size <= QUERY_CACHE_MAX_SIZE)
+        return;
+    // Sort by lastUsed and delete the oldest entries
+    const entries = [...queryEmbeddingCache.entries()];
+    entries.sort((a, b) => a[1].lastUsed - b[1].lastUsed);
+    const toRemove = entries.slice(0, entries.length - QUERY_CACHE_MAX_SIZE); // the surplus beyond the size limit, oldest first
+    for (const [key] of toRemove) {
+        queryEmbeddingCache.delete(key);
+    }
+}
 /**
  * 임베딩 파이프라인을 초기화합니다.
  * 작고 빠른 모델 사용 (384 차원)
@@ -21,42 +39,73 @@ async function getEmbeddingPipeline() {
         return loadingPromise;
     }
     isLoading = true;
+    console.log("[Embedding] Loading model...");
+    const startTime = Date.now();
     loadingPromise = pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2" // 384차원, 빠르고 가벼움
     );
     try {
         embeddingPipeline = await loadingPromise;
+        console.log(`[Embedding] Model loaded in ${Date.now() - startTime}ms`);
         return embeddingPipeline;
     }
     finally {
         isLoading = false;
     }
 }
+/**
+ * Preload: call at bot startup to avoid a delay on the first request.
+ * Awaits getEmbeddingPipeline(); a failure is logged with console.warn
+ * and swallowed, so the returned promise does not reject on load errors.
+ */
+export async function preloadEmbeddingModel() {
+    try {
+        await getEmbeddingPipeline();
+    }
+    catch (error) {
+        console.warn("[Embedding] Preload failed:", error);
+    }
+}
 /**
  * 텍스트를 임베딩 벡터로 변환합니다.
+ * LRU 캐시로 반복 쿼리 성능 향상.
  * @param text 변환할 텍스트
+ * @param useCache 캐시 사용 여부 (기본 true, 청크 임베딩 시 false 권장)
  * @returns 384차원 임베딩 벡터
  */
-export async function embed(text) {
+export async function embed(text, useCache = true) {
     // null/undefined 처리
     if (text == null) {
         return new Array(384).fill(0);
     }
-    const pipe = await getEmbeddingPipeline();
     // 텍스트 정규화
     const cleanText = text.trim().slice(0, 512); // 최대 512자
     if (!cleanText) {
         return new Array(384).fill(0);
     }
+    // 캐시 확인
+    if (useCache) {
+        const cached = queryEmbeddingCache.get(cleanText);
+        if (cached) {
+            cached.lastUsed = Date.now();
+            return cached.embedding;
+        }
+    }
+    const pipe = await getEmbeddingPipeline();
     const result = await pipe(cleanText, {
         pooling: "mean",
         normalize: true,
     });
     // Tensor를 배열로 변환
-    return Array.from(result.data);
+    const embedding = Array.from(result.data);
+    // 캐시 저장
+    if (useCache) {
+        queryEmbeddingCache.set(cleanText, { embedding, lastUsed: Date.now() });
+        pruneQueryCache();
+    }
+    return embedding;
 }
 /**
  * 여러 텍스트를 배치로 임베딩합니다.
  * 병렬로 처리하여 성능 향상 (모델 내부에서 순차 처리되더라도 Promise 오버헤드 감소)
+ * 청크용이므로 쿼리 캐시 사용 안 함 (vectorStore의 영속 캐시 사용).
  * @param texts 변환할 텍스트 배열
  * @returns 임베딩 벡터 배열
  */
@@ -65,19 +114,32 @@ export async function embedBatch(texts) {
     if (!texts || texts.length === 0)
         return [];
     if (texts.length === 1)
-        return [await embed(texts[0])];
+        return [await embed(texts[0], false)];
     // 동시성 제한 (메모리 보호)
     const CONCURRENCY = 5;
     const results = new Array(texts.length);
     for (let i = 0; i < texts.length; i += CONCURRENCY) {
         const batch = texts.slice(i, i + CONCURRENCY);
-        const batchResults = await Promise.all(batch.map(text => embed(text)));
+        // 청크 임베딩은 캐시 사용 안 함 (useCache=false)
+        const batchResults = await Promise.all(batch.map(text => embed(text, false)));
         for (let j = 0; j < batchResults.length; j++) {
             results[i + j] = batchResults[j];
         }
     }
     return results;
 }
+/**
+ * Returns statistics for the query embedding cache.
+ * @returns an object with the current entry count (size) and the configured limit (maxSize)
+ */
+export function getQueryCacheStats() {
+    return { size: queryEmbeddingCache.size, maxSize: QUERY_CACHE_MAX_SIZE };
+}
+/**
+ * Clears the query embedding cache by removing every entry.
+ */
+export function clearQueryCache() {
+    queryEmbeddingCache.clear();
+}
 /**
  * 두 벡터 간의 코사인 유사도를 계산합니다.
  *