@tekmidian/pai 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -159,9 +159,17 @@ function buildOverlapPrefix(chunks, overlapTokens) {
159
159
  * 4. Apply overlap: each chunk includes the last `overlap` tokens from the
160
160
  * previous chunk.
161
161
  */
162
+ /**
163
+ * Strip `<private>...</private>` blocks from content before indexing.
164
+ * Content within these tags is excluded from memory — never stored or searched.
165
+ */
166
+ function stripPrivateTags(content) {
167
+ return content.replace(/<private>[\s\S]*?<\/private>/gi, "");
168
+ }
162
169
  function chunkMarkdown(content, opts) {
163
170
  const maxTokens = opts?.maxTokens ?? DEFAULT_MAX_TOKENS;
164
171
  const overlapTokens = opts?.overlap ?? DEFAULT_OVERLAP;
172
+ content = stripPrivateTags(content);
165
173
  if (!content.trim()) return [];
166
174
  const sections = splitBySections(content.split("\n").map((text, idx) => ({
167
175
  text,
@@ -417,4 +425,4 @@ const INDEX_YIELD_EVERY = 10;
417
425
 
418
426
  //#endregion
419
427
  export { parseSessionTitleChunk as a, yieldToEventLoop as c, sha256 as d, sha256File as f, isPathTooBroadForContentScan as i, chunkMarkdown as l, chunkId as n, walkContentFiles as o, detectTier as r, walkMdFiles as s, INDEX_YIELD_EVERY as t, estimateTokens as u };
420
- //# sourceMappingURL=helpers-BEST-4Gx.mjs.map
428
+ //# sourceMappingURL=helpers-OCVFgprQ.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"helpers-OCVFgprQ.mjs","names":["createHash"],"sources":["../src/utils/hash.ts","../src/memory/chunker.ts","../src/memory/indexer/helpers.ts"],"sourcesContent":["/**\n * Shared hashing utilities. Centralises all SHA-256 usage so every module\n * obtains digests through the same function rather than inlining createHash.\n */\n\nimport { createHash } from \"node:crypto\";\n\n/**\n * Compute a SHA-256 hex digest of the given string.\n * Aliased as sha256File for compatibility with existing call-sites that use\n * that name to hash file contents.\n */\nexport function sha256(content: string): string {\n return createHash(\"sha256\").update(content).digest(\"hex\");\n}\n\n/** Alias kept for backwards compatibility with memory/indexer call-sites. */\nexport const sha256File = sha256;\n","/**\n * Markdown text chunker for the PAI memory engine.\n *\n * Splits markdown files into overlapping text segments suitable for BM25\n * full-text indexing. Respects heading boundaries where possible, falling\n * back to paragraph and sentence splitting when sections are large.\n */\n\nimport { sha256 } from \"../utils/hash.js\";\n\nexport interface Chunk {\n text: string;\n startLine: number; // 1-indexed\n endLine: number; // 1-indexed, inclusive\n hash: string; // SHA-256 of text\n}\n\nexport interface ChunkOptions {\n /** Approximate maximum tokens per chunk. Default 400. */\n maxTokens?: number;\n /** Overlap in tokens from the previous chunk. Default 80. */\n overlap?: number;\n}\n\nconst DEFAULT_MAX_TOKENS = 400;\nconst DEFAULT_OVERLAP = 80;\n\n/**\n * Approximate token count using a words * 1.3 heuristic.\n * Matches the OpenClaw estimate approach.\n */\nexport function estimateTokens(text: string): number {\n const wordCount = text.split(/\\s+/).filter(Boolean).length;\n return Math.ceil(wordCount * 1.3);\n}\n\n// sha256 imported from utils/hash.ts\n\n// ---------------------------------------------------------------------------\n// Internal section / paragraph / sentence splitters\n// ---------------------------------------------------------------------------\n\n/**\n * A contiguous block of lines associated with an approximate token count.\n */\ninterface LineBlock {\n lines: Array<{ text: string; lineNo: number }>;\n tokens: number;\n}\n\n/**\n * Split content into sections delimited by ## or ### headings.\n * Each section starts at its heading line (or at line 1 for a preamble).\n */\nfunction splitBySections(\n lines: Array<{ text: string; lineNo: number }>,\n): LineBlock[] {\n const sections: LineBlock[] = [];\n let current: Array<{ text: string; lineNo: number }> = [];\n\n for (const line of lines) {\n const isHeading = /^#{1,3}\\s/.test(line.text);\n if (isHeading && current.length > 0) {\n const text = current.map((l) => l.text).join(\"\\n\");\n sections.push({ lines: current, tokens: estimateTokens(text) });\n current = [];\n }\n current.push(line);\n }\n\n if (current.length > 0) {\n const text = current.map((l) => l.text).join(\"\\n\");\n sections.push({ lines: current, tokens: estimateTokens(text) });\n }\n\n return sections;\n}\n\n/**\n * Split a LineBlock by double-newline paragraph boundaries.\n */\nfunction splitByParagraphs(block: LineBlock): LineBlock[] {\n const paragraphs: LineBlock[] = [];\n let current: Array<{ text: string; lineNo: number }> = [];\n\n for (const line of block.lines) {\n if (line.text.trim() === \"\" && current.length > 0) {\n // Empty line — potential paragraph boundary\n const text = current.map((l) => l.text).join(\"\\n\");\n paragraphs.push({ lines: [...current], tokens: estimateTokens(text) });\n current = [];\n } else {\n current.push(line);\n }\n }\n\n if (current.length > 0) {\n const text = current.map((l) => l.text).join(\"\\n\");\n paragraphs.push({ lines: current, tokens: estimateTokens(text) });\n }\n\n return paragraphs.length > 0 ? paragraphs : [block];\n}\n\n/**\n * Split a LineBlock by sentence boundaries (. ! ?) when even paragraphs are\n * too large. Works character-by-character within joined lines.\n */\nfunction splitBySentences(block: LineBlock, maxTokens: number): LineBlock[] {\n const fullText = block.lines.map((l) => l.text).join(\" \");\n // Very rough sentence split — split on '. ', '! ', '? ' followed by uppercase\n const sentenceRe = /(?<=[.!?])\\s+(?=[A-Z\"'])/g;\n const sentences = fullText.split(sentenceRe);\n\n const result: LineBlock[] = [];\n let accText = \"\";\n // We can't recover exact line numbers inside a single oversized paragraph,\n // so we approximate using the block's start/end lines distributed evenly.\n const startLine = block.lines[0]?.lineNo ?? 1;\n const endLine = block.lines[block.lines.length - 1]?.lineNo ?? startLine;\n const totalLines = endLine - startLine + 1;\n const linesPerSentence = Math.max(1, Math.floor(totalLines / Math.max(1, sentences.length)));\n\n let sentenceIdx = 0;\n let approxLine = startLine;\n\n const flush = () => {\n if (!accText.trim()) return;\n const endApprox = Math.min(approxLine + linesPerSentence - 1, endLine);\n result.push({\n lines: [{ text: accText.trim(), lineNo: approxLine }],\n tokens: estimateTokens(accText),\n });\n approxLine = endApprox + 1;\n accText = \"\";\n };\n\n for (const sentence of sentences) {\n sentenceIdx++;\n const candidateText = accText ? accText + \" \" + sentence : sentence;\n if (estimateTokens(candidateText) > maxTokens && accText) {\n flush();\n accText = sentence;\n } else {\n accText = candidateText;\n }\n }\n void sentenceIdx; // used only for iteration count\n flush();\n\n return result.length > 0 ? result : [block];\n}\n\n// ---------------------------------------------------------------------------\n// Overlap helper\n// ---------------------------------------------------------------------------\n\n/**\n * Extract the last `overlapTokens` worth of text from a list of previously\n * emitted chunks to prepend to the next chunk.\n */\nfunction buildOverlapPrefix(\n chunks: Chunk[],\n overlapTokens: number,\n): Array<{ text: string; lineNo: number }> {\n if (overlapTokens <= 0 || chunks.length === 0) return [];\n\n const lastChunk = chunks[chunks.length - 1];\n if (!lastChunk) return [];\n\n const lines = lastChunk.text.split(\"\\n\");\n const kept: string[] = [];\n let acc = 0;\n\n for (let i = lines.length - 1; i >= 0; i--) {\n const lineTokens = estimateTokens(lines[i] ?? \"\");\n acc += lineTokens;\n kept.unshift(lines[i] ?? \"\");\n if (acc >= overlapTokens) break;\n }\n\n // Distribute overlap lines across the lastChunk's line range\n const startLine = lastChunk.endLine - kept.length + 1;\n return kept.map((text, idx) => ({ text, lineNo: Math.max(lastChunk.startLine, startLine + idx) }));\n}\n\n// ---------------------------------------------------------------------------\n// Public API\n// ---------------------------------------------------------------------------\n\n/**\n * Chunk a markdown file into overlapping segments for BM25 indexing.\n *\n * Strategy:\n * 1. Split by headings (##, ###) as natural boundaries.\n * 2. If a section exceeds maxTokens, split by paragraphs.\n * 3. If a paragraph still exceeds maxTokens, split by sentences.\n * 4. Apply overlap: each chunk includes the last `overlap` tokens from the\n * previous chunk.\n */\n/**\n * Strip `<private>...</private>` blocks from content before indexing.\n * Content within these tags is excluded from memory — never stored or searched.\n */\nexport function stripPrivateTags(content: string): string {\n return content.replace(/<private>[\\s\\S]*?<\\/private>/gi, \"\");\n}\n\nexport function chunkMarkdown(content: string, opts?: ChunkOptions): Chunk[] {\n const maxTokens = opts?.maxTokens ?? DEFAULT_MAX_TOKENS;\n const overlapTokens = opts?.overlap ?? DEFAULT_OVERLAP;\n\n // Strip private content before indexing\n content = stripPrivateTags(content);\n\n if (!content.trim()) return [];\n\n const rawLines = content.split(\"\\n\");\n const lines: Array<{ text: string; lineNo: number }> = rawLines.map((text, idx) => ({\n text,\n lineNo: idx + 1, // 1-indexed\n }));\n\n // Step 1: section split\n const sections = splitBySections(lines);\n\n // Step 2 & 3: further split oversized sections\n const finalBlocks: LineBlock[] = [];\n for (const section of sections) {\n if (section.tokens <= maxTokens) {\n finalBlocks.push(section);\n continue;\n }\n // Too big — split by paragraphs\n const paras = splitByParagraphs(section);\n for (const para of paras) {\n if (para.tokens <= maxTokens) {\n finalBlocks.push(para);\n continue;\n }\n // Still too big — split by sentences\n const sentences = splitBySentences(para, maxTokens);\n finalBlocks.push(...sentences);\n }\n }\n\n // Step 4: build final chunks with overlap\n const chunks: Chunk[] = [];\n\n for (const block of finalBlocks) {\n if (block.lines.length === 0) continue;\n\n // Build overlap prefix from previous chunks\n const overlapLines = buildOverlapPrefix(chunks, overlapTokens);\n\n // Combine overlap + block lines\n const allLines = [...overlapLines, ...block.lines];\n const text = allLines.map((l) => l.text).join(\"\\n\").trim();\n\n if (!text) continue;\n\n const startLine = block.lines[0]?.lineNo ?? 1;\n const endLine = block.lines[block.lines.length - 1]?.lineNo ?? startLine;\n\n chunks.push({\n text,\n startLine,\n endLine,\n hash: sha256(text),\n });\n }\n\n return chunks;\n}\n","/**\n * Shared helpers for the PAI memory indexers.\n *\n * Contains utilities used by both the sync (SQLite) and async (StorageBackend)\n * indexer paths: hashing, chunk ID generation, directory walking, and path guards.\n */\n\nimport { readdirSync, existsSync } from \"node:fs\";\nimport { sha256File } from \"../../utils/hash.js\";\nimport { join, normalize } from \"node:path\";\nimport { homedir } from \"node:os\";\nimport { basename } from \"node:path\";\n\n// ---------------------------------------------------------------------------\n// Tier detection\n// ---------------------------------------------------------------------------\n\n/**\n * Classify a relative file path into one of the four memory tiers.\n *\n * Rules (in priority order):\n * - MEMORY.md anywhere in memory/ → 'evergreen'\n * - YYYY-MM-DD.md in memory/ → 'daily'\n * - anything else in memory/ → 'topic'\n * - anything in Notes/ → 'session'\n */\nexport function detectTier(\n relativePath: string,\n): \"evergreen\" | \"daily\" | \"topic\" | \"session\" {\n // Normalise to forward slashes and strip leading ./\n const p = relativePath.replace(/\\\\/g, \"/\").replace(/^\\.\\//, \"\");\n\n // Notes directory → session tier\n if (p.startsWith(\"Notes/\") || p === \"Notes\") {\n return \"session\";\n }\n\n const fileName = basename(p);\n\n // MEMORY.md (case-sensitive match) → evergreen\n if (fileName === \"MEMORY.md\") {\n return \"evergreen\";\n }\n\n // YYYY-MM-DD.md → daily\n if (/^\\d{4}-\\d{2}-\\d{2}\\.md$/.test(fileName)) {\n return \"daily\";\n }\n\n // Default for memory/ files\n return \"topic\";\n}\n\n// ---------------------------------------------------------------------------\n// Hashing and chunk ID generation\n// ---------------------------------------------------------------------------\n\n// sha256File imported from ../../utils/hash.js\nexport { sha256File } from \"../../utils/hash.js\";\n\n/**\n * Generate a deterministic chunk ID from its coordinates.\n * Format: sha256(\"projectId:path:chunkIndex:startLine:endLine\")\n *\n * The chunkIndex (0-based position within the file) is included so that\n * chunks with approximated line numbers (e.g. from splitBySentences) never\n * produce colliding IDs even when multiple chunks share the same startLine/endLine.\n */\nexport function chunkId(\n projectId: number,\n path: string,\n chunkIndex: number,\n startLine: number,\n endLine: number,\n): string {\n return createHash(\"sha256\")\n .update(`${projectId}:${path}:${chunkIndex}:${startLine}:${endLine}`)\n .digest(\"hex\");\n}\n\n// ---------------------------------------------------------------------------\n// Event loop yield\n// ---------------------------------------------------------------------------\n\n/**\n * Yield to the Node.js event loop so that IPC server can process requests\n * during long index runs.\n *\n * Uses setTimeout(10ms) rather than setImmediate — the 10ms pause gives the\n * event loop enough time to accept and process incoming IPC connections\n * (socket data, new connections, etc.). Without this, synchronous ONNX\n * inference blocks IPC for the full duration of each embedding (~50-100ms\n * per chunk).\n */\nexport function yieldToEventLoop(): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, 10));\n}\n\n// ---------------------------------------------------------------------------\n// Directory skip sets\n// ---------------------------------------------------------------------------\n\n/**\n * Directories to ALWAYS skip, at any depth, during any directory walk.\n * These are build artifacts, dependency trees, and VCS internals that\n * should never be indexed regardless of where they appear in the tree.\n */\nexport const ALWAYS_SKIP_DIRS = new Set([\n // Version control\n \".git\",\n // Dependency directories (any language)\n \"node_modules\",\n \"vendor\",\n \"Pods\", // CocoaPods (iOS/macOS)\n // Build / compile output\n \"dist\",\n \"build\",\n \"out\",\n \"DerivedData\", // Xcode\n \".next\", // Next.js\n // Python virtual environments and caches\n \".venv\",\n \"venv\",\n \"__pycache__\",\n // General caches\n \".cache\",\n \".bun\",\n // Backup snapshots (Carbon Copy Cloner, Time Machine, etc.)\n \"snaps\",\n \".Trashes\",\n]);\n\n/**\n * Directories to skip when doing a root-level content scan.\n * These are either already handled by dedicated scans or should never be indexed.\n */\nexport const ROOT_SCAN_SKIP_DIRS = new Set([\n \"memory\",\n \"Notes\",\n \".claude\",\n \".DS_Store\",\n // Everything in ALWAYS_SKIP_DIRS is also excluded at root level\n ...ALWAYS_SKIP_DIRS,\n]);\n\n/**\n * Additional directories to skip at the content-scan level (first level below root).\n * These are common macOS/Linux home-directory or repo noise directories that are\n * never meaningful as project content.\n */\nexport const CONTENT_SCAN_SKIP_DIRS = new Set([\n // macOS home directory standard folders\n \"Library\",\n \"Applications\",\n \"Music\",\n \"Movies\",\n \"Pictures\",\n \"Desktop\",\n \"Downloads\",\n \"Public\",\n // Common dev noise\n \"coverage\",\n // Everything in ALWAYS_SKIP_DIRS is also excluded at this level\n ...ALWAYS_SKIP_DIRS,\n]);\n\n// ---------------------------------------------------------------------------\n// Directory walkers\n// ---------------------------------------------------------------------------\n\n/**\n * Safety cap: maximum number of .md files collected per project scan.\n * Prevents runaway scans on huge root paths (e.g. home directory).\n * Projects with more files than this are scanned up to the cap only.\n */\nconst MAX_FILES_PER_PROJECT = 5_000;\n\n/**\n * Maximum recursion depth for directory walks.\n * Prevents deep traversal of large directory trees (e.g. development repos).\n * Depth 0 = the given directory itself (no recursion).\n * Value 6 allows: root → subdirs → sub-subdirs → ... up to 6 levels.\n * Sufficient for memory/, Notes/, and typical docs structures.\n */\nconst MAX_WALK_DEPTH = 6;\n\n/**\n * Recursively collect all .md files under a directory.\n * Returns absolute paths. Stops early if the accumulated count hits the cap\n * or if the recursion depth exceeds MAX_WALK_DEPTH.\n *\n * @param dir Directory to scan.\n * @param acc Shared accumulator array (mutated in place for early exit).\n * @param cap Maximum number of files to collect (across all recursive calls).\n * @param depth Current recursion depth (0 = the initial call).\n */\nexport function walkMdFiles(\n dir: string,\n acc?: string[],\n cap = MAX_FILES_PER_PROJECT,\n depth = 0,\n): string[] {\n const results = acc ?? [];\n if (!existsSync(dir)) return results;\n if (results.length >= cap) return results;\n if (depth > MAX_WALK_DEPTH) return results;\n\n try {\n for (const entry of readdirSync(dir, { withFileTypes: true })) {\n if (results.length >= cap) break;\n if (entry.isSymbolicLink()) continue;\n // Skip known junk directories at every recursion depth\n if (ALWAYS_SKIP_DIRS.has(entry.name)) continue;\n const full = join(dir, entry.name);\n if (entry.isDirectory()) {\n walkMdFiles(full, results, cap, depth + 1);\n } else if (entry.isFile() && entry.name.endsWith(\".md\")) {\n results.push(full);\n }\n }\n } catch {\n // Unreadable directory — skip\n }\n return results;\n}\n\n/**\n * Recursively collect all .md files under rootPath, excluding directories\n * that are already covered by dedicated scans (memory/, Notes/) and\n * common noise directories (.git, node_modules, etc.).\n *\n * Returns absolute paths for files NOT already handled by the specific scanners.\n * Stops collecting once MAX_FILES_PER_PROJECT is reached.\n */\nexport function walkContentFiles(rootPath: string): string[] {\n if (!existsSync(rootPath)) return [];\n\n const results: string[] = [];\n try {\n for (const entry of readdirSync(rootPath, { withFileTypes: true })) {\n if (results.length >= MAX_FILES_PER_PROJECT) break;\n if (entry.isSymbolicLink()) continue;\n if (ROOT_SCAN_SKIP_DIRS.has(entry.name)) continue;\n if (CONTENT_SCAN_SKIP_DIRS.has(entry.name)) continue;\n\n const full = join(rootPath, entry.name);\n if (entry.isDirectory()) {\n walkMdFiles(full, results, MAX_FILES_PER_PROJECT);\n } else if (entry.isFile() && entry.name.endsWith(\".md\")) {\n // Skip root-level MEMORY.md — handled by the dedicated evergreen scan\n if (entry.name !== \"MEMORY.md\") {\n results.push(full);\n }\n }\n }\n } catch {\n // Unreadable directory — skip\n }\n return results;\n}\n\n// ---------------------------------------------------------------------------\n// Path safety guard\n// ---------------------------------------------------------------------------\n\n/** Paths that must never be indexed — system/temp dirs that can contain backup snapshots. */\nconst BLOCKED_ROOTS = new Set([\"/tmp\", \"/private/tmp\", \"/var\", \"/private/var\"]);\n\n/**\n * Returns true if rootPath should skip the recursive content scan.\n *\n * Skips content scanning for:\n * - The home directory itself or any ancestor (too broad — millions of files)\n * - Git repositories (code repos — index memory/ and Notes/ only, not all .md files)\n *\n * The content scan is still useful for Obsidian vaults, Notes folders, and\n * other doc-centric project trees where ALL markdown files are meaningful.\n *\n * The memory/, Notes/, and claude_notes_dir scans always run regardless.\n */\nexport function isPathTooBroadForContentScan(rootPath: string): boolean {\n const normalized = normalize(rootPath);\n\n // Block system/temp directories outright (CCC snapshots live here)\n if (BLOCKED_ROOTS.has(normalized)) return true;\n for (const blocked of BLOCKED_ROOTS) {\n if (normalized.startsWith(blocked + \"/\")) return true;\n }\n\n const home = homedir();\n\n // Skip the home directory itself or any ancestor of home\n if (home.startsWith(normalized) || normalized === \"/\") {\n return true;\n }\n\n // Skip home directory itself (depth 0)\n if (normalized.startsWith(home)) {\n const rel = normalized.slice(home.length).replace(/^\\//, \"\");\n const depth = rel ? rel.split(\"/\").length : 0;\n if (depth === 0) return true;\n }\n\n // Skip git repositories — content scan is only for doc-centric projects\n // (Obsidian vaults, knowledge bases). Code repos use memory/ and Notes/ only.\n if (existsSync(join(normalized, \".git\"))) {\n return true;\n }\n\n return false;\n}\n\n// ---------------------------------------------------------------------------\n// Session title parser\n// ---------------------------------------------------------------------------\n\nconst SESSION_TITLE_RE = /^(\\d{4})\\s*-\\s*(\\d{4}-\\d{2}-\\d{2})\\s*-\\s*(.+)\\.md$/;\n\n/**\n * Parse a session title from a Notes filename.\n * Format: \"NNNN - YYYY-MM-DD - Descriptive Title.md\"\n * Returns a synthetic chunk text like \"Session #0086 2026-02-23: Pai Daemon Background Service\"\n * or null if the filename doesn't match the expected pattern.\n */\nexport function parseSessionTitleChunk(fileName: string): string | null {\n const m = SESSION_TITLE_RE.exec(fileName);\n if (!m) return null;\n const [, num, date, title] = m;\n return `Session #${num} ${date}: ${title}`;\n}\n\n/** Number of files to process before yielding to the event loop inside indexProject. */\nexport const INDEX_YIELD_EVERY = 10;\n"],"mappings":";;;;;;;;;;;;;;;AAYA,SAAgB,OAAO,SAAyB;AAC9C,QAAOA,aAAW,SAAS,CAAC,OAAO,QAAQ,CAAC,OAAO,MAAM;;;AAI3D,MAAa,aAAa;;;;;;;;;;;ACO1B,MAAM,qBAAqB;AAC3B,MAAM,kBAAkB;;;;;AAMxB,SAAgB,eAAe,MAAsB;CACnD,MAAM,YAAY,KAAK,MAAM,MAAM,CAAC,OAAO,QAAQ,CAAC;AACpD,QAAO,KAAK,KAAK,YAAY,IAAI;;;;;;AAqBnC,SAAS,gBACP,OACa;CACb,MAAM,WAAwB,EAAE;CAChC,IAAI,UAAmD,EAAE;AAEzD,MAAK,MAAM,QAAQ,OAAO;AAExB,MADkB,YAAY,KAAK,KAAK,KAAK,IAC5B,QAAQ,SAAS,GAAG;GACnC,MAAM,OAAO,QAAQ,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,KAAK;AAClD,YAAS,KAAK;IAAE,OAAO;IAAS,QAAQ,eAAe,KAAK;IAAE,CAAC;AAC/D,aAAU,EAAE;;AAEd,UAAQ,KAAK,KAAK;;AAGpB,KAAI,QAAQ,SAAS,GAAG;EACtB,MAAM,OAAO,QAAQ,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,KAAK;AAClD,WAAS,KAAK;GAAE,OAAO;GAAS,QAAQ,eAAe,KAAK;GAAE,CAAC;;AAGjE,QAAO;;;;;AAMT,SAAS,kBAAkB,OAA+B;CACxD,MAAM,aAA0B,EAAE;CAClC,IAAI,UAAmD,EAAE;AAEzD,MAAK,MAAM,QAAQ,MAAM,MACvB,KAAI,KAAK,KAAK,MAAM,KAAK,MAAM,QAAQ,SAAS,GAAG;EAEjD,MAAM,OAAO,QAAQ,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,KAAK;AAClD,aAAW,KAAK;GAAE,OAAO,CAAC,GAAG,QAAQ;GAAE,QAAQ,eAAe,KAAK;GAAE,CAAC;AACtE,YAAU,EAAE;OAEZ,SAAQ,KAAK,KAAK;AAItB,KAAI,QAAQ,SAAS,GAAG;EACtB,MAAM,OAAO,QAAQ,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,KAAK;AAClD,aAAW,KAAK;GAAE,OAAO;GAAS,QAAQ,eAAe,KAAK;GAAE,CAAC;;AAGnE,QAAO,WAAW,SAAS,IAAI,aAAa,CAAC,MAAM;;;;;;AAOrD,SAAS,iBAAiB,OAAkB,WAAgC;CAI1E,MAAM,YAHW,MAAM,MAAM,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,IAAI,CAG9B,MADR,4BACyB;CAE5C,MAAM,SAAsB,EAAE;CAC9B,IAAI,UAAU;CAGd,MAAM,YAAY,MAAM,MAAM,IAAI,UAAU;CAC5C,MAAM,UAAU,MAAM,MAAM,MAAM,MAAM,SAAS,IAAI,UAAU;CAC/D,MAAM,aAAa,UAAU,YAAY;CACzC,MAAM,mBAAmB,KAAK,IAAI,GAAG,KAAK,MAAM,aAAa,KAAK,IAAI,GAAG,UAAU,OAAO,CAAC,CAAC;CAE5F,IAAI,cAAc;CAClB,IAAI,aAAa;CAEjB,MAAM,cAAc;AAClB,MAAI,CAAC,QAAQ,MAAM,CAAE;EACrB,MAAM,YAAY,KAAK,IAAI,aAAa,mBAAmB,GAAG,QAAQ;AACtE,SAAO,KAAK;GACV,OAAO,CAAC;IAAE,MAAM,QAAQ,MAAM;IAAE,QAAQ;IAAY,CAAC;GACrD,QAAQ,eAAe,QAAQ;GAChC,CAAC;AACF,eAAa,YAAY;AACzB,YAAU;;AAGZ,MAAK,MAAM,YAAY,WAAW;AAChC;EACA,MAAM,gBAAgB,UAAU,UAAU,MAAM,WAAW;AAC3D,MAAI,eAAe,cAAc,GAAG,aAAa,SAAS;AACxD,UAAO;AACP,aAAU;QAEV,WAAU;;AAId,QAAO;AAEP,QAAO,OAAO,SAAS,IAAI,SAAS,CAAC,MAAM;;;;;;AAW7C,SAAS,mBACP,QACA,eACyC;AACzC,KAAI,iBAAiB,KAAK,OAAO,WAAW,EAAG,QAAO,EAAE;CAExD,MAAM,YAAY,OAAO,OAAO,SAAS;AACzC,KAAI,CAAC,UAAW,QAAO,EAAE;CAEzB,MAAM,QAAQ,UAAU,KAAK,MAAM,KAAK;CACxC,MAAM,OAAiB,EAAE;CACzB,IAAI,MAAM;AAEV,MAAK,IAAI,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,KAAK;EAC1C,MAAM,aAAa,eAAe,MAAM,MAAM,GAAG;AACjD,SAAO;AACP,OAAK,QAAQ,MAAM,MAAM,GAAG;AAC5B,MAAI,OAAO,cAAe;;CAI5B,MAAM,YAAY,UAAU,UAAU,KAAK,SAAS;AACpD,QAAO,KAAK,KAAK,MAAM,SAAS;EAAE;EAAM,QAAQ,KAAK,IAAI,UAAU,WAAW,YAAY,IAAI;EAAE,EAAE;;;;;;;;;;;;;;;;AAqBpG,SAAgB,iBAAiB,SAAyB;AACxD,QAAO,QAAQ,QAAQ,kCAAkC,GAAG;;AAG9D,SAAgB,cAAc,SAAiB,MAA8B;CAC3E,MAAM,YAAY,MAAM,aAAa;CACrC,MAAM,gBAAgB,MAAM,WAAW;AAGvC,WAAU,iBAAiB,QAAQ;AAEnC,KAAI,CAAC,QAAQ,MAAM,CAAE,QAAO,EAAE;CAS9B,MAAM,WAAW,gBAPA,QAAQ,MAAM,KAAK,CAC4B,KAAK,MAAM,SAAS;EAClF;EACA,QAAQ,MAAM;EACf,EAAE,CAGoC;CAGvC,MAAM,cAA2B,EAAE;AACnC,MAAK,MAAM,WAAW,UAAU;AAC9B,MAAI,QAAQ,UAAU,WAAW;AAC/B,eAAY,KAAK,QAAQ;AACzB;;EAGF,MAAM,QAAQ,kBAAkB,QAAQ;AACxC,OAAK,MAAM,QAAQ,OAAO;AACxB,OAAI,KAAK,UAAU,WAAW;AAC5B,gBAAY,KAAK,KAAK;AACtB;;GAGF,MAAM,YAAY,iBAAiB,MAAM,UAAU;AACnD,eAAY,KAAK,GAAG,UAAU;;;CAKlC,MAAM,SAAkB,EAAE;AAE1B,MAAK,MAAM,SAAS,aAAa;AAC/B,MAAI,MAAM,MAAM,WAAW,EAAG;EAO9B,MAAM,OADW,CAAC,GAHG,mBAAmB,QAAQ,cAAc,EAG3B,GAAG,MAAM,MAAM,CAC5B,KAAK,MAAM,EAAE,KAAK,CAAC,KAAK,KAAK,CAAC,MAAM;AAE1D,MAAI,CAAC,KAAM;EAEX,MAAM,YAAY,MAAM,MAAM,IAAI,UAAU;EAC5C,MAAM,UAAU,MAAM,MAAM,MAAM,MAAM,SAAS,IAAI,UAAU;AAE/D,SAAO,KAAK;GACV;GACA;GACA;GACA,MAAM,OAAO,KAAK;GACnB,CAAC;;AAGJ,QAAO;;;;;;;;;;;;;;;;;;;;ACtPT,SAAgB,WACd,cAC6C;CAE7C,MAAM,IAAI,aAAa,QAAQ,OAAO,IAAI,CAAC,QAAQ,SAAS,GAAG;AAG/D,KAAI,EAAE,WAAW,SAAS,IAAI,MAAM,QAClC,QAAO;CAGT,MAAM,WAAW,SAAS,EAAE;AAG5B,KAAI,aAAa,YACf,QAAO;AAIT,KAAI,0BAA0B,KAAK,SAAS,CAC1C,QAAO;AAIT,QAAO;;;;;;;;;;AAkBT,SAAgB,QACd,WACA,MACA,YACA,WACA,SACQ;AACR,QAAO,WAAW,SAAS,CACxB,OAAO,GAAG,UAAU,GAAG,KAAK,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,CACpE,OAAO,MAAM;;;;;;;;;;;;AAiBlB,SAAgB,mBAAkC;AAChD,QAAO,IAAI,SAAS,YAAY,WAAW,SAAS,GAAG,CAAC;;;;;;;AAY1D,MAAa,mBAAmB,IAAI,IAAI;CAEtC;CAEA;CACA;CACA;CAEA;CACA;CACA;CACA;CACA;CAEA;CACA;CACA;CAEA;CACA;CAEA;CACA;CACD,CAAC;;;;;AAMF,MAAa,sBAAsB,IAAI,IAAI;CACzC;CACA;CACA;CACA;CAEA,GAAG;CACJ,CAAC;;;;;;AAOF,MAAa,yBAAyB,IAAI,IAAI;CAE5C;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CAEA;CAEA,GAAG;CACJ,CAAC;;;;;;AAWF,MAAM,wBAAwB;;;;;;;;AAS9B,MAAM,iBAAiB;;;;;;;;;;;AAYvB,SAAgB,YACd,KACA,KACA,MAAM,uBACN,QAAQ,GACE;CACV,MAAM,UAAU,OAAO,EAAE;AACzB,KAAI,CAAC,WAAW,IAAI,CAAE,QAAO;AAC7B,KAAI,QAAQ,UAAU,IAAK,QAAO;AAClC,KAAI,QAAQ,eAAgB,QAAO;AAEnC,KAAI;AACF,OAAK,MAAM,SAAS,YAAY,KAAK,EAAE,eAAe,MAAM,CAAC,EAAE;AAC7D,OAAI,QAAQ,UAAU,IAAK;AAC3B,OAAI,MAAM,gBAAgB,CAAE;AAE5B,OAAI,iBAAiB,IAAI,MAAM,KAAK,CAAE;GACtC,MAAM,OAAO,KAAK,KAAK,MAAM,KAAK;AAClC,OAAI,MAAM,aAAa,CACrB,aAAY,MAAM,SAAS,KAAK,QAAQ,EAAE;YACjC,MAAM,QAAQ,IAAI,MAAM,KAAK,SAAS,MAAM,CACrD,SAAQ,KAAK,KAAK;;SAGhB;AAGR,QAAO;;;;;;;;;;AAWT,SAAgB,iBAAiB,UAA4B;AAC3D,KAAI,CAAC,WAAW,SAAS,CAAE,QAAO,EAAE;CAEpC,MAAM,UAAoB,EAAE;AAC5B,KAAI;AACF,OAAK,MAAM,SAAS,YAAY,UAAU,EAAE,eAAe,MAAM,CAAC,EAAE;AAClE,OAAI,QAAQ,UAAU,sBAAuB;AAC7C,OAAI,MAAM,gBAAgB,CAAE;AAC5B,OAAI,oBAAoB,IAAI,MAAM,KAAK,CAAE;AACzC,OAAI,uBAAuB,IAAI,MAAM,KAAK,CAAE;GAE5C,MAAM,OAAO,KAAK,UAAU,MAAM,KAAK;AACvC,OAAI,MAAM,aAAa,CACrB,aAAY,MAAM,SAAS,sBAAsB;YACxC,MAAM,QAAQ,IAAI,MAAM,KAAK,SAAS,MAAM,EAErD;QAAI,MAAM,SAAS,YACjB,SAAQ,KAAK,KAAK;;;SAIlB;AAGR,QAAO;;;AAQT,MAAM,gBAAgB,IAAI,IAAI;CAAC;CAAQ;CAAgB;CAAQ;CAAe,CAAC;;;;;;;;;;;;;AAc/E,SAAgB,6BAA6B,UAA2B;CACtE,MAAM,aAAa,UAAU,SAAS;AAGtC,KAAI,cAAc,IAAI,WAAW,CAAE,QAAO;AAC1C,MAAK,MAAM,WAAW,cACpB,KAAI,WAAW,WAAW,UAAU,IAAI,CAAE,QAAO;CAGnD,MAAM,OAAO,SAAS;AAGtB,KAAI,KAAK,WAAW,WAAW,IAAI,eAAe,IAChD,QAAO;AAIT,KAAI,WAAW,WAAW,KAAK,EAAE;EAC/B,MAAM,MAAM,WAAW,MAAM,KAAK,OAAO,CAAC,QAAQ,OAAO,GAAG;AAE5D,OADc,MAAM,IAAI,MAAM,IAAI,CAAC,SAAS,OAC9B,EAAG,QAAO;;AAK1B,KAAI,WAAW,KAAK,YAAY,OAAO,CAAC,CACtC,QAAO;AAGT,QAAO;;AAOT,MAAM,mBAAmB;;;;;;;AAQzB,SAAgB,uBAAuB,UAAiC;CACtE,MAAM,IAAI,iBAAiB,KAAK,SAAS;AACzC,KAAI,CAAC,EAAG,QAAO;CACf,MAAM,GAAG,KAAK,MAAM,SAAS;AAC7B,QAAO,YAAY,IAAI,GAAG,KAAK,IAAI;;;AAIrC,MAAa,oBAAoB"}
package/dist/index.d.mts CHANGED
@@ -195,16 +195,6 @@ interface ChunkOptions {
195
195
  * Matches the OpenClaw estimate approach.
196
196
  */
197
197
  declare function estimateTokens(text: string): number;
198
- /**
199
- * Chunk a markdown file into overlapping segments for BM25 indexing.
200
- *
201
- * Strategy:
202
- * 1. Split by headings (##, ###) as natural boundaries.
203
- * 2. If a section exceeds maxTokens, split by paragraphs.
204
- * 3. If a paragraph still exceeds maxTokens, split by sentences.
205
- * 4. Apply overlap: each chunk includes the last `overlap` tokens from the
206
- * previous chunk.
207
- */
208
198
  declare function chunkMarkdown(content: string, opts?: ChunkOptions): Chunk[];
209
199
  //#endregion
210
200
  //#region src/memory/indexer/helpers.d.ts
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/registry/schema.ts","../src/registry/db.ts","../src/registry/migrate.ts","../src/registry/pai-marker.ts","../src/memory/schema.ts","../src/memory/db.ts","../src/memory/chunker.ts","../src/memory/indexer/helpers.ts","../src/memory/indexer/types.ts","../src/memory/indexer/sync.ts","../src/memory/search.ts","../src/memory/reranker.ts"],"mappings":";;;cAgBa,cAAA;AAAA,cAEA,iBAAA;;;;;;ACYb;;;iBD0GgB,gBAAA,CAAiB,EAAA,EAAI,QAAA;;;AAtHrC;;;;;AAsHA;;;;;;AAtHA,iBCYgB,YAAA,CAAa,IAAA,YAAuC,UAAA;;;;;;;ACkIpE;;;;;AAUC;;;;;;;;;;;AAyBD;;;;;AAmCA;iBA5GgB,gBAAA,CACd,OAAA,UACA,SAAA,GAAY,GAAA;;;;;;iBAoCE,OAAA,CAAQ,KAAA;AAAA,UAgBd,aAAA;EACR,MAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,QAAA;AAAA;;;;;;iBAcc,oBAAA,CACd,QAAA,WACC,aAAA;AAAA,UAiCc,eAAA;EACf,gBAAA;EACA,eAAA;EACA,gBAAA;EACA,MAAA;AAAA;;;;;;AC1DF;;;;;iBDuEgB,eAAA,CACd,EAAA,EAAI,QAAA,EACJ,YAAA,YACC,eAAA;;;;;;AF1OH;;;;;AAEA;UGMiB,SAAA;;EAEf,IAAA;EHR4B;EGU5B,IAAA;EH4G8B;EG1G9B,WAAA;AAAA;;;;;AFAF;;;;;;;iBEkJgB,eAAA,CACd,WAAA,UACA,IAAA,UACA,WAAA;ADzDF;;;;AAAA,iBC0HgB,aAAA,CACd,WAAA;EACG,IAAA;EAAc,UAAA;EAAoB,MAAA;AAAA;ADtFvC;;;;;AAUC;;;;;AAVD,iBCmHgB,kBAAA,CAAmB,UAAA,aAAuB,SAAA;;;cCzP7C,qBAAA;;;;;;AFgGb;;;;iBEzCgB,0BAAA,CAA2B,EAAA,EAAI,QAAA;;;AJ/D/C;;;;;AAsHA;;;;;;AAtHA,iBKYgB,cAAA,CAAe,IAAA,YAAyC,UAAA;;;;;;ALdxE;;;;UMNiB,KAAA;EACf,IAAA;EACA,SAAA;EACA,OAAA;EACA,IAAA;AAAA;AAAA,UAGe,YAAA;ENuHe;EMrH9B,SAAA;ENqHmC;EMnHnC,OAAA;AAAA;;;ALSF;;iBKCgB,cAAA,CAAe,IAAA;;;;;;AJ2F/B;;;;;iBI8EgB,aAAA,CAAc,OAAA,UAAiB,IAAA,GAAO,YAAA,GAAe,KAAA;;;;;;ANxLrE;;;;;AAEA;;;;;AAsHA;;iBO9GgB,UAAA,CACd,YAAA;;;;;;UCvBe,WAAA;EACf,cAAA;EACA,aAAA;EACA,YAAA;AAAA;;;;;;;APuBF;iBQcgB,SAAA,CACd,EAAA,EAAI,QAAA,EACJ,SAAA,UACA,QAAA,UACA,YAAA,UACA,MAAA,UACA,IAAA;;;;;;;APwEF;;;;iBOoDsB,YAAA,CACpB,EAAA,EAAI,QAAA,EACJ,SAAA,UACA,QAAA,UACA,cAAA,mBACC,OAAA,CAAQ,WAAA;;;;;APnBX;;iBO8NsB,QAAA,CACpB,EAAA,EAAI,QAAA,EACJ,UAAA,EAAY,QAAA,GACX,OAAA;EAAU,QAAA;EAAkB,MAAA,EAAQ,WAAA;AAAA;;;UC7WtB,YAAA;EACf,SAAA;EACA,WAAA;EACA,IAAA;EACA,SAAA;EACA,OAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;EACA,MAAA;EACA,SAAA;AAAA;AAAA,UAGe,aAAA;ETH2D;ESK1E,UAAA;;EAEA,OAAA;ERqFc;EQnFd,KAAA;;EAEA,UAAA;ERkFA;EQhFA,QAAA;AAAA;;;ARqHF;;;;;AAUC;;;;;;;;;;;iBQpGe,aAAA,CAAc,KAAA;;;;;ARgK9B;;;;;;;;iBQ9HgB,YAAA,CACd,EAAA,EAAI,QAAA,EACJ,KAAA,UACA,IAAA,GAAO,aAAA,GACN,YAAA;;;;;iBAwRa,aAAA,CACd,OAAA,EAAS,YAAA,IACT,UAAA,EAAY,QAAA,GACX,YAAA;;;;;;;iBCjWa,sBAAA,CAAuB,KAAA;AAAA,UAoCtB,aAAA;;EAEf,IAAA;EV9C0E;;;;AC4F5E;ESxCE,aAAA;AAAA;;;;;;;AT8EF;;;iBSlEsB,aAAA,CACpB,KAAA,UACA,OAAA,EAAS,YAAA,IACT,IAAA,GAAO,aAAA,GACN,OAAA,CAAQ,YAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/registry/schema.ts","../src/registry/db.ts","../src/registry/migrate.ts","../src/registry/pai-marker.ts","../src/memory/schema.ts","../src/memory/db.ts","../src/memory/chunker.ts","../src/memory/indexer/helpers.ts","../src/memory/indexer/types.ts","../src/memory/indexer/sync.ts","../src/memory/search.ts","../src/memory/reranker.ts"],"mappings":";;;cAgBa,cAAA;AAAA,cAEA,iBAAA;;;;;;ACYb;;;iBD0GgB,gBAAA,CAAiB,EAAA,EAAI,QAAA;;;AAtHrC;;;;;AAsHA;;;;;;AAtHA,iBCYgB,YAAA,CAAa,IAAA,YAAuC,UAAA;;;;;;;ACkIpE;;;;;AAUC;;;;;;;;;;;AAyBD;;;;;AAmCA;iBA5GgB,gBAAA,CACd,OAAA,UACA,SAAA,GAAY,GAAA;;;;;;iBAoCE,OAAA,CAAQ,KAAA;AAAA,UAgBd,aAAA;EACR,MAAA;EACA,IAAA;EACA,IAAA;EACA,KAAA;EACA,QAAA;AAAA;;;;;;iBAcc,oBAAA,CACd,QAAA,WACC,aAAA;AAAA,UAiCc,eAAA;EACf,gBAAA;EACA,eAAA;EACA,gBAAA;EACA,MAAA;AAAA;;;;;;AC1DF;;;;;iBDuEgB,eAAA,CACd,EAAA,EAAI,QAAA,EACJ,YAAA,YACC,eAAA;;;;;;AF1OH;;;;;AAEA;UGMiB,SAAA;;EAEf,IAAA;EHR4B;EGU5B,IAAA;EH4G8B;EG1G9B,WAAA;AAAA;;;;;AFAF;;;;;;;iBEkJgB,eAAA,CACd,WAAA,UACA,IAAA,UACA,WAAA;ADzDF;;;;AAAA,iBC0HgB,aAAA,CACd,WAAA;EACG,IAAA;EAAc,UAAA;EAAoB,MAAA;AAAA;ADtFvC;;;;;AAUC;;;;;AAVD,iBCmHgB,kBAAA,CAAmB,UAAA,aAAuB,SAAA;;;cCzP7C,qBAAA;;;;;;AFgGb;;;;iBEzCgB,0BAAA,CAA2B,EAAA,EAAI,QAAA;;;AJ/D/C;;;;;AAsHA;;;;;;AAtHA,iBKYgB,cAAA,CAAe,IAAA,YAAyC,UAAA;;;;;;ALdxE;;;;UMNiB,KAAA;EACf,IAAA;EACA,SAAA;EACA,OAAA;EACA,IAAA;AAAA;AAAA,UAGe,YAAA;ENuHe;EMrH9B,SAAA;ENqHmC;EMnHnC,OAAA;AAAA;;;ALSF;;iBKCgB,cAAA,CAAe,IAAA;AAAA,iBAiLf,aAAA,CAAc,OAAA,UAAiB,IAAA,GAAO,YAAA,GAAe,KAAA;;;;;;ANhMrE;;;;;AAEA;;;;;AAsHA;;iBO9GgB,UAAA,CACd,YAAA;;;;;;UCvBe,WAAA;EACf,cAAA;EACA,aAAA;EACA,YAAA;AAAA;;;;;;;APuBF;iBQcgB,SAAA,CACd,EAAA,EAAI,QAAA,EACJ,SAAA,UACA,QAAA,UACA,YAAA,UACA,MAAA,UACA,IAAA;;;;;;;APwEF;;;;iBOoDsB,YAAA,CACpB,EAAA,EAAI,QAAA,EACJ,SAAA,UACA,QAAA,UACA,cAAA,mBACC,OAAA,CAAQ,WAAA;;;;;APnBX;;iBO8NsB,QAAA,CACpB,EAAA,EAAI,QAAA,EACJ,UAAA,EAAY,QAAA,GACX,OAAA;EAAU,QAAA;EAAkB,MAAA,EAAQ,WAAA;AAAA;;;UC7WtB,YAAA;EACf,SAAA;EACA,WAAA;EACA,IAAA;EACA,SAAA;EACA,OAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;EACA,MAAA;EACA,SAAA;AAAA;AAAA,UAGe,aAAA;ETH2D;ESK1E,UAAA;;EAEA,OAAA;ERqFc;EQnFd,KAAA;;EAEA,UAAA;ERkFA;EQhFA,QAAA;AAAA;;;ARqHF;;;;;AAUC;;;;;;;;;;;iBQpGe,aAAA,CAAc,KAAA;;;;;ARgK9B;;;;;;;;iBQ9HgB,YAAA,CACd,EAAA,EAAI,QAAA,EACJ,KAAA,UACA,IAAA,GAAO,aAAA,GACN,YAAA;;;;;iBAwRa,aAAA,CACd,OAAA,EAAS,YAAA,IACT,UAAA,EAAY,QAAA,GACX,YAAA;;;;;;;iBCjWa,sBAAA,CAAuB,KAAA;AAAA,UAoCtB,aAAA;;EAEf,IAAA;EV9C0E;;;;AC4F5E;ESxCE,aAAA;AAAA;;;;;;;AT8EF;;;iBSlEsB,aAAA,CACpB,KAAA,UACA,OAAA,EAAS,YAAA,IACT,IAAA,GAAO,aAAA,GACN,OAAA,CAAQ,YAAA"}
package/dist/index.mjs CHANGED
@@ -3,8 +3,8 @@ import "./utils-QSfKagcj.mjs";
3
3
  import { a as slugify, i as parseSessionFilename, n as decodeEncodedDir, r as migrateFromJson } from "./migrate-jokLenje.mjs";
4
4
  import { n as ensurePaiMarker, r as readPaiMarker, t as discoverPaiMarkers } from "./pai-marker-CXQPX2P6.mjs";
5
5
  import { i as initializeFederationSchema, n as openFederation, r as FEDERATION_SCHEMA_SQL } from "./db-DdUperSl.mjs";
6
- import { l as chunkMarkdown, r as detectTier, u as estimateTokens } from "./helpers-BEST-4Gx.mjs";
7
- import { i as indexProject, n as indexAll, r as indexFile } from "./sync-BOsnEj2-.mjs";
6
+ import { l as chunkMarkdown, r as detectTier, u as estimateTokens } from "./helpers-OCVFgprQ.mjs";
7
+ import { i as indexProject, n as indexAll, r as indexFile } from "./sync-CdHSL9Kc.mjs";
8
8
  import "./embeddings-DGRAPAYb.mjs";
9
9
  import { n as populateSlugs, r as searchMemory, t as buildFtsQuery } from "./search-DC1qhkKn.mjs";
10
10
  import { n as rerankResults, t as configureRerankerModel } from "./reranker-CMNZcfVx.mjs";
@@ -1,4 +1,4 @@
1
- import { a as parseSessionTitleChunk, c as yieldToEventLoop, f as sha256File, i as isPathTooBroadForContentScan, l as chunkMarkdown, n as chunkId, o as walkContentFiles, r as detectTier, s as walkMdFiles, t as INDEX_YIELD_EVERY } from "./helpers-BEST-4Gx.mjs";
1
+ import { a as parseSessionTitleChunk, c as yieldToEventLoop, f as sha256File, i as isPathTooBroadForContentScan, l as chunkMarkdown, n as chunkId, o as walkContentFiles, r as detectTier, s as walkMdFiles, t as INDEX_YIELD_EVERY } from "./helpers-OCVFgprQ.mjs";
2
2
  import { existsSync, readFileSync, statSync } from "node:fs";
3
3
  import { basename, join, relative } from "node:path";
4
4
 
@@ -285,4 +285,4 @@ async function indexAllWithBackend(backend, registryDb) {
285
285
 
286
286
  //#endregion
287
287
  export { embedChunksWithBackend, indexAllWithBackend };
288
- //# sourceMappingURL=indexer-backend-CIIlrYh6.mjs.map
288
+ //# sourceMappingURL=indexer-backend-DXkDh-kc.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"indexer-backend-CIIlrYh6.mjs","names":[],"sources":["../src/memory/indexer/async.ts"],"sourcesContent":["/**\n * Backend-aware async indexer for PAI federation memory.\n *\n * Provides the same functionality as sync.ts but writes through the\n * StorageBackend interface instead of directly to better-sqlite3.\n * Used when the daemon is configured with the Postgres backend.\n *\n * The SQLite path still uses sync.ts directly (which is faster for SQLite\n * due to synchronous transactions).\n */\n\nimport { readFileSync, statSync, existsSync } from \"node:fs\";\nimport { join, relative, basename } from \"node:path\";\nimport type { Database } from \"better-sqlite3\";\nimport type { StorageBackend, ChunkRow } from \"../../storage/interface.js\";\nimport { chunkMarkdown } from \"../chunker.js\";\nimport {\n sha256File,\n chunkId,\n detectTier,\n walkMdFiles,\n walkContentFiles,\n isPathTooBroadForContentScan,\n parseSessionTitleChunk,\n yieldToEventLoop,\n INDEX_YIELD_EVERY,\n} from \"./helpers.js\";\nimport type { IndexResult } from \"./types.js\";\n\nexport type { IndexResult };\n\n// ---------------------------------------------------------------------------\n// Single-file indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\n/**\n * Index a single file through the StorageBackend interface.\n * Returns true if the file was re-indexed (changed or new), false if skipped.\n */\nexport async function indexFileWithBackend(\n backend: StorageBackend,\n projectId: number,\n rootPath: string,\n relativePath: string,\n source: string,\n tier: string,\n): Promise<boolean> {\n const absPath = join(rootPath, relativePath);\n\n let content: string;\n let stat: ReturnType<typeof statSync>;\n try {\n content = readFileSync(absPath, \"utf8\");\n stat = statSync(absPath);\n } catch {\n return false;\n }\n\n const hash = sha256File(content);\n const mtime = Math.floor(stat.mtimeMs);\n const size = stat.size;\n\n // Change detection\n const existingHash = await backend.getFileHash(projectId, relativePath);\n if (existingHash === hash) return false;\n\n // Delete old chunks\n await backend.deleteChunksForFile(projectId, relativePath);\n\n // Chunk the content\n const rawChunks = chunkMarkdown(content);\n const updatedAt = Date.now();\n\n const chunks: ChunkRow[] = rawChunks.map((c, i) => ({\n id: chunkId(projectId, relativePath, i, c.startLine, c.endLine),\n projectId,\n source,\n tier,\n path: relativePath,\n startLine: c.startLine,\n endLine: c.endLine,\n hash: c.hash,\n text: c.text,\n updatedAt,\n embedding: null,\n }));\n\n // Insert chunks + update file record\n await backend.insertChunks(chunks);\n await backend.upsertFile({ projectId, path: relativePath, source, tier, hash, mtime, size });\n\n return true;\n}\n\n// ---------------------------------------------------------------------------\n// Project-level indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\nexport async function indexProjectWithBackend(\n backend: StorageBackend,\n projectId: number,\n rootPath: string,\n claudeNotesDir?: string | null,\n): Promise<IndexResult> {\n const result: IndexResult = { filesProcessed: 0, chunksCreated: 0, filesSkipped: 0 };\n\n const filesToIndex: Array<{ absPath: string; rootBase: string; source: string; tier: string }> = [];\n\n const rootMemoryMd = join(rootPath, \"MEMORY.md\");\n if (existsSync(rootMemoryMd)) {\n filesToIndex.push({ absPath: rootMemoryMd, rootBase: rootPath, source: \"memory\", tier: \"evergreen\" });\n }\n\n const memoryDir = join(rootPath, \"memory\");\n for (const absPath of walkMdFiles(memoryDir)) {\n const relPath = relative(rootPath, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"memory\", tier });\n }\n\n const notesDir = join(rootPath, \"Notes\");\n for (const absPath of walkMdFiles(notesDir)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic session-title chunks for Notes files\n {\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(notesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(rootPath, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n const titleChunk: ChunkRow = {\n id, projectId, source: \"notes\", tier: \"session\",\n path: syntheticPath, startLine: 0, endLine: 0,\n hash, text, updatedAt, embedding: null,\n };\n try {\n await backend.insertChunks([titleChunk]);\n } catch {\n // Skip title chunks that cause backend errors\n }\n }\n }\n\n if (!isPathTooBroadForContentScan(rootPath)) {\n for (const absPath of walkContentFiles(rootPath)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"content\", tier: \"topic\" });\n }\n }\n\n if (claudeNotesDir && claudeNotesDir !== notesDir) {\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n filesToIndex.push({ absPath, rootBase: claudeNotesDir, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic title chunks for claude notes dir\n {\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(claudeNotesDir, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n const titleChunk: ChunkRow = {\n id, projectId, source: \"notes\", tier: \"session\",\n path: syntheticPath, startLine: 0, endLine: 0,\n hash, text, updatedAt, embedding: null,\n };\n try {\n await backend.insertChunks([titleChunk]);\n } catch {\n // Skip title chunks that cause backend errors\n }\n }\n }\n\n if (claudeNotesDir.endsWith(\"/Notes\")) {\n const claudeProjectDir = claudeNotesDir.slice(0, -\"/Notes\".length);\n const claudeMemoryMd = join(claudeProjectDir, \"MEMORY.md\");\n if (existsSync(claudeMemoryMd)) {\n filesToIndex.push({ absPath: claudeMemoryMd, rootBase: claudeProjectDir, source: \"memory\", tier: \"evergreen\" });\n }\n const claudeMemoryDir = join(claudeProjectDir, \"memory\");\n for (const absPath of walkMdFiles(claudeMemoryDir)) {\n const relPath = relative(claudeProjectDir, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: claudeProjectDir, source: \"memory\", tier });\n }\n }\n }\n\n await yieldToEventLoop();\n\n let filesSinceYield = 0;\n\n for (const { absPath, rootBase, source, tier } of filesToIndex) {\n if (filesSinceYield >= INDEX_YIELD_EVERY) {\n await yieldToEventLoop();\n filesSinceYield = 0;\n }\n filesSinceYield++;\n\n const relPath = relative(rootBase, absPath);\n try {\n const changed = await indexFileWithBackend(backend, projectId, rootBase, relPath, source, tier);\n\n if (changed) {\n const ids = await backend.getChunkIds(projectId, relPath);\n result.filesProcessed++;\n result.chunksCreated += ids.length;\n } else {\n result.filesSkipped++;\n }\n } catch {\n // Skip files that cause backend errors (e.g. null bytes in Postgres)\n result.filesSkipped++;\n }\n }\n\n // Prune stale paths\n const livePaths = new Set<string>();\n for (const { absPath, rootBase } of filesToIndex) {\n livePaths.add(relative(rootBase, absPath));\n }\n\n const dbChunkPaths = await backend.getDistinctChunkPaths(projectId);\n\n const stalePaths: string[] = [];\n for (const p of dbChunkPaths) {\n const basePath = p.endsWith(\"::title\") ? p.slice(0, -\"::title\".length) : p;\n if (!livePaths.has(basePath)) {\n stalePaths.push(p);\n }\n }\n\n if (stalePaths.length > 0) {\n await backend.deletePaths(projectId, stalePaths);\n }\n\n return result;\n}\n\n// ---------------------------------------------------------------------------\n// Embedding generation via StorageBackend\n// ---------------------------------------------------------------------------\n\nconst EMBED_BATCH_SIZE = 50;\nconst EMBED_YIELD_EVERY = 1;\n\n/**\n * Generate and store embeddings for all unembedded chunks via the StorageBackend.\n *\n * Processes chunks in batches of EMBED_BATCH_SIZE, yielding to the event loop\n * every EMBED_YIELD_EVERY chunks to avoid blocking IPC calls from MCP shims.\n *\n * The optional `shouldStop` callback is checked between every batch. When it\n * returns true the embed loop exits early so the caller (e.g. the daemon\n * shutdown handler) can close the pool without racing against active queries.\n *\n * Returns the number of newly embedded chunks.\n */\nexport async function embedChunksWithBackend(\n backend: StorageBackend,\n shouldStop?: () => boolean,\n projectNames?: Map<number, string>,\n): Promise<number> {\n const { generateEmbedding, serializeEmbedding } = await import(\"../embeddings.js\");\n\n const rows = await backend.getUnembeddedChunkIds();\n if (rows.length === 0) return 0;\n\n const total = rows.length;\n let embedded = 0;\n\n // Build a summary of what needs embedding: count chunks per project_id\n const projectChunkCounts = new Map<number, { count: number; samplePath: string }>();\n for (const row of rows) {\n const entry = projectChunkCounts.get(row.project_id);\n if (entry) {\n entry.count++;\n } else {\n projectChunkCounts.set(row.project_id, { count: 1, samplePath: row.path });\n }\n }\n const pName = (pid: number) => projectNames?.get(pid) ?? `project ${pid}`;\n const projectSummary = Array.from(projectChunkCounts.entries())\n .map(([pid, { count, samplePath }]) => ` ${pName(pid)}: ${count} chunks (e.g. ${samplePath})`)\n .join(\"\\n\");\n process.stderr.write(\n `[pai-daemon] Embed pass: ${total} unembedded chunks across ${projectChunkCounts.size} project(s)\\n${projectSummary}\\n`\n );\n\n // Track current project for transition logging\n let currentProjectId = -1;\n let projectEmbedded = 0;\n\n for (let i = 0; i < rows.length; i += EMBED_BATCH_SIZE) {\n // Check cancellation between every batch before touching the pool again\n if (shouldStop?.()) {\n process.stderr.write(\n `[pai-daemon] Embed pass cancelled after ${embedded}/${total} chunks (shutdown requested)\\n`\n );\n break;\n }\n\n const batch = rows.slice(i, i + EMBED_BATCH_SIZE);\n\n for (let j = 0; j < batch.length; j++) {\n const { id, text, project_id, path } = batch[j];\n\n // Log when switching to a new project\n if (project_id !== currentProjectId) {\n if (currentProjectId !== -1) {\n process.stderr.write(\n `[pai-daemon] Finished ${pName(currentProjectId)}: ${projectEmbedded} chunks embedded\\n`\n );\n }\n const info = projectChunkCounts.get(project_id);\n process.stderr.write(\n `[pai-daemon] Embedding ${pName(project_id)} (${info?.count ?? \"?\"} chunks, starting at ${path})\\n`\n );\n currentProjectId = project_id;\n projectEmbedded = 0;\n }\n\n // Yield to the event loop periodically to keep IPC responsive\n if ((embedded + j) % EMBED_YIELD_EVERY === 0) {\n await yieldToEventLoop();\n }\n\n const vec = await generateEmbedding(text);\n const blob = serializeEmbedding(vec);\n await backend.updateEmbedding(id, blob);\n projectEmbedded++;\n }\n\n embedded += batch.length;\n\n // Log progress with current file path for context\n const lastChunk = batch[batch.length - 1];\n process.stderr.write(\n `[pai-daemon] Embedded ${embedded}/${total} chunks (${pName(lastChunk.project_id)}: ${lastChunk.path})\\n`\n );\n }\n\n // Log final project completion\n if (currentProjectId !== -1) {\n process.stderr.write(\n `[pai-daemon] Finished ${pName(currentProjectId)}: ${projectEmbedded} chunks embedded\\n`\n );\n }\n\n return embedded;\n}\n\n// ---------------------------------------------------------------------------\n// Global indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\nexport async function indexAllWithBackend(\n backend: StorageBackend,\n registryDb: Database,\n): Promise<{ projects: number; result: IndexResult }> {\n const projects = registryDb\n .prepare(\"SELECT id, root_path, claude_notes_dir FROM projects WHERE status = 'active'\")\n .all() as Array<{ id: number; root_path: string; claude_notes_dir: string | null }>;\n\n const totals: IndexResult = { filesProcessed: 0, chunksCreated: 0, filesSkipped: 0 };\n\n for (const project of projects) {\n await yieldToEventLoop();\n const r = await indexProjectWithBackend(backend, project.id, project.root_path, project.claude_notes_dir);\n totals.filesProcessed += r.filesProcessed;\n totals.chunksCreated += r.chunksCreated;\n totals.filesSkipped += r.filesSkipped;\n }\n\n return { projects: projects.length, result: totals };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAuCA,eAAsB,qBACpB,SACA,WACA,UACA,cACA,QACA,MACkB;CAClB,MAAM,UAAU,KAAK,UAAU,aAAa;CAE5C,IAAI;CACJ,IAAI;AACJ,KAAI;AACF,YAAU,aAAa,SAAS,OAAO;AACvC,SAAO,SAAS,QAAQ;SAClB;AACN,SAAO;;CAGT,MAAM,OAAO,WAAW,QAAQ;CAChC,MAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ;CACtC,MAAM,OAAO,KAAK;AAIlB,KADqB,MAAM,QAAQ,YAAY,WAAW,aAAa,KAClD,KAAM,QAAO;AAGlC,OAAM,QAAQ,oBAAoB,WAAW,aAAa;CAG1D,MAAM,YAAY,cAAc,QAAQ;CACxC,MAAM,YAAY,KAAK,KAAK;CAE5B,MAAM,SAAqB,UAAU,KAAK,GAAG,OAAO;EAClD,IAAI,QAAQ,WAAW,cAAc,GAAG,EAAE,WAAW,EAAE,QAAQ;EAC/D;EACA;EACA;EACA,MAAM;EACN,WAAW,EAAE;EACb,SAAS,EAAE;EACX,MAAM,EAAE;EACR,MAAM,EAAE;EACR;EACA,WAAW;EACZ,EAAE;AAGH,OAAM,QAAQ,aAAa,OAAO;AAClC,OAAM,QAAQ,WAAW;EAAE;EAAW,MAAM;EAAc;EAAQ;EAAM;EAAM;EAAO;EAAM,CAAC;AAE5F,QAAO;;AAOT,eAAsB,wBACpB,SACA,WACA,UACA,gBACsB;CACtB,MAAM,SAAsB;EAAE,gBAAgB;EAAG,eAAe;EAAG,cAAc;EAAG;CAEpF,MAAM,eAA2F,EAAE;CAEnG,MAAM,eAAe,KAAK,UAAU,YAAY;AAChD,KAAI,WAAW,aAAa,CAC1B,cAAa,KAAK;EAAE,SAAS;EAAc,UAAU;EAAU,QAAQ;EAAU,MAAM;EAAa,CAAC;CAGvG,MAAM,YAAY,KAAK,UAAU,SAAS;AAC1C,MAAK,MAAM,WAAW,YAAY,UAAU,EAAE;EAE5C,MAAM,OAAO,WADG,SAAS,UAAU,QAAQ,CACX;AAChC,eAAa,KAAK;GAAE;GAAS,UAAU;GAAU,QAAQ;GAAU;GAAM,CAAC;;CAG5E,MAAM,WAAW,KAAK,UAAU,QAAQ;AACxC,MAAK,MAAM,WAAW,YAAY,SAAS,CACzC,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAS,MAAM;EAAW,CAAC;CAItF;EACE,MAAM,YAAY,KAAK,KAAK;AAC5B,OAAK,MAAM,WAAW,YAAY,SAAS,EAAE;GAE3C,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,OAAI,CAAC,KAAM;GAEX,MAAM,gBAAgB,GADN,SAAS,UAAU,QAAQ,CACV;GAGjC,MAAM,aAAuB;IAC3B,IAHS,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;IAG/C;IAAW,QAAQ;IAAS,MAAM;IACtC,MAAM;IAAe,WAAW;IAAG,SAAS;IAC5C,MAJW,WAAW,KAAK;IAIrB;IAAM;IAAW,WAAW;IACnC;AACD,OAAI;AACF,UAAM,QAAQ,aAAa,CAAC,WAAW,CAAC;WAClC;;;AAMZ,KAAI,CAAC,6BAA6B,SAAS,CACzC,MAAK,MAAM,WAAW,iBAAiB,SAAS,CAC9C,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAW,MAAM;EAAS,CAAC;AAIxF,KAAI,kBAAkB,mBAAmB,UAAU;AACjD,OAAK,MAAM,WAAW,YAAY,eAAe,CAC/C,cAAa,KAAK;GAAE;GAAS,UAAU;GAAgB,QAAQ;GAAS,MAAM;GAAW,CAAC;EAI5F;GACE,MAAM,YAAY,KAAK,KAAK;AAC5B,QAAK,MAAM,WAAW,YAAY,eAAe,EAAE;IAEjD,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,QAAI,CAAC,KAAM;IAEX,MAAM,gBAAgB,GADN,SAAS,gBAAgB,QAAQ,CAChB;IAGjC,MAAM,aAAuB;KAC3B,IAHS,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;KAG/C;KAAW,QAAQ;KAAS,MAAM;KACtC,MAAM;KAAe,WAAW;KAAG,SAAS;KAC5C,MAJW,WAAW,KAAK;KAIrB;KAAM;KAAW,WAAW;KACnC;AACD,QAAI;AACF,WAAM,QAAQ,aAAa,CAAC,WAAW,CAAC;YAClC;;;AAMZ,MAAI,eAAe,SAAS,SAAS,EAAE;GACrC,MAAM,mBAAmB,eAAe,MAAM,GAAG,GAAiB;GAClE,MAAM,iBAAiB,KAAK,kBAAkB,YAAY;AAC1D,OAAI,WAAW,eAAe,CAC5B,cAAa,KAAK;IAAE,SAAS;IAAgB,UAAU;IAAkB,QAAQ;IAAU,MAAM;IAAa,CAAC;GAEjH,MAAM,kBAAkB,KAAK,kBAAkB,SAAS;AACxD,QAAK,MAAM,WAAW,YAAY,gBAAgB,EAAE;IAElD,MAAM,OAAO,WADG,SAAS,kBAAkB,QAAQ,CACnB;AAChC,iBAAa,KAAK;KAAE;KAAS,UAAU;KAAkB,QAAQ;KAAU;KAAM,CAAC;;;;AAKxF,OAAM,kBAAkB;CAExB,IAAI,kBAAkB;AAEtB,MAAK,MAAM,EAAE,SAAS,UAAU,QAAQ,UAAU,cAAc;AAC9D,MAAI,mBAAmB,mBAAmB;AACxC,SAAM,kBAAkB;AACxB,qBAAkB;;AAEpB;EAEA,MAAM,UAAU,SAAS,UAAU,QAAQ;AAC3C,MAAI;AAGF,OAFgB,MAAM,qBAAqB,SAAS,WAAW,UAAU,SAAS,QAAQ,KAAK,EAElF;IACX,MAAM,MAAM,MAAM,QAAQ,YAAY,WAAW,QAAQ;AACzD,WAAO;AACP,WAAO,iBAAiB,IAAI;SAE5B,QAAO;UAEH;AAEN,UAAO;;;CAKX,MAAM,4BAAY,IAAI,KAAa;AACnC,MAAK,MAAM,EAAE,SAAS,cAAc,aAClC,WAAU,IAAI,SAAS,UAAU,QAAQ,CAAC;CAG5C,MAAM,eAAe,MAAM,QAAQ,sBAAsB,UAAU;CAEnE,MAAM,aAAuB,EAAE;AAC/B,MAAK,MAAM,KAAK,cAAc;EAC5B,MAAM,WAAW,EAAE,SAAS,UAAU,GAAG,EAAE,MAAM,GAAG,GAAkB,GAAG;AACzE,MAAI,CAAC,UAAU,IAAI,SAAS,CAC1B,YAAW,KAAK,EAAE;;AAItB,KAAI,WAAW,SAAS,EACtB,OAAM,QAAQ,YAAY,WAAW,WAAW;AAGlD,QAAO;;AAOT,MAAM,mBAAmB;AACzB,MAAM,oBAAoB;;;;;;;;;;;;;AAc1B,eAAsB,uBACpB,SACA,YACA,cACiB;CACjB,MAAM,EAAE,mBAAmB,uBAAuB,MAAM,OAAO;CAE/D,MAAM,OAAO,MAAM,QAAQ,uBAAuB;AAClD,KAAI,KAAK,WAAW,EAAG,QAAO;CAE9B,MAAM,QAAQ,KAAK;CACnB,IAAI,WAAW;CAGf,MAAM,qCAAqB,IAAI,KAAoD;AACnF,MAAK,MAAM,OAAO,MAAM;EACtB,MAAM,QAAQ,mBAAmB,IAAI,IAAI,WAAW;AACpD,MAAI,MACF,OAAM;MAEN,oBAAmB,IAAI,IAAI,YAAY;GAAE,OAAO;GAAG,YAAY,IAAI;GAAM,CAAC;;CAG9E,MAAM,SAAS,QAAgB,cAAc,IAAI,IAAI,IAAI,WAAW;CACpE,MAAM,iBAAiB,MAAM,KAAK,mBAAmB,SAAS,CAAC,CAC5D,KAAK,CAAC,KAAK,EAAE,OAAO,kBAAkB,KAAK,MAAM,IAAI,CAAC,IAAI,MAAM,gBAAgB,WAAW,GAAG,CAC9F,KAAK,KAAK;AACb,SAAQ,OAAO,MACb,4BAA4B,MAAM,4BAA4B,mBAAmB,KAAK,eAAe,eAAe,IACrH;CAGD,IAAI,mBAAmB;CACvB,IAAI,kBAAkB;AAEtB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,kBAAkB;AAEtD,MAAI,cAAc,EAAE;AAClB,WAAQ,OAAO,MACb,2CAA2C,SAAS,GAAG,MAAM,gCAC9D;AACD;;EAGF,MAAM,QAAQ,KAAK,MAAM,GAAG,IAAI,iBAAiB;AAEjD,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,EAAE,IAAI,MAAM,YAAY,SAAS,MAAM;AAG7C,OAAI,eAAe,kBAAkB;AACnC,QAAI,qBAAqB,GACvB,SAAQ,OAAO,MACb,yBAAyB,MAAM,iBAAiB,CAAC,IAAI,gBAAgB,oBACtE;IAEH,MAAM,OAAO,mBAAmB,IAAI,WAAW;AAC/C,YAAQ,OAAO,MACb,0BAA0B,MAAM,WAAW,CAAC,IAAI,MAAM,SAAS,IAAI,uBAAuB,KAAK,KAChG;AACD,uBAAmB;AACnB,sBAAkB;;AAIpB,QAAK,WAAW,KAAK,sBAAsB,EACzC,OAAM,kBAAkB;GAI1B,MAAM,OAAO,mBADD,MAAM,kBAAkB,KAAK,CACL;AACpC,SAAM,QAAQ,gBAAgB,IAAI,KAAK;AACvC;;AAGF,cAAY,MAAM;EAGlB,MAAM,YAAY,MAAM,MAAM,SAAS;AACvC,UAAQ,OAAO,MACb,yBAAyB,SAAS,GAAG,MAAM,WAAW,MAAM,UAAU,WAAW,CAAC,IAAI,UAAU,KAAK,KACtG;;AAIH,KAAI,qBAAqB,GACvB,SAAQ,OAAO,MACb,yBAAyB,MAAM,iBAAiB,CAAC,IAAI,gBAAgB,oBACtE;AAGH,QAAO;;AAOT,eAAsB,oBACpB,SACA,YACoD;CACpD,MAAM,WAAW,WACd,QAAQ,+EAA+E,CACvF,KAAK;CAER,MAAM,SAAsB;EAAE,gBAAgB;EAAG,eAAe;EAAG,cAAc;EAAG;AAEpF,MAAK,MAAM,WAAW,UAAU;AAC9B,QAAM,kBAAkB;EACxB,MAAM,IAAI,MAAM,wBAAwB,SAAS,QAAQ,IAAI,QAAQ,WAAW,QAAQ,iBAAiB;AACzG,SAAO,kBAAkB,EAAE;AAC3B,SAAO,iBAAiB,EAAE;AAC1B,SAAO,gBAAgB,EAAE;;AAG3B,QAAO;EAAE,UAAU,SAAS;EAAQ,QAAQ;EAAQ"}
1
+ {"version":3,"file":"indexer-backend-DXkDh-kc.mjs","names":[],"sources":["../src/memory/indexer/async.ts"],"sourcesContent":["/**\n * Backend-aware async indexer for PAI federation memory.\n *\n * Provides the same functionality as sync.ts but writes through the\n * StorageBackend interface instead of directly to better-sqlite3.\n * Used when the daemon is configured with the Postgres backend.\n *\n * The SQLite path still uses sync.ts directly (which is faster for SQLite\n * due to synchronous transactions).\n */\n\nimport { readFileSync, statSync, existsSync } from \"node:fs\";\nimport { join, relative, basename } from \"node:path\";\nimport type { Database } from \"better-sqlite3\";\nimport type { StorageBackend, ChunkRow } from \"../../storage/interface.js\";\nimport { chunkMarkdown } from \"../chunker.js\";\nimport {\n sha256File,\n chunkId,\n detectTier,\n walkMdFiles,\n walkContentFiles,\n isPathTooBroadForContentScan,\n parseSessionTitleChunk,\n yieldToEventLoop,\n INDEX_YIELD_EVERY,\n} from \"./helpers.js\";\nimport type { IndexResult } from \"./types.js\";\n\nexport type { IndexResult };\n\n// ---------------------------------------------------------------------------\n// Single-file indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\n/**\n * Index a single file through the StorageBackend interface.\n * Returns true if the file was re-indexed (changed or new), false if skipped.\n */\nexport async function indexFileWithBackend(\n backend: StorageBackend,\n projectId: number,\n rootPath: string,\n relativePath: string,\n source: string,\n tier: string,\n): Promise<boolean> {\n const absPath = join(rootPath, relativePath);\n\n let content: string;\n let stat: ReturnType<typeof statSync>;\n try {\n content = readFileSync(absPath, \"utf8\");\n stat = statSync(absPath);\n } catch {\n return false;\n }\n\n const hash = sha256File(content);\n const mtime = Math.floor(stat.mtimeMs);\n const size = stat.size;\n\n // Change detection\n const existingHash = await backend.getFileHash(projectId, relativePath);\n if (existingHash === hash) return false;\n\n // Delete old chunks\n await backend.deleteChunksForFile(projectId, relativePath);\n\n // Chunk the content\n const rawChunks = chunkMarkdown(content);\n const updatedAt = Date.now();\n\n const chunks: ChunkRow[] = rawChunks.map((c, i) => ({\n id: chunkId(projectId, relativePath, i, c.startLine, c.endLine),\n projectId,\n source,\n tier,\n path: relativePath,\n startLine: c.startLine,\n endLine: c.endLine,\n hash: c.hash,\n text: c.text,\n updatedAt,\n embedding: null,\n }));\n\n // Insert chunks + update file record\n await backend.insertChunks(chunks);\n await backend.upsertFile({ projectId, path: relativePath, source, tier, hash, mtime, size });\n\n return true;\n}\n\n// ---------------------------------------------------------------------------\n// Project-level indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\nexport async function indexProjectWithBackend(\n backend: StorageBackend,\n projectId: number,\n rootPath: string,\n claudeNotesDir?: string | null,\n): Promise<IndexResult> {\n const result: IndexResult = { filesProcessed: 0, chunksCreated: 0, filesSkipped: 0 };\n\n const filesToIndex: Array<{ absPath: string; rootBase: string; source: string; tier: string }> = [];\n\n const rootMemoryMd = join(rootPath, \"MEMORY.md\");\n if (existsSync(rootMemoryMd)) {\n filesToIndex.push({ absPath: rootMemoryMd, rootBase: rootPath, source: \"memory\", tier: \"evergreen\" });\n }\n\n const memoryDir = join(rootPath, \"memory\");\n for (const absPath of walkMdFiles(memoryDir)) {\n const relPath = relative(rootPath, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"memory\", tier });\n }\n\n const notesDir = join(rootPath, \"Notes\");\n for (const absPath of walkMdFiles(notesDir)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic session-title chunks for Notes files\n {\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(notesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(rootPath, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n const titleChunk: ChunkRow = {\n id, projectId, source: \"notes\", tier: \"session\",\n path: syntheticPath, startLine: 0, endLine: 0,\n hash, text, updatedAt, embedding: null,\n };\n try {\n await backend.insertChunks([titleChunk]);\n } catch {\n // Skip title chunks that cause backend errors\n }\n }\n }\n\n if (!isPathTooBroadForContentScan(rootPath)) {\n for (const absPath of walkContentFiles(rootPath)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"content\", tier: \"topic\" });\n }\n }\n\n if (claudeNotesDir && claudeNotesDir !== notesDir) {\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n filesToIndex.push({ absPath, rootBase: claudeNotesDir, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic title chunks for claude notes dir\n {\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(claudeNotesDir, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n const titleChunk: ChunkRow = {\n id, projectId, source: \"notes\", tier: \"session\",\n path: syntheticPath, startLine: 0, endLine: 0,\n hash, text, updatedAt, embedding: null,\n };\n try {\n await backend.insertChunks([titleChunk]);\n } catch {\n // Skip title chunks that cause backend errors\n }\n }\n }\n\n if (claudeNotesDir.endsWith(\"/Notes\")) {\n const claudeProjectDir = claudeNotesDir.slice(0, -\"/Notes\".length);\n const claudeMemoryMd = join(claudeProjectDir, \"MEMORY.md\");\n if (existsSync(claudeMemoryMd)) {\n filesToIndex.push({ absPath: claudeMemoryMd, rootBase: claudeProjectDir, source: \"memory\", tier: \"evergreen\" });\n }\n const claudeMemoryDir = join(claudeProjectDir, \"memory\");\n for (const absPath of walkMdFiles(claudeMemoryDir)) {\n const relPath = relative(claudeProjectDir, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: claudeProjectDir, source: \"memory\", tier });\n }\n }\n }\n\n await yieldToEventLoop();\n\n let filesSinceYield = 0;\n\n for (const { absPath, rootBase, source, tier } of filesToIndex) {\n if (filesSinceYield >= INDEX_YIELD_EVERY) {\n await yieldToEventLoop();\n filesSinceYield = 0;\n }\n filesSinceYield++;\n\n const relPath = relative(rootBase, absPath);\n try {\n const changed = await indexFileWithBackend(backend, projectId, rootBase, relPath, source, tier);\n\n if (changed) {\n const ids = await backend.getChunkIds(projectId, relPath);\n result.filesProcessed++;\n result.chunksCreated += ids.length;\n } else {\n result.filesSkipped++;\n }\n } catch {\n // Skip files that cause backend errors (e.g. null bytes in Postgres)\n result.filesSkipped++;\n }\n }\n\n // Prune stale paths\n const livePaths = new Set<string>();\n for (const { absPath, rootBase } of filesToIndex) {\n livePaths.add(relative(rootBase, absPath));\n }\n\n const dbChunkPaths = await backend.getDistinctChunkPaths(projectId);\n\n const stalePaths: string[] = [];\n for (const p of dbChunkPaths) {\n const basePath = p.endsWith(\"::title\") ? p.slice(0, -\"::title\".length) : p;\n if (!livePaths.has(basePath)) {\n stalePaths.push(p);\n }\n }\n\n if (stalePaths.length > 0) {\n await backend.deletePaths(projectId, stalePaths);\n }\n\n return result;\n}\n\n// ---------------------------------------------------------------------------\n// Embedding generation via StorageBackend\n// ---------------------------------------------------------------------------\n\nconst EMBED_BATCH_SIZE = 50;\nconst EMBED_YIELD_EVERY = 1;\n\n/**\n * Generate and store embeddings for all unembedded chunks via the StorageBackend.\n *\n * Processes chunks in batches of EMBED_BATCH_SIZE, yielding to the event loop\n * every EMBED_YIELD_EVERY chunks to avoid blocking IPC calls from MCP shims.\n *\n * The optional `shouldStop` callback is checked between every batch. When it\n * returns true the embed loop exits early so the caller (e.g. the daemon\n * shutdown handler) can close the pool without racing against active queries.\n *\n * Returns the number of newly embedded chunks.\n */\nexport async function embedChunksWithBackend(\n backend: StorageBackend,\n shouldStop?: () => boolean,\n projectNames?: Map<number, string>,\n): Promise<number> {\n const { generateEmbedding, serializeEmbedding } = await import(\"../embeddings.js\");\n\n const rows = await backend.getUnembeddedChunkIds();\n if (rows.length === 0) return 0;\n\n const total = rows.length;\n let embedded = 0;\n\n // Build a summary of what needs embedding: count chunks per project_id\n const projectChunkCounts = new Map<number, { count: number; samplePath: string }>();\n for (const row of rows) {\n const entry = projectChunkCounts.get(row.project_id);\n if (entry) {\n entry.count++;\n } else {\n projectChunkCounts.set(row.project_id, { count: 1, samplePath: row.path });\n }\n }\n const pName = (pid: number) => projectNames?.get(pid) ?? `project ${pid}`;\n const projectSummary = Array.from(projectChunkCounts.entries())\n .map(([pid, { count, samplePath }]) => ` ${pName(pid)}: ${count} chunks (e.g. ${samplePath})`)\n .join(\"\\n\");\n process.stderr.write(\n `[pai-daemon] Embed pass: ${total} unembedded chunks across ${projectChunkCounts.size} project(s)\\n${projectSummary}\\n`\n );\n\n // Track current project for transition logging\n let currentProjectId = -1;\n let projectEmbedded = 0;\n\n for (let i = 0; i < rows.length; i += EMBED_BATCH_SIZE) {\n // Check cancellation between every batch before touching the pool again\n if (shouldStop?.()) {\n process.stderr.write(\n `[pai-daemon] Embed pass cancelled after ${embedded}/${total} chunks (shutdown requested)\\n`\n );\n break;\n }\n\n const batch = rows.slice(i, i + EMBED_BATCH_SIZE);\n\n for (let j = 0; j < batch.length; j++) {\n const { id, text, project_id, path } = batch[j];\n\n // Log when switching to a new project\n if (project_id !== currentProjectId) {\n if (currentProjectId !== -1) {\n process.stderr.write(\n `[pai-daemon] Finished ${pName(currentProjectId)}: ${projectEmbedded} chunks embedded\\n`\n );\n }\n const info = projectChunkCounts.get(project_id);\n process.stderr.write(\n `[pai-daemon] Embedding ${pName(project_id)} (${info?.count ?? \"?\"} chunks, starting at ${path})\\n`\n );\n currentProjectId = project_id;\n projectEmbedded = 0;\n }\n\n // Yield to the event loop periodically to keep IPC responsive\n if ((embedded + j) % EMBED_YIELD_EVERY === 0) {\n await yieldToEventLoop();\n }\n\n const vec = await generateEmbedding(text);\n const blob = serializeEmbedding(vec);\n await backend.updateEmbedding(id, blob);\n projectEmbedded++;\n }\n\n embedded += batch.length;\n\n // Log progress with current file path for context\n const lastChunk = batch[batch.length - 1];\n process.stderr.write(\n `[pai-daemon] Embedded ${embedded}/${total} chunks (${pName(lastChunk.project_id)}: ${lastChunk.path})\\n`\n );\n }\n\n // Log final project completion\n if (currentProjectId !== -1) {\n process.stderr.write(\n `[pai-daemon] Finished ${pName(currentProjectId)}: ${projectEmbedded} chunks embedded\\n`\n );\n }\n\n return embedded;\n}\n\n// ---------------------------------------------------------------------------\n// Global indexing via StorageBackend\n// ---------------------------------------------------------------------------\n\nexport async function indexAllWithBackend(\n backend: StorageBackend,\n registryDb: Database,\n): Promise<{ projects: number; result: IndexResult }> {\n const projects = registryDb\n .prepare(\"SELECT id, root_path, claude_notes_dir FROM projects WHERE status = 'active'\")\n .all() as Array<{ id: number; root_path: string; claude_notes_dir: string | null }>;\n\n const totals: IndexResult = { filesProcessed: 0, chunksCreated: 0, filesSkipped: 0 };\n\n for (const project of projects) {\n await yieldToEventLoop();\n const r = await indexProjectWithBackend(backend, project.id, project.root_path, project.claude_notes_dir);\n totals.filesProcessed += r.filesProcessed;\n totals.chunksCreated += r.chunksCreated;\n totals.filesSkipped += r.filesSkipped;\n }\n\n return { projects: projects.length, result: totals };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAuCA,eAAsB,qBACpB,SACA,WACA,UACA,cACA,QACA,MACkB;CAClB,MAAM,UAAU,KAAK,UAAU,aAAa;CAE5C,IAAI;CACJ,IAAI;AACJ,KAAI;AACF,YAAU,aAAa,SAAS,OAAO;AACvC,SAAO,SAAS,QAAQ;SAClB;AACN,SAAO;;CAGT,MAAM,OAAO,WAAW,QAAQ;CAChC,MAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ;CACtC,MAAM,OAAO,KAAK;AAIlB,KADqB,MAAM,QAAQ,YAAY,WAAW,aAAa,KAClD,KAAM,QAAO;AAGlC,OAAM,QAAQ,oBAAoB,WAAW,aAAa;CAG1D,MAAM,YAAY,cAAc,QAAQ;CACxC,MAAM,YAAY,KAAK,KAAK;CAE5B,MAAM,SAAqB,UAAU,KAAK,GAAG,OAAO;EAClD,IAAI,QAAQ,WAAW,cAAc,GAAG,EAAE,WAAW,EAAE,QAAQ;EAC/D;EACA;EACA;EACA,MAAM;EACN,WAAW,EAAE;EACb,SAAS,EAAE;EACX,MAAM,EAAE;EACR,MAAM,EAAE;EACR;EACA,WAAW;EACZ,EAAE;AAGH,OAAM,QAAQ,aAAa,OAAO;AAClC,OAAM,QAAQ,WAAW;EAAE;EAAW,MAAM;EAAc;EAAQ;EAAM;EAAM;EAAO;EAAM,CAAC;AAE5F,QAAO;;AAOT,eAAsB,wBACpB,SACA,WACA,UACA,gBACsB;CACtB,MAAM,SAAsB;EAAE,gBAAgB;EAAG,eAAe;EAAG,cAAc;EAAG;CAEpF,MAAM,eAA2F,EAAE;CAEnG,MAAM,eAAe,KAAK,UAAU,YAAY;AAChD,KAAI,WAAW,aAAa,CAC1B,cAAa,KAAK;EAAE,SAAS;EAAc,UAAU;EAAU,QAAQ;EAAU,MAAM;EAAa,CAAC;CAGvG,MAAM,YAAY,KAAK,UAAU,SAAS;AAC1C,MAAK,MAAM,WAAW,YAAY,UAAU,EAAE;EAE5C,MAAM,OAAO,WADG,SAAS,UAAU,QAAQ,CACX;AAChC,eAAa,KAAK;GAAE;GAAS,UAAU;GAAU,QAAQ;GAAU;GAAM,CAAC;;CAG5E,MAAM,WAAW,KAAK,UAAU,QAAQ;AACxC,MAAK,MAAM,WAAW,YAAY,SAAS,CACzC,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAS,MAAM;EAAW,CAAC;CAItF;EACE,MAAM,YAAY,KAAK,KAAK;AAC5B,OAAK,MAAM,WAAW,YAAY,SAAS,EAAE;GAE3C,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,OAAI,CAAC,KAAM;GAEX,MAAM,gBAAgB,GADN,SAAS,UAAU,QAAQ,CACV;GAGjC,MAAM,aAAuB;IAC3B,IAHS,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;IAG/C;IAAW,QAAQ;IAAS,MAAM;IACtC,MAAM;IAAe,WAAW;IAAG,SAAS;IAC5C,MAJW,WAAW,KAAK;IAIrB;IAAM;IAAW,WAAW;IACnC;AACD,OAAI;AACF,UAAM,QAAQ,aAAa,CAAC,WAAW,CAAC;WAClC;;;AAMZ,KAAI,CAAC,6BAA6B,SAAS,CACzC,MAAK,MAAM,WAAW,iBAAiB,SAAS,CAC9C,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAW,MAAM;EAAS,CAAC;AAIxF,KAAI,kBAAkB,mBAAmB,UAAU;AACjD,OAAK,MAAM,WAAW,YAAY,eAAe,CAC/C,cAAa,KAAK;GAAE;GAAS,UAAU;GAAgB,QAAQ;GAAS,MAAM;GAAW,CAAC;EAI5F;GACE,MAAM,YAAY,KAAK,KAAK;AAC5B,QAAK,MAAM,WAAW,YAAY,eAAe,EAAE;IAEjD,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,QAAI,CAAC,KAAM;IAEX,MAAM,gBAAgB,GADN,SAAS,gBAAgB,QAAQ,CAChB;IAGjC,MAAM,aAAuB;KAC3B,IAHS,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;KAG/C;KAAW,QAAQ;KAAS,MAAM;KACtC,MAAM;KAAe,WAAW;KAAG,SAAS;KAC5C,MAJW,WAAW,KAAK;KAIrB;KAAM;KAAW,WAAW;KACnC;AACD,QAAI;AACF,WAAM,QAAQ,aAAa,CAAC,WAAW,CAAC;YAClC;;;AAMZ,MAAI,eAAe,SAAS,SAAS,EAAE;GACrC,MAAM,mBAAmB,eAAe,MAAM,GAAG,GAAiB;GAClE,MAAM,iBAAiB,KAAK,kBAAkB,YAAY;AAC1D,OAAI,WAAW,eAAe,CAC5B,cAAa,KAAK;IAAE,SAAS;IAAgB,UAAU;IAAkB,QAAQ;IAAU,MAAM;IAAa,CAAC;GAEjH,MAAM,kBAAkB,KAAK,kBAAkB,SAAS;AACxD,QAAK,MAAM,WAAW,YAAY,gBAAgB,EAAE;IAElD,MAAM,OAAO,WADG,SAAS,kBAAkB,QAAQ,CACnB;AAChC,iBAAa,KAAK;KAAE;KAAS,UAAU;KAAkB,QAAQ;KAAU;KAAM,CAAC;;;;AAKxF,OAAM,kBAAkB;CAExB,IAAI,kBAAkB;AAEtB,MAAK,MAAM,EAAE,SAAS,UAAU,QAAQ,UAAU,cAAc;AAC9D,MAAI,mBAAmB,mBAAmB;AACxC,SAAM,kBAAkB;AACxB,qBAAkB;;AAEpB;EAEA,MAAM,UAAU,SAAS,UAAU,QAAQ;AAC3C,MAAI;AAGF,OAFgB,MAAM,qBAAqB,SAAS,WAAW,UAAU,SAAS,QAAQ,KAAK,EAElF;IACX,MAAM,MAAM,MAAM,QAAQ,YAAY,WAAW,QAAQ;AACzD,WAAO;AACP,WAAO,iBAAiB,IAAI;SAE5B,QAAO;UAEH;AAEN,UAAO;;;CAKX,MAAM,4BAAY,IAAI,KAAa;AACnC,MAAK,MAAM,EAAE,SAAS,cAAc,aAClC,WAAU,IAAI,SAAS,UAAU,QAAQ,CAAC;CAG5C,MAAM,eAAe,MAAM,QAAQ,sBAAsB,UAAU;CAEnE,MAAM,aAAuB,EAAE;AAC/B,MAAK,MAAM,KAAK,cAAc;EAC5B,MAAM,WAAW,EAAE,SAAS,UAAU,GAAG,EAAE,MAAM,GAAG,GAAkB,GAAG;AACzE,MAAI,CAAC,UAAU,IAAI,SAAS,CAC1B,YAAW,KAAK,EAAE;;AAItB,KAAI,WAAW,SAAS,EACtB,OAAM,QAAQ,YAAY,WAAW,WAAW;AAGlD,QAAO;;AAOT,MAAM,mBAAmB;AACzB,MAAM,oBAAoB;;;;;;;;;;;;;AAc1B,eAAsB,uBACpB,SACA,YACA,cACiB;CACjB,MAAM,EAAE,mBAAmB,uBAAuB,MAAM,OAAO;CAE/D,MAAM,OAAO,MAAM,QAAQ,uBAAuB;AAClD,KAAI,KAAK,WAAW,EAAG,QAAO;CAE9B,MAAM,QAAQ,KAAK;CACnB,IAAI,WAAW;CAGf,MAAM,qCAAqB,IAAI,KAAoD;AACnF,MAAK,MAAM,OAAO,MAAM;EACtB,MAAM,QAAQ,mBAAmB,IAAI,IAAI,WAAW;AACpD,MAAI,MACF,OAAM;MAEN,oBAAmB,IAAI,IAAI,YAAY;GAAE,OAAO;GAAG,YAAY,IAAI;GAAM,CAAC;;CAG9E,MAAM,SAAS,QAAgB,cAAc,IAAI,IAAI,IAAI,WAAW;CACpE,MAAM,iBAAiB,MAAM,KAAK,mBAAmB,SAAS,CAAC,CAC5D,KAAK,CAAC,KAAK,EAAE,OAAO,kBAAkB,KAAK,MAAM,IAAI,CAAC,IAAI,MAAM,gBAAgB,WAAW,GAAG,CAC9F,KAAK,KAAK;AACb,SAAQ,OAAO,MACb,4BAA4B,MAAM,4BAA4B,mBAAmB,KAAK,eAAe,eAAe,IACrH;CAGD,IAAI,mBAAmB;CACvB,IAAI,kBAAkB;AAEtB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,kBAAkB;AAEtD,MAAI,cAAc,EAAE;AAClB,WAAQ,OAAO,MACb,2CAA2C,SAAS,GAAG,MAAM,gCAC9D;AACD;;EAGF,MAAM,QAAQ,KAAK,MAAM,GAAG,IAAI,iBAAiB;AAEjD,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,EAAE,IAAI,MAAM,YAAY,SAAS,MAAM;AAG7C,OAAI,eAAe,kBAAkB;AACnC,QAAI,qBAAqB,GACvB,SAAQ,OAAO,MACb,yBAAyB,MAAM,iBAAiB,CAAC,IAAI,gBAAgB,oBACtE;IAEH,MAAM,OAAO,mBAAmB,IAAI,WAAW;AAC/C,YAAQ,OAAO,MACb,0BAA0B,MAAM,WAAW,CAAC,IAAI,MAAM,SAAS,IAAI,uBAAuB,KAAK,KAChG;AACD,uBAAmB;AACnB,sBAAkB;;AAIpB,QAAK,WAAW,KAAK,sBAAsB,EACzC,OAAM,kBAAkB;GAI1B,MAAM,OAAO,mBADD,MAAM,kBAAkB,KAAK,CACL;AACpC,SAAM,QAAQ,gBAAgB,IAAI,KAAK;AACvC;;AAGF,cAAY,MAAM;EAGlB,MAAM,YAAY,MAAM,MAAM,SAAS;AACvC,UAAQ,OAAO,MACb,yBAAyB,SAAS,GAAG,MAAM,WAAW,MAAM,UAAU,WAAW,CAAC,IAAI,UAAU,KAAK,KACtG;;AAIH,KAAI,qBAAqB,GACvB,SAAQ,OAAO,MACb,yBAAyB,MAAM,iBAAiB,CAAC,IAAI,gBAAgB,oBACtE;AAGH,QAAO;;AAOT,eAAsB,oBACpB,SACA,YACoD;CACpD,MAAM,WAAW,WACd,QAAQ,+EAA+E,CACvF,KAAK;CAER,MAAM,SAAsB;EAAE,gBAAgB;EAAG,eAAe;EAAG,cAAc;EAAG;AAEpF,MAAK,MAAM,WAAW,UAAU;AAC9B,QAAM,kBAAkB;EACxB,MAAM,IAAI,MAAM,wBAAwB,SAAS,QAAQ,IAAI,QAAQ,WAAW,QAAQ,iBAAiB;AACzG,SAAO,kBAAkB,EAAE;AAC3B,SAAO,iBAAiB,EAAE;AAC1B,SAAO,gBAAgB,EAAE;;AAG3B,QAAO;EAAE,UAAU,SAAS;EAAQ,QAAQ;EAAQ"}
@@ -1,4 +1,4 @@
1
- import { a as parseSessionTitleChunk, c as yieldToEventLoop, f as sha256File, i as isPathTooBroadForContentScan, l as chunkMarkdown, n as chunkId, o as walkContentFiles, r as detectTier, s as walkMdFiles, t as INDEX_YIELD_EVERY } from "./helpers-BEST-4Gx.mjs";
1
+ import { a as parseSessionTitleChunk, c as yieldToEventLoop, f as sha256File, i as isPathTooBroadForContentScan, l as chunkMarkdown, n as chunkId, o as walkContentFiles, r as detectTier, s as walkMdFiles, t as INDEX_YIELD_EVERY } from "./helpers-OCVFgprQ.mjs";
2
2
  import { existsSync, readFileSync, statSync } from "node:fs";
3
3
  import { basename, join, relative } from "node:path";
4
4
 
@@ -307,4 +307,4 @@ async function embedChunks(db, projectId, batchSize = 50, onProgress) {
307
307
 
308
308
  //#endregion
309
309
  export { indexProject as i, indexAll as n, indexFile as r, embedChunks as t };
310
- //# sourceMappingURL=sync-BOsnEj2-.mjs.map
310
+ //# sourceMappingURL=sync-CdHSL9Kc.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"sync-BOsnEj2-.mjs","names":[],"sources":["../src/memory/indexer/sync.ts"],"sourcesContent":["/**\n * Synchronous (SQLite) indexer for the PAI federation memory engine.\n *\n * Scans project memory/ and Notes/ directories, chunks markdown files, and\n * inserts the resulting chunks into federation.db for BM25 search.\n *\n * Change detection: files whose SHA-256 hash has not changed since the last\n * index run are skipped, keeping incremental re-indexing fast.\n *\n * Uses raw better-sqlite3 Database directly for maximum SQLite performance\n * (synchronous transactions, no serialisation overhead).\n */\n\nimport { readFileSync, statSync, existsSync } from \"node:fs\";\nimport { join, relative, basename } from \"node:path\";\nimport type { Database } from \"better-sqlite3\";\nimport { chunkMarkdown } from \"../chunker.js\";\nimport {\n sha256File,\n chunkId,\n detectTier,\n walkMdFiles,\n walkContentFiles,\n isPathTooBroadForContentScan,\n parseSessionTitleChunk,\n yieldToEventLoop,\n INDEX_YIELD_EVERY,\n} from \"./helpers.js\";\nimport type { IndexResult, EmbedResult } from \"./types.js\";\n\nexport type { IndexResult, EmbedResult };\n\n// Re-export detectTier for backward-compatibility (consumers import it from indexer.js)\nexport { detectTier };\n\n// ---------------------------------------------------------------------------\n// Single-file indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index a single file into the federation database.\n *\n * @returns true if the file was re-indexed (changed or new), false if skipped.\n */\nexport function indexFile(\n db: Database,\n projectId: number,\n rootPath: string,\n relativePath: string,\n source: string,\n tier: string,\n): boolean {\n const absPath = join(rootPath, relativePath);\n\n // Read file content\n let content: string;\n let stat: ReturnType<typeof statSync>;\n try {\n content = readFileSync(absPath, \"utf8\");\n stat = statSync(absPath);\n } catch {\n // File unreadable or missing — skip silently\n return false;\n }\n\n const hash = sha256File(content);\n const mtime = Math.floor(stat.mtimeMs);\n const size = stat.size;\n\n // Check if the file has changed since last index\n const existing = db\n .prepare(\n \"SELECT hash FROM memory_files WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relativePath) as { hash: string } | undefined;\n\n if (existing?.hash === hash) {\n // Unchanged — skip\n return false;\n }\n\n // Delete old chunks for this file from both tables\n const oldChunkIds = db\n .prepare(\n \"SELECT id FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .all(projectId, relativePath) as Array<{ id: string }>;\n\n const deleteFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunk = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const row of oldChunkIds) {\n deleteFts.run(row.id);\n }\n deleteChunk.run(projectId, relativePath);\n })();\n\n // Chunk the new content\n const chunks = chunkMarkdown(content);\n\n // Insert new chunks into memory_chunks and memory_fts\n const insertChunk = db.prepare(`\n INSERT INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const insertFts = db.prepare(`\n INSERT INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const upsertFile = db.prepare(`\n INSERT INTO memory_files (project_id, path, source, tier, hash, mtime, size)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n ON CONFLICT(project_id, path) DO UPDATE SET\n source = excluded.source,\n tier = excluded.tier,\n hash = excluded.hash,\n mtime = excluded.mtime,\n size = excluded.size\n `);\n\n const updatedAt = Date.now();\n\n db.transaction(() => {\n for (let i = 0; i < chunks.length; i++) {\n const chunk = chunks[i]!;\n const id = chunkId(projectId, relativePath, i, chunk.startLine, chunk.endLine);\n insertChunk.run(\n id,\n projectId,\n source,\n tier,\n relativePath,\n chunk.startLine,\n chunk.endLine,\n chunk.hash,\n chunk.text,\n updatedAt,\n );\n insertFts.run(\n chunk.text,\n id,\n projectId,\n relativePath,\n source,\n tier,\n chunk.startLine,\n chunk.endLine,\n );\n }\n upsertFile.run(projectId, relativePath, source, tier, hash, mtime, size);\n })();\n\n return true;\n}\n\n// ---------------------------------------------------------------------------\n// Project-level indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index all memory, Notes, and content files for a single registered project.\n *\n * Scans:\n * - {rootPath}/MEMORY.md → source='memory', tier='evergreen'\n * - {rootPath}/memory/ → source='memory', tier from detectTier()\n * - {rootPath}/Notes/ → source='notes', tier='session'\n * - {rootPath}/**\\/*.md → source='content', tier='topic' (all other .md files, recursive)\n * - {claudeNotesDir}/ → source='notes', tier='session' (if set and different)\n */\nexport async function indexProject(\n db: Database,\n projectId: number,\n rootPath: string,\n claudeNotesDir?: string | null,\n): Promise<IndexResult> {\n const result: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n const filesToIndex: Array<{ absPath: string; rootBase: string; source: string; tier: string }> = [];\n\n // Root-level MEMORY.md\n const rootMemoryMd = join(rootPath, \"MEMORY.md\");\n if (existsSync(rootMemoryMd)) {\n filesToIndex.push({ absPath: rootMemoryMd, rootBase: rootPath, source: \"memory\", tier: \"evergreen\" });\n }\n\n // memory/ directory\n const memoryDir = join(rootPath, \"memory\");\n for (const absPath of walkMdFiles(memoryDir)) {\n const relPath = relative(rootPath, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"memory\", tier });\n }\n\n // {rootPath}/Notes/ directory\n const notesDir = join(rootPath, \"Notes\");\n for (const absPath of walkMdFiles(notesDir)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic session-title chunks for Notes files with the standard filename format:\n // \"NNNN - YYYY-MM-DD - Descriptive Title.md\"\n {\n const titleInsertChunk = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(notesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(rootPath, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // {rootPath}/**/*.md — all other markdown content\n if (!isPathTooBroadForContentScan(rootPath)) {\n for (const absPath of walkContentFiles(rootPath)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"content\", tier: \"topic\" });\n }\n }\n\n // Claude Code session notes directory (~/.claude/projects/{encoded}/Notes/)\n if (claudeNotesDir && claudeNotesDir !== notesDir) {\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n filesToIndex.push({ absPath, rootBase: claudeNotesDir, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic title chunks for claude notes dir\n {\n const updatedAt = Date.now();\n const titleInsertChunk2 = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts2 = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(claudeNotesDir, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk2.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts2.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // Derive the sibling memory/ directory: .../Notes/ → .../memory/\n if (claudeNotesDir.endsWith(\"/Notes\")) {\n const claudeProjectDir = claudeNotesDir.slice(0, -\"/Notes\".length);\n const claudeMemoryDir = join(claudeProjectDir, \"memory\");\n\n const claudeMemoryMd = join(claudeProjectDir, \"MEMORY.md\");\n if (existsSync(claudeMemoryMd)) {\n filesToIndex.push({\n absPath: claudeMemoryMd,\n rootBase: claudeProjectDir,\n source: \"memory\",\n tier: \"evergreen\",\n });\n }\n\n for (const absPath of walkMdFiles(claudeMemoryDir)) {\n const relPath = relative(claudeProjectDir, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: claudeProjectDir, source: \"memory\", tier });\n }\n }\n }\n\n // Yield after collection phase before processing\n await yieldToEventLoop();\n\n let filesSinceYield = 0;\n\n for (const { absPath, rootBase, source, tier } of filesToIndex) {\n if (filesSinceYield >= INDEX_YIELD_EVERY) {\n await yieldToEventLoop();\n filesSinceYield = 0;\n }\n filesSinceYield++;\n\n const relPath = relative(rootBase, absPath);\n const changed = indexFile(db, projectId, rootBase, relPath, source, tier);\n\n if (changed) {\n const count = db\n .prepare(\n \"SELECT COUNT(*) as n FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relPath) as { n: number };\n\n result.filesProcessed++;\n result.chunksCreated += count.n;\n } else {\n result.filesSkipped++;\n }\n }\n\n // Prune stale paths: remove DB entries for files that no longer exist on disk.\n const livePaths = new Set<string>();\n for (const { absPath, rootBase } of filesToIndex) {\n livePaths.add(relative(rootBase, absPath));\n }\n\n const dbChunkPaths = db\n .prepare(\"SELECT DISTINCT path FROM memory_chunks WHERE project_id = ?\")\n .all(projectId) as Array<{ path: string }>;\n\n const stalePaths: string[] = [];\n for (const row of dbChunkPaths) {\n const basePath = row.path.endsWith(\"::title\")\n ? row.path.slice(0, -\"::title\".length)\n : row.path;\n if (!livePaths.has(basePath)) {\n stalePaths.push(row.path);\n }\n }\n\n if (stalePaths.length > 0) {\n const deleteChunksFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunks = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n const deleteFile = db.prepare(\n \"DELETE FROM memory_files WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const stalePath of stalePaths) {\n const chunkIds = db\n .prepare(\"SELECT id FROM memory_chunks WHERE project_id = ? AND path = ?\")\n .all(projectId, stalePath) as Array<{ id: string }>;\n for (const { id } of chunkIds) {\n deleteChunksFts.run(id);\n }\n deleteChunks.run(projectId, stalePath);\n deleteFile.run(projectId, stalePath);\n }\n })();\n }\n\n return result;\n}\n\n// ---------------------------------------------------------------------------\n// Global indexing (all registered projects)\n// ---------------------------------------------------------------------------\n\n/**\n * Index all active projects registered in the registry DB.\n *\n * Async: yields to the event loop between each project so that the daemon's\n * Unix socket server can process IPC requests (e.g. status) while indexing.\n */\nexport async function indexAll(\n db: Database,\n registryDb: Database,\n): Promise<{ projects: number; result: IndexResult }> {\n const projects = registryDb\n .prepare(\"SELECT id, root_path, claude_notes_dir FROM projects WHERE status = 'active'\")\n .all() as Array<{ id: number; root_path: string; claude_notes_dir: string | null }>;\n\n const totals: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n for (const project of projects) {\n await yieldToEventLoop();\n const r = await indexProject(db, project.id, project.root_path, project.claude_notes_dir);\n totals.filesProcessed += r.filesProcessed;\n totals.chunksCreated += r.chunksCreated;\n totals.filesSkipped += r.filesSkipped;\n }\n\n return { projects: projects.length, result: totals };\n}\n\n// ---------------------------------------------------------------------------\n// Embedding generation\n// ---------------------------------------------------------------------------\n\n/**\n * Generate and store embeddings for chunks that do not yet have one.\n *\n * Because better-sqlite3 is synchronous but the embedding pipeline is async,\n * we fetch all unembedded chunk texts first, generate embeddings in batches,\n * and then write them back in a transaction.\n *\n * @param db Open federation database.\n * @param projectId Optional — restrict to a specific project.\n * @param batchSize Number of chunks to embed per round. Default 50.\n * @param onProgress Optional callback called after each batch with running totals.\n */\nexport async function embedChunks(\n db: Database,\n projectId?: number,\n batchSize = 50,\n onProgress?: (embedded: number, total: number) => void,\n): Promise<EmbedResult> {\n // Dynamic import — keeps the heavy ML runtime out of the module load path\n const { generateEmbedding, serializeEmbedding } = await import(\"../embeddings.js\");\n\n const conditions = [\"embedding IS NULL\"];\n const params: (string | number)[] = [];\n\n if (projectId !== undefined) {\n conditions.push(\"project_id = ?\");\n params.push(projectId);\n }\n\n const where = \"WHERE \" + conditions.join(\" AND \");\n\n const rows = db\n .prepare(`SELECT id, text FROM memory_chunks ${where} ORDER BY id`)\n .all(...params) as Array<{ id: string; text: string }>;\n\n if (rows.length === 0) {\n return { chunksEmbedded: 0, chunksSkipped: 0 };\n }\n\n const updateStmt = db.prepare(\n \"UPDATE memory_chunks SET embedding = ? WHERE id = ?\",\n );\n\n let embedded = 0;\n const total = rows.length;\n\n for (let i = 0; i < rows.length; i += batchSize) {\n const batch = rows.slice(i, i + batchSize);\n\n // Generate embeddings for the batch (async — must happen OUTSIDE transaction)\n const embeddings: Array<{ id: string; blob: Buffer }> = [];\n for (const row of batch) {\n const vec = await generateEmbedding(row.text);\n const blob = serializeEmbedding(vec);\n embeddings.push({ id: row.id, blob });\n }\n\n // Write the batch in a single transaction\n db.transaction(() => {\n for (const { id, blob } of embeddings) {\n updateStmt.run(blob, id);\n }\n })();\n\n embedded += embeddings.length;\n onProgress?.(embedded, total);\n }\n\n return { chunksEmbedded: embedded, chunksSkipped: 0 };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AA4CA,SAAgB,UACd,IACA,WACA,UACA,cACA,QACA,MACS;CACT,MAAM,UAAU,KAAK,UAAU,aAAa;CAG5C,IAAI;CACJ,IAAI;AACJ,KAAI;AACF,YAAU,aAAa,SAAS,OAAO;AACvC,SAAO,SAAS,QAAQ;SAClB;AAEN,SAAO;;CAGT,MAAM,OAAO,WAAW,QAAQ;CAChC,MAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ;CACtC,MAAM,OAAO,KAAK;AASlB,KANiB,GACd,QACC,kEACD,CACA,IAAI,WAAW,aAAa,EAEjB,SAAS,KAErB,QAAO;CAIT,MAAM,cAAc,GACjB,QACC,iEACD,CACA,IAAI,WAAW,aAAa;CAE/B,MAAM,YAAY,GAAG,QAAQ,sCAAsC;CACnE,MAAM,cAAc,GAAG,QACrB,8DACD;AAED,IAAG,kBAAkB;AACnB,OAAK,MAAM,OAAO,YAChB,WAAU,IAAI,IAAI,GAAG;AAEvB,cAAY,IAAI,WAAW,aAAa;GACxC,EAAE;CAGJ,MAAM,SAAS,cAAc,QAAQ;CAGrC,MAAM,cAAc,GAAG,QAAQ;;;IAG7B;CAEF,MAAM,YAAY,GAAG,QAAQ;;;IAG3B;CAEF,MAAM,aAAa,GAAG,QAAQ;;;;;;;;;IAS5B;CAEF,MAAM,YAAY,KAAK,KAAK;AAE5B,IAAG,kBAAkB;AACnB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;GACtC,MAAM,QAAQ,OAAO;GACrB,MAAM,KAAK,QAAQ,WAAW,cAAc,GAAG,MAAM,WAAW,MAAM,QAAQ;AAC9E,eAAY,IACV,IACA,WACA,QACA,MACA,cACA,MAAM,WACN,MAAM,SACN,MAAM,MACN,MAAM,MACN,UACD;AACD,aAAU,IACR,MAAM,MACN,IACA,WACA,cACA,QACA,MACA,MAAM,WACN,MAAM,QACP;;AAEH,aAAW,IAAI,WAAW,cAAc,QAAQ,MAAM,MAAM,OAAO,KAAK;GACxE,EAAE;AAEJ,QAAO;;;;;;;;;;;;AAiBT,eAAsB,aACpB,IACA,WACA,UACA,gBACsB;CACtB,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;CAED,MAAM,eAA2F,EAAE;CAGnG,MAAM,eAAe,KAAK,UAAU,YAAY;AAChD,KAAI,WAAW,aAAa,CAC1B,cAAa,KAAK;EAAE,SAAS;EAAc,UAAU;EAAU,QAAQ;EAAU,MAAM;EAAa,CAAC;CAIvG,MAAM,YAAY,KAAK,UAAU,SAAS;AAC1C,MAAK,MAAM,WAAW,YAAY,UAAU,EAAE;EAE5C,MAAM,OAAO,WADG,SAAS,UAAU,QAAQ,CACX;AAChC,eAAa,KAAK;GAAE;GAAS,UAAU;GAAU,QAAQ;GAAU;GAAM,CAAC;;CAI5E,MAAM,WAAW,KAAK,UAAU,QAAQ;AACxC,MAAK,MAAM,WAAW,YAAY,SAAS,CACzC,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAS,MAAM;EAAW,CAAC;CAKtF;EACE,MAAM,mBAAmB,GAAG,QAAQ;;;MAGlC;EACF,MAAM,iBAAiB,GAAG,QAAQ;;;MAGhC;EACF,MAAM,YAAY,KAAK,KAAK;AAC5B,OAAK,MAAM,WAAW,YAAY,SAAS,EAAE;GAE3C,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,OAAI,CAAC,KAAM;GAEX,MAAM,gBAAgB,GADN,SAAS,UAAU,QAAQ,CACV;GACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;GACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,MAAG,kBAAkB;AACnB,qBAAiB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACnG,mBAAe,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;KAChF,EAAE;;;AAKR,KAAI,CAAC,6BAA6B,SAAS,CACzC,MAAK,MAAM,WAAW,iBAAiB,SAAS,CAC9C,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAW,MAAM;EAAS,CAAC;AAKxF,KAAI,kBAAkB,mBAAmB,UAAU;AACjD,OAAK,MAAM,WAAW,YAAY,eAAe,CAC/C,cAAa,KAAK;GAAE;GAAS,UAAU;GAAgB,QAAQ;GAAS,MAAM;GAAW,CAAC;EAI5F;GACE,MAAM,YAAY,KAAK,KAAK;GAC5B,MAAM,oBAAoB,GAAG,QAAQ;;;QAGnC;GACF,MAAM,kBAAkB,GAAG,QAAQ;;;QAGjC;AACF,QAAK,MAAM,WAAW,YAAY,eAAe,EAAE;IAEjD,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,QAAI,CAAC,KAAM;IAEX,MAAM,gBAAgB,GADN,SAAS,gBAAgB,QAAQ,CAChB;IACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;IACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,OAAG,kBAAkB;AACnB,uBAAkB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACpG,qBAAgB,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;MACjF,EAAE;;;AAKR,MAAI,eAAe,SAAS,SAAS,EAAE;GACrC,MAAM,mBAAmB,eAAe,MAAM,GAAG,GAAiB;GAClE,MAAM,kBAAkB,KAAK,kBAAkB,SAAS;GAExD,MAAM,iBAAiB,KAAK,kBAAkB,YAAY;AAC1D,OAAI,WAAW,eAAe,CAC5B,cAAa,KAAK;IAChB,SAAS;IACT,UAAU;IACV,QAAQ;IACR,MAAM;IACP,CAAC;AAGJ,QAAK,MAAM,WAAW,YAAY,gBAAgB,EAAE;IAElD,MAAM,OAAO,WADG,SAAS,kBAAkB,QAAQ,CACnB;AAChC,iBAAa,KAAK;KAAE;KAAS,UAAU;KAAkB,QAAQ;KAAU;KAAM,CAAC;;;;AAMxF,OAAM,kBAAkB;CAExB,IAAI,kBAAkB;AAEtB,MAAK,MAAM,EAAE,SAAS,UAAU,QAAQ,UAAU,cAAc;AAC9D,MAAI,mBAAmB,mBAAmB;AACxC,SAAM,kBAAkB;AACxB,qBAAkB;;AAEpB;EAEA,MAAM,UAAU,SAAS,UAAU,QAAQ;AAG3C,MAFgB,UAAU,IAAI,WAAW,UAAU,SAAS,QAAQ,KAAK,EAE5D;GACX,MAAM,QAAQ,GACX,QACC,4EACD,CACA,IAAI,WAAW,QAAQ;AAE1B,UAAO;AACP,UAAO,iBAAiB,MAAM;QAE9B,QAAO;;CAKX,MAAM,4BAAY,IAAI,KAAa;AACnC,MAAK,MAAM,EAAE,SAAS,cAAc,aAClC,WAAU,IAAI,SAAS,UAAU,QAAQ,CAAC;CAG5C,MAAM,eAAe,GAClB,QAAQ,+DAA+D,CACvE,IAAI,UAAU;CAEjB,MAAM,aAAuB,EAAE;AAC/B,MAAK,MAAM,OAAO,cAAc;EAC9B,MAAM,WAAW,IAAI,KAAK,SAAS,UAAU,GACzC,IAAI,KAAK,MAAM,GAAG,GAAkB,GACpC,IAAI;AACR,MAAI,CAAC,UAAU,IAAI,SAAS,CAC1B,YAAW,KAAK,IAAI,KAAK;;AAI7B,KAAI,WAAW,SAAS,GAAG;EACzB,MAAM,kBAAkB,GAAG,QAAQ,sCAAsC;EACzE,MAAM,eAAe,GAAG,QACtB,8DACD;EACD,MAAM,aAAa,GAAG,QACpB,6DACD;AAED,KAAG,kBAAkB;AACnB,QAAK,MAAM,aAAa,YAAY;IAClC,MAAM,WAAW,GACd,QAAQ,iEAAiE,CACzE,IAAI,WAAW,UAAU;AAC5B,SAAK,MAAM,EAAE,QAAQ,SACnB,iBAAgB,IAAI,GAAG;AAEzB,iBAAa,IAAI,WAAW,UAAU;AACtC,eAAW,IAAI,WAAW,UAAU;;IAEtC,EAAE;;AAGN,QAAO;;;;;;;;AAaT,eAAsB,SACpB,IACA,YACoD;CACpD,MAAM,WAAW,WACd,QAAQ,+EAA+E,CACvF,KAAK;CAER,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;AAED,MAAK,MAAM,WAAW,UAAU;AAC9B,QAAM,kBAAkB;EACxB,MAAM,IAAI,MAAM,aAAa,IAAI,QAAQ,IAAI,QAAQ,WAAW,QAAQ,iBAAiB;AACzF,SAAO,kBAAkB,EAAE;AAC3B,SAAO,iBAAiB,EAAE;AAC1B,SAAO,gBAAgB,EAAE;;AAG3B,QAAO;EAAE,UAAU,SAAS;EAAQ,QAAQ;EAAQ;;;;;;;;;;;;;;AAmBtD,eAAsB,YACpB,IACA,WACA,YAAY,IACZ,YACsB;CAEtB,MAAM,EAAE,mBAAmB,uBAAuB,MAAM,OAAO;CAE/D,MAAM,aAAa,CAAC,oBAAoB;CACxC,MAAM,SAA8B,EAAE;AAEtC,KAAI,cAAc,QAAW;AAC3B,aAAW,KAAK,iBAAiB;AACjC,SAAO,KAAK,UAAU;;CAGxB,MAAM,QAAQ,WAAW,WAAW,KAAK,QAAQ;CAEjD,MAAM,OAAO,GACV,QAAQ,sCAAsC,MAAM,cAAc,CAClE,IAAI,GAAG,OAAO;AAEjB,KAAI,KAAK,WAAW,EAClB,QAAO;EAAE,gBAAgB;EAAG,eAAe;EAAG;CAGhD,MAAM,aAAa,GAAG,QACpB,sDACD;CAED,IAAI,WAAW;CACf,MAAM,QAAQ,KAAK;AAEnB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,WAAW;EAC/C,MAAM,QAAQ,KAAK,MAAM,GAAG,IAAI,UAAU;EAG1C,MAAM,aAAkD,EAAE;AAC1D,OAAK,MAAM,OAAO,OAAO;GAEvB,MAAM,OAAO,mBADD,MAAM,kBAAkB,IAAI,KAAK,CACT;AACpC,cAAW,KAAK;IAAE,IAAI,IAAI;IAAI;IAAM,CAAC;;AAIvC,KAAG,kBAAkB;AACnB,QAAK,MAAM,EAAE,IAAI,UAAU,WACzB,YAAW,IAAI,MAAM,GAAG;IAE1B,EAAE;AAEJ,cAAY,WAAW;AACvB,eAAa,UAAU,MAAM;;AAG/B,QAAO;EAAE,gBAAgB;EAAU,eAAe;EAAG"}
1
+ {"version":3,"file":"sync-CdHSL9Kc.mjs","names":[],"sources":["../src/memory/indexer/sync.ts"],"sourcesContent":["/**\n * Synchronous (SQLite) indexer for the PAI federation memory engine.\n *\n * Scans project memory/ and Notes/ directories, chunks markdown files, and\n * inserts the resulting chunks into federation.db for BM25 search.\n *\n * Change detection: files whose SHA-256 hash has not changed since the last\n * index run are skipped, keeping incremental re-indexing fast.\n *\n * Uses raw better-sqlite3 Database directly for maximum SQLite performance\n * (synchronous transactions, no serialisation overhead).\n */\n\nimport { readFileSync, statSync, existsSync } from \"node:fs\";\nimport { join, relative, basename } from \"node:path\";\nimport type { Database } from \"better-sqlite3\";\nimport { chunkMarkdown } from \"../chunker.js\";\nimport {\n sha256File,\n chunkId,\n detectTier,\n walkMdFiles,\n walkContentFiles,\n isPathTooBroadForContentScan,\n parseSessionTitleChunk,\n yieldToEventLoop,\n INDEX_YIELD_EVERY,\n} from \"./helpers.js\";\nimport type { IndexResult, EmbedResult } from \"./types.js\";\n\nexport type { IndexResult, EmbedResult };\n\n// Re-export detectTier for backward-compatibility (consumers import it from indexer.js)\nexport { detectTier };\n\n// ---------------------------------------------------------------------------\n// Single-file indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index a single file into the federation database.\n *\n * @returns true if the file was re-indexed (changed or new), false if skipped.\n */\nexport function indexFile(\n db: Database,\n projectId: number,\n rootPath: string,\n relativePath: string,\n source: string,\n tier: string,\n): boolean {\n const absPath = join(rootPath, relativePath);\n\n // Read file content\n let content: string;\n let stat: ReturnType<typeof statSync>;\n try {\n content = readFileSync(absPath, \"utf8\");\n stat = statSync(absPath);\n } catch {\n // File unreadable or missing — skip silently\n return false;\n }\n\n const hash = sha256File(content);\n const mtime = Math.floor(stat.mtimeMs);\n const size = stat.size;\n\n // Check if the file has changed since last index\n const existing = db\n .prepare(\n \"SELECT hash FROM memory_files WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relativePath) as { hash: string } | undefined;\n\n if (existing?.hash === hash) {\n // Unchanged — skip\n return false;\n }\n\n // Delete old chunks for this file from both tables\n const oldChunkIds = db\n .prepare(\n \"SELECT id FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .all(projectId, relativePath) as Array<{ id: string }>;\n\n const deleteFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunk = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const row of oldChunkIds) {\n deleteFts.run(row.id);\n }\n deleteChunk.run(projectId, relativePath);\n })();\n\n // Chunk the new content\n const chunks = chunkMarkdown(content);\n\n // Insert new chunks into memory_chunks and memory_fts\n const insertChunk = db.prepare(`\n INSERT INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const insertFts = db.prepare(`\n INSERT INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const upsertFile = db.prepare(`\n INSERT INTO memory_files (project_id, path, source, tier, hash, mtime, size)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n ON CONFLICT(project_id, path) DO UPDATE SET\n source = excluded.source,\n tier = excluded.tier,\n hash = excluded.hash,\n mtime = excluded.mtime,\n size = excluded.size\n `);\n\n const updatedAt = Date.now();\n\n db.transaction(() => {\n for (let i = 0; i < chunks.length; i++) {\n const chunk = chunks[i]!;\n const id = chunkId(projectId, relativePath, i, chunk.startLine, chunk.endLine);\n insertChunk.run(\n id,\n projectId,\n source,\n tier,\n relativePath,\n chunk.startLine,\n chunk.endLine,\n chunk.hash,\n chunk.text,\n updatedAt,\n );\n insertFts.run(\n chunk.text,\n id,\n projectId,\n relativePath,\n source,\n tier,\n chunk.startLine,\n chunk.endLine,\n );\n }\n upsertFile.run(projectId, relativePath, source, tier, hash, mtime, size);\n })();\n\n return true;\n}\n\n// ---------------------------------------------------------------------------\n// Project-level indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index all memory, Notes, and content files for a single registered project.\n *\n * Scans:\n * - {rootPath}/MEMORY.md → source='memory', tier='evergreen'\n * - {rootPath}/memory/ → source='memory', tier from detectTier()\n * - {rootPath}/Notes/ → source='notes', tier='session'\n * - {rootPath}/**\\/*.md → source='content', tier='topic' (all other .md files, recursive)\n * - {claudeNotesDir}/ → source='notes', tier='session' (if set and different)\n */\nexport async function indexProject(\n db: Database,\n projectId: number,\n rootPath: string,\n claudeNotesDir?: string | null,\n): Promise<IndexResult> {\n const result: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n const filesToIndex: Array<{ absPath: string; rootBase: string; source: string; tier: string }> = [];\n\n // Root-level MEMORY.md\n const rootMemoryMd = join(rootPath, \"MEMORY.md\");\n if (existsSync(rootMemoryMd)) {\n filesToIndex.push({ absPath: rootMemoryMd, rootBase: rootPath, source: \"memory\", tier: \"evergreen\" });\n }\n\n // memory/ directory\n const memoryDir = join(rootPath, \"memory\");\n for (const absPath of walkMdFiles(memoryDir)) {\n const relPath = relative(rootPath, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"memory\", tier });\n }\n\n // {rootPath}/Notes/ directory\n const notesDir = join(rootPath, \"Notes\");\n for (const absPath of walkMdFiles(notesDir)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic session-title chunks for Notes files with the standard filename format:\n // \"NNNN - YYYY-MM-DD - Descriptive Title.md\"\n {\n const titleInsertChunk = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(notesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(rootPath, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // {rootPath}/**/*.md — all other markdown content\n if (!isPathTooBroadForContentScan(rootPath)) {\n for (const absPath of walkContentFiles(rootPath)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"content\", tier: \"topic\" });\n }\n }\n\n // Claude Code session notes directory (~/.claude/projects/{encoded}/Notes/)\n if (claudeNotesDir && claudeNotesDir !== notesDir) {\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n filesToIndex.push({ absPath, rootBase: claudeNotesDir, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic title chunks for claude notes dir\n {\n const updatedAt = Date.now();\n const titleInsertChunk2 = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts2 = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(claudeNotesDir, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk2.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts2.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // Derive the sibling memory/ directory: .../Notes/ → .../memory/\n if (claudeNotesDir.endsWith(\"/Notes\")) {\n const claudeProjectDir = claudeNotesDir.slice(0, -\"/Notes\".length);\n const claudeMemoryDir = join(claudeProjectDir, \"memory\");\n\n const claudeMemoryMd = join(claudeProjectDir, \"MEMORY.md\");\n if (existsSync(claudeMemoryMd)) {\n filesToIndex.push({\n absPath: claudeMemoryMd,\n rootBase: claudeProjectDir,\n source: \"memory\",\n tier: \"evergreen\",\n });\n }\n\n for (const absPath of walkMdFiles(claudeMemoryDir)) {\n const relPath = relative(claudeProjectDir, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: claudeProjectDir, source: \"memory\", tier });\n }\n }\n }\n\n // Yield after collection phase before processing\n await yieldToEventLoop();\n\n let filesSinceYield = 0;\n\n for (const { absPath, rootBase, source, tier } of filesToIndex) {\n if (filesSinceYield >= INDEX_YIELD_EVERY) {\n await yieldToEventLoop();\n filesSinceYield = 0;\n }\n filesSinceYield++;\n\n const relPath = relative(rootBase, absPath);\n const changed = indexFile(db, projectId, rootBase, relPath, source, tier);\n\n if (changed) {\n const count = db\n .prepare(\n \"SELECT COUNT(*) as n FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relPath) as { n: number };\n\n result.filesProcessed++;\n result.chunksCreated += count.n;\n } else {\n result.filesSkipped++;\n }\n }\n\n // Prune stale paths: remove DB entries for files that no longer exist on disk.\n const livePaths = new Set<string>();\n for (const { absPath, rootBase } of filesToIndex) {\n livePaths.add(relative(rootBase, absPath));\n }\n\n const dbChunkPaths = db\n .prepare(\"SELECT DISTINCT path FROM memory_chunks WHERE project_id = ?\")\n .all(projectId) as Array<{ path: string }>;\n\n const stalePaths: string[] = [];\n for (const row of dbChunkPaths) {\n const basePath = row.path.endsWith(\"::title\")\n ? row.path.slice(0, -\"::title\".length)\n : row.path;\n if (!livePaths.has(basePath)) {\n stalePaths.push(row.path);\n }\n }\n\n if (stalePaths.length > 0) {\n const deleteChunksFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunks = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n const deleteFile = db.prepare(\n \"DELETE FROM memory_files WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const stalePath of stalePaths) {\n const chunkIds = db\n .prepare(\"SELECT id FROM memory_chunks WHERE project_id = ? AND path = ?\")\n .all(projectId, stalePath) as Array<{ id: string }>;\n for (const { id } of chunkIds) {\n deleteChunksFts.run(id);\n }\n deleteChunks.run(projectId, stalePath);\n deleteFile.run(projectId, stalePath);\n }\n })();\n }\n\n return result;\n}\n\n// ---------------------------------------------------------------------------\n// Global indexing (all registered projects)\n// ---------------------------------------------------------------------------\n\n/**\n * Index all active projects registered in the registry DB.\n *\n * Async: yields to the event loop between each project so that the daemon's\n * Unix socket server can process IPC requests (e.g. status) while indexing.\n */\nexport async function indexAll(\n db: Database,\n registryDb: Database,\n): Promise<{ projects: number; result: IndexResult }> {\n const projects = registryDb\n .prepare(\"SELECT id, root_path, claude_notes_dir FROM projects WHERE status = 'active'\")\n .all() as Array<{ id: number; root_path: string; claude_notes_dir: string | null }>;\n\n const totals: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n for (const project of projects) {\n await yieldToEventLoop();\n const r = await indexProject(db, project.id, project.root_path, project.claude_notes_dir);\n totals.filesProcessed += r.filesProcessed;\n totals.chunksCreated += r.chunksCreated;\n totals.filesSkipped += r.filesSkipped;\n }\n\n return { projects: projects.length, result: totals };\n}\n\n// ---------------------------------------------------------------------------\n// Embedding generation\n// ---------------------------------------------------------------------------\n\n/**\n * Generate and store embeddings for chunks that do not yet have one.\n *\n * Because better-sqlite3 is synchronous but the embedding pipeline is async,\n * we fetch all unembedded chunk texts first, generate embeddings in batches,\n * and then write them back in a transaction.\n *\n * @param db Open federation database.\n * @param projectId Optional — restrict to a specific project.\n * @param batchSize Number of chunks to embed per round. Default 50.\n * @param onProgress Optional callback called after each batch with running totals.\n */\nexport async function embedChunks(\n db: Database,\n projectId?: number,\n batchSize = 50,\n onProgress?: (embedded: number, total: number) => void,\n): Promise<EmbedResult> {\n // Dynamic import — keeps the heavy ML runtime out of the module load path\n const { generateEmbedding, serializeEmbedding } = await import(\"../embeddings.js\");\n\n const conditions = [\"embedding IS NULL\"];\n const params: (string | number)[] = [];\n\n if (projectId !== undefined) {\n conditions.push(\"project_id = ?\");\n params.push(projectId);\n }\n\n const where = \"WHERE \" + conditions.join(\" AND \");\n\n const rows = db\n .prepare(`SELECT id, text FROM memory_chunks ${where} ORDER BY id`)\n .all(...params) as Array<{ id: string; text: string }>;\n\n if (rows.length === 0) {\n return { chunksEmbedded: 0, chunksSkipped: 0 };\n }\n\n const updateStmt = db.prepare(\n \"UPDATE memory_chunks SET embedding = ? WHERE id = ?\",\n );\n\n let embedded = 0;\n const total = rows.length;\n\n for (let i = 0; i < rows.length; i += batchSize) {\n const batch = rows.slice(i, i + batchSize);\n\n // Generate embeddings for the batch (async — must happen OUTSIDE transaction)\n const embeddings: Array<{ id: string; blob: Buffer }> = [];\n for (const row of batch) {\n const vec = await generateEmbedding(row.text);\n const blob = serializeEmbedding(vec);\n embeddings.push({ id: row.id, blob });\n }\n\n // Write the batch in a single transaction\n db.transaction(() => {\n for (const { id, blob } of embeddings) {\n updateStmt.run(blob, id);\n }\n })();\n\n embedded += embeddings.length;\n onProgress?.(embedded, total);\n }\n\n return { chunksEmbedded: embedded, chunksSkipped: 0 };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AA4CA,SAAgB,UACd,IACA,WACA,UACA,cACA,QACA,MACS;CACT,MAAM,UAAU,KAAK,UAAU,aAAa;CAG5C,IAAI;CACJ,IAAI;AACJ,KAAI;AACF,YAAU,aAAa,SAAS,OAAO;AACvC,SAAO,SAAS,QAAQ;SAClB;AAEN,SAAO;;CAGT,MAAM,OAAO,WAAW,QAAQ;CAChC,MAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ;CACtC,MAAM,OAAO,KAAK;AASlB,KANiB,GACd,QACC,kEACD,CACA,IAAI,WAAW,aAAa,EAEjB,SAAS,KAErB,QAAO;CAIT,MAAM,cAAc,GACjB,QACC,iEACD,CACA,IAAI,WAAW,aAAa;CAE/B,MAAM,YAAY,GAAG,QAAQ,sCAAsC;CACnE,MAAM,cAAc,GAAG,QACrB,8DACD;AAED,IAAG,kBAAkB;AACnB,OAAK,MAAM,OAAO,YAChB,WAAU,IAAI,IAAI,GAAG;AAEvB,cAAY,IAAI,WAAW,aAAa;GACxC,EAAE;CAGJ,MAAM,SAAS,cAAc,QAAQ;CAGrC,MAAM,cAAc,GAAG,QAAQ;;;IAG7B;CAEF,MAAM,YAAY,GAAG,QAAQ;;;IAG3B;CAEF,MAAM,aAAa,GAAG,QAAQ;;;;;;;;;IAS5B;CAEF,MAAM,YAAY,KAAK,KAAK;AAE5B,IAAG,kBAAkB;AACnB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;GACtC,MAAM,QAAQ,OAAO;GACrB,MAAM,KAAK,QAAQ,WAAW,cAAc,GAAG,MAAM,WAAW,MAAM,QAAQ;AAC9E,eAAY,IACV,IACA,WACA,QACA,MACA,cACA,MAAM,WACN,MAAM,SACN,MAAM,MACN,MAAM,MACN,UACD;AACD,aAAU,IACR,MAAM,MACN,IACA,WACA,cACA,QACA,MACA,MAAM,WACN,MAAM,QACP;;AAEH,aAAW,IAAI,WAAW,cAAc,QAAQ,MAAM,MAAM,OAAO,KAAK;GACxE,EAAE;AAEJ,QAAO;;;;;;;;;;;;AAiBT,eAAsB,aACpB,IACA,WACA,UACA,gBACsB;CACtB,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;CAED,MAAM,eAA2F,EAAE;CAGnG,MAAM,eAAe,KAAK,UAAU,YAAY;AAChD,KAAI,WAAW,aAAa,CAC1B,cAAa,KAAK;EAAE,SAAS;EAAc,UAAU;EAAU,QAAQ;EAAU,MAAM;EAAa,CAAC;CAIvG,MAAM,YAAY,KAAK,UAAU,SAAS;AAC1C,MAAK,MAAM,WAAW,YAAY,UAAU,EAAE;EAE5C,MAAM,OAAO,WADG,SAAS,UAAU,QAAQ,CACX;AAChC,eAAa,KAAK;GAAE;GAAS,UAAU;GAAU,QAAQ;GAAU;GAAM,CAAC;;CAI5E,MAAM,WAAW,KAAK,UAAU,QAAQ;AACxC,MAAK,MAAM,WAAW,YAAY,SAAS,CACzC,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAS,MAAM;EAAW,CAAC;CAKtF;EACE,MAAM,mBAAmB,GAAG,QAAQ;;;MAGlC;EACF,MAAM,iBAAiB,GAAG,QAAQ;;;MAGhC;EACF,MAAM,YAAY,KAAK,KAAK;AAC5B,OAAK,MAAM,WAAW,YAAY,SAAS,EAAE;GAE3C,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,OAAI,CAAC,KAAM;GAEX,MAAM,gBAAgB,GADN,SAAS,UAAU,QAAQ,CACV;GACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;GACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,MAAG,kBAAkB;AACnB,qBAAiB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACnG,mBAAe,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;KAChF,EAAE;;;AAKR,KAAI,CAAC,6BAA6B,SAAS,CACzC,MAAK,MAAM,WAAW,iBAAiB,SAAS,CAC9C,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAW,MAAM;EAAS,CAAC;AAKxF,KAAI,kBAAkB,mBAAmB,UAAU;AACjD,OAAK,MAAM,WAAW,YAAY,eAAe,CAC/C,cAAa,KAAK;GAAE;GAAS,UAAU;GAAgB,QAAQ;GAAS,MAAM;GAAW,CAAC;EAI5F;GACE,MAAM,YAAY,KAAK,KAAK;GAC5B,MAAM,oBAAoB,GAAG,QAAQ;;;QAGnC;GACF,MAAM,kBAAkB,GAAG,QAAQ;;;QAGjC;AACF,QAAK,MAAM,WAAW,YAAY,eAAe,EAAE;IAEjD,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,QAAI,CAAC,KAAM;IAEX,MAAM,gBAAgB,GADN,SAAS,gBAAgB,QAAQ,CAChB;IACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;IACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,OAAG,kBAAkB;AACnB,uBAAkB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACpG,qBAAgB,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;MACjF,EAAE;;;AAKR,MAAI,eAAe,SAAS,SAAS,EAAE;GACrC,MAAM,mBAAmB,eAAe,MAAM,GAAG,GAAiB;GAClE,MAAM,kBAAkB,KAAK,kBAAkB,SAAS;GAExD,MAAM,iBAAiB,KAAK,kBAAkB,YAAY;AAC1D,OAAI,WAAW,eAAe,CAC5B,cAAa,KAAK;IAChB,SAAS;IACT,UAAU;IACV,QAAQ;IACR,MAAM;IACP,CAAC;AAGJ,QAAK,MAAM,WAAW,YAAY,gBAAgB,EAAE;IAElD,MAAM,OAAO,WADG,SAAS,kBAAkB,QAAQ,CACnB;AAChC,iBAAa,KAAK;KAAE;KAAS,UAAU;KAAkB,QAAQ;KAAU;KAAM,CAAC;;;;AAMxF,OAAM,kBAAkB;CAExB,IAAI,kBAAkB;AAEtB,MAAK,MAAM,EAAE,SAAS,UAAU,QAAQ,UAAU,cAAc;AAC9D,MAAI,mBAAmB,mBAAmB;AACxC,SAAM,kBAAkB;AACxB,qBAAkB;;AAEpB;EAEA,MAAM,UAAU,SAAS,UAAU,QAAQ;AAG3C,MAFgB,UAAU,IAAI,WAAW,UAAU,SAAS,QAAQ,KAAK,EAE5D;GACX,MAAM,QAAQ,GACX,QACC,4EACD,CACA,IAAI,WAAW,QAAQ;AAE1B,UAAO;AACP,UAAO,iBAAiB,MAAM;QAE9B,QAAO;;CAKX,MAAM,4BAAY,IAAI,KAAa;AACnC,MAAK,MAAM,EAAE,SAAS,cAAc,aAClC,WAAU,IAAI,SAAS,UAAU,QAAQ,CAAC;CAG5C,MAAM,eAAe,GAClB,QAAQ,+DAA+D,CACvE,IAAI,UAAU;CAEjB,MAAM,aAAuB,EAAE;AAC/B,MAAK,MAAM,OAAO,cAAc;EAC9B,MAAM,WAAW,IAAI,KAAK,SAAS,UAAU,GACzC,IAAI,KAAK,MAAM,GAAG,GAAkB,GACpC,IAAI;AACR,MAAI,CAAC,UAAU,IAAI,SAAS,CAC1B,YAAW,KAAK,IAAI,KAAK;;AAI7B,KAAI,WAAW,SAAS,GAAG;EACzB,MAAM,kBAAkB,GAAG,QAAQ,sCAAsC;EACzE,MAAM,eAAe,GAAG,QACtB,8DACD;EACD,MAAM,aAAa,GAAG,QACpB,6DACD;AAED,KAAG,kBAAkB;AACnB,QAAK,MAAM,aAAa,YAAY;IAClC,MAAM,WAAW,GACd,QAAQ,iEAAiE,CACzE,IAAI,WAAW,UAAU;AAC5B,SAAK,MAAM,EAAE,QAAQ,SACnB,iBAAgB,IAAI,GAAG;AAEzB,iBAAa,IAAI,WAAW,UAAU;AACtC,eAAW,IAAI,WAAW,UAAU;;IAEtC,EAAE;;AAGN,QAAO;;;;;;;;AAaT,eAAsB,SACpB,IACA,YACoD;CACpD,MAAM,WAAW,WACd,QAAQ,+EAA+E,CACvF,KAAK;CAER,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;AAED,MAAK,MAAM,WAAW,UAAU;AAC9B,QAAM,kBAAkB;EACxB,MAAM,IAAI,MAAM,aAAa,IAAI,QAAQ,IAAI,QAAQ,WAAW,QAAQ,iBAAiB;AACzF,SAAO,kBAAkB,EAAE;AAC3B,SAAO,iBAAiB,EAAE;AAC1B,SAAO,gBAAgB,EAAE;;AAG3B,QAAO;EAAE,UAAU,SAAS;EAAQ,QAAQ;EAAQ;;;;;;;;;;;;;;AAmBtD,eAAsB,YACpB,IACA,WACA,YAAY,IACZ,YACsB;CAEtB,MAAM,EAAE,mBAAmB,uBAAuB,MAAM,OAAO;CAE/D,MAAM,aAAa,CAAC,oBAAoB;CACxC,MAAM,SAA8B,EAAE;AAEtC,KAAI,cAAc,QAAW;AAC3B,aAAW,KAAK,iBAAiB;AACjC,SAAO,KAAK,UAAU;;CAGxB,MAAM,QAAQ,WAAW,WAAW,KAAK,QAAQ;CAEjD,MAAM,OAAO,GACV,QAAQ,sCAAsC,MAAM,cAAc,CAClE,IAAI,GAAG,OAAO;AAEjB,KAAI,KAAK,WAAW,EAClB,QAAO;EAAE,gBAAgB;EAAG,eAAe;EAAG;CAGhD,MAAM,aAAa,GAAG,QACpB,sDACD;CAED,IAAI,WAAW;CACf,MAAM,QAAQ,KAAK;AAEnB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,WAAW;EAC/C,MAAM,QAAQ,KAAK,MAAM,GAAG,IAAI,UAAU;EAG1C,MAAM,aAAkD,EAAE;AAC1D,OAAK,MAAM,OAAO,OAAO;GAEvB,MAAM,OAAO,mBADD,MAAM,kBAAkB,IAAI,KAAK,CACT;AACpC,cAAW,KAAK;IAAE,IAAI,IAAI;IAAI;IAAM,CAAC;;AAIvC,KAAG,kBAAkB;AACnB,QAAK,MAAM,EAAE,IAAI,UAAU,WACzB,YAAW,IAAI,MAAM,GAAG;IAE1B,EAAE;AAEJ,cAAY,WAAW;AACvB,eAAa,UAAU,MAAM;;AAG/B,QAAO;EAAE,gBAAgB;EAAU,eAAe;EAAG"}
@@ -119,11 +119,14 @@ async function toolMemorySearch(registryDb, federation, params, searchDefaults)
119
119
  text: `No results found for query: "${params.query}" (mode: ${mode})`
120
120
  }] };
121
121
  const rerankLabel = shouldRerank ? " +rerank" : "";
122
+ const useCompact = params.format === "compact";
122
123
  const formatted = withSlugs.map((r, i) => {
123
- const header = `[${i + 1}] ${r.projectSlug ?? `project:${r.projectId}`} — ${r.path} (lines ${r.startLine}-${r.endLine}) score=${r.score.toFixed(4)} tier=${r.tier} source=${r.source}`;
124
+ const slug = r.projectSlug ?? `project:${r.projectId}`;
125
+ if (useCompact) return `[${i + 1}] ${slug} — ${r.path} L${r.startLine}-${r.endLine} score=${r.score.toFixed(3)}`;
126
+ const header = `[${i + 1}] ${slug} — ${r.path} (lines ${r.startLine}-${r.endLine}) score=${r.score.toFixed(4)} tier=${r.tier} source=${r.source}`;
124
127
  const raw = r.snippet.trim();
125
128
  return `${header}\n${raw.length > snippetLength ? raw.slice(0, snippetLength) + "..." : raw}`;
126
- }).join("\n\n---\n\n");
129
+ }).join(useCompact ? "\n" : "\n\n---\n\n");
127
130
  try {
128
131
  const { saveQueryResult } = await import("./query-feedback-CQSumXDy.mjs").then((n) => n.t);
129
132
  saveQueryResult({
@@ -1637,4 +1640,4 @@ var tools_exports = /* @__PURE__ */ __exportAll({
1637
1640
 
1638
1641
  //#endregion
1639
1642
  export { toolSessionList as a, toolProjectHealth as c, toolProjectTodo as d, toolMemoryGet as f, toolRegistrySearch as i, toolProjectInfo as l, toolMemoryTaxonomy as n, toolSessionRoute as o, toolMemorySearch as p, toolMemoryWakeup as r, toolProjectDetect as s, tools_exports as t, toolProjectList as u };
1640
- //# sourceMappingURL=tools-8t7BQrm9.mjs.map
1643
+ //# sourceMappingURL=tools-gMHdjmHC.mjs.map