@tekmidian/pai 0.5.6 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/ARCHITECTURE.md +72 -1
  2. package/README.md +107 -3
  3. package/dist/{auto-route-BG6I_4B1.mjs → auto-route-C-DrW6BL.mjs} +3 -3
  4. package/dist/{auto-route-BG6I_4B1.mjs.map → auto-route-C-DrW6BL.mjs.map} +1 -1
  5. package/dist/cli/index.mjs +1897 -1569
  6. package/dist/cli/index.mjs.map +1 -1
  7. package/dist/clusters-JIDQW65f.mjs +201 -0
  8. package/dist/clusters-JIDQW65f.mjs.map +1 -0
  9. package/dist/{config-Cf92lGX_.mjs → config-BuhHWyOK.mjs} +21 -6
  10. package/dist/config-BuhHWyOK.mjs.map +1 -0
  11. package/dist/daemon/index.mjs +12 -9
  12. package/dist/daemon/index.mjs.map +1 -1
  13. package/dist/{daemon-D9evGlgR.mjs → daemon-D3hYb5_C.mjs} +670 -219
  14. package/dist/daemon-D3hYb5_C.mjs.map +1 -0
  15. package/dist/daemon-mcp/index.mjs +4597 -4
  16. package/dist/daemon-mcp/index.mjs.map +1 -1
  17. package/dist/{db-4lSqLFb8.mjs → db-BtuN768f.mjs} +9 -2
  18. package/dist/db-BtuN768f.mjs.map +1 -0
  19. package/dist/db-DdUperSl.mjs +110 -0
  20. package/dist/db-DdUperSl.mjs.map +1 -0
  21. package/dist/{detect-BU3Nx_2L.mjs → detect-CdaA48EI.mjs} +1 -1
  22. package/dist/{detect-BU3Nx_2L.mjs.map → detect-CdaA48EI.mjs.map} +1 -1
  23. package/dist/{detector-Bp-2SM3x.mjs → detector-jGBuYQJM.mjs} +2 -2
  24. package/dist/{detector-Bp-2SM3x.mjs.map → detector-jGBuYQJM.mjs.map} +1 -1
  25. package/dist/{factory-Bzcy70G9.mjs → factory-Ygqe_bVZ.mjs} +7 -5
  26. package/dist/{factory-Bzcy70G9.mjs.map → factory-Ygqe_bVZ.mjs.map} +1 -1
  27. package/dist/helpers-BEST-4Gx.mjs +420 -0
  28. package/dist/helpers-BEST-4Gx.mjs.map +1 -0
  29. package/dist/hooks/capture-all-events.mjs +19 -4
  30. package/dist/hooks/capture-all-events.mjs.map +4 -4
  31. package/dist/hooks/capture-session-summary.mjs +38 -0
  32. package/dist/hooks/capture-session-summary.mjs.map +3 -3
  33. package/dist/hooks/cleanup-session-files.mjs +6 -12
  34. package/dist/hooks/cleanup-session-files.mjs.map +4 -4
  35. package/dist/hooks/context-compression-hook.mjs +105 -111
  36. package/dist/hooks/context-compression-hook.mjs.map +4 -4
  37. package/dist/hooks/initialize-session.mjs +26 -17
  38. package/dist/hooks/initialize-session.mjs.map +4 -4
  39. package/dist/hooks/inject-observations.mjs +220 -0
  40. package/dist/hooks/inject-observations.mjs.map +7 -0
  41. package/dist/hooks/load-core-context.mjs +18 -2
  42. package/dist/hooks/load-core-context.mjs.map +4 -4
  43. package/dist/hooks/load-project-context.mjs +102 -97
  44. package/dist/hooks/load-project-context.mjs.map +4 -4
  45. package/dist/hooks/observe.mjs +354 -0
  46. package/dist/hooks/observe.mjs.map +7 -0
  47. package/dist/hooks/stop-hook.mjs +174 -90
  48. package/dist/hooks/stop-hook.mjs.map +4 -4
  49. package/dist/hooks/sync-todo-to-md.mjs +31 -33
  50. package/dist/hooks/sync-todo-to-md.mjs.map +4 -4
  51. package/dist/index.d.mts +32 -9
  52. package/dist/index.d.mts.map +1 -1
  53. package/dist/index.mjs +6 -9
  54. package/dist/indexer-D53l5d1U.mjs +1 -0
  55. package/dist/{indexer-backend-CIMXedqk.mjs → indexer-backend-jcJFsmB4.mjs} +37 -127
  56. package/dist/indexer-backend-jcJFsmB4.mjs.map +1 -0
  57. package/dist/{ipc-client-Bjg_a1dc.mjs → ipc-client-CoyUHPod.mjs} +2 -7
  58. package/dist/{ipc-client-Bjg_a1dc.mjs.map → ipc-client-CoyUHPod.mjs.map} +1 -1
  59. package/dist/latent-ideas-bTJo6Omd.mjs +191 -0
  60. package/dist/latent-ideas-bTJo6Omd.mjs.map +1 -0
  61. package/dist/neighborhood-BYYbEkUJ.mjs +135 -0
  62. package/dist/neighborhood-BYYbEkUJ.mjs.map +1 -0
  63. package/dist/note-context-BK24bX8Y.mjs +126 -0
  64. package/dist/note-context-BK24bX8Y.mjs.map +1 -0
  65. package/dist/postgres-CKf-EDtS.mjs +846 -0
  66. package/dist/postgres-CKf-EDtS.mjs.map +1 -0
  67. package/dist/{reranker-D7bRAHi6.mjs → reranker-CMNZcfVx.mjs} +1 -1
  68. package/dist/{reranker-D7bRAHi6.mjs.map → reranker-CMNZcfVx.mjs.map} +1 -1
  69. package/dist/{search-_oHfguA5.mjs → search-DC1qhkKn.mjs} +2 -58
  70. package/dist/search-DC1qhkKn.mjs.map +1 -0
  71. package/dist/{sqlite-WWBq7_2C.mjs → sqlite-l-s9xPjY.mjs} +160 -3
  72. package/dist/sqlite-l-s9xPjY.mjs.map +1 -0
  73. package/dist/state-C6_vqz7w.mjs +102 -0
  74. package/dist/state-C6_vqz7w.mjs.map +1 -0
  75. package/dist/stop-words-BaMEGVeY.mjs +326 -0
  76. package/dist/stop-words-BaMEGVeY.mjs.map +1 -0
  77. package/dist/{indexer-CMPOiY1r.mjs → sync-BOsnEj2-.mjs} +14 -216
  78. package/dist/sync-BOsnEj2-.mjs.map +1 -0
  79. package/dist/themes-BvYF0W8T.mjs +148 -0
  80. package/dist/themes-BvYF0W8T.mjs.map +1 -0
  81. package/dist/{tools-DV_lsiCc.mjs → tools-DcaJlYDN.mjs} +162 -273
  82. package/dist/tools-DcaJlYDN.mjs.map +1 -0
  83. package/dist/trace-CRx9lPuc.mjs +137 -0
  84. package/dist/trace-CRx9lPuc.mjs.map +1 -0
  85. package/dist/{vault-indexer-DXWs9pDn.mjs → vault-indexer-Bi2cRmn7.mjs} +174 -138
  86. package/dist/vault-indexer-Bi2cRmn7.mjs.map +1 -0
  87. package/dist/zettelkasten-cdajbnPr.mjs +708 -0
  88. package/dist/zettelkasten-cdajbnPr.mjs.map +1 -0
  89. package/package.json +1 -2
  90. package/src/hooks/ts/capture-all-events.ts +6 -0
  91. package/src/hooks/ts/lib/project-utils/index.ts +50 -0
  92. package/src/hooks/ts/lib/project-utils/notify.ts +75 -0
  93. package/src/hooks/ts/lib/project-utils/paths.ts +218 -0
  94. package/src/hooks/ts/lib/project-utils/session-notes.ts +363 -0
  95. package/src/hooks/ts/lib/project-utils/todo.ts +178 -0
  96. package/src/hooks/ts/lib/project-utils/tokens.ts +39 -0
  97. package/src/hooks/ts/lib/project-utils.ts +40 -999
  98. package/src/hooks/ts/post-tool-use/observe.ts +327 -0
  99. package/src/hooks/ts/pre-compact/context-compression-hook.ts +6 -0
  100. package/src/hooks/ts/session-end/capture-session-summary.ts +41 -0
  101. package/src/hooks/ts/session-start/initialize-session.ts +7 -1
  102. package/src/hooks/ts/session-start/inject-observations.ts +254 -0
  103. package/src/hooks/ts/session-start/load-core-context.ts +7 -0
  104. package/src/hooks/ts/session-start/load-project-context.ts +8 -1
  105. package/src/hooks/ts/stop/stop-hook.ts +28 -0
  106. package/templates/claude-md.template.md +7 -74
  107. package/templates/skills/user/.gitkeep +0 -0
  108. package/dist/chunker-CbnBe0s0.mjs +0 -191
  109. package/dist/chunker-CbnBe0s0.mjs.map +0 -1
  110. package/dist/config-Cf92lGX_.mjs.map +0 -1
  111. package/dist/daemon-D9evGlgR.mjs.map +0 -1
  112. package/dist/db-4lSqLFb8.mjs.map +0 -1
  113. package/dist/db-Dp8VXIMR.mjs +0 -212
  114. package/dist/db-Dp8VXIMR.mjs.map +0 -1
  115. package/dist/indexer-CMPOiY1r.mjs.map +0 -1
  116. package/dist/indexer-backend-CIMXedqk.mjs.map +0 -1
  117. package/dist/mcp/index.d.mts +0 -1
  118. package/dist/mcp/index.mjs +0 -500
  119. package/dist/mcp/index.mjs.map +0 -1
  120. package/dist/postgres-FXrHDPcE.mjs +0 -358
  121. package/dist/postgres-FXrHDPcE.mjs.map +0 -1
  122. package/dist/schemas-BFIgGntb.mjs +0 -3405
  123. package/dist/schemas-BFIgGntb.mjs.map +0 -1
  124. package/dist/search-_oHfguA5.mjs.map +0 -1
  125. package/dist/sqlite-WWBq7_2C.mjs.map +0 -1
  126. package/dist/tools-DV_lsiCc.mjs.map +0 -1
  127. package/dist/vault-indexer-DXWs9pDn.mjs.map +0 -1
  128. package/dist/zettelkasten-e-a4rW_6.mjs +0 -901
  129. package/dist/zettelkasten-e-a4rW_6.mjs.map +0 -1
  130. package/templates/README.md +0 -181
  131. package/templates/skills/createskill-skill.template.md +0 -78
  132. package/templates/skills/history-system.template.md +0 -371
  133. package/templates/skills/hook-system.template.md +0 -913
  134. package/templates/skills/sessions-skill.template.md +0 -102
  135. package/templates/skills/skill-system.template.md +0 -214
  136. package/templates/skills/terminal-tabs.template.md +0 -120
  137. package/templates/templates.md +0 -20
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sync-BOsnEj2-.mjs","names":[],"sources":["../src/memory/indexer/sync.ts"],"sourcesContent":["/**\n * Synchronous (SQLite) indexer for the PAI federation memory engine.\n *\n * Scans project memory/ and Notes/ directories, chunks markdown files, and\n * inserts the resulting chunks into federation.db for BM25 search.\n *\n * Change detection: files whose SHA-256 hash has not changed since the last\n * index run are skipped, keeping incremental re-indexing fast.\n *\n * Uses raw better-sqlite3 Database directly for maximum SQLite performance\n * (synchronous transactions, no serialisation overhead).\n */\n\nimport { readFileSync, statSync, existsSync } from \"node:fs\";\nimport { join, relative, basename } from \"node:path\";\nimport type { Database } from \"better-sqlite3\";\nimport { chunkMarkdown } from \"../chunker.js\";\nimport {\n sha256File,\n chunkId,\n detectTier,\n walkMdFiles,\n walkContentFiles,\n isPathTooBroadForContentScan,\n parseSessionTitleChunk,\n yieldToEventLoop,\n INDEX_YIELD_EVERY,\n} from \"./helpers.js\";\nimport type { IndexResult, EmbedResult } from \"./types.js\";\n\nexport type { IndexResult, EmbedResult };\n\n// Re-export detectTier for backward-compatibility (consumers import it from indexer.js)\nexport { detectTier };\n\n// ---------------------------------------------------------------------------\n// Single-file indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index a single file into the federation database.\n *\n * @returns true if the file was re-indexed (changed or new), false if skipped.\n */\nexport function indexFile(\n db: Database,\n projectId: number,\n rootPath: string,\n relativePath: string,\n source: string,\n tier: string,\n): boolean {\n const absPath = join(rootPath, relativePath);\n\n // Read file content\n let content: string;\n let stat: ReturnType<typeof statSync>;\n try {\n content = readFileSync(absPath, \"utf8\");\n stat = 
statSync(absPath);\n } catch {\n // File unreadable or missing — skip silently\n return false;\n }\n\n const hash = sha256File(content);\n const mtime = Math.floor(stat.mtimeMs);\n const size = stat.size;\n\n // Check if the file has changed since last index\n const existing = db\n .prepare(\n \"SELECT hash FROM memory_files WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relativePath) as { hash: string } | undefined;\n\n if (existing?.hash === hash) {\n // Unchanged — skip\n return false;\n }\n\n // Delete old chunks for this file from both tables\n const oldChunkIds = db\n .prepare(\n \"SELECT id FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .all(projectId, relativePath) as Array<{ id: string }>;\n\n const deleteFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunk = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const row of oldChunkIds) {\n deleteFts.run(row.id);\n }\n deleteChunk.run(projectId, relativePath);\n })();\n\n // Chunk the new content\n const chunks = chunkMarkdown(content);\n\n // Insert new chunks into memory_chunks and memory_fts\n const insertChunk = db.prepare(`\n INSERT INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const insertFts = db.prepare(`\n INSERT INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n\n const upsertFile = db.prepare(`\n INSERT INTO memory_files (project_id, path, source, tier, hash, mtime, size)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n ON CONFLICT(project_id, path) DO UPDATE SET\n source = excluded.source,\n tier = excluded.tier,\n hash = excluded.hash,\n mtime = excluded.mtime,\n size = excluded.size\n `);\n\n const updatedAt = Date.now();\n\n db.transaction(() => {\n for (let i = 0; i < chunks.length; i++) {\n const chunk = 
chunks[i]!;\n const id = chunkId(projectId, relativePath, i, chunk.startLine, chunk.endLine);\n insertChunk.run(\n id,\n projectId,\n source,\n tier,\n relativePath,\n chunk.startLine,\n chunk.endLine,\n chunk.hash,\n chunk.text,\n updatedAt,\n );\n insertFts.run(\n chunk.text,\n id,\n projectId,\n relativePath,\n source,\n tier,\n chunk.startLine,\n chunk.endLine,\n );\n }\n upsertFile.run(projectId, relativePath, source, tier, hash, mtime, size);\n })();\n\n return true;\n}\n\n// ---------------------------------------------------------------------------\n// Project-level indexing\n// ---------------------------------------------------------------------------\n\n/**\n * Index all memory, Notes, and content files for a single registered project.\n *\n * Scans:\n * - {rootPath}/MEMORY.md → source='memory', tier='evergreen'\n * - {rootPath}/memory/ → source='memory', tier from detectTier()\n * - {rootPath}/Notes/ → source='notes', tier='session'\n * - {rootPath}/**\\/*.md → source='content', tier='topic' (all other .md files, recursive)\n * - {claudeNotesDir}/ → source='notes', tier='session' (if set and different)\n */\nexport async function indexProject(\n db: Database,\n projectId: number,\n rootPath: string,\n claudeNotesDir?: string | null,\n): Promise<IndexResult> {\n const result: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n const filesToIndex: Array<{ absPath: string; rootBase: string; source: string; tier: string }> = [];\n\n // Root-level MEMORY.md\n const rootMemoryMd = join(rootPath, \"MEMORY.md\");\n if (existsSync(rootMemoryMd)) {\n filesToIndex.push({ absPath: rootMemoryMd, rootBase: rootPath, source: \"memory\", tier: \"evergreen\" });\n }\n\n // memory/ directory\n const memoryDir = join(rootPath, \"memory\");\n for (const absPath of walkMdFiles(memoryDir)) {\n const relPath = relative(rootPath, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"memory\", 
tier });\n }\n\n // {rootPath}/Notes/ directory\n const notesDir = join(rootPath, \"Notes\");\n for (const absPath of walkMdFiles(notesDir)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic session-title chunks for Notes files with the standard filename format:\n // \"NNNN - YYYY-MM-DD - Descriptive Title.md\"\n {\n const titleInsertChunk = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const updatedAt = Date.now();\n for (const absPath of walkMdFiles(notesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(rootPath, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // {rootPath}/**/*.md — all other markdown content\n if (!isPathTooBroadForContentScan(rootPath)) {\n for (const absPath of walkContentFiles(rootPath)) {\n filesToIndex.push({ absPath, rootBase: rootPath, source: \"content\", tier: \"topic\" });\n }\n }\n\n // Claude Code session notes directory (~/.claude/projects/{encoded}/Notes/)\n if (claudeNotesDir && claudeNotesDir !== notesDir) {\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n filesToIndex.push({ absPath, rootBase: claudeNotesDir, source: \"notes\", tier: \"session\" });\n }\n\n // Synthetic title chunks for claude notes dir\n {\n const updatedAt 
= Date.now();\n const titleInsertChunk2 = db.prepare(`\n INSERT OR IGNORE INTO memory_chunks (id, project_id, source, tier, path, start_line, end_line, hash, text, updated_at)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n `);\n const titleInsertFts2 = db.prepare(`\n INSERT OR IGNORE INTO memory_fts (text, id, project_id, path, source, tier, start_line, end_line)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n `);\n for (const absPath of walkMdFiles(claudeNotesDir)) {\n const fileName = basename(absPath);\n const text = parseSessionTitleChunk(fileName);\n if (!text) continue;\n const relPath = relative(claudeNotesDir, absPath);\n const syntheticPath = `${relPath}::title`;\n const id = chunkId(projectId, syntheticPath, 0, 0, 0);\n const hash = sha256File(text);\n db.transaction(() => {\n titleInsertChunk2.run(id, projectId, \"notes\", \"session\", syntheticPath, 0, 0, hash, text, updatedAt);\n titleInsertFts2.run(text, id, projectId, syntheticPath, \"notes\", \"session\", 0, 0);\n })();\n }\n }\n\n // Derive the sibling memory/ directory: .../Notes/ → .../memory/\n if (claudeNotesDir.endsWith(\"/Notes\")) {\n const claudeProjectDir = claudeNotesDir.slice(0, -\"/Notes\".length);\n const claudeMemoryDir = join(claudeProjectDir, \"memory\");\n\n const claudeMemoryMd = join(claudeProjectDir, \"MEMORY.md\");\n if (existsSync(claudeMemoryMd)) {\n filesToIndex.push({\n absPath: claudeMemoryMd,\n rootBase: claudeProjectDir,\n source: \"memory\",\n tier: \"evergreen\",\n });\n }\n\n for (const absPath of walkMdFiles(claudeMemoryDir)) {\n const relPath = relative(claudeProjectDir, absPath);\n const tier = detectTier(relPath);\n filesToIndex.push({ absPath, rootBase: claudeProjectDir, source: \"memory\", tier });\n }\n }\n }\n\n // Yield after collection phase before processing\n await yieldToEventLoop();\n\n let filesSinceYield = 0;\n\n for (const { absPath, rootBase, source, tier } of filesToIndex) {\n if (filesSinceYield >= INDEX_YIELD_EVERY) {\n await yieldToEventLoop();\n filesSinceYield 
= 0;\n }\n filesSinceYield++;\n\n const relPath = relative(rootBase, absPath);\n const changed = indexFile(db, projectId, rootBase, relPath, source, tier);\n\n if (changed) {\n const count = db\n .prepare(\n \"SELECT COUNT(*) as n FROM memory_chunks WHERE project_id = ? AND path = ?\",\n )\n .get(projectId, relPath) as { n: number };\n\n result.filesProcessed++;\n result.chunksCreated += count.n;\n } else {\n result.filesSkipped++;\n }\n }\n\n // Prune stale paths: remove DB entries for files that no longer exist on disk.\n const livePaths = new Set<string>();\n for (const { absPath, rootBase } of filesToIndex) {\n livePaths.add(relative(rootBase, absPath));\n }\n\n const dbChunkPaths = db\n .prepare(\"SELECT DISTINCT path FROM memory_chunks WHERE project_id = ?\")\n .all(projectId) as Array<{ path: string }>;\n\n const stalePaths: string[] = [];\n for (const row of dbChunkPaths) {\n const basePath = row.path.endsWith(\"::title\")\n ? row.path.slice(0, -\"::title\".length)\n : row.path;\n if (!livePaths.has(basePath)) {\n stalePaths.push(row.path);\n }\n }\n\n if (stalePaths.length > 0) {\n const deleteChunksFts = db.prepare(\"DELETE FROM memory_fts WHERE id = ?\");\n const deleteChunks = db.prepare(\n \"DELETE FROM memory_chunks WHERE project_id = ? AND path = ?\",\n );\n const deleteFile = db.prepare(\n \"DELETE FROM memory_files WHERE project_id = ? AND path = ?\",\n );\n\n db.transaction(() => {\n for (const stalePath of stalePaths) {\n const chunkIds = db\n .prepare(\"SELECT id FROM memory_chunks WHERE project_id = ? 
AND path = ?\")\n .all(projectId, stalePath) as Array<{ id: string }>;\n for (const { id } of chunkIds) {\n deleteChunksFts.run(id);\n }\n deleteChunks.run(projectId, stalePath);\n deleteFile.run(projectId, stalePath);\n }\n })();\n }\n\n return result;\n}\n\n// ---------------------------------------------------------------------------\n// Global indexing (all registered projects)\n// ---------------------------------------------------------------------------\n\n/**\n * Index all active projects registered in the registry DB.\n *\n * Async: yields to the event loop between each project so that the daemon's\n * Unix socket server can process IPC requests (e.g. status) while indexing.\n */\nexport async function indexAll(\n db: Database,\n registryDb: Database,\n): Promise<{ projects: number; result: IndexResult }> {\n const projects = registryDb\n .prepare(\"SELECT id, root_path, claude_notes_dir FROM projects WHERE status = 'active'\")\n .all() as Array<{ id: number; root_path: string; claude_notes_dir: string | null }>;\n\n const totals: IndexResult = {\n filesProcessed: 0,\n chunksCreated: 0,\n filesSkipped: 0,\n };\n\n for (const project of projects) {\n await yieldToEventLoop();\n const r = await indexProject(db, project.id, project.root_path, project.claude_notes_dir);\n totals.filesProcessed += r.filesProcessed;\n totals.chunksCreated += r.chunksCreated;\n totals.filesSkipped += r.filesSkipped;\n }\n\n return { projects: projects.length, result: totals };\n}\n\n// ---------------------------------------------------------------------------\n// Embedding generation\n// ---------------------------------------------------------------------------\n\n/**\n * Generate and store embeddings for chunks that do not yet have one.\n *\n * Because better-sqlite3 is synchronous but the embedding pipeline is async,\n * we fetch all unembedded chunk texts first, generate embeddings in batches,\n * and then write them back in a transaction.\n *\n * @param db Open federation 
database.\n * @param projectId Optional — restrict to a specific project.\n * @param batchSize Number of chunks to embed per round. Default 50.\n * @param onProgress Optional callback called after each batch with running totals.\n */\nexport async function embedChunks(\n db: Database,\n projectId?: number,\n batchSize = 50,\n onProgress?: (embedded: number, total: number) => void,\n): Promise<EmbedResult> {\n // Dynamic import — keeps the heavy ML runtime out of the module load path\n const { generateEmbedding, serializeEmbedding } = await import(\"../embeddings.js\");\n\n const conditions = [\"embedding IS NULL\"];\n const params: (string | number)[] = [];\n\n if (projectId !== undefined) {\n conditions.push(\"project_id = ?\");\n params.push(projectId);\n }\n\n const where = \"WHERE \" + conditions.join(\" AND \");\n\n const rows = db\n .prepare(`SELECT id, text FROM memory_chunks ${where} ORDER BY id`)\n .all(...params) as Array<{ id: string; text: string }>;\n\n if (rows.length === 0) {\n return { chunksEmbedded: 0, chunksSkipped: 0 };\n }\n\n const updateStmt = db.prepare(\n \"UPDATE memory_chunks SET embedding = ? 
WHERE id = ?\",\n );\n\n let embedded = 0;\n const total = rows.length;\n\n for (let i = 0; i < rows.length; i += batchSize) {\n const batch = rows.slice(i, i + batchSize);\n\n // Generate embeddings for the batch (async — must happen OUTSIDE transaction)\n const embeddings: Array<{ id: string; blob: Buffer }> = [];\n for (const row of batch) {\n const vec = await generateEmbedding(row.text);\n const blob = serializeEmbedding(vec);\n embeddings.push({ id: row.id, blob });\n }\n\n // Write the batch in a single transaction\n db.transaction(() => {\n for (const { id, blob } of embeddings) {\n updateStmt.run(blob, id);\n }\n })();\n\n embedded += embeddings.length;\n onProgress?.(embedded, total);\n }\n\n return { chunksEmbedded: embedded, chunksSkipped: 0 };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AA4CA,SAAgB,UACd,IACA,WACA,UACA,cACA,QACA,MACS;CACT,MAAM,UAAU,KAAK,UAAU,aAAa;CAG5C,IAAI;CACJ,IAAI;AACJ,KAAI;AACF,YAAU,aAAa,SAAS,OAAO;AACvC,SAAO,SAAS,QAAQ;SAClB;AAEN,SAAO;;CAGT,MAAM,OAAO,WAAW,QAAQ;CAChC,MAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ;CACtC,MAAM,OAAO,KAAK;AASlB,KANiB,GACd,QACC,kEACD,CACA,IAAI,WAAW,aAAa,EAEjB,SAAS,KAErB,QAAO;CAIT,MAAM,cAAc,GACjB,QACC,iEACD,CACA,IAAI,WAAW,aAAa;CAE/B,MAAM,YAAY,GAAG,QAAQ,sCAAsC;CACnE,MAAM,cAAc,GAAG,QACrB,8DACD;AAED,IAAG,kBAAkB;AACnB,OAAK,MAAM,OAAO,YAChB,WAAU,IAAI,IAAI,GAAG;AAEvB,cAAY,IAAI,WAAW,aAAa;GACxC,EAAE;CAGJ,MAAM,SAAS,cAAc,QAAQ;CAGrC,MAAM,cAAc,GAAG,QAAQ;;;IAG7B;CAEF,MAAM,YAAY,GAAG,QAAQ;;;IAG3B;CAEF,MAAM,aAAa,GAAG,QAAQ;;;;;;;;;IAS5B;CAEF,MAAM,YAAY,KAAK,KAAK;AAE5B,IAAG,kBAAkB;AACnB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;GACtC,MAAM,QAAQ,OAAO;GACrB,MAAM,KAAK,QAAQ,WAAW,cAAc,GAAG,MAAM,WAAW,MAAM,QAAQ;AAC9E,eAAY,IACV,IACA,WACA,QACA,MACA,cACA,MAAM,WACN,MAAM,SACN,MAAM,MACN,MAAM,MACN,UACD;AACD,aAAU,IACR,MAAM,MACN,IACA,WACA,cACA,QACA,MACA,MAAM,WACN,MAAM,QACP;;AAEH,aAAW,IAAI,WAAW,cAAc,QAAQ,MAAM,MAAM,OAAO,KAAK;GACxE,EAAE;AAEJ,QAAO;;;;;;;;;;;;AAiBT,eAAsB,aACpB,IACA,WACA,UACA,gBACsB;CACtB,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;CAED,MAAM,eAA2F,EA
AE;CAGnG,MAAM,eAAe,KAAK,UAAU,YAAY;AAChD,KAAI,WAAW,aAAa,CAC1B,cAAa,KAAK;EAAE,SAAS;EAAc,UAAU;EAAU,QAAQ;EAAU,MAAM;EAAa,CAAC;CAIvG,MAAM,YAAY,KAAK,UAAU,SAAS;AAC1C,MAAK,MAAM,WAAW,YAAY,UAAU,EAAE;EAE5C,MAAM,OAAO,WADG,SAAS,UAAU,QAAQ,CACX;AAChC,eAAa,KAAK;GAAE;GAAS,UAAU;GAAU,QAAQ;GAAU;GAAM,CAAC;;CAI5E,MAAM,WAAW,KAAK,UAAU,QAAQ;AACxC,MAAK,MAAM,WAAW,YAAY,SAAS,CACzC,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAS,MAAM;EAAW,CAAC;CAKtF;EACE,MAAM,mBAAmB,GAAG,QAAQ;;;MAGlC;EACF,MAAM,iBAAiB,GAAG,QAAQ;;;MAGhC;EACF,MAAM,YAAY,KAAK,KAAK;AAC5B,OAAK,MAAM,WAAW,YAAY,SAAS,EAAE;GAE3C,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,OAAI,CAAC,KAAM;GAEX,MAAM,gBAAgB,GADN,SAAS,UAAU,QAAQ,CACV;GACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;GACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,MAAG,kBAAkB;AACnB,qBAAiB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACnG,mBAAe,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;KAChF,EAAE;;;AAKR,KAAI,CAAC,6BAA6B,SAAS,CACzC,MAAK,MAAM,WAAW,iBAAiB,SAAS,CAC9C,cAAa,KAAK;EAAE;EAAS,UAAU;EAAU,QAAQ;EAAW,MAAM;EAAS,CAAC;AAKxF,KAAI,kBAAkB,mBAAmB,UAAU;AACjD,OAAK,MAAM,WAAW,YAAY,eAAe,CAC/C,cAAa,KAAK;GAAE;GAAS,UAAU;GAAgB,QAAQ;GAAS,MAAM;GAAW,CAAC;EAI5F;GACE,MAAM,YAAY,KAAK,KAAK;GAC5B,MAAM,oBAAoB,GAAG,QAAQ;;;QAGnC;GACF,MAAM,kBAAkB,GAAG,QAAQ;;;QAGjC;AACF,QAAK,MAAM,WAAW,YAAY,eAAe,EAAE;IAEjD,MAAM,OAAO,uBADI,SAAS,QAAQ,CACW;AAC7C,QAAI,CAAC,KAAM;IAEX,MAAM,gBAAgB,GADN,SAAS,gBAAgB,QAAQ,CAChB;IACjC,MAAM,KAAK,QAAQ,WAAW,eAAe,GAAG,GAAG,EAAE;IACrD,MAAM,OAAO,WAAW,KAAK;AAC7B,OAAG,kBAAkB;AACnB,uBAAkB,IAAI,IAAI,WAAW,SAAS,WAAW,eAAe,GAAG,GAAG,MAAM,MAAM,UAAU;AACpG,qBAAgB,IAAI,MAAM,IAAI,WAAW,eAAe,SAAS,WAAW,GAAG,EAAE;MACjF,EAAE;;;AAKR,MAAI,eAAe,SAAS,SAAS,EAAE;GACrC,MAAM,mBAAmB,eAAe,MAAM,GAAG,GAAiB;GAClE,MAAM,kBAAkB,KAAK,kBAAkB,SAAS;GAExD,MAAM,iBAAiB,KAAK,kBAAkB,YAAY;AAC1D,OAAI,WAAW,eAAe,CAC5B,cAAa,KAAK;IAChB,SAAS;IACT,UAAU;IACV,QAAQ;IACR,MAAM;IACP,CAAC;AAGJ,QAAK,MAAM,WAAW,YAAY,gBAAgB,EAAE;IAElD,MAAM,OAAO,WADG,SAAS,kBAAkB,QAAQ,CACnB;AAChC,iBAAa,KAAK;KAAE;KAAS,UAAU;KAAkB,QAAQ;KAAU;KAAM,CAAC;;;;AAMxF,OAAM,kBAAkB;CAExB,IAAI,kBAAkB;AAEtB,MA
AK,MAAM,EAAE,SAAS,UAAU,QAAQ,UAAU,cAAc;AAC9D,MAAI,mBAAmB,mBAAmB;AACxC,SAAM,kBAAkB;AACxB,qBAAkB;;AAEpB;EAEA,MAAM,UAAU,SAAS,UAAU,QAAQ;AAG3C,MAFgB,UAAU,IAAI,WAAW,UAAU,SAAS,QAAQ,KAAK,EAE5D;GACX,MAAM,QAAQ,GACX,QACC,4EACD,CACA,IAAI,WAAW,QAAQ;AAE1B,UAAO;AACP,UAAO,iBAAiB,MAAM;QAE9B,QAAO;;CAKX,MAAM,4BAAY,IAAI,KAAa;AACnC,MAAK,MAAM,EAAE,SAAS,cAAc,aAClC,WAAU,IAAI,SAAS,UAAU,QAAQ,CAAC;CAG5C,MAAM,eAAe,GAClB,QAAQ,+DAA+D,CACvE,IAAI,UAAU;CAEjB,MAAM,aAAuB,EAAE;AAC/B,MAAK,MAAM,OAAO,cAAc;EAC9B,MAAM,WAAW,IAAI,KAAK,SAAS,UAAU,GACzC,IAAI,KAAK,MAAM,GAAG,GAAkB,GACpC,IAAI;AACR,MAAI,CAAC,UAAU,IAAI,SAAS,CAC1B,YAAW,KAAK,IAAI,KAAK;;AAI7B,KAAI,WAAW,SAAS,GAAG;EACzB,MAAM,kBAAkB,GAAG,QAAQ,sCAAsC;EACzE,MAAM,eAAe,GAAG,QACtB,8DACD;EACD,MAAM,aAAa,GAAG,QACpB,6DACD;AAED,KAAG,kBAAkB;AACnB,QAAK,MAAM,aAAa,YAAY;IAClC,MAAM,WAAW,GACd,QAAQ,iEAAiE,CACzE,IAAI,WAAW,UAAU;AAC5B,SAAK,MAAM,EAAE,QAAQ,SACnB,iBAAgB,IAAI,GAAG;AAEzB,iBAAa,IAAI,WAAW,UAAU;AACtC,eAAW,IAAI,WAAW,UAAU;;IAEtC,EAAE;;AAGN,QAAO;;;;;;;;AAaT,eAAsB,SACpB,IACA,YACoD;CACpD,MAAM,WAAW,WACd,QAAQ,+EAA+E,CACvF,KAAK;CAER,MAAM,SAAsB;EAC1B,gBAAgB;EAChB,eAAe;EACf,cAAc;EACf;AAED,MAAK,MAAM,WAAW,UAAU;AAC9B,QAAM,kBAAkB;EACxB,MAAM,IAAI,MAAM,aAAa,IAAI,QAAQ,IAAI,QAAQ,WAAW,QAAQ,iBAAiB;AACzF,SAAO,kBAAkB,EAAE;AAC3B,SAAO,iBAAiB,EAAE;AAC1B,SAAO,gBAAgB,EAAE;;AAG3B,QAAO;EAAE,UAAU,SAAS;EAAQ,QAAQ;EAAQ;;;;;;;;;;;;;;AAmBtD,eAAsB,YACpB,IACA,WACA,YAAY,IACZ,YACsB;CAEtB,MAAM,EAAE,mBAAmB,uBAAuB,MAAM,OAAO;CAE/D,MAAM,aAAa,CAAC,oBAAoB;CACxC,MAAM,SAA8B,EAAE;AAEtC,KAAI,cAAc,QAAW;AAC3B,aAAW,KAAK,iBAAiB;AACjC,SAAO,KAAK,UAAU;;CAGxB,MAAM,QAAQ,WAAW,WAAW,KAAK,QAAQ;CAEjD,MAAM,OAAO,GACV,QAAQ,sCAAsC,MAAM,cAAc,CAClE,IAAI,GAAG,OAAO;AAEjB,KAAI,KAAK,WAAW,EAClB,QAAO;EAAE,gBAAgB;EAAG,eAAe;EAAG;CAGhD,MAAM,aAAa,GAAG,QACpB,sDACD;CAED,IAAI,WAAW;CACf,MAAM,QAAQ,KAAK;AAEnB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,WAAW;EAC/C,MAAM,QAAQ,KAAK,MAAM,GAAG,IAAI,UAAU;EAG1C,MAAM,aAAkD,EAAE;AAC1D,OAAK,MAAM,OAAO,OAAO;GAEvB,MAAM,OAAO,mBADD,MAAM,kBAAkB,IAAI,KAAK,CACT;AACpC,cAAW,KAAK;IAAE,IAAI,IAAI;IAAI;IAAM,CAAC;;AAIvC,KAAG,kBAAkB
;AACnB,QAAK,MAAM,EAAE,IAAI,UAAU,WACzB,YAAW,IAAI,MAAM,GAAG;IAE1B,EAAE;AAEJ,cAAY,WAAW;AACvB,eAAa,UAAU,MAAM;;AAG/B,QAAO;EAAE,gBAAgB;EAAU,eAAe;EAAG"}
@@ -0,0 +1,148 @@
1
+ import { n as cosineSimilarity, r as deserializeEmbedding } from "./embeddings-DGRAPAYb.mjs";
2
+ import { t as STOP_WORDS } from "./stop-words-BaMEGVeY.mjs";
3
+
4
+ //#region src/zettelkasten/themes.ts
5
+ const MAX_CHUNKS = 5e3;
6
+ function getTopFolder(vaultPath) {
7
+ const parts = vaultPath.split("/");
8
+ return parts.length > 1 ? parts[0] : "";
9
+ }
10
+ function generateLabel(titles) {
11
+ const wordCounts = /* @__PURE__ */ new Map();
12
+ for (const title of titles) {
13
+ if (!title) continue;
14
+ const words = title.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/).filter((w) => w.length > 2 && !STOP_WORDS.has(w));
15
+ for (const word of words) wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
16
+ }
17
+ return [...wordCounts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 3).map(([w]) => w).join(" / ");
18
+ }
19
+ async function computeLinkedRatio(backend, paths) {
20
+ if (paths.length < 2) return 0;
21
+ const totalPairs = paths.length * (paths.length - 1) / 2;
22
+ const pathSet = new Set(paths);
23
+ let linkedPairs = 0;
24
+ for (const path of paths) {
25
+ const links = await backend.getLinksFromSource(path);
26
+ for (const link of links) if (link.targetPath && pathSet.has(link.targetPath)) linkedPairs++;
27
+ }
28
+ const uniquePairs = linkedPairs / 2;
29
+ return Math.min(1, uniquePairs / totalPairs);
30
+ }
31
+ function averageEmbeddings(embeddings) {
32
+ if (embeddings.length === 0) return new Float32Array(0);
33
+ const dim = embeddings[0].length;
34
+ const sum = new Float32Array(dim);
35
+ for (const vec of embeddings) for (let i = 0; i < dim; i++) sum[i] += vec[i];
36
+ const avg = new Float32Array(dim);
37
+ for (let i = 0; i < dim; i++) avg[i] = sum[i] / embeddings.length;
38
+ return avg;
39
+ }
40
/**
 * Detect emerging themes among recently-modified notes using agglomerative
 * single-linkage clustering of note-level embeddings.
 *
 * @param backend - storage backend exposing getRecentVaultFiles,
 *                  getChunksWithEmbeddings and link lookups
 * @param opts    - { vaultProjectId, lookbackDays?, minClusterSize?,
 *                  maxThemes?, similarityThreshold? }
 * @returns { themes, totalNotesAnalyzed, timeWindow } with themes ranked by
 *          size x folder diversity x recency ratio
 */
async function zettelThemes(backend, opts) {
	const lookbackDays = opts.lookbackDays ?? 30;
	const minClusterSize = opts.minClusterSize ?? 3;
	const maxThemes = opts.maxThemes ?? 10;
	const similarityThreshold = opts.similarityThreshold ?? 0.65;

	const now = Date.now();
	const from = now - lookbackDays * 86400000;

	// Step 1: notes indexed inside the lookback window.
	const recentNotes = (await backend.getRecentVaultFiles(from)).map((file) => ({
		vault_path: file.vaultPath,
		title: file.title,
		indexed_at: file.indexedAt
	}));

	// Step 2: group chunk embeddings by note path, then collapse each group
	// into one note-level embedding (the mean of its chunk vectors).
	const chunkRows = await backend.getChunksWithEmbeddings(opts.vaultProjectId, MAX_CHUNKS);
	const embeddingsByPath = new Map();
	for (const row of chunkRows) {
		const vector = deserializeEmbedding(row.embedding);
		const bucket = embeddingsByPath.get(row.path);
		if (bucket) bucket.push(vector);
		else embeddingsByPath.set(row.path, [vector]);
	}
	const fileEmbeddings = new Map();
	for (const [path, vectors] of embeddingsByPath) {
		fileEmbeddings.set(path, averageEmbeddings(vectors));
	}

	// Step 3: seed one singleton cluster per recent note that has an embedding.
	const clusters = [];
	for (const note of recentNotes) {
		const centroid = fileEmbeddings.get(note.vault_path);
		if (!centroid) continue;
		clusters.push({
			paths: [note.vault_path],
			titles: [note.title],
			indexedAts: [note.indexed_at],
			centroid
		});
	}
	const totalNotesAnalyzed = clusters.length;

	// Step 4: agglomerative merging — repeatedly fuse the most similar pair
	// of clusters until no pair exceeds the similarity threshold.
	let didMerge = true;
	while (didMerge && clusters.length > 1) {
		didMerge = false;
		let bestSim = similarityThreshold;
		let keepIdx = -1;
		let dropIdx = -1;
		for (let a = 0; a < clusters.length; a++) {
			for (let b = a + 1; b < clusters.length; b++) {
				const sim = cosineSimilarity(clusters[a].centroid, clusters[b].centroid);
				if (sim > bestSim) {
					bestSim = sim;
					keepIdx = a;
					dropIdx = b;
				}
			}
		}
		if (keepIdx === -1) break;

		const keep = clusters[keepIdx];
		const drop = clusters[dropIdx];
		const mergedPaths = keep.paths.concat(drop.paths);
		// Recompute the centroid from the note-level embeddings of every member.
		const memberEmbeddings = [];
		for (const p of mergedPaths) {
			const emb = fileEmbeddings.get(p);
			if (emb) memberEmbeddings.push(emb);
		}
		clusters[keepIdx] = {
			paths: mergedPaths,
			titles: keep.titles.concat(drop.titles),
			indexedAts: keep.indexedAts.concat(drop.indexedAts),
			centroid: averageEmbeddings(memberEmbeddings)
		};
		// dropIdx > keepIdx, so removing it does not shift the kept slot.
		clusters.splice(dropIdx, 1);
		didMerge = true;
	}

	// Step 5: keep clusters large enough to call a theme, annotate each one.
	const themes = [];
	let themeId = 0;
	for (const cluster of clusters) {
		if (cluster.paths.length < minClusterSize) continue;
		const label = generateLabel(cluster.titles) || `Theme ${themeId + 1}`;
		const avgRecency =
			cluster.indexedAts.reduce((total, t) => total + t, 0) / cluster.indexedAts.length;
		const folderDiversity =
			new Set(cluster.paths.map(getTopFolder)).size / cluster.paths.length;
		const linkedRatio = await computeLinkedRatio(backend, cluster.paths);
		themes.push({
			id: themeId++,
			label,
			notes: cluster.paths.map((path, idx) => ({
				path,
				title: cluster.titles[idx]
			})),
			size: cluster.paths.length,
			folderDiversity,
			avgRecency,
			linkedRatio,
			// Larger clusters with sparse internal linking suggest an index note.
			suggestIndexNote: linkedRatio < 0.3 && cluster.paths.length >= 5
		});
	}

	// Step 6: rank descending by size x folder diversity x recency ratio.
	const score = (t) => t.size * t.folderDiversity * (t.avgRecency / now);
	themes.sort((a, b) => score(b) - score(a));

	return {
		themes: themes.slice(0, maxThemes),
		totalNotesAnalyzed,
		timeWindow: {
			from,
			to: now
		}
	};
}
145
+
146
+ //#endregion
147
+ export { zettelThemes as t };
148
+ //# sourceMappingURL=themes-BvYF0W8T.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"themes-BvYF0W8T.mjs","names":[],"sources":["../src/zettelkasten/themes.ts"],"sourcesContent":["import type { StorageBackend } from \"../storage/interface.js\";\nimport { deserializeEmbedding, cosineSimilarity } from \"../memory/embeddings.js\";\nimport { STOP_WORDS } from \"../utils/stop-words.js\";\n\nexport interface ThemeOptions {\n vaultProjectId: number;\n lookbackDays?: number;\n minClusterSize?: number;\n maxThemes?: number;\n similarityThreshold?: number;\n}\n\nexport interface ThemeCluster {\n id: number;\n label: string;\n notes: Array<{\n path: string;\n title: string | null;\n }>;\n size: number;\n folderDiversity: number;\n avgRecency: number;\n linkedRatio: number;\n suggestIndexNote: boolean;\n}\n\nexport interface ThemeResult {\n themes: ThemeCluster[];\n totalNotesAnalyzed: number;\n timeWindow: { from: number; to: number };\n}\n\nconst MAX_CHUNKS = 5000;\n\n// STOP_WORDS imported from utils/stop-words.ts\n\nfunction getTopFolder(vaultPath: string): string {\n const parts = vaultPath.split(\"/\");\n return parts.length > 1 ? parts[0] : \"\";\n}\n\nfunction generateLabel(titles: Array<string | null>): string {\n const wordCounts = new Map<string, number>();\n for (const title of titles) {\n if (!title) continue;\n const words = title\n .toLowerCase()\n .replace(/[^a-z0-9\\s]/g, \" \")\n .split(/\\s+/)\n .filter((w) => w.length > 2 && !STOP_WORDS.has(w));\n for (const word of words) {\n wordCounts.set(word, (wordCounts.get(word) ?? 
0) + 1);\n }\n }\n const sorted = [...wordCounts.entries()].sort((a, b) => b[1] - a[1]);\n return sorted\n .slice(0, 3)\n .map(([w]) => w)\n .join(\" / \");\n}\n\nasync function computeLinkedRatio(backend: StorageBackend, paths: string[]): Promise<number> {\n if (paths.length < 2) return 0;\n const totalPairs = (paths.length * (paths.length - 1)) / 2;\n const pathSet = new Set(paths);\n let linkedPairs = 0;\n\n for (const path of paths) {\n const links = await backend.getLinksFromSource(path);\n for (const link of links) {\n if (link.targetPath && pathSet.has(link.targetPath)) {\n linkedPairs++;\n }\n }\n }\n\n // Each bidirectional pair might be counted once per direction; divide by 2 to normalize\n const uniquePairs = linkedPairs / 2;\n return Math.min(1, uniquePairs / totalPairs);\n}\n\ntype ClusterNode = {\n paths: string[];\n titles: Array<string | null>;\n indexedAts: number[];\n centroid: Float32Array;\n};\n\nfunction averageEmbeddings(embeddings: Float32Array[]): Float32Array {\n if (embeddings.length === 0) return new Float32Array(0);\n const dim = embeddings[0].length;\n const sum = new Float32Array(dim);\n for (const vec of embeddings) {\n for (let i = 0; i < dim; i++) {\n sum[i] += vec[i];\n }\n }\n const avg = new Float32Array(dim);\n for (let i = 0; i < dim; i++) {\n avg[i] = sum[i] / embeddings.length;\n }\n return avg;\n}\n\n/**\n * Detect emerging themes in recently-modified notes using agglomerative single-linkage\n * clustering of note-level embeddings.\n */\nexport async function zettelThemes(\n backend: StorageBackend,\n opts: ThemeOptions,\n): Promise<ThemeResult> {\n const lookbackDays = opts.lookbackDays ?? 30;\n const minClusterSize = opts.minClusterSize ?? 3;\n const maxThemes = opts.maxThemes ?? 10;\n const similarityThreshold = opts.similarityThreshold ?? 
0.65;\n\n const now = Date.now();\n const from = now - lookbackDays * 86400000;\n\n // Step 1: get recent notes\n const recentFiles = await backend.getRecentVaultFiles(from);\n const recentNotes = recentFiles.map(f => ({ vault_path: f.vaultPath, title: f.title, indexed_at: f.indexedAt }));\n\n // Step 2: get file-level embeddings from memory_chunks\n const chunkRows = await backend.getChunksWithEmbeddings(opts.vaultProjectId, MAX_CHUNKS);\n\n const embeddingsByPath = new Map<string, Float32Array[]>();\n for (const row of chunkRows) {\n const vec = deserializeEmbedding(row.embedding);\n const arr = embeddingsByPath.get(row.path);\n if (!arr) {\n embeddingsByPath.set(row.path, [vec]);\n } else {\n arr.push(vec);\n }\n }\n\n const fileEmbeddings = new Map<string, Float32Array>();\n for (const [path, vecs] of embeddingsByPath) {\n fileEmbeddings.set(path, averageEmbeddings(vecs));\n }\n\n // Step 3: build initial clusters — only include notes that have embeddings\n const clusters: ClusterNode[] = [];\n for (const note of recentNotes) {\n const embedding = fileEmbeddings.get(note.vault_path);\n if (!embedding) continue;\n clusters.push({\n paths: [note.vault_path],\n titles: [note.title],\n indexedAts: [note.indexed_at],\n centroid: embedding,\n });\n }\n\n const totalNotesAnalyzed = clusters.length;\n\n // Step 4: agglomerative single-linkage clustering\n // Stop when no two clusters have similarity >= threshold\n let merged = true;\n while (merged && clusters.length > 1) {\n merged = false;\n let bestSim = similarityThreshold;\n let bestI = -1;\n let bestJ = -1;\n\n for (let i = 0; i < clusters.length; i++) {\n for (let j = i + 1; j < clusters.length; j++) {\n const sim = cosineSimilarity(clusters[i].centroid, clusters[j].centroid);\n if (sim > bestSim) {\n bestSim = sim;\n bestI = i;\n bestJ = j;\n }\n }\n }\n\n if (bestI === -1) break;\n\n // Merge cluster j into cluster i\n const ci = clusters[bestI];\n const cj = clusters[bestJ];\n const mergedPaths = 
[...ci.paths, ...cj.paths];\n const mergedTitles = [...ci.titles, ...cj.titles];\n const mergedIndexedAts = [...ci.indexedAts, ...cj.indexedAts];\n\n // Recompute centroid from averaged embeddings of all member paths\n const memberEmbeddings: Float32Array[] = [];\n for (const p of mergedPaths) {\n const emb = fileEmbeddings.get(p);\n if (emb) memberEmbeddings.push(emb);\n }\n\n clusters[bestI] = {\n paths: mergedPaths,\n titles: mergedTitles,\n indexedAts: mergedIndexedAts,\n centroid: averageEmbeddings(memberEmbeddings),\n };\n\n clusters.splice(bestJ, 1);\n merged = true;\n }\n\n // Step 5: filter and annotate clusters\n const themes: ThemeCluster[] = [];\n let clusterIndex = 0;\n\n for (const cluster of clusters) {\n if (cluster.paths.length < minClusterSize) continue;\n\n const label = generateLabel(cluster.titles) || `Theme ${clusterIndex + 1}`;\n const avgRecency =\n cluster.indexedAts.reduce((sum, t) => sum + t, 0) / cluster.indexedAts.length;\n\n const uniqueFolders = new Set(cluster.paths.map(getTopFolder));\n const folderDiversity = uniqueFolders.size / cluster.paths.length;\n\n const linkedRatio = await computeLinkedRatio(backend, cluster.paths);\n const suggestIndexNote = linkedRatio < 0.3 && cluster.paths.length >= 5;\n\n themes.push({\n id: clusterIndex++,\n label,\n notes: cluster.paths.map((path, idx) => ({\n path,\n title: cluster.titles[idx],\n })),\n size: cluster.paths.length,\n folderDiversity,\n avgRecency,\n linkedRatio,\n suggestIndexNote,\n });\n }\n\n // Step 6: rank by size * folderDiversity * recency_ratio\n themes.sort(\n (a, b) =>\n b.size * b.folderDiversity * (b.avgRecency / now) -\n a.size * a.folderDiversity * (a.avgRecency / now),\n );\n\n return {\n themes: themes.slice(0, maxThemes),\n totalNotesAnalyzed,\n timeWindow: { from, to: now },\n 
};\n}\n"],"mappings":";;;;AAgCA,MAAM,aAAa;AAInB,SAAS,aAAa,WAA2B;CAC/C,MAAM,QAAQ,UAAU,MAAM,IAAI;AAClC,QAAO,MAAM,SAAS,IAAI,MAAM,KAAK;;AAGvC,SAAS,cAAc,QAAsC;CAC3D,MAAM,6BAAa,IAAI,KAAqB;AAC5C,MAAK,MAAM,SAAS,QAAQ;AAC1B,MAAI,CAAC,MAAO;EACZ,MAAM,QAAQ,MACX,aAAa,CACb,QAAQ,gBAAgB,IAAI,CAC5B,MAAM,MAAM,CACZ,QAAQ,MAAM,EAAE,SAAS,KAAK,CAAC,WAAW,IAAI,EAAE,CAAC;AACpD,OAAK,MAAM,QAAQ,MACjB,YAAW,IAAI,OAAO,WAAW,IAAI,KAAK,IAAI,KAAK,EAAE;;AAIzD,QADe,CAAC,GAAG,WAAW,SAAS,CAAC,CAAC,MAAM,GAAG,MAAM,EAAE,KAAK,EAAE,GAAG,CAEjE,MAAM,GAAG,EAAE,CACX,KAAK,CAAC,OAAO,EAAE,CACf,KAAK,MAAM;;AAGhB,eAAe,mBAAmB,SAAyB,OAAkC;AAC3F,KAAI,MAAM,SAAS,EAAG,QAAO;CAC7B,MAAM,aAAc,MAAM,UAAU,MAAM,SAAS,KAAM;CACzD,MAAM,UAAU,IAAI,IAAI,MAAM;CAC9B,IAAI,cAAc;AAElB,MAAK,MAAM,QAAQ,OAAO;EACxB,MAAM,QAAQ,MAAM,QAAQ,mBAAmB,KAAK;AACpD,OAAK,MAAM,QAAQ,MACjB,KAAI,KAAK,cAAc,QAAQ,IAAI,KAAK,WAAW,CACjD;;CAMN,MAAM,cAAc,cAAc;AAClC,QAAO,KAAK,IAAI,GAAG,cAAc,WAAW;;AAU9C,SAAS,kBAAkB,YAA0C;AACnE,KAAI,WAAW,WAAW,EAAG,QAAO,IAAI,aAAa,EAAE;CACvD,MAAM,MAAM,WAAW,GAAG;CAC1B,MAAM,MAAM,IAAI,aAAa,IAAI;AACjC,MAAK,MAAM,OAAO,WAChB,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,IACvB,KAAI,MAAM,IAAI;CAGlB,MAAM,MAAM,IAAI,aAAa,IAAI;AACjC,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,IACvB,KAAI,KAAK,IAAI,KAAK,WAAW;AAE/B,QAAO;;;;;;AAOT,eAAsB,aACpB,SACA,MACsB;CACtB,MAAM,eAAe,KAAK,gBAAgB;CAC1C,MAAM,iBAAiB,KAAK,kBAAkB;CAC9C,MAAM,YAAY,KAAK,aAAa;CACpC,MAAM,sBAAsB,KAAK,uBAAuB;CAExD,MAAM,MAAM,KAAK,KAAK;CACtB,MAAM,OAAO,MAAM,eAAe;CAIlC,MAAM,eADc,MAAM,QAAQ,oBAAoB,KAAK,EAC3B,KAAI,OAAM;EAAE,YAAY,EAAE;EAAW,OAAO,EAAE;EAAO,YAAY,EAAE;EAAW,EAAE;CAGhH,MAAM,YAAY,MAAM,QAAQ,wBAAwB,KAAK,gBAAgB,WAAW;CAExF,MAAM,mCAAmB,IAAI,KAA6B;AAC1D,MAAK,MAAM,OAAO,WAAW;EAC3B,MAAM,MAAM,qBAAqB,IAAI,UAAU;EAC/C,MAAM,MAAM,iBAAiB,IAAI,IAAI,KAAK;AAC1C,MAAI,CAAC,IACH,kBAAiB,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC;MAErC,KAAI,KAAK,IAAI;;CAIjB,MAAM,iCAAiB,IAAI,KAA2B;AACtD,MAAK,MAAM,CAAC,MAAM,SAAS,iBACzB,gBAAe,IAAI,MAAM,kBAAkB,KAAK,CAAC;CAInD,MAAM,WAA0B,EAAE;AAClC,MAAK,MAAM,QAAQ,aAAa;EAC9B,MAAM,YAAY,eAAe,IAAI,KAAK,WAAW;AACrD,MAAI,CAAC,UAAW;AAChB,WAAS,KAAK;GACZ
,OAAO,CAAC,KAAK,WAAW;GACxB,QAAQ,CAAC,KAAK,MAAM;GACpB,YAAY,CAAC,KAAK,WAAW;GAC7B,UAAU;GACX,CAAC;;CAGJ,MAAM,qBAAqB,SAAS;CAIpC,IAAI,SAAS;AACb,QAAO,UAAU,SAAS,SAAS,GAAG;AACpC,WAAS;EACT,IAAI,UAAU;EACd,IAAI,QAAQ;EACZ,IAAI,QAAQ;AAEZ,OAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,IACnC,MAAK,IAAI,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;GAC5C,MAAM,MAAM,iBAAiB,SAAS,GAAG,UAAU,SAAS,GAAG,SAAS;AACxE,OAAI,MAAM,SAAS;AACjB,cAAU;AACV,YAAQ;AACR,YAAQ;;;AAKd,MAAI,UAAU,GAAI;EAGlB,MAAM,KAAK,SAAS;EACpB,MAAM,KAAK,SAAS;EACpB,MAAM,cAAc,CAAC,GAAG,GAAG,OAAO,GAAG,GAAG,MAAM;EAC9C,MAAM,eAAe,CAAC,GAAG,GAAG,QAAQ,GAAG,GAAG,OAAO;EACjD,MAAM,mBAAmB,CAAC,GAAG,GAAG,YAAY,GAAG,GAAG,WAAW;EAG7D,MAAM,mBAAmC,EAAE;AAC3C,OAAK,MAAM,KAAK,aAAa;GAC3B,MAAM,MAAM,eAAe,IAAI,EAAE;AACjC,OAAI,IAAK,kBAAiB,KAAK,IAAI;;AAGrC,WAAS,SAAS;GAChB,OAAO;GACP,QAAQ;GACR,YAAY;GACZ,UAAU,kBAAkB,iBAAiB;GAC9C;AAED,WAAS,OAAO,OAAO,EAAE;AACzB,WAAS;;CAIX,MAAM,SAAyB,EAAE;CACjC,IAAI,eAAe;AAEnB,MAAK,MAAM,WAAW,UAAU;AAC9B,MAAI,QAAQ,MAAM,SAAS,eAAgB;EAE3C,MAAM,QAAQ,cAAc,QAAQ,OAAO,IAAI,SAAS,eAAe;EACvE,MAAM,aACJ,QAAQ,WAAW,QAAQ,KAAK,MAAM,MAAM,GAAG,EAAE,GAAG,QAAQ,WAAW;EAGzE,MAAM,kBADgB,IAAI,IAAI,QAAQ,MAAM,IAAI,aAAa,CAAC,CACxB,OAAO,QAAQ,MAAM;EAE3D,MAAM,cAAc,MAAM,mBAAmB,SAAS,QAAQ,MAAM;EACpE,MAAM,mBAAmB,cAAc,MAAO,QAAQ,MAAM,UAAU;AAEtE,SAAO,KAAK;GACV,IAAI;GACJ;GACA,OAAO,QAAQ,MAAM,KAAK,MAAM,SAAS;IACvC;IACA,OAAO,QAAQ,OAAO;IACvB,EAAE;GACH,MAAM,QAAQ,MAAM;GACpB;GACA;GACA;GACA;GACD,CAAC;;AAIJ,QAAO,MACJ,GAAG,MACF,EAAE,OAAO,EAAE,mBAAmB,EAAE,aAAa,OAC7C,EAAE,OAAO,EAAE,mBAAmB,EAAE,aAAa,KAChD;AAED,QAAO;EACL,QAAQ,OAAO,MAAM,GAAG,UAAU;EAClC;EACA,YAAY;GAAE;GAAM,IAAI;GAAK;EAC9B"}