@hera-al/server 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
import {
  existsSync,
  readdirSync,
  readFileSync,
  statSync,
  copyFileSync,
  renameSync,
  unlinkSync,
  watch,
} from "node:fs";
import { join, relative, basename, dirname } from "node:path";
import Database from "better-sqlite3";
import OpenAI from "openai";
import hnswlib from "hnswlib-node";
import { createLogger } from "../utils/logger.js";

const { HierarchicalNSW } = hnswlib;
const log = createLogger("MemorySearch");

/** Distance space used for every HNSW index created by this module. */
const HNSW_SPACE = "cosine";

/** Schema applied to the index database on every start (all statements are idempotent). */
const SCHEMA_SQL =
  "\n CREATE TABLE IF NOT EXISTS documents (" +
  "\n path TEXT PRIMARY KEY," +
  "\n mtime_ms INTEGER NOT NULL," +
  "\n size INTEGER NOT NULL" +
  "\n );" +
  "\n" +
  "\n CREATE TABLE IF NOT EXISTS chunks (" +
  "\n id INTEGER PRIMARY KEY AUTOINCREMENT," +
  "\n doc_path TEXT NOT NULL," +
  "\n chunk_idx INTEGER NOT NULL," +
  "\n role TEXT NOT NULL DEFAULT ''," +
  "\n timestamp TEXT NOT NULL DEFAULT ''," +
  "\n session_key TEXT NOT NULL DEFAULT ''," +
  "\n content TEXT NOT NULL," +
  "\n UNIQUE(doc_path, chunk_idx)" +
  "\n );" +
  "\n" +
  "\n CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(" +
  "\n content," +
  "\n content='chunks'," +
  "\n content_rowid='id'," +
  "\n tokenize='porter unicode61'" +
  "\n );" +
  "\n" +
  "\n CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN" +
  "\n INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);" +
  "\n END;" +
  "\n" +
  "\n CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN" +
  "\n INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES ('delete', old.id, old.content);" +
  "\n END;" +
  "\n" +
  "\n CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN" +
  "\n INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES ('delete', old.id, old.content);" +
  "\n INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);" +
  "\n END;" +
  "\n" +
  "\n -- Lookup table only (no vector BLOB) — vectors live in HNSW index" +
  "\n CREATE TABLE IF NOT EXISTS embeddings (" +
  "\n chunk_id INTEGER PRIMARY KEY" +
  "\n );" +
  "\n" +
  "\n -- Metadata for detecting config changes (model, dimensions)" +
  "\n CREATE TABLE IF NOT EXISTS meta (" +
  "\n key TEXT PRIMARY KEY," +
  "\n value TEXT NOT NULL" +
  "\n );" +
  "\n";

/** Chunks that have no row in `embeddings` yet. */
const PENDING_CHUNKS_SQL =
  "\n SELECT c.id, c.content FROM chunks c" +
  "\n LEFT JOIN embeddings e ON e.chunk_id = c.id" +
  "\n WHERE e.chunk_id IS NULL" +
  "\n ";

/** Full-text lookup ordered by FTS5 bm25 rank. */
const BM25_SQL =
  "\n SELECT chunks.id, bm25(chunks_fts) as rank" +
  "\n FROM chunks_fts" +
  "\n JOIN chunks ON chunks.id = chunks_fts.rowid" +
  "\n WHERE chunks_fts MATCH ?" +
  "\n ORDER BY rank" +
  "\n LIMIT ?" +
  "\n ";

/** Builds the options object passed to `initIndex` for a brand-new HNSW index. */
function freshHnswOptions() {
  return { maxElements: 1e4, m: 16, efConstruction: 200, allowReplaceDeleted: true };
}

/**
 * Merges two ranked id lists by summing `1 / (k + rank + 1)` per list,
 * then orders ids by the summed score, highest first.
 */
function fuseRankings(sparseHits, denseHits, k) {
  const totals = new Map();
  const accumulate = (hits) => {
    hits.forEach(({ id }, rank) => {
      totals.set(id, (totals.get(id) ?? 0) + 1 / (k + rank + 1));
    });
  };
  accumulate(sparseHits);
  accumulate(denseHits);

  const fused = [];
  for (const [id, score] of totals.entries()) {
    fused.push({ id, score });
  }
  fused.sort((left, right) => right.score - left.score);
  return fused;
}

/** Turns free text into an FTS5 MATCH expression: quoted terms joined with OR ("" when no terms). */
function toFtsQuery(text) {
  const terms = text
    .replace(/[^\w\s]/g, " ")
    .split(/\s+/)
    .filter((term) => term.length > 0);
  if (terms.length === 0) {
    return "";
  }
  return terms.map((term) => `"${term}"`).join(" OR ");
}

/** Recursively lists `.md` files under `rootDir` with their size, mtime and session key. */
function scanMarkdownFiles(rootDir) {
  const found = [];

  const walk = (dir) => {
    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      return;
    }
    for (const entry of entries) {
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
        continue;
      }
      if (!entry.name.endsWith(".md")) {
        continue;
      }
      try {
        const stats = statSync(fullPath);
        const relPath = relative(rootDir, fullPath);
        const parentName = basename(dirname(fullPath));
        found.push({
          fullPath,
          relPath,
          // Files directly under the root carry no session key.
          sessionKey: parentName === basename(rootDir) ? "" : parentName,
          mtimeMs: Math.floor(stats.mtimeMs),
          size: stats.size,
        });
      } catch {}
    }
  };

  walk(rootDir);
  return found;
}

/**
 * Hybrid (FTS5 + HNSW) search over markdown files in a memory directory.
 *
 * Writes go to an "index" database/HNSW pair; reads are served from a copied
 * "search" snapshot that is swapped in by `maybeSwap()`.
 */
export class MemorySearch {
  memoryDir;
  dataDir;
  opts;
  indexDb = null;
  indexHnsw = null;
  watcher = null;
  debounceTimer = null;
  embedTimer = null;
  indexing = false;
  searchDb = null;
  searchHnsw = null;
  openai = null;
  indexDbPath;
  searchDbPath;
  searchNextDbPath;
  indexHnswPath;
  searchHnswPath;
  searchNextHnswPath;

  /**
   * @param memoryDir directory scanned for `.md` files
   * @param dataDir directory that holds the databases and HNSW files
   * @param opts engine options (embedding model/dimensions, limits, prefixes, timers, API settings)
   */
  constructor(memoryDir, dataDir, opts) {
    this.memoryDir = memoryDir;
    this.dataDir = dataDir;
    this.opts = opts;
    this.indexDbPath = join(dataDir, "memory-index.db");
    this.searchDbPath = join(dataDir, "memory-search.db");
    this.searchNextDbPath = join(dataDir, "memory-search-next.db");
    this.indexHnswPath = join(dataDir, "memory-vectors.hnsw");
    this.searchHnswPath = join(dataDir, "memory-vectors-search.hnsw");
    this.searchNextHnswPath = join(dataDir, "memory-vectors-search-next.hnsw");
    if (this.isOpenAI()) {
      const baseUrlOption = opts.baseURL ? { baseURL: opts.baseURL } : {};
      this.openai = new OpenAI({ apiKey: opts.apiKey, ...baseUrlOption });
    }
  }

  /** True when the configured (or default) base URL contains "openai.com". */
  isOpenAI() {
    const baseUrl = this.opts.baseURL || "https://api.openai.com/v1";
    return baseUrl.includes("openai.com");
  }

  /** Returns `opts.maxInjectedChars`. */
  getMaxInjectedChars() {
    return this.opts.maxInjectedChars;
  }

  /** Opens the index DB, indexes and embeds once, publishes a snapshot, then starts watcher and timer. */
  async start() {
    log.info("Starting memory search engine...");
    this.indexDb = new Database(this.indexDbPath);
    this.indexDb.pragma("journal_mode = WAL");
    this.migrateEmbeddingsTable();
    this.indexDb.exec(SCHEMA_SQL);
    this.checkEmbeddingConfigChange();
    this.initIndexHnsw();
    await this.indexFiles();
    await this.embedPending();
    this.publishSnapshot();
    this.maybeSwap();
    this.startWatcher();
    if (this.opts.embedIntervalMs > 0) {
      this.embedTimer = setInterval(() => {
        this.embedPending().catch((err) => log.error(`Embed cycle error: ${err}`));
      }, this.opts.embedIntervalMs);
    }
    log.info("Memory search engine started");
  }

  /** Stops the watcher and timers, closes both databases and drops both HNSW handles. */
  stop() {
    if (this.watcher) {
      this.watcher.close();
      this.watcher = null;
    }
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }
    if (this.embedTimer) {
      clearInterval(this.embedTimer);
      this.embedTimer = null;
    }
    if (this.indexDb) {
      this.indexDb.close();
      this.indexDb = null;
    }
    if (this.searchDb) {
      this.searchDb.close();
      this.searchDb = null;
    }
    this.indexHnsw = null;
    this.searchHnsw = null;
    log.info("Memory search engine stopped");
  }

  /** Drops a legacy `embeddings` table (one with a `vector` column) and the on-disk HNSW file. */
  migrateEmbeddingsTable() {
    if (!this.indexDb) {
      return;
    }
    const columns = this.indexDb.prepare("PRAGMA table_info(embeddings)").all();
    const hasVectorColumn = columns.some((column) => "vector" === column.name);
    if (!hasVectorColumn) {
      return;
    }
    log.info("Migrating: dropping old embeddings table (had vector BLOB). All embeddings will be re-created via HNSW.");
    this.indexDb.exec("DROP TABLE IF EXISTS embeddings");
    try {
      unlinkSync(this.indexHnswPath);
    } catch {}
  }

  /** Wipes embeddings and the HNSW file when the stored model/dimensions differ from `opts`, then stores the current values. */
  checkEmbeddingConfigChange() {
    if (!this.indexDb) {
      return;
    }
    const readMeta = this.indexDb.prepare("SELECT value FROM meta WHERE key = ?");
    const writeMeta = this.indexDb.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)");
    const storedModel = readMeta.get("embedding_model")?.value;
    const storedDimensions = readMeta.get("embedding_dimensions")?.value;
    const currentModel = this.opts.embeddingModel;
    const currentDimensions = String(this.opts.embeddingDimensions);
    const modelChanged = storedModel !== undefined && storedModel !== currentModel;
    const dimensionsChanged = storedDimensions !== undefined && storedDimensions !== currentDimensions;

    if (modelChanged || dimensionsChanged) {
      const changes = [];
      if (modelChanged) {
        changes.push(`model: ${storedModel} → ${currentModel}`);
      }
      if (dimensionsChanged) {
        changes.push(`dimensions: ${storedDimensions} → ${currentDimensions}`);
      }
      log.info(`Embedding config changed (${changes.join(", ")}). Wiping embeddings + HNSW for full re-embed.`);
      this.indexDb.exec("DELETE FROM embeddings");
      try {
        unlinkSync(this.indexHnswPath);
      } catch {}
    }

    writeMeta.run("embedding_model", currentModel);
    writeMeta.run("embedding_dimensions", currentDimensions);
  }

  /** Loads the writable HNSW index from disk, or initialises an empty one. */
  initIndexHnsw() {
    const dimensions = this.opts.embeddingDimensions;
    this.indexHnsw = new HierarchicalNSW(HNSW_SPACE, dimensions);

    if (!existsSync(this.indexHnswPath)) {
      this.indexHnsw.initIndex(freshHnswOptions());
      log.info("Created new HNSW index");
      return;
    }

    try {
      this.indexHnsw.readIndexSync(this.indexHnswPath, true);
      log.info(`Loaded HNSW index: ${this.indexHnsw.getCurrentCount()} points`);
    } catch (err) {
      log.warn(`Failed to load HNSW index, creating new: ${err}`);
      this.indexHnsw.initIndex(freshHnswOptions());
    }
  }

  /** Grows the writable HNSW index when `e` more points would exceed its capacity. */
  ensureHnswCapacity(e) {
    if (!this.indexHnsw) {
      return;
    }
    const capacity = this.indexHnsw.getMaxElements();
    const needed = this.indexHnsw.getCurrentCount() + e;
    if (needed <= capacity) {
      return;
    }
    const newCapacity = Math.max(2 * capacity, this.indexHnsw.getCurrentCount() + e + 1e3);
    this.indexHnsw.resizeIndex(newCapacity);
    log.info(`Resized HNSW index: ${capacity} → ${newCapacity}`);
  }

  /**
   * Runs BM25 and (when possible) vector search, fuses both rankings and
   * returns up to `t` (default `opts.maxResults`) snippets.
   */
  async search(e, t) {
    const limit = t ?? this.opts.maxResults;
    this.maybeSwap();
    if (!this.searchDb) {
      log.warn("Search DB not available");
      return [];
    }

    const sparseHits = this.bm25Search(e, 20);
    let denseHits = [];
    try {
      const queryVector = await this.embedText(e);
      if (queryVector && this.searchHnsw && this.searchHnsw.getCurrentCount() > 0) {
        denseHits = this.denseSearch(queryVector, 20);
      }
    } catch (err) {
      log.warn(`Dense search failed, using BM25 only: ${err}`);
    }

    const fused = fuseRankings(sparseHits, denseHits, this.opts.rrfK);
    const results = [];
    const selectChunk = this.searchDb.prepare(
      "SELECT doc_path, role, timestamp, session_key, content FROM chunks WHERE id = ?",
    );
    for (const { id, score } of fused.slice(0, limit)) {
      const row = selectChunk.get(id);
      if (!row) {
        continue;
      }
      let snippet = row.content;
      if (row.content.length > this.opts.maxSnippetChars) {
        snippet = row.content.slice(0, this.opts.maxSnippetChars) + "...";
      }
      results.push({
        path: row.doc_path,
        sessionKey: row.session_key,
        snippet,
        score,
        role: row.role,
        timestamp: row.timestamp,
      });
    }

    log.info(`Search "${e.slice(0, 60)}": ${results.length} results (sparse=${sparseHits.length}, dense=${denseHits.length})`);
    return results;
  }

  /**
   * Reads a file relative to the memory directory.
   * `s` is a 1-based first line and `i` a line count; with neither, the whole file is returned.
   */
  readFile(t, s, i) {
    const fullPath = join(this.memoryDir, t);
    if (!existsSync(fullPath)) {
      return { path: t, content: `[File not found: ${t}]` };
    }
    const text = readFileSync(fullPath, "utf-8");
    const lines = text.split("\n");
    if (s === undefined && i === undefined) {
      return { path: t, content: text };
    }
    const firstIndex = Math.max(0, (s ?? 1) - 1);
    const lineCount = i ?? lines.length;
    return { path: t, content: lines.slice(firstIndex, firstIndex + lineCount).join("\n") };
  }

  /** FTS5 search against the snapshot DB; returns `{id, score}` with score = negated bm25 rank. */
  bm25Search(e, t) {
    if (!this.searchDb) {
      return [];
    }
    try {
      const matchExpr = toFtsQuery(e);
      if (!matchExpr) {
        return [];
      }
      const rows = this.searchDb.prepare(BM25_SQL).all(matchExpr, t);
      return rows.map((row) => ({ id: row.id, score: -row.rank }));
    } catch (err) {
      log.warn(`BM25 search error: ${err}`);
      return [];
    }
  }

  /** Nearest-neighbour search against the snapshot HNSW index; score = 1 - distance. */
  denseSearch(e, t) {
    if (!this.searchHnsw || this.searchHnsw.getCurrentCount() === 0) {
      return [];
    }
    const k = Math.min(t, this.searchHnsw.getCurrentCount());
    const knn = this.searchHnsw.searchKnn(Array.from(e), k);
    return knn.neighbors.map((id, position) => ({ id, score: 1 - knn.distances[position] }));
  }

  /** Watches the memory directory recursively and debounces index cycles. */
  startWatcher() {
    if (!existsSync(this.memoryDir)) {
      return;
    }
    try {
      this.watcher = watch(this.memoryDir, { recursive: true }, () => {
        if (this.debounceTimer) {
          clearTimeout(this.debounceTimer);
        }
        this.debounceTimer = setTimeout(() => {
          this.runIndexCycle();
        }, this.opts.updateDebounceMs);
      });
    } catch (err) {
      log.warn(`Could not start file watcher: ${err}`);
    }
  }

  /** Re-indexes files and publishes a snapshot, unless a cycle is already running. */
  async runIndexCycle() {
    if (this.indexing) {
      return;
    }
    this.indexing = true;
    try {
      await this.indexFiles();
      this.publishSnapshot();
    } catch (err) {
      log.error(`Index cycle error: ${err}`);
    } finally {
      this.indexing = false;
    }
  }

  /** Syncs the `documents`/`chunks` tables with the markdown files currently on disk. */
  async indexFiles() {
    if (!this.indexDb || !existsSync(this.memoryDir)) {
      return;
    }
    const files = scanMarkdownFiles(this.memoryDir);
    let totalChunks = 0;

    const upsertDocument = this.indexDb.prepare("INSERT OR REPLACE INTO documents (path, mtime_ms, size) VALUES (?, ?, ?)");
    const selectDocument = this.indexDb.prepare("SELECT mtime_ms, size FROM documents WHERE path = ?");
    const deleteChunks = this.indexDb.prepare("DELETE FROM chunks WHERE doc_path = ?");
    const selectChunkIds = this.indexDb.prepare("SELECT id FROM chunks WHERE doc_path = ?");
    const deleteEmbeddings = this.indexDb.prepare("DELETE FROM embeddings WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_path = ?)");
    const insertChunk = this.indexDb.prepare("INSERT INTO chunks (doc_path, chunk_idx, role, timestamp, session_key, content) VALUES (?, ?, ?, ?, ?, ?)");
    const indexedPaths = this.indexDb.prepare("SELECT path FROM documents").all().map((row) => row.path);
    const pathsOnDisk = new Set(files.map((file) => file.relPath));

    // Marks every vector of a document as deleted; each failure is ignored individually.
    const dropVectorsFor = (docPath) => {
      for (const { id } of selectChunkIds.all(docPath)) {
        try {
          this.indexHnsw?.markDelete(id);
        } catch {}
      }
    };

    for (const docPath of indexedPaths) {
      if (pathsOnDisk.has(docPath)) {
        continue;
      }
      dropVectorsFor(docPath);
      deleteEmbeddings.run(docPath);
      deleteChunks.run(docPath);
      this.indexDb.prepare("DELETE FROM documents WHERE path = ?").run(docPath);
      log.debug(`Removed deleted file from index: ${docPath}`);
    }

    for (const file of files) {
      const known = selectDocument.get(file.relPath);
      const unchanged = known && known.mtime_ms === file.mtimeMs && known.size === file.size;
      if (unchanged) {
        totalChunks += this.indexDb.prepare("SELECT COUNT(*) as c FROM chunks WHERE doc_path = ?").get(file.relPath).c;
        continue;
      }

      const chunks = chunkMarkdown(readFileSync(file.fullPath, "utf-8"), file.relPath, file.sessionKey);
      dropVectorsFor(file.relPath);
      deleteEmbeddings.run(file.relPath);
      deleteChunks.run(file.relPath);

      const writeChunks = this.indexDb.transaction(() => {
        chunks.forEach((chunk, chunkIdx) => {
          insertChunk.run(file.relPath, chunkIdx, chunk.role, chunk.timestamp, chunk.sessionKey, chunk.content);
        });
        upsertDocument.run(file.relPath, file.mtimeMs, file.size);
      });
      writeChunks();

      totalChunks += chunks.length;
      log.debug(`Indexed ${file.relPath}: ${chunks.length} chunks`);
    }

    log.info(`Indexed ${totalChunks} chunks from ${files.length} files`);
  }

  /** Embeds every chunk without an `embeddings` row, in batches of 100, then persists and publishes. */
  async embedPending() {
    if (!this.indexDb || !this.indexHnsw) {
      return;
    }
    const pending = this.indexDb.prepare(PENDING_CHUNKS_SQL).all();
    if (pending.length === 0) {
      return;
    }
    log.info(`Embedding ${pending.length} pending chunks...`);
    this.ensureHnswCapacity(pending.length);
    const markEmbedded = this.indexDb.prepare("INSERT OR REPLACE INTO embeddings (chunk_id) VALUES (?)");

    for (let offset = 0; offset < pending.length; offset += 100) {
      const batch = pending.slice(offset, offset + 100);
      const inputs = batch.map((chunk) => this.applyPrefix(this.opts.prefixDocument, chunk.content).slice(0, 8e3));
      const template = this.opts.prefixDocument.trim();
      if (template) {
        log.debug(`Using prefixDocument (template: ${template}) → result sample: [${inputs[0].slice(0, 80)}]`);
      }

      try {
        let vectors;
        if (this.openai) {
          const response = await this.openai.embeddings.create({
            model: this.opts.embeddingModel,
            input: inputs,
            dimensions: this.opts.embeddingDimensions,
          });
          vectors = response.data
            .sort((left, right) => left.index - right.index)
            .map((item) => item.embedding);
        } else {
          vectors = await this.fetchEmbeddings(inputs);
        }

        const storeBatch = this.indexDb.transaction(() => {
          for (let i = 0; i < vectors.length; i++) {
            this.indexHnsw.addPoint(vectors[i], batch[i].id, true);
            markEmbedded.run(batch[i].id);
          }
        });
        storeBatch();
        log.debug(`Embedded batch ${offset / 100 + 1}: ${batch.length} chunks`);
      } catch (err) {
        log.error(`Embedding batch failed: ${err}`);
      }
    }

    this.indexHnsw.writeIndexSync(this.indexHnswPath);
    this.publishSnapshot();
    log.info(`Embedded ${pending.length} chunks (HNSW: ${this.indexHnsw.getCurrentCount()} total points)`);
  }

  /** Copies the index DB (and HNSW file, if present) to the "next" snapshot paths via temp files. */
  publishSnapshot() {
    if (!this.indexDb) {
      return;
    }
    const tmpDbPath = join(this.dataDir, ".memory-search-next.tmp");
    const tmpHnswPath = join(this.dataDir, ".memory-vectors-search-next.tmp");
    try {
      this.indexDb.pragma("wal_checkpoint(TRUNCATE)");
      copyFileSync(this.indexDbPath, tmpDbPath);
      renameSync(tmpDbPath, this.searchNextDbPath);
      if (existsSync(this.indexHnswPath)) {
        copyFileSync(this.indexHnswPath, tmpHnswPath);
        renameSync(tmpHnswPath, this.searchNextHnswPath);
      }
      log.debug("Published search snapshot (DB + HNSW)");
    } catch (err) {
      log.error(`Failed to publish snapshot: ${err}`);
      try {
        unlinkSync(tmpDbPath);
      } catch {}
      try {
        unlinkSync(tmpHnswPath);
      } catch {}
    }
  }

  /** If a "next" snapshot exists, renames it over the current one and reopens the read handles. */
  maybeSwap() {
    if (!existsSync(this.searchNextDbPath)) {
      return;
    }
    try {
      if (this.searchDb) {
        this.searchDb.close();
        this.searchDb = null;
      }
      this.searchHnsw = null;
      renameSync(this.searchNextDbPath, this.searchDbPath);
      if (existsSync(this.searchNextHnswPath)) {
        renameSync(this.searchNextHnswPath, this.searchHnswPath);
      }
      this.searchDb = new Database(this.searchDbPath, { readonly: true });
      if (existsSync(this.searchHnswPath)) {
        this.searchHnsw = new HierarchicalNSW(HNSW_SPACE, this.opts.embeddingDimensions);
        this.searchHnsw.readIndexSync(this.searchHnswPath);
        this.searchHnsw.setEf(50);
        log.debug(`Swapped to new search DB + HNSW (${this.searchHnsw.getCurrentCount()} points)`);
      } else {
        log.debug("Swapped to new search DB (no HNSW index yet)");
      }
    } catch (err) {
      log.error(`Failed to swap search DB: ${err}`);
      try {
        if (existsSync(this.searchDbPath)) {
          this.searchDb = new Database(this.searchDbPath, { readonly: true });
        }
      } catch {}
    }
  }

  /** Substitutes `t` for every `{content}` in the trimmed template `e`; an empty template returns `t`. */
  applyPrefix(e, t) {
    const template = e.trim();
    if (!template) {
      return t;
    }
    return template.replace(/\{content\}/g, () => t);
  }

  /** POSTs the inputs to `<baseURL>/embed` and returns the `embeddings` array of the JSON reply. */
  async fetchEmbeddings(e) {
    const baseUrl = (this.opts.baseURL || "").replace(/\/+$/, "");
    const url = `${baseUrl}/embed`;
    const headers = { "Content-Type": "application/json" };
    if (this.opts.apiKey) {
      headers.Authorization = `Bearer ${this.opts.apiKey}`;
    }

    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify({ model: this.opts.embeddingModel, input: e }),
    });
    if (!response.ok) {
      const bodyText = await response.text().catch(() => "(no body)");
      throw new Error(`Embedding API ${response.status}: ${bodyText.slice(0, 300)}`);
    }

    const payload = await response.json();
    if (Array.isArray(payload.embeddings)) {
      return payload.embeddings;
    }
    throw new Error(`Unknown embedding response format. Keys: ${Object.keys(payload).join(", ")}`);
  }

  /** Embeds a query string; returns a Float32Array, or null when embedding fails. */
  async embedText(e) {
    try {
      const prefixed = this.applyPrefix(this.opts.prefixQuery, e);
      const template = this.opts.prefixQuery.trim();
      if (template) {
        log.debug(`Using prefixQuery (template: ${template}) → result sample: [${prefixed.slice(0, 80)}]`);
      }
      if (this.openai) {
        const response = await this.openai.embeddings.create({
          model: this.opts.embeddingModel,
          input: prefixed.slice(0, 8e3),
          dimensions: this.opts.embeddingDimensions,
        });
        return new Float32Array(response.data[0].embedding);
      }
      const vectors = await this.fetchEmbeddings([prefixed.slice(0, 8e3)]);
      return new Float32Array(vectors[0]);
    } catch (err) {
      log.error(`Failed to embed query: ${err}`);
      return null;
    }
  }
}

/**
 * Splits markdown on `### ` headings into chunks.
 *
 * A heading of the form `user (…)` / `assistant (…)` supplies role and timestamp.
 * Bodies longer than 1500 characters are cut into 1500-character windows that
 * overlap by 100 characters. The second argument is accepted but not used.
 */
function chunkMarkdown(text, _relPath, sessionKey) {
  const maxChars = 1500;
  const overlap = 100;
  const chunks = [];

  for (const section of text.split(/^### /m)) {
    if (!section.trim()) {
      continue;
    }
    const header = section.match(/^(user|assistant)\s*\(([^)]+)\)\s*\n/);
    const role = header ? header[1] : "";
    const timestamp = header ? header[2] : "";
    const body = header ? section.slice(header[0].length).trim() : section.trim();
    if (!body) {
      continue;
    }

    if (body.length <= maxChars) {
      chunks.push({ role, timestamp, sessionKey, content: body });
      continue;
    }

    let start = 0;
    while (start < body.length) {
      const end = Math.min(start + maxChars, body.length);
      chunks.push({ role, timestamp, sessionKey, content: body.slice(start, end) });
      start = end - overlap;
      // The window just emitted reached the end of the body.
      if (start + overlap >= body.length) {
        break;
      }
    }
  }

  return chunks;
}
|
|
1
|
+
import {
  existsSync,
  readdirSync,
  readFileSync,
  statSync,
  copyFileSync,
  renameSync,
  unlinkSync,
  watch,
} from "node:fs";
import { join, relative, basename, dirname, isAbsolute, sep } from "node:path";
import Database from "better-sqlite3";
import OpenAI from "openai";
import hnswlib from "hnswlib-node";
import { createLogger } from "../utils/logger.js";

const { HierarchicalNSW } = hnswlib;
const log = createLogger("MemorySearch");

/** Distance space used for every HNSW index created by this module. */
const HNSW_SPACE = "cosine";
/** Number of candidates requested from each ranker (BM25 and vector) before fusion. */
const CANDIDATES_PER_RANKER = 20;
/** Number of chunks sent to the embedding provider per request. */
const EMBED_BATCH_SIZE = 100;
/** Inputs to the embedding provider are cut to this many characters. */
const MAX_EMBED_INPUT_CHARS = 8000;
/** `ef` value set on the read-only HNSW index after loading. */
const SEARCH_EF = 50;
/** Maximum characters per chunk and overlap between consecutive chunks of one section. */
const CHUNK_MAX_CHARS = 1500;
const CHUNK_OVERLAP_CHARS = 100;

/** Schema applied to the index database on every start (all statements are idempotent). */
const SCHEMA_SQL = `
  CREATE TABLE IF NOT EXISTS documents (
    path TEXT PRIMARY KEY,
    mtime_ms INTEGER NOT NULL,
    size INTEGER NOT NULL
  );

  CREATE TABLE IF NOT EXISTS chunks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    doc_path TEXT NOT NULL,
    chunk_idx INTEGER NOT NULL,
    role TEXT NOT NULL DEFAULT '',
    timestamp TEXT NOT NULL DEFAULT '',
    session_key TEXT NOT NULL DEFAULT '',
    content TEXT NOT NULL,
    UNIQUE(doc_path, chunk_idx)
  );

  CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
    content,
    content='chunks',
    content_rowid='id',
    tokenize='porter unicode61'
  );

  CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
    INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);
  END;

  CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES ('delete', old.id, old.content);
  END;

  CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
    INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES ('delete', old.id, old.content);
    INSERT INTO chunks_fts(rowid, content) VALUES (new.id, new.content);
  END;

  -- Lookup table only (no vector BLOB) — vectors live in HNSW index
  CREATE TABLE IF NOT EXISTS embeddings (
    chunk_id INTEGER PRIMARY KEY
  );

  -- Metadata for detecting config changes (model, dimensions)
  CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
  );
`;

/** Builds the options object passed to `initIndex` for a brand-new HNSW index. */
function freshHnswOptions() {
  return { maxElements: 1e4, m: 16, efConstruction: 200, allowReplaceDeleted: true };
}

/**
 * Merges two ranked id lists by summing `1 / (k + rank + 1)` per list
 * (reciprocal-rank fusion), highest combined score first.
 *
 * @param {{id: number}[]} sparseHits ranked BM25 hits
 * @param {{id: number}[]} denseHits ranked vector hits
 * @param {number} k smoothing constant (`opts.rrfK`)
 * @returns {{id: number, score: number}[]}
 */
function fuseRankings(sparseHits, denseHits, k) {
  const totals = new Map();
  for (const hits of [sparseHits, denseHits]) {
    hits.forEach(({ id }, rank) => {
      totals.set(id, (totals.get(id) ?? 0) + 1 / (k + rank + 1));
    });
  }
  return Array.from(totals, ([id, score]) => ({ id, score })).sort(
    (left, right) => right.score - left.score,
  );
}

/**
 * Turns free text into an FTS5 MATCH expression: every term double-quoted and
 * joined with OR. Returns "" when the text contains no terms.
 *
 * Letters, combining marks and digits of any script are kept so that
 * non-ASCII words survive intact (the FTS table uses the unicode61 tokenizer);
 * everything else, including double quotes, becomes a separator.
 */
function toFtsQuery(text) {
  return text
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
    .split(/\s+/)
    .filter((term) => term.length > 0)
    .map((term) => `"${term}"`)
    .join(" OR ");
}

/**
 * Recursively lists `.md` files under `rootDir`.
 * Unreadable directories and files that cannot be stat'ed are skipped.
 */
function scanMarkdownFiles(rootDir) {
  const found = [];
  const rootName = basename(rootDir);

  const walk = (dir) => {
    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      return; // best-effort scan: directory vanished or is not readable
    }
    for (const entry of entries) {
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
        continue;
      }
      if (!entry.name.endsWith(".md")) {
        continue;
      }
      try {
        const stats = statSync(fullPath);
        const parentName = basename(dirname(fullPath));
        found.push({
          fullPath,
          relPath: relative(rootDir, fullPath),
          // Files directly under the root carry no session key.
          sessionKey: parentName === rootName ? "" : parentName,
          mtimeMs: Math.floor(stats.mtimeMs),
          size: stats.size,
        });
      } catch {
        // File disappeared between readdir and stat; the next scan will settle it.
      }
    }
  };

  walk(rootDir);
  return found;
}

/** Marks the given chunk ids as deleted in an HNSW index (no-op when `index` is null). */
function markVectorsDeleted(index, ids) {
  if (!index) {
    return;
  }
  for (const id of ids) {
    try {
      index.markDelete(id);
    } catch {
      // Chunks that were never embedded have no point in the index; ignore.
    }
  }
}

/**
 * Hybrid (FTS5 + HNSW) search over markdown files in a memory directory.
 *
 * Writes go to an "index" database/HNSW pair; reads are served from a copied
 * "search" snapshot that `publishSnapshot()` produces and `maybeSwap()` swaps in.
 */
export class MemorySearch {
  memoryDir;
  dataDir;
  opts;
  indexDb = null;
  indexHnsw = null;
  watcher = null;
  debounceTimer = null;
  embedTimer = null;
  indexing = false;
  embedding = false;
  stopped = true;
  searchDb = null;
  searchHnsw = null;
  openai = null;
  indexDbPath;
  searchDbPath;
  searchNextDbPath;
  indexHnswPath;
  searchHnswPath;
  searchNextHnswPath;

  /**
   * @param {string} memoryDir directory scanned for `.md` files
   * @param {string} dataDir directory that holds the databases and HNSW files
   * @param {object} opts engine options (embedding model/dimensions, limits, prefixes, timers, API settings)
   */
  constructor(memoryDir, dataDir, opts) {
    this.memoryDir = memoryDir;
    this.dataDir = dataDir;
    this.opts = opts;
    this.indexDbPath = join(dataDir, "memory-index.db");
    this.searchDbPath = join(dataDir, "memory-search.db");
    this.searchNextDbPath = join(dataDir, "memory-search-next.db");
    this.indexHnswPath = join(dataDir, "memory-vectors.hnsw");
    this.searchHnswPath = join(dataDir, "memory-vectors-search.hnsw");
    this.searchNextHnswPath = join(dataDir, "memory-vectors-search-next.hnsw");
    if (this.isOpenAI()) {
      this.openai = new OpenAI({
        apiKey: opts.apiKey,
        ...(opts.baseURL ? { baseURL: opts.baseURL } : {}),
      });
    }
  }

  /** True when the configured (or default) base URL contains "openai.com". */
  isOpenAI() {
    return (this.opts.baseURL || "https://api.openai.com/v1").includes("openai.com");
  }

  /** Returns `opts.maxInjectedChars`. */
  getMaxInjectedChars() {
    return this.opts.maxInjectedChars;
  }

  /**
   * Opens the index DB, applies the schema, indexes and embeds once, publishes
   * and swaps in a snapshot, then starts the file watcher and the embed timer.
   */
  async start() {
    log.info("Starting memory search engine...");
    this.stopped = false;
    this.indexDb = new Database(this.indexDbPath);
    this.indexDb.pragma("journal_mode = WAL");
    this.migrateEmbeddingsTable();
    this.indexDb.exec(SCHEMA_SQL);
    this.checkEmbeddingConfigChange();
    this.initIndexHnsw();
    await this.indexFiles();
    await this.embedPending();
    this.publishSnapshot();
    this.maybeSwap();
    this.startWatcher();
    if (this.opts.embedIntervalMs > 0) {
      this.embedTimer = setInterval(() => {
        this.embedPending().catch((err) => log.error(`Embed cycle error: ${err}`));
      }, this.opts.embedIntervalMs);
    }
    log.info("Memory search engine started");
  }

  /** Flags the engine as stopped, cancels watcher/timers, closes both DBs and drops both HNSW handles. */
  stop() {
    this.stopped = true;
    if (this.watcher) {
      this.watcher.close();
      this.watcher = null;
    }
    if (this.debounceTimer) {
      clearTimeout(this.debounceTimer);
      this.debounceTimer = null;
    }
    if (this.embedTimer) {
      clearInterval(this.embedTimer);
      this.embedTimer = null;
    }
    if (this.indexDb) {
      this.indexDb.close();
      this.indexDb = null;
    }
    if (this.searchDb) {
      this.searchDb.close();
      this.searchDb = null;
    }
    this.indexHnsw = null;
    this.searchHnsw = null;
    log.info("Memory search engine stopped");
  }

  /** Drops a legacy `embeddings` table (one with a `vector` column) and the on-disk HNSW file. */
  migrateEmbeddingsTable() {
    if (!this.indexDb) {
      return;
    }
    const columns = this.indexDb.prepare("PRAGMA table_info(embeddings)").all();
    if (!columns.some((column) => column.name === "vector")) {
      return;
    }
    log.info("Migrating: dropping old embeddings table (had vector BLOB). All embeddings will be re-created via HNSW.");
    this.indexDb.exec("DROP TABLE IF EXISTS embeddings");
    try {
      unlinkSync(this.indexHnswPath);
    } catch {
      // The HNSW file may not exist yet.
    }
  }

  /**
   * Compares the model/dimensions stored in `meta` with `opts`. On a mismatch
   * the `embeddings` table and the HNSW file are wiped so everything is
   * re-embedded. The current values are stored afterwards in every case.
   */
  checkEmbeddingConfigChange() {
    if (!this.indexDb) {
      return;
    }
    const readMeta = this.indexDb.prepare("SELECT value FROM meta WHERE key = ?");
    const writeMeta = this.indexDb.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)");
    const storedModel = readMeta.get("embedding_model")?.value;
    const storedDimensions = readMeta.get("embedding_dimensions")?.value;
    const currentModel = this.opts.embeddingModel;
    const currentDimensions = String(this.opts.embeddingDimensions);
    // A missing row means "first run", which is not a change.
    const modelChanged = storedModel !== undefined && storedModel !== currentModel;
    const dimensionsChanged = storedDimensions !== undefined && storedDimensions !== currentDimensions;

    if (modelChanged || dimensionsChanged) {
      const changes = [];
      if (modelChanged) {
        changes.push(`model: ${storedModel} → ${currentModel}`);
      }
      if (dimensionsChanged) {
        changes.push(`dimensions: ${storedDimensions} → ${currentDimensions}`);
      }
      log.info(`Embedding config changed (${changes.join(", ")}). Wiping embeddings + HNSW for full re-embed.`);
      this.indexDb.exec("DELETE FROM embeddings");
      try {
        unlinkSync(this.indexHnswPath);
      } catch {
        // The HNSW file may not exist yet.
      }
    }

    writeMeta.run("embedding_model", currentModel);
    writeMeta.run("embedding_dimensions", currentDimensions);
  }

  /** Loads the writable HNSW index from disk, or initialises an empty one if loading is impossible. */
  initIndexHnsw() {
    this.indexHnsw = new HierarchicalNSW(HNSW_SPACE, this.opts.embeddingDimensions);

    if (!existsSync(this.indexHnswPath)) {
      this.indexHnsw.initIndex(freshHnswOptions());
      log.info("Created new HNSW index");
      return;
    }

    try {
      this.indexHnsw.readIndexSync(this.indexHnswPath, true);
      log.info(`Loaded HNSW index: ${this.indexHnsw.getCurrentCount()} points`);
    } catch (err) {
      log.warn(`Failed to load HNSW index, creating new: ${err}`);
      this.indexHnsw.initIndex(freshHnswOptions());
    }
  }

  /**
   * Grows the writable HNSW index when adding `e` points would exceed its capacity.
   * @param {number} e number of points about to be added
   */
  ensureHnswCapacity(e) {
    if (!this.indexHnsw) {
      return;
    }
    const capacity = this.indexHnsw.getMaxElements();
    const required = this.indexHnsw.getCurrentCount() + e;
    if (required <= capacity) {
      return;
    }
    const newCapacity = Math.max(2 * capacity, required + 1e3);
    this.indexHnsw.resizeIndex(newCapacity);
    log.info(`Resized HNSW index: ${capacity} → ${newCapacity}`);
  }

  /**
   * Runs BM25 and (when possible) vector search against the current snapshot,
   * fuses both rankings and returns up to `t` results.
   *
   * @param {string} e query text
   * @param {number} [t] maximum number of results (defaults to `opts.maxResults`)
   * @returns {Promise<{path: string, sessionKey: string, snippet: string, score: number, role: string, timestamp: string}[]>}
   */
  async search(e, t) {
    const limit = t ?? this.opts.maxResults;
    this.maybeSwap();
    if (!this.searchDb) {
      log.warn("Search DB not available");
      return [];
    }

    const sparseHits = this.bm25Search(e, CANDIDATES_PER_RANKER);
    let denseHits = [];
    try {
      const queryVector = await this.embedText(e);
      if (queryVector && this.searchHnsw && this.searchHnsw.getCurrentCount() > 0) {
        denseHits = this.denseSearch(queryVector, CANDIDATES_PER_RANKER);
      }
    } catch (err) {
      log.warn(`Dense search failed, using BM25 only: ${err}`);
    }

    // stop() may have closed the snapshot while the query was being embedded.
    if (!this.searchDb) {
      log.warn("Search DB not available");
      return [];
    }

    const fused = fuseRankings(sparseHits, denseHits, this.opts.rrfK);
    const selectChunk = this.searchDb.prepare(
      "SELECT doc_path, role, timestamp, session_key, content FROM chunks WHERE id = ?",
    );
    const maxSnippetChars = this.opts.maxSnippetChars;
    const results = [];

    // Walk the whole fused list instead of a pre-cut slice: ids that have no row
    // in the snapshot DB are skipped and must not consume result slots.
    for (const { id, score } of fused) {
      if (results.length >= limit) {
        break;
      }
      const row = selectChunk.get(id);
      if (!row) {
        continue;
      }
      const snippet =
        row.content.length > maxSnippetChars
          ? `${row.content.slice(0, maxSnippetChars)}...`
          : row.content;
      results.push({
        path: row.doc_path,
        sessionKey: row.session_key,
        snippet,
        score,
        role: row.role,
        timestamp: row.timestamp,
      });
    }

    log.info(`Search "${e.slice(0, 60)}": ${results.length} results (sparse=${sparseHits.length}, dense=${denseHits.length})`);
    return results;
  }

  /**
   * Reads a file relative to the memory directory.
   *
   * Paths that resolve outside the memory directory, missing paths and
   * non-file paths all yield the "[File not found: …]" placeholder.
   *
   * @param {string} t path relative to the memory directory
   * @param {number} [s] 1-based first line to return
   * @param {number} [i] number of lines to return
   * @returns {{path: string, content: string}}
   */
  readFile(t, s, i) {
    const notFound = { path: t, content: `[File not found: ${t}]` };
    const fullPath = join(this.memoryDir, t);

    // Reject "../" traversal: the joined path must stay inside memoryDir.
    const relPath = relative(this.memoryDir, fullPath);
    const escapesRoot = relPath === ".." || relPath.startsWith(`..${sep}`) || isAbsolute(relPath);
    if (escapesRoot || !existsSync(fullPath) || !statSync(fullPath).isFile()) {
      return notFound;
    }

    const text = readFileSync(fullPath, "utf-8");
    if (s === undefined && i === undefined) {
      return { path: t, content: text };
    }
    const lines = text.split("\n");
    const firstIndex = Math.max(0, (s ?? 1) - 1);
    const lineCount = i ?? lines.length;
    return { path: t, content: lines.slice(firstIndex, firstIndex + lineCount).join("\n") };
  }

  /**
   * FTS5 search against the snapshot DB.
   * @param {string} e query text
   * @param {number} t maximum number of rows
   * @returns {{id: number, score: number}[]} score is the negated bm25 rank; [] on any error
   */
  bm25Search(e, t) {
    if (!this.searchDb) {
      return [];
    }
    try {
      const matchExpr = toFtsQuery(e);
      if (!matchExpr) {
        return [];
      }
      const rows = this.searchDb
        .prepare(`
          SELECT chunks.id, bm25(chunks_fts) as rank
          FROM chunks_fts
          JOIN chunks ON chunks.id = chunks_fts.rowid
          WHERE chunks_fts MATCH ?
          ORDER BY rank
          LIMIT ?
        `)
        .all(matchExpr, t);
      return rows.map((row) => ({ id: row.id, score: -row.rank }));
    } catch (err) {
      log.warn(`BM25 search error: ${err}`);
      return [];
    }
  }

  /**
   * Nearest-neighbour search against the snapshot HNSW index.
   * @param {ArrayLike<number>} e query vector
   * @param {number} t maximum number of neighbours
   * @returns {{id: number, score: number}[]} score is `1 - distance`
   */
  denseSearch(e, t) {
    if (!this.searchHnsw || this.searchHnsw.getCurrentCount() === 0) {
      return [];
    }
    // searchKnn must not be asked for more neighbours than the index holds.
    const k = Math.min(t, this.searchHnsw.getCurrentCount());
    const knn = this.searchHnsw.searchKnn(Array.from(e), k);
    return knn.neighbors.map((id, position) => ({ id, score: 1 - knn.distances[position] }));
  }

  /** Watches the memory directory recursively; every event (re)arms a debounced index cycle. */
  startWatcher() {
    if (!existsSync(this.memoryDir)) {
      return;
    }
    try {
      this.watcher = watch(this.memoryDir, { recursive: true }, () => {
        if (this.debounceTimer) {
          clearTimeout(this.debounceTimer);
        }
        this.debounceTimer = setTimeout(() => {
          this.runIndexCycle();
        }, this.opts.updateDebounceMs);
      });
    } catch (err) {
      log.warn(`Could not start file watcher: ${err}`);
    }
  }

  /** Re-indexes files and publishes a snapshot, unless a cycle is already running. Never rejects. */
  async runIndexCycle() {
    if (this.indexing) {
      return;
    }
    this.indexing = true;
    try {
      await this.indexFiles();
      this.publishSnapshot();
    } catch (err) {
      log.error(`Index cycle error: ${err}`);
    } finally {
      this.indexing = false;
    }
  }

  /**
   * Syncs the `documents`/`chunks`/`embeddings` tables with the markdown files on disk.
   *
   * Files whose mtime and size match the stored values are left alone. Removed
   * and changed documents are rewritten inside one transaction each, and their
   * old vectors are marked deleted only after that transaction has committed.
   * A file that cannot be read is logged and skipped.
   */
  async indexFiles() {
    if (!this.indexDb || !existsSync(this.memoryDir)) {
      return;
    }
    const db = this.indexDb;
    const files = scanMarkdownFiles(this.memoryDir);

    const upsertDocument = db.prepare("INSERT OR REPLACE INTO documents (path, mtime_ms, size) VALUES (?, ?, ?)");
    const selectDocument = db.prepare("SELECT mtime_ms, size FROM documents WHERE path = ?");
    const deleteDocument = db.prepare("DELETE FROM documents WHERE path = ?");
    const deleteChunks = db.prepare("DELETE FROM chunks WHERE doc_path = ?");
    const selectChunkIds = db.prepare("SELECT id FROM chunks WHERE doc_path = ?");
    const countChunks = db.prepare("SELECT COUNT(*) as c FROM chunks WHERE doc_path = ?");
    const deleteEmbeddings = db.prepare("DELETE FROM embeddings WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_path = ?)");
    const insertChunk = db.prepare("INSERT INTO chunks (doc_path, chunk_idx, role, timestamp, session_key, content) VALUES (?, ?, ?, ?, ?, ?)");

    // Embeddings are deleted first: their sub-select needs the chunk rows to still exist.
    const removeDocument = db.transaction((docPath) => {
      deleteEmbeddings.run(docPath);
      deleteChunks.run(docPath);
      deleteDocument.run(docPath);
    });
    const replaceDocument = db.transaction((file, chunks) => {
      deleteEmbeddings.run(file.relPath);
      deleteChunks.run(file.relPath);
      chunks.forEach((chunk, chunkIdx) => {
        insertChunk.run(file.relPath, chunkIdx, chunk.role, chunk.timestamp, chunk.sessionKey, chunk.content);
      });
      upsertDocument.run(file.relPath, file.mtimeMs, file.size);
    });
    const chunkIdsOf = (docPath) => selectChunkIds.all(docPath).map((row) => row.id);

    const pathsOnDisk = new Set(files.map((file) => file.relPath));
    const indexedPaths = db.prepare("SELECT path FROM documents").all().map((row) => row.path);
    for (const docPath of indexedPaths) {
      if (pathsOnDisk.has(docPath)) {
        continue;
      }
      const staleIds = chunkIdsOf(docPath);
      removeDocument(docPath);
      markVectorsDeleted(this.indexHnsw, staleIds);
      log.debug(`Removed deleted file from index: ${docPath}`);
    }

    let totalChunks = 0;
    for (const file of files) {
      const known = selectDocument.get(file.relPath);
      if (known && known.mtime_ms === file.mtimeMs && known.size === file.size) {
        totalChunks += countChunks.get(file.relPath).c;
        continue;
      }

      let chunks;
      try {
        chunks = chunkMarkdown(readFileSync(file.fullPath, "utf-8"), file.relPath, file.sessionKey);
      } catch (err) {
        // One unreadable file must not abort the scan (or start()).
        log.warn(`Could not read ${file.relPath}, skipping: ${err}`);
        continue;
      }

      const staleIds = chunkIdsOf(file.relPath);
      replaceDocument(file, chunks);
      markVectorsDeleted(this.indexHnsw, staleIds);
      totalChunks += chunks.length;
      log.debug(`Indexed ${file.relPath}: ${chunks.length} chunks`);
    }

    log.info(`Indexed ${totalChunks} chunks from ${files.length} files`);
  }

  /**
   * Embeds every chunk that has no `embeddings` row, batch by batch, then
   * writes the HNSW index to disk and publishes a snapshot.
   *
   * Does nothing when the engine is stopped or another run is in progress, and
   * aborts quietly when `stop()` is called while a request is in flight.
   * A failed batch is logged and skipped; the remaining batches still run.
   */
  async embedPending() {
    if (this.stopped || this.embedding || !this.indexDb || !this.indexHnsw) {
      return;
    }
    this.embedding = true;
    try {
      const pending = this.indexDb
        .prepare(`
          SELECT c.id, c.content FROM chunks c
          LEFT JOIN embeddings e ON e.chunk_id = c.id
          WHERE e.chunk_id IS NULL
        `)
        .all();
      if (pending.length === 0) {
        return;
      }
      log.info(`Embedding ${pending.length} pending chunks...`);
      this.ensureHnswCapacity(pending.length);
      const markEmbedded = this.indexDb.prepare("INSERT OR REPLACE INTO embeddings (chunk_id) VALUES (?)");
      let embeddedCount = 0;

      for (let offset = 0; offset < pending.length; offset += EMBED_BATCH_SIZE) {
        if (this.stopped) {
          log.warn("embedPending aborted: engine stopped");
          return;
        }
        const batch = pending.slice(offset, offset + EMBED_BATCH_SIZE);
        const inputs = batch.map((chunk) =>
          this.applyPrefix(this.opts.prefixDocument, chunk.content).slice(0, MAX_EMBED_INPUT_CHARS),
        );
        const template = this.opts.prefixDocument.trim();
        if (template) {
          log.debug(`Using prefixDocument (template: ${template}) → result sample: [${inputs[0].slice(0, 80)}]`);
        }

        try {
          let vectors;
          if (this.openai) {
            const response = await this.openai.embeddings.create({
              model: this.opts.embeddingModel,
              input: inputs,
              dimensions: this.opts.embeddingDimensions,
            });
            vectors = response.data
              .sort((left, right) => left.index - right.index)
              .map((item) => item.embedding);
          } else {
            vectors = await this.fetchEmbeddings(inputs);
          }

          // stop() may have run while the request was in flight.
          if (this.stopped || !this.indexDb || !this.indexHnsw) {
            log.warn("embedPending aborted: engine stopped during embedding");
            return;
          }
          // Vectors are matched to chunks by position, so the counts must agree.
          if (vectors.length !== batch.length) {
            throw new Error(`expected ${batch.length} embeddings, received ${vectors.length}`);
          }

          const index = this.indexHnsw;
          this.indexDb.transaction(() => {
            vectors.forEach((vector, position) => {
              index.addPoint(vector, batch[position].id, true);
              markEmbedded.run(batch[position].id);
            });
          })();
          embeddedCount += batch.length;
          log.debug(`Embedded batch ${offset / EMBED_BATCH_SIZE + 1}: ${batch.length} chunks`);
        } catch (err) {
          if (this.stopped) {
            return;
          }
          log.error(`Embedding batch failed: ${err}`);
        }
      }

      if (this.stopped || !this.indexHnsw) {
        return;
      }
      this.indexHnsw.writeIndexSync(this.indexHnswPath);
      this.publishSnapshot();
      // Report what was actually stored, not what was merely pending.
      log.info(`Embedded ${embeddedCount} chunks (HNSW: ${this.indexHnsw.getCurrentCount()} total points)`);
    } finally {
      this.embedding = false;
    }
  }

  /**
   * Checkpoints the index DB and copies it (plus the HNSW file, if present) to
   * the "next" snapshot paths. Each copy goes to a temp file first and is then renamed.
   */
  publishSnapshot() {
    if (!this.indexDb) {
      return;
    }
    const tmpDbPath = join(this.dataDir, ".memory-search-next.tmp");
    const tmpHnswPath = join(this.dataDir, ".memory-vectors-search-next.tmp");
    try {
      this.indexDb.pragma("wal_checkpoint(TRUNCATE)");
      copyFileSync(this.indexDbPath, tmpDbPath);
      renameSync(tmpDbPath, this.searchNextDbPath);
      if (existsSync(this.indexHnswPath)) {
        copyFileSync(this.indexHnswPath, tmpHnswPath);
        renameSync(tmpHnswPath, this.searchNextHnswPath);
      }
      log.debug("Published search snapshot (DB + HNSW)");
    } catch (err) {
      log.error(`Failed to publish snapshot: ${err}`);
      for (const tmpPath of [tmpDbPath, tmpHnswPath]) {
        try {
          unlinkSync(tmpPath);
        } catch {
          // Temp file was never created or is already gone.
        }
      }
    }
  }

  /**
   * If a "next" snapshot exists, renames it over the current one and reopens
   * the read-only DB and HNSW handles. On failure the vector index is left
   * unset and the current search DB is reopened if none is open.
   */
  maybeSwap() {
    if (!existsSync(this.searchNextDbPath)) {
      return;
    }
    try {
      if (this.searchDb) {
        this.searchDb.close();
        this.searchDb = null;
      }
      this.searchHnsw = null;
      renameSync(this.searchNextDbPath, this.searchDbPath);
      if (existsSync(this.searchNextHnswPath)) {
        renameSync(this.searchNextHnswPath, this.searchHnswPath);
      }
      this.searchDb = new Database(this.searchDbPath, { readonly: true });

      if (!existsSync(this.searchHnswPath)) {
        log.debug("Swapped to new search DB (no HNSW index yet)");
        return;
      }
      // Publish the index only once it is fully loaded, so a failed read never
      // leaves a half-initialised object in `searchHnsw`.
      const index = new HierarchicalNSW(HNSW_SPACE, this.opts.embeddingDimensions);
      index.readIndexSync(this.searchHnswPath);
      index.setEf(SEARCH_EF);
      this.searchHnsw = index;
      log.debug(`Swapped to new search DB + HNSW (${index.getCurrentCount()} points)`);
    } catch (err) {
      log.error(`Failed to swap search DB: ${err}`);
      this.searchHnsw = null;
      try {
        // Reopen only when no connection is open, otherwise the open one would leak.
        if (!this.searchDb && existsSync(this.searchDbPath)) {
          this.searchDb = new Database(this.searchDbPath, { readonly: true });
        }
      } catch (reopenErr) {
        log.warn(`Could not reopen search DB after failed swap: ${reopenErr}`);
      }
    }
  }

  /**
   * Substitutes `t` for every `{content}` placeholder in the trimmed template `e`.
   * An empty template returns `t` unchanged.
   */
  applyPrefix(e, t) {
    const template = e.trim();
    // A replacer function keeps `$` sequences in the content from being interpreted.
    return template ? template.replace(/\{content\}/g, () => t) : t;
  }

  /**
   * POSTs the inputs to `<baseURL>/embed` and returns the `embeddings` array of the JSON reply.
   * @param {string[]} e texts to embed
   * @throws {Error} on a non-2xx status or when the reply has no `embeddings` array
   */
  async fetchEmbeddings(e) {
    const baseUrl = (this.opts.baseURL || "").replace(/\/+$/, "");
    const headers = { "Content-Type": "application/json" };
    if (this.opts.apiKey) {
      headers.Authorization = `Bearer ${this.opts.apiKey}`;
    }

    const response = await fetch(`${baseUrl}/embed`, {
      method: "POST",
      headers,
      body: JSON.stringify({ model: this.opts.embeddingModel, input: e }),
    });
    if (!response.ok) {
      const bodyText = await response.text().catch(() => "(no body)");
      throw new Error(`Embedding API ${response.status}: ${bodyText.slice(0, 300)}`);
    }

    const payload = await response.json();
    if (Array.isArray(payload.embeddings)) {
      return payload.embeddings;
    }
    throw new Error(`Unknown embedding response format. Keys: ${Object.keys(payload).join(", ")}`);
  }

  /**
   * Embeds a query string.
   * @param {string} e query text
   * @returns {Promise<Float32Array|null>} the vector, or null when embedding fails
   */
  async embedText(e) {
    try {
      const prefixed = this.applyPrefix(this.opts.prefixQuery, e);
      const template = this.opts.prefixQuery.trim();
      if (template) {
        log.debug(`Using prefixQuery (template: ${template}) → result sample: [${prefixed.slice(0, 80)}]`);
      }
      const input = prefixed.slice(0, MAX_EMBED_INPUT_CHARS);
      if (this.openai) {
        const response = await this.openai.embeddings.create({
          model: this.opts.embeddingModel,
          input,
          dimensions: this.opts.embeddingDimensions,
        });
        return new Float32Array(response.data[0].embedding);
      }
      const vectors = await this.fetchEmbeddings([input]);
      return new Float32Array(vectors[0]);
    } catch (err) {
      log.error(`Failed to embed query: ${err}`);
      return null;
    }
  }
}

/**
 * Splits markdown on `### ` headings into chunks.
 *
 * A heading of the form `user (…)` or `assistant (…)` supplies the chunk's role
 * and timestamp; other sections get empty strings for both. Bodies longer than
 * CHUNK_MAX_CHARS are cut into windows of that size which overlap by
 * CHUNK_OVERLAP_CHARS.
 *
 * @param {string} text markdown source
 * @param {string} _relPath accepted for call-site compatibility; not used
 * @param {string} sessionKey value copied onto every chunk
 * @returns {{role: string, timestamp: string, sessionKey: string, content: string}[]}
 */
function chunkMarkdown(text, _relPath, sessionKey) {
  const chunks = [];

  for (const section of text.split(/^### /m)) {
    if (!section.trim()) {
      continue;
    }
    const header = section.match(/^(user|assistant)\s*\(([^)]+)\)\s*\n/);
    const role = header ? header[1] : "";
    const timestamp = header ? header[2] : "";
    const body = (header ? section.slice(header[0].length) : section).trim();
    if (!body) {
      continue;
    }

    if (body.length <= CHUNK_MAX_CHARS) {
      chunks.push({ role, timestamp, sessionKey, content: body });
      continue;
    }

    let start = 0;
    while (start < body.length) {
      const end = Math.min(start + CHUNK_MAX_CHARS, body.length);
      chunks.push({ role, timestamp, sessionKey, content: body.slice(start, end) });
      // Stop once a window has reached the end; otherwise step back by the overlap.
      if (end >= body.length) {
        break;
      }
      start = end - CHUNK_OVERLAP_CHARS;
    }
  }

  return chunks;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hera-al/server",
|
|
3
|
-
"version": "1.6.1",
|
|
3
|
+
"version": "1.6.2",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Hera Artificial Life — Multi-channel AI agent gateway with autonomous capabilities",
|
|
6
6
|
"license": "MIT",
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"@anthropic-ai/claude-agent-sdk": "latest",
|
|
66
66
|
"@clack/prompts": "^1.0.0",
|
|
67
67
|
"@grammyjs/runner": "^2.0.3",
|
|
68
|
-
"@hera-al/browser-server": "^1.0.
|
|
68
|
+
"@hera-al/browser-server": "^1.0.5",
|
|
69
69
|
"@hono/node-server": "^1.13.8",
|
|
70
70
|
"@types/markdown-it": "^14.1.2",
|
|
71
71
|
"@whiskeysockets/baileys": "^7.0.0-rc.9",
|