@tichopad/notes-query-tool 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -1,26 +1,1418 @@
1
1
  #!/usr/bin/env node
2
- import{defineCommand as rn,runMain as on}from"citty";var q={name:"@tichopad/notes-query-tool",type:"module",version:"0.1.0",description:"CLI tool for indexing and querying Markdown notes",engines:{node:">=24"},packageManager:"pnpm@10.32.1",main:"dist/main.js",bin:{nqt:"dist/main.js"},files:["dist"],scripts:{"db:delete-stale-lock":"rm ./dbdata/postmaster.pid","db:generate":"drizzle-kit generate","db:query":"node scripts/query.ts","db:push":"drizzle-kit push","db:studio":"drizzle-kit studio","benchdata:load":"pnpm run dev load --glob 'benchdata/**/*.md'","benchdata:reindex":"rm -rf dbdata && pnpm run benchdata:load","testdata:load":"pnpm run dev load --glob 'testdata/**/*.md'","testdata:reindex":"rm -rf dbdata && pnpm run testdata:load",dev:"node src/main.ts",build:"node scripts/build.ts",test:"node --test --test-reporter=spec 'src/**/*.test.ts'",bench:"node --test --test-reporter=spec 'bench/**/*.test.ts'",check:"biome check . && tsc --noEmit && pnpm run test",fix:"biome check --write ."},dependencies:{"@electric-sql/pglite":"0.4.4","@huggingface/transformers":"4.1.0",citty:"0.2.2",consola:"3.4.2","drizzle-orm":"0.45.2",marked:"18.0.1",yaml:"2.8.3"},devDependencies:{"@biomejs/biome":"2.4.12","@types/node":"24.12.2","drizzle-kit":"0.31.10",esbuild:"0.28.0",typescript:"6.0.2"},pnpm:{onlyBuiltDependencies:["esbuild","onnxruntime-node","protobufjs","sharp"],patchedDependencies:{"drizzle-kit@0.31.10":"patches/drizzle-kit@0.31.10.patch"}}};import{createInterface as it}from"node:readline";import{defineCommand as at}from"citty";import{eq as pe}from"drizzle-orm";import{PGlite as Xe}from"@electric-sql/pglite";import{pg_trgm as Je}from"@electric-sql/pglite/contrib/pg_trgm";import{unaccent as Ye}from"@electric-sql/pglite/contrib/unaccent";import{vector as Ze}from"@electric-sql/pglite/vector";import{drizzle as et}from"drizzle-orm/pglite";import{homedir as Ke}from"node:os";import{join as J}from"node:path";var Y="onnx-community/embeddinggemma-300m-ONNX",Z="fp32",ee=768;function 
Ve(){let e=process.env.XDG_DATA_HOME,t=e&&e.length>0?e:J(Ke(),".local","share");return`${J(t,"nqt")}/`}var te=Ve(),ne=2e3,re=30,oe=20,se=20,ie=.3,ae=.4,ue=.3,ce=.3,le=.2,de=.4,me=10;var tt={unaccent:Ye,vector:Ze,pg_trgm:Je};function nt(e=te){let t=new Xe({dataDir:e,extensions:tt});return et({client:t})}var z;function x(){return z||(z=nt()),z}import{sql as fe}from"drizzle-orm";import{integer as rt,pgTable as ot,text as st,timestamp as ge}from"drizzle-orm/pg-core";var k=ot("bases",{id:rt("id").primaryKey().generatedAlwaysAsIdentity(),name:st("name").notNull().unique(),createdAt:ge("created_at").notNull().default(fe`now()`),updatedAt:ge("updated_at").notNull().default(fe`now()`)});var T=class{db;constructor(t){this.db=t??x()}async getBaseByName(t){let[r]=await this.db.select().from(k).where(pe(k.name,t)).limit(1);return r}async getOrCreateBase(t){let r=await this.getBaseByName(t);if(r)return r;let[n]=await this.db.insert(k).values({name:t}).returning();if(!n)throw new Error(`Failed to create base: ${t}`);return n}async deleteBase(t){await this.db.delete(k).where(pe(k.name,t))}};var be=at({meta:{name:"drop",description:"Drop a knowledge base and all its indexed data"},args:{base:{type:"string",description:"Knowledge base name to drop",default:"default"},force:{type:"boolean",description:"Skip confirmation prompt",default:!1}},async run({args:e}){let t=new T;if(await t.getBaseByName(e.base)||(console.error(`Error: Base '${e.base}' not found.`),process.exit(1)),!e.force&&!await ut(`Are you sure you want to drop base '${e.base}'? 
[y/N] `)){console.log("Aborted.");return}await t.deleteBase(e.base),console.log(`Base '${e.base}' dropped.`)}});function ut(e){return new Promise(t=>{let r=it({input:process.stdin,output:process.stdout});r.question(e,n=>{r.close(),t(n.toLowerCase()==="y"||n.toLowerCase()==="yes")})})}import{createHash as Qt}from"node:crypto";import{readFile as Wt}from"node:fs/promises";import{defineCommand as jt}from"citty";import{pipeline as dt}from"@huggingface/transformers";import{createConsola as ct}from"consola";function lt(){let e=process.env.NQT_LOG_LEVEL;if(e!==void 0){let t=Number(e);if(Number.isInteger(t)&&t>=0&&t<=5)return t}return 3}var R=ct({level:lt()});function he(e){R.level=e}var m={info(e,...t){R.info(e,...t)},warn(e,...t){R.warn(e,...t)},error(e,...t){R.error(e,...t)},debug(e,...t){R.debug(e,...t)},trace(e,...t){R.trace(e,...t)}};async function G(e){return await dt("feature-extraction",Y,{device:e,dtype:Z})}function mt(e){let t=Array.from(e);return t.some(r=>!Number.isFinite(r))?[]:t}var ft="task: search result | query: ",gt="title: ",pt=" | text: ",bt="none";async function L(){let e,t="webgpu";try{e=await G("webgpu"),m.debug("Embedder loaded on WebGPU")}catch{m.warn("WebGPU unavailable, using CPU."),t="cpu",e=await G("cpu"),m.debug("Embedder loaded on CPU")}async function r(n){let o=await e(n,{pooling:"mean",normalize:!0}),s=mt(o.data);if(s.length>0)return s;if(t==="webgpu")return m.warn("WebGPU produced invalid embeddings, falling back to CPU..."),t="cpu",e=await G("cpu"),m.debug("Embedder loaded on CPU"),r(n);throw new Error("Embedding model produced non-finite values")}return{embedQuery(n){return r(ft+n)},embedDocument(n,o){let s=o?.trim()||bt;return r(gt+s+pt+n)}}}import{marked as ht}from"marked";var ye=6;function Ie(e,t){if(t<=0)throw new Error("limit must be > 0");if(e.length===0)return[];if(e.trim().length===0)return[];let r=yt(e);if(r.length===0)return[];let n=xt(r),o=Ee(n,[],t);return Rt(o,t)}function yt(e){let t=ht.lexer(e),r=[],n=0;for(let o of t){let 
s=o.raw??"",a=n,u=n+s.length;r.push({token:o,start:a,end:u,raw:s}),n=u}return r}function xt(e){return{heading:null,level:0,body:e,start:e[0]?.start??0,end:e[e.length-1]?.end??0}}function wt(e){let t=ye+1;for(let o of e)if(we(o.token)){let s=o.token.depth;s<t&&(t=s)}if(t>ye)return null;let r=[],n=null;for(let o of e)we(o.token)&&o.token.depth===t?(n&&r.push(xe(n)),n={heading:o,level:t,body:[],start:o.start,end:o.end}):(n||(n={heading:null,level:0,body:[],start:o.start,end:o.end}),n.body.push(o),n.end=o.end);return n&&r.push(xe(n)),r}function xe(e){let t=e.body[e.body.length-1];return t?e.end=t.end:e.heading&&(e.end=e.heading.end),e}function we(e){return e.type==="heading"}function Ee(e,t,r){let n=e.heading?$t(e.heading):null,o=n?[...t,n]:t,s=e.body[0]?.start??e.heading?.end??e.start,a=e.body[e.body.length-1]?.end??e.heading?.end??e.end,u=Et(e.body),i=e.heading?`${n}
3
-
4
- `:"",d=N(i+u),c=W(t,d,r);if(c!==null&&c.text.length<=r&&d.length>0)return[{text:c.text,breadcrumb:c.breadcrumb,startOffset:e.heading?e.heading.start:s,endOffset:a}];let l=wt(e.body);if(l&&l.length>0&&l.some(b=>b.heading!==null)){let b=[];for(let p of l)p.heading===null?p.body.some(h=>h.raw.trim().length>0)&&b.push(...ke(p.body,o,r)):b.push(...Ee(p,o,r));return b}return ke(e.body,o,r)}function ke(e,t,r){if(e.length===0)return[];let n=[];for(let o of e){let s=N(o.raw);if(s.trim().length===0)continue;let a=W(t,s,r);a!==null&&a.text.length<=r?n.push({text:a.text,breadcrumb:a.breadcrumb,startOffset:o.start,endOffset:o.end}):n.push(...kt(o,t,r))}return n}function kt(e,t,r){let n=N(e.raw),o=Tt(n);return o.length>1?Pt(o,e.start,t,r,s=>Te(s,r,Q(t,r))):It(Te(n,r,Q(t,r)),e.start,t,r)}function Tt(e){let t=[],r=/[^.!?]+[.!?]+(?:\s+|$)/g,n=0,o;for(;(o=r.exec(e))!==null;)t.push({text:o[0],offset:o.index}),n=o.index+o[0].length;return n<e.length&&t.push({text:e.slice(n),offset:n}),t.length===0?[{text:e,offset:0}]:t}function Te(e,t,r){let n=Math.max(1,t-r),o=[],s=/\S+\s*/g,a=[],u;for(;(u=s.exec(e))!==null;)a.push({text:u[0],offset:u.index});let i=a[0];if(!i)return Pe(e,n,0);let d="",c=i.offset;for(let l of a){if(l.text.length>n){d.length>0&&(o.push({text:d,offset:c}),d=""),o.push(...Pe(l.text,n,l.offset)),c=l.offset+l.text.length;continue}d.length+l.text.length>n?(o.push({text:d,offset:c}),d=l.text,c=l.offset):(d.length===0&&(c=l.offset),d+=l.text)}return d.length>0&&o.push({text:d,offset:c}),o}function Pe(e,t,r){let n=[];for(let o=0;o<e.length;o+=t)n.push({text:e.slice(o,o+t),offset:r+o});return n}function Pt(e,t,r,n,o){let s=Q(r,n),a=Math.max(1,n-s),u=[],i="",d=e[0]?.offset??0;for(let c of e){if(c.text.length>a){i.length>0&&(u.push(F(i,t+d,r,n)),i="");let l=o(c.text);for(let b of l)u.push(F(b.text,t+c.offset+b.offset,r,n));continue}i.length+c.text.length>a?(u.push(F(i,t+d,r,n)),i=c.text,d=c.offset):(i.length===0&&(d=c.offset),i+=c.text)}return 
i.length>0&&u.push(F(i,t+d,r,n)),u}function It(e,t,r,n){let o=[];for(let s of e)o.push(F(s.text,t+s.offset,r,n));return o}function F(e,t,r,n){let o=N(e),s=W(r,o,n);if(!s)throw new Error(`emit: assemble returned null for body length ${o.length}, limit ${n}`);let{text:a,breadcrumb:u}=s;if(a.length>n)throw new Error(`emit: assembled text length ${a.length} exceeds limit ${n} \u2014 body was not pre-split correctly`);return{text:a,breadcrumb:u,startOffset:t,endOffset:t+e.length}}function W(e,t,r){let n=[...e];for(;;){if(t.length<=r)return{text:t,breadcrumb:n};if(n.length===0)return null;n=n.slice(1)}}function Q(e,t){if(e.length===0)return 0;let r=`${e.join(`
5
- `)}
6
-
7
- `;return r.length>=t?0:r.length}function Et(e){let t="";for(let r of e)t+=r.raw;return t}function $t(e){return e.raw.replace(/\n+$/,"")}function N(e){return e.replace(/\n+$/,"")}function Rt(e,t){if(e.length<=1)return e;let r=[],n=e[0];if(!n)return e;let o=n;for(let s=1;s<e.length;s++){let a=e[s];if(!a)continue;if(!St(o.breadcrumb,a.breadcrumb)){r.push(o),o=a;continue}let u=`${o.text}
8
-
9
- ${a.text}`;(o.breadcrumb.length>0?`${o.breadcrumb.join(`
10
- `)}
11
-
12
- `:"").length+u.length<=t?o={text:u,breadcrumb:o.breadcrumb,startOffset:o.startOffset,endOffset:a.endOffset}:(r.push(o),o=a)}return r.push(o),r}function St(e,t){if(e.length!==t.length)return!1;for(let r=0;r<e.length;r++)if(e[r]!==t[r])return!1;return!0}import{glob as Dt}from"node:fs/promises";import{basename as _t}from"node:path";async function*$e(e){for await(let t of Dt(e,{withFileTypes:!0,exclude:r=>{let n=_t(r.name);return n!=="."&&n.startsWith(".")}}))t.isFile()&&(yield`${t.parentPath}/${t.name}`)}import{and as Ot,count as Ae,eq as B,sql as U}from"drizzle-orm";import{sql as Ce}from"drizzle-orm";import{customType as At,index as Mt,integer as j,pgTable as Lt,text as ve,timestamp as Fe,vector as Nt}from"drizzle-orm/pg-core";import{sql as Re}from"drizzle-orm";import{integer as Se,jsonb as Ct,pgTable as vt,text as De,timestamp as _e,unique as Ft}from"drizzle-orm/pg-core";var g=vt("files",{id:Se("id").primaryKey().generatedAlwaysAsIdentity(),baseId:Se("base_id").notNull().references(()=>k.id,{onDelete:"cascade"}),filePath:De("file_path").notNull(),contentHash:De("content_hash").notNull(),attributes:Ct("attributes").$type(),createdAt:_e("created_at").notNull().default(Re`now()`),updatedAt:_e("updated_at").notNull().default(Re`now()`)},e=>[Ft().on(e.baseId,e.filePath)]);var Bt=At({dataType(){return"tsvector"}}),f=Lt("chunks",{id:j("id").primaryKey().generatedAlwaysAsIdentity(),fileId:j("file_id").notNull().references(()=>g.id,{onDelete:"cascade"}),chunkIndex:j("chunk_index").notNull(),content:ve("content").notNull(),breadcrumbs:ve("breadcrumbs").array().notNull(),embedding:Nt("embedding",{dimensions:ee}),fts:Bt("fts"),createdAt:Fe("created_at").notNull().default(Ce`now()`),updatedAt:Fe("updated_at").notNull().default(Ce`now()`)},e=>[Mt("chunks_fts_idx").using("gin",e.fts)]);var O=class{db;constructor(t){this.db=t??x()}async getFileProcessingState(t,r){let[n]=await 
this.db.select({id:g.id,contentHash:g.contentHash}).from(g).where(Ot(B(g.filePath,t),B(g.baseId,r))).limit(1);if(!n)return null;let[o]=await this.db.select({total:Ae(),withEmbedding:Ae(U`CASE WHEN ${f.embedding} IS NOT NULL THEN 1 END`)}).from(f).where(B(f.fileId,n.id)),s=o?.total??0,a=o?.withEmbedding??0,u=s>0&&a===s;return{fileId:n.id,contentHash:n.contentHash,hasStoredChunksWithEmbeddings:u}}async upsertFile(t,r,n,o,s){let a={};n!==null&&(a.title=n);let[u]=await this.db.insert(g).values({filePath:t,contentHash:r,attributes:a,baseId:s}).onConflictDoUpdate({target:[g.baseId,g.filePath],set:{contentHash:r,attributes:a,updatedAt:U`now()`}}).returning({id:g.id});if(!u)throw new Error(`Failed to upsert file: ${t}`);return{id:u.id}}async replaceFileChunks(t,r){await this.db.transaction(async n=>{if(await n.delete(f).where(B(f.fileId,t)),r.length>0){let o=r.map(s=>({fileId:t,chunkIndex:s.chunkIndex,content:s.content,breadcrumbs:s.breadcrumbs,embedding:s.embedding,fts:U`to_tsvector('simple', unaccent(${s.content}))`}));await n.insert(f).values(o)}})}};import K from"node:path";import{parse as Ht}from"yaml";var qt=/^---\r?\n([\s\S]*?)\r?\n---\r?\n?/;function Me(e){let t=e.match(qt);if(!t)return{attributes:null,body:e};try{let r=Ht(t[1]);return{attributes:r&&typeof r=="object"&&!Array.isArray(r)?r:null,body:e.slice(t[0].length)}}catch{return{attributes:null,body:e}}}function zt(e){if(typeof e!="string")return null;let t=e.trim();return t||null}function Le(e){return Array.isArray(e)?e.flatMap(t=>{if(typeof t!="string")return[];let r=t.trim();return r?[r]:[]}):[]}function Gt(e){return e?{title:zt(e.title),aliases:Le(e.aliases),tags:Le(e.tags)}:null}function Ne(e,t,r){let n=[`File: ${e}`,`Path: ${t}`],o=Gt(r);o&&(o.title&&n.push(`Title: ${o.title}`),o.aliases.length>0&&n.push(`Aliases: ${o.aliases.join(", ")}`),o.tags.length>0&&n.push(`Tags: ${o.tags.join(", ")}`));let s=n.join(`
13
- `),a=[e];o&&(o.title&&o.title!==e&&a.push(o.title),o.aliases.length>0&&a.push(`aliases: ${o.aliases.join(", ")}`),o.tags.length>0&&a.push(`tags: ${o.tags.join(", ")}`));let u=a.join("; ");return{headerPrefix:s,titleString:u}}function Be(e,t){return t===null?{action:"process"}:t.contentHash!==e?{action:"process"}:t.hasStoredChunksWithEmbeddings?{action:"skip"}:{action:"process"}}async function Oe(e,t){let{repo:r,baseId:n,readText:o,hashContent:s,chunkMarkdown:a,embedDocument:u}=t,i=await o(e),d=s(i),c=await r.getFileProcessingState(e,n),l=Be(d,c);if(m.debug(`[${e}] decision: ${l.action}`),l.action==="skip")return m.info(`${e} -> skipped (unchanged)`),{status:"skipped",chunkCount:0};let{attributes:b,body:p}=Me(i),h=K.basename(e,".md"),y=K.basename(K.dirname(e)),{headerPrefix:$,titleString:S}=Ne(h,y,b),P=a(p||i,ne);m.debug(`[${e}] produced ${P.length} chunks`);let{id:A}=await r.upsertFile(e,d,null,new Date,n),M=await Promise.all(P.map(async(D,_)=>{let C=`${$}
14
-
15
- ${D.text}`,v=D.text.trim(),I=v?await u(v,S):await u(C,S);return m.trace(`[${e}] chunk ${_} embedded (${I.length}d)`),{content:C,embedding:I,chunkIndex:_,breadcrumbs:D.breadcrumb}}));return await r.replaceFileChunks(A,M),m.debug(`[${e}] chunks written to DB`),m.info(`${e} \u2192 ${P.length} chunks`),{status:"processed",chunkCount:P.length}}var He=jt({meta:{name:"load",description:"Load notes files"},args:{glob:{type:"string",description:"Files glob (e.g. 'notes/**/*.md')",required:!0},base:{type:"string",description:"Knowledge base name to use",default:"default"}},async run({args:e}){let t=performance.now();m.debug("Starting load...");let r=0,n=0,o=0,s=0,a=new O,i=await new T().getOrCreateBase(e.base);m.debug(`Using base: ${i.name} (id=${i.id})`);let d=null,c=async(p,h)=>(d||(d=await L(),m.debug("Embedder initialised")),d.embedDocument(p,h)),l=[],b=[];for await(let p of $e(e.glob)){m.debug(`Processing file: ${p}`);let h=await Oe(p,{repo:a,baseId:i.id,readText:y=>Wt(y,"utf8"),hashContent:y=>Qt("sha256").update(y).digest("hex"),chunkMarkdown:Ie,embedDocument:c});l.push(p),b.push(h)}r=l.length;for(let p of b)p.status==="skipped"?n++:(o++,s+=p.chunkCount);console.log(`Done. 
${r} files seen, ${o} processed, ${n} skipped, ${s} chunks total.`),console.log(`Time taken: ${((performance.now()-t)/1e3).toFixed(2)}s`)}});import{defineCommand as Jt}from"citty";import{and as V,cosineDistance as Vt,desc as X,eq as E,gt as Xt,sql as w}from"drizzle-orm";import Ut from"node:path";function qe(e,t,r,n){m.trace(`Fusing scores \u2014 vector: ${e.length}, fts: ${t.length}, trigram: ${r.length}`);let o=Math.max(...e.map(i=>i.similarity),1e-9),s=Math.max(...t.map(i=>i.rank),1e-9),a=Math.max(...r.map(i=>i.score),1e-9),u=new Map;for(let i of e)u.set(i.id,{id:i.id,filePath:i.filePath,chunkIndex:i.chunkIndex,breadcrumbs:i.breadcrumbs,content:i.content,score:i.similarity/o*n.vector});for(let i of t){let d=i.rank/s*n.fts,c=u.get(i.id);c?c.score+=d:u.set(i.id,{id:i.id,filePath:i.filePath,chunkIndex:i.chunkIndex,breadcrumbs:i.breadcrumbs,content:i.content,score:d})}for(let i of r){let d=i.score/a*n.trigram,c=u.get(i.id);c?c.score+=d:u.set(i.id,{id:i.id,filePath:i.filePath,chunkIndex:i.chunkIndex,breadcrumbs:i.breadcrumbs,content:i.content,score:d})}return u}function Kt(e){let t=/\[\[([^\]|#]+)(?:[|#][^\]]*)?\]\]/g,r=new Set,n=t.exec(e);for(;n!==null;)n[1]!==void 0&&r.add(n[1].trim()),n=t.exec(e);return[...r]}function ze(e,t,r=me,n=le,o=de){let s=new Map;for(let c of t){let l=Ut.basename(c,".md");s.has(l)||s.set(l,new Set),s.get(l)?.add(c)}let a=new Set([...e.values()].map(c=>c.filePath)),u=[...e.values()].sort((c,l)=>l.score-c.score).slice(0,r),i=new Map;for(let c of u){let l=Kt(c.content);for(let b of l){let p=s.get(b);if(p)for(let h of p){if(h===c.filePath||!a.has(h))continue;let y=i.get(h)??0,$=Math.min(y+n*c.score,o);i.set(h,$),m.trace(`Boosting ${h} by ${($-y).toFixed(4)}`)}}}let d=new Map;for(let[c,l]of e){let b=i.get(l.filePath)??0;d.set(c,b>0?{...l,score:l.score+b}:l)}return d}function Ge(e,t){let r=new Map;for(let n of e.values()){let 
o=r.get(n.filePath);!o||n.score>o.result.score?r.set(n.filePath,{result:n,extraChunks:o?o.extraChunks+1:0}):o.extraChunks++}return[...r.values()].map(({result:n})=>n).sort((n,o)=>o.score-n.score).slice(0,t)}async function Qe(e){let{vectorText:t,queryText:r,trigramText:n,embedQuery:o,db:s=x(),baseId:a,weights:u={vector:ie,fts:ae,trigram:ue},limits:i={vector:re,fts:oe,trigram:se},trigramThreshold:d=ce,trigramMode:c="strict",topK:l=10}=e,b=await o(t),p=n??r;m.debug(`Executing query \u2014 vector: "${t}", fulltext: "${r}"`);let h=w`1 - (${Vt(f.embedding,b)})`,y=c==="strict"?"strict_word_similarity":"word_similarity",$=c==="strict"?w.raw("<<%"):w.raw("<%"),S=w`${w.raw(y)}(${p}, ${f.content})`,[P,A,M]=await Promise.all([s.select({id:f.id,filePath:g.filePath,chunkIndex:f.chunkIndex,breadcrumbs:f.breadcrumbs,content:f.content,similarity:h}).from(f).innerJoin(g,E(f.fileId,g.id)).where(V(Xt(h,0),E(g.baseId,a))).orderBy(X(h)).limit(i.vector),s.select({id:f.id,filePath:g.filePath,chunkIndex:f.chunkIndex,breadcrumbs:f.breadcrumbs,content:f.content,rank:w`ts_rank(${f.fts}, websearch_to_tsquery('simple', unaccent(${r})))`}).from(f).innerJoin(g,E(f.fileId,g.id)).where(V(w`${f.fts} @@ websearch_to_tsquery('simple', unaccent(${r}))`,E(g.baseId,a))).orderBy(X(w`ts_rank(${f.fts}, websearch_to_tsquery('simple', unaccent(${r})))`)).limit(i.fts),s.transaction(async I=>(await I.execute(w`SELECT set_limit(${d})`),I.select({id:f.id,filePath:g.filePath,chunkIndex:f.chunkIndex,breadcrumbs:f.breadcrumbs,content:f.content,score:S}).from(f).innerJoin(g,E(f.fileId,g.id)).where(V(w`${p} ${$} ${f.content}`,E(g.baseId,a))).orderBy(X(S)).limit(i.trigram)))]);m.debug(`Vector: ${P.length} hits, FTS: ${A.length} hits, Trigram: ${M.length} hits`);let D=await s.select({filePath:g.filePath}).from(g).where(E(g.baseId,a)),_=qe(P,A,M,u);m.debug(`After fusion: ${_.size} unique chunks`);let C=ze(_,D.map(I=>I.filePath));m.debug(`After wikilink rerank: ${C.size} chunks`);let v=Ge(C,l);return m.debug(`Returning 
${v.length} results`),v}var We=Jt({meta:{name:"query",description:"Search notes by semantic query"},args:{vector:{type:"string",alias:"v",description:"Semantic query for vector search",required:!0},fulltext:{type:"string",alias:"f",description:'Keyword query for full-text search (supports PostgreSQL websearch syntax: OR, -word, "phrases")',required:!0},trigram:{type:"string",alias:"g",description:"Plain-text keyword for trigram search (defaults to --fulltext)",required:!1},trigramMode:{type:"string",alias:"t",description:"Trigram operator: 'strict' (strict_word_similarity, <<%) or 'word' (word_similarity, <%)",default:"strict"},base:{type:"string",description:"Knowledge base name to use",default:"default"}},async run({args:e}){let t=e.trigramMode;if(t!=="strict"&&t!=="word")throw new Error(`Invalid --trigram-mode "${t}". Must be "strict" or "word".`);let n=await new T().getBaseByName(e.base);n||(m.error(`Base '${e.base}' does not exist.`),process.exit(1));let o=await L(),s=await Qe({vectorText:e.vector,queryText:e.fulltext,trigramText:e.trigram,embedQuery:o.embedQuery.bind(o),trigramMode:t,baseId:n.id});if(s.length===0){m.info("No matching chunks found.");return}for(let a of s){let u="";u+=`<file path="${a.filePath}">
16
- `,u+=`<meta>
17
- `,u+=`Chunk index: ${a.chunkIndex}
18
- `,u+=`Score: ${Number(a.score).toFixed(3)}
19
- `,a.breadcrumbs.length>0&&(u+=`Breadcrumbs: ${a.breadcrumbs.join(" > ")}
20
- `),u+=`</meta>
21
- `,u+=`<content>
22
- `,u+=`${a.content}
23
- `,u+=`</content>
24
- `,u+=`</file>
25
- `,console.log(u)}}});import{join as Yt}from"node:path";import{fileURLToPath as Zt}from"node:url";import{migrate as en}from"drizzle-orm/pglite/migrator";var tn="./drizzle",nn=Yt(Zt(new URL(".",import.meta.url)),tn);async function je(e){try{m.debug("Running DB migrations..."),await en(e,{migrationsFolder:nn}),m.debug("Migrations complete")}catch(t){throw new Error(`Migration failed: ${t instanceof Error?t.message:String(t)}`,{cause:t})}}var sn=rn({meta:{name:"notes-query-tool",version:q.version,description:q.description,alias:"nqt"},args:{verbose:{type:"boolean",description:"Enable verbose logging (sets log level to max)",default:!1},base:{type:"string",description:"Knowledge base name to use",default:"default"}},subCommands:{load:He,query:We,drop:be},async setup({args:e}){e.verbose&&he(999);let t=x();await t.$client.waitReady,await je(t)},async cleanup(){await x().$client.close()}});await on(sn).catch(e=>{H(`Error: ${e}`,1)});process.on("unhandledRejection",e=>{H(`Unhandled Rejection: ${e}`,1)});process.on("uncaughtException",e=>{H(`Uncaught Exception: ${e}`,1)});process.on("SIGINT",()=>{H("Received SIGINT, shutting down...",0)});function H(e,t){m.error(e),x().$client.close().finally(()=>process.exit(t))}
2
+
3
+ // src/main.ts
4
+ import { defineCommand as defineCommand4, runMain } from "citty";
5
+
6
+ // package.json
7
+ var package_default = {
8
+ name: "@tichopad/notes-query-tool",
9
+ type: "module",
10
+ version: "0.1.2",
11
+ description: "CLI tool for indexing and querying Markdown notes",
12
+ license: "Unlicense",
13
+ repository: {
14
+ url: "https://github.com/tichopad/notes-query-tool"
15
+ },
16
+ engines: {
17
+ node: ">=24"
18
+ },
19
+ packageManager: "pnpm@11.5.1",
20
+ main: "dist/main.js",
21
+ bin: {
22
+ nqt: "dist/main.js"
23
+ },
24
+ files: [
25
+ "dist"
26
+ ],
27
+ scripts: {
28
+ "db:delete-stale-lock": "rm ./dbdata/postmaster.pid",
29
+ "db:generate": "drizzle-kit generate",
30
+ "db:query": "node scripts/query.ts",
31
+ "db:push": "drizzle-kit push",
32
+ "db:studio": "drizzle-kit studio",
33
+ "benchdata:load": "pnpm run dev load --glob 'benchdata/**/*.md'",
34
+ "benchdata:reindex": "rm -rf dbdata && pnpm run benchdata:load",
35
+ "testdata:load": "pnpm run dev load --glob 'testdata/**/*.md'",
36
+ "testdata:reindex": "rm -rf dbdata && pnpm run testdata:load",
37
+ dev: "node src/main.ts",
38
+ build: "node scripts/build.ts",
39
+ test: "node --test --test-reporter=spec 'src/**/*.test.ts'",
40
+ bench: "node bench/retrieval.ts",
41
+ check: "biome check . && tsc --noEmit && pnpm run test",
42
+ fix: "biome check --write ."
43
+ },
44
+ dependencies: {
45
+ "@electric-sql/pglite": "0.5.1",
46
+ "@electric-sql/pglite-pgvector": "0.0.2",
47
+ "@huggingface/transformers": "4.2.0",
48
+ citty: "0.2.2",
49
+ consola: "3.4.2",
50
+ "drizzle-orm": "0.45.2",
51
+ marked: "18.0.4",
52
+ yaml: "2.9.0"
53
+ },
54
+ devDependencies: {
55
+ "@biomejs/biome": "2.4.16",
56
+ "@types/node": "24.12.4",
57
+ "drizzle-kit": "0.31.10",
58
+ esbuild: "0.28.0",
59
+ typescript: "6.0.3"
60
+ }
61
+ };
62
+
63
+ // src/commands/drop.ts
64
+ import { createInterface } from "node:readline";
65
+ import { defineCommand } from "citty";
66
+
67
+ // src/database/base-repository.ts
68
+ import { eq } from "drizzle-orm";
69
+
70
+ // src/database/client.ts
71
+ import { PGlite } from "@electric-sql/pglite";
72
+ import { pg_trgm } from "@electric-sql/pglite/contrib/pg_trgm";
73
+ import { unaccent } from "@electric-sql/pglite/contrib/unaccent";
74
+ import { vector } from "@electric-sql/pglite-pgvector";
75
+ import { drizzle } from "drizzle-orm/pglite";
76
+
77
+ // src/config.ts
78
+ import { homedir } from "node:os";
79
+ import { join } from "node:path";
80
+ var MODEL_ID = "onnx-community/embeddinggemma-300m-ONNX";
81
+ var MODEL_DTYPE = "fp32";
82
+ var EMBEDDING_DIMS = 768;
83
/**
 * Resolve the directory in which the tool stores its data.
 *
 * `$XDG_DATA_HOME` is used when it is set to a non-empty value; otherwise the
 * location falls back to `.local/share` inside the user's home directory.
 * An `nqt` sub-directory is appended in both cases.
 *
 * @returns {string} The data directory path, always ending with "/".
 */
function getDataDir() {
  let root = process.env.XDG_DATA_HOME;
  if (!root) {
    // Unset or empty: use the conventional per-user data location.
    root = join(homedir(), ".local", "share");
  }
  const dataDir = join(root, "nqt");
  return `${dataDir}/`;
}
88
+ var DB_DATA_DIR = getDataDir();
89
+ var CHUNK_LIMIT_CHARS = 2e3;
90
+ var VECTOR_LIMIT = 30;
91
+ var FTS_LIMIT = 20;
92
+ var TRIGRAM_LIMIT = 20;
93
+ var VECTOR_WEIGHT = 0.3;
94
+ var FTS_WEIGHT = 0.4;
95
+ var TRIGRAM_WEIGHT = 0.3;
96
+ var TRIGRAM_THRESHOLD = 0.3;
97
+ var LINK_BOOST = 0.2;
98
+ var LINK_BOOST_CAP = 0.4;
99
+ var LINK_SOURCE_TOP_N = 10;
100
+
101
+ // src/database/client.ts
102
// Extensions handed to PGlite when the embedded database is opened.
var DB_EXTENSIONS = { unaccent, vector, pg_trgm };

/**
 * Open a PGlite database and wrap it in a Drizzle client.
 *
 * @param {string} [dataDir=DB_DATA_DIR] Directory passed to PGlite as `dataDir`.
 * @returns The Drizzle client created around the new PGlite instance.
 */
function createDbClient(dataDir = DB_DATA_DIR) {
  const client = new PGlite({ dataDir, extensions: DB_EXTENSIONS });
  return drizzle({ client });
}
114
// Lazily created client shared by every caller of getDb().
var _db;

/**
 * Return the shared database client, creating it on the first call.
 *
 * @returns The client produced by createDbClient(); the same object on every call.
 */
function getDb() {
  _db ??= createDbClient();
  return _db;
}
121
+
122
+ // src/database/schema/bases.ts
123
+ import { sql } from "drizzle-orm";
124
+ import { integer, pgTable, text, timestamp } from "drizzle-orm/pg-core";
125
+ var basesTable = pgTable("bases", {
126
+ id: integer("id").primaryKey().generatedAlwaysAsIdentity(),
127
+ name: text("name").notNull().unique(),
128
+ createdAt: timestamp("created_at").notNull().default(sql`now()`),
129
+ updatedAt: timestamp("updated_at").notNull().default(sql`now()`)
130
+ });
131
+
132
+ // src/database/base-repository.ts
133
/**
 * Repository for rows of the `bases` table.
 *
 * Uses the client passed to the constructor, or the shared client from
 * getDb() when none is given.
 */
var DbBaseRepository = class {
  db;

  /**
   * @param [db] Database client to use; defaults to the shared client.
   */
  constructor(db) {
    this.db = db ?? getDb();
  }

  /**
   * Look up a base by its name.
   *
   * @param {string} name Base name to match exactly.
   * @returns The first matching row, or `undefined` when there is none.
   */
  async getBaseByName(name) {
    const rows = await this.db
      .select()
      .from(basesTable)
      .where(eq(basesTable.name, name))
      .limit(1);
    return rows[0];
  }

  /**
   * Return the base with the given name, inserting it first if it is missing.
   *
   * @param {string} name Base name.
   * @returns The existing or newly inserted row.
   * @throws {Error} When the insert returns no row.
   */
  async getOrCreateBase(name) {
    const found = await this.getBaseByName(name);
    if (found) {
      return found;
    }
    const inserted = await this.db.insert(basesTable).values({ name }).returning();
    const created = inserted[0];
    if (created) {
      return created;
    }
    throw new Error(`Failed to create base: ${name}`);
  }

  /**
   * Delete the base row with the given name.
   *
   * @param {string} name Base name.
   */
  async deleteBase(name) {
    const byName = eq(basesTable.name, name);
    await this.db.delete(basesTable).where(byName);
  }
};
157
+
158
+ // src/commands/drop.ts
159
/**
 * `drop` sub-command: deletes a knowledge base by name.
 *
 * The command prints an error and calls `process.exit(1)` when the base is
 * not found. Unless `--force` is set, it asks for confirmation first and
 * prints "Aborted." when the user declines.
 */
var dropCommand = defineCommand({
  meta: {
    name: "drop",
    description: "Drop a knowledge base and all its indexed data",
  },
  args: {
    base: {
      type: "string",
      description: "Knowledge base name to drop",
      default: "default",
    },
    force: {
      type: "boolean",
      description: "Skip confirmation prompt",
      default: false,
    },
  },
  async run({ args }) {
    const { base: baseName, force } = args;
    const repository = new DbBaseRepository();

    const existing = await repository.getBaseByName(baseName);
    if (!existing) {
      console.error(`Error: Base '${baseName}' not found.`);
      process.exit(1);
    }

    // --force skips the prompt entirely; otherwise the user must agree.
    const proceed =
      force || (await confirm(`Are you sure you want to drop base '${baseName}'? [y/N] `));
    if (!proceed) {
      console.log("Aborted.");
      return;
    }

    await repository.deleteBase(baseName);
    console.log(`Base '${baseName}' dropped.`);
  },
});
196
/**
 * Ask a yes/no question on the terminal.
 *
 * The answer is trimmed and compared case-insensitively, so "y", "Yes" and
 * " yes " all count as confirmation. Anything else counts as "no", including
 * an empty answer or stdin ending before an answer is given.
 *
 * @param {string} prompt Text shown to the user.
 * @returns {Promise<boolean>} `true` only when the user answered "y" or "yes".
 */
function confirm(prompt) {
  return new Promise((resolve) => {
    const rl = createInterface({
      input: process.stdin,
      output: process.stdout,
    });
    // If the input ends (EOF, closed pipe) before the question is answered,
    // readline closes without ever invoking the question callback. Treat
    // that as "no" so the caller is not left waiting on a promise that
    // never settles.
    rl.once("close", () => resolve(false));
    rl.question(prompt, (answer) => {
      const normalized = answer.trim().toLowerCase();
      // Resolve before closing: close() fires the "close" listener above,
      // whose resolve(false) must be a no-op at that point.
      resolve(normalized === "y" || normalized === "yes");
      rl.close();
    });
  });
}
208
+
209
+ // src/commands/load.ts
210
+ import { createHash } from "node:crypto";
211
+ import { readFile } from "node:fs/promises";
212
+ import { defineCommand as defineCommand2 } from "citty";
213
+
214
+ // src/embedder.ts
215
+ import {
216
+ pipeline
217
+ } from "@huggingface/transformers";
218
+
219
+ // src/logger.ts
220
+ import { createConsola } from "consola";
221
/**
 * Determine the initial log level from the `NQT_LOG_LEVEL` environment variable.
 *
 * The variable is honoured only when it holds an integer between 0 and 5
 * (inclusive). When it is unset, empty, whitespace-only or otherwise invalid,
 * the default level 3 is used.
 *
 * @returns {number} The level to pass to the logger.
 */
function resolveLevel() {
  const DEFAULT_LEVEL = 3;
  const raw = process.env.NQT_LOG_LEVEL?.trim();
  // Number("") is 0, so an empty or blank variable must be rejected here;
  // otherwise `NQT_LOG_LEVEL=` would silently select level 0.
  if (!raw) {
    return DEFAULT_LEVEL;
  }
  const level = Number(raw);
  const isValid = Number.isInteger(level) && level >= 0 && level <= 5;
  return isValid ? level : DEFAULT_LEVEL;
}
231
// Single consola instance behind the logger facade; its initial level comes
// from resolveLevel().
var _consola = createConsola({ level: resolveLevel() });

/**
 * Change the level of the shared consola instance.
 *
 * @param {number} level New level value assigned to the instance.
 */
function setLogLevel(level) {
  _consola.level = level;
}

/**
 * Logging facade exposing `info`, `warn`, `error`, `debug` and `trace`.
 * Each method forwards its message and extra arguments to the method of the
 * same name on the shared consola instance and returns nothing.
 */
var logger = Object.fromEntries(
  ["info", "warn", "error", "debug", "trace"].map((method) => [
    method,
    (message, ...args) => {
      _consola[method](message, ...args);
    },
  ]),
);
252
+
253
+ // src/embedder.ts
254
/**
 * Create the feature-extraction pipeline for the configured model.
 *
 * Progress events are reported in one of two ways. When stderr is a TTY, a
 * single status line is rewritten in place. Otherwise the "init", "done"
 * and "ready" events are logged through the logger, and other events are
 * ignored.
 *
 * @param {string} device Device identifier forwarded to the pipeline.
 * @returns The pipeline returned by `pipeline("feature-extraction", ...)`.
 */
async function createEmbedder(device) {
  const CLEAR_LINE = "\x1B[2K\r";

  const reportProgress = (event) => {
    const label = event.file || event.model || event.name || "model files";

    if (process.stderr.isTTY) {
      switch (event.status) {
        case "progress": {
          const percent = Math.round(event.progress || 0);
          process.stderr.write(`${CLEAR_LINE}Downloading ${label}: ${percent}%`);
          break;
        }
        case "done":
          process.stderr.write(`${CLEAR_LINE}Downloaded ${label}\n`);
          break;
        case "ready":
          process.stderr.write(`${CLEAR_LINE}Model ready\n`);
          break;
        default:
          break;
      }
      return;
    }

    // Not a terminal: emit plain log lines instead of rewriting one line.
    switch (event.status) {
      case "init":
        logger.info(`Starting download: ${label}`);
        break;
      case "done":
        logger.info(`Finished download: ${label}`);
        break;
      case "ready":
        logger.info(`Model ready: ${label}`);
        break;
      default:
        break;
    }
  };

  const extractor = await pipeline("feature-extraction", MODEL_ID, {
    device,
    dtype: MODEL_DTYPE,
    progress_callback: reportProgress,
  });
  return extractor;
}
283
/**
 * Copy embedding data into a plain array, rejecting non-finite output.
 *
 * @param data Iterable or array-like of numbers.
 * @returns {number[]} The copied values, or an empty array when any value is
 *   NaN or infinite (callers treat an empty array as "invalid embedding").
 */
function extractVector(data) {
  const values = Array.from(data);
  const allFinite = values.every((value) => Number.isFinite(value));
  return allFinite ? values : [];
}
290
+ var QUERY_PREFIX = "task: search result | query: ";
291
+ var DOC_PREFIX_PREFIX = "title: ";
292
+ var DOC_PREFIX_INFIX = " | text: ";
293
+ var DEFAULT_TITLE = "none";
294
/**
 * Load the embedding pipeline and return the embedding API built on it.
 *
 * WebGPU is tried first, and the CPU is used when the WebGPU pipeline cannot
 * be created. If a WebGPU pipeline later yields non-finite values, the
 * embedder switches to a CPU pipeline once and retries the affected texts.
 *
 * The switch is safe under concurrent calls. Each call remembers which
 * pipeline produced its result, so every in-flight WebGPU call that comes
 * back invalid is retried on the CPU, and the CPU pipeline is created only
 * once.
 *
 * @returns {Promise<{
 *   embedQuery: (text: string) => Promise<number[]>,
 *   embedDocument: (body: string, title?: string) => Promise<number[]>,
 *   dispose: () => Promise<void>
 * }>} Embedding functions plus a `dispose()` that releases every pipeline
 *   created by this embedder.
 * @throws {Error} From `embedQuery`/`embedDocument` when the CPU pipeline
 *   produces non-finite values.
 */
async function initEmbedder() {
  // Every pipeline created here, so dispose() can release replaced ones too.
  const loaded = [];

  async function load(device) {
    const embed = await createEmbedder(device);
    const entry = { device, embed };
    loaded.push(entry);
    return entry;
  }

  let active;
  try {
    active = await load("webgpu");
    logger.debug("Embedder loaded on WebGPU");
  } catch (error) {
    logger.warn("WebGPU unavailable, using CPU.");
    logger.debug(
      `WebGPU load failure: ${error instanceof Error ? error.message : String(error)}`,
    );
    active = await load("cpu");
    logger.debug("Embedder loaded on CPU");
  }

  // Memoised so that concurrent callers share a single CPU fallback.
  let cpuFallback = null;
  function fallBackToCpu() {
    cpuFallback ??= (async () => {
      logger.warn("WebGPU produced invalid embeddings, falling back to CPU...");
      active = await load("cpu");
      logger.debug("Embedder loaded on CPU");
    })();
    return cpuFallback;
  }

  async function getEmbedding(text) {
    // Capture the pipeline used for THIS call. `active` may be swapped by a
    // concurrent call while we are awaiting the result.
    const { device: usedDevice, embed } = active;
    const result = await embed(text, {
      pooling: "mean",
      normalize: true,
    });
    const vector = extractVector(result.data);
    if (vector.length > 0) {
      return vector;
    }
    if (usedDevice === "webgpu") {
      await fallBackToCpu();
      return getEmbedding(text);
    }
    throw new Error("Embedding model produced non-finite values");
  }

  return {
    embedQuery(text) {
      return getEmbedding(QUERY_PREFIX + text);
    },
    embedDocument(body, title) {
      const t = title?.trim() || DEFAULT_TITLE;
      return getEmbedding(DOC_PREFIX_PREFIX + t + DOC_PREFIX_INFIX + body);
    },
    async dispose() {
      const current = active;
      await current.embed.dispose();
      // Pipelines replaced by the CPU fallback are released best-effort.
      for (const entry of loaded) {
        if (entry === current) {
          continue;
        }
        try {
          await entry.embed.dispose();
        } catch (error) {
          logger.debug(
            `Failed to dispose ${entry.device} embedder: ${error instanceof Error ? error.message : String(error)}`,
          );
        }
      }
    },
  };
}
337
+
338
+ // src/files/chunker.ts
339
+ import { marked } from "marked";
340
// Deepest heading level considered when grouping tokens into sections;
// "MAX_HEADER_LEVEL + 1" is used as the "no heading found" sentinel.
var MAX_HEADER_LEVEL = 6;
341
/**
 * Splits a Markdown document into chunks whose text is at most `limit`
 * characters long.
 *
 * @param {string} md - Markdown source.
 * @param {number} limit - Maximum chunk text length; must be positive.
 * @returns {Array<{text: string, breadcrumb: string[], startOffset: number, endOffset: number}>}
 *   Chunks in document order; empty for empty or whitespace-only input.
 * @throws {Error} When `limit` is zero or negative.
 */
function chunkMarkdown(md, limit) {
  if (limit <= 0) {
    throw new Error("limit must be > 0");
  }
  const isBlank = md.length === 0 || md.trim().length === 0;
  if (isBlank) {
    return [];
  }
  const positioned = lexWithOffsets(md);
  if (positioned.length === 0) {
    return [];
  }
  const rendered = renderSection(buildRootSection(positioned), [], limit);
  return greedyMerge(rendered, limit);
}
351
/**
 * Lexes Markdown with `marked` and annotates every top-level token with its
 * character range, derived by accumulating the lengths of the tokens' `raw`
 * text.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<{token: object, start: number, end: number, raw: string}>}
 */
function lexWithOffsets(md) {
  let offset = 0;
  return marked.lexer(md).map((token) => {
    const raw = token.raw ?? "";
    const start = offset;
    offset += raw.length;
    return { token, start, end: offset, raw };
  });
}
364
/**
 * Wraps all positioned tokens into a heading-less, level-0 root section that
 * spans from the first token's start to the last token's end.
 *
 * @param {Array<{start: number, end: number}>} tokens - Positioned tokens.
 * @returns {{heading: null, level: number, body: Array, start: number, end: number}}
 */
function buildRootSection(tokens) {
  const firstToken = tokens[0];
  const lastToken = tokens[tokens.length - 1];
  const root = {
    heading: null,
    level: 0,
    body: tokens,
    start: firstToken?.start ?? 0,
    end: lastToken?.end ?? 0,
  };
  return root;
}
373
/**
 * Groups tokens into sections, each opened by a heading of the shallowest
 * depth present in `body`. Tokens before the first such heading form a
 * leading section with `heading: null` and `level: 0`.
 *
 * @param {Array<{token: object, start: number, end: number}>} body
 * @returns {Array<object>|null} The sections, or null when `body` contains no
 *   heading at all.
 */
function groupByShallowestHeading(body) {
  const noHeading = MAX_HEADER_LEVEL + 1;
  const shallowest = body
    .filter((pt) => isHeading(pt.token))
    .reduce((min, pt) => (pt.token.depth < min ? pt.token.depth : min), noHeading);
  if (shallowest > MAX_HEADER_LEVEL) {
    return null;
  }

  const sections = [];
  let open = null;
  const closeOpenSection = () => {
    if (open) sections.push(finalizeSection(open));
  };

  for (const pt of body) {
    const opensSection = isHeading(pt.token) && pt.token.depth === shallowest;
    if (opensSection) {
      closeOpenSection();
      open = {
        heading: pt,
        level: shallowest,
        body: [],
        start: pt.start,
        end: pt.end,
      };
      continue;
    }
    // Content ahead of the first heading gets an anonymous section.
    open ??= {
      heading: null,
      level: 0,
      body: [],
      start: pt.start,
      end: pt.end,
    };
    open.body.push(pt);
    open.end = pt.end;
  }
  closeOpenSection();
  return sections;
}
411
/**
 * Sets a section's `end` to the end of its last body token, or to the end of
 * its heading when the body is empty. Mutates and returns the same object.
 *
 * @param {{body: Array<{end: number}>, heading: {end: number}|null, end: number}} s
 * @returns {object} The same section object.
 */
function finalizeSection(s) {
  const anchor = s.body[s.body.length - 1] || s.heading;
  if (anchor) {
    s.end = anchor.end;
  }
  return s;
}
420
/**
 * Tells whether a token is a heading token.
 *
 * @param {{type: string}} t - Token to inspect.
 * @returns {boolean} True when `t.type` is "heading".
 */
function isHeading(t) {
  const { type } = t;
  return type === "heading";
}
423
/**
 * Renders a section into chunks. When the whole section (its heading line plus
 * body) fits within `limit`, it becomes a single chunk. Otherwise it is split
 * by its shallowest sub-headings, recursing into each one, and falls back to
 * token-level splitting when it has no sub-headings.
 *
 * @param {object} section - Section with `heading`, `body`, `start`, `end`.
 * @param {string[]} ancestors - Heading lines of the enclosing sections.
 * @param {number} limit - Maximum chunk text length.
 * @returns {Array<{text: string, breadcrumb: string[], startOffset: number, endOffset: number}>}
 */
function renderSection(section, ancestors, limit) {
  const { heading, body } = section;
  const headingLine = heading ? headingLineOf(heading) : null;
  const firstToken = body[0];
  const lastToken = body[body.length - 1];
  const bodyStart = firstToken?.start ?? heading?.end ?? section.start;
  const bodyEnd = lastToken?.end ?? heading?.end ?? section.end;

  const headingPrefix = heading ? `${headingLine}\n\n` : "";
  const fullBody = trimTrailingNewlines(headingPrefix + concatRaw(body));
  const whole = assemble(ancestors, fullBody, limit);
  if (whole !== null && whole.text.length <= limit && fullBody.length > 0) {
    return [
      {
        text: whole.text,
        breadcrumb: whole.breadcrumb,
        startOffset: heading ? heading.start : bodyStart,
        endOffset: bodyEnd,
      },
    ];
  }

  // Too large for one chunk: this section's heading joins the breadcrumb of
  // everything rendered below it.
  const childAncestors = headingLine ? [...ancestors, headingLine] : ancestors;
  const subsections = groupByShallowestHeading(body);
  const hasHeadedSubsection =
    Boolean(subsections) &&
    subsections.length > 0 &&
    subsections.some((s) => s.heading !== null);
  if (!hasHeadedSubsection) {
    return splitBody(body, childAncestors, limit);
  }

  return subsections.flatMap((sub) => {
    if (sub.heading !== null) {
      return renderSection(sub, childAncestors, limit);
    }
    // Leading content before the first sub-heading; skip it when blank.
    const hasContent = sub.body.some((t) => t.raw.trim().length > 0);
    return hasContent ? splitBody(sub.body, childAncestors, limit) : [];
  });
}
460
/**
 * Turns each non-blank token into its own chunk; tokens that do not fit within
 * `limit` are handed to `splitOversizeBlock`.
 *
 * @param {Array<{raw: string, start: number, end: number}>} body
 * @param {string[]} ancestors - Heading lines used as the breadcrumb.
 * @param {number} limit - Maximum chunk text length.
 * @returns {Array<object>} Chunks in token order.
 */
function splitBody(body, ancestors, limit) {
  return body.flatMap((pt) => {
    const text = trimTrailingNewlines(pt.raw);
    if (text.trim().length === 0) {
      return [];
    }
    const fitted = assemble(ancestors, text, limit);
    if (fitted === null || !(fitted.text.length <= limit)) {
      return splitOversizeBlock(pt, ancestors, limit);
    }
    return [
      {
        text: fitted.text,
        breadcrumb: fitted.breadcrumb,
        startOffset: pt.start,
        endOffset: pt.end,
      },
    ];
  });
}
480
/**
 * Splits a single token that is too large for one chunk: by sentences when it
 * contains more than one, otherwise directly by words and, for over-long
 * words, by characters.
 *
 * @param {{raw: string, start: number}} pt - Positioned token to split.
 * @param {string[]} ancestors - Heading lines used as the breadcrumb.
 * @param {number} limit - Maximum chunk text length.
 * @returns {Array<object>} Chunks covering the token.
 */
function splitOversizeBlock(pt, ancestors, limit) {
  const raw = trimTrailingNewlines(pt.raw);
  const byWords = (text) =>
    splitWordsThenChars(text, limit, breadcrumbBudgetSize(ancestors, limit));

  const sentences = splitSentences(raw);
  if (sentences.length <= 1) {
    return assembleSlicesRaw(byWords(raw), pt.start, ancestors, limit);
  }
  // Sentences that are still too long are re-split by words on demand.
  return assembleSlices(sentences, pt.start, ancestors, limit, byWords);
}
503
/**
 * Splits text into sentence-like slices. A sentence ends at a run of ".", "!"
 * or "?" that is followed by whitespace or the end of the text.
 *
 * The slices always cover `raw` completely and contiguously. Text the pattern
 * steps over (for example "1." in "version 1.2", or leading punctuation) is
 * attached to the following slice instead of being dropped, so concatenating
 * all slice texts reproduces `raw`.
 *
 * @param {string} raw - Text to split.
 * @returns {Array<{text: string, offset: number}>} Slices with their offsets
 *   into `raw`; a single slice holding `raw` when nothing could be split.
 */
function splitSentences(raw) {
  const slices = [];
  const sentenceEnd = /[^.!?]+[.!?]+(?:\s+|$)/g;
  let lastEnd = 0;
  for (const match of raw.matchAll(sentenceEnd)) {
    const end = match.index + match[0].length;
    // Start at lastEnd rather than match.index so skipped text is kept.
    slices.push({ text: raw.slice(lastEnd, end), offset: lastEnd });
    lastEnd = end;
  }
  if (lastEnd < raw.length) {
    slices.push({ text: raw.slice(lastEnd), offset: lastEnd });
  }
  return slices.length === 0 ? [{ text: raw, offset: 0 }] : slices;
}
517
/**
 * Packs whitespace-delimited words into slices of at most
 * `max(1, limit - budget)` characters. A word (with its trailing whitespace)
 * that is longer than that on its own is cut with `hardSlice`.
 *
 * @param {string} raw - Text to split.
 * @param {number} limit - Maximum chunk text length.
 * @param {number} budget - Characters reserved out of `limit`.
 * @returns {Array<{text: string, offset: number}>} Slices with offsets into `raw`.
 */
function splitWordsThenChars(raw, limit, budget) {
  const avail = Math.max(1, limit - budget);
  const words = Array.from(raw.matchAll(/\S+\s*/g), (m) => ({
    text: m[0],
    offset: m.index,
  }));
  if (words.length === 0) {
    return hardSlice(raw, avail, 0);
  }

  const slices = [];
  let pending = "";
  let pendingOffset = words[0].offset;
  const flush = () => {
    if (pending.length === 0) return;
    slices.push({ text: pending, offset: pendingOffset });
    pending = "";
  };

  for (const { text, offset } of words) {
    if (text.length > avail) {
      flush();
      slices.push(...hardSlice(text, avail, offset));
      pendingOffset = offset + text.length;
    } else if (pending.length + text.length > avail) {
      flush();
      pending = text;
      pendingOffset = offset;
    } else {
      if (pending.length === 0) pendingOffset = offset;
      pending += text;
    }
  }
  flush();
  return slices;
}
552
/**
 * Cuts text into consecutive pieces of at most `size` UTF-16 code units.
 *
 * A cut is moved one unit earlier when it would fall between the two halves of
 * a surrogate pair, so pieces never end with a dangling high surrogate. With
 * `size === 1` a pair cannot be kept together and is split.
 *
 * @param {string} text4 - Text to cut.
 * @param {number} size - Maximum piece length; must be at least 1.
 * @param {number} baseOffset - Offset added to each piece's position.
 * @returns {Array<{text: string, offset: number}>} Pieces in order.
 * @throws {RangeError} When `size` is below 1 (the loop could never advance).
 */
function hardSlice(text4, size, baseOffset) {
  if (!(size >= 1)) {
    throw new RangeError(`hardSlice: size must be >= 1, got ${size}`);
  }
  const out = [];
  let i = 0;
  while (i < text4.length) {
    let end = Math.min(i + size, text4.length);
    if (end < text4.length && end - i > 1) {
      const before = text4.charCodeAt(end - 1);
      const after = text4.charCodeAt(end);
      const splitsPair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair) end -= 1;
    }
    out.push({ text: text4.slice(i, end), offset: baseOffset + i });
    i = end;
  }
  return out;
}
559
/**
 * Packs consecutive slices into chunks whose body is at most
 * `max(1, limit - breadcrumbBudgetSize(ancestors, limit))` characters. A slice
 * that is too long on its own is passed to `furtherSplit`, and each returned
 * piece is emitted as its own chunk.
 *
 * @param {Array<{text: string, offset: number}>} slices - Offsets are relative
 *   to `baseOffset`.
 * @param {number} baseOffset - Offset of the sliced text.
 * @param {string[]} ancestors - Heading lines used as the breadcrumb.
 * @param {number} limit - Maximum chunk text length.
 * @param {(text: string) => Array<{text: string, offset: number}>} furtherSplit
 * @returns {Array<object>} Chunks produced by `emit`.
 */
function assembleSlices(slices, baseOffset, ancestors, limit, furtherSplit) {
  const avail = Math.max(1, limit - breadcrumbBudgetSize(ancestors, limit));
  const chunks = [];
  let pending = "";
  let pendingOffset = slices[0]?.offset ?? 0;
  const flush = () => {
    if (pending.length === 0) return;
    chunks.push(emit(pending, baseOffset + pendingOffset, ancestors, limit));
    pending = "";
  };

  for (const { text, offset } of slices) {
    if (text.length > avail) {
      flush();
      for (const piece of furtherSplit(text)) {
        chunks.push(
          emit(piece.text, baseOffset + offset + piece.offset, ancestors, limit)
        );
      }
    } else if (pending.length + text.length > avail) {
      flush();
      pending = text;
      pendingOffset = offset;
    } else {
      if (pending.length === 0) pendingOffset = offset;
      pending += text;
    }
  }
  flush();
  return chunks;
}
592
/**
 * Emits one chunk per slice, without merging neighbouring slices.
 *
 * @param {Array<{text: string, offset: number}>} slices - Offsets are relative
 *   to `baseOffset`.
 * @param {number} baseOffset - Offset of the sliced text.
 * @param {string[]} ancestors - Heading lines used as the breadcrumb.
 * @param {number} limit - Maximum chunk text length.
 * @returns {Array<object>} Chunks produced by `emit`, in slice order.
 */
function assembleSlicesRaw(slices, baseOffset, ancestors, limit) {
  return slices.map(({ text, offset }) =>
    emit(text, baseOffset + offset, ancestors, limit)
  );
}
599
/**
 * Builds a chunk from pre-split body text.
 *
 * @param {string} body - Body text; trailing newlines are removed from the
 *   chunk text but still counted towards `endOffset`.
 * @param {number} startOffset - Offset of `body` in the source document.
 * @param {string[]} ancestors - Heading lines used as the breadcrumb.
 * @param {number} limit - Maximum chunk text length.
 * @returns {{text: string, breadcrumb: string[], startOffset: number, endOffset: number}}
 * @throws {Error} When the body cannot be assembled within `limit`.
 */
function emit(body, startOffset, ancestors, limit) {
  const trimmed = trimTrailingNewlines(body);
  const assembled = assemble(ancestors, trimmed, limit);
  if (!assembled) {
    throw new Error(
      `emit: assemble returned null for body length ${trimmed.length}, limit ${limit}`
    );
  }
  if (assembled.text.length > limit) {
    throw new Error(
      `emit: assembled text length ${assembled.text.length} exceeds limit ${limit} \u2014 body was not pre-split correctly`
    );
  }
  return {
    text: assembled.text,
    breadcrumb: assembled.breadcrumb,
    startOffset,
    endOffset: startOffset + body.length,
  };
}
618
/**
 * Pairs a body with its breadcrumb when the body fits within `limit`.
 *
 * Only the body length is compared against `limit`; the breadcrumb does not
 * count towards it.
 *
 * @param {string[]} ancestors - Heading lines of the enclosing sections.
 * @param {string} body - Chunk text.
 * @param {number} limit - Maximum chunk text length.
 * @returns {{text: string, breadcrumb: string[]}|null} The chunk text with a
 *   copy of `ancestors`, or null when the body is longer than `limit`.
 */
function assemble(ancestors, body, limit) {
  const fits = body.length <= limit;
  if (!fits) {
    return null;
  }
  return { text: body, breadcrumb: [...ancestors] };
}
626
/**
 * Computes how many characters to reserve for the breadcrumb prefix: the
 * ancestors joined by newlines, followed by a blank line.
 *
 * @param {string[]} ancestors - Heading lines of the enclosing sections.
 * @param {number} limit - Maximum chunk text length.
 * @returns {number} The prefix length, or 0 when there are no ancestors or
 *   the prefix alone would reach `limit`.
 */
function breadcrumbBudgetSize(ancestors, limit) {
  if (ancestors.length === 0) {
    return 0;
  }
  // "+ 2" accounts for the "\n\n" separating the prefix from the body.
  const prefixLength = ancestors.join("\n").length + 2;
  return prefixLength >= limit ? 0 : prefixLength;
}
636
/**
 * Concatenates the `raw` text of the given tokens in order.
 *
 * @param {Array<{raw: string}>} tokens
 * @returns {string} The joined raw text; "" for no tokens.
 */
function concatRaw(tokens) {
  return tokens.reduce((joined, token) => joined + token.raw, "");
}
641
/**
 * Returns a heading token's raw text without its trailing newlines.
 *
 * @param {{raw: string}} pt - Positioned heading token.
 * @returns {string} The heading line.
 */
function headingLineOf(pt) {
  const { raw } = pt;
  let end = raw.length;
  while (end > 0 && raw[end - 1] === "\n") {
    end -= 1;
  }
  return raw.slice(0, end);
}
644
/**
 * Removes all newline characters from the end of a string. Other trailing
 * whitespace (spaces, "\r") is left in place.
 *
 * @param {string} s
 * @returns {string}
 */
function trimTrailingNewlines(s) {
  let end = s.length;
  while (end > 0 && s.charCodeAt(end - 1) === 10) {
    end -= 1;
  }
  return s.slice(0, end);
}
647
/**
 * Merges neighbouring chunks that share the same breadcrumb for as long as
 * the merged text plus the breadcrumb prefix stays within `limit`. Merged
 * texts are joined with a blank line.
 *
 * @param {Array<{text: string, breadcrumb: string[], startOffset: number, endOffset: number}>} chunks
 * @param {number} limit - Maximum combined length.
 * @returns {Array<object>} The merged chunks; the input array itself when it
 *   has at most one element.
 */
function greedyMerge(chunks, limit) {
  const [head, ...rest] = chunks;
  if (chunks.length <= 1 || !head) {
    return chunks;
  }

  // Returns the merged chunk, or null when the two must stay separate.
  const tryMerge = (left, right) => {
    if (!sameBreadcrumb(left.breadcrumb, right.breadcrumb)) {
      return null;
    }
    const mergedText = `${left.text}\n\n${right.text}`;
    const prefixLength =
      left.breadcrumb.length > 0 ? left.breadcrumb.join("\n").length + 2 : 0;
    if (!(prefixLength + mergedText.length <= limit)) {
      return null;
    }
    return {
      text: mergedText,
      breadcrumb: left.breadcrumb,
      startOffset: left.startOffset,
      endOffset: right.endOffset,
    };
  };

  const merged = [];
  let pending = head;
  for (const candidate of rest) {
    if (!candidate) continue;
    const combined = tryMerge(pending, candidate);
    if (combined) {
      pending = combined;
    } else {
      merged.push(pending);
      pending = candidate;
    }
  }
  merged.push(pending);
  return merged;
}
682
/**
 * Compares two breadcrumbs element by element.
 *
 * @param {string[]} a
 * @param {string[]} b
 * @returns {boolean} True when both have the same length and strictly equal
 *   entries at every index.
 */
function sameBreadcrumb(a, b) {
  return a.length === b.length && a.every((entry, i) => entry === b[i]);
}
687
+
688
+ // src/files/load-files.ts
689
+ import { glob } from "node:fs/promises";
690
+ import { basename } from "node:path";
691
/**
 * Yields the paths of regular files matching a glob pattern, excluding
 * entries whose name starts with a dot (other than "." itself).
 *
 * @param {string} globPattern - Pattern passed to `glob`.
 * @yields {string} `<parentPath>/<name>` for each matching file.
 */
async function* loadFilesByGlob(globPattern) {
  const isHidden = (dirent) => {
    const name = basename(dirent.name);
    return name !== "." && name.startsWith(".");
  };
  const entries = glob(globPattern, {
    withFileTypes: true,
    exclude: isHidden,
  });
  for await (const entry of entries) {
    if (!entry.isFile()) continue;
    yield `${entry.parentPath}/${entry.name}`;
  }
}
704
+
705
+ // src/commands/load/load-repository.ts
706
+ import { and, count, eq as eq2, sql as sql4 } from "drizzle-orm";
707
+
708
+ // src/database/schema/chunks.ts
709
+ import { sql as sql3 } from "drizzle-orm";
710
+ import {
711
+ customType,
712
+ index,
713
+ integer as integer3,
714
+ pgTable as pgTable3,
715
+ text as text3,
716
+ timestamp as timestamp3,
717
+ vector as vector2
718
+ } from "drizzle-orm/pg-core";
719
+
720
+ // src/database/schema/files.ts
721
+ import { sql as sql2 } from "drizzle-orm";
722
+ import {
723
+ integer as integer2,
724
+ jsonb,
725
+ pgTable as pgTable2,
726
+ text as text2,
727
+ timestamp as timestamp2,
728
+ unique
729
+ } from "drizzle-orm/pg-core";
730
// Drizzle schema for the "files" table: one row per file path within a base.
var filesTable = pgTable2(
  "files",
  {
    id: integer2("id").primaryKey().generatedAlwaysAsIdentity(),
    // Owning base; ON DELETE CASCADE removes file rows together with the base.
    baseId: integer2("base_id").notNull().references(() => basesTable.id, { onDelete: "cascade" }),
    filePath: text2("file_path").notNull(),
    // Hash of the file content, compared on load to decide whether to re-index.
    contentHash: text2("content_hash").notNull(),
    // Free-form JSON attributes; nullable.
    attributes: jsonb("attributes").$type(),
    createdAt: timestamp2("created_at").notNull().default(sql2`now()`),
    updatedAt: timestamp2("updated_at").notNull().default(sql2`now()`)
  },
  // A file path is unique within a base; this pair is the upsert conflict target.
  (table) => [unique().on(table.baseId, table.filePath)]
);
743
+
744
+ // src/database/schema/chunks.ts
745
// Custom column type mapping to PostgreSQL's "tsvector".
var tsvector = customType({
  dataType() {
    return "tsvector";
  }
});
// Drizzle schema for the "chunks" table: the indexed pieces of each file.
var chunksTable = pgTable3(
  "chunks",
  {
    id: integer3("id").primaryKey().generatedAlwaysAsIdentity(),
    // Owning file; ON DELETE CASCADE removes chunks together with the file.
    fileId: integer3("file_id").notNull().references(() => filesTable.id, { onDelete: "cascade" }),
    chunkIndex: integer3("chunk_index").notNull(),
    content: text3("content").notNull(),
    breadcrumbs: text3("breadcrumbs").array().notNull(),
    // Nullable embedding vector with EMBEDDING_DIMS dimensions.
    embedding: vector2("embedding", { dimensions: EMBEDDING_DIMS }),
    // Nullable full-text search vector.
    fts: tsvector("fts"),
    createdAt: timestamp3("created_at").notNull().default(sql3`now()`),
    updatedAt: timestamp3("updated_at").notNull().default(sql3`now()`)
  },
  // GIN index on the full-text search column.
  (table) => [index("chunks_fts_idx").using("gin", table.fts)]
);
765
+
766
+ // src/commands/load/load-repository.ts
767
/**
 * Database access used by the load command: reads a file's indexing state,
 * upserts file rows and replaces a file's chunks.
 */
var DbLoadRepository = class {
  db;

  /**
   * @param {object} [db] - Drizzle database; defaults to `getDb()`.
   */
  constructor(db) {
    this.db = db ?? getDb();
  }

  /**
   * Looks up a file by path and base and reports whether all of its stored
   * chunks have embeddings.
   *
   * @param {string} filePath
   * @param {number} baseId
   * @returns {Promise<{fileId: number, contentHash: string, hasStoredChunksWithEmbeddings: boolean}|null>}
   *   Null when the file is not stored.
   */
  async getFileProcessingState(filePath, baseId) {
    const matchesFile = and(
      eq2(filesTable.filePath, filePath),
      eq2(filesTable.baseId, baseId)
    );
    const fileRows = await this.db
      .select({ id: filesTable.id, contentHash: filesTable.contentHash })
      .from(filesTable)
      .where(matchesFile)
      .limit(1);
    const [file] = fileRows;
    if (!file) {
      return null;
    }

    const countRows = await this.db
      .select({
        total: count(),
        withEmbedding: count(
          sql4`CASE WHEN ${chunksTable.embedding} IS NOT NULL THEN 1 END`
        ),
      })
      .from(chunksTable)
      .where(eq2(chunksTable.fileId, file.id));
    const [counts] = countRows;
    const total = counts?.total ?? 0;
    const withEmbedding = counts?.withEmbedding ?? 0;

    return {
      fileId: file.id,
      contentHash: file.contentHash,
      // A file without any chunks counts as not embedded.
      hasStoredChunksWithEmbeddings: total > 0 && withEmbedding === total,
    };
  }

  /**
   * Inserts the file row, or updates its hash and attributes when a row for
   * the same base and path already exists.
   *
   * @param {string} filePath
   * @param {string} contentHash
   * @param {string|null} title - Stored as `attributes.title` unless null.
   * @param {Date} _updatedAt - Unused; `updated_at` is set to `now()` by the database.
   * @param {number} baseId
   * @returns {Promise<{id: number}>}
   * @throws {Error} When the statement returns no row.
   */
  async upsertFile(filePath, contentHash, title, _updatedAt, baseId) {
    const attributes = title !== null ? { title } : {};
    const rows = await this.db
      .insert(filesTable)
      .values({ filePath, contentHash, attributes, baseId })
      .onConflictDoUpdate({
        target: [filesTable.baseId, filesTable.filePath],
        set: {
          contentHash,
          attributes,
          updatedAt: sql4`now()`,
        },
      })
      .returning({ id: filesTable.id });
    const [file] = rows;
    if (!file) {
      throw new Error(`Failed to upsert file: ${filePath}`);
    }
    return { id: file.id };
  }

  /**
   * Deletes a file's chunks and inserts the given ones in one transaction.
   *
   * @param {number} fileId
   * @param {Array<{chunkIndex: number, content: string, breadcrumbs: string[], embedding: number[]}>} chunks
   * @returns {Promise<void>}
   */
  async replaceFileChunks(fileId, chunks) {
    await this.db.transaction(async (tx) => {
      await tx.delete(chunksTable).where(eq2(chunksTable.fileId, fileId));
      if (chunks.length === 0) {
        return;
      }
      const rows = chunks.map(({ chunkIndex, content, breadcrumbs, embedding }) => ({
        fileId,
        chunkIndex,
        content,
        breadcrumbs,
        embedding,
        fts: sql4`to_tsvector('simple', unaccent(${content}))`,
      }));
      await tx.insert(chunksTable).values(rows);
    });
  }
};
831
+
832
+ // src/commands/load/process-file.ts
833
+ import path from "node:path";
834
+
835
+ // src/files/frontmatter.ts
836
+ import { parse as parseYaml } from "yaml";
837
// Frontmatter block at the very start of the content. Both "---" delimiters
// must stand alone on their line (trailing spaces/tabs allowed), so a body line
// such as "---text" or "----" is not mistaken for the closing delimiter. The
// YAML part is optional so an empty block ("---\n---") is recognised as well.
var FRONTMATTER_RE = /^---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/;

/**
 * Separates YAML frontmatter from the rest of a document.
 *
 * @param {string} content - Full file content.
 * @returns {{attributes: object|null, body: string}} `attributes` is the
 *   parsed YAML when it is a mapping, otherwise null. `body` is the content
 *   after the frontmatter block; it is the unchanged `content` when there is
 *   no frontmatter block or its YAML cannot be parsed.
 */
function extractFrontmatter(content) {
  const match = FRONTMATTER_RE.exec(content);
  if (!match) {
    return { attributes: null, body: content };
  }
  try {
    // Group 1 is undefined for an empty frontmatter block.
    const parsed = parseYaml(match[1] ?? "");
    const isMapping =
      Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
    return {
      attributes: isMapping ? parsed : null,
      body: content.slice(match[0].length),
    };
  } catch {
    // Best effort: invalid YAML leaves the document untouched.
    return { attributes: null, body: content };
  }
}
851
+
852
+ // src/commands/load/build-document-header.ts
853
/**
 * Normalizes a frontmatter title.
 *
 * @param {unknown} value - Raw attribute value.
 * @returns {string|null} The trimmed title, or null when the value is not a
 *   string or is blank.
 */
function normalizeTitle(value) {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
858
/**
 * Normalizes a frontmatter list of strings.
 *
 * @param {unknown} value - Raw attribute value.
 * @returns {string[]} The trimmed, non-blank string entries in their original
 *   order; empty when the value is not an array.
 */
function normalizeStringList(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .filter((item) => typeof item === "string")
    .map((item) => item.trim())
    .filter((item) => item.length > 0);
}
866
/**
 * Extracts the title, aliases and tags from frontmatter attributes.
 *
 * @param {object|null|undefined} attributes - Parsed frontmatter.
 * @returns {{title: string|null, aliases: string[], tags: string[]}|null}
 *   Null when no attributes are given.
 */
function normalizeDocumentMetadata(attributes) {
  if (!attributes) {
    return null;
  }
  const { title, aliases, tags } = attributes;
  return {
    title: normalizeTitle(title),
    aliases: normalizeStringList(aliases),
    tags: normalizeStringList(tags),
  };
}
874
/**
 * Builds the header text and the title string describing a document.
 *
 * @param {string} basename2 - File name without extension.
 * @param {string} parentDir - Name of the containing directory.
 * @param {object|null} attributes - Parsed frontmatter, if any.
 * @returns {{headerPrefix: string, titleString: string}} `headerPrefix` is a
 *   newline-separated "File/Path/Title/Aliases/Tags" block; `titleString` is a
 *   "; "-separated summary that starts with the file name.
 */
function buildDocumentHeader(basename2, parentDir, attributes) {
  const metadata = normalizeDocumentMetadata(attributes);
  const title = metadata ? metadata.title : null;
  const aliases = metadata ? metadata.aliases : [];
  const tags = metadata ? metadata.tags : [];

  const headerLines = [`File: ${basename2}`, `Path: ${parentDir}`];
  const titleParts = [basename2];

  if (title) {
    headerLines.push(`Title: ${title}`);
    // Avoid repeating the file name in the title string.
    if (title !== basename2) {
      titleParts.push(title);
    }
  }
  if (aliases.length > 0) {
    const aliasList = aliases.join(", ");
    headerLines.push(`Aliases: ${aliasList}`);
    titleParts.push(`aliases: ${aliasList}`);
  }
  if (tags.length > 0) {
    const tagList = tags.join(", ");
    headerLines.push(`Tags: ${tagList}`);
    titleParts.push(`tags: ${tagList}`);
  }

  return {
    headerPrefix: headerLines.join("\n"),
    titleString: titleParts.join("; "),
  };
}
897
+
898
+ // src/commands/load/decide-file-processing.ts
899
/**
 * Decides whether a file has to be (re)processed.
 *
 * @param {string} nextContentHash - Hash of the current file content.
 * @param {{contentHash: string, hasStoredChunksWithEmbeddings: boolean}|null} existing
 *   Stored state, or null when the file is not stored yet.
 * @returns {{action: "process"|"skip"}} "skip" only when the file is stored
 *   with the same hash and fully embedded chunks.
 */
function decideFileProcessing(nextContentHash, existing) {
  const isUpToDate =
    existing !== null &&
    existing.contentHash === nextContentHash &&
    Boolean(existing.hasStoredChunksWithEmbeddings);
  return { action: isUpToDate ? "skip" : "process" };
}
905
+
906
+ // src/commands/load/process-file.ts
907
/**
 * Indexes one file: reads it, skips it when unchanged, otherwise chunks it,
 * embeds every chunk and stores the file row together with its chunks.
 *
 * All embeddings are computed before anything is written. Writing the file row
 * (and its new content hash) first would mean that a failed embedding leaves
 * the new hash next to the old, fully embedded chunks, and later runs would
 * then skip the file as "unchanged" with stale chunks.
 *
 * @param {string} filePath - Path of the file to index.
 * @param {object} deps - Injected collaborators: `repo`, `baseId`, `readText`,
 *   `hashContent`, `chunkMarkdown` and `embedDocument`.
 * @returns {Promise<{status: "skipped"|"processed", chunkCount: number}>}
 */
async function processLoadedFile(filePath, deps) {
  const { repo, baseId, readText, hashContent, chunkMarkdown: chunkMarkdown2, embedDocument } = deps;
  const content = await readText(filePath);
  const contentHash = hashContent(content);
  const existingState = await repo.getFileProcessingState(filePath, baseId);
  const decision = decideFileProcessing(contentHash, existingState);
  logger.debug(`[${filePath}] decision: ${decision.action}`);
  if (decision.action === "skip") {
    logger.info(`${filePath} -> skipped (unchanged)`);
    return { status: "skipped", chunkCount: 0 };
  }

  const { attributes, body } = extractFrontmatter(content);
  const basename2 = path.basename(filePath, ".md");
  const parentDir = path.basename(path.dirname(filePath));
  const { headerPrefix, titleString } = buildDocumentHeader(
    basename2,
    parentDir,
    attributes
  );
  // Falls back to the full content when nothing remains after the frontmatter.
  const chunks = chunkMarkdown2(body || content, CHUNK_LIMIT_CHARS);
  logger.debug(`[${filePath}] produced ${chunks.length} chunks`);

  // Embed first: this is the slow step and nothing has been written yet.
  const chunkDocs = await Promise.all(
    chunks.map(async (chunk, i) => {
      const augmented = `${headerPrefix}\n\n${chunk.text}`;
      const bodyText = chunk.text.trim();
      const embedding = bodyText
        ? await embedDocument(bodyText, titleString)
        : await embedDocument(augmented, titleString);
      logger.trace(`[${filePath}] chunk ${i} embedded (${embedding.length}d)`);
      return {
        content: augmented,
        embedding,
        chunkIndex: i,
        breadcrumbs: chunk.breadcrumb,
      };
    })
  );

  // NOTE(review): the title argument is always null here, so no title is
  // stored in the file attributes — confirm that this is intended.
  const { id: fileId } = await repo.upsertFile(
    filePath,
    contentHash,
    null,
    /* @__PURE__ */ new Date(),
    baseId
  );
  await repo.replaceFileChunks(fileId, chunkDocs);
  logger.debug(`[${filePath}] chunks written to DB`);
  logger.info(`${filePath} \u2192 ${chunks.length} chunks`);
  return { status: "processed", chunkCount: chunks.length };
}
956
+
957
+ // src/commands/load.ts
958
/**
 * `load` command: indexes every file matching `--glob` into the knowledge base
 * named by `--base` and prints a summary.
 */
var loadCommand = defineCommand2({
  meta: {
    name: "load",
    description: "Load notes files",
  },
  args: {
    glob: {
      type: "string",
      description: "Files glob (e.g. 'notes/**/*.md')",
      required: true,
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default",
    },
  },
  async run({ args }) {
    const start = performance.now();
    logger.debug("Starting load...");
    const repo = new DbLoadRepository();
    const baseRepo = new DbBaseRepository();
    const base = await baseRepo.getOrCreateBase(args.base);
    logger.debug(`Using base: ${base.name} (id=${base.id})`);

    // The embedder is created lazily so that runs in which every file is
    // skipped never load the model. The promise itself is cached: chunks of a
    // file are embedded concurrently, and caching only the resolved embedder
    // would let every concurrent first call load its own copy of the model.
    let embedderPromise = null;
    const getEmbedDocument = async (body, title) => {
      embedderPromise ??= (async () => {
        const created = await initEmbedder();
        logger.debug("Embedder initialised");
        return created;
      })();
      const embedder = await embedderPromise;
      return embedder.embedDocument(body, title);
    };

    let filesSeen = 0;
    let filesSkipped = 0;
    let filesProcessed = 0;
    let chunksProduced = 0;
    for await (const filePath of loadFilesByGlob(args.glob)) {
      logger.debug(`Processing file: ${filePath}`);
      const result = await processLoadedFile(filePath, {
        repo,
        baseId: base.id,
        readText: (p) => readFile(p, "utf8"),
        hashContent: (content) => createHash("sha256").update(content).digest("hex"),
        chunkMarkdown,
        embedDocument: getEmbedDocument,
      });
      filesSeen++;
      if (result.status === "skipped") {
        filesSkipped++;
      } else {
        filesProcessed++;
        chunksProduced += result.chunkCount;
      }
    }

    console.log(
      `Done. ${filesSeen} files seen, ${filesProcessed} processed, ${filesSkipped} skipped, ${chunksProduced} chunks total.`
    );
    console.log(
      `Time taken: ${((performance.now() - start) / 1e3).toFixed(2)}s`
    );
  },
});
1026
+
1027
+ // src/commands/query.ts
1028
+ import { defineCommand as defineCommand3 } from "citty";
1029
+
1030
+ // src/query/execute.ts
1031
+ import { and as and2, cosineDistance, desc, eq as eq3, gt, sql as sql5 } from "drizzle-orm";
1032
+
1033
+ // src/query/scoring.ts
1034
+ import path2 from "node:path";
1035
/**
 * Combines vector, full-text and trigram hits into one score per chunk. Each
 * source's scores are divided by that source's maximum (at least 1e-9) and
 * multiplied by its weight; a chunk found by several sources gets the sum.
 *
 * @param {Array<object>} vectorResults - Hits carrying `similarity`.
 * @param {Array<object>} ftsResults - Hits carrying `rank`.
 * @param {Array<object>} trigramResults - Hits carrying `score`.
 * @param {{vector: number, fts: number, trigram: number}} weights
 * @returns {Map<number, {id: number, filePath: string, chunkIndex: number, breadcrumbs: string[], content: string, score: number}>}
 *   Fused entries keyed by chunk id.
 */
function fuseScores(vectorResults, ftsResults, trigramResults, weights) {
  logger.trace(
    `Fusing scores \u2014 vector: ${vectorResults.length}, fts: ${ftsResults.length}, trigram: ${trigramResults.length}`
  );
  // The 1e-9 floor keeps the divisions below well-defined for empty inputs.
  const peak = (values) => Math.max(...values, 1e-9);
  const maxSimilarity = peak(vectorResults.map((r) => r.similarity));
  const maxRank = peak(ftsResults.map((r) => r.rank));
  const maxTrigram = peak(trigramResults.map((r) => r.score));

  const toEntry = ({ id, filePath, chunkIndex, breadcrumbs, content }, score) => ({
    id,
    filePath,
    chunkIndex,
    breadcrumbs,
    content,
    score,
  });

  const merged = /* @__PURE__ */ new Map();
  const accumulate = (hit, contribution) => {
    const known = merged.get(hit.id);
    if (known) {
      known.score += contribution;
    } else {
      merged.set(hit.id, toEntry(hit, contribution));
    }
  };

  for (const hit of vectorResults) {
    merged.set(hit.id, toEntry(hit, hit.similarity / maxSimilarity * weights.vector));
  }
  for (const hit of ftsResults) {
    accumulate(hit, hit.rank / maxRank * weights.fts);
  }
  for (const hit of trigramResults) {
    accumulate(hit, hit.score / maxTrigram * weights.trigram);
  }
  return merged;
}
1090
/**
 * Collects the distinct targets of `[[target]]`, `[[target|alias]]` and
 * `[[target#section]]` links, in order of first appearance.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Trimmed link targets without alias or section part.
 */
function extractWikilinks(content) {
  const targets = /* @__PURE__ */ new Set();
  const wikilink = /\[\[([^\]|#]+)(?:[|#][^\]]*)?\]\]/g;
  for (const match of content.matchAll(wikilink)) {
    targets.add(match[1].trim());
  }
  return Array.from(targets);
}
1100
/**
 * Boosts chunks of files that the top-scoring chunks link to via wikilinks.
 *
 * For each of the `topN` best chunks, every linked file that is part of the
 * results (and is not the linking file itself) gains `linkBoost * score` of
 * the linking chunk, accumulated per file and capped at `linkBoostCap`.
 *
 * @param {Map<number, object>} merged - Fused chunks keyed by chunk id.
 * @param {string[]} allFilePaths - All file paths of the base, used to resolve
 *   link targets by file name without the ".md" extension.
 * @param {number} [topN] - Number of top chunks whose links are followed.
 * @param {number} [linkBoost] - Fraction of the source score granted per link.
 * @param {number} [linkBoostCap] - Upper bound of the boost per file.
 * @returns {Map<number, object>} New map in the same order; boosted chunks are
 *   copies, the others are the original objects.
 */
function rerankByWikilinks(merged, allFilePaths, topN = LINK_SOURCE_TOP_N, linkBoost = LINK_BOOST, linkBoostCap = LINK_BOOST_CAP) {
  const pathsByNoteName = /* @__PURE__ */ new Map();
  for (const filePath of allFilePaths) {
    const noteName = path2.basename(filePath, ".md");
    const bucket = pathsByNoteName.get(noteName) ?? /* @__PURE__ */ new Set();
    bucket.add(filePath);
    pathsByNoteName.set(noteName, bucket);
  }

  const chunks = [...merged.values()];
  const pathsInResults = new Set(chunks.map((chunk) => chunk.filePath));
  const topSources = [...chunks]
    .sort((a, b) => b.score - a.score)
    .slice(0, topN);

  const boostByPath = /* @__PURE__ */ new Map();
  for (const source of topSources) {
    for (const link of extractWikilinks(source.content)) {
      const targets = pathsByNoteName.get(link);
      if (!targets) continue;
      for (const target of targets) {
        const isSelfLink = target === source.filePath;
        if (isSelfLink || !pathsInResults.has(target)) continue;
        const previous = boostByPath.get(target) ?? 0;
        const boosted = Math.min(previous + linkBoost * source.score, linkBoostCap);
        boostByPath.set(target, boosted);
        logger.trace(`Boosting ${target} by ${(boosted - previous).toFixed(4)}`);
      }
    }
  }

  return new Map(
    Array.from(merged, ([id, chunk]) => {
      const boost = boostByPath.get(chunk.filePath) ?? 0;
      return [id, boost > 0 ? { ...chunk, score: chunk.score + boost } : chunk];
    })
  );
}
1139
/**
 * Keeps the best-scoring chunk of every file and returns the `topK` of them,
 * highest score first.
 *
 * @param {Map<number, {filePath: string, score: number}>} merged - Scored
 *   chunks keyed by chunk id.
 * @param {number} topK - Maximum number of results.
 * @returns {Array<object>} At most one chunk per file.
 */
function poolByFile(merged, topK) {
  const bestByFile = /* @__PURE__ */ new Map();
  for (const chunk of merged.values()) {
    const best = bestByFile.get(chunk.filePath);
    // On equal scores the chunk seen first is kept.
    if (!best || chunk.score > best.score) {
      bestByFile.set(chunk.filePath, chunk);
    }
  }
  return [...bestByFile.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
1154
+
1155
+ // src/query/execute.ts
1156
/**
 * Runs a hybrid search (vector similarity, full-text and trigram) within one
 * knowledge base, fuses the scores, applies the wikilink boost and returns the
 * best chunk per file.
 *
 * @param {object} opts
 * @param {string} opts.vectorText - Text embedded for the vector search.
 * @param {string} opts.queryText - Full-text query (websearch syntax).
 * @param {string} [opts.trigramText] - Trigram search text; defaults to `queryText`.
 * @param {(text: string) => Promise<number[]>} opts.embedQuery
 * @param {object} [opts.db] - Drizzle database; defaults to `getDb()`.
 * @param {number} opts.baseId - Knowledge base to search.
 * @param {{vector: number, fts: number, trigram: number}} [opts.weights]
 * @param {{vector: number, fts: number, trigram: number}} [opts.limits]
 * @param {number} [opts.trigramThreshold] - Minimum word similarity for a
 *   trigram match.
 * @param {"strict"|"word"} [opts.trigramMode] - "strict" uses
 *   `strict_word_similarity` / `<<%`, anything else `word_similarity` / `<%`.
 * @param {number} [opts.topK] - Maximum number of results.
 * @returns {Promise<Array<object>>} Up to `topK` chunks, best first.
 */
async function executeQuery(opts) {
  const {
    vectorText,
    queryText,
    trigramText,
    embedQuery,
    db = getDb(),
    baseId,
    weights = {
      vector: VECTOR_WEIGHT,
      fts: FTS_WEIGHT,
      trigram: TRIGRAM_WEIGHT,
    },
    limits = {
      vector: VECTOR_LIMIT,
      fts: FTS_LIMIT,
      trigram: TRIGRAM_LIMIT,
    },
    trigramThreshold = TRIGRAM_THRESHOLD,
    trigramMode = "strict",
    topK = 10,
  } = opts;
  const queryVector = await embedQuery(vectorText);
  const effectiveTrigramText = trigramText ?? queryText;
  logger.debug(
    `Executing query \u2014 vector: "${vectorText}", fulltext: "${queryText}"`
  );

  const similarity = sql5`1 - (${cosineDistance(chunksTable.embedding, queryVector)})`;
  const ftsRank = sql5`ts_rank(${chunksTable.fts}, websearch_to_tsquery('simple', unaccent(${queryText})))`;

  // Function, operator and threshold setting always come from this fixed pair
  // of literals, never from user input.
  const isStrict = trigramMode === "strict";
  const trigramFn = isStrict ? "strict_word_similarity" : "word_similarity";
  const trigramOp = isStrict ? sql5.raw("<<%") : sql5.raw("<%");
  const trigramThresholdSetting = isStrict
    ? "pg_trgm.strict_word_similarity_threshold"
    : "pg_trgm.word_similarity_threshold";
  const trigramScore = sql5`${sql5.raw(trigramFn)}(${effectiveTrigramText}, ${chunksTable.content})`;

  const [vectorResults, ftsResults, trigramResults] = await Promise.all([
    db
      .select({
        id: chunksTable.id,
        filePath: filesTable.filePath,
        chunkIndex: chunksTable.chunkIndex,
        breadcrumbs: chunksTable.breadcrumbs,
        content: chunksTable.content,
        similarity,
      })
      .from(chunksTable)
      .innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id))
      .where(and2(gt(similarity, 0), eq3(filesTable.baseId, baseId)))
      .orderBy(desc(similarity))
      .limit(limits.vector),
    db
      .select({
        id: chunksTable.id,
        filePath: filesTable.filePath,
        chunkIndex: chunksTable.chunkIndex,
        breadcrumbs: chunksTable.breadcrumbs,
        content: chunksTable.content,
        rank: ftsRank,
      })
      .from(chunksTable)
      .innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id))
      .where(
        and2(
          sql5`${chunksTable.fts} @@ websearch_to_tsquery('simple', unaccent(${queryText}))`,
          eq3(filesTable.baseId, baseId)
        )
      )
      .orderBy(desc(ftsRank))
      .limit(limits.fts),
    db.transaction(async (tx) => {
      // set_limit() only changes pg_trgm.similarity_threshold (the "%"
      // operator). "<%" and "<<%" read their own thresholds, so the matching
      // setting is applied here, local to this transaction (third argument).
      await tx.execute(
        sql5`SELECT set_config(${trigramThresholdSetting}, ${String(trigramThreshold)}, true)`
      );
      return tx
        .select({
          id: chunksTable.id,
          filePath: filesTable.filePath,
          chunkIndex: chunksTable.chunkIndex,
          breadcrumbs: chunksTable.breadcrumbs,
          content: chunksTable.content,
          score: trigramScore,
        })
        .from(chunksTable)
        .innerJoin(filesTable, eq3(chunksTable.fileId, filesTable.id))
        .where(
          and2(
            sql5`${effectiveTrigramText} ${trigramOp} ${chunksTable.content}`,
            eq3(filesTable.baseId, baseId)
          )
        )
        .orderBy(desc(trigramScore))
        .limit(limits.trigram);
    }),
  ]);
  logger.debug(
    `Vector: ${vectorResults.length} hits, FTS: ${ftsResults.length} hits, Trigram: ${trigramResults.length} hits`
  );

  // All file paths of the base are needed to resolve wikilink targets.
  const allFiles = await db
    .select({ filePath: filesTable.filePath })
    .from(filesTable)
    .where(eq3(filesTable.baseId, baseId));
  const fused = fuseScores(vectorResults, ftsResults, trigramResults, weights);
  logger.debug(`After fusion: ${fused.size} unique chunks`);
  const reranked = rerankByWikilinks(
    fused,
    allFiles.map((f) => f.filePath)
  );
  logger.debug(`After wikilink rerank: ${reranked.size} chunks`);
  const results = poolByFile(reranked, topK);
  logger.debug(`Returning ${results.length} results`);
  return results;
}
1245
+
1246
+ // src/commands/query.ts
1247
/**
 * `query` subcommand: searches the indexed notes of one knowledge base.
 *
 * Arguments:
 *  - `--vector` / `-v` (required): text passed to the query as `vectorText`.
 *  - `--fulltext` / `-f` (required): text passed to the query as `queryText`.
 *  - `--trigram` / `-g` (optional): text passed to the query as `trigramText`.
 *  - `--trigramMode` / `-t`: must be "strict" or "word" (default "strict").
 *  - `--base`: name of the knowledge base to search (default "default").
 *
 * `run` validates the trigram mode, resolves the base by name, initialises the
 * embedder, executes the query, and prints every hit to stdout as a
 * `<file>` block containing `<meta>` and `<content>` sections.
 *
 * Throws an Error when `--trigramMode` is neither "strict" nor "word".
 * When the base does not exist, it logs an error and sets the process exit code to 1.
 */
var queryCommand = defineCommand3({
  meta: {
    name: "query",
    description: "Search notes by semantic query"
  },
  args: {
    vector: {
      type: "string",
      alias: "v",
      description: "Semantic query for vector search",
      required: true
    },
    fulltext: {
      type: "string",
      alias: "f",
      description: 'Keyword query for full-text search (supports PostgreSQL websearch syntax: OR, -word, "phrases")',
      required: true
    },
    trigram: {
      type: "string",
      alias: "g",
      description: "Plain-text keyword for trigram search (defaults to --fulltext)",
      required: false
    },
    trigramMode: {
      type: "string",
      alias: "t",
      description: "Trigram operator: 'strict' (strict_word_similarity, <<%) or 'word' (word_similarity, <%)",
      default: "strict"
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default"
    }
  },
  async run({ args }) {
    const mode = args.trigramMode;
    if (mode !== "strict" && mode !== "word") {
      throw new Error(
        `Invalid --trigram-mode "${mode}". Must be "strict" or "word".`
      );
    }
    const baseRepo = new DbBaseRepository();
    const base = await baseRepo.getBaseByName(args.base);
    if (!base) {
      logger.error(`Base '${args.base}' does not exist.`);
      // Report failure through exitCode and return normally instead of calling
      // process.exit(): a hard exit here would terminate the process before the
      // root command's cleanup hook gets a chance to close the database.
      process.exitCode = 1;
      return;
    }
    const embedder = await initEmbedder();
    const results = await executeQuery({
      vectorText: args.vector,
      queryText: args.fulltext,
      trigramText: args.trigram,
      embedQuery: embedder.embedQuery.bind(embedder),
      trigramMode: mode,
      baseId: base.id
    });
    if (results.length === 0) {
      logger.info("No matching chunks found.");
      return;
    }
    for (const row of results) {
      const lines = [
        `<file path="${row.filePath}">`,
        "<meta>",
        `Chunk index: ${row.chunkIndex}`,
        `Score: ${Number(row.score).toFixed(3)}`
      ];
      // The breadcrumb line is only emitted when the chunk has a heading trail.
      if (row.breadcrumbs.length > 0) {
        lines.push(`Breadcrumbs: ${row.breadcrumbs.join(" > ")}`);
      }
      lines.push("</meta>", "<content>", `${row.content}`, "</content>", "</file>");
      // Trailing "\n" plus console.log's own newline leaves a blank line between hits.
      console.log(`${lines.join("\n")}\n`);
    }
  }
});
1337
+
1338
+ // src/database/migrate.ts
1339
+ import { join as join2 } from "node:path";
1340
+ import { fileURLToPath } from "node:url";
1341
+ import { migrate } from "drizzle-orm/pglite/migrator";
1342
// Location of the migrations directory relative to this module file.
var migrationsRelativePath = "./drizzle";
// Absolute filesystem path of the migrations directory: the directory that
// contains this module, joined with the relative path above.
var migrationsFolder = join2(
  fileURLToPath(new URL(".", import.meta.url)),
  migrationsRelativePath
);
1347
/**
 * Applies the migrations found in `migrationsFolder` to the given database.
 *
 * @param db - database handle forwarded to `migrate`.
 * @throws {Error} "Migration failed: <reason>" with the original failure
 *   attached as `cause` when anything inside the migration step throws.
 */
async function runMigrations(db) {
  const describeFailure = (failure) => {
    if (failure instanceof Error) {
      return failure.message;
    }
    return String(failure);
  };
  try {
    logger.debug("Running DB migrations...");
    await migrate(db, { migrationsFolder });
    logger.debug("Migrations complete");
  } catch (failure) {
    const reason = describeFailure(failure);
    throw new Error(`Migration failed: ${reason}`, { cause: failure });
  }
}
1359
+
1360
+ // src/main.ts
1361
/**
 * Root CLI command definition.
 *
 * Declares the global `--verbose` and `--base` options, wires up the `load`,
 * `query` and `drop` subcommands, prepares the database in `setup`, and
 * closes the database client in `cleanup`.
 */
var main = defineCommand4({
  meta: {
    name: "notes-query-tool",
    version: package_default.version,
    description: package_default.description,
    alias: "nqt"
  },
  args: {
    verbose: {
      type: "boolean",
      description: "Enable verbose logging (sets log level to max)",
      default: false
    },
    base: {
      type: "string",
      description: "Knowledge base name to use",
      default: "default"
    }
  },
  subCommands: {
    // load: reads notes, chunks them and indexes them in the database
    load: loadCommand,
    // query: searches the indexed notes
    query: queryCommand,
    // drop: removes a knowledge base together with all of its indexed data
    drop: dropCommand
  },
  // Invoked ahead of any subcommand: adjusts logging, then waits for the
  // database client to be ready and applies migrations.
  async setup({ args }) {
    const { verbose } = args;
    if (verbose) {
      setLogLevel(999);
    }
    const database = getDb();
    await database.$client.waitReady;
    await runMigrations(database);
  },
  // Invoked once the subcommand has finished: closes the database client.
  async cleanup() {
    const { $client: client } = getDb();
    await client.close();
  }
});
1402
// Install the process-level handlers BEFORE starting the CLI. The module
// pauses at the top-level `await` below until the command has finished, so
// handlers registered after it would not be active while the command runs.
process.on("unhandledRejection", (reason) => {
  closeDbAndExit(`Unhandled Rejection: ${reason}`, 1);
});
process.on("uncaughtException", (error) => {
  closeDbAndExit(`Uncaught Exception: ${error}`, 1);
});
process.on("SIGINT", () => {
  closeDbAndExit("Received SIGINT, shutting down...", 0);
});
// Run the CLI; any rejection that escapes runMain closes the DB and exits with 1.
await runMain(main).catch((error) => {
  closeDbAndExit(`Error: ${error}`, 1);
});
1414
/**
 * Logs `message` at error level, attempts to close the database client, and
 * then terminates the process with `code`.
 *
 * The close is attempted inside a promise chain so that a synchronous throw
 * from `getDb()` / `close()` or a rejected close cannot prevent the exit:
 * `process.exit(code)` runs in every case.
 *
 * @param {string} message - text to log before shutting down.
 * @param {number} code - process exit code.
 */
function closeDbAndExit(message, code) {
  logger.error(message);
  Promise.resolve()
    .then(() => getDb().$client.close())
    .catch((closeError) => {
      // Closing is best-effort at this point; record the failure and still exit.
      logger.debug(`Failed to close database during shutdown: ${closeError}`);
    })
    .finally(() => process.exit(code));
}
26
1418
  //# sourceMappingURL=main.js.map