@toolpack-sdk/knowledge 1.2.0-SNAPSHOT.04032026-2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +4 -4
  2. package/dist/index.cjs +25 -0
  3. package/dist/index.d.cts +239 -0
  4. package/dist/index.d.ts +239 -14
  5. package/dist/index.js +25 -31
  6. package/package.json +30 -8
  7. package/dist/embedders/ollama.d.ts +0 -15
  8. package/dist/embedders/ollama.d.ts.map +0 -1
  9. package/dist/embedders/ollama.js +0 -51
  10. package/dist/embedders/ollama.js.map +0 -1
  11. package/dist/embedders/openai.d.ts +0 -18
  12. package/dist/embedders/openai.d.ts.map +0 -1
  13. package/dist/embedders/openai.js +0 -63
  14. package/dist/embedders/openai.js.map +0 -1
  15. package/dist/errors.d.ts +0 -25
  16. package/dist/errors.d.ts.map +0 -1
  17. package/dist/errors.js +0 -58
  18. package/dist/errors.js.map +0 -1
  19. package/dist/index.d.ts.map +0 -1
  20. package/dist/index.js.map +0 -1
  21. package/dist/interfaces.d.ts +0 -48
  22. package/dist/interfaces.d.ts.map +0 -1
  23. package/dist/interfaces.js +0 -3
  24. package/dist/interfaces.js.map +0 -1
  25. package/dist/knowledge.d.ts +0 -74
  26. package/dist/knowledge.d.ts.map +0 -1
  27. package/dist/knowledge.js +0 -120
  28. package/dist/knowledge.js.map +0 -1
  29. package/dist/providers/memory.d.ts +0 -16
  30. package/dist/providers/memory.d.ts.map +0 -1
  31. package/dist/providers/memory.js +0 -72
  32. package/dist/providers/memory.js.map +0 -1
  33. package/dist/providers/persistent.d.ts +0 -23
  34. package/dist/providers/persistent.d.ts.map +0 -1
  35. package/dist/providers/persistent.js +0 -162
  36. package/dist/providers/persistent.js.map +0 -1
  37. package/dist/sources/markdown.d.ts +0 -20
  38. package/dist/sources/markdown.d.ts.map +0 -1
  39. package/dist/sources/markdown.js +0 -196
  40. package/dist/sources/markdown.js.map +0 -1
  41. package/dist/utils/chunking.d.ts +0 -6
  42. package/dist/utils/chunking.d.ts.map +0 -1
  43. package/dist/utils/chunking.js +0 -86
  44. package/dist/utils/chunking.js.map +0 -1
  45. package/dist/utils/cosine.d.ts +0 -4
  46. package/dist/utils/cosine.d.ts.map +0 -1
  47. package/dist/utils/cosine.js +0 -52
  48. package/dist/utils/cosine.js.map +0 -1
package/README.md CHANGED
@@ -5,7 +5,7 @@ RAG (Retrieval-Augmented Generation) package for Toolpack SDK.
5
5
  ## Installation
6
6
 
7
7
  ```bash
8
- npm install toolpack-knowledge
8
+ npm install @toolpack-sdk/knowledge
9
9
  ```
10
10
 
11
11
  ## Quick Start
@@ -13,7 +13,7 @@ npm install toolpack-knowledge
13
13
  ### Development (Zero Infrastructure)
14
14
 
15
15
  ```typescript
16
- import { Knowledge, MemoryProvider, MarkdownSource, OllamaEmbedder } from 'toolpack-knowledge';
16
+ import { Knowledge, MemoryProvider, MarkdownSource, OllamaEmbedder } from '@toolpack-sdk/knowledge';
17
17
 
18
18
  const kb = await Knowledge.create({
19
19
  provider: new MemoryProvider(),
@@ -29,7 +29,7 @@ console.log(results[0].chunk.content);
29
29
  ### Production (Persistent)
30
30
 
31
31
  ```typescript
32
- import { Knowledge, PersistentKnowledgeProvider, MarkdownSource, OpenAIEmbedder } from 'toolpack-knowledge';
32
+ import { Knowledge, PersistentKnowledgeProvider, MarkdownSource, OpenAIEmbedder } from '@toolpack-sdk/knowledge';
33
33
 
34
34
  const kb = await Knowledge.create({
35
35
  provider: new PersistentKnowledgeProvider({
@@ -58,7 +58,7 @@ const results = await kb.query('authentication setup', {
58
58
 
59
59
  ```typescript
60
60
  import { Toolpack } from 'toolpack-sdk';
61
- import { Knowledge, MemoryProvider, MarkdownSource, OllamaEmbedder } from 'toolpack-knowledge';
61
+ import { Knowledge, MemoryProvider, MarkdownSource, OllamaEmbedder } from '@toolpack-sdk/knowledge';
62
62
 
63
63
  const kb = await Knowledge.create({
64
64
  provider: new MemoryProvider(),
package/dist/index.cjs ADDED
@@ -0,0 +1,25 @@
1
+ "use strict";var _=Object.create;var O=Object.defineProperty;var z=Object.getOwnPropertyDescriptor;var V=Object.getOwnPropertyNames;var W=Object.getPrototypeOf,X=Object.prototype.hasOwnProperty;var G=(i,e)=>{for(var t in e)O(i,t,{get:e[t],enumerable:!0})},N=(i,e,t,r)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of V(e))!X.call(i,n)&&n!==t&&O(i,n,{get:()=>e[n],enumerable:!(r=z(e,n))||r.enumerable});return i};var h=(i,e,t)=>(t=i!=null?_(W(i)):{},N(e||!i||!i.__esModule?O(t,"default",{value:i,enumerable:!0}):t,i)),Y=i=>N(O({},"__esModule",{value:!0}),i);var ee={};G(ee,{ChunkTooLargeError:()=>K,DimensionMismatchError:()=>v,EmbeddingError:()=>l,IngestionError:()=>E,Knowledge:()=>$,KnowledgeError:()=>f,KnowledgeProviderError:()=>b,MarkdownSource:()=>T,MemoryProvider:()=>S,OllamaEmbedder:()=>M,OpenAIEmbedder:()=>A,PersistentKnowledgeProvider:()=>R});module.exports=Y(ee);var f=class extends Error{constructor(t,r){super(t);this.code=r;this.name="KnowledgeError"}code},l=class extends f{constructor(t,r){super(t,"EMBEDDING_ERROR");this.statusCode=r;this.name="EmbeddingError"}statusCode},E=class extends f{constructor(t,r){super(t,"INGESTION_ERROR");this.file=r;this.name="IngestionError"}file},K=class extends f{constructor(t,r){super(t,"CHUNK_TOO_LARGE");this.chunkSize=r;this.name="ChunkTooLargeError"}chunkSize},v=class extends f{expected;actual;constructor(e,t){super(`Dimension mismatch: expected ${e}, got ${t}`,"DIMENSION_MISMATCH"),this.name="DimensionMismatchError",this.expected=e,this.actual=t}},b=class extends f{constructor(e){super(e,"PROVIDER_ERROR"),this.name="KnowledgeProviderError"}};var $=class i{constructor(e,t,r,n,o){this.provider=e;this.embedder=t;this.description=r;this.sources=n;this.options=o}provider;embedder;description;sources;options;static async create(e){await e.provider.validateDimensions(e.embedder.dimensions);let t=new i(e.provider,e.embedder,e.description,e.sources,e),r=e.reSync!==!1;return!r&&"shouldReSync"in 
e.provider?(e.provider.shouldReSync()&&await t.sync(),t):(r&&await t.sync(),t)}async query(e,t){let r=await this.embedder.embed(e);return this.provider.query(r,t)}async sync(){this.options.onSync?.({type:"start"});try{let e=this.embedder.dimensions;await this.provider.clear(),await this.provider.validateDimensions(e);let t=[];for(let n of this.sources)for await(let o of n.load())t.push(o);let r=await this.embedChunks(t);r.length>0&&await this.provider.add(r),this.options.onSync?.({type:"complete",chunksAffected:r.length})}catch(e){throw this.options.onSync?.({type:"error",error:e}),e}}async embedChunks(e){if(e.length===0)return[];let t=[];try{let r=e.map(o=>o.content),n=await this.embedder.embedBatch(r);for(let o=0;o<e.length;o++)t.push({...e[o],vector:n[o]}),this.options.onEmbeddingProgress?.({source:"sync",current:o+1,total:e.length,percent:Math.round((o+1)/e.length*100)})}catch(r){if(this.options.onError?.(r,{})==="abort")throw r;for(let o=0;o<e.length;o++)try{let s=await this.embedder.embed(e[o].content);t.push({...e[o],vector:s}),this.options.onEmbeddingProgress?.({source:"sync",current:o+1,total:e.length,percent:Math.round((o+1)/e.length*100)})}catch(s){if(this.options.onError?.(s,{chunk:e[o]})==="abort")throw s}}return t}async stop(){this.provider.close&&this.provider.close()}toTool(){return{name:"knowledge_search",displayName:"Knowledge Search",description:this.description||"Search the knowledge base for relevant information",category:"search",cacheable:!1,parameters:{type:"object",properties:{query:{type:"string",description:"Search query to find relevant information"},limit:{type:"number",description:"Maximum number of results to return (default: 10)"},threshold:{type:"number",description:"Minimum similarity threshold 0-1 (default: 0.7)"},filter:{type:"object",description:"Optional metadata filters"}},required:["query"]},execute:async e=>(await 
this.query(e.query,{limit:e.limit,threshold:e.threshold,filter:e.filter})).map(r=>({content:r.chunk.content,score:r.score,metadata:r.chunk.metadata}))}}};function C(i,e){if(i.length!==e.length)throw new Error("Vectors must have same dimensions");let t=0,r=0,n=0;for(let s=0;s<i.length;s++)t+=i[s]*e[s],r+=i[s]*i[s],n+=e[s]*e[s];let o=Math.sqrt(r)*Math.sqrt(n);return o===0?0:t/o}function P(i,e){if(!e)return!0;for(let[t,r]of Object.entries(e)){let n=i[t];if(typeof r=="object"&&r!==null&&!Array.isArray(r)){if("$in"in r){if(!r.$in.includes(n))return!1}else if("$gt"in r){let o=r.$gt;if(typeof n!="number"||n<=o)return!1}else if("$lt"in r){let o=r.$lt;if(typeof n!="number"||n>=o)return!1}}else if(n!==r)return!1}return!0}var S=class{constructor(e={}){this.options=e}options;chunks=new Map;dimensions;async validateDimensions(e){if(this.dimensions&&this.dimensions!==e)throw new v(this.dimensions,e);this.dimensions=e}async add(e){for(let t of e){if(!t.vector)throw new b("Chunk missing vector");if(this.options.maxChunks&&this.chunks.size>=this.options.maxChunks)throw new b(`Max chunks limit reached: ${this.options.maxChunks}`);this.chunks.set(t.id,{chunk:{id:t.id,content:t.content,metadata:t.metadata},vector:t.vector})}}async query(e,t={}){let{limit:r=10,threshold:n=.7,filter:o,includeMetadata:s=!0,includeVectors:c=!1}=t,u=[];for(let{chunk:a,vector:d}of this.chunks.values()){if(o&&!P(a.metadata,o))continue;let m=C(e,d);m>=n&&u.push({chunk:{id:a.id,content:a.content,metadata:s?a.metadata:{},vector:c?d:void 0},score:m,distance:1-m})}return u.sort((a,d)=>d.score-a.score),u.slice(0,r)}async delete(e){for(let t of e)this.chunks.delete(t)}async clear(){this.chunks.clear(),this.dimensions=void 0}};var D=h(require("better-sqlite3"),1),L=h(require("fs"),1),I=h(require("path"),1),F=h(require("os"),1);var R=class{constructor(e){this.options=e;let t=e.storagePath||I.join(F.homedir(),".toolpack","knowledge");this.dbPath=I.join(t,`${e.namespace}.db`),L.mkdirSync(t,{recursive:!0}),this.db=new 
D.default(this.dbPath),this.db.pragma("journal_mode = WAL"),this.initSchema(),this.loadDimensions()}options;db;dimensions;dbPath;initSchema(){this.db.exec(`
2
+ CREATE TABLE IF NOT EXISTS chunks (
3
+ id TEXT PRIMARY KEY,
4
+ content TEXT NOT NULL,
5
+ metadata TEXT NOT NULL,
6
+ vector BLOB NOT NULL,
7
+ synced_at INTEGER NOT NULL
8
+ );
9
+
10
+ CREATE TABLE IF NOT EXISTS provider_meta (
11
+ key TEXT PRIMARY KEY,
12
+ value TEXT NOT NULL
13
+ );
14
+ `)}loadDimensions(){let e=this.db.prepare("SELECT value FROM provider_meta WHERE key = ?").get("dimensions");e&&(this.dimensions=parseInt(e.value,10))}async validateDimensions(e){if(this.dimensions&&this.dimensions!==e)throw new v(this.dimensions,e);this.dimensions||(this.db.prepare("INSERT OR REPLACE INTO provider_meta (key, value) VALUES (?, ?)").run("dimensions",e.toString()),this.dimensions=e)}async add(e){let t=this.db.prepare(`
15
+ INSERT OR REPLACE INTO chunks (id, content, metadata, vector, synced_at)
16
+ VALUES (?, ?, ?, ?, ?)
17
+ `);this.db.transaction(n=>{for(let o of n){if(!o.vector)throw new b("Chunk missing vector");let s=Buffer.from(new Float32Array(o.vector).buffer);t.run(o.id,o.content,JSON.stringify(o.metadata),s,Date.now())}})(e)}async query(e,t={}){let{limit:r=10,threshold:n=.7,filter:o,includeMetadata:s=!0,includeVectors:c=!1}=t,u=this.db.prepare("SELECT id, content, metadata, vector FROM chunks").all(),a=[];for(let d of u){let m=JSON.parse(d.metadata);if(o&&!P(m,o))continue;let g=new Float32Array(d.vector.buffer,d.vector.byteOffset,d.vector.byteLength/4),k=Array.from(g),w=C(e,k);w>=n&&a.push({chunk:{id:d.id,content:d.content,metadata:s?m:{},vector:c?k:void 0},score:w,distance:1-w})}return a.sort((d,m)=>m.score-d.score),a.slice(0,r)}async delete(e){let t=this.db.prepare("DELETE FROM chunks WHERE id = ?");this.db.transaction(n=>{for(let o of n)t.run(o)})(e)}async clear(){this.db.prepare("DELETE FROM chunks").run(),this.db.prepare("DELETE FROM provider_meta WHERE key = ?").run("dimensions"),this.dimensions=void 0}shouldReSync(){return this.options.reSync===!1?this.db.prepare("SELECT COUNT(*) as count FROM chunks").get().count===0:!0}close(){this.db.close()}};var q=h(require("fs/promises"),1),x=h(require("path"),1),U=h(require("crypto"),1),B=h(require("fast-glob"),1);function y(i){return Math.ceil(i.length/4)}function J(i,e){let t=i.split(/\n\n+/),r=[],n="";for(let o of t){let s=y(o);y(n)+s>e&&n?(r.push(n.trim()),n=o):n+=(n?`
18
+
19
+ `:"")+o}return n&&r.push(n.trim()),r}function Z(i,e){let t=i.match(/[^.!?]+[.!?]+/g)||[i],r=[],n="";for(let o of t){let s=y(o);y(n)+s>e&&n?(r.push(n.trim()),n=o):n+=(n?" ":"")+o}return n&&r.push(n.trim()),r}function j(i,e){if(i.length<=1||e===0)return i;let t=[];for(let r=0;r<i.length;r++){let n=i[r];if(r>0){let s=i[r-1].split(/\s+/),c=Math.ceil(e/4);n=s.slice(-c).join(" ")+" "+n}t.push(n)}return t}function Q(i,e){if(y(i)<=e)return[i];let r=J(i,e),n=[];for(let o of r)y(o)>e?n.push(...Z(o,e)):n.push(o);return n}var T=class{constructor(e,t={}){this.pattern=e;this.options={maxChunkSize:t.maxChunkSize??2e3,chunkOverlap:t.chunkOverlap??200,minChunkSize:t.minChunkSize??100,namespace:t.namespace??"markdown",metadata:t.metadata??{}}}pattern;options;async*load(){let e=this.pattern.replace(/\\/g,"/"),t=await(0,B.default)(e,{absolute:!0});for(let r of t)try{let n=await q.readFile(r,"utf-8"),o=this.chunkMarkdown(n,r);for(let s of o)yield s}catch(n){throw new E(`Failed to process file: ${n.message}`,r)}}chunkMarkdown(e,t){let r=this.extractFrontmatter(e),n=this.removeFrontmatter(e),o=this.parseHeadings(n),s=[],c=0;for(let u of o){let a=/```[\s\S]*?```/.test(u.content),d=y(u.content);if(d<this.options.minChunkSize&&s.length>0){let g=s[s.length-1];g.content+=`
20
+
21
+ `+u.content,a&&(g.metadata.hasCode=!0);continue}let m;d>this.options.maxChunkSize?m=Q(u.content,this.options.maxChunkSize):m=[u.content],this.options.chunkOverlap>0&&m.length>1&&(m=j(m,this.options.chunkOverlap));for(let g=0;g<m.length;g++){let k=m[g],w=this.generateChunkId(t,k,c);s.push({id:w,content:k,metadata:{...this.options.metadata,...r,heading:u.heading,hasCode:a,source:x.basename(t),sourcePath:t,chunkIndex:c,totalChunks:m.length}}),c++}}return s}parseHeadings(e){let t=e.split(`
22
+ `),r=[],n=[],o=[];for(let s of t){let c=s.match(/^(#{1,6})\s+(.+)$/);if(c){if(o.length>0){let d=n.map(m=>m.text);r.push({heading:d.length>0?[...d]:[""],content:o.join(`
23
+ `).trim(),level:n.length>0?n[n.length-1].level:0}),o=[]}let u=c[1].length,a=c[2].trim();for(;n.length>0&&n[n.length-1].level>=u;)n.pop();n.push({level:u,text:a}),o.push(s)}else o.push(s)}if(o.length>0){let s=n.map(c=>c.text);r.push({heading:s.length>0?[...s]:[""],content:o.join(`
24
+ `).trim(),level:n.length>0?n[n.length-1].level:0})}return r.filter(s=>s.content.length>0)}extractFrontmatter(e){let t=e.match(/^---\n([\s\S]*?)\n---/);if(!t)return{};let r=t[1],n={},o=r.split(`
25
+ `);for(let s of o){let c=s.match(/^(\w+):\s*(.+)$/);if(c){let u=c[1],a=c[2].trim();a==="true"?a=!0:a==="false"?a=!1:isNaN(Number(a))?typeof a=="string"&&a.startsWith("[")&&a.endsWith("]")&&(a=a.slice(1,-1).split(",").map(d=>d.trim())):a=Number(a),n[u]=a}}return n}removeFrontmatter(e){return e.replace(/^---\n[\s\S]*?\n---\n/,"")}generateChunkId(e,t,r){let n=U.createHash("md5").update(t).digest("hex").substring(0,8),o=x.basename(e,x.extname(e));return`${this.options.namespace}:${o}:${r}:${n}`}};var M=class{constructor(e){this.options=e;this.baseUrl=e.baseUrl||"http://localhost:11434",this.dimensions=e.dimensions||this.getModelDimensions(e.model)}options;dimensions;baseUrl;getModelDimensions(e){let t={"nomic-embed-text":768,"mxbai-embed-large":1024,"all-minilm":384,"snowflake-arctic-embed":1024,"bge-m3":1024,"bge-large":1024,"all-minilm:l6-v2":384,"all-minilm:l12-v2":384},r=t[e];if(!r)throw new l(`Unknown Ollama model '${e}'. Provide 'dimensions' in OllamaEmbedderOptions or use a known model: ${Object.keys(t).join(", ")}`);return r}async embed(e){let t=null,r=this.options.retries||3,n=this.options.retryDelay||1e3;for(let o=0;o<r;o++)try{let s=await fetch(`${this.baseUrl}/api/embeddings`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({model:this.options.model,prompt:e})});if(!s.ok)throw new l(`Ollama embedding failed: ${s.statusText}`,s.status);return(await s.json()).embedding}catch(s){if(t=s,s instanceof l&&s.statusCode&&s.statusCode>=400&&s.statusCode<500)throw s;o<r-1&&await new Promise(c=>setTimeout(c,n))}throw new l(`Ollama embedding failed after ${r} retries: ${t?.message}`)}async embedBatch(e){let t=[];for(let r of e)t.push(await this.embed(r));return t}};var H=h(require("openai"),1);var A=class{constructor(e){this.options=e;this.client=new 
H.default({apiKey:e.apiKey,timeout:e.timeout||3e4}),this.dimensions=this.getModelDimensions(e.model)}options;dimensions;client;getModelDimensions(e){return{"text-embedding-3-small":1536,"text-embedding-3-large":3072,"text-embedding-ada-002":1536}[e]||1536}async embed(e){let t=null,r=this.options.retries||3;for(let n=0;n<r;n++)try{return(await this.client.embeddings.create({model:this.options.model,input:e})).data[0].embedding}catch(o){t=o,n<r-1&&await new Promise(s=>setTimeout(s,this.options.retryDelay||1e3))}throw new l(`OpenAI embedding failed after ${r} retries: ${t?.message}`)}async embedBatch(e){let t=null,r=this.options.retries||3;for(let n=0;n<r;n++)try{return(await this.client.embeddings.create({model:this.options.model,input:e})).data.map(s=>s.embedding)}catch(o){t=o,n<r-1&&await new Promise(s=>setTimeout(s,this.options.retryDelay||1e3))}throw new l(`OpenAI batch embedding failed after ${r} retries: ${t?.message}`)}};0&&(module.exports={ChunkTooLargeError,DimensionMismatchError,EmbeddingError,IngestionError,Knowledge,KnowledgeError,KnowledgeProviderError,MarkdownSource,MemoryProvider,OllamaEmbedder,OpenAIEmbedder,PersistentKnowledgeProvider});
package/dist/index.d.cts ADDED
@@ -0,0 +1,239 @@
1
+ interface Chunk {
2
+ id: string;
3
+ content: string;
4
+ metadata: Record<string, unknown>;
5
+ vector?: number[];
6
+ }
7
+ interface ChunkUpdate {
8
+ type: 'add' | 'update' | 'delete';
9
+ chunk: Chunk;
10
+ }
11
+ interface QueryOptions {
12
+ limit?: number;
13
+ threshold?: number;
14
+ filter?: MetadataFilter;
15
+ includeMetadata?: boolean;
16
+ includeVectors?: boolean;
17
+ }
18
+ interface MetadataFilter {
19
+ [key: string]: string | number | boolean | {
20
+ $in: unknown[];
21
+ } | {
22
+ $gt: number;
23
+ } | {
24
+ $lt: number;
25
+ };
26
+ }
27
+ interface QueryResult {
28
+ chunk: Chunk;
29
+ score: number;
30
+ distance?: number;
31
+ }
32
+ interface KnowledgeProvider {
33
+ add(chunks: Chunk[]): Promise<void>;
34
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
35
+ delete(ids: string[]): Promise<void>;
36
+ clear(): Promise<void>;
37
+ validateDimensions(dimensions: number): Promise<void>;
38
+ close?(): void;
39
+ }
40
+ interface KnowledgeSource {
41
+ load(): AsyncIterable<Chunk>;
42
+ watch?(): AsyncIterable<ChunkUpdate>;
43
+ }
44
+ interface Embedder {
45
+ embed(text: string): Promise<number[]>;
46
+ embedBatch(texts: string[]): Promise<number[][]>;
47
+ readonly dimensions: number;
48
+ }
49
+
50
+ declare class KnowledgeError extends Error {
51
+ readonly code?: string | undefined;
52
+ constructor(message: string, code?: string | undefined);
53
+ }
54
+ declare class EmbeddingError extends KnowledgeError {
55
+ readonly statusCode?: number | undefined;
56
+ constructor(message: string, statusCode?: number | undefined);
57
+ }
58
+ declare class IngestionError extends KnowledgeError {
59
+ readonly file?: string | undefined;
60
+ constructor(message: string, file?: string | undefined);
61
+ }
62
+ declare class ChunkTooLargeError extends KnowledgeError {
63
+ readonly chunkSize: number;
64
+ constructor(message: string, chunkSize: number);
65
+ }
66
+ declare class DimensionMismatchError extends KnowledgeError {
67
+ readonly expected: number;
68
+ readonly actual: number;
69
+ constructor(expected: number, actual: number);
70
+ }
71
+ declare class KnowledgeProviderError extends KnowledgeError {
72
+ constructor(message: string);
73
+ }
74
+
75
+ interface KnowledgeOptions {
76
+ provider: KnowledgeProvider;
77
+ sources: KnowledgeSource[];
78
+ embedder: Embedder;
79
+ description: string;
80
+ reSync?: boolean;
81
+ onError?: ErrorHandler;
82
+ onSync?: SyncEventHandler;
83
+ onEmbeddingProgress?: EmbeddingProgressHandler;
84
+ }
85
+ type ErrorHandler = (error: Error, context: {
86
+ file?: string;
87
+ chunk?: Chunk;
88
+ }) => 'skip' | 'abort';
89
+ interface SyncEvent {
90
+ type: 'start' | 'file' | 'chunk' | 'complete' | 'error';
91
+ file?: string;
92
+ chunksAffected?: number;
93
+ error?: Error;
94
+ }
95
+ type SyncEventHandler = (event: SyncEvent) => void;
96
+ interface EmbeddingProgressEvent {
97
+ source: string;
98
+ current: number;
99
+ total: number;
100
+ percent: number;
101
+ }
102
+ type EmbeddingProgressHandler = (event: EmbeddingProgressEvent) => void;
103
+ declare class Knowledge {
104
+ private provider;
105
+ private embedder;
106
+ private description;
107
+ private sources;
108
+ private options;
109
+ private constructor();
110
+ static create(options: KnowledgeOptions): Promise<Knowledge>;
111
+ query(text: string, options?: QueryOptions): Promise<QueryResult[]>;
112
+ sync(): Promise<void>;
113
+ private embedChunks;
114
+ stop(): Promise<void>;
115
+ toTool(): KnowledgeTool;
116
+ }
117
+ interface KnowledgeTool {
118
+ name: string;
119
+ displayName: string;
120
+ description: string;
121
+ category: string;
122
+ cacheable?: boolean;
123
+ parameters: {
124
+ type: string;
125
+ properties: Record<string, unknown>;
126
+ required: string[];
127
+ };
128
+ execute: (params: KnowledgeToolParams) => Promise<KnowledgeToolResult[]>;
129
+ }
130
+ interface KnowledgeToolParams {
131
+ query: string;
132
+ limit?: number;
133
+ threshold?: number;
134
+ filter?: Record<string, string | number | boolean | {
135
+ $in: unknown[];
136
+ } | {
137
+ $gt: number;
138
+ } | {
139
+ $lt: number;
140
+ }>;
141
+ }
142
+ interface KnowledgeToolResult {
143
+ content: string;
144
+ score: number;
145
+ metadata: Record<string, unknown>;
146
+ }
147
+
148
+ interface MemoryProviderOptions {
149
+ maxChunks?: number;
150
+ }
151
+ declare class MemoryProvider implements KnowledgeProvider {
152
+ private options;
153
+ private chunks;
154
+ private dimensions?;
155
+ constructor(options?: MemoryProviderOptions);
156
+ validateDimensions(dimensions: number): Promise<void>;
157
+ add(chunks: Chunk[]): Promise<void>;
158
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
159
+ delete(ids: string[]): Promise<void>;
160
+ clear(): Promise<void>;
161
+ }
162
+
163
+ interface PersistentKnowledgeProviderOptions {
164
+ namespace: string;
165
+ storagePath?: string;
166
+ reSync?: boolean;
167
+ }
168
+ declare class PersistentKnowledgeProvider implements KnowledgeProvider {
169
+ private options;
170
+ private db;
171
+ private dimensions?;
172
+ private dbPath;
173
+ constructor(options: PersistentKnowledgeProviderOptions);
174
+ private initSchema;
175
+ private loadDimensions;
176
+ validateDimensions(dimensions: number): Promise<void>;
177
+ add(chunks: Chunk[]): Promise<void>;
178
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
179
+ delete(ids: string[]): Promise<void>;
180
+ clear(): Promise<void>;
181
+ shouldReSync(): boolean;
182
+ close(): void;
183
+ }
184
+
185
+ interface MarkdownSourceOptions {
186
+ maxChunkSize?: number;
187
+ chunkOverlap?: number;
188
+ minChunkSize?: number;
189
+ namespace?: string;
190
+ metadata?: Record<string, unknown>;
191
+ }
192
+ declare class MarkdownSource implements KnowledgeSource {
193
+ private pattern;
194
+ private options;
195
+ constructor(pattern: string, options?: MarkdownSourceOptions);
196
+ load(): AsyncIterable<Chunk>;
197
+ private chunkMarkdown;
198
+ private parseHeadings;
199
+ private extractFrontmatter;
200
+ private removeFrontmatter;
201
+ private generateChunkId;
202
+ }
203
+
204
+ interface OllamaEmbedderOptions {
205
+ model: string;
206
+ baseUrl?: string;
207
+ /** Override auto-detected dimensions for custom/unknown models */
208
+ dimensions?: number;
209
+ retries?: number;
210
+ retryDelay?: number;
211
+ }
212
+ declare class OllamaEmbedder implements Embedder {
213
+ private options;
214
+ readonly dimensions: number;
215
+ private baseUrl;
216
+ constructor(options: OllamaEmbedderOptions);
217
+ private getModelDimensions;
218
+ embed(text: string): Promise<number[]>;
219
+ embedBatch(texts: string[]): Promise<number[][]>;
220
+ }
221
+
222
+ interface OpenAIEmbedderOptions {
223
+ model: string;
224
+ apiKey: string;
225
+ retries?: number;
226
+ retryDelay?: number;
227
+ timeout?: number;
228
+ }
229
+ declare class OpenAIEmbedder implements Embedder {
230
+ private options;
231
+ readonly dimensions: number;
232
+ private client;
233
+ constructor(options: OpenAIEmbedderOptions);
234
+ private getModelDimensions;
235
+ embed(text: string): Promise<number[]>;
236
+ embedBatch(texts: string[]): Promise<number[][]>;
237
+ }
238
+
239
+ export { type Chunk, ChunkTooLargeError, type ChunkUpdate, DimensionMismatchError, type Embedder, EmbeddingError, type EmbeddingProgressEvent, type EmbeddingProgressHandler, type ErrorHandler, IngestionError, Knowledge, KnowledgeError, type KnowledgeOptions, type KnowledgeProvider, KnowledgeProviderError, type KnowledgeSource, type KnowledgeTool, type KnowledgeToolParams, type KnowledgeToolResult, MarkdownSource, type MarkdownSourceOptions, MemoryProvider, type MemoryProviderOptions, type MetadataFilter, OllamaEmbedder, type OllamaEmbedderOptions, OpenAIEmbedder, type OpenAIEmbedderOptions, PersistentKnowledgeProvider, type PersistentKnowledgeProviderOptions, type QueryOptions, type QueryResult, type SyncEvent, type SyncEventHandler };
package/dist/index.d.ts CHANGED
@@ -1,14 +1,239 @@
1
- export * from './interfaces.js';
2
- export * from './errors.js';
3
- export * from './knowledge.js';
4
- export { MemoryProvider } from './providers/memory.js';
5
- export type { MemoryProviderOptions } from './providers/memory.js';
6
- export { PersistentKnowledgeProvider } from './providers/persistent.js';
7
- export type { PersistentKnowledgeProviderOptions } from './providers/persistent.js';
8
- export { MarkdownSource } from './sources/markdown.js';
9
- export type { MarkdownSourceOptions } from './sources/markdown.js';
10
- export { OllamaEmbedder } from './embedders/ollama.js';
11
- export type { OllamaEmbedderOptions } from './embedders/ollama.js';
12
- export { OpenAIEmbedder } from './embedders/openai.js';
13
- export type { OpenAIEmbedderOptions } from './embedders/openai.js';
14
- //# sourceMappingURL=index.d.ts.map
1
+ interface Chunk {
2
+ id: string;
3
+ content: string;
4
+ metadata: Record<string, unknown>;
5
+ vector?: number[];
6
+ }
7
+ interface ChunkUpdate {
8
+ type: 'add' | 'update' | 'delete';
9
+ chunk: Chunk;
10
+ }
11
+ interface QueryOptions {
12
+ limit?: number;
13
+ threshold?: number;
14
+ filter?: MetadataFilter;
15
+ includeMetadata?: boolean;
16
+ includeVectors?: boolean;
17
+ }
18
+ interface MetadataFilter {
19
+ [key: string]: string | number | boolean | {
20
+ $in: unknown[];
21
+ } | {
22
+ $gt: number;
23
+ } | {
24
+ $lt: number;
25
+ };
26
+ }
27
+ interface QueryResult {
28
+ chunk: Chunk;
29
+ score: number;
30
+ distance?: number;
31
+ }
32
+ interface KnowledgeProvider {
33
+ add(chunks: Chunk[]): Promise<void>;
34
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
35
+ delete(ids: string[]): Promise<void>;
36
+ clear(): Promise<void>;
37
+ validateDimensions(dimensions: number): Promise<void>;
38
+ close?(): void;
39
+ }
40
+ interface KnowledgeSource {
41
+ load(): AsyncIterable<Chunk>;
42
+ watch?(): AsyncIterable<ChunkUpdate>;
43
+ }
44
+ interface Embedder {
45
+ embed(text: string): Promise<number[]>;
46
+ embedBatch(texts: string[]): Promise<number[][]>;
47
+ readonly dimensions: number;
48
+ }
49
+
50
+ declare class KnowledgeError extends Error {
51
+ readonly code?: string | undefined;
52
+ constructor(message: string, code?: string | undefined);
53
+ }
54
+ declare class EmbeddingError extends KnowledgeError {
55
+ readonly statusCode?: number | undefined;
56
+ constructor(message: string, statusCode?: number | undefined);
57
+ }
58
+ declare class IngestionError extends KnowledgeError {
59
+ readonly file?: string | undefined;
60
+ constructor(message: string, file?: string | undefined);
61
+ }
62
+ declare class ChunkTooLargeError extends KnowledgeError {
63
+ readonly chunkSize: number;
64
+ constructor(message: string, chunkSize: number);
65
+ }
66
+ declare class DimensionMismatchError extends KnowledgeError {
67
+ readonly expected: number;
68
+ readonly actual: number;
69
+ constructor(expected: number, actual: number);
70
+ }
71
+ declare class KnowledgeProviderError extends KnowledgeError {
72
+ constructor(message: string);
73
+ }
74
+
75
+ interface KnowledgeOptions {
76
+ provider: KnowledgeProvider;
77
+ sources: KnowledgeSource[];
78
+ embedder: Embedder;
79
+ description: string;
80
+ reSync?: boolean;
81
+ onError?: ErrorHandler;
82
+ onSync?: SyncEventHandler;
83
+ onEmbeddingProgress?: EmbeddingProgressHandler;
84
+ }
85
+ type ErrorHandler = (error: Error, context: {
86
+ file?: string;
87
+ chunk?: Chunk;
88
+ }) => 'skip' | 'abort';
89
+ interface SyncEvent {
90
+ type: 'start' | 'file' | 'chunk' | 'complete' | 'error';
91
+ file?: string;
92
+ chunksAffected?: number;
93
+ error?: Error;
94
+ }
95
+ type SyncEventHandler = (event: SyncEvent) => void;
96
+ interface EmbeddingProgressEvent {
97
+ source: string;
98
+ current: number;
99
+ total: number;
100
+ percent: number;
101
+ }
102
+ type EmbeddingProgressHandler = (event: EmbeddingProgressEvent) => void;
103
+ declare class Knowledge {
104
+ private provider;
105
+ private embedder;
106
+ private description;
107
+ private sources;
108
+ private options;
109
+ private constructor();
110
+ static create(options: KnowledgeOptions): Promise<Knowledge>;
111
+ query(text: string, options?: QueryOptions): Promise<QueryResult[]>;
112
+ sync(): Promise<void>;
113
+ private embedChunks;
114
+ stop(): Promise<void>;
115
+ toTool(): KnowledgeTool;
116
+ }
117
+ interface KnowledgeTool {
118
+ name: string;
119
+ displayName: string;
120
+ description: string;
121
+ category: string;
122
+ cacheable?: boolean;
123
+ parameters: {
124
+ type: string;
125
+ properties: Record<string, unknown>;
126
+ required: string[];
127
+ };
128
+ execute: (params: KnowledgeToolParams) => Promise<KnowledgeToolResult[]>;
129
+ }
130
+ interface KnowledgeToolParams {
131
+ query: string;
132
+ limit?: number;
133
+ threshold?: number;
134
+ filter?: Record<string, string | number | boolean | {
135
+ $in: unknown[];
136
+ } | {
137
+ $gt: number;
138
+ } | {
139
+ $lt: number;
140
+ }>;
141
+ }
142
+ interface KnowledgeToolResult {
143
+ content: string;
144
+ score: number;
145
+ metadata: Record<string, unknown>;
146
+ }
147
+
148
+ interface MemoryProviderOptions {
149
+ maxChunks?: number;
150
+ }
151
+ declare class MemoryProvider implements KnowledgeProvider {
152
+ private options;
153
+ private chunks;
154
+ private dimensions?;
155
+ constructor(options?: MemoryProviderOptions);
156
+ validateDimensions(dimensions: number): Promise<void>;
157
+ add(chunks: Chunk[]): Promise<void>;
158
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
159
+ delete(ids: string[]): Promise<void>;
160
+ clear(): Promise<void>;
161
+ }
162
+
163
+ interface PersistentKnowledgeProviderOptions {
164
+ namespace: string;
165
+ storagePath?: string;
166
+ reSync?: boolean;
167
+ }
168
+ declare class PersistentKnowledgeProvider implements KnowledgeProvider {
169
+ private options;
170
+ private db;
171
+ private dimensions?;
172
+ private dbPath;
173
+ constructor(options: PersistentKnowledgeProviderOptions);
174
+ private initSchema;
175
+ private loadDimensions;
176
+ validateDimensions(dimensions: number): Promise<void>;
177
+ add(chunks: Chunk[]): Promise<void>;
178
+ query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
179
+ delete(ids: string[]): Promise<void>;
180
+ clear(): Promise<void>;
181
+ shouldReSync(): boolean;
182
+ close(): void;
183
+ }
184
+
185
+ interface MarkdownSourceOptions {
186
+ maxChunkSize?: number;
187
+ chunkOverlap?: number;
188
+ minChunkSize?: number;
189
+ namespace?: string;
190
+ metadata?: Record<string, unknown>;
191
+ }
192
+ declare class MarkdownSource implements KnowledgeSource {
193
+ private pattern;
194
+ private options;
195
+ constructor(pattern: string, options?: MarkdownSourceOptions);
196
+ load(): AsyncIterable<Chunk>;
197
+ private chunkMarkdown;
198
+ private parseHeadings;
199
+ private extractFrontmatter;
200
+ private removeFrontmatter;
201
+ private generateChunkId;
202
+ }
203
+
204
+ interface OllamaEmbedderOptions {
205
+ model: string;
206
+ baseUrl?: string;
207
+ /** Override auto-detected dimensions for custom/unknown models */
208
+ dimensions?: number;
209
+ retries?: number;
210
+ retryDelay?: number;
211
+ }
212
+ declare class OllamaEmbedder implements Embedder {
213
+ private options;
214
+ readonly dimensions: number;
215
+ private baseUrl;
216
+ constructor(options: OllamaEmbedderOptions);
217
+ private getModelDimensions;
218
+ embed(text: string): Promise<number[]>;
219
+ embedBatch(texts: string[]): Promise<number[][]>;
220
+ }
221
+
222
+ interface OpenAIEmbedderOptions {
223
+ model: string;
224
+ apiKey: string;
225
+ retries?: number;
226
+ retryDelay?: number;
227
+ timeout?: number;
228
+ }
229
+ declare class OpenAIEmbedder implements Embedder {
230
+ private options;
231
+ readonly dimensions: number;
232
+ private client;
233
+ constructor(options: OpenAIEmbedderOptions);
234
+ private getModelDimensions;
235
+ embed(text: string): Promise<number[]>;
236
+ embedBatch(texts: string[]): Promise<number[][]>;
237
+ }
238
+
239
+ export { type Chunk, ChunkTooLargeError, type ChunkUpdate, DimensionMismatchError, type Embedder, EmbeddingError, type EmbeddingProgressEvent, type EmbeddingProgressHandler, type ErrorHandler, IngestionError, Knowledge, KnowledgeError, type KnowledgeOptions, type KnowledgeProvider, KnowledgeProviderError, type KnowledgeSource, type KnowledgeTool, type KnowledgeToolParams, type KnowledgeToolResult, MarkdownSource, type MarkdownSourceOptions, MemoryProvider, type MemoryProviderOptions, type MetadataFilter, OllamaEmbedder, type OllamaEmbedderOptions, OpenAIEmbedder, type OpenAIEmbedderOptions, PersistentKnowledgeProvider, type PersistentKnowledgeProviderOptions, type QueryOptions, type QueryResult, type SyncEvent, type SyncEventHandler };
package/dist/index.js CHANGED
@@ -1,31 +1,25 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
- Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.OpenAIEmbedder = exports.OllamaEmbedder = exports.MarkdownSource = exports.PersistentKnowledgeProvider = exports.MemoryProvider = void 0;
18
- __exportStar(require("./interfaces.js"), exports);
19
- __exportStar(require("./errors.js"), exports);
20
- __exportStar(require("./knowledge.js"), exports);
21
- var memory_js_1 = require("./providers/memory.js");
22
- Object.defineProperty(exports, "MemoryProvider", { enumerable: true, get: function () { return memory_js_1.MemoryProvider; } });
23
- var persistent_js_1 = require("./providers/persistent.js");
24
- Object.defineProperty(exports, "PersistentKnowledgeProvider", { enumerable: true, get: function () { return persistent_js_1.PersistentKnowledgeProvider; } });
25
- var markdown_js_1 = require("./sources/markdown.js");
26
- Object.defineProperty(exports, "MarkdownSource", { enumerable: true, get: function () { return markdown_js_1.MarkdownSource; } });
27
- var ollama_js_1 = require("./embedders/ollama.js");
28
- Object.defineProperty(exports, "OllamaEmbedder", { enumerable: true, get: function () { return ollama_js_1.OllamaEmbedder; } });
29
- var openai_js_1 = require("./embedders/openai.js");
30
- Object.defineProperty(exports, "OpenAIEmbedder", { enumerable: true, get: function () { return openai_js_1.OpenAIEmbedder; } });
31
- //# sourceMappingURL=index.js.map
1
+ var b=class extends Error{constructor(t,r){super(t);this.code=r;this.name="KnowledgeError"}code},h=class extends b{constructor(t,r){super(t,"EMBEDDING_ERROR");this.statusCode=r;this.name="EmbeddingError"}statusCode},x=class extends b{constructor(t,r){super(t,"INGESTION_ERROR");this.file=r;this.name="IngestionError"}file},K=class extends b{constructor(t,r){super(t,"CHUNK_TOO_LARGE");this.chunkSize=r;this.name="ChunkTooLargeError"}chunkSize},v=class extends b{expected;actual;constructor(e,t){super(`Dimension mismatch: expected ${e}, got ${t}`,"DIMENSION_MISMATCH"),this.name="DimensionMismatchError",this.expected=e,this.actual=t}},y=class extends b{constructor(e){super(e,"PROVIDER_ERROR"),this.name="KnowledgeProviderError"}};var $=class i{constructor(e,t,r,n,o){this.provider=e;this.embedder=t;this.description=r;this.sources=n;this.options=o}provider;embedder;description;sources;options;static async create(e){await e.provider.validateDimensions(e.embedder.dimensions);let t=new i(e.provider,e.embedder,e.description,e.sources,e),r=e.reSync!==!1;return!r&&"shouldReSync"in e.provider?(e.provider.shouldReSync()&&await t.sync(),t):(r&&await t.sync(),t)}async query(e,t){let r=await this.embedder.embed(e);return this.provider.query(r,t)}async sync(){this.options.onSync?.({type:"start"});try{let e=this.embedder.dimensions;await this.provider.clear(),await this.provider.validateDimensions(e);let t=[];for(let n of this.sources)for await(let o of n.load())t.push(o);let r=await this.embedChunks(t);r.length>0&&await this.provider.add(r),this.options.onSync?.({type:"complete",chunksAffected:r.length})}catch(e){throw this.options.onSync?.({type:"error",error:e}),e}}async embedChunks(e){if(e.length===0)return[];let t=[];try{let r=e.map(o=>o.content),n=await this.embedder.embedBatch(r);for(let 
o=0;o<e.length;o++)t.push({...e[o],vector:n[o]}),this.options.onEmbeddingProgress?.({source:"sync",current:o+1,total:e.length,percent:Math.round((o+1)/e.length*100)})}catch(r){if(this.options.onError?.(r,{})==="abort")throw r;for(let o=0;o<e.length;o++)try{let s=await this.embedder.embed(e[o].content);t.push({...e[o],vector:s}),this.options.onEmbeddingProgress?.({source:"sync",current:o+1,total:e.length,percent:Math.round((o+1)/e.length*100)})}catch(s){if(this.options.onError?.(s,{chunk:e[o]})==="abort")throw s}}return t}async stop(){this.provider.close&&this.provider.close()}toTool(){return{name:"knowledge_search",displayName:"Knowledge Search",description:this.description||"Search the knowledge base for relevant information",category:"search",cacheable:!1,parameters:{type:"object",properties:{query:{type:"string",description:"Search query to find relevant information"},limit:{type:"number",description:"Maximum number of results to return (default: 10)"},threshold:{type:"number",description:"Minimum similarity threshold 0-1 (default: 0.7)"},filter:{type:"object",description:"Optional metadata filters"}},required:["query"]},execute:async e=>(await this.query(e.query,{limit:e.limit,threshold:e.threshold,filter:e.filter})).map(r=>({content:r.chunk.content,score:r.score,metadata:r.chunk.metadata}))}}};function O(i,e){if(i.length!==e.length)throw new Error("Vectors must have same dimensions");let t=0,r=0,n=0;for(let s=0;s<i.length;s++)t+=i[s]*e[s],r+=i[s]*i[s],n+=e[s]*e[s];let o=Math.sqrt(r)*Math.sqrt(n);return o===0?0:t/o}function C(i,e){if(!e)return!0;for(let[t,r]of Object.entries(e)){let n=i[t];if(typeof r=="object"&&r!==null&&!Array.isArray(r)){if("$in"in r){if(!r.$in.includes(n))return!1}else if("$gt"in r){let o=r.$gt;if(typeof n!="number"||n<=o)return!1}else if("$lt"in r){let o=r.$lt;if(typeof n!="number"||n>=o)return!1}}else if(n!==r)return!1}return!0}var P=class{constructor(e={}){this.options=e}options;chunks=new Map;dimensions;async 
validateDimensions(e){if(this.dimensions&&this.dimensions!==e)throw new v(this.dimensions,e);this.dimensions=e}async add(e){for(let t of e){if(!t.vector)throw new y("Chunk missing vector");if(this.options.maxChunks&&this.chunks.size>=this.options.maxChunks)throw new y(`Max chunks limit reached: ${this.options.maxChunks}`);this.chunks.set(t.id,{chunk:{id:t.id,content:t.content,metadata:t.metadata},vector:t.vector})}}async query(e,t={}){let{limit:r=10,threshold:n=.7,filter:o,includeMetadata:s=!0,includeVectors:c=!1}=t,u=[];for(let{chunk:a,vector:d}of this.chunks.values()){if(o&&!C(a.metadata,o))continue;let m=O(e,d);m>=n&&u.push({chunk:{id:a.id,content:a.content,metadata:s?a.metadata:{},vector:c?d:void 0},score:m,distance:1-m})}return u.sort((a,d)=>d.score-a.score),u.slice(0,r)}async delete(e){for(let t of e)this.chunks.delete(t)}async clear(){this.chunks.clear(),this.dimensions=void 0}};import Q from"better-sqlite3";import*as I from"fs";import*as S from"path";import*as N from"os";var R=class{constructor(e){this.options=e;let t=e.storagePath||S.join(N.homedir(),".toolpack","knowledge");this.dbPath=S.join(t,`${e.namespace}.db`),I.mkdirSync(t,{recursive:!0}),this.db=new Q(this.dbPath),this.db.pragma("journal_mode = WAL"),this.initSchema(),this.loadDimensions()}options;db;dimensions;dbPath;initSchema(){this.db.exec(`
2
+ CREATE TABLE IF NOT EXISTS chunks (
3
+ id TEXT PRIMARY KEY,
4
+ content TEXT NOT NULL,
5
+ metadata TEXT NOT NULL,
6
+ vector BLOB NOT NULL,
7
+ synced_at INTEGER NOT NULL
8
+ );
9
+
10
+ CREATE TABLE IF NOT EXISTS provider_meta (
11
+ key TEXT PRIMARY KEY,
12
+ value TEXT NOT NULL
13
+ );
14
+ `)}loadDimensions(){let e=this.db.prepare("SELECT value FROM provider_meta WHERE key = ?").get("dimensions");e&&(this.dimensions=parseInt(e.value,10))}async validateDimensions(e){if(this.dimensions&&this.dimensions!==e)throw new v(this.dimensions,e);this.dimensions||(this.db.prepare("INSERT OR REPLACE INTO provider_meta (key, value) VALUES (?, ?)").run("dimensions",e.toString()),this.dimensions=e)}async add(e){let t=this.db.prepare(`
15
+ INSERT OR REPLACE INTO chunks (id, content, metadata, vector, synced_at)
16
+ VALUES (?, ?, ?, ?, ?)
17
+ `);this.db.transaction(n=>{for(let o of n){if(!o.vector)throw new y("Chunk missing vector");let s=Buffer.from(new Float32Array(o.vector).buffer);t.run(o.id,o.content,JSON.stringify(o.metadata),s,Date.now())}})(e)}async query(e,t={}){let{limit:r=10,threshold:n=.7,filter:o,includeMetadata:s=!0,includeVectors:c=!1}=t,u=this.db.prepare("SELECT id, content, metadata, vector FROM chunks").all(),a=[];for(let d of u){let m=JSON.parse(d.metadata);if(o&&!C(m,o))continue;let g=new Float32Array(d.vector.buffer,d.vector.byteOffset,d.vector.byteLength/4),k=Array.from(g),w=O(e,k);w>=n&&a.push({chunk:{id:d.id,content:d.content,metadata:s?m:{},vector:c?k:void 0},score:w,distance:1-w})}return a.sort((d,m)=>m.score-d.score),a.slice(0,r)}async delete(e){let t=this.db.prepare("DELETE FROM chunks WHERE id = ?");this.db.transaction(n=>{for(let o of n)t.run(o)})(e)}async clear(){this.db.prepare("DELETE FROM chunks").run(),this.db.prepare("DELETE FROM provider_meta WHERE key = ?").run("dimensions"),this.dimensions=void 0}shouldReSync(){return this.options.reSync===!1?this.db.prepare("SELECT COUNT(*) as count FROM chunks").get().count===0:!0}close(){this.db.close()}};import*as F from"fs/promises";import*as E from"path";import*as j from"crypto";import B from"fast-glob";function f(i){return Math.ceil(i.length/4)}function q(i,e){let t=i.split(/\n\n+/),r=[],n="";for(let o of t){let s=f(o);f(n)+s>e&&n?(r.push(n.trim()),n=o):n+=(n?`
18
+
19
+ `:"")+o}return n&&r.push(n.trim()),r}function U(i,e){let t=i.match(/[^.!?]+[.!?]+/g)||[i],r=[],n="";for(let o of t){let s=f(o);f(n)+s>e&&n?(r.push(n.trim()),n=o):n+=(n?" ":"")+o}return n&&r.push(n.trim()),r}function D(i,e){if(i.length<=1||e===0)return i;let t=[];for(let r=0;r<i.length;r++){let n=i[r];if(r>0){let s=i[r-1].split(/\s+/),c=Math.ceil(e/4);n=s.slice(-c).join(" ")+" "+n}t.push(n)}return t}function L(i,e){if(f(i)<=e)return[i];let r=q(i,e),n=[];for(let o of r)f(o)>e?n.push(...U(o,e)):n.push(o);return n}var T=class{constructor(e,t={}){this.pattern=e;this.options={maxChunkSize:t.maxChunkSize??2e3,chunkOverlap:t.chunkOverlap??200,minChunkSize:t.minChunkSize??100,namespace:t.namespace??"markdown",metadata:t.metadata??{}}}pattern;options;async*load(){let e=this.pattern.replace(/\\/g,"/"),t=await B(e,{absolute:!0});for(let r of t)try{let n=await F.readFile(r,"utf-8"),o=this.chunkMarkdown(n,r);for(let s of o)yield s}catch(n){throw new x(`Failed to process file: ${n.message}`,r)}}chunkMarkdown(e,t){let r=this.extractFrontmatter(e),n=this.removeFrontmatter(e),o=this.parseHeadings(n),s=[],c=0;for(let u of o){let a=/```[\s\S]*?```/.test(u.content),d=f(u.content);if(d<this.options.minChunkSize&&s.length>0){let g=s[s.length-1];g.content+=`
20
+
21
+ `+u.content,a&&(g.metadata.hasCode=!0);continue}let m;d>this.options.maxChunkSize?m=L(u.content,this.options.maxChunkSize):m=[u.content],this.options.chunkOverlap>0&&m.length>1&&(m=D(m,this.options.chunkOverlap));for(let g=0;g<m.length;g++){let k=m[g],w=this.generateChunkId(t,k,c);s.push({id:w,content:k,metadata:{...this.options.metadata,...r,heading:u.heading,hasCode:a,source:E.basename(t),sourcePath:t,chunkIndex:c,totalChunks:m.length}}),c++}}return s}parseHeadings(e){let t=e.split(`
22
+ `),r=[],n=[],o=[];for(let s of t){let c=s.match(/^(#{1,6})\s+(.+)$/);if(c){if(o.length>0){let d=n.map(m=>m.text);r.push({heading:d.length>0?[...d]:[""],content:o.join(`
23
+ `).trim(),level:n.length>0?n[n.length-1].level:0}),o=[]}let u=c[1].length,a=c[2].trim();for(;n.length>0&&n[n.length-1].level>=u;)n.pop();n.push({level:u,text:a}),o.push(s)}else o.push(s)}if(o.length>0){let s=n.map(c=>c.text);r.push({heading:s.length>0?[...s]:[""],content:o.join(`
24
+ `).trim(),level:n.length>0?n[n.length-1].level:0})}return r.filter(s=>s.content.length>0)}extractFrontmatter(e){let t=e.match(/^---\n([\s\S]*?)\n---/);if(!t)return{};let r=t[1],n={},o=r.split(`
25
+ `);for(let s of o){let c=s.match(/^(\w+):\s*(.+)$/);if(c){let u=c[1],a=c[2].trim();a==="true"?a=!0:a==="false"?a=!1:isNaN(Number(a))?typeof a=="string"&&a.startsWith("[")&&a.endsWith("]")&&(a=a.slice(1,-1).split(",").map(d=>d.trim())):a=Number(a),n[u]=a}}return n}removeFrontmatter(e){return e.replace(/^---\n[\s\S]*?\n---\n/,"")}generateChunkId(e,t,r){let n=j.createHash("md5").update(t).digest("hex").substring(0,8),o=E.basename(e,E.extname(e));return`${this.options.namespace}:${o}:${r}:${n}`}};var M=class{constructor(e){this.options=e;this.baseUrl=e.baseUrl||"http://localhost:11434",this.dimensions=e.dimensions||this.getModelDimensions(e.model)}options;dimensions;baseUrl;getModelDimensions(e){let t={"nomic-embed-text":768,"mxbai-embed-large":1024,"all-minilm":384,"snowflake-arctic-embed":1024,"bge-m3":1024,"bge-large":1024,"all-minilm:l6-v2":384,"all-minilm:l12-v2":384},r=t[e];if(!r)throw new h(`Unknown Ollama model '${e}'. Provide 'dimensions' in OllamaEmbedderOptions or use a known model: ${Object.keys(t).join(", ")}`);return r}async embed(e){let t=null,r=this.options.retries||3,n=this.options.retryDelay||1e3;for(let o=0;o<r;o++)try{let s=await fetch(`${this.baseUrl}/api/embeddings`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({model:this.options.model,prompt:e})});if(!s.ok)throw new h(`Ollama embedding failed: ${s.statusText}`,s.status);return(await s.json()).embedding}catch(s){if(t=s,s instanceof h&&s.statusCode&&s.statusCode>=400&&s.statusCode<500)throw s;o<r-1&&await new Promise(c=>setTimeout(c,n))}throw new h(`Ollama embedding failed after ${r} retries: ${t?.message}`)}async embedBatch(e){let t=[];for(let r of e)t.push(await this.embed(r));return t}};import H from"openai";var A=class{constructor(e){this.options=e;this.client=new 
H({apiKey:e.apiKey,timeout:e.timeout||3e4}),this.dimensions=this.getModelDimensions(e.model)}options;dimensions;client;getModelDimensions(e){return{"text-embedding-3-small":1536,"text-embedding-3-large":3072,"text-embedding-ada-002":1536}[e]||1536}async embed(e){let t=null,r=this.options.retries||3;for(let n=0;n<r;n++)try{return(await this.client.embeddings.create({model:this.options.model,input:e})).data[0].embedding}catch(o){t=o,n<r-1&&await new Promise(s=>setTimeout(s,this.options.retryDelay||1e3))}throw new h(`OpenAI embedding failed after ${r} retries: ${t?.message}`)}async embedBatch(e){let t=null,r=this.options.retries||3;for(let n=0;n<r;n++)try{return(await this.client.embeddings.create({model:this.options.model,input:e})).data.map(s=>s.embedding)}catch(o){t=o,n<r-1&&await new Promise(s=>setTimeout(s,this.options.retryDelay||1e3))}throw new h(`OpenAI batch embedding failed after ${r} retries: ${t?.message}`)}};export{K as ChunkTooLargeError,v as DimensionMismatchError,h as EmbeddingError,x as IngestionError,$ as Knowledge,b as KnowledgeError,y as KnowledgeProviderError,T as MarkdownSource,P as MemoryProvider,M as OllamaEmbedder,A as OpenAIEmbedder,R as PersistentKnowledgeProvider};