@deepagents/retrieval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/dist/index.d.ts +8 -0
  2. package/dist/index.d.ts.map +1 -0
  3. package/dist/index.js +8 -0
  4. package/dist/index.js.map +7 -0
  5. package/dist/lib/connectors/connector.d.ts +25 -0
  6. package/dist/lib/connectors/connector.d.ts.map +1 -0
  7. package/dist/lib/connectors/connector.js +1 -0
  8. package/dist/lib/connectors/connector.js.map +7 -0
  9. package/dist/lib/connectors/github.d.ts +35 -0
  10. package/dist/lib/connectors/github.d.ts.map +1 -0
  11. package/dist/lib/connectors/github.js +155 -0
  12. package/dist/lib/connectors/github.js.map +7 -0
  13. package/dist/lib/connectors/index.d.ts +8 -0
  14. package/dist/lib/connectors/index.d.ts.map +1 -0
  15. package/dist/lib/connectors/index.js +8 -0
  16. package/dist/lib/connectors/index.js.map +7 -0
  17. package/dist/lib/connectors/linear.d.ts +9 -0
  18. package/dist/lib/connectors/linear.d.ts.map +1 -0
  19. package/dist/lib/connectors/linear.js +29 -0
  20. package/dist/lib/connectors/linear.js.map +7 -0
  21. package/dist/lib/connectors/local.d.ts +7 -0
  22. package/dist/lib/connectors/local.d.ts.map +1 -0
  23. package/dist/lib/connectors/local.js +98 -0
  24. package/dist/lib/connectors/local.js.map +7 -0
  25. package/dist/lib/connectors/pdf.d.ts +4 -0
  26. package/dist/lib/connectors/pdf.d.ts.map +1 -0
  27. package/dist/lib/connectors/pdf.js +58 -0
  28. package/dist/lib/connectors/pdf.js.map +7 -0
  29. package/dist/lib/connectors/repo.d.ts +6 -0
  30. package/dist/lib/connectors/repo.d.ts.map +1 -0
  31. package/dist/lib/connectors/repo.js +171 -0
  32. package/dist/lib/connectors/repo.js.map +7 -0
  33. package/dist/lib/connectors/rss.d.ts +12 -0
  34. package/dist/lib/connectors/rss.d.ts.map +1 -0
  35. package/dist/lib/connectors/rss.js +136 -0
  36. package/dist/lib/connectors/rss.js.map +7 -0
  37. package/dist/lib/embedders/fastembed.d.ts +11 -0
  38. package/dist/lib/embedders/fastembed.d.ts.map +1 -0
  39. package/dist/lib/embedders/fastembed.js +35 -0
  40. package/dist/lib/embedders/fastembed.js.map +7 -0
  41. package/dist/lib/embedders/huggingface.d.ts +14 -0
  42. package/dist/lib/embedders/huggingface.d.ts.map +1 -0
  43. package/dist/lib/embedders/huggingface.js +40 -0
  44. package/dist/lib/embedders/huggingface.js.map +7 -0
  45. package/dist/lib/ingest.d.ts +24 -0
  46. package/dist/lib/ingest.d.ts.map +1 -0
  47. package/dist/lib/ingest.js +111 -0
  48. package/dist/lib/ingest.js.map +7 -0
  49. package/dist/lib/pipeline.d.ts +2 -0
  50. package/dist/lib/pipeline.d.ts.map +1 -0
  51. package/dist/lib/pipeline.js +1 -0
  52. package/dist/lib/pipeline.js.map +7 -0
  53. package/dist/lib/sidecar.d.ts +1 -0
  54. package/dist/lib/sidecar.d.ts.map +1 -0
  55. package/dist/lib/sidecar.js +1 -0
  56. package/dist/lib/sidecar.js.map +7 -0
  57. package/dist/lib/similiarty-search.d.ts +3 -0
  58. package/dist/lib/similiarty-search.d.ts.map +1 -0
  59. package/dist/lib/similiarty-search.js +43 -0
  60. package/dist/lib/similiarty-search.js.map +7 -0
  61. package/dist/lib/stores/cid.d.ts +2 -0
  62. package/dist/lib/stores/cid.d.ts.map +1 -0
  63. package/dist/lib/stores/cid.js +8 -0
  64. package/dist/lib/stores/cid.js.map +7 -0
  65. package/dist/lib/stores/sqlite/bun-sqlite.d.ts +1 -0
  66. package/dist/lib/stores/sqlite/bun-sqlite.d.ts.map +1 -0
  67. package/dist/lib/stores/sqlite/bun-sqlite.js +1 -0
  68. package/dist/lib/stores/sqlite/bun-sqlite.js.map +7 -0
  69. package/dist/lib/stores/sqlite/node-sqlite.d.ts +3 -0
  70. package/dist/lib/stores/sqlite/node-sqlite.d.ts.map +1 -0
  71. package/dist/lib/stores/sqlite/node-sqlite.js +14 -0
  72. package/dist/lib/stores/sqlite/node-sqlite.js.map +7 -0
  73. package/dist/lib/stores/sqlite/sqlite.d.ts +35 -0
  74. package/dist/lib/stores/sqlite/sqlite.d.ts.map +1 -0
  75. package/dist/lib/stores/sqlite/sqlite.js +223 -0
  76. package/dist/lib/stores/sqlite/sqlite.js.map +7 -0
  77. package/dist/lib/stores/sqlite/sqlite.sql.d.ts +3 -0
  78. package/dist/lib/stores/sqlite/sqlite.sql.d.ts.map +1 -0
  79. package/dist/lib/stores/sqlite/sqlite.sql.js +54 -0
  80. package/dist/lib/stores/sqlite/sqlite.sql.js.map +7 -0
  81. package/dist/lib/stores/store.d.ts +28 -0
  82. package/dist/lib/stores/store.d.ts.map +1 -0
  83. package/dist/lib/stores/store.js +1 -0
  84. package/dist/lib/stores/store.js.map +7 -0
  85. package/package.json +47 -0
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../src/lib/ingest.ts"],
+ "sourcesContent": ["import {\n MarkdownTextSplitter,\n RecursiveCharacterTextSplitter,\n} from 'langchain/text_splitter';\n\nimport type { Connector } from './connectors/connector.js';\nimport type { Splitter } from './pipeline.js';\nimport { cid } from './stores/cid.js';\nimport type { Embedder, Store } from './stores/store.js';\n\nexport interface IngestionConfig {\n connector: Connector;\n store: Store;\n splitter?: Splitter;\n embedder: Embedder;\n}\n\nexport async function ingest(\n config: IngestionConfig,\n callback?: (it: string) => void,\n) {\n const splitter = config.splitter ?? split;\n const embedder = config.embedder;\n const corpuses = config.connector.sources();\n\n for await (const it of corpuses) {\n callback?.(it.id);\n const content = await it.content();\n if (!content.trim()) {\n // skip empty files\n continue;\n }\n await config.store.index(config.connector.sourceId, {\n id: it.id,\n cid: cid(content),\n metadata: it.metadata,\n chunker: async function* () {\n // Embed in small batches to control memory usage\n const values = await splitter(it.id, content);\n const batchSize = 40;\n for (let i = 0; i < values.length; i += batchSize) {\n const batch = values.slice(i, i + batchSize);\n const { embeddings } = await embedder(batch);\n for (let j = 0; j < embeddings.length; j++) {\n yield {\n content: batch[j],\n embedding: embeddings[j],\n };\n }\n }\n },\n });\n }\n}\n\nfunction split(id: string, content: string) {\n const splitter = new MarkdownTextSplitter();\n return splitter.splitText(content);\n}\n\nexport type ChunkPosition = {\n startLine: number;\n startColumn: number;\n endLine: number;\n endColumn: number;\n};\n\nexport type SplitChunkWithPosition = {\n content: string;\n position: ChunkPosition | null;\n index: number;\n};\n\nfunction normalizeNewlines(value: string) {\n return value.replace(/\\r\\n/g, '\\n');\n}\n\nfunction computePositions(\n originalContent: string,\n chunks: string[],\n): Array<ChunkPosition | null> {\n if (!chunks.length) {\n return [];\n }\n\n const normalizedContent = normalizeNewlines(originalContent);\n const positions: Array<ChunkPosition | null> = [];\n let searchOffset = 0;\n\n for (const chunk of chunks) {\n const normalizedChunk = normalizeNewlines(chunk);\n const trimmedChunk = normalizedChunk.trim();\n\n const seek = (needle: string, fromIndex: number) =>\n needle ? normalizedContent.indexOf(needle, fromIndex) : -1;\n\n let matchIndex = seek(normalizedChunk, searchOffset);\n let matchValue = normalizedChunk;\n\n if (matchIndex === -1 && trimmedChunk) {\n matchIndex = seek(trimmedChunk, searchOffset);\n matchValue = trimmedChunk;\n }\n\n if (matchIndex === -1) {\n matchIndex = seek(normalizedChunk, 0);\n matchValue = normalizedChunk;\n }\n\n if (matchIndex === -1 && trimmedChunk) {\n matchIndex = seek(trimmedChunk, 0);\n matchValue = trimmedChunk;\n }\n\n if (matchIndex === -1) {\n positions.push(null);\n continue;\n }\n\n const before = normalizedContent.slice(0, matchIndex);\n const beforeLines = before.split('\\n');\n const startLine = beforeLines.length;\n const startColumn = beforeLines[beforeLines.length - 1].length + 1;\n\n const lines = matchValue.split('\\n');\n const endLine = startLine + lines.length - 1;\n const endColumn =\n lines.length === 1\n ? 
startColumn + lines[0].length\n : lines[lines.length - 1].length + 1;\n\n positions.push({ startLine, startColumn, endLine, endColumn });\n searchOffset = matchIndex + matchValue.length;\n }\n\n return positions;\n}\n\nfunction buildChunksWithPositions(\n originalContent: string,\n chunks: string[],\n): SplitChunkWithPosition[] {\n const positions = computePositions(originalContent, chunks);\n return chunks.map((content, index) => ({\n content,\n index,\n position: positions[index] ?? null,\n }));\n}\n\nexport async function splitTypeScriptWithPositions(\n id: string,\n content: string,\n): Promise<SplitChunkWithPosition[]> {\n const splitter = RecursiveCharacterTextSplitter.fromLanguage('js', {\n chunkSize: 512,\n chunkOverlap: 100,\n });\n const docs = await splitter.createDocuments([content]);\n const chunks = docs.map((d) => d.pageContent);\n return buildChunksWithPositions(content, chunks);\n}\n\nexport async function splitTypeScript(id: string, content: string) {\n const chunks = await splitTypeScriptWithPositions(id, content);\n return chunks.map((chunk) => chunk.content);\n}\n"],
+ "mappings": "AAAA;AAAA,EACE;AAAA,EACA;AAAA,OACK;AAIP,SAAS,WAAW;AAUpB,eAAsB,OACpB,QACA,UACA;AACA,QAAM,WAAW,OAAO,YAAY;AACpC,QAAM,WAAW,OAAO;AACxB,QAAM,WAAW,OAAO,UAAU,QAAQ;AAE1C,mBAAiB,MAAM,UAAU;AAC/B,eAAW,GAAG,EAAE;AAChB,UAAM,UAAU,MAAM,GAAG,QAAQ;AACjC,QAAI,CAAC,QAAQ,KAAK,GAAG;AAEnB;AAAA,IACF;AACA,UAAM,OAAO,MAAM,MAAM,OAAO,UAAU,UAAU;AAAA,MAClD,IAAI,GAAG;AAAA,MACP,KAAK,IAAI,OAAO;AAAA,MAChB,UAAU,GAAG;AAAA,MACb,SAAS,mBAAmB;AAE1B,cAAM,SAAS,MAAM,SAAS,GAAG,IAAI,OAAO;AAC5C,cAAM,YAAY;AAClB,iBAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK,WAAW;AACjD,gBAAM,QAAQ,OAAO,MAAM,GAAG,IAAI,SAAS;AAC3C,gBAAM,EAAE,WAAW,IAAI,MAAM,SAAS,KAAK;AAC3C,mBAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,kBAAM;AAAA,cACJ,SAAS,MAAM,CAAC;AAAA,cAChB,WAAW,WAAW,CAAC;AAAA,YACzB;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AACF;AAEA,SAAS,MAAM,IAAY,SAAiB;AAC1C,QAAM,WAAW,IAAI,qBAAqB;AAC1C,SAAO,SAAS,UAAU,OAAO;AACnC;AAeA,SAAS,kBAAkB,OAAe;AACxC,SAAO,MAAM,QAAQ,SAAS,IAAI;AACpC;AAEA,SAAS,iBACP,iBACA,QAC6B;AAC7B,MAAI,CAAC,OAAO,QAAQ;AAClB,WAAO,CAAC;AAAA,EACV;AAEA,QAAM,oBAAoB,kBAAkB,eAAe;AAC3D,QAAM,YAAyC,CAAC;AAChD,MAAI,eAAe;AAEnB,aAAW,SAAS,QAAQ;AAC1B,UAAM,kBAAkB,kBAAkB,KAAK;AAC/C,UAAM,eAAe,gBAAgB,KAAK;AAE1C,UAAM,OAAO,CAAC,QAAgB,cAC5B,SAAS,kBAAkB,QAAQ,QAAQ,SAAS,IAAI;AAE1D,QAAI,aAAa,KAAK,iBAAiB,YAAY;AACnD,QAAI,aAAa;AAEjB,QAAI,eAAe,MAAM,cAAc;AACrC,mBAAa,KAAK,cAAc,YAAY;AAC5C,mBAAa;AAAA,IACf;AAEA,QAAI,eAAe,IAAI;AACrB,mBAAa,KAAK,iBAAiB,CAAC;AACpC,mBAAa;AAAA,IACf;AAEA,QAAI,eAAe,MAAM,cAAc;AACrC,mBAAa,KAAK,cAAc,CAAC;AACjC,mBAAa;AAAA,IACf;AAEA,QAAI,eAAe,IAAI;AACrB,gBAAU,KAAK,IAAI;AACnB;AAAA,IACF;AAEA,UAAM,SAAS,kBAAkB,MAAM,GAAG,UAAU;AACpD,UAAM,cAAc,OAAO,MAAM,IAAI;AACrC,UAAM,YAAY,YAAY;AAC9B,UAAM,cAAc,YAAY,YAAY,SAAS,CAAC,EAAE,SAAS;AAEjE,UAAM,QAAQ,WAAW,MAAM,IAAI;AACnC,UAAM,UAAU,YAAY,MAAM,SAAS;AAC3C,UAAM,YACJ,MAAM,WAAW,IACb,cAAc,MAAM,CAAC,EAAE,SACvB,MAAM,MAAM,SAAS,CAAC,EAAE,SAAS;AAEvC,cAAU,KAAK,EAAE,WAAW,aAAa,SAAS,UAAU,CAAC;AAC7D,mBAAe,aAAa,WAAW;AAAA,EACzC;AAEA,SAAO;AACT;AAEA,SAAS,yBACP,iBACA,QAC0B;AAC1B,QAAM,YAAY,iBAAiB,iBAAiB,MAAM;AAC1D,SAAO,OAAO,IAAI,CAAC,SAAS,WAAW;AAAA,IACrC;AAAA,IACA;AAAA,IACA,UAAU,UAAU,KAAK,KAAK;AAAA,EAChC,EAAE;AACJ;AAEA,eAAsB,6BACpB,IACA,SACmC;AACnC,QAAM,WAAW,+BAA+B,aAAa,MAAM;AAAA,IACjE,WAAW;AAAA,IACX,cAAc;AAAA,EAChB,CAAC;AACD,QAAM,OAAO,MAAM,SAAS,gBAAgB,CAAC,OAAO,CAAC;AACrD,QAAM,SAAS,KAAK,IAAI,CAAC,MAAM,EAAE,WAAW;AAC5C,SAAO,yBAAyB,SAAS,MAAM;AACjD;AAEA,eAAsB,gBAAgB,IAAY,SAAiB;AACjE,QAAM,SAAS,MAAM,6BAA6B,IAAI,OAAO;AAC7D,SAAO,OAAO,IAAI,CAAC,UAAU,MAAM,OAAO;AAC5C;",
+ "names": []
+ }
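
The source embedded in the map above defines ingest(), which walks connector.sources(), skips empty documents, and indexes each one with embeddings produced in small batches. A minimal usage sketch follows; the inline connector and stub embedder are hypothetical stand-ins shaped after the fields this code reads (sourceId, sources(), id, content(), metadata), and the import path is an assumption rather than a documented entry point.

```ts
import { ingest } from '@deepagents/retrieval'; // assumed export path

// Hypothetical in-memory connector: yields a single markdown document.
const connector = {
  sourceId: 'notes',
  async *sources() {
    yield {
      id: 'notes/hello.md',
      metadata: { title: 'Hello' },
      content: async () => '# Hello\n\nSome markdown to index.',
    };
  },
};

// Stub embedder: fixed-size vectors, just to satisfy the Embedder shape.
// Real embedders ship under lib/embedders (fastembed, huggingface).
const embedder = async (documents: string[]) => ({
  embeddings: documents.map(() => new Float32Array(384).fill(0.1)),
  dimensions: 384,
});

// `store` would be a Store implementation, e.g. the SQLiteStore further down:
// await ingest({ connector, store, embedder }, (id) => console.log('indexed', id));
```
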
@@ -0,0 +1,2 @@
+ export type Splitter = (id: string, content: string) => Promise<string[]>;
+ //# sourceMappingURL=pipeline.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/lib/pipeline.ts"],"names":[],"mappings":"AAUA,MAAM,MAAM,QAAQ,GAAG,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC"}
@@ -0,0 +1 @@
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": [],
+ "sourcesContent": [],
+ "mappings": "",
+ "names": []
+ }
@@ -0,0 +1 @@
+ //# sourceMappingURL=sidecar.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"sidecar.d.ts","sourceRoot":"","sources":["../../src/lib/sidecar.ts"],"names":[],"mappings":""}
@@ -0,0 +1 @@
+ //# sourceMappingURL=sidecar.js.map
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": [],
+ "sourcesContent": [],
+ "mappings": "",
+ "names": []
+ }
@@ -0,0 +1,3 @@
+ import { type IngestionConfig } from './ingest.js';
+ export declare function similaritySearch(query: string, config: Omit<IngestionConfig, 'splitter'>): Promise<any[]>;
+ //# sourceMappingURL=similiarty-search.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"similiarty-search.d.ts","sourceRoot":"","sources":["../../src/lib/similiarty-search.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,eAAe,EAAU,MAAM,aAAa,CAAC;AAE3D,wBAAsB,gBAAgB,CACpC,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,IAAI,CAAC,eAAe,EAAE,UAAU,CAAC,kBAiD1C"}
@@ -0,0 +1,43 @@
+ import { embedMany } from "ai";
+ import { ingest } from "./ingest.js";
+ async function similaritySearch(query, config) {
+   const mode = config.connector.ingestWhen ?? "contentChanged";
+   let shouldIngest = true;
+   if (mode === "never") {
+     if (await config.store.sourceExists(config.connector.sourceId)) {
+       console.log(
+         `Skipping ingestion for source ${config.connector.sourceId} (ingestWhen=never and source exists)`
+       );
+       shouldIngest = false;
+     }
+   } else if (mode === "expired") {
+     const sourceExists = await config.store.sourceExists(
+       config.connector.sourceId
+     );
+     if (sourceExists && !await config.store.sourceExpired(config.connector.sourceId)) {
+       console.log(
+         `Skipping ingestion for source ${config.connector.sourceId} (ingestWhen=expired and source not expired)`
+       );
+       shouldIngest = false;
+     }
+   }
+   const expiryDate = config.connector.expiresAfter ? new Date(Date.now() + config.connector.expiresAfter) : void 0;
+   if (shouldIngest) {
+     await ingest(config);
+   }
+   return config.store.search(
+     query,
+     { sourceId: config.connector.sourceId, topN: 50 },
+     config.embedder
+   ).then(
+     (results) => results.map((it) => ({
+       ...it,
+       similarity: 1 - it.distance,
+       distance: it.distance
+     }))
+   );
+ }
+ export {
+   similaritySearch
+ };
+ //# sourceMappingURL=similiarty-search.js.map
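
A hedged sketch of calling the compiled similaritySearch above: it ingests the connector's source when needed (honouring ingestWhen), then queries the store with topN: 50 and adds a similarity field computed as 1 - distance. The connector and embedder are assumed to be built as in the earlier sketch, and store via the nodeSQLite helper that appears later in this diff.

```ts
import { similaritySearch } from '@deepagents/retrieval'; // assumed export path

const hits = await similaritySearch('how does source expiry work?', {
  connector,
  store,
  embedder,
});

for (const hit of hits) {
  // Row fields mirror the SELECT in SQLiteStore.search (content, distance,
  // document_id, metadata) plus the derived similarity = 1 - cosine distance.
  console.log(hit.similarity.toFixed(3), hit.document_id, hit.content.slice(0, 80));
}
```
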
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../src/lib/similiarty-search.ts"],
+ "sourcesContent": ["import { embedMany } from 'ai';\n\nimport { type IngestionConfig, ingest } from './ingest.js';\n\nexport async function similaritySearch(\n query: string,\n config: Omit<IngestionConfig, 'splitter'>,\n) {\n // Ingest if needed, then perform vector similarity search via the configured store\n const mode = config.connector.ingestWhen ?? 'contentChanged';\n let shouldIngest = true;\n\n if (mode === 'never') {\n if (await config.store.sourceExists(config.connector.sourceId)) {\n console.log(\n `Skipping ingestion for source ${config.connector.sourceId} (ingestWhen=never and source exists)`,\n );\n shouldIngest = false;\n }\n } else if (mode === 'expired') {\n const sourceExists = await config.store.sourceExists(\n config.connector.sourceId,\n );\n if (\n sourceExists &&\n !(await config.store.sourceExpired(config.connector.sourceId))\n ) {\n console.log(\n `Skipping ingestion for source ${config.connector.sourceId} (ingestWhen=expired and source not expired)`,\n );\n shouldIngest = false;\n }\n }\n\n // Calculate expiry date if connector specifies expiresAfter\n const expiryDate = config.connector.expiresAfter\n ? new Date(Date.now() + config.connector.expiresAfter)\n : undefined;\n if (shouldIngest) {\n await ingest(config);\n }\n return config.store\n .search(\n query,\n { sourceId: config.connector.sourceId, topN: 50 },\n config.embedder,\n )\n .then(\n (results) =>\n results.map((it) => ({\n ...it,\n similarity: 1 - it.distance,\n distance: it.distance,\n })) as any[],\n );\n}\n"],
+ "mappings": "AAAA,SAAS,iBAAiB;AAE1B,SAA+B,cAAc;AAE7C,eAAsB,iBACpB,OACA,QACA;AAEA,QAAM,OAAO,OAAO,UAAU,cAAc;AAC5C,MAAI,eAAe;AAEnB,MAAI,SAAS,SAAS;AACpB,QAAI,MAAM,OAAO,MAAM,aAAa,OAAO,UAAU,QAAQ,GAAG;AAC9D,cAAQ;AAAA,QACN,iCAAiC,OAAO,UAAU,QAAQ;AAAA,MAC5D;AACA,qBAAe;AAAA,IACjB;AAAA,EACF,WAAW,SAAS,WAAW;AAC7B,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,IACnB;AACA,QACE,gBACA,CAAE,MAAM,OAAO,MAAM,cAAc,OAAO,UAAU,QAAQ,GAC5D;AACA,cAAQ;AAAA,QACN,iCAAiC,OAAO,UAAU,QAAQ;AAAA,MAC5D;AACA,qBAAe;AAAA,IACjB;AAAA,EACF;AAGA,QAAM,aAAa,OAAO,UAAU,eAChC,IAAI,KAAK,KAAK,IAAI,IAAI,OAAO,UAAU,YAAY,IACnD;AACJ,MAAI,cAAc;AAChB,UAAM,OAAO,MAAM;AAAA,EACrB;AACA,SAAO,OAAO,MACX;AAAA,IACC;AAAA,IACA,EAAE,UAAU,OAAO,UAAU,UAAU,MAAM,GAAG;AAAA,IAChD,OAAO;AAAA,EACT,EACC;AAAA,IACC,CAAC,YACC,QAAQ,IAAI,CAAC,QAAQ;AAAA,MACnB,GAAG;AAAA,MACH,YAAY,IAAI,GAAG;AAAA,MACnB,UAAU,GAAG;AAAA,IACf,EAAE;AAAA,EACN;AACJ;",
+ "names": []
+ }
@@ -0,0 +1,2 @@
+ export declare function cid(content: string): string;
+ //# sourceMappingURL=cid.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../../../src/lib/stores/cid.ts"],"names":[],"mappings":"AAEA,wBAAgB,GAAG,CAAC,OAAO,EAAE,MAAM,UAElC"}
@@ -0,0 +1,8 @@
+ import { createHash } from "node:crypto";
+ function cid(content) {
+   return createHash("sha256").update(content).digest("hex");
+ }
+ export {
+   cid
+ };
+ //# sourceMappingURL=cid.js.map
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../../src/lib/stores/cid.ts"],
+ "sourcesContent": ["import { createHash } from 'node:crypto';\n\nexport function cid(content: string) {\n return createHash('sha256').update(content).digest('hex');\n}\n"],
+ "mappings": "AAAA,SAAS,kBAAkB;AAEpB,SAAS,IAAI,SAAiB;AACnC,SAAO,WAAW,QAAQ,EAAE,OAAO,OAAO,EAAE,OAAO,KAAK;AAC1D;",
+ "names": []
+ }
@@ -0,0 +1 @@
+ //# sourceMappingURL=bun-sqlite.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"bun-sqlite.d.ts","sourceRoot":"","sources":["../../../../src/lib/stores/sqlite/bun-sqlite.ts"],"names":[],"mappings":""}
@@ -0,0 +1 @@
+ //# sourceMappingURL=bun-sqlite.js.map
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": [],
+ "sourcesContent": [],
+ "mappings": "",
+ "names": []
+ }
@@ -0,0 +1,3 @@
+ import { SQLiteStore } from './sqlite.js';
+ export declare function nodeSQLite(dbName: string, dimension: number): SQLiteStore;
+ //# sourceMappingURL=node-sqlite.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"node-sqlite.d.ts","sourceRoot":"","sources":["../../../../src/lib/stores/sqlite/node-sqlite.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAE1C,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,eAO3D"}
@@ -0,0 +1,14 @@
+ import { DatabaseSync } from "node:sqlite";
+ import * as sqliteVec from "sqlite-vec";
+ import { SQLiteStore } from "./sqlite.js";
+ function nodeSQLite(dbName, dimension) {
+   const db = new DatabaseSync(dbName, {
+     allowExtension: true
+   });
+   db.loadExtension(sqliteVec.getLoadablePath());
+   return new SQLiteStore(db, dimension);
+ }
+ export {
+   nodeSQLite
+ };
+ //# sourceMappingURL=node-sqlite.js.map
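
A sketch of constructing the Node-backed store shown above. nodeSQLite opens a node:sqlite database with extensions enabled and loads the sqlite-vec extension, so it needs a Node build that ships node:sqlite; the dimension passed here must match the embedder's output size. The import path and file name are assumptions.

```ts
import { nodeSQLite } from '@deepagents/retrieval'; // assumed export path

// 384 must equal the embedder's output dimension, since vec_chunks is created
// as FLOAT[<dimension>] with cosine distance (see sqlite.sql.js below).
const store = nodeSQLite('retrieval.db', 384);
```
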
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../../../src/lib/stores/sqlite/node-sqlite.ts"],
+ "sourcesContent": ["import { DatabaseSync } from 'node:sqlite';\nimport * as sqliteVec from 'sqlite-vec';\n\nimport { SQLiteStore } from './sqlite.js';\n\nexport function nodeSQLite(dbName: string, dimension: number) {\n const db = new DatabaseSync(dbName, {\n allowExtension: true,\n });\n\n db.loadExtension(sqliteVec.getLoadablePath());\n return new SQLiteStore(db, dimension);\n}\n"],
+ "mappings": "AAAA,SAAS,oBAAoB;AAC7B,YAAY,eAAe;AAE3B,SAAS,mBAAmB;AAErB,SAAS,WAAW,QAAgB,WAAmB;AAC5D,QAAM,KAAK,IAAI,aAAa,QAAQ;AAAA,IAClC,gBAAgB;AAAA,EAClB,CAAC;AAED,KAAG,cAAc,UAAU,gBAAgB,CAAC;AAC5C,SAAO,IAAI,YAAY,IAAI,SAAS;AACtC;",
+ "names": []
+ }
@@ -0,0 +1,35 @@
+ import type { Chunk, Corpus, Embedder, SearchOptions, Store } from '../store.js';
+ interface DB {
+   prepare: (sql: string) => {
+     run: (...args: any[]) => any;
+     all: (...args: any[]) => any[];
+     get: (...args: any[]) => any;
+   };
+   exec: (sql: string) => void;
+ }
+ export declare class SQLiteStore implements Store {
+   #private;
+   constructor(db: DB, dimension: number);
+   search(query: string, options: SearchOptions, embedder: Embedder): Promise<any[]>;
+   upsertDoc(inputs: {
+     documentId: string;
+     sourceId: string;
+     cid: string;
+     metadata?: Record<string, any>;
+   }): any;
+   insertDoc(inputs: {
+     sourceId: string;
+     documentId: string;
+   }): (chunk: Chunk) => void;
+   delete(inputs: {
+     sourceId: string;
+     documentId: string;
+   }): any;
+   sourceExists(sourceId: string): boolean;
+   sourceExpired(sourceId: string): boolean;
+   setSourceExpiry(sourceId: string, expiryDate: Date): any;
+   index(sourceId: string, corpus: Corpus, expiryDate?: Date): Promise<void>;
+ }
+ export declare function vectorToBlob(vector: number[] | Float32Array): Buffer;
+ export {};
+ //# sourceMappingURL=sqlite.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"sqlite.d.ts","sourceRoot":"","sources":["../../../../src/lib/stores/sqlite/sqlite.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,KAAK,EACL,MAAM,EACN,QAAQ,EACR,aAAa,EACb,KAAK,EACN,MAAM,aAAa,CAAC;AAKrB,UAAU,EAAE;IACV,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK;QACxB,GAAG,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,GAAG,CAAC;QAC7B,GAAG,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,GAAG,EAAE,CAAC;QAC/B,GAAG,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,GAAG,CAAC;KAC9B,CAAC;IACF,IAAI,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;CAC7B;AAED,qBAAa,WAAY,YAAW,KAAK;;gBAE3B,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM;IAwD/B,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ;IA+CtE,SAAS,CAAC,MAAM,EAAE;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,GAAG,EAAE,MAAM,CAAC;QACZ,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAChC;IAiBD,SAAS,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,IAKhD,OAAO,KAAK;IAStB,MAAM,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE;IAMvD,YAAY,CAAC,QAAQ,EAAE,MAAM;IAO7B,aAAa,CAAC,QAAQ,EAAE,MAAM;IAW9B,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI;IAQ5C,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,IAAI;CA+ChE;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,GAAG,MAAM,CASpE"}
@@ -0,0 +1,223 @@
+ import dedent from "dedent";
+ import { template } from "lodash-es";
+ import sql from "./sqlite.sql.js";
+ const DEFAULT_TOP_N = 10;
+ class SQLiteStore {
+   #db;
+   constructor(db, dimension) {
+     this.#db = db;
+     const compiled = template(sql);
+     this.#db.exec(compiled({ DIMENSION: dimension }));
+   }
+   #transaction(callback) {
+     try {
+       this.#db.exec("BEGIN IMMEDIATE");
+       callback();
+       this.#db.exec("COMMIT");
+     } catch (error) {
+       this.#db.exec("ROLLBACK");
+       throw error;
+     }
+   }
+   #searchByDocument(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       SELECT v.content, v.distance, v.document_id, d.metadata
+       FROM vec_chunks v
+       JOIN documents d ON d.id = v.document_id
+       WHERE v.source_id = ?
+         AND v.document_id = ?
+         AND v.embedding MATCH vec_normalize(vec_f32(?))
+         AND v.k = ?
+       ORDER BY v.distance ASC
+     `);
+     return stmt.all(
+       inputs.sourceId,
+       inputs.documentId,
+       inputs.embedding,
+       inputs.k
+     );
+   }
+   #searchBySource(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       SELECT v.content, v.distance, v.document_id, d.metadata
+       FROM vec_chunks v
+       JOIN documents d ON d.id = v.document_id
+       WHERE v.source_id = ?
+         AND v.embedding MATCH vec_normalize(vec_f32(?))
+         AND v.k = ?
+       ORDER BY v.distance ASC
+     `);
+     return stmt.all(inputs.sourceId, inputs.embedding, inputs.k);
+   }
+   async search(query, options, embedder) {
+     const { embeddings } = await embedder([query]);
+     if (!embeddings.length) {
+       return [];
+     }
+     const vectorBlob = vectorToBlob(embeddings[0]);
+     const topN = options.topN;
+     if (options.documentId) {
+       const rows2 = this.#searchByDocument({
+         sourceId: options.sourceId,
+         documentId: options.documentId,
+         embedding: vectorBlob,
+         k: topN ?? DEFAULT_TOP_N
+       });
+       return rows2.map((r) => ({
+         ...r,
+         metadata: safeParseMetadata(r.metadata)
+       }));
+     }
+     const rows = this.#searchBySource({
+       sourceId: options.sourceId,
+       embedding: vectorBlob,
+       k: topN ?? DEFAULT_TOP_N
+     });
+     return rows.map((r) => ({
+       ...r,
+       metadata: safeParseMetadata(r.metadata)
+     }));
+   }
+   #upsertSource(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       INSERT INTO sources (source_id) VALUES (?)
+       ON CONFLICT(source_id) DO UPDATE SET updated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now')
+     `);
+     return stmt.run(inputs.sourceId);
+   }
+   #upsertSourceWithExpiry(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       INSERT INTO sources (source_id, expires_at) VALUES (?, ?)
+       ON CONFLICT(source_id) DO UPDATE SET
+         updated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now'),
+         expires_at=excluded.expires_at
+     `);
+     return stmt.run(inputs.sourceId, inputs.expiresAt);
+   }
+   upsertDoc(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       INSERT INTO documents (id, source_id, cid, metadata)
+       VALUES (?, ?, ?, json(?))
+       ON CONFLICT(id) DO UPDATE SET
+         cid=excluded.cid,
+         metadata=COALESCE(excluded.metadata, documents.metadata),
+         updated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now')
+       WHERE documents.cid != excluded.cid;
+     `);
+     return stmt.run(
+       inputs.documentId,
+       inputs.sourceId,
+       inputs.cid,
+       inputs.metadata ? JSON.stringify(inputs.metadata) : null
+     );
+   }
+   insertDoc(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       INSERT INTO vec_chunks (source_id, document_id, content, embedding)
+       VALUES (?, ?, ?, vec_normalize(vec_f32(?)))
+     `);
+     return (chunk) => {
+       stmt.run(
+         inputs.sourceId,
+         inputs.documentId,
+         chunk.content,
+         vectorToBlob(chunk.embedding)
+       );
+     };
+   }
+   delete(inputs) {
+     const stmt = this.#db.prepare(dedent`
+       DELETE FROM vec_chunks WHERE source_id = ? AND document_id = ?
+     `);
+     return stmt.run(inputs.sourceId, inputs.documentId);
+   }
+   sourceExists(sourceId) {
+     const stmt = this.#db.prepare(dedent`
+       SELECT 1 FROM sources WHERE source_id = ? LIMIT 1
+     `);
+     const row = stmt.get(sourceId);
+     return Boolean(row);
+   }
+   sourceExpired(sourceId) {
+     const stmt = this.#db.prepare(dedent`
+       SELECT 1 FROM sources
+       WHERE source_id = ?
+         AND expires_at IS NOT NULL
+         AND expires_at <= strftime('%Y-%m-%dT%H:%M:%fZ','now')
+       LIMIT 1
+     `);
+     const row = stmt.get(sourceId);
+     return Boolean(row);
+   }
+   setSourceExpiry(sourceId, expiryDate) {
+     const stmt = this.#db.prepare(dedent`
+       UPDATE sources
+       SET expires_at = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%fZ','now')
+       WHERE source_id = ?
+     `);
+     return stmt.run(expiryDate.toISOString(), sourceId);
+   }
+   async index(sourceId, corpus, expiryDate) {
+     if (expiryDate) {
+       this.#upsertSourceWithExpiry({
+         expiresAt: expiryDate.toISOString(),
+         sourceId
+       });
+     } else {
+       this.#upsertSource({ sourceId });
+     }
+     const info = this.upsertDoc({
+       documentId: corpus.id,
+       sourceId,
+       cid: corpus.cid,
+       metadata: corpus.metadata
+     });
+     const changed = info.changes > 0;
+     if (!changed) {
+       return;
+     }
+     const insert = this.insertDoc({ sourceId, documentId: corpus.id });
+     this.#transaction(() => {
+       this.delete({ sourceId, documentId: corpus.id });
+     });
+     const batchSize = 32;
+     let batch = [];
+     const flush = () => {
+       if (!batch.length) return;
+       this.#transaction(() => {
+         for (let i = 0; i < batch.length; i++) {
+           insert(batch[i]);
+         }
+       });
+       batch = [];
+     };
+     for await (const chunk of corpus.chunker()) {
+       batch.push(chunk);
+       if (batch.length >= batchSize) flush();
+     }
+     flush();
+   }
+ }
+ function vectorToBlob(vector) {
+   if (vector instanceof Float32Array) {
+     const copied = new Float32Array(vector.length);
+     copied.set(vector);
+     return Buffer.from(copied.buffer);
+   }
+   const floatArray = new Float32Array(vector);
+   return Buffer.from(floatArray.buffer);
+ }
+ function safeParseMetadata(value) {
+   if (value == null) return null;
+   if (typeof value === "object") return value;
+   try {
+     return JSON.parse(String(value));
+   } catch {
+     return null;
+   }
+ }
+ export {
+   SQLiteStore,
+   vectorToBlob
+ };
+ //# sourceMappingURL=sqlite.js.map
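
A sketch of driving SQLiteStore directly, without a connector, to illustrate the Corpus/chunker contract used by index() above. Identifiers and values are illustrative only; store and embedder come from the earlier sketches.

```ts
// The chunker is an async generator so chunks (and their embeddings) can be
// produced lazily; index() deletes old rows, then inserts in batches of 32
// inside transactions. The cid decides whether the document is re-indexed.
const corpus = {
  id: 'docs/readme.md',
  cid: 'content-hash', // normally computed with cid(content)
  metadata: { path: 'docs/readme.md' },
  chunker: async function* () {
    yield { content: 'first chunk', embedding: new Float32Array(384).fill(0.1) };
    yield { content: 'second chunk', embedding: new Float32Array(384).fill(0.2) };
  },
};

await store.index('notes', corpus);
const rows = await store.search('first', { sourceId: 'notes', topN: 5 }, embedder);
```
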
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../../../src/lib/stores/sqlite/sqlite.ts"],
+ "sourcesContent": ["import dedent from 'dedent';\nimport { template } from 'lodash-es';\n\nimport type {\n Chunk,\n Corpus,\n Embedder,\n SearchOptions,\n Store,\n} from '../store.js';\nimport sql from './sqlite.sql.js';\n\nconst DEFAULT_TOP_N = 10;\n\ninterface DB {\n prepare: (sql: string) => {\n run: (...args: any[]) => any;\n all: (...args: any[]) => any[];\n get: (...args: any[]) => any;\n };\n exec: (sql: string) => void;\n}\n\nexport class SQLiteStore implements Store {\n readonly #db: DB;\n constructor(db: DB, dimension: number) {\n this.#db = db;\n const compiled = template(sql);\n this.#db.exec(compiled({ DIMENSION: dimension }));\n }\n\n #transaction(callback: () => void) {\n try {\n this.#db.exec('BEGIN IMMEDIATE');\n callback();\n this.#db.exec('COMMIT');\n } catch (error) {\n this.#db.exec('ROLLBACK');\n throw error;\n }\n }\n\n #searchByDocument(inputs: {\n sourceId: string;\n documentId: string;\n embedding: Buffer<ArrayBufferLike>;\n k: number;\n }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tSELECT v.content, v.distance, v.document_id, d.metadata\n\t\t\t\tFROM vec_chunks v\n\t\t\t\tJOIN documents d ON d.id = v.document_id\n\t\t\t\tWHERE v.source_id = ?\n\t\t\t\t\tAND v.document_id = ?\n\t\t\t\t\tAND v.embedding MATCH vec_normalize(vec_f32(?))\n\t\t\t\t\tAND v.k = ?\n\t\t\t\tORDER BY v.distance ASC\n\t\t\t`);\n return stmt.all(\n inputs.sourceId,\n inputs.documentId,\n inputs.embedding,\n inputs.k,\n );\n }\n #searchBySource(inputs: {\n sourceId: string;\n embedding: Buffer<ArrayBufferLike>;\n k: number;\n }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tSELECT v.content, v.distance, v.document_id, d.metadata\n\t\t\t\tFROM vec_chunks v\n\t\t\t\tJOIN documents d ON d.id = v.document_id\n\t\t\t\tWHERE v.source_id = ?\n\t\t\t\t\tAND v.embedding MATCH vec_normalize(vec_f32(?))\n\t\t\t\t\tAND v.k = ?\n\t\t\t\tORDER BY v.distance ASC\n\t\t\t`);\n return stmt.all(inputs.sourceId, inputs.embedding, inputs.k);\n }\n async search(query: string, options: SearchOptions, embedder: Embedder) {\n const { embeddings } = await embedder([query]);\n if (!embeddings.length) {\n return [];\n }\n const vectorBlob = vectorToBlob(embeddings[0]);\n const topN = options.topN;\n\n if (options.documentId) {\n const rows = this.#searchByDocument({\n sourceId: options.sourceId,\n documentId: options.documentId,\n embedding: vectorBlob,\n k: topN ?? DEFAULT_TOP_N,\n });\n return rows.map((r: any) => ({\n ...r,\n metadata: safeParseMetadata(r.metadata),\n }));\n }\n\n const rows = this.#searchBySource({\n sourceId: options.sourceId,\n embedding: vectorBlob,\n k: topN ?? 
DEFAULT_TOP_N,\n });\n return rows.map((r: any) => ({\n ...r,\n metadata: safeParseMetadata(r.metadata),\n }));\n }\n #upsertSource(inputs: { sourceId: string }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tINSERT INTO sources (source_id) VALUES (?)\n\t\t\t\tON CONFLICT(source_id) DO UPDATE SET updated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now')\n\t\t\t`);\n return stmt.run(inputs.sourceId);\n }\n #upsertSourceWithExpiry(inputs: { sourceId: string; expiresAt: string }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tINSERT INTO sources (source_id, expires_at) VALUES (?, ?)\n\t\t\t\tON CONFLICT(source_id) DO UPDATE SET\n\t\t\t\t\tupdated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now',\n\t\t\t\t\texpires_at=excluded.expires_at\n\t\t\t`);\n return stmt.run(inputs.sourceId, inputs.expiresAt);\n }\n upsertDoc(inputs: {\n documentId: string;\n sourceId: string;\n cid: string;\n metadata?: Record<string, any>;\n }) {\n const stmt = this.#db.prepare(dedent`\n INSERT INTO documents (id, source_id, cid, metadata)\n VALUES (?, ?, ?, json(?))\n ON CONFLICT(id) DO UPDATE SET\n cid=excluded.cid,\n metadata=COALESCE(excluded.metadata, documents.metadata),\n updated_at=strftime('%Y-%m-%dT%H:%M:%fZ','now')\n WHERE documents.cid != excluded.cid;\n `);\n return stmt.run(\n inputs.documentId,\n inputs.sourceId,\n inputs.cid,\n inputs.metadata ? JSON.stringify(inputs.metadata) : null,\n );\n }\n insertDoc(inputs: { sourceId: string; documentId: string }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tINSERT INTO vec_chunks (source_id, document_id, content, embedding)\n\t\t\t\tVALUES (?, ?, ?, vec_normalize(vec_f32(?)))\n\t\t\t`);\n return (chunk: Chunk) => {\n stmt.run(\n inputs.sourceId,\n inputs.documentId,\n chunk.content,\n vectorToBlob(chunk.embedding),\n );\n };\n }\n delete(inputs: { sourceId: string; documentId: string }) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tDELETE FROM vec_chunks WHERE source_id = ? AND document_id = ?\n\t\t\t`);\n return stmt.run(inputs.sourceId, inputs.documentId);\n }\n sourceExists(sourceId: string) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tSELECT 1 FROM sources WHERE source_id = ? 
LIMIT 1\n\t\t\t`);\n const row = stmt.get(sourceId);\n return Boolean(row);\n }\n sourceExpired(sourceId: string) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tSELECT 1 FROM sources\n\t\t\t\tWHERE source_id = ?\n\t\t\t\t\tAND expires_at IS NOT NULL\n\t\t\t\t\tAND expires_at <= strftime('%Y-%m-%dT%H:%M:%fZ','now')\n\t\t\t\tLIMIT 1\n\t\t\t`);\n const row = stmt.run(sourceId);\n return Boolean(row);\n }\n setSourceExpiry(sourceId: string, expiryDate: Date) {\n const stmt = this.#db.prepare(dedent`\n\t\t\t\tUPDATE sources\n\t\t\t\tSET expires_at = ?, updated_at = strftime('%Y-%m-%dT%H:%M:%fZ','now')\n\t\t\t\tWHERE source_id = ?\n\t\t\t`);\n return stmt.run(expiryDate.toISOString(), sourceId);\n }\n async index(sourceId: string, corpus: Corpus, expiryDate?: Date) {\n if (expiryDate) {\n this.#upsertSourceWithExpiry({\n expiresAt: expiryDate.toISOString(),\n sourceId,\n });\n } else {\n this.#upsertSource({ sourceId });\n }\n\n const info = this.upsertDoc({\n documentId: corpus.id,\n sourceId,\n cid: corpus.cid,\n metadata: corpus.metadata,\n });\n const changed = info.changes > 0;\n\n if (!changed) {\n return;\n }\n\n const insert = this.insertDoc({ sourceId, documentId: corpus.id });\n // Delete previous rows once before inserting\n this.#transaction(() => {\n this.delete({ sourceId, documentId: corpus.id });\n });\n\n const batchSize = 32;\n let batch: Chunk[] = [];\n const flush = () => {\n if (!batch.length) return;\n this.#transaction(() => {\n for (let i = 0; i < batch.length; i++) {\n insert(batch[i]);\n }\n });\n batch = [];\n };\n\n for await (const chunk of corpus.chunker()) {\n batch.push(chunk);\n if (batch.length >= batchSize) flush();\n }\n // flush any remaining\n flush();\n }\n}\n\nexport function vectorToBlob(vector: number[] | Float32Array): Buffer {\n if (vector instanceof Float32Array) {\n // Copy into a fresh Buffer to avoid retaining references to a larger batch tensor buffer\n const copied = new Float32Array(vector.length);\n copied.set(vector);\n return Buffer.from(copied.buffer);\n }\n const floatArray = new Float32Array(vector);\n return Buffer.from(floatArray.buffer);\n}\n\nfunction safeParseMetadata(value: any) {\n if (value == null) return null;\n if (typeof value === 'object') return value;\n try {\n return JSON.parse(String(value));\n } catch {\n return null;\n }\n}\n"],
+ "mappings": "AAAA,OAAO,YAAY;AACnB,SAAS,gBAAgB;AASzB,OAAO,SAAS;AAEhB,MAAM,gBAAgB;AAWf,MAAM,YAA6B;AAAA,EAC/B;AAAA,EACT,YAAY,IAAQ,WAAmB;AACrC,SAAK,MAAM;AACX,UAAM,WAAW,SAAS,GAAG;AAC7B,SAAK,IAAI,KAAK,SAAS,EAAE,WAAW,UAAU,CAAC,CAAC;AAAA,EAClD;AAAA,EAEA,aAAa,UAAsB;AACjC,QAAI;AACF,WAAK,IAAI,KAAK,iBAAiB;AAC/B,eAAS;AACT,WAAK,IAAI,KAAK,QAAQ;AAAA,IACxB,SAAS,OAAO;AACd,WAAK,IAAI,KAAK,UAAU;AACxB,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEA,kBAAkB,QAKf;AACD,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAS9B;AACA,WAAO,KAAK;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP,OAAO;AAAA,MACP,OAAO;AAAA,IACT;AAAA,EACF;AAAA,EACA,gBAAgB,QAIb;AACD,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAQ9B;AACA,WAAO,KAAK,IAAI,OAAO,UAAU,OAAO,WAAW,OAAO,CAAC;AAAA,EAC7D;AAAA,EACA,MAAM,OAAO,OAAe,SAAwB,UAAoB;AACtE,UAAM,EAAE,WAAW,IAAI,MAAM,SAAS,CAAC,KAAK,CAAC;AAC7C,QAAI,CAAC,WAAW,QAAQ;AACtB,aAAO,CAAC;AAAA,IACV;AACA,UAAM,aAAa,aAAa,WAAW,CAAC,CAAC;AAC7C,UAAM,OAAO,QAAQ;AAErB,QAAI,QAAQ,YAAY;AACtB,YAAMA,QAAO,KAAK,kBAAkB;AAAA,QAClC,UAAU,QAAQ;AAAA,QAClB,YAAY,QAAQ;AAAA,QACpB,WAAW;AAAA,QACX,GAAG,QAAQ;AAAA,MACb,CAAC;AACD,aAAOA,MAAK,IAAI,CAAC,OAAY;AAAA,QAC3B,GAAG;AAAA,QACH,UAAU,kBAAkB,EAAE,QAAQ;AAAA,MACxC,EAAE;AAAA,IACJ;AAEA,UAAM,OAAO,KAAK,gBAAgB;AAAA,MAChC,UAAU,QAAQ;AAAA,MAClB,WAAW;AAAA,MACX,GAAG,QAAQ;AAAA,IACb,CAAC;AACD,WAAO,KAAK,IAAI,CAAC,OAAY;AAAA,MAC3B,GAAG;AAAA,MACH,UAAU,kBAAkB,EAAE,QAAQ;AAAA,IACxC,EAAE;AAAA,EACJ;AAAA,EACA,cAAc,QAA8B;AAC1C,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,IAG9B;AACA,WAAO,KAAK,IAAI,OAAO,QAAQ;AAAA,EACjC;AAAA,EACA,wBAAwB,QAAiD;AACvE,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA,IAK9B;AACA,WAAO,KAAK,IAAI,OAAO,UAAU,OAAO,SAAS;AAAA,EACnD;AAAA,EACA,UAAU,QAKP;AACD,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,OAQ3B;AACH,WAAO,KAAK;AAAA,MACV,OAAO;AAAA,MACP,OAAO;AAAA,MACP,OAAO;AAAA,MACP,OAAO,WAAW,KAAK,UAAU,OAAO,QAAQ,IAAI;AAAA,IACtD;AAAA,EACF;AAAA,EACA,UAAU,QAAkD;AAC1D,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA,IAG9B;AACA,WAAO,CAAC,UAAiB;AACvB,WAAK;AAAA,QACH,OAAO;AAAA,QACP,OAAO;AAAA,QACP,MAAM;AAAA,QACN,aAAa,MAAM,SAAS;AAAA,MAC9B;AAAA,IACF;AAAA,EACF;AAAA,EACA,OAAO,QAAkD;AACvD,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA,IAE9B;AACA,WAAO,KAAK,IAAI,OAAO,UAAU,OAAO,UAAU;AAAA,EACpD;AAAA,EACA,aAAa,UAAkB;AAC7B,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA,IAE9B;AACA,UAAM,MAAM,KAAK,IAAI,QAAQ;AAC7B,WAAO,QAAQ,GAAG;AAAA,EACpB;AAAA,EACA,cAAc,UAAkB;AAC9B,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,IAM9B;AACA,UAAM,MAAM,KAAK,IAAI,QAAQ;AAC7B,WAAO,QAAQ,GAAG;AAAA,EACpB;AAAA,EACA,gBAAgB,UAAkB,YAAkB;AAClD,UAAM,OAAO,KAAK,IAAI,QAAQ;AAAA;AAAA;AAAA;AAAA,IAI9B;AACA,WAAO,KAAK,IAAI,WAAW,YAAY,GAAG,QAAQ;AAAA,EACpD;AAAA,EACA,MAAM,MAAM,UAAkB,QAAgB,YAAmB;AAC/D,QAAI,YAAY;AACd,WAAK,wBAAwB;AAAA,QAC3B,WAAW,WAAW,YAAY;AAAA,QAClC;AAAA,MACF,CAAC;AAAA,IACH,OAAO;AACL,WAAK,cAAc,EAAE,SAAS,CAAC;AAAA,IACjC;AAEA,UAAM,OAAO,KAAK,UAAU;AAAA,MAC1B,YAAY,OAAO;AAAA,MACnB;AAAA,MACA,KAAK,OAAO;AAAA,MACZ,UAAU,OAAO;AAAA,IACnB,CAAC;AACD,UAAM,UAAU,KAAK,UAAU;AAE/B,QAAI,CAAC,SAAS;AACZ;AAAA,IACF;AAEA,UAAM,SAAS,KAAK,UAAU,EAAE,UAAU,YAAY,OAAO,GAAG,CAAC;AAEjE,SAAK,aAAa,MAAM;AACtB,WAAK,OAAO,EAAE,UAAU,YAAY,OAAO,GAAG,CAAC;AAAA,IACjD,CAAC;AAED,UAAM,YAAY;AAClB,QAAI,QAAiB,CAAC;AACtB,UAAM,QAAQ,MAAM;AAClB,UAAI,CAAC,MAAM,OAAQ;AACnB,WAAK,aAAa,MAAM;AACtB,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,iBAAO,MAAM,CAAC,CAAC;AAAA,QACjB;AAAA,MACF,CAAC;AACD,cAAQ,CAAC;AAAA,IACX;AAEA,qBAAiB,SAAS,OAAO,QAAQ,GAAG;AAC1C,YAAM,KAAK,KAAK;AAChB,UAAI,MAAM,UAAU,UAAW,OAAM;AAAA,IACvC;AAEA,UAAM;AAAA,EACR;AACF;AAEO,SAAS,aAAa,QAAyC;AACpE,MAAI,kBAAkB,cAAc;AAElC,UAAM,SAAS,IAAI,aAAa,OAAO,MAAM;AAC7C,WAAO,
IAAI,MAAM;AACjB,WAAO,OAAO,KAAK,OAAO,MAAM;AAAA,EAClC;AACA,QAAM,aAAa,IAAI,aAAa,MAAM;AAC1C,SAAO,OAAO,KAAK,WAAW,MAAM;AACtC;AAEA,SAAS,kBAAkB,OAAY;AACrC,MAAI,SAAS,KAAM,QAAO;AAC1B,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,MAAI;AACF,WAAO,KAAK,MAAM,OAAO,KAAK,CAAC;AAAA,EACjC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;",
+ "names": ["rows"]
+ }
@@ -0,0 +1,3 @@
+ declare const _default: "-- Embedding store schema\n-- Use <%= DIMENSION %> placeholder replaced at runtime.\n\nPRAGMA page_size = 32768;\nPRAGMA journal_mode = WAL;\nPRAGMA synchronous = NORMAL;\nPRAGMA temp_store = MEMORY;\nPRAGMA foreign_keys = ON;\nPRAGMA cache_size = -131072; -- ~128 MiB page cache for faster repeated reads\nPRAGMA mmap_size = 268435456; -- 256 MiB memory map window to cut syscalls\nPRAGMA wal_autocheckpoint = 1000;\nPRAGMA optimize;\n\nCREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(\n source_id TEXT PARTITION KEY,\n document_id TEXT,\n embedding FLOAT[<%= DIMENSION %>] DISTANCE_METRIC=cosine,\n +content TEXT -- auxiliary payload (not filterable) for reconstruction\n);\n\nCREATE TABLE IF NOT EXISTS sources (\n source_id TEXT PRIMARY KEY,\n updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),\n expires_at TEXT\n) STRICT;\n\nCREATE TABLE IF NOT EXISTS documents (\n id TEXT PRIMARY KEY,\n source_id TEXT NOT NULL,\n cid TEXT NOT NULL,\n metadata TEXT, -- JSON blob with arbitrary document metadata\n updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),\n FOREIGN KEY (source_id) REFERENCES sources(source_id) ON DELETE CASCADE\n) STRICT;\n\n-- Indexes to accelerate lookups / maintenance\nCREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source_id);\nCREATE INDEX IF NOT EXISTS idx_documents_source_updated ON documents(source_id, updated_at);\nCREATE INDEX IF NOT EXISTS idx_documents_cid ON documents(cid);\nCREATE INDEX IF NOT EXISTS idx_sources_expires_at ON sources(expires_at) WHERE expires_at IS NOT NULL;\n\n-- emulate cascade for the virtual table\nCREATE TRIGGER IF NOT EXISTS trg_documents_delete_vec\nAFTER DELETE ON documents\nBEGIN\n DELETE FROM vec_chunks\n WHERE source_id = OLD.source_id\n AND document_id = OLD.id;\nEND;\n";
+ export default _default;
+ //# sourceMappingURL=sqlite.sql.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"sqlite.sql.d.ts","sourceRoot":"","sources":["../../../../src/lib/stores/sqlite/sqlite.sql.ts"],"names":[],"mappings":";AAAA,wBAiDC"}
@@ -0,0 +1,54 @@
+ var sqlite_sql_default = `-- Embedding store schema
+ -- Use <%= DIMENSION %> placeholder replaced at runtime.
+
+ PRAGMA page_size = 32768;
+ PRAGMA journal_mode = WAL;
+ PRAGMA synchronous = NORMAL;
+ PRAGMA temp_store = MEMORY;
+ PRAGMA foreign_keys = ON;
+ PRAGMA cache_size = -131072; -- ~128 MiB page cache for faster repeated reads
+ PRAGMA mmap_size = 268435456; -- 256 MiB memory map window to cut syscalls
+ PRAGMA wal_autocheckpoint = 1000;
+ PRAGMA optimize;
+
+ CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
+ source_id TEXT PARTITION KEY,
+ document_id TEXT,
+ embedding FLOAT[<%= DIMENSION %>] DISTANCE_METRIC=cosine,
+ +content TEXT -- auxiliary payload (not filterable) for reconstruction
+ );
+
+ CREATE TABLE IF NOT EXISTS sources (
+ source_id TEXT PRIMARY KEY,
+ updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),
+ expires_at TEXT
+ ) STRICT;
+
+ CREATE TABLE IF NOT EXISTS documents (
+ id TEXT PRIMARY KEY,
+ source_id TEXT NOT NULL,
+ cid TEXT NOT NULL,
+ metadata TEXT, -- JSON blob with arbitrary document metadata
+ updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),
+ FOREIGN KEY (source_id) REFERENCES sources(source_id) ON DELETE CASCADE
+ ) STRICT;
+
+ -- Indexes to accelerate lookups / maintenance
+ CREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source_id);
+ CREATE INDEX IF NOT EXISTS idx_documents_source_updated ON documents(source_id, updated_at);
+ CREATE INDEX IF NOT EXISTS idx_documents_cid ON documents(cid);
+ CREATE INDEX IF NOT EXISTS idx_sources_expires_at ON sources(expires_at) WHERE expires_at IS NOT NULL;
+
+ -- emulate cascade for the virtual table
+ CREATE TRIGGER IF NOT EXISTS trg_documents_delete_vec
+ AFTER DELETE ON documents
+ BEGIN
+ DELETE FROM vec_chunks
+ WHERE source_id = OLD.source_id
+ AND document_id = OLD.id;
+ END;
+ `;
+ export {
+   sqlite_sql_default as default
+ };
+ //# sourceMappingURL=sqlite.sql.js.map
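
The schema string above is a lodash template: SQLiteStore's constructor compiles it and substitutes <%= DIMENSION %> before executing it. A small sketch mirroring that step; the dimension and import path are illustrative.

```ts
import { template } from 'lodash-es';
import sql from './sqlite.sql.js'; // mirrors the compiled module above

// Produces the final DDL with the vector column sized to the embedder output.
const ddl = template(sql)({ DIMENSION: 384 });
// db.exec(ddl);
```
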
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": ["../../../../src/lib/stores/sqlite/sqlite.sql.ts"],
+ "sourcesContent": ["export default `-- Embedding store schema\n-- Use <%= DIMENSION %> placeholder replaced at runtime.\n\nPRAGMA page_size = 32768;\nPRAGMA journal_mode = WAL;\nPRAGMA synchronous = NORMAL;\nPRAGMA temp_store = MEMORY;\nPRAGMA foreign_keys = ON;\nPRAGMA cache_size = -131072; -- ~128 MiB page cache for faster repeated reads\nPRAGMA mmap_size = 268435456; -- 256 MiB memory map window to cut syscalls\nPRAGMA wal_autocheckpoint = 1000;\nPRAGMA optimize;\n\nCREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(\n source_id TEXT PARTITION KEY,\n document_id TEXT,\n embedding FLOAT[<%= DIMENSION %>] DISTANCE_METRIC=cosine,\n +content TEXT -- auxiliary payload (not filterable) for reconstruction\n);\n\nCREATE TABLE IF NOT EXISTS sources (\n source_id TEXT PRIMARY KEY,\n updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),\n expires_at TEXT\n) STRICT;\n\nCREATE TABLE IF NOT EXISTS documents (\n id TEXT PRIMARY KEY,\n source_id TEXT NOT NULL,\n cid TEXT NOT NULL,\n metadata TEXT, -- JSON blob with arbitrary document metadata\n updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),\n FOREIGN KEY (source_id) REFERENCES sources(source_id) ON DELETE CASCADE\n) STRICT;\n\n-- Indexes to accelerate lookups / maintenance\nCREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source_id);\nCREATE INDEX IF NOT EXISTS idx_documents_source_updated ON documents(source_id, updated_at);\nCREATE INDEX IF NOT EXISTS idx_documents_cid ON documents(cid);\nCREATE INDEX IF NOT EXISTS idx_sources_expires_at ON sources(expires_at) WHERE expires_at IS NOT NULL;\n\n-- emulate cascade for the virtual table\nCREATE TRIGGER IF NOT EXISTS trg_documents_delete_vec\nAFTER DELETE ON documents\nBEGIN\n DELETE FROM vec_chunks\n WHERE source_id = OLD.source_id\n AND document_id = OLD.id;\nEND;\n`"],
+ "mappings": "AAAA,IAAO,qBAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;",
+ "names": []
+ }
@@ -0,0 +1,28 @@
+ import type { Embedding } from 'ai';
+ export type Chunk = {
+   content: string;
+   embedding: Embedding | Float32Array;
+ };
+ export type Embedder = (documents: string[]) => Promise<{
+   embeddings: (Embedding | Float32Array)[];
+   dimensions: number;
+ }>;
+ export interface SearchOptions {
+   sourceId: string;
+   documentId?: string;
+   topN?: number;
+ }
+ export type Corpus = {
+   id: string;
+   cid: string;
+   chunker: () => AsyncGenerator<Chunk>;
+   metadata?: Record<string, any> | undefined;
+ };
+ export interface Store {
+   search: (query: string, options: SearchOptions, embedder: Embedder) => Promise<any[]>;
+   sourceExists: (sourceId: string) => Promise<boolean> | boolean;
+   sourceExpired: (sourceId: string) => Promise<boolean> | boolean;
+   setSourceExpiry: (sourceId: string, expiryDate: Date) => Promise<void> | void;
+   index: (sourceId: string, corpus: Corpus, expiryDate?: Date) => Promise<void>;
+ }
+ //# sourceMappingURL=store.d.ts.map
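
The Embedder type above is a plain async function, so any embedding backend can be adapted to it. A hedged sketch using the Vercel AI SDK's embedMany (the `ai` package already appears as an import in this build); the OpenAI provider and model name are assumptions, and the package's own fastembed/huggingface embedders are the in-tree alternatives.

```ts
import { embedMany } from 'ai';
import { openai } from '@ai-sdk/openai'; // assumed provider, not part of this package

import type { Embedder } from '@deepagents/retrieval'; // assumed export path

const embedder: Embedder = async (documents) => {
  const { embeddings } = await embedMany({
    model: openai.embedding('text-embedding-3-small'),
    values: documents,
  });
  // dimensions is taken from the first vector; the store's DIMENSION must match.
  return { embeddings, dimensions: embeddings[0]?.length ?? 0 };
};
```
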
@@ -0,0 +1 @@
+ {"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../../src/lib/stores/store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAEpC,MAAM,MAAM,KAAK,GAAG;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,SAAS,GAAG,YAAY,CAAC;CACrC,CAAC;AACF,MAAM,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,OAAO,CAAC;IACtD,UAAU,EAAE,CAAC,SAAS,GAAG,YAAY,CAAC,EAAE,CAAC;IACzC,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC,CAAC;AAEH,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IAEjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,MAAM,MAAM,GAAG;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,cAAc,CAAC,KAAK,CAAC,CAAC;IACrC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS,CAAC;CAC5C,CAAC;AAEF,MAAM,WAAW,KAAK;IACpB,MAAM,EAAE,CACN,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,aAAa,EACtB,QAAQ,EAAE,QAAQ,KACf,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IACpB,YAAY,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAAG,OAAO,CAAC;IAC/D,aAAa,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAAG,OAAO,CAAC;IAChE,eAAe,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAC9E,KAAK,EAAE,CACL,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,UAAU,CAAC,EAAE,IAAI,KACd,OAAO,CAAC,IAAI,CAAC,CAAC;CACpB"}
@@ -0,0 +1 @@
+ //# sourceMappingURL=store.js.map
@@ -0,0 +1,7 @@
+ {
+ "version": 3,
+ "sources": [],
+ "sourcesContent": [],
+ "mappings": "",
+ "names": []
+ }