@dcyfr/ai-rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +588 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/dist/loaders/html/index.d.ts +26 -0
- package/dist/loaders/html/index.d.ts.map +1 -0
- package/dist/loaders/html/index.js +106 -0
- package/dist/loaders/html/index.js.map +1 -0
- package/dist/loaders/index.d.ts +8 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/index.js +7 -0
- package/dist/loaders/index.js.map +1 -0
- package/dist/loaders/markdown/index.d.ts +33 -0
- package/dist/loaders/markdown/index.d.ts.map +1 -0
- package/dist/loaders/markdown/index.js +150 -0
- package/dist/loaders/markdown/index.js.map +1 -0
- package/dist/loaders/text/index.d.ts +21 -0
- package/dist/loaders/text/index.d.ts.map +1 -0
- package/dist/loaders/text/index.js +78 -0
- package/dist/loaders/text/index.js.map +1 -0
- package/dist/pipeline/embedding/generator.d.ts +24 -0
- package/dist/pipeline/embedding/generator.d.ts.map +1 -0
- package/dist/pipeline/embedding/generator.js +42 -0
- package/dist/pipeline/embedding/generator.js.map +1 -0
- package/dist/pipeline/embedding/index.d.ts +8 -0
- package/dist/pipeline/embedding/index.d.ts.map +1 -0
- package/dist/pipeline/embedding/index.js +6 -0
- package/dist/pipeline/embedding/index.js.map +1 -0
- package/dist/pipeline/embedding/pipeline.d.ts +26 -0
- package/dist/pipeline/embedding/pipeline.d.ts.map +1 -0
- package/dist/pipeline/embedding/pipeline.js +59 -0
- package/dist/pipeline/embedding/pipeline.js.map +1 -0
- package/dist/pipeline/index.d.ts +7 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +7 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/ingestion/index.d.ts +5 -0
- package/dist/pipeline/ingestion/index.d.ts.map +1 -0
- package/dist/pipeline/ingestion/index.js +5 -0
- package/dist/pipeline/ingestion/index.js.map +1 -0
- package/dist/pipeline/ingestion/pipeline.d.ts +27 -0
- package/dist/pipeline/ingestion/pipeline.d.ts.map +1 -0
- package/dist/pipeline/ingestion/pipeline.js +118 -0
- package/dist/pipeline/ingestion/pipeline.js.map +1 -0
- package/dist/pipeline/retrieval/index.d.ts +5 -0
- package/dist/pipeline/retrieval/index.d.ts.map +1 -0
- package/dist/pipeline/retrieval/index.js +5 -0
- package/dist/pipeline/retrieval/index.js.map +1 -0
- package/dist/pipeline/retrieval/pipeline.d.ts +29 -0
- package/dist/pipeline/retrieval/pipeline.d.ts.map +1 -0
- package/dist/pipeline/retrieval/pipeline.js +109 -0
- package/dist/pipeline/retrieval/pipeline.js.map +1 -0
- package/dist/stores/index.d.ts +5 -0
- package/dist/stores/index.d.ts.map +1 -0
- package/dist/stores/index.js +5 -0
- package/dist/stores/index.js.map +1 -0
- package/dist/stores/vector/in-memory.d.ts +52 -0
- package/dist/stores/vector/in-memory.d.ts.map +1 -0
- package/dist/stores/vector/in-memory.js +172 -0
- package/dist/stores/vector/in-memory.js.map +1 -0
- package/dist/stores/vector/index.d.ts +6 -0
- package/dist/stores/vector/index.d.ts.map +1 -0
- package/dist/stores/vector/index.js +5 -0
- package/dist/stores/vector/index.js.map +1 -0
- package/dist/types/index.d.ts +259 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/docs/DOCUMENT_LOADERS.md +621 -0
- package/docs/EMBEDDINGS.md +733 -0
- package/docs/PIPELINES.md +771 -0
- package/docs/VECTOR_STORES.md +754 -0
- package/package.json +100 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/pipeline/ingestion/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/pipeline/ingestion/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document ingestion pipeline
|
|
3
|
+
* Orchestrates loading, embedding, and storage
|
|
4
|
+
*/
|
|
5
|
+
import type { DocumentLoader, EmbeddingGenerator, VectorStore, IngestionOptions, IngestionResult } from '../../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Pipeline for ingesting documents into vector store
|
|
8
|
+
*/
|
|
9
|
+
export declare class IngestionPipeline {
    private loader;
    private embedder;
    private store;
    /**
     * @param loader - Document source; its `load(path, loaderConfig)` is invoked once per file path.
     * @param embedder - Embedding generator; its `embed(texts)` is invoked with the chunk texts.
     * @param store - Vector store that receives the embedded chunks through `addDocuments(chunks)`.
     */
    constructor(loader: DocumentLoader, embedder: EmbeddingGenerator, store: VectorStore);
    /**
     * Loads, chunks, embeds and stores one or more files.
     *
     * A failure while handling a file is recorded in the result's `errors`
     * list (file path plus message) and does not stop the remaining files.
     *
     * @param filePaths - A single path or a list of paths.
     * @param options - Optional `batchSize` (defaults to 32), `onProgress` callback and `loaderConfig`.
     * @returns Totals for documents and chunks, the collected errors and the elapsed milliseconds.
     */
    ingest(filePaths: string | string[], options?: IngestionOptions): Promise<IngestionResult>;
    /**
     * Chunks a batch of documents and attaches an embedding to each chunk.
     */
    private processDocuments;
    /**
     * Splits one document's content into fixed-size, overlapping character windows.
     */
    private chunkDocument;
}
|
|
27
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAGV,cAAc,EACd,kBAAkB,EAClB,WAAW,EACX,gBAAgB,EAChB,eAAe,EAChB,MAAM,sBAAsB,CAAC;AAE9B;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,KAAK,CAAc;gBAGzB,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,kBAAkB,EAC5B,KAAK,EAAE,WAAW;IAOpB;;OAEG;IACG,MAAM,CACV,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,EAC5B,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,CAAC;IA6D3B;;OAEG;YACW,gBAAgB;IAqB9B;;OAEG;IACH,OAAO,CAAC,aAAa;CAgCtB"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document ingestion pipeline
|
|
3
|
+
* Orchestrates loading, embedding, and storage
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Pipeline for ingesting documents into vector store
|
|
7
|
+
*/
|
|
8
|
+
export class IngestionPipeline {
|
|
9
|
+
loader;
|
|
10
|
+
embedder;
|
|
11
|
+
store;
|
|
12
|
+
constructor(loader, embedder, store) {
|
|
13
|
+
this.loader = loader;
|
|
14
|
+
this.embedder = embedder;
|
|
15
|
+
this.store = store;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Ingest one or more documents
|
|
19
|
+
*/
|
|
20
|
+
async ingest(filePaths, options) {
|
|
21
|
+
const paths = Array.isArray(filePaths) ? filePaths : [filePaths];
|
|
22
|
+
const startTime = Date.now();
|
|
23
|
+
const { batchSize = 32, onProgress, loaderConfig, } = options ?? {};
|
|
24
|
+
let totalDocuments = 0;
|
|
25
|
+
let totalChunks = 0;
|
|
26
|
+
const errors = [];
|
|
27
|
+
for (let i = 0; i < paths.length; i++) {
|
|
28
|
+
const path = paths[i];
|
|
29
|
+
try {
|
|
30
|
+
// Load documents
|
|
31
|
+
const documents = await this.loader.load(path, loaderConfig);
|
|
32
|
+
totalDocuments += documents.length;
|
|
33
|
+
// Chunk and embed in batches
|
|
34
|
+
const allChunks = [];
|
|
35
|
+
for (let j = 0; j < documents.length; j += batchSize) {
|
|
36
|
+
const batch = documents.slice(j, j + batchSize);
|
|
37
|
+
const chunks = await this.processDocuments(batch);
|
|
38
|
+
allChunks.push(...chunks);
|
|
39
|
+
if (onProgress) {
|
|
40
|
+
onProgress(i + 1, paths.length, {
|
|
41
|
+
currentFile: path,
|
|
42
|
+
documentsProcessed: Math.min(j + batchSize, documents.length),
|
|
43
|
+
totalDocuments: documents.length,
|
|
44
|
+
chunksGenerated: allChunks.length,
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
// Store chunks
|
|
49
|
+
await this.store.addDocuments(allChunks);
|
|
50
|
+
totalChunks += allChunks.length;
|
|
51
|
+
}
|
|
52
|
+
catch (error) {
|
|
53
|
+
errors.push({
|
|
54
|
+
file: path,
|
|
55
|
+
error: error instanceof Error ? error.message : String(error),
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
const endTime = Date.now();
|
|
60
|
+
return {
|
|
61
|
+
documentsProcessed: totalDocuments,
|
|
62
|
+
chunksGenerated: totalChunks,
|
|
63
|
+
errors,
|
|
64
|
+
durationMs: endTime - startTime,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Process documents: chunk and embed
|
|
69
|
+
*/
|
|
70
|
+
async processDocuments(documents) {
|
|
71
|
+
const chunks = [];
|
|
72
|
+
for (const doc of documents) {
|
|
73
|
+
// Split into chunks (using simple text splitting)
|
|
74
|
+
const docChunks = this.chunkDocument(doc);
|
|
75
|
+
chunks.push(...docChunks);
|
|
76
|
+
}
|
|
77
|
+
// Generate embeddings
|
|
78
|
+
const texts = chunks.map((chunk) => chunk.content);
|
|
79
|
+
const embeddings = await this.embedder.embed(texts);
|
|
80
|
+
// Attach embeddings
|
|
81
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
82
|
+
chunks[i].embedding = embeddings[i];
|
|
83
|
+
}
|
|
84
|
+
return chunks;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Simple chunking (override with loader-specific chunking if available)
|
|
88
|
+
*/
|
|
89
|
+
chunkDocument(doc) {
|
|
90
|
+
const chunkSize = 1000;
|
|
91
|
+
const overlap = 200;
|
|
92
|
+
const chunks = [];
|
|
93
|
+
const content = doc.content;
|
|
94
|
+
for (let i = 0; i < content.length; i += chunkSize - overlap) {
|
|
95
|
+
const chunk = content.slice(i, i + chunkSize);
|
|
96
|
+
const chunkId = `${doc.id}-chunk-${chunks.length}`;
|
|
97
|
+
chunks.push({
|
|
98
|
+
id: chunkId,
|
|
99
|
+
documentId: doc.id,
|
|
100
|
+
content: chunk,
|
|
101
|
+
index: chunks.length,
|
|
102
|
+
metadata: {
|
|
103
|
+
chunkIndex: chunks.length,
|
|
104
|
+
chunkCount: 0, // Will update after
|
|
105
|
+
startChar: i,
|
|
106
|
+
endChar: Math.min(i + chunkSize, content.length),
|
|
107
|
+
...doc.metadata,
|
|
108
|
+
},
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
// Update chunk counts
|
|
112
|
+
for (const chunk of chunks) {
|
|
113
|
+
chunk.metadata.chunkCount = chunks.length;
|
|
114
|
+
}
|
|
115
|
+
return chunks;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/pipeline/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAYH;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAiB;IACvB,QAAQ,CAAqB;IAC7B,KAAK,CAAc;IAE3B,YACE,MAAsB,EACtB,QAA4B,EAC5B,KAAkB;QAElB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CACV,SAA4B,EAC5B,OAA0B;QAE1B,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,EACJ,SAAS,GAAG,EAAE,EACd,UAAU,EACV,YAAY,GACb,GAAG,OAAO,IAAI,EAAE,CAAC;QAElB,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,MAAM,GAA2C,EAAE,CAAC;QAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAEtB,IAAI,CAAC;gBACH,iBAAiB;gBACjB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;gBAC7D,cAAc,IAAI,SAAS,CAAC,MAAM,CAAC;gBAEnC,6BAA6B;gBAC7B,MAAM,SAAS,GAAoB,EAAE,CAAC;gBAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;oBACrD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;oBAChD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAClD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;oBAE1B,IAAI,UAAU,EAAE,CAAC;wBACf,UAAU,CAAC,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,EAAE;4BAC9B,WAAW,EAAE,IAAI;4BACjB,kBAAkB,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC;4BAC7D,cAAc,EAAE,SAAS,CAAC,MAAM;4BAChC,eAAe,EAAE,SAAS,CAAC,MAAM;yBAClC,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBAED,eAAe;gBACf,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;gBACzC,WAAW,IAAI,SAAS,CAAC,MAAM,CAAC;YAClC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,IAAI;oBACV,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC9D,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG
,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,OAAO;YACL,kBAAkB,EAAE,cAAc;YAClC,eAAe,EAAE,WAAW;YAC5B,MAAM;YACN,UAAU,EAAE,OAAO,GAAG,SAAS;SAChC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,gBAAgB,CAAC,SAAqB;QAClD,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,kDAAkD;YAClD,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5B,CAAC;QAED,sBAAsB;QACtB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEpD,oBAAoB;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,GAAa;QACjC,MAAM,SAAS,GAAG,IAAI,CAAC;QACvB,MAAM,OAAO,GAAG,GAAG,CAAC;QACpB,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,OAAO,EAAE,CAAC;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,GAAG,GAAG,CAAC,EAAE,UAAU,MAAM,CAAC,MAAM,EAAE,CAAC;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,OAAO;gBACX,UAAU,EAAE,GAAG,CAAC,EAAE;gBAClB,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,QAAQ,EAAE;oBACR,UAAU,EAAE,MAAM,CAAC,MAAM;oBACzB,UAAU,EAAE,CAAC,EAAE,oBAAoB;oBACnC,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC;oBAChD,GAAG,GAAG,CAAC,QAAQ;iBAChB;aACF,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,CAAC,QAAQ,CAAC,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5C,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/pipeline/retrieval/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/pipeline/retrieval/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval pipeline for querying vector stores
|
|
3
|
+
*/
|
|
4
|
+
import type { VectorStore, EmbeddingGenerator, QueryOptions, QueryResult } from '../../types/index.js';
|
|
5
|
+
/**
|
|
6
|
+
* Pipeline for retrieving relevant documents
|
|
7
|
+
*/
|
|
8
|
+
export declare class RetrievalPipeline {
    private store;
    private embedder;
    /**
     * @param store - Vector store that is searched (`search`) and read (`getDocument`).
     * @param embedder - Embedding generator used to turn the query text into a vector.
     */
    constructor(store: VectorStore, embedder: EmbeddingGenerator);
    /**
     * Embeds the query text, searches the store, drops hits whose score is
     * below `threshold` and returns them with an assembled context string.
     *
     * @param query - Query text.
     * @param options - Optional `limit` (default 10), `filter`, `threshold` (default 0) and `includeMetadata` (default true).
     */
    query(query: string, options?: QueryOptions): Promise<QueryResult>;
    /**
     * Renders search results into one text block, one section per result.
     */
    private assembleContext;
    /**
     * Alias for {@link RetrievalPipeline.query}; forwards both arguments unchanged.
     */
    search(query: string, options?: QueryOptions): Promise<QueryResult>;
    /**
     * Searches with the stored embedding of an existing document and excludes
     * that document from the results.
     *
     * @param documentId - Id looked up through the store's `getDocument`.
     * @param options - Optional `limit` (default 10), `filter` and `threshold` (default 0).
     * @throws Error when the document is not found or has no embedding.
     */
    findSimilar(documentId: string, options?: Omit<QueryOptions, 'query'>): Promise<QueryResult>;
}
|
|
29
|
+
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/pipeline/retrieval/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,kBAAkB,EAClB,YAAY,EACZ,WAAW,EAEZ,MAAM,sBAAsB,CAAC;AAE9B;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,QAAQ,CAAqB;gBAEzB,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,kBAAkB;IAK5D;;OAEG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC;IA8CxE;;OAEG;IACH,OAAO,CAAC,eAAe;IA0BvB;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC;IAIzE;;OAEG;IACG,WAAW,CACf,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,GACpC,OAAO,CAAC,WAAW,CAAC;CAkDxB"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval pipeline for querying vector stores
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Pipeline for retrieving relevant documents
|
|
6
|
+
*/
|
|
7
|
+
export class RetrievalPipeline {
|
|
8
|
+
store;
|
|
9
|
+
embedder;
|
|
10
|
+
constructor(store, embedder) {
|
|
11
|
+
this.store = store;
|
|
12
|
+
this.embedder = embedder;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Query the vector store for relevant documents
|
|
16
|
+
*/
|
|
17
|
+
async query(query, options) {
|
|
18
|
+
const startTime = Date.now();
|
|
19
|
+
const { limit = 10, filter, threshold = 0.0, includeMetadata = true, } = options ?? {};
|
|
20
|
+
// Generate query embedding
|
|
21
|
+
const queryEmbedding = await this.embedder.embed([query]);
|
|
22
|
+
// Search vector store
|
|
23
|
+
const searchResults = await this.store.search(queryEmbedding[0], limit, filter);
|
|
24
|
+
// Filter by threshold
|
|
25
|
+
const filteredResults = searchResults.filter((result) => result.score >= threshold);
|
|
26
|
+
// Assemble context
|
|
27
|
+
const context = this.assembleContext(filteredResults, includeMetadata);
|
|
28
|
+
const endTime = Date.now();
|
|
29
|
+
return {
|
|
30
|
+
query,
|
|
31
|
+
results: filteredResults,
|
|
32
|
+
context,
|
|
33
|
+
metadata: {
|
|
34
|
+
totalResults: filteredResults.length,
|
|
35
|
+
durationMs: endTime - startTime,
|
|
36
|
+
averageScore: filteredResults.length > 0
|
|
37
|
+
? filteredResults.reduce((sum, r) => sum + r.score, 0) /
|
|
38
|
+
filteredResults.length
|
|
39
|
+
: 0,
|
|
40
|
+
},
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Assemble context from search results
|
|
45
|
+
*/
|
|
46
|
+
assembleContext(results, includeMetadata) {
|
|
47
|
+
return results
|
|
48
|
+
.map((result, index) => {
|
|
49
|
+
const { document, score } = result;
|
|
50
|
+
let context = `[Document ${index + 1}] (score: ${score.toFixed(3)})\n`;
|
|
51
|
+
if (includeMetadata) {
|
|
52
|
+
const metadata = Object.entries(document.metadata)
|
|
53
|
+
.filter(([key]) => !key.startsWith('chunk'))
|
|
54
|
+
.map(([key, value]) => `${key}: ${value}`)
|
|
55
|
+
.join(', ');
|
|
56
|
+
if (metadata) {
|
|
57
|
+
context += `Metadata: ${metadata}\n`;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
context += `${document.content}\n`;
|
|
61
|
+
return context;
|
|
62
|
+
})
|
|
63
|
+
.join('\n---\n\n');
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Perform semantic search (alias for query)
|
|
67
|
+
*/
|
|
68
|
+
async search(query, options) {
|
|
69
|
+
return this.query(query, options);
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Get similar documents to a given document ID
|
|
73
|
+
*/
|
|
74
|
+
async findSimilar(documentId, options) {
|
|
75
|
+
const document = await this.store.getDocument(documentId);
|
|
76
|
+
if (!document) {
|
|
77
|
+
throw new Error(`Document ${documentId} not found`);
|
|
78
|
+
}
|
|
79
|
+
if (!document.embedding) {
|
|
80
|
+
throw new Error(`Document ${documentId} has no embedding`);
|
|
81
|
+
}
|
|
82
|
+
const startTime = Date.now();
|
|
83
|
+
const { limit = 10, filter, threshold = 0.0, } = options ?? {};
|
|
84
|
+
// Search using document's embedding
|
|
85
|
+
const searchResults = await this.store.search(document.embedding, limit + 1, // +1 to exclude the document itself
|
|
86
|
+
filter);
|
|
87
|
+
// Filter out the query document and apply threshold
|
|
88
|
+
const filteredResults = searchResults
|
|
89
|
+
.filter((result) => result.document.id !== documentId)
|
|
90
|
+
.filter((result) => result.score >= threshold)
|
|
91
|
+
.slice(0, limit);
|
|
92
|
+
const context = this.assembleContext(filteredResults, true);
|
|
93
|
+
const endTime = Date.now();
|
|
94
|
+
return {
|
|
95
|
+
query: `[Similar to ${documentId}]`,
|
|
96
|
+
results: filteredResults,
|
|
97
|
+
context,
|
|
98
|
+
metadata: {
|
|
99
|
+
totalResults: filteredResults.length,
|
|
100
|
+
durationMs: endTime - startTime,
|
|
101
|
+
averageScore: filteredResults.length > 0
|
|
102
|
+
? filteredResults.reduce((sum, r) => sum + r.score, 0) /
|
|
103
|
+
filteredResults.length
|
|
104
|
+
: 0,
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/pipeline/retrieval/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAUH;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,KAAK,CAAc;IACnB,QAAQ,CAAqB;IAErC,YAAY,KAAkB,EAAE,QAA4B;QAC1D,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,KAAa,EAAE,OAAsB;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,EACJ,KAAK,GAAG,EAAE,EACV,MAAM,EACN,SAAS,GAAG,GAAG,EACf,eAAe,GAAG,IAAI,GACvB,GAAG,OAAO,IAAI,EAAE,CAAC;QAElB,2BAA2B;QAC3B,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;QAE1D,sBAAsB;QACtB,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAC3C,cAAc,CAAC,CAAC,CAAC,EACjB,KAAK,EACL,MAAM,CACP,CAAC;QAEF,sBAAsB;QACtB,MAAM,eAAe,GAAG,aAAa,CAAC,MAAM,CAC1C,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,IAAI,SAAS,CACtC,CAAC;QAEF,mBAAmB;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;QAEvE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,OAAO,EAAE,eAAe;YACxB,OAAO;YACP,QAAQ,EAAE;gBACR,YAAY,EAAE,eAAe,CAAC,MAAM;gBACpC,UAAU,EAAE,OAAO,GAAG,SAAS;gBAC/B,YAAY,EACV,eAAe,CAAC,MAAM,GAAG,CAAC;oBACxB,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;wBACpD,eAAe,CAAC,MAAM;oBACxB,CAAC,CAAC,CAAC;aACR;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAAuB,EACvB,eAAwB;QAExB,OAAO,OAAO;aACX,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YACrB,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;YACnC,IAAI,OAAO,GAAG,aAAa,KAAK,GAAG,CAAC,aAAa,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;YAEvE,IAAI,eAAe,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;qBAC/C,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;qBAC3C,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,KAAK,EAAE,CAAC;qBACzC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAEd,IAAI,QAAQ,EAAE,CAAC;oBACb,OAAO,IAAI,aAAa,QAAQ,IAAI,CAAC;gBACvC,CAAC;YACH,CAAC;YAED,OAAO,IAAI,GAAG,QAAQ,CAAC,OAA
O,IAAI,CAAC;YACnC,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC;aACD,IAAI,CAAC,WAAW,CAAC,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,OAAsB;QAChD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CACf,UAAkB,EAClB,OAAqC;QAErC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QAC1D,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,YAAY,UAAU,YAAY,CAAC,CAAC;QACtD,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,CAAC;YACxB,MAAM,IAAI,KAAK,CAAC,YAAY,UAAU,mBAAmB,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,MAAM,EACJ,KAAK,GAAG,EAAE,EACV,MAAM,EACN,SAAS,GAAG,GAAG,GAChB,GAAG,OAAO,IAAI,EAAE,CAAC;QAElB,oCAAoC;QACpC,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAC3C,QAAQ,CAAC,SAAS,EAClB,KAAK,GAAG,CAAC,EAAE,oCAAoC;QAC/C,MAAM,CACP,CAAC;QAEF,oDAAoD;QACpD,MAAM,eAAe,GAAG,aAAa;aAClC,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,KAAK,UAAU,CAAC;aACrD,MAAM,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,IAAI,SAAS,CAAC;aAC7C,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QAEnB,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,eAAe,EAAE,IAAI,CAAC,CAAC;QAE5D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE3B,OAAO;YACL,KAAK,EAAE,eAAe,UAAU,GAAG;YACnC,OAAO,EAAE,eAAe;YACxB,OAAO;YACP,QAAQ,EAAE;gBACR,YAAY,EAAE,eAAe,CAAC,MAAM;gBACpC,UAAU,EAAE,OAAO,GAAG,SAAS;gBAC/B,YAAY,EACV,eAAe,CAAC,MAAM,GAAG,CAAC;oBACxB,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;wBACpD,eAAe,CAAC,MAAM;oBACxB,CAAC,CAAC,CAAC;aACR;SACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,mBAAmB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stores/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-memory vector store implementation
|
|
3
|
+
* For development and testing - use ChromaDB/Pinecone/etc in production
|
|
4
|
+
*/
|
|
5
|
+
import type { VectorStore, VectorStoreConfig, DocumentChunk, SearchResult, MetadataFilter } from '../../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Simple in-memory vector store with cosine similarity search
|
|
8
|
+
*/
|
|
9
|
+
export declare class InMemoryVectorStore implements VectorStore {
    private documents;
    private config;
    /**
     * @param config - Collection name, embedding dimensions and optional distance metric (`'cosine'` when omitted).
     */
    constructor(config: VectorStoreConfig);
    /**
     * Stores chunks keyed by their `id`.
     *
     * @throws Error when a chunk has no embedding or its length differs from the configured dimensions.
     */
    addDocuments(documents: DocumentChunk[]): Promise<void>;
    /**
     * Scores every stored chunk against the query vector and returns the
     * highest-scoring ones.
     *
     * @param query - Must be an embedding vector; a string query is rejected by this store.
     * @param limit - Maximum number of results (defaults to 10).
     * @param filter - Optional metadata condition a chunk has to satisfy.
     */
    search(query: string | number[], limit?: number, filter?: MetadataFilter): Promise<SearchResult[]>;
    /**
     * Removes the chunks with the given ids.
     */
    deleteDocuments(ids: string[]): Promise<void>;
    /**
     * Shallow-merges `update` into the stored chunk.
     *
     * @throws Error when no chunk with `id` exists.
     */
    updateDocument(id: string, update: Partial<DocumentChunk>): Promise<void>;
    /**
     * Returns the chunk stored under `id`, or `null` when there is none.
     */
    getDocument(id: string): Promise<DocumentChunk | null>;
    /**
     * Removes every stored chunk.
     */
    clear(): Promise<void>;
    /**
     * Similarity score under the configured metric: cosine similarity, dot
     * product, or `1 / (1 + distance)` for Euclidean.
     */
    private calculateSimilarity;
    /**
     * Distance under the configured metric: Euclidean distance,
     * `1 - cosine similarity`, or the negated dot product.
     */
    private calculateDistance;
    /**
     * Cosine similarity; 0 when either vector has zero magnitude.
     */
    private cosineSimilarity;
    /**
     * Dot product
     */
    private dotProduct;
    /**
     * Euclidean distance
     */
    private euclideanDistance;
    /**
     * Evaluates a single metadata condition (`eq`, `ne`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`) against a chunk.
     */
    private matchesFilter;
    /**
     * Reports the number of stored chunks together with the configured
     * collection name and embedding dimensions.
     */
    getStats(): {
        documentCount: number;
        collectionName: string;
        dimensions: number;
    };
}
|
|
52
|
+
//# sourceMappingURL=in-memory.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"in-memory.d.ts","sourceRoot":"","sources":["../../../src/stores/vector/in-memory.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EACV,WAAW,EACX,iBAAiB,EACjB,aAAa,EACb,YAAY,EACZ,cAAc,EACf,MAAM,sBAAsB,CAAC;AAE9B;;GAEG;AACH,qBAAa,mBAAoB,YAAW,WAAW;IACrD,OAAO,CAAC,SAAS,CAAyC;IAC1D,OAAO,CAAC,MAAM,CAAoB;gBAEtB,MAAM,EAAE,iBAAiB;IAQ/B,YAAY,CAAC,SAAS,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAcvD,MAAM,CACV,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,EACxB,KAAK,SAAK,EACV,MAAM,CAAC,EAAE,cAAc,GACtB,OAAO,CAAC,YAAY,EAAE,CAAC;IA+BpB,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAM7C,cAAc,CAAC,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IASzE,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,IAAI,CAAC;IAItD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAY3B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAUzB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAexB;;OAEG;IACH,OAAO,CAAC,UAAU;IAQlB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IASzB;;OAEG;IACH,OAAO,CAAC,aAAa;IAyBrB;;OAEG;IACH,QAAQ,IAAI;QACV,aAAa,EAAE,MAAM,CAAC;QACtB,cAAc,EAAE,MAAM,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;KACpB;CAOF"}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-memory vector store implementation
|
|
3
|
+
* For development and testing - use ChromaDB/Pinecone/etc in production
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Simple in-memory vector store with cosine similarity search
|
|
7
|
+
*/
|
|
8
|
+
export class InMemoryVectorStore {
|
|
9
|
+
documents = new Map();
|
|
10
|
+
config;
|
|
11
|
+
constructor(config) {
|
|
12
|
+
this.config = {
|
|
13
|
+
collectionName: config.collectionName,
|
|
14
|
+
embeddingDimensions: config.embeddingDimensions,
|
|
15
|
+
distanceMetric: config.distanceMetric ?? 'cosine',
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
async addDocuments(documents) {
|
|
19
|
+
for (const doc of documents) {
|
|
20
|
+
if (!doc.embedding) {
|
|
21
|
+
throw new Error(`Document ${doc.id} is missing embedding vector`);
|
|
22
|
+
}
|
|
23
|
+
if (doc.embedding.length !== this.config.embeddingDimensions) {
|
|
24
|
+
throw new Error(`Embedding dimension mismatch: expected ${this.config.embeddingDimensions}, got ${doc.embedding.length}`);
|
|
25
|
+
}
|
|
26
|
+
this.documents.set(doc.id, doc);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
async search(query, limit = 10, filter) {
|
|
30
|
+
const queryVector = typeof query === 'string' ? null : query;
|
|
31
|
+
if (!queryVector) {
|
|
32
|
+
throw new Error('Query must be an embedding vector for in-memory store');
|
|
33
|
+
}
|
|
34
|
+
const results = [];
|
|
35
|
+
for (const [_id, doc] of this.documents) {
|
|
36
|
+
// Apply metadata filter if provided
|
|
37
|
+
if (filter && !this.matchesFilter(doc, filter)) {
|
|
38
|
+
continue;
|
|
39
|
+
}
|
|
40
|
+
if (!doc.embedding) {
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
const score = this.calculateSimilarity(queryVector, doc.embedding);
|
|
44
|
+
const distance = this.calculateDistance(queryVector, doc.embedding);
|
|
45
|
+
results.push({ document: doc, score, distance });
|
|
46
|
+
}
|
|
47
|
+
// Sort by score (descending)
|
|
48
|
+
results.sort((a, b) => b.score - a.score);
|
|
49
|
+
return results.slice(0, limit);
|
|
50
|
+
}
|
|
51
|
+
async deleteDocuments(ids) {
|
|
52
|
+
for (const id of ids) {
|
|
53
|
+
this.documents.delete(id);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
async updateDocument(id, update) {
|
|
57
|
+
const existing = this.documents.get(id);
|
|
58
|
+
if (!existing) {
|
|
59
|
+
throw new Error(`Document ${id} not found`);
|
|
60
|
+
}
|
|
61
|
+
this.documents.set(id, { ...existing, ...update });
|
|
62
|
+
}
|
|
63
|
+
async getDocument(id) {
|
|
64
|
+
return this.documents.get(id) ?? null;
|
|
65
|
+
}
|
|
66
|
+
async clear() {
|
|
67
|
+
this.documents.clear();
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
 * Calculate the similarity score between two vectors using the configured distance metric
|
|
71
|
+
*/
|
|
72
|
+
calculateSimilarity(a, b) {
|
|
73
|
+
if (this.config.distanceMetric === 'cosine') {
|
|
74
|
+
return this.cosineSimilarity(a, b);
|
|
75
|
+
}
|
|
76
|
+
else if (this.config.distanceMetric === 'dot') {
|
|
77
|
+
return this.dotProduct(a, b);
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
// For Euclidean, convert distance to similarity
|
|
81
|
+
const distance = this.euclideanDistance(a, b);
|
|
82
|
+
return 1 / (1 + distance);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Calculate distance between two vectors
|
|
87
|
+
*/
|
|
88
|
+
calculateDistance(a, b) {
|
|
89
|
+
if (this.config.distanceMetric === 'euclidean') {
|
|
90
|
+
return this.euclideanDistance(a, b);
|
|
91
|
+
}
|
|
92
|
+
else if (this.config.distanceMetric === 'cosine') {
|
|
93
|
+
return 1 - this.cosineSimilarity(a, b);
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
return -this.dotProduct(a, b);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Cosine similarity
|
|
101
|
+
*/
|
|
102
|
+
cosineSimilarity(a, b) {
|
|
103
|
+
let dotProduct = 0;
|
|
104
|
+
let magnitudeA = 0;
|
|
105
|
+
let magnitudeB = 0;
|
|
106
|
+
for (let i = 0; i < a.length; i++) {
|
|
107
|
+
dotProduct += a[i] * b[i];
|
|
108
|
+
magnitudeA += a[i] * a[i];
|
|
109
|
+
magnitudeB += b[i] * b[i];
|
|
110
|
+
}
|
|
111
|
+
const magnitude = Math.sqrt(magnitudeA) * Math.sqrt(magnitudeB);
|
|
112
|
+
return magnitude === 0 ? 0 : dotProduct / magnitude;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Dot product
|
|
116
|
+
*/
|
|
117
|
+
dotProduct(a, b) {
|
|
118
|
+
let sum = 0;
|
|
119
|
+
for (let i = 0; i < a.length; i++) {
|
|
120
|
+
sum += a[i] * b[i];
|
|
121
|
+
}
|
|
122
|
+
return sum;
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* Euclidean distance
|
|
126
|
+
*/
|
|
127
|
+
euclideanDistance(a, b) {
|
|
128
|
+
let sum = 0;
|
|
129
|
+
for (let i = 0; i < a.length; i++) {
|
|
130
|
+
const diff = a[i] - b[i];
|
|
131
|
+
sum += diff * diff;
|
|
132
|
+
}
|
|
133
|
+
return Math.sqrt(sum);
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Check if document matches metadata filter
|
|
137
|
+
*/
|
|
138
|
+
matchesFilter(doc, filter) {
|
|
139
|
+
const value = doc.metadata[filter.field];
|
|
140
|
+
switch (filter.operator) {
|
|
141
|
+
case 'eq':
|
|
142
|
+
return value === filter.value;
|
|
143
|
+
case 'ne':
|
|
144
|
+
return value !== filter.value;
|
|
145
|
+
case 'gt':
|
|
146
|
+
return typeof value === 'number' && value > filter.value;
|
|
147
|
+
case 'gte':
|
|
148
|
+
return typeof value === 'number' && value >= filter.value;
|
|
149
|
+
case 'lt':
|
|
150
|
+
return typeof value === 'number' && value < filter.value;
|
|
151
|
+
case 'lte':
|
|
152
|
+
return typeof value === 'number' && value <= filter.value;
|
|
153
|
+
case 'in':
|
|
154
|
+
return Array.isArray(filter.value) && filter.value.includes(value);
|
|
155
|
+
case 'nin':
|
|
156
|
+
return Array.isArray(filter.value) && !filter.value.includes(value);
|
|
157
|
+
default:
|
|
158
|
+
return false;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Get store statistics
|
|
163
|
+
*/
|
|
164
|
+
getStats() {
|
|
165
|
+
return {
|
|
166
|
+
documentCount: this.documents.size,
|
|
167
|
+
collectionName: this.config.collectionName,
|
|
168
|
+
dimensions: this.config.embeddingDimensions,
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
//# sourceMappingURL=in-memory.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"in-memory.js","sourceRoot":"","sources":["../../../src/stores/vector/in-memory.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAUH;;GAEG;AACH,MAAM,OAAO,mBAAmB;IACtB,SAAS,GAA+B,IAAI,GAAG,EAAE,CAAC;IAClD,MAAM,CAAoB;IAElC,YAAY,MAAyB;QACnC,IAAI,CAAC,MAAM,GAAG;YACZ,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,mBAAmB,EAAE,MAAM,CAAC,mBAAmB;YAC/C,cAAc,EAAE,MAAM,CAAC,cAAc,IAAI,QAAQ;SAClD,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAA0B;QAC3C,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;gBACnB,MAAM,IAAI,KAAK,CAAC,YAAY,GAAG,CAAC,EAAE,8BAA8B,CAAC,CAAC;YACpE,CAAC;YACD,IAAI,GAAG,CAAC,SAAS,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,CAAC,mBAAmB,EAAE,CAAC;gBAC7D,MAAM,IAAI,KAAK,CACb,0CAA0C,IAAI,CAAC,MAAM,CAAC,mBAAmB,SAAS,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,CACzG,CAAC;YACJ,CAAC;YACD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,MAAM,CACV,KAAwB,EACxB,KAAK,GAAG,EAAE,EACV,MAAuB;QAEvB,MAAM,WAAW,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC;QAE7D,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACxC,oCAAoC;YACpC,IAAI,MAAM,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,MAAM,CAAC,EAAE,CAAC;gBAC/C,SAAS;YACX,CAAC;YAED,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC;gBACnB,SAAS;YACX,CAAC;YAED,MAAM,KAAK,GAAG,IAAI,CAAC,mBAAmB,CAAC,WAAW,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;YACnE,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;YAEpE,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,6BAA6B;QAC7B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE1C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,GAAa;QACjC,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,EAAU,EAAE,MAA8B;QAC7D
,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;QAC9C,CAAC;QAED,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,GAAG,QAAQ,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,EAAU;QAC1B,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACxC,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,CAAW,EAAE,CAAW;QAClD,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,gBAAgB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,KAAK,EAAE,CAAC;YAChD,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC/B,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAC9C,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,CAAW,EAAE,CAAW;QAChD,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,WAAW,EAAE,CAAC;YAC/C,OAAO,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,CAAC,cAAc,KAAK,QAAQ,EAAE,CAAC;YACnD,OAAO,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,CAAW,EAAE,CAAW;QAC/C,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC1B,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC1B,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5B,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAChE,OAAO,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,SAAS,CAAC;IACtD,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,CAAW,EAAE,CAAW;QACzC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAA
C,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,CAAW,EAAE,CAAW;QAChD,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACzB,GAAG,IAAI,IAAI,GAAG,IAAI,CAAC;QACrB,CAAC;QACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,GAAkB,EAAE,MAAsB;QAC9D,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAEzC,QAAQ,MAAM,CAAC,QAAQ,EAAE,CAAC;YACxB,KAAK,IAAI;gBACP,OAAO,KAAK,KAAK,MAAM,CAAC,KAAK,CAAC;YAChC,KAAK,IAAI;gBACP,OAAO,KAAK,KAAK,MAAM,CAAC,KAAK,CAAC;YAChC,KAAK,IAAI;gBACP,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,GAAI,MAAM,CAAC,KAAgB,CAAC;YACvE,KAAK,KAAK;gBACR,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,IAAK,MAAM,CAAC,KAAgB,CAAC;YACxE,KAAK,IAAI;gBACP,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,GAAI,MAAM,CAAC,KAAgB,CAAC;YACvE,KAAK,KAAK;gBACR,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,IAAK,MAAM,CAAC,KAAgB,CAAC;YACxE,KAAK,IAAI;gBACP,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrE,KAAK,KAAK;gBACR,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACtE;gBACE,OAAO,KAAK,CAAC;QACjB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ;QAKN,OAAO;YACL,aAAa,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI;YAClC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;YAC1C,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,mBAAmB;SAC5C,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/stores/vector/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAGrD,YAAY,EACV,WAAW,EACX,iBAAiB,EACjB,YAAY,EACZ,cAAc,GACf,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/stores/vector/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC"}
|