@meaningfully/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nvmrc +1 -0
- package/LICENSE +7 -0
- package/README.md +3 -0
- package/dist/DocumentSetManager.d.ts +28 -0
- package/dist/DocumentSetManager.d.ts.map +1 -0
- package/dist/DocumentSetManager.js +134 -0
- package/dist/DocumentSetManager.js.map +1 -0
- package/dist/Meaningfully.d.ts +52 -0
- package/dist/Meaningfully.d.ts.map +1 -0
- package/dist/Meaningfully.js +206 -0
- package/dist/Meaningfully.js.map +1 -0
- package/dist/MetadataManager.d.ts +32 -0
- package/dist/MetadataManager.d.ts.map +1 -0
- package/dist/MetadataManager.js +115 -0
- package/dist/MetadataManager.js.map +1 -0
- package/dist/api/embedding.d.ts +7 -0
- package/dist/api/embedding.d.ts.map +1 -0
- package/dist/api/embedding.js +94 -0
- package/dist/api/embedding.js.map +1 -0
- package/dist/api/embedding.test.d.ts +2 -0
- package/dist/api/embedding.test.d.ts.map +1 -0
- package/dist/api/embedding.test.js +340 -0
- package/dist/api/embedding.test.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts +6 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.js +21 -0
- package/dist/services/batchingWeaviateVectorStore.js.map +1 -0
- package/dist/services/csvLoader.d.ts +3 -0
- package/dist/services/csvLoader.d.ts.map +1 -0
- package/dist/services/csvLoader.js +18 -0
- package/dist/services/csvLoader.js.map +1 -0
- package/dist/services/csvLoader.test.d.ts +2 -0
- package/dist/services/csvLoader.test.d.ts.map +1 -0
- package/dist/services/csvLoader.test.js +75 -0
- package/dist/services/csvLoader.test.js.map +1 -0
- package/dist/services/embeddings.d.ts +22 -0
- package/dist/services/embeddings.d.ts.map +1 -0
- package/dist/services/embeddings.js +314 -0
- package/dist/services/embeddings.js.map +1 -0
- package/dist/services/embeddings.test.d.ts +2 -0
- package/dist/services/embeddings.test.d.ts.map +1 -0
- package/dist/services/embeddings.test.js +115 -0
- package/dist/services/embeddings.test.js.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts +2 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.js +41 -0
- package/dist/services/loggingOpenAIEmbedding.js.map +1 -0
- package/dist/services/mockEmbedding.d.ts +6 -0
- package/dist/services/mockEmbedding.d.ts.map +1 -0
- package/dist/services/mockEmbedding.js +14 -0
- package/dist/services/mockEmbedding.js.map +1 -0
- package/dist/services/progressManager.d.ts +21 -0
- package/dist/services/progressManager.d.ts.map +1 -0
- package/dist/services/progressManager.js +76 -0
- package/dist/services/progressManager.js.map +1 -0
- package/dist/services/progressVectorStoreIndex.d.ts +21 -0
- package/dist/services/progressVectorStoreIndex.d.ts.map +1 -0
- package/dist/services/progressVectorStoreIndex.js +60 -0
- package/dist/services/progressVectorStoreIndex.js.map +1 -0
- package/dist/services/sentenceSplitter.d.ts +17 -0
- package/dist/services/sentenceSplitter.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.js +207 -0
- package/dist/services/sentenceSplitter.js.map +1 -0
- package/dist/services/sentenceSplitter.test.d.ts +2 -0
- package/dist/services/sentenceSplitter.test.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.test.js +68 -0
- package/dist/services/sentenceSplitter.test.js.map +1 -0
- package/dist/services/sploder.d.ts +13 -0
- package/dist/services/sploder.d.ts.map +1 -0
- package/dist/services/sploder.js +45 -0
- package/dist/services/sploder.js.map +1 -0
- package/dist/types/index.d.ts +77 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +7 -0
- package/dist/utils.js.map +1 -0
- package/package.json +43 -0
- package/src/Meaningfully.d.ts +57 -0
- package/src/Meaningfully.ts +228 -0
- package/src/MetadataManager.d.ts +27 -0
- package/src/MetadataManager.ts +145 -0
- package/src/api/embedding.d.ts +6 -0
- package/src/api/embedding.ts +122 -0
- package/src/index.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.d.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.ts +23 -0
- package/src/services/csvLoader.d.ts +2 -0
- package/src/services/csvLoader.ts +24 -0
- package/src/services/embeddings.d.ts +21 -0
- package/src/services/embeddings.ts +374 -0
- package/src/services/loggingOpenAIEmbedding.d.ts +0 -0
- package/src/services/loggingOpenAIEmbedding.ts +46 -0
- package/src/services/mockEmbedding.d.ts +5 -0
- package/src/services/mockEmbedding.ts +13 -0
- package/src/services/progressManager.d.ts +20 -0
- package/src/services/progressManager.ts +88 -0
- package/src/services/progressVectorStoreIndex.d.ts +20 -0
- package/src/services/progressVectorStoreIndex.ts +95 -0
- package/src/services/sentenceSplitter.d.ts +16 -0
- package/src/services/sentenceSplitter.ts +243 -0
- package/src/services/sploder.d.ts +12 -0
- package/src/services/sploder.ts +62 -0
- package/src/types/index.d.ts +71 -0
- package/src/types/index.ts +89 -0
- package/src/utils.d.ts +2 -0
- package/src/utils.ts +6 -0
- package/tests/MetadataManager.test.ts +120 -0
- package/tests/csvLoader.test.d.ts +1 -0
- package/tests/csvLoader.test.ts +88 -0
- package/tests/embedding.test.d.ts +1 -0
- package/tests/embedding.test.ts +425 -0
- package/tests/embeddings.test.d.ts +1 -0
- package/tests/embeddings.test.ts +144 -0
- package/tests/sentenceSplitter.test.d.ts +1 -0
- package/tests/sentenceSplitter.test.ts +81 -0
- package/tsconfig.json +31 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
//@ts-nocheck
|
|
2
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
import { loadDocumentsFromCsv } from './csvLoader';
|
|
5
|
+
import { Document } from 'llamaindex';
|
|
6
|
+
import Papa from 'papaparse';
|
|
7
|
+
vi.mock('fs');
|
|
8
|
+
vi.mock('papaparse');
|
|
9
|
+
describe('csvLoader.ts', () => {
    /**
     * Primes the mocked `readFileSync` and `Papa.parse` with the given values,
     * then runs the loader with the path and text column every scenario uses.
     */
    async function loadWithStubs(fileContent, parsedData) {
        readFileSync.mockReturnValue(fileContent);
        Papa.parse.mockReturnValue(parsedData);
        return loadDocumentsFromCsv('path/to/csv', 'text');
    }

    describe('loadDocumentsFromCsv', () => {
        it('should load documents from CSV and return Document instances', async () => {
            const parsed = {
                data: [
                    { text: 'content1', metadata1: 'meta1', metadata2: 'meta2' },
                    { text: 'content2', metadata1: 'meta3', metadata2: 'meta4' },
                ],
            };
            const result = await loadWithStubs('text,metadata1,metadata2\ncontent1,meta1,meta2\ncontent2,meta3,meta4', parsed);
            // The text column becomes the document text; the other columns become metadata.
            const expected = [
                new Document({ text: 'content1', metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
                new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: 'meta4' } }),
            ];
            expect(remove_id(result)).toEqual(remove_id(expected));
        });

        it('should handle empty CSV file', async () => {
            const result = await loadWithStubs('', { data: [] });
            expect(result).toEqual([]);
        });

        it('should handle missing text column', async () => {
            const parsed = {
                data: [
                    { metadata1: 'meta1', metadata2: 'meta2' },
                    { metadata1: 'meta3', metadata2: 'meta4' },
                ],
            };
            const result = await loadWithStubs('metadata1,metadata2\nmeta1,meta2\nmeta3,meta4', parsed);
            // Rows without the text column are expected to match documents built with `text: undefined`.
            const expected = [
                new Document({ text: undefined, metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
                new Document({ text: undefined, metadata: { metadata1: 'meta3', metadata2: 'meta4' } }),
            ];
            expect(remove_id(result)).toEqual(remove_id(expected));
        });

        it('should handle null values in metadata', async () => {
            const parsed = {
                data: [
                    { text: 'content1', metadata1: null, metadata2: 'meta2' },
                    { text: 'content2', metadata1: 'meta3', metadata2: null },
                ],
            };
            const result = await loadWithStubs('text,metadata1,metadata2\ncontent1,,meta2\ncontent2,meta3,', parsed);
            // null metadata values are expected to come back as empty strings.
            const expected = [
                new Document({ text: 'content1', metadata: { metadata1: '', metadata2: 'meta2' } }),
                new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: '' } }),
            ];
            expect(remove_id(result)).toEqual(remove_id(expected));
        });
    });
});
|
|
69
|
+
/**
 * Returns a shallow copy of every document with its `id_` property left out,
 * so that comparisons between document lists ignore `id_`.
 */
function remove_id(list_of_documents) {
    const withoutId = ({ id_, ...remaining }) => remaining;
    return list_of_documents.map((doc) => withoutId(doc));
}
|
|
75
|
+
//# sourceMappingURL=csvLoader.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csvLoader.test.js","sourceRoot":"","sources":["../../src/services/csvLoader.test.ts"],"names":[],"mappings":"AAAA,aAAa;AACb,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AAErB,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,eAAe,GAAG,sEAAsE,CAAC;YAC/F,MAAM,cAAc,GAAG;gBACrB,IAAI,EAAE;oBACJ,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;oBAC5D,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;iBAC7D;aACF,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBAC1C,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACxF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;aACzF,CAAC,CAAC,CAAC;QACN,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,eAAe,GAAG,EAAE,CAAC;YAC3B,MAAM,cAAc,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;YACpC,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,eAAe,GAAG,+CAA+C,CAAC;YACxE,MAAM,cAAc,GAAG;gBACrB,IAAI,EAAE;oBACJ,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;oBAC1C,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;iBAC3C;aACF,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aA
Aa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBAC1C,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACvF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;aACxF,CAAC,CAAC,CAAC;QACN,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACnD,MAAM,eAAe,GAAG,4DAA4D,CAAC;YACrF,MAAM,cAAc,GAAG;gBACnB,IAAI,EAAE;oBACN,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE;oBACzD,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE;iBACxD;aACJ,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YACjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBACxC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACnF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE,CAAC;aACtF,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,iBAAiB;IAClC,OAAO,iBAAiB,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACnC,MAAM,EAAE,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,GAAG,CAAC;QACvC,OAAO,cAAc,CAAC;IAC1B,CAAC,CAAC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Document, VectorStoreIndex, TextNode, type StorageContext } from "llamaindex";
|
|
2
|
+
import { OllamaEmbedding } from '@llamaindex/ollama';
|
|
3
|
+
import { MistralAIEmbedding } from '@llamaindex/mistral';
|
|
4
|
+
import { GeminiEmbedding } from '@llamaindex/google';
|
|
5
|
+
import { MockEmbedding } from "./mockEmbedding.js";
|
|
6
|
+
import type { EmbeddingConfig, Settings, MetadataFilter, Clients } from "../types/index.js";
|
|
7
|
+
import { OpenAIEmbedding } from "@llamaindex/openai";
|
|
8
|
+
import { ProgressVectorStoreIndex } from "./progressVectorStoreIndex.js";
|
|
9
|
+
export declare function estimateCost(nodes: TextNode[], modelName: string): {
|
|
10
|
+
estimatedPrice: number;
|
|
11
|
+
tokenCount: number;
|
|
12
|
+
pricePer1M: number;
|
|
13
|
+
};
|
|
14
|
+
export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
|
|
15
|
+
export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
|
|
16
|
+
export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
|
|
17
|
+
export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding | MockEmbedding;
|
|
18
|
+
export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
|
|
19
|
+
export declare function persistDocuments(documents: Document[], config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<void>;
|
|
20
|
+
export declare function persistNodes(nodes: TextNode[], config: EmbeddingConfig, settings: Settings, clients: Clients, progressCallback?: (progress: number, total: number) => void): Promise<ProgressVectorStoreIndex>;
|
|
21
|
+
export declare function searchDocuments(index: VectorStoreIndex, query: string, numResults?: number, filters?: MetadataFilter[]): Promise<import("llamaindex").NodeWithScore<import("llamaindex").Metadata>[]>;
|
|
22
|
+
//# sourceMappingURL=embeddings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/services/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,gBAAgB,EAIhB,QAAQ,EAKR,KAAK,cAAc,EAGpB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAC,MAAM,oBAAoB,CAAA;AACnD,OAAO,EAAE,kBAAkB,EAA+B,MAAM,qBAAqB,CAAA;AACrF,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AAKpD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD,OAAO,KAAK,EAAE,eAAe,EAAE,QAAQ,EAAE,cAAc,EAAE,OAAO,EAAG,MAAM,mBAAmB,CAAC;AAG7F,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAuCzE,wBAAgB,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG;IAClE,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,CAuBA;AAED,wBAAsB,2BAA2B,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,6BAqD9G;AAED,wBAAsB,mBAAmB,CAAC,MAAM,EAAE,eAAe,mDAehE;AAID,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,eAAe,sDA0BxB;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,eAAe,EACvB,QAAQ,EAAE,QAAQ,4FA2CnB;AAED,wBAAsB,iBAAiB,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,cAAc,CAAC,CAiB9H;AAED,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAW1I;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAqCpN;AAyCD,wBAAsB,eAAe,CACnC,KAAK,EAAE,gBAAgB,EACvB,KAAK,EAAE,MAAM,EACb,UAAU,GAAE,MAAW,EACvB,OAAO,CAAC,EAAE,cAAc,EAAE,gFAU3B"}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import { VectorStoreIndex,
|
|
2
|
+
// OpenAIEmbedding,
|
|
3
|
+
IngestionPipeline, ModalityType, storageContextFromDefaults, SimpleVectorStore, Settings as LlamaindexSettings, SimpleDocumentStore } from "llamaindex";
|
|
4
|
+
import { OllamaEmbedding } from '@llamaindex/ollama';
|
|
5
|
+
import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral';
|
|
6
|
+
import { GeminiEmbedding } from '@llamaindex/google';
|
|
7
|
+
import { PGVectorStore } from '@llamaindex/postgres';
|
|
8
|
+
import { AzureOpenAIEmbedding } from "@llamaindex/azure";
|
|
9
|
+
import { Sploder } from "./sploder.js";
|
|
10
|
+
import { CustomSentenceSplitter } from "./sentenceSplitter.js";
|
|
11
|
+
import { MockEmbedding } from "./mockEmbedding.js";
|
|
12
|
+
import { encodingForModel } from "js-tiktoken";
|
|
13
|
+
import { join } from "path";
|
|
14
|
+
import { sanitizeProjectName, capitalizeFirstLetter } from "../utils.js";
|
|
15
|
+
import * as fs from 'fs';
|
|
16
|
+
import { OpenAIEmbedding } from "@llamaindex/openai";
|
|
17
|
+
import { BatchingWeaviateVectorStore } from "./batchingWeaviateVectorStore.js";
|
|
18
|
+
import { ProgressVectorStoreIndex } from "./progressVectorStoreIndex.js";
|
|
19
|
+
// Embedding vector length for each supported model, keyed by model name.
// Read when a Postgres vector store is constructed; a model missing from this
// table falls back to 1536 at those call sites.
const MODEL_DIMENSIONS = {
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "mxbai-embed-large": 1024,
    "nomic-embed-text": 768, // priced in PRICE_PER_1M but previously absent here, so it silently got the 1536 fallback
    "mistral-embed": 1024,
    "gemini-embedding-001": 768, // Gemini embedding model
};
|
|
28
|
+
// Price in dollars per one million tokens for each embedding model, keyed by
// model name. Used by estimateCost, which treats a missing model as costing 0.
const PRICE_PER_1M = {
    "text-embedding-3-small": 0.02,
    "text-embedding-3-large": 0.13,
    "mistral-embed": 0.1,
    // local model, free
    "mxbai-embed-large": 0,
    // local model, free
    "nomic-embed-text": 0,
    // Gemini embedding is currently free; on the paid tier it is $0.15 per million tokens
    "gemini-embedding-001": 0,
};
|
|
36
|
+
/**
 * Builds the list of node transformations that run before embedding.
 * The embedding step itself is not part of this list (the original author
 * notes it is handled by VectorStoreIndex.init).
 *
 * Always contains a CustomSentenceSplitter configured from `config.chunkSize`
 * and `config.chunkOverlap`; a Sploder limited to `config.sploderMaxSize` is
 * appended when `config.combineSentencesIntoChunks` is truthy.
 */
function getBaseTransformations(config) {
    const { chunkSize, chunkOverlap } = config;
    const splitter = new CustomSentenceSplitter({ chunkSize, chunkOverlap });
    if (!config.combineSentencesIntoChunks) {
        return [splitter];
    }
    const sploder = new Sploder({ maxStringTokenCount: config.sploderMaxSize });
    return [splitter, sploder];
}
|
|
48
|
+
/**
 * Estimates what it costs to embed `nodes` with the model `modelName`.
 *
 * Tokens are counted with the js-tiktoken encoding for the model. When no
 * encoding can be resolved (the original author notes this happens for
 * Ollama models), the "text-embedding-3-small" encoding is used instead and a
 * warning is logged. Models absent from PRICE_PER_1M are priced at 0.
 *
 * @param nodes nodes whose `text` is tokenized
 * @param modelName embedding model name
 * @returns `{ estimatedPrice, tokenCount, pricePer1M }`
 */
export function estimateCost(nodes, modelName) {
    const pricePer1M = PRICE_PER_1M[modelName] || 0;

    let tokenizer;
    try {
        tokenizer = encodingForModel(modelName);
    }
    catch {
        // No encoding for this model: count with a known encoding rather than failing.
        tokenizer = encodingForModel("text-embedding-3-small");
        console.warn(`Tokenizer for model ${modelName} not found. Using fallback tokenizer.`);
    }

    let tokenCount = 0;
    for (const node of nodes) {
        tokenCount += tokenizer.encode(node.text).length;
    }

    const estimatedPrice = tokenCount * (pricePer1M / 1_000_000);
    return { estimatedPrice, tokenCount, pricePer1M };
}
|
|
70
|
+
/**
 * Opens the vector store index of an existing project.
 *
 * The embedding model is resolved first via getEmbedModel, then the index is
 * built according to `config.vectorStoreType`:
 *  - "simple":   loaded from `<storagePath>/<sanitized project name>` on disk
 *  - "postgres": backed by a PGVectorStore table named after the project
 *  - "weaviate": backed by a BatchingWeaviateVectorStore collection
 *
 * @throws Error when the client required by the store type is missing from
 *         `clients`, or when the store type is not one of the three above
 */
export async function getExistingVectorStoreIndex(config, settings, clients) {
    const embedModel = getEmbedModel(config, settings);
    const storeType = config.vectorStoreType;

    if (storeType === "simple") {
        const storageContext = await storageContextFromDefaults({
            persistDir: join(config.storagePath, sanitizeProjectName(config.projectName)),
        });
        const simpleIndex = await VectorStoreIndex.init({ storageContext });
        simpleIndex.embedModel = embedModel;
        return simpleIndex;
    }

    if (storeType === "postgres") {
        if (!clients.postgresClient) {
            throw new Error("Postgres client required but not provided");
        }
        // NOTE(review): the guard checks clients.postgresClient, but the store
        // connects through POSTGRES_CONNECTION_STRING from the environment.
        const pgStore = new PGVectorStore({
            clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
            tableName: sanitizeProjectName(config.projectName),
            dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // 1536 when the model is unknown
            embeddingModel: embedModel,
        });
        const storageContext = await storageContextFromDefaults({
            vectorStores: { [ModalityType.TEXT]: pgStore },
        });
        const pgIndex = await VectorStoreIndex.init({ storageContext });
        return pgIndex;
    }

    if (storeType === "weaviate") {
        if (!clients.weaviateClient) {
            throw new Error("Weaviate client required but not provided");
        }
        const weaviateStore = new BatchingWeaviateVectorStore({
            indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
            weaviateClient: clients.weaviateClient,
            embeddingModel: embedModel,
        });
        // Per the original author: WeaviateVectorStore's getNodeSimilarity looks
        // for a distance, while current Weaviate returns a score (it would only
        // pick up `score` for hybrid search). The private method is therefore
        // replaced with one that reads `score` from the entry metadata.
        // @ts-ignore
        weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => entry.metadata.score;
        const weaviateIndex = await VectorStoreIndex.fromVectorStore(weaviateStore);
        return weaviateIndex;
    }

    throw new Error(`Unsupported vector store type: ${storeType}`);
}
|
|
120
|
+
/**
 * Returns the document store of an existing project, taken from a storage
 * context created over `<storagePath>/<sanitized project name>`.
 *
 * `config.vectorStoreType` is not consulted: the same on-disk location is
 * used whichever vector store the project is configured with.
 */
export async function getExistingDocStore(config) {
    const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
    const { docStore } = await storageContextFromDefaults({ persistDir: projectDir });
    return docStore;
}
|
|
135
|
+
/**
 * Splits documents into nodes with the base transformations (sentence
 * splitting plus the optional Sploder). No embedding happens here.
 *
 * Side effect: every input document gets `excludedEmbedMetadataKeys` set to
 * all of its metadata keys. The original author's reason is that llama-index
 * otherwise includes the metadata in the embedded text, wasting tokens.
 *
 * Documents whose text is empty or missing are dropped before the pipeline
 * runs, since there is nothing to embed for them.
 *
 * @param documents documents to split
 * @param config embedding configuration read by getBaseTransformations
 * @returns the nodes produced by the ingestion pipeline
 */
export async function transformDocumentsToNodes(documents, config) {
    const totalLabel = "transformDocumentsToNodes Run Time";
    const pipelineLabel = "transformDocumentsToNodes transformDocuments Run Time";

    console.time(totalLabel);
    try {
        const transformations = getBaseTransformations(config);
        for (const document of documents) {
            document.excludedEmbedMetadataKeys = Object.keys(document.metadata);
        }

        console.time(pipelineLabel);
        try {
            const nonEmptyDocuments = documents.filter((document_) => document_.text && document_.text.length > 0);
            const pipeline = new IngestionPipeline({ transformations });
            // `return await` keeps the pipeline inside the try, so both timers
            // are closed only once it has finished or failed.
            return await pipeline.run({ documents: nonEmptyDocuments });
        }
        finally {
            console.timeEnd(pipelineLabel);
        }
    }
    finally {
        // Close the timer on failure too; a label left open makes the next
        // console.time call with the same label warn instead of restarting.
        console.timeEnd(totalLabel);
    }
}
|
|
156
|
+
/**
 * Creates the embedding model selected by `config.modelProvider` and also
 * installs it as the global llamaindex `Settings.embedModel`.
 *
 * Supported providers: "openai", "ollama", "azure", "mistral", "gemini", "mock".
 *
 * @param config embedding configuration (`modelProvider`, `modelName`)
 * @param settings API keys and endpoints for the providers
 * @returns the embedding model instance
 * @throws Error when credentials required by azure, mistral or gemini are
 *         missing, or when the provider is not supported
 */
export function getEmbedModel(config, settings) {
    let embedModel;
    switch (config.modelProvider) {
        case "openai": {
            embedModel = new OpenAIEmbedding({
                model: config.modelName,
                apiKey: settings.openAIKey || undefined,
            });
            // Original author's note: all embedding models enforce a maximum of
            // 300,000 tokens summed across all inputs in a single request.
            embedModel.embedBatchSize = 50;
            break;
        }
        case "ollama": {
            embedModel = new OllamaEmbedding({
                model: config.modelName,
                config: { host: settings.oLlamaBaseURL || undefined },
            });
            break;
        }
        case "azure": {
            const hasAzureCredentials = settings.azureOpenAIKey && settings.azureOpenAIEndpoint;
            if (!hasAzureCredentials) {
                throw new Error("Azure OpenAI API key and endpoint are required for Azure embedding models");
            }
            embedModel = new AzureOpenAIEmbedding({
                model: config.modelName,
                apiKey: settings.azureOpenAIKey,
                endpoint: settings.azureOpenAIEndpoint,
                apiVersion: settings.azureOpenAIApiVersion ?? undefined,
            });
            break;
        }
        case "mistral": {
            if (!settings.mistralApiKey) {
                throw new Error("Mistral API key is required for Mistral embedding models");
            }
            // config.modelName is ignored here: there is only one Mistral embedding model.
            embedModel = new MistralAIEmbedding({
                model: MistralAIEmbeddingModelType.MISTRAL_EMBED,
                apiKey: settings.mistralApiKey,
            });
            break;
        }
        case "gemini": {
            if (!settings.geminiApiKey) {
                throw new Error("Gemini API key is required for Gemini embedding models");
            }
            embedModel = new GeminiEmbedding({ apiKey: settings.geminiApiKey });
            embedModel.embedBatchSize = 50;
            break;
        }
        case "mock": {
            embedModel = new MockEmbedding();
            break;
        }
        default:
            throw new Error(`Unsupported embedding model provider: ${config.modelProvider}`);
    }
    LlamaindexSettings.embedModel = embedModel;
    return embedModel;
}
|
|
205
|
+
/**
 * Builds the storage context used for writing a project: the configured
 * vector store for text, a fresh in-memory document store, and
 * `<storagePath>/<sanitized project name>` as the persist directory.
 * `config.storagePath` is created if it does not exist yet.
 */
export async function getStorageContext(config, settings, clients) {
    const textVectorStore = await createVectorStore(config, settings, clients);
    fs.mkdirSync(config.storagePath, { recursive: true });

    const contextOptions = {
        persistDir: join(config.storagePath, sanitizeProjectName(config.projectName)),
        vectorStores: { [ModalityType.TEXT]: textVectorStore },
        // Per the original author: a docStore created with a persist path (the
        // default in storageContextFromDefaults) writes to disk after every
        // put(), which happens 2+ times per document. It is therefore created
        // without a persist path and persisted explicitly once all documents
        // have been added.
        // See https://github.com/jeremybmerrill/meaningfully/issues/52
        docStore: new SimpleDocumentStore(),
    };
    const storageContext = await storageContextFromDefaults(contextOptions);
    return storageContext;
}
|
|
223
|
+
/**
 * Adds `documents` to the project's document store and writes the store to
 * `<storagePath>/<sanitized project name>/doc_store.json`.
 *
 * The elapsed time is logged under the "persistDocuments Run Time" label,
 * whether the call succeeds or throws.
 */
export async function persistDocuments(documents, config, settings, clients) {
    console.time("persistDocuments Run Time");
    try {
        const storageContext = await getStorageContext(config, settings, clients);
        // second argument is presumably `allowUpdate` — TODO confirm against the llamaindex docstore API
        await storageContext.docStore.addDocuments(documents, true);
        // getStorageContext builds the doc store with `new SimpleDocumentStore()`
        // (no persist path), so it has to be written out explicitly here.
        const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName));
        // @ts-ignore
        await storageContext.docStore.kvStore.persist(join(persistDir, "doc_store.json"));
    }
    finally {
        // Close the timer on failure too; a label left open makes the next
        // console.time call with the same label warn instead of restarting.
        console.timeEnd("persistDocuments Run Time");
    }
}
|
|
233
|
+
/**
 * Embeds `nodes` into the project's vector store and persists the store.
 *
 * @param nodes nodes to embed
 * @param config embedding configuration
 * @param settings provider settings passed on to getStorageContext
 * @param clients vector store clients passed on to getStorageContext
 * @param progressCallback optional callback handed to ProgressVectorStoreIndex.init
 * @returns the ProgressVectorStoreIndex that was built
 * @throws Error when the storage context has no text vector store, or when
 *         the store is of a type this function does not know how to persist
 */
export async function persistNodes(nodes, config, settings, clients, progressCallback) {
    console.time("persistNodes Run Time");
    try {
        const storageContext = await getStorageContext(config, settings, clients);
        const vectorStore = storageContext.vectorStores[ModalityType.TEXT];
        if (!vectorStore) {
            throw new Error("Vector store is undefined");
        }

        // Per the original author, this is the call that actually embeds the
        // nodes (even ones that already carry embeddings).
        const index = await ProgressVectorStoreIndex.init({
            nodes,
            storageContext,
            logProgress: true,
            progressCallback,
        });

        // The original author notes that the storage context does not persist
        // the vector store by itself, hence the explicit call.
        if (vectorStore instanceof PGVectorStore || vectorStore instanceof SimpleVectorStore) {
            await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
        }
        else if (vectorStore instanceof BatchingWeaviateVectorStore) {
            // WeaviateVectorStore has no persist method; it persists automatically.
            console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
        }
        else {
            throw new Error("Vector store does not support persist method");
        }
        return index;
    }
    finally {
        // Close the timer on failure too; a label left open makes the next
        // console.time call with the same label warn instead of restarting.
        console.timeEnd("persistNodes Run Time");
    }
}
|
|
272
|
+
/**
 * Creates the vector store named by `config.vectorStoreType`
 * ("postgres", "simple" or "weaviate"), wired to the configured embedding model.
 *
 * Per the original author, the embedding model has to be passed to the store
 * as well; otherwise the store defaults to Ada.
 *
 * @throws Error when the store type is not supported
 */
async function createVectorStore(config, settings, clients) {
    const embeddingModel = getEmbedModel(config, settings);
    switch (config.vectorStoreType) {
        case "postgres": {
            const pgOptions = {
                clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
                tableName: sanitizeProjectName(config.projectName),
                dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // 1536 when the model is unknown
                embeddingModel,
            };
            return new PGVectorStore(pgOptions);
        }
        case "simple": {
            const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
            return SimpleVectorStore.fromPersistDir(projectDir, embeddingModel);
        }
        case "weaviate": {
            const weaviateStore = new BatchingWeaviateVectorStore({
                indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
                weaviateClient: clients.weaviateClient,
                embeddingModel,
            });
            // Per the original author: WeaviateVectorStore's getNodeSimilarity looks
            // for a distance, while current Weaviate returns a score (it would only
            // pick up `score` for hybrid search). The private method is therefore
            // replaced with one that reads `score` from the entry metadata.
            // @ts-ignore
            weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => entry.metadata.score;
            return weaviateStore;
        }
        default:
            throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
    }
}
|
|
305
|
+
export async function searchDocuments(index, query, numResults = 10, filters) {
|
|
306
|
+
// const metadataFilters: MetadataFilters | undefined = filters ? {filters: filters} : undefined;
|
|
307
|
+
const metadataFilters = {
|
|
308
|
+
filters: filters ? filters : [],
|
|
309
|
+
};
|
|
310
|
+
const retriever = index.asRetriever({ similarityTopK: numResults, filters: metadataFilters });
|
|
311
|
+
const results = await retriever.retrieve(query);
|
|
312
|
+
return results;
|
|
313
|
+
}
|
|
314
|
+
//# sourceMappingURL=embeddings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/services/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,gBAAgB;AAChB,mBAAmB;AACnB,iBAAiB,EAGjB,YAAY,EAEZ,0BAA0B,EAC1B,iBAAiB,EAEjB,QAAQ,IAAI,kBAAkB,EAC9B,mBAAmB,EACpB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAC,MAAM,oBAAoB,CAAA;AACnD,OAAO,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,gBAAgB,EAAsB,MAAM,aAAa,CAAC;AACnE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AACzE,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,2BAA2B,EAAE,MAAM,kCAAkC,CAAC;AAC/E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,gDAAgD;AAChD,4FAA4F;AAC5F,MAAM,gBAAgB,GAA2B;IAC/C,wBAAwB,EAAE,IAAI;IAC9B,wBAAwB,EAAE,IAAI;IAC9B,mBAAmB,EAAE,IAAI;IACzB,eAAe,EAAE,IAAI;IACrB,sBAAsB,EAAE,GAAG,EAAE,yBAAyB;CACvD,CAAC;AAEF,MAAM,YAAY,GAA2B;IAC3C,wBAAwB,EAAE,IAAI;IAC9B,wBAAwB,EAAE,IAAI;IAC9B,eAAe,EAAE,GAAG;IACpB,mBAAmB,EAAE,CAAC,EAAE,oBAAoB;IAC5C,kBAAkB,EAAE,CAAC,EAAE,oBAAoB;IAC3C,sBAAsB,EAAE,GAAG,EAAE,gHAAgH;CAC9I,CAAC;AAGF,+FAA+F;AAC/F,SAAS,sBAAsB,CAAC,MAAuB;IACrD,MAAM,eAAe,GAAyB;QAC5C,IAAI,sBAAsB,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC;KAC/F,CAAC;IAEF,IAAI,MAAM,CAAC,0BAA0B,EAAE,CAAC;QACtC,eAAe,CAAC,IAAI,CAClB,IAAI,OAAO,CAAC;YACV,mBAAmB,EAAE,MAAM,CAAC,cAAc;SAC3C,CAAC,CACH,CAAC;IACJ,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAiB,EAAE,SAAiB;IAK/D,MAAM,UAAU,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,0CAA0C;IAE3F,IAAI,SAAS,CAAC;IACd,IAAG,CAAC;QACF,SAAS,GAAG,gBAAgB,CAAC,SAA0B,CAAC,CAAC,CAAC,+BAA+B;IAC3F,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wFAAwF;QACxF,+EAA+E;QAC/E,SAAS,GAAG,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,CAAC,gCAAgC;QACxF,OAAO,CAAC,IAA
I,CAAC,uBAAuB,SAAS,uCAAuC,CAAC,CAAC;IACxF,CAAC;IACD,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE;QAC5C,OAAO,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IAClD,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,MAAM,cAAc,GAAG,UAAU,GAAG,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;IAE7D,OAAO;QACL,cAAc;QACd,UAAU;QACV,UAAU;KACX,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IAC7G,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACnD,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAC/B,KAAK,QAAQ;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;YACrF,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC;gBACtD,UAAU,EAAE,UAAU;aACvB,CAAC,CAAC;YACH,IAAI,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC;gBACpC,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;YACH,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC;YAC5B,OAAO,GAAG,CAAC;QAEb,KAAK,UAAU;YACb,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,OAAO,GAAG,IAAI,aAAa,CAAC;gBAChC,YAAY,EAAE,EAAE,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,0BAA0B,EAAE;gBAC1E,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC;gBAClD,UAAU,EAAE,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,qCAAqC;gBAC7F,cAAc,EAAE,UAAU;aAC3B,CAAC,CAAC;YACH,MAAM,gBAAgB,GAAG,MAAM,0BAA0B,CAAC;gBACxD,YAAY,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE;aAC/C,CAAC,CAAC;YACH,OAAO,MAAM,gBAAgB,CAAC,IAAI,CAAC;gBACjC,cAAc,EAAE,gBAAgB;aACjC,CAAC,CAAC;QACL,KAAK,UAAU;YACb,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,aAAa,GAAG,IAAI,2BAA2B,CAAC;gBACpD,SAAS,EAAE,qBAAqB,CAAC,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBACzE,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,cAAc,EAAE,UAAU;aAC3B,CAAC,CAAC;YAEH,yGAAyG;YACzG,yEAAyE;YACzE,8EAA8E;YAC9E,aAAa;YACb,aAAa,CAAC,iBAAiB,GAAG,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,EAAE,EAAE;gBACpE,OAAQ,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;YAC/B,CAAC,CAAA;YAED,OAAO,MAAM,gBAAgB,CAAC,eAAe,CAAC,aAAa,CAAC,CAAA;QAE9D;YACE,MAAM,IAAI,KAAK,CAAC,kCAAkC,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,CA
AC,KAAK,UAAU,mBAAmB,CAAC,MAAuB;IAC/D,oCAAoC;IACpC,mBAAmB;IACf,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC;QACtD,UAAU,EAAE,UAAU;KACvB,CAAC,CAAC;IACH,OAAO,cAAc,CAAC,QAAQ,CAAC;IAEnC,qBAAqB;IACrB,2FAA2F;IAC3F,iDAAiD;IACjD,aAAa;IACb,mFAAmF;IACnF,IAAI;AACN,CAAC;AAID,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,SAAqB,EACrB,MAAuB;IAEvB,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAEnD,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEvD,8FAA8F;IAC9F,qEAAqE;IACrE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,QAAQ,CAAC,yBAAyB,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACtE,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;IACtE,6FAA6F;IAC7F,oDAAoD;IACpD,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEzF,4DAA4D;IAC5D,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC;QACrC,eAAe;KAChB,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,SAAS,EAAC,CAAC,CAAe,CAAC;IAEzE,OAAO,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC;IACzE,OAAO,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC;IACtD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,MAAuB,EACvB,QAAkB;IAElB,IAAI,UAAU,CAAC;IACf,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QACtC,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,EAAC,CAAE,CAAC;QAC3H,UAAU,CAAC,cAAc,GAAG,EAAE,CAAC,CAAC,wGAAwG;IAC1I,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QAC7C,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE;gBAClE,IAAI,EAAE,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;aAClE,GAAG,CAAC,CAAC;IACR,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,OAAO,EAAE,CAAC;QAC5C,IAAI,CAAC,QAAQ,CAAC,cAAc,IAAI,CAAC,QAAQ,CAAC,mBAAmB,EAAE,CAAC;YAC9D,MAAM,IAAI,KAAK,CAAC,2EAA2E,CAAC,CAAC;QAC/F,CAAC;QACD,UAAU,GAAG,IAAI,oBAAoB,CAAC;YACpC,KAAK,EAAE,MAAM,CAAC,SAAS;YACvB,MAAM,EAAE,QAAQ,CAAC,cAAc;YAC/B,QAAQ,EAAE,QAAQ,CAAC,mBAAmB;YACtC,UAAU,EAAE,QAAQ,CAAC,qBAAqB,
IAAI,SAAS;SACxD,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,SAAS,EAAE,CAAC;QAC9C,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC9E,CAAC;QACD,UAAU,GAAG,IAAI,kBAAkB,CAAC;YAClC,KAAK,EAAE,2BAA2B,CAAC,aAAa,EAAE,mBAAmB;YACrE,MAAM,EAAE,QAAQ,CAAC,aAAa;SAC/B,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QAC7C,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QACD,UAAU,GAAG,IAAI,eAAe,CAAC;YAC/B,MAAM,EAAE,QAAQ,CAAC,YAAY;SAC9B,CAAC,CAAC;QACH,UAAU,CAAC,cAAc,GAAG,EAAE,CAAC;IACjC,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,MAAM,EAAE,CAAC;QAC3C,UAAU,GAAG,IAAI,aAAa,EAAE,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,yCAAyC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC;IACnF,CAAC;IACD,kBAAkB,CAAC,UAAU,GAAG,UAAU,CAAC;IAC3C,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IACnG,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IACvE,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,OAAO,MAAM,0BAA0B,CAAC;QACtC,UAAU,EAAE,UAAU;QACtB,YAAY,EAAE,EAAC,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,WAAW,EAAC;QAChD,QAAQ,EAAE,IAAI,mBAAmB,EAAE;QACjC;;;;;;;UAOE;KACL,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,SAAqB,EAAE,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IACzH,OAAO,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC1E,MAAM,cAAc,CAAC,QAAQ,CAAC,YAAY,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE5D,oCAAoC;IACpC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,aAAa;IACb,MAAO,cAAc,CAAC,QAAgC,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAC,CAAC;IAE3G,OAAO,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAiB,EAAE,MAAuB,EAAE,QAAkB,EAAE,OAAgB,EAAE,gBAA4D;IAC/K,kDAAkD;IAClD,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAEtC,MAAM,cAAc,GAAG,M
AAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC1E,MAAM,WAAW,GAAG,cAAc,CAAC,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IACD,mCAAmC;IACnC,yCAAyC;IACzC,mDAAmD;IACnD,MAAM,KAAK,GAAG,MAAM,wBAAwB,CAAC,IAAI,CAAC;QAChD,KAAK;QACL,cAAc;QACd,WAAW,EAAE,IAAI;QACjB,gBAAgB;KACjB,CAAC,CAAC;IAEH,gEAAgE;IAChE,qDAAqD;IACrD,4DAA4D;IAC5D,yHAAyH;IACzH,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,WAAW,YAAY,aAAa,IAAI,WAAW,YAAY,iBAAiB,EAAE,CAAC;YACrF,MAAM,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACpH,CAAC;aAAM,IAAI,WAAW,YAAY,2BAA2B,EAAE,CAAC;YAC9D,gFAAgF;YAChF,OAAO,CAAC,GAAG,CAAC,sFAAsF,CAAC,CAAC;QACtG,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IAC5F,MAAM,cAAc,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACvD,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAE/B,mEAAmE;QACnE,gCAAgC;QAChC,KAAK,UAAU;YACb,OAAO,IAAI,aAAa,CAAC;gBACvB,YAAY,EAAE,EAAC,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,0BAA0B,EAAC;gBACxE,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC;gBAClD,UAAU,EAAE,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,qCAAqC;gBAC7F,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;QAEL,KAAK,QAAQ;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;YACrF,OAAO,iBAAiB,CAAC,cAAc,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;QAEtE,KAAK,UAAU;YACb,MAAM,WAAW,GAAG,IAAI,2BAA2B,CAAC;gBAClD,SAAS,EAAE,qBAAqB,CAAC,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBACzE,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;YAEH,yGAAyG;YACzG,yEAAyE;YACzE,8EAA8E;YAC9E,aAAa;YACb,WAAW,CAAC,iBAAiB,GAAG,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,EAAE,EAAE;gBAClE,OAAQ,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;YAC/B,CAAC,CAAA;YAED,OAAO,WAAW,CAAC;QACrB;YACE,MAAM,IAAI,KAAK,CAAC,kCAAkC,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAChF,CAA
C;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAuB,EACvB,KAAa,EACb,aAAqB,EAAE,EACvB,OAA0B;IAE1B,iGAAiG;IACjG,MAAM,eAAe,GAAoB;QACvC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;KAChC,CAAC;IACF,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,CAAC,EAAE,cAAc,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAC;IAE9F,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAE,CAAC;IACjD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.test.d.ts","sourceRoot":"","sources":["../../src/services/embeddings.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
//@ts-nocheck
|
|
2
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
|
+
import { Document, TextNode } from 'llamaindex';
|
|
4
|
+
// First, set up the mock before importing the module
|
|
5
|
+
// Partially mock ./embeddings: keep the real module and replace only the
// exports listed below with bare vi.fn() stubs.
vi.mock(import("./embeddings"), async (importOriginal) => {
    const mockedModule = { ...(await importOriginal()) };
    const stubbedExports = [
        "estimateCost",
        "getExistingVectorStoreIndex",
        "persistNodes",
        "persistDocuments",
        "getExistingDocStore",
        "searchDocuments",
    ];
    for (const exportName of stubbedExports) {
        mockedModule[exportName] = vi.fn();
    }
    return mockedModule;
});
|
|
18
|
+
// Now import the mocked functions
|
|
19
|
+
import { transformDocumentsToNodes, getEmbedModel } from './embeddings';
|
|
20
|
+
// Exercises the real transformDocumentsToNodes with the 'mock' embedding
// provider configured in mockConfig.
describe('transformDocumentsToNodes', () => {
    beforeEach(() => {
        vi.clearAllMocks();
    });
    const mockConfig = {
        chunkSize: 100,
        chunkOverlap: 10,
        combineSentencesIntoChunks: true,
        sploderMaxSize: 500,
        modelProvider: 'mock',
        modelName: 'text-embedding-3-small',
        vectorStoreType: "simple",
        storagePath: './storage',
        projectName: 'test_project',
        splitIntoSentences: true,
    };
    const mockSettings = {
        openAIKey: 'mock-api-key',
        oLlamaBaseURL: 'http://localhost',
        azureOpenAIKey: null,
        azureOpenAIEndpoint: null,
        azureOpenAIApiVersion: null,
        mistralApiKey: null,
        geminiApiKey: null,
    };
    it('should process documents and return nodes', async () => {
        const mockDocuments = [
            new Document({ text: 'Document 1', metadata: { key1: 'value1' } }),
            new Document({ text: 'Document 2', metadata: { key2: 'value2' } }),
        ];
        const mockNodes = [
            new TextNode({ text: 'Document 1' }),
            new TextNode({ text: 'Document 2' }),
        ];
        const result = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        // Only the text is compared; node ids and metadata are not asserted here.
        expect(result.map((node) => node.text)).toEqual(mockNodes.map((node) => node.text));
    });
    it('should filter out documents with null, undefined, or zero-length text', async () => {
        // NOTE(review): only the undefined and empty-string cases are exercised
        // below; there is no document with `text: null`.
        const mockDocuments = [
            new Document({ text: 'Valid Document', metadata: { key1: 'value1' } }),
            new Document({ text: undefined, metadata: { key3: 'value3' } }),
            new Document({ text: '', metadata: { key4: 'value4' } }),
        ];
        const mockNodes = [new TextNode({ text: 'Valid Document' })];
        const result = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        // Only the valid document is expected to produce a node.
        expect(result.map((n) => n.text)).toEqual(mockNodes.map((n) => n.text));
        // TODO: asserting on the arguments of the internal pipeline call is not
        // possible here; the original author found that a function imported from
        // the same file cannot be spied on.
    });
    it('should exclude all metadata keys from embedding', async () => {
        const mockDocuments = [
            new Document({ text: 'Document 1', metadata: { key1: 'value1', key2: 'value2' } }),
        ];
        const nodes = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        expect(nodes[0].excludedEmbedMetadataKeys).toEqual(['key1', 'key2']);
    });
});
|
|
80
|
+
// Checks that getEmbedModel returns a model for known providers and throws
// for an unknown one.
describe('getEmbedModel', () => {
    const baseConfig = {
        chunkSize: 100,
        chunkOverlap: 10,
        combineSentencesIntoChunks: true,
        sploderMaxSize: 500,
        modelProvider: 'openai',
        modelName: 'text-embedding-3-small',
        vectorStoreType: "simple",
        storagePath: './storage',
        projectName: 'test_project',
        splitIntoSentences: true,
    };
    const baseSettings = {
        openAIKey: 'mock-api-key',
        oLlamaBaseURL: 'http://localhost',
        azureOpenAIKey: null,
        azureOpenAIEndpoint: null,
        azureOpenAIApiVersion: null,
        mistralApiKey: null,
        geminiApiKey: null,
    };
    // Build the embed model for the base config with only the provider swapped.
    const modelForProvider = (modelProvider) => getEmbedModel({ ...baseConfig, modelProvider }, baseSettings);
    it('should handle different model providers correctly', () => {
        // 'ollama' provider
        expect(modelForProvider('ollama')).toBeDefined();
        // 'mock' provider
        expect(modelForProvider('mock')).toBeDefined();
        // An unknown provider must be rejected with a descriptive error.
        expect(() => {
            modelForProvider('invalid');
        }).toThrow('Unsupported embedding model provider: invalid');
    });
});
|
|
115
|
+
//# sourceMappingURL=embeddings.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.test.js","sourceRoot":"","sources":["../../src/services/embeddings.test.ts"],"names":[],"mappings":"AAAA,aAAa;AAEb,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhD,qDAAqD;AACrD,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE;IACvD,MAAM,MAAM,GAAG,MAAM,cAAc,EAAE,CAAA;IACrC,OAAO;QACL,GAAG,MAAM;QACT,sBAAsB;QACtB,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;QACrB,2BAA2B,EAAE,EAAE,CAAC,EAAE,EAAE;QACpC,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;QACrB,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE;QACzB,mBAAmB,EAAE,EAAE,CAAC,EAAE,EAAE;QAC5B,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE;KACzB,CAAA;AACH,CAAC,CAAC,CAAA;AAEF,kCAAkC;AAClC,OAAO,EAAE,yBAAyB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAExE,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG;QACjB,SAAS,EAAE,GAAG;QACd,YAAY,EAAE,EAAE;QAChB,0BAA0B,EAAE,IAAI;QAChC,cAAc,EAAE,GAAG;QACnB,aAAa,EAAE,MAAM;QACrB,SAAS,EAAE,wBAAwB;QACnC,eAAe,EAAE,QAAoB;QACrC,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,cAAc;QAC3B,kBAAkB,EAAE,IAAI;KACzB,CAAC;IAEF,MAAM,YAAY,GAAG;QACnB,SAAS,EAAE,cAAc;QACzB,aAAa,EAAE,kBAAkB;QACjC,cAAc,EAAE,IAAI;QACpB,mBAAmB,EAAE,IAAI;QACzB,qBAAqB,EAAE,IAAI;QAC3B,aAAa,EAAE,IAAI;QACnB,YAAY,EAAE,IAAI;KACnB,CAAC;IAEF,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YAClE,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACnE,CAAC;QACF,MAAM,SAAS,GAAG;YAChB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC;YACpC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC;SACrC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;QAExF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACtF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uEAAuE,EAAE,KAAK,IAAI,
EAAE;QACrF,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YACtE,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YAC/D,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACzD,CAAC;QACF,MAAM,iBAAiB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,SAAS,GAAG,CAAC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC;QAE7D,uEAAuE;QAEvE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;QACxF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAExE,8GAA8G;QAC9G,6EAA6E;QAC7E,gGAAgG;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACnF,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAA;QACtF,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,MAAM,UAAU,GAAG;QACjB,SAAS,EAAE,GAAG;QACd,YAAY,EAAE,EAAE;QAChB,0BAA0B,EAAE,IAAI;QAChC,cAAc,EAAE,GAAG;QACnB,aAAa,EAAE,QAAQ;QACvB,SAAS,EAAE,wBAAwB;QACnC,eAAe,EAAE,QAAoB;QACrC,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,cAAc;QAC3B,kBAAkB,EAAE,IAAI;KACzB,CAAC;IAEF,MAAM,YAAY,GAAG;QACnB,SAAS,EAAE,cAAc;QACzB,aAAa,EAAE,kBAAkB;QACjC,cAAc,EAAE,IAAI;QACpB,mBAAmB,EAAE,IAAI;QACzB,qBAAqB,EAAE,IAAI;QAC3B,aAAa,EAAE,IAAI;QACnB,YAAY,EAAE,IAAI;KACnB,CAAC;IAGF,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,8BAA8B;QAC9B,MAAM,WAAW,GAAG,aAAa,CAC/B,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,QAAQ,EAAE,EAC1C,YAAY,CACb,CAAC;QACF,MAAM,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;QAElC,4BAA4B;QAC5B,MAAM,SAAS,GAAG,aAAa,CAC7B,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,MAAM,EAAE,EACxC,YAAY,CACb,CAAC;QACF,MAAM,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAEhC,6BAA6B;QAC7B,
MAAM,CAAC,GAAG,EAAE;YACV,aAAa,CACX,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,SAAgB,EAAE,EAClD,YAAY,CACb,CAAC;QACJ,CAAC,CAAC,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loggingOpenAIEmbedding.d.ts","sourceRoot":"","sources":["../../src/services/loggingOpenAIEmbedding.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// // temporary
|
|
2
|
+
// // this is a wrapper around OpenAIEmbedding that logs the input of the embedding
|
|
3
|
+
// // it's used to debug the embedding process (to make sure random metadata isn't wrongfully included)
|
|
4
|
+
// // it's not used in the production code
|
|
5
|
+
export {};
|
|
6
|
+
// import { OpenAIEmbedding } from "@llamaindex/openai";
|
|
7
|
+
// import type {
|
|
8
|
+
// OpenAI as OpenAILLM,
|
|
9
|
+
// } from "openai";
|
|
10
|
+
// type LLMInstance = Pick<OpenAILLM, "embeddings" | "apiKey" | "baseURL">;
|
|
11
|
+
// export class LoggingOpenAIEmbedding extends OpenAIEmbedding {
|
|
12
|
+
// constructor(
|
|
13
|
+
// init?: Omit<Partial<OpenAIEmbedding>, "session"> & {
|
|
14
|
+
// session?: LLMInstance;
|
|
15
|
+
// },
|
|
16
|
+
// ) {
|
|
17
|
+
// super(init);
|
|
18
|
+
// // overwrite private member "getOpenAIEmbedding" 🙀
|
|
19
|
+
// (this as any).getOpenAIEmbedding = async function(input: string[]): Promise<number[][]> {
|
|
20
|
+
// // TODO: ensure this for every sub class by calling it in the base class
|
|
21
|
+
// input = this.truncateMaxTokens(input);
|
|
22
|
+
// console.log("LoggingOpenAIEmbedding input", input);
|
|
23
|
+
// const { data } = await (
|
|
24
|
+
// await this.session
|
|
25
|
+
// ).embeddings.create(
|
|
26
|
+
// this.dimensions
|
|
27
|
+
// ? {
|
|
28
|
+
// model: this.model,
|
|
29
|
+
// dimensions: this.dimensions, // only sent to OpenAI if set by user
|
|
30
|
+
// input,
|
|
31
|
+
// }
|
|
32
|
+
// : {
|
|
33
|
+
// model: this.model,
|
|
34
|
+
// input,
|
|
35
|
+
// },
|
|
36
|
+
// );
|
|
37
|
+
// return data.map((d) => d.embedding);
|
|
38
|
+
// }
|
|
39
|
+
// }
|
|
40
|
+
// }
|
|
41
|
+
//# sourceMappingURL=loggingOpenAIEmbedding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"loggingOpenAIEmbedding.js","sourceRoot":"","sources":["../../src/services/loggingOpenAIEmbedding.ts"],"names":[],"mappings":"AACA,eAAe;AACf,mFAAmF;AACnF,uGAAuG;AACvG,0CAA0C;;AAE1C,wDAAwD;AACxD,gBAAgB;AAChB,yBAAyB;AACzB,mBAAmB;AACnB,2EAA2E;AAG3E,gEAAgE;AAChE,iBAAiB;AACjB,2DAA2D;AAC3D,+BAA+B;AAC/B,SAAS;AACT,QAAQ;AACR,mBAAmB;AACnB,kDAAkD;AAClD,gGAAgG;AAChG,iFAAiF;AACjF,+CAA+C;AAE/C,4DAA4D;AAE5D,iCAAiC;AACjC,6BAA6B;AAC7B,6BAA6B;AAC7B,0BAA0B;AAC1B,gBAAgB;AAChB,mCAAmC;AACnC,mFAAmF;AACnF,uBAAuB;AACvB,gBAAgB;AAChB,gBAAgB;AAChB,mCAAmC;AACnC,uBAAuB;AACvB,iBAAiB;AACjB,WAAW;AAEX,6CAA6C;AAC7C,QAAQ;AACR,MAAM;AACN,IAAI"}
|