@meaningfully/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nvmrc +1 -0
- package/LICENSE +7 -0
- package/README.md +3 -0
- package/dist/DocumentSetManager.d.ts +28 -0
- package/dist/DocumentSetManager.d.ts.map +1 -0
- package/dist/DocumentSetManager.js +134 -0
- package/dist/DocumentSetManager.js.map +1 -0
- package/dist/Meaningfully.d.ts +52 -0
- package/dist/Meaningfully.d.ts.map +1 -0
- package/dist/Meaningfully.js +206 -0
- package/dist/Meaningfully.js.map +1 -0
- package/dist/MetadataManager.d.ts +32 -0
- package/dist/MetadataManager.d.ts.map +1 -0
- package/dist/MetadataManager.js +115 -0
- package/dist/MetadataManager.js.map +1 -0
- package/dist/api/embedding.d.ts +7 -0
- package/dist/api/embedding.d.ts.map +1 -0
- package/dist/api/embedding.js +94 -0
- package/dist/api/embedding.js.map +1 -0
- package/dist/api/embedding.test.d.ts +2 -0
- package/dist/api/embedding.test.d.ts.map +1 -0
- package/dist/api/embedding.test.js +340 -0
- package/dist/api/embedding.test.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts +6 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.js +21 -0
- package/dist/services/batchingWeaviateVectorStore.js.map +1 -0
- package/dist/services/csvLoader.d.ts +3 -0
- package/dist/services/csvLoader.d.ts.map +1 -0
- package/dist/services/csvLoader.js +18 -0
- package/dist/services/csvLoader.js.map +1 -0
- package/dist/services/csvLoader.test.d.ts +2 -0
- package/dist/services/csvLoader.test.d.ts.map +1 -0
- package/dist/services/csvLoader.test.js +75 -0
- package/dist/services/csvLoader.test.js.map +1 -0
- package/dist/services/embeddings.d.ts +22 -0
- package/dist/services/embeddings.d.ts.map +1 -0
- package/dist/services/embeddings.js +314 -0
- package/dist/services/embeddings.js.map +1 -0
- package/dist/services/embeddings.test.d.ts +2 -0
- package/dist/services/embeddings.test.d.ts.map +1 -0
- package/dist/services/embeddings.test.js +115 -0
- package/dist/services/embeddings.test.js.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts +2 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.js +41 -0
- package/dist/services/loggingOpenAIEmbedding.js.map +1 -0
- package/dist/services/mockEmbedding.d.ts +6 -0
- package/dist/services/mockEmbedding.d.ts.map +1 -0
- package/dist/services/mockEmbedding.js +14 -0
- package/dist/services/mockEmbedding.js.map +1 -0
- package/dist/services/progressManager.d.ts +21 -0
- package/dist/services/progressManager.d.ts.map +1 -0
- package/dist/services/progressManager.js +76 -0
- package/dist/services/progressManager.js.map +1 -0
- package/dist/services/progressVectorStoreIndex.d.ts +21 -0
- package/dist/services/progressVectorStoreIndex.d.ts.map +1 -0
- package/dist/services/progressVectorStoreIndex.js +60 -0
- package/dist/services/progressVectorStoreIndex.js.map +1 -0
- package/dist/services/sentenceSplitter.d.ts +17 -0
- package/dist/services/sentenceSplitter.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.js +207 -0
- package/dist/services/sentenceSplitter.js.map +1 -0
- package/dist/services/sentenceSplitter.test.d.ts +2 -0
- package/dist/services/sentenceSplitter.test.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.test.js +68 -0
- package/dist/services/sentenceSplitter.test.js.map +1 -0
- package/dist/services/sploder.d.ts +13 -0
- package/dist/services/sploder.d.ts.map +1 -0
- package/dist/services/sploder.js +45 -0
- package/dist/services/sploder.js.map +1 -0
- package/dist/types/index.d.ts +77 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +7 -0
- package/dist/utils.js.map +1 -0
- package/package.json +43 -0
- package/src/Meaningfully.d.ts +57 -0
- package/src/Meaningfully.ts +228 -0
- package/src/MetadataManager.d.ts +27 -0
- package/src/MetadataManager.ts +145 -0
- package/src/api/embedding.d.ts +6 -0
- package/src/api/embedding.ts +122 -0
- package/src/index.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.d.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.ts +23 -0
- package/src/services/csvLoader.d.ts +2 -0
- package/src/services/csvLoader.ts +24 -0
- package/src/services/embeddings.d.ts +21 -0
- package/src/services/embeddings.ts +374 -0
- package/src/services/loggingOpenAIEmbedding.d.ts +0 -0
- package/src/services/loggingOpenAIEmbedding.ts +46 -0
- package/src/services/mockEmbedding.d.ts +5 -0
- package/src/services/mockEmbedding.ts +13 -0
- package/src/services/progressManager.d.ts +20 -0
- package/src/services/progressManager.ts +88 -0
- package/src/services/progressVectorStoreIndex.d.ts +20 -0
- package/src/services/progressVectorStoreIndex.ts +95 -0
- package/src/services/sentenceSplitter.d.ts +16 -0
- package/src/services/sentenceSplitter.ts +243 -0
- package/src/services/sploder.d.ts +12 -0
- package/src/services/sploder.ts +62 -0
- package/src/types/index.d.ts +71 -0
- package/src/types/index.ts +89 -0
- package/src/utils.d.ts +2 -0
- package/src/utils.ts +6 -0
- package/tests/MetadataManager.test.ts +120 -0
- package/tests/csvLoader.test.d.ts +1 -0
- package/tests/csvLoader.test.ts +88 -0
- package/tests/embedding.test.d.ts +1 -0
- package/tests/embedding.test.ts +425 -0
- package/tests/embeddings.test.d.ts +1 -0
- package/tests/embeddings.test.ts +144 -0
- package/tests/sentenceSplitter.test.d.ts +1 -0
- package/tests/sentenceSplitter.test.ts +81 -0
- package/tsconfig.json +31 -0
- package/tsconfig.tsbuildinfo +1 -0
package/src/utils.d.ts
ADDED
package/src/utils.ts
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { MetadataManager } from '../src/MetadataManager.js';
|
|
2
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
|
+
|
|
4
|
+
describe('MetadataManager', () => {
  let metadataManager: MetadataManager;

  /**
   * Creates a MetadataManager whose storage hooks are inert: `runQuery`
   * resolves to an empty array, `runQuerySingle` resolves to null, and
   * `initializeDatabase` / `close` do nothing. Individual tests replace the
   * query hooks with spies to control what the manager "reads" or to observe
   * what it "writes".
   */
  const buildStubManager = (): MetadataManager =>
    new (class extends MetadataManager {
      protected async runQuery<T>(query: string, params?: any[]): Promise<T[]> {
        return [] as T[];
      }
      protected async runQuerySingle<T>(query: string, params?: any[]): Promise<T | null> {
        return null;
      }
      protected async initializeDatabase(): Promise<void> {}
      protected close(): void {}
    })();

  // A fresh instance per test, so spies installed by one test never leak into the next.
  beforeEach(() => {
    metadataManager = buildStubManager();
  });

  it('should add a document set and return its ID', async () => {
    vi.spyOn(metadataManager, 'runQuerySingle').mockResolvedValueOnce({ set_id: 1 });

    const newSet = {
      name: 'Test Set',
      uploadDate: new Date(),
      parameters: {},
      totalDocuments: 10,
    };
    const documentSetId = await metadataManager.addDocumentSet(newSet);

    expect(documentSetId).toBe(1);
  });

  it('should retrieve a document set by ID', async () => {
    // Row as the storage layer is stubbed to return it: snake_case columns,
    // date as an ISO string, parameters as a JSON string.
    const storedRow = {
      set_id: 1,
      name: 'Test Set',
      upload_date: new Date().toISOString(),
      parameters: '{}',
      total_documents: 10,
    };
    vi.spyOn(metadataManager, 'runQuerySingle').mockResolvedValueOnce(storedRow);

    const documentSet = await metadataManager.getDocumentSet(1);

    expect(documentSet).toEqual({
      documentSetId: 1,
      name: 'Test Set',
      uploadDate: expect.any(Date),
      parameters: {},
      totalDocuments: 10,
    });
  });

  it('should update the document count for a document set', async () => {
    const queryWatcher = vi.spyOn(metadataManager, 'runQuery').mockResolvedValueOnce([]);

    await metadataManager.updateDocumentCount(1, 5);

    // The count (5) is expected before the set id (1) in the parameter list.
    expect(queryWatcher).toHaveBeenCalledWith(metadataManager.queries.updateDocumentCount, [5, 1]);
  });

  it('should delete a document set by ID', async () => {
    const queryWatcher = vi.spyOn(metadataManager, 'runQuery').mockResolvedValueOnce([]);

    await metadataManager.deleteDocumentSet(1);

    expect(queryWatcher).toHaveBeenCalledWith(metadataManager.queries.deleteDocumentSet, [1]);
  });

  it('should retrieve default settings if none exist', async () => {
    vi.spyOn(metadataManager, 'runQuerySingle').mockResolvedValueOnce(null);

    const settings = await metadataManager.getSettings();

    const expectedDefaults = {
      openAIKey: null,
      oLlamaBaseURL: null,
      azureOpenAIKey: null,
      azureOpenAIEndpoint: null,
      azureOpenAIApiVersion: '2024-02-01',
      mistralApiKey: null,
      geminiApiKey: null,
    };
    expect(settings).toEqual(expectedDefaults);
  });

  it('should update settings', async () => {
    const queryWatcher = vi.spyOn(metadataManager, 'runQuery').mockResolvedValueOnce([]);

    const settingsFixture = {
      openAIKey: 'test-key',
      oLlamaBaseURL: 'http://localhost',
      azureOpenAIKey: 'azure-key',
      azureOpenAIEndpoint: 'http://azure.endpoint',
      azureOpenAIApiVersion: '2024-02-01',
      mistralApiKey: 'mistral-key',
      geminiApiKey: 'gemini-key',
    };
    // The upsert query is expected to receive the same JSON payload twice.
    const serializedSettings = JSON.stringify(settingsFixture);

    const result = await metadataManager.setSettings({ ...settingsFixture });

    expect(result).toEqual({ success: true });
    expect(queryWatcher).toHaveBeenCalledWith(metadataManager.queries.upsertSettings, [
      serializedSettings,
      serializedSettings,
    ]);
  });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
//@ts-nocheck
|
|
2
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
import { loadDocumentsFromCsv } from '../src/services/csvLoader.js';
|
|
5
|
+
import { Document } from 'llamaindex';
|
|
6
|
+
import Papa from 'papaparse';
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
// Both the file read and the CSV parser are auto-mocked, so these tests only
// exercise how loadDocumentsFromCsv turns parsed rows into Document objects.
vi.mock('fs');
vi.mock('papaparse');

describe('csvLoader.ts', () => {
  describe('loadDocumentsFromCsv', () => {
    /**
     * Primes the mocked `readFileSync` and `Papa.parse` with the given raw
     * CSV text and parsed rows, then runs the loader against the 'text' column.
     */
    const loadWithStubbedCsv = async (rawCsv, parsedRows) => {
      readFileSync.mockReturnValue(rawCsv);
      Papa.parse.mockReturnValue({ data: parsedRows });
      return loadDocumentsFromCsv('path/to/csv', 'text');
    };

    it('should load documents from CSV and return Document instances', async () => {
      const rows = [
        { text: 'content1', metadata1: 'meta1', metadata2: 'meta2' },
        { text: 'content2', metadata1: 'meta3', metadata2: 'meta4' },
      ];

      const result = await loadWithStubbedCsv(
        'text,metadata1,metadata2\ncontent1,meta1,meta2\ncontent2,meta3,meta4',
        rows,
      );

      // Ids are stripped from both sides before comparing.
      const expected = [
        new Document({ text: 'content1', metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
        new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: 'meta4' } }),
      ];
      expect(remove_id(result)).toEqual(remove_id(expected));
    });

    it('should handle empty CSV file', async () => {
      const result = await loadWithStubbedCsv('', []);

      expect(result).toEqual([]);
    });

    it('should handle missing text column', async () => {
      const rows = [
        { metadata1: 'meta1', metadata2: 'meta2' },
        { metadata1: 'meta3', metadata2: 'meta4' },
      ];

      const result = await loadWithStubbedCsv('metadata1,metadata2\nmeta1,meta2\nmeta3,meta4', rows);

      // With no 'text' column the documents are expected to carry undefined text.
      const expected = [
        new Document({ text: undefined, metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
        new Document({ text: undefined, metadata: { metadata1: 'meta3', metadata2: 'meta4' } }),
      ];
      expect(remove_id(result)).toEqual(remove_id(expected));
    });

    it('should handle null values in metadata', async () => {
      const rows = [
        { text: 'content1', metadata1: null, metadata2: 'meta2' },
        { text: 'content2', metadata1: 'meta3', metadata2: null },
      ];

      const result = await loadWithStubbedCsv(
        'text,metadata1,metadata2\ncontent1,,meta2\ncontent2,meta3,',
        rows,
      );

      // Null metadata cells are expected to come back as empty strings.
      const expected = [
        new Document({ text: 'content1', metadata: { metadata1: '', metadata2: 'meta2' } }),
        new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: '' } }),
      ];
      expect(remove_id(result)).toEqual(remove_id(expected));
    });
  });
});
|
|
82
|
+
|
|
83
|
+
/**
 * Returns shallow copies of the given documents with the `id_` property
 * omitted, so two document lists can be compared without their ids getting
 * in the way.
 *
 * The copies are plain objects built from each document's own enumerable
 * properties; they are NOT `Document` instances (object rest/spread does not
 * carry the prototype over), which is why no `Document[]` return type is
 * declared here. The input list and its elements are left untouched.
 *
 * @param {Array<object>} list_of_documents - documents (or document-like objects) to copy.
 * @returns {Array<object>} one id-less plain-object copy per input element, in order.
 */
function remove_id(list_of_documents) {
  return list_of_documents.map(({ id_, ...doc_without_id }) => doc_without_id);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
//@ts-nocheck
|
|
2
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
|
+
import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../src/api/embedding.js';
|
|
4
|
+
import { loadDocumentsFromCsv } from '../src/services/csvLoader.js';
|
|
5
|
+
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getExistingDocStore } from '../src/services/embeddings.js';
|
|
6
|
+
import { MetadataMode } from 'llamaindex';
|
|
7
|
+
|
|
8
|
+
// filepath: /Users/jeremybmerrill/code/meaningfully/src/main/api/embedding.test.ts
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
// The CSV loader and the embeddings service are auto-mocked; every test below
// programs their return values and checks how the API layer reacts.
vi.mock('../src/services/csvLoader');
vi.mock('../src/services/embeddings');

// NOTE: this file previously repeated the previewResults / getDocStore /
// getIndex / search suites a second time at top level, as verbatim copies of
// the nested suites below. The copies ran the same assertions twice and have
// been removed; every unique test is still present here.
describe('embedding.ts', () => {
  // Error message the API is expected to return when the CSV yields no documents.
  const EMPTY_CSV_ERROR =
    'That CSV does not appear to contain any documents. Please check the file and try again.';

  /** Builds a search hit shaped like the mocked searchDocuments results used in these tests. */
  const makeHit = (content, score) => ({
    node: { getContent: () => content, metadata: {} },
    score,
  });

  describe('createEmbeddings', () => {
    it('should create embeddings and return success', async () => {
      const mockDocuments = [{ text: 'doc1' }, { text: 'doc2' }];
      const mockNodes = [{ node: 'node1' }, { node: 'node2' }];
      const mockIndex = 'index1';
      loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
      transformDocumentsToNodes.mockResolvedValue(mockNodes);
      persistNodes.mockResolvedValue(mockIndex);

      const result = await createEmbeddings('path/to/csv', 'text', {}, {});

      expect(result).toEqual({ success: true, index: mockIndex });
    });

    it('should return error on failure', async () => {
      loadDocumentsFromCsv.mockRejectedValue(new Error('Failed to load documents'));

      const result = await createEmbeddings('path/to/csv', 'text', {}, {});

      expect(result).toEqual({ success: false, error: 'Failed to load documents' });
    });

    it('should handle empty documents', async () => {
      loadDocumentsFromCsv.mockResolvedValue([]);

      const result = await createEmbeddings('path/to/csv', 'text', {}, {});

      expect(result).toEqual({ success: false, error: EMPTY_CSV_ERROR });
    });
  });

  describe('previewResults', () => {
    it('should return preview results and estimated cost', async () => {
      const mockDocuments = Array(20).fill({ text: 'doc' });
      const mockNodes = [{ text: 'node1', metadata: {} }, { text: 'node2', metadata: {} }];
      const mockPreviewNodes = [{ text: 'node1', metadata: {} }, { text: 'node2', metadata: {} }];
      const mockEstimate = { estimatedPrice: 10, tokenCount: 100, pricePer1M: 0.01 };
      loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
      transformDocumentsToNodes.mockResolvedValue(mockNodes);
      estimateCost.mockReturnValue(mockEstimate);

      const result = await previewResults('path/to/csv', 'text', {});

      // The estimate's fields are expected to be flattened into the result.
      expect(result).toEqual({
        success: true,
        nodes: mockPreviewNodes,
        ...mockEstimate,
      });
    });

    it('should return error on failure', async () => {
      loadDocumentsFromCsv.mockRejectedValue(new Error('Failed to load documents'));

      const result = await previewResults('path/to/csv', 'text', {});

      expect(result).toEqual({ success: false, error: 'Failed to load documents' });
    });

    it('should handle empty documents', async () => {
      loadDocumentsFromCsv.mockResolvedValue([]);

      const result = await previewResults('path/to/csv', 'text', {});

      expect(result).toEqual({ success: false, error: EMPTY_CSV_ERROR });
    });
  });

  describe('getDocStore', () => {
    it('should return existing doc store', async () => {
      const mockDocStore = 'docStore';
      getExistingDocStore.mockResolvedValue(mockDocStore);

      const result = await getDocStore({});

      expect(result).toBe(mockDocStore);
    });
  });

  describe('getIndex', () => {
    it('should return existing vector store index', async () => {
      const mockIndex = 'index';
      getExistingVectorStoreIndex.mockResolvedValue(mockIndex);

      const result = await getIndex({}, {});

      expect(result).toBe(mockIndex);
    });
  });

  describe('search', () => {
    it('should return search results', async () => {
      searchDocuments.mockResolvedValue([makeHit('content1', 1), makeHit('content2', 2)]);

      const result = await search('index', 'query');

      expect(result).toEqual([
        { text: 'content1', score: 1, metadata: {} },
        { text: 'content2', score: 2, metadata: {} },
      ]);
    });

    it('should handle no search results', async () => {
      searchDocuments.mockResolvedValue([]);

      const result = await search('index', 'query');

      expect(result).toEqual([]);
    });

    it('should handle search results with null scores', async () => {
      searchDocuments.mockResolvedValue([makeHit('content1', null), makeHit('content2', null)]);

      const result = await search('index', 'query');

      // A null score is expected to be reported as 0.
      expect(result).toEqual([
        { text: 'content1', score: 0, metadata: {} },
        { text: 'content2', score: 0, metadata: {} },
      ]);
    });
  });
});
|
|
233
|
+
// Vitest hoists vi.mock() to the top of the module no matter where it is
// written, so the ProgressManager mock is declared once here instead of being
// repeated inside individual tests (where it only looked test-local). The
// factory hands out a single shared instance, so the code under test and the
// assertions below observe the same vi.fn() spies.
vi.mock('../src/services/progressManager', () => {
  const mockInstance = {
    startOperation: vi.fn(),
    updateProgress: vi.fn(),
    completeOperation: vi.fn(),
    clearOperation: vi.fn()
  };

  return {
    ProgressManager: {
      getInstance: () => mockInstance
    }
  };
});

describe('createEmbeddings with progress tracking', () => {
  /**
   * Resolves the mocked ProgressManager singleton. ProgressManager is not
   * among this file's static imports, so it is loaded dynamically; the import
   * yields the mocked module registered above.
   */
  const getMockedProgressManager = async () => {
    const { ProgressManager } = await import('../src/services/progressManager.js');
    return ProgressManager.getInstance();
  };

  // Clears recorded calls on every mock (including the shared ProgressManager
  // spies) so each test asserts only on its own activity.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should pass progress callback to persistNodes', async () => {
    // Setup mocks
    const mockDocuments = [{ text: 'doc1' }, { text: 'doc2' }];
    const mockNodes = [{ text: 'node1', metadata: {} }, { text: 'node2', metadata: {} }];
    const mockIndex = 'index1';
    loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
    transformDocumentsToNodes.mockResolvedValue(mockNodes);
    persistNodes.mockImplementation((nodes, config, settings, clients, callback) => {
      // Call the callback with sample progress
      if (callback) {
        callback(1, 2); // 50% progress
        callback(2, 2); // 100% progress
      }
      return Promise.resolve(mockIndex);
    });

    // Execute
    const result = await createEmbeddings('path/to/csv', 'text', { modelName: 'test-model' }, {}, {});

    // Verify
    expect(persistNodes).toHaveBeenCalledTimes(1);
    expect(persistNodes.mock.calls[0][4]).toBeInstanceOf(Function); // Verify callback was passed
    expect(result).toEqual({ success: true, index: mockIndex });
  });

  it('should correctly track progress through ProgressManager', async () => {
    // Setup
    const mockDocuments = [{ text: 'doc1' }, { text: 'doc2' }];
    const mockNodes = [{ text: 'node1', metadata: {} }];
    const mockIndex = 'testIndex';

    loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
    transformDocumentsToNodes.mockResolvedValue(mockNodes);
    persistNodes.mockImplementation((nodes, config, settings, clients, callback) => {
      if (callback) callback(1, 2); // Call with 50% progress
      return Promise.resolve(mockIndex);
    });

    // Execute
    await createEmbeddings('path/to/csv', 'text', {}, {}, {});

    // Verify
    const progressManager = await getMockedProgressManager();
    expect(progressManager.startOperation).toHaveBeenCalledWith(expect.stringMatching(/^embed-\d+$/), 100);
    expect(progressManager.updateProgress).toHaveBeenCalledWith(expect.any(String), 5);
    expect(progressManager.updateProgress).toHaveBeenCalledWith(expect.any(String), expect.any(Number));
    expect(progressManager.completeOperation).toHaveBeenCalledWith(expect.any(String));
  });

  it('should properly calculate percentage in progress callback', async () => {
    const mockDocuments = [{ text: 'doc1' }];
    const mockNodes = [{ text: 'node1', metadata: {} }];
    loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
    transformDocumentsToNodes.mockResolvedValue(mockNodes);

    // Simulate persistNodes calling the callback with various progress values
    persistNodes.mockImplementation((nodes, config, settings, clients, callback) => {
      if (callback) {
        callback(0, 10); // 0% progress
        callback(5, 10); // 50% progress
        callback(10, 10); // 100% progress
      }
      return Promise.resolve('mockIndex');
    });

    // Execute
    await createEmbeddings('path/to/csv', 'text', {}, {}, {});

    // Verify percentage calculations: callback progress of 0%, 50% and 100%
    // is expected to be reported as 5, 50 and 95. The initial update is also
    // expected at 5, so a single assertion on 5 covers both (the matcher
    // cannot tell those two calls apart).
    const progressManager = await getMockedProgressManager();
    expect(progressManager.updateProgress).toHaveBeenCalledWith(expect.any(String), 5);
    expect(progressManager.updateProgress).toHaveBeenCalledWith(expect.any(String), 50);
    expect(progressManager.updateProgress).toHaveBeenCalledWith(expect.any(String), 95);
  });

  it('should clear operation on empty documents', async () => {
    // Setup
    loadDocumentsFromCsv.mockResolvedValue([]);

    // Execute
    const result = await createEmbeddings('path/to/csv', 'text', {}, {}, {});

    // Verify
    const progressManager = await getMockedProgressManager();
    expect(progressManager.clearOperation).toHaveBeenCalled();
    expect(result).toEqual({
      success: false,
      error: "That CSV does not appear to contain any documents. Please check the file and try again."
    });
  });

  it('should complete operation on successful embedding', async () => {
    // Setup
    const mockDocuments = [{ text: 'doc1' }];
    const mockNodes = [{ text: 'node1', metadata: {} }];
    loadDocumentsFromCsv.mockResolvedValue(mockDocuments);
    transformDocumentsToNodes.mockResolvedValue(mockNodes);
    persistNodes.mockResolvedValue('mockIndex');

    // Execute
    await createEmbeddings('path/to/csv', 'text', {}, {}, {});

    // Verify
    const progressManager = await getMockedProgressManager();
    expect(progressManager.completeOperation).toHaveBeenCalled();
  });
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|