@meaningfully/core 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Meaningfully.d.ts +11 -3
- package/dist/Meaningfully.d.ts.map +1 -1
- package/dist/Meaningfully.js +107 -25
- package/dist/Meaningfully.js.map +1 -1
- package/dist/api/embedding.d.ts +1 -1
- package/dist/api/embedding.d.ts.map +1 -1
- package/dist/api/embedding.js +3 -3
- package/dist/api/embedding.js.map +1 -1
- package/dist/services/embeddings.d.ts +6 -2
- package/dist/services/embeddings.d.ts.map +1 -1
- package/dist/services/embeddings.js +57 -73
- package/dist/services/embeddings.js.map +1 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/Meaningfully.ts +107 -26
- package/src/__tests__/Meaningfully.test.ts +327 -0
- package/src/api/__tests__/embedding.test.ts +4 -4
- package/src/api/embedding.ts +3 -3
- package/src/services/embeddings.d.ts +0 -1
- package/src/services/embeddings.ts +62 -81
- package/src/types/index.ts +2 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import { describe, it, beforeEach, expect, vi } from 'vitest';
|
|
2
|
+
import { MetadataManager } from '../MetadataManager';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { sanitizeProjectName } from '../utils.js';
|
|
6
|
+
import { createVectorStore, createDocumentStore, createIndexStore} from '../services/embeddings.js';
|
|
7
|
+
import { IndexStruct } from 'llamaindex';
|
|
8
|
+
import { Client } from 'pg'; // Import the real Postgres client
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
vi.mock('../MetadataManager');
|
|
12
|
+
vi.mock('fs');
|
|
13
|
+
vi.mock('path');
|
|
14
|
+
|
|
15
|
+
// Mock the embedding module before importing MeaningfullyAPI
|
|
16
|
+
vi.doMock('../api/embedding.js', () => ({
|
|
17
|
+
getIndex: vi.fn(),
|
|
18
|
+
search: vi.fn().mockResolvedValue([{ id: 1, text: 'result' }]),
|
|
19
|
+
createEmbeddings: vi.fn().mockResolvedValue({ success: true, error: null }),
|
|
20
|
+
}));
|
|
21
|
+
vi.doMock('../services/csvLoader.js', () => ({
|
|
22
|
+
loadDocumentsFromCsv: vi.fn().mockResolvedValue([]),
|
|
23
|
+
}));
|
|
24
|
+
import { BaseNode } from 'llamaindex';
|
|
25
|
+
|
|
26
|
+
// Mock BaseNode so that getEmbeddings returns made up numbers
|
|
27
|
+
vi.mock('llamaindex', async (importOriginal) => {
|
|
28
|
+
const actual = await importOriginal();
|
|
29
|
+
return {
|
|
30
|
+
...actual,
|
|
31
|
+
BaseNode: class extends actual.BaseNode {
|
|
32
|
+
async getEmbeddings() {
|
|
33
|
+
return [0.1, 0.2, 0.3, 0.4];
|
|
34
|
+
}
|
|
35
|
+
generateHash() {
|
|
36
|
+
return 'hash';
|
|
37
|
+
}
|
|
38
|
+
getContent(){
|
|
39
|
+
return "content";
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
};
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
// Import MeaningfullyAPI after mocking
|
|
46
|
+
const { MeaningfullyAPI } = await import('../Meaningfully');
|
|
47
|
+
|
|
48
|
+
const FAKE_SETTINGS = {
|
|
49
|
+
openAIKey: 'sk-proj-testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest',
|
|
50
|
+
azureOpenAIKey: 'testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest',
|
|
51
|
+
mistralApiKey: 'testtesttesttesttesttesttesttest',
|
|
52
|
+
geminiApiKey: 'testtesttesttesttesttesttesttesttesttes',
|
|
53
|
+
azureOpenAIApiVersion: "2024-02-01",
|
|
54
|
+
azureOpenAIEndpoint: "https://test.openai.azure.com",
|
|
55
|
+
oLlamaBaseURL: "http://localhost:11434",
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
describe('MeaningfullyAPI', () => {
|
|
59
|
+
let api: MeaningfullyAPI;
|
|
60
|
+
let mockMetadataManager: MetadataManager;
|
|
61
|
+
|
|
62
|
+
beforeEach(() => {
|
|
63
|
+
// @ts-ignore
|
|
64
|
+
mockMetadataManager = new MetadataManager() as MetadataManager;
|
|
65
|
+
vi.spyOn(mockMetadataManager, 'addDocumentSet').mockResolvedValue(1);
|
|
66
|
+
vi.spyOn(mockMetadataManager, 'getSettings').mockResolvedValue(FAKE_SETTINGS);
|
|
67
|
+
vi.spyOn(mockMetadataManager, 'deleteDocumentSet').mockResolvedValue();
|
|
68
|
+
api = new MeaningfullyAPI({
|
|
69
|
+
storagePath: 'mock_storage_path',
|
|
70
|
+
metadataManager: mockMetadataManager,
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
describe('uploadCsv', () => {
|
|
75
|
+
it('should upload a CSV and create embeddings successfully', async () => {
|
|
76
|
+
const mockData = {
|
|
77
|
+
filePath: '/mock/file.csv',
|
|
78
|
+
datasetName: 'testDataset',
|
|
79
|
+
textColumns: ['text'],
|
|
80
|
+
metadataColumns: [],
|
|
81
|
+
splitIntoSentences: true,
|
|
82
|
+
combineSentencesIntoChunks: false,
|
|
83
|
+
sploderMaxSize: 100,
|
|
84
|
+
chunkSize: 512,
|
|
85
|
+
chunkOverlap: 0,
|
|
86
|
+
modelName: 'testModel',
|
|
87
|
+
modelProvider: 'openai',
|
|
88
|
+
description: 'Test dataset',
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
// Mock createEmbeddings for this test
|
|
92
|
+
const createEmbeddingsMock = vi.spyOn(await import('../api/embedding.js'), 'createEmbeddings');
|
|
93
|
+
createEmbeddingsMock.mockResolvedValue({ success: true });
|
|
94
|
+
|
|
95
|
+
const result = await api.uploadCsv(mockData);
|
|
96
|
+
|
|
97
|
+
expect(createEmbeddingsMock).toHaveBeenCalledWith(
|
|
98
|
+
expect.any(String),
|
|
99
|
+
expect.any(String),
|
|
100
|
+
expect.objectContaining({ modelName: 'testModel' }),
|
|
101
|
+
expect.any(Object),
|
|
102
|
+
expect.any(Object)
|
|
103
|
+
);
|
|
104
|
+
expect(result).toEqual({ success: true, documentSetId: 1 });
|
|
105
|
+
|
|
106
|
+
createEmbeddingsMock.mockRestore(); // Restore the original implementation after the test
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it('should handle errors during embeddings creation', async () => {
|
|
110
|
+
const mockData = {
|
|
111
|
+
filePath: '/mock/file.csv',
|
|
112
|
+
datasetName: 'testDataset',
|
|
113
|
+
textColumns: ['text'],
|
|
114
|
+
metadataColumns: [],
|
|
115
|
+
splitIntoSentences: true,
|
|
116
|
+
combineSentencesIntoChunks: false,
|
|
117
|
+
sploderMaxSize: 100,
|
|
118
|
+
chunkSize: 512,
|
|
119
|
+
chunkOverlap: 0,
|
|
120
|
+
modelName: 'testModel',
|
|
121
|
+
modelProvider: 'openai',
|
|
122
|
+
description: 'Test dataset',
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
// Mock createEmbeddings to simulate an error
|
|
126
|
+
const createEmbeddingsMock = vi.spyOn(await import('../api/embedding.js'), 'createEmbeddings');
|
|
127
|
+
createEmbeddingsMock.mockResolvedValue({ success: false, error: 'Embedding error' });
|
|
128
|
+
|
|
129
|
+
await expect(api.uploadCsv(mockData)).rejects.toThrow('Embedding error');
|
|
130
|
+
expect(mockMetadataManager.deleteDocumentSet).toHaveBeenCalledWith(1);
|
|
131
|
+
|
|
132
|
+
createEmbeddingsMock.mockRestore(); // Restore the original implementation after the test
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
describe('searchDocumentSet', () => {
|
|
137
|
+
it('should search a document set and return results', async () => {
|
|
138
|
+
vi.spyOn(mockMetadataManager, 'getDocumentSet').mockResolvedValue({
|
|
139
|
+
parameters: { modelName: 'testModel', modelProvider: 'openai', vectorStoreType: 'simple' },
|
|
140
|
+
name: 'testDataset',
|
|
141
|
+
documentSetId: 5,
|
|
142
|
+
uploadDate: new Date(),
|
|
143
|
+
totalDocuments: 420
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
const results = await api.searchDocumentSet(1, 'query', 10);
|
|
147
|
+
|
|
148
|
+
expect(results).toEqual([{ id: 1, text: 'result' }]);
|
|
149
|
+
expect(mockMetadataManager.getDocumentSet).toHaveBeenCalledWith(1);
|
|
150
|
+
});
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
describe('deleteDocumentSet', () => {
|
|
154
|
+
it('should delete a document set and associated files', async () => {
|
|
155
|
+
vi.spyOn(mockMetadataManager, 'getDocumentSet').mockResolvedValue({
|
|
156
|
+
parameters: { vectorStoreType: 'simple' },
|
|
157
|
+
name: 'testDataset',
|
|
158
|
+
documentSetId: 1,
|
|
159
|
+
uploadDate: new Date(),
|
|
160
|
+
totalDocuments: 100
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
vi.spyOn(fs, 'rmSync').mockImplementation(() => {});
|
|
164
|
+
|
|
165
|
+
const result = await api.deleteDocumentSet(1);
|
|
166
|
+
|
|
167
|
+
expect(mockMetadataManager.deleteDocumentSet).toHaveBeenCalledWith(1);
|
|
168
|
+
expect(fs.rmSync).toHaveBeenCalledWith(
|
|
169
|
+
path.join('mock_storage_path', 'testDataset'),
|
|
170
|
+
{ recursive: true, force: true }
|
|
171
|
+
);
|
|
172
|
+
expect(result).toEqual({ success: true });
|
|
173
|
+
});
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
describe('getMaskedSettings', () => {
|
|
177
|
+
it('should return masked settings', async () => {
|
|
178
|
+
vi.spyOn(mockMetadataManager, 'getSettings').mockResolvedValue(FAKE_SETTINGS);
|
|
179
|
+
|
|
180
|
+
const settings = await api.getMaskedSettings();
|
|
181
|
+
|
|
182
|
+
expect(settings).toEqual({
|
|
183
|
+
openAIKey: 'sk-proj-*******testtest',
|
|
184
|
+
azureOpenAIKey: 'testtest*******testtest',
|
|
185
|
+
mistralApiKey: 'testtest*******testtest',
|
|
186
|
+
geminiApiKey: 'testtest*******ttesttes',
|
|
187
|
+
azureOpenAIApiVersion: "2024-02-01",
|
|
188
|
+
azureOpenAIEndpoint: "https://test.openai.azure.com",
|
|
189
|
+
oLlamaBaseURL: "http://localhost:11434",
|
|
190
|
+
});
|
|
191
|
+
});
|
|
192
|
+
});
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
describe('MeaningfullyAPI - Store Deletion with Real Implementation', () => {
|
|
197
|
+
let api: MeaningfullyAPI;
|
|
198
|
+
let mockMetadataManager: MetadataManager;
|
|
199
|
+
let realPostgresClient: Client;
|
|
200
|
+
|
|
201
|
+
beforeEach(async () => {
|
|
202
|
+
// @ts-ignore
|
|
203
|
+
mockMetadataManager = new MetadataManager() as MetadataManager;
|
|
204
|
+
|
|
205
|
+
// Initialize a real Postgres client
|
|
206
|
+
realPostgresClient = new Client({
|
|
207
|
+
connectionString: process.env.POSTGRES_CONNECTION_STRING,
|
|
208
|
+
});
|
|
209
|
+
await realPostgresClient.connect();
|
|
210
|
+
|
|
211
|
+
api = new MeaningfullyAPI({
|
|
212
|
+
storagePath: 'mock_storage_path',
|
|
213
|
+
metadataManager: mockMetadataManager,
|
|
214
|
+
postgresClient: realPostgresClient, // Use the real client
|
|
215
|
+
});
|
|
216
|
+
vi.unmock('fs')
|
|
217
|
+
if (!fs.existsSync("mock_storage_path")){
|
|
218
|
+
fs.mkdirSync("mock_storage_path");
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
it('should create and delete a Postgres vector store using real implementation', async () => {
|
|
223
|
+
const projectName = 'test_project';
|
|
224
|
+
const sanitizedProjectName = sanitizeProjectName(projectName);
|
|
225
|
+
const tableName = `vecs_${sanitizedProjectName}`;
|
|
226
|
+
|
|
227
|
+
// Create the vector store using the real implementation
|
|
228
|
+
const vectorStore = await createVectorStore(
|
|
229
|
+
{ vectorStoreType: 'postgres', projectName, storagePath: 'mock_storage_path', modelProvider: 'openai', modelName: 'text-embedding-ada-002' },
|
|
230
|
+
FAKE_SETTINGS,
|
|
231
|
+
api.getClients()
|
|
232
|
+
);
|
|
233
|
+
await vectorStore.add([new BaseNode({ id: '1', text: 'test document', embedding: Array(1536).fill(0.01) })]);
|
|
234
|
+
|
|
235
|
+
// Verify the table exists
|
|
236
|
+
const tableExistsQuery = `
|
|
237
|
+
SELECT EXISTS (
|
|
238
|
+
SELECT FROM information_schema.tables
|
|
239
|
+
WHERE table_name = $1
|
|
240
|
+
);
|
|
241
|
+
`;
|
|
242
|
+
const tableExistsResult = await realPostgresClient.query(tableExistsQuery, [tableName]);
|
|
243
|
+
expect(tableExistsResult.rows[0].exists).toBe(true);
|
|
244
|
+
|
|
245
|
+
// Call the delete method
|
|
246
|
+
await api.deletePostgresVectorStore(projectName);
|
|
247
|
+
|
|
248
|
+
// Verify the table no longer exists
|
|
249
|
+
const tableDeletedResult = await realPostgresClient.query(tableExistsQuery, [tableName]);
|
|
250
|
+
expect(tableDeletedResult.rows[0].exists).toBe(false);
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
// it('should delete Simple vector store using real implementation', async () => {
|
|
254
|
+
// const projectName = 'test_project';
|
|
255
|
+
// const sanitizedProjectName = sanitizeProjectName(projectName);
|
|
256
|
+
// const storagePath = 'mock_storage_path';
|
|
257
|
+
// const persistDir = path.join(storagePath, sanitizedProjectName);
|
|
258
|
+
|
|
259
|
+
// // Create the vector store using the real implementation
|
|
260
|
+
// const vectorStore = await createVectorStore(
|
|
261
|
+
// { vectorStoreType: 'simple', projectName, storagePath, modelProvider: "openai", modelName: 'text-embedding-3-small' },
|
|
262
|
+
// FAKE_SETTINGS,
|
|
263
|
+
// api.getClients()
|
|
264
|
+
// );
|
|
265
|
+
// await vectorStore.add([new BaseNode({ id: '1', text: 'test document', embedding: [1,2,3] })]);
|
|
266
|
+
// await vectorStore.persist(path.join(persistDir, 'vector_store.json'));
|
|
267
|
+
|
|
268
|
+
// // Verify the vector store exists
|
|
269
|
+
// expect(fs.existsSync(path.join(persistDir, 'vector_store.json'))).toBe(true);
|
|
270
|
+
|
|
271
|
+
// // Call the delete method
|
|
272
|
+
// await api.deleteSimpleVectorStore(projectName);
|
|
273
|
+
|
|
274
|
+
// // Verify the vector store no longer exists
|
|
275
|
+
// expect(fs.existsSync(path.join(persistDir, 'vector_store.json'))).toBe(false);
|
|
276
|
+
// });
|
|
277
|
+
|
|
278
|
+
// it('should delete Simple document store using real implementation', async () => {
|
|
279
|
+
// const projectName = 'test_project';
|
|
280
|
+
// const sanitizedProjectName = sanitizeProjectName(projectName);
|
|
281
|
+
// const storagePath = 'mock_storage_path';
|
|
282
|
+
// const persistDir = path.join(storagePath, sanitizedProjectName);
|
|
283
|
+
|
|
284
|
+
// // Create the document store using the real implementation
|
|
285
|
+
// const docStore = await createDocumentStore(
|
|
286
|
+
// { vectorStoreType: 'simple', projectName, storagePath },
|
|
287
|
+
// FAKE_SETTINGS,
|
|
288
|
+
// api.getClients()
|
|
289
|
+
// );
|
|
290
|
+
// await docStore.addDocuments([new BaseNode({ id: '1', text: 'test document' })], true);
|
|
291
|
+
// await docStore.persist(path.join(persistDir, 'doc_store.json'));
|
|
292
|
+
|
|
293
|
+
// // Verify the document store exists
|
|
294
|
+
// expect(fs.existsSync(path.join(persistDir, 'doc_store.json'))).toBe(true);
|
|
295
|
+
|
|
296
|
+
// // Call the delete method
|
|
297
|
+
// await api.deleteSimpleDocStore(projectName);
|
|
298
|
+
|
|
299
|
+
// // Verify the document store no longer exists
|
|
300
|
+
// expect(fs.existsSync(path.join(persistDir, 'doc_store.json'))).toBe(false);
|
|
301
|
+
// });
|
|
302
|
+
|
|
303
|
+
// it('should delete Simple index store using real implementation', async () => {
|
|
304
|
+
// const projectName = 'test_project';
|
|
305
|
+
// const sanitizedProjectName = sanitizeProjectName(projectName);
|
|
306
|
+
// const storagePath = 'mock_storage_path';
|
|
307
|
+
// const persistDir = path.join(storagePath, sanitizedProjectName);
|
|
308
|
+
|
|
309
|
+
// // Create the index store using the real implementation
|
|
310
|
+
// const indexStore = await createIndexStore(
|
|
311
|
+
// { vectorStoreType: 'simple', projectName, storagePath },
|
|
312
|
+
// FAKE_SETTINGS,
|
|
313
|
+
// api.getClients()
|
|
314
|
+
// );
|
|
315
|
+
// indexStore.addIndexStruct(new IndexStruct({ summary: 'test document' }));
|
|
316
|
+
// await indexStore.persist(path.join(persistDir, 'index_store.json'));
|
|
317
|
+
|
|
318
|
+
// // Verify the index store exists
|
|
319
|
+
// expect(fs.existsSync(path.join(persistDir, 'index_store.json'))).toBe(true);
|
|
320
|
+
|
|
321
|
+
// // Call the delete method
|
|
322
|
+
// await api.deleteSimpleIndexStore(projectName);
|
|
323
|
+
|
|
324
|
+
// // Verify the index store no longer exists
|
|
325
|
+
// expect(fs.existsSync(path.join(persistDir, 'index_store.json'))).toBe(false);
|
|
326
|
+
// });
|
|
327
|
+
});
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
3
|
import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../embedding.js';
|
|
4
4
|
import { loadDocumentsFromCsv } from '../../services/csvLoader.js';
|
|
5
|
-
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes,
|
|
5
|
+
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getStorageContext } from '../../services/embeddings.js';
|
|
6
6
|
import { MetadataMode } from 'llamaindex';
|
|
7
7
|
|
|
8
8
|
// filepath: /Users/jeremybmerrill/code/meaningfully/src/main/api/embedding.test.ts
|
|
@@ -82,9 +82,9 @@ describe('embedding.ts', () => {
|
|
|
82
82
|
describe('getDocStore', () => {
|
|
83
83
|
it('should return existing doc store', async () => {
|
|
84
84
|
const mockDocStore = 'docStore';
|
|
85
|
-
|
|
85
|
+
getStorageContext.mockResolvedValue({ docStore: mockDocStore });
|
|
86
86
|
|
|
87
|
-
const result = await getDocStore({});
|
|
87
|
+
const result = await getDocStore({}, {}, {});
|
|
88
88
|
|
|
89
89
|
expect(result).toBe(mockDocStore);
|
|
90
90
|
});
|
|
@@ -173,7 +173,7 @@ describe('embedding.ts', () => {
|
|
|
173
173
|
describe('getDocStore', () => {
|
|
174
174
|
it('should return existing doc store', async () => {
|
|
175
175
|
const mockDocStore = 'docStore';
|
|
176
|
-
|
|
176
|
+
getStorageContext.mockResolvedValue({ docStore: mockDocStore });
|
|
177
177
|
|
|
178
178
|
const result = await getDocStore({});
|
|
179
179
|
|
package/src/api/embedding.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments,
|
|
1
|
+
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getStorageContext } from "../services/embeddings.js";
|
|
2
2
|
import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types/index.js";
|
|
3
3
|
import { loadDocumentsFromCsv } from "../services/csvLoader.js";
|
|
4
4
|
import { MetadataMode } from "llamaindex";
|
|
@@ -97,8 +97,8 @@ export async function previewResults(
|
|
|
97
97
|
}
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
export async function getDocStore(config: EmbeddingConfig) {
|
|
101
|
-
return await
|
|
100
|
+
export async function getDocStore(config: EmbeddingConfig, settings: Settings, clients: Clients) {
|
|
101
|
+
return (await getStorageContext(config, settings, clients)).docStore;
|
|
102
102
|
}
|
|
103
103
|
|
|
104
104
|
export async function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
|
|
@@ -12,7 +12,6 @@ export declare function estimateCost(nodes: TextNode[], modelName: string): {
|
|
|
12
12
|
pricePer1M: number;
|
|
13
13
|
};
|
|
14
14
|
export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
|
|
15
|
-
export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
|
|
16
15
|
export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
|
|
17
16
|
export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): MockEmbedding | OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding;
|
|
18
17
|
export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
|
|
@@ -11,12 +11,15 @@ import {
|
|
|
11
11
|
SimpleVectorStore,
|
|
12
12
|
type StorageContext,
|
|
13
13
|
Settings as LlamaindexSettings,
|
|
14
|
-
SimpleDocumentStore
|
|
14
|
+
SimpleDocumentStore,
|
|
15
|
+
BaseDocumentStore,
|
|
16
|
+
BaseIndexStore,
|
|
17
|
+
SimpleIndexStore
|
|
15
18
|
} from "llamaindex";
|
|
16
19
|
import { OllamaEmbedding} from '@llamaindex/ollama'
|
|
17
20
|
import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral'
|
|
18
21
|
import { GeminiEmbedding } from '@llamaindex/google'
|
|
19
|
-
import { PGVectorStore } from '@llamaindex/postgres';
|
|
22
|
+
import { PGVectorStore, PostgresDocumentStore, PostgresIndexStore } from '@llamaindex/postgres';
|
|
20
23
|
import { AzureOpenAIEmbedding } from "@llamaindex/azure";
|
|
21
24
|
import { Sploder } from "./sploder.js";
|
|
22
25
|
import { CustomSentenceSplitter } from "./sentenceSplitter.js";
|
|
@@ -97,79 +100,14 @@ export function estimateCost(nodes: TextNode[], modelName: string): {
|
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
export async function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const storageContext = await storageContextFromDefaults({
|
|
105
|
-
persistDir: persistDir,
|
|
106
|
-
});
|
|
107
|
-
let vsi = await VectorStoreIndex.init({
|
|
108
|
-
storageContext: storageContext,
|
|
109
|
-
});
|
|
110
|
-
vsi.embedModel = embedModel;
|
|
111
|
-
return vsi;
|
|
112
|
-
|
|
113
|
-
case "postgres":
|
|
114
|
-
if (!clients.postgresClient) {
|
|
115
|
-
throw new Error("Postgres client required but not provided");
|
|
116
|
-
}
|
|
117
|
-
const pgStore = new PGVectorStore({
|
|
118
|
-
clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
|
|
119
|
-
tableName: sanitizeProjectName(config.projectName),
|
|
120
|
-
dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
|
|
121
|
-
embeddingModel: embedModel
|
|
122
|
-
});
|
|
123
|
-
const pgStorageContext = await storageContextFromDefaults({
|
|
124
|
-
vectorStores: { [ModalityType.TEXT]: pgStore },
|
|
125
|
-
});
|
|
126
|
-
return await VectorStoreIndex.init({
|
|
127
|
-
storageContext: pgStorageContext,
|
|
128
|
-
});
|
|
129
|
-
case "weaviate":
|
|
130
|
-
if (!clients.weaviateClient) {
|
|
131
|
-
throw new Error("Weaviate client required but not provided");
|
|
132
|
-
}
|
|
133
|
-
const weaviateStore = new BatchingWeaviateVectorStore({
|
|
134
|
-
indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
|
|
135
|
-
weaviateClient: clients.weaviateClient,
|
|
136
|
-
embeddingModel: embedModel
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
// WeaviateVectorStore's getNodeSimilarity method looks for distance, but current weaviate provides score
|
|
140
|
-
// (WeaviateVectorStore would get `score` if we were doing hybrid search)
|
|
141
|
-
// Overwrite the private getNodeSimilarity method to use 'score' from metadata
|
|
142
|
-
// @ts-ignore
|
|
143
|
-
weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => {
|
|
144
|
-
return entry.metadata.score;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
return await VectorStoreIndex.fromVectorStore(weaviateStore)
|
|
148
|
-
|
|
149
|
-
default:
|
|
150
|
-
throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
103
|
+
const storageContext = await getStorageContext(config, settings, clients);
|
|
104
|
+
const vectorStore = storageContext.vectorStores[ModalityType.TEXT];
|
|
105
|
+
if (!vectorStore) {
|
|
106
|
+
throw new Error("Vector store for ModalityType.TEXT is undefined");
|
|
151
107
|
}
|
|
108
|
+
return await VectorStoreIndex.fromVectorStore(vectorStore);
|
|
152
109
|
}
|
|
153
110
|
|
|
154
|
-
export async function getExistingDocStore(config: EmbeddingConfig) {
|
|
155
|
-
// switch (config.vectorStoreType) {
|
|
156
|
-
// case "simple":
|
|
157
|
-
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
158
|
-
const storageContext = await storageContextFromDefaults({
|
|
159
|
-
persistDir: persistDir,
|
|
160
|
-
});
|
|
161
|
-
return storageContext.docStore;
|
|
162
|
-
|
|
163
|
-
// case "postgres":
|
|
164
|
-
// throw new Error(`Not yet implemented vector store type: ${config.vectorStoreType}`);
|
|
165
|
-
// // return await createVectorStore(config);
|
|
166
|
-
// default:
|
|
167
|
-
// throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
168
|
-
// }
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
111
|
export async function transformDocumentsToNodes(
|
|
174
112
|
documents: Document[],
|
|
175
113
|
config: EmbeddingConfig,
|
|
@@ -249,12 +187,15 @@ export function getEmbedModel(
|
|
|
249
187
|
|
|
250
188
|
export async function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext> {
|
|
251
189
|
const vectorStore = await createVectorStore(config, settings, clients);
|
|
190
|
+
const docStore = await createDocumentStore(config, settings, clients); // new SimpleDocumentStore()
|
|
191
|
+
const indexStore = await createIndexStore(config, settings, clients);
|
|
252
192
|
fs.mkdirSync(config.storagePath, { recursive: true });
|
|
253
193
|
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
254
194
|
return await storageContextFromDefaults({
|
|
255
195
|
persistDir: persistDir,
|
|
256
196
|
vectorStores: {[ModalityType.TEXT]: vectorStore},
|
|
257
|
-
docStore:
|
|
197
|
+
docStore: docStore,
|
|
198
|
+
indexStore: indexStore
|
|
258
199
|
/*
|
|
259
200
|
if docStore is created with a persist path (as it is by default in storageContextFromDefaults)
|
|
260
201
|
then it will write to disk after every put(), which happens 2+ times per document.
|
|
@@ -273,8 +214,13 @@ export async function persistDocuments(documents: Document[], config: EmbeddingC
|
|
|
273
214
|
|
|
274
215
|
// see comments in getStorageContext
|
|
275
216
|
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
276
|
-
|
|
277
|
-
|
|
217
|
+
if (storageContext.docStore instanceof SimpleDocumentStore) {
|
|
218
|
+
// @ts-ignore
|
|
219
|
+
await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
|
|
220
|
+
}else if (storageContext.docStore instanceof PostgresDocumentStore) {
|
|
221
|
+
// PostgresDocumentStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
|
|
222
|
+
console.log("Pretending to persist Postgres document store, but it actually persists automatically.");
|
|
223
|
+
}
|
|
278
224
|
|
|
279
225
|
console.timeEnd("persistDocuments Run Time");
|
|
280
226
|
}
|
|
@@ -303,11 +249,11 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
|
|
|
303
249
|
// all the if statements are just type-checking boilerplate.
|
|
304
250
|
// N.B. WeaviateVectorStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
|
|
305
251
|
if (vectorStore) {
|
|
306
|
-
if (vectorStore instanceof
|
|
252
|
+
if (vectorStore instanceof SimpleVectorStore) {
|
|
307
253
|
await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
|
|
308
|
-
} else if (vectorStore instanceof BatchingWeaviateVectorStore) {
|
|
254
|
+
} else if (vectorStore instanceof PGVectorStore || vectorStore instanceof BatchingWeaviateVectorStore) {
|
|
309
255
|
// WeaviateVectorStore does not have a persist method, it persists automatically
|
|
310
|
-
console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
|
|
256
|
+
console.log("Pretending to persist Weaviate or Postgres vector store, but it actually persists automatically.");
|
|
311
257
|
} else {
|
|
312
258
|
throw new Error("Vector store does not support persist method");
|
|
313
259
|
}
|
|
@@ -318,7 +264,8 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
|
|
|
318
264
|
return index;
|
|
319
265
|
}
|
|
320
266
|
|
|
321
|
-
|
|
267
|
+
// exported only for tests
|
|
268
|
+
export async function createVectorStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<PGVectorStore | SimpleVectorStore | BatchingWeaviateVectorStore> {
|
|
322
269
|
const embeddingModel = getEmbedModel(config, settings);
|
|
323
270
|
switch (config.vectorStoreType) {
|
|
324
271
|
|
|
@@ -326,8 +273,8 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
|
|
|
326
273
|
// otherwise it defaults to Ada.
|
|
327
274
|
case "postgres":
|
|
328
275
|
return new PGVectorStore({
|
|
329
|
-
|
|
330
|
-
tableName: sanitizeProjectName(config.projectName),
|
|
276
|
+
client: clients.postgresClient,
|
|
277
|
+
tableName: "vecs_" + sanitizeProjectName(config.projectName),
|
|
331
278
|
dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
|
|
332
279
|
embeddingModel: embeddingModel
|
|
333
280
|
});
|
|
@@ -357,6 +304,40 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
|
|
|
357
304
|
}
|
|
358
305
|
}
|
|
359
306
|
|
|
307
|
+
// exported only for tests
|
|
308
|
+
export async function createDocumentStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseDocumentStore> {
|
|
309
|
+
// we create the doc store without a persist path, so it doesn't write to disk after every put()
|
|
310
|
+
switch (config.documentStoreType || config.vectorStoreType) {
|
|
311
|
+
case "postgres":
|
|
312
|
+
return new PostgresDocumentStore({
|
|
313
|
+
client: clients.postgresClient,
|
|
314
|
+
tableName: "docs_" + sanitizeProjectName(config.projectName),
|
|
315
|
+
});
|
|
316
|
+
case "simple":
|
|
317
|
+
case "weaviate":
|
|
318
|
+
return new SimpleDocumentStore();
|
|
319
|
+
default:
|
|
320
|
+
throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
// exported only for tests
|
|
325
|
+
export async function createIndexStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseIndexStore> {
|
|
326
|
+
switch (config.documentStoreType || config.vectorStoreType) {
|
|
327
|
+
case "postgres":
|
|
328
|
+
return new PostgresIndexStore({
|
|
329
|
+
client: clients.postgresClient,
|
|
330
|
+
tableName: "idx_" + sanitizeProjectName(config.projectName),
|
|
331
|
+
});
|
|
332
|
+
case "simple":
|
|
333
|
+
case "weaviate":
|
|
334
|
+
return new SimpleIndexStore();
|
|
335
|
+
default:
|
|
336
|
+
throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
}
|
|
340
|
+
|
|
360
341
|
export async function searchDocuments(
|
|
361
342
|
index: VectorStoreIndex,
|
|
362
343
|
query: string,
|
package/src/types/index.ts
CHANGED
|
@@ -57,6 +57,8 @@ export interface EmbeddingConfig {
|
|
|
57
57
|
modelName: string;
|
|
58
58
|
modelProvider: string
|
|
59
59
|
vectorStoreType: "simple" | "postgres" | "weaviate";
|
|
60
|
+
documentStoreType?: "simple" | "postgres";
|
|
61
|
+
indexStoreType?: "simple" | "postgres";
|
|
60
62
|
projectName: string;
|
|
61
63
|
storagePath: string;
|
|
62
64
|
splitIntoSentences: boolean;
|