@meaningfully/core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import { describe, it, beforeEach, expect, vi } from 'vitest';
2
+ import { MetadataManager } from '../MetadataManager';
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+ import { sanitizeProjectName } from '../utils.js';
6
+ import { createVectorStore, createDocumentStore, createIndexStore} from '../services/embeddings.js';
7
+ import { IndexStruct } from 'llamaindex';
8
+ import { Client } from 'pg'; // Import the real Postgres client
9
+
10
+
11
+ vi.mock('../MetadataManager');
12
+ vi.mock('fs');
13
+ vi.mock('path');
14
+
15
+ // Mock the embedding module before importing MeaningfullyAPI
16
+ vi.doMock('../api/embedding.js', () => ({
17
+ getIndex: vi.fn(),
18
+ search: vi.fn().mockResolvedValue([{ id: 1, text: 'result' }]),
19
+ createEmbeddings: vi.fn().mockResolvedValue({ success: true, error: null }),
20
+ }));
21
+ vi.doMock('../services/csvLoader.js', () => ({
22
+ loadDocumentsFromCsv: vi.fn().mockResolvedValue([]),
23
+ }));
24
+ import { BaseNode } from 'llamaindex';
25
+
26
+ // Mock BaseNode so that getEmbeddings returns made up numbers
27
+ vi.mock('llamaindex', async (importOriginal) => {
28
+ const actual = await importOriginal();
29
+ return {
30
+ ...actual,
31
+ BaseNode: class extends actual.BaseNode {
32
+ async getEmbeddings() {
33
+ return [0.1, 0.2, 0.3, 0.4];
34
+ }
35
+ generateHash() {
36
+ return 'hash';
37
+ }
38
+ getContent(){
39
+ return "content";
40
+ }
41
+ }
42
+ };
43
+ });
44
+
45
+ // Import MeaningfullyAPI after mocking
46
+ const { MeaningfullyAPI } = await import('../Meaningfully');
47
+
48
+ const FAKE_SETTINGS = {
49
+ openAIKey: 'sk-proj-testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest',
50
+ azureOpenAIKey: 'testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest',
51
+ mistralApiKey: 'testtesttesttesttesttesttesttest',
52
+ geminiApiKey: 'testtesttesttesttesttesttesttesttesttes',
53
+ azureOpenAIApiVersion: "2024-02-01",
54
+ azureOpenAIEndpoint: "https://test.openai.azure.com",
55
+ oLlamaBaseURL: "http://localhost:11434",
56
+ }
57
+
58
+ describe('MeaningfullyAPI', () => {
59
+ let api: MeaningfullyAPI;
60
+ let mockMetadataManager: MetadataManager;
61
+
// Fresh fixtures for every test: a stubbed MetadataManager plus an API instance built on it.
beforeEach(() => {
  // @ts-ignore
  const manager = new MetadataManager() as MetadataManager;

  // Canned answers for the manager calls stubbed for this suite.
  vi.spyOn(manager, 'addDocumentSet').mockResolvedValue(1);
  vi.spyOn(manager, 'getSettings').mockResolvedValue(FAKE_SETTINGS);
  vi.spyOn(manager, 'deleteDocumentSet').mockResolvedValue();

  mockMetadataManager = manager;
  api = new MeaningfullyAPI({
    storagePath: 'mock_storage_path',
    metadataManager: manager,
  });
});
73
+
describe('uploadCsv', () => {
  // Builds the upload request used by both tests; a new object per call so
  // one test cannot observe mutations made during another.
  const buildUploadRequest = () => ({
    filePath: '/mock/file.csv',
    datasetName: 'testDataset',
    textColumns: ['text'],
    metadataColumns: [],
    splitIntoSentences: true,
    combineSentencesIntoChunks: false,
    sploderMaxSize: 100,
    chunkSize: 512,
    chunkOverlap: 0,
    modelName: 'testModel',
    modelProvider: 'openai',
    description: 'Test dataset',
  });

  it('should upload a CSV and create embeddings successfully', async () => {
    const mockData = buildUploadRequest();

    // Mock createEmbeddings for this test
    const createEmbeddingsMock = vi.spyOn(await import('../api/embedding.js'), 'createEmbeddings');
    createEmbeddingsMock.mockResolvedValue({ success: true });

    try {
      const result = await api.uploadCsv(mockData);

      expect(createEmbeddingsMock).toHaveBeenCalledWith(
        expect.any(String),
        expect.any(String),
        expect.objectContaining({ modelName: 'testModel' }),
        expect.any(Object),
        expect.any(Object)
      );
      expect(result).toEqual({ success: true, documentSetId: 1 });
    } finally {
      // Restore even when an assertion above throws, so the stub cannot leak into later tests.
      createEmbeddingsMock.mockRestore();
    }
  });

  it('should handle errors during embeddings creation', async () => {
    const mockData = buildUploadRequest();

    // Mock createEmbeddings to simulate an error
    const createEmbeddingsMock = vi.spyOn(await import('../api/embedding.js'), 'createEmbeddings');
    createEmbeddingsMock.mockResolvedValue({ success: false, error: 'Embedding error' });

    try {
      await expect(api.uploadCsv(mockData)).rejects.toThrow('Embedding error');
      // 1 is the id the stubbed addDocumentSet resolves with in beforeEach.
      expect(mockMetadataManager.deleteDocumentSet).toHaveBeenCalledWith(1);
    } finally {
      // Restore even when an assertion above throws, so the stub cannot leak into later tests.
      createEmbeddingsMock.mockRestore();
    }
  });
});
135
+
describe('searchDocumentSet', () => {
  it('should search a document set and return results', async () => {
    // Metadata lookup is stubbed; the stored record deliberately carries a
    // different documentSetId (5) than the id being searched (1).
    const getDocumentSetSpy = vi.spyOn(mockMetadataManager, 'getDocumentSet');
    getDocumentSetSpy.mockResolvedValue({
      parameters: { modelName: 'testModel', modelProvider: 'openai', vectorStoreType: 'simple' },
      name: 'testDataset',
      documentSetId: 5,
      uploadDate: new Date(),
      totalDocuments: 420
    });

    const hits = await api.searchDocumentSet(1, 'query', 10);

    // Same payload the mocked `search` in '../api/embedding.js' resolves with.
    expect(hits).toEqual([{ id: 1, text: 'result' }]);
    expect(mockMetadataManager.getDocumentSet).toHaveBeenCalledWith(1);
  });
});
152
+
describe('deleteDocumentSet', () => {
  it('should delete a document set and associated files', async () => {
    // Keep the test from touching the disk.
    const rmSyncSpy = vi.spyOn(fs, 'rmSync').mockImplementation(() => {});
    vi.spyOn(mockMetadataManager, 'getDocumentSet').mockResolvedValue({
      parameters: { vectorStoreType: 'simple' },
      name: 'testDataset',
      documentSetId: 1,
      uploadDate: new Date(),
      totalDocuments: 100
    });

    const outcome = await api.deleteDocumentSet(1);

    expect(mockMetadataManager.deleteDocumentSet).toHaveBeenCalledWith(1);
    // NOTE(review): 'path' is mocked at the top of this file, so this expected
    // directory may not be a real joined path — confirm the assertion is meaningful.
    const expectedDir = path.join('mock_storage_path', 'testDataset');
    expect(rmSyncSpy).toHaveBeenCalledWith(expectedDir, { recursive: true, force: true });
    expect(outcome).toEqual({ success: true });
  });
});
175
+
describe('getMaskedSettings', () => {
  it('should return masked settings', async () => {
    // Secret-bearing fields are expected back with their middle replaced by
    // asterisks; the version, endpoint and base URL are expected unchanged.
    const expectedMasked = {
      openAIKey: 'sk-proj-*******testtest',
      azureOpenAIKey: 'testtest*******testtest',
      mistralApiKey: 'testtest*******testtest',
      geminiApiKey: 'testtest*******ttesttes',
      azureOpenAIApiVersion: "2024-02-01",
      azureOpenAIEndpoint: "https://test.openai.azure.com",
      oLlamaBaseURL: "http://localhost:11434",
    };
    vi.spyOn(mockMetadataManager, 'getSettings').mockResolvedValue(FAKE_SETTINGS);

    const masked = await api.getMaskedSettings();

    expect(masked).toEqual(expectedMasked);
  });
});
193
+ });
194
+
195
+
196
+ describe('MeaningfullyAPI - Store Deletion with Real Implementation', () => {
197
+ let api: MeaningfullyAPI;
198
+ let mockMetadataManager: MetadataManager;
199
+ let realPostgresClient: Client;
200
+
// Per-test setup: a mocked MetadataManager, a live Postgres connection taken
// from POSTGRES_CONNECTION_STRING, and an API instance wired to both.
// The returned function is run by Vitest as teardown after each test.
beforeEach(async () => {
  // @ts-ignore
  mockMetadataManager = new MetadataManager() as MetadataManager;

  // Initialize a real Postgres client
  const client = new Client({
    connectionString: process.env.POSTGRES_CONNECTION_STRING,
  });
  await client.connect();
  realPostgresClient = client;

  api = new MeaningfullyAPI({
    storagePath: 'mock_storage_path',
    metadataManager: mockMetadataManager,
    postgresClient: realPostgresClient, // Use the real client
  });
  // NOTE(review): Vitest hoists vi.unmock() calls to the top of the file, so
  // this does not take effect "from here on" — confirm how it is meant to
  // interact with the file-level vi.mock('fs').
  vi.unmock('fs')
  if (!fs.existsSync("mock_storage_path")){
    fs.mkdirSync("mock_storage_path");
  }

  // Close the connection opened above; without this every test leaves an
  // open client behind, which leaks connections and can keep the run alive.
  return async () => {
    await client.end();
  };
});
221
+
it('should create and delete a Postgres vector store using real implementation', async () => {
  const projectName = 'test_project';
  const tableName = `vecs_${sanitizeProjectName(projectName)}`;

  const tableExistsQuery = `
    SELECT EXISTS (
      SELECT FROM information_schema.tables
      WHERE table_name = $1
    );
  `;
  // Asks Postgres whether the project's vector table is currently listed.
  const tableExists = async () => {
    const { rows } = await realPostgresClient.query(tableExistsQuery, [tableName]);
    return rows[0].exists;
  };

  // Create the vector store using the real implementation and add one node with a 1536-value embedding.
  const storeConfig = {
    vectorStoreType: 'postgres',
    projectName,
    storagePath: 'mock_storage_path',
    modelProvider: 'openai',
    modelName: 'text-embedding-ada-002',
  };
  const vectorStore = await createVectorStore(storeConfig, FAKE_SETTINGS, api.getClients());
  const embedding = new Array(1536).fill(0.01);
  await vectorStore.add([new BaseNode({ id: '1', text: 'test document', embedding })]);

  // Verify the table exists
  expect(await tableExists()).toBe(true);

  // Call the delete method
  await api.deletePostgresVectorStore(projectName);

  // Verify the table no longer exists
  expect(await tableExists()).toBe(false);
});
252
+
253
+ // it('should delete Simple vector store using real implementation', async () => {
254
+ // const projectName = 'test_project';
255
+ // const sanitizedProjectName = sanitizeProjectName(projectName);
256
+ // const storagePath = 'mock_storage_path';
257
+ // const persistDir = path.join(storagePath, sanitizedProjectName);
258
+
259
+ // // Create the vector store using the real implementation
260
+ // const vectorStore = await createVectorStore(
261
+ // { vectorStoreType: 'simple', projectName, storagePath, modelProvider: "openai", modelName: 'text-embedding-3-small' },
262
+ // FAKE_SETTINGS,
263
+ // api.getClients()
264
+ // );
265
+ // await vectorStore.add([new BaseNode({ id: '1', text: 'test document', embedding: [1,2,3] })]);
266
+ // await vectorStore.persist(path.join(persistDir, 'vector_store.json'));
267
+
268
+ // // Verify the vector store exists
269
+ // expect(fs.existsSync(path.join(persistDir, 'vector_store.json'))).toBe(true);
270
+
271
+ // // Call the delete method
272
+ // await api.deleteSimpleVectorStore(projectName);
273
+
274
+ // // Verify the vector store no longer exists
275
+ // expect(fs.existsSync(path.join(persistDir, 'vector_store.json'))).toBe(false);
276
+ // });
277
+
278
+ // it('should delete Simple document store using real implementation', async () => {
279
+ // const projectName = 'test_project';
280
+ // const sanitizedProjectName = sanitizeProjectName(projectName);
281
+ // const storagePath = 'mock_storage_path';
282
+ // const persistDir = path.join(storagePath, sanitizedProjectName);
283
+
284
+ // // Create the document store using the real implementation
285
+ // const docStore = await createDocumentStore(
286
+ // { vectorStoreType: 'simple', projectName, storagePath },
287
+ // FAKE_SETTINGS,
288
+ // api.getClients()
289
+ // );
290
+ // await docStore.addDocuments([new BaseNode({ id: '1', text: 'test document' })], true);
291
+ // await docStore.persist(path.join(persistDir, 'doc_store.json'));
292
+
293
+ // // Verify the document store exists
294
+ // expect(fs.existsSync(path.join(persistDir, 'doc_store.json'))).toBe(true);
295
+
296
+ // // Call the delete method
297
+ // await api.deleteSimpleDocStore(projectName);
298
+
299
+ // // Verify the document store no longer exists
300
+ // expect(fs.existsSync(path.join(persistDir, 'doc_store.json'))).toBe(false);
301
+ // });
302
+
303
+ // it('should delete Simple index store using real implementation', async () => {
304
+ // const projectName = 'test_project';
305
+ // const sanitizedProjectName = sanitizeProjectName(projectName);
306
+ // const storagePath = 'mock_storage_path';
307
+ // const persistDir = path.join(storagePath, sanitizedProjectName);
308
+
309
+ // // Create the index store using the real implementation
310
+ // const indexStore = await createIndexStore(
311
+ // { vectorStoreType: 'simple', projectName, storagePath },
312
+ // FAKE_SETTINGS,
313
+ // api.getClients()
314
+ // );
315
+ // indexStore.addIndexStruct(new IndexStruct({ summary: 'test document' }));
316
+ // await indexStore.persist(path.join(persistDir, 'index_store.json'));
317
+
318
+ // // Verify the index store exists
319
+ // expect(fs.existsSync(path.join(persistDir, 'index_store.json'))).toBe(true);
320
+
321
+ // // Call the delete method
322
+ // await api.deleteSimpleIndexStore(projectName);
323
+
324
+ // // Verify the index store no longer exists
325
+ // expect(fs.existsSync(path.join(persistDir, 'index_store.json'))).toBe(false);
326
+ // });
327
+ });
@@ -2,7 +2,7 @@
2
2
  import { describe, it, expect, vi, beforeEach } from 'vitest';
3
3
  import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../embedding.js';
4
4
  import { loadDocumentsFromCsv } from '../../services/csvLoader.js';
5
- import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getExistingDocStore } from '../../services/embeddings.js';
5
+ import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getStorageContext } from '../../services/embeddings.js';
6
6
  import { MetadataMode } from 'llamaindex';
7
7
 
8
8
  // filepath: /Users/jeremybmerrill/code/meaningfully/src/main/api/embedding.test.ts
@@ -82,9 +82,9 @@ describe('embedding.ts', () => {
82
82
  describe('getDocStore', () => {
83
83
  it('should return existing doc store', async () => {
84
84
  const mockDocStore = 'docStore';
85
- getExistingDocStore.mockResolvedValue(mockDocStore);
85
+ getStorageContext.mockResolvedValue({ docStore: mockDocStore });
86
86
 
87
- const result = await getDocStore({});
87
+ const result = await getDocStore({}, {}, {});
88
88
 
89
89
  expect(result).toBe(mockDocStore);
90
90
  });
@@ -173,7 +173,7 @@ describe('embedding.ts', () => {
173
173
  describe('getDocStore', () => {
174
174
  it('should return existing doc store', async () => {
175
175
  const mockDocStore = 'docStore';
176
- getExistingDocStore.mockResolvedValue(mockDocStore);
176
+ getStorageContext.mockResolvedValue({ docStore: mockDocStore });
177
177
 
178
178
  const result = await getDocStore({});
179
179
 
@@ -1,4 +1,4 @@
1
- import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getExistingDocStore } from "../services/embeddings.js";
1
+ import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getStorageContext } from "../services/embeddings.js";
2
2
  import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types/index.js";
3
3
  import { loadDocumentsFromCsv } from "../services/csvLoader.js";
4
4
  import { MetadataMode } from "llamaindex";
@@ -97,8 +97,8 @@ export async function previewResults(
97
97
  }
98
98
  }
99
99
 
100
- export async function getDocStore(config: EmbeddingConfig) {
101
- return await getExistingDocStore(config);
100
+ export async function getDocStore(config: EmbeddingConfig, settings: Settings, clients: Clients) {
101
+ return (await getStorageContext(config, settings, clients)).docStore;
102
102
  }
103
103
 
104
104
  export async function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
@@ -12,7 +12,6 @@ export declare function estimateCost(nodes: TextNode[], modelName: string): {
12
12
  pricePer1M: number;
13
13
  };
14
14
  export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
15
- export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
16
15
  export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
17
16
  export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): MockEmbedding | OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding;
18
17
  export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
@@ -11,12 +11,15 @@ import {
11
11
  SimpleVectorStore,
12
12
  type StorageContext,
13
13
  Settings as LlamaindexSettings,
14
- SimpleDocumentStore
14
+ SimpleDocumentStore,
15
+ BaseDocumentStore,
16
+ BaseIndexStore,
17
+ SimpleIndexStore
15
18
  } from "llamaindex";
16
19
  import { OllamaEmbedding} from '@llamaindex/ollama'
17
20
  import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral'
18
21
  import { GeminiEmbedding } from '@llamaindex/google'
19
- import { PGVectorStore } from '@llamaindex/postgres';
22
+ import { PGVectorStore, PostgresDocumentStore, PostgresIndexStore } from '@llamaindex/postgres';
20
23
  import { AzureOpenAIEmbedding } from "@llamaindex/azure";
21
24
  import { Sploder } from "./sploder.js";
22
25
  import { CustomSentenceSplitter } from "./sentenceSplitter.js";
@@ -97,79 +100,14 @@ export function estimateCost(nodes: TextNode[], modelName: string): {
97
100
  }
98
101
 
99
102
  export async function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
100
- const embedModel = getEmbedModel(config, settings);
101
- switch (config.vectorStoreType) {
102
- case "simple":
103
- const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName));
104
- const storageContext = await storageContextFromDefaults({
105
- persistDir: persistDir,
106
- });
107
- let vsi = await VectorStoreIndex.init({
108
- storageContext: storageContext,
109
- });
110
- vsi.embedModel = embedModel;
111
- return vsi;
112
-
113
- case "postgres":
114
- if (!clients.postgresClient) {
115
- throw new Error("Postgres client required but not provided");
116
- }
117
- const pgStore = new PGVectorStore({
118
- clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
119
- tableName: sanitizeProjectName(config.projectName),
120
- dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
121
- embeddingModel: embedModel
122
- });
123
- const pgStorageContext = await storageContextFromDefaults({
124
- vectorStores: { [ModalityType.TEXT]: pgStore },
125
- });
126
- return await VectorStoreIndex.init({
127
- storageContext: pgStorageContext,
128
- });
129
- case "weaviate":
130
- if (!clients.weaviateClient) {
131
- throw new Error("Weaviate client required but not provided");
132
- }
133
- const weaviateStore = new BatchingWeaviateVectorStore({
134
- indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
135
- weaviateClient: clients.weaviateClient,
136
- embeddingModel: embedModel
137
- });
138
-
139
- // WeaviateVectorStore's getNodeSimilarity method looks for distance, but current weaviate provides score
140
- // (WeaviateVectorStore would get `score` if we were doing hybrid search)
141
- // Overwrite the private getNodeSimilarity method to use 'score' from metadata
142
- // @ts-ignore
143
- weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => {
144
- return entry.metadata.score;
145
- }
146
-
147
- return await VectorStoreIndex.fromVectorStore(weaviateStore)
148
-
149
- default:
150
- throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
103
+ const storageContext = await getStorageContext(config, settings, clients);
104
+ const vectorStore = storageContext.vectorStores[ModalityType.TEXT];
105
+ if (!vectorStore) {
106
+ throw new Error("Vector store for ModalityType.TEXT is undefined");
151
107
  }
108
+ return await VectorStoreIndex.fromVectorStore(vectorStore);
152
109
  }
153
110
 
154
- export async function getExistingDocStore(config: EmbeddingConfig) {
155
- // switch (config.vectorStoreType) {
156
- // case "simple":
157
- const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
158
- const storageContext = await storageContextFromDefaults({
159
- persistDir: persistDir,
160
- });
161
- return storageContext.docStore;
162
-
163
- // case "postgres":
164
- // throw new Error(`Not yet implemented vector store type: ${config.vectorStoreType}`);
165
- // // return await createVectorStore(config);
166
- // default:
167
- // throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
168
- // }
169
- }
170
-
171
-
172
-
173
111
  export async function transformDocumentsToNodes(
174
112
  documents: Document[],
175
113
  config: EmbeddingConfig,
@@ -249,12 +187,15 @@ export function getEmbedModel(
249
187
 
250
188
  export async function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext> {
251
189
  const vectorStore = await createVectorStore(config, settings, clients);
190
+ const docStore = await createDocumentStore(config, settings, clients); // new SimpleDocumentStore()
191
+ const indexStore = await createIndexStore(config, settings, clients);
252
192
  fs.mkdirSync(config.storagePath, { recursive: true });
253
193
  const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
254
194
  return await storageContextFromDefaults({
255
195
  persistDir: persistDir,
256
196
  vectorStores: {[ModalityType.TEXT]: vectorStore},
257
- docStore: new SimpleDocumentStore()
197
+ docStore: docStore,
198
+ indexStore: indexStore
258
199
  /*
259
200
  if docStore is created with a persist path (as it is by default in storageContextFromDefaults)
260
201
  then it will write to disk after every put(), which happens 2+ times per document.
@@ -273,8 +214,13 @@ export async function persistDocuments(documents: Document[], config: EmbeddingC
273
214
 
274
215
  // see comments in getStorageContext
275
216
  const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
276
- // @ts-ignore
277
- await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
217
+ if (storageContext.docStore instanceof SimpleDocumentStore) {
218
+ // @ts-ignore
219
+ await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
220
+ }else if (storageContext.docStore instanceof PostgresDocumentStore) {
221
+ // PostgresDocumentStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
222
+ console.log("Pretending to persist Postgres document store, but it actually persists automatically.");
223
+ }
278
224
 
279
225
  console.timeEnd("persistDocuments Run Time");
280
226
  }
@@ -303,11 +249,11 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
303
249
  // all the if statements are just type-checking boilerplate.
304
250
  // N.B. WeaviateVectorStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
305
251
  if (vectorStore) {
306
- if (vectorStore instanceof PGVectorStore || vectorStore instanceof SimpleVectorStore) {
252
+ if (vectorStore instanceof SimpleVectorStore) {
307
253
  await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
308
- } else if (vectorStore instanceof BatchingWeaviateVectorStore) {
254
+ } else if (vectorStore instanceof PGVectorStore || vectorStore instanceof BatchingWeaviateVectorStore) {
309
255
  // WeaviateVectorStore does not have a persist method, it persists automatically
310
- console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
256
+ console.log("Pretending to persist Weaviate or Postgres vector store, but it actually persists automatically.");
311
257
  } else {
312
258
  throw new Error("Vector store does not support persist method");
313
259
  }
@@ -318,7 +264,8 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
318
264
  return index;
319
265
  }
320
266
 
321
- async function createVectorStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<PGVectorStore | SimpleVectorStore | BatchingWeaviateVectorStore> {
267
+ // exported only for tests
268
+ export async function createVectorStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<PGVectorStore | SimpleVectorStore | BatchingWeaviateVectorStore> {
322
269
  const embeddingModel = getEmbedModel(config, settings);
323
270
  switch (config.vectorStoreType) {
324
271
 
@@ -326,8 +273,8 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
326
273
  // otherwise it defaults to Ada.
327
274
  case "postgres":
328
275
  return new PGVectorStore({
329
- clientConfig: {connectionString: process.env.POSTGRES_CONNECTION_STRING},
330
- tableName: sanitizeProjectName(config.projectName),
276
+ client: clients.postgresClient,
277
+ tableName: "vecs_" + sanitizeProjectName(config.projectName),
331
278
  dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
332
279
  embeddingModel: embeddingModel
333
280
  });
@@ -357,6 +304,40 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
357
304
  }
358
305
  }
359
306
 
/**
 * Creates the document store for a project.
 *
 * The store type is `config.documentStoreType` when set, otherwise
 * `config.vectorStoreType`. "postgres" yields a `PostgresDocumentStore` on the
 * shared client, using table `docs_<sanitized project name>`; "simple" and
 * "weaviate" yield an in-memory `SimpleDocumentStore`.
 *
 * Exported only for tests.
 *
 * @param config - Embedding configuration (store types and project name are read).
 * @param settings - Unused here; kept so the store factories share one signature.
 * @param clients - Supplies `postgresClient` for the Postgres-backed store.
 * @returns The document store instance.
 * @throws Error if the resolved store type is not supported.
 */
export async function createDocumentStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseDocumentStore> {
  // we create the doc store without a persist path, so it doesn't write to disk after every put()
  const storeType = config.documentStoreType || config.vectorStoreType;
  switch (storeType) {
    case "postgres":
      return new PostgresDocumentStore({
        client: clients.postgresClient,
        tableName: "docs_" + sanitizeProjectName(config.projectName),
      });
    case "simple":
    case "weaviate":
      return new SimpleDocumentStore();
    default:
      // Report the value that was actually switched on, not vectorStoreType,
      // which may be perfectly valid when documentStoreType is the bad one.
      throw new Error(`Unsupported document store type: ${storeType}`);
  }
}
323
+
/**
 * Creates the index store for a project.
 *
 * The store type is resolved in this order: `config.indexStoreType`, then
 * `config.documentStoreType`, then `config.vectorStoreType`. "postgres" yields
 * a `PostgresIndexStore` on the shared client, using table
 * `idx_<sanitized project name>`; "simple" and "weaviate" yield an in-memory
 * `SimpleIndexStore`.
 *
 * Exported only for tests.
 *
 * @param config - Embedding configuration (store types and project name are read).
 * @param settings - Unused here; kept so the store factories share one signature.
 * @param clients - Supplies `postgresClient` for the Postgres-backed store.
 * @returns The index store instance.
 * @throws Error if the resolved store type is not supported.
 */
export async function createIndexStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseIndexStore> {
  // indexStoreType takes precedence; the remaining fallbacks keep the previous
  // behavior for configs that do not set it.
  const storeType = config.indexStoreType || config.documentStoreType || config.vectorStoreType;
  switch (storeType) {
    case "postgres":
      return new PostgresIndexStore({
        client: clients.postgresClient,
        tableName: "idx_" + sanitizeProjectName(config.projectName),
      });
    case "simple":
    case "weaviate":
      return new SimpleIndexStore();
    default:
      // Report the value that was actually switched on.
      throw new Error(`Unsupported index store type: ${storeType}`);
  }
}
340
+
360
341
  export async function searchDocuments(
361
342
  index: VectorStoreIndex,
362
343
  query: string,
@@ -57,6 +57,8 @@ export interface EmbeddingConfig {
57
57
  modelName: string;
58
58
  modelProvider: string
59
59
  vectorStoreType: "simple" | "postgres" | "weaviate";
60
+ documentStoreType?: "simple" | "postgres";
61
+ indexStoreType?: "simple" | "postgres";
60
62
  projectName: string;
61
63
  storagePath: string;
62
64
  splitIntoSentences: boolean;