@meaningfully/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Meaningfully.d.ts +3 -2
- package/dist/Meaningfully.d.ts.map +1 -1
- package/dist/Meaningfully.js +8 -4
- package/dist/Meaningfully.js.map +1 -1
- package/dist/api/embedding.d.ts +1 -1
- package/dist/api/embedding.d.ts.map +1 -1
- package/dist/api/embedding.js +3 -3
- package/dist/api/embedding.js.map +1 -1
- package/dist/services/embeddings.d.ts +0 -1
- package/dist/services/embeddings.d.ts.map +1 -1
- package/dist/services/embeddings.js +51 -72
- package/dist/services/embeddings.js.map +1 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +5 -2
- package/src/Meaningfully.ts +9 -5
- package/{tests → src/__tests__}/MetadataManager.test.ts +1 -1
- package/{tests → src/api/__tests__}/embedding.test.ts +20 -20
- package/src/api/embedding.ts +3 -3
- package/{tests → src/services/__tests__}/csvLoader.test.ts +1 -1
- package/{tests → src/services/__tests__}/embeddings.test.ts +2 -2
- package/{tests → src/services/__tests__}/sentenceSplitter.test.ts +1 -1
- package/src/services/embeddings.d.ts +0 -1
- package/src/services/embeddings.ts +57 -81
- package/src/types/index.ts +2 -0
- package/tsconfig.json +1 -1
- package/dist/api/embedding.test.d.ts +0 -2
- package/dist/api/embedding.test.d.ts.map +0 -1
- package/dist/api/embedding.test.js +0 -340
- package/dist/api/embedding.test.js.map +0 -1
- package/dist/services/csvLoader.test.d.ts +0 -2
- package/dist/services/csvLoader.test.d.ts.map +0 -1
- package/dist/services/csvLoader.test.js +0 -75
- package/dist/services/csvLoader.test.js.map +0 -1
- package/dist/services/embeddings.test.d.ts +0 -2
- package/dist/services/embeddings.test.d.ts.map +0 -1
- package/dist/services/embeddings.test.js +0 -115
- package/dist/services/embeddings.test.js.map +0 -1
- package/dist/services/sentenceSplitter.test.d.ts +0 -2
- package/dist/services/sentenceSplitter.test.d.ts.map +0 -1
- package/dist/services/sentenceSplitter.test.js +0 -68
- package/dist/services/sentenceSplitter.test.js.map +0 -1
- package/src/api/embedding.d.ts +0 -6
- package/tests/csvLoader.test.d.ts +0 -1
- package/tests/embedding.test.d.ts +0 -1
- package/tests/embeddings.test.d.ts +0 -1
- package/tests/sentenceSplitter.test.d.ts +0 -1
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
//@ts-nocheck
|
|
2
2
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
3
|
-
import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../
|
|
4
|
-
import { loadDocumentsFromCsv } from '
|
|
5
|
-
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes,
|
|
3
|
+
import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../embedding.js';
|
|
4
|
+
import { loadDocumentsFromCsv } from '../../services/csvLoader.js';
|
|
5
|
+
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getStorageContext } from '../../services/embeddings.js';
|
|
6
6
|
import { MetadataMode } from 'llamaindex';
|
|
7
7
|
|
|
8
8
|
// filepath: /Users/jeremybmerrill/code/meaningfully/src/main/api/embedding.test.ts
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
vi.mock('
|
|
12
|
-
vi.mock('
|
|
11
|
+
vi.mock('../../services/csvLoader');
|
|
12
|
+
vi.mock('../../services/embeddings');
|
|
13
13
|
|
|
14
14
|
describe('embedding.ts', () => {
|
|
15
15
|
describe('createEmbeddings', () => {
|
|
@@ -82,9 +82,9 @@ describe('embedding.ts', () => {
|
|
|
82
82
|
describe('getDocStore', () => {
|
|
83
83
|
it('should return existing doc store', async () => {
|
|
84
84
|
const mockDocStore = 'docStore';
|
|
85
|
-
|
|
85
|
+
getStorageContext.mockResolvedValue({ docStore: mockDocStore });
|
|
86
86
|
|
|
87
|
-
const result = await getDocStore({});
|
|
87
|
+
const result = await getDocStore({}, {}, {});
|
|
88
88
|
|
|
89
89
|
expect(result).toBe(mockDocStore);
|
|
90
90
|
});
|
|
@@ -173,7 +173,7 @@ describe('embedding.ts', () => {
|
|
|
173
173
|
describe('getDocStore', () => {
|
|
174
174
|
it('should return existing doc store', async () => {
|
|
175
175
|
const mockDocStore = 'docStore';
|
|
176
|
-
|
|
176
|
+
getStorageContext.mockResolvedValue({ docStore: mockDocStore });
|
|
177
177
|
|
|
178
178
|
const result = await getDocStore({});
|
|
179
179
|
|
|
@@ -262,7 +262,7 @@ describe('embedding.ts', () => {
|
|
|
262
262
|
|
|
263
263
|
it('should correctly track progress through ProgressManager', async () => {
|
|
264
264
|
// Setup
|
|
265
|
-
vi.mock('
|
|
265
|
+
vi.mock('../../services/progressManager', () => {
|
|
266
266
|
const mockInstance = {
|
|
267
267
|
startOperation: vi.fn(),
|
|
268
268
|
updateProgress: vi.fn(),
|
|
@@ -278,8 +278,8 @@ describe('embedding.ts', () => {
|
|
|
278
278
|
});
|
|
279
279
|
|
|
280
280
|
// Re-import to use mocked version
|
|
281
|
-
const { createEmbeddings } = await import('../
|
|
282
|
-
const { ProgressManager } = await import('
|
|
281
|
+
const { createEmbeddings } = await import('../embedding.js');
|
|
282
|
+
const { ProgressManager } = await import('../../services/progressManager.js');
|
|
283
283
|
|
|
284
284
|
const mockDocuments = [{ text: 'doc1' }, { text: 'doc2' }];
|
|
285
285
|
const mockNodes = [{ text: 'node1', metadata: {} }];
|
|
@@ -305,7 +305,7 @@ describe('embedding.ts', () => {
|
|
|
305
305
|
|
|
306
306
|
it('should properly calculate percentage in progress callback', async () => {
|
|
307
307
|
// Setup mocks with spy on updateProgress
|
|
308
|
-
vi.mock('
|
|
308
|
+
vi.mock('../../services/progressManager', () => {
|
|
309
309
|
const mockInstance = {
|
|
310
310
|
startOperation: vi.fn(),
|
|
311
311
|
updateProgress: vi.fn(),
|
|
@@ -321,8 +321,8 @@ describe('embedding.ts', () => {
|
|
|
321
321
|
});
|
|
322
322
|
|
|
323
323
|
// Re-import to use mocked version
|
|
324
|
-
const { createEmbeddings } = await import('../
|
|
325
|
-
const { ProgressManager } = await import('
|
|
324
|
+
const { createEmbeddings } = await import('../embedding.js');
|
|
325
|
+
const { ProgressManager } = await import('../../services/progressManager.js');
|
|
326
326
|
|
|
327
327
|
const mockDocuments = [{ text: 'doc1' }];
|
|
328
328
|
const mockNodes = [{ text: 'node1', metadata: {} }];
|
|
@@ -355,7 +355,7 @@ describe('embedding.ts', () => {
|
|
|
355
355
|
|
|
356
356
|
it('should clear operation on empty documents', async () => {
|
|
357
357
|
// Setup
|
|
358
|
-
vi.mock('
|
|
358
|
+
vi.mock('../../services/progressManager', () => {
|
|
359
359
|
const mockInstance = {
|
|
360
360
|
startOperation: vi.fn(),
|
|
361
361
|
updateProgress: vi.fn(),
|
|
@@ -371,8 +371,8 @@ describe('embedding.ts', () => {
|
|
|
371
371
|
});
|
|
372
372
|
|
|
373
373
|
// Re-import to use mocked version
|
|
374
|
-
const { createEmbeddings } = await import('../
|
|
375
|
-
const { ProgressManager } = await import('
|
|
374
|
+
const { createEmbeddings } = await import('../embedding.js');
|
|
375
|
+
const { ProgressManager } = await import('../../services/progressManager.js');
|
|
376
376
|
|
|
377
377
|
loadDocumentsFromCsv.mockResolvedValue([]);
|
|
378
378
|
|
|
@@ -390,7 +390,7 @@ describe('embedding.ts', () => {
|
|
|
390
390
|
|
|
391
391
|
it('shoulde complete operation on successful embedding', async () => {
|
|
392
392
|
// Setup
|
|
393
|
-
vi.mock('
|
|
393
|
+
vi.mock('../../services/progressManager', () => {
|
|
394
394
|
const mockInstance = {
|
|
395
395
|
startOperation: vi.fn(),
|
|
396
396
|
updateProgress: vi.fn(),
|
|
@@ -406,8 +406,8 @@ describe('embedding.ts', () => {
|
|
|
406
406
|
});
|
|
407
407
|
|
|
408
408
|
// Re-import to use mocked version
|
|
409
|
-
const { createEmbeddings } = await import('../
|
|
410
|
-
const { ProgressManager } = await import('
|
|
409
|
+
const { createEmbeddings } = await import('../embedding.js');
|
|
410
|
+
const { ProgressManager } = await import('../../services/progressManager.js');
|
|
411
411
|
|
|
412
412
|
const mockDocuments = [{ text: 'doc1' }];
|
|
413
413
|
const mockNodes = [{ text: 'node1', metadata: {} }];
|
package/src/api/embedding.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments,
|
|
1
|
+
import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getStorageContext } from "../services/embeddings.js";
|
|
2
2
|
import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types/index.js";
|
|
3
3
|
import { loadDocumentsFromCsv } from "../services/csvLoader.js";
|
|
4
4
|
import { MetadataMode } from "llamaindex";
|
|
@@ -97,8 +97,8 @@ export async function previewResults(
|
|
|
97
97
|
}
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
export async function getDocStore(config: EmbeddingConfig) {
|
|
101
|
-
return await
|
|
100
|
+
/**
 * Resolves the document store for a project.
 *
 * Obtains the storage context for the given configuration via
 * `getStorageContext` and hands back its `docStore` member.
 *
 * @param config - Embedding/project configuration forwarded to `getStorageContext`.
 * @param settings - Application settings forwarded to `getStorageContext`.
 * @param clients - Backend clients forwarded to `getStorageContext`.
 * @returns The `docStore` of the resolved storage context.
 */
export async function getDocStore(config: EmbeddingConfig, settings: Settings, clients: Clients) {
  const { docStore } = await getStorageContext(config, settings, clients);
  return docStore;
}
|
|
103
103
|
|
|
104
104
|
export async function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
//@ts-nocheck
|
|
2
2
|
import { describe, it, expect, vi } from 'vitest';
|
|
3
3
|
import { readFileSync } from 'fs';
|
|
4
|
-
import { loadDocumentsFromCsv } from '../
|
|
4
|
+
import { loadDocumentsFromCsv } from '../csvLoader.js';
|
|
5
5
|
import { Document } from 'llamaindex';
|
|
6
6
|
import Papa from 'papaparse';
|
|
7
7
|
|
|
@@ -4,7 +4,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
|
4
4
|
import { Document, TextNode } from 'llamaindex';
|
|
5
5
|
|
|
6
6
|
// First, set up the mock before importing the module
|
|
7
|
-
vi.mock(import("../
|
|
7
|
+
vi.mock(import("../embeddings.js"), async (importOriginal) => {
|
|
8
8
|
const actual = await importOriginal()
|
|
9
9
|
return {
|
|
10
10
|
...actual,
|
|
@@ -19,7 +19,7 @@ vi.mock(import("../src/services/embeddings.js"), async (importOriginal) => {
|
|
|
19
19
|
})
|
|
20
20
|
|
|
21
21
|
// Now import the mocked functions
|
|
22
|
-
import { transformDocumentsToNodes, getEmbedModel } from '../
|
|
22
|
+
import { transformDocumentsToNodes, getEmbedModel } from '../embeddings.js';
|
|
23
23
|
|
|
24
24
|
describe('transformDocumentsToNodes', () => {
|
|
25
25
|
beforeEach(() => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
//@ts-nocheck
|
|
2
2
|
import { expect, test } from 'vitest'
|
|
3
|
-
import { CustomSentenceSplitter } from '../
|
|
3
|
+
import { CustomSentenceSplitter } from '../sentenceSplitter.js'
|
|
4
4
|
import { SentenceSplitter, IngestionPipeline, Document } from "llamaindex";
|
|
5
5
|
|
|
6
6
|
// do these tests just to make sure that we can factor out my hacky fixes when llamaindex is fixed.
|
|
@@ -12,7 +12,6 @@ export declare function estimateCost(nodes: TextNode[], modelName: string): {
|
|
|
12
12
|
pricePer1M: number;
|
|
13
13
|
};
|
|
14
14
|
export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
|
|
15
|
-
export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
|
|
16
15
|
export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
|
|
17
16
|
export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): MockEmbedding | OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding;
|
|
18
17
|
export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
|
|
@@ -11,12 +11,15 @@ import {
|
|
|
11
11
|
SimpleVectorStore,
|
|
12
12
|
type StorageContext,
|
|
13
13
|
Settings as LlamaindexSettings,
|
|
14
|
-
SimpleDocumentStore
|
|
14
|
+
SimpleDocumentStore,
|
|
15
|
+
BaseDocumentStore,
|
|
16
|
+
BaseIndexStore,
|
|
17
|
+
SimpleIndexStore
|
|
15
18
|
} from "llamaindex";
|
|
16
19
|
import { OllamaEmbedding} from '@llamaindex/ollama'
|
|
17
20
|
import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral'
|
|
18
21
|
import { GeminiEmbedding } from '@llamaindex/google'
|
|
19
|
-
import { PGVectorStore } from '@llamaindex/postgres';
|
|
22
|
+
import { PGVectorStore, PostgresDocumentStore, PostgresIndexStore } from '@llamaindex/postgres';
|
|
20
23
|
import { AzureOpenAIEmbedding } from "@llamaindex/azure";
|
|
21
24
|
import { Sploder } from "./sploder.js";
|
|
22
25
|
import { CustomSentenceSplitter } from "./sentenceSplitter.js";
|
|
@@ -97,79 +100,12 @@ export function estimateCost(nodes: TextNode[], modelName: string): {
|
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
export async function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const storageContext = await storageContextFromDefaults({
|
|
105
|
-
persistDir: persistDir,
|
|
106
|
-
});
|
|
107
|
-
let vsi = await VectorStoreIndex.init({
|
|
108
|
-
storageContext: storageContext,
|
|
109
|
-
});
|
|
110
|
-
vsi.embedModel = embedModel;
|
|
111
|
-
return vsi;
|
|
112
|
-
|
|
113
|
-
case "postgres":
|
|
114
|
-
if (!clients.postgresClient) {
|
|
115
|
-
throw new Error("Postgres client required but not provided");
|
|
116
|
-
}
|
|
117
|
-
const pgStore = new PGVectorStore({
|
|
118
|
-
clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
|
|
119
|
-
tableName: sanitizeProjectName(config.projectName),
|
|
120
|
-
dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
|
|
121
|
-
embeddingModel: embedModel
|
|
122
|
-
});
|
|
123
|
-
const pgStorageContext = await storageContextFromDefaults({
|
|
124
|
-
vectorStores: { [ModalityType.TEXT]: pgStore },
|
|
125
|
-
});
|
|
126
|
-
return await VectorStoreIndex.init({
|
|
127
|
-
storageContext: pgStorageContext,
|
|
128
|
-
});
|
|
129
|
-
case "weaviate":
|
|
130
|
-
if (!clients.weaviateClient) {
|
|
131
|
-
throw new Error("Weaviate client required but not provided");
|
|
132
|
-
}
|
|
133
|
-
const weaviateStore = new BatchingWeaviateVectorStore({
|
|
134
|
-
indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
|
|
135
|
-
weaviateClient: clients.weaviateClient,
|
|
136
|
-
embeddingModel: embedModel
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
// WeaviateVectorStore's getNodeSimilarity method looks for distance, but current weaviate provides score
|
|
140
|
-
// (WeaviateVectorStore would get `score` if we were doing hybrid search)
|
|
141
|
-
// Overwrite the private getNodeSimilarity method to use 'score' from metadata
|
|
142
|
-
// @ts-ignore
|
|
143
|
-
weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => {
|
|
144
|
-
return entry.metadata.score;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
return await VectorStoreIndex.fromVectorStore(weaviateStore)
|
|
148
|
-
|
|
149
|
-
default:
|
|
150
|
-
throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
export async function getExistingDocStore(config: EmbeddingConfig) {
|
|
155
|
-
// switch (config.vectorStoreType) {
|
|
156
|
-
// case "simple":
|
|
157
|
-
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
158
|
-
const storageContext = await storageContextFromDefaults({
|
|
159
|
-
persistDir: persistDir,
|
|
160
|
-
});
|
|
161
|
-
return storageContext.docStore;
|
|
162
|
-
|
|
163
|
-
// case "postgres":
|
|
164
|
-
// throw new Error(`Not yet implemented vector store type: ${config.vectorStoreType}`);
|
|
165
|
-
// // return await createVectorStore(config);
|
|
166
|
-
// default:
|
|
167
|
-
// throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
|
|
168
|
-
// }
|
|
103
|
+
const storageContext = await getStorageContext(config, settings, clients);
|
|
104
|
+
return await VectorStoreIndex.init({
|
|
105
|
+
storageContext: storageContext,
|
|
106
|
+
});
|
|
169
107
|
}
|
|
170
108
|
|
|
171
|
-
|
|
172
|
-
|
|
173
109
|
export async function transformDocumentsToNodes(
|
|
174
110
|
documents: Document[],
|
|
175
111
|
config: EmbeddingConfig,
|
|
@@ -249,12 +185,15 @@ export function getEmbedModel(
|
|
|
249
185
|
|
|
250
186
|
export async function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext> {
|
|
251
187
|
const vectorStore = await createVectorStore(config, settings, clients);
|
|
188
|
+
const docStore = await createDocumentStore(config, settings, clients); // new SimpleDocumentStore()
|
|
189
|
+
const indexStore = await createIndexStore(config, settings, clients);
|
|
252
190
|
fs.mkdirSync(config.storagePath, { recursive: true });
|
|
253
191
|
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
254
192
|
return await storageContextFromDefaults({
|
|
255
193
|
persistDir: persistDir,
|
|
256
194
|
vectorStores: {[ModalityType.TEXT]: vectorStore},
|
|
257
|
-
docStore:
|
|
195
|
+
docStore: docStore,
|
|
196
|
+
indexStore: indexStore
|
|
258
197
|
/*
|
|
259
198
|
if docStore is created with a persist path (as it is by default in storageContextFromDefaults)
|
|
260
199
|
then it will write to disk after every put(), which happens 2+ times per document.
|
|
@@ -273,8 +212,13 @@ export async function persistDocuments(documents: Document[], config: EmbeddingC
|
|
|
273
212
|
|
|
274
213
|
// see comments in getStorageContext
|
|
275
214
|
const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
|
|
276
|
-
|
|
277
|
-
|
|
215
|
+
if (storageContext.docStore instanceof SimpleDocumentStore) {
|
|
216
|
+
// @ts-ignore
|
|
217
|
+
await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
|
|
218
|
+
}else if (storageContext.docStore instanceof PostgresDocumentStore) {
|
|
219
|
+
// PostgresDocumentStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
|
|
220
|
+
console.log("Pretending to persist Postgres document store, but it actually persists automatically.");
|
|
221
|
+
}
|
|
278
222
|
|
|
279
223
|
console.timeEnd("persistDocuments Run Time");
|
|
280
224
|
}
|
|
@@ -303,11 +247,11 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
|
|
|
303
247
|
// all the if statements are just type-checking boilerplate.
|
|
304
248
|
// N.B. WeaviateVectorStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
|
|
305
249
|
if (vectorStore) {
|
|
306
|
-
if (vectorStore instanceof
|
|
250
|
+
if (vectorStore instanceof SimpleVectorStore) {
|
|
307
251
|
await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
|
|
308
|
-
} else if (vectorStore instanceof BatchingWeaviateVectorStore) {
|
|
252
|
+
} else if (vectorStore instanceof PGVectorStore || vectorStore instanceof BatchingWeaviateVectorStore) {
|
|
309
253
|
// WeaviateVectorStore does not have a persist method, it persists automatically
|
|
310
|
-
console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
|
|
254
|
+
console.log("Pretending to persist Weaviate or Postgres vector store, but it actually persists automatically.");
|
|
311
255
|
} else {
|
|
312
256
|
throw new Error("Vector store does not support persist method");
|
|
313
257
|
}
|
|
@@ -326,8 +270,8 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
|
|
|
326
270
|
// otherwise it defaults to Ada.
|
|
327
271
|
case "postgres":
|
|
328
272
|
return new PGVectorStore({
|
|
329
|
-
|
|
330
|
-
tableName: sanitizeProjectName(config.projectName),
|
|
273
|
+
client: clients.postgresClient,
|
|
274
|
+
tableName: "vecs_" + sanitizeProjectName(config.projectName),
|
|
331
275
|
dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
|
|
332
276
|
embeddingModel: embeddingModel
|
|
333
277
|
});
|
|
@@ -357,6 +301,38 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
|
|
|
357
301
|
}
|
|
358
302
|
}
|
|
359
303
|
|
|
304
|
+
/**
 * Builds the document store used by the storage context.
 *
 * The backend is selected from `config.documentStoreType`, falling back to
 * `config.vectorStoreType` when no document store type is configured.
 * A "weaviate" vector store therefore gets a `SimpleDocumentStore`, exactly
 * like "simple".
 *
 * @param config - Project configuration; supplies the store type and project name.
 * @param settings - Currently unused here; kept for signature parity with the
 *   other store factories.
 * @param clients - Backend clients; `postgresClient` is passed to the Postgres store.
 * @returns A document store instance for the resolved backend.
 * @throws Error when the resolved store type is not one of the handled values.
 */
async function createDocumentStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseDocumentStore> {
  // Resolve once so the switch and the error message always agree on which
  // value was actually inspected.
  const storeType = config.documentStoreType || config.vectorStoreType;
  switch (storeType) {
    case "postgres":
      return new PostgresDocumentStore({
        client: clients.postgresClient,
        tableName: "docs_" + sanitizeProjectName(config.projectName),
      });
    case "simple":
    case "weaviate":
      // we create the doc store without a persist path, so it doesn't write to disk after every put()
      return new SimpleDocumentStore();
    default:
      throw new Error(`Unsupported document store type: ${storeType}`);
  }
}
|
|
319
|
+
|
|
320
|
+
/**
 * Builds the index store used by the storage context.
 *
 * The backend is selected from `config.indexStoreType` when set; otherwise it
 * falls back to `config.documentStoreType` and then `config.vectorStoreType`,
 * which is the selection that applied before `indexStoreType` was consulted.
 * A "weaviate" vector store gets a `SimpleIndexStore`, exactly like "simple".
 *
 * @param config - Project configuration; supplies the store type and project name.
 * @param settings - Currently unused here; kept for signature parity with the
 *   other store factories.
 * @param clients - Backend clients; `postgresClient` is passed to the Postgres store.
 * @returns An index store instance for the resolved backend.
 * @throws Error when the resolved store type is not one of the handled values.
 */
async function createIndexStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseIndexStore> {
  // Honor the dedicated indexStoreType option first; the remaining fallbacks
  // keep behavior unchanged for configs that do not set it.
  const storeType = config.indexStoreType || config.documentStoreType || config.vectorStoreType;
  switch (storeType) {
    case "postgres":
      return new PostgresIndexStore({
        client: clients.postgresClient,
        tableName: "idx_" + sanitizeProjectName(config.projectName),
      });
    case "simple":
    case "weaviate":
      return new SimpleIndexStore();
    default:
      throw new Error(`Unsupported index store type: ${storeType}`);
  }
}
|
|
335
|
+
|
|
360
336
|
export async function searchDocuments(
|
|
361
337
|
index: VectorStoreIndex,
|
|
362
338
|
query: string,
|
package/src/types/index.ts
CHANGED
|
@@ -57,6 +57,8 @@ export interface EmbeddingConfig {
|
|
|
57
57
|
modelName: string;
|
|
58
58
|
modelProvider: string
|
|
59
59
|
vectorStoreType: "simple" | "postgres" | "weaviate";
|
|
60
|
+
documentStoreType?: "simple" | "postgres";
|
|
61
|
+
indexStoreType?: "simple" | "postgres";
|
|
60
62
|
projectName: string;
|
|
61
63
|
storagePath: string;
|
|
62
64
|
splitIntoSentences: boolean;
|
package/tsconfig.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding.test.d.ts","sourceRoot":"","sources":["../../src/api/embedding.test.ts"],"names":[],"mappings":""}
|