@meaningfully/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/Meaningfully.d.ts +3 -2
  2. package/dist/Meaningfully.d.ts.map +1 -1
  3. package/dist/Meaningfully.js +8 -4
  4. package/dist/Meaningfully.js.map +1 -1
  5. package/dist/api/embedding.d.ts +1 -1
  6. package/dist/api/embedding.d.ts.map +1 -1
  7. package/dist/api/embedding.js +3 -3
  8. package/dist/api/embedding.js.map +1 -1
  9. package/dist/services/embeddings.d.ts +0 -1
  10. package/dist/services/embeddings.d.ts.map +1 -1
  11. package/dist/services/embeddings.js +51 -72
  12. package/dist/services/embeddings.js.map +1 -1
  13. package/dist/types/index.d.ts +2 -0
  14. package/dist/types/index.d.ts.map +1 -1
  15. package/package.json +5 -2
  16. package/src/Meaningfully.ts +9 -5
  17. package/{tests → src/__tests__}/MetadataManager.test.ts +1 -1
  18. package/{tests → src/api/__tests__}/embedding.test.ts +20 -20
  19. package/src/api/embedding.ts +3 -3
  20. package/{tests → src/services/__tests__}/csvLoader.test.ts +1 -1
  21. package/{tests → src/services/__tests__}/embeddings.test.ts +2 -2
  22. package/{tests → src/services/__tests__}/sentenceSplitter.test.ts +1 -1
  23. package/src/services/embeddings.d.ts +0 -1
  24. package/src/services/embeddings.ts +57 -81
  25. package/src/types/index.ts +2 -0
  26. package/tsconfig.json +1 -1
  27. package/dist/api/embedding.test.d.ts +0 -2
  28. package/dist/api/embedding.test.d.ts.map +0 -1
  29. package/dist/api/embedding.test.js +0 -340
  30. package/dist/api/embedding.test.js.map +0 -1
  31. package/dist/services/csvLoader.test.d.ts +0 -2
  32. package/dist/services/csvLoader.test.d.ts.map +0 -1
  33. package/dist/services/csvLoader.test.js +0 -75
  34. package/dist/services/csvLoader.test.js.map +0 -1
  35. package/dist/services/embeddings.test.d.ts +0 -2
  36. package/dist/services/embeddings.test.d.ts.map +0 -1
  37. package/dist/services/embeddings.test.js +0 -115
  38. package/dist/services/embeddings.test.js.map +0 -1
  39. package/dist/services/sentenceSplitter.test.d.ts +0 -2
  40. package/dist/services/sentenceSplitter.test.d.ts.map +0 -1
  41. package/dist/services/sentenceSplitter.test.js +0 -68
  42. package/dist/services/sentenceSplitter.test.js.map +0 -1
  43. package/src/api/embedding.d.ts +0 -6
  44. package/tests/csvLoader.test.d.ts +0 -1
  45. package/tests/embedding.test.d.ts +0 -1
  46. package/tests/embeddings.test.d.ts +0 -1
  47. package/tests/sentenceSplitter.test.d.ts +0 -1
@@ -1,15 +1,15 @@
1
1
  //@ts-nocheck
2
2
  import { describe, it, expect, vi, beforeEach } from 'vitest';
3
- import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../src/api/embedding.js';
4
- import { loadDocumentsFromCsv } from '../src/services/csvLoader.js';
5
- import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getExistingDocStore } from '../src/services/embeddings.js';
3
+ import { createEmbeddings, previewResults, getDocStore, getIndex, search } from '../embedding.js';
4
+ import { loadDocumentsFromCsv } from '../../services/csvLoader.js';
5
+ import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, getStorageContext } from '../../services/embeddings.js';
6
6
  import { MetadataMode } from 'llamaindex';
7
7
 
8
8
  // filepath: /Users/jeremybmerrill/code/meaningfully/src/main/api/embedding.test.ts
9
9
 
10
10
 
11
- vi.mock('../src/services/csvLoader');
12
- vi.mock('../src/services/embeddings');
11
+ vi.mock('../../services/csvLoader');
12
+ vi.mock('../../services/embeddings');
13
13
 
14
14
  describe('embedding.ts', () => {
15
15
  describe('createEmbeddings', () => {
@@ -82,9 +82,9 @@ describe('embedding.ts', () => {
82
82
  describe('getDocStore', () => {
83
83
  it('should return existing doc store', async () => {
84
84
  const mockDocStore = 'docStore';
85
- getExistingDocStore.mockResolvedValue(mockDocStore);
85
+ getStorageContext.mockResolvedValue({ docStore: mockDocStore });
86
86
 
87
- const result = await getDocStore({});
87
+ const result = await getDocStore({}, {}, {});
88
88
 
89
89
  expect(result).toBe(mockDocStore);
90
90
  });
@@ -173,7 +173,7 @@ describe('embedding.ts', () => {
173
173
  describe('getDocStore', () => {
174
174
  it('should return existing doc store', async () => {
175
175
  const mockDocStore = 'docStore';
176
- getExistingDocStore.mockResolvedValue(mockDocStore);
176
+ getStorageContext.mockResolvedValue({ docStore: mockDocStore });
177
177
 
178
178
  const result = await getDocStore({});
179
179
 
@@ -262,7 +262,7 @@ describe('embedding.ts', () => {
262
262
 
263
263
  it('should correctly track progress through ProgressManager', async () => {
264
264
  // Setup
265
- vi.mock('../src/services/progressManager', () => {
265
+ vi.mock('../../services/progressManager', () => {
266
266
  const mockInstance = {
267
267
  startOperation: vi.fn(),
268
268
  updateProgress: vi.fn(),
@@ -278,8 +278,8 @@ describe('embedding.ts', () => {
278
278
  });
279
279
 
280
280
  // Re-import to use mocked version
281
- const { createEmbeddings } = await import('../src/api/embedding.js');
282
- const { ProgressManager } = await import('../src/services/progressManager.js');
281
+ const { createEmbeddings } = await import('../embedding.js');
282
+ const { ProgressManager } = await import('../../services/progressManager.js');
283
283
 
284
284
  const mockDocuments = [{ text: 'doc1' }, { text: 'doc2' }];
285
285
  const mockNodes = [{ text: 'node1', metadata: {} }];
@@ -305,7 +305,7 @@ describe('embedding.ts', () => {
305
305
 
306
306
  it('should properly calculate percentage in progress callback', async () => {
307
307
  // Setup mocks with spy on updateProgress
308
- vi.mock('../src/services/progressManager', () => {
308
+ vi.mock('../../services/progressManager', () => {
309
309
  const mockInstance = {
310
310
  startOperation: vi.fn(),
311
311
  updateProgress: vi.fn(),
@@ -321,8 +321,8 @@ describe('embedding.ts', () => {
321
321
  });
322
322
 
323
323
  // Re-import to use mocked version
324
- const { createEmbeddings } = await import('../src/api/embedding.js');
325
- const { ProgressManager } = await import('../src/services/progressManager.js');
324
+ const { createEmbeddings } = await import('../embedding.js');
325
+ const { ProgressManager } = await import('../../services/progressManager.js');
326
326
 
327
327
  const mockDocuments = [{ text: 'doc1' }];
328
328
  const mockNodes = [{ text: 'node1', metadata: {} }];
@@ -355,7 +355,7 @@ describe('embedding.ts', () => {
355
355
 
356
356
  it('should clear operation on empty documents', async () => {
357
357
  // Setup
358
- vi.mock('../src/services/progressManager', () => {
358
+ vi.mock('../../services/progressManager', () => {
359
359
  const mockInstance = {
360
360
  startOperation: vi.fn(),
361
361
  updateProgress: vi.fn(),
@@ -371,8 +371,8 @@ describe('embedding.ts', () => {
371
371
  });
372
372
 
373
373
  // Re-import to use mocked version
374
- const { createEmbeddings } = await import('../src/api/embedding.js');
375
- const { ProgressManager } = await import('../src/services/progressManager.js');
374
+ const { createEmbeddings } = await import('../embedding.js');
375
+ const { ProgressManager } = await import('../../services/progressManager.js');
376
376
 
377
377
  loadDocumentsFromCsv.mockResolvedValue([]);
378
378
 
@@ -390,7 +390,7 @@ describe('embedding.ts', () => {
390
390
 
391
391
  it('shoulde complete operation on successful embedding', async () => {
392
392
  // Setup
393
- vi.mock('../src/services/progressManager', () => {
393
+ vi.mock('../../services/progressManager', () => {
394
394
  const mockInstance = {
395
395
  startOperation: vi.fn(),
396
396
  updateProgress: vi.fn(),
@@ -406,8 +406,8 @@ describe('embedding.ts', () => {
406
406
  });
407
407
 
408
408
  // Re-import to use mocked version
409
- const { createEmbeddings } = await import('../src/api/embedding.js');
410
- const { ProgressManager } = await import('../src/services/progressManager.js');
409
+ const { createEmbeddings } = await import('../embedding.js');
410
+ const { ProgressManager } = await import('../../services/progressManager.js');
411
411
 
412
412
  const mockDocuments = [{ text: 'doc1' }];
413
413
  const mockNodes = [{ text: 'node1', metadata: {} }];
@@ -1,4 +1,4 @@
1
- import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getExistingDocStore } from "../services/embeddings.js";
1
+ import { transformDocumentsToNodes, estimateCost, searchDocuments, getExistingVectorStoreIndex, persistNodes, persistDocuments, getStorageContext } from "../services/embeddings.js";
2
2
  import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types/index.js";
3
3
  import { loadDocumentsFromCsv } from "../services/csvLoader.js";
4
4
  import { MetadataMode } from "llamaindex";
@@ -97,8 +97,8 @@ export async function previewResults(
97
97
  }
98
98
  }
99
99
 
100
- export async function getDocStore(config: EmbeddingConfig) {
101
- return await getExistingDocStore(config);
100
+ export async function getDocStore(config: EmbeddingConfig, settings: Settings, clients: Clients) {
101
+ return (await getStorageContext(config, settings, clients)).docStore;
102
102
  }
103
103
 
104
104
  export async function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
@@ -1,7 +1,7 @@
1
1
  //@ts-nocheck
2
2
  import { describe, it, expect, vi } from 'vitest';
3
3
  import { readFileSync } from 'fs';
4
- import { loadDocumentsFromCsv } from '../src/services/csvLoader.js';
4
+ import { loadDocumentsFromCsv } from '../csvLoader.js';
5
5
  import { Document } from 'llamaindex';
6
6
  import Papa from 'papaparse';
7
7
 
@@ -4,7 +4,7 @@ import { describe, it, expect, vi, beforeEach } from 'vitest';
4
4
  import { Document, TextNode } from 'llamaindex';
5
5
 
6
6
  // First, set up the mock before importing the module
7
- vi.mock(import("../src/services/embeddings.js"), async (importOriginal) => {
7
+ vi.mock(import("../embeddings.js"), async (importOriginal) => {
8
8
  const actual = await importOriginal()
9
9
  return {
10
10
  ...actual,
@@ -19,7 +19,7 @@ vi.mock(import("../src/services/embeddings.js"), async (importOriginal) => {
19
19
  })
20
20
 
21
21
  // Now import the mocked functions
22
- import { transformDocumentsToNodes, getEmbedModel } from '../src/services/embeddings.js';
22
+ import { transformDocumentsToNodes, getEmbedModel } from '../embeddings.js';
23
23
 
24
24
  describe('transformDocumentsToNodes', () => {
25
25
  beforeEach(() => {
@@ -1,6 +1,6 @@
1
1
  //@ts-nocheck
2
2
  import { expect, test } from 'vitest'
3
- import { CustomSentenceSplitter } from '../src/services/sentenceSplitter.js'
3
+ import { CustomSentenceSplitter } from '../sentenceSplitter.js'
4
4
  import { SentenceSplitter, IngestionPipeline, Document } from "llamaindex";
5
5
 
6
6
  // do these tests just to make sure that we can factor out my hacky fixes when llamaindex is fixed.
@@ -12,7 +12,6 @@ export declare function estimateCost(nodes: TextNode[], modelName: string): {
12
12
  pricePer1M: number;
13
13
  };
14
14
  export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
15
- export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
16
15
  export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
17
16
  export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): MockEmbedding | OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding;
18
17
  export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
@@ -11,12 +11,15 @@ import {
11
11
  SimpleVectorStore,
12
12
  type StorageContext,
13
13
  Settings as LlamaindexSettings,
14
- SimpleDocumentStore
14
+ SimpleDocumentStore,
15
+ BaseDocumentStore,
16
+ BaseIndexStore,
17
+ SimpleIndexStore
15
18
  } from "llamaindex";
16
19
  import { OllamaEmbedding} from '@llamaindex/ollama'
17
20
  import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral'
18
21
  import { GeminiEmbedding } from '@llamaindex/google'
19
- import { PGVectorStore } from '@llamaindex/postgres';
22
+ import { PGVectorStore, PostgresDocumentStore, PostgresIndexStore } from '@llamaindex/postgres';
20
23
  import { AzureOpenAIEmbedding } from "@llamaindex/azure";
21
24
  import { Sploder } from "./sploder.js";
22
25
  import { CustomSentenceSplitter } from "./sentenceSplitter.js";
@@ -97,79 +100,12 @@ export function estimateCost(nodes: TextNode[], modelName: string): {
97
100
  }
98
101
 
99
102
  export async function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients) {
100
- const embedModel = getEmbedModel(config, settings);
101
- switch (config.vectorStoreType) {
102
- case "simple":
103
- const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName));
104
- const storageContext = await storageContextFromDefaults({
105
- persistDir: persistDir,
106
- });
107
- let vsi = await VectorStoreIndex.init({
108
- storageContext: storageContext,
109
- });
110
- vsi.embedModel = embedModel;
111
- return vsi;
112
-
113
- case "postgres":
114
- if (!clients.postgresClient) {
115
- throw new Error("Postgres client required but not provided");
116
- }
117
- const pgStore = new PGVectorStore({
118
- clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
119
- tableName: sanitizeProjectName(config.projectName),
120
- dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
121
- embeddingModel: embedModel
122
- });
123
- const pgStorageContext = await storageContextFromDefaults({
124
- vectorStores: { [ModalityType.TEXT]: pgStore },
125
- });
126
- return await VectorStoreIndex.init({
127
- storageContext: pgStorageContext,
128
- });
129
- case "weaviate":
130
- if (!clients.weaviateClient) {
131
- throw new Error("Weaviate client required but not provided");
132
- }
133
- const weaviateStore = new BatchingWeaviateVectorStore({
134
- indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
135
- weaviateClient: clients.weaviateClient,
136
- embeddingModel: embedModel
137
- });
138
-
139
- // WeaviateVectorStore's getNodeSimilarity method looks for distance, but current weaviate provides score
140
- // (WeaviateVectorStore would get `score` if we were doing hybrid search)
141
- // Overwrite the private getNodeSimilarity method to use 'score' from metadata
142
- // @ts-ignore
143
- weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => {
144
- return entry.metadata.score;
145
- }
146
-
147
- return await VectorStoreIndex.fromVectorStore(weaviateStore)
148
-
149
- default:
150
- throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
151
- }
152
- }
153
-
154
- export async function getExistingDocStore(config: EmbeddingConfig) {
155
- // switch (config.vectorStoreType) {
156
- // case "simple":
157
- const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
158
- const storageContext = await storageContextFromDefaults({
159
- persistDir: persistDir,
160
- });
161
- return storageContext.docStore;
162
-
163
- // case "postgres":
164
- // throw new Error(`Not yet implemented vector store type: ${config.vectorStoreType}`);
165
- // // return await createVectorStore(config);
166
- // default:
167
- // throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
168
- // }
103
+ const storageContext = await getStorageContext(config, settings, clients);
104
+ return await VectorStoreIndex.init({
105
+ storageContext: storageContext,
106
+ });
169
107
  }
170
108
 
171
-
172
-
173
109
  export async function transformDocumentsToNodes(
174
110
  documents: Document[],
175
111
  config: EmbeddingConfig,
@@ -249,12 +185,15 @@ export function getEmbedModel(
249
185
 
250
186
  export async function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext> {
251
187
  const vectorStore = await createVectorStore(config, settings, clients);
188
+ const docStore = await createDocumentStore(config, settings, clients); // new SimpleDocumentStore()
189
+ const indexStore = await createIndexStore(config, settings, clients);
252
190
  fs.mkdirSync(config.storagePath, { recursive: true });
253
191
  const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
254
192
  return await storageContextFromDefaults({
255
193
  persistDir: persistDir,
256
194
  vectorStores: {[ModalityType.TEXT]: vectorStore},
257
- docStore: new SimpleDocumentStore()
195
+ docStore: docStore,
196
+ indexStore: indexStore
258
197
  /*
259
198
  if docStore is created with a persist path (as it is by default in storageContextFromDefaults)
260
199
  then it will write to disk after every put(), which happens 2+ times per document.
@@ -273,8 +212,13 @@ export async function persistDocuments(documents: Document[], config: EmbeddingC
273
212
 
274
213
  // see comments in getStorageContext
275
214
  const persistDir = join(config.storagePath, sanitizeProjectName(config.projectName) );
276
- // @ts-ignore
277
- await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
215
+ if (storageContext.docStore instanceof SimpleDocumentStore) {
216
+ // @ts-ignore
217
+ await (storageContext.docStore as SimpleDocumentStore).kvStore.persist(join(persistDir, "doc_store.json"));
218
+ }else if (storageContext.docStore instanceof PostgresDocumentStore) {
219
+ // PostgresDocumentStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
220
+ console.log("Pretending to persist Postgres document store, but it actually persists automatically.");
221
+ }
278
222
 
279
223
  console.timeEnd("persistDocuments Run Time");
280
224
  }
@@ -303,11 +247,11 @@ export async function persistNodes(nodes: TextNode[], config: EmbeddingConfig, s
303
247
  // all the if statements are just type-checking boilerplate.
304
248
  // N.B. WeaviateVectorStore does not need to be explicitly persisted, so we don't include it in the OR conditional here..
305
249
  if (vectorStore) {
306
- if (vectorStore instanceof PGVectorStore || vectorStore instanceof SimpleVectorStore) {
250
+ if (vectorStore instanceof SimpleVectorStore) {
307
251
  await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
308
- } else if (vectorStore instanceof BatchingWeaviateVectorStore) {
252
+ } else if (vectorStore instanceof PGVectorStore || vectorStore instanceof BatchingWeaviateVectorStore) {
309
253
  // WeaviateVectorStore does not have a persist method, it persists automatically
310
- console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
254
+ console.log("Pretending to persist Weaviate or Postgres vector store, but it actually persists automatically.");
311
255
  } else {
312
256
  throw new Error("Vector store does not support persist method");
313
257
  }
@@ -326,8 +270,8 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
326
270
  // otherwise it defaults to Ada.
327
271
  case "postgres":
328
272
  return new PGVectorStore({
329
- clientConfig: {connectionString: process.env.POSTGRES_CONNECTION_STRING},
330
- tableName: sanitizeProjectName(config.projectName),
273
+ client: clients.postgresClient,
274
+ tableName: "vecs_" + sanitizeProjectName(config.projectName),
331
275
  dimensions: MODEL_DIMENSIONS[config.modelName] || 1536, // default to 1536 if model not found
332
276
  embeddingModel: embeddingModel
333
277
  });
@@ -357,6 +301,38 @@ async function createVectorStore(config: EmbeddingConfig, settings: Settings, cl
357
301
  }
358
302
  }
359
303
 
304
+ async function createDocumentStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseDocumentStore> {
305
+ // we create the doc store without a persist path, so it doesn't write to disk after every put()
306
+ switch (config.documentStoreType || config.vectorStoreType) {
307
+ case "postgres":
308
+ return new PostgresDocumentStore({
309
+ client: clients.postgresClient,
310
+ tableName: "docs_" + sanitizeProjectName(config.projectName),
311
+ });
312
+ case "simple":
313
+ case "weaviate":
314
+ return new SimpleDocumentStore();
315
+ default:
316
+ throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
317
+ }
318
+ }
319
+
320
+ async function createIndexStore(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<BaseIndexStore> {
321
+ switch (config.documentStoreType || config.vectorStoreType) {
322
+ case "postgres":
323
+ return new PostgresIndexStore({
324
+ client: clients.postgresClient,
325
+ tableName: "idx_" + sanitizeProjectName(config.projectName),
326
+ });
327
+ case "simple":
328
+ case "weaviate":
329
+ return new SimpleIndexStore();
330
+ default:
331
+ throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
332
+ }
333
+
334
+ }
335
+
360
336
  export async function searchDocuments(
361
337
  index: VectorStoreIndex,
362
338
  query: string,
@@ -57,6 +57,8 @@ export interface EmbeddingConfig {
57
57
  modelName: string;
58
58
  modelProvider: string
59
59
  vectorStoreType: "simple" | "postgres" | "weaviate";
60
+ documentStoreType?: "simple" | "postgres";
61
+ indexStoreType?: "simple" | "postgres";
60
62
  projectName: string;
61
63
  storagePath: string;
62
64
  splitIntoSentences: boolean;
package/tsconfig.json CHANGED
@@ -27,5 +27,5 @@
27
27
  /* AND if you're building for a library in a monorepo: */
28
28
  "declarationMap": true
29
29
  },
30
- "exclude": ["node_modules/natural", "dist/**"]
30
+ "exclude": ["node_modules/natural", "dist/**", "src/**/__tests__/**"]
31
31
  }
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=embedding.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"embedding.test.d.ts","sourceRoot":"","sources":["../../src/api/embedding.test.ts"],"names":[],"mappings":""}