@meaningfully/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.nvmrc +1 -0
  2. package/LICENSE +7 -0
  3. package/README.md +3 -0
  4. package/dist/DocumentSetManager.d.ts +28 -0
  5. package/dist/DocumentSetManager.d.ts.map +1 -0
  6. package/dist/DocumentSetManager.js +134 -0
  7. package/dist/DocumentSetManager.js.map +1 -0
  8. package/dist/Meaningfully.d.ts +52 -0
  9. package/dist/Meaningfully.d.ts.map +1 -0
  10. package/dist/Meaningfully.js +206 -0
  11. package/dist/Meaningfully.js.map +1 -0
  12. package/dist/MetadataManager.d.ts +32 -0
  13. package/dist/MetadataManager.d.ts.map +1 -0
  14. package/dist/MetadataManager.js +115 -0
  15. package/dist/MetadataManager.js.map +1 -0
  16. package/dist/api/embedding.d.ts +7 -0
  17. package/dist/api/embedding.d.ts.map +1 -0
  18. package/dist/api/embedding.js +94 -0
  19. package/dist/api/embedding.js.map +1 -0
  20. package/dist/api/embedding.test.d.ts +2 -0
  21. package/dist/api/embedding.test.d.ts.map +1 -0
  22. package/dist/api/embedding.test.js +340 -0
  23. package/dist/api/embedding.test.js.map +1 -0
  24. package/dist/index.d.ts +5 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +6 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/services/batchingWeaviateVectorStore.d.ts +6 -0
  29. package/dist/services/batchingWeaviateVectorStore.d.ts.map +1 -0
  30. package/dist/services/batchingWeaviateVectorStore.js +21 -0
  31. package/dist/services/batchingWeaviateVectorStore.js.map +1 -0
  32. package/dist/services/csvLoader.d.ts +3 -0
  33. package/dist/services/csvLoader.d.ts.map +1 -0
  34. package/dist/services/csvLoader.js +18 -0
  35. package/dist/services/csvLoader.js.map +1 -0
  36. package/dist/services/csvLoader.test.d.ts +2 -0
  37. package/dist/services/csvLoader.test.d.ts.map +1 -0
  38. package/dist/services/csvLoader.test.js +75 -0
  39. package/dist/services/csvLoader.test.js.map +1 -0
  40. package/dist/services/embeddings.d.ts +22 -0
  41. package/dist/services/embeddings.d.ts.map +1 -0
  42. package/dist/services/embeddings.js +314 -0
  43. package/dist/services/embeddings.js.map +1 -0
  44. package/dist/services/embeddings.test.d.ts +2 -0
  45. package/dist/services/embeddings.test.d.ts.map +1 -0
  46. package/dist/services/embeddings.test.js +115 -0
  47. package/dist/services/embeddings.test.js.map +1 -0
  48. package/dist/services/loggingOpenAIEmbedding.d.ts +2 -0
  49. package/dist/services/loggingOpenAIEmbedding.d.ts.map +1 -0
  50. package/dist/services/loggingOpenAIEmbedding.js +41 -0
  51. package/dist/services/loggingOpenAIEmbedding.js.map +1 -0
  52. package/dist/services/mockEmbedding.d.ts +6 -0
  53. package/dist/services/mockEmbedding.d.ts.map +1 -0
  54. package/dist/services/mockEmbedding.js +14 -0
  55. package/dist/services/mockEmbedding.js.map +1 -0
  56. package/dist/services/progressManager.d.ts +21 -0
  57. package/dist/services/progressManager.d.ts.map +1 -0
  58. package/dist/services/progressManager.js +76 -0
  59. package/dist/services/progressManager.js.map +1 -0
  60. package/dist/services/progressVectorStoreIndex.d.ts +21 -0
  61. package/dist/services/progressVectorStoreIndex.d.ts.map +1 -0
  62. package/dist/services/progressVectorStoreIndex.js +60 -0
  63. package/dist/services/progressVectorStoreIndex.js.map +1 -0
  64. package/dist/services/sentenceSplitter.d.ts +17 -0
  65. package/dist/services/sentenceSplitter.d.ts.map +1 -0
  66. package/dist/services/sentenceSplitter.js +207 -0
  67. package/dist/services/sentenceSplitter.js.map +1 -0
  68. package/dist/services/sentenceSplitter.test.d.ts +2 -0
  69. package/dist/services/sentenceSplitter.test.d.ts.map +1 -0
  70. package/dist/services/sentenceSplitter.test.js +68 -0
  71. package/dist/services/sentenceSplitter.test.js.map +1 -0
  72. package/dist/services/sploder.d.ts +13 -0
  73. package/dist/services/sploder.d.ts.map +1 -0
  74. package/dist/services/sploder.js +45 -0
  75. package/dist/services/sploder.js.map +1 -0
  76. package/dist/types/index.d.ts +77 -0
  77. package/dist/types/index.d.ts.map +1 -0
  78. package/dist/types/index.js +2 -0
  79. package/dist/types/index.js.map +1 -0
  80. package/dist/utils.d.ts +3 -0
  81. package/dist/utils.d.ts.map +1 -0
  82. package/dist/utils.js +7 -0
  83. package/dist/utils.js.map +1 -0
  84. package/package.json +43 -0
  85. package/src/Meaningfully.d.ts +57 -0
  86. package/src/Meaningfully.ts +228 -0
  87. package/src/MetadataManager.d.ts +27 -0
  88. package/src/MetadataManager.ts +145 -0
  89. package/src/api/embedding.d.ts +6 -0
  90. package/src/api/embedding.ts +122 -0
  91. package/src/index.ts +5 -0
  92. package/src/services/batchingWeaviateVectorStore.d.ts +5 -0
  93. package/src/services/batchingWeaviateVectorStore.ts +23 -0
  94. package/src/services/csvLoader.d.ts +2 -0
  95. package/src/services/csvLoader.ts +24 -0
  96. package/src/services/embeddings.d.ts +21 -0
  97. package/src/services/embeddings.ts +374 -0
  98. package/src/services/loggingOpenAIEmbedding.d.ts +0 -0
  99. package/src/services/loggingOpenAIEmbedding.ts +46 -0
  100. package/src/services/mockEmbedding.d.ts +5 -0
  101. package/src/services/mockEmbedding.ts +13 -0
  102. package/src/services/progressManager.d.ts +20 -0
  103. package/src/services/progressManager.ts +88 -0
  104. package/src/services/progressVectorStoreIndex.d.ts +20 -0
  105. package/src/services/progressVectorStoreIndex.ts +95 -0
  106. package/src/services/sentenceSplitter.d.ts +16 -0
  107. package/src/services/sentenceSplitter.ts +243 -0
  108. package/src/services/sploder.d.ts +12 -0
  109. package/src/services/sploder.ts +62 -0
  110. package/src/types/index.d.ts +71 -0
  111. package/src/types/index.ts +89 -0
  112. package/src/utils.d.ts +2 -0
  113. package/src/utils.ts +6 -0
  114. package/tests/MetadataManager.test.ts +120 -0
  115. package/tests/csvLoader.test.d.ts +1 -0
  116. package/tests/csvLoader.test.ts +88 -0
  117. package/tests/embedding.test.d.ts +1 -0
  118. package/tests/embedding.test.ts +425 -0
  119. package/tests/embeddings.test.d.ts +1 -0
  120. package/tests/embeddings.test.ts +144 -0
  121. package/tests/sentenceSplitter.test.d.ts +1 -0
  122. package/tests/sentenceSplitter.test.ts +81 -0
  123. package/tsconfig.json +31 -0
  124. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,75 @@
1
+ //@ts-nocheck
2
+ import { describe, it, expect, vi } from 'vitest';
3
+ import { readFileSync } from 'fs';
4
+ import { loadDocumentsFromCsv } from './csvLoader';
5
+ import { Document } from 'llamaindex';
6
+ import Papa from 'papaparse';
7
+ vi.mock('fs');
8
+ vi.mock('papaparse');
9
describe('csvLoader.ts', () => {
    describe('loadDocumentsFromCsv', () => {
        // Primes both module mocks: the raw text handed back by fs.readFileSync
        // and the parsed rows handed back by Papa.parse.
        const stubCsv = (fileContent, rows) => {
            readFileSync.mockReturnValue(fileContent);
            Papa.parse.mockReturnValue({ data: rows });
        };
        // Runs the loader against the mocked CSV with 'text' as the text column.
        const load = () => loadDocumentsFromCsv('path/to/csv', 'text');

        it('should load documents from CSV and return Document instances', async () => {
            stubCsv('text,metadata1,metadata2\ncontent1,meta1,meta2\ncontent2,meta3,meta4', [
                { text: 'content1', metadata1: 'meta1', metadata2: 'meta2' },
                { text: 'content2', metadata1: 'meta3', metadata2: 'meta4' }
            ]);
            const loaded = await load();
            const expected = [
                new Document({ text: 'content1', metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
                new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: 'meta4' } })
            ];
            // Document ids are stripped on both sides before comparing.
            expect(remove_id(loaded)).toEqual(remove_id(expected));
        });

        it('should handle empty CSV file', async () => {
            stubCsv('', []);
            const loaded = await load();
            expect(loaded).toEqual([]);
        });

        it('should handle missing text column', async () => {
            stubCsv('metadata1,metadata2\nmeta1,meta2\nmeta3,meta4', [
                { metadata1: 'meta1', metadata2: 'meta2' },
                { metadata1: 'meta3', metadata2: 'meta4' }
            ]);
            const loaded = await load();
            const expected = [
                new Document({ text: undefined, metadata: { metadata1: 'meta1', metadata2: 'meta2' } }),
                new Document({ text: undefined, metadata: { metadata1: 'meta3', metadata2: 'meta4' } })
            ];
            expect(remove_id(loaded)).toEqual(remove_id(expected));
        });

        it('should handle null values in metadata', async () => {
            stubCsv('text,metadata1,metadata2\ncontent1,,meta2\ncontent2,meta3,', [
                { text: 'content1', metadata1: null, metadata2: 'meta2' },
                { text: 'content2', metadata1: 'meta3', metadata2: null }
            ]);
            const loaded = await load();
            // The expectation encodes that null metadata values come back as empty strings.
            const expected = [
                new Document({ text: 'content1', metadata: { metadata1: '', metadata2: 'meta2' } }),
                new Document({ text: 'content2', metadata: { metadata1: 'meta3', metadata2: '' } })
            ];
            expect(remove_id(loaded)).toEqual(remove_id(expected));
        });
    });
});
69
/**
 * Returns a shallow copy of every document with its `id_` property left out,
 * so two document lists can be compared without their ids.
 *
 * @param list_of_documents array of document-like objects
 * @returns a new array of plain objects holding every own enumerable property except `id_`
 */
function remove_id(list_of_documents) {
    return list_of_documents.map(({ id_, ...everything_else }) => everything_else);
}
75
+ //# sourceMappingURL=csvLoader.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csvLoader.test.js","sourceRoot":"","sources":["../../src/services/csvLoader.test.ts"],"names":[],"mappings":"AAAA,aAAa;AACb,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,IAAI,MAAM,WAAW,CAAC;AAG7B,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACd,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;AAErB,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;YAC5E,MAAM,eAAe,GAAG,sEAAsE,CAAC;YAC/F,MAAM,cAAc,GAAG;gBACrB,IAAI,EAAE;oBACJ,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;oBAC5D,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;iBAC7D;aACF,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBAC1C,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACxF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;aACzF,CAAC,CAAC,CAAC;QACN,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,eAAe,GAAG,EAAE,CAAC;YAC3B,MAAM,cAAc,GAAG,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;YACpC,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,eAAe,GAAG,+CAA+C,CAAC;YACxE,MAAM,cAAc,GAAG;gBACrB,IAAI,EAAE;oBACJ,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;oBAC1C,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE;iBAC3C;aACF,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,
aAAa,EAAE,MAAM,CAAC,CAAC;YAEjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBAC1C,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACvF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;aACxF,CAAC,CAAC,CAAC;QACN,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACnD,MAAM,eAAe,GAAG,4DAA4D,CAAC;YACrF,MAAM,cAAc,GAAG;gBACnB,IAAI,EAAE;oBACN,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE;oBACzD,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE;iBACxD;aACJ,CAAC;YACF,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;YAC9C,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;YAE3C,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YACjE,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;gBACxC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,EAAE,CAAC;gBACnF,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE,CAAC;aACtF,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,SAAS,SAAS,CAAC,iBAAiB;IAClC,OAAO,iBAAiB,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACnC,MAAM,EAAE,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,GAAG,CAAC;QACvC,OAAO,cAAc,CAAC;IAC1B,CAAC,CAAC,CAAC;AACH,CAAC"}
@@ -0,0 +1,22 @@
1
+ import { Document, VectorStoreIndex, TextNode, type StorageContext } from "llamaindex";
2
+ import { OllamaEmbedding } from '@llamaindex/ollama';
3
+ import { MistralAIEmbedding } from '@llamaindex/mistral';
4
+ import { GeminiEmbedding } from '@llamaindex/google';
5
+ import { MockEmbedding } from "./mockEmbedding.js";
6
+ import type { EmbeddingConfig, Settings, MetadataFilter, Clients } from "../types/index.js";
7
+ import { OpenAIEmbedding } from "@llamaindex/openai";
8
+ import { ProgressVectorStoreIndex } from "./progressVectorStoreIndex.js";
9
+ export declare function estimateCost(nodes: TextNode[], modelName: string): {
10
+ estimatedPrice: number;
11
+ tokenCount: number;
12
+ pricePer1M: number;
13
+ };
14
+ export declare function getExistingVectorStoreIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<VectorStoreIndex>;
15
+ export declare function getExistingDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
16
+ export declare function transformDocumentsToNodes(documents: Document[], config: EmbeddingConfig): Promise<TextNode<import("llamaindex").Metadata>[]>;
17
+ export declare function getEmbedModel(config: EmbeddingConfig, settings: Settings): OpenAIEmbedding | OllamaEmbedding | MistralAIEmbedding | GeminiEmbedding | MockEmbedding;
18
+ export declare function getStorageContext(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<StorageContext>;
19
+ export declare function persistDocuments(documents: Document[], config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<void>;
20
+ export declare function persistNodes(nodes: TextNode[], config: EmbeddingConfig, settings: Settings, clients: Clients, progressCallback?: (progress: number, total: number) => void): Promise<ProgressVectorStoreIndex>;
21
+ export declare function searchDocuments(index: VectorStoreIndex, query: string, numResults?: number, filters?: MetadataFilter[]): Promise<import("llamaindex").NodeWithScore<import("llamaindex").Metadata>[]>;
22
+ //# sourceMappingURL=embeddings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/services/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,QAAQ,EACR,gBAAgB,EAIhB,QAAQ,EAKR,KAAK,cAAc,EAGpB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAC,MAAM,oBAAoB,CAAA;AACnD,OAAO,EAAE,kBAAkB,EAA+B,MAAM,qBAAqB,CAAA;AACrF,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AAKpD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAGnD,OAAO,KAAK,EAAE,eAAe,EAAE,QAAQ,EAAE,cAAc,EAAE,OAAO,EAAG,MAAM,mBAAmB,CAAC;AAG7F,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAuCzE,wBAAgB,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG;IAClE,cAAc,EAAE,MAAM,CAAC;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB,CAuBA;AAED,wBAAsB,2BAA2B,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,6BAqD9G;AAED,wBAAsB,mBAAmB,CAAC,MAAM,EAAE,eAAe,mDAehE;AAID,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,QAAQ,EAAE,EACrB,MAAM,EAAE,eAAe,sDA0BxB;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,eAAe,EACvB,QAAQ,EAAE,QAAQ,4FA2CnB;AAED,wBAAsB,iBAAiB,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,cAAc,CAAC,CAiB9H;AAED,wBAAsB,gBAAgB,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAW1I;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAqCpN;AAyCD,wBAAsB,eAAe,CACnC,KAAK,EAAE,gBAAgB,EACvB,KAAK,EAAE,MAAM,EACb,UAAU,GAAE,MAAW,EACvB,OAAO,CAAC,EAAE,cAAc,EAAE,gFAU3B"}
@@ -0,0 +1,314 @@
1
+ import { VectorStoreIndex,
2
+ // OpenAIEmbedding,
3
+ IngestionPipeline, ModalityType, storageContextFromDefaults, SimpleVectorStore, Settings as LlamaindexSettings, SimpleDocumentStore } from "llamaindex";
4
+ import { OllamaEmbedding } from '@llamaindex/ollama';
5
+ import { MistralAIEmbedding, MistralAIEmbeddingModelType } from '@llamaindex/mistral';
6
+ import { GeminiEmbedding } from '@llamaindex/google';
7
+ import { PGVectorStore } from '@llamaindex/postgres';
8
+ import { AzureOpenAIEmbedding } from "@llamaindex/azure";
9
+ import { Sploder } from "./sploder.js";
10
+ import { CustomSentenceSplitter } from "./sentenceSplitter.js";
11
+ import { MockEmbedding } from "./mockEmbedding.js";
12
+ import { encodingForModel } from "js-tiktoken";
13
+ import { join } from "path";
14
+ import { sanitizeProjectName, capitalizeFirstLetter } from "../utils.js";
15
+ import * as fs from 'fs';
16
+ import { OpenAIEmbedding } from "@llamaindex/openai";
17
+ import { BatchingWeaviateVectorStore } from "./batchingWeaviateVectorStore.js";
18
+ import { ProgressVectorStoreIndex } from "./progressVectorStoreIndex.js";
19
+ // unused, but probably eventually will be used.
20
+ // to be used by postgres store, which it's looking increasingly like I have to enable again
21
+ const MODEL_DIMENSIONS = {
22
+ "text-embedding-3-small": 1536,
23
+ "text-embedding-3-large": 3072,
24
+ "mxbai-embed-large": 1024,
25
+ "mistral-embed": 1024,
26
+ "gemini-embedding-001": 768, // Gemini embedding model
27
+ };
28
+ const PRICE_PER_1M = {
29
+ "text-embedding-3-small": 0.02,
30
+ "text-embedding-3-large": 0.13,
31
+ "mistral-embed": 0.1,
32
+ "mxbai-embed-large": 0, // local model, free
33
+ "nomic-embed-text": 0, // local model, free
34
+ "gemini-embedding-001": 0.0, // Gemini embedding is currently free (unless you're on the paid tier, in which case it is $0.15/million tokens)
35
+ };
36
/**
 * Builds every ingestion transformation except the embedding step
 * (which is handled by VectorStoreIndex.init).
 *
 * A CustomSentenceSplitter configured with the chunk size and overlap always
 * comes first; a Sploder is appended only when
 * `config.combineSentencesIntoChunks` is truthy.
 *
 * @param config embedding configuration (chunkSize, chunkOverlap, combineSentencesIntoChunks, sploderMaxSize)
 * @returns the ordered list of transformations
 */
function getBaseTransformations(config) {
    const splitter = new CustomSentenceSplitter({
        chunkSize: config.chunkSize,
        chunkOverlap: config.chunkOverlap
    });
    if (!config.combineSentencesIntoChunks) {
        return [splitter];
    }
    const sploder = new Sploder({ maxStringTokenCount: config.sploderMaxSize });
    return [splitter, sploder];
}
48
/**
 * Estimates how many tokens the given nodes contain and what embedding them
 * would cost with the named model.
 *
 * When no tokenizer can be resolved for `modelName` (the original note says this
 * does not work for ollama), the tokenizer for "text-embedding-3-small" is used
 * instead and a warning is logged, so the token count is then an approximation.
 *
 * @param nodes nodes whose `text` is tokenized
 * @param modelName embedding model name, used for both tokenizer and price lookup
 * @returns `{ estimatedPrice, tokenCount, pricePer1M }`; the price is 0 for models missing from PRICE_PER_1M
 */
export function estimateCost(nodes, modelName) {
    // Models that are unknown or free are priced at 0.
    const pricePer1M = PRICE_PER_1M[modelName] || 0;

    let tokenizer;
    try {
        tokenizer = encodingForModel(modelName);
    }
    catch (error) {
        // No tokenizer for this model: fall back to a known one.
        tokenizer = encodingForModel("text-embedding-3-small");
        console.warn(`Tokenizer for model ${modelName} not found. Using fallback tokenizer.`);
    }

    let tokenCount = 0;
    nodes.forEach((node) => {
        tokenCount += tokenizer.encode(node.text).length;
    });

    const pricePerToken = pricePer1M / 1_000_000;
    return {
        estimatedPrice: tokenCount * pricePerToken,
        tokenCount,
        pricePer1M
    };
}
70
/**
 * Opens the vector store index that already exists for a project.
 *
 * - "simple": loads the storage context persisted under
 *   `<storagePath>/<sanitized project name>` and assigns the embed model to the index.
 * - "postgres": requires `clients.postgresClient`; the store is configured with the
 *   POSTGRES_CONNECTION_STRING environment variable.
 * - "weaviate": requires `clients.weaviateClient`.
 *
 * @param config embedding configuration (vectorStoreType, storagePath, projectName, modelName, ...)
 * @param settings provider settings forwarded to getEmbedModel
 * @param clients optional database clients (postgresClient, weaviateClient)
 * @returns the vector store index for the project
 * @throws Error when the required client is missing or the store type is unsupported
 */
export async function getExistingVectorStoreIndex(config, settings, clients) {
    const embedModel = getEmbedModel(config, settings);
    const storeType = config.vectorStoreType;

    if (storeType === "simple") {
        const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
        const storageContext = await storageContextFromDefaults({ persistDir: projectDir });
        const index = await VectorStoreIndex.init({ storageContext });
        index.embedModel = embedModel;
        return index;
    }

    if (storeType === "postgres") {
        if (!clients.postgresClient) {
            throw new Error("Postgres client required but not provided");
        }
        const pgStore = new PGVectorStore({
            clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
            tableName: sanitizeProjectName(config.projectName),
            // Models missing from MODEL_DIMENSIONS default to 1536 dimensions.
            dimensions: MODEL_DIMENSIONS[config.modelName] || 1536,
            embeddingModel: embedModel
        });
        const storageContext = await storageContextFromDefaults({
            vectorStores: { [ModalityType.TEXT]: pgStore },
        });
        return await VectorStoreIndex.init({ storageContext });
    }

    if (storeType === "weaviate") {
        if (!clients.weaviateClient) {
            throw new Error("Weaviate client required but not provided");
        }
        const weaviateStore = new BatchingWeaviateVectorStore({
            indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
            weaviateClient: clients.weaviateClient,
            embeddingModel: embedModel
        });
        // Per the original note: WeaviateVectorStore's getNodeSimilarity looks for a
        // distance, but current weaviate provides a score, so the private method is
        // overwritten to read `score` from the entry metadata.
        // @ts-ignore
        weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => entry.metadata.score;
        return await VectorStoreIndex.fromVectorStore(weaviateStore);
    }

    throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
}
120
/**
 * Loads the document store persisted under `<storagePath>/<sanitized project name>`.
 *
 * `config.vectorStoreType` is not consulted: the doc store is always read from the
 * on-disk storage context. (The original kept a commented-out switch noting that a
 * postgres variant was not yet implemented.)
 *
 * @param config embedding configuration (storagePath, projectName)
 * @returns the docStore of the loaded storage context
 */
export async function getExistingDocStore(config) {
    const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
    const { docStore } = await storageContextFromDefaults({ persistDir: projectDir });
    return docStore;
}
135
/**
 * Turns documents into nodes by running the base transformations
 * (sentence splitting plus the optional sploder) through an IngestionPipeline.
 *
 * Side effects: every input document gets `excludedEmbedMetadataKeys` set to all
 * of its metadata keys, so only the text is embedded; two console timers are
 * started and stopped.
 *
 * Documents whose text is missing or empty are dropped before the pipeline runs.
 *
 * @param documents documents to transform (mutated as described above)
 * @param config embedding configuration passed to getBaseTransformations
 * @returns the nodes produced by the pipeline
 */
export async function transformDocumentsToNodes(documents, config) {
    const outerTimer = "transformDocumentsToNodes Run Time";
    const innerTimer = "transformDocumentsToNodes transformDocuments Run Time";

    console.time(outerTimer);
    const transformations = getBaseTransformations(config);

    // Keep metadata out of the embedded text: it would only waste tokens.
    for (const doc of documents) {
        doc.excludedEmbedMetadataKeys = Object.keys(doc.metadata);
    }

    console.time(innerTimer);
    // Empty documents can't be embedded, so they are ignored
    // (the original author notes this may not be the final answer).
    const hasText = (doc) => doc.text && doc.text.length > 0;
    const nonEmptyDocuments = documents.filter(hasText);

    const pipeline = new IngestionPipeline({ transformations });
    const nodes = await pipeline.run({ documents: nonEmptyDocuments });

    console.timeEnd(innerTimer);
    console.timeEnd(outerTimer);
    return nodes;
}
156
/**
 * Creates the embedding model for `config.modelProvider` and also installs it as
 * the global llamaindex `Settings.embedModel`.
 *
 * Supported providers: "openai", "ollama", "azure", "mistral", "gemini", "mock".
 * The mistral branch always uses MISTRAL_EMBED and the gemini branch passes no
 * model name, so `config.modelName` is only used by openai, ollama and azure.
 *
 * @param config embedding configuration (modelProvider, modelName)
 * @param settings API keys / endpoints for the providers
 * @returns the constructed embedding model
 * @throws Error when a required key or endpoint is missing, or the provider is unsupported
 */
export function getEmbedModel(config, settings) {
    let embedModel;
    switch (config.modelProvider) {
        case "openai": {
            const apiKey = settings.openAIKey ? settings.openAIKey : undefined;
            embedModel = new OpenAIEmbedding({ model: config.modelName, apiKey });
            // Batches are capped at 50 inputs; the original note cites the provider's
            // limit of 300,000 tokens summed across all inputs in a single request.
            embedModel.embedBatchSize = 50;
            break;
        }
        case "ollama": {
            const host = settings.oLlamaBaseURL ? settings.oLlamaBaseURL : undefined;
            embedModel = new OllamaEmbedding({ model: config.modelName, config: { host } });
            break;
        }
        case "azure": {
            if (!settings.azureOpenAIKey || !settings.azureOpenAIEndpoint) {
                throw new Error("Azure OpenAI API key and endpoint are required for Azure embedding models");
            }
            embedModel = new AzureOpenAIEmbedding({
                model: config.modelName,
                apiKey: settings.azureOpenAIKey,
                endpoint: settings.azureOpenAIEndpoint,
                apiVersion: settings.azureOpenAIApiVersion ?? undefined
            });
            break;
        }
        case "mistral": {
            if (!settings.mistralApiKey) {
                throw new Error("Mistral API key is required for Mistral embedding models");
            }
            embedModel = new MistralAIEmbedding({
                model: MistralAIEmbeddingModelType.MISTRAL_EMBED, // only one choice!
                apiKey: settings.mistralApiKey
            });
            break;
        }
        case "gemini": {
            if (!settings.geminiApiKey) {
                throw new Error("Gemini API key is required for Gemini embedding models");
            }
            embedModel = new GeminiEmbedding({ apiKey: settings.geminiApiKey });
            embedModel.embedBatchSize = 50;
            break;
        }
        case "mock": {
            embedModel = new MockEmbedding();
            break;
        }
        default:
            throw new Error(`Unsupported embedding model provider: ${config.modelProvider}`);
    }
    // Register globally as well as returning it.
    LlamaindexSettings.embedModel = embedModel;
    return embedModel;
}
205
/**
 * Builds the storage context used for writing a project: the configured vector
 * store plus an in-memory SimpleDocumentStore, rooted at
 * `<storagePath>/<sanitized project name>`. `config.storagePath` is created if missing.
 *
 * Why the doc store has no persist path: per the original note, a doc store created
 * with one (the default in storageContextFromDefaults) writes to disk after every
 * put(), which happens 2+ times per document. It is therefore created without a
 * path and persisted explicitly once all documents are added.
 * See https://github.com/jeremybmerrill/meaningfully/issues/52
 *
 * @param config embedding configuration (storagePath, projectName, vectorStoreType, ...)
 * @param settings provider settings forwarded to createVectorStore
 * @param clients optional database clients forwarded to createVectorStore
 * @returns the storage context
 */
export async function getStorageContext(config, settings, clients) {
    const textVectorStore = await createVectorStore(config, settings, clients);
    fs.mkdirSync(config.storagePath, { recursive: true });
    const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
    const contextOptions = {
        persistDir: projectDir,
        vectorStores: { [ModalityType.TEXT]: textVectorStore },
        docStore: new SimpleDocumentStore()
    };
    return await storageContextFromDefaults(contextOptions);
}
223
/**
 * Adds the documents to the project's doc store and then writes that store to
 * `<storagePath>/<sanitized project name>/doc_store.json`.
 *
 * The explicit persist is needed because getStorageContext creates the doc store
 * without a persist path. Run time is reported through console.time/timeEnd.
 *
 * @param documents documents to store
 * @param config embedding configuration (storagePath, projectName, ...)
 * @param settings provider settings forwarded to getStorageContext
 * @param clients optional database clients forwarded to getStorageContext
 */
export async function persistDocuments(documents, config, settings, clients) {
    const timerLabel = "persistDocuments Run Time";
    console.time(timerLabel);
    const { docStore } = await getStorageContext(config, settings, clients);
    await docStore.addDocuments(documents, true);
    const docStorePath = join(config.storagePath, sanitizeProjectName(config.projectName), "doc_store.json");
    // @ts-ignore
    await docStore.kvStore.persist(docStorePath);
    console.timeEnd(timerLabel);
}
233
/**
 * Embeds the nodes into the project's vector store and persists the store.
 *
 * Building the ProgressVectorStoreIndex is what actually embeds the nodes (per the
 * original note, even if they already carry embeddings). Afterwards the store is
 * persisted explicitly, because the storage context does not do it on its own.
 *
 * @param nodes nodes to embed and store
 * @param config embedding configuration (storagePath, projectName, vectorStoreType, ...)
 * @param settings provider settings forwarded to getStorageContext
 * @param clients optional database clients forwarded to getStorageContext
 * @param progressCallback optional callback handed to ProgressVectorStoreIndex.init
 * @returns the created index
 * @throws Error when the storage context has no text vector store, or the store is
 *         of a type this function does not know how to persist
 */
export async function persistNodes(nodes, config, settings, clients, progressCallback) {
    console.time("persistNodes Run Time");
    const storageContext = await getStorageContext(config, settings, clients);
    const vectorStore = storageContext.vectorStores[ModalityType.TEXT];
    if (!vectorStore) {
        throw new Error("Vector store is undefined");
    }
    // Create the index; this is the step that embeds the nodes.
    const index = await ProgressVectorStoreIndex.init({
        nodes,
        storageContext,
        logProgress: true,
        progressCallback,
    });
    // vectorStore is known to be defined here (checked above), so the former
    // second `if (vectorStore) ... else throw` wrapper was unreachable and is gone.
    if (vectorStore instanceof PGVectorStore || vectorStore instanceof SimpleVectorStore) {
        await vectorStore.persist(join(config.storagePath, sanitizeProjectName(config.projectName), "vector_store.json"));
    }
    else if (vectorStore instanceof BatchingWeaviateVectorStore) {
        // WeaviateVectorStore does not have a persist method, it persists automatically
        console.log("Pretending to persist Weaviate vector store, but it actually persists automatically.");
    }
    else {
        throw new Error("Vector store does not support persist method");
    }
    console.timeEnd("persistNodes Run Time");
    return index;
}
272
/**
 * Instantiates the vector store selected by `config.vectorStoreType`
 * ("postgres", "simple" or "weaviate").
 *
 * The embedding model is passed to each store explicitly; per the original note,
 * a store otherwise defaults to Ada.
 *
 * @param config embedding configuration (vectorStoreType, storagePath, projectName, modelName, ...)
 * @param settings provider settings forwarded to getEmbedModel
 * @param clients optional database clients; `weaviateClient` is used by the weaviate store
 * @returns the vector store instance
 * @throws Error when the store type is unsupported
 */
async function createVectorStore(config, settings, clients) {
    const embedder = getEmbedModel(config, settings);
    switch (config.vectorStoreType) {
        case "postgres": {
            return new PGVectorStore({
                clientConfig: { connectionString: process.env.POSTGRES_CONNECTION_STRING },
                tableName: sanitizeProjectName(config.projectName),
                // Models missing from MODEL_DIMENSIONS default to 1536 dimensions.
                dimensions: MODEL_DIMENSIONS[config.modelName] || 1536,
                embeddingModel: embedder
            });
        }
        case "simple": {
            const projectDir = join(config.storagePath, sanitizeProjectName(config.projectName));
            return SimpleVectorStore.fromPersistDir(projectDir, embedder);
        }
        case "weaviate": {
            const weaviateStore = new BatchingWeaviateVectorStore({
                indexName: capitalizeFirstLetter(sanitizeProjectName(config.projectName)),
                weaviateClient: clients.weaviateClient,
                embeddingModel: embedder
            });
            // Per the original note: WeaviateVectorStore's getNodeSimilarity looks for a
            // distance, but current weaviate provides a score, so the private method is
            // overwritten to read `score` from the entry metadata.
            // @ts-ignore
            weaviateStore.getNodeSimilarity = (entry, _similarityKey = "score") => entry.metadata.score;
            return weaviateStore;
        }
        default:
            throw new Error(`Unsupported vector store type: ${config.vectorStoreType}`);
    }
}
305
+ export async function searchDocuments(index, query, numResults = 10, filters) {
306
+ // const metadataFilters: MetadataFilters | undefined = filters ? {filters: filters} : undefined;
307
+ const metadataFilters = {
308
+ filters: filters ? filters : [],
309
+ };
310
+ const retriever = index.asRetriever({ similarityTopK: numResults, filters: metadataFilters });
311
+ const results = await retriever.retrieve(query);
312
+ return results;
313
+ }
314
+ //# sourceMappingURL=embeddings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/services/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,gBAAgB;AAChB,mBAAmB;AACnB,iBAAiB,EAGjB,YAAY,EAEZ,0BAA0B,EAC1B,iBAAiB,EAEjB,QAAQ,IAAI,kBAAkB,EAC9B,mBAAmB,EACpB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,eAAe,EAAC,MAAM,oBAAoB,CAAA;AACnD,OAAO,EAAE,kBAAkB,EAAE,2BAA2B,EAAE,MAAM,qBAAqB,CAAA;AACrF,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,gBAAgB,EAAsB,MAAM,aAAa,CAAC;AACnE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AACzE,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,2BAA2B,EAAE,MAAM,kCAAkC,CAAC;AAC/E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,gDAAgD;AAChD,4FAA4F;AAC5F,MAAM,gBAAgB,GAA2B;IAC/C,wBAAwB,EAAE,IAAI;IAC9B,wBAAwB,EAAE,IAAI;IAC9B,mBAAmB,EAAE,IAAI;IACzB,eAAe,EAAE,IAAI;IACrB,sBAAsB,EAAE,GAAG,EAAE,yBAAyB;CACvD,CAAC;AAEF,MAAM,YAAY,GAA2B;IAC3C,wBAAwB,EAAE,IAAI;IAC9B,wBAAwB,EAAE,IAAI;IAC9B,eAAe,EAAE,GAAG;IACpB,mBAAmB,EAAE,CAAC,EAAE,oBAAoB;IAC5C,kBAAkB,EAAE,CAAC,EAAE,oBAAoB;IAC3C,sBAAsB,EAAE,GAAG,EAAE,gHAAgH;CAC9I,CAAC;AAGF,+FAA+F;AAC/F,SAAS,sBAAsB,CAAC,MAAuB;IACrD,MAAM,eAAe,GAAyB;QAC5C,IAAI,sBAAsB,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC;KAC/F,CAAC;IAEF,IAAI,MAAM,CAAC,0BAA0B,EAAE,CAAC;QACtC,eAAe,CAAC,IAAI,CAClB,IAAI,OAAO,CAAC;YACV,mBAAmB,EAAE,MAAM,CAAC,cAAc;SAC3C,CAAC,CACH,CAAC;IACJ,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,KAAiB,EAAE,SAAiB;IAK/D,MAAM,UAAU,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,0CAA0C;IAE3F,IAAI,SAAS,CAAC;IACd,IAAG,CAAC;QACF,SAAS,GAAG,gBAAgB,CAAC,SAA0B,CAAC,CAAC,CAAC,+BAA+B;IAC3F,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,wFAAwF;QACxF,+EAA+E;QAC/E,SAAS,GAAG,gBAAgB,CAAC,wBAAwB,CAAC,CAAC,CAAC,gCAAgC;QACxF,OAAO,CAAC,I
AAI,CAAC,uBAAuB,SAAS,uCAAuC,CAAC,CAAC;IACxF,CAAC;IACD,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,EAAE;QAC5C,OAAO,GAAG,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IAClD,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,MAAM,cAAc,GAAG,UAAU,GAAG,CAAC,UAAU,GAAG,SAAS,CAAC,CAAC;IAE7D,OAAO;QACL,cAAc;QACd,UAAU;QACV,UAAU;KACX,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IAC7G,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACnD,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAC/B,KAAK,QAAQ;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;YACrF,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC;gBACtD,UAAU,EAAE,UAAU;aACvB,CAAC,CAAC;YACH,IAAI,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,CAAC;gBACpC,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;YACH,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC;YAC5B,OAAO,GAAG,CAAC;QAEb,KAAK,UAAU;YACb,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,OAAO,GAAG,IAAI,aAAa,CAAC;gBAChC,YAAY,EAAE,EAAE,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,0BAA0B,EAAE;gBAC1E,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC;gBAClD,UAAU,EAAE,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,qCAAqC;gBAC7F,cAAc,EAAE,UAAU;aAC3B,CAAC,CAAC;YACH,MAAM,gBAAgB,GAAG,MAAM,0BAA0B,CAAC;gBACxD,YAAY,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE;aAC/C,CAAC,CAAC;YACH,OAAO,MAAM,gBAAgB,CAAC,IAAI,CAAC;gBACjC,cAAc,EAAE,gBAAgB;aACjC,CAAC,CAAC;QACL,KAAK,UAAU;YACb,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAC5B,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,aAAa,GAAG,IAAI,2BAA2B,CAAC;gBACpD,SAAS,EAAE,qBAAqB,CAAC,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBACzE,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,cAAc,EAAE,UAAU;aAC3B,CAAC,CAAC;YAEH,yGAAyG;YACzG,yEAAyE;YACzE,8EAA8E;YAC9E,aAAa;YACb,aAAa,CAAC,iBAAiB,GAAG,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,EAAE,EAAE;gBACpE,OAAQ,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;YAC/B,CAAC,CAAA;YAED,OAAO,MAAM,gBAAgB,CAAC,eAAe,CAAC,aAAa,CAAC,CAAA;QAE9D;YACE,MAAM,IAAI,KAAK,CAAC,kCAAkC,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAChF,CAAC;AACH,CAAC;AAED,MAAM,
CAAC,KAAK,UAAU,mBAAmB,CAAC,MAAuB;IAC/D,oCAAoC;IACpC,mBAAmB;IACf,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,MAAM,cAAc,GAAG,MAAM,0BAA0B,CAAC;QACtD,UAAU,EAAE,UAAU;KACvB,CAAC,CAAC;IACH,OAAO,cAAc,CAAC,QAAQ,CAAC;IAEnC,qBAAqB;IACrB,2FAA2F;IAC3F,iDAAiD;IACjD,aAAa;IACb,mFAAmF;IACnF,IAAI;AACN,CAAC;AAID,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,SAAqB,EACrB,MAAuB;IAEvB,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;IAEnD,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEvD,8FAA8F;IAC9F,qEAAqE;IACrE,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,QAAQ,CAAC,yBAAyB,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACtE,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;IACtE,6FAA6F;IAC7F,oDAAoD;IACpD,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEzF,4DAA4D;IAC5D,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC;QACrC,eAAe;KAChB,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,CAAC,MAAM,QAAQ,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,SAAS,EAAC,CAAC,CAAe,CAAC;IAEzE,OAAO,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC;IACzE,OAAO,CAAC,OAAO,CAAC,oCAAoC,CAAC,CAAC;IACtD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,MAAuB,EACvB,QAAkB;IAElB,IAAI,UAAU,CAAC;IACf,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QACtC,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,EAAC,CAAE,CAAC;QAC3H,UAAU,CAAC,cAAc,GAAG,EAAE,CAAC,CAAC,wGAAwG;IAC1I,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QAC7C,UAAU,GAAG,IAAI,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE;gBAClE,IAAI,EAAE,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;aAClE,GAAG,CAAC,CAAC;IACR,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,OAAO,EAAE,CAAC;QAC5C,IAAI,CAAC,QAAQ,CAAC,cAAc,IAAI,CAAC,QAAQ,CAAC,mBAAmB,EAAE,CAAC;YAC9D,MAAM,IAAI,KAAK,CAAC,2EAA2E,CAAC,CAAC;QAC/F,CAAC;QACD,UAAU,GAAG,IAAI,oBAAoB,CAAC;YACpC,KAAK,EAAE,MAAM,CAAC,SAAS;YACvB,MAAM,EAAE,QAAQ,CAAC,cAAc;YAC/B,QAAQ,EAAE,QAAQ,CAAC,mBAAmB;YACtC,UAAU,EAAE,QAAQ,CAAC,qBAAq
B,IAAI,SAAS;SACxD,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,SAAS,EAAE,CAAC;QAC9C,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC9E,CAAC;QACD,UAAU,GAAG,IAAI,kBAAkB,CAAC;YAClC,KAAK,EAAE,2BAA2B,CAAC,aAAa,EAAE,mBAAmB;YACrE,MAAM,EAAE,QAAQ,CAAC,aAAa;SAC/B,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;QAC7C,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QACD,UAAU,GAAG,IAAI,eAAe,CAAC;YAC/B,MAAM,EAAE,QAAQ,CAAC,YAAY;SAC9B,CAAC,CAAC;QACH,UAAU,CAAC,cAAc,GAAG,EAAE,CAAC;IACjC,CAAC;SAAM,IAAI,MAAM,CAAC,aAAa,KAAK,MAAM,EAAE,CAAC;QAC3C,UAAU,GAAG,IAAI,aAAa,EAAE,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,yCAAyC,MAAM,CAAC,aAAa,EAAE,CAAC,CAAC;IACnF,CAAC;IACD,kBAAkB,CAAC,UAAU,GAAG,UAAU,CAAC;IAC3C,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IACnG,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IACvE,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACtD,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,OAAO,MAAM,0BAA0B,CAAC;QACtC,UAAU,EAAE,UAAU;QACtB,YAAY,EAAE,EAAC,CAAC,YAAY,CAAC,IAAI,CAAC,EAAE,WAAW,EAAC;QAChD,QAAQ,EAAE,IAAI,mBAAmB,EAAE;QACjC;;;;;;;UAOE;KACL,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,SAAqB,EAAE,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IACzH,OAAO,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC1C,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC1E,MAAM,cAAc,CAAC,QAAQ,CAAC,YAAY,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAE5D,oCAAoC;IACpC,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAE,CAAC;IACtF,aAAa;IACb,MAAO,cAAc,CAAC,QAAgC,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAC,CAAC;IAE3G,OAAO,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,KAAiB,EAAE,MAAuB,EAAE,QAAkB,EAAE,OAAgB,EAAE,gBAA4D;IAC/K,kDAAkD;IAClD,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAEtC,MAAM,cAAc,GAAG
,MAAM,iBAAiB,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC1E,MAAM,WAAW,GAAG,cAAc,CAAC,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;IACnE,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IACD,mCAAmC;IACnC,yCAAyC;IACzC,mDAAmD;IACnD,MAAM,KAAK,GAAG,MAAM,wBAAwB,CAAC,IAAI,CAAC;QAChD,KAAK;QACL,cAAc;QACd,WAAW,EAAE,IAAI;QACjB,gBAAgB;KACjB,CAAC,CAAC;IAEH,gEAAgE;IAChE,qDAAqD;IACrD,4DAA4D;IAC5D,yHAAyH;IACzH,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,WAAW,YAAY,aAAa,IAAI,WAAW,YAAY,iBAAiB,EAAE,CAAC;YACrF,MAAM,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACpH,CAAC;aAAM,IAAI,WAAW,YAAY,2BAA2B,EAAE,CAAC;YAC9D,gFAAgF;YAChF,OAAO,CAAC,GAAG,CAAC,sFAAsF,CAAC,CAAC;QACtG,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC;IACD,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,MAAuB,EAAE,QAAkB,EAAE,OAAgB;IAC5F,MAAM,cAAc,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACvD,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAE/B,mEAAmE;QACnE,gCAAgC;QAChC,KAAK,UAAU;YACb,OAAO,IAAI,aAAa,CAAC;gBACvB,YAAY,EAAE,EAAC,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,0BAA0B,EAAC;gBACxE,SAAS,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC;gBAClD,UAAU,EAAE,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,EAAE,qCAAqC;gBAC7F,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;QAEL,KAAK,QAAQ;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;YACrF,OAAO,iBAAiB,CAAC,cAAc,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;QAEtE,KAAK,UAAU;YACb,MAAM,WAAW,GAAG,IAAI,2BAA2B,CAAC;gBAClD,SAAS,EAAE,qBAAqB,CAAC,mBAAmB,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;gBACzE,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,cAAc,EAAE,cAAc;aAC/B,CAAC,CAAC;YAEH,yGAAyG;YACzG,yEAAyE;YACzE,8EAA8E;YAC9E,aAAa;YACb,WAAW,CAAC,iBAAiB,GAAG,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,EAAE,EAAE;gBAClE,OAAQ,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;YAC/B,CAAC,CAAA;YAED,OAAO,WAAW,CAAC;QACrB;YACE,MAAM,IAAI,KAAK,CAAC,kCAAkC,MAAM,CAAC,eAAe,EAAE,CAAC,CAAC;IAChF,C
AAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,KAAuB,EACvB,KAAa,EACb,aAAqB,EAAE,EACvB,OAA0B;IAE1B,iGAAiG;IACjG,MAAM,eAAe,GAAoB;QACvC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;KAChC,CAAC;IACF,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,CAAC,EAAE,cAAc,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAC;IAE9F,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAE,CAAC;IACjD,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=embeddings.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.test.d.ts","sourceRoot":"","sources":["../../src/services/embeddings.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,115 @@
1
+ //@ts-nocheck
2
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
3
+ import { Document, TextNode } from 'llamaindex';
4
+ // First, set up the mock before importing the module
5
// Partially mock ./embeddings: the exports named below are replaced with
// vi.fn() stubs, while every other export keeps its original implementation.
vi.mock(import("./embeddings"), async (importOriginal) => {
    const stubbedExportNames = [
        "estimateCost",
        "getExistingVectorStoreIndex",
        "persistNodes",
        "persistDocuments",
        "getExistingDocStore",
        "searchDocuments",
    ];
    const realModule = await importOriginal();
    const stubs = Object.fromEntries(stubbedExportNames.map((exportName) => [exportName, vi.fn()]));
    // Stubs are spread last so they override the real exports of the same name.
    return { ...realModule, ...stubs };
});
18
+ // Now import the functions under test (these two are not stubbed above, so they keep their real implementations)
19
+ import { transformDocumentsToNodes, getEmbedModel } from './embeddings';
20
/**
 * Tests for transformDocumentsToNodes, run with the 'mock' model provider.
 *
 * Behavior pinned by these tests:
 *  - each input document's text comes back as the text of a returned node;
 *  - documents whose text is undefined or empty yield no node;
 *  - every metadata key of a document is listed in the first returned node's
 *    excludedEmbedMetadataKeys.
 */
describe('transformDocumentsToNodes', () => {
    const mockConfig = {
        chunkSize: 100,
        chunkOverlap: 10,
        combineSentencesIntoChunks: true,
        sploderMaxSize: 500,
        modelProvider: 'mock',
        modelName: 'text-embedding-3-small',
        vectorStoreType: "simple",
        storagePath: './storage',
        projectName: 'test_project',
        splitIntoSentences: true,
    };
    const mockSettings = {
        openAIKey: 'mock-api-key',
        oLlamaBaseURL: 'http://localhost',
        azureOpenAIKey: null,
        azureOpenAIEndpoint: null,
        azureOpenAIApiVersion: null,
        mistralApiKey: null,
        geminiApiKey: null,
    };

    // Reset call history on every vi.fn() stub between tests.
    beforeEach(() => {
        vi.clearAllMocks();
    });

    it('should process documents and return nodes', async () => {
        const mockDocuments = [
            new Document({ text: 'Document 1', metadata: { key1: 'value1' } }),
            new Document({ text: 'Document 2', metadata: { key2: 'value2' } }),
        ];
        const mockNodes = [
            new TextNode({ text: 'Document 1' }),
            new TextNode({ text: 'Document 2' }),
        ];
        const result = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        // Only the node text is compared; ids and metadata are not checked.
        expect(result.map((node) => node.text)).toEqual(mockNodes.map((node) => node.text));
    });

    it('should filter out documents with null, undefined, or zero-length text', async () => {
        // NOTE: only the undefined and empty-string cases are exercised here;
        // a document with `text: null` is not part of this fixture.
        const mockDocuments = [
            new Document({ text: 'Valid Document', metadata: { key1: 'value1' } }),
            new Document({ text: undefined, metadata: { key3: 'value3' } }),
            new Document({ text: '', metadata: { key4: 'value4' } }),
        ];
        const mockNodes = [new TextNode({ text: 'Valid Document' })];
        const result = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        expect(result.map((n) => n.text)).toEqual(mockNodes.map((n) => n.text));
        // TODO: a call-argument assertion (toHaveBeenCalledWith on the filtered
        // document list) was attempted but could not be made to work,
        // reportedly because a function cannot be spied on from within its
        // own module.
    });

    it('should exclude all metadata keys from embedding', async () => {
        const mockDocuments = [
            new Document({ text: 'Document 1', metadata: { key1: 'value1', key2: 'value2' } }),
        ];
        const nodes = await transformDocumentsToNodes(mockDocuments, mockConfig, mockSettings);
        expect(nodes[0].excludedEmbedMetadataKeys).toEqual(['key1', 'key2']);
    });
});
80
/**
 * Tests for getEmbedModel: the 'ollama' and 'mock' providers must each yield
 * a defined model, and an unknown provider must throw.
 */
describe('getEmbedModel', () => {
    const settings = {
        openAIKey: 'mock-api-key',
        oLlamaBaseURL: 'http://localhost',
        azureOpenAIKey: null,
        azureOpenAIEndpoint: null,
        azureOpenAIApiVersion: null,
        mistralApiKey: null,
        geminiApiKey: null,
    };
    const baseConfig = {
        chunkSize: 100,
        chunkOverlap: 10,
        combineSentencesIntoChunks: true,
        sploderMaxSize: 500,
        modelProvider: 'openai',
        modelName: 'text-embedding-3-small',
        vectorStoreType: "simple",
        storagePath: './storage',
        projectName: 'test_project',
        splitIntoSentences: true,
    };
    // Same configuration with only the provider swapped out.
    const configFor = (modelProvider) => ({ ...baseConfig, modelProvider });

    it('should handle different model providers correctly', () => {
        // Supported providers, checked in the same order as before.
        for (const provider of ['ollama', 'mock']) {
            const model = getEmbedModel(configFor(provider), settings);
            expect(model).toBeDefined();
        }
        // An unrecognised provider is rejected with a descriptive error.
        const callWithInvalidProvider = () => {
            getEmbedModel(configFor('invalid'), settings);
        };
        expect(callWithInvalidProvider).toThrow('Unsupported embedding model provider: invalid');
    });
});
115
+ //# sourceMappingURL=embeddings.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings.test.js","sourceRoot":"","sources":["../../src/services/embeddings.test.ts"],"names":[],"mappings":"AAAA,aAAa;AAEb,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhD,qDAAqD;AACrD,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE;IACvD,MAAM,MAAM,GAAG,MAAM,cAAc,EAAE,CAAA;IACrC,OAAO;QACL,GAAG,MAAM;QACT,sBAAsB;QACtB,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;QACrB,2BAA2B,EAAE,EAAE,CAAC,EAAE,EAAE;QACpC,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;QACrB,gBAAgB,EAAE,EAAE,CAAC,EAAE,EAAE;QACzB,mBAAmB,EAAE,EAAE,CAAC,EAAE,EAAE;QAC5B,eAAe,EAAE,EAAE,CAAC,EAAE,EAAE;KACzB,CAAA;AACH,CAAC,CAAC,CAAA;AAEF,kCAAkC;AAClC,OAAO,EAAE,yBAAyB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAExE,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG;QACjB,SAAS,EAAE,GAAG;QACd,YAAY,EAAE,EAAE;QAChB,0BAA0B,EAAE,IAAI;QAChC,cAAc,EAAE,GAAG;QACnB,aAAa,EAAE,MAAM;QACrB,SAAS,EAAE,wBAAwB;QACnC,eAAe,EAAE,QAAoB;QACrC,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,cAAc;QAC3B,kBAAkB,EAAE,IAAI;KACzB,CAAC;IAEF,MAAM,YAAY,GAAG;QACnB,SAAS,EAAE,cAAc;QACzB,aAAa,EAAE,kBAAkB;QACjC,cAAc,EAAE,IAAI;QACpB,mBAAmB,EAAE,IAAI;QACzB,qBAAqB,EAAE,IAAI;QAC3B,aAAa,EAAE,IAAI;QACnB,YAAY,EAAE,IAAI;KACnB,CAAC;IAEF,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YAClE,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACnE,CAAC;QACF,MAAM,SAAS,GAAG;YAChB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC;YACpC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC;SACrC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;QAExF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACtF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uEAAuE,EAAE,KAAK,IAA
I,EAAE;QACrF,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YACtE,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;YAC/D,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACzD,CAAC;QACF,MAAM,iBAAiB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,SAAS,GAAG,CAAC,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC;QAE7D,uEAAuE;QAEvE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC;QACxF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAExE,8GAA8G;QAC9G,6EAA6E;QAC7E,gGAAgG;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,aAAa,GAAG;YACpB,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC;SACnF,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,UAAU,EAAE,YAAY,CAAC,CAAA;QACtF,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,MAAM,UAAU,GAAG;QACjB,SAAS,EAAE,GAAG;QACd,YAAY,EAAE,EAAE;QAChB,0BAA0B,EAAE,IAAI;QAChC,cAAc,EAAE,GAAG;QACnB,aAAa,EAAE,QAAQ;QACvB,SAAS,EAAE,wBAAwB;QACnC,eAAe,EAAE,QAAoB;QACrC,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,cAAc;QAC3B,kBAAkB,EAAE,IAAI;KACzB,CAAC;IAEF,MAAM,YAAY,GAAG;QACnB,SAAS,EAAE,cAAc;QACzB,aAAa,EAAE,kBAAkB;QACjC,cAAc,EAAE,IAAI;QACpB,mBAAmB,EAAE,IAAI;QACzB,qBAAqB,EAAE,IAAI;QAC3B,aAAa,EAAE,IAAI;QACnB,YAAY,EAAE,IAAI;KACnB,CAAC;IAGF,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;QAC3D,8BAA8B;QAC9B,MAAM,WAAW,GAAG,aAAa,CAC/B,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,QAAQ,EAAE,EAC1C,YAAY,CACb,CAAC;QACF,MAAM,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC;QAElC,4BAA4B;QAC5B,MAAM,SAAS,GAAG,aAAa,CAC7B,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,MAAM,EAAE,EACxC,YAAY,CACb,CAAC;QACF,MAAM,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC;QAEhC,6BAA6B;QAC7
B,MAAM,CAAC,GAAG,EAAE;YACV,aAAa,CACX,EAAE,GAAG,UAAU,EAAE,aAAa,EAAE,SAAgB,EAAE,EAClD,YAAY,CACb,CAAC;QACJ,CAAC,CAAC,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=loggingOpenAIEmbedding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loggingOpenAIEmbedding.d.ts","sourceRoot":"","sources":["../../src/services/loggingOpenAIEmbedding.ts"],"names":[],"mappings":""}
@@ -0,0 +1,41 @@
1
+ // // temporary
2
+ // // this is a wrapper around OpenAIEmbedding that logs the input of the embedding
3
+ // // it's used to debug the embedding process (to make sure random metadata isn't wrongfully included)
4
+ // // it's not used in the production code
5
+ export {};
6
+ // import { OpenAIEmbedding } from "@llamaindex/openai";
7
+ // import type {
8
+ // OpenAI as OpenAILLM,
9
+ // } from "openai";
10
+ // type LLMInstance = Pick<OpenAILLM, "embeddings" | "apiKey" | "baseURL">;
11
+ // export class LoggingOpenAIEmbedding extends OpenAIEmbedding {
12
+ // constructor(
13
+ // init?: Omit<Partial<OpenAIEmbedding>, "session"> & {
14
+ // session?: LLMInstance;
15
+ // },
16
+ // ) {
17
+ // super(init);
18
+ // // overwrite private member "getOpenAIEmbedding" 🙀
19
+ // (this as any).getOpenAIEmbedding = async function(input: string[]): Promise<number[][]> {
20
+ // // TODO: ensure this for every sub class by calling it in the base class
21
+ // input = this.truncateMaxTokens(input);
22
+ // console.log("LoggingOpenAIEmbedding input", input);
23
+ // const { data } = await (
24
+ // await this.session
25
+ // ).embeddings.create(
26
+ // this.dimensions
27
+ // ? {
28
+ // model: this.model,
29
+ // dimensions: this.dimensions, // only sent to OpenAI if set by user
30
+ // input,
31
+ // }
32
+ // : {
33
+ // model: this.model,
34
+ // input,
35
+ // },
36
+ // );
37
+ // return data.map((d) => d.embedding);
38
+ // }
39
+ // }
40
+ // }
41
+ //# sourceMappingURL=loggingOpenAIEmbedding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"loggingOpenAIEmbedding.js","sourceRoot":"","sources":["../../src/services/loggingOpenAIEmbedding.ts"],"names":[],"mappings":"AACA,eAAe;AACf,mFAAmF;AACnF,uGAAuG;AACvG,0CAA0C;;AAE1C,wDAAwD;AACxD,gBAAgB;AAChB,yBAAyB;AACzB,mBAAmB;AACnB,2EAA2E;AAG3E,gEAAgE;AAChE,iBAAiB;AACjB,2DAA2D;AAC3D,+BAA+B;AAC/B,SAAS;AACT,QAAQ;AACR,mBAAmB;AACnB,kDAAkD;AAClD,gGAAgG;AAChG,iFAAiF;AACjF,+CAA+C;AAE/C,4DAA4D;AAE5D,iCAAiC;AACjC,6BAA6B;AAC7B,6BAA6B;AAC7B,0BAA0B;AAC1B,gBAAgB;AAChB,mCAAmC;AACnC,mFAAmF;AACnF,uBAAuB;AACvB,gBAAgB;AAChB,gBAAgB;AAChB,mCAAmC;AACnC,uBAAuB;AACvB,iBAAiB;AACjB,WAAW;AAEX,6CAA6C;AAC7C,QAAQ;AACR,MAAM;AACN,IAAI"}
@@ -0,0 +1,6 @@
1
+ import { BaseEmbedding } from "llamaindex";
2
// Type declaration for MockEmbedding, an embedding class derived from
// llamaindex's BaseEmbedding.
// NOTE(review): the "Mock" name suggests a stand-in for a real embedding
// provider; the implementation is not visible in this declaration — confirm.
+ export declare class MockEmbedding extends BaseEmbedding {
3
// Takes no arguments.
+ constructor();
4
// Resolves to an array of numbers for the given text.
+ getTextEmbedding(text: string): Promise<number[]>;
5
+ }
6
+ //# sourceMappingURL=mockEmbedding.d.ts.map