@meaningfully/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/.nvmrc +1 -0
  2. package/LICENSE +7 -0
  3. package/README.md +3 -0
  4. package/dist/DocumentSetManager.d.ts +28 -0
  5. package/dist/DocumentSetManager.d.ts.map +1 -0
  6. package/dist/DocumentSetManager.js +134 -0
  7. package/dist/DocumentSetManager.js.map +1 -0
  8. package/dist/Meaningfully.d.ts +52 -0
  9. package/dist/Meaningfully.d.ts.map +1 -0
  10. package/dist/Meaningfully.js +206 -0
  11. package/dist/Meaningfully.js.map +1 -0
  12. package/dist/MetadataManager.d.ts +32 -0
  13. package/dist/MetadataManager.d.ts.map +1 -0
  14. package/dist/MetadataManager.js +115 -0
  15. package/dist/MetadataManager.js.map +1 -0
  16. package/dist/api/embedding.d.ts +7 -0
  17. package/dist/api/embedding.d.ts.map +1 -0
  18. package/dist/api/embedding.js +94 -0
  19. package/dist/api/embedding.js.map +1 -0
  20. package/dist/api/embedding.test.d.ts +2 -0
  21. package/dist/api/embedding.test.d.ts.map +1 -0
  22. package/dist/api/embedding.test.js +340 -0
  23. package/dist/api/embedding.test.js.map +1 -0
  24. package/dist/index.d.ts +5 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +6 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/services/batchingWeaviateVectorStore.d.ts +6 -0
  29. package/dist/services/batchingWeaviateVectorStore.d.ts.map +1 -0
  30. package/dist/services/batchingWeaviateVectorStore.js +21 -0
  31. package/dist/services/batchingWeaviateVectorStore.js.map +1 -0
  32. package/dist/services/csvLoader.d.ts +3 -0
  33. package/dist/services/csvLoader.d.ts.map +1 -0
  34. package/dist/services/csvLoader.js +18 -0
  35. package/dist/services/csvLoader.js.map +1 -0
  36. package/dist/services/csvLoader.test.d.ts +2 -0
  37. package/dist/services/csvLoader.test.d.ts.map +1 -0
  38. package/dist/services/csvLoader.test.js +75 -0
  39. package/dist/services/csvLoader.test.js.map +1 -0
  40. package/dist/services/embeddings.d.ts +22 -0
  41. package/dist/services/embeddings.d.ts.map +1 -0
  42. package/dist/services/embeddings.js +314 -0
  43. package/dist/services/embeddings.js.map +1 -0
  44. package/dist/services/embeddings.test.d.ts +2 -0
  45. package/dist/services/embeddings.test.d.ts.map +1 -0
  46. package/dist/services/embeddings.test.js +115 -0
  47. package/dist/services/embeddings.test.js.map +1 -0
  48. package/dist/services/loggingOpenAIEmbedding.d.ts +2 -0
  49. package/dist/services/loggingOpenAIEmbedding.d.ts.map +1 -0
  50. package/dist/services/loggingOpenAIEmbedding.js +41 -0
  51. package/dist/services/loggingOpenAIEmbedding.js.map +1 -0
  52. package/dist/services/mockEmbedding.d.ts +6 -0
  53. package/dist/services/mockEmbedding.d.ts.map +1 -0
  54. package/dist/services/mockEmbedding.js +14 -0
  55. package/dist/services/mockEmbedding.js.map +1 -0
  56. package/dist/services/progressManager.d.ts +21 -0
  57. package/dist/services/progressManager.d.ts.map +1 -0
  58. package/dist/services/progressManager.js +76 -0
  59. package/dist/services/progressManager.js.map +1 -0
  60. package/dist/services/progressVectorStoreIndex.d.ts +21 -0
  61. package/dist/services/progressVectorStoreIndex.d.ts.map +1 -0
  62. package/dist/services/progressVectorStoreIndex.js +60 -0
  63. package/dist/services/progressVectorStoreIndex.js.map +1 -0
  64. package/dist/services/sentenceSplitter.d.ts +17 -0
  65. package/dist/services/sentenceSplitter.d.ts.map +1 -0
  66. package/dist/services/sentenceSplitter.js +207 -0
  67. package/dist/services/sentenceSplitter.js.map +1 -0
  68. package/dist/services/sentenceSplitter.test.d.ts +2 -0
  69. package/dist/services/sentenceSplitter.test.d.ts.map +1 -0
  70. package/dist/services/sentenceSplitter.test.js +68 -0
  71. package/dist/services/sentenceSplitter.test.js.map +1 -0
  72. package/dist/services/sploder.d.ts +13 -0
  73. package/dist/services/sploder.d.ts.map +1 -0
  74. package/dist/services/sploder.js +45 -0
  75. package/dist/services/sploder.js.map +1 -0
  76. package/dist/types/index.d.ts +77 -0
  77. package/dist/types/index.d.ts.map +1 -0
  78. package/dist/types/index.js +2 -0
  79. package/dist/types/index.js.map +1 -0
  80. package/dist/utils.d.ts +3 -0
  81. package/dist/utils.d.ts.map +1 -0
  82. package/dist/utils.js +7 -0
  83. package/dist/utils.js.map +1 -0
  84. package/package.json +43 -0
  85. package/src/Meaningfully.d.ts +57 -0
  86. package/src/Meaningfully.ts +228 -0
  87. package/src/MetadataManager.d.ts +27 -0
  88. package/src/MetadataManager.ts +145 -0
  89. package/src/api/embedding.d.ts +6 -0
  90. package/src/api/embedding.ts +122 -0
  91. package/src/index.ts +5 -0
  92. package/src/services/batchingWeaviateVectorStore.d.ts +5 -0
  93. package/src/services/batchingWeaviateVectorStore.ts +23 -0
  94. package/src/services/csvLoader.d.ts +2 -0
  95. package/src/services/csvLoader.ts +24 -0
  96. package/src/services/embeddings.d.ts +21 -0
  97. package/src/services/embeddings.ts +374 -0
  98. package/src/services/loggingOpenAIEmbedding.d.ts +0 -0
  99. package/src/services/loggingOpenAIEmbedding.ts +46 -0
  100. package/src/services/mockEmbedding.d.ts +5 -0
  101. package/src/services/mockEmbedding.ts +13 -0
  102. package/src/services/progressManager.d.ts +20 -0
  103. package/src/services/progressManager.ts +88 -0
  104. package/src/services/progressVectorStoreIndex.d.ts +20 -0
  105. package/src/services/progressVectorStoreIndex.ts +95 -0
  106. package/src/services/sentenceSplitter.d.ts +16 -0
  107. package/src/services/sentenceSplitter.ts +243 -0
  108. package/src/services/sploder.d.ts +12 -0
  109. package/src/services/sploder.ts +62 -0
  110. package/src/types/index.d.ts +71 -0
  111. package/src/types/index.ts +89 -0
  112. package/src/utils.d.ts +2 -0
  113. package/src/utils.ts +6 -0
  114. package/tests/MetadataManager.test.ts +120 -0
  115. package/tests/csvLoader.test.d.ts +1 -0
  116. package/tests/csvLoader.test.ts +88 -0
  117. package/tests/embedding.test.d.ts +1 -0
  118. package/tests/embedding.test.ts +425 -0
  119. package/tests/embeddings.test.d.ts +1 -0
  120. package/tests/embeddings.test.ts +144 -0
  121. package/tests/sentenceSplitter.test.d.ts +1 -0
  122. package/tests/sentenceSplitter.test.ts +81 -0
  123. package/tsconfig.json +31 -0
  124. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,77 @@
1
+ export interface SearchResult {
2
+ text: string;
3
+ score: number;
4
+ metadata: Record<string, any>;
5
+ }
6
+ export interface EmbeddingResult {
7
+ success: boolean;
8
+ error?: string;
9
+ index?: any;
10
+ }
11
+ export interface PreviewResult {
12
+ success: boolean;
13
+ error?: string;
14
+ nodes?: Array<{
15
+ text: string;
16
+ metadata: Record<string, any>;
17
+ }>;
18
+ estimatedPrice?: number;
19
+ tokenCount?: number;
20
+ pricePer1M?: number;
21
+ }
22
+ export interface SearchConfig {
23
+ modelProvider: string;
24
+ modelName: string;
25
+ projectName: string;
26
+ }
27
+ export interface DocumentSetMetadata {
28
+ documentSetId: number;
29
+ name: string;
30
+ uploadDate: Date;
31
+ parameters: Record<string, unknown>;
32
+ totalDocuments: number;
33
+ }
34
+ export interface DocumentSetParams {
35
+ datasetName: string;
36
+ description: string;
37
+ textColumns: string[];
38
+ metadataColumns: string[];
39
+ splitIntoSentences: boolean;
40
+ combineSentencesIntoChunks: boolean;
41
+ sploderMaxSize: number;
42
+ chunkSize: number;
43
+ chunkOverlap: number;
44
+ modelName: string;
45
+ modelProvider: string;
46
+ }
47
+ export interface EmbeddingConfig {
48
+ modelName: string;
49
+ modelProvider: string;
50
+ vectorStoreType: "simple" | "postgres" | "weaviate";
51
+ projectName: string;
52
+ storagePath: string;
53
+ splitIntoSentences: boolean;
54
+ combineSentencesIntoChunks: boolean;
55
+ sploderMaxSize: number;
56
+ chunkSize: number;
57
+ chunkOverlap: number;
58
+ }
59
+ export interface Settings {
60
+ openAIKey: string | null;
61
+ oLlamaBaseURL: string | null;
62
+ azureOpenAIKey: string | null;
63
+ azureOpenAIEndpoint: string | null;
64
+ azureOpenAIApiVersion: string | null;
65
+ mistralApiKey: string | null;
66
+ geminiApiKey: string | null;
67
+ }
68
+ export interface MetadataFilter {
69
+ key: string;
70
+ operator: "==" | "in" | ">" | "<" | "!=" | ">=" | "<=" | "nin" | "any" | "all" | "text_match" | "contains" | "is_empty";
71
+ value: any;
72
+ }
73
+ export interface Clients {
74
+ weaviateClient: any;
75
+ postgresClient: any;
76
+ }
77
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,GAAG,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAC/B,CAAC,CAAC;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAGD,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAA;IACrB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;CACpB;AAGD,MAAM,WAAW,mBAAmB;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,IAAI,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,kBAAkB,EAAE,OAAO,CAAC;IAC5B,0BAA0B,EAAE,OAAO,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAA;CACtB;AAGD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAA;IACrB,eAAe,EAAE,QAAQ,GAAG,UAAU,GAAG,UAAU,CAAC;IACpD,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,0BAA0B,EAAE,OAAO,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AAGD,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG,KAAK,
GAAG,KAAK,GAAG,YAAY,GAAG,UAAU,GAAG,UAAU,CAAC;IACxH,KAAK,EAAE,GAAG,CAAA;CACX;AAED,MAAM,WAAW,OAAO;IACtB,cAAc,EAAE,GAAG,CAAC;IACpB,cAAc,EAAE,GAAG,CAAC;CACrB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":""}
@@ -0,0 +1,3 @@
1
+ export declare function sanitizeProjectName(projectName: string): string;
2
+ export declare function capitalizeFirstLetter(val: string): string;
3
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,UAEtD;AACD,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,UAEhD"}
package/dist/utils.js ADDED
@@ -0,0 +1,7 @@
1
+ export function sanitizeProjectName(projectName) {
2
+ return projectName.replace(/[^a-zA-Z0-9]/g, "_");
3
+ }
4
+ export function capitalizeFirstLetter(val) {
5
+ return String(val).charAt(0).toUpperCase() + String(val).slice(1);
6
+ }
7
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,mBAAmB,CAAC,WAAmB;IACrD,OAAO,WAAW,CAAC,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC;AACnD,CAAC;AACD,MAAM,UAAU,qBAAqB,CAAC,GAAW;IAC/C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AACpE,CAAC"}
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@meaningfully/core",
3
+ "version": "0.1.0",
4
+ "description": "Core functionality for meaningfully semantic search",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "main": "./dist/index.js",
8
+ "module": "./dist/index.js",
9
+ "types": "./dist/index.d.ts",
10
+ "exports": {
11
+ ".": {
12
+ "import": "./dist/index.js",
13
+ "types": "./dist/index.d.ts"
14
+ }
15
+ },
16
+ "scripts": {
17
+ "build": "tsc",
18
+ "watch": "tsc --watch",
19
+ "test": "vitest --silent=false --disable-console-intercept"
20
+ },
21
+ "dependencies": {
22
+ "@llamaindex/azure": "^0.1.31",
23
+ "@llamaindex/google": "^0.3.18",
24
+ "@llamaindex/mistral": "^0.1.14",
25
+ "@llamaindex/ollama": "^0.1.14",
26
+ "@llamaindex/openai": "^0.4.14",
27
+ "@llamaindex/postgres": "^0.0.64",
28
+ "@llamaindex/weaviate": "^0.0.36",
29
+ "js-tiktoken": "^1.0.8",
30
+ "llamaindex": "^0.11.14",
31
+ "lodash": "^4.17.21",
32
+ "natural": "^8.1.0",
33
+ "openai": "^5.9.0",
34
+ "papaparse": "^5.5.3"
35
+ },
36
+ "devDependencies": {
37
+ "@types/node": "^20.5.6",
38
+ "@types/papaparse": "^5.3.16",
39
+ "typescript": "^5.7.3",
40
+ "vite": "^5.4.4",
41
+ "vitest": "^3.0.8"
42
+ }
43
+ }
@@ -0,0 +1,57 @@
1
+ import { DocumentSetParams, Settings, MetadataFilter, Clients } from './types';
2
+ type HasFilePath = {
3
+ filePath: string;
4
+ };
5
+ type DocumentSetParamsFilePath = DocumentSetParams & HasFilePath;
6
+ export declare class MeaningfullyAPI {
7
+ private manager;
8
+ private storagePath;
9
+ private clients;
10
+ constructor({ storagePath, weaviateClient }: {
11
+ storagePath: string;
12
+ weaviateClient?: any;
13
+ });
14
+ setClients(clients: Clients): void;
15
+ getClients(): Clients;
16
+ listDocumentSets(page?: number, pageSize?: number): Promise<{
17
+ documents: import("./types").DocumentSetMetadata[];
18
+ total: number;
19
+ }>;
20
+ getDocumentSet(documentSetId: number): Promise<import("./types").DocumentSetMetadata | null>;
21
+ deleteDocumentSet(documentSetId: number): Promise<{
22
+ success: boolean;
23
+ }>;
24
+ getVectorStoreType(): "simple" | "weaviate";
25
+ generatePreviewData(data: DocumentSetParamsFilePath): Promise<import("./types").PreviewResult>;
26
+ uploadCsv(data: DocumentSetParamsFilePath): Promise<{
27
+ success: boolean;
28
+ documentSetId: number;
29
+ }>;
30
+ searchDocumentSet(documentSetId: number, query: string, n_results?: number, filters?: MetadataFilter[]): Promise<import("./types").SearchResult[]>;
31
+ getDocument(documentSetId: number, documentNodeId: string): Promise<import("llamaindex").BaseNode<import("llamaindex").Metadata>>;
32
+ getSettings(): Promise<{
33
+ openAIKey: null;
34
+ oLlamaBaseURL: null;
35
+ azureOpenAIKey: null;
36
+ azureOpenAIEndpoint: null;
37
+ azureOpenAIApiVersion: string;
38
+ mistralApiKey: null;
39
+ geminiApiKey: null;
40
+ } & Settings>;
41
+ setSettings(settings: Settings): Promise<Settings & {
42
+ success: boolean;
43
+ }>;
44
+ getMaskedSettings(): Promise<{
45
+ openAIKey: string | null;
46
+ oLlamaBaseURL: null;
47
+ azureOpenAIKey: string | null;
48
+ azureOpenAIEndpoint: null;
49
+ azureOpenAIApiVersion: string;
50
+ mistralApiKey: string | null;
51
+ geminiApiKey: string | null;
52
+ }>;
53
+ setMaskedSettings(newSettings: Settings): Promise<Settings & {
54
+ success: boolean;
55
+ }>;
56
+ }
57
+ export {};
@@ -0,0 +1,228 @@
1
+ import { MetadataManager } from './MetadataManager.js';
2
+ import { loadDocumentsFromCsv } from './services/csvLoader.js';
3
+ import { createEmbeddings, getIndex, search, previewResults, getDocStore } from './api/embedding.js';
4
+ import { capitalizeFirstLetter } from './utils.js';
5
+ import { join } from 'path';
6
+ import type { DocumentSetParams, Settings, MetadataFilter, Clients } from './types/index.js';
7
+ import fs from 'fs';
8
+
9
+ type HasFilePath = {filePath: string};
10
+ type DocumentSetParamsFilePath = DocumentSetParams & HasFilePath;
11
+
12
+ const MASKING_PREFIX_LENGTH = 8; // how many characters to show at the start and end of an API key when masking it for display
13
+ // Gemini API keys are 39 chars and Mistral keys are 32, so MASKING_PREFIX_LENGTH must be < 16 for the shortest (Mistral) keys to be masked at all.
14
+ const maskKey = (key: string | null, n: number = MASKING_PREFIX_LENGTH): string | null => { // Returns a display form of an API key: first n chars + "*******" + last n chars.
15
+ if (!key) return null; // null and empty string both yield null
16
+ return (key.length > (n*2)) ? key.slice(0, n) + "*******" + key.slice(key.length - n) : key; // keys of length <= 2n are returned unmasked: the shown prefix and suffix would already cover the whole key
17
+ };
18
+
19
+
20
+ export class MeaningfullyAPI {
21
+ private manager: MetadataManager;
22
+ private storagePath: string;
23
+ private clients: Clients;
24
+
25
+ constructor({ storagePath, weaviateClient, metadataManager }: { storagePath: string, weaviateClient?: any, metadataManager: MetadataManager }) {
26
+ this.storagePath = storagePath;
27
+ this.manager = metadataManager;
28
+ this.clients = {
29
+ weaviateClient: weaviateClient,
30
+ postgresClient: null
31
+ };
32
+ }
33
+
34
+ setClients(clients: Clients) {
35
+ this.clients = { ...this.clients, ...clients };
36
+ }
37
+ getClients() {
38
+ return this.clients;
39
+ }
40
+
41
+ async listDocumentSets(page: number = 1, pageSize: number = 10) {
42
+ return await this.manager.getDocumentSets(page, pageSize);
43
+ }
44
+
45
+ async getDocumentSet(documentSetId: number) {
46
+ return await this.manager.getDocumentSet(documentSetId);
47
+ }
48
+ async deleteDocumentSet(documentSetId: number) { // Removes the set's metadata row and its on-disk data; resolves to { success: true } even when the id is unknown.
49
+ // Look up the document set first: its name is needed to locate its files on disk.
50
+ const result = await this.manager.getDocumentSet(documentSetId);
51
+ if (result){
52
+ // Delete the document set from the database
53
+ await this.manager.deleteDocumentSet(documentSetId);
54
+ // Delete the associated files from the filesystem (force: true means a missing path is not an error)
55
+ fs.rmSync(join(this.storagePath, result.name), { recursive: true, force: true }); // NOTE(review): result.name is joined into the path as-is — confirm names cannot contain path separators or ".."
56
+ fs.rmSync(join(this.storagePath, 'weaviate_data', capitalizeFirstLetter(result.name)), { recursive: true, force: true });
57
+ }
58
+ return { success: true };
59
+ }
60
+
61
+ getVectorStoreType() {
62
+ return this.clients.weaviateClient ? 'weaviate' : 'simple';
63
+ }
64
+
65
+ // Builds preview data for the CSV at data.filePath by delegating to previewResults.
66
+ // Throws if no text column is given; errors from previewResults propagate unchanged.
67
+ async generatePreviewData(data: DocumentSetParamsFilePath) {
68
+ const vectorStoreType = this.getVectorStoreType();
69
+ // Only the first text column is previewed.
70
+ const [firstTextColumn] = data.textColumns;
71
+ if (!firstTextColumn) {
72
+ throw new Error("No text column specified for preview.");
73
+ }
74
+ return await previewResults(data.filePath, firstTextColumn, {
75
+ modelName: data.modelName, // needed to tokenize, estimate costs
76
+ modelProvider: data.modelProvider,
77
+ splitIntoSentences: data.splitIntoSentences,
78
+ combineSentencesIntoChunks: data.combineSentencesIntoChunks,
79
+ sploderMaxSize: 100, // fixed here; data.sploderMaxSize is not consulted
80
+ vectorStoreType: vectorStoreType,
81
+ projectName: data.datasetName,
82
+ storagePath: this.storagePath,
83
+ chunkSize: data.chunkSize,
84
+ chunkOverlap: data.chunkOverlap
85
+ });
86
+ }
87
+
88
+ // Registers a new document set, then creates embeddings for every text column of the CSV at data.filePath.
89
+ // Resolves to { success: true, documentSetId }. If loading or embedding fails, the metadata row is deleted and the error rethrown.
90
+ async uploadCsv(data: DocumentSetParamsFilePath) {
91
+ const vectorStoreType = this.getVectorStoreType();
92
+ const documentSetId = await this.manager.addDocumentSet({
93
+ name: data.datasetName,
94
+ uploadDate: new Date(),
95
+ parameters: {
96
+ description: data.description,
97
+ textColumns: data.textColumns,
98
+ metadataColumns: data.metadataColumns,
99
+ splitIntoSentences: data.splitIntoSentences,
100
+ combineSentencesIntoChunks: data.combineSentencesIntoChunks,
101
+ sploderMaxSize: data.sploderMaxSize,
102
+ chunkSize: data.chunkSize,
103
+ chunkOverlap: data.chunkOverlap,
104
+ modelName: data.modelName,
105
+ modelProvider: data.modelProvider,
106
+ vectorStoreType: vectorStoreType,
107
+ },
108
+ totalDocuments: 0 // We'll update this after processing
109
+ });
110
+
111
+ const embedSettings = await this.manager.getSettings();
112
+
113
+ try {
114
+ for (const textColumn of data.textColumns) {
115
+ const documents = await loadDocumentsFromCsv(data.filePath, textColumn);
116
+ // Record this column's row count (the base MetadataManager query adds it to the stored total).
117
+ await this.manager.updateDocumentCount(documentSetId, documents.length);
118
+
119
+ const result = await createEmbeddings(data.filePath, textColumn, {
120
+ modelName: data.modelName,
121
+ modelProvider: data.modelProvider,
122
+ splitIntoSentences: data.splitIntoSentences,
123
+ combineSentencesIntoChunks: data.combineSentencesIntoChunks,
124
+ sploderMaxSize: 100, // TODO: make configurable
125
+ vectorStoreType: vectorStoreType,
126
+ projectName: data.datasetName,
127
+ // via https://medium.com/cameron-nokes/how-to-store-user-data-in-electron-3ba6bf66bc1e
128
+ storagePath: this.storagePath,
129
+ chunkSize: data.chunkSize,
130
+ chunkOverlap: data.chunkOverlap,
131
+ }, embedSettings, this.clients);
132
+ if (!result.success) {
133
+ throw new Error(result.error);
134
+ }
135
+ }
136
+ return { success: true, documentSetId };
137
+ } catch (error) {
138
+ // Log first, then roll back the metadata row; a failed rollback must not mask the original error.
139
+ console.error("deleting document set due to failure ", documentSetId, error);
140
+ try {
141
+ await this.manager.deleteDocumentSet(documentSetId);
142
+ } catch (cleanupError) {
143
+ console.error("failed to delete document set after upload failure ", documentSetId, cleanupError);
144
+ }
145
+ throw error;
146
+ }
147
+ }
148
+
149
+
150
+ // Looks up the document set, obtains its index via getIndex from the stored parameters, and delegates to search(); throws 'Document set not found' for an unknown id.
151
+ async searchDocumentSet(documentSetId: number, query: string, n_results: number = 10, filters?: MetadataFilter[] ) {
152
+ const documentSet = await this.manager.getDocumentSet(documentSetId);
153
+ if (!documentSet) { // fail fast: no need to load settings for a set that does not exist
154
+ throw new Error('Document set not found');
155
+ }
156
+ const settings = await this.manager.getSettings();
157
+ const index = await getIndex({
158
+ modelName: documentSet.parameters.modelName as string,
159
+ modelProvider: documentSet.parameters.modelProvider as string,
160
+ splitIntoSentences: documentSet.parameters.splitIntoSentences as boolean,
161
+ combineSentencesIntoChunks: documentSet.parameters.combineSentencesIntoChunks as boolean,
162
+ sploderMaxSize: 100,
163
+ vectorStoreType: documentSet.parameters.vectorStoreType as 'simple' | 'weaviate',
164
+ projectName: documentSet.name,
165
+ storagePath: this.storagePath,
166
+ chunkSize: 1024, // not actually used, we just re-use a config object that has this option
167
+ chunkOverlap: 20, // not actually used, we just re-use a config object that has this option
168
+ }, settings, this.clients);
169
+ return await search(index, query, n_results, filters);
170
+ }
171
+
172
+ async getDocument(documentSetId: number, documentNodeId: string){
173
+ const documentSet = await this.manager.getDocumentSet(documentSetId);
174
+ if (!documentSet) {
175
+ throw new Error('Document set not found');
176
+ }
177
+ const docStore = await getDocStore({
178
+ modelName: documentSet.parameters.modelName as string,
179
+ modelProvider: documentSet.parameters.modelProvider as string,
180
+ splitIntoSentences: documentSet.parameters.splitIntoSentences as boolean,
181
+ combineSentencesIntoChunks: documentSet.parameters.combineSentencesIntoChunks as boolean,
182
+ sploderMaxSize: 100,
183
+ vectorStoreType: documentSet.parameters.vectorStoreType as 'simple' | 'weaviate',
184
+ projectName: documentSet.name,
185
+ storagePath: this.storagePath,
186
+ chunkSize: 1024, // not actually used, we just re-use a config object that has this option
187
+ chunkOverlap: 20, // not actually used, we just re-use a config object that has this option
188
+ });
189
+ const document = await docStore.getNode(documentNodeId);
190
+ if (!document) {
191
+ throw new Error('Document not found');
192
+ }
193
+ return document;
194
+ }
195
+
196
+
197
+ async getSettings() {
198
+ return this.manager.getSettings();
199
+ }
200
+ async setSettings(settings: Settings) {
201
+ return this.manager.setSettings(settings);
202
+ }
203
+
204
+ async getMaskedSettings() {
205
+ const settings = await this.manager.getSettings();
206
+ return {
207
+ openAIKey: maskKey(settings.openAIKey),
208
+ oLlamaBaseURL: settings.oLlamaBaseURL,
209
+ azureOpenAIKey: maskKey(settings.azureOpenAIKey),
210
+ azureOpenAIEndpoint: settings.azureOpenAIEndpoint,
211
+ azureOpenAIApiVersion: settings.azureOpenAIApiVersion,
212
+ mistralApiKey: maskKey(settings.mistralApiKey),
213
+ geminiApiKey: maskKey(settings.geminiApiKey)
214
+ };
215
+ }
216
+ async setMaskedSettings(newSettings: Settings) {
217
+ const stored = await this.manager.getSettings();
218
+ const unmask = (incoming: string | null, saved: string | null) => incoming == maskKey(saved) ? saved : incoming; // an incoming key equal to the masked stored key means "unchanged"
219
+ return this.manager.setSettings({
220
+ ...newSettings,
221
+ openAIKey: unmask(newSettings.openAIKey, stored.openAIKey),
222
+ azureOpenAIKey: unmask(newSettings.azureOpenAIKey, stored.azureOpenAIKey),
223
+ mistralApiKey: unmask(newSettings.mistralApiKey, stored.mistralApiKey),
224
+ geminiApiKey: unmask(newSettings.geminiApiKey, stored.geminiApiKey)
225
+ });
226
+ }
227
+
228
+ }
@@ -0,0 +1,27 @@
1
+ import type { DocumentSetMetadata, Settings } from './types';
2
+ export declare class MetadataManager {
3
+ private sqliteDb;
4
+ constructor(storagePath: string);
5
+ private initializeDatabase;
6
+ addDocumentSet(metadata: Omit<DocumentSetMetadata, 'documentSetId'>): Promise<number>;
7
+ getDocumentSet(documentSetId: number): Promise<DocumentSetMetadata | null>;
8
+ getDocumentSets(page?: number, pageSize?: number): Promise<{
9
+ documents: DocumentSetMetadata[];
10
+ total: number;
11
+ }>;
12
+ updateDocumentCount(documentSetId: number, count: number): Promise<void>;
13
+ deleteDocumentSet(documentSetId: number): Promise<void>;
14
+ getSettings(): Promise<{
15
+ openAIKey: null;
16
+ oLlamaBaseURL: null;
17
+ azureOpenAIKey: null;
18
+ azureOpenAIEndpoint: null;
19
+ azureOpenAIApiVersion: string;
20
+ mistralApiKey: null;
21
+ geminiApiKey: null;
22
+ } & Settings>;
23
+ setSettings(settings: Settings): Promise<Settings & {
24
+ success: boolean;
25
+ }>;
26
+ close(): void;
27
+ }
@@ -0,0 +1,145 @@
1
+ import type { DocumentSetMetadata, Settings } from './types/index.js';
2
+
3
+ export abstract class MetadataManager {
4
+ protected queries = {
5
+ /*
6
+ Note: RETURNING on non-select/non-create statements is important for compatibility between SQLite and PostgreSQL.
7
+ (Without it, better-sqlite would demand to use run() instead of all() or get(), which would break the abstraction.)
8
+ */
9
+ createDocumentSetsTable: `
10
+ CREATE TABLE IF NOT EXISTS document_sets (
11
+ set_id SERIAL PRIMARY KEY,
12
+ name TEXT NOT NULL UNIQUE,
13
+ upload_date TIMESTAMP NOT NULL,
14
+ parameters TEXT NOT NULL,
15
+ total_documents INTEGER NOT NULL DEFAULT 0
16
+ );
17
+ `,
18
+ createSettingsTable: `
19
+ CREATE TABLE IF NOT EXISTS meaningfully_settings (
20
+ settings_id SERIAL PRIMARY KEY,
21
+ settings TEXT NOT NULL
22
+ );
23
+ `,
24
+ insertDocumentSet: `
25
+ INSERT INTO document_sets (name, upload_date, parameters, total_documents)
26
+ VALUES ($1, $2, $3, $4) RETURNING set_id
27
+ `,
28
+ selectDocumentSet: `
29
+ SELECT * FROM document_sets WHERE set_id = $1
30
+ `,
31
+ selectDocumentSets: `
32
+ SELECT * FROM document_sets ORDER BY upload_date DESC LIMIT $1 OFFSET $2
33
+ `,
34
+ countDocumentSets: `
35
+ SELECT COUNT(*) as count FROM document_sets
36
+ `,
37
+ updateDocumentCount: `
38
+ UPDATE document_sets SET total_documents = total_documents + $1 WHERE set_id = $2 RETURNING *
39
+ `,
40
+ deleteDocumentSet: `
41
+ DELETE FROM document_sets WHERE set_id = $1 RETURNING *
42
+ `,
43
+ selectSettings: `
44
+ SELECT * FROM meaningfully_settings WHERE settings_id = 1
45
+ `,
46
+ upsertSettings: `
47
+ INSERT INTO meaningfully_settings (settings_id, settings)
48
+ VALUES (1, $1)
49
+ ON CONFLICT (settings_id) DO UPDATE SET settings = $2
50
+ RETURNING *
51
+ `
52
+ // $1 and $2 receive the same value. They are bound separately to work around a SQLite/PostgreSQL incompatibility:
53
+ // PostgreSQL can reuse one argument in two places (written as $1 twice), but SQLite cannot (it only has ? placeholders),
54
+ // so the value is passed twice.
55
+ };
56
+
57
+ protected abstract runQuery<T>(query: string, params?: any[]): Promise<T[]>;
58
+ protected abstract runQuerySingle<T>(query: string, params?: any[]): Promise<T | null>;
59
+ protected abstract initializeDatabase(): Promise<void>;
60
+ protected abstract close(): void;
61
+
62
+ async addDocumentSet(metadata: Omit<DocumentSetMetadata, 'documentSetId'>): Promise<number> {
63
+ const result = await this.runQuerySingle<{ set_id: number }>(this.queries.insertDocumentSet, [
64
+ metadata.name,
65
+ metadata.uploadDate.toISOString(),
66
+ JSON.stringify(metadata.parameters),
67
+ metadata.totalDocuments
68
+ ]);
69
+ return result?.set_id || 0;
70
+ }
71
+
72
+ async getDocumentSet(documentSetId: number): Promise<DocumentSetMetadata | null> {
73
+ const row = await this.runQuerySingle<{
74
+ set_id: number;
75
+ name: string;
76
+ upload_date: string;
77
+ parameters: string;
78
+ total_documents: number;
79
+ }>(this.queries.selectDocumentSet, [documentSetId]);
80
+
81
+ if (!row) return null;
82
+
83
+ return {
84
+ documentSetId: row.set_id,
85
+ name: row.name,
86
+ uploadDate: new Date(row.upload_date),
87
+ parameters: JSON.parse(row.parameters),
88
+ totalDocuments: row.total_documents
89
+ };
90
+ }
91
+
92
+ // Returns one page of document sets (newest upload first) together with the total number of sets.
93
+ // page is 1-based; a page below 1 is treated as the first page.
94
+ async getDocumentSets(page: number = 1, pageSize: number = 10): Promise<{ documents: DocumentSetMetadata[]; total: number }> {
95
+ const offset = Math.max(0, (page - 1) * pageSize); // PostgreSQL rejects a negative OFFSET
96
+ const totalCountRow = await this.runQuerySingle<{ count: number | string }>(this.queries.countDocumentSets);
97
+ // COUNT(*) may arrive as a string (node-postgres returns bigint that way by default), so coerce to honor `total: number`.
98
+ const totalCount = Number(totalCountRow?.count ?? 0);
99
+ const rows = await this.runQuery<{
100
+ set_id: number;
101
+ name: string;
102
+ upload_date: string;
103
+ parameters: string;
104
+ total_documents: number;
105
+ }>(this.queries.selectDocumentSets, [pageSize, offset]);
106
+ const documents = rows.map((row) => ({
107
+ documentSetId: row.set_id,
108
+ name: row.name,
109
+ uploadDate: new Date(row.upload_date),
110
+ parameters: JSON.parse(row.parameters),
111
+ totalDocuments: row.total_documents
112
+ }));
113
+ return { documents, total: totalCount };
114
+ }
115
+
116
+ async updateDocumentCount(documentSetId: number, count: number): Promise<void> {
117
+ await this.runQuery(this.queries.updateDocumentCount, [count, documentSetId]);
118
+ }
119
+
120
+ async deleteDocumentSet(documentSetId: number): Promise<void> {
121
+ await this.runQuery(this.queries.deleteDocumentSet, [documentSetId]);
122
+ }
123
+
124
+ async getSettings(): Promise<Settings> {
125
+ const DEFAULT_SETTINGS: Settings = {
126
+ openAIKey: null,
127
+ oLlamaBaseURL: null,
128
+ azureOpenAIKey: null,
129
+ azureOpenAIEndpoint: null,
130
+ azureOpenAIApiVersion: "2024-02-01",
131
+ mistralApiKey: null,
132
+ geminiApiKey: null,
133
+ };
134
+
135
+ const row = await this.runQuerySingle<{ settings: string }>(this.queries.selectSettings);
136
+ return row ? { ...DEFAULT_SETTINGS, ...JSON.parse(row.settings) } : DEFAULT_SETTINGS;
137
+ }
138
+
139
+ async setSettings(settings: Settings): Promise<{ success: boolean }> {
140
+ // the JSON.stringify(settings) is repeated to work around a cross-compatibility bug in SQLite versus Postgresql
141
+ // where PG can accept the same argument twice (specified as $1 in two places), but SQLITE cannot (it just has ? placeholders)
142
+ await this.runQuery(this.queries.upsertSettings, [JSON.stringify(settings), JSON.stringify(settings)]);
143
+ return { success: true };
144
+ }
145
+ }
@@ -0,0 +1,6 @@
1
+ import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types";
2
+ export declare function createEmbeddings(csvPath: string, textColumnName: string, config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<EmbeddingResult>;
3
+ export declare function previewResults(csvPath: string, textColumnName: string, config: EmbeddingConfig): Promise<PreviewResult>;
4
+ export declare function getDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
5
+ export declare function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<import("llamaindex").VectorStoreIndex>;
6
+ export declare function search(index: any, query: string, numResults?: number, filters?: MetadataFilter[]): Promise<SearchResult[]>;