@meaningfully/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.nvmrc +1 -0
- package/LICENSE +7 -0
- package/README.md +3 -0
- package/dist/DocumentSetManager.d.ts +28 -0
- package/dist/DocumentSetManager.d.ts.map +1 -0
- package/dist/DocumentSetManager.js +134 -0
- package/dist/DocumentSetManager.js.map +1 -0
- package/dist/Meaningfully.d.ts +52 -0
- package/dist/Meaningfully.d.ts.map +1 -0
- package/dist/Meaningfully.js +206 -0
- package/dist/Meaningfully.js.map +1 -0
- package/dist/MetadataManager.d.ts +32 -0
- package/dist/MetadataManager.d.ts.map +1 -0
- package/dist/MetadataManager.js +115 -0
- package/dist/MetadataManager.js.map +1 -0
- package/dist/api/embedding.d.ts +7 -0
- package/dist/api/embedding.d.ts.map +1 -0
- package/dist/api/embedding.js +94 -0
- package/dist/api/embedding.js.map +1 -0
- package/dist/api/embedding.test.d.ts +2 -0
- package/dist/api/embedding.test.d.ts.map +1 -0
- package/dist/api/embedding.test.js +340 -0
- package/dist/api/embedding.test.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts +6 -0
- package/dist/services/batchingWeaviateVectorStore.d.ts.map +1 -0
- package/dist/services/batchingWeaviateVectorStore.js +21 -0
- package/dist/services/batchingWeaviateVectorStore.js.map +1 -0
- package/dist/services/csvLoader.d.ts +3 -0
- package/dist/services/csvLoader.d.ts.map +1 -0
- package/dist/services/csvLoader.js +18 -0
- package/dist/services/csvLoader.js.map +1 -0
- package/dist/services/csvLoader.test.d.ts +2 -0
- package/dist/services/csvLoader.test.d.ts.map +1 -0
- package/dist/services/csvLoader.test.js +75 -0
- package/dist/services/csvLoader.test.js.map +1 -0
- package/dist/services/embeddings.d.ts +22 -0
- package/dist/services/embeddings.d.ts.map +1 -0
- package/dist/services/embeddings.js +314 -0
- package/dist/services/embeddings.js.map +1 -0
- package/dist/services/embeddings.test.d.ts +2 -0
- package/dist/services/embeddings.test.d.ts.map +1 -0
- package/dist/services/embeddings.test.js +115 -0
- package/dist/services/embeddings.test.js.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts +2 -0
- package/dist/services/loggingOpenAIEmbedding.d.ts.map +1 -0
- package/dist/services/loggingOpenAIEmbedding.js +41 -0
- package/dist/services/loggingOpenAIEmbedding.js.map +1 -0
- package/dist/services/mockEmbedding.d.ts +6 -0
- package/dist/services/mockEmbedding.d.ts.map +1 -0
- package/dist/services/mockEmbedding.js +14 -0
- package/dist/services/mockEmbedding.js.map +1 -0
- package/dist/services/progressManager.d.ts +21 -0
- package/dist/services/progressManager.d.ts.map +1 -0
- package/dist/services/progressManager.js +76 -0
- package/dist/services/progressManager.js.map +1 -0
- package/dist/services/progressVectorStoreIndex.d.ts +21 -0
- package/dist/services/progressVectorStoreIndex.d.ts.map +1 -0
- package/dist/services/progressVectorStoreIndex.js +60 -0
- package/dist/services/progressVectorStoreIndex.js.map +1 -0
- package/dist/services/sentenceSplitter.d.ts +17 -0
- package/dist/services/sentenceSplitter.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.js +207 -0
- package/dist/services/sentenceSplitter.js.map +1 -0
- package/dist/services/sentenceSplitter.test.d.ts +2 -0
- package/dist/services/sentenceSplitter.test.d.ts.map +1 -0
- package/dist/services/sentenceSplitter.test.js +68 -0
- package/dist/services/sentenceSplitter.test.js.map +1 -0
- package/dist/services/sploder.d.ts +13 -0
- package/dist/services/sploder.d.ts.map +1 -0
- package/dist/services/sploder.js +45 -0
- package/dist/services/sploder.js.map +1 -0
- package/dist/types/index.d.ts +77 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +7 -0
- package/dist/utils.js.map +1 -0
- package/package.json +43 -0
- package/src/Meaningfully.d.ts +57 -0
- package/src/Meaningfully.ts +228 -0
- package/src/MetadataManager.d.ts +27 -0
- package/src/MetadataManager.ts +145 -0
- package/src/api/embedding.d.ts +6 -0
- package/src/api/embedding.ts +122 -0
- package/src/index.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.d.ts +5 -0
- package/src/services/batchingWeaviateVectorStore.ts +23 -0
- package/src/services/csvLoader.d.ts +2 -0
- package/src/services/csvLoader.ts +24 -0
- package/src/services/embeddings.d.ts +21 -0
- package/src/services/embeddings.ts +374 -0
- package/src/services/loggingOpenAIEmbedding.d.ts +0 -0
- package/src/services/loggingOpenAIEmbedding.ts +46 -0
- package/src/services/mockEmbedding.d.ts +5 -0
- package/src/services/mockEmbedding.ts +13 -0
- package/src/services/progressManager.d.ts +20 -0
- package/src/services/progressManager.ts +88 -0
- package/src/services/progressVectorStoreIndex.d.ts +20 -0
- package/src/services/progressVectorStoreIndex.ts +95 -0
- package/src/services/sentenceSplitter.d.ts +16 -0
- package/src/services/sentenceSplitter.ts +243 -0
- package/src/services/sploder.d.ts +12 -0
- package/src/services/sploder.ts +62 -0
- package/src/types/index.d.ts +71 -0
- package/src/types/index.ts +89 -0
- package/src/utils.d.ts +2 -0
- package/src/utils.ts +6 -0
- package/tests/MetadataManager.test.ts +120 -0
- package/tests/csvLoader.test.d.ts +1 -0
- package/tests/csvLoader.test.ts +88 -0
- package/tests/embedding.test.d.ts +1 -0
- package/tests/embedding.test.ts +425 -0
- package/tests/embeddings.test.d.ts +1 -0
- package/tests/embeddings.test.ts +144 -0
- package/tests/sentenceSplitter.test.d.ts +1 -0
- package/tests/sentenceSplitter.test.ts +81 -0
- package/tsconfig.json +31 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/** One search hit: the matched text, its score, and the metadata attached to it. */
export interface SearchResult {
    text: string;
    score: number;
    metadata: { [key: string]: any };
}
|
|
6
|
+
/**
 * Outcome of an embedding run.
 * `error` carries the failure message when `success` is false; `index` is the
 * (untyped) index object produced by the run, when there is one.
 */
export interface EmbeddingResult {
    success: boolean;
    error?: string;
    index?: any;
}
|
|
11
|
+
/**
 * Outcome of a preview run: optional sample nodes plus optional token and price figures.
 * Every field other than `success` may be absent.
 */
export interface PreviewResult {
    success: boolean;
    error?: string;
    nodes?: {
        text: string;
        metadata: Record<string, any>;
    }[];
    estimatedPrice?: number;
    tokenCount?: number;
    pricePer1M?: number;
}
|
|
22
|
+
/** Identifies the embedding model (provider + name) and the project a search runs against. */
export interface SearchConfig {
    projectName: string;
    modelProvider: string;
    modelName: string;
}
|
|
27
|
+
/**
 * Metadata record describing one uploaded document set.
 * `parameters` is an open-ended bag of the options the set was created with.
 */
export interface DocumentSetMetadata {
    documentSetId: number;
    name: string;
    uploadDate: Date;
    totalDocuments: number;
    parameters: { [key: string]: unknown };
}
|
|
34
|
+
/**
 * Caller-supplied options for creating a document set: naming, which columns
 * to read, how text is split and chunked, and which embedding model to use.
 */
export interface DocumentSetParams {
    // Naming
    datasetName: string;
    description: string;

    // Column selection
    textColumns: string[];
    metadataColumns: string[];

    // Splitting / chunking
    splitIntoSentences: boolean;
    combineSentencesIntoChunks: boolean;
    sploderMaxSize: number;
    chunkSize: number;
    chunkOverlap: number;

    // Embedding model
    modelName: string;
    modelProvider: string;
}
|
|
47
|
+
/**
 * Full configuration handed to the embedding functions: model selection,
 * vector-store backend, project/storage location, and splitting options.
 */
export interface EmbeddingConfig {
    // Embedding model
    modelName: string;
    modelProvider: string;

    // Where vectors are stored
    vectorStoreType: "simple" | "postgres" | "weaviate";
    projectName: string;
    storagePath: string;

    // Splitting / chunking
    splitIntoSentences: boolean;
    combineSentencesIntoChunks: boolean;
    sploderMaxSize: number;
    chunkSize: number;
    chunkOverlap: number;
}
|
|
59
|
+
/**
 * Provider credentials and endpoints. Every field is required but nullable;
 * `null` stands for "not configured".
 */
export interface Settings {
    // OpenAI
    openAIKey: string | null;

    // Ollama
    oLlamaBaseURL: string | null;

    // Azure OpenAI
    azureOpenAIKey: string | null;
    azureOpenAIEndpoint: string | null;
    azureOpenAIApiVersion: string | null;

    // Mistral
    mistralApiKey: string | null;

    // Gemini
    geminiApiKey: string | null;
}
|
|
68
|
+
/** A single metadata condition: compare the field named `key` to `value` using `operator`. */
export interface MetadataFilter {
    key: string;
    operator:
        | "=="
        | "in"
        | ">"
        | "<"
        | "!="
        | ">="
        | "<="
        | "nin"
        | "any"
        | "all"
        | "text_match"
        | "contains"
        | "is_empty";
    value: any;
}
|
|
73
|
+
/** Holder for the optional backend client handles (both untyped). */
export interface Clients {
    postgresClient: any;
    weaviateClient: any;
}
|
|
77
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,GAAG,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KAC/B,CAAC,CAAC;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAGD,MAAM,WAAW,YAAY;IAC3B,aAAa,EAAE,MAAM,CAAA;IACrB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;CACpB;AAGD,MAAM,WAAW,mBAAmB;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,IAAI,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,kBAAkB,EAAE,OAAO,CAAC;IAC5B,0BAA0B,EAAE,OAAO,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAA;CACtB;AAGD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,aAAa,EAAE,MAAM,CAAA;IACrB,eAAe,EAAE,QAAQ,GAAG,UAAU,GAAG,UAAU,CAAC;IACpD,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,0BAA0B,EAAE,OAAO,CAAC;IACpC,cAAc,EAAE,MAAM,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AAGD,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,qBAAqB,EAAE,MAAM,GAAG,IAAI,CAAC;IACrC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;CAC7B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,IAAI,GAAG,IAAI,GAAG,GAAG,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,KAAK,GAAG,KAAK,GA
AG,KAAK,GAAG,YAAY,GAAG,UAAU,GAAG,UAAU,CAAC;IACxH,KAAK,EAAE,GAAG,CAAA;CACX;AAED,MAAM,WAAW,OAAO;IACtB,cAAc,EAAE,GAAG,CAAC;IACpB,cAAc,EAAE,GAAG,CAAC;CACrB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":""}
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,UAEtD;AACD,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,UAEhD"}
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,mBAAmB,CAAC,WAAmB;IACrD,OAAO,WAAW,CAAC,OAAO,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC;AACnD,CAAC;AACD,MAAM,UAAU,qBAAqB,CAAC,GAAW;IAC/C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AACpE,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@meaningfully/core",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Core functionality for meaningfully semantic search",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"main": "./dist/index.js",
|
|
8
|
+
"module": "./dist/index.js",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"import": "./dist/index.js",
|
|
13
|
+
"types": "./dist/index.d.ts"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"scripts": {
|
|
17
|
+
"build": "tsc",
|
|
18
|
+
"watch": "tsc --watch",
|
|
19
|
+
"test": "vitest --silent=false --disable-console-intercept"
|
|
20
|
+
},
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"@llamaindex/azure": "^0.1.31",
|
|
23
|
+
"@llamaindex/google": "^0.3.18",
|
|
24
|
+
"@llamaindex/mistral": "^0.1.14",
|
|
25
|
+
"@llamaindex/ollama": "^0.1.14",
|
|
26
|
+
"@llamaindex/openai": "^0.4.14",
|
|
27
|
+
"@llamaindex/postgres": "^0.0.64",
|
|
28
|
+
"@llamaindex/weaviate": "^0.0.36",
|
|
29
|
+
"js-tiktoken": "^1.0.8",
|
|
30
|
+
"llamaindex": "^0.11.14",
|
|
31
|
+
"lodash": "^4.17.21",
|
|
32
|
+
"natural": "^8.1.0",
|
|
33
|
+
"openai": "^5.9.0",
|
|
34
|
+
"papaparse": "^5.5.3"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@types/node": "^20.5.6",
|
|
38
|
+
"@types/papaparse": "^5.3.16",
|
|
39
|
+
"typescript": "^5.7.3",
|
|
40
|
+
"vite": "^5.4.4",
|
|
41
|
+
"vitest": "^3.0.8"
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { DocumentSetParams, Settings, MetadataFilter, Clients } from './types';
|
|
2
|
+
/** Carries the on-disk path of an input file. */
type HasFilePath = { filePath: string };
/** Document-set parameters together with the path of the file they apply to. */
type DocumentSetParamsFilePath = HasFilePath & DocumentSetParams;
/**
 * Type declaration for the MeaningfullyAPI class.
 *
 * NOTE(review): the constructor declared here accepts only `storagePath` and
 * `weaviateClient`, while Meaningfully.ts in this same package also requires
 * `metadataManager`. This declaration looks stale relative to the source — confirm
 * before relying on it.
 */
export declare class MeaningfullyAPI {
    private manager;
    private storagePath;
    private clients;
    constructor({ storagePath, weaviateClient }: {
        storagePath: string;
        weaviateClient?: any;
    });

    // Client handles
    setClients(clients: Clients): void;
    getClients(): Clients;

    // Document-set bookkeeping
    listDocumentSets(page?: number, pageSize?: number): Promise<{
        documents: import("./types").DocumentSetMetadata[];
        total: number;
    }>;
    getDocumentSet(documentSetId: number): Promise<import("./types").DocumentSetMetadata | null>;
    deleteDocumentSet(documentSetId: number): Promise<{ success: boolean }>;
    getVectorStoreType(): "simple" | "weaviate";

    // Ingestion and search
    generatePreviewData(data: DocumentSetParamsFilePath): Promise<import("./types").PreviewResult>;
    uploadCsv(data: DocumentSetParamsFilePath): Promise<{
        success: boolean;
        documentSetId: number;
    }>;
    searchDocumentSet(documentSetId: number, query: string, n_results?: number, filters?: MetadataFilter[]): Promise<import("./types").SearchResult[]>;
    getDocument(documentSetId: number, documentNodeId: string): Promise<import("llamaindex").BaseNode<import("llamaindex").Metadata>>;

    // Settings (raw and masked)
    getSettings(): Promise<{
        openAIKey: null;
        oLlamaBaseURL: null;
        azureOpenAIKey: null;
        azureOpenAIEndpoint: null;
        azureOpenAIApiVersion: string;
        mistralApiKey: null;
        geminiApiKey: null;
    } & Settings>;
    setSettings(settings: Settings): Promise<Settings & { success: boolean }>;
    getMaskedSettings(): Promise<{
        openAIKey: string | null;
        oLlamaBaseURL: null;
        azureOpenAIKey: string | null;
        azureOpenAIEndpoint: null;
        azureOpenAIApiVersion: string;
        mistralApiKey: string | null;
        geminiApiKey: string | null;
    }>;
    setMaskedSettings(newSettings: Settings): Promise<Settings & { success: boolean }>;
}
|
|
57
|
+
export {};
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import fs from 'fs';
import { isAbsolute, join, relative, resolve, sep } from 'path';

import { createEmbeddings, getIndex, search, previewResults, getDocStore } from './api/embedding.js';
import { MetadataManager } from './MetadataManager.js';
import { loadDocumentsFromCsv } from './services/csvLoader.js';
import type { DocumentSetParams, Settings, MetadataFilter, Clients } from './types/index.js';
import { capitalizeFirstLetter } from './utils.js';
|
|
8
|
+
|
|
9
|
+
/** Carries the on-disk path of the CSV file to read. */
type HasFilePath = {
  filePath: string;
};

/** Document-set parameters together with the path of the CSV file they apply to. */
type DocumentSetParamsFilePath = HasFilePath & DocumentSetParams;
|
|
11
|
+
|
|
12
|
+
/** How many characters to show at the start and at the end of an API key when masking it for display. */
const MASKING_PREFIX_LENGTH = 8;
// Gemini API keys are 39 chars; Mistral is 32, so MASKING_PREFIX_LENGTH must be < 16 for those keys
// to keep a visible prefix/suffix around a hidden middle section.

/**
 * Masks an API key for display.
 *
 * @param key - The secret to mask; `null` (or an empty string) yields `null`.
 * @param n - Number of characters kept visible at each end.
 * @returns `null` for a missing key; `<first n>*******<last n>` when the key is longer than
 *   `2 * n`; otherwise just `*******`.
 *
 * A key too short to have a hidden middle is masked entirely instead of being echoed back in
 * clear text, and so is any key when `n` is not a positive number (a negative or NaN `n` would
 * otherwise expose most or all of the key).
 */
const maskKey = (key: string | null, n: number = MASKING_PREFIX_LENGTH): string | null => {
  if (!key) return null;
  const filler = "*******";
  // Written as negated comparisons so that NaN also takes the fully-masked path.
  if (!(n > 0) || !(key.length > n * 2)) return filler;
  return key.slice(0, n) + filler + key.slice(key.length - n);
};
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
/**
 * Facade tying together document-set metadata (via the injected MetadataManager),
 * CSV loading, embedding creation, search, and settings access.
 */
export class MeaningfullyAPI {
  private manager: MetadataManager;
  private storagePath: string;
  private clients: Clients;

  /**
   * @param storagePath - Directory under which per-document-set data is stored.
   * @param weaviateClient - Optional Weaviate client; when truthy, the 'weaviate' vector store is used.
   * @param metadataManager - Backend used for document-set records and settings.
   */
  constructor({ storagePath, weaviateClient, metadataManager }: { storagePath: string, weaviateClient?: any, metadataManager: MetadataManager }) {
    this.storagePath = storagePath;
    this.manager = metadataManager;
    this.clients = {
      weaviateClient: weaviateClient,
      postgresClient: null
    };
  }

  /** Merges the given client handles over the current ones. */
  setClients(clients: Clients) {
    this.clients = { ...this.clients, ...clients };
  }

  /** Returns the current client handles. */
  getClients() {
    return this.clients;
  }

  /** Returns one page of document sets plus the total count. */
  async listDocumentSets(page: number = 1, pageSize: number = 10) {
    return await this.manager.getDocumentSets(page, pageSize);
  }

  /** Returns the document set with the given id, or null when there is none. */
  async getDocumentSet(documentSetId: number) {
    return await this.manager.getDocumentSet(documentSetId);
  }

  /**
   * Deletes a document set's database record and its on-disk data.
   *
   * Resolves to `{ success: true }` whether or not the set existed. Filesystem removal is
   * skipped (with a warning) when the set's name does not resolve to a location strictly
   * inside the storage directory — e.g. an empty name, `.`, or a `../` path — because a
   * recursive forced delete of such a path would wipe the storage root or unrelated files.
   */
  async deleteDocumentSet(documentSetId: number) {
    const documentSet = await this.manager.getDocumentSet(documentSetId);
    if (documentSet) {
      // Delete the document set from the database
      await this.manager.deleteDocumentSet(documentSetId);
      // Delete the associated files from the filesystem
      this.removeStoredEntry(this.storagePath, documentSet.name);
      this.removeStoredEntry(join(this.storagePath, 'weaviate_data'), capitalizeFirstLetter(documentSet.name));
    }
    return { success: true };
  }

  /**
   * Recursively removes `entryName` under `baseDir`, but only if the resolved target is a
   * strict descendant of `baseDir`. A missing target is ignored (`force: true`).
   */
  private removeStoredEntry(baseDir: string, entryName: string): void {
    const base = resolve(baseDir);
    const target = resolve(base, entryName);
    const rel = relative(base, target);
    // rel === ''          -> target is baseDir itself
    // rel starts with '..' segment -> target is above/beside baseDir
    // rel is absolute     -> target is on a different root/drive
    const escapesBase = rel === '' || rel === '..' || rel.startsWith('..' + sep) || isAbsolute(rel);
    if (escapesBase) {
      console.warn("refusing to delete a path outside of the storage directory ", target);
      return;
    }
    fs.rmSync(target, { recursive: true, force: true });
  }

  /** Returns 'weaviate' when a Weaviate client is set, otherwise 'simple'. */
  getVectorStoreType() {
    return this.clients.weaviateClient ? 'weaviate' : 'simple';
  }

  /**
   * Produces preview data for the first text column of the given CSV.
   *
   * @throws Error when no text column is specified; errors from `previewResults` propagate unchanged.
   */
  async generatePreviewData(data: DocumentSetParamsFilePath) {
    const vectorStoreType = this.getVectorStoreType();
    const firstTextColumn = data.textColumns[0];
    if (!firstTextColumn) {
      throw new Error("No text column specified for preview.");
    }
    return await previewResults(data.filePath, firstTextColumn, {
      modelName: data.modelName, // needed to tokenize, estimate costs
      modelProvider: data.modelProvider,
      splitIntoSentences: data.splitIntoSentences,
      combineSentencesIntoChunks: data.combineSentencesIntoChunks,
      sploderMaxSize: 100,
      vectorStoreType: vectorStoreType,
      projectName: data.datasetName,
      storagePath: this.storagePath,
      chunkSize: data.chunkSize,
      chunkOverlap: data.chunkOverlap
    });
  }

  /**
   * Registers a new document set, then loads and embeds every text column of the CSV.
   *
   * On any failure the just-created document-set record is deleted (best effort) and the
   * original error is rethrown.
   *
   * @returns `{ success: true, documentSetId }` once all columns are embedded.
   */
  async uploadCsv(data: DocumentSetParamsFilePath) {
    // figure out if weaviate is available
    const vectorStoreType = this.getVectorStoreType();
    // First create the document set record
    const documentSetId = await this.manager.addDocumentSet({
      name: data.datasetName,
      uploadDate: new Date(),
      parameters: {
        description: data.description,
        textColumns: data.textColumns,
        metadataColumns: data.metadataColumns,
        splitIntoSentences: data.splitIntoSentences,
        combineSentencesIntoChunks: data.combineSentencesIntoChunks,
        // NOTE(review): this records the caller's sploderMaxSize, but the embedding call
        // below always passes 100 — confirm which value is intended.
        sploderMaxSize: data.sploderMaxSize,
        chunkSize: data.chunkSize,
        chunkOverlap: data.chunkOverlap,
        modelName: data.modelName,
        modelProvider: data.modelProvider,
        vectorStoreType: vectorStoreType,
      },
      totalDocuments: 0 // We'll update this after processing
    });

    const embedSettings = await this.manager.getSettings();

    // Load and process the documents
    try {
      // Process each text column
      for (const textColumn of data.textColumns) {
        const documents = await loadDocumentsFromCsv(data.filePath, textColumn);

        // Update total documents count
        await this.manager.updateDocumentCount(documentSetId, documents.length);

        // Create embeddings for this column
        const ret = await createEmbeddings(data.filePath, textColumn, {
          modelName: data.modelName,
          modelProvider: data.modelProvider,
          splitIntoSentences: data.splitIntoSentences,
          combineSentencesIntoChunks: data.combineSentencesIntoChunks,
          sploderMaxSize: 100, // TODO: make configurable
          vectorStoreType: vectorStoreType,
          projectName: data.datasetName,
          // via https://medium.com/cameron-nokes/how-to-store-user-data-in-electron-3ba6bf66bc1e
          storagePath: this.storagePath,
          chunkSize: data.chunkSize,
          chunkOverlap: data.chunkOverlap,
        }, embedSettings, this.clients);
        if (!ret.success) {
          // Fall back to a descriptive message so the error is never blank.
          throw new Error(ret.error ?? `Embedding creation failed for column "${textColumn}"`);
        }
      }
      return { success: true, documentSetId };
    } catch (error) {
      console.error("deleting document set due to failure ", documentSetId, error);
      try {
        await this.manager.deleteDocumentSet(documentSetId);
      } catch (cleanupError) {
        // A failing cleanup must not replace the error that caused it.
        console.error("failed to delete document set after failure ", documentSetId, cleanupError);
      }
      throw error;
    }
  }

  /**
   * Rebuilds the embedding config for an existing document set from its stored parameters.
   * Used when reopening the set's index or doc store.
   */
  private configForDocumentSet(documentSet: { name: string; parameters: Record<string, unknown> }) {
    const stored = documentSet.parameters;
    return {
      modelName: stored.modelName as string,
      modelProvider: stored.modelProvider as string,
      splitIntoSentences: stored.splitIntoSentences as boolean,
      combineSentencesIntoChunks: stored.combineSentencesIntoChunks as boolean,
      sploderMaxSize: 100,
      vectorStoreType: stored.vectorStoreType as 'simple' | 'weaviate',
      projectName: documentSet.name,
      storagePath: this.storagePath,
      chunkSize: 1024, // not actually used, we just re-use a config object that has this option
      chunkOverlap: 20, // not actually used, we just re-use a config object that has this option
    };
  }

  /**
   * Runs a query against a document set's index.
   *
   * @throws Error('Document set not found') when the id is unknown.
   */
  async searchDocumentSet(documentSetId: number, query: string, n_results: number = 10, filters?: MetadataFilter[] ) {
    const documentSet = await this.manager.getDocumentSet(documentSetId);
    const settings = await this.manager.getSettings();
    if (!documentSet) {
      throw new Error('Document set not found');
    }
    const index = await getIndex(this.configForDocumentSet(documentSet), settings, this.clients);
    return await search(index, query, n_results, filters);
  }

  /**
   * Fetches a single node from a document set's doc store.
   *
   * @throws Error('Document set not found') when the set id is unknown.
   * @throws Error('Document not found') when the doc store has no such node.
   */
  async getDocument(documentSetId: number, documentNodeId: string){
    const documentSet = await this.manager.getDocumentSet(documentSetId);
    if (!documentSet) {
      throw new Error('Document set not found');
    }
    const docStore = await getDocStore(this.configForDocumentSet(documentSet));
    const document = await docStore.getNode(documentNodeId);
    if (!document) {
      throw new Error('Document not found');
    }
    return document;
  }

  /** Returns the stored settings, unmasked. */
  async getSettings() {
    return this.manager.getSettings();
  }

  /** Persists the given settings as-is. */
  async setSettings(settings: Settings) {
    return this.manager.setSettings(settings);
  }

  /** Returns the stored settings with every API key passed through `maskKey`. */
  async getMaskedSettings() {
    const settings = await this.manager.getSettings();
    return {
      openAIKey: maskKey(settings.openAIKey),
      oLlamaBaseURL: settings.oLlamaBaseURL,
      azureOpenAIKey: maskKey(settings.azureOpenAIKey),
      azureOpenAIEndpoint: settings.azureOpenAIEndpoint,
      azureOpenAIApiVersion: settings.azureOpenAIApiVersion,
      mistralApiKey: maskKey(settings.mistralApiKey),
      geminiApiKey: maskKey(settings.geminiApiKey)
    };
  }

  /**
   * Persists settings that may contain masked keys.
   * A submitted key equal to the masked form of the stored key means "unchanged", so the
   * stored key is kept; any other value replaces it.
   */
  async setMaskedSettings(newSettings: Settings) {
    const oldSettings = await this.manager.getSettings();
    // Loose `==` is kept on purpose: it treats a submitted `undefined` like a stored `null`.
    const keepIfMasked = (submitted: string | null, stored: string | null) =>
      submitted == maskKey(stored) ? stored : submitted;
    const settings = {
      ...newSettings,
      openAIKey: keepIfMasked(newSettings.openAIKey, oldSettings.openAIKey),
      azureOpenAIKey: keepIfMasked(newSettings.azureOpenAIKey, oldSettings.azureOpenAIKey),
      mistralApiKey: keepIfMasked(newSettings.mistralApiKey, oldSettings.mistralApiKey),
      geminiApiKey: keepIfMasked(newSettings.geminiApiKey, oldSettings.geminiApiKey)
    };
    return this.manager.setSettings(settings);
  }

}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { DocumentSetMetadata, Settings } from './types';
|
|
2
|
+
/**
 * Type declaration for the MetadataManager class.
 *
 * NOTE(review): this declares a concrete class with a private `sqliteDb` field and a
 * `(storagePath)` constructor, whereas MetadataManager.ts in this same package is an
 * abstract class with neither. This declaration looks stale relative to the source —
 * confirm before relying on it.
 */
export declare class MetadataManager {
    private sqliteDb;
    constructor(storagePath: string);
    private initializeDatabase;

    // Document sets
    addDocumentSet(metadata: Omit<DocumentSetMetadata, 'documentSetId'>): Promise<number>;
    getDocumentSet(documentSetId: number): Promise<DocumentSetMetadata | null>;
    getDocumentSets(page?: number, pageSize?: number): Promise<{
        documents: DocumentSetMetadata[];
        total: number;
    }>;
    updateDocumentCount(documentSetId: number, count: number): Promise<void>;
    deleteDocumentSet(documentSetId: number): Promise<void>;

    // Settings
    getSettings(): Promise<{
        openAIKey: null;
        oLlamaBaseURL: null;
        azureOpenAIKey: null;
        azureOpenAIEndpoint: null;
        azureOpenAIApiVersion: string;
        mistralApiKey: null;
        geminiApiKey: null;
    } & Settings>;
    setSettings(settings: Settings): Promise<Settings & { success: boolean }>;

    close(): void;
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import type { DocumentSetMetadata, Settings } from './types/index.js';
|
|
2
|
+
|
|
3
|
+
/** Raw `document_sets` row as handed back by the query runners. */
type DocumentSetRow = {
  set_id: number;
  name: string;
  // `new Date()` accepts either form. NOTE(review): some drivers return Date objects
  // rather than strings for TIMESTAMP columns — confirm per backend.
  upload_date: string | Date;
  parameters: string;
  total_documents: number;
};

/**
 * Database-agnostic store for document-set records and application settings.
 *
 * Subclasses supply the query execution (`runQuery`, `runQuerySingle`), schema setup
 * (`initializeDatabase`) and teardown (`close`); this class owns the SQL text and the
 * mapping between rows and the public types.
 */
export abstract class MetadataManager {
  protected queries = {
    /*
      Note: RETURNING on non-select/non-create statements is important for compatibility between SQLite and PostgreSQL.
      (Without it, better-sqlite would demand to use run() instead of all() or get(), which would break the abstraction.)
    */
    createDocumentSetsTable: `
      CREATE TABLE IF NOT EXISTS document_sets (
        set_id SERIAL PRIMARY KEY,
        name TEXT NOT NULL UNIQUE,
        upload_date TIMESTAMP NOT NULL,
        parameters TEXT NOT NULL,
        total_documents INTEGER NOT NULL DEFAULT 0
      );
    `,
    createSettingsTable: `
      CREATE TABLE IF NOT EXISTS meaningfully_settings (
        settings_id SERIAL PRIMARY KEY,
        settings TEXT NOT NULL
      );
    `,
    insertDocumentSet: `
      INSERT INTO document_sets (name, upload_date, parameters, total_documents)
      VALUES ($1, $2, $3, $4) RETURNING set_id
    `,
    selectDocumentSet: `
      SELECT * FROM document_sets WHERE set_id = $1
    `,
    selectDocumentSets: `
      SELECT * FROM document_sets ORDER BY upload_date DESC LIMIT $1 OFFSET $2
    `,
    countDocumentSets: `
      SELECT COUNT(*) as count FROM document_sets
    `,
    updateDocumentCount: `
      UPDATE document_sets SET total_documents = total_documents + $1 WHERE set_id = $2 RETURNING *
    `,
    deleteDocumentSet: `
      DELETE FROM document_sets WHERE set_id = $1 RETURNING *
    `,
    selectSettings: `
      SELECT * FROM meaningfully_settings WHERE settings_id = 1
    `,
    upsertSettings: `
      INSERT INTO meaningfully_settings (settings_id, settings)
      VALUES (1, $1)
      ON CONFLICT (settings_id) DO UPDATE SET settings = $2
      RETURNING *
    `
    // the two arguments $1 and $2 are identical, but, to work around a cross-compatibility bug in SQLite versus Postgresql,
    // where PG can accept the same argument twice (specified as $1 in two places), but SQLITE cannot (it just has ? placeholders)
    // they are specified separately.
  };

  /** Runs a query and resolves to all resulting rows. */
  protected abstract runQuery<T>(query: string, params?: any[]): Promise<T[]>;
  /** Runs a query and resolves to a single row, or null when there is none. */
  protected abstract runQuerySingle<T>(query: string, params?: any[]): Promise<T | null>;
  protected abstract initializeDatabase(): Promise<void>;
  protected abstract close(): void;

  /** Converts a raw `document_sets` row into the public metadata shape. */
  private toDocumentSetMetadata(row: DocumentSetRow): DocumentSetMetadata {
    return {
      documentSetId: row.set_id,
      name: row.name,
      uploadDate: new Date(row.upload_date),
      parameters: JSON.parse(row.parameters),
      totalDocuments: row.total_documents
    };
  }

  /**
   * Inserts a document-set record.
   *
   * @returns The new row's `set_id`, or 0 when the insert returned no row / no id.
   */
  async addDocumentSet(metadata: Omit<DocumentSetMetadata, 'documentSetId'>): Promise<number> {
    const result = await this.runQuerySingle<{ set_id: number }>(this.queries.insertDocumentSet, [
      metadata.name,
      metadata.uploadDate.toISOString(),
      JSON.stringify(metadata.parameters),
      metadata.totalDocuments
    ]);
    return result?.set_id || 0;
  }

  /** Returns the document set with the given id, or null when no such row exists. */
  async getDocumentSet(documentSetId: number): Promise<DocumentSetMetadata | null> {
    const row = await this.runQuerySingle<DocumentSetRow>(this.queries.selectDocumentSet, [documentSetId]);
    return row ? this.toDocumentSetMetadata(row) : null;
  }

  /**
   * Returns one page of document sets (newest upload first) and the total row count.
   *
   * @param page - 1-based page number; values below 1 (or non-numeric) are treated as 1 so
   *   the computed OFFSET is never negative.
   * @param pageSize - Rows per page; negative or non-numeric values are treated as 0.
   */
  async getDocumentSets(page: number = 1, pageSize: number = 10): Promise<{ documents: DocumentSetMetadata[]; total: number }> {
    const limit = Math.max(0, Math.trunc(pageSize) || 0);
    const pageIndex = Math.max(1, Math.trunc(page) || 1) - 1;
    const offset = pageIndex * limit;

    // Some drivers hand COUNT(*) back as a string (e.g. bigint columns), so coerce to keep
    // `total` a real number as the return type promises.
    const totalCountRow = await this.runQuerySingle<{ count: number | string }>(this.queries.countDocumentSets);
    const total = Number(totalCountRow?.count ?? 0) || 0;

    const rows = await this.runQuery<DocumentSetRow>(this.queries.selectDocumentSets, [limit, offset]);
    const documents = rows.map((row) => this.toDocumentSetMetadata(row));

    return { documents, total };
  }

  /** Adds `count` to the stored `total_documents` of the given set (it increments, it does not overwrite). */
  async updateDocumentCount(documentSetId: number, count: number): Promise<void> {
    await this.runQuery(this.queries.updateDocumentCount, [count, documentSetId]);
  }

  /** Deletes the document-set row with the given id. */
  async deleteDocumentSet(documentSetId: number): Promise<void> {
    await this.runQuery(this.queries.deleteDocumentSet, [documentSetId]);
  }

  /** Returns the stored settings layered over the defaults, or the defaults when nothing is stored. */
  async getSettings(): Promise<Settings> {
    const DEFAULT_SETTINGS: Settings = {
      openAIKey: null,
      oLlamaBaseURL: null,
      azureOpenAIKey: null,
      azureOpenAIEndpoint: null,
      azureOpenAIApiVersion: "2024-02-01",
      mistralApiKey: null,
      geminiApiKey: null,
    };

    const row = await this.runQuerySingle<{ settings: string }>(this.queries.selectSettings);
    return row ? { ...DEFAULT_SETTINGS, ...JSON.parse(row.settings) } : DEFAULT_SETTINGS;
  }

  /** Serializes the settings to JSON and upserts them into the single settings row. */
  async setSettings(settings: Settings): Promise<{ success: boolean }> {
    // The serialized value is passed twice to work around a cross-compatibility bug in SQLite versus Postgresql
    // where PG can accept the same argument twice (specified as $1 in two places), but SQLITE cannot (it just has ? placeholders)
    const serialized = JSON.stringify(settings);
    await this.runQuery(this.queries.upsertSettings, [serialized, serialized]);
    return { success: true };
  }
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types";
|
|
2
|
+
/** Creates embeddings for one text column of a CSV file; resolves to the run's outcome. */
export declare function createEmbeddings(
    csvPath: string,
    textColumnName: string,
    config: EmbeddingConfig,
    settings: Settings,
    clients: Clients,
): Promise<EmbeddingResult>;

/** Produces preview data for one text column of a CSV file. */
export declare function previewResults(
    csvPath: string,
    textColumnName: string,
    config: EmbeddingConfig,
): Promise<PreviewResult>;

/** Resolves to the document store for the given configuration. */
export declare function getDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;

/** Resolves to the vector-store index for the given configuration. */
export declare function getIndex(
    config: EmbeddingConfig,
    settings: Settings,
    clients: Clients,
): Promise<import("llamaindex").VectorStoreIndex>;

/** Queries an index, optionally limiting the result count and applying metadata filters. */
export declare function search(
    index: any,
    query: string,
    numResults?: number,
    filters?: MetadataFilter[],
): Promise<SearchResult[]>;
|