@workglow/dataset 0.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +1134 -0
- package/dist/browser.js +1053 -0
- package/dist/browser.js.map +20 -0
- package/dist/bun.js +1054 -0
- package/dist/bun.js.map +20 -0
- package/dist/common-server.d.ts +7 -0
- package/dist/common-server.d.ts.map +1 -0
- package/dist/common.d.ts +17 -0
- package/dist/common.d.ts.map +1 -0
- package/dist/document/Document.d.ts +50 -0
- package/dist/document/Document.d.ts.map +1 -0
- package/dist/document/DocumentDataset.d.ts +79 -0
- package/dist/document/DocumentDataset.d.ts.map +1 -0
- package/dist/document/DocumentDatasetRegistry.d.ts +29 -0
- package/dist/document/DocumentDatasetRegistry.d.ts.map +1 -0
- package/dist/document/DocumentNode.d.ts +31 -0
- package/dist/document/DocumentNode.d.ts.map +1 -0
- package/dist/document/DocumentSchema.d.ts +1668 -0
- package/dist/document/DocumentSchema.d.ts.map +1 -0
- package/dist/document/DocumentStorageSchema.d.ts +43 -0
- package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
- package/dist/document/StructuralParser.d.ts +30 -0
- package/dist/document/StructuralParser.d.ts.map +1 -0
- package/dist/document-chunk/DocumentChunkDataset.d.ts +79 -0
- package/dist/document-chunk/DocumentChunkDataset.d.ts.map +1 -0
- package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts +29 -0
- package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts.map +1 -0
- package/dist/document-chunk/DocumentChunkSchema.d.ts +55 -0
- package/dist/document-chunk/DocumentChunkSchema.d.ts.map +1 -0
- package/dist/node.js +1053 -0
- package/dist/node.js.map +20 -0
- package/dist/types.d.ts +7 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/util/DatasetSchema.d.ts +85 -0
- package/dist/util/DatasetSchema.d.ts.map +1 -0
- package/package.json +54 -0
- package/src/document-chunk/README.md +362 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentSchema.d.ts","sourceRoot":"","sources":["../../src/document/DocumentSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAkB,UAAU,EAAc,MAAM,gBAAgB,CAAC;AAE7E;;GAEG;AACH,eAAO,MAAM,QAAQ;;;;;;CAMX,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,QAAQ,CAAC,CAAC,MAAM,OAAO,QAAQ,CAAC,CAAC;AAMhE;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;CAgBO,CAAC;AAEpC,MAAM,MAAM,SAAS,GAAG,UAAU,CAAC,OAAO,eAAe,CAAC,CAAC;AAE3D;;GAEG;AACH,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;CAqBU,CAAC;AAEpC,MAAM,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;AAErD;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsBE,CAAC;AAEpC,MAAM,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAErE;;;;GAIG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBA,CAAC;AAEpC;;;;GAIG;AACH,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBI,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAaG,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAaI,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BK,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmBO,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBA,CAAC;AAOpC;;GAEG;AACH,UAAU,gBAAgB;IACxB,QAAQ,CAAC
,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,UAAU,CAAC,EAAE,cAAc,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAiB,SAAQ,gBAAgB;IACxD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC;IACxC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,WAAY,SAAQ,gBAAgB;IACnD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,OAAO,CAAC;IACvC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,aAAc,SAAQ,gBAAgB;IACrD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,SAAS,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,YAAa,SAAQ,gBAAgB;IACpD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,SAAU,SAAQ,gBAAgB;IACjD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,KAAK,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GACpB,gBAAgB,GAChB,WAAW,GACX,aAAa,GACb,YAAY,GACZ,SAAS,CAAC;AAMd;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;CAqBK,CAAC;AAEpC,MAAM,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAE/D;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgBC,CAAC;AAEpC,MAAM,MAAM,eAAe,GAAG,UAAU,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAEvE;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAkCU,CAAC;AAEvC,MAAM,MAAM,SAAS,GAAG,UAAU,CAAC,UAAU,CAAC,OAAO,eAAe,CAAC,CAAC,CAAC;AAMvE;;;GAGG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDG,CAAC;AAEpC,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEnE;;GAEG;AACH,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKN,CAAC;AAEhC;;;GAGG;AACH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4DL,CAAC;AAEpC,MAAM,MAAM,qBAAqB,GAAG,UAAU,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAEnF;;GAEG;AACH,eAAO,MAAM,gCAAgC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKd,CAAC;AAEhC;;GAEG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;CAqBA,CAAC;AAEpC,MAAM,MAAM,gBAAgB,GAAG,UAAU,CAAC,OAAO,sBAAsB,CAAC,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import { ITabularStorage } from "@workglow/storage";
|
|
7
|
+
import { TypedArraySchemaOptions, type FromSchema } from "@workglow/util";
|
|
8
|
+
/**
|
|
9
|
+
* Schema for storing documents in tabular storage.
* Declares three properties: `doc_id` (string, flagged "x-auto-generated"),
* `data` (string, described as the JSON-serialized document) and `metadata` (object).
* Only `doc_id` and `data` are listed as required; additional properties are allowed.
|
|
10
|
+
*/
|
|
11
|
+
export declare const DocumentStorageSchema: {
|
|
12
|
+
readonly type: "object";
|
|
13
|
+
readonly properties: {
|
|
14
|
+
readonly doc_id: {
|
|
15
|
+
readonly type: "string";
|
|
16
|
+
readonly "x-auto-generated": true;
|
|
17
|
+
readonly title: "Document ID";
|
|
18
|
+
readonly description: "Unique identifier for the document";
|
|
19
|
+
};
|
|
20
|
+
readonly data: {
|
|
21
|
+
readonly type: "string";
|
|
22
|
+
readonly title: "Document Data";
|
|
23
|
+
readonly description: "JSON-serialized document";
|
|
24
|
+
};
|
|
25
|
+
readonly metadata: {
|
|
26
|
+
readonly type: "object";
|
|
27
|
+
readonly title: "Metadata";
|
|
28
|
+
readonly description: "Metadata of the document";
|
|
29
|
+
};
|
|
30
|
+
};
|
|
31
|
+
readonly required: readonly ["doc_id", "data"];
|
|
32
|
+
readonly additionalProperties: true;
|
|
33
|
+
};
|
|
34
|
+
/** Type of the DocumentStorageSchema constant, so the schema can be named in type positions. */
export type DocumentStorageSchema = typeof DocumentStorageSchema;
|
|
35
|
+
/** Readonly tuple naming the key property of document storage: "doc_id". NOTE(review): presumably the primary key - confirm. */
export declare const DocumentStorageKey: readonly ["doc_id"];
|
|
36
|
+
/** Type of the DocumentStorageKey constant (readonly ["doc_id"]). */
export type DocumentStorageKey = typeof DocumentStorageKey;
|
|
37
|
+
/** Entity type derived from DocumentStorageSchema via FromSchema, using TypedArraySchemaOptions. */
export type DocumentStorageEntity = FromSchema<DocumentStorageSchema, TypedArraySchemaOptions>;
|
|
38
|
+
/**
|
|
39
|
+
* Type for inserting documents - doc_id is optional (auto-generated).
* Built as DocumentStorageEntity without "doc_id", intersected with an optional "doc_id";
* every other field keeps the optionality it has on DocumentStorageEntity.
|
|
40
|
+
*/
|
|
41
|
+
export type InsertDocumentStorageEntity = Omit<DocumentStorageEntity, "doc_id"> & Partial<Pick<DocumentStorageEntity, "doc_id">>;
|
|
42
|
+
/** ITabularStorage specialised with the document schema type, the DocumentStorageKey tuple type and DocumentStorageEntity. */
export type DocumentTabularStorage = ITabularStorage<typeof DocumentStorageSchema, DocumentStorageKey, DocumentStorageEntity>;
|
|
43
|
+
//# sourceMappingURL=DocumentStorageSchema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentStorageSchema.d.ts","sourceRoot":"","sources":["../../src/document/DocumentStorageSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EACL,uBAAuB,EAEvB,KAAK,UAAU,EAChB,MAAM,gBAAgB,CAAC;AAExB;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;CAsBO,CAAC;AAC1C,MAAM,MAAM,qBAAqB,GAAG,OAAO,qBAAqB,CAAC;AAEjE,eAAO,MAAM,kBAAkB,qBAAsB,CAAC;AACtD,MAAM,MAAM,kBAAkB,GAAG,OAAO,kBAAkB,CAAC;AAE3D,MAAM,MAAM,qBAAqB,GAAG,UAAU,CAAC,qBAAqB,EAAE,uBAAuB,CAAC,CAAC;AAE/F;;GAEG;AACH,MAAM,MAAM,2BAA2B,GAAG,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,GAC7E,OAAO,CAAC,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjD,MAAM,MAAM,sBAAsB,GAAG,eAAe,CAClD,OAAO,qBAAqB,EAC5B,kBAAkB,EAClB,qBAAqB,CACtB,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import { type DocumentRootNode } from "./DocumentSchema";
|
|
7
|
+
/**
|
|
8
|
+
* Parse markdown or plain text into a hierarchical document tree.
* Every public method is static and resolves to a DocumentRootNode.
|
|
9
|
+
*/
|
|
10
|
+
export declare class StructuralParser {
|
|
11
|
+
/**
|
|
12
|
+
* Parse markdown text into a hierarchical document tree
* @param doc_id ID of the document
* @param text Markdown text to parse
* @param title Title of the document
* @returns Promise resolving to the root node of the parsed tree
|
|
13
|
+
*/
|
|
14
|
+
static parseMarkdown(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
|
|
15
|
+
/**
|
|
16
|
+
* Parse plain text into a hierarchical document tree
|
|
17
|
+
* Splits by double newlines to create paragraphs
* @param doc_id ID of the document
* @param text Plain text to parse
* @param title Title of the document
* @returns Promise resolving to the root node of the parsed tree
|
|
18
|
+
*/
|
|
19
|
+
static parsePlainText(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
|
|
20
|
+
/**
|
|
21
|
+
* Auto-detect format and parse
* @param doc_id ID of the document
* @param text Text to parse
* @param title Title of the document
* @param format Optional explicit format, "markdown" or "text".
* NOTE(review): presumably detection only runs when format is omitted - confirm against the implementation
* @returns Promise resolving to the root node of the parsed tree
|
|
22
|
+
*/
|
|
23
|
+
static parse(doc_id: string, text: string, title: string, format?: "markdown" | "text"): Promise<DocumentRootNode>;
|
|
24
|
+
/**
|
|
25
|
+
* Check if text contains markdown header patterns
|
|
26
|
+
* Looks for lines starting with 1-6 hash symbols followed by whitespace
* (private member: its signature is not exposed in this declaration file)
|
|
27
|
+
*/
|
|
28
|
+
private static looksLikeMarkdown;
|
|
29
|
+
}
|
|
30
|
+
//# sourceMappingURL=StructuralParser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"StructuralParser.d.ts","sourceRoot":"","sources":["../../src/document/StructuralParser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EACL,KAAK,gBAAgB,EAItB,MAAM,kBAAkB,CAAC;AAE1B;;GAEG;AACH,qBAAa,gBAAgB;IAC3B;;OAEG;WACU,aAAa,CACxB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,CAAC;IAqH5B;;;OAGG;WACU,cAAc,CACzB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,CAAC;IAoE5B;;OAEG;IACH,MAAM,CAAC,KAAK,CACV,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAC3B,OAAO,CAAC,gBAAgB,CAAC;IAO5B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,iBAAiB;CAIjC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import type { VectorSearchOptions } from "@workglow/storage";
|
|
7
|
+
import type { TypedArray } from "@workglow/util";
|
|
8
|
+
import type { DocumentChunk, DocumentChunkStorage, InsertDocumentChunk } from "./DocumentChunkSchema";
|
|
9
|
+
/**
|
|
10
|
+
* Document Chunk Dataset
|
|
11
|
+
*
|
|
12
|
+
* A dataset-specific wrapper around vector storage for document chunks.
|
|
13
|
+
* This provides a domain-specific API for working with document chunk embeddings
|
|
14
|
+
* in RAG pipelines.
* Constructed from a DocumentChunkStorage. Most operations return a Promise;
* getStorage, destroy and getVectorDimensions are synchronous.
|
|
15
|
+
*/
|
|
16
|
+
export declare class DocumentChunkDataset {
|
|
17
|
+
private storage;
|
|
18
|
+
constructor(storage: DocumentChunkStorage);
|
|
19
|
+
/**
|
|
20
|
+
* Get the underlying storage instance
* @returns The DocumentChunkStorage this dataset wraps
|
|
21
|
+
*/
|
|
22
|
+
getStorage(): DocumentChunkStorage;
|
|
23
|
+
/**
|
|
24
|
+
* Store a document chunk
* @param chunk Chunk to store; chunk_id may be omitted (InsertDocumentChunk makes it optional)
* @returns Promise resolving to a DocumentChunk (presumably the stored record, including its chunk_id)
|
|
25
|
+
*/
|
|
26
|
+
put(chunk: InsertDocumentChunk): Promise<DocumentChunk>;
|
|
27
|
+
/**
|
|
28
|
+
* Store multiple document chunks
* @param chunks Chunks to store; each chunk_id may be omitted
* @returns Promise resolving to an array of DocumentChunk
|
|
29
|
+
*/
|
|
30
|
+
putBulk(chunks: InsertDocumentChunk[]): Promise<DocumentChunk[]>;
|
|
31
|
+
/**
|
|
32
|
+
* Get a document chunk by ID
* @param chunk_id ID of the chunk to fetch
* @returns Promise resolving to the chunk, or undefined (presumably when no chunk has that ID)
|
|
33
|
+
*/
|
|
34
|
+
get(chunk_id: string): Promise<DocumentChunk | undefined>;
|
|
35
|
+
/**
|
|
36
|
+
* Delete a document chunk
* @param chunk_id ID of the chunk to delete
|
|
37
|
+
*/
|
|
38
|
+
delete(chunk_id: string): Promise<void>;
|
|
39
|
+
/**
|
|
40
|
+
* Search for similar chunks using vector similarity
* @param query Query vector as a TypedArray
* @param options Optional VectorSearchOptions
* @returns Promise resolving to chunks, each extended with a numeric score
|
|
41
|
+
*/
|
|
42
|
+
similaritySearch(query: TypedArray, options?: VectorSearchOptions<Record<string, unknown>>): Promise<Array<DocumentChunk & {
|
|
43
|
+
score: number;
|
|
44
|
+
}>>;
|
|
45
|
+
/**
|
|
46
|
+
* Hybrid search (vector + full-text)
* @param query Query vector as a TypedArray
* @param options Required: VectorSearchOptions plus textQuery (string) and an optional vectorWeight (number).
* NOTE(review): the valid range and default of vectorWeight are not visible in this declaration
* @returns Promise resolving to chunks, each extended with a numeric score
|
|
47
|
+
*/
|
|
48
|
+
hybridSearch(query: TypedArray, options: VectorSearchOptions<Record<string, unknown>> & {
|
|
49
|
+
textQuery: string;
|
|
50
|
+
vectorWeight?: number;
|
|
51
|
+
}): Promise<Array<DocumentChunk & {
|
|
52
|
+
score: number;
|
|
53
|
+
}>>;
|
|
54
|
+
/**
|
|
55
|
+
* Get all chunks
* @returns Promise resolving to an array of chunks, or undefined.
* NOTE(review): presumably undefined when nothing is stored - confirm
|
|
56
|
+
*/
|
|
57
|
+
getAll(): Promise<DocumentChunk[] | undefined>;
|
|
58
|
+
/**
|
|
59
|
+
* Get the count of stored chunks
* @returns Promise resolving to the count
|
|
60
|
+
*/
|
|
61
|
+
size(): Promise<number>;
|
|
62
|
+
/**
|
|
63
|
+
* Clear all chunks
|
|
64
|
+
*/
|
|
65
|
+
clear(): Promise<void>;
|
|
66
|
+
/**
|
|
67
|
+
* Destroy the storage (synchronous: returns void, not a Promise)
|
|
68
|
+
*/
|
|
69
|
+
destroy(): void;
|
|
70
|
+
/**
|
|
71
|
+
* Set up the database/storage
|
|
72
|
+
*/
|
|
73
|
+
setupDatabase(): Promise<void>;
|
|
74
|
+
/**
|
|
75
|
+
* Get the vector dimensions
* @returns The number of vector dimensions (synchronous)
|
|
76
|
+
*/
|
|
77
|
+
getVectorDimensions(): number;
|
|
78
|
+
}
|
|
79
|
+
//# sourceMappingURL=DocumentChunkDataset.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentChunkDataset.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkDataset.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAC7D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EACV,aAAa,EAEb,oBAAoB,EACpB,mBAAmB,EACpB,MAAM,uBAAuB,CAAC;AAE/B;;;;;;GAMG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAAuB;gBAE1B,OAAO,EAAE,oBAAoB;IAIzC;;OAEG;IACH,UAAU,IAAI,oBAAoB;IAIlC;;OAEG;IACG,GAAG,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC;IAI7D;;OAEG;IACG,OAAO,CAAC,MAAM,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IAItE;;OAEG;IACG,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;IAK/D;;OAEG;IACG,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK7C;;OAEG;IACG,gBAAgB,CACpB,KAAK,EAAE,UAAU,EACjB,OAAO,CAAC,EAAE,mBAAmB,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GACrD,OAAO,CAAC,KAAK,CAAC,aAAa,GAAG;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAIpD;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,UAAU,EACjB,OAAO,EAAE,mBAAmB,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAAG;QACtD,SAAS,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GACA,OAAO,CAAC,KAAK,CAAC,aAAa,GAAG;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAOpD;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,aAAa,EAAE,GAAG,SAAS,CAAC;IAIpD;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC;IAI7B;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;OAEG;IACH,OAAO,IAAI,IAAI;IAIf;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAIpC;;OAEG;IACH,mBAAmB,IAAI,MAAM;CAG9B"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import type { DocumentChunkDataset } from "./DocumentChunkDataset";
|
|
7
|
+
/**
|
|
8
|
+
* Service token for the document chunk dataset registry
|
|
9
|
+
* Maps dataset IDs to DocumentChunkDataset instances
* (typed as ServiceToken of Map<string, DocumentChunkDataset>)
|
|
10
|
+
*/
|
|
11
|
+
export declare const DOCUMENT_CHUNK_DATASET: import("@workglow/util").ServiceToken<Map<string, DocumentChunkDataset>>;
|
|
12
|
+
/**
|
|
13
|
+
* Gets the global document chunk dataset registry
|
|
14
|
+
* @returns Map of document chunk dataset ID to instance
* NOTE(review): whether the returned Map is the live registry or a copy is not visible here - confirm before mutating it
|
|
15
|
+
*/
|
|
16
|
+
export declare function getGlobalDocumentChunkDataset(): Map<string, DocumentChunkDataset>;
|
|
17
|
+
/**
|
|
18
|
+
* Registers a document chunk dataset globally by ID
|
|
19
|
+
* @param id The unique identifier for this dataset
|
|
20
|
+
* @param dataset The dataset instance to register
* NOTE(review): behaviour when `id` is already registered is not visible in this declaration - confirm whether it overwrites
|
|
21
|
+
*/
|
|
22
|
+
export declare function registerDocumentChunkDataset(id: string, dataset: DocumentChunkDataset): void;
|
|
23
|
+
/**
|
|
24
|
+
* Gets a document chunk dataset by ID from the global registry
|
|
25
|
+
* @param id The dataset identifier (as passed when the dataset was registered)
|
|
26
|
+
* @returns The dataset instance or undefined if not found
|
|
27
|
+
*/
|
|
28
|
+
export declare function getDocumentChunkDataset(id: string): DocumentChunkDataset | undefined;
|
|
29
|
+
//# sourceMappingURL=DocumentChunkDatasetRegistry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentChunkDatasetRegistry.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkDatasetRegistry.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAEnE;;;GAGG;AACH,eAAO,MAAM,sBAAsB,0EAC8C,CAAC;AAWlF;;;GAGG;AACH,wBAAgB,6BAA6B,IAAI,GAAG,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAEjF;AAED;;;;GAIG;AACH,wBAAgB,4BAA4B,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAG5F;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,EAAE,EAAE,MAAM,GAAG,oBAAoB,GAAG,SAAS,CAEpF"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import { IVectorStorage } from "@workglow/storage";
|
|
7
|
+
import { type TypedArray } from "@workglow/util";
|
|
8
|
+
/**
|
|
9
|
+
* Default schema for document chunk storage with vector embeddings.
* Declares `chunk_id` (string, flagged "x-auto-generated"), `doc_id` (string),
* `vector` (array with format "TypedArray") and `metadata` (object with format "metadata"
* that allows additional properties). The top-level object disallows additional
* properties, and no `required` list is declared here.
|
|
10
|
+
*/
|
|
11
|
+
export declare const DocumentChunkSchema: {
|
|
12
|
+
readonly type: "object";
|
|
13
|
+
readonly properties: {
|
|
14
|
+
readonly chunk_id: {
|
|
15
|
+
readonly type: "string";
|
|
16
|
+
readonly "x-auto-generated": true;
|
|
17
|
+
};
|
|
18
|
+
readonly doc_id: {
|
|
19
|
+
readonly type: "string";
|
|
20
|
+
};
|
|
21
|
+
readonly vector: {
|
|
22
|
+
readonly type: "array";
|
|
23
|
+
readonly format: "TypedArray";
|
|
24
|
+
readonly title: "Typed Array";
|
|
25
|
+
readonly description: "A typed array (Float32Array, Int8Array, etc.)";
|
|
26
|
+
};
|
|
27
|
+
readonly metadata: {
|
|
28
|
+
readonly type: "object";
|
|
29
|
+
readonly format: "metadata";
|
|
30
|
+
readonly additionalProperties: true;
|
|
31
|
+
};
|
|
32
|
+
};
|
|
33
|
+
readonly additionalProperties: false;
|
|
34
|
+
};
|
|
35
|
+
/** Type of the DocumentChunkSchema constant, so the schema can be named in type positions. */
export type DocumentChunkSchema = typeof DocumentChunkSchema;
|
|
36
|
+
/** Readonly tuple naming the primary-key property of document chunks: "chunk_id". */
export declare const DocumentChunkPrimaryKey: readonly ["chunk_id"];
|
|
37
|
+
/** Type of the DocumentChunkPrimaryKey constant (readonly ["chunk_id"]). */
export type DocumentChunkPrimaryKey = typeof DocumentChunkPrimaryKey;
|
|
38
|
+
/**
* Shape of a document chunk record: chunk_id, doc_id, vector and metadata (all required).
* @typeParam Metadata Type of the `metadata` field; defaults to Record<string, unknown>
* @typeParam Vector Type of the `vector` field; constrained to TypedArray, which is also the default
*/
export interface DocumentChunk<Metadata extends Record<string, unknown> = Record<string, unknown>, Vector extends TypedArray = TypedArray> {
|
|
39
|
+
chunk_id: string;
|
|
40
|
+
doc_id: string;
|
|
41
|
+
vector: Vector;
|
|
42
|
+
metadata: Metadata;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Type for inserting document chunks - chunk_id is optional (auto-generated).
* Built as DocumentChunk without "chunk_id", intersected with an optional "chunk_id";
* doc_id, vector and metadata stay required. Takes the same type parameters as DocumentChunk.
|
|
46
|
+
*/
|
|
47
|
+
export type InsertDocumentChunk<Metadata extends Record<string, unknown> = Record<string, unknown>, Vector extends TypedArray = TypedArray> = Omit<DocumentChunk<Metadata, Vector>, "chunk_id"> & Partial<Pick<DocumentChunk<Metadata, Vector>, "chunk_id">>;
|
|
48
|
+
/**
|
|
49
|
+
* Type for the primary key of document chunks, in object form: a single chunk_id string
|
|
50
|
+
*/
|
|
51
|
+
export type DocumentChunkKey = {
|
|
52
|
+
chunk_id: string;
|
|
53
|
+
};
|
|
54
|
+
/** IVectorStorage specialised to document chunks: Record<string, unknown> as first type argument (presumably the metadata type - confirm), then the schema type, DocumentChunk and DocumentChunkPrimaryKey. */
export type DocumentChunkStorage = IVectorStorage<Record<string, unknown>, typeof DocumentChunkSchema, DocumentChunk, DocumentChunkPrimaryKey>;
|
|
55
|
+
//# sourceMappingURL=DocumentChunkSchema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentChunkSchema.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAA+C,KAAK,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE9F;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;CASS,CAAC;AAC1C,MAAM,MAAM,mBAAmB,GAAG,OAAO,mBAAmB,CAAC;AAE7D,eAAO,MAAM,uBAAuB,uBAAwB,CAAC;AAC7D,MAAM,MAAM,uBAAuB,GAAG,OAAO,uBAAuB,CAAC;AAErE,MAAM,WAAW,aAAa,CAC5B,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAClE,MAAM,SAAS,UAAU,GAAG,UAAU;IAEtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,QAAQ,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,MAAM,mBAAmB,CAC7B,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAClE,MAAM,SAAS,UAAU,GAAG,UAAU,IACpC,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,UAAU,CAAC,GACnD,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC;AAEpD,MAAM,MAAM,oBAAoB,GAAG,cAAc,CAC/C,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACvB,OAAO,mBAAmB,EAC1B,aAAa,EACb,uBAAuB,CACxB,CAAC"}
|