@workglow/dataset 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +1134 -0
  3. package/dist/browser.js +1053 -0
  4. package/dist/browser.js.map +20 -0
  5. package/dist/bun.js +1054 -0
  6. package/dist/bun.js.map +20 -0
  7. package/dist/common-server.d.ts +7 -0
  8. package/dist/common-server.d.ts.map +1 -0
  9. package/dist/common.d.ts +17 -0
  10. package/dist/common.d.ts.map +1 -0
  11. package/dist/document/Document.d.ts +50 -0
  12. package/dist/document/Document.d.ts.map +1 -0
  13. package/dist/document/DocumentDataset.d.ts +79 -0
  14. package/dist/document/DocumentDataset.d.ts.map +1 -0
  15. package/dist/document/DocumentDatasetRegistry.d.ts +29 -0
  16. package/dist/document/DocumentDatasetRegistry.d.ts.map +1 -0
  17. package/dist/document/DocumentNode.d.ts +31 -0
  18. package/dist/document/DocumentNode.d.ts.map +1 -0
  19. package/dist/document/DocumentSchema.d.ts +1668 -0
  20. package/dist/document/DocumentSchema.d.ts.map +1 -0
  21. package/dist/document/DocumentStorageSchema.d.ts +43 -0
  22. package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
  23. package/dist/document/StructuralParser.d.ts +30 -0
  24. package/dist/document/StructuralParser.d.ts.map +1 -0
  25. package/dist/document-chunk/DocumentChunkDataset.d.ts +79 -0
  26. package/dist/document-chunk/DocumentChunkDataset.d.ts.map +1 -0
  27. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts +29 -0
  28. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts.map +1 -0
  29. package/dist/document-chunk/DocumentChunkSchema.d.ts +55 -0
  30. package/dist/document-chunk/DocumentChunkSchema.d.ts.map +1 -0
  31. package/dist/node.js +1053 -0
  32. package/dist/node.js.map +20 -0
  33. package/dist/types.d.ts +7 -0
  34. package/dist/types.d.ts.map +1 -0
  35. package/dist/util/DatasetSchema.d.ts +85 -0
  36. package/dist/util/DatasetSchema.d.ts.map +1 -0
  37. package/package.json +54 -0
  38. package/src/document-chunk/README.md +362 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocumentSchema.d.ts","sourceRoot":"","sources":["../../src/document/DocumentSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAkB,UAAU,EAAc,MAAM,gBAAgB,CAAC;AAE7E;;GAEG;AACH,eAAO,MAAM,QAAQ;;;;;;CAMX,CAAC;AAEX,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,QAAQ,CAAC,CAAC,MAAM,OAAO,QAAQ,CAAC,CAAC;AAMhE;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;CAgBO,CAAC;AAEpC,MAAM,MAAM,SAAS,GAAG,UAAU,CAAC,OAAO,eAAe,CAAC,CAAC;AAE3D;;GAEG;AACH,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;CAqBU,CAAC;AAEpC,MAAM,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;AAErD;;GAEG;AACH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsBE,CAAC;AAEpC,MAAM,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAErE;;;;GAIG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBA,CAAC;AAEpC;;;;GAIG;AACH,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBI,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAaG,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAaI,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BK,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmBO,CAAC;AAEpC;;GAEG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwBA,CAAC;AAOpC;;GAEG;AACH,UAAU,gBAAgB;IACxB,QAAQ,CA
AC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,SAAS,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,UAAU,CAAC,EAAE,cAAc,CAAC;CACtC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAiB,SAAQ,gBAAgB;IACxD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC;IACxC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,WAAY,SAAQ,gBAAgB;IACnD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,OAAO,CAAC;IACvC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,aAAc,SAAQ,gBAAgB;IACrD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,SAAS,CAAC;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,YAAa,SAAQ,gBAAgB;IACpD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,SAAU,SAAQ,gBAAgB;IACjD,QAAQ,CAAC,IAAI,EAAE,OAAO,QAAQ,CAAC,KAAK,CAAC;IACrC,QAAQ,CAAC,QAAQ,EAAE,YAAY,EAAE,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GACpB,gBAAgB,GAChB,WAAW,GACX,aAAa,GACb,YAAY,GACZ,SAAS,CAAC;AAMd;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;CAqBK,CAAC;AAEpC,MAAM,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAE/D;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgBC,CAAC;AAEpC,MAAM,MAAM,eAAe,GAAG,UAAU,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAEvE;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAkCU,CAAC;AAEvC,MAAM,MAAM,SAAS,GAAG,UAAU,CAAC,UAAU,CAAC,OAAO,eAAe,CAAC,CAAC,CAAC;AAMvE;;;GAGG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDG,CAAC;AAEpC,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEnE;;GAEG;AACH,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKN,CAAC;AAEhC;;;GAGG;AACH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4DL,CAAC;AAEpC,MAAM,MAAM,qBAAqB,GAAG,UAAU,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAEnF;;GAEG;AACH,eAAO,MAAM,gCAAgC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKd,CAAC;AAEhC;;GAEG;AACH,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;CAqBA,CAAC;AAEpC,MAAM,MAAM,gBAAgB,GAAG,UAAU,CAAC,OAAO,sBAAsB,CAAC,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import { ITabularStorage } from "@workglow/storage";
7
+ import { TypedArraySchemaOptions, type FromSchema } from "@workglow/util";
8
+ /**
9
+ * Schema for storing documents in tabular storage: `doc_id` (string, flagged "x-auto-generated") and `data` (JSON-serialized document string) are required, `metadata` is an optional object, and additional properties are allowed
10
+ */
11
+ export declare const DocumentStorageSchema: {
12
+ readonly type: "object";
13
+ readonly properties: {
14
+ readonly doc_id: {
15
+ readonly type: "string";
16
+ readonly "x-auto-generated": true;
17
+ readonly title: "Document ID";
18
+ readonly description: "Unique identifier for the document";
19
+ };
20
+ readonly data: {
21
+ readonly type: "string";
22
+ readonly title: "Document Data";
23
+ readonly description: "JSON-serialized document";
24
+ };
25
+ readonly metadata: {
26
+ readonly type: "object";
27
+ readonly title: "Metadata";
28
+ readonly description: "Metadata of the document";
29
+ };
30
+ };
31
+ readonly required: readonly ["doc_id", "data"];
32
+ readonly additionalProperties: true;
33
+ };
34
+ export type DocumentStorageSchema = typeof DocumentStorageSchema; // type alias sharing the constant's name, so the schema can be used in type positions
35
+ export declare const DocumentStorageKey: readonly ["doc_id"];
36
+ export type DocumentStorageKey = typeof DocumentStorageKey; // same pattern: type alias of the key tuple constant
37
+ export type DocumentStorageEntity = FromSchema<DocumentStorageSchema, TypedArraySchemaOptions>; // entity type derived from the schema via FromSchema
38
+ /**
39
+ * Type for inserting documents: identical to DocumentStorageEntity except that doc_id is optional (the schema flags doc_id as auto-generated)
40
+ */
41
+ export type InsertDocumentStorageEntity = Omit<DocumentStorageEntity, "doc_id"> & Partial<Pick<DocumentStorageEntity, "doc_id">>;
42
+ export type DocumentTabularStorage = ITabularStorage<typeof DocumentStorageSchema, DocumentStorageKey, DocumentStorageEntity>; // tabular storage specialised to the document schema, key tuple and entity type
43
+ //# sourceMappingURL=DocumentStorageSchema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocumentStorageSchema.d.ts","sourceRoot":"","sources":["../../src/document/DocumentStorageSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EACL,uBAAuB,EAEvB,KAAK,UAAU,EAChB,MAAM,gBAAgB,CAAC;AAExB;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;CAsBO,CAAC;AAC1C,MAAM,MAAM,qBAAqB,GAAG,OAAO,qBAAqB,CAAC;AAEjE,eAAO,MAAM,kBAAkB,qBAAsB,CAAC;AACtD,MAAM,MAAM,kBAAkB,GAAG,OAAO,kBAAkB,CAAC;AAE3D,MAAM,MAAM,qBAAqB,GAAG,UAAU,CAAC,qBAAqB,EAAE,uBAAuB,CAAC,CAAC;AAE/F;;GAEG;AACH,MAAM,MAAM,2BAA2B,GAAG,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,GAC7E,OAAO,CAAC,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEjD,MAAM,MAAM,sBAAsB,GAAG,eAAe,CAClD,OAAO,qBAAqB,EAC5B,kBAAkB,EAClB,qBAAqB,CACtB,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import { type DocumentRootNode } from "./DocumentSchema";
7
+ /**
8
+ * Parses markdown or plain text into a hierarchical document tree; every public method resolves to a DocumentRootNode
9
+ */
10
+ export declare class StructuralParser {
11
+ /**
12
+ * Parse markdown text into a hierarchical document tree (resolves to the root node)
13
+ */
14
+ static parseMarkdown(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
15
+ /**
16
+ * Parse plain text into a hierarchical document tree (resolves to the root node)
17
+ * Splits by double newlines to create paragraphs
18
+ */
19
+ static parsePlainText(doc_id: string, text: string, title: string): Promise<DocumentRootNode>;
20
+ /**
21
+ * Auto-detect format and parse; the optional `format` argument ("markdown" | "text") presumably bypasses detection - NOTE(review): confirm against the implementation
22
+ */
23
+ static parse(doc_id: string, text: string, title: string, format?: "markdown" | "text"): Promise<DocumentRootNode>;
24
+ /**
25
+ * Check if text contains markdown header patterns
26
+ * Looks for lines starting with 1-6 hash symbols followed by whitespace
27
+ */
28
+ private static looksLikeMarkdown;
29
+ }
30
+ //# sourceMappingURL=StructuralParser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"StructuralParser.d.ts","sourceRoot":"","sources":["../../src/document/StructuralParser.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EACL,KAAK,gBAAgB,EAItB,MAAM,kBAAkB,CAAC;AAE1B;;GAEG;AACH,qBAAa,gBAAgB;IAC3B;;OAEG;WACU,aAAa,CACxB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,CAAC;IAqH5B;;;OAGG;WACU,cAAc,CACzB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,CAAC;IAoE5B;;OAEG;IACH,MAAM,CAAC,KAAK,CACV,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,UAAU,GAAG,MAAM,GAC3B,OAAO,CAAC,gBAAgB,CAAC;IAO5B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,iBAAiB;CAIjC"}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import type { VectorSearchOptions } from "@workglow/storage";
7
+ import type { TypedArray } from "@workglow/util";
8
+ import type { DocumentChunk, DocumentChunkStorage, InsertDocumentChunk } from "./DocumentChunkSchema";
9
+ /**
10
+ * Document Chunk Dataset
11
+ *
12
+ * A dataset-specific wrapper around vector storage (a DocumentChunkStorage passed to the constructor) for document chunks.
13
+ * This provides a domain-specific API for working with document chunk embeddings
14
+ * in RAG pipelines.
15
+ */
16
+ export declare class DocumentChunkDataset {
17
+ private storage;
18
+ constructor(storage: DocumentChunkStorage);
19
+ /**
20
+ * Get the underlying storage instance
21
+ */
22
+ getStorage(): DocumentChunkStorage;
23
+ /**
24
+ * Store a document chunk; the input type makes chunk_id optional and the promise resolves to a full DocumentChunk
25
+ */
26
+ put(chunk: InsertDocumentChunk): Promise<DocumentChunk>;
27
+ /**
28
+ * Store multiple document chunks; resolves to an array of full DocumentChunk values
29
+ */
30
+ putBulk(chunks: InsertDocumentChunk[]): Promise<DocumentChunk[]>;
31
+ /**
32
+ * Get a document chunk by its chunk_id; may resolve to undefined (presumably when no such chunk exists - confirm)
33
+ */
34
+ get(chunk_id: string): Promise<DocumentChunk | undefined>;
35
+ /**
36
+ * Delete a document chunk by its chunk_id
37
+ */
38
+ delete(chunk_id: string): Promise<void>;
39
+ /**
40
+ * Search for similar chunks using vector similarity; each result is a DocumentChunk extended with a numeric score
41
+ */
42
+ similaritySearch(query: TypedArray, options?: VectorSearchOptions<Record<string, unknown>>): Promise<Array<DocumentChunk & {
43
+ score: number;
44
+ }>>;
45
+ /**
46
+ * Hybrid search (vector + full-text); options are required here and must carry textQuery, with vectorWeight optional
47
+ */
48
+ hybridSearch(query: TypedArray, options: VectorSearchOptions<Record<string, unknown>> & {
49
+ textQuery: string;
50
+ vectorWeight?: number;
51
+ }): Promise<Array<DocumentChunk & {
52
+ score: number;
53
+ }>>;
54
+ /**
55
+ * Get all chunks; may resolve to undefined per the declared return type
56
+ */
57
+ getAll(): Promise<DocumentChunk[] | undefined>;
58
+ /**
59
+ * Get the count of stored chunks
60
+ */
61
+ size(): Promise<number>;
62
+ /**
63
+ * Clear all chunks
64
+ */
65
+ clear(): Promise<void>;
66
+ /**
67
+ * Destroy the storage (declared to return void rather than a Promise)
68
+ */
69
+ destroy(): void;
70
+ /**
71
+ * Set up the database/storage
72
+ */
73
+ setupDatabase(): Promise<void>;
74
+ /**
75
+ * Get the vector dimensions
76
+ */
77
+ getVectorDimensions(): number;
78
+ }
79
+ //# sourceMappingURL=DocumentChunkDataset.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocumentChunkDataset.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkDataset.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAC7D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EACV,aAAa,EAEb,oBAAoB,EACpB,mBAAmB,EACpB,MAAM,uBAAuB,CAAC;AAE/B;;;;;;GAMG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,OAAO,CAAuB;gBAE1B,OAAO,EAAE,oBAAoB;IAIzC;;OAEG;IACH,UAAU,IAAI,oBAAoB;IAIlC;;OAEG;IACG,GAAG,CAAC,KAAK,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC;IAI7D;;OAEG;IACG,OAAO,CAAC,MAAM,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IAItE;;OAEG;IACG,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;IAK/D;;OAEG;IACG,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAK7C;;OAEG;IACG,gBAAgB,CACpB,KAAK,EAAE,UAAU,EACjB,OAAO,CAAC,EAAE,mBAAmB,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GACrD,OAAO,CAAC,KAAK,CAAC,aAAa,GAAG;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAIpD;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,UAAU,EACjB,OAAO,EAAE,mBAAmB,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,GAAG;QACtD,SAAS,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GACA,OAAO,CAAC,KAAK,CAAC,aAAa,GAAG;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAOpD;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,aAAa,EAAE,GAAG,SAAS,CAAC;IAIpD;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC;IAI7B;;OAEG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;OAEG;IACH,OAAO,IAAI,IAAI;IAIf;;OAEG;IACG,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAIpC;;OAEG;IACH,mBAAmB,IAAI,MAAM;CAG9B"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import type { DocumentChunkDataset } from "./DocumentChunkDataset";
7
+ /**
8
+ * Service token for the document chunk dataset registry.
9
+ * The token is typed as a Map from dataset ID (string) to DocumentChunkDataset instance.
10
+ */
11
+ export declare const DOCUMENT_CHUNK_DATASET: import("@workglow/util").ServiceToken<Map<string, DocumentChunkDataset>>;
12
+ /**
13
+ * Gets the global document chunk dataset registry (the whole Map, not a single dataset)
14
+ * @returns Map of document chunk dataset ID to DocumentChunkDataset instance
15
+ */
16
+ export declare function getGlobalDocumentChunkDataset(): Map<string, DocumentChunkDataset>;
17
+ /**
18
+ * Registers a document chunk dataset globally by ID (NOTE(review): behavior for an already-registered ID is not visible in this declaration - confirm)
19
+ * @param id The unique identifier for this dataset
20
+ * @param dataset The DocumentChunkDataset instance to register
21
+ */
22
+ export declare function registerDocumentChunkDataset(id: string, dataset: DocumentChunkDataset): void;
23
+ /**
24
+ * Gets a single document chunk dataset by ID from the global registry
25
+ * @param id The dataset identifier
26
+ * @returns The DocumentChunkDataset instance, or undefined if not found
27
+ */
28
+ export declare function getDocumentChunkDataset(id: string): DocumentChunkDataset | undefined;
29
+ //# sourceMappingURL=DocumentChunkDatasetRegistry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocumentChunkDatasetRegistry.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkDatasetRegistry.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAEnE;;;GAGG;AACH,eAAO,MAAM,sBAAsB,0EAC8C,CAAC;AAWlF;;;GAGG;AACH,wBAAgB,6BAA6B,IAAI,GAAG,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAEjF;AAED;;;;GAIG;AACH,wBAAgB,4BAA4B,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,oBAAoB,GAAG,IAAI,CAG5F;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,EAAE,EAAE,MAAM,GAAG,oBAAoB,GAAG,SAAS,CAEpF"}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import { IVectorStorage } from "@workglow/storage";
7
+ import { type TypedArray } from "@workglow/util";
8
+ /**
9
+ * Default schema for document chunk storage with vector embeddings: chunk_id (string, flagged "x-auto-generated"), doc_id (string), vector (typed array) and metadata (open object); no other top-level properties are allowed
10
+ */
11
+ export declare const DocumentChunkSchema: {
12
+ readonly type: "object";
13
+ readonly properties: {
14
+ readonly chunk_id: {
15
+ readonly type: "string";
16
+ readonly "x-auto-generated": true;
17
+ };
18
+ readonly doc_id: {
19
+ readonly type: "string";
20
+ };
21
+ readonly vector: {
22
+ readonly type: "array";
23
+ readonly format: "TypedArray";
24
+ readonly title: "Typed Array";
25
+ readonly description: "A typed array (Float32Array, Int8Array, etc.)";
26
+ };
27
+ readonly metadata: {
28
+ readonly type: "object";
29
+ readonly format: "metadata";
30
+ readonly additionalProperties: true;
31
+ };
32
+ };
33
+ readonly additionalProperties: false;
34
+ };
35
+ export type DocumentChunkSchema = typeof DocumentChunkSchema; // type alias sharing the constant's name, so the schema can be used in type positions
36
+ export declare const DocumentChunkPrimaryKey: readonly ["chunk_id"]; // primary-key tuple: chunk_id only
37
+ export type DocumentChunkPrimaryKey = typeof DocumentChunkPrimaryKey;
38
+ export interface DocumentChunk<Metadata extends Record<string, unknown> = Record<string, unknown>, Vector extends TypedArray = TypedArray> {
39
+ chunk_id: string; // listed in DocumentChunkPrimaryKey; optional only on InsertDocumentChunk
40
+ doc_id: string; // presumably the ID of the document this chunk belongs to - confirm
41
+ vector: Vector; // typed array whose concrete type is set by the Vector type parameter
42
+ metadata: Metadata; // shape set by the Metadata type parameter (defaults to Record<string, unknown>)
43
+ }
44
+ /**
45
+ * Type for inserting document chunks: identical to DocumentChunk<Metadata, Vector> except that chunk_id is optional (the schema flags chunk_id as auto-generated)
46
+ */
47
+ export type InsertDocumentChunk<Metadata extends Record<string, unknown> = Record<string, unknown>, Vector extends TypedArray = TypedArray> = Omit<DocumentChunk<Metadata, Vector>, "chunk_id"> & Partial<Pick<DocumentChunk<Metadata, Vector>, "chunk_id">>;
48
+ /**
49
+ * Type for the primary key of document chunks: an object holding only the chunk_id string
50
+ */
51
+ export type DocumentChunkKey = {
52
+ chunk_id: string;
53
+ };
54
+ export type DocumentChunkStorage = IVectorStorage<Record<string, unknown>, typeof DocumentChunkSchema, DocumentChunk, DocumentChunkPrimaryKey>; // vector storage specialised to the document chunk schema, entity type and primary-key tuple
55
+ //# sourceMappingURL=DocumentChunkSchema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DocumentChunkSchema.d.ts","sourceRoot":"","sources":["../../src/document-chunk/DocumentChunkSchema.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAA+C,KAAK,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE9F;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;CASS,CAAC;AAC1C,MAAM,MAAM,mBAAmB,GAAG,OAAO,mBAAmB,CAAC;AAE7D,eAAO,MAAM,uBAAuB,uBAAwB,CAAC;AAC7D,MAAM,MAAM,uBAAuB,GAAG,OAAO,uBAAuB,CAAC;AAErE,MAAM,WAAW,aAAa,CAC5B,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAClE,MAAM,SAAS,UAAU,GAAG,UAAU;IAEtC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,QAAQ,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,MAAM,mBAAmB,CAC7B,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAClE,MAAM,SAAS,UAAU,GAAG,UAAU,IACpC,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,UAAU,CAAC,GACnD,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC;AAE7D;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC;AAEpD,MAAM,MAAM,oBAAoB,GAAG,cAAc,CAC/C,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACvB,OAAO,mBAAmB,EAC1B,aAAa,EACb,uBAAuB,CACxB,CAAC"}