@workglow/ai 0.0.85 → 0.0.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -34
- package/dist/browser.js +3948 -1450
- package/dist/browser.js.map +50 -39
- package/dist/bun.js +3948 -1450
- package/dist/bun.js.map +50 -39
- package/dist/common.d.ts +0 -3
- package/dist/common.d.ts.map +1 -1
- package/dist/model/ModelRegistry.d.ts +2 -2
- package/dist/model/ModelRegistry.d.ts.map +1 -1
- package/dist/model/ModelRepository.d.ts +3 -3
- package/dist/model/ModelRepository.d.ts.map +1 -1
- package/dist/model/ModelSchema.d.ts +1 -1
- package/dist/node.js +3948 -1450
- package/dist/node.js.map +50 -39
- package/dist/provider/AiProviderRegistry.d.ts +1 -0
- package/dist/provider/AiProviderRegistry.d.ts.map +1 -1
- package/dist/task/BackgroundRemovalTask.d.ts +121 -289
- package/dist/task/BackgroundRemovalTask.d.ts.map +1 -1
- package/dist/task/ChunkRetrievalTask.d.ts +243 -0
- package/dist/task/ChunkRetrievalTask.d.ts.map +1 -0
- package/dist/task/ChunkToVectorTask.d.ts +183 -0
- package/dist/task/ChunkToVectorTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorHybridSearchTask.d.ts +160 -0
- package/dist/task/ChunkVectorHybridSearchTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorSearchTask.d.ts +137 -0
- package/dist/task/ChunkVectorSearchTask.d.ts.map +1 -0
- package/dist/task/ChunkVectorUpsertTask.d.ts +120 -0
- package/dist/task/ChunkVectorUpsertTask.d.ts.map +1 -0
- package/dist/task/ContextBuilderTask.d.ts +131 -0
- package/dist/task/ContextBuilderTask.d.ts.map +1 -0
- package/dist/task/DocumentEnricherTask.d.ts +232 -0
- package/dist/task/DocumentEnricherTask.d.ts.map +1 -0
- package/dist/task/DownloadModelTask.d.ts +80 -208
- package/dist/task/DownloadModelTask.d.ts.map +1 -1
- package/dist/task/FaceDetectorTask.d.ts +117 -272
- package/dist/task/FaceDetectorTask.d.ts.map +1 -1
- package/dist/task/FaceLandmarkerTask.d.ts +117 -272
- package/dist/task/FaceLandmarkerTask.d.ts.map +1 -1
- package/dist/task/GestureRecognizerTask.d.ts +129 -284
- package/dist/task/GestureRecognizerTask.d.ts.map +1 -1
- package/dist/task/HandLandmarkerTask.d.ts +125 -280
- package/dist/task/HandLandmarkerTask.d.ts.map +1 -1
- package/dist/task/HierarchicalChunkerTask.d.ts +212 -0
- package/dist/task/HierarchicalChunkerTask.d.ts.map +1 -0
- package/dist/task/HierarchyJoinTask.d.ts +318 -0
- package/dist/task/HierarchyJoinTask.d.ts.map +1 -0
- package/dist/task/ImageClassificationTask.d.ts +117 -272
- package/dist/task/ImageClassificationTask.d.ts.map +1 -1
- package/dist/task/ImageEmbeddingTask.d.ts +125 -446
- package/dist/task/ImageEmbeddingTask.d.ts.map +1 -1
- package/dist/task/ImageSegmentationTask.d.ts +117 -272
- package/dist/task/ImageSegmentationTask.d.ts.map +1 -1
- package/dist/task/ImageToTextTask.d.ts +117 -272
- package/dist/task/ImageToTextTask.d.ts.map +1 -1
- package/dist/task/ObjectDetectionTask.d.ts +119 -274
- package/dist/task/ObjectDetectionTask.d.ts.map +1 -1
- package/dist/task/PoseLandmarkerTask.d.ts +117 -272
- package/dist/task/PoseLandmarkerTask.d.ts.map +1 -1
- package/dist/task/QueryExpanderTask.d.ts +129 -0
- package/dist/task/QueryExpanderTask.d.ts.map +1 -0
- package/dist/task/RerankerTask.d.ts +209 -0
- package/dist/task/RerankerTask.d.ts.map +1 -0
- package/dist/task/StructuralParserTask.d.ts +91 -0
- package/dist/task/StructuralParserTask.d.ts.map +1 -0
- package/dist/task/TextChunkerTask.d.ts +129 -0
- package/dist/task/TextChunkerTask.d.ts.map +1 -0
- package/dist/task/TextClassificationTask.d.ts +42 -115
- package/dist/task/TextClassificationTask.d.ts.map +1 -1
- package/dist/task/TextEmbeddingTask.d.ts +55 -277
- package/dist/task/TextEmbeddingTask.d.ts.map +1 -1
- package/dist/task/TextFillMaskTask.d.ts +42 -115
- package/dist/task/TextFillMaskTask.d.ts.map +1 -1
- package/dist/task/TextGenerationTask.d.ts +44 -128
- package/dist/task/TextGenerationTask.d.ts.map +1 -1
- package/dist/task/TextLanguageDetectionTask.d.ts +42 -115
- package/dist/task/TextLanguageDetectionTask.d.ts.map +1 -1
- package/dist/task/TextNamedEntityRecognitionTask.d.ts +42 -115
- package/dist/task/TextNamedEntityRecognitionTask.d.ts.map +1 -1
- package/dist/task/TextQuestionAnswerTask.d.ts +47 -144
- package/dist/task/TextQuestionAnswerTask.d.ts.map +1 -1
- package/dist/task/TextRewriterTask.d.ts +45 -131
- package/dist/task/TextRewriterTask.d.ts.map +1 -1
- package/dist/task/TextSummaryTask.d.ts +42 -115
- package/dist/task/TextSummaryTask.d.ts.map +1 -1
- package/dist/task/TextTranslationTask.d.ts +54 -168
- package/dist/task/TextTranslationTask.d.ts.map +1 -1
- package/dist/task/TopicSegmenterTask.d.ts +148 -0
- package/dist/task/TopicSegmenterTask.d.ts.map +1 -0
- package/dist/task/UnloadModelTask.d.ts +80 -208
- package/dist/task/UnloadModelTask.d.ts.map +1 -1
- package/dist/task/VectorQuantizeTask.d.ts +120 -0
- package/dist/task/VectorQuantizeTask.d.ts.map +1 -0
- package/dist/task/VectorSimilarityTask.d.ts +18 -253
- package/dist/task/VectorSimilarityTask.d.ts.map +1 -1
- package/dist/task/base/AiTask.d.ts +24 -22
- package/dist/task/base/AiTask.d.ts.map +1 -1
- package/dist/task/base/AiTaskSchemas.d.ts +5 -129
- package/dist/task/base/AiTaskSchemas.d.ts.map +1 -1
- package/dist/task/base/AiVisionTask.d.ts +1 -4
- package/dist/task/base/AiVisionTask.d.ts.map +1 -1
- package/dist/task/index.d.ts +54 -1
- package/dist/task/index.d.ts.map +1 -1
- package/package.json +14 -9
- package/dist/source/Document.d.ts +0 -56
- package/dist/source/Document.d.ts.map +0 -1
- package/dist/source/DocumentConverter.d.ts +0 -15
- package/dist/source/DocumentConverter.d.ts.map +0 -1
- package/dist/source/DocumentConverterMarkdown.d.ts +0 -13
- package/dist/source/DocumentConverterMarkdown.d.ts.map +0 -1
- package/dist/source/DocumentConverterText.d.ts +0 -13
- package/dist/source/DocumentConverterText.d.ts.map +0 -1
- package/dist/source/MasterDocument.d.ts +0 -27
- package/dist/source/MasterDocument.d.ts.map +0 -1
- package/dist/source/index.d.ts +0 -10
- package/dist/source/index.d.ts.map +0 -1
- package/dist/task/DocumentSplitterTask.d.ts +0 -58
- package/dist/task/DocumentSplitterTask.d.ts.map +0 -1
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import { CreateWorkflow, IExecuteContext, JobQueueTaskConfig, Task } from "@workglow/task-graph";
|
|
7
|
+
import { DataPortSchema, FromSchema } from "@workglow/util";
|
|
8
|
+
declare const inputSchema: {
|
|
9
|
+
readonly type: "object";
|
|
10
|
+
readonly properties: {
|
|
11
|
+
readonly doc_id: {
|
|
12
|
+
readonly type: "string";
|
|
13
|
+
readonly title: "Document ID";
|
|
14
|
+
readonly description: "The ID of the document";
|
|
15
|
+
};
|
|
16
|
+
readonly documentTree: {
|
|
17
|
+
readonly title: "Document Tree";
|
|
18
|
+
readonly description: "The hierarchical document tree to chunk";
|
|
19
|
+
};
|
|
20
|
+
readonly maxTokens: {
|
|
21
|
+
readonly type: "number";
|
|
22
|
+
readonly title: "Max Tokens";
|
|
23
|
+
readonly description: "Maximum tokens per chunk";
|
|
24
|
+
readonly minimum: 50;
|
|
25
|
+
readonly default: 512;
|
|
26
|
+
};
|
|
27
|
+
readonly overlap: {
|
|
28
|
+
readonly type: "number";
|
|
29
|
+
readonly title: "Overlap";
|
|
30
|
+
readonly description: "Overlap in tokens between chunks";
|
|
31
|
+
readonly minimum: 0;
|
|
32
|
+
readonly default: 50;
|
|
33
|
+
};
|
|
34
|
+
readonly reservedTokens: {
|
|
35
|
+
readonly type: "number";
|
|
36
|
+
readonly title: "Reserved Tokens";
|
|
37
|
+
readonly description: "Reserved tokens for metadata/wrappers";
|
|
38
|
+
readonly minimum: 0;
|
|
39
|
+
readonly default: 10;
|
|
40
|
+
};
|
|
41
|
+
readonly strategy: {
|
|
42
|
+
readonly type: "string";
|
|
43
|
+
readonly enum: readonly ["hierarchical", "flat", "sentence"];
|
|
44
|
+
readonly title: "Chunking Strategy";
|
|
45
|
+
readonly description: "Strategy for chunking";
|
|
46
|
+
readonly default: "hierarchical";
|
|
47
|
+
};
|
|
48
|
+
};
|
|
49
|
+
readonly required: readonly [];
|
|
50
|
+
readonly additionalProperties: false;
|
|
51
|
+
};
|
|
52
|
+
declare const outputSchema: {
|
|
53
|
+
readonly type: "object";
|
|
54
|
+
readonly properties: {
|
|
55
|
+
readonly doc_id: {
|
|
56
|
+
readonly type: "string";
|
|
57
|
+
readonly title: "Document ID";
|
|
58
|
+
readonly description: "The document ID (passed through)";
|
|
59
|
+
};
|
|
60
|
+
readonly chunks: {
|
|
61
|
+
readonly type: "array";
|
|
62
|
+
readonly items: {
|
|
63
|
+
readonly type: "object";
|
|
64
|
+
readonly properties: {
|
|
65
|
+
readonly chunkId: {
|
|
66
|
+
readonly type: "string";
|
|
67
|
+
readonly title: "Chunk ID";
|
|
68
|
+
readonly description: "Unique identifier for this chunk";
|
|
69
|
+
};
|
|
70
|
+
readonly doc_id: {
|
|
71
|
+
readonly type: "string";
|
|
72
|
+
readonly title: "Document ID";
|
|
73
|
+
readonly description: "ID of the parent document";
|
|
74
|
+
};
|
|
75
|
+
readonly text: {
|
|
76
|
+
readonly type: "string";
|
|
77
|
+
readonly title: "Text";
|
|
78
|
+
readonly description: "Text content of the chunk";
|
|
79
|
+
};
|
|
80
|
+
readonly nodePath: {
|
|
81
|
+
readonly type: "array";
|
|
82
|
+
readonly items: {
|
|
83
|
+
readonly type: "string";
|
|
84
|
+
};
|
|
85
|
+
readonly title: "Node Path";
|
|
86
|
+
readonly description: "Node IDs from root to leaf";
|
|
87
|
+
};
|
|
88
|
+
readonly depth: {
|
|
89
|
+
readonly type: "integer";
|
|
90
|
+
readonly title: "Depth";
|
|
91
|
+
readonly description: "Depth in the document tree";
|
|
92
|
+
};
|
|
93
|
+
readonly enrichment: {
|
|
94
|
+
readonly type: "object";
|
|
95
|
+
readonly properties: {
|
|
96
|
+
readonly summary: {
|
|
97
|
+
readonly type: "string";
|
|
98
|
+
readonly title: "Summary";
|
|
99
|
+
readonly description: "Summary of the chunk content";
|
|
100
|
+
};
|
|
101
|
+
readonly entities: {
|
|
102
|
+
readonly type: "array";
|
|
103
|
+
readonly items: {
|
|
104
|
+
readonly type: "object";
|
|
105
|
+
readonly properties: {
|
|
106
|
+
readonly text: {
|
|
107
|
+
readonly type: "string";
|
|
108
|
+
readonly title: "Text";
|
|
109
|
+
readonly description: "Entity text";
|
|
110
|
+
};
|
|
111
|
+
readonly type: {
|
|
112
|
+
readonly type: "string";
|
|
113
|
+
readonly title: "Type";
|
|
114
|
+
readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
|
|
115
|
+
};
|
|
116
|
+
readonly score: {
|
|
117
|
+
readonly type: "number";
|
|
118
|
+
readonly title: "Score";
|
|
119
|
+
readonly description: "Confidence score";
|
|
120
|
+
};
|
|
121
|
+
};
|
|
122
|
+
readonly required: readonly ["text", "type", "score"];
|
|
123
|
+
readonly additionalProperties: false;
|
|
124
|
+
};
|
|
125
|
+
readonly title: "Entities";
|
|
126
|
+
readonly description: "Named entities extracted from the chunk";
|
|
127
|
+
};
|
|
128
|
+
};
|
|
129
|
+
readonly additionalProperties: false;
|
|
130
|
+
};
|
|
131
|
+
};
|
|
132
|
+
readonly required: readonly ["chunkId", "doc_id", "text", "nodePath", "depth"];
|
|
133
|
+
readonly additionalProperties: false;
|
|
134
|
+
};
|
|
135
|
+
readonly title: "Chunks";
|
|
136
|
+
readonly description: "Array of chunk nodes";
|
|
137
|
+
};
|
|
138
|
+
readonly text: {
|
|
139
|
+
readonly type: "array";
|
|
140
|
+
readonly items: {
|
|
141
|
+
readonly type: "string";
|
|
142
|
+
};
|
|
143
|
+
readonly title: "Texts";
|
|
144
|
+
readonly description: "Chunk texts (for TextEmbeddingTask)";
|
|
145
|
+
};
|
|
146
|
+
readonly count: {
|
|
147
|
+
readonly type: "number";
|
|
148
|
+
readonly title: "Count";
|
|
149
|
+
readonly description: "Number of chunks generated";
|
|
150
|
+
};
|
|
151
|
+
};
|
|
152
|
+
readonly required: readonly ["doc_id", "chunks", "text", "count"];
|
|
153
|
+
readonly additionalProperties: false;
|
|
154
|
+
};
|
|
155
|
+
export type HierarchicalChunkerTaskInput = FromSchema<typeof inputSchema>;
|
|
156
|
+
export type HierarchicalChunkerTaskOutput = FromSchema<typeof outputSchema>;
|
|
157
|
+
/**
|
|
158
|
+
* Task for hierarchical chunking that respects token budgets and document structure
|
|
159
|
+
*/
|
|
160
|
+
export declare class HierarchicalChunkerTask extends Task<HierarchicalChunkerTaskInput, HierarchicalChunkerTaskOutput, JobQueueTaskConfig> {
|
|
161
|
+
static type: string;
|
|
162
|
+
static category: string;
|
|
163
|
+
static title: string;
|
|
164
|
+
static description: string;
|
|
165
|
+
static cacheable: boolean;
|
|
166
|
+
static inputSchema(): DataPortSchema;
|
|
167
|
+
static outputSchema(): DataPortSchema;
|
|
168
|
+
execute(input: HierarchicalChunkerTaskInput, context: IExecuteContext): Promise<HierarchicalChunkerTaskOutput>;
|
|
169
|
+
/**
|
|
170
|
+
* Hierarchical chunking that respects document structure
|
|
171
|
+
*/
|
|
172
|
+
private chunkHierarchically;
|
|
173
|
+
/**
|
|
174
|
+
* Chunk a single text string
|
|
175
|
+
*/
|
|
176
|
+
private chunkText;
|
|
177
|
+
/**
|
|
178
|
+
* Flat chunking (ignores hierarchy)
|
|
179
|
+
*/
|
|
180
|
+
private chunkFlat;
|
|
181
|
+
/**
|
|
182
|
+
* Collect all text from a node and its descendants
|
|
183
|
+
*/
|
|
184
|
+
private collectAllText;
|
|
185
|
+
}
|
|
186
|
+
export declare const hierarchicalChunker: (input: HierarchicalChunkerTaskInput, config?: JobQueueTaskConfig) => Promise<{
|
|
187
|
+
doc_id: string;
|
|
188
|
+
chunks: {
|
|
189
|
+
enrichment?: {
|
|
190
|
+
summary?: string | undefined;
|
|
191
|
+
entities?: {
|
|
192
|
+
type: string;
|
|
193
|
+
text: string;
|
|
194
|
+
score: number;
|
|
195
|
+
}[] | undefined;
|
|
196
|
+
} | undefined;
|
|
197
|
+
doc_id: string;
|
|
198
|
+
chunkId: string;
|
|
199
|
+
text: string;
|
|
200
|
+
nodePath: string[];
|
|
201
|
+
depth: number;
|
|
202
|
+
}[];
|
|
203
|
+
text: string[];
|
|
204
|
+
count: number;
|
|
205
|
+
}>;
|
|
206
|
+
declare module "@workglow/task-graph" {
|
|
207
|
+
interface Workflow {
|
|
208
|
+
hierarchicalChunker: CreateWorkflow<HierarchicalChunkerTaskInput, HierarchicalChunkerTaskOutput, JobQueueTaskConfig>;
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
export {};
|
|
212
|
+
//# sourceMappingURL=HierarchicalChunkerTask.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HierarchicalChunkerTask.d.ts","sourceRoot":"","sources":["../../src/task/HierarchicalChunkerTask.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAWH,OAAO,EACL,cAAc,EACd,eAAe,EACf,kBAAkB,EAClB,IAAI,EAEL,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,cAAc,EAAE,UAAU,EAAS,MAAM,gBAAgB,CAAC;AAEnE,QAAA,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2CkB,CAAC;AAEpC,QAAA,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4BiB,CAAC;AAEpC,MAAM,MAAM,4BAA4B,GAAG,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC;AAC1E,MAAM,MAAM,6BAA6B,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;AAE5E;;GAEG;AACH,qBAAa,uBAAwB,SAAQ,IAAI,CAC/C,4BAA4B,EAC5B,6BAA6B,EAC7B,kBAAkB,CACnB;IACC,OAAc,IAAI,SAA6B;IAC/C,OAAc,QAAQ,SAAc;IACpC,OAAc,KAAK,SAA0B;IAC7C,OAAc,WAAW,SAA6D;IACtF,OAAc,SAAS,UAAQ;WAEjB,WAAW,IAAI,cAAc;WAI7B,YAAY,IAAI,cAAc;IAItC,OAAO,CACX,KAAK,EAAE,4BAA4B,EACnC,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,6BAA6B,CAAC;IAwCzC;;OAEG;YACW,mBAAmB;IAsBjC;;OAEG;YACW,SAAS;IAoDvB;;OAEG;YACW,SAAS;IAWvB;;OAEG;IACH,OAAO,CAAC,cAAc;CAgBvB;AAED,eAAO,MAAM,mBAAmB,GAC9B,OAAO,4BAA4B,EACnC,SAAS,kBAAkB;;;;;;;;;;;;;;;;;;;EAG5B,CAAC;AAEF,OAAO,QAAQ,sBAAsB,CAAC;IACpC,UAAU,QAAQ;QAChB,mBAAmB,EAAE,cAAc,CACjC,4BAA4B,EAC5B,6BAA6B,EAC7B,kBAAkB,CACnB,CAAC;KACH;CACF"}
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license
|
|
3
|
+
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
|
|
4
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
*/
|
|
6
|
+
import { CreateWorkflow, IExecuteContext, JobQueueTaskConfig, Task } from "@workglow/task-graph";
|
|
7
|
+
import { DataPortSchema, FromSchema } from "@workglow/util";
|
|
8
|
+
declare const inputSchema: {
|
|
9
|
+
readonly type: "object";
|
|
10
|
+
readonly properties: {
|
|
11
|
+
readonly documents: {
|
|
12
|
+
readonly title: "Document Dataset";
|
|
13
|
+
readonly description: "Dataset ID or instance for document data storage";
|
|
14
|
+
} & {
|
|
15
|
+
title: string;
|
|
16
|
+
description: string;
|
|
17
|
+
} & {
|
|
18
|
+
readonly format: "dataset:document";
|
|
19
|
+
readonly anyOf: readonly [{
|
|
20
|
+
readonly type: "string";
|
|
21
|
+
readonly title: "Dataset ID";
|
|
22
|
+
}, {
|
|
23
|
+
readonly title: "Dataset Instance";
|
|
24
|
+
readonly additionalProperties: true;
|
|
25
|
+
}];
|
|
26
|
+
};
|
|
27
|
+
readonly chunks: {
|
|
28
|
+
readonly type: "array";
|
|
29
|
+
readonly items: {
|
|
30
|
+
readonly type: "string";
|
|
31
|
+
};
|
|
32
|
+
readonly title: "Chunks";
|
|
33
|
+
readonly description: "Retrieved text chunks";
|
|
34
|
+
};
|
|
35
|
+
readonly chunk_ids: {
|
|
36
|
+
readonly type: "array";
|
|
37
|
+
readonly items: {
|
|
38
|
+
readonly type: "string";
|
|
39
|
+
};
|
|
40
|
+
readonly title: "Chunk IDs";
|
|
41
|
+
readonly description: "IDs of retrieved chunks";
|
|
42
|
+
};
|
|
43
|
+
readonly metadata: {
|
|
44
|
+
readonly type: "array";
|
|
45
|
+
readonly items: {
|
|
46
|
+
readonly type: "object";
|
|
47
|
+
readonly properties: {
|
|
48
|
+
readonly doc_id: {
|
|
49
|
+
readonly type: "string";
|
|
50
|
+
readonly title: "Document ID";
|
|
51
|
+
readonly description: "ID of the parent document";
|
|
52
|
+
};
|
|
53
|
+
readonly chunkId: {
|
|
54
|
+
readonly type: "string";
|
|
55
|
+
readonly title: "Chunk ID";
|
|
56
|
+
readonly description: "Unique identifier for this chunk";
|
|
57
|
+
};
|
|
58
|
+
readonly leafNodeId: {
|
|
59
|
+
readonly type: "string";
|
|
60
|
+
readonly title: "Leaf Node ID";
|
|
61
|
+
readonly description: "ID of the leaf node this chunk belongs to";
|
|
62
|
+
};
|
|
63
|
+
readonly depth: {
|
|
64
|
+
readonly type: "integer";
|
|
65
|
+
readonly title: "Depth";
|
|
66
|
+
readonly description: "Depth in the document tree";
|
|
67
|
+
};
|
|
68
|
+
readonly text: {
|
|
69
|
+
readonly type: "string";
|
|
70
|
+
readonly title: "Text";
|
|
71
|
+
readonly description: "Text content of the chunk";
|
|
72
|
+
};
|
|
73
|
+
readonly nodePath: {
|
|
74
|
+
readonly type: "array";
|
|
75
|
+
readonly items: {
|
|
76
|
+
readonly type: "string";
|
|
77
|
+
};
|
|
78
|
+
readonly title: "Node Path";
|
|
79
|
+
readonly description: "Node IDs from root to leaf";
|
|
80
|
+
};
|
|
81
|
+
readonly summary: {
|
|
82
|
+
readonly type: "string";
|
|
83
|
+
readonly title: "Summary";
|
|
84
|
+
readonly description: "Summary of the chunk content";
|
|
85
|
+
};
|
|
86
|
+
readonly entities: {
|
|
87
|
+
readonly type: "array";
|
|
88
|
+
readonly items: {
|
|
89
|
+
readonly type: "object";
|
|
90
|
+
readonly properties: {
|
|
91
|
+
readonly text: {
|
|
92
|
+
readonly type: "string";
|
|
93
|
+
readonly title: "Text";
|
|
94
|
+
readonly description: "Entity text";
|
|
95
|
+
};
|
|
96
|
+
readonly type: {
|
|
97
|
+
readonly type: "string";
|
|
98
|
+
readonly title: "Type";
|
|
99
|
+
readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
|
|
100
|
+
};
|
|
101
|
+
readonly score: {
|
|
102
|
+
readonly type: "number";
|
|
103
|
+
readonly title: "Score";
|
|
104
|
+
readonly description: "Confidence score";
|
|
105
|
+
};
|
|
106
|
+
};
|
|
107
|
+
readonly required: readonly ["text", "type", "score"];
|
|
108
|
+
readonly additionalProperties: false;
|
|
109
|
+
};
|
|
110
|
+
readonly title: "Entities";
|
|
111
|
+
readonly description: "Named entities extracted from the chunk";
|
|
112
|
+
};
|
|
113
|
+
};
|
|
114
|
+
readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
|
|
115
|
+
readonly additionalProperties: true;
|
|
116
|
+
};
|
|
117
|
+
readonly title: "Chunk Metadata";
|
|
118
|
+
readonly description: "Metadata for each chunk";
|
|
119
|
+
};
|
|
120
|
+
readonly scores: {
|
|
121
|
+
readonly type: "array";
|
|
122
|
+
readonly items: {
|
|
123
|
+
readonly type: "number";
|
|
124
|
+
};
|
|
125
|
+
readonly title: "Scores";
|
|
126
|
+
readonly description: "Similarity scores for each result";
|
|
127
|
+
};
|
|
128
|
+
readonly includeParentSummaries: {
|
|
129
|
+
readonly type: "boolean";
|
|
130
|
+
readonly title: "Include Parent Summaries";
|
|
131
|
+
readonly description: "Whether to include summaries from parent nodes";
|
|
132
|
+
readonly default: true;
|
|
133
|
+
};
|
|
134
|
+
readonly includeEntities: {
|
|
135
|
+
readonly type: "boolean";
|
|
136
|
+
readonly title: "Include Entities";
|
|
137
|
+
readonly description: "Whether to include entities from the node hierarchy";
|
|
138
|
+
readonly default: true;
|
|
139
|
+
};
|
|
140
|
+
};
|
|
141
|
+
readonly required: readonly ["documents", "chunks", "chunk_ids", "metadata", "scores"];
|
|
142
|
+
readonly additionalProperties: false;
|
|
143
|
+
};
|
|
144
|
+
declare const outputSchema: {
|
|
145
|
+
readonly type: "object";
|
|
146
|
+
readonly properties: {
|
|
147
|
+
readonly chunks: {
|
|
148
|
+
readonly type: "array";
|
|
149
|
+
readonly items: {
|
|
150
|
+
readonly type: "string";
|
|
151
|
+
};
|
|
152
|
+
readonly title: "Chunks";
|
|
153
|
+
readonly description: "Retrieved text chunks";
|
|
154
|
+
};
|
|
155
|
+
readonly chunk_ids: {
|
|
156
|
+
readonly type: "array";
|
|
157
|
+
readonly items: {
|
|
158
|
+
readonly type: "string";
|
|
159
|
+
};
|
|
160
|
+
readonly title: "Chunk IDs";
|
|
161
|
+
readonly description: "IDs of retrieved chunks";
|
|
162
|
+
};
|
|
163
|
+
readonly metadata: {
|
|
164
|
+
readonly type: "array";
|
|
165
|
+
readonly items: {
|
|
166
|
+
readonly type: "object";
|
|
167
|
+
readonly properties: {
|
|
168
|
+
readonly doc_id: {
|
|
169
|
+
readonly type: "string";
|
|
170
|
+
readonly title: "Document ID";
|
|
171
|
+
readonly description: "ID of the parent document";
|
|
172
|
+
};
|
|
173
|
+
readonly chunkId: {
|
|
174
|
+
readonly type: "string";
|
|
175
|
+
readonly title: "Chunk ID";
|
|
176
|
+
readonly description: "Unique identifier for this chunk";
|
|
177
|
+
};
|
|
178
|
+
readonly leafNodeId: {
|
|
179
|
+
readonly type: "string";
|
|
180
|
+
readonly title: "Leaf Node ID";
|
|
181
|
+
readonly description: "ID of the leaf node this chunk belongs to";
|
|
182
|
+
};
|
|
183
|
+
readonly depth: {
|
|
184
|
+
readonly type: "integer";
|
|
185
|
+
readonly title: "Depth";
|
|
186
|
+
readonly description: "Depth in the document tree";
|
|
187
|
+
};
|
|
188
|
+
readonly text: {
|
|
189
|
+
readonly type: "string";
|
|
190
|
+
readonly title: "Text";
|
|
191
|
+
readonly description: "Text content of the chunk";
|
|
192
|
+
};
|
|
193
|
+
readonly nodePath: {
|
|
194
|
+
readonly type: "array";
|
|
195
|
+
readonly items: {
|
|
196
|
+
readonly type: "string";
|
|
197
|
+
};
|
|
198
|
+
readonly title: "Node Path";
|
|
199
|
+
readonly description: "Node IDs from root to leaf";
|
|
200
|
+
};
|
|
201
|
+
readonly summary: {
|
|
202
|
+
readonly type: "string";
|
|
203
|
+
readonly title: "Summary";
|
|
204
|
+
readonly description: "Summary of the chunk content";
|
|
205
|
+
};
|
|
206
|
+
readonly entities: {
|
|
207
|
+
readonly type: "array";
|
|
208
|
+
readonly items: {
|
|
209
|
+
readonly type: "object";
|
|
210
|
+
readonly properties: {
|
|
211
|
+
readonly text: {
|
|
212
|
+
readonly type: "string";
|
|
213
|
+
readonly title: "Text";
|
|
214
|
+
readonly description: "Entity text";
|
|
215
|
+
};
|
|
216
|
+
readonly type: {
|
|
217
|
+
readonly type: "string";
|
|
218
|
+
readonly title: "Type";
|
|
219
|
+
readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
|
|
220
|
+
};
|
|
221
|
+
readonly score: {
|
|
222
|
+
readonly type: "number";
|
|
223
|
+
readonly title: "Score";
|
|
224
|
+
readonly description: "Confidence score";
|
|
225
|
+
};
|
|
226
|
+
};
|
|
227
|
+
readonly required: readonly ["text", "type", "score"];
|
|
228
|
+
readonly additionalProperties: false;
|
|
229
|
+
};
|
|
230
|
+
readonly title: "Entities";
|
|
231
|
+
readonly description: "Named entities (rolled up from hierarchy)";
|
|
232
|
+
};
|
|
233
|
+
readonly parentSummaries: {
|
|
234
|
+
readonly type: "array";
|
|
235
|
+
readonly items: {
|
|
236
|
+
readonly type: "string";
|
|
237
|
+
};
|
|
238
|
+
readonly title: "Parent Summaries";
|
|
239
|
+
readonly description: "Summaries from ancestor nodes";
|
|
240
|
+
};
|
|
241
|
+
readonly sectionTitles: {
|
|
242
|
+
readonly type: "array";
|
|
243
|
+
readonly items: {
|
|
244
|
+
readonly type: "string";
|
|
245
|
+
};
|
|
246
|
+
readonly title: "Section Titles";
|
|
247
|
+
readonly description: "Titles of ancestor section nodes";
|
|
248
|
+
};
|
|
249
|
+
};
|
|
250
|
+
readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
|
|
251
|
+
readonly additionalProperties: true;
|
|
252
|
+
};
|
|
253
|
+
readonly title: "Enriched Metadata";
|
|
254
|
+
readonly description: "Metadata enriched with hierarchy information";
|
|
255
|
+
};
|
|
256
|
+
readonly scores: {
|
|
257
|
+
readonly type: "array";
|
|
258
|
+
readonly items: {
|
|
259
|
+
readonly type: "number";
|
|
260
|
+
};
|
|
261
|
+
readonly title: "Scores";
|
|
262
|
+
readonly description: "Similarity scores";
|
|
263
|
+
};
|
|
264
|
+
readonly count: {
|
|
265
|
+
readonly type: "number";
|
|
266
|
+
readonly title: "Count";
|
|
267
|
+
readonly description: "Number of results";
|
|
268
|
+
};
|
|
269
|
+
};
|
|
270
|
+
readonly required: readonly ["chunks", "chunk_ids", "metadata", "scores", "count"];
|
|
271
|
+
readonly additionalProperties: false;
|
|
272
|
+
};
|
|
273
|
+
export type HierarchyJoinTaskInput = FromSchema<typeof inputSchema>;
|
|
274
|
+
export type HierarchyJoinTaskOutput = FromSchema<typeof outputSchema>;
|
|
275
|
+
/**
|
|
276
|
+
* Task for enriching search results with hierarchy information
|
|
277
|
+
* Joins chunk IDs back to document repository to get parent summaries and entities
|
|
278
|
+
*/
|
|
279
|
+
export declare class HierarchyJoinTask extends Task<HierarchyJoinTaskInput, HierarchyJoinTaskOutput, JobQueueTaskConfig> {
|
|
280
|
+
static type: string;
|
|
281
|
+
static category: string;
|
|
282
|
+
static title: string;
|
|
283
|
+
static description: string;
|
|
284
|
+
static cacheable: boolean;
|
|
285
|
+
static inputSchema(): DataPortSchema;
|
|
286
|
+
static outputSchema(): DataPortSchema;
|
|
287
|
+
execute(input: HierarchyJoinTaskInput, context: IExecuteContext): Promise<HierarchyJoinTaskOutput>;
|
|
288
|
+
}
|
|
289
|
+
export declare const hierarchyJoin: (input: HierarchyJoinTaskInput, config?: JobQueueTaskConfig) => Promise<{
|
|
290
|
+
metadata: {
|
|
291
|
+
[x: string]: unknown;
|
|
292
|
+
summary?: string | undefined;
|
|
293
|
+
entities?: {
|
|
294
|
+
type: string;
|
|
295
|
+
text: string;
|
|
296
|
+
score: number;
|
|
297
|
+
}[] | undefined;
|
|
298
|
+
parentSummaries?: string[] | undefined;
|
|
299
|
+
sectionTitles?: string[] | undefined;
|
|
300
|
+
doc_id: string;
|
|
301
|
+
chunkId: string;
|
|
302
|
+
text: string;
|
|
303
|
+
nodePath: string[];
|
|
304
|
+
depth: number;
|
|
305
|
+
leafNodeId: string;
|
|
306
|
+
}[];
|
|
307
|
+
chunks: string[];
|
|
308
|
+
scores: number[];
|
|
309
|
+
chunk_ids: string[];
|
|
310
|
+
count: number;
|
|
311
|
+
}>;
|
|
312
|
+
declare module "@workglow/task-graph" {
|
|
313
|
+
interface Workflow {
|
|
314
|
+
hierarchyJoin: CreateWorkflow<HierarchyJoinTaskInput, HierarchyJoinTaskOutput, JobQueueTaskConfig>;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
export {};
|
|
318
|
+
//# sourceMappingURL=HierarchyJoinTask.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HierarchyJoinTask.d.ts","sourceRoot":"","sources":["../../src/task/HierarchyJoinTask.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AASH,OAAO,EACL,cAAc,EACd,eAAe,EACf,kBAAkB,EAClB,IAAI,EAEL,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE5D,QAAA,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAyCkB,CAAC;AAEpC,QAAA,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8BiB,CAAC;AAEpC,MAAM,MAAM,sBAAsB,GAAG,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC;AACpE,MAAM,MAAM,uBAAuB,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;AAEtE;;;GAGG;AACH,qBAAa,iBAAkB,SAAQ,IAAI,CACzC,sBAAsB,EACtB,uBAAuB,EACvB,kBAAkB,CACnB;IACC,OAAc,IAAI,SAAuB;IACzC,OAAc,QAAQ,SAAS;IAC/B,OAAc,KAAK,SAAoB;IACvC,OAAc,WAAW,SAA2D;IACpF,OAAc,SAAS,UAAS;WAElB,WAAW,IAAI,cAAc;WAI7B,YAAY,IAAI,cAAc;IAItC,OAAO,CACX,KAAK,EAAE,sBAAsB,EAC7B,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,uBAAuB,CAAC;CAsGpC;AAED,eAAO,MAAM,aAAa,GAAI,OAAO,sBAAsB,EAAE,SAAS,kBAAkB;;;;;;;;;;;;;;;;;;;;;;EAEvF,CAAC;AAEF,OAAO,QAAQ,sBAAsB,CAAC;IACpC,UAAU,QAAQ;QAChB,aAAa,EAAE,cAAc,CAC3B,sBAAsB,EACtB,uBAAuB,EACvB,kBAAkB,CACnB,CAAC;KACH;CACF"}
|