@workglow/dataset 0.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +1134 -0
  3. package/dist/browser.js +1053 -0
  4. package/dist/browser.js.map +20 -0
  5. package/dist/bun.js +1054 -0
  6. package/dist/bun.js.map +20 -0
  7. package/dist/common-server.d.ts +7 -0
  8. package/dist/common-server.d.ts.map +1 -0
  9. package/dist/common.d.ts +17 -0
  10. package/dist/common.d.ts.map +1 -0
  11. package/dist/document/Document.d.ts +50 -0
  12. package/dist/document/Document.d.ts.map +1 -0
  13. package/dist/document/DocumentDataset.d.ts +79 -0
  14. package/dist/document/DocumentDataset.d.ts.map +1 -0
  15. package/dist/document/DocumentDatasetRegistry.d.ts +29 -0
  16. package/dist/document/DocumentDatasetRegistry.d.ts.map +1 -0
  17. package/dist/document/DocumentNode.d.ts +31 -0
  18. package/dist/document/DocumentNode.d.ts.map +1 -0
  19. package/dist/document/DocumentSchema.d.ts +1668 -0
  20. package/dist/document/DocumentSchema.d.ts.map +1 -0
  21. package/dist/document/DocumentStorageSchema.d.ts +43 -0
  22. package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
  23. package/dist/document/StructuralParser.d.ts +30 -0
  24. package/dist/document/StructuralParser.d.ts.map +1 -0
  25. package/dist/document-chunk/DocumentChunkDataset.d.ts +79 -0
  26. package/dist/document-chunk/DocumentChunkDataset.d.ts.map +1 -0
  27. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts +29 -0
  28. package/dist/document-chunk/DocumentChunkDatasetRegistry.d.ts.map +1 -0
  29. package/dist/document-chunk/DocumentChunkSchema.d.ts +55 -0
  30. package/dist/document-chunk/DocumentChunkSchema.d.ts.map +1 -0
  31. package/dist/node.js +1053 -0
  32. package/dist/node.js.map +20 -0
  33. package/dist/types.d.ts +7 -0
  34. package/dist/types.d.ts.map +1 -0
  35. package/dist/util/DatasetSchema.d.ts +85 -0
  36. package/dist/util/DatasetSchema.d.ts.map +1 -0
  37. package/package.json +54 -0
  38. package/src/document-chunk/README.md +362 -0
@@ -0,0 +1,1668 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Steven Roussey <sroussey@gmail.com>
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import type { FromSchema } from "@workglow/util";
7
+ /**
8
+ * Node kind discriminator for hierarchical document structure
9
+ */
10
+ export declare const NodeKind: {
11
+ readonly DOCUMENT: "document";
12
+ readonly SECTION: "section";
13
+ readonly PARAGRAPH: "paragraph";
14
+ readonly SENTENCE: "sentence";
15
+ readonly TOPIC: "topic";
16
+ };
17
+ export type NodeKind = (typeof NodeKind)[keyof typeof NodeKind];
18
+ /**
19
+ * Schema for source range of a node (character offsets)
20
+ */
21
+ export declare const NodeRangeSchema: {
22
+ readonly type: "object";
23
+ readonly properties: {
24
+ readonly startOffset: {
25
+ readonly type: "integer";
26
+ readonly title: "Start Offset";
27
+ readonly description: "Starting character offset";
28
+ };
29
+ readonly endOffset: {
30
+ readonly type: "integer";
31
+ readonly title: "End Offset";
32
+ readonly description: "Ending character offset";
33
+ };
34
+ };
35
+ readonly required: readonly ["startOffset", "endOffset"];
36
+ readonly additionalProperties: false;
37
+ };
38
+ export type NodeRange = FromSchema<typeof NodeRangeSchema>;
39
+ /**
40
+ * Schema for named entity extracted from text
41
+ */
42
+ export declare const EntitySchema: {
43
+ readonly type: "object";
44
+ readonly properties: {
45
+ readonly text: {
46
+ readonly type: "string";
47
+ readonly title: "Text";
48
+ readonly description: "Entity text";
49
+ };
50
+ readonly type: {
51
+ readonly type: "string";
52
+ readonly title: "Type";
53
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
54
+ };
55
+ readonly score: {
56
+ readonly type: "number";
57
+ readonly title: "Score";
58
+ readonly description: "Confidence score";
59
+ };
60
+ };
61
+ readonly required: readonly ["text", "type", "score"];
62
+ readonly additionalProperties: false;
63
+ };
64
+ export type Entity = FromSchema<typeof EntitySchema>;
65
+ /**
66
+ * Schema for enrichment data attached to a node
67
+ */
68
+ export declare const NodeEnrichmentSchema: {
69
+ readonly type: "object";
70
+ readonly properties: {
71
+ readonly summary: {
72
+ readonly type: "string";
73
+ readonly title: "Summary";
74
+ readonly description: "Summary of the node content";
75
+ };
76
+ readonly entities: {
77
+ readonly type: "array";
78
+ readonly items: {
79
+ readonly type: "object";
80
+ readonly properties: {
81
+ readonly text: {
82
+ readonly type: "string";
83
+ readonly title: "Text";
84
+ readonly description: "Entity text";
85
+ };
86
+ readonly type: {
87
+ readonly type: "string";
88
+ readonly title: "Type";
89
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
90
+ };
91
+ readonly score: {
92
+ readonly type: "number";
93
+ readonly title: "Score";
94
+ readonly description: "Confidence score";
95
+ };
96
+ };
97
+ readonly required: readonly ["text", "type", "score"];
98
+ readonly additionalProperties: false;
99
+ };
100
+ readonly title: "Entities";
101
+ readonly description: "Named entities extracted from the node";
102
+ };
103
+ readonly keywords: {
104
+ readonly type: "array";
105
+ readonly items: {
106
+ readonly type: "string";
107
+ };
108
+ readonly title: "Keywords";
109
+ readonly description: "Keywords associated with the node";
110
+ };
111
+ };
112
+ readonly additionalProperties: false;
113
+ };
114
+ export type NodeEnrichment = FromSchema<typeof NodeEnrichmentSchema>;
115
+ /**
116
+ * Schema for base document node fields (used for runtime validation)
117
+ * Note: Individual node types and DocumentNode union are defined as interfaces
118
+ * below because FromSchema cannot properly infer recursive discriminated unions.
119
+ */
120
+ export declare const DocumentNodeBaseSchema: {
121
+ readonly type: "object";
122
+ readonly properties: {
123
+ readonly nodeId: {
124
+ readonly type: "string";
125
+ readonly title: "Node ID";
126
+ readonly description: "Unique identifier for this node";
127
+ };
128
+ readonly kind: {
129
+ readonly type: "string";
130
+ readonly enum: ("document" | "section" | "paragraph" | "sentence" | "topic")[];
131
+ readonly title: "Kind";
132
+ readonly description: "Node type discriminator";
133
+ };
134
+ readonly range: {
135
+ readonly type: "object";
136
+ readonly properties: {
137
+ readonly startOffset: {
138
+ readonly type: "integer";
139
+ readonly title: "Start Offset";
140
+ readonly description: "Starting character offset";
141
+ };
142
+ readonly endOffset: {
143
+ readonly type: "integer";
144
+ readonly title: "End Offset";
145
+ readonly description: "Ending character offset";
146
+ };
147
+ };
148
+ readonly required: readonly ["startOffset", "endOffset"];
149
+ readonly additionalProperties: false;
150
+ };
151
+ readonly text: {
152
+ readonly type: "string";
153
+ readonly title: "Text";
154
+ readonly description: "Text content of the node";
155
+ };
156
+ readonly enrichment: {
157
+ readonly type: "object";
158
+ readonly properties: {
159
+ readonly summary: {
160
+ readonly type: "string";
161
+ readonly title: "Summary";
162
+ readonly description: "Summary of the node content";
163
+ };
164
+ readonly entities: {
165
+ readonly type: "array";
166
+ readonly items: {
167
+ readonly type: "object";
168
+ readonly properties: {
169
+ readonly text: {
170
+ readonly type: "string";
171
+ readonly title: "Text";
172
+ readonly description: "Entity text";
173
+ };
174
+ readonly type: {
175
+ readonly type: "string";
176
+ readonly title: "Type";
177
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
178
+ };
179
+ readonly score: {
180
+ readonly type: "number";
181
+ readonly title: "Score";
182
+ readonly description: "Confidence score";
183
+ };
184
+ };
185
+ readonly required: readonly ["text", "type", "score"];
186
+ readonly additionalProperties: false;
187
+ };
188
+ readonly title: "Entities";
189
+ readonly description: "Named entities extracted from the node";
190
+ };
191
+ readonly keywords: {
192
+ readonly type: "array";
193
+ readonly items: {
194
+ readonly type: "string";
195
+ };
196
+ readonly title: "Keywords";
197
+ readonly description: "Keywords associated with the node";
198
+ };
199
+ };
200
+ readonly additionalProperties: false;
201
+ };
202
+ };
203
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
204
+ readonly additionalProperties: true;
205
+ };
206
+ /**
207
+ * Schema for document node (generic, for runtime validation)
208
+ * This is a simplified schema for task input/output validation.
209
+ * The actual TypeScript types use a proper discriminated union.
210
+ */
211
+ export declare const DocumentNodeSchema: {
212
+ readonly type: "object";
213
+ readonly title: "Document Node";
214
+ readonly description: "A node in the hierarchical document tree";
215
+ readonly properties: {
216
+ readonly level: {
217
+ readonly type: "integer";
218
+ readonly title: "Level";
219
+ readonly description: "Header level for section nodes";
220
+ };
221
+ readonly title: {
222
+ readonly type: "string";
223
+ readonly title: "Title";
224
+ readonly description: "Section title";
225
+ };
226
+ readonly children: {
227
+ readonly type: "array";
228
+ readonly title: "Children";
229
+ readonly description: "Child nodes";
230
+ };
231
+ readonly nodeId: {
232
+ readonly type: "string";
233
+ readonly title: "Node ID";
234
+ readonly description: "Unique identifier for this node";
235
+ };
236
+ readonly kind: {
237
+ readonly type: "string";
238
+ readonly enum: ("document" | "section" | "paragraph" | "sentence" | "topic")[];
239
+ readonly title: "Kind";
240
+ readonly description: "Node type discriminator";
241
+ };
242
+ readonly range: {
243
+ readonly type: "object";
244
+ readonly properties: {
245
+ readonly startOffset: {
246
+ readonly type: "integer";
247
+ readonly title: "Start Offset";
248
+ readonly description: "Starting character offset";
249
+ };
250
+ readonly endOffset: {
251
+ readonly type: "integer";
252
+ readonly title: "End Offset";
253
+ readonly description: "Ending character offset";
254
+ };
255
+ };
256
+ readonly required: readonly ["startOffset", "endOffset"];
257
+ readonly additionalProperties: false;
258
+ };
259
+ readonly text: {
260
+ readonly type: "string";
261
+ readonly title: "Text";
262
+ readonly description: "Text content of the node";
263
+ };
264
+ readonly enrichment: {
265
+ readonly type: "object";
266
+ readonly properties: {
267
+ readonly summary: {
268
+ readonly type: "string";
269
+ readonly title: "Summary";
270
+ readonly description: "Summary of the node content";
271
+ };
272
+ readonly entities: {
273
+ readonly type: "array";
274
+ readonly items: {
275
+ readonly type: "object";
276
+ readonly properties: {
277
+ readonly text: {
278
+ readonly type: "string";
279
+ readonly title: "Text";
280
+ readonly description: "Entity text";
281
+ };
282
+ readonly type: {
283
+ readonly type: "string";
284
+ readonly title: "Type";
285
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
286
+ };
287
+ readonly score: {
288
+ readonly type: "number";
289
+ readonly title: "Score";
290
+ readonly description: "Confidence score";
291
+ };
292
+ };
293
+ readonly required: readonly ["text", "type", "score"];
294
+ readonly additionalProperties: false;
295
+ };
296
+ readonly title: "Entities";
297
+ readonly description: "Named entities extracted from the node";
298
+ };
299
+ readonly keywords: {
300
+ readonly type: "array";
301
+ readonly items: {
302
+ readonly type: "string";
303
+ };
304
+ readonly title: "Keywords";
305
+ readonly description: "Keywords associated with the node";
306
+ };
307
+ };
308
+ readonly additionalProperties: false;
309
+ };
310
+ };
311
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
312
+ readonly additionalProperties: false;
313
+ };
314
+ /**
315
+ * Schema for paragraph node
316
+ */
317
+ export declare const ParagraphNodeSchema: {
318
+ readonly type: "object";
319
+ readonly properties: {
320
+ readonly kind: {
321
+ readonly type: "string";
322
+ readonly const: "paragraph";
323
+ readonly title: "Kind";
324
+ readonly description: "Node type discriminator";
325
+ };
326
+ readonly nodeId: {
327
+ readonly type: "string";
328
+ readonly title: "Node ID";
329
+ readonly description: "Unique identifier for this node";
330
+ };
331
+ readonly range: {
332
+ readonly type: "object";
333
+ readonly properties: {
334
+ readonly startOffset: {
335
+ readonly type: "integer";
336
+ readonly title: "Start Offset";
337
+ readonly description: "Starting character offset";
338
+ };
339
+ readonly endOffset: {
340
+ readonly type: "integer";
341
+ readonly title: "End Offset";
342
+ readonly description: "Ending character offset";
343
+ };
344
+ };
345
+ readonly required: readonly ["startOffset", "endOffset"];
346
+ readonly additionalProperties: false;
347
+ };
348
+ readonly text: {
349
+ readonly type: "string";
350
+ readonly title: "Text";
351
+ readonly description: "Text content of the node";
352
+ };
353
+ readonly enrichment: {
354
+ readonly type: "object";
355
+ readonly properties: {
356
+ readonly summary: {
357
+ readonly type: "string";
358
+ readonly title: "Summary";
359
+ readonly description: "Summary of the node content";
360
+ };
361
+ readonly entities: {
362
+ readonly type: "array";
363
+ readonly items: {
364
+ readonly type: "object";
365
+ readonly properties: {
366
+ readonly text: {
367
+ readonly type: "string";
368
+ readonly title: "Text";
369
+ readonly description: "Entity text";
370
+ };
371
+ readonly type: {
372
+ readonly type: "string";
373
+ readonly title: "Type";
374
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
375
+ };
376
+ readonly score: {
377
+ readonly type: "number";
378
+ readonly title: "Score";
379
+ readonly description: "Confidence score";
380
+ };
381
+ };
382
+ readonly required: readonly ["text", "type", "score"];
383
+ readonly additionalProperties: false;
384
+ };
385
+ readonly title: "Entities";
386
+ readonly description: "Named entities extracted from the node";
387
+ };
388
+ readonly keywords: {
389
+ readonly type: "array";
390
+ readonly items: {
391
+ readonly type: "string";
392
+ };
393
+ readonly title: "Keywords";
394
+ readonly description: "Keywords associated with the node";
395
+ };
396
+ };
397
+ readonly additionalProperties: false;
398
+ };
399
+ };
400
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
401
+ readonly additionalProperties: false;
402
+ };
403
+ /**
404
+ * Schema for sentence node
405
+ */
406
+ export declare const SentenceNodeSchema: {
407
+ readonly type: "object";
408
+ readonly properties: {
409
+ readonly kind: {
410
+ readonly type: "string";
411
+ readonly const: "sentence";
412
+ readonly title: "Kind";
413
+ readonly description: "Node type discriminator";
414
+ };
415
+ readonly nodeId: {
416
+ readonly type: "string";
417
+ readonly title: "Node ID";
418
+ readonly description: "Unique identifier for this node";
419
+ };
420
+ readonly range: {
421
+ readonly type: "object";
422
+ readonly properties: {
423
+ readonly startOffset: {
424
+ readonly type: "integer";
425
+ readonly title: "Start Offset";
426
+ readonly description: "Starting character offset";
427
+ };
428
+ readonly endOffset: {
429
+ readonly type: "integer";
430
+ readonly title: "End Offset";
431
+ readonly description: "Ending character offset";
432
+ };
433
+ };
434
+ readonly required: readonly ["startOffset", "endOffset"];
435
+ readonly additionalProperties: false;
436
+ };
437
+ readonly text: {
438
+ readonly type: "string";
439
+ readonly title: "Text";
440
+ readonly description: "Text content of the node";
441
+ };
442
+ readonly enrichment: {
443
+ readonly type: "object";
444
+ readonly properties: {
445
+ readonly summary: {
446
+ readonly type: "string";
447
+ readonly title: "Summary";
448
+ readonly description: "Summary of the node content";
449
+ };
450
+ readonly entities: {
451
+ readonly type: "array";
452
+ readonly items: {
453
+ readonly type: "object";
454
+ readonly properties: {
455
+ readonly text: {
456
+ readonly type: "string";
457
+ readonly title: "Text";
458
+ readonly description: "Entity text";
459
+ };
460
+ readonly type: {
461
+ readonly type: "string";
462
+ readonly title: "Type";
463
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
464
+ };
465
+ readonly score: {
466
+ readonly type: "number";
467
+ readonly title: "Score";
468
+ readonly description: "Confidence score";
469
+ };
470
+ };
471
+ readonly required: readonly ["text", "type", "score"];
472
+ readonly additionalProperties: false;
473
+ };
474
+ readonly title: "Entities";
475
+ readonly description: "Named entities extracted from the node";
476
+ };
477
+ readonly keywords: {
478
+ readonly type: "array";
479
+ readonly items: {
480
+ readonly type: "string";
481
+ };
482
+ readonly title: "Keywords";
483
+ readonly description: "Keywords associated with the node";
484
+ };
485
+ };
486
+ readonly additionalProperties: false;
487
+ };
488
+ };
489
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
490
+ readonly additionalProperties: false;
491
+ };
492
+ /**
493
+ * Schema for section node
494
+ */
495
+ export declare const SectionNodeSchema: {
496
+ readonly type: "object";
497
+ readonly properties: {
498
+ readonly kind: {
499
+ readonly type: "string";
500
+ readonly const: "section";
501
+ readonly title: "Kind";
502
+ readonly description: "Node type discriminator";
503
+ };
504
+ readonly level: {
505
+ readonly type: "integer";
506
+ readonly minimum: 1;
507
+ readonly maximum: 6;
508
+ readonly title: "Level";
509
+ readonly description: "Header level (1-6 for markdown)";
510
+ };
511
+ readonly title: {
512
+ readonly type: "string";
513
+ readonly title: "Title";
514
+ readonly description: "Section title";
515
+ };
516
+ readonly children: {
517
+ readonly type: "array";
518
+ readonly items: {
519
+ readonly type: "object";
520
+ readonly title: "Document Node";
521
+ readonly description: "A node in the hierarchical document tree";
522
+ readonly properties: {
523
+ readonly level: {
524
+ readonly type: "integer";
525
+ readonly title: "Level";
526
+ readonly description: "Header level for section nodes";
527
+ };
528
+ readonly title: {
529
+ readonly type: "string";
530
+ readonly title: "Title";
531
+ readonly description: "Section title";
532
+ };
533
+ readonly children: {
534
+ readonly type: "array";
535
+ readonly title: "Children";
536
+ readonly description: "Child nodes";
537
+ };
538
+ readonly nodeId: {
539
+ readonly type: "string";
540
+ readonly title: "Node ID";
541
+ readonly description: "Unique identifier for this node";
542
+ };
543
+ readonly kind: {
544
+ readonly type: "string";
545
+ readonly enum: ("document" | "section" | "paragraph" | "sentence" | "topic")[];
546
+ readonly title: "Kind";
547
+ readonly description: "Node type discriminator";
548
+ };
549
+ readonly range: {
550
+ readonly type: "object";
551
+ readonly properties: {
552
+ readonly startOffset: {
553
+ readonly type: "integer";
554
+ readonly title: "Start Offset";
555
+ readonly description: "Starting character offset";
556
+ };
557
+ readonly endOffset: {
558
+ readonly type: "integer";
559
+ readonly title: "End Offset";
560
+ readonly description: "Ending character offset";
561
+ };
562
+ };
563
+ readonly required: readonly ["startOffset", "endOffset"];
564
+ readonly additionalProperties: false;
565
+ };
566
+ readonly text: {
567
+ readonly type: "string";
568
+ readonly title: "Text";
569
+ readonly description: "Text content of the node";
570
+ };
571
+ readonly enrichment: {
572
+ readonly type: "object";
573
+ readonly properties: {
574
+ readonly summary: {
575
+ readonly type: "string";
576
+ readonly title: "Summary";
577
+ readonly description: "Summary of the node content";
578
+ };
579
+ readonly entities: {
580
+ readonly type: "array";
581
+ readonly items: {
582
+ readonly type: "object";
583
+ readonly properties: {
584
+ readonly text: {
585
+ readonly type: "string";
586
+ readonly title: "Text";
587
+ readonly description: "Entity text";
588
+ };
589
+ readonly type: {
590
+ readonly type: "string";
591
+ readonly title: "Type";
592
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
593
+ };
594
+ readonly score: {
595
+ readonly type: "number";
596
+ readonly title: "Score";
597
+ readonly description: "Confidence score";
598
+ };
599
+ };
600
+ readonly required: readonly ["text", "type", "score"];
601
+ readonly additionalProperties: false;
602
+ };
603
+ readonly title: "Entities";
604
+ readonly description: "Named entities extracted from the node";
605
+ };
606
+ readonly keywords: {
607
+ readonly type: "array";
608
+ readonly items: {
609
+ readonly type: "string";
610
+ };
611
+ readonly title: "Keywords";
612
+ readonly description: "Keywords associated with the node";
613
+ };
614
+ };
615
+ readonly additionalProperties: false;
616
+ };
617
+ };
618
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
619
+ readonly additionalProperties: false;
620
+ };
621
+ readonly title: "Children";
622
+ readonly description: "Child nodes";
623
+ };
624
+ readonly nodeId: {
625
+ readonly type: "string";
626
+ readonly title: "Node ID";
627
+ readonly description: "Unique identifier for this node";
628
+ };
629
+ readonly range: {
630
+ readonly type: "object";
631
+ readonly properties: {
632
+ readonly startOffset: {
633
+ readonly type: "integer";
634
+ readonly title: "Start Offset";
635
+ readonly description: "Starting character offset";
636
+ };
637
+ readonly endOffset: {
638
+ readonly type: "integer";
639
+ readonly title: "End Offset";
640
+ readonly description: "Ending character offset";
641
+ };
642
+ };
643
+ readonly required: readonly ["startOffset", "endOffset"];
644
+ readonly additionalProperties: false;
645
+ };
646
+ readonly text: {
647
+ readonly type: "string";
648
+ readonly title: "Text";
649
+ readonly description: "Text content of the node";
650
+ };
651
+ readonly enrichment: {
652
+ readonly type: "object";
653
+ readonly properties: {
654
+ readonly summary: {
655
+ readonly type: "string";
656
+ readonly title: "Summary";
657
+ readonly description: "Summary of the node content";
658
+ };
659
+ readonly entities: {
660
+ readonly type: "array";
661
+ readonly items: {
662
+ readonly type: "object";
663
+ readonly properties: {
664
+ readonly text: {
665
+ readonly type: "string";
666
+ readonly title: "Text";
667
+ readonly description: "Entity text";
668
+ };
669
+ readonly type: {
670
+ readonly type: "string";
671
+ readonly title: "Type";
672
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
673
+ };
674
+ readonly score: {
675
+ readonly type: "number";
676
+ readonly title: "Score";
677
+ readonly description: "Confidence score";
678
+ };
679
+ };
680
+ readonly required: readonly ["text", "type", "score"];
681
+ readonly additionalProperties: false;
682
+ };
683
+ readonly title: "Entities";
684
+ readonly description: "Named entities extracted from the node";
685
+ };
686
+ readonly keywords: {
687
+ readonly type: "array";
688
+ readonly items: {
689
+ readonly type: "string";
690
+ };
691
+ readonly title: "Keywords";
692
+ readonly description: "Keywords associated with the node";
693
+ };
694
+ };
695
+ readonly additionalProperties: false;
696
+ };
697
+ };
698
+ readonly required: readonly ["nodeId", "kind", "range", "text", "level", "title", "children"];
699
+ readonly additionalProperties: false;
700
+ };
701
+ /**
702
+ * Schema for topic node
703
+ */
704
+ export declare const TopicNodeSchema: {
705
+ readonly type: "object";
706
+ readonly properties: {
707
+ readonly kind: {
708
+ readonly type: "string";
709
+ readonly const: "topic";
710
+ readonly title: "Kind";
711
+ readonly description: "Node type discriminator";
712
+ };
713
+ readonly children: {
714
+ readonly type: "array";
715
+ readonly items: {
716
+ readonly type: "object";
717
+ readonly title: "Document Node";
718
+ readonly description: "A node in the hierarchical document tree";
719
+ readonly properties: {
720
+ readonly level: {
721
+ readonly type: "integer";
722
+ readonly title: "Level";
723
+ readonly description: "Header level for section nodes";
724
+ };
725
+ readonly title: {
726
+ readonly type: "string";
727
+ readonly title: "Title";
728
+ readonly description: "Section title";
729
+ };
730
+ readonly children: {
731
+ readonly type: "array";
732
+ readonly title: "Children";
733
+ readonly description: "Child nodes";
734
+ };
735
+ readonly nodeId: {
736
+ readonly type: "string";
737
+ readonly title: "Node ID";
738
+ readonly description: "Unique identifier for this node";
739
+ };
740
+ readonly kind: {
741
+ readonly type: "string";
742
+ readonly enum: ("document" | "section" | "paragraph" | "sentence" | "topic")[];
743
+ readonly title: "Kind";
744
+ readonly description: "Node type discriminator";
745
+ };
746
+ readonly range: {
747
+ readonly type: "object";
748
+ readonly properties: {
749
+ readonly startOffset: {
750
+ readonly type: "integer";
751
+ readonly title: "Start Offset";
752
+ readonly description: "Starting character offset";
753
+ };
754
+ readonly endOffset: {
755
+ readonly type: "integer";
756
+ readonly title: "End Offset";
757
+ readonly description: "Ending character offset";
758
+ };
759
+ };
760
+ readonly required: readonly ["startOffset", "endOffset"];
761
+ readonly additionalProperties: false;
762
+ };
763
+ readonly text: {
764
+ readonly type: "string";
765
+ readonly title: "Text";
766
+ readonly description: "Text content of the node";
767
+ };
768
+ readonly enrichment: {
769
+ readonly type: "object";
770
+ readonly properties: {
771
+ readonly summary: {
772
+ readonly type: "string";
773
+ readonly title: "Summary";
774
+ readonly description: "Summary of the node content";
775
+ };
776
+ readonly entities: {
777
+ readonly type: "array";
778
+ readonly items: {
779
+ readonly type: "object";
780
+ readonly properties: {
781
+ readonly text: {
782
+ readonly type: "string";
783
+ readonly title: "Text";
784
+ readonly description: "Entity text";
785
+ };
786
+ readonly type: {
787
+ readonly type: "string";
788
+ readonly title: "Type";
789
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
790
+ };
791
+ readonly score: {
792
+ readonly type: "number";
793
+ readonly title: "Score";
794
+ readonly description: "Confidence score";
795
+ };
796
+ };
797
+ readonly required: readonly ["text", "type", "score"];
798
+ readonly additionalProperties: false;
799
+ };
800
+ readonly title: "Entities";
801
+ readonly description: "Named entities extracted from the node";
802
+ };
803
+ readonly keywords: {
804
+ readonly type: "array";
805
+ readonly items: {
806
+ readonly type: "string";
807
+ };
808
+ readonly title: "Keywords";
809
+ readonly description: "Keywords associated with the node";
810
+ };
811
+ };
812
+ readonly additionalProperties: false;
813
+ };
814
+ };
815
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
816
+ readonly additionalProperties: false;
817
+ };
818
+ readonly title: "Children";
819
+ readonly description: "Child nodes";
820
+ };
821
+ readonly nodeId: {
822
+ readonly type: "string";
823
+ readonly title: "Node ID";
824
+ readonly description: "Unique identifier for this node";
825
+ };
826
+ readonly range: {
827
+ readonly type: "object";
828
+ readonly properties: {
829
+ readonly startOffset: {
830
+ readonly type: "integer";
831
+ readonly title: "Start Offset";
832
+ readonly description: "Starting character offset";
833
+ };
834
+ readonly endOffset: {
835
+ readonly type: "integer";
836
+ readonly title: "End Offset";
837
+ readonly description: "Ending character offset";
838
+ };
839
+ };
840
+ readonly required: readonly ["startOffset", "endOffset"];
841
+ readonly additionalProperties: false;
842
+ };
843
+ readonly text: {
844
+ readonly type: "string";
845
+ readonly title: "Text";
846
+ readonly description: "Text content of the node";
847
+ };
848
+ readonly enrichment: {
849
+ readonly type: "object";
850
+ readonly properties: {
851
+ readonly summary: {
852
+ readonly type: "string";
853
+ readonly title: "Summary";
854
+ readonly description: "Summary of the node content";
855
+ };
856
+ readonly entities: {
857
+ readonly type: "array";
858
+ readonly items: {
859
+ readonly type: "object";
860
+ readonly properties: {
861
+ readonly text: {
862
+ readonly type: "string";
863
+ readonly title: "Text";
864
+ readonly description: "Entity text";
865
+ };
866
+ readonly type: {
867
+ readonly type: "string";
868
+ readonly title: "Type";
869
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
870
+ };
871
+ readonly score: {
872
+ readonly type: "number";
873
+ readonly title: "Score";
874
+ readonly description: "Confidence score";
875
+ };
876
+ };
877
+ readonly required: readonly ["text", "type", "score"];
878
+ readonly additionalProperties: false;
879
+ };
880
+ readonly title: "Entities";
881
+ readonly description: "Named entities extracted from the node";
882
+ };
883
+ readonly keywords: {
884
+ readonly type: "array";
885
+ readonly items: {
886
+ readonly type: "string";
887
+ };
888
+ readonly title: "Keywords";
889
+ readonly description: "Keywords associated with the node";
890
+ };
891
+ };
892
+ readonly additionalProperties: false;
893
+ };
894
+ };
895
+ readonly required: readonly ["nodeId", "kind", "range", "text", "children"];
896
+ readonly additionalProperties: false;
897
+ };
898
+ /**
899
+ * Schema for the document root node: an object whose kind is the constant "document"; requires nodeId, kind, range, text, title and children, and allows no additional properties
900
+ */
901
+ export declare const DocumentRootNodeSchema: {
902
+ readonly type: "object";
903
+ readonly properties: {
904
+ readonly kind: {
905
+ readonly type: "string";
906
+ readonly const: "document";
907
+ readonly title: "Kind";
908
+ readonly description: "Node type discriminator";
909
+ };
910
+ readonly title: {
911
+ readonly type: "string";
912
+ readonly title: "Title";
913
+ readonly description: "Document title";
914
+ };
915
+ readonly children: {
916
+ readonly type: "array";
917
+ readonly items: {
918
+ readonly type: "object";
919
+ readonly title: "Document Node";
920
+ readonly description: "A node in the hierarchical document tree";
921
+ readonly properties: {
922
+ readonly level: {
923
+ readonly type: "integer";
924
+ readonly title: "Level";
925
+ readonly description: "Header level for section nodes";
926
+ };
927
+ readonly title: {
928
+ readonly type: "string";
929
+ readonly title: "Title";
930
+ readonly description: "Section title";
931
+ };
932
+ readonly children: { // no `items` given here: nested children are only declared to be an array at this depth
933
+ readonly type: "array";
934
+ readonly title: "Children";
935
+ readonly description: "Child nodes";
936
+ };
937
+ readonly nodeId: {
938
+ readonly type: "string";
939
+ readonly title: "Node ID";
940
+ readonly description: "Unique identifier for this node";
941
+ };
942
+ readonly kind: {
943
+ readonly type: "string";
944
+ readonly enum: ("document" | "section" | "paragraph" | "sentence" | "topic")[];
945
+ readonly title: "Kind";
946
+ readonly description: "Node type discriminator";
947
+ };
948
+ readonly range: {
949
+ readonly type: "object";
950
+ readonly properties: {
951
+ readonly startOffset: {
952
+ readonly type: "integer";
953
+ readonly title: "Start Offset";
954
+ readonly description: "Starting character offset";
955
+ };
956
+ readonly endOffset: {
957
+ readonly type: "integer";
958
+ readonly title: "End Offset";
959
+ readonly description: "Ending character offset";
960
+ };
961
+ };
962
+ readonly required: readonly ["startOffset", "endOffset"];
963
+ readonly additionalProperties: false;
964
+ };
965
+ readonly text: {
966
+ readonly type: "string";
967
+ readonly title: "Text";
968
+ readonly description: "Text content of the node";
969
+ };
970
+ readonly enrichment: {
971
+ readonly type: "object";
972
+ readonly properties: {
973
+ readonly summary: {
974
+ readonly type: "string";
975
+ readonly title: "Summary";
976
+ readonly description: "Summary of the node content";
977
+ };
978
+ readonly entities: {
979
+ readonly type: "array";
980
+ readonly items: {
981
+ readonly type: "object";
982
+ readonly properties: {
983
+ readonly text: {
984
+ readonly type: "string";
985
+ readonly title: "Text";
986
+ readonly description: "Entity text";
987
+ };
988
+ readonly type: {
989
+ readonly type: "string";
990
+ readonly title: "Type";
991
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
992
+ };
993
+ readonly score: {
994
+ readonly type: "number";
995
+ readonly title: "Score";
996
+ readonly description: "Confidence score";
997
+ };
998
+ };
999
+ readonly required: readonly ["text", "type", "score"];
1000
+ readonly additionalProperties: false;
1001
+ };
1002
+ readonly title: "Entities";
1003
+ readonly description: "Named entities extracted from the node";
1004
+ };
1005
+ readonly keywords: {
1006
+ readonly type: "array";
1007
+ readonly items: {
1008
+ readonly type: "string";
1009
+ };
1010
+ readonly title: "Keywords";
1011
+ readonly description: "Keywords associated with the node";
1012
+ };
1013
+ };
1014
+ readonly additionalProperties: false;
1015
+ };
1016
+ };
1017
+ readonly required: readonly ["nodeId", "kind", "range", "text"];
1018
+ readonly additionalProperties: false;
1019
+ };
1020
+ readonly title: "Children";
1021
+ readonly description: "Child nodes";
1022
+ };
1023
+ readonly nodeId: {
1024
+ readonly type: "string";
1025
+ readonly title: "Node ID";
1026
+ readonly description: "Unique identifier for this node";
1027
+ };
1028
+ readonly range: {
1029
+ readonly type: "object";
1030
+ readonly properties: {
1031
+ readonly startOffset: {
1032
+ readonly type: "integer";
1033
+ readonly title: "Start Offset";
1034
+ readonly description: "Starting character offset";
1035
+ };
1036
+ readonly endOffset: {
1037
+ readonly type: "integer";
1038
+ readonly title: "End Offset";
1039
+ readonly description: "Ending character offset";
1040
+ };
1041
+ };
1042
+ readonly required: readonly ["startOffset", "endOffset"];
1043
+ readonly additionalProperties: false;
1044
+ };
1045
+ readonly text: {
1046
+ readonly type: "string";
1047
+ readonly title: "Text";
1048
+ readonly description: "Text content of the node";
1049
+ };
1050
+ readonly enrichment: {
1051
+ readonly type: "object";
1052
+ readonly properties: {
1053
+ readonly summary: {
1054
+ readonly type: "string";
1055
+ readonly title: "Summary";
1056
+ readonly description: "Summary of the node content";
1057
+ };
1058
+ readonly entities: {
1059
+ readonly type: "array";
1060
+ readonly items: {
1061
+ readonly type: "object";
1062
+ readonly properties: {
1063
+ readonly text: {
1064
+ readonly type: "string";
1065
+ readonly title: "Text";
1066
+ readonly description: "Entity text";
1067
+ };
1068
+ readonly type: {
1069
+ readonly type: "string";
1070
+ readonly title: "Type";
1071
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1072
+ };
1073
+ readonly score: {
1074
+ readonly type: "number";
1075
+ readonly title: "Score";
1076
+ readonly description: "Confidence score";
1077
+ };
1078
+ };
1079
+ readonly required: readonly ["text", "type", "score"];
1080
+ readonly additionalProperties: false;
1081
+ };
1082
+ readonly title: "Entities";
1083
+ readonly description: "Named entities extracted from the node";
1084
+ };
1085
+ readonly keywords: {
1086
+ readonly type: "array";
1087
+ readonly items: {
1088
+ readonly type: "string";
1089
+ };
1090
+ readonly title: "Keywords";
1091
+ readonly description: "Keywords associated with the node";
1092
+ };
1093
+ };
1094
+ readonly additionalProperties: false;
1095
+ };
1096
+ };
1097
+ readonly required: readonly ["nodeId", "kind", "range", "text", "title", "children"];
1098
+ readonly additionalProperties: false;
1099
+ };
1100
+ /**
1101
+ * Base fields that every document node interface in this file extends: nodeId, kind, range, text and an optional enrichment (not exported)
1102
+ */
1103
+ interface DocumentNodeBase {
1104
+ readonly nodeId: string;
1105
+ readonly kind: NodeKind;
1106
+ readonly range: NodeRange;
1107
+ readonly text: string;
1108
+ readonly enrichment?: NodeEnrichment;
1109
+ }
1110
+ /**
1111
+ * Document root node: narrows kind to NodeKind.DOCUMENT and adds a title plus an array of child nodes
1112
+ */
1113
+ export interface DocumentRootNode extends DocumentNodeBase {
1114
+ readonly kind: typeof NodeKind.DOCUMENT;
1115
+ readonly title: string;
1116
+ readonly children: DocumentNode[];
1117
+ }
1118
+ /**
1119
+ * Section node (from markdown headers or structural divisions): narrows kind to NodeKind.SECTION and adds a level, a title and child nodes
1120
+ */
1121
+ export interface SectionNode extends DocumentNodeBase {
1122
+ readonly kind: typeof NodeKind.SECTION;
1123
+ readonly level: number;
1124
+ readonly title: string;
1125
+ readonly children: DocumentNode[];
1126
+ }
1127
+ /**
1128
+ * Paragraph node: carries only the base fields, with kind narrowed to NodeKind.PARAGRAPH (no children)
1129
+ */
1130
+ export interface ParagraphNode extends DocumentNodeBase {
1131
+ readonly kind: typeof NodeKind.PARAGRAPH;
1132
+ }
1133
+ /**
1134
+ * Sentence node (optional fine-grained segmentation): carries only the base fields, with kind narrowed to NodeKind.SENTENCE (no children)
1135
+ */
1136
+ export interface SentenceNode extends DocumentNodeBase {
1137
+ readonly kind: typeof NodeKind.SENTENCE;
1138
+ }
1139
+ /**
1140
+ * Topic segment node (from TopicSegmenter): narrows kind to NodeKind.TOPIC and adds an array of child nodes
1141
+ */
1142
+ export interface TopicNode extends DocumentNodeBase {
1143
+ readonly kind: typeof NodeKind.TOPIC;
1144
+ readonly children: DocumentNode[];
1145
+ }
1146
+ /**
1147
+ * Discriminated union of all document node types; each member narrows the kind field to a single NodeKind value, so kind can be used to tell the members apart
1148
+ */
1149
+ export type DocumentNode = DocumentRootNode | SectionNode | ParagraphNode | SentenceNode | TopicNode;
1150
+ /**
1151
+ * Schema for token budget configuration: three required integer fields (maxTokensPerChunk, overlapTokens, reservedTokens) and no additional properties; the TokenBudget type is derived from it
1152
+ */
1153
+ export declare const TokenBudgetSchema: {
1154
+ readonly type: "object";
1155
+ readonly properties: {
1156
+ readonly maxTokensPerChunk: {
1157
+ readonly type: "integer";
1158
+ readonly title: "Max Tokens Per Chunk";
1159
+ readonly description: "Maximum tokens allowed per chunk";
1160
+ };
1161
+ readonly overlapTokens: {
1162
+ readonly type: "integer";
1163
+ readonly title: "Overlap Tokens";
1164
+ readonly description: "Number of tokens to overlap between chunks";
1165
+ };
1166
+ readonly reservedTokens: {
1167
+ readonly type: "integer";
1168
+ readonly title: "Reserved Tokens";
1169
+ readonly description: "Tokens reserved for metadata or context";
1170
+ };
1171
+ };
1172
+ readonly required: readonly ["maxTokensPerChunk", "overlapTokens", "reservedTokens"];
1173
+ readonly additionalProperties: false;
1174
+ };
1175
+ export type TokenBudget = FromSchema<typeof TokenBudgetSchema>;
1176
+ /**
1177
+ * Schema for chunk enrichment: optional summary and entities (each entity requires text, type and score); no additional properties; the ChunkEnrichment type is derived from it
1178
+ */
1179
+ export declare const ChunkEnrichmentSchema: {
1180
+ readonly type: "object";
1181
+ readonly properties: {
1182
+ readonly summary: {
1183
+ readonly type: "string";
1184
+ readonly title: "Summary";
1185
+ readonly description: "Summary of the chunk content";
1186
+ };
1187
+ readonly entities: {
1188
+ readonly type: "array";
1189
+ readonly items: {
1190
+ readonly type: "object";
1191
+ readonly properties: {
1192
+ readonly text: {
1193
+ readonly type: "string";
1194
+ readonly title: "Text";
1195
+ readonly description: "Entity text";
1196
+ };
1197
+ readonly type: {
1198
+ readonly type: "string";
1199
+ readonly title: "Type";
1200
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1201
+ };
1202
+ readonly score: {
1203
+ readonly type: "number";
1204
+ readonly title: "Score";
1205
+ readonly description: "Confidence score";
1206
+ };
1207
+ };
1208
+ readonly required: readonly ["text", "type", "score"];
1209
+ readonly additionalProperties: false;
1210
+ };
1211
+ readonly title: "Entities";
1212
+ readonly description: "Named entities extracted from the chunk";
1213
+ };
1214
+ };
1215
+ readonly additionalProperties: false;
1216
+ };
1217
+ export type ChunkEnrichment = FromSchema<typeof ChunkEnrichmentSchema>;
1218
+ /**
1219
+ * Schema for chunk node (output of HierarchicalChunker); declared as a function that returns the schema object, which is why the ChunkNode type below is derived through ReturnType
1220
+ */
1221
+ export declare const ChunkNodeSchema: () => {
1222
+ readonly type: "object";
1223
+ readonly properties: {
1224
+ readonly chunkId: {
1225
+ readonly type: "string";
1226
+ readonly title: "Chunk ID";
1227
+ readonly description: "Unique identifier for this chunk";
1228
+ };
1229
+ readonly doc_id: {
1230
+ readonly type: "string";
1231
+ readonly title: "Document ID";
1232
+ readonly description: "ID of the parent document";
1233
+ };
1234
+ readonly text: {
1235
+ readonly type: "string";
1236
+ readonly title: "Text";
1237
+ readonly description: "Text content of the chunk";
1238
+ };
1239
+ readonly nodePath: {
1240
+ readonly type: "array";
1241
+ readonly items: {
1242
+ readonly type: "string";
1243
+ };
1244
+ readonly title: "Node Path";
1245
+ readonly description: "Node IDs from root to leaf";
1246
+ };
1247
+ readonly depth: {
1248
+ readonly type: "integer";
1249
+ readonly title: "Depth";
1250
+ readonly description: "Depth in the document tree";
1251
+ };
1252
+ readonly enrichment: {
1253
+ readonly type: "object";
1254
+ readonly properties: {
1255
+ readonly summary: {
1256
+ readonly type: "string";
1257
+ readonly title: "Summary";
1258
+ readonly description: "Summary of the chunk content";
1259
+ };
1260
+ readonly entities: {
1261
+ readonly type: "array";
1262
+ readonly items: {
1263
+ readonly type: "object";
1264
+ readonly properties: {
1265
+ readonly text: {
1266
+ readonly type: "string";
1267
+ readonly title: "Text";
1268
+ readonly description: "Entity text";
1269
+ };
1270
+ readonly type: {
1271
+ readonly type: "string";
1272
+ readonly title: "Type";
1273
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1274
+ };
1275
+ readonly score: {
1276
+ readonly type: "number";
1277
+ readonly title: "Score";
1278
+ readonly description: "Confidence score";
1279
+ };
1280
+ };
1281
+ readonly required: readonly ["text", "type", "score"];
1282
+ readonly additionalProperties: false;
1283
+ };
1284
+ readonly title: "Entities";
1285
+ readonly description: "Named entities extracted from the chunk";
1286
+ };
1287
+ };
1288
+ readonly additionalProperties: false;
1289
+ };
1290
+ };
1291
+ readonly required: readonly ["chunkId", "doc_id", "text", "nodePath", "depth"];
1292
+ readonly additionalProperties: false;
1293
+ };
1294
+ export type ChunkNode = FromSchema<ReturnType<typeof ChunkNodeSchema>>;
1295
+ /**
1296
+ * Schema for chunk metadata stored in the vector database
1297
+ * This is the metadata output from ChunkToVectorTask; summary and entities are optional, and additional properties are allowed
1298
+ */
1299
+ export declare const ChunkMetadataSchema: {
1300
+ readonly type: "object";
1301
+ readonly properties: {
1302
+ readonly doc_id: {
1303
+ readonly type: "string";
1304
+ readonly title: "Document ID";
1305
+ readonly description: "ID of the parent document";
1306
+ };
1307
+ readonly chunkId: {
1308
+ readonly type: "string";
1309
+ readonly title: "Chunk ID";
1310
+ readonly description: "Unique identifier for this chunk";
1311
+ };
1312
+ readonly leafNodeId: {
1313
+ readonly type: "string";
1314
+ readonly title: "Leaf Node ID";
1315
+ readonly description: "ID of the leaf node this chunk belongs to";
1316
+ };
1317
+ readonly depth: {
1318
+ readonly type: "integer";
1319
+ readonly title: "Depth";
1320
+ readonly description: "Depth in the document tree";
1321
+ };
1322
+ readonly text: {
1323
+ readonly type: "string";
1324
+ readonly title: "Text";
1325
+ readonly description: "Text content of the chunk";
1326
+ };
1327
+ readonly nodePath: {
1328
+ readonly type: "array";
1329
+ readonly items: {
1330
+ readonly type: "string";
1331
+ };
1332
+ readonly title: "Node Path";
1333
+ readonly description: "Node IDs from root to leaf";
1334
+ };
1335
+ readonly summary: {
1336
+ readonly type: "string";
1337
+ readonly title: "Summary";
1338
+ readonly description: "Summary of the chunk content";
1339
+ };
1340
+ readonly entities: {
1341
+ readonly type: "array";
1342
+ readonly items: {
1343
+ readonly type: "object";
1344
+ readonly properties: {
1345
+ readonly text: {
1346
+ readonly type: "string";
1347
+ readonly title: "Text";
1348
+ readonly description: "Entity text";
1349
+ };
1350
+ readonly type: {
1351
+ readonly type: "string";
1352
+ readonly title: "Type";
1353
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1354
+ };
1355
+ readonly score: {
1356
+ readonly type: "number";
1357
+ readonly title: "Score";
1358
+ readonly description: "Confidence score";
1359
+ };
1360
+ };
1361
+ readonly required: readonly ["text", "type", "score"];
1362
+ readonly additionalProperties: false;
1363
+ };
1364
+ readonly title: "Entities";
1365
+ readonly description: "Named entities extracted from the chunk";
1366
+ };
1367
+ };
1368
+ readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
1369
+ readonly additionalProperties: true;
1370
+ };
1371
+ export type ChunkMetadata = FromSchema<typeof ChunkMetadataSchema>;
1372
+ /**
1373
+ * Schema for an array of chunk metadata objects (for use in task schemas); each item declares the same fields and required list as ChunkMetadataSchema
1374
+ */
1375
+ export declare const ChunkMetadataArraySchema: {
1376
+ readonly type: "array";
1377
+ readonly items: {
1378
+ readonly type: "object";
1379
+ readonly properties: {
1380
+ readonly doc_id: {
1381
+ readonly type: "string";
1382
+ readonly title: "Document ID";
1383
+ readonly description: "ID of the parent document";
1384
+ };
1385
+ readonly chunkId: {
1386
+ readonly type: "string";
1387
+ readonly title: "Chunk ID";
1388
+ readonly description: "Unique identifier for this chunk";
1389
+ };
1390
+ readonly leafNodeId: {
1391
+ readonly type: "string";
1392
+ readonly title: "Leaf Node ID";
1393
+ readonly description: "ID of the leaf node this chunk belongs to";
1394
+ };
1395
+ readonly depth: {
1396
+ readonly type: "integer";
1397
+ readonly title: "Depth";
1398
+ readonly description: "Depth in the document tree";
1399
+ };
1400
+ readonly text: {
1401
+ readonly type: "string";
1402
+ readonly title: "Text";
1403
+ readonly description: "Text content of the chunk";
1404
+ };
1405
+ readonly nodePath: {
1406
+ readonly type: "array";
1407
+ readonly items: {
1408
+ readonly type: "string";
1409
+ };
1410
+ readonly title: "Node Path";
1411
+ readonly description: "Node IDs from root to leaf";
1412
+ };
1413
+ readonly summary: {
1414
+ readonly type: "string";
1415
+ readonly title: "Summary";
1416
+ readonly description: "Summary of the chunk content";
1417
+ };
1418
+ readonly entities: {
1419
+ readonly type: "array";
1420
+ readonly items: {
1421
+ readonly type: "object";
1422
+ readonly properties: {
1423
+ readonly text: {
1424
+ readonly type: "string";
1425
+ readonly title: "Text";
1426
+ readonly description: "Entity text";
1427
+ };
1428
+ readonly type: {
1429
+ readonly type: "string";
1430
+ readonly title: "Type";
1431
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1432
+ };
1433
+ readonly score: {
1434
+ readonly type: "number";
1435
+ readonly title: "Score";
1436
+ readonly description: "Confidence score";
1437
+ };
1438
+ };
1439
+ readonly required: readonly ["text", "type", "score"];
1440
+ readonly additionalProperties: false;
1441
+ };
1442
+ readonly title: "Entities";
1443
+ readonly description: "Named entities extracted from the chunk";
1444
+ };
1445
+ };
1446
+ readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
1447
+ readonly additionalProperties: true;
1448
+ };
1449
+ readonly title: "Chunk Metadata";
1450
+ readonly description: "Metadata for each chunk";
1451
+ };
1452
+ /**
1453
+ * Schema for enriched chunk metadata (after HierarchyJoinTask)
1454
+ * Extends ChunkMetadata with hierarchy information from the document repository: optional parentSummaries and sectionTitles string arrays; additional properties are allowed
1455
+ */
1456
+ export declare const EnrichedChunkMetadataSchema: {
1457
+ readonly type: "object";
1458
+ readonly properties: {
1459
+ readonly doc_id: {
1460
+ readonly type: "string";
1461
+ readonly title: "Document ID";
1462
+ readonly description: "ID of the parent document";
1463
+ };
1464
+ readonly chunkId: {
1465
+ readonly type: "string";
1466
+ readonly title: "Chunk ID";
1467
+ readonly description: "Unique identifier for this chunk";
1468
+ };
1469
+ readonly leafNodeId: {
1470
+ readonly type: "string";
1471
+ readonly title: "Leaf Node ID";
1472
+ readonly description: "ID of the leaf node this chunk belongs to";
1473
+ };
1474
+ readonly depth: {
1475
+ readonly type: "integer";
1476
+ readonly title: "Depth";
1477
+ readonly description: "Depth in the document tree";
1478
+ };
1479
+ readonly text: {
1480
+ readonly type: "string";
1481
+ readonly title: "Text";
1482
+ readonly description: "Text content of the chunk";
1483
+ };
1484
+ readonly nodePath: {
1485
+ readonly type: "array";
1486
+ readonly items: {
1487
+ readonly type: "string";
1488
+ };
1489
+ readonly title: "Node Path";
1490
+ readonly description: "Node IDs from root to leaf";
1491
+ };
1492
+ readonly summary: {
1493
+ readonly type: "string";
1494
+ readonly title: "Summary";
1495
+ readonly description: "Summary of the chunk content";
1496
+ };
1497
+ readonly entities: {
1498
+ readonly type: "array";
1499
+ readonly items: {
1500
+ readonly type: "object";
1501
+ readonly properties: {
1502
+ readonly text: {
1503
+ readonly type: "string";
1504
+ readonly title: "Text";
1505
+ readonly description: "Entity text";
1506
+ };
1507
+ readonly type: {
1508
+ readonly type: "string";
1509
+ readonly title: "Type";
1510
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1511
+ };
1512
+ readonly score: {
1513
+ readonly type: "number";
1514
+ readonly title: "Score";
1515
+ readonly description: "Confidence score";
1516
+ };
1517
+ };
1518
+ readonly required: readonly ["text", "type", "score"];
1519
+ readonly additionalProperties: false;
1520
+ };
1521
+ readonly title: "Entities";
1522
+ readonly description: "Named entities (rolled up from hierarchy)";
1523
+ };
1524
+ readonly parentSummaries: {
1525
+ readonly type: "array";
1526
+ readonly items: {
1527
+ readonly type: "string";
1528
+ };
1529
+ readonly title: "Parent Summaries";
1530
+ readonly description: "Summaries from ancestor nodes";
1531
+ };
1532
+ readonly sectionTitles: {
1533
+ readonly type: "array";
1534
+ readonly items: {
1535
+ readonly type: "string";
1536
+ };
1537
+ readonly title: "Section Titles";
1538
+ readonly description: "Titles of ancestor section nodes";
1539
+ };
1540
+ };
1541
+ readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
1542
+ readonly additionalProperties: true;
1543
+ };
1544
+ export type EnrichedChunkMetadata = FromSchema<typeof EnrichedChunkMetadataSchema>;
1545
+ /**
1546
+ * Schema for an array of enriched chunk metadata objects (for use in task schemas); each item declares the same fields and required list as EnrichedChunkMetadataSchema
1547
+ */
1548
+ export declare const EnrichedChunkMetadataArraySchema: {
1549
+ readonly type: "array";
1550
+ readonly items: {
1551
+ readonly type: "object";
1552
+ readonly properties: {
1553
+ readonly doc_id: {
1554
+ readonly type: "string";
1555
+ readonly title: "Document ID";
1556
+ readonly description: "ID of the parent document";
1557
+ };
1558
+ readonly chunkId: {
1559
+ readonly type: "string";
1560
+ readonly title: "Chunk ID";
1561
+ readonly description: "Unique identifier for this chunk";
1562
+ };
1563
+ readonly leafNodeId: {
1564
+ readonly type: "string";
1565
+ readonly title: "Leaf Node ID";
1566
+ readonly description: "ID of the leaf node this chunk belongs to";
1567
+ };
1568
+ readonly depth: {
1569
+ readonly type: "integer";
1570
+ readonly title: "Depth";
1571
+ readonly description: "Depth in the document tree";
1572
+ };
1573
+ readonly text: {
1574
+ readonly type: "string";
1575
+ readonly title: "Text";
1576
+ readonly description: "Text content of the chunk";
1577
+ };
1578
+ readonly nodePath: {
1579
+ readonly type: "array";
1580
+ readonly items: {
1581
+ readonly type: "string";
1582
+ };
1583
+ readonly title: "Node Path";
1584
+ readonly description: "Node IDs from root to leaf";
1585
+ };
1586
+ readonly summary: {
1587
+ readonly type: "string";
1588
+ readonly title: "Summary";
1589
+ readonly description: "Summary of the chunk content";
1590
+ };
1591
+ readonly entities: {
1592
+ readonly type: "array";
1593
+ readonly items: {
1594
+ readonly type: "object";
1595
+ readonly properties: {
1596
+ readonly text: {
1597
+ readonly type: "string";
1598
+ readonly title: "Text";
1599
+ readonly description: "Entity text";
1600
+ };
1601
+ readonly type: {
1602
+ readonly type: "string";
1603
+ readonly title: "Type";
1604
+ readonly description: "Entity type (e.g., PERSON, ORG, LOC)";
1605
+ };
1606
+ readonly score: {
1607
+ readonly type: "number";
1608
+ readonly title: "Score";
1609
+ readonly description: "Confidence score";
1610
+ };
1611
+ };
1612
+ readonly required: readonly ["text", "type", "score"];
1613
+ readonly additionalProperties: false;
1614
+ };
1615
+ readonly title: "Entities";
1616
+ readonly description: "Named entities (rolled up from hierarchy)";
1617
+ };
1618
+ readonly parentSummaries: {
1619
+ readonly type: "array";
1620
+ readonly items: {
1621
+ readonly type: "string";
1622
+ };
1623
+ readonly title: "Parent Summaries";
1624
+ readonly description: "Summaries from ancestor nodes";
1625
+ };
1626
+ readonly sectionTitles: {
1627
+ readonly type: "array";
1628
+ readonly items: {
1629
+ readonly type: "string";
1630
+ };
1631
+ readonly title: "Section Titles";
1632
+ readonly description: "Titles of ancestor section nodes";
1633
+ };
1634
+ };
1635
+ readonly required: readonly ["doc_id", "chunkId", "leafNodeId", "depth", "text", "nodePath"];
1636
+ readonly additionalProperties: true;
1637
+ };
1638
+ readonly title: "Enriched Metadata";
1639
+ readonly description: "Metadata enriched with hierarchy information";
1640
+ };
1641
+ /**
1642
+ * Schema for document metadata: title is required, sourceUri and createdAt are optional strings, and additional properties are allowed; the DocumentMetadata type is derived from it
1643
+ */
1644
+ export declare const DocumentMetadataSchema: {
1645
+ readonly type: "object";
1646
+ readonly properties: {
1647
+ readonly title: {
1648
+ readonly type: "string";
1649
+ readonly title: "Title";
1650
+ readonly description: "Document title";
1651
+ };
1652
+ readonly sourceUri: {
1653
+ readonly type: "string";
1654
+ readonly title: "Source URI";
1655
+ readonly description: "Original source URI of the document";
1656
+ };
1657
+ readonly createdAt: {
1658
+ readonly type: "string";
1659
+ readonly title: "Created At";
1660
+ readonly description: "ISO timestamp of creation";
1661
+ };
1662
+ };
1663
+ readonly required: readonly ["title"];
1664
+ readonly additionalProperties: true;
1665
+ };
1666
+ export type DocumentMetadata = FromSchema<typeof DocumentMetadataSchema>;
1667
+ export {};
1668
+ //# sourceMappingURL=DocumentSchema.d.ts.map