@workglow/knowledge-base 0.0.115

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +670 -0
  3. package/dist/browser.js +1071 -0
  4. package/dist/browser.js.map +23 -0
  5. package/dist/bun.js +1072 -0
  6. package/dist/bun.js.map +23 -0
  7. package/dist/chunk/ChunkSchema.d.ts +206 -0
  8. package/dist/chunk/ChunkSchema.d.ts.map +1 -0
  9. package/dist/chunk/ChunkVectorStorageSchema.d.ts +64 -0
  10. package/dist/chunk/ChunkVectorStorageSchema.d.ts.map +1 -0
  11. package/dist/common-server.d.ts +7 -0
  12. package/dist/common-server.d.ts.map +1 -0
  13. package/dist/common.d.ts +20 -0
  14. package/dist/common.d.ts.map +1 -0
  15. package/dist/document/Document.d.ts +51 -0
  16. package/dist/document/Document.d.ts.map +1 -0
  17. package/dist/document/DocumentNode.d.ts +32 -0
  18. package/dist/document/DocumentNode.d.ts.map +1 -0
  19. package/dist/document/DocumentSchema.d.ts +1203 -0
  20. package/dist/document/DocumentSchema.d.ts.map +1 -0
  21. package/dist/document/DocumentStorageSchema.d.ts +43 -0
  22. package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
  23. package/dist/document/StructuralParser.d.ts +30 -0
  24. package/dist/document/StructuralParser.d.ts.map +1 -0
  25. package/dist/knowledge-base/InMemoryKnowledgeBaseRepository.d.ts +13 -0
  26. package/dist/knowledge-base/InMemoryKnowledgeBaseRepository.d.ts.map +1 -0
  27. package/dist/knowledge-base/KnowledgeBase.d.ts +123 -0
  28. package/dist/knowledge-base/KnowledgeBase.d.ts.map +1 -0
  29. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts +38 -0
  30. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts.map +1 -0
  31. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts +74 -0
  32. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts.map +1 -0
  33. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts +50 -0
  34. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts.map +1 -0
  35. package/dist/knowledge-base/createKnowledgeBase.d.ts +30 -0
  36. package/dist/knowledge-base/createKnowledgeBase.d.ts.map +1 -0
  37. package/dist/node.js +1071 -0
  38. package/dist/node.js.map +23 -0
  39. package/dist/types.d.ts +7 -0
  40. package/dist/types.d.ts.map +1 -0
  41. package/dist/util/DatasetSchema.d.ts +40 -0
  42. package/dist/util/DatasetSchema.d.ts.map +1 -0
  43. package/package.json +55 -0
package/dist/node.js ADDED
@@ -0,0 +1,1071 @@
1
+ // src/document/DocumentSchema.ts
2
/** Builds the `{ type, title, description }` shape shared by most properties below. */
function describedProperty(type, title, description) {
  return { type, title, description };
}

/** Builds an array property whose `items` key sits between `type` and `title`. */
function describedArray(items, title, description) {
  return { type: "array", items, title, description };
}

/** Builds a `kind` discriminator property pinned to one NodeKind value. */
function kindConstant(kind) {
  return { type: "string", const: kind, title: "Kind", description: "Node type discriminator" };
}

/** String discriminators for the kinds of node a document tree may contain. */
var NodeKind = {
  DOCUMENT: "document",
  SECTION: "section",
  PARAGRAPH: "paragraph",
  SENTENCE: "sentence",
  TOPIC: "topic"
};

/** Character range (start/end offsets) covered by a node. */
var NodeRangeSchema = {
  type: "object",
  properties: {
    startOffset: describedProperty("integer", "Start Offset", "Starting character offset"),
    endOffset: describedProperty("integer", "End Offset", "Ending character offset")
  },
  required: ["startOffset", "endOffset"],
  additionalProperties: false
};

/** A named entity: its text, a type label and a confidence score. */
var EntitySchema = {
  type: "object",
  properties: {
    text: describedProperty("string", "Text", "Entity text"),
    type: describedProperty("string", "Type", "Entity type (e.g., PERSON, ORG, LOC)"),
    score: describedProperty("number", "Score", "Confidence score")
  },
  required: ["text", "type", "score"],
  additionalProperties: false
};

/** Optional enrichment attached to a node; no property is required. */
var NodeEnrichmentSchema = {
  type: "object",
  properties: {
    summary: describedProperty("string", "Summary", "Summary of the node content"),
    entities: describedArray(EntitySchema, "Entities", "Named entities extracted from the node"),
    keywords: describedArray({ type: "string" }, "Keywords", "Keywords associated with the node")
  },
  additionalProperties: false
};

/** Properties common to every node; the per-kind schemas below spread these. */
var DocumentNodeBaseSchema = {
  type: "object",
  properties: {
    nodeId: describedProperty("string", "Node ID", "Unique identifier for this node"),
    kind: {
      type: "string",
      enum: Object.values(NodeKind),
      title: "Kind",
      description: "Node type discriminator"
    },
    range: NodeRangeSchema,
    text: describedProperty("string", "Text", "Text content of the node"),
    enrichment: NodeEnrichmentSchema
  },
  required: ["nodeId", "kind", "range", "text"],
  additionalProperties: true
};

/** Generic node schema; its `children` array deliberately declares no `items`. */
var DocumentNodeSchema = {
  type: "object",
  title: "Document Node",
  description: "A node in the hierarchical document tree",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    level: describedProperty("integer", "Level", "Header level for section nodes"),
    title: describedProperty("string", "Title", "Section title"),
    children: describedProperty("array", "Children", "Child nodes")
  },
  required: DocumentNodeBaseSchema.required.slice(),
  additionalProperties: false
};

/** Paragraph node: the base properties with `kind` pinned to "paragraph". */
var ParagraphNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindConstant(NodeKind.PARAGRAPH)
  },
  required: DocumentNodeBaseSchema.required.slice(),
  additionalProperties: false
};

/** Sentence node: the base properties with `kind` pinned to "sentence". */
var SentenceNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindConstant(NodeKind.SENTENCE)
  },
  required: DocumentNodeBaseSchema.required.slice(),
  additionalProperties: false
};

/** Section node: adds a 1-6 header level, a title and child nodes, all required. */
var SectionNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindConstant(NodeKind.SECTION),
    level: {
      type: "integer",
      minimum: 1,
      maximum: 6,
      title: "Level",
      description: "Header level (1-6 for markdown)"
    },
    title: describedProperty("string", "Title", "Section title"),
    children: describedArray(DocumentNodeSchema, "Children", "Child nodes")
  },
  required: DocumentNodeBaseSchema.required.concat("level", "title", "children"),
  additionalProperties: false
};

/** Topic node: the base properties plus required child nodes. */
var TopicNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindConstant(NodeKind.TOPIC),
    children: describedArray(DocumentNodeSchema, "Children", "Child nodes")
  },
  required: DocumentNodeBaseSchema.required.concat("children"),
  additionalProperties: false
};

/** Root node of a document: requires a title and child nodes. */
var DocumentRootNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindConstant(NodeKind.DOCUMENT),
    title: describedProperty("string", "Title", "Document title"),
    children: describedArray(DocumentNodeSchema, "Children", "Child nodes")
  },
  required: DocumentNodeBaseSchema.required.concat("title", "children"),
  additionalProperties: false
};

/** Token limits used when splitting a document into chunks. */
var TokenBudgetSchema = {
  type: "object",
  properties: {
    maxTokensPerChunk: describedProperty("integer", "Max Tokens Per Chunk", "Maximum tokens allowed per chunk"),
    overlapTokens: describedProperty("integer", "Overlap Tokens", "Number of tokens to overlap between chunks"),
    reservedTokens: describedProperty("integer", "Reserved Tokens", "Tokens reserved for metadata or context")
  },
  required: ["maxTokensPerChunk", "overlapTokens", "reservedTokens"],
  additionalProperties: false
};

/** Document-level metadata; only `title` is required and extra keys are allowed. */
var DocumentMetadataSchema = {
  type: "object",
  properties: {
    title: describedProperty("string", "Title", "Document title"),
    sourceUri: describedProperty("string", "Source URI", "Original source URI of the document"),
    createdAt: describedProperty("string", "Created At", "ISO timestamp of creation")
  },
  required: ["title"],
  additionalProperties: true
};
270
+
271
+ // src/chunk/ChunkSchema.ts
272
/**
 * Returns a freshly built JSON Schema describing one chunk record.
 * A new object is created on every call; `entities.items` references the
 * shared EntitySchema.
 */
var ChunkRecordSchema = () => {
  const field = (type, title, description) => ({ type, title, description });
  const listOf = (items, title, description) => ({ type: "array", items, title, description });
  const stringList = (title, description) => listOf({ type: "string" }, title, description);

  const properties = {
    chunkId: field("string", "Chunk ID", "Unique identifier for this chunk"),
    doc_id: field("string", "Document ID", "ID of the parent document"),
    text: field("string", "Text", "Text content of the chunk"),
    nodePath: stringList("Node Path", "Node IDs from root to leaf"),
    depth: field("integer", "Depth", "Depth in the document tree"),
    leafNodeId: field("string", "Leaf Node ID", "ID of the leaf node this chunk belongs to"),
    summary: field("string", "Summary", "Summary of the chunk content"),
    entities: listOf(EntitySchema, "Entities", "Named entities extracted from the chunk"),
    parentSummaries: stringList("Parent Summaries", "Summaries from ancestor nodes"),
    sectionTitles: stringList("Section Titles", "Titles of ancestor section nodes"),
    doc_title: field("string", "Document Title", "Title of the parent document")
  };

  return {
    type: "object",
    properties,
    required: ["chunkId", "doc_id", "text", "nodePath", "depth"],
    additionalProperties: true
  };
};
338
/** JSON Schema for a list of chunk records; the item schema is built once when this module loads. */
var ChunkRecordArraySchema = (() => {
  const recordSchema = ChunkRecordSchema();
  return {
    type: "array",
    items: recordSchema,
    title: "Chunk Records",
    description: "Array of chunk records"
  };
})();
344
+ // src/chunk/ChunkVectorStorageSchema.ts
345
+ import { TypedArraySchema } from "@workglow/util";
346
/** Primary-key column list for the chunk vector table. */
var ChunkVectorPrimaryKey = ["chunk_id"];

/** Row schema for stored chunk vectors; all four columns are required and no others are allowed. */
var ChunkVectorStorageSchema = {
  type: "object",
  properties: {
    chunk_id: {
      type: "string",
      "x-auto-generated": true
    },
    doc_id: {
      type: "string"
    },
    vector: TypedArraySchema(),
    metadata: {
      type: "object",
      format: "metadata",
      additionalProperties: true
    }
  },
  required: ["chunk_id", "doc_id", "vector", "metadata"],
  additionalProperties: false
};
358
+ // src/document/Document.ts
359
/**
 * In-memory representation of a parsed document: its node tree, its metadata
 * and the chunks derived from it.
 */
class Document {
  doc_id;
  metadata;
  root;
  chunks;

  /**
   * @param root Root node of the document tree.
   * @param metadata Document metadata object.
   * @param chunks Chunk records; any falsy value is stored as an empty array.
   * @param doc_id Optional identifier (may be assigned later via setDocId).
   */
  constructor(root, metadata, chunks = [], doc_id) {
    this.doc_id = doc_id;
    this.root = root;
    this.metadata = metadata;
    this.chunks = chunks || [];
  }

  /** Replaces the chunk list; a falsy value is normalised to `[]`, as in the constructor. */
  setChunks(chunks) {
    this.chunks = chunks || [];
  }

  /** @returns the current chunk list. */
  getChunks() {
    return this.chunks;
  }

  /** Sets the document identifier. */
  setDocId(doc_id) {
    this.doc_id = doc_id;
  }

  /**
   * @returns the chunks whose `nodePath` contains `nodeId`. Chunks without an
   * array `nodePath` are skipped instead of raising a TypeError.
   */
  findChunksByNodeId(nodeId) {
    return this.chunks.filter(
      (chunk) => Array.isArray(chunk.nodePath) && chunk.nodePath.includes(nodeId)
    );
  }

  /** @returns a plain object with `metadata`, `root` and `chunks` (doc_id is not included). */
  toJSON() {
    return {
      metadata: this.metadata,
      root: this.root,
      chunks: this.chunks
    };
  }

  /**
   * Rebuilds a Document from its serialised form.
   *
   * @param json Either a JSON string or the already-parsed object produced by
   *   `toJSON()`, so `Document.fromJSON(doc.toJSON())` round-trips.
   * @param doc_id Identifier to assign to the new instance.
   * @throws SyntaxError when `json` is a string that is not valid JSON.
   */
  static fromJSON(json, doc_id) {
    const obj = typeof json === "string" ? JSON.parse(json) : json;
    return new Document(obj.root, obj.metadata, obj.chunks, doc_id);
  }
}
394
+
395
+ // src/knowledge-base/KnowledgeBase.ts
396
/**
 * Pairs a tabular document store with a chunk/vector store and exposes
 * document, tree-navigation, chunk and search operations over both.
 */
class KnowledgeBase {
  name;
  title;
  description;
  tabularStorage;
  chunkStorage;

  /**
   * @param name Knowledge base name.
   * @param documentStorage Storage used for serialised documents.
   * @param chunkStorage Storage used for chunk vectors.
   * @param title Display title; defaults to `name`.
   * @param description Description; defaults to "".
   */
  constructor(name, documentStorage, chunkStorage, title, description) {
    this.name = name;
    this.title = title ?? name;
    this.description = description ?? "";
    this.tabularStorage = documentStorage;
    this.chunkStorage = chunkStorage;
  }

  /**
   * Stores the document as a JSON string keyed by its doc_id. If the storage
   * returns a different doc_id, it is written back to the document.
   * @returns the same document instance.
   */
  async upsertDocument(document) {
    const serialized = JSON.stringify(document.toJSON());
    const insertEntity = {
      doc_id: document.doc_id,
      data: serialized
    };
    const entity = await this.tabularStorage.put(insertEntity);
    if (document.doc_id !== entity.doc_id) {
      document.setDocId(entity.doc_id);
    }
    return document;
  }

  /** @returns the Document stored under `doc_id`, or undefined when none exists. */
  async getDocument(doc_id) {
    const entity = await this.tabularStorage.get({ doc_id });
    if (!entity) {
      return;
    }
    return Document.fromJSON(entity.data, entity.doc_id);
  }

  /** Deletes the document's chunks first, then the document row. */
  async deleteDocument(doc_id) {
    await this.deleteChunksForDocument(doc_id);
    await this.tabularStorage.delete({ doc_id });
  }

  /** @returns the doc_id of every stored document (empty array when storage returns nothing). */
  async listDocuments() {
    const entities = await this.tabularStorage.getAll();
    if (!entities) {
      return [];
    }
    return entities.map((e) => e.doc_id);
  }

  /** @returns the first node (depth-first, pre-order) whose nodeId matches, or undefined. */
  async getNode(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return;
    }
    const traverse = (node) => {
      if (node.nodeId === nodeId) {
        return node;
      }
      if ("children" in node && Array.isArray(node.children)) {
        for (const child of node.children) {
          const found = traverse(child);
          if (found)
            return found;
        }
      }
      return;
    };
    return traverse(doc.root);
  }

  /**
   * @returns the nodes from the root down to (and including) the node with
   * `nodeId`, or an empty array when the document or the node is missing.
   *
   * The node objects are collected during the single depth-first search.
   * Re-resolving an id path afterwards could follow the wrong sibling when
   * two siblings share a nodeId.
   */
  async getAncestors(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    const trail = [];
    const descend = (node) => {
      trail.push(node);
      if (node.nodeId === nodeId) {
        return true;
      }
      if ("children" in node && Array.isArray(node.children)) {
        for (const child of node.children) {
          if (descend(child)) {
            return true;
          }
        }
      }
      // Dead end: this node is not on the path to the target.
      trail.pop();
      return false;
    };
    return descend(doc.root) ? trail : [];
  }

  /**
   * Stores one chunk after checking its vector length.
   * @throws Error when the length differs from the store's vector dimensions.
   */
  async upsertChunk(chunk) {
    const expected = this.getVectorDimensions();
    if (chunk.vector.length !== expected) {
      throw new Error(`Vector dimension mismatch: expected ${expected}, got ${chunk.vector.length}.`);
    }
    return this.chunkStorage.put(chunk);
  }

  /**
   * Stores many chunks; every vector is validated before anything is written.
   * @throws Error on the first chunk whose vector length is wrong.
   */
  async upsertChunksBulk(chunks) {
    const expected = this.getVectorDimensions();
    for (const chunk of chunks) {
      if (chunk.vector.length !== expected) {
        throw new Error(`Vector dimension mismatch: expected ${expected}, got ${chunk.vector.length}.`);
      }
    }
    return this.chunkStorage.putBulk(chunks);
  }

  /** Removes every stored chunk whose doc_id matches. */
  async deleteChunksForDocument(doc_id) {
    await this.chunkStorage.deleteSearch({ doc_id });
  }

  /** @returns the stored chunks for `doc_id` (empty array when the query yields null/undefined). */
  async getChunksForDocument(doc_id) {
    const results = await this.chunkStorage.query({ doc_id });
    return results ?? [];
  }

  /** Delegates to the chunk storage's similaritySearch. */
  async similaritySearch(query, options) {
    return this.chunkStorage.similaritySearch(query, options);
  }

  /**
   * Delegates to the chunk storage's hybridSearch.
   * @throws Error when the configured chunk storage has no `hybridSearch` function.
   */
  async hybridSearch(query, options) {
    if (typeof this.chunkStorage.hybridSearch !== "function") {
      throw new Error("Hybrid search is not supported by the configured chunk storage backend. " + "Please use a vector storage implementation that provides `hybridSearch`.");
    }
    return this.chunkStorage.hybridSearch(query, options);
  }

  /**
   * Loads the document and deletes its stored chunks so it can be re-chunked.
   * @returns the document, or undefined (without deleting anything) when it does not exist.
   */
  async prepareReindex(doc_id) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return;
    }
    await this.deleteChunksForDocument(doc_id);
    return doc;
  }

  /** Runs setupDatabase on the document storage, then on the chunk storage. */
  async setupDatabase() {
    await this.tabularStorage.setupDatabase();
    await this.chunkStorage.setupDatabase();
  }

  /** Calls destroy on both storages. */
  destroy() {
    this.tabularStorage.destroy();
    this.chunkStorage.destroy();
  }

  /** @returns the stored chunk with the given chunk_id. */
  async getChunk(chunk_id) {
    return this.chunkStorage.get({ chunk_id });
  }

  /** Direct pass-through to chunk storage; unlike upsertChunk, no dimension check is performed. */
  async put(chunk) {
    return this.chunkStorage.put(chunk);
  }

  /** Direct pass-through to chunk storage; unlike upsertChunksBulk, no dimension check is performed. */
  async putBulk(chunks) {
    return this.chunkStorage.putBulk(chunks);
  }

  /** @returns whatever the chunk storage's getAll yields. */
  async getAllChunks() {
    return this.chunkStorage.getAll();
  }

  /** @returns the chunk storage's size. */
  async chunkCount() {
    return this.chunkStorage.size();
  }

  /** Deletes every stored chunk. */
  async clearChunks() {
    return this.chunkStorage.deleteAll();
  }

  /** @returns the vector dimension count reported by the chunk storage. */
  getVectorDimensions() {
    return this.chunkStorage.getVectorDimensions();
  }

  /** @returns the chunks embedded in the stored document itself (not the vector store). */
  async getDocumentChunks(doc_id) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    return doc.getChunks();
  }

  /** @returns the document-embedded chunks whose nodePath contains `nodeId`. */
  async findChunksByNodeId(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    return doc.findChunksByNodeId(nodeId);
  }
}
585
+ // src/knowledge-base/KnowledgeBaseSchema.ts
586
/** Primary-key column list for knowledge base records. */
var KnowledgeBasePrimaryKeyNames = ["kb_id"];

/**
 * Row schema for a persisted knowledge base record. Every column is required
 * and no additional columns are permitted.
 */
var KnowledgeBaseRecordSchema = (() => {
  // [column name, JSON Schema type], in declaration order.
  const columns = [
    ["kb_id", "string"],
    ["title", "string"],
    ["description", "string"],
    ["vector_dimensions", "integer"],
    ["document_table", "string"],
    ["chunk_table", "string"],
    ["created_at", "string"],
    ["updated_at", "string"]
  ];
  return {
    type: "object",
    properties: Object.fromEntries(columns.map(([column, type]) => [column, { type }])),
    required: columns.map(([column]) => column),
    additionalProperties: false
  };
})();
611
/**
 * Derives the document and chunk table names for a knowledge base id.
 * Every character outside `[a-zA-Z0-9_]` is replaced with `_`, so distinct ids
 * such as "a-b" and "a_b" map to the same table names.
 *
 * @param kbId Knowledge base id (string).
 * @returns `{ documentTable, chunkTable }`.
 */
function knowledgeBaseTableNames(kbId) {
  const sanitizedId = kbId.replace(/[^a-zA-Z0-9_]/g, "_");
  const tableFor = (prefix) => `${prefix}_${sanitizedId}`;
  return {
    documentTable: tableFor("kb_docs"),
    chunkTable: tableFor("kb_chunks")
  };
}
618
+ // src/knowledge-base/KnowledgeBaseRepository.ts
619
+ import { EventEmitter } from "@workglow/util";
620
+
621
/**
 * Persists knowledge base records in a tabular storage and emits
 * "knowledge_base_added" / "knowledge_base_removed" events on changes.
 */
class KnowledgeBaseRepository {
  storage;
  events = new EventEmitter();

  /** @param storage Tabular storage keyed by `kb_id`. */
  constructor(storage) {
    this.storage = storage;
  }

  /** Calls the storage's `setupDatabase` when it defines one. */
  async setupDatabase() {
    await this.storage.setupDatabase?.();
  }

  /** Subscribes `fn` to the named event. */
  on(name, fn) {
    this.events.on(name, fn);
  }

  /** Unsubscribes `fn` from the named event. */
  off(name, fn) {
    this.events.off(name, fn);
  }

  /** Subscribes `fn` to the named event via the emitter's `once`. */
  once(name, fn) {
    this.events.once(name, fn);
  }

  /** @returns the result of the emitter's `waitOn` for the named event. */
  waitOn(name) {
    return this.events.waitOn(name);
  }

  /**
   * Stores the record, then emits "knowledge_base_added" with it.
   * @returns the record that was passed in.
   */
  async addKnowledgeBase(record) {
    await this.storage.put(record);
    this.events.emit("knowledge_base_added", record);
    return record;
  }

  /**
   * Deletes the record and emits "knowledge_base_removed" with the removed record.
   * @throws Error when no record with `kb_id` exists.
   */
  async removeKnowledgeBase(kb_id) {
    const existing = await this.storage.get({ kb_id });
    if (!existing) {
      throw new Error(`KnowledgeBase with id "${kb_id}" not found`);
    }
    await this.storage.delete({ kb_id });
    this.events.emit("knowledge_base_removed", existing);
  }

  /** @returns the record for `kb_id`, or undefined when absent or when `kb_id` is not a string. */
  async getKnowledgeBase(kb_id) {
    if (typeof kb_id !== "string") {
      return undefined;
    }
    const found = await this.storage.get({ kb_id });
    return found ?? undefined;
  }

  /** @returns every stored record, or an empty array when there are none. */
  async enumerateAll() {
    const all = await this.storage.getAll();
    const hasRows = Boolean(all) && all.length !== 0;
    return hasRows ? all : [];
  }

  /** @returns the number of stored records as reported by the storage. */
  async size() {
    const count = await this.storage.size();
    return count;
  }
}
671
+ // src/knowledge-base/InMemoryKnowledgeBaseRepository.ts
672
+ import { InMemoryTabularStorage } from "@workglow/storage";
673
/** KnowledgeBaseRepository backed by an InMemoryTabularStorage keyed on `kb_id`. */
class InMemoryKnowledgeBaseRepository extends KnowledgeBaseRepository {
  constructor() {
    const backingStore = new InMemoryTabularStorage(
      KnowledgeBaseRecordSchema,
      KnowledgeBasePrimaryKeyNames
    );
    super(backingStore);
  }
}
678
+ // src/knowledge-base/KnowledgeBaseRegistry.ts
679
+ import {
680
+ createServiceToken,
681
+ globalServiceRegistry,
682
+ registerInputResolver
683
+ } from "@workglow/util";
684
/** Service token for the in-memory map of live KnowledgeBase instances. */
var KNOWLEDGE_BASES = createServiceToken("knowledge-base.registry");
/** Service token for the repository that persists knowledge base records. */
var KNOWLEDGE_BASE_REPOSITORY = createServiceToken("knowledge-base.repository");

// Register default factories only when nothing is registered for the token yet.
// NOTE(review): the third argument `true` is passed through unchanged; it
// presumably marks the service as a singleton. Confirm against @workglow/util.
for (const [token, factory] of [
  [KNOWLEDGE_BASES, () => new Map()],
  [KNOWLEDGE_BASE_REPOSITORY, () => new InMemoryKnowledgeBaseRepository()]
]) {
  if (!globalServiceRegistry.has(token)) {
    globalServiceRegistry.register(token, factory, true);
  }
}
692
/** @returns the service registered under KNOWLEDGE_BASES in the global service registry. */
function getGlobalKnowledgeBases() {
  const knowledgeBases = globalServiceRegistry.get(KNOWLEDGE_BASES);
  return knowledgeBases;
}
695
/** @returns the service registered under KNOWLEDGE_BASE_REPOSITORY in the global service registry. */
function getGlobalKnowledgeBaseRepository() {
  const repository = globalServiceRegistry.get(KNOWLEDGE_BASE_REPOSITORY);
  return repository;
}
698
/**
 * Registers `repository` as the instance for KNOWLEDGE_BASE_REPOSITORY in the
 * global service registry.
 */
function setGlobalKnowledgeBaseRepository(repository) {
  const token = KNOWLEDGE_BASE_REPOSITORY;
  globalServiceRegistry.registerInstance(token, repository);
}
701
/**
 * Registers a live KnowledgeBase under `id` in the global map and persists a
 * matching record (title, description, vector dimensions, derived table names
 * and timestamps) through the global repository.
 *
 * The map entry is written first, as before, so it is already present when the
 * repository emits its "added" event. If building or persisting the record
 * fails, the map is restored to its previous state so the two registries do
 * not disagree, and the original error is rethrown.
 *
 * @param id Knowledge base id.
 * @param kb KnowledgeBase instance (reads `title`, `description`, `getVectorDimensions()`).
 */
async function registerKnowledgeBase(id, kb) {
  const kbs = getGlobalKnowledgeBases();
  const hadPrevious = kbs.has(id);
  const previous = kbs.get(id);
  kbs.set(id, kb);
  try {
    const now = new Date().toISOString();
    const tableNames = knowledgeBaseTableNames(id);
    const record = {
      kb_id: id,
      title: kb.title,
      description: kb.description,
      vector_dimensions: kb.getVectorDimensions(),
      document_table: tableNames.documentTable,
      chunk_table: tableNames.chunkTable,
      created_at: now,
      updated_at: now
    };
    const repo = getGlobalKnowledgeBaseRepository();
    await repo.addKnowledgeBase(record);
  } catch (err) {
    // Only undo our own write; a concurrent registration may have replaced it.
    if (kbs.get(id) === kb) {
      if (hadPrevious) {
        kbs.set(id, previous);
      } else {
        kbs.delete(id);
      }
    }
    throw err;
  }
}
719
/** @returns the live KnowledgeBase registered under `id` in the global map, or undefined. */
function getKnowledgeBase(id) {
  const registered = getGlobalKnowledgeBases();
  return registered.get(id);
}
722
/**
 * Input resolver for the "knowledge-base" format: looks up a KnowledgeBase by id.
 * Uses the map registered in `registry` when it has one, otherwise the global map.
 *
 * @param id Knowledge base id.
 * @param format Unused here; part of the resolver signature.
 * @param registry Service registry exposing `has` and `get`.
 * @throws Error when no knowledge base is registered under `id`.
 */
async function resolveKnowledgeBaseFromRegistry(id, format, registry) {
  let available;
  if (registry.has(KNOWLEDGE_BASES)) {
    available = registry.get(KNOWLEDGE_BASES);
  } else {
    available = getGlobalKnowledgeBases();
  }
  const match = available.get(id);
  if (match) {
    return match;
  }
  throw new Error(`Knowledge base "${id}" not found in registry`);
}
730
+ registerInputResolver("knowledge-base", resolveKnowledgeBaseFromRegistry);
731
+ // src/knowledge-base/createKnowledgeBase.ts
732
+ import { InMemoryTabularStorage as InMemoryTabularStorage2, InMemoryVectorStorage } from "@workglow/storage";
733
+
734
+ // src/document/DocumentStorageSchema.ts
735
/** Primary-key column list for the document table. */
var DocumentStorageKey = ["doc_id"];

/**
 * Row schema for stored documents: `doc_id` (flagged "x-auto-generated"), the
 * JSON-serialised document in `data`, and an optional `metadata` object.
 */
var DocumentStorageSchema = (() => {
  const doc_id = {
    type: "string",
    "x-auto-generated": true,
    title: "Document ID",
    description: "Unique identifier for the document"
  };
  const data = {
    type: "string",
    title: "Document Data",
    description: "JSON-serialized document"
  };
  const metadata = {
    type: "object",
    title: "Metadata",
    description: "Metadata of the document"
  };
  return {
    type: "object",
    properties: { doc_id, data, metadata },
    required: ["doc_id", "data"],
    additionalProperties: true
  };
})();
759
+
760
+ // src/knowledge-base/createKnowledgeBase.ts
761
/**
 * Creates a KnowledgeBase backed by in-memory document and vector storage.
 * Both storages are set up before the KnowledgeBase is constructed.
 *
 * @param options
 *   - name: knowledge base name (also the registration id)
 *   - vectorDimensions: passed to the vector storage
 *   - vectorType: typed-array constructor, defaults to Float32Array
 *   - register: when true (the default) the instance is registered globally
 *   - title, description: forwarded to KnowledgeBase
 * @returns the new KnowledgeBase.
 */
async function createKnowledgeBase(options) {
  const {
    name,
    vectorDimensions,
    vectorType = Float32Array,
    register: shouldRegister = true,
    title,
    description
  } = options;

  const documentStore = new InMemoryTabularStorage2(DocumentStorageSchema, DocumentStorageKey);
  await documentStore.setupDatabase();

  const chunkStore = new InMemoryVectorStorage(
    ChunkVectorStorageSchema,
    ChunkVectorPrimaryKey,
    [],
    vectorDimensions,
    vectorType
  );
  await chunkStore.setupDatabase();

  const knowledgeBase = new KnowledgeBase(name, documentStore, chunkStore, title, description);
  if (!shouldRegister) {
    return knowledgeBase;
  }
  await registerKnowledgeBase(name, knowledgeBase);
  return knowledgeBase;
}
780
+ // src/util/DatasetSchema.ts
781
/**
 * Builds a schema fragment for an input that is either a tabular storage id
 * or a storage instance. `options` may override `title`/`description` and add
 * keys, but `format` and `oneOf` are always set by this function.
 *
 * NOTE(review): this uses `oneOf` while TypeKnowledgeBase uses `anyOf`. The
 * second branch declares no `type`, so under standard JSON Schema a string id
 * satisfies both branches and strict `oneOf` validation would reject it.
 * Confirm which combinator consumers expect.
 */
function TypeTabularStorage(options = {}) {
  const defaults = {
    title: "Tabular Storage",
    description: "Storage ID or instance for tabular data storage"
  };
  const pinned = {
    format: "storage:tabular",
    oneOf: [
      { type: "string", title: "Storage ID" },
      { title: "Storage Instance", additionalProperties: true }
    ]
  };
  return { ...defaults, ...options, ...pinned };
}
793
/**
 * Builds a schema fragment for an input that is either a knowledge base id or
 * a knowledge base instance. `options` may override `title`/`description` and
 * add keys, but `format` and `anyOf` are always set by this function.
 */
function TypeKnowledgeBase(options = {}) {
  const defaults = {
    title: "Knowledge Base",
    description: "Knowledge base ID or instance"
  };
  const pinned = {
    format: "knowledge-base",
    anyOf: [
      { type: "string", title: "Knowledge Base ID" },
      { title: "Knowledge Base Instance", additionalProperties: true }
    ]
  };
  return { ...defaults, ...options, ...pinned };
}
805
+ // src/document/DocumentNode.ts
806
/**
 * Rough token estimate: the text length divided by four, rounded up.
 * @param text Input string (its `length` is used).
 * @returns estimated token count.
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  const estimate = text.length / charsPerToken;
  return Math.ceil(estimate);
}
809
/**
 * Tells whether a node's kind is one that carries children (document, section or topic).
 * Only `node.kind` is inspected; the `children` property itself is not checked.
 */
function hasChildren(node) {
  switch (node.kind) {
    case NodeKind.DOCUMENT:
    case NodeKind.SECTION:
    case NodeKind.TOPIC:
      return true;
    default:
      return false;
  }
}
812
/** @returns `node.children` when the node's kind carries children, otherwise a new empty array. */
function getChildren(node) {
  return hasChildren(node) ? node.children : [];
}
818
/**
 * Lazily yields `node` and then all of its descendants in depth-first, pre-order sequence.
 * Children are only visited for nodes whose kind carries children.
 */
function* traverseDepthFirst(node) {
  yield node;
  if (!hasChildren(node)) {
    return;
  }
  for (const descendant of node.children) {
    yield* traverseDepthFirst(descendant);
  }
}
826
/**
 * Finds the chain of node ids leading from `root` to the node with `targetNodeId`.
 * The search is depth-first and returns the first match.
 *
 * @returns an array of node ids (root first, target last), or undefined when not found.
 */
function getNodePath(root, targetNodeId) {
  const locate = (node, trail) => {
    const here = [...trail, node.nodeId];
    if (node.nodeId === targetNodeId) {
      return here;
    }
    if (hasChildren(node)) {
      for (const child of node.children) {
        const hit = locate(child, here);
        if (hit) {
          return hit;
        }
      }
    }
    return undefined;
  };
  return locate(root, []);
}
845
/**
 * Follows `nodePath` from `root` and returns the `range` of the node it ends at.
 * The first path entry is taken to be the root itself and is not checked;
 * each later entry is looked up among the current node's children.
 *
 * @param root Root node of the tree.
 * @param nodePath Node ids from root to target.
 * @returns the target node's `range`.
 * @throws Error when a path entry is not among the current node's children,
 *   including when the current node has no `children` array at all.
 */
function getDocumentRange(root, nodePath) {
  let currentNode = root;
  for (let i = 1; i < nodePath.length; i++) {
    const targetId = nodePath[i];
    // Leaf nodes have no children array; treat them as having none so the
    // "not found" error below is raised instead of a TypeError.
    const children = Array.isArray(currentNode.children) ? currentNode.children : [];
    const found = children.find((child) => child.nodeId === targetId);
    if (!found) {
      throw new Error(`Node with id ${targetId} not found in path`);
    }
    currentNode = found;
  }
  return currentNode.range;
}
864
+ // src/document/StructuralParser.ts
865
+ import { uuid4 } from "@workglow/util";
866
/**
 * Builds a document node tree from raw text, either by markdown headers or by
 * blank-line separated paragraphs.
 */
class StructuralParser {
  /**
   * Parses markdown into a tree of section and paragraph nodes.
   *
   * Lines matching `#`-style headers (1-6 hashes followed by whitespace) open a
   * section, closing any open section of the same or a deeper level. All other
   * lines between headers are joined into one paragraph (trimmed; skipped when
   * empty). Offsets are character offsets into `text`.
   *
   * @param doc_id Unused by this method.
   * @param text Markdown source.
   * @param title Used as the root node's `title` and `text`.
   * @returns the root document node.
   */
  static async parseMarkdown(doc_id, text, title) {
    const root = {
      nodeId: uuid4(),
      kind: NodeKind.DOCUMENT,
      range: { startOffset: 0, endOffset: text.length },
      text: title,
      title,
      children: []
    };
    // Holds the root plus the chain of currently open sections.
    const parentStack = [root];
    let pendingLines = [];
    let pendingStartOffset = 0;
    let currentOffset = 0;

    const flushParagraph = () => {
      if (pendingLines.length === 0) {
        return;
      }
      const content = pendingLines.join("\n").trim();
      pendingLines = [];
      if (!content) {
        return;
      }
      parentStack[parentStack.length - 1].children.push({
        nodeId: uuid4(),
        kind: NodeKind.PARAGRAPH,
        range: {
          startOffset: pendingStartOffset,
          // Each line is counted with a trailing newline, so after the last
          // line the running offset can be text.length + 1; keep it in bounds.
          endOffset: Math.min(currentOffset, text.length)
        },
        text: content
      });
    };

    for (const line of text.split("\n")) {
      // Drop a trailing "\r" for matching only: `.` and `$` do not cross a
      // carriage return, so CRLF headers would otherwise never be recognised.
      const headerMatch = line.replace(/\r$/, "").match(/^(#{1,6})\s+(.*)$/);
      if (headerMatch) {
        flushParagraph();
        const level = headerMatch[1].length;
        const headerTitle = headerMatch[2];
        while (
          parentStack.length > 1 &&
          parentStack[parentStack.length - 1].kind === NodeKind.SECTION &&
          parentStack[parentStack.length - 1].level >= level
        ) {
          // A section ends where the next same-or-higher-level header begins.
          parentStack.pop().range.endOffset = currentOffset;
        }
        const section = {
          nodeId: uuid4(),
          kind: NodeKind.SECTION,
          level,
          title: headerTitle,
          range: {
            startOffset: currentOffset,
            // Provisional: stays at end-of-text unless a later header closes it.
            endOffset: text.length
          },
          text: headerTitle,
          children: []
        };
        parentStack[parentStack.length - 1].children.push(section);
        parentStack.push(section);
      } else {
        if (pendingLines.length === 0) {
          pendingStartOffset = currentOffset;
        }
        pendingLines.push(line);
      }
      currentOffset += line.length + 1;
    }
    flushParagraph();
    // Sections still open here already end at text.length (set on creation).
    return root;
  }

  /**
   * Parses plain text into paragraph nodes split on blank lines.
   * Each paragraph's text is trimmed and its range covers exactly the trimmed text.
   *
   * @param doc_id Unused by this method.
   * @param text Plain-text source.
   * @param title Used as the root node's `title` and `text`.
   * @returns the root document node.
   */
  static async parsePlainText(doc_id, text, title) {
    const root = {
      nodeId: uuid4(),
      kind: NodeKind.DOCUMENT,
      range: { startOffset: 0, endOffset: text.length },
      text: title,
      title,
      children: []
    };

    // Appends a paragraph for the raw slice starting at `rawStart`, unless it is blank.
    const addParagraph = (rawStart, rawParagraph) => {
      const paragraphText = rawParagraph.trim();
      if (paragraphText.length === 0) {
        return;
      }
      const startOffset = rawStart + rawParagraph.indexOf(paragraphText);
      root.children.push({
        nodeId: uuid4(),
        kind: NodeKind.PARAGRAPH,
        range: {
          startOffset,
          endOffset: startOffset + paragraphText.length
        },
        text: paragraphText
      });
    };

    const paragraphRegex = /\n\s*\n/g;
    let lastIndex = 0;
    let match;
    while ((match = paragraphRegex.exec(text)) !== null) {
      addParagraph(lastIndex, text.slice(lastIndex, match.index));
      lastIndex = paragraphRegex.lastIndex;
    }
    if (lastIndex < text.length) {
      addParagraph(lastIndex, text.slice(lastIndex));
    }
    return root;
  }

  /**
   * Parses as markdown when `format` is "markdown", or when no format is given
   * and the text looks like markdown; otherwise parses as plain text.
   * @returns a promise for the root document node.
   */
  static parse(doc_id, text, title, format) {
    if (format === "markdown" || !format && this.looksLikeMarkdown(text)) {
      return this.parseMarkdown(doc_id, text, title);
    }
    return this.parsePlainText(doc_id, text, title);
  }

  /** @returns true when any line starts with 1-6 `#` characters followed by whitespace. */
  static looksLikeMarkdown(text) {
    return /^#{1,6}\s/m.test(text);
  }
}
1025
+ export {
1026
+ traverseDepthFirst,
1027
+ setGlobalKnowledgeBaseRepository,
1028
+ registerKnowledgeBase,
1029
+ knowledgeBaseTableNames,
1030
+ hasChildren,
1031
+ getNodePath,
1032
+ getKnowledgeBase,
1033
+ getGlobalKnowledgeBases,
1034
+ getGlobalKnowledgeBaseRepository,
1035
+ getDocumentRange,
1036
+ getChildren,
1037
+ estimateTokens,
1038
+ createKnowledgeBase,
1039
+ TypeTabularStorage,
1040
+ TypeKnowledgeBase,
1041
+ TopicNodeSchema,
1042
+ TokenBudgetSchema,
1043
+ StructuralParser,
1044
+ SentenceNodeSchema,
1045
+ SectionNodeSchema,
1046
+ ParagraphNodeSchema,
1047
+ NodeRangeSchema,
1048
+ NodeKind,
1049
+ NodeEnrichmentSchema,
1050
+ KnowledgeBaseRepository,
1051
+ KnowledgeBaseRecordSchema,
1052
+ KnowledgeBasePrimaryKeyNames,
1053
+ KnowledgeBase,
1054
+ KNOWLEDGE_BASE_REPOSITORY,
1055
+ KNOWLEDGE_BASES,
1056
+ InMemoryKnowledgeBaseRepository,
1057
+ EntitySchema,
1058
+ DocumentStorageSchema,
1059
+ DocumentStorageKey,
1060
+ DocumentRootNodeSchema,
1061
+ DocumentNodeSchema,
1062
+ DocumentNodeBaseSchema,
1063
+ DocumentMetadataSchema,
1064
+ Document,
1065
+ ChunkVectorStorageSchema,
1066
+ ChunkVectorPrimaryKey,
1067
+ ChunkRecordSchema,
1068
+ ChunkRecordArraySchema
1069
+ };
1070
+
1071
+ //# debugId=5A4C1CAE3E58975C64756E2164756E21