@workglow/knowledge-base 0.0.115

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +670 -0
  3. package/dist/browser.js +1071 -0
  4. package/dist/browser.js.map +23 -0
  5. package/dist/bun.js +1072 -0
  6. package/dist/bun.js.map +23 -0
  7. package/dist/chunk/ChunkSchema.d.ts +206 -0
  8. package/dist/chunk/ChunkSchema.d.ts.map +1 -0
  9. package/dist/chunk/ChunkVectorStorageSchema.d.ts +64 -0
  10. package/dist/chunk/ChunkVectorStorageSchema.d.ts.map +1 -0
  11. package/dist/common-server.d.ts +7 -0
  12. package/dist/common-server.d.ts.map +1 -0
  13. package/dist/common.d.ts +20 -0
  14. package/dist/common.d.ts.map +1 -0
  15. package/dist/document/Document.d.ts +51 -0
  16. package/dist/document/Document.d.ts.map +1 -0
  17. package/dist/document/DocumentNode.d.ts +32 -0
  18. package/dist/document/DocumentNode.d.ts.map +1 -0
  19. package/dist/document/DocumentSchema.d.ts +1203 -0
  20. package/dist/document/DocumentSchema.d.ts.map +1 -0
  21. package/dist/document/DocumentStorageSchema.d.ts +43 -0
  22. package/dist/document/DocumentStorageSchema.d.ts.map +1 -0
  23. package/dist/document/StructuralParser.d.ts +30 -0
  24. package/dist/document/StructuralParser.d.ts.map +1 -0
  25. package/dist/knowledge-base/InMemoryKnowledgeBaseRepository.d.ts +13 -0
  26. package/dist/knowledge-base/InMemoryKnowledgeBaseRepository.d.ts.map +1 -0
  27. package/dist/knowledge-base/KnowledgeBase.d.ts +123 -0
  28. package/dist/knowledge-base/KnowledgeBase.d.ts.map +1 -0
  29. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts +38 -0
  30. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts.map +1 -0
  31. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts +74 -0
  32. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts.map +1 -0
  33. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts +50 -0
  34. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts.map +1 -0
  35. package/dist/knowledge-base/createKnowledgeBase.d.ts +30 -0
  36. package/dist/knowledge-base/createKnowledgeBase.d.ts.map +1 -0
  37. package/dist/node.js +1071 -0
  38. package/dist/node.js.map +23 -0
  39. package/dist/types.d.ts +7 -0
  40. package/dist/types.d.ts.map +1 -0
  41. package/dist/util/DatasetSchema.d.ts +40 -0
  42. package/dist/util/DatasetSchema.d.ts.map +1 -0
  43. package/package.json +55 -0
package/dist/bun.js ADDED
@@ -0,0 +1,1072 @@
1
+ // @bun
2
+ // src/document/DocumentSchema.ts
3
/**
 * Discriminator values stored in the `kind` field of document-tree nodes.
 */
var NodeKind = {
  DOCUMENT: "document",
  SECTION: "section",
  PARAGRAPH: "paragraph",
  SENTENCE: "sentence",
  TOPIC: "topic"
};

/**
 * Builds one schema property: the given constraints followed by the
 * human-readable `title` and `description`.
 */
function describedProperty(constraints, title, description) {
  return { ...constraints, title, description };
}

/** String-typed property with a title and description. */
function stringProperty(title, description) {
  return describedProperty({ type: "string" }, title, description);
}

/** Integer-typed property; `bounds` may carry `minimum` / `maximum`. */
function integerProperty(title, description, bounds = {}) {
  return describedProperty({ type: "integer", ...bounds }, title, description);
}

/** Array-typed property; the `items` key is emitted only when `items` is given. */
function arrayProperty(title, description, items) {
  const constraints = items === undefined ? { type: "array" } : { type: "array", items };
  return describedProperty(constraints, title, description);
}

/** `kind` property pinned to a single NodeKind value. */
function kindProperty(kind) {
  return describedProperty({ type: "string", const: kind }, "Kind", "Node type discriminator");
}

/** Character span of a node: `startOffset` and `endOffset`, both required. */
var NodeRangeSchema = {
  type: "object",
  properties: {
    startOffset: integerProperty("Start Offset", "Starting character offset"),
    endOffset: integerProperty("End Offset", "Ending character offset")
  },
  required: ["startOffset", "endOffset"],
  additionalProperties: false
};

/** Named entity: text, type label and confidence score, all required. */
var EntitySchema = {
  type: "object",
  properties: {
    text: stringProperty("Text", "Entity text"),
    type: stringProperty("Type", "Entity type (e.g., PERSON, ORG, LOC)"),
    score: describedProperty({ type: "number" }, "Score", "Confidence score")
  },
  required: ["text", "type", "score"],
  additionalProperties: false
};

/** Optional enrichment attached to a node; no field is required. */
var NodeEnrichmentSchema = {
  type: "object",
  properties: {
    summary: stringProperty("Summary", "Summary of the node content"),
    entities: arrayProperty("Entities", "Named entities extracted from the node", EntitySchema),
    keywords: arrayProperty("Keywords", "Keywords associated with the node", { type: "string" })
  },
  additionalProperties: false
};

/** Fields shared by every node schema below. */
var DocumentNodeBaseSchema = {
  type: "object",
  properties: {
    nodeId: stringProperty("Node ID", "Unique identifier for this node"),
    kind: describedProperty(
      { type: "string", enum: Object.values(NodeKind) },
      "Kind",
      "Node type discriminator"
    ),
    range: NodeRangeSchema,
    text: stringProperty("Text", "Text content of the node"),
    enrichment: NodeEnrichmentSchema
  },
  required: ["nodeId", "kind", "range", "text"],
  additionalProperties: true
};

/**
 * Generic node schema: base fields plus the optional `level`, `title` and
 * `children` fields. `children` deliberately carries no `items` here.
 */
var DocumentNodeSchema = {
  type: "object",
  title: "Document Node",
  description: "A node in the hierarchical document tree",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    level: integerProperty("Level", "Header level for section nodes"),
    title: stringProperty("Title", "Section title"),
    children: arrayProperty("Children", "Child nodes")
  },
  required: [...DocumentNodeBaseSchema.required],
  additionalProperties: false
};

/** Paragraph node: base fields with `kind` pinned to "paragraph". */
var ParagraphNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindProperty(NodeKind.PARAGRAPH)
  },
  required: [...DocumentNodeBaseSchema.required],
  additionalProperties: false
};

/** Sentence node: base fields with `kind` pinned to "sentence". */
var SentenceNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindProperty(NodeKind.SENTENCE)
  },
  required: [...DocumentNodeBaseSchema.required],
  additionalProperties: false
};

/** Section node: requires a header `level` (1-6), a `title` and `children`. */
var SectionNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindProperty(NodeKind.SECTION),
    level: integerProperty("Level", "Header level (1-6 for markdown)", { minimum: 1, maximum: 6 }),
    title: stringProperty("Title", "Section title"),
    children: arrayProperty("Children", "Child nodes", DocumentNodeSchema)
  },
  required: [...DocumentNodeBaseSchema.required, "level", "title", "children"],
  additionalProperties: false
};

/** Topic node: requires `children`. */
var TopicNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindProperty(NodeKind.TOPIC),
    children: arrayProperty("Children", "Child nodes", DocumentNodeSchema)
  },
  required: [...DocumentNodeBaseSchema.required, "children"],
  additionalProperties: false
};

/** Root node of a document tree: requires `title` and `children`. */
var DocumentRootNodeSchema = {
  type: "object",
  properties: {
    ...DocumentNodeBaseSchema.properties,
    kind: kindProperty(NodeKind.DOCUMENT),
    title: stringProperty("Title", "Document title"),
    children: arrayProperty("Children", "Child nodes", DocumentNodeSchema)
  },
  required: [...DocumentNodeBaseSchema.required, "title", "children"],
  additionalProperties: false
};

/** Token limits used when chunking; all three fields are required. */
var TokenBudgetSchema = {
  type: "object",
  properties: {
    maxTokensPerChunk: integerProperty("Max Tokens Per Chunk", "Maximum tokens allowed per chunk"),
    overlapTokens: integerProperty("Overlap Tokens", "Number of tokens to overlap between chunks"),
    reservedTokens: integerProperty("Reserved Tokens", "Tokens reserved for metadata or context")
  },
  required: ["maxTokensPerChunk", "overlapTokens", "reservedTokens"],
  additionalProperties: false
};

/** Document-level metadata; only `title` is required, extra keys are allowed. */
var DocumentMetadataSchema = {
  type: "object",
  properties: {
    title: stringProperty("Title", "Document title"),
    sourceUri: stringProperty("Source URI", "Original source URI of the document"),
    createdAt: stringProperty("Created At", "ISO timestamp of creation")
  },
  required: ["title"],
  additionalProperties: true
};
271
+
272
+ // src/chunk/ChunkSchema.ts
273
/**
 * Returns a freshly built JSON schema for one chunk record. Each call creates
 * new objects; only the entity item schema (`EntitySchema`) is shared.
 */
var ChunkRecordSchema = () => {
  const text = (title, description) => ({ type: "string", title, description });
  const textList = (title, description) => ({
    type: "array",
    items: { type: "string" },
    title,
    description
  });

  const properties = {
    chunkId: text("Chunk ID", "Unique identifier for this chunk"),
    doc_id: text("Document ID", "ID of the parent document"),
    text: text("Text", "Text content of the chunk"),
    nodePath: textList("Node Path", "Node IDs from root to leaf"),
    depth: { type: "integer", title: "Depth", description: "Depth in the document tree" },
    leafNodeId: text("Leaf Node ID", "ID of the leaf node this chunk belongs to"),
    summary: text("Summary", "Summary of the chunk content"),
    entities: {
      type: "array",
      items: EntitySchema,
      title: "Entities",
      description: "Named entities extracted from the chunk"
    },
    parentSummaries: textList("Parent Summaries", "Summaries from ancestor nodes"),
    sectionTitles: textList("Section Titles", "Titles of ancestor section nodes"),
    doc_title: text("Document Title", "Title of the parent document")
  };

  return {
    type: "object",
    properties,
    required: ["chunkId", "doc_id", "text", "nodePath", "depth"],
    additionalProperties: true
  };
};

/** Schema for a list of chunk records, built once when the module loads. */
var ChunkRecordArraySchema = {
  type: "array",
  items: ChunkRecordSchema(),
  title: "Chunk Records",
  description: "Array of chunk records"
};
345
+ // src/chunk/ChunkVectorStorageSchema.ts
346
+ import { TypedArraySchema } from "@workglow/util";
347
/** Primary-key column list for chunk vector rows. */
var ChunkVectorPrimaryKey = ["chunk_id"];

/**
 * Row schema for stored chunk vectors. All four columns are required and no
 * other columns are allowed; `metadata` itself accepts arbitrary keys.
 */
var ChunkVectorStorageSchema = {
  type: "object",
  properties: {
    // Flagged "x-auto-generated" for the storage layer.
    chunk_id: {
      type: "string",
      "x-auto-generated": true
    },
    doc_id: {
      type: "string"
    },
    vector: TypedArraySchema(),
    metadata: {
      type: "object",
      format: "metadata",
      additionalProperties: true
    }
  },
  required: ["chunk_id", "doc_id", "vector", "metadata"],
  additionalProperties: false
};
359
+ // src/document/Document.ts
360
/**
 * A parsed document: its node tree (`root`), its `metadata`, the chunks
 * derived from it and, optionally, the id it is stored under.
 */
class Document {
  doc_id;
  metadata;
  root;
  chunks;

  /**
   * @param root - Root node of the document tree.
   * @param metadata - Document metadata.
   * @param chunks - Chunk records; any falsy value is replaced by an empty array.
   * @param doc_id - Storage id, if already known.
   */
  constructor(root, metadata, chunks = [], doc_id) {
    this.doc_id = doc_id;
    this.root = root;
    this.metadata = metadata;
    this.chunks = chunks || [];
  }

  /** Replaces the chunk list with `chunks`. */
  setChunks(chunks) {
    this.chunks = chunks;
  }

  /** @returns the current chunk list (the same array instance, not a copy). */
  getChunks() {
    return this.chunks;
  }

  /** Records the id this document is stored under. */
  setDocId(doc_id) {
    this.doc_id = doc_id;
  }

  /** @returns every chunk whose `nodePath` contains `nodeId`. */
  findChunksByNodeId(nodeId) {
    const passesThroughNode = ({ nodePath }) => nodePath.includes(nodeId);
    return this.chunks.filter(passesThroughNode);
  }

  /** Serializable form of the document; `doc_id` is intentionally left out. */
  toJSON() {
    const { metadata, root, chunks } = this;
    return { metadata, root, chunks };
  }

  /**
   * Rebuilds a document from the JSON *string* produced by serializing
   * `toJSON()`.
   * @param json - JSON text.
   * @param doc_id - Id to assign to the rebuilt document.
   */
  static fromJSON(json, doc_id) {
    const { root, metadata, chunks } = JSON.parse(json);
    return new Document(root, metadata, chunks, doc_id);
  }
}
395
+
396
+ // src/knowledge-base/KnowledgeBase.ts
397
/**
 * Pairs a tabular store holding serialized documents with a vector store
 * holding their chunks, and exposes document, tree and chunk operations on top.
 */
class KnowledgeBase {
  name;
  title;
  description;
  tabularStorage;
  chunkStorage;

  /**
   * @param name - Knowledge base name.
   * @param documentStorage - Tabular store for `{ doc_id, data }` rows.
   * @param chunkStorage - Vector store for chunk rows.
   * @param title - Display title; defaults to `name`.
   * @param description - Description; defaults to the empty string.
   */
  constructor(name, documentStorage, chunkStorage, title, description) {
    this.name = name;
    this.title = title ?? name;
    this.description = description ?? "";
    this.tabularStorage = documentStorage;
    this.chunkStorage = chunkStorage;
  }

  /**
   * Serializes `document` and writes it to the document store. If the stored
   * row comes back with a different `doc_id`, that id is copied onto `document`.
   * @returns the same `document` instance.
   */
  async upsertDocument(document) {
    const serialized = JSON.stringify(document.toJSON());
    const entity = await this.tabularStorage.put({
      doc_id: document.doc_id,
      data: serialized
    });
    if (document.doc_id !== entity.doc_id) {
      document.setDocId(entity.doc_id);
    }
    return document;
  }

  /** @returns the stored document rebuilt via `Document.fromJSON`, or `undefined` if absent. */
  async getDocument(doc_id) {
    const entity = await this.tabularStorage.get({ doc_id });
    if (!entity) {
      return;
    }
    return Document.fromJSON(entity.data, entity.doc_id);
  }

  /** Deletes the document's chunks first, then the document row. */
  async deleteDocument(doc_id) {
    await this.deleteChunksForDocument(doc_id);
    await this.tabularStorage.delete({ doc_id });
  }

  /** @returns the `doc_id` of every stored document (empty array when the store returns nothing). */
  async listDocuments() {
    const entities = await this.tabularStorage.getAll();
    if (!entities) {
      return [];
    }
    return entities.map((e) => e.doc_id);
  }

  /**
   * Depth-first search for the node with id `nodeId` in a stored document.
   * @returns the node, or `undefined` when the document or node is missing.
   */
  async getNode(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return;
    }
    const traverse = (node) => {
      if (node.nodeId === nodeId) {
        return node;
      }
      if ("children" in node && Array.isArray(node.children)) {
        for (const child of node.children) {
          const found = traverse(child);
          if (found)
            return found;
        }
      }
      return;
    };
    return traverse(doc.root);
  }

  /**
   * Returns the chain of nodes from the document root down to (and including)
   * the node with id `nodeId`, or `[]` when the document or node is missing.
   *
   * The chain is collected in a single depth-first pass over node objects, so
   * it is always the real path to the match, even if sibling ids collide.
   */
  async getAncestors(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    const chain = [];
    const descend = (node) => {
      chain.push(node);
      if (node.nodeId === nodeId) {
        return true;
      }
      if ("children" in node && Array.isArray(node.children)) {
        for (const child of node.children) {
          if (descend(child)) {
            return true;
          }
        }
      }
      // Dead end: this node is not on the path to the target.
      chain.pop();
      return false;
    };
    return descend(doc.root) ? chain : [];
  }

  /**
   * Stores one chunk after checking its vector length.
   * @throws Error when `chunk.vector.length` differs from the store's dimensions.
   */
  async upsertChunk(chunk) {
    if (chunk.vector.length !== this.getVectorDimensions()) {
      throw new Error(`Vector dimension mismatch: expected ${this.getVectorDimensions()}, got ${chunk.vector.length}.`);
    }
    return this.chunkStorage.put(chunk);
  }

  /**
   * Stores many chunks; every vector is checked before anything is written.
   * @throws Error on the first chunk whose vector length is wrong.
   */
  async upsertChunksBulk(chunks) {
    const expected = this.getVectorDimensions();
    for (const chunk of chunks) {
      if (chunk.vector.length !== expected) {
        throw new Error(`Vector dimension mismatch: expected ${expected}, got ${chunk.vector.length}.`);
      }
    }
    return this.chunkStorage.putBulk(chunks);
  }

  /** Removes every chunk row matching `doc_id`. */
  async deleteChunksForDocument(doc_id) {
    await this.chunkStorage.deleteSearch({ doc_id });
  }

  /** @returns chunk rows matching `doc_id` (empty array when the store returns nothing). */
  async getChunksForDocument(doc_id) {
    const results = await this.chunkStorage.query({ doc_id });
    return results ?? [];
  }

  /** Delegates to the chunk store's `similaritySearch`. */
  async similaritySearch(query, options) {
    return this.chunkStorage.similaritySearch(query, options);
  }

  /**
   * Delegates to the chunk store's `hybridSearch`.
   * @throws Error when the chunk store does not implement `hybridSearch`.
   */
  async hybridSearch(query, options) {
    if (typeof this.chunkStorage.hybridSearch !== "function") {
      throw new Error("Hybrid search is not supported by the configured chunk storage backend. " + "Please use a vector storage implementation that provides `hybridSearch`.");
    }
    return this.chunkStorage.hybridSearch(query, options);
  }

  /**
   * Loads a document and deletes its chunk rows so they can be rebuilt.
   * @returns the document, or `undefined` (nothing deleted) when it is missing.
   */
  async prepareReindex(doc_id) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return;
    }
    await this.deleteChunksForDocument(doc_id);
    return doc;
  }

  /** Runs `setupDatabase` on the document store, then on the chunk store. */
  async setupDatabase() {
    await this.tabularStorage.setupDatabase();
    await this.chunkStorage.setupDatabase();
  }

  /** Calls `destroy` on both stores. */
  destroy() {
    this.tabularStorage.destroy();
    this.chunkStorage.destroy();
  }

  /** @returns the chunk row with the given `chunk_id`, as returned by the chunk store. */
  async getChunk(chunk_id) {
    return this.chunkStorage.get({ chunk_id });
  }

  /** Direct pass-through to the chunk store's `put`; no dimension check is made here. */
  async put(chunk) {
    return this.chunkStorage.put(chunk);
  }

  /** Direct pass-through to the chunk store's `putBulk`; no dimension check is made here. */
  async putBulk(chunks) {
    return this.chunkStorage.putBulk(chunks);
  }

  /** @returns whatever the chunk store's `getAll` yields. */
  async getAllChunks() {
    return this.chunkStorage.getAll();
  }

  /** @returns the chunk store's `size()`. */
  async chunkCount() {
    return this.chunkStorage.size();
  }

  /** Deletes every row in the chunk store. */
  async clearChunks() {
    return this.chunkStorage.deleteAll();
  }

  /** @returns the vector dimension count reported by the chunk store. */
  getVectorDimensions() {
    return this.chunkStorage.getVectorDimensions();
  }

  /** @returns the chunk list embedded in the stored document (`[]` when the document is missing). */
  async getDocumentChunks(doc_id) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    return doc.getChunks();
  }

  /** @returns the document's embedded chunks whose node path contains `nodeId` (`[]` when the document is missing). */
  async findChunksByNodeId(doc_id, nodeId) {
    const doc = await this.getDocument(doc_id);
    if (!doc) {
      return [];
    }
    return doc.findChunksByNodeId(nodeId);
  }
}
586
+ // src/knowledge-base/KnowledgeBaseSchema.ts
587
/** Primary-key column list for knowledge base records. */
var KnowledgeBasePrimaryKeyNames = ["kb_id"];

/**
 * Row schema for a registered knowledge base. Every listed column is required
 * and no other columns are allowed.
 */
var KnowledgeBaseRecordSchema = (() => {
  // [column, JSON type] in declaration order; `required` lists the same columns.
  const columns = [
    ["kb_id", "string"],
    ["title", "string"],
    ["description", "string"],
    ["vector_dimensions", "integer"],
    ["document_table", "string"],
    ["chunk_table", "string"],
    ["created_at", "string"],
    ["updated_at", "string"]
  ];
  return {
    type: "object",
    properties: Object.fromEntries(columns.map(([column, type]) => [column, { type }])),
    required: columns.map(([column]) => column),
    additionalProperties: false
  };
})();
612
/**
 * Derives the document and chunk table names for a knowledge base id.
 * Every character outside `[a-zA-Z0-9_]` is replaced with an underscore.
 * @param kbId - Knowledge base id (string).
 * @returns `{ documentTable, chunkTable }`.
 */
function knowledgeBaseTableNames(kbId) {
  const sanitizedId = kbId.replace(/[^a-zA-Z0-9_]/g, "_");
  const documentTable = `kb_docs_${sanitizedId}`;
  const chunkTable = `kb_chunks_${sanitizedId}`;
  return { documentTable, chunkTable };
}
619
+ // src/knowledge-base/KnowledgeBaseRepository.ts
620
+ import { EventEmitter } from "@workglow/util";
621
+
622
/**
 * Persists knowledge base records in a tabular store and emits
 * "knowledge_base_added" / "knowledge_base_removed" events on changes.
 */
class KnowledgeBaseRepository {
  storage;
  events = new EventEmitter;

  /** @param storage - Tabular store keyed by `kb_id`. */
  constructor(storage) {
    this.storage = storage;
  }

  /** Calls the store's `setupDatabase` when it defines one. */
  async setupDatabase() {
    await this.storage.setupDatabase?.();
  }

  /** Subscribes `fn` to the event `name`. */
  on(name, fn) {
    this.events.on(name, fn);
  }

  /** Unsubscribes `fn` from the event `name`. */
  off(name, fn) {
    this.events.off(name, fn);
  }

  /** Subscribes `fn` to the next occurrence of `name` only. */
  once(name, fn) {
    this.events.once(name, fn);
  }

  /** @returns the result of the emitter's `waitOn(name)`. */
  waitOn(name) {
    return this.events.waitOn(name);
  }

  /**
   * Stores `record`, then emits "knowledge_base_added" with it.
   * @returns the same `record`.
   */
  async addKnowledgeBase(record) {
    await this.storage.put(record);
    this.events.emit("knowledge_base_added", record);
    return record;
  }

  /**
   * Deletes the record with id `kb_id`, then emits "knowledge_base_removed"
   * with the record that was stored.
   * @throws Error when no such record exists.
   */
  async removeKnowledgeBase(kb_id) {
    const existing = await this.storage.get({ kb_id });
    if (!existing) {
      throw new Error(`KnowledgeBase with id "${kb_id}" not found`);
    }
    await this.storage.delete({ kb_id });
    this.events.emit("knowledge_base_removed", existing);
  }

  /** @returns the record for `kb_id`, or `undefined` when absent or `kb_id` is not a string. */
  async getKnowledgeBase(kb_id) {
    if (typeof kb_id !== "string") {
      return undefined;
    }
    const found = await this.storage.get({ kb_id });
    return found ?? undefined;
  }

  /** @returns all stored records; `[]` when the store yields nothing or an empty list. */
  async enumerateAll() {
    const all = await this.storage.getAll();
    const isEmpty = !all || all.length === 0;
    return isEmpty ? [] : all;
  }

  /** @returns the store's `size()`. */
  async size() {
    const count = await this.storage.size();
    return count;
  }
}
672
+ // src/knowledge-base/InMemoryKnowledgeBaseRepository.ts
673
+ import { InMemoryTabularStorage } from "@workglow/storage";
674
/**
 * KnowledgeBaseRepository backed by an `InMemoryTabularStorage` built from
 * the knowledge base record schema and its primary key.
 */
class InMemoryKnowledgeBaseRepository extends KnowledgeBaseRepository {
  constructor() {
    const recordStore = new InMemoryTabularStorage(
      KnowledgeBaseRecordSchema,
      KnowledgeBasePrimaryKeyNames
    );
    super(recordStore);
  }
}
679
+ // src/knowledge-base/KnowledgeBaseRegistry.ts
680
+ import {
681
+ createServiceToken,
682
+ globalServiceRegistry,
683
+ registerInputResolver
684
+ } from "@workglow/util";
685
/** Service token for the map of live knowledge bases. */
var KNOWLEDGE_BASES = createServiceToken("knowledge-base.registry");
/** Service token for the knowledge base record repository. */
var KNOWLEDGE_BASE_REPOSITORY = createServiceToken("knowledge-base.repository");

// Install default factories, but only for tokens nobody has registered yet.
for (const [token, factory] of [
  [KNOWLEDGE_BASES, () => new Map],
  [KNOWLEDGE_BASE_REPOSITORY, () => new InMemoryKnowledgeBaseRepository]
]) {
  if (!globalServiceRegistry.has(token)) {
    globalServiceRegistry.register(token, factory, true);
  }
}

/** @returns the value registered globally under KNOWLEDGE_BASES. */
function getGlobalKnowledgeBases() {
  const knowledgeBases = globalServiceRegistry.get(KNOWLEDGE_BASES);
  return knowledgeBases;
}

/** @returns the value registered globally under KNOWLEDGE_BASE_REPOSITORY. */
function getGlobalKnowledgeBaseRepository() {
  const repository = globalServiceRegistry.get(KNOWLEDGE_BASE_REPOSITORY);
  return repository;
}

/** Registers `repository` as the global instance for KNOWLEDGE_BASE_REPOSITORY. */
function setGlobalKnowledgeBaseRepository(repository) {
  globalServiceRegistry.registerInstance(KNOWLEDGE_BASE_REPOSITORY, repository);
}
702
/**
 * Adds `kb` to the global knowledge base map under `id`, then writes a
 * matching record to the global repository. `created_at` and `updated_at`
 * are both set to the current time.
 * @param id - Key for the map; also stored as the record's `kb_id`.
 * @param kb - Knowledge base exposing `title`, `description` and `getVectorDimensions()`.
 */
async function registerKnowledgeBase(id, kb) {
  getGlobalKnowledgeBases().set(id, kb);

  const timestamp = new Date().toISOString();
  const { documentTable, chunkTable } = knowledgeBaseTableNames(id);
  const record = {
    kb_id: id,
    title: kb.title,
    description: kb.description,
    vector_dimensions: kb.getVectorDimensions(),
    document_table: documentTable,
    chunk_table: chunkTable,
    created_at: timestamp,
    updated_at: timestamp
  };

  await getGlobalKnowledgeBaseRepository().addKnowledgeBase(record);
}
720
/** @returns the knowledge base registered globally under `id`, or `undefined`. */
function getKnowledgeBase(id) {
  const registered = getGlobalKnowledgeBases();
  return registered.get(id);
}
723
/**
 * Input resolver for the "knowledge-base" format: looks `id` up in the
 * knowledge base map of `registry`, falling back to the global map when
 * `registry` has none registered.
 * @param id - Knowledge base id to resolve.
 * @param format - Unused by this resolver.
 * @param registry - Service registry to consult first.
 * @throws Error when no knowledge base is registered under `id`.
 */
async function resolveKnowledgeBaseFromRegistry(id, format, registry) {
  let knowledgeBases;
  if (registry.has(KNOWLEDGE_BASES)) {
    knowledgeBases = registry.get(KNOWLEDGE_BASES);
  } else {
    knowledgeBases = getGlobalKnowledgeBases();
  }
  const resolved = knowledgeBases.get(id);
  if (resolved) {
    return resolved;
  }
  throw new Error(`Knowledge base "${id}" not found in registry`);
}
registerInputResolver("knowledge-base", resolveKnowledgeBaseFromRegistry);
732
+ // src/knowledge-base/createKnowledgeBase.ts
733
+ import { InMemoryTabularStorage as InMemoryTabularStorage2, InMemoryVectorStorage } from "@workglow/storage";
734
+
735
+ // src/document/DocumentStorageSchema.ts
736
/** Primary-key column list for stored documents. */
var DocumentStorageKey = ["doc_id"];

/**
 * Row schema for stored documents: `doc_id` and the JSON-serialized `data`
 * are required; `metadata` and any further columns are optional.
 */
var DocumentStorageSchema = (() => {
  const properties = {
    // Flagged "x-auto-generated" for the storage layer.
    doc_id: {
      type: "string",
      "x-auto-generated": true,
      title: "Document ID",
      description: "Unique identifier for the document"
    },
    data: {
      type: "string",
      title: "Document Data",
      description: "JSON-serialized document"
    },
    metadata: {
      type: "object",
      title: "Metadata",
      description: "Metadata of the document"
    }
  };
  return {
    type: "object",
    properties,
    required: ["doc_id", "data"],
    additionalProperties: true
  };
})();
760
+
761
+ // src/knowledge-base/createKnowledgeBase.ts
762
/**
 * Creates a KnowledgeBase backed by in-memory document and vector stores,
 * sets both stores up, and (unless `register` is falsy) registers the result
 * globally under `name`.
 *
 * @param options.name - Knowledge base name; also the registration id.
 * @param options.vectorDimensions - Passed to the vector store.
 * @param options.vectorType - Typed-array constructor; defaults to Float32Array.
 * @param options.register - Defaults to true when omitted.
 * @param options.title - Optional display title.
 * @param options.description - Optional description.
 * @returns the new KnowledgeBase.
 */
async function createKnowledgeBase(options) {
  const {
    name,
    title,
    description,
    vectorDimensions,
    vectorType = Float32Array,
    register: shouldRegister = true
  } = options;

  const documentStore = new InMemoryTabularStorage2(DocumentStorageSchema, DocumentStorageKey);
  await documentStore.setupDatabase();

  const chunkStore = new InMemoryVectorStorage(
    ChunkVectorStorageSchema,
    ChunkVectorPrimaryKey,
    [],
    vectorDimensions,
    vectorType
  );
  await chunkStore.setupDatabase();

  const knowledgeBase = new KnowledgeBase(name, documentStore, chunkStore, title, description);
  if (!shouldRegister) {
    return knowledgeBase;
  }
  await registerKnowledgeBase(name, knowledgeBase);
  return knowledgeBase;
}
781
+ // src/util/DatasetSchema.ts
782
/**
 * Schema fragment for an input that is either a tabular storage id or a
 * storage instance. Keys in `options` override the default `title` and
 * `description`; `format` and `oneOf` always win over `options`.
 *
 * NOTE(review): the second `oneOf` branch has no `type`, so a string id
 * appears to satisfy both branches; confirm how consumers validate `oneOf`
 * (TypeKnowledgeBase uses `anyOf` for the same shape).
 */
function TypeTabularStorage(options = {}) {
  const defaults = {
    title: "Tabular Storage",
    description: "Storage ID or instance for tabular data storage"
  };
  const fixed = {
    format: "storage:tabular",
    oneOf: [
      { type: "string", title: "Storage ID" },
      { title: "Storage Instance", additionalProperties: true }
    ]
  };
  return Object.assign(defaults, options, fixed);
}
794
/**
 * Schema fragment for an input that is either a knowledge base id or a
 * knowledge base instance. Keys in `options` override the default `title`
 * and `description`; `format` and `anyOf` always win over `options`.
 */
function TypeKnowledgeBase(options = {}) {
  const defaults = {
    title: "Knowledge Base",
    description: "Knowledge base ID or instance"
  };
  const fixed = {
    format: "knowledge-base",
    anyOf: [
      { type: "string", title: "Knowledge Base ID" },
      { title: "Knowledge Base Instance", additionalProperties: true }
    ]
  };
  return Object.assign(defaults, options, fixed);
}
806
+ // src/document/DocumentNode.ts
807
/**
 * Rough token estimate for `text`: one token per four characters
 * (UTF-16 code units), rounded up.
 */
function estimateTokens(text) {
  const charsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / charsPerToken);
}
810
/**
 * @returns true when `node.kind` is one of the container kinds
 * (document, section or topic), false otherwise.
 */
function hasChildren(node) {
  switch (node.kind) {
    case NodeKind.DOCUMENT:
    case NodeKind.SECTION:
    case NodeKind.TOPIC:
      return true;
    default:
      return false;
  }
}
813
/**
 * @returns `node.children` for container nodes (as decided by `hasChildren`),
 * otherwise a new empty array.
 */
function getChildren(node) {
  return hasChildren(node) ? node.children : [];
}
819
/**
 * Lazily yields `node` and then all of its descendants in pre-order
 * (each parent before its children, children in array order).
 */
function* traverseDepthFirst(node) {
  yield node;
  if (!hasChildren(node)) {
    return;
  }
  for (const descendantRoot of node.children) {
    yield* traverseDepthFirst(descendantRoot);
  }
}
827
/**
 * Finds the first node (depth-first) whose id equals `targetNodeId`.
 * @returns the node ids from `root` down to the match, inclusive, or
 * `undefined` when no node matches.
 */
function getNodePath(root, targetNodeId) {
  const trail = [];

  const descend = (node) => {
    trail.push(node.nodeId);
    if (node.nodeId === targetNodeId) {
      return true;
    }
    if (hasChildren(node)) {
      for (const child of node.children) {
        if (descend(child)) {
          return true;
        }
      }
    }
    // This subtree does not contain the target; back out.
    trail.pop();
    return false;
  };

  if (!descend(root)) {
    return undefined;
  }
  return trail;
}
846
/**
 * Follows `nodePath` down from `root` and returns the `range` of the node
 * it ends on.
 *
 * `nodePath[0]` is taken to be the root itself and is not checked; each
 * later id is looked up among the current node's children.
 *
 * @param root - Node to start from.
 * @param nodePath - Node ids from root to target, inclusive.
 * @returns the `range` of the last node on the path.
 * @throws Error when an id on the path is not a child of the preceding node,
 *   including when that preceding node has no `children` array at all.
 */
function getDocumentRange(root, nodePath) {
  let currentNode = root;
  for (const targetId of nodePath.slice(1)) {
    // Leaf nodes carry no `children`; treat that as "nothing to match" so the
    // caller gets the descriptive error below rather than a TypeError.
    const children = Array.isArray(currentNode.children) ? currentNode.children : [];
    const next = children.find((child) => child.nodeId === targetId);
    if (!next) {
      throw new Error(`Node with id ${targetId} not found in path`);
    }
    currentNode = next;
  }
  return currentNode.range;
}
865
+ // src/document/StructuralParser.ts
866
+ import { uuid4 } from "@workglow/util";
867
/**
 * Turns raw text into a document tree: a DOCUMENT root whose descendants are
 * SECTION nodes (markdown headers) and PARAGRAPH nodes.
 */
class StructuralParser {
  /**
   * Parses markdown: `#`..`######` header lines open nested sections, and each
   * run of non-header lines between headers becomes one paragraph.
   *
   * Offsets are character offsets into `text`. A header line is recognised
   * with either LF or CRLF line endings.
   *
   * @param doc_id - Not used by the parser.
   * @param text - Markdown source.
   * @param title - Stored as both `text` and `title` of the root node.
   * @returns a promise for the root node.
   */
  static async parseMarkdown(doc_id, text, title) {
    const root = {
      nodeId: uuid4(),
      kind: NodeKind.DOCUMENT,
      range: { startOffset: 0, endOffset: text.length },
      text: title,
      title,
      children: []
    };
    // Open containers, innermost last; index 0 is always the root.
    const parentStack = [root];
    let currentOffset = 0;
    let textBuffer = [];
    let textBufferStartOffset = 0;

    const flushTextBuffer = () => {
      if (textBuffer.length === 0) {
        return;
      }
      const content = textBuffer.join("\n").trim();
      if (content) {
        parentStack[parentStack.length - 1].children.push({
          nodeId: uuid4(),
          kind: NodeKind.PARAGRAPH,
          range: {
            startOffset: textBufferStartOffset,
            // Each line is counted with a trailing newline, so after the last
            // line the running offset can be one past the end of the text.
            endOffset: Math.min(currentOffset, text.length)
          },
          text: content
        });
      }
      textBuffer = [];
    };

    for (const line of text.split("\n")) {
      // `.` and `$` stop at "\r", so drop a CRLF remnant before matching.
      const candidate = line.endsWith("\r") ? line.slice(0, -1) : line;
      const headerMatch = /^(#{1,6})\s+(.*)$/.exec(candidate);
      if (headerMatch) {
        flushTextBuffer();
        const level = headerMatch[1].length;
        const headerTitle = headerMatch[2];
        // Close every open section at the same or a deeper level; each one
        // ends where this header line begins.
        while (
          parentStack.length > 1 &&
          parentStack[parentStack.length - 1].kind === NodeKind.SECTION &&
          parentStack[parentStack.length - 1].level >= level
        ) {
          parentStack.pop().range.endOffset = currentOffset;
        }
        const section = {
          nodeId: uuid4(),
          kind: NodeKind.SECTION,
          level,
          title: headerTitle,
          range: {
            startOffset: currentOffset,
            // Provisional: stays at end-of-text unless a later header closes it.
            endOffset: text.length
          },
          text: headerTitle,
          children: []
        };
        parentStack[parentStack.length - 1].children.push(section);
        parentStack.push(section);
      } else {
        if (textBuffer.length === 0) {
          textBufferStartOffset = currentOffset;
        }
        textBuffer.push(line);
      }
      currentOffset += line.length + 1;
    }
    flushTextBuffer();
    // Sections still open here already end at text.length.
    return root;
  }

  /**
   * Parses plain text: paragraphs are separated by blank lines (a newline,
   * optional whitespace, and another newline). Each paragraph is trimmed and
   * its range covers exactly the trimmed text.
   *
   * @param doc_id - Not used by the parser.
   * @param text - Plain text source.
   * @param title - Stored as both `text` and `title` of the root node.
   * @returns a promise for the root node.
   */
  static async parsePlainText(doc_id, text, title) {
    const root = {
      nodeId: uuid4(),
      kind: NodeKind.DOCUMENT,
      range: { startOffset: 0, endOffset: text.length },
      text: title,
      title,
      children: []
    };

    // Adds the trimmed form of `rawParagraph` (which starts at `baseOffset`
    // in `text`) as a paragraph node; whitespace-only input adds nothing.
    const appendParagraph = (rawParagraph, baseOffset) => {
      const paragraphText = rawParagraph.trim();
      if (paragraphText.length === 0) {
        return;
      }
      const startOffset = baseOffset + rawParagraph.indexOf(paragraphText);
      root.children.push({
        nodeId: uuid4(),
        kind: NodeKind.PARAGRAPH,
        range: {
          startOffset,
          endOffset: startOffset + paragraphText.length
        },
        text: paragraphText
      });
    };

    const paragraphRegex = /\n\s*\n/g;
    let lastIndex = 0;
    let match;
    while ((match = paragraphRegex.exec(text)) !== null) {
      appendParagraph(text.slice(lastIndex, match.index), lastIndex);
      lastIndex = paragraphRegex.lastIndex;
    }
    if (lastIndex < text.length) {
      appendParagraph(text.slice(lastIndex), lastIndex);
    }
    return root;
  }

  /**
   * Chooses a parser: markdown when `format` is "markdown", or when no format
   * is given and the text looks like markdown; plain text otherwise.
   */
  static parse(doc_id, text, title, format) {
    if (format === "markdown" || !format && this.looksLikeMarkdown(text)) {
      return this.parseMarkdown(doc_id, text, title);
    }
    return this.parsePlainText(doc_id, text, title);
  }

  /** @returns true when any line starts with 1-6 `#` characters followed by whitespace. */
  static looksLikeMarkdown(text) {
    return /^#{1,6}\s/m.test(text);
  }
}
1026
+ export {
1027
+ traverseDepthFirst,
1028
+ setGlobalKnowledgeBaseRepository,
1029
+ registerKnowledgeBase,
1030
+ knowledgeBaseTableNames,
1031
+ hasChildren,
1032
+ getNodePath,
1033
+ getKnowledgeBase,
1034
+ getGlobalKnowledgeBases,
1035
+ getGlobalKnowledgeBaseRepository,
1036
+ getDocumentRange,
1037
+ getChildren,
1038
+ estimateTokens,
1039
+ createKnowledgeBase,
1040
+ TypeTabularStorage,
1041
+ TypeKnowledgeBase,
1042
+ TopicNodeSchema,
1043
+ TokenBudgetSchema,
1044
+ StructuralParser,
1045
+ SentenceNodeSchema,
1046
+ SectionNodeSchema,
1047
+ ParagraphNodeSchema,
1048
+ NodeRangeSchema,
1049
+ NodeKind,
1050
+ NodeEnrichmentSchema,
1051
+ KnowledgeBaseRepository,
1052
+ KnowledgeBaseRecordSchema,
1053
+ KnowledgeBasePrimaryKeyNames,
1054
+ KnowledgeBase,
1055
+ KNOWLEDGE_BASE_REPOSITORY,
1056
+ KNOWLEDGE_BASES,
1057
+ InMemoryKnowledgeBaseRepository,
1058
+ EntitySchema,
1059
+ DocumentStorageSchema,
1060
+ DocumentStorageKey,
1061
+ DocumentRootNodeSchema,
1062
+ DocumentNodeSchema,
1063
+ DocumentNodeBaseSchema,
1064
+ DocumentMetadataSchema,
1065
+ Document,
1066
+ ChunkVectorStorageSchema,
1067
+ ChunkVectorPrimaryKey,
1068
+ ChunkRecordSchema,
1069
+ ChunkRecordArraySchema
1070
+ };
1071
+
1072
+ //# debugId=DA038CF776AF0B7164756E2164756E21