@workglow/knowledge-base 0.1.2 → 0.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/README.md +166 -0
  2. package/dist/browser.js +321 -25
  3. package/dist/browser.js.map +15 -12
  4. package/dist/bun.js +321 -25
  5. package/dist/bun.js.map +15 -12
  6. package/dist/chunk/ChunkVectorStorageSchema.d.ts +1 -1
  7. package/dist/chunk/ChunkVectorStorageSchema.d.ts.map +1 -1
  8. package/dist/common.d.ts +3 -0
  9. package/dist/common.d.ts.map +1 -1
  10. package/dist/document/Document.d.ts.map +1 -1
  11. package/dist/document/DocumentNode.d.ts +1 -1
  12. package/dist/document/DocumentNode.d.ts.map +1 -1
  13. package/dist/document/DocumentStorageSchema.d.ts +2 -1
  14. package/dist/document/DocumentStorageSchema.d.ts.map +1 -1
  15. package/dist/document/StructuralParser.d.ts +1 -1
  16. package/dist/document/StructuralParser.d.ts.map +1 -1
  17. package/dist/knowledge-base/KnowledgeBase.d.ts +2 -0
  18. package/dist/knowledge-base/KnowledgeBase.d.ts.map +1 -1
  19. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts +17 -1
  20. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts.map +1 -1
  21. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts +5 -3
  22. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts.map +1 -1
  23. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts +4 -0
  24. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts.map +1 -1
  25. package/dist/knowledge-base/ScopedTabularStorage.d.ts +46 -0
  26. package/dist/knowledge-base/ScopedTabularStorage.d.ts.map +1 -0
  27. package/dist/knowledge-base/ScopedVectorStorage.d.ts +27 -0
  28. package/dist/knowledge-base/ScopedVectorStorage.d.ts.map +1 -0
  29. package/dist/knowledge-base/SharedTableSchemas.d.ts +93 -0
  30. package/dist/knowledge-base/SharedTableSchemas.d.ts.map +1 -0
  31. package/dist/node.js +321 -25
  32. package/dist/node.js.map +15 -12
  33. package/package.json +10 -5
package/dist/node.js CHANGED
@@ -342,9 +342,7 @@ var ChunkRecordArraySchema = {
342
342
  description: "Array of chunk records"
343
343
  };
344
344
  // src/chunk/ChunkVectorStorageSchema.ts
345
- import {
346
- TypedArraySchema
347
- } from "@workglow/util/schema";
345
+ import { TypedArraySchema } from "@workglow/util/schema";
348
346
  var ChunkVectorStorageSchema = {
349
347
  type: "object",
350
348
  properties: {
@@ -563,6 +561,12 @@ class KnowledgeBase {
563
561
  this.tabularStorage.destroy();
564
562
  this.chunkStorage.destroy();
565
563
  }
564
+ async[Symbol.asyncDispose]() {
565
+ this.destroy();
566
+ }
567
+ [Symbol.dispose]() {
568
+ this.destroy();
569
+ }
566
570
  async getChunk(chunk_id) {
567
571
  return this.chunkStorage.get({ chunk_id });
568
572
  }
@@ -599,6 +603,55 @@ class KnowledgeBase {
599
603
  return doc.findChunksByNodeId(nodeId);
600
604
  }
601
605
  }
606
+ // src/knowledge-base/SharedTableSchemas.ts
607
+ import { TypedArraySchema as TypedArraySchema2 } from "@workglow/util/schema";
608
+ var SHARED_DOCUMENT_TABLE = "shared_documents";
609
+ var SHARED_CHUNK_TABLE = "shared_chunks";
610
+ var SharedDocumentStorageSchema = {
611
+ type: "object",
612
+ properties: {
613
+ doc_id: {
614
+ type: "string",
615
+ "x-auto-generated": true,
616
+ title: "Document ID",
617
+ description: "Unique identifier for the document"
618
+ },
619
+ kb_id: {
620
+ type: "string",
621
+ title: "Knowledge Base ID",
622
+ description: "Owning knowledge base identifier"
623
+ },
624
+ data: {
625
+ type: "string",
626
+ title: "Document Data",
627
+ description: "JSON-serialized document"
628
+ },
629
+ metadata: {
630
+ type: "object",
631
+ title: "Metadata",
632
+ description: "Metadata of the document"
633
+ }
634
+ },
635
+ required: ["doc_id", "kb_id", "data"],
636
+ additionalProperties: true
637
+ };
638
+ var SharedChunkVectorStorageSchema = {
639
+ type: "object",
640
+ properties: {
641
+ chunk_id: { type: "string", "x-auto-generated": true },
642
+ kb_id: { type: "string" },
643
+ doc_id: { type: "string" },
644
+ vector: TypedArraySchema2(),
645
+ metadata: { type: "object", format: "metadata", additionalProperties: true }
646
+ },
647
+ required: ["chunk_id", "kb_id", "doc_id", "vector", "metadata"],
648
+ additionalProperties: false
649
+ };
650
+ var SharedDocumentPrimaryKey = ["kb_id", "doc_id"];
651
+ var SharedChunkPrimaryKey = ["kb_id", "chunk_id"];
652
+ var SharedDocumentIndexes = [["kb_id"]];
653
+ var SharedChunkIndexes = [["kb_id"], ["kb_id", "doc_id"]];
654
+
602
655
  // src/knowledge-base/KnowledgeBaseSchema.ts
603
656
  var KnowledgeBaseRecordSchema = {
604
657
  type: "object",
@@ -632,6 +685,9 @@ function knowledgeBaseTableNames(kbId) {
632
685
  chunkTable: `kb_chunks_${safe}`
633
686
  };
634
687
  }
688
+ function isSharedTableMode(record) {
689
+ return record.document_table === SHARED_DOCUMENT_TABLE && record.chunk_table === SHARED_CHUNK_TABLE;
690
+ }
635
691
  // src/knowledge-base/KnowledgeBaseRepository.ts
636
692
  import { EventEmitter } from "@workglow/util";
637
693
 
@@ -701,12 +757,8 @@ import {
701
757
  } from "@workglow/util";
702
758
  var KNOWLEDGE_BASES = createServiceToken("knowledge-base.registry");
703
759
  var KNOWLEDGE_BASE_REPOSITORY = createServiceToken("knowledge-base.repository");
704
- if (!globalServiceRegistry.has(KNOWLEDGE_BASES)) {
705
- globalServiceRegistry.register(KNOWLEDGE_BASES, () => new Map, true);
706
- }
707
- if (!globalServiceRegistry.has(KNOWLEDGE_BASE_REPOSITORY)) {
708
- globalServiceRegistry.register(KNOWLEDGE_BASE_REPOSITORY, () => new InMemoryKnowledgeBaseRepository, true);
709
- }
760
+ globalServiceRegistry.registerIfAbsent(KNOWLEDGE_BASES, () => new Map, true);
761
+ globalServiceRegistry.registerIfAbsent(KNOWLEDGE_BASE_REPOSITORY, () => new InMemoryKnowledgeBaseRepository, true);
710
762
  function getGlobalKnowledgeBases() {
711
763
  return globalServiceRegistry.get(KNOWLEDGE_BASES);
712
764
  }
@@ -716,23 +768,53 @@ function getGlobalKnowledgeBaseRepository() {
716
768
  function setGlobalKnowledgeBaseRepository(repository) {
717
769
  globalServiceRegistry.registerInstance(KNOWLEDGE_BASE_REPOSITORY, repository);
718
770
  }
719
- async function registerKnowledgeBase(id, kb) {
720
- const kbs = getGlobalKnowledgeBases();
721
- const now = new Date().toISOString();
722
- const tableNames = knowledgeBaseTableNames(id);
723
- const record = {
724
- kb_id: id,
725
- title: kb.title,
726
- description: kb.description,
727
- vector_dimensions: kb.getVectorDimensions(),
728
- document_table: tableNames.documentTable,
729
- chunk_table: tableNames.chunkTable,
730
- created_at: now,
731
- updated_at: now
771
+ var pendingOps = new Map;
772
+ function withIdLock(id, fn) {
773
+ const prev = pendingOps.get(id) ?? Promise.resolve();
774
+ const next = prev.then(fn, fn);
775
+ pendingOps.set(id, next);
776
+ const cleanup = () => {
777
+ if (pendingOps.get(id) === next) {
778
+ pendingOps.delete(id);
779
+ }
732
780
  };
781
+ next.finally(cleanup);
782
+ return next;
783
+ }
784
+ function registerKnowledgeBase(id, kb, options) {
785
+ return withIdLock(id, async () => {
786
+ const kbs = getGlobalKnowledgeBases();
787
+ const now = new Date().toISOString();
788
+ const useShared = options?.sharedTables === true;
789
+ const tableNames = useShared ? { documentTable: SHARED_DOCUMENT_TABLE, chunkTable: SHARED_CHUNK_TABLE } : knowledgeBaseTableNames(id);
790
+ const record = {
791
+ kb_id: id,
792
+ title: kb.title,
793
+ description: kb.description,
794
+ vector_dimensions: kb.getVectorDimensions(),
795
+ document_table: tableNames.documentTable,
796
+ chunk_table: tableNames.chunkTable,
797
+ created_at: now,
798
+ updated_at: now
799
+ };
800
+ const repo = getGlobalKnowledgeBaseRepository();
801
+ await repo.addKnowledgeBase(record);
802
+ kbs.set(id, kb);
803
+ });
804
+ }
805
+ function unregisterKnowledgeBase(id) {
806
+ return withIdLock(id, async () => {
807
+ const repo = getGlobalKnowledgeBaseRepository();
808
+ await repo.removeKnowledgeBase(id);
809
+ const kbs = getGlobalKnowledgeBases();
810
+ kbs.delete(id);
811
+ });
812
+ }
813
+ async function deregisterKnowledgeBase(id) {
733
814
  const repo = getGlobalKnowledgeBaseRepository();
734
- await repo.addKnowledgeBase(record);
735
- kbs.set(id, kb);
815
+ await repo.removeKnowledgeBase(id);
816
+ const kbs = getGlobalKnowledgeBases();
817
+ kbs.delete(id);
736
818
  }
737
819
  function getKnowledgeBase(id) {
738
820
  return getGlobalKnowledgeBases().get(id);
@@ -809,6 +891,207 @@ async function createKnowledgeBase(options) {
809
891
  }
810
892
  return kb;
811
893
  }
894
+ // src/knowledge-base/ScopedTabularStorage.ts
895
+ import { EventEmitter as EventEmitter2 } from "@workglow/util";
896
+
897
+ class ScopedTabularStorage {
898
+ inner;
899
+ kbId;
900
+ events = new EventEmitter2;
901
+ constructor(inner, kbId) {
902
+ this.inner = inner;
903
+ this.kbId = kbId;
904
+ }
905
+ inject(value) {
906
+ return { ...value, kb_id: this.kbId };
907
+ }
908
+ strip(entity) {
909
+ if (!entity)
910
+ return entity;
911
+ const { kb_id: _, ...rest } = entity;
912
+ return rest;
913
+ }
914
+ stripArray(entities) {
915
+ if (!entities)
916
+ return;
917
+ return entities.map((e) => this.strip(e));
918
+ }
919
+ async put(value) {
920
+ const result = await this.inner.put(this.inject(value));
921
+ const stripped = this.strip(result);
922
+ this.events.emit("put", stripped);
923
+ return stripped;
924
+ }
925
+ async putBulk(values) {
926
+ const injected = values.map((v) => this.inject(v));
927
+ const results = await this.inner.putBulk(injected);
928
+ const stripped = results.map((r) => this.strip(r));
929
+ for (const entity of stripped) {
930
+ this.events.emit("put", entity);
931
+ }
932
+ return stripped;
933
+ }
934
+ async get(key) {
935
+ const result = await this.inner.get({ ...key, kb_id: this.kbId });
936
+ if (!result)
937
+ return;
938
+ const stripped = this.strip(result);
939
+ this.events.emit("get", key, stripped);
940
+ return stripped;
941
+ }
942
+ async delete(key) {
943
+ await this.inner.deleteSearch({ ...key, kb_id: this.kbId });
944
+ this.events.emit("delete", key);
945
+ }
946
+ async getAll(options) {
947
+ const results = await this.inner.query({ kb_id: this.kbId }, options);
948
+ return this.stripArray(results);
949
+ }
950
+ async deleteAll() {
951
+ await this.inner.deleteSearch({ kb_id: this.kbId });
952
+ this.events.emit("clearall");
953
+ }
954
+ async size() {
955
+ let count = 0;
956
+ const pageSize = 1000;
957
+ let offset = 0;
958
+ while (true) {
959
+ const page = await this.inner.query({ kb_id: this.kbId }, { offset, limit: pageSize });
960
+ if (!page || page.length === 0)
961
+ break;
962
+ count += page.length;
963
+ if (page.length < pageSize)
964
+ break;
965
+ offset += pageSize;
966
+ }
967
+ return count;
968
+ }
969
+ async query(criteria, options) {
970
+ const results = await this.inner.query({ ...criteria, kb_id: this.kbId }, options);
971
+ const stripped = this.stripArray(results);
972
+ this.events.emit("query", criteria, stripped);
973
+ return stripped;
974
+ }
975
+ async deleteSearch(criteria) {
976
+ await this.inner.deleteSearch({ ...criteria, kb_id: this.kbId });
977
+ }
978
+ async getBulk(offset, limit) {
979
+ const results = await this.inner.query({ kb_id: this.kbId }, { offset, limit });
980
+ return this.stripArray(results);
981
+ }
982
+ async* records(pageSize = 100) {
983
+ if (pageSize <= 0) {
984
+ throw new RangeError(`pageSize must be greater than 0, got ${pageSize}`);
985
+ }
986
+ let offset = 0;
987
+ while (true) {
988
+ const page = await this.getBulk(offset, pageSize);
989
+ if (!page || page.length === 0) {
990
+ break;
991
+ }
992
+ for (const entity of page) {
993
+ yield entity;
994
+ }
995
+ if (page.length < pageSize)
996
+ break;
997
+ offset += pageSize;
998
+ }
999
+ }
1000
+ async* pages(pageSize = 100) {
1001
+ if (pageSize <= 0) {
1002
+ throw new RangeError(`pageSize must be greater than 0, got ${pageSize}`);
1003
+ }
1004
+ let offset = 0;
1005
+ while (true) {
1006
+ const page = await this.getBulk(offset, pageSize);
1007
+ if (!page || page.length === 0) {
1008
+ break;
1009
+ }
1010
+ yield page;
1011
+ if (page.length < pageSize)
1012
+ break;
1013
+ offset += pageSize;
1014
+ }
1015
+ }
1016
+ on(name, fn) {
1017
+ this.events.on(name, fn);
1018
+ }
1019
+ off(name, fn) {
1020
+ this.events.off(name, fn);
1021
+ }
1022
+ emit(name, ...args) {
1023
+ this.events.emit(name, ...args);
1024
+ }
1025
+ once(name, fn) {
1026
+ this.events.once(name, fn);
1027
+ }
1028
+ waitOn(name) {
1029
+ return this.events.waitOn(name);
1030
+ }
1031
+ subscribeToChanges(callback, options) {
1032
+ return this.inner.subscribeToChanges((change) => {
1033
+ const newKbId = change.new?.kb_id;
1034
+ const oldKbId = change.old?.kb_id;
1035
+ if (newKbId !== undefined && newKbId !== this.kbId)
1036
+ return;
1037
+ if (oldKbId !== undefined && oldKbId !== this.kbId)
1038
+ return;
1039
+ if (newKbId === undefined && oldKbId === undefined)
1040
+ return;
1041
+ callback({
1042
+ type: change.type,
1043
+ ...change.old ? { old: this.strip(change.old) } : {},
1044
+ ...change.new ? { new: this.strip(change.new) } : {}
1045
+ });
1046
+ }, options);
1047
+ }
1048
+ async setupDatabase() {}
1049
+ destroy() {}
1050
+ [Symbol.dispose]() {}
1051
+ async[Symbol.asyncDispose]() {}
1052
+ }
1053
+ // src/knowledge-base/ScopedVectorStorage.ts
1054
+ class ScopedVectorStorage extends ScopedTabularStorage {
1055
+ inner;
1056
+ overFetchMultiplier;
1057
+ constructor(inner, kbId, overFetchMultiplier = 3) {
1058
+ super(inner, kbId);
1059
+ this.inner = inner;
1060
+ this.overFetchMultiplier = overFetchMultiplier;
1061
+ }
1062
+ getVectorDimensions() {
1063
+ return this.inner.getVectorDimensions();
1064
+ }
1065
+ filterAndStrip(results, topK, overfetchLimit) {
1066
+ const filtered = results.filter((r) => r.kb_id === this.kbId).slice(0, topK);
1067
+ if (topK && overfetchLimit && results.length >= overfetchLimit && filtered.length < topK) {
1068
+ console.warn(`ScopedVectorStorage: search returned ${filtered.length}/${topK} results after ` + `kb_id filtering. Consider increasing overFetchMultiplier (currently ${this.overFetchMultiplier}).`);
1069
+ }
1070
+ return filtered.map((r) => {
1071
+ const { kb_id: _, ...rest } = r;
1072
+ return rest;
1073
+ });
1074
+ }
1075
+ async similaritySearch(query, options) {
1076
+ const overfetchLimit = options?.topK ? options.topK * this.overFetchMultiplier : undefined;
1077
+ const results = await this.inner.similaritySearch(query, {
1078
+ ...options,
1079
+ topK: overfetchLimit
1080
+ });
1081
+ return this.filterAndStrip(results, options?.topK, overfetchLimit);
1082
+ }
1083
+ async hybridSearch(query, options) {
1084
+ if (typeof this.inner.hybridSearch !== "function") {
1085
+ throw new Error("Hybrid search is not supported by the configured chunk storage backend. " + "Please use a vector storage implementation that provides `hybridSearch`.");
1086
+ }
1087
+ const overfetchLimit = options?.topK ? options.topK * this.overFetchMultiplier : undefined;
1088
+ const results = await this.inner.hybridSearch(query, {
1089
+ ...options,
1090
+ topK: overfetchLimit
1091
+ });
1092
+ return this.filterAndStrip(results, options?.topK, overfetchLimit);
1093
+ }
1094
+ }
812
1095
  // src/util/DatasetSchema.ts
813
1096
  function TypeTabularStorage(options = {}) {
814
1097
  return {
@@ -1062,10 +1345,12 @@ class StructuralParser {
1062
1345
  }
1063
1346
  }
1064
1347
  export {
1348
+ unregisterKnowledgeBase,
1065
1349
  traverseDepthFirst,
1066
1350
  setGlobalKnowledgeBaseRepository,
1067
1351
  registerKnowledgeBase,
1068
1352
  knowledgeBaseTableNames,
1353
+ isSharedTableMode,
1069
1354
  hasChildren,
1070
1355
  getNodePath,
1071
1356
  getKnowledgeBase,
@@ -1074,14 +1359,25 @@ export {
1074
1359
  getDocumentRange,
1075
1360
  getChildren,
1076
1361
  estimateTokens,
1362
+ deregisterKnowledgeBase,
1077
1363
  createKnowledgeBase,
1078
1364
  TypeTabularStorage,
1079
1365
  TypeKnowledgeBase,
1080
1366
  TopicNodeSchema,
1081
1367
  TokenBudgetSchema,
1082
1368
  StructuralParser,
1369
+ SharedDocumentStorageSchema,
1370
+ SharedDocumentPrimaryKey,
1371
+ SharedDocumentIndexes,
1372
+ SharedChunkVectorStorageSchema,
1373
+ SharedChunkPrimaryKey,
1374
+ SharedChunkIndexes,
1083
1375
  SentenceNodeSchema,
1084
1376
  SectionNodeSchema,
1377
+ ScopedVectorStorage,
1378
+ ScopedTabularStorage,
1379
+ SHARED_DOCUMENT_TABLE,
1380
+ SHARED_CHUNK_TABLE,
1085
1381
  ParagraphNodeSchema,
1086
1382
  NodeRangeSchema,
1087
1383
  NodeKind,
@@ -1107,4 +1403,4 @@ export {
1107
1403
  ChunkRecordArraySchema
1108
1404
  };
1109
1405
 
1110
- //# debugId=1C77125182EE2FF264756E2164756E21
1406
+ //# debugId=385C0FD80B51727B64756E2164756E21