@workglow/knowledge-base 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. package/README.md +166 -0
  2. package/dist/browser.js +330 -25
  3. package/dist/browser.js.map +15 -12
  4. package/dist/bun.js +330 -25
  5. package/dist/bun.js.map +15 -12
  6. package/dist/chunk/ChunkSchema.d.ts.map +1 -1
  7. package/dist/chunk/ChunkVectorStorageSchema.d.ts +1 -1
  8. package/dist/chunk/ChunkVectorStorageSchema.d.ts.map +1 -1
  9. package/dist/common.d.ts +3 -0
  10. package/dist/common.d.ts.map +1 -1
  11. package/dist/document/Document.d.ts.map +1 -1
  12. package/dist/document/DocumentNode.d.ts +1 -1
  13. package/dist/document/DocumentNode.d.ts.map +1 -1
  14. package/dist/document/DocumentSchema.d.ts.map +1 -1
  15. package/dist/document/DocumentStorageSchema.d.ts +2 -1
  16. package/dist/document/DocumentStorageSchema.d.ts.map +1 -1
  17. package/dist/document/StructuralParser.d.ts +1 -1
  18. package/dist/document/StructuralParser.d.ts.map +1 -1
  19. package/dist/knowledge-base/KnowledgeBase.d.ts +2 -0
  20. package/dist/knowledge-base/KnowledgeBase.d.ts.map +1 -1
  21. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts +17 -1
  22. package/dist/knowledge-base/KnowledgeBaseRegistry.d.ts.map +1 -1
  23. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts +5 -3
  24. package/dist/knowledge-base/KnowledgeBaseRepository.d.ts.map +1 -1
  25. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts +4 -0
  26. package/dist/knowledge-base/KnowledgeBaseSchema.d.ts.map +1 -1
  27. package/dist/knowledge-base/ScopedTabularStorage.d.ts +46 -0
  28. package/dist/knowledge-base/ScopedTabularStorage.d.ts.map +1 -0
  29. package/dist/knowledge-base/ScopedVectorStorage.d.ts +27 -0
  30. package/dist/knowledge-base/ScopedVectorStorage.d.ts.map +1 -0
  31. package/dist/knowledge-base/SharedTableSchemas.d.ts +93 -0
  32. package/dist/knowledge-base/SharedTableSchemas.d.ts.map +1 -0
  33. package/dist/node.js +330 -25
  34. package/dist/node.js.map +15 -12
  35. package/dist/util/DatasetSchema.d.ts.map +1 -1
  36. package/package.json +10 -5
package/dist/bun.js CHANGED
@@ -343,9 +343,7 @@ var ChunkRecordArraySchema = {
343
343
  description: "Array of chunk records"
344
344
  };
345
345
  // src/chunk/ChunkVectorStorageSchema.ts
346
- import {
347
- TypedArraySchema
348
- } from "@workglow/util/schema";
346
+ import { TypedArraySchema } from "@workglow/util/schema";
349
347
  var ChunkVectorStorageSchema = {
350
348
  type: "object",
351
349
  properties: {
@@ -564,6 +562,12 @@ class KnowledgeBase {
564
562
  this.tabularStorage.destroy();
565
563
  this.chunkStorage.destroy();
566
564
  }
565
+ async[Symbol.asyncDispose]() {
566
+ this.destroy();
567
+ }
568
+ [Symbol.dispose]() {
569
+ this.destroy();
570
+ }
567
571
  async getChunk(chunk_id) {
568
572
  return this.chunkStorage.get({ chunk_id });
569
573
  }
@@ -600,6 +604,55 @@ class KnowledgeBase {
600
604
  return doc.findChunksByNodeId(nodeId);
601
605
  }
602
606
  }
607
+ // src/knowledge-base/SharedTableSchemas.ts
608
+ import { TypedArraySchema as TypedArraySchema2 } from "@workglow/util/schema";
609
// Table names used when a knowledge base is registered in shared-table mode.
var SHARED_DOCUMENT_TABLE = "shared_documents";
var SHARED_CHUNK_TABLE = "shared_chunks";

// Document row layout for the shared table: every row carries the `kb_id`
// of the knowledge base that owns it. `metadata` is optional.
var SharedDocumentStorageSchema = {
  type: "object",
  properties: {
    doc_id: {
      type: "string",
      "x-auto-generated": true,
      title: "Document ID",
      description: "Unique identifier for the document"
    },
    kb_id: {
      type: "string",
      title: "Knowledge Base ID",
      description: "Owning knowledge base identifier"
    },
    data: {
      type: "string",
      title: "Document Data",
      description: "JSON-serialized document"
    },
    metadata: {
      type: "object",
      title: "Metadata",
      description: "Metadata of the document"
    }
  },
  required: ["doc_id", "kb_id", "data"],
  additionalProperties: true
};

// Chunk/vector row layout for the shared table. All five fields are required
// and no extra top-level properties are allowed.
var SharedChunkVectorStorageSchema = {
  type: "object",
  properties: {
    chunk_id: { type: "string", "x-auto-generated": true },
    kb_id: { type: "string" },
    doc_id: { type: "string" },
    vector: TypedArraySchema2(),
    metadata: { type: "object", format: "metadata", additionalProperties: true }
  },
  required: ["chunk_id", "kb_id", "doc_id", "vector", "metadata"],
  additionalProperties: false
};

// Composite primary keys: `kb_id` comes first in both.
var SharedDocumentPrimaryKey = ["kb_id", "doc_id"];
var SharedChunkPrimaryKey = ["kb_id", "chunk_id"];

// Index column lists for the shared tables.
var SharedDocumentIndexes = [["kb_id"]];
var SharedChunkIndexes = [["kb_id"], ["kb_id", "doc_id"]];
655
+
603
656
  // src/knowledge-base/KnowledgeBaseSchema.ts
604
657
  var KnowledgeBaseRecordSchema = {
605
658
  type: "object",
@@ -633,6 +686,9 @@ function knowledgeBaseTableNames(kbId) {
633
686
  chunkTable: `kb_chunks_${safe}`
634
687
  };
635
688
  }
689
/**
 * Tells whether a knowledge-base record points at the shared tables.
 *
 * @param record Record with `document_table` and `chunk_table` fields.
 * @returns `true` only when both table names equal the shared-table constants.
 */
function isSharedTableMode(record) {
  if (record.document_table !== SHARED_DOCUMENT_TABLE) {
    return false;
  }
  return record.chunk_table === SHARED_CHUNK_TABLE;
}
636
692
  // src/knowledge-base/KnowledgeBaseRepository.ts
637
693
  import { EventEmitter } from "@workglow/util";
638
694
 
@@ -697,16 +753,13 @@ class InMemoryKnowledgeBaseRepository extends KnowledgeBaseRepository {
697
753
  import {
698
754
  createServiceToken,
699
755
  globalServiceRegistry,
756
+ registerInputCompactor,
700
757
  registerInputResolver
701
758
  } from "@workglow/util";
702
759
  var KNOWLEDGE_BASES = createServiceToken("knowledge-base.registry");
703
760
  var KNOWLEDGE_BASE_REPOSITORY = createServiceToken("knowledge-base.repository");
704
- if (!globalServiceRegistry.has(KNOWLEDGE_BASES)) {
705
- globalServiceRegistry.register(KNOWLEDGE_BASES, () => new Map, true);
706
- }
707
- if (!globalServiceRegistry.has(KNOWLEDGE_BASE_REPOSITORY)) {
708
- globalServiceRegistry.register(KNOWLEDGE_BASE_REPOSITORY, () => new InMemoryKnowledgeBaseRepository, true);
709
- }
761
// Install the default services unless something is already registered under
// these tokens: an empty Map as the live registry and an in-memory repository.
globalServiceRegistry.registerIfAbsent(KNOWLEDGE_BASES, () => new Map(), true);
globalServiceRegistry.registerIfAbsent(
  KNOWLEDGE_BASE_REPOSITORY,
  () => new InMemoryKnowledgeBaseRepository(),
  true
);
710
763
  function getGlobalKnowledgeBases() {
711
764
  return globalServiceRegistry.get(KNOWLEDGE_BASES);
712
765
  }
@@ -716,23 +769,53 @@ function getGlobalKnowledgeBaseRepository() {
716
769
  function setGlobalKnowledgeBaseRepository(repository) {
717
770
  globalServiceRegistry.registerInstance(KNOWLEDGE_BASE_REPOSITORY, repository);
718
771
  }
719
- async function registerKnowledgeBase(id, kb) {
720
- const kbs = getGlobalKnowledgeBases();
721
- const now = new Date().toISOString();
722
- const tableNames = knowledgeBaseTableNames(id);
723
- const record = {
724
- kb_id: id,
725
- title: kb.title,
726
- description: kb.description,
727
- vector_dimensions: kb.getVectorDimensions(),
728
- document_table: tableNames.documentTable,
729
- chunk_table: tableNames.chunkTable,
730
- created_at: now,
731
- updated_at: now
772
+ var pendingOps = new Map;
773
+ function withIdLock(id, fn) {
774
+ const prev = pendingOps.get(id) ?? Promise.resolve();
775
+ const next = prev.then(fn, fn);
776
+ pendingOps.set(id, next);
777
+ const cleanup = () => {
778
+ if (pendingOps.get(id) === next) {
779
+ pendingOps.delete(id);
780
+ }
732
781
  };
782
+ next.finally(cleanup);
783
+ return next;
784
+ }
785
/**
 * Persists a record for a knowledge base, then publishes the instance in the
 * live registry. The work runs under the per-id lock.
 *
 * @param id Knowledge base identifier, also used as the lock key.
 * @param kb Instance providing `title`, `description` and `getVectorDimensions()`.
 * @param options Optional; `sharedTables === true` records the shared table
 *   names instead of the names derived from `id`.
 * @returns Promise that settles when the locked operation finishes.
 */
function registerKnowledgeBase(id, kb, options) {
  const persistThenPublish = async () => {
    const liveRegistry = getGlobalKnowledgeBases();
    const timestamp = new Date().toISOString();

    let tables;
    if (options?.sharedTables === true) {
      tables = { documentTable: SHARED_DOCUMENT_TABLE, chunkTable: SHARED_CHUNK_TABLE };
    } else {
      tables = knowledgeBaseTableNames(id);
    }

    const record = {
      kb_id: id,
      title: kb.title,
      description: kb.description,
      vector_dimensions: kb.getVectorDimensions(),
      document_table: tables.documentTable,
      chunk_table: tables.chunkTable,
      created_at: timestamp,
      updated_at: timestamp
    };

    // The instance is published only after the repository accepted the record.
    const repository = getGlobalKnowledgeBaseRepository();
    await repository.addKnowledgeBase(record);
    liveRegistry.set(id, kb);
  };
  return withIdLock(id, persistThenPublish);
}
806
/**
 * Removes a knowledge base's record from the repository, then drops the
 * instance from the live registry. The work runs under the per-id lock.
 *
 * @param id Knowledge base identifier, also used as the lock key.
 * @returns Promise that settles when the locked operation finishes.
 */
function unregisterKnowledgeBase(id) {
  const removeEverywhere = async () => {
    const repository = getGlobalKnowledgeBaseRepository();
    await repository.removeKnowledgeBase(id);
    getGlobalKnowledgeBases().delete(id);
  };
  return withIdLock(id, removeEverywhere);
}
814
/**
 * Removes a knowledge base from the repository and from the live registry.
 *
 * This delegates to `unregisterKnowledgeBase`, so the removal runs under the
 * same per-id lock as registration. Previously it did the same two steps
 * without the lock and could interleave with an in-flight
 * `registerKnowledgeBase` for the same id.
 *
 * @param id Knowledge base identifier.
 * @returns Promise that resolves once the removal has completed.
 */
async function deregisterKnowledgeBase(id) {
  await unregisterKnowledgeBase(id);
}
737
820
  function getKnowledgeBase(id) {
738
821
  return getGlobalKnowledgeBases().get(id);
@@ -746,6 +829,14 @@ async function resolveKnowledgeBaseFromRegistry(id, format, registry) {
746
829
  return kb;
747
830
  }
748
831
  registerInputResolver("knowledge-base", resolveKnowledgeBaseFromRegistry);
832
// Compactor for the "knowledge-base" format: maps an instance back to the id
// it is stored under. It looks in the supplied registry when that registry
// holds KNOWLEDGE_BASES, otherwise in the global one. Yields undefined when
// the instance is not found.
registerInputCompactor("knowledge-base", (value, _format, registry) => {
  let candidates;
  if (registry.has(KNOWLEDGE_BASES)) {
    candidates = registry.get(KNOWLEDGE_BASES);
  } else {
    candidates = getGlobalKnowledgeBases();
  }
  for (const [candidateId, candidate] of candidates) {
    if (candidate === value) {
      return candidateId;
    }
  }
  return undefined;
});
749
840
  // src/knowledge-base/createKnowledgeBase.ts
750
841
  import { InMemoryTabularStorage as InMemoryTabularStorage2, InMemoryVectorStorage } from "@workglow/storage";
751
842
 
@@ -801,6 +892,207 @@ async function createKnowledgeBase(options) {
801
892
  }
802
893
  return kb;
803
894
  }
895
+ // src/knowledge-base/ScopedTabularStorage.ts
896
+ import { EventEmitter as EventEmitter2 } from "@workglow/util";
897
+
898
/**
 * Tabular-storage wrapper that confines every operation to one knowledge base.
 *
 * Values and criteria sent to the wrapped storage get `kb_id` set to this
 * wrapper's id. Entities coming back have `kb_id` removed before they are
 * returned or emitted. The wrapper has its own event emitter.
 */
class ScopedTabularStorage {
  inner;
  kbId;
  events = new EventEmitter2;

  /**
   * @param inner Wrapped storage that actually holds the rows.
   * @param kbId  Knowledge base id stamped onto everything sent to `inner`.
   */
  constructor(inner, kbId) {
    this.inner = inner;
    this.kbId = kbId;
  }

  /** Returns a copy of `value` with `kb_id` set to this wrapper's id. */
  inject(value) {
    const scoped = { ...value, kb_id: this.kbId };
    return scoped;
  }

  /** Returns a copy of `entity` without `kb_id`; falsy input is returned as is. */
  strip(entity) {
    if (entity) {
      const { kb_id: _scope, ...visible } = entity;
      return visible;
    }
    return entity;
  }

  /** Applies `strip` to each element; yields undefined for a falsy list. */
  stripArray(entities) {
    return entities ? entities.map((row) => this.strip(row)) : undefined;
  }

  /** Stores one value and emits "put" with the stripped result. */
  async put(value) {
    const stored = this.strip(await this.inner.put(this.inject(value)));
    this.events.emit("put", stored);
    return stored;
  }

  /** Stores many values and emits one "put" per stripped result. */
  async putBulk(values) {
    const scopedValues = values.map((item) => this.inject(item));
    const stored = await this.inner.putBulk(scopedValues);
    const visible = stored.map((row) => this.strip(row));
    for (let i = 0; i < visible.length; i++) {
      this.events.emit("put", visible[i]);
    }
    return visible;
  }

  /** Fetches by key within this knowledge base; emits "get" only on a hit. */
  async get(key) {
    const scopedKey = { ...key, kb_id: this.kbId };
    const found = await this.inner.get(scopedKey);
    if (found) {
      const visible = this.strip(found);
      this.events.emit("get", key, visible);
      return visible;
    }
    return undefined;
  }

  /** Deletes by key through the wrapped storage's `deleteSearch`; emits "delete". */
  async delete(key) {
    const scopedKey = { ...key, kb_id: this.kbId };
    await this.inner.deleteSearch(scopedKey);
    this.events.emit("delete", key);
  }

  /** Returns all rows of this knowledge base, forwarding `options` to the query. */
  async getAll(options) {
    const rows = await this.inner.query({ kb_id: this.kbId }, options);
    return this.stripArray(rows);
  }

  /** Deletes all rows of this knowledge base and emits "clearall". */
  async deleteAll() {
    await this.inner.deleteSearch({ kb_id: this.kbId });
    this.events.emit("clearall");
  }

  /** Counts this knowledge base's rows by paging through the wrapped storage. */
  async size() {
    const pageSize = 1000;
    let total = 0;
    for (let offset = 0; ; offset += pageSize) {
      const page = await this.inner.query({ kb_id: this.kbId }, { offset, limit: pageSize });
      if (!page || page.length === 0) {
        break;
      }
      total += page.length;
      // A short page means there is nothing further to fetch.
      if (page.length < pageSize) {
        break;
      }
    }
    return total;
  }

  /** Queries within this knowledge base and emits "query" with the stripped rows. */
  async query(criteria, options) {
    const scopedCriteria = { ...criteria, kb_id: this.kbId };
    const visible = this.stripArray(await this.inner.query(scopedCriteria, options));
    this.events.emit("query", criteria, visible);
    return visible;
  }

  /** Deletes matching rows within this knowledge base; emits no event. */
  async deleteSearch(criteria) {
    const scopedCriteria = { ...criteria, kb_id: this.kbId };
    await this.inner.deleteSearch(scopedCriteria);
  }

  /** Returns one page of this knowledge base's rows. */
  async getBulk(offset, limit) {
    const rows = await this.inner.query({ kb_id: this.kbId }, { offset, limit });
    return this.stripArray(rows);
  }

  /**
   * Yields rows one at a time, fetching `pageSize` rows per request.
   * @throws RangeError when `pageSize` is zero or negative.
   */
  async* records(pageSize = 100) {
    if (pageSize <= 0) {
      throw new RangeError(`pageSize must be greater than 0, got ${pageSize}`);
    }
    let offset = 0;
    let page = await this.getBulk(offset, pageSize);
    while (page && page.length !== 0) {
      for (const entity of page) {
        yield entity;
      }
      if (page.length < pageSize) {
        return;
      }
      offset += pageSize;
      page = await this.getBulk(offset, pageSize);
    }
  }

  /**
   * Yields rows one page at a time, `pageSize` rows per page.
   * @throws RangeError when `pageSize` is zero or negative.
   */
  async* pages(pageSize = 100) {
    if (pageSize <= 0) {
      throw new RangeError(`pageSize must be greater than 0, got ${pageSize}`);
    }
    let offset = 0;
    let page = await this.getBulk(offset, pageSize);
    while (page && page.length !== 0) {
      yield page;
      if (page.length < pageSize) {
        return;
      }
      offset += pageSize;
      page = await this.getBulk(offset, pageSize);
    }
  }

  // Event API: thin pass-throughs to this wrapper's own emitter.
  on(name, fn) {
    this.events.on(name, fn);
  }

  off(name, fn) {
    this.events.off(name, fn);
  }

  emit(name, ...args) {
    this.events.emit(name, ...args);
  }

  once(name, fn) {
    this.events.once(name, fn);
  }

  waitOn(name) {
    return this.events.waitOn(name);
  }

  /**
   * Subscribes to the wrapped storage's change feed and forwards only the
   * changes that belong to this knowledge base, with `kb_id` stripped.
   */
  subscribeToChanges(callback, options) {
    const forward = (change) => {
      const newKbId = change.new?.kb_id;
      const oldKbId = change.old?.kb_id;
      const belongsElsewhere = (owner) => owner !== undefined && owner !== this.kbId;
      if (belongsElsewhere(newKbId) || belongsElsewhere(oldKbId)) {
        return;
      }
      // Without any kb_id the owner is unknown, so the change is dropped.
      if (newKbId === undefined && oldKbId === undefined) {
        return;
      }
      const scopedChange = { type: change.type };
      if (change.old) {
        scopedChange.old = this.strip(change.old);
      }
      if (change.new) {
        scopedChange.new = this.strip(change.new);
      }
      callback(scopedChange);
    };
    return this.inner.subscribeToChanges(forward, options);
  }

  // Lifecycle hooks are no-ops in this wrapper.
  async setupDatabase() {}

  destroy() {}

  [Symbol.dispose]() {}

  async [Symbol.asyncDispose]() {}
}
1054
+ // src/knowledge-base/ScopedVectorStorage.ts
1055
/**
 * Vector-storage wrapper scoped to one knowledge base.
 *
 * Searches ask the wrapped storage for `topK * overFetchMultiplier` results,
 * keep the ones whose `kb_id` matches, cut the list to `topK`, and return
 * them without `kb_id`.
 */
class ScopedVectorStorage extends ScopedTabularStorage {
  inner;
  overFetchMultiplier;

  /**
   * @param inner Wrapped vector storage.
   * @param kbId Knowledge base id used for filtering.
   * @param overFetchMultiplier Factor applied to `topK` when querying `inner`.
   */
  constructor(inner, kbId, overFetchMultiplier = 3) {
    super(inner, kbId);
    this.inner = inner;
    this.overFetchMultiplier = overFetchMultiplier;
  }

  /** Returns the wrapped storage's vector dimensions. */
  getVectorDimensions() {
    return this.inner.getVectorDimensions();
  }

  /**
   * Keeps this knowledge base's results, truncates to `topK` and removes `kb_id`.
   * Warns when the wrapped search returned at least `overfetchLimit` results
   * but fewer than `topK` of them survived the filter.
   */
  filterAndStrip(results, topK, overfetchLimit) {
    const owned = results.filter((hit) => hit.kb_id === this.kbId);
    const filtered = owned.slice(0, topK);
    const innerWasSaturated = topK && overfetchLimit && results.length >= overfetchLimit;
    if (innerWasSaturated && filtered.length < topK) {
      console.warn(`ScopedVectorStorage: search returned ${filtered.length}/${topK} results after ` + `kb_id filtering. Consider increasing overFetchMultiplier (currently ${this.overFetchMultiplier}).`);
    }
    return filtered.map(({ kb_id: _scope, ...rest }) => rest);
  }

  /** Similarity search restricted to this knowledge base. */
  async similaritySearch(query, options) {
    let overfetchLimit;
    if (options?.topK) {
      overfetchLimit = options.topK * this.overFetchMultiplier;
    }
    const innerOptions = { ...options, topK: overfetchLimit };
    const results = await this.inner.similaritySearch(query, innerOptions);
    return this.filterAndStrip(results, options?.topK, overfetchLimit);
  }

  /**
   * Hybrid search restricted to this knowledge base.
   * @throws Error when the wrapped storage has no `hybridSearch` function.
   */
  async hybridSearch(query, options) {
    if (typeof this.inner.hybridSearch !== "function") {
      throw new Error("Hybrid search is not supported by the configured chunk storage backend. " + "Please use a vector storage implementation that provides `hybridSearch`.");
    }
    let overfetchLimit;
    if (options?.topK) {
      overfetchLimit = options.topK * this.overFetchMultiplier;
    }
    const innerOptions = { ...options, topK: overfetchLimit };
    const results = await this.inner.hybridSearch(query, innerOptions);
    return this.filterAndStrip(results, options?.topK, overfetchLimit);
  }
}
804
1096
  // src/util/DatasetSchema.ts
805
1097
  function TypeTabularStorage(options = {}) {
806
1098
  return {
@@ -1054,10 +1346,12 @@ class StructuralParser {
1054
1346
  }
1055
1347
  }
1056
1348
  export {
1349
+ unregisterKnowledgeBase,
1057
1350
  traverseDepthFirst,
1058
1351
  setGlobalKnowledgeBaseRepository,
1059
1352
  registerKnowledgeBase,
1060
1353
  knowledgeBaseTableNames,
1354
+ isSharedTableMode,
1061
1355
  hasChildren,
1062
1356
  getNodePath,
1063
1357
  getKnowledgeBase,
@@ -1066,14 +1360,25 @@ export {
1066
1360
  getDocumentRange,
1067
1361
  getChildren,
1068
1362
  estimateTokens,
1363
+ deregisterKnowledgeBase,
1069
1364
  createKnowledgeBase,
1070
1365
  TypeTabularStorage,
1071
1366
  TypeKnowledgeBase,
1072
1367
  TopicNodeSchema,
1073
1368
  TokenBudgetSchema,
1074
1369
  StructuralParser,
1370
+ SharedDocumentStorageSchema,
1371
+ SharedDocumentPrimaryKey,
1372
+ SharedDocumentIndexes,
1373
+ SharedChunkVectorStorageSchema,
1374
+ SharedChunkPrimaryKey,
1375
+ SharedChunkIndexes,
1075
1376
  SentenceNodeSchema,
1076
1377
  SectionNodeSchema,
1378
+ ScopedVectorStorage,
1379
+ ScopedTabularStorage,
1380
+ SHARED_DOCUMENT_TABLE,
1381
+ SHARED_CHUNK_TABLE,
1077
1382
  ParagraphNodeSchema,
1078
1383
  NodeRangeSchema,
1079
1384
  NodeKind,
@@ -1099,4 +1404,4 @@ export {
1099
1404
  ChunkRecordArraySchema
1100
1405
  };
1101
1406
 
1102
- //# debugId=E518FA75797167F364756E2164756E21
1407
+ //# debugId=EC2844C75568481264756E2164756E21