@workglow/ai 0.0.111 → 0.0.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +95 -80
- package/dist/browser.js.map +10 -10
- package/dist/bun.js +95 -80
- package/dist/bun.js.map +10 -10
- package/dist/node.js +95 -80
- package/dist/node.js.map +10 -10
- package/dist/task/ChunkRetrievalTask.d.ts +38 -9
- package/dist/task/ChunkRetrievalTask.d.ts.map +1 -1
- package/dist/task/ChunkToVectorTask.d.ts +61 -41
- package/dist/task/ChunkToVectorTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorHybridSearchTask.d.ts +16 -11
- package/dist/task/ChunkVectorHybridSearchTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorSearchTask.d.ts +9 -9
- package/dist/task/ChunkVectorSearchTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorUpsertTask.d.ts +8 -8
- package/dist/task/ChunkVectorUpsertTask.d.ts.map +1 -1
- package/dist/task/HierarchicalChunkerTask.d.ts +67 -44
- package/dist/task/HierarchicalChunkerTask.d.ts.map +1 -1
- package/dist/task/HierarchyJoinTask.d.ts +69 -42
- package/dist/task/HierarchyJoinTask.d.ts.map +1 -1
- package/dist/task/QueryExpanderTask.d.ts +3 -3
- package/package.json +11 -11
package/dist/bun.js
CHANGED
|
@@ -780,7 +780,7 @@ var backgroundRemoval = (input, config) => {
|
|
|
780
780
|
Workflow.prototype.backgroundRemoval = CreateWorkflow(BackgroundRemovalTask);
|
|
781
781
|
|
|
782
782
|
// src/task/ChunkRetrievalTask.ts
|
|
783
|
-
import {
|
|
783
|
+
import { TypeKnowledgeBase } from "@workglow/dataset";
|
|
784
784
|
import {
|
|
785
785
|
CreateWorkflow as CreateWorkflow3,
|
|
786
786
|
Task,
|
|
@@ -843,9 +843,9 @@ Workflow2.prototype.textEmbedding = CreateWorkflow2(TextEmbeddingTask);
|
|
|
843
843
|
var inputSchema = {
|
|
844
844
|
type: "object",
|
|
845
845
|
properties: {
|
|
846
|
-
|
|
847
|
-
title: "
|
|
848
|
-
description: "The
|
|
846
|
+
knowledgeBase: TypeKnowledgeBase({
|
|
847
|
+
title: "Knowledge Base",
|
|
848
|
+
description: "The knowledge base instance to search in"
|
|
849
849
|
}),
|
|
850
850
|
query: TypeSingleOrArray({
|
|
851
851
|
oneOf: [
|
|
@@ -889,14 +889,14 @@ var inputSchema = {
|
|
|
889
889
|
default: false
|
|
890
890
|
}
|
|
891
891
|
},
|
|
892
|
-
required: ["
|
|
892
|
+
required: ["knowledgeBase", "query"],
|
|
893
893
|
if: {
|
|
894
894
|
properties: {
|
|
895
895
|
query: { type: "string" }
|
|
896
896
|
}
|
|
897
897
|
},
|
|
898
898
|
then: {
|
|
899
|
-
required: ["
|
|
899
|
+
required: ["knowledgeBase", "query", "model"]
|
|
900
900
|
},
|
|
901
901
|
else: {},
|
|
902
902
|
additionalProperties: false
|
|
@@ -945,9 +945,20 @@ var outputSchema = {
|
|
|
945
945
|
type: "number",
|
|
946
946
|
title: "Count",
|
|
947
947
|
description: "Number of results returned"
|
|
948
|
-
}
|
|
948
|
+
},
|
|
949
|
+
query: TypeSingleOrArray({
|
|
950
|
+
oneOf: [
|
|
951
|
+
{ type: "string" },
|
|
952
|
+
TypedArraySchema2({
|
|
953
|
+
title: "Query Vector",
|
|
954
|
+
description: "Pre-computed query vector"
|
|
955
|
+
})
|
|
956
|
+
],
|
|
957
|
+
title: "Query",
|
|
958
|
+
description: "The query used for retrieval (pass-through)"
|
|
959
|
+
})
|
|
949
960
|
},
|
|
950
|
-
required: ["chunks", "chunk_ids", "metadata", "scores", "count"],
|
|
961
|
+
required: ["chunks", "chunk_ids", "metadata", "scores", "count", "query"],
|
|
951
962
|
additionalProperties: false
|
|
952
963
|
};
|
|
953
964
|
|
|
@@ -965,7 +976,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
965
976
|
}
|
|
966
977
|
async execute(input, context) {
|
|
967
978
|
const {
|
|
968
|
-
|
|
979
|
+
knowledgeBase,
|
|
969
980
|
query,
|
|
970
981
|
topK = 5,
|
|
971
982
|
filter,
|
|
@@ -973,7 +984,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
973
984
|
scoreThreshold = 0,
|
|
974
985
|
returnVectors = false
|
|
975
986
|
} = input;
|
|
976
|
-
const
|
|
987
|
+
const kb = knowledgeBase;
|
|
977
988
|
let queryVectors;
|
|
978
989
|
if (typeof query === "string" || Array.isArray(query) && query.every((q) => typeof q === "string")) {
|
|
979
990
|
if (!model) {
|
|
@@ -990,7 +1001,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
990
1001
|
const searchVectors = queryVectors.map((v) => v instanceof Float32Array ? v : new Float32Array(v));
|
|
991
1002
|
const results = [];
|
|
992
1003
|
for (const searchVector of searchVectors) {
|
|
993
|
-
const res = await
|
|
1004
|
+
const res = await kb.similaritySearch(searchVector, {
|
|
994
1005
|
topK,
|
|
995
1006
|
filter,
|
|
996
1007
|
scoreThreshold
|
|
@@ -999,14 +1010,15 @@ class ChunkRetrievalTask extends Task {
|
|
|
999
1010
|
}
|
|
1000
1011
|
const chunks = results.map((r) => {
|
|
1001
1012
|
const meta = r.metadata;
|
|
1002
|
-
return meta.text ||
|
|
1013
|
+
return meta.text || JSON.stringify(meta);
|
|
1003
1014
|
});
|
|
1004
1015
|
const output = {
|
|
1005
1016
|
chunks,
|
|
1006
1017
|
chunk_ids: results.map((r) => r.chunk_id),
|
|
1007
1018
|
metadata: results.map((r) => r.metadata),
|
|
1008
1019
|
scores: results.map((r) => r.score),
|
|
1009
|
-
count: results.length
|
|
1020
|
+
count: results.length,
|
|
1021
|
+
query
|
|
1010
1022
|
};
|
|
1011
1023
|
if (returnVectors) {
|
|
1012
1024
|
output.vectors = results.map((r) => r.vector);
|
|
@@ -1020,7 +1032,7 @@ var chunkRetrieval = (input, config) => {
|
|
|
1020
1032
|
Workflow3.prototype.chunkRetrieval = CreateWorkflow3(ChunkRetrievalTask);
|
|
1021
1033
|
|
|
1022
1034
|
// src/task/ChunkToVectorTask.ts
|
|
1023
|
-
import {
|
|
1035
|
+
import { ChunkRecordSchema } from "@workglow/dataset";
|
|
1024
1036
|
import {
|
|
1025
1037
|
CreateWorkflow as CreateWorkflow4,
|
|
1026
1038
|
Task as Task2,
|
|
@@ -1044,11 +1056,11 @@ var inputSchema2 = {
|
|
|
1044
1056
|
},
|
|
1045
1057
|
chunks: {
|
|
1046
1058
|
type: "array",
|
|
1047
|
-
items:
|
|
1059
|
+
items: ChunkRecordSchema(),
|
|
1048
1060
|
title: "Chunks",
|
|
1049
|
-
description: "Array of chunk
|
|
1061
|
+
description: "Array of chunk records"
|
|
1050
1062
|
},
|
|
1051
|
-
|
|
1063
|
+
vector: {
|
|
1052
1064
|
type: "array",
|
|
1053
1065
|
items: TypedArraySchema3({
|
|
1054
1066
|
title: "Vector",
|
|
@@ -1058,7 +1070,7 @@ var inputSchema2 = {
|
|
|
1058
1070
|
description: "Embeddings from TextEmbeddingTask"
|
|
1059
1071
|
}
|
|
1060
1072
|
},
|
|
1061
|
-
required: ["chunks", "
|
|
1073
|
+
required: ["chunks", "vector"],
|
|
1062
1074
|
additionalProperties: false
|
|
1063
1075
|
};
|
|
1064
1076
|
var outputSchema2 = {
|
|
@@ -1113,13 +1125,13 @@ class ChunkToVectorTask extends Task2 {
|
|
|
1113
1125
|
return outputSchema2;
|
|
1114
1126
|
}
|
|
1115
1127
|
async execute(input, context) {
|
|
1116
|
-
const { chunks,
|
|
1128
|
+
const { chunks, vector, doc_title } = input;
|
|
1117
1129
|
const chunkArray = chunks;
|
|
1118
|
-
if (!chunkArray || !
|
|
1130
|
+
if (!chunkArray || !vector) {
|
|
1119
1131
|
throw new Error("Both chunks and vector are required");
|
|
1120
1132
|
}
|
|
1121
|
-
if (chunkArray.length !==
|
|
1122
|
-
throw new Error(`Mismatch: ${chunkArray.length} chunks but ${
|
|
1133
|
+
if (chunkArray.length !== vector.length) {
|
|
1134
|
+
throw new Error(`Mismatch: ${chunkArray.length} chunks but ${vector.length} vectors`);
|
|
1123
1135
|
}
|
|
1124
1136
|
const ids = [];
|
|
1125
1137
|
const metadata = [];
|
|
@@ -1136,12 +1148,13 @@ class ChunkToVectorTask extends Task2 {
|
|
|
1136
1148
|
text: chunk.text,
|
|
1137
1149
|
nodePath: chunk.nodePath,
|
|
1138
1150
|
...doc_title ? { doc_title } : {},
|
|
1139
|
-
...chunk.
|
|
1151
|
+
...chunk.summary ? { summary: chunk.summary } : {},
|
|
1152
|
+
...chunk.entities ? { entities: chunk.entities } : {}
|
|
1140
1153
|
});
|
|
1141
1154
|
}
|
|
1142
1155
|
return {
|
|
1143
1156
|
ids,
|
|
1144
|
-
vectors,
|
|
1157
|
+
vectors: vector,
|
|
1145
1158
|
metadata,
|
|
1146
1159
|
texts
|
|
1147
1160
|
};
|
|
@@ -1153,7 +1166,7 @@ var chunkToVector = (input, config) => {
|
|
|
1153
1166
|
Workflow4.prototype.chunkToVector = CreateWorkflow4(ChunkToVectorTask);
|
|
1154
1167
|
|
|
1155
1168
|
// src/task/ChunkVectorHybridSearchTask.ts
|
|
1156
|
-
import {
|
|
1169
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase2 } from "@workglow/dataset";
|
|
1157
1170
|
import {
|
|
1158
1171
|
CreateWorkflow as CreateWorkflow5,
|
|
1159
1172
|
Task as Task3,
|
|
@@ -1165,9 +1178,9 @@ import {
|
|
|
1165
1178
|
var inputSchema3 = {
|
|
1166
1179
|
type: "object",
|
|
1167
1180
|
properties: {
|
|
1168
|
-
|
|
1169
|
-
title: "
|
|
1170
|
-
description: "The
|
|
1181
|
+
knowledgeBase: TypeKnowledgeBase2({
|
|
1182
|
+
title: "Knowledge Base",
|
|
1183
|
+
description: "The knowledge base instance to search in (must support hybridSearch)"
|
|
1171
1184
|
}),
|
|
1172
1185
|
queryVector: TypedArraySchema4({
|
|
1173
1186
|
title: "Query Vector",
|
|
@@ -1213,7 +1226,7 @@ var inputSchema3 = {
|
|
|
1213
1226
|
default: false
|
|
1214
1227
|
}
|
|
1215
1228
|
},
|
|
1216
|
-
required: ["
|
|
1229
|
+
required: ["knowledgeBase", "queryVector", "queryText"],
|
|
1217
1230
|
additionalProperties: false
|
|
1218
1231
|
};
|
|
1219
1232
|
var outputSchema3 = {
|
|
@@ -1225,10 +1238,10 @@ var outputSchema3 = {
|
|
|
1225
1238
|
title: "Text Chunks",
|
|
1226
1239
|
description: "Retrieved text chunks"
|
|
1227
1240
|
},
|
|
1228
|
-
|
|
1241
|
+
chunk_ids: {
|
|
1229
1242
|
type: "array",
|
|
1230
1243
|
items: { type: "string" },
|
|
1231
|
-
title: "IDs",
|
|
1244
|
+
title: "Chunk IDs",
|
|
1232
1245
|
description: "IDs of retrieved chunks"
|
|
1233
1246
|
},
|
|
1234
1247
|
metadata: {
|
|
@@ -1260,9 +1273,14 @@ var outputSchema3 = {
|
|
|
1260
1273
|
type: "number",
|
|
1261
1274
|
title: "Count",
|
|
1262
1275
|
description: "Number of results returned"
|
|
1276
|
+
},
|
|
1277
|
+
query: {
|
|
1278
|
+
type: "string",
|
|
1279
|
+
title: "Query",
|
|
1280
|
+
description: "The text query used for search (pass-through)"
|
|
1263
1281
|
}
|
|
1264
1282
|
},
|
|
1265
|
-
required: ["chunks", "
|
|
1283
|
+
required: ["chunks", "chunk_ids", "metadata", "scores", "count", "query"],
|
|
1266
1284
|
additionalProperties: false
|
|
1267
1285
|
};
|
|
1268
1286
|
|
|
@@ -1280,7 +1298,7 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1280
1298
|
}
|
|
1281
1299
|
async execute(input, context) {
|
|
1282
1300
|
const {
|
|
1283
|
-
|
|
1301
|
+
knowledgeBase,
|
|
1284
1302
|
queryVector,
|
|
1285
1303
|
queryText,
|
|
1286
1304
|
topK = 10,
|
|
@@ -1289,12 +1307,9 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1289
1307
|
vectorWeight = 0.7,
|
|
1290
1308
|
returnVectors = false
|
|
1291
1309
|
} = input;
|
|
1292
|
-
const
|
|
1293
|
-
if (!repo.hybridSearch) {
|
|
1294
|
-
throw new Error("Dataset does not support hybrid search.");
|
|
1295
|
-
}
|
|
1310
|
+
const kb = knowledgeBase;
|
|
1296
1311
|
const searchVector = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
|
|
1297
|
-
const results = await
|
|
1312
|
+
const results = await kb.hybridSearch(searchVector, {
|
|
1298
1313
|
textQuery: queryText,
|
|
1299
1314
|
topK,
|
|
1300
1315
|
filter,
|
|
@@ -1303,14 +1318,15 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1303
1318
|
});
|
|
1304
1319
|
const chunks = results.map((r) => {
|
|
1305
1320
|
const meta = r.metadata;
|
|
1306
|
-
return meta.text ||
|
|
1321
|
+
return meta.text || JSON.stringify(meta);
|
|
1307
1322
|
});
|
|
1308
1323
|
const output = {
|
|
1309
1324
|
chunks,
|
|
1310
|
-
|
|
1325
|
+
chunk_ids: results.map((r) => r.chunk_id),
|
|
1311
1326
|
metadata: results.map((r) => r.metadata),
|
|
1312
1327
|
scores: results.map((r) => r.score),
|
|
1313
|
-
count: results.length
|
|
1328
|
+
count: results.length,
|
|
1329
|
+
query: queryText
|
|
1314
1330
|
};
|
|
1315
1331
|
if (returnVectors) {
|
|
1316
1332
|
output.vectors = results.map((r) => r.vector);
|
|
@@ -1324,7 +1340,7 @@ var hybridSearch = async (input, config) => {
|
|
|
1324
1340
|
Workflow5.prototype.hybridSearch = CreateWorkflow5(ChunkVectorHybridSearchTask);
|
|
1325
1341
|
|
|
1326
1342
|
// src/task/ChunkVectorSearchTask.ts
|
|
1327
|
-
import {
|
|
1343
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase3 } from "@workglow/dataset";
|
|
1328
1344
|
import {
|
|
1329
1345
|
CreateWorkflow as CreateWorkflow6,
|
|
1330
1346
|
Task as Task4,
|
|
@@ -1336,9 +1352,9 @@ import {
|
|
|
1336
1352
|
var inputSchema4 = {
|
|
1337
1353
|
type: "object",
|
|
1338
1354
|
properties: {
|
|
1339
|
-
|
|
1340
|
-
title: "
|
|
1341
|
-
description: "The
|
|
1355
|
+
knowledgeBase: TypeKnowledgeBase3({
|
|
1356
|
+
title: "Knowledge Base",
|
|
1357
|
+
description: "The knowledge base instance to search in"
|
|
1342
1358
|
}),
|
|
1343
1359
|
query: TypedArraySchema5({
|
|
1344
1360
|
title: "Query Vector",
|
|
@@ -1365,7 +1381,7 @@ var inputSchema4 = {
|
|
|
1365
1381
|
default: 0
|
|
1366
1382
|
}
|
|
1367
1383
|
},
|
|
1368
|
-
required: ["
|
|
1384
|
+
required: ["knowledgeBase", "query"],
|
|
1369
1385
|
additionalProperties: false
|
|
1370
1386
|
};
|
|
1371
1387
|
var outputSchema4 = {
|
|
@@ -1416,7 +1432,7 @@ class ChunkVectorSearchTask extends Task4 {
|
|
|
1416
1432
|
static type = "ChunkVectorSearchTask";
|
|
1417
1433
|
static category = "Vector Store";
|
|
1418
1434
|
static title = "Vector Store Search";
|
|
1419
|
-
static description = "Search for similar vectors in a
|
|
1435
|
+
static description = "Search for similar vectors in a knowledge base";
|
|
1420
1436
|
static cacheable = true;
|
|
1421
1437
|
static inputSchema() {
|
|
1422
1438
|
return inputSchema4;
|
|
@@ -1425,9 +1441,9 @@ class ChunkVectorSearchTask extends Task4 {
|
|
|
1425
1441
|
return outputSchema4;
|
|
1426
1442
|
}
|
|
1427
1443
|
async execute(input, context) {
|
|
1428
|
-
const {
|
|
1429
|
-
const
|
|
1430
|
-
const results = await
|
|
1444
|
+
const { knowledgeBase, query, topK = 10, filter, scoreThreshold = 0 } = input;
|
|
1445
|
+
const kb = knowledgeBase;
|
|
1446
|
+
const results = await kb.similaritySearch(query, {
|
|
1431
1447
|
topK,
|
|
1432
1448
|
filter,
|
|
1433
1449
|
scoreThreshold
|
|
@@ -1447,7 +1463,7 @@ var vectorStoreSearch = (input, config) => {
|
|
|
1447
1463
|
Workflow6.prototype.vectorStoreSearch = CreateWorkflow6(ChunkVectorSearchTask);
|
|
1448
1464
|
|
|
1449
1465
|
// src/task/ChunkVectorUpsertTask.ts
|
|
1450
|
-
import {
|
|
1466
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase4 } from "@workglow/dataset";
|
|
1451
1467
|
import {
|
|
1452
1468
|
CreateWorkflow as CreateWorkflow7,
|
|
1453
1469
|
Task as Task5,
|
|
@@ -1459,9 +1475,9 @@ import {
|
|
|
1459
1475
|
var inputSchema5 = {
|
|
1460
1476
|
type: "object",
|
|
1461
1477
|
properties: {
|
|
1462
|
-
|
|
1463
|
-
title: "
|
|
1464
|
-
description: "The
|
|
1478
|
+
knowledgeBase: TypeKnowledgeBase4({
|
|
1479
|
+
title: "Knowledge Base",
|
|
1480
|
+
description: "The knowledge base instance to store vectors in"
|
|
1465
1481
|
}),
|
|
1466
1482
|
doc_id: {
|
|
1467
1483
|
type: "string",
|
|
@@ -1479,7 +1495,7 @@ var inputSchema5 = {
|
|
|
1479
1495
|
additionalProperties: true
|
|
1480
1496
|
})
|
|
1481
1497
|
},
|
|
1482
|
-
required: ["
|
|
1498
|
+
required: ["knowledgeBase", "doc_id", "vectors", "metadata"],
|
|
1483
1499
|
additionalProperties: false
|
|
1484
1500
|
};
|
|
1485
1501
|
var outputSchema5 = {
|
|
@@ -1510,7 +1526,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1510
1526
|
static type = "ChunkVectorUpsertTask";
|
|
1511
1527
|
static category = "Vector Store";
|
|
1512
1528
|
static title = "Add to Vector Store";
|
|
1513
|
-
static description = "Store vector embeddings with metadata in a
|
|
1529
|
+
static description = "Store vector embeddings with metadata in a knowledge base";
|
|
1514
1530
|
static cacheable = false;
|
|
1515
1531
|
static inputSchema() {
|
|
1516
1532
|
return inputSchema5;
|
|
@@ -1519,10 +1535,10 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1519
1535
|
return outputSchema5;
|
|
1520
1536
|
}
|
|
1521
1537
|
async execute(input, context) {
|
|
1522
|
-
const {
|
|
1538
|
+
const { knowledgeBase, doc_id, vectors, metadata } = input;
|
|
1523
1539
|
const vectorArray = Array.isArray(vectors) ? vectors : [vectors];
|
|
1524
1540
|
const metadataArray = Array.isArray(metadata) ? metadata : Array(vectorArray.length).fill(metadata);
|
|
1525
|
-
const
|
|
1541
|
+
const kb = knowledgeBase;
|
|
1526
1542
|
await context.updateProgress(1, "Upserting vectors");
|
|
1527
1543
|
if (vectorArray.length > 1) {
|
|
1528
1544
|
if (vectorArray.length !== metadataArray.length) {
|
|
@@ -1536,7 +1552,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1536
1552
|
metadata: metadataItem
|
|
1537
1553
|
};
|
|
1538
1554
|
});
|
|
1539
|
-
const results = await
|
|
1555
|
+
const results = await kb.upsertChunksBulk(entities);
|
|
1540
1556
|
const chunk_ids = results.map((r) => r.chunk_id);
|
|
1541
1557
|
return {
|
|
1542
1558
|
doc_id,
|
|
@@ -1545,7 +1561,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1545
1561
|
};
|
|
1546
1562
|
} else if (vectorArray.length === 1) {
|
|
1547
1563
|
const metadataItem = metadataArray[0];
|
|
1548
|
-
const result = await
|
|
1564
|
+
const result = await kb.upsertChunk({
|
|
1549
1565
|
doc_id,
|
|
1550
1566
|
vector: vectorArray[0],
|
|
1551
1567
|
metadata: metadataItem
|
|
@@ -2990,7 +3006,7 @@ Workflow17.prototype.handLandmarker = CreateWorkflow17(HandLandmarkerTask);
|
|
|
2990
3006
|
|
|
2991
3007
|
// src/task/HierarchicalChunkerTask.ts
|
|
2992
3008
|
import {
|
|
2993
|
-
|
|
3009
|
+
ChunkRecordSchema as ChunkRecordSchema2,
|
|
2994
3010
|
estimateTokens as estimateTokens2,
|
|
2995
3011
|
getChildren as getChildren2,
|
|
2996
3012
|
hasChildren as hasChildren2
|
|
@@ -3060,9 +3076,9 @@ var outputSchema8 = {
|
|
|
3060
3076
|
},
|
|
3061
3077
|
chunks: {
|
|
3062
3078
|
type: "array",
|
|
3063
|
-
items:
|
|
3079
|
+
items: ChunkRecordSchema2(),
|
|
3064
3080
|
title: "Chunks",
|
|
3065
|
-
description: "Array of chunk
|
|
3081
|
+
description: "Array of chunk records"
|
|
3066
3082
|
},
|
|
3067
3083
|
text: {
|
|
3068
3084
|
type: "array",
|
|
@@ -3228,9 +3244,8 @@ Workflow18.prototype.hierarchicalChunker = CreateWorkflow18(HierarchicalChunkerT
|
|
|
3228
3244
|
|
|
3229
3245
|
// src/task/HierarchyJoinTask.ts
|
|
3230
3246
|
import {
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
TypeDocumentDataset
|
|
3247
|
+
ChunkRecordArraySchema,
|
|
3248
|
+
TypeKnowledgeBase as TypeKnowledgeBase5
|
|
3234
3249
|
} from "@workglow/dataset";
|
|
3235
3250
|
import {
|
|
3236
3251
|
CreateWorkflow as CreateWorkflow19,
|
|
@@ -3240,9 +3255,9 @@ import {
|
|
|
3240
3255
|
var inputSchema9 = {
|
|
3241
3256
|
type: "object",
|
|
3242
3257
|
properties: {
|
|
3243
|
-
|
|
3244
|
-
title: "
|
|
3245
|
-
description: "The
|
|
3258
|
+
knowledgeBase: TypeKnowledgeBase5({
|
|
3259
|
+
title: "Knowledge Base",
|
|
3260
|
+
description: "The knowledge base to query for hierarchy"
|
|
3246
3261
|
}),
|
|
3247
3262
|
chunks: {
|
|
3248
3263
|
type: "array",
|
|
@@ -3256,7 +3271,7 @@ var inputSchema9 = {
|
|
|
3256
3271
|
title: "Chunk IDs",
|
|
3257
3272
|
description: "IDs of retrieved chunks"
|
|
3258
3273
|
},
|
|
3259
|
-
metadata:
|
|
3274
|
+
metadata: ChunkRecordArraySchema,
|
|
3260
3275
|
scores: {
|
|
3261
3276
|
type: "array",
|
|
3262
3277
|
items: { type: "number" },
|
|
@@ -3276,7 +3291,7 @@ var inputSchema9 = {
|
|
|
3276
3291
|
default: true
|
|
3277
3292
|
}
|
|
3278
3293
|
},
|
|
3279
|
-
required: ["
|
|
3294
|
+
required: ["knowledgeBase", "chunks", "chunk_ids", "metadata", "scores"],
|
|
3280
3295
|
additionalProperties: false
|
|
3281
3296
|
};
|
|
3282
3297
|
var outputSchema9 = {
|
|
@@ -3294,7 +3309,7 @@ var outputSchema9 = {
|
|
|
3294
3309
|
title: "Chunk IDs",
|
|
3295
3310
|
description: "IDs of retrieved chunks"
|
|
3296
3311
|
},
|
|
3297
|
-
metadata:
|
|
3312
|
+
metadata: ChunkRecordArraySchema,
|
|
3298
3313
|
scores: {
|
|
3299
3314
|
type: "array",
|
|
3300
3315
|
items: { type: "number" },
|
|
@@ -3325,7 +3340,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3325
3340
|
}
|
|
3326
3341
|
async execute(input, context) {
|
|
3327
3342
|
const {
|
|
3328
|
-
|
|
3343
|
+
knowledgeBase,
|
|
3329
3344
|
chunks,
|
|
3330
3345
|
chunk_ids,
|
|
3331
3346
|
metadata,
|
|
@@ -3333,7 +3348,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3333
3348
|
includeParentSummaries = true,
|
|
3334
3349
|
includeEntities = true
|
|
3335
3350
|
} = input;
|
|
3336
|
-
const
|
|
3351
|
+
const kb = knowledgeBase;
|
|
3337
3352
|
const enrichedMetadata = [];
|
|
3338
3353
|
for (let i = 0;i < chunk_ids.length; i++) {
|
|
3339
3354
|
const chunkId = chunk_ids[i];
|
|
@@ -3349,7 +3364,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3349
3364
|
continue;
|
|
3350
3365
|
}
|
|
3351
3366
|
try {
|
|
3352
|
-
const ancestors = await
|
|
3367
|
+
const ancestors = await kb.getAncestors(doc_id, leafNodeId);
|
|
3353
3368
|
const enriched = { ...originalMetadata };
|
|
3354
3369
|
if (includeParentSummaries && ancestors.length > 0) {
|
|
3355
3370
|
const parentSummaries = [];
|
|
@@ -3358,7 +3373,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3358
3373
|
if (ancestor.enrichment?.summary) {
|
|
3359
3374
|
parentSummaries.push(ancestor.enrichment.summary);
|
|
3360
3375
|
}
|
|
3361
|
-
if (ancestor.kind === "section" && ancestor
|
|
3376
|
+
if (ancestor.kind === "section" && "title" in ancestor) {
|
|
3362
3377
|
sectionTitles.push(ancestor.title);
|
|
3363
3378
|
}
|
|
3364
3379
|
}
|
|
@@ -4009,7 +4024,7 @@ var inputSchema10 = {
|
|
|
4009
4024
|
var outputSchema10 = {
|
|
4010
4025
|
type: "object",
|
|
4011
4026
|
properties: {
|
|
4012
|
-
|
|
4027
|
+
query: {
|
|
4013
4028
|
type: "array",
|
|
4014
4029
|
items: { type: "string" },
|
|
4015
4030
|
title: "Expanded Queries",
|
|
@@ -4031,7 +4046,7 @@ var outputSchema10 = {
|
|
|
4031
4046
|
description: "Number of queries generated"
|
|
4032
4047
|
}
|
|
4033
4048
|
},
|
|
4034
|
-
required: ["
|
|
4049
|
+
required: ["query", "originalQuery", "method", "count"],
|
|
4035
4050
|
additionalProperties: false
|
|
4036
4051
|
};
|
|
4037
4052
|
|
|
@@ -4069,7 +4084,7 @@ class QueryExpanderTask extends Task10 {
|
|
|
4069
4084
|
queries.unshift(query);
|
|
4070
4085
|
}
|
|
4071
4086
|
return {
|
|
4072
|
-
queries,
|
|
4087
|
+
query: queries,
|
|
4073
4088
|
originalQuery: query,
|
|
4074
4089
|
method,
|
|
4075
4090
|
count: queries.length
|
|
@@ -6334,4 +6349,4 @@ export {
|
|
|
6334
6349
|
AiJob
|
|
6335
6350
|
};
|
|
6336
6351
|
|
|
6337
|
-
//# debugId=
|
|
6352
|
+
//# debugId=8E8187274333361364756E2164756E21
|