@workglow/ai 0.0.111 → 0.0.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +95 -80
- package/dist/browser.js.map +10 -10
- package/dist/bun.js +95 -80
- package/dist/bun.js.map +10 -10
- package/dist/node.js +95 -80
- package/dist/node.js.map +10 -10
- package/dist/task/ChunkRetrievalTask.d.ts +38 -9
- package/dist/task/ChunkRetrievalTask.d.ts.map +1 -1
- package/dist/task/ChunkToVectorTask.d.ts +61 -41
- package/dist/task/ChunkToVectorTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorHybridSearchTask.d.ts +16 -11
- package/dist/task/ChunkVectorHybridSearchTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorSearchTask.d.ts +9 -9
- package/dist/task/ChunkVectorSearchTask.d.ts.map +1 -1
- package/dist/task/ChunkVectorUpsertTask.d.ts +8 -8
- package/dist/task/ChunkVectorUpsertTask.d.ts.map +1 -1
- package/dist/task/HierarchicalChunkerTask.d.ts +67 -44
- package/dist/task/HierarchicalChunkerTask.d.ts.map +1 -1
- package/dist/task/HierarchyJoinTask.d.ts +69 -42
- package/dist/task/HierarchyJoinTask.d.ts.map +1 -1
- package/dist/task/QueryExpanderTask.d.ts +3 -3
- package/package.json +11 -11
package/dist/node.js
CHANGED
|
@@ -779,7 +779,7 @@ var backgroundRemoval = (input, config) => {
|
|
|
779
779
|
Workflow.prototype.backgroundRemoval = CreateWorkflow(BackgroundRemovalTask);
|
|
780
780
|
|
|
781
781
|
// src/task/ChunkRetrievalTask.ts
|
|
782
|
-
import {
|
|
782
|
+
import { TypeKnowledgeBase } from "@workglow/dataset";
|
|
783
783
|
import {
|
|
784
784
|
CreateWorkflow as CreateWorkflow3,
|
|
785
785
|
Task,
|
|
@@ -842,9 +842,9 @@ Workflow2.prototype.textEmbedding = CreateWorkflow2(TextEmbeddingTask);
|
|
|
842
842
|
var inputSchema = {
|
|
843
843
|
type: "object",
|
|
844
844
|
properties: {
|
|
845
|
-
|
|
846
|
-
title: "
|
|
847
|
-
description: "The
|
|
845
|
+
knowledgeBase: TypeKnowledgeBase({
|
|
846
|
+
title: "Knowledge Base",
|
|
847
|
+
description: "The knowledge base instance to search in"
|
|
848
848
|
}),
|
|
849
849
|
query: TypeSingleOrArray({
|
|
850
850
|
oneOf: [
|
|
@@ -888,14 +888,14 @@ var inputSchema = {
|
|
|
888
888
|
default: false
|
|
889
889
|
}
|
|
890
890
|
},
|
|
891
|
-
required: ["
|
|
891
|
+
required: ["knowledgeBase", "query"],
|
|
892
892
|
if: {
|
|
893
893
|
properties: {
|
|
894
894
|
query: { type: "string" }
|
|
895
895
|
}
|
|
896
896
|
},
|
|
897
897
|
then: {
|
|
898
|
-
required: ["
|
|
898
|
+
required: ["knowledgeBase", "query", "model"]
|
|
899
899
|
},
|
|
900
900
|
else: {},
|
|
901
901
|
additionalProperties: false
|
|
@@ -944,9 +944,20 @@ var outputSchema = {
|
|
|
944
944
|
type: "number",
|
|
945
945
|
title: "Count",
|
|
946
946
|
description: "Number of results returned"
|
|
947
|
-
}
|
|
947
|
+
},
|
|
948
|
+
query: TypeSingleOrArray({
|
|
949
|
+
oneOf: [
|
|
950
|
+
{ type: "string" },
|
|
951
|
+
TypedArraySchema2({
|
|
952
|
+
title: "Query Vector",
|
|
953
|
+
description: "Pre-computed query vector"
|
|
954
|
+
})
|
|
955
|
+
],
|
|
956
|
+
title: "Query",
|
|
957
|
+
description: "The query used for retrieval (pass-through)"
|
|
958
|
+
})
|
|
948
959
|
},
|
|
949
|
-
required: ["chunks", "chunk_ids", "metadata", "scores", "count"],
|
|
960
|
+
required: ["chunks", "chunk_ids", "metadata", "scores", "count", "query"],
|
|
950
961
|
additionalProperties: false
|
|
951
962
|
};
|
|
952
963
|
|
|
@@ -964,7 +975,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
964
975
|
}
|
|
965
976
|
async execute(input, context) {
|
|
966
977
|
const {
|
|
967
|
-
|
|
978
|
+
knowledgeBase,
|
|
968
979
|
query,
|
|
969
980
|
topK = 5,
|
|
970
981
|
filter,
|
|
@@ -972,7 +983,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
972
983
|
scoreThreshold = 0,
|
|
973
984
|
returnVectors = false
|
|
974
985
|
} = input;
|
|
975
|
-
const
|
|
986
|
+
const kb = knowledgeBase;
|
|
976
987
|
let queryVectors;
|
|
977
988
|
if (typeof query === "string" || Array.isArray(query) && query.every((q) => typeof q === "string")) {
|
|
978
989
|
if (!model) {
|
|
@@ -989,7 +1000,7 @@ class ChunkRetrievalTask extends Task {
|
|
|
989
1000
|
const searchVectors = queryVectors.map((v) => v instanceof Float32Array ? v : new Float32Array(v));
|
|
990
1001
|
const results = [];
|
|
991
1002
|
for (const searchVector of searchVectors) {
|
|
992
|
-
const res = await
|
|
1003
|
+
const res = await kb.similaritySearch(searchVector, {
|
|
993
1004
|
topK,
|
|
994
1005
|
filter,
|
|
995
1006
|
scoreThreshold
|
|
@@ -998,14 +1009,15 @@ class ChunkRetrievalTask extends Task {
|
|
|
998
1009
|
}
|
|
999
1010
|
const chunks = results.map((r) => {
|
|
1000
1011
|
const meta = r.metadata;
|
|
1001
|
-
return meta.text ||
|
|
1012
|
+
return meta.text || JSON.stringify(meta);
|
|
1002
1013
|
});
|
|
1003
1014
|
const output = {
|
|
1004
1015
|
chunks,
|
|
1005
1016
|
chunk_ids: results.map((r) => r.chunk_id),
|
|
1006
1017
|
metadata: results.map((r) => r.metadata),
|
|
1007
1018
|
scores: results.map((r) => r.score),
|
|
1008
|
-
count: results.length
|
|
1019
|
+
count: results.length,
|
|
1020
|
+
query
|
|
1009
1021
|
};
|
|
1010
1022
|
if (returnVectors) {
|
|
1011
1023
|
output.vectors = results.map((r) => r.vector);
|
|
@@ -1019,7 +1031,7 @@ var chunkRetrieval = (input, config) => {
|
|
|
1019
1031
|
Workflow3.prototype.chunkRetrieval = CreateWorkflow3(ChunkRetrievalTask);
|
|
1020
1032
|
|
|
1021
1033
|
// src/task/ChunkToVectorTask.ts
|
|
1022
|
-
import {
|
|
1034
|
+
import { ChunkRecordSchema } from "@workglow/dataset";
|
|
1023
1035
|
import {
|
|
1024
1036
|
CreateWorkflow as CreateWorkflow4,
|
|
1025
1037
|
Task as Task2,
|
|
@@ -1043,11 +1055,11 @@ var inputSchema2 = {
|
|
|
1043
1055
|
},
|
|
1044
1056
|
chunks: {
|
|
1045
1057
|
type: "array",
|
|
1046
|
-
items:
|
|
1058
|
+
items: ChunkRecordSchema(),
|
|
1047
1059
|
title: "Chunks",
|
|
1048
|
-
description: "Array of chunk
|
|
1060
|
+
description: "Array of chunk records"
|
|
1049
1061
|
},
|
|
1050
|
-
|
|
1062
|
+
vector: {
|
|
1051
1063
|
type: "array",
|
|
1052
1064
|
items: TypedArraySchema3({
|
|
1053
1065
|
title: "Vector",
|
|
@@ -1057,7 +1069,7 @@ var inputSchema2 = {
|
|
|
1057
1069
|
description: "Embeddings from TextEmbeddingTask"
|
|
1058
1070
|
}
|
|
1059
1071
|
},
|
|
1060
|
-
required: ["chunks", "
|
|
1072
|
+
required: ["chunks", "vector"],
|
|
1061
1073
|
additionalProperties: false
|
|
1062
1074
|
};
|
|
1063
1075
|
var outputSchema2 = {
|
|
@@ -1112,13 +1124,13 @@ class ChunkToVectorTask extends Task2 {
|
|
|
1112
1124
|
return outputSchema2;
|
|
1113
1125
|
}
|
|
1114
1126
|
async execute(input, context) {
|
|
1115
|
-
const { chunks,
|
|
1127
|
+
const { chunks, vector, doc_title } = input;
|
|
1116
1128
|
const chunkArray = chunks;
|
|
1117
|
-
if (!chunkArray || !
|
|
1129
|
+
if (!chunkArray || !vector) {
|
|
1118
1130
|
throw new Error("Both chunks and vector are required");
|
|
1119
1131
|
}
|
|
1120
|
-
if (chunkArray.length !==
|
|
1121
|
-
throw new Error(`Mismatch: ${chunkArray.length} chunks but ${
|
|
1132
|
+
if (chunkArray.length !== vector.length) {
|
|
1133
|
+
throw new Error(`Mismatch: ${chunkArray.length} chunks but ${vector.length} vectors`);
|
|
1122
1134
|
}
|
|
1123
1135
|
const ids = [];
|
|
1124
1136
|
const metadata = [];
|
|
@@ -1135,12 +1147,13 @@ class ChunkToVectorTask extends Task2 {
|
|
|
1135
1147
|
text: chunk.text,
|
|
1136
1148
|
nodePath: chunk.nodePath,
|
|
1137
1149
|
...doc_title ? { doc_title } : {},
|
|
1138
|
-
...chunk.
|
|
1150
|
+
...chunk.summary ? { summary: chunk.summary } : {},
|
|
1151
|
+
...chunk.entities ? { entities: chunk.entities } : {}
|
|
1139
1152
|
});
|
|
1140
1153
|
}
|
|
1141
1154
|
return {
|
|
1142
1155
|
ids,
|
|
1143
|
-
vectors,
|
|
1156
|
+
vectors: vector,
|
|
1144
1157
|
metadata,
|
|
1145
1158
|
texts
|
|
1146
1159
|
};
|
|
@@ -1152,7 +1165,7 @@ var chunkToVector = (input, config) => {
|
|
|
1152
1165
|
Workflow4.prototype.chunkToVector = CreateWorkflow4(ChunkToVectorTask);
|
|
1153
1166
|
|
|
1154
1167
|
// src/task/ChunkVectorHybridSearchTask.ts
|
|
1155
|
-
import {
|
|
1168
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase2 } from "@workglow/dataset";
|
|
1156
1169
|
import {
|
|
1157
1170
|
CreateWorkflow as CreateWorkflow5,
|
|
1158
1171
|
Task as Task3,
|
|
@@ -1164,9 +1177,9 @@ import {
|
|
|
1164
1177
|
var inputSchema3 = {
|
|
1165
1178
|
type: "object",
|
|
1166
1179
|
properties: {
|
|
1167
|
-
|
|
1168
|
-
title: "
|
|
1169
|
-
description: "The
|
|
1180
|
+
knowledgeBase: TypeKnowledgeBase2({
|
|
1181
|
+
title: "Knowledge Base",
|
|
1182
|
+
description: "The knowledge base instance to search in (must support hybridSearch)"
|
|
1170
1183
|
}),
|
|
1171
1184
|
queryVector: TypedArraySchema4({
|
|
1172
1185
|
title: "Query Vector",
|
|
@@ -1212,7 +1225,7 @@ var inputSchema3 = {
|
|
|
1212
1225
|
default: false
|
|
1213
1226
|
}
|
|
1214
1227
|
},
|
|
1215
|
-
required: ["
|
|
1228
|
+
required: ["knowledgeBase", "queryVector", "queryText"],
|
|
1216
1229
|
additionalProperties: false
|
|
1217
1230
|
};
|
|
1218
1231
|
var outputSchema3 = {
|
|
@@ -1224,10 +1237,10 @@ var outputSchema3 = {
|
|
|
1224
1237
|
title: "Text Chunks",
|
|
1225
1238
|
description: "Retrieved text chunks"
|
|
1226
1239
|
},
|
|
1227
|
-
|
|
1240
|
+
chunk_ids: {
|
|
1228
1241
|
type: "array",
|
|
1229
1242
|
items: { type: "string" },
|
|
1230
|
-
title: "IDs",
|
|
1243
|
+
title: "Chunk IDs",
|
|
1231
1244
|
description: "IDs of retrieved chunks"
|
|
1232
1245
|
},
|
|
1233
1246
|
metadata: {
|
|
@@ -1259,9 +1272,14 @@ var outputSchema3 = {
|
|
|
1259
1272
|
type: "number",
|
|
1260
1273
|
title: "Count",
|
|
1261
1274
|
description: "Number of results returned"
|
|
1275
|
+
},
|
|
1276
|
+
query: {
|
|
1277
|
+
type: "string",
|
|
1278
|
+
title: "Query",
|
|
1279
|
+
description: "The text query used for search (pass-through)"
|
|
1262
1280
|
}
|
|
1263
1281
|
},
|
|
1264
|
-
required: ["chunks", "
|
|
1282
|
+
required: ["chunks", "chunk_ids", "metadata", "scores", "count", "query"],
|
|
1265
1283
|
additionalProperties: false
|
|
1266
1284
|
};
|
|
1267
1285
|
|
|
@@ -1279,7 +1297,7 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1279
1297
|
}
|
|
1280
1298
|
async execute(input, context) {
|
|
1281
1299
|
const {
|
|
1282
|
-
|
|
1300
|
+
knowledgeBase,
|
|
1283
1301
|
queryVector,
|
|
1284
1302
|
queryText,
|
|
1285
1303
|
topK = 10,
|
|
@@ -1288,12 +1306,9 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1288
1306
|
vectorWeight = 0.7,
|
|
1289
1307
|
returnVectors = false
|
|
1290
1308
|
} = input;
|
|
1291
|
-
const
|
|
1292
|
-
if (!repo.hybridSearch) {
|
|
1293
|
-
throw new Error("Dataset does not support hybrid search.");
|
|
1294
|
-
}
|
|
1309
|
+
const kb = knowledgeBase;
|
|
1295
1310
|
const searchVector = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
|
|
1296
|
-
const results = await
|
|
1311
|
+
const results = await kb.hybridSearch(searchVector, {
|
|
1297
1312
|
textQuery: queryText,
|
|
1298
1313
|
topK,
|
|
1299
1314
|
filter,
|
|
@@ -1302,14 +1317,15 @@ class ChunkVectorHybridSearchTask extends Task3 {
|
|
|
1302
1317
|
});
|
|
1303
1318
|
const chunks = results.map((r) => {
|
|
1304
1319
|
const meta = r.metadata;
|
|
1305
|
-
return meta.text ||
|
|
1320
|
+
return meta.text || JSON.stringify(meta);
|
|
1306
1321
|
});
|
|
1307
1322
|
const output = {
|
|
1308
1323
|
chunks,
|
|
1309
|
-
|
|
1324
|
+
chunk_ids: results.map((r) => r.chunk_id),
|
|
1310
1325
|
metadata: results.map((r) => r.metadata),
|
|
1311
1326
|
scores: results.map((r) => r.score),
|
|
1312
|
-
count: results.length
|
|
1327
|
+
count: results.length,
|
|
1328
|
+
query: queryText
|
|
1313
1329
|
};
|
|
1314
1330
|
if (returnVectors) {
|
|
1315
1331
|
output.vectors = results.map((r) => r.vector);
|
|
@@ -1323,7 +1339,7 @@ var hybridSearch = async (input, config) => {
|
|
|
1323
1339
|
Workflow5.prototype.hybridSearch = CreateWorkflow5(ChunkVectorHybridSearchTask);
|
|
1324
1340
|
|
|
1325
1341
|
// src/task/ChunkVectorSearchTask.ts
|
|
1326
|
-
import {
|
|
1342
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase3 } from "@workglow/dataset";
|
|
1327
1343
|
import {
|
|
1328
1344
|
CreateWorkflow as CreateWorkflow6,
|
|
1329
1345
|
Task as Task4,
|
|
@@ -1335,9 +1351,9 @@ import {
|
|
|
1335
1351
|
var inputSchema4 = {
|
|
1336
1352
|
type: "object",
|
|
1337
1353
|
properties: {
|
|
1338
|
-
|
|
1339
|
-
title: "
|
|
1340
|
-
description: "The
|
|
1354
|
+
knowledgeBase: TypeKnowledgeBase3({
|
|
1355
|
+
title: "Knowledge Base",
|
|
1356
|
+
description: "The knowledge base instance to search in"
|
|
1341
1357
|
}),
|
|
1342
1358
|
query: TypedArraySchema5({
|
|
1343
1359
|
title: "Query Vector",
|
|
@@ -1364,7 +1380,7 @@ var inputSchema4 = {
|
|
|
1364
1380
|
default: 0
|
|
1365
1381
|
}
|
|
1366
1382
|
},
|
|
1367
|
-
required: ["
|
|
1383
|
+
required: ["knowledgeBase", "query"],
|
|
1368
1384
|
additionalProperties: false
|
|
1369
1385
|
};
|
|
1370
1386
|
var outputSchema4 = {
|
|
@@ -1415,7 +1431,7 @@ class ChunkVectorSearchTask extends Task4 {
|
|
|
1415
1431
|
static type = "ChunkVectorSearchTask";
|
|
1416
1432
|
static category = "Vector Store";
|
|
1417
1433
|
static title = "Vector Store Search";
|
|
1418
|
-
static description = "Search for similar vectors in a
|
|
1434
|
+
static description = "Search for similar vectors in a knowledge base";
|
|
1419
1435
|
static cacheable = true;
|
|
1420
1436
|
static inputSchema() {
|
|
1421
1437
|
return inputSchema4;
|
|
@@ -1424,9 +1440,9 @@ class ChunkVectorSearchTask extends Task4 {
|
|
|
1424
1440
|
return outputSchema4;
|
|
1425
1441
|
}
|
|
1426
1442
|
async execute(input, context) {
|
|
1427
|
-
const {
|
|
1428
|
-
const
|
|
1429
|
-
const results = await
|
|
1443
|
+
const { knowledgeBase, query, topK = 10, filter, scoreThreshold = 0 } = input;
|
|
1444
|
+
const kb = knowledgeBase;
|
|
1445
|
+
const results = await kb.similaritySearch(query, {
|
|
1430
1446
|
topK,
|
|
1431
1447
|
filter,
|
|
1432
1448
|
scoreThreshold
|
|
@@ -1446,7 +1462,7 @@ var vectorStoreSearch = (input, config) => {
|
|
|
1446
1462
|
Workflow6.prototype.vectorStoreSearch = CreateWorkflow6(ChunkVectorSearchTask);
|
|
1447
1463
|
|
|
1448
1464
|
// src/task/ChunkVectorUpsertTask.ts
|
|
1449
|
-
import {
|
|
1465
|
+
import { TypeKnowledgeBase as TypeKnowledgeBase4 } from "@workglow/dataset";
|
|
1450
1466
|
import {
|
|
1451
1467
|
CreateWorkflow as CreateWorkflow7,
|
|
1452
1468
|
Task as Task5,
|
|
@@ -1458,9 +1474,9 @@ import {
|
|
|
1458
1474
|
var inputSchema5 = {
|
|
1459
1475
|
type: "object",
|
|
1460
1476
|
properties: {
|
|
1461
|
-
|
|
1462
|
-
title: "
|
|
1463
|
-
description: "The
|
|
1477
|
+
knowledgeBase: TypeKnowledgeBase4({
|
|
1478
|
+
title: "Knowledge Base",
|
|
1479
|
+
description: "The knowledge base instance to store vectors in"
|
|
1464
1480
|
}),
|
|
1465
1481
|
doc_id: {
|
|
1466
1482
|
type: "string",
|
|
@@ -1478,7 +1494,7 @@ var inputSchema5 = {
|
|
|
1478
1494
|
additionalProperties: true
|
|
1479
1495
|
})
|
|
1480
1496
|
},
|
|
1481
|
-
required: ["
|
|
1497
|
+
required: ["knowledgeBase", "doc_id", "vectors", "metadata"],
|
|
1482
1498
|
additionalProperties: false
|
|
1483
1499
|
};
|
|
1484
1500
|
var outputSchema5 = {
|
|
@@ -1509,7 +1525,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1509
1525
|
static type = "ChunkVectorUpsertTask";
|
|
1510
1526
|
static category = "Vector Store";
|
|
1511
1527
|
static title = "Add to Vector Store";
|
|
1512
|
-
static description = "Store vector embeddings with metadata in a
|
|
1528
|
+
static description = "Store vector embeddings with metadata in a knowledge base";
|
|
1513
1529
|
static cacheable = false;
|
|
1514
1530
|
static inputSchema() {
|
|
1515
1531
|
return inputSchema5;
|
|
@@ -1518,10 +1534,10 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1518
1534
|
return outputSchema5;
|
|
1519
1535
|
}
|
|
1520
1536
|
async execute(input, context) {
|
|
1521
|
-
const {
|
|
1537
|
+
const { knowledgeBase, doc_id, vectors, metadata } = input;
|
|
1522
1538
|
const vectorArray = Array.isArray(vectors) ? vectors : [vectors];
|
|
1523
1539
|
const metadataArray = Array.isArray(metadata) ? metadata : Array(vectorArray.length).fill(metadata);
|
|
1524
|
-
const
|
|
1540
|
+
const kb = knowledgeBase;
|
|
1525
1541
|
await context.updateProgress(1, "Upserting vectors");
|
|
1526
1542
|
if (vectorArray.length > 1) {
|
|
1527
1543
|
if (vectorArray.length !== metadataArray.length) {
|
|
@@ -1535,7 +1551,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1535
1551
|
metadata: metadataItem
|
|
1536
1552
|
};
|
|
1537
1553
|
});
|
|
1538
|
-
const results = await
|
|
1554
|
+
const results = await kb.upsertChunksBulk(entities);
|
|
1539
1555
|
const chunk_ids = results.map((r) => r.chunk_id);
|
|
1540
1556
|
return {
|
|
1541
1557
|
doc_id,
|
|
@@ -1544,7 +1560,7 @@ class ChunkVectorUpsertTask extends Task5 {
|
|
|
1544
1560
|
};
|
|
1545
1561
|
} else if (vectorArray.length === 1) {
|
|
1546
1562
|
const metadataItem = metadataArray[0];
|
|
1547
|
-
const result = await
|
|
1563
|
+
const result = await kb.upsertChunk({
|
|
1548
1564
|
doc_id,
|
|
1549
1565
|
vector: vectorArray[0],
|
|
1550
1566
|
metadata: metadataItem
|
|
@@ -2989,7 +3005,7 @@ Workflow17.prototype.handLandmarker = CreateWorkflow17(HandLandmarkerTask);
|
|
|
2989
3005
|
|
|
2990
3006
|
// src/task/HierarchicalChunkerTask.ts
|
|
2991
3007
|
import {
|
|
2992
|
-
|
|
3008
|
+
ChunkRecordSchema as ChunkRecordSchema2,
|
|
2993
3009
|
estimateTokens as estimateTokens2,
|
|
2994
3010
|
getChildren as getChildren2,
|
|
2995
3011
|
hasChildren as hasChildren2
|
|
@@ -3059,9 +3075,9 @@ var outputSchema8 = {
|
|
|
3059
3075
|
},
|
|
3060
3076
|
chunks: {
|
|
3061
3077
|
type: "array",
|
|
3062
|
-
items:
|
|
3078
|
+
items: ChunkRecordSchema2(),
|
|
3063
3079
|
title: "Chunks",
|
|
3064
|
-
description: "Array of chunk
|
|
3080
|
+
description: "Array of chunk records"
|
|
3065
3081
|
},
|
|
3066
3082
|
text: {
|
|
3067
3083
|
type: "array",
|
|
@@ -3227,9 +3243,8 @@ Workflow18.prototype.hierarchicalChunker = CreateWorkflow18(HierarchicalChunkerT
|
|
|
3227
3243
|
|
|
3228
3244
|
// src/task/HierarchyJoinTask.ts
|
|
3229
3245
|
import {
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
TypeDocumentDataset
|
|
3246
|
+
ChunkRecordArraySchema,
|
|
3247
|
+
TypeKnowledgeBase as TypeKnowledgeBase5
|
|
3233
3248
|
} from "@workglow/dataset";
|
|
3234
3249
|
import {
|
|
3235
3250
|
CreateWorkflow as CreateWorkflow19,
|
|
@@ -3239,9 +3254,9 @@ import {
|
|
|
3239
3254
|
var inputSchema9 = {
|
|
3240
3255
|
type: "object",
|
|
3241
3256
|
properties: {
|
|
3242
|
-
|
|
3243
|
-
title: "
|
|
3244
|
-
description: "The
|
|
3257
|
+
knowledgeBase: TypeKnowledgeBase5({
|
|
3258
|
+
title: "Knowledge Base",
|
|
3259
|
+
description: "The knowledge base to query for hierarchy"
|
|
3245
3260
|
}),
|
|
3246
3261
|
chunks: {
|
|
3247
3262
|
type: "array",
|
|
@@ -3255,7 +3270,7 @@ var inputSchema9 = {
|
|
|
3255
3270
|
title: "Chunk IDs",
|
|
3256
3271
|
description: "IDs of retrieved chunks"
|
|
3257
3272
|
},
|
|
3258
|
-
metadata:
|
|
3273
|
+
metadata: ChunkRecordArraySchema,
|
|
3259
3274
|
scores: {
|
|
3260
3275
|
type: "array",
|
|
3261
3276
|
items: { type: "number" },
|
|
@@ -3275,7 +3290,7 @@ var inputSchema9 = {
|
|
|
3275
3290
|
default: true
|
|
3276
3291
|
}
|
|
3277
3292
|
},
|
|
3278
|
-
required: ["
|
|
3293
|
+
required: ["knowledgeBase", "chunks", "chunk_ids", "metadata", "scores"],
|
|
3279
3294
|
additionalProperties: false
|
|
3280
3295
|
};
|
|
3281
3296
|
var outputSchema9 = {
|
|
@@ -3293,7 +3308,7 @@ var outputSchema9 = {
|
|
|
3293
3308
|
title: "Chunk IDs",
|
|
3294
3309
|
description: "IDs of retrieved chunks"
|
|
3295
3310
|
},
|
|
3296
|
-
metadata:
|
|
3311
|
+
metadata: ChunkRecordArraySchema,
|
|
3297
3312
|
scores: {
|
|
3298
3313
|
type: "array",
|
|
3299
3314
|
items: { type: "number" },
|
|
@@ -3324,7 +3339,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3324
3339
|
}
|
|
3325
3340
|
async execute(input, context) {
|
|
3326
3341
|
const {
|
|
3327
|
-
|
|
3342
|
+
knowledgeBase,
|
|
3328
3343
|
chunks,
|
|
3329
3344
|
chunk_ids,
|
|
3330
3345
|
metadata,
|
|
@@ -3332,7 +3347,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3332
3347
|
includeParentSummaries = true,
|
|
3333
3348
|
includeEntities = true
|
|
3334
3349
|
} = input;
|
|
3335
|
-
const
|
|
3350
|
+
const kb = knowledgeBase;
|
|
3336
3351
|
const enrichedMetadata = [];
|
|
3337
3352
|
for (let i = 0;i < chunk_ids.length; i++) {
|
|
3338
3353
|
const chunkId = chunk_ids[i];
|
|
@@ -3348,7 +3363,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3348
3363
|
continue;
|
|
3349
3364
|
}
|
|
3350
3365
|
try {
|
|
3351
|
-
const ancestors = await
|
|
3366
|
+
const ancestors = await kb.getAncestors(doc_id, leafNodeId);
|
|
3352
3367
|
const enriched = { ...originalMetadata };
|
|
3353
3368
|
if (includeParentSummaries && ancestors.length > 0) {
|
|
3354
3369
|
const parentSummaries = [];
|
|
@@ -3357,7 +3372,7 @@ class HierarchyJoinTask extends Task9 {
|
|
|
3357
3372
|
if (ancestor.enrichment?.summary) {
|
|
3358
3373
|
parentSummaries.push(ancestor.enrichment.summary);
|
|
3359
3374
|
}
|
|
3360
|
-
if (ancestor.kind === "section" && ancestor
|
|
3375
|
+
if (ancestor.kind === "section" && "title" in ancestor) {
|
|
3361
3376
|
sectionTitles.push(ancestor.title);
|
|
3362
3377
|
}
|
|
3363
3378
|
}
|
|
@@ -4008,7 +4023,7 @@ var inputSchema10 = {
|
|
|
4008
4023
|
var outputSchema10 = {
|
|
4009
4024
|
type: "object",
|
|
4010
4025
|
properties: {
|
|
4011
|
-
|
|
4026
|
+
query: {
|
|
4012
4027
|
type: "array",
|
|
4013
4028
|
items: { type: "string" },
|
|
4014
4029
|
title: "Expanded Queries",
|
|
@@ -4030,7 +4045,7 @@ var outputSchema10 = {
|
|
|
4030
4045
|
description: "Number of queries generated"
|
|
4031
4046
|
}
|
|
4032
4047
|
},
|
|
4033
|
-
required: ["
|
|
4048
|
+
required: ["query", "originalQuery", "method", "count"],
|
|
4034
4049
|
additionalProperties: false
|
|
4035
4050
|
};
|
|
4036
4051
|
|
|
@@ -4068,7 +4083,7 @@ class QueryExpanderTask extends Task10 {
|
|
|
4068
4083
|
queries.unshift(query);
|
|
4069
4084
|
}
|
|
4070
4085
|
return {
|
|
4071
|
-
queries,
|
|
4086
|
+
query: queries,
|
|
4072
4087
|
originalQuery: query,
|
|
4073
4088
|
method,
|
|
4074
4089
|
count: queries.length
|
|
@@ -6333,4 +6348,4 @@ export {
|
|
|
6333
6348
|
AiJob
|
|
6334
6349
|
};
|
|
6335
6350
|
|
|
6336
|
-
//# debugId=
|
|
6351
|
+
//# debugId=017F13834699C0A964756E2164756E21
|