@exulu/backend 0.3.0 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +496 -97
- package/dist/index.d.cts +87 -33
- package/dist/index.d.ts +87 -33
- package/dist/index.js +492 -95
- package/package.json +9 -7
- package/types/enums/eval-types.ts +5 -0
package/dist/index.cjs
CHANGED
|
@@ -36,9 +36,11 @@ __export(index_exports, {
|
|
|
36
36
|
ExuluApp: () => ExuluApp,
|
|
37
37
|
ExuluAuthentication: () => authentication,
|
|
38
38
|
ExuluChunkers: () => ExuluChunkers,
|
|
39
|
+
ExuluCli: () => cli_default,
|
|
39
40
|
ExuluContext: () => ExuluContext,
|
|
40
41
|
ExuluDatabase: () => ExuluDatabase,
|
|
41
42
|
ExuluEmbedder: () => ExuluEmbedder,
|
|
43
|
+
ExuluEval: () => ExuluEval,
|
|
42
44
|
ExuluJobs: () => ExuluJobs,
|
|
43
45
|
ExuluLogger: () => ExuluLogger,
|
|
44
46
|
ExuluQueues: () => queues,
|
|
@@ -286,6 +288,58 @@ var workflowSchema = {
|
|
|
286
288
|
}
|
|
287
289
|
]
|
|
288
290
|
};
|
|
291
|
+
/**
 * Schema of the `eval_results` entity: one stored row per evaluated test case.
 *
 * In this file the `fields` list drives the column creation of the
 * `eval_results` table in the migration and is passed to `createSDL`
 * together with the other schemas.
 */
var evalResultsSchema = {
  name: { plural: "eval_results", singular: "eval_result" },
  // [column name, column type] pairs, expanded into `{ name, type }` field objects.
  fields: [
    ["input", "longText"],
    ["output", "longText"],
    ["duration", "number"],
    ["category", "text"],
    ["metadata", "json"],
    ["result", "number"],
    ["agent_id", "text"],
    ["workflow_id", "text"],
    ["eval_type", "text"],
    ["eval_name", "text"],
    ["comment", "longText"]
  ].map(([name, type]) => ({ name, type }))
};
|
|
289
343
|
var threadsSchema = {
|
|
290
344
|
name: {
|
|
291
345
|
plural: "threads",
|
|
@@ -559,6 +613,27 @@ var up = async function(knex) {
|
|
|
559
613
|
}
|
|
560
614
|
});
|
|
561
615
|
}
|
|
616
|
+
// Create the `eval_results` table if it does not exist yet. Besides the fixed
// id/createdAt/updatedAt columns, one column is added per entry in
// `evalResultsSchema.fields`.
// NOTE(review): createdAt/updatedAt are declared with `table.date(...)` but default to
// `knex.fn.now()` — confirm a date-only column (no time of day) is what is intended.
if (!await knex.schema.hasTable("eval_results")) {
  await knex.schema.createTable("eval_results", (table) => {
    table.uuid("id").primary().defaultTo(knex.fn.uuid());
    table.date("createdAt").defaultTo(knex.fn.now());
    table.date("updatedAt").defaultTo(knex.fn.now());
    for (const field of evalResultsSchema.fields) {
      const { type, name, references, default: defaultValue } = field;
      // Skip incomplete field definitions instead of failing the migration.
      if (!type || !name) {
        continue;
      }
      if (type === "reference") {
        if (!references) {
          throw new Error("Field with type reference must have a reference definition.");
        }
        table.uuid(name).references(references.field).inTable(references.table);
        // `continue`, not `return`: returning here would exit the createTable
        // callback and silently drop every column that follows a reference field.
        continue;
      }
      mapType(table, type, sanitizeName(name), defaultValue);
    }
  });
}
|
|
562
637
|
if (!await knex.schema.hasTable("statistics")) {
|
|
563
638
|
await knex.schema.createTable("statistics", (table) => {
|
|
564
639
|
table.uuid("id").primary().defaultTo(knex.fn.uuid());
|
|
@@ -730,12 +805,10 @@ var execute = async () => {
|
|
|
730
805
|
// src/registry/classes.ts
|
|
731
806
|
var import_zod = require("zod");
|
|
732
807
|
var import_bullmq2 = require("bullmq");
|
|
733
|
-
var import_core = require("@mastra/core");
|
|
734
808
|
var import_zod2 = require("zod");
|
|
735
809
|
var fs = __toESM(require("fs"), 1);
|
|
736
810
|
var path = __toESM(require("path"), 1);
|
|
737
|
-
var
|
|
738
|
-
var import_pg = require("@mastra/pg");
|
|
811
|
+
var import_ai = require("ai");
|
|
739
812
|
|
|
740
813
|
// types/enums/statistics.ts
|
|
741
814
|
var STATISTICS_TYPE_ENUM = {
|
|
@@ -750,6 +823,11 @@ var STATISTICS_TYPE_ENUM = {
|
|
|
750
823
|
AGENT_RUN: "agent.run"
|
|
751
824
|
};
|
|
752
825
|
|
|
826
|
+
// types/enums/eval-types.ts
|
|
827
|
+
/**
 * Known evaluation types, keyed by their own string value.
 * `llm_as_judge` is the value written to the `eval_type` column when an
 * LLM-as-a-judge evaluation stores its result.
 */
var EVAL_TYPES_ENUM = Object.fromEntries(
  ["llm_as_judge"].map((evalType) => [evalType, evalType])
);
|
|
830
|
+
|
|
753
831
|
// src/registry/classes.ts
|
|
754
832
|
var import_knex4 = __toESM(require("pgvector/knex"), 1);
|
|
755
833
|
|
|
@@ -853,6 +931,83 @@ var JOB_STATUS_ENUM = {
|
|
|
853
931
|
stuck: "stuck"
|
|
854
932
|
};
|
|
855
933
|
|
|
934
|
+
// src/evals/utils/index.tsx
|
|
935
|
+
var ExuluEvalUtils = {
  /**
   * Builds a "needle in a haystack" (NIAH) test set.
   *
   * For every requested context length the haystack (`testDocument`) is cut to
   * roughly `contextlength * 4` characters (the code treats one token as four
   * characters) minus 200 characters per needle. The context is then divided
   * into depth segments of 5000 tokens, and one test case is produced per
   * segment with every needle answer inserted at a random position inside it.
   *
   * Insert positions come from `Math.random()`, so prompts differ between
   * calls; the number of cases, their categories and metadata do not.
   *
   * @param {object} options
   * @param {string} options.label - Prefix used in each test case category.
   * @param {number[]} options.contextlengths - Context sizes in tokens.
   * @param {{question: string, answer: string}[]} options.needles - Questions
   *   listed in the prompt and the answers hidden in the text.
   * @param {string} options.testDocument - Haystack text the needles are inserted into.
   * @returns {{prompt: string, category: string, metadata: object}[]} Flat list
   *   of test cases across all context lengths and depth segments.
   */
  niahTestSet: ({
    label,
    contextlengths,
    needles,
    testDocument
  }) => {
    const charsPerToken = 4;
    const depthInterval = 5e3 * charsPerToken;
    // The question list is identical for every test case, so build it once.
    const questionList = needles.map((needle, index) => `- ${index + 1}: ${needle.question}`).join("\n");
    const testCases = contextlengths.map((contextlength) => {
      const totalChars = contextlength * charsPerToken;
      // Clamp at 0: a negative end index would make slice() count from the end
      // of the document instead of producing an empty haystack.
      const testText = testDocument.slice(0, Math.max(0, totalChars - needles.length * 200));
      // Always keep at least one segment so contexts shorter than one depth
      // interval still yield a test case instead of silently producing none.
      const segmentCount = Math.max(1, Math.floor(totalChars / depthInterval));
      const depths = Array.from(
        { length: segmentCount },
        (_, i) => Math.min((i + 1) * depthInterval, totalChars)
      );
      console.log("[EXULU] contextlength: ", {
        tokens: contextlength,
        chars: totalChars,
        depths
      });
      return depths.map((depth, index) => {
        const start = index === 0 ? 0 : depths[index - 1];
        // The last segment runs to the end of the context so no tail is left untested.
        const end = index === depths.length - 1 ? totalChars : depths[index];
        console.log("[EXULU] Niah positions: ", {
          start,
          end,
          depth,
          index
        });
        const insertions = needles.map((needle) => ({
          position: start + Math.floor(Math.random() * (end - start)),
          needle: needle.answer
        }));
        // Insert from the back so earlier insert positions are not shifted.
        insertions.sort((a, b) => b.position - a.position);
        console.log("[EXULU] Niah insertions: ", insertions);
        let modifiedTestText = testText;
        for (const { position, needle } of insertions) {
          const insertionPosition = Math.min(position, modifiedTestText.length);
          modifiedTestText = modifiedTestText.slice(0, insertionPosition) + needle + modifiedTestText.slice(insertionPosition);
        }
        return {
          prompt: `You are a helpful assistant.

You are given a text.

You need to answer the following question, using only the information from the text provided below. Do not hallucinate
or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".

${questionList}

The text is:

${modifiedTestText}
`,
          category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`,
          metadata: {
            contextLength: contextlength,
            depth,
            needles
          }
        };
      });
    });
    const flattenedTestCases = testCases.flat();
    console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
    console.table(flattenedTestCases.map((data) => ({
      chars: data.prompt?.length || 0,
      tokens: data.prompt?.length / 4 || 0,
      category: data.category,
      metadata: data.metadata
    })));
    return flattenedTestCases;
  }
};
|
|
1010
|
+
|
|
856
1011
|
// src/registry/classes.ts
|
|
857
1012
|
function generateSlug(name) {
|
|
858
1013
|
const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
|
|
@@ -881,72 +1036,52 @@ var ExuluAgent = class {
|
|
|
881
1036
|
description = "";
|
|
882
1037
|
slug = "";
|
|
883
1038
|
streaming = false;
|
|
884
|
-
type;
|
|
885
|
-
outputSchema;
|
|
886
1039
|
rateLimit;
|
|
887
1040
|
config;
|
|
888
|
-
memory;
|
|
1041
|
+
// private memory: Memory | undefined; // TODO remove mastra and do own implementation
|
|
889
1042
|
tools;
|
|
890
|
-
|
|
1043
|
+
evals;
|
|
1044
|
+
model;
|
|
891
1045
|
capabilities;
|
|
892
|
-
constructor({ id, name, description,
|
|
1046
|
+
constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
|
|
893
1047
|
this.id = id;
|
|
894
1048
|
this.name = name;
|
|
895
|
-
this.
|
|
1049
|
+
this.evals = evals;
|
|
896
1050
|
this.description = description;
|
|
897
|
-
this.outputSchema = outputSchema;
|
|
898
1051
|
this.rateLimit = rateLimit;
|
|
899
1052
|
this.tools = tools;
|
|
900
1053
|
this.config = config;
|
|
901
1054
|
this.capabilities = capabilities;
|
|
902
1055
|
this.slug = `/agents/${generateSlug(this.name)}/run`;
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
memory: this.memory ? this.memory : void 0
|
|
909
|
-
});
|
|
1056
|
+
this.model = this.config.model;
|
|
1057
|
+
}
|
|
1058
|
+
generate = async ({ prompt, stream }) => {
|
|
1059
|
+
if (!this.model) {
|
|
1060
|
+
throw new Error("Model is required for streaming.");
|
|
910
1061
|
}
|
|
911
|
-
if (config
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
user: process.env.POSTGRES_DB_USER || "",
|
|
919
|
-
database: "exulu",
|
|
920
|
-
// putting it into an own database that is not managed by exulu
|
|
921
|
-
password: process.env.POSTGRES_DB_PASSWORD || "",
|
|
922
|
-
ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
|
|
923
|
-
}),
|
|
924
|
-
...config?.memory.vector ? {
|
|
925
|
-
vector: new import_pg.PgVector({
|
|
926
|
-
connectionString
|
|
927
|
-
})
|
|
928
|
-
} : {},
|
|
929
|
-
options: {
|
|
930
|
-
lastMessages: config?.memory.lastMessages || 10,
|
|
931
|
-
semanticRecall: {
|
|
932
|
-
topK: config?.memory.semanticRecall.topK || 3,
|
|
933
|
-
messageRange: config?.memory.semanticRecall.messageRange || 2
|
|
934
|
-
}
|
|
935
|
-
}
|
|
1062
|
+
if (this.config.outputSchema) {
|
|
1063
|
+
if (stream) {
|
|
1064
|
+
}
|
|
1065
|
+
const { object } = await (0, import_ai.generateObject)({
|
|
1066
|
+
model: this.model,
|
|
1067
|
+
schema: this.config.outputSchema,
|
|
1068
|
+
prompt
|
|
936
1069
|
});
|
|
1070
|
+
return object;
|
|
937
1071
|
}
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1072
|
+
if (stream) {
|
|
1073
|
+
const result = (0, import_ai.streamText)({
|
|
1074
|
+
model: this.model,
|
|
1075
|
+
prompt
|
|
1076
|
+
});
|
|
1077
|
+
const text2 = await result.text;
|
|
1078
|
+
return text2;
|
|
942
1079
|
}
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
type: STATISTICS_TYPE_ENUM.AGENT_RUN,
|
|
947
|
-
trigger: "agent"
|
|
1080
|
+
const { text } = await (0, import_ai.generateText)({
|
|
1081
|
+
model: this.model,
|
|
1082
|
+
prompt
|
|
948
1083
|
});
|
|
949
|
-
return
|
|
1084
|
+
return text;
|
|
950
1085
|
};
|
|
951
1086
|
};
|
|
952
1087
|
var ExuluEmbedder = class {
|
|
@@ -1142,35 +1277,117 @@ var ExuluLogger = class {
|
|
|
1142
1277
|
}
|
|
1143
1278
|
}
|
|
1144
1279
|
};
|
|
1280
|
+
/**
 * A named evaluation. `create` holds factories that turn the evaluation into a
 * runnable suite: `{ name, description, testcases, run }`.
 */
var ExuluEval = class {
  name;
  description;
  /**
   * @param {object} options
   * @param {string} options.name - Stored as `eval_name` with every result row.
   * @param {string} options.description - Description exposed on created suites.
   */
  constructor({ name, description }) {
    this.name = name;
    this.description = description;
  }
  create = {
    LlmAsAJudge: {
      /**
       * Needle-in-a-haystack suite scored by a judge model.
       *
       * Test cases come from `ExuluEvalUtils.niahTestSet`; when no
       * `contextlengths` are given, 5000, 30000, 50000 and 128000 are used.
       *
       * `run({ data, runner })` rejects workflow runners, requires
       * `runner.agent`, generates an answer with the agent when `data.result`
       * is empty (writing it back onto `data`), asks `model` for a
       * `{ correctnessScore, comment }` verdict, inserts one row into
       * `eval_results` and resolves to `{ score, comment }`.
       *
       * NOTE(review): nothing in this function measures or sets `data.duration`;
       * the `duration` column is only filled when the caller provides it.
       */
      niah: ({ label, model, needles, testDocument, contextlengths }) => {
        const testcases = ExuluEvalUtils.niahTestSet({
          label,
          contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3],
          needles,
          testDocument
        });
        const run = async ({ data, runner }) => {
          if (runner.workflow) {
            throw new Error("Workflows are not supported for the needle in a haystack eval.");
          }
          if (!runner.agent) {
            throw new Error("Agent is required for the needle in a haystack eval.");
          }
          // Only call the agent when the test case does not already carry an answer.
          if (!data.result) {
            if (!data.prompt) {
              throw new Error("Prompt is required for running an agent.");
            }
            data.result = await runner.agent.generate({
              prompt: data.prompt,
              stream: false
            });
          }
          const verdictSchema = import_zod2.z.object({
            correctnessScore: import_zod2.z.number(),
            comment: import_zod2.z.string()
          });
          const { object: verdict } = await (0, import_ai.generateObject)({
            model,
            maxRetries: 3,
            schema: verdictSchema,
            prompt: `You are checking if the below "actual_answers" contain the correct information as
presented in the "correct_answers" section to calculate the correctness score.

The correctness score should be a number between 0 and 1. 1 is the highest score.

For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the
score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the
score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the
score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.

You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,
punctuation, etc., if the content of the actual answer is still correct.

Also provide a comment on how you came to your conclusion.

<actual_answers>
${data.result}
</actual_answers>

<correct_answers>
${needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n")}
</correct_answers>`
          });
          console.log("[EXULU] eval result", verdict);
          const { db: database } = await postgresClient();
          const row = {
            input: data.prompt,
            output: data.result,
            duration: data.duration,
            result: verdict.correctnessScore,
            agent_id: runner.agent.id || void 0,
            eval_type: EVAL_TYPES_ENUM.llm_as_judge,
            eval_name: this.name,
            comment: verdict.comment,
            category: data.category,
            metadata: data.metadata,
            createdAt: database.fn.now(),
            updatedAt: database.fn.now()
          };
          await database("eval_results").insert(row);
          return {
            score: verdict.correctnessScore,
            comment: verdict.comment
          };
        };
        return {
          name: this.name,
          description: this.description,
          testcases,
          run
        };
      }
    }
  };
};
|
|
1145
1372
|
var ExuluTool = class {
|
|
1146
1373
|
id;
|
|
1147
1374
|
name;
|
|
1148
1375
|
description;
|
|
1149
|
-
|
|
1150
|
-
outputSchema;
|
|
1376
|
+
parameters;
|
|
1151
1377
|
type;
|
|
1152
|
-
|
|
1153
|
-
constructor({ id, name, description,
|
|
1378
|
+
tool;
|
|
1379
|
+
constructor({ id, name, description, parameters, type, execute: execute2 }) {
|
|
1154
1380
|
this.id = id;
|
|
1155
1381
|
this.name = name;
|
|
1156
1382
|
this.description = description;
|
|
1157
|
-
this.
|
|
1158
|
-
this.outputSchema = outputSchema;
|
|
1383
|
+
this.parameters = parameters;
|
|
1159
1384
|
this.type = type;
|
|
1160
|
-
this.
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
throw new Error("Tool has no execute function.");
|
|
1165
|
-
}
|
|
1166
|
-
updateStatistic({
|
|
1167
|
-
name: "count",
|
|
1168
|
-
label: this.name,
|
|
1169
|
-
type: STATISTICS_TYPE_ENUM.TOOL_CALL,
|
|
1170
|
-
trigger: "agent"
|
|
1385
|
+
this.tool = (0, import_ai.tool)({
|
|
1386
|
+
description,
|
|
1387
|
+
parameters,
|
|
1388
|
+
execute: execute2
|
|
1171
1389
|
});
|
|
1172
|
-
|
|
1173
|
-
};
|
|
1390
|
+
}
|
|
1174
1391
|
};
|
|
1175
1392
|
var ExuluContext = class {
|
|
1176
1393
|
id;
|
|
@@ -1572,21 +1789,9 @@ var ExuluContext = class {
|
|
|
1572
1789
|
id: this.id,
|
|
1573
1790
|
name: `${this.name} context`,
|
|
1574
1791
|
type: "context",
|
|
1575
|
-
|
|
1792
|
+
parameters: import_zod2.z.object({
|
|
1576
1793
|
query: import_zod2.z.string()
|
|
1577
1794
|
}),
|
|
1578
|
-
outputSchema: import_zod2.z.object({
|
|
1579
|
-
// todo check if result format is still correct based on above getItems function
|
|
1580
|
-
results: import_zod2.z.array(import_zod2.z.object({
|
|
1581
|
-
count: import_zod2.z.number(),
|
|
1582
|
-
results: import_zod2.z.array(import_zod2.z.object({
|
|
1583
|
-
id: import_zod2.z.string(),
|
|
1584
|
-
content: import_zod2.z.string(),
|
|
1585
|
-
metadata: import_zod2.z.record(import_zod2.z.any())
|
|
1586
|
-
})),
|
|
1587
|
-
errors: import_zod2.z.array(import_zod2.z.string()).optional()
|
|
1588
|
-
}))
|
|
1589
|
-
}),
|
|
1590
1795
|
description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
|
|
1591
1796
|
execute: async ({ context }) => {
|
|
1592
1797
|
return await this.getItems({
|
|
@@ -2746,7 +2951,7 @@ var createUppyRoutes = async (app) => {
|
|
|
2746
2951
|
};
|
|
2747
2952
|
|
|
2748
2953
|
// src/registry/routes.ts
|
|
2749
|
-
var
|
|
2954
|
+
var import_utils2 = require("@apollo/utils.keyvaluecache");
|
|
2750
2955
|
var global_queues = {
|
|
2751
2956
|
logs_cleaner: "logs-cleaner"
|
|
2752
2957
|
};
|
|
@@ -2844,10 +3049,10 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2844
3049
|
} else {
|
|
2845
3050
|
console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
|
|
2846
3051
|
}
|
|
2847
|
-
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
|
|
3052
|
+
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
|
|
2848
3053
|
console.log("[EXULU] graphql server");
|
|
2849
3054
|
const server = new import_server3.ApolloServer({
|
|
2850
|
-
cache: new
|
|
3055
|
+
cache: new import_utils2.InMemoryLRUCache(),
|
|
2851
3056
|
schema,
|
|
2852
3057
|
introspection: true
|
|
2853
3058
|
});
|
|
@@ -2912,13 +3117,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2912
3117
|
});
|
|
2913
3118
|
});
|
|
2914
3119
|
app.get("/tools", async (req, res) => {
|
|
2915
|
-
res.status(200).json(tools.map((
|
|
2916
|
-
id:
|
|
2917
|
-
name:
|
|
2918
|
-
description:
|
|
2919
|
-
type:
|
|
2920
|
-
inputSchema:
|
|
2921
|
-
outputSchema:
|
|
3120
|
+
res.status(200).json(tools.map((tool2) => ({
|
|
3121
|
+
id: tool2.id,
|
|
3122
|
+
name: tool2.name,
|
|
3123
|
+
description: tool2.description,
|
|
3124
|
+
type: tool2.type || "tool",
|
|
3125
|
+
inputSchema: tool2.inputSchema ? (0, import_zodex.zerialize)(tool2.inputSchema) : null,
|
|
3126
|
+
outputSchema: tool2.outputSchema ? (0, import_zodex.zerialize)(tool2.outputSchema) : null
|
|
2922
3127
|
})));
|
|
2923
3128
|
});
|
|
2924
3129
|
app.get("/tools/:id", async (req, res) => {
|
|
@@ -2929,14 +3134,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2929
3134
|
});
|
|
2930
3135
|
return;
|
|
2931
3136
|
}
|
|
2932
|
-
const
|
|
2933
|
-
if (!
|
|
3137
|
+
const tool2 = tools.find((tool3) => tool3.id === id);
|
|
3138
|
+
if (!tool2) {
|
|
2934
3139
|
res.status(400).json({
|
|
2935
3140
|
message: "Tool not found."
|
|
2936
3141
|
});
|
|
2937
3142
|
return;
|
|
2938
3143
|
}
|
|
2939
|
-
res.status(200).json(
|
|
3144
|
+
res.status(200).json(tool2);
|
|
2940
3145
|
});
|
|
2941
3146
|
const deleteItem = async ({
|
|
2942
3147
|
id,
|
|
@@ -4029,6 +4234,198 @@ var ExuluApp = class {
|
|
|
4029
4234
|
|
|
4030
4235
|
// src/index.ts
|
|
4031
4236
|
var import_chonkie = require("chonkie");
|
|
4237
|
+
|
|
4238
|
+
// src/cli/index.tsx
|
|
4239
|
+
var import_react2 = require("react");
|
|
4240
|
+
var import_ink4 = require("ink");
|
|
4241
|
+
var import_ui5 = require("@inkjs/ui");
|
|
4242
|
+
var import_patch_console = __toESM(require("patch-console"), 1);
|
|
4243
|
+
|
|
4244
|
+
// src/cli/components/nav.tsx
|
|
4245
|
+
var import_ui = require("@inkjs/ui");
|
|
4246
|
+
var import_ink = require("ink");
|
|
4247
|
+
var import_jsx_runtime = require("react/jsx-runtime");
|
|
4248
|
+
/** Entries of the CLI's top-level menu. */
var nav = [
  { label: "Agents", value: "agents" },
  { label: "Exit", value: "exit" }
];
/**
 * Top-level menu of the CLI.
 * Choosing "exit" calls the exit function from `useApp()`; every choice,
 * including "exit", is then passed to `setView`.
 *
 * @param {{setView: (value: string) => void}} props
 */
var Nav = ({ setView }) => {
  const { exit: quitApp } = (0, import_ink.useApp)();
  const handleChange = (value) => {
    if (value === "exit") {
      quitApp();
    }
    setView(value);
  };
  return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(import_ui.Select, {
    options: nav,
    onChange: handleChange
  });
};
var nav_default = Nav;
|
|
4268
|
+
|
|
4269
|
+
// src/cli/components/agent-selector.tsx
|
|
4270
|
+
var import_ink2 = require("ink");
|
|
4271
|
+
var import_ui2 = require("@inkjs/ui");
|
|
4272
|
+
var import_jsx_runtime2 = require("react/jsx-runtime");
|
|
4273
|
+
/**
 * Lists the agents of the given app and lets the user pick one.
 * On selection the agent is looked up with `exulu.agent(id)`; when found it is
 * handed to `setAgent` and its `evals` (or an empty list) to `setEvaluations`.
 * An unknown id is logged with `console.error` and otherwise ignored.
 *
 * @param {object} props
 * @param {object} props.exulu - App exposing `agents` and `agent(id)`.
 * @param {Function} props.setAgent
 * @param {Function} props.setEvaluations
 */
var AgentSelector = ({ exulu, setAgent, setEvaluations }) => {
  const options = exulu.agents.map(({ name, id }) => ({ label: name, value: id }));
  const handleChange = (value) => {
    console.log("selected agent", value);
    const selected = exulu.agent(value);
    if (!selected) {
      console.error("Agent not found", value);
      return;
    }
    setAgent(selected);
    setEvaluations(selected.evals || []);
  };
  const prompt = /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ink2.Text, { children: "Please select an agent:" });
  const picker = /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ui2.Select, { options, onChange: handleChange });
  return /* @__PURE__ */ (0, import_jsx_runtime2.jsxs)(import_jsx_runtime2.Fragment, { children: [prompt, picker] });
};
var agent_selector_default = AgentSelector;
|
|
4295
|
+
|
|
4296
|
+
// src/cli/components/eval-selector.tsx
|
|
4297
|
+
var import_ui3 = require("@inkjs/ui");
|
|
4298
|
+
var import_jsx_runtime3 = require("react/jsx-runtime");
|
|
4299
|
+
/**
 * Select list over the given evaluations, labelled and keyed by
 * `evaluation.runner.name`. The evaluation whose runner name matches the
 * chosen value is passed to `setEvaluation`; a value without a match is ignored.
 *
 * @param {object} props
 * @param {Array<{runner: {name: string}}>} props.evaluations
 * @param {Function} props.setEvaluation
 */
var EvalSelector = ({ evaluations, setEvaluation }) => {
  const options = evaluations.map(({ runner }) => ({
    label: runner.name,
    value: runner.name
  }));
  const handleChange = (value) => {
    console.log("selected eval", value);
    const match = evaluations.find((candidate) => candidate.runner.name === value);
    if (!match) {
      return;
    }
    setEvaluation(match);
  };
  return /* @__PURE__ */ (0, import_jsx_runtime3.jsx)(import_ui3.Select, { options, onChange: handleChange });
};
var eval_selector_default = EvalSelector;
|
|
4312
|
+
|
|
4313
|
+
// src/cli/components/eval-actions.tsx
|
|
4314
|
+
var import_react = require("react");
|
|
4315
|
+
var import_ui4 = require("@inkjs/ui");
|
|
4316
|
+
var import_ink3 = require("ink");
|
|
4317
|
+
var import_jsx_runtime4 = require("react/jsx-runtime");
|
|
4318
|
+
/**
 * Action screen for one evaluation: offers "Run evaluation" / "Go back",
 * shows a progress bar while the test cases run one after another, and lists
 * the collected scores once progress reaches 100.
 *
 * @param {object} props
 * @param {object} props.agent - Agent handed to every `runner.run` call.
 * @param {object} props.evaluation - Evaluation whose `runner` provides
 *   `name`, `testcases` and `run`.
 * @param {Function} props.setEvaluation - Called with `undefined` on "Go back".
 */
var EvalActions = ({ agent, evaluation, setEvaluation }) => {
  const [progress, setProgress] = (0, import_react.useState)(0);
  const [results, setResults] = (0, import_react.useState)([]);
  const [running, setRunning] = (0, import_react.useState)();
  const run = async (evaluation2) => {
    const testCases = evaluation2.runner.testcases;
    // Validate before touching `.length`; otherwise a missing list surfaces as a TypeError.
    if (!testCases) {
      throw new Error("No test cases found");
    }
    const total = testCases.length;
    setRunning({
      label: evaluation2.runner.name
    });
    try {
      let completed = 0;
      for (const testCase of testCases) {
        const result = await evaluation2.runner.run({
          data: testCase,
          runner: {
            agent
          }
        });
        completed++;
        setProgress(Math.round(completed / total * 100));
        const entry = {
          name: evaluation2.runner.name,
          prompt: `${testCase.prompt?.slice(0, 100) ?? ""}...`,
          score: result.score,
          comment: result.comment
        };
        // Functional update: `results` captured by this closure is the list from the
        // render that started the run, so spreading it would keep only the last entry.
        setResults((previous) => [...previous, entry]);
      }
    } finally {
      // Leave the "running" state even when a test case throws.
      setRunning(void 0);
    }
  };
  if (progress === 100) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ink3.Text, { children: "Evaluations completed." }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList, { children: results.map((result, index) => /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        result.name,
        ": ",
        result.score,
        " - ",
        result.comment
      ] }) }, index)) })
    ] });
  }
  if (running) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        "Running ",
        running.label,
        "..."
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.ProgressBar, { value: progress })
    ] });
  }
  return /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.Select, { options: [{
    label: "Run evaluation",
    value: "run"
  }, {
    label: "Go back",
    value: "back"
  }], onChange: (value) => {
    if (value === "back") {
      setEvaluation(void 0);
    }
    if (value === "run") {
      // The handler cannot await, so report failures instead of leaving the promise unhandled.
      run(evaluation).catch((error) => {
        console.error("[EXULU] Evaluation run failed:", error);
      });
    }
  } });
};
var eval_actions_default = EvalActions;
|
|
4388
|
+
|
|
4389
|
+
// src/cli/index.tsx
|
|
4390
|
+
var import_jsx_runtime5 = require("react/jsx-runtime");
|
|
4391
|
+
/**
 * Root component of the CLI. Shows captured console output at the top and,
 * below it, the current step of the flow: navigation → agent selection →
 * evaluation selection → evaluation actions.
 *
 * @param {object} props
 * @param {object} props.exulu - App instance passed on to the agent selector.
 */
var Main = ({ exulu }) => {
  const [logs, setLogs] = (0, import_react2.useState)([]);
  const [view, setView] = (0, import_react2.useState)();
  const [agent, setAgent] = (0, import_react2.useState)();
  const [evaluations, setEvaluations] = (0, import_react2.useState)([]);
  const [evaluation, setEvaluation] = (0, import_react2.useState)();
  // Patch the console once on mount and undo it on unmount. Patching in the render
  // body would register another handler on every render without ever restoring, and
  // each handler would append to the `logs` snapshot of the render that created it.
  (0, import_react2.useEffect)(() => {
    const restore = (0, import_patch_console.default)((stream, data) => {
      setLogs((previous) => [...previous, data]);
    });
    return restore;
  }, []);
  return /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Box, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: "Logs:" }),
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList, { children: logs.map((log, index) => /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: log }) }, index)) }),
    !view && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(nav_default, { setView }),
    view === "agents" && !agent && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(agent_selector_default, { exulu, setAgent, setEvaluations }),
    view === "agents" && agent && !evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        'Selected agent "',
        agent.name,
        '". Please select an evaluation:'
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_selector_default, { evaluations, setEvaluation })
    ] }),
    view === "agents" && agent && evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        "Selected evaluation: ",
        evaluation.runner.name
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_actions_default, { agent, evaluation, setEvaluation })
    ] })
  ] });
};
|
|
4422
|
+
/**
 * CLI entry object (exported from this bundle as `ExuluCli`).
 * `run(exulu)` renders the `Main` component for the given app via ink's `render`.
 */
var cli_default = {
  run(exulu) {
    const root = /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(Main, { exulu });
    (0, import_ink4.render)(root);
  }
};
|
|
4427
|
+
|
|
4428
|
+
// src/index.ts
|
|
4032
4429
|
var ExuluJobs = {
|
|
4033
4430
|
redis: redisClient,
|
|
4034
4431
|
jobs: {
|
|
@@ -4060,9 +4457,11 @@ var ExuluDatabase = {
|
|
|
4060
4457
|
ExuluApp,
|
|
4061
4458
|
ExuluAuthentication,
|
|
4062
4459
|
ExuluChunkers,
|
|
4460
|
+
ExuluCli,
|
|
4063
4461
|
ExuluContext,
|
|
4064
4462
|
ExuluDatabase,
|
|
4065
4463
|
ExuluEmbedder,
|
|
4464
|
+
ExuluEval,
|
|
4066
4465
|
ExuluJobs,
|
|
4067
4466
|
ExuluLogger,
|
|
4068
4467
|
ExuluQueues,
|