@exulu/backend 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +496 -97
- package/dist/index.d.cts +87 -33
- package/dist/index.d.ts +87 -33
- package/dist/index.js +492 -95
- package/package.json +9 -7
- package/types/enums/eval-types.ts +5 -0
package/dist/index.js
CHANGED
|
@@ -244,6 +244,58 @@ var workflowSchema = {
|
|
|
244
244
|
}
|
|
245
245
|
]
|
|
246
246
|
};
|
|
247
|
+
// Describes the `eval_results` entity: its singular/plural names and its
// columns, each expressed as a { name, type } pair.
var evalResultsSchema = {
  name: { plural: "eval_results", singular: "eval_result" },
  // Columns are listed as [name, type] tuples and expanded into field objects.
  fields: [
    ["input", "longText"],
    ["output", "longText"],
    ["duration", "number"],
    ["category", "text"],
    ["metadata", "json"],
    ["result", "number"],
    ["agent_id", "text"],
    ["workflow_id", "text"],
    ["eval_type", "text"],
    ["eval_name", "text"],
    ["comment", "longText"]
  ].map(([name, type]) => ({ name, type }))
};
|
|
247
299
|
var threadsSchema = {
|
|
248
300
|
name: {
|
|
249
301
|
plural: "threads",
|
|
@@ -517,6 +569,27 @@ var up = async function(knex) {
|
|
|
517
569
|
}
|
|
518
570
|
});
|
|
519
571
|
}
|
|
572
|
+
if (!await knex.schema.hasTable("eval_results")) {
|
|
573
|
+
await knex.schema.createTable("eval_results", (table) => {
|
|
574
|
+
table.uuid("id").primary().defaultTo(knex.fn.uuid());
|
|
575
|
+
table.date("createdAt").defaultTo(knex.fn.now());
|
|
576
|
+
table.date("updatedAt").defaultTo(knex.fn.now());
|
|
577
|
+
for (const field of evalResultsSchema.fields) {
|
|
578
|
+
const { type, name, references, default: defaultValue } = field;
|
|
579
|
+
if (!type || !name) {
|
|
580
|
+
continue;
|
|
581
|
+
}
|
|
582
|
+
if (type === "reference") {
|
|
583
|
+
if (!references) {
|
|
584
|
+
throw new Error("Field with type reference must have a reference definition.");
|
|
585
|
+
}
|
|
586
|
+
table.uuid(name).references(references.field).inTable(references.table);
|
|
587
|
+
return;
|
|
588
|
+
}
|
|
589
|
+
mapType(table, type, sanitizeName(name), defaultValue);
|
|
590
|
+
}
|
|
591
|
+
});
|
|
592
|
+
}
|
|
520
593
|
if (!await knex.schema.hasTable("statistics")) {
|
|
521
594
|
await knex.schema.createTable("statistics", (table) => {
|
|
522
595
|
table.uuid("id").primary().defaultTo(knex.fn.uuid());
|
|
@@ -688,12 +761,10 @@ var execute = async () => {
|
|
|
688
761
|
// src/registry/classes.ts
|
|
689
762
|
import "zod";
|
|
690
763
|
import "bullmq";
|
|
691
|
-
import { Agent as MastraAgent } from "@mastra/core";
|
|
692
764
|
import { z } from "zod";
|
|
693
765
|
import * as fs from "fs";
|
|
694
766
|
import * as path from "path";
|
|
695
|
-
import {
|
|
696
|
-
import { PostgresStore, PgVector } from "@mastra/pg";
|
|
767
|
+
import { generateObject, generateText, streamText, tool } from "ai";
|
|
697
768
|
|
|
698
769
|
// types/enums/statistics.ts
|
|
699
770
|
var STATISTICS_TYPE_ENUM = {
|
|
@@ -708,6 +779,11 @@ var STATISTICS_TYPE_ENUM = {
|
|
|
708
779
|
AGENT_RUN: "agent.run"
|
|
709
780
|
};
|
|
710
781
|
|
|
782
|
+
// types/enums/eval-types.ts
|
|
783
|
+
// Identifiers of the supported evaluation strategies. Each key maps to the
// string of the same name.
var EVAL_TYPES_ENUM = { llm_as_judge: "llm_as_judge" };
|
|
786
|
+
|
|
711
787
|
// src/registry/classes.ts
|
|
712
788
|
import pgvector2 from "pgvector/knex";
|
|
713
789
|
|
|
@@ -811,6 +887,83 @@ var JOB_STATUS_ENUM = {
|
|
|
811
887
|
stuck: "stuck"
|
|
812
888
|
};
|
|
813
889
|
|
|
890
|
+
// src/evals/utils/index.tsx
|
|
891
|
+
/**
 * Helpers for generating evaluation test sets.
 */
var ExuluEvalUtils = {
  /**
   * Builds a "needle in a haystack" (NIAH) test set.
   *
   * For every requested context length the document is cut down to roughly
   * that many tokens (estimated at 4 characters per token) and divided into
   * depth buckets of 5,000 tokens. One test case is produced per bucket; in
   * each case every needle answer is inserted at a random position inside
   * that bucket and the prompt asks all needle questions.
   *
   * Context lengths below one bucket (5,000 tokens) produce a single test
   * case covering the whole context instead of silently producing none.
   *
   * @param {object} options
   * @param {string} options.label - Prefix used in each test case's category.
   * @param {number[]} options.contextlengths - Context sizes, in tokens.
   * @param {{ question: string, answer: string }[]} options.needles - Facts to
   *   hide (`answer`) and the questions that ask for them (`question`).
   * @param {string} options.testDocument - Text used as the haystack.
   * @returns {{ prompt: string, category: string, metadata: object }[]} Flat
   *   list of test cases for all context lengths and depth buckets.
   */
  niahTestSet: ({
    label,
    contextlengths,
    needles,
    testDocument
  }) => {
    const charsPerToken = 4;
    // Characters reserved per needle so the final prompt stays near the target size.
    const needleCharBudget = 200;
    const depthInterval = 5e3 * charsPerToken;
    const testCases = contextlengths.map((contextlength) => {
      const contextChars = contextlength * charsPerToken;
      // Clamp at 0: a negative end index would make slice() count from the end
      // of the document and return far more text than the requested context.
      const haystackChars = Math.max(0, contextChars - needles.length * needleCharBudget);
      const testText = testDocument.slice(0, haystackChars);
      const bucketCount = Math.floor(contextChars / depthInterval);
      let depths = [];
      if (bucketCount > 0) {
        depths = Array.from({ length: bucketCount }, (_, i) => (i + 1) * depthInterval);
      } else if (contextChars > 0) {
        // Context is smaller than one bucket: treat the whole context as a single bucket.
        depths = [contextChars];
      }
      console.log("[EXULU] contextlength: ", {
        tokens: contextlength,
        chars: contextChars,
        depths
      });
      return depths.map((depth, index) => {
        const first = index === 0;
        const last = index === depths.length - 1;
        const start = first ? 0 : depths[index - 1];
        // The last bucket is stretched to the end of the context.
        const end = last ? contextChars : depths[index];
        console.log("[EXULU] Niah positions: ", {
          start,
          end,
          depth,
          index
        });
        const insertions = needles.map((needle) => ({
          position: start + Math.floor(Math.random() * (end - start)),
          needle: needle.answer
        }));
        // Insert from the highest position down so earlier insertions do not
        // shift the positions of the ones still to be applied.
        insertions.sort((a, b) => b.position - a.position);
        console.log("[EXULU] Niah insertions: ", insertions);
        let modifiedTestText = testText;
        for (const { position, needle } of insertions) {
          const insertionPosition = Math.min(position, modifiedTestText.length);
          modifiedTestText = modifiedTestText.slice(0, insertionPosition) + needle + modifiedTestText.slice(insertionPosition);
        }
        return {
          prompt: `You are a helpful assistant.

You are given a text.

You need to answer the following question, using only the information from the text provided below. Do not hallucinate
or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".

${needles.map((needle, index2) => `- ${index2 + 1}: ${needle.question}`).join("\n")}

The text is:

${modifiedTestText}
`,
          category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`,
          metadata: {
            contextLength: contextlength,
            depth,
            needles
          }
        };
      });
    });
    const flattenedTestCases = testCases.flat();
    console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
    console.table(flattenedTestCases.map((data) => ({
      chars: data.prompt?.length || 0,
      tokens: data.prompt?.length / 4 || 0,
      category: data.category,
      metadata: data.metadata
    })));
    return flattenedTestCases;
  }
};
|
|
966
|
+
|
|
814
967
|
// src/registry/classes.ts
|
|
815
968
|
function generateSlug(name) {
|
|
816
969
|
const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
|
|
@@ -839,72 +992,52 @@ var ExuluAgent = class {
|
|
|
839
992
|
description = "";
|
|
840
993
|
slug = "";
|
|
841
994
|
streaming = false;
|
|
842
|
-
type;
|
|
843
|
-
outputSchema;
|
|
844
995
|
rateLimit;
|
|
845
996
|
config;
|
|
846
|
-
memory;
|
|
997
|
+
// private memory: Memory | undefined; // TODO remove mastra and do own implementation
|
|
847
998
|
tools;
|
|
848
|
-
|
|
999
|
+
evals;
|
|
1000
|
+
model;
|
|
849
1001
|
capabilities;
|
|
850
|
-
constructor({ id, name, description,
|
|
1002
|
+
constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
|
|
851
1003
|
this.id = id;
|
|
852
1004
|
this.name = name;
|
|
853
|
-
this.
|
|
1005
|
+
this.evals = evals;
|
|
854
1006
|
this.description = description;
|
|
855
|
-
this.outputSchema = outputSchema;
|
|
856
1007
|
this.rateLimit = rateLimit;
|
|
857
1008
|
this.tools = tools;
|
|
858
1009
|
this.config = config;
|
|
859
1010
|
this.capabilities = capabilities;
|
|
860
1011
|
this.slug = `/agents/${generateSlug(this.name)}/run`;
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
memory: this.memory ? this.memory : void 0
|
|
867
|
-
});
|
|
1012
|
+
this.model = this.config.model;
|
|
1013
|
+
}
|
|
1014
|
+
generate = async ({ prompt, stream }) => {
|
|
1015
|
+
if (!this.model) {
|
|
1016
|
+
throw new Error("Model is required for streaming.");
|
|
868
1017
|
}
|
|
869
|
-
if (config
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
user: process.env.POSTGRES_DB_USER || "",
|
|
877
|
-
database: "exulu",
|
|
878
|
-
// putting it into an own database that is not managed by exulu
|
|
879
|
-
password: process.env.POSTGRES_DB_PASSWORD || "",
|
|
880
|
-
ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
|
|
881
|
-
}),
|
|
882
|
-
...config?.memory.vector ? {
|
|
883
|
-
vector: new PgVector({
|
|
884
|
-
connectionString
|
|
885
|
-
})
|
|
886
|
-
} : {},
|
|
887
|
-
options: {
|
|
888
|
-
lastMessages: config?.memory.lastMessages || 10,
|
|
889
|
-
semanticRecall: {
|
|
890
|
-
topK: config?.memory.semanticRecall.topK || 3,
|
|
891
|
-
messageRange: config?.memory.semanticRecall.messageRange || 2
|
|
892
|
-
}
|
|
893
|
-
}
|
|
1018
|
+
if (this.config.outputSchema) {
|
|
1019
|
+
if (stream) {
|
|
1020
|
+
}
|
|
1021
|
+
const { object } = await generateObject({
|
|
1022
|
+
model: this.model,
|
|
1023
|
+
schema: this.config.outputSchema,
|
|
1024
|
+
prompt
|
|
894
1025
|
});
|
|
1026
|
+
return object;
|
|
895
1027
|
}
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
1028
|
+
if (stream) {
|
|
1029
|
+
const result = streamText({
|
|
1030
|
+
model: this.model,
|
|
1031
|
+
prompt
|
|
1032
|
+
});
|
|
1033
|
+
const text2 = await result.text;
|
|
1034
|
+
return text2;
|
|
900
1035
|
}
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
type: STATISTICS_TYPE_ENUM.AGENT_RUN,
|
|
905
|
-
trigger: "agent"
|
|
1036
|
+
const { text } = await generateText({
|
|
1037
|
+
model: this.model,
|
|
1038
|
+
prompt
|
|
906
1039
|
});
|
|
907
|
-
return
|
|
1040
|
+
return text;
|
|
908
1041
|
};
|
|
909
1042
|
};
|
|
910
1043
|
var ExuluEmbedder = class {
|
|
@@ -1100,35 +1233,117 @@ var ExuluLogger = class {
|
|
|
1100
1233
|
}
|
|
1101
1234
|
}
|
|
1102
1235
|
};
|
|
1236
|
+
/**
 * Named evaluation definition. `create` exposes factories that turn the
 * definition into runnable evaluations (test cases plus a `run` function).
 */
var ExuluEval = class {
  name;
  description;
  /**
   * @param {object} options
   * @param {string} options.name - Stored as `eval_name` with every result.
   * @param {string} options.description - Copied onto created evaluations.
   */
  constructor({ name, description }) {
    this.name = name;
    this.description = description;
  }
  create = {
    LlmAsAJudge: {
      /**
       * Creates a "needle in a haystack" evaluation scored by a judge model.
       *
       * @param {object} options
       * @param {string} options.label - Prefix for the test case categories.
       * @param {*} options.model - Model handed to `generateObject` for judging.
       * @param {{ question: string, answer: string }[]} options.needles
       * @param {string} options.testDocument - Haystack text.
       * @param {number[]} [options.contextlengths] - Context sizes in tokens;
       *   defaults to 5,000 / 30,000 / 50,000 / 128,000.
       * @returns {{ name: string, description: string, testcases: object[], run: Function }}
       */
      niah: ({ label, model, needles, testDocument, contextlengths }) => {
        return {
          name: this.name,
          description: this.description,
          testcases: ExuluEvalUtils.niahTestSet({
            label,
            contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3],
            needles,
            testDocument
          }),
          /**
           * Runs one test case: obtains the agent's answer (unless the test
           * case already carries a `result`), asks the judge model for a
           * correctness score and stores the outcome in `eval_results`.
           *
           * @param {object} args
           * @param {object} args.data - Test case; `data.result` is filled in
           *   with the agent's answer when it was empty.
           * @param {{ agent?: object, workflow?: object }} args.runner
           * @returns {Promise<{ score: number, comment: string }>}
           * @throws {Error} When a workflow is supplied, no agent is supplied,
           *   or an answer has to be generated but the test case has no prompt.
           */
          run: async ({ data, runner }) => {
            if (runner.workflow) {
              throw new Error("Workflows are not supported for the needle in a haystack eval.");
            }
            if (!runner.agent) {
              throw new Error("Agent is required for the needle in a haystack eval.");
            }
            let duration = data.duration;
            if (!data.result) {
              if (!data.prompt) {
                throw new Error("Prompt is required for running an agent.");
              }
              const startedAt = Date.now();
              data.result = await runner.agent.generate({
                prompt: data.prompt,
                stream: false
              });
              // Generated test cases carry no duration, so fall back to the
              // measured wall-clock time of the agent call, in milliseconds.
              duration = duration ?? Date.now() - startedAt;
            }
            // An agent may return a structured object instead of text;
            // serialize it so the judge does not see "[object Object]".
            const answerText = typeof data.result === "string" ? data.result : JSON.stringify(data.result);
            const { object } = await generateObject({
              model,
              maxRetries: 3,
              schema: z.object({
                correctnessScore: z.number(),
                comment: z.string()
              }),
              prompt: `You are checking if the below "actual_answers" contain the correct information as
presented in the "correct_answers" section to calculate the correctness score.

The correctness score should be a number between 0 and 1. 1 is the highest score.

For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the
score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the
score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the
score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.

You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,
punctuation, etc., if the content of the actual answer is still correct.

Also provide a comment on how you came to your conclusion.

<actual_answers>
${answerText}
</actual_answers>

<correct_answers>
${needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n")}
</correct_answers>`
            });
            console.log("[EXULU] eval result", object);
            const { db: db2 } = await postgresClient();
            await db2("eval_results").insert({
              input: data.prompt,
              output: answerText,
              duration,
              result: object.correctnessScore,
              agent_id: runner.agent.id || void 0,
              eval_type: EVAL_TYPES_ENUM.llm_as_judge,
              eval_name: this.name,
              comment: object.comment,
              category: data.category,
              metadata: data.metadata,
              createdAt: db2.fn.now(),
              updatedAt: db2.fn.now()
            });
            return {
              score: object.correctnessScore,
              comment: object.comment
            };
          }
        };
      }
    }
  };
};
|
|
1103
1328
|
var ExuluTool = class {
|
|
1104
1329
|
id;
|
|
1105
1330
|
name;
|
|
1106
1331
|
description;
|
|
1107
|
-
|
|
1108
|
-
outputSchema;
|
|
1332
|
+
parameters;
|
|
1109
1333
|
type;
|
|
1110
|
-
|
|
1111
|
-
constructor({ id, name, description,
|
|
1334
|
+
tool;
|
|
1335
|
+
constructor({ id, name, description, parameters, type, execute: execute2 }) {
|
|
1112
1336
|
this.id = id;
|
|
1113
1337
|
this.name = name;
|
|
1114
1338
|
this.description = description;
|
|
1115
|
-
this.
|
|
1116
|
-
this.outputSchema = outputSchema;
|
|
1339
|
+
this.parameters = parameters;
|
|
1117
1340
|
this.type = type;
|
|
1118
|
-
this.
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
throw new Error("Tool has no execute function.");
|
|
1123
|
-
}
|
|
1124
|
-
updateStatistic({
|
|
1125
|
-
name: "count",
|
|
1126
|
-
label: this.name,
|
|
1127
|
-
type: STATISTICS_TYPE_ENUM.TOOL_CALL,
|
|
1128
|
-
trigger: "agent"
|
|
1341
|
+
this.tool = tool({
|
|
1342
|
+
description,
|
|
1343
|
+
parameters,
|
|
1344
|
+
execute: execute2
|
|
1129
1345
|
});
|
|
1130
|
-
|
|
1131
|
-
};
|
|
1346
|
+
}
|
|
1132
1347
|
};
|
|
1133
1348
|
var ExuluContext = class {
|
|
1134
1349
|
id;
|
|
@@ -1530,21 +1745,9 @@ var ExuluContext = class {
|
|
|
1530
1745
|
id: this.id,
|
|
1531
1746
|
name: `${this.name} context`,
|
|
1532
1747
|
type: "context",
|
|
1533
|
-
|
|
1748
|
+
parameters: z.object({
|
|
1534
1749
|
query: z.string()
|
|
1535
1750
|
}),
|
|
1536
|
-
outputSchema: z.object({
|
|
1537
|
-
// todo check if result format is still correct based on above getItems function
|
|
1538
|
-
results: z.array(z.object({
|
|
1539
|
-
count: z.number(),
|
|
1540
|
-
results: z.array(z.object({
|
|
1541
|
-
id: z.string(),
|
|
1542
|
-
content: z.string(),
|
|
1543
|
-
metadata: z.record(z.any())
|
|
1544
|
-
})),
|
|
1545
|
-
errors: z.array(z.string()).optional()
|
|
1546
|
-
}))
|
|
1547
|
-
}),
|
|
1548
1751
|
description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
|
|
1549
1752
|
execute: async ({ context }) => {
|
|
1550
1753
|
return await this.getItems({
|
|
@@ -2802,7 +3005,7 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2802
3005
|
} else {
|
|
2803
3006
|
console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
|
|
2804
3007
|
}
|
|
2805
|
-
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
|
|
3008
|
+
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
|
|
2806
3009
|
console.log("[EXULU] graphql server");
|
|
2807
3010
|
const server = new ApolloServer({
|
|
2808
3011
|
cache: new InMemoryLRUCache(),
|
|
@@ -2870,13 +3073,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2870
3073
|
});
|
|
2871
3074
|
});
|
|
2872
3075
|
app.get("/tools", async (req, res) => {
|
|
2873
|
-
res.status(200).json(tools.map((
|
|
2874
|
-
id:
|
|
2875
|
-
name:
|
|
2876
|
-
description:
|
|
2877
|
-
type:
|
|
2878
|
-
inputSchema:
|
|
2879
|
-
outputSchema:
|
|
3076
|
+
res.status(200).json(tools.map((tool2) => ({
|
|
3077
|
+
id: tool2.id,
|
|
3078
|
+
name: tool2.name,
|
|
3079
|
+
description: tool2.description,
|
|
3080
|
+
type: tool2.type || "tool",
|
|
3081
|
+
inputSchema: tool2.inputSchema ? zerialize(tool2.inputSchema) : null,
|
|
3082
|
+
outputSchema: tool2.outputSchema ? zerialize(tool2.outputSchema) : null
|
|
2880
3083
|
})));
|
|
2881
3084
|
});
|
|
2882
3085
|
app.get("/tools/:id", async (req, res) => {
|
|
@@ -2887,14 +3090,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2887
3090
|
});
|
|
2888
3091
|
return;
|
|
2889
3092
|
}
|
|
2890
|
-
const
|
|
2891
|
-
if (!
|
|
3093
|
+
const tool2 = tools.find((tool3) => tool3.id === id);
|
|
3094
|
+
if (!tool2) {
|
|
2892
3095
|
res.status(400).json({
|
|
2893
3096
|
message: "Tool not found."
|
|
2894
3097
|
});
|
|
2895
3098
|
return;
|
|
2896
3099
|
}
|
|
2897
|
-
res.status(200).json(
|
|
3100
|
+
res.status(200).json(tool2);
|
|
2898
3101
|
});
|
|
2899
3102
|
const deleteItem = async ({
|
|
2900
3103
|
id,
|
|
@@ -3987,6 +4190,198 @@ var ExuluApp = class {
|
|
|
3987
4190
|
|
|
3988
4191
|
// src/index.ts
|
|
3989
4192
|
import { RecursiveChunker, SentenceChunker, RecursiveRules } from "chonkie";
|
|
4193
|
+
|
|
4194
|
+
// src/cli/index.tsx
|
|
4195
|
+
import { useEffect as useEffect2, useState as useState2 } from "react";
|
|
4196
|
+
import { Box as Box2, Text as Text4, render as render2 } from "ink";
|
|
4197
|
+
import { UnorderedList as UnorderedList3 } from "@inkjs/ui";
|
|
4198
|
+
import patchConsole from "patch-console";
|
|
4199
|
+
|
|
4200
|
+
// src/cli/components/nav.tsx
|
|
4201
|
+
import { Select } from "@inkjs/ui";
|
|
4202
|
+
import { useApp } from "ink";
|
|
4203
|
+
import { jsx } from "react/jsx-runtime";
|
|
4204
|
+
// Entries of the CLI's top-level menu.
var nav = [
  { label: "Agents", value: "agents" },
  { label: "Exit", value: "exit" }
];
/**
 * Top-level menu. Choosing "exit" calls the app's `exit`; every choice
 * (including "exit") is then reported through `setView`.
 *
 * @param {object} props
 * @param {(view: string) => void} props.setView - Receives the chosen value.
 */
var Nav = ({ setView }) => {
  const { exit: quit } = useApp();
  const handleChange = (choice) => {
    if (choice === "exit") {
      quit();
    }
    setView(choice);
  };
  return /* @__PURE__ */ jsx(Select, { options: nav, onChange: handleChange });
};
var nav_default = Nav;
|
|
4224
|
+
|
|
4225
|
+
// src/cli/components/agent-selector.tsx
|
|
4226
|
+
import { Text as Text2 } from "ink";
|
|
4227
|
+
import { Select as Select2 } from "@inkjs/ui";
|
|
4228
|
+
import { Fragment, jsx as jsx2, jsxs } from "react/jsx-runtime";
|
|
4229
|
+
/**
 * Lets the user pick one of `exulu.agents`. The chosen agent is looked up
 * with `exulu.agent(id)`; when found it is passed to `setAgent` and its
 * evals (or an empty list) to `setEvaluations`.
 *
 * @param {object} props
 * @param {object} props.exulu - Provides `agents` and `agent(id)`.
 * @param {(agent: object) => void} props.setAgent
 * @param {(evaluations: object[]) => void} props.setEvaluations
 */
var AgentSelector = ({ exulu, setAgent, setEvaluations }) => {
  const options = exulu.agents.map(({ name, id }) => ({ label: name, value: id }));
  const handleSelect = (agentId) => {
    console.log("selected agent", agentId);
    const selected = exulu.agent(agentId);
    if (!selected) {
      console.error("Agent not found", agentId);
      return;
    }
    setAgent(selected);
    setEvaluations(selected.evals || []);
  };
  return /* @__PURE__ */ jsxs(Fragment, { children: [
    /* @__PURE__ */ jsx2(Text2, { children: "Please select an agent:" }),
    /* @__PURE__ */ jsx2(Select2, { options, onChange: handleSelect })
  ] });
};
var agent_selector_default = AgentSelector;
|
|
4251
|
+
|
|
4252
|
+
// src/cli/components/eval-selector.tsx
|
|
4253
|
+
import { Select as Select3 } from "@inkjs/ui";
|
|
4254
|
+
import { jsx as jsx3 } from "react/jsx-runtime";
|
|
4255
|
+
/**
 * Lets the user pick an evaluation by its runner name and hands the matching
 * evaluation object to `setEvaluation`. Unknown names are ignored.
 *
 * @param {object} props
 * @param {{ runner: { name: string } }[]} props.evaluations
 * @param {(evaluation: object) => void} props.setEvaluation
 */
var EvalSelector = ({ evaluations, setEvaluation }) => {
  const options = evaluations.map((candidate) => {
    const label = candidate.runner.name;
    const value = candidate.runner.name;
    return { label, value };
  });
  const handleSelect = (runnerName) => {
    console.log("selected eval", runnerName);
    const match = evaluations.find((candidate) => candidate.runner.name === runnerName);
    if (!match) {
      return;
    }
    setEvaluation(match);
  };
  return /* @__PURE__ */ jsx3(Select3, { options, onChange: handleSelect });
};
var eval_selector_default = EvalSelector;
|
|
4268
|
+
|
|
4269
|
+
// src/cli/components/eval-actions.tsx
|
|
4270
|
+
import { useState } from "react";
|
|
4271
|
+
import { ProgressBar as ProgressBar2, Select as Select4, UnorderedList as UnorderedList2 } from "@inkjs/ui";
|
|
4272
|
+
import { Text as Text3 } from "ink";
|
|
4273
|
+
import { Fragment as Fragment2, jsx as jsx4, jsxs as jsxs2 } from "react/jsx-runtime";
|
|
4274
|
+
/**
 * Action menu for a selected evaluation: runs every test case against the
 * agent, shows a progress bar while running and the collected results once
 * all test cases have finished.
 *
 * @param {object} props
 * @param {object} props.agent - Agent handed to the evaluation runner.
 * @param {{ runner: { name: string, testcases: object[], run: Function } }} props.evaluation
 * @param {(evaluation: object | undefined) => void} props.setEvaluation -
 *   Called with `undefined` when the user chooses "Go back".
 */
var EvalActions = ({ agent, evaluation, setEvaluation }) => {
  const [progress, setProgress] = useState(0);
  const [results, setResults] = useState([]);
  const [running, setRunning] = useState();
  const run = async (evaluation2) => {
    setRunning({
      label: evaluation2.runner.name
    });
    const testCases = evaluation2.runner.testcases;
    // Validate before touching `.length`, otherwise a missing list surfaces
    // as a TypeError instead of the intended error.
    if (!testCases) {
      throw new Error("No test cases found");
    }
    const total = testCases.length;
    let completed = 0;
    for (const testCase of testCases) {
      completed++;
      const result = await evaluation2.runner.run({
        data: testCase,
        runner: {
          agent
        }
      });
      // Only report 100 after the final test case: rounding alone reaches 100
      // one case early for large sets, which would show the completed view too soon.
      const percent = completed === total ? 100 : Math.min(99, Math.round(completed / total * 100));
      setProgress(percent);
      // Functional update: `results` captured by this closure is the value
      // from the render that started the run, so spreading it would drop
      // every result except the last one.
      setResults((previous) => [...previous, {
        name: evaluation2.runner.name,
        prompt: testCase.prompt?.slice(0, 100) + "...",
        score: result.score,
        comment: result.comment
      }]);
    }
    setRunning(void 0);
  };
  if (progress === 100) {
    return /* @__PURE__ */ jsxs2(Fragment2, { children: [
      /* @__PURE__ */ jsx4(Text3, { children: "Evaluations completed." }),
      /* @__PURE__ */ jsx4(UnorderedList2, { children: results.map((result, index) => /* @__PURE__ */ jsx4(UnorderedList2.Item, { children: /* @__PURE__ */ jsxs2(Text3, { children: [
        result.name,
        ": ",
        result.score,
        " - ",
        result.comment
      ] }) }, index)) })
    ] });
  }
  if (running) {
    return /* @__PURE__ */ jsxs2(Fragment2, { children: [
      /* @__PURE__ */ jsxs2(Text3, { children: [
        "Running ",
        running.label,
        "..."
      ] }),
      /* @__PURE__ */ jsx4(ProgressBar2, { value: progress })
    ] });
  }
  return /* @__PURE__ */ jsx4(Select4, { options: [{
    label: "Run evaluation",
    value: "run"
  }, {
    label: "Go back",
    value: "back"
  }], onChange: (value) => {
    if (value === "back") {
      setEvaluation(void 0);
    }
    if (value === "run") {
      run(evaluation);
    }
  } });
};
var eval_actions_default = EvalActions;
|
|
4344
|
+
|
|
4345
|
+
// src/cli/index.tsx
|
|
4346
|
+
import { Fragment as Fragment3, jsx as jsx5, jsxs as jsxs3 } from "react/jsx-runtime";
|
|
4347
|
+
/**
 * Root component of the CLI. Shows captured console output under "Logs:" and
 * walks the user through menu -> agent -> evaluation -> evaluation actions.
 *
 * @param {object} props
 * @param {object} props.exulu - App instance passed on to the agent selector.
 */
var Main = ({ exulu }) => {
  const [logs, setLogs] = useState2([]);
  const [view, setView] = useState2();
  const [agent, setAgent] = useState2();
  const [evaluations, setEvaluations] = useState2([]);
  const [evaluation, setEvaluation] = useState2();
  // Patch the console once per mount instead of on every render, and restore
  // it on unmount; re-patching per render stacks patches that are never undone.
  useEffect2(() => {
    const restore = patchConsole((stream, data) => {
      // Functional update so each entry is appended to the latest list rather
      // than to the list captured when the patch was installed.
      setLogs((previous) => [...previous, data]);
    });
    return restore;
  }, []);
  return /* @__PURE__ */ jsxs3(Box2, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
    /* @__PURE__ */ jsx5(Text4, { children: "Logs:" }),
    /* @__PURE__ */ jsx5(UnorderedList3, { children: logs.map((log, index) => /* @__PURE__ */ jsx5(UnorderedList3.Item, { children: /* @__PURE__ */ jsx5(Text4, { children: log }) }, index)) }),
    !view && /* @__PURE__ */ jsx5(nav_default, { setView }),
    view === "agents" && !agent && /* @__PURE__ */ jsx5(agent_selector_default, { exulu, setAgent, setEvaluations }),
    view === "agents" && agent && !evaluation && /* @__PURE__ */ jsxs3(Fragment3, { children: [
      /* @__PURE__ */ jsxs3(Text4, { children: [
        'Selected agent "',
        agent.name,
        '". Please select an evaluation:'
      ] }),
      /* @__PURE__ */ jsx5(eval_selector_default, { evaluations, setEvaluation })
    ] }),
    view === "agents" && agent && evaluation && /* @__PURE__ */ jsxs3(Fragment3, { children: [
      /* @__PURE__ */ jsxs3(Text4, { children: [
        "Selected evaluation: ",
        evaluation.runner.name
      ] }),
      /* @__PURE__ */ jsx5(eval_actions_default, { agent, evaluation, setEvaluation })
    ] })
  ] });
};
|
|
4378
|
+
// CLI entry point: `run` mounts the Main component for the given app instance.
var cli_default = {
  run: (exulu) => {
    const root = /* @__PURE__ */ jsx5(Main, { exulu });
    render2(root);
  }
};
|
|
4383
|
+
|
|
4384
|
+
// src/index.ts
|
|
3990
4385
|
var ExuluJobs = {
|
|
3991
4386
|
redis: redisClient,
|
|
3992
4387
|
jobs: {
|
|
@@ -4017,9 +4412,11 @@ export {
|
|
|
4017
4412
|
ExuluApp,
|
|
4018
4413
|
authentication as ExuluAuthentication,
|
|
4019
4414
|
ExuluChunkers,
|
|
4415
|
+
cli_default as ExuluCli,
|
|
4020
4416
|
ExuluContext,
|
|
4021
4417
|
ExuluDatabase,
|
|
4022
4418
|
ExuluEmbedder,
|
|
4419
|
+
ExuluEval,
|
|
4023
4420
|
ExuluJobs,
|
|
4024
4421
|
ExuluLogger,
|
|
4025
4422
|
queues as ExuluQueues,
|