@exulu/backend 0.2.9 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +497 -107
- package/dist/index.d.cts +87 -33
- package/dist/index.d.ts +87 -33
- package/dist/index.js +493 -105
- package/package.json +9 -7
- package/types/enums/eval-types.ts +5 -0
package/dist/index.cjs
CHANGED
|
@@ -36,9 +36,11 @@ __export(index_exports, {
|
|
|
36
36
|
ExuluApp: () => ExuluApp,
|
|
37
37
|
ExuluAuthentication: () => authentication,
|
|
38
38
|
ExuluChunkers: () => ExuluChunkers,
|
|
39
|
+
ExuluCli: () => cli_default,
|
|
39
40
|
ExuluContext: () => ExuluContext,
|
|
40
41
|
ExuluDatabase: () => ExuluDatabase,
|
|
41
42
|
ExuluEmbedder: () => ExuluEmbedder,
|
|
43
|
+
ExuluEval: () => ExuluEval,
|
|
42
44
|
ExuluJobs: () => ExuluJobs,
|
|
43
45
|
ExuluLogger: () => ExuluLogger,
|
|
44
46
|
ExuluQueues: () => queues,
|
|
@@ -286,6 +288,58 @@ var workflowSchema = {
|
|
|
286
288
|
}
|
|
287
289
|
]
|
|
288
290
|
};
|
|
291
|
+
var evalResultsSchema = {
|
|
292
|
+
name: {
|
|
293
|
+
plural: "eval_results",
|
|
294
|
+
singular: "eval_result"
|
|
295
|
+
},
|
|
296
|
+
fields: [
|
|
297
|
+
{
|
|
298
|
+
name: "input",
|
|
299
|
+
type: "longText"
|
|
300
|
+
},
|
|
301
|
+
{
|
|
302
|
+
name: "output",
|
|
303
|
+
type: "longText"
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
name: "duration",
|
|
307
|
+
type: "number"
|
|
308
|
+
},
|
|
309
|
+
{
|
|
310
|
+
name: "category",
|
|
311
|
+
type: "text"
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
name: "metadata",
|
|
315
|
+
type: "json"
|
|
316
|
+
},
|
|
317
|
+
{
|
|
318
|
+
name: "result",
|
|
319
|
+
type: "number"
|
|
320
|
+
},
|
|
321
|
+
{
|
|
322
|
+
name: "agent_id",
|
|
323
|
+
type: "text"
|
|
324
|
+
},
|
|
325
|
+
{
|
|
326
|
+
name: "workflow_id",
|
|
327
|
+
type: "text"
|
|
328
|
+
},
|
|
329
|
+
{
|
|
330
|
+
name: "eval_type",
|
|
331
|
+
type: "text"
|
|
332
|
+
},
|
|
333
|
+
{
|
|
334
|
+
name: "eval_name",
|
|
335
|
+
type: "text"
|
|
336
|
+
},
|
|
337
|
+
{
|
|
338
|
+
name: "comment",
|
|
339
|
+
type: "longText"
|
|
340
|
+
}
|
|
341
|
+
]
|
|
342
|
+
};
|
|
289
343
|
var threadsSchema = {
|
|
290
344
|
name: {
|
|
291
345
|
plural: "threads",
|
|
@@ -559,6 +613,27 @@ var up = async function(knex) {
|
|
|
559
613
|
}
|
|
560
614
|
});
|
|
561
615
|
}
|
|
616
|
+
if (!await knex.schema.hasTable("eval_results")) {
|
|
617
|
+
await knex.schema.createTable("eval_results", (table) => {
|
|
618
|
+
table.uuid("id").primary().defaultTo(knex.fn.uuid());
|
|
619
|
+
table.date("createdAt").defaultTo(knex.fn.now());
|
|
620
|
+
table.date("updatedAt").defaultTo(knex.fn.now());
|
|
621
|
+
for (const field of evalResultsSchema.fields) {
|
|
622
|
+
const { type, name, references, default: defaultValue } = field;
|
|
623
|
+
if (!type || !name) {
|
|
624
|
+
continue;
|
|
625
|
+
}
|
|
626
|
+
if (type === "reference") {
|
|
627
|
+
if (!references) {
|
|
628
|
+
throw new Error("Field with type reference must have a reference definition.");
|
|
629
|
+
}
|
|
630
|
+
table.uuid(name).references(references.field).inTable(references.table);
|
|
631
|
+
return;
|
|
632
|
+
}
|
|
633
|
+
mapType(table, type, sanitizeName(name), defaultValue);
|
|
634
|
+
}
|
|
635
|
+
});
|
|
636
|
+
}
|
|
562
637
|
if (!await knex.schema.hasTable("statistics")) {
|
|
563
638
|
await knex.schema.createTable("statistics", (table) => {
|
|
564
639
|
table.uuid("id").primary().defaultTo(knex.fn.uuid());
|
|
@@ -730,12 +805,10 @@ var execute = async () => {
|
|
|
730
805
|
// src/registry/classes.ts
|
|
731
806
|
var import_zod = require("zod");
|
|
732
807
|
var import_bullmq2 = require("bullmq");
|
|
733
|
-
var import_core = require("@mastra/core");
|
|
734
808
|
var import_zod2 = require("zod");
|
|
735
809
|
var fs = __toESM(require("fs"), 1);
|
|
736
810
|
var path = __toESM(require("path"), 1);
|
|
737
|
-
var
|
|
738
|
-
var import_pg = require("@mastra/pg");
|
|
811
|
+
var import_ai = require("ai");
|
|
739
812
|
|
|
740
813
|
// types/enums/statistics.ts
|
|
741
814
|
var STATISTICS_TYPE_ENUM = {
|
|
@@ -750,6 +823,11 @@ var STATISTICS_TYPE_ENUM = {
|
|
|
750
823
|
AGENT_RUN: "agent.run"
|
|
751
824
|
};
|
|
752
825
|
|
|
826
|
+
// types/enums/eval-types.ts
|
|
827
|
+
var EVAL_TYPES_ENUM = {
|
|
828
|
+
llm_as_judge: "llm_as_judge"
|
|
829
|
+
};
|
|
830
|
+
|
|
753
831
|
// src/registry/classes.ts
|
|
754
832
|
var import_knex4 = __toESM(require("pgvector/knex"), 1);
|
|
755
833
|
|
|
@@ -853,6 +931,83 @@ var JOB_STATUS_ENUM = {
|
|
|
853
931
|
stuck: "stuck"
|
|
854
932
|
};
|
|
855
933
|
|
|
934
|
+
// src/evals/utils/index.tsx
|
|
935
|
+
var ExuluEvalUtils = {
|
|
936
|
+
niahTestSet: ({
|
|
937
|
+
label,
|
|
938
|
+
contextlengths,
|
|
939
|
+
needles,
|
|
940
|
+
testDocument
|
|
941
|
+
}) => {
|
|
942
|
+
const testCases = contextlengths.map((contextlength) => {
|
|
943
|
+
let testText = testDocument.slice(0, contextlength * 4 - needles.length * 200);
|
|
944
|
+
const depthInterval = 5e3 * 4;
|
|
945
|
+
const depths = Array.from({ length: contextlength * 4 / depthInterval }, (_, i) => (i + 1) * depthInterval);
|
|
946
|
+
console.log("[EXULU] contextlength: ", {
|
|
947
|
+
tokens: contextlength,
|
|
948
|
+
chars: contextlength * 4,
|
|
949
|
+
depths
|
|
950
|
+
});
|
|
951
|
+
return depths.map((depth, index) => {
|
|
952
|
+
const first = index === 0;
|
|
953
|
+
const last = index === depths.length - 1;
|
|
954
|
+
const start = first ? 0 : depths[index - 1];
|
|
955
|
+
const end = last ? contextlength * 4 : depths[index];
|
|
956
|
+
console.log("[EXULU] Niah positions: ", {
|
|
957
|
+
start,
|
|
958
|
+
end,
|
|
959
|
+
depth,
|
|
960
|
+
index
|
|
961
|
+
});
|
|
962
|
+
let modifiedTestText = testText;
|
|
963
|
+
const insertions = [];
|
|
964
|
+
needles.forEach((needle, index2) => {
|
|
965
|
+
const basePosition = start + Math.floor(Math.random() * (end - start));
|
|
966
|
+
insertions.push({ position: basePosition, needle: needle.answer });
|
|
967
|
+
});
|
|
968
|
+
insertions.sort((a, b) => b.position - a.position);
|
|
969
|
+
console.log("[EXULU] Niah insertions: ", insertions);
|
|
970
|
+
insertions.forEach(({ position, needle }) => {
|
|
971
|
+
const insertionPosition = Math.min(position, modifiedTestText.length);
|
|
972
|
+
const beforeNeedle = modifiedTestText.slice(0, insertionPosition);
|
|
973
|
+
const afterNeedle = modifiedTestText.slice(insertionPosition);
|
|
974
|
+
modifiedTestText = beforeNeedle + needle + afterNeedle;
|
|
975
|
+
});
|
|
976
|
+
return {
|
|
977
|
+
prompt: `You are a helpful assistant.
|
|
978
|
+
|
|
979
|
+
You are given a text.
|
|
980
|
+
|
|
981
|
+
You need to answer the following question, using only the information from the text provided below. Do not hallucinate
|
|
982
|
+
or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".
|
|
983
|
+
|
|
984
|
+
${needles.map((needle, index2) => `- ${index2 + 1}: ${needle.question}`).join("\n")}
|
|
985
|
+
|
|
986
|
+
The text is:
|
|
987
|
+
|
|
988
|
+
${modifiedTestText}
|
|
989
|
+
`,
|
|
990
|
+
category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`,
|
|
991
|
+
metadata: {
|
|
992
|
+
contextLength: contextlength,
|
|
993
|
+
depth,
|
|
994
|
+
needles
|
|
995
|
+
}
|
|
996
|
+
};
|
|
997
|
+
});
|
|
998
|
+
});
|
|
999
|
+
const flattenedTestCases = testCases.flat();
|
|
1000
|
+
console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
|
|
1001
|
+
console.table(flattenedTestCases.map((data) => ({
|
|
1002
|
+
chars: data.prompt?.length || 0,
|
|
1003
|
+
tokens: data.prompt?.length / 4 || 0,
|
|
1004
|
+
category: data.category,
|
|
1005
|
+
metadata: data.metadata
|
|
1006
|
+
})));
|
|
1007
|
+
return flattenedTestCases;
|
|
1008
|
+
}
|
|
1009
|
+
};
|
|
1010
|
+
|
|
856
1011
|
// src/registry/classes.ts
|
|
857
1012
|
function generateSlug(name) {
|
|
858
1013
|
const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
|
|
@@ -881,72 +1036,52 @@ var ExuluAgent = class {
|
|
|
881
1036
|
description = "";
|
|
882
1037
|
slug = "";
|
|
883
1038
|
streaming = false;
|
|
884
|
-
type;
|
|
885
|
-
outputSchema;
|
|
886
1039
|
rateLimit;
|
|
887
1040
|
config;
|
|
888
|
-
memory;
|
|
1041
|
+
// private memory: Memory | undefined; // TODO remove mastra and do own implementation
|
|
889
1042
|
tools;
|
|
890
|
-
|
|
1043
|
+
evals;
|
|
1044
|
+
model;
|
|
891
1045
|
capabilities;
|
|
892
|
-
constructor({ id, name, description,
|
|
1046
|
+
constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
|
|
893
1047
|
this.id = id;
|
|
894
1048
|
this.name = name;
|
|
895
|
-
this.
|
|
1049
|
+
this.evals = evals;
|
|
896
1050
|
this.description = description;
|
|
897
|
-
this.outputSchema = outputSchema;
|
|
898
1051
|
this.rateLimit = rateLimit;
|
|
899
1052
|
this.tools = tools;
|
|
900
1053
|
this.config = config;
|
|
901
1054
|
this.capabilities = capabilities;
|
|
902
1055
|
this.slug = `/agents/${generateSlug(this.name)}/run`;
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
memory: this.memory ? this.memory : void 0
|
|
909
|
-
});
|
|
1056
|
+
this.model = this.config.model;
|
|
1057
|
+
}
|
|
1058
|
+
generate = async ({ prompt, stream }) => {
|
|
1059
|
+
if (!this.model) {
|
|
1060
|
+
throw new Error("Model is required for streaming.");
|
|
910
1061
|
}
|
|
911
|
-
if (config
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
user: process.env.POSTGRES_DB_USER || "",
|
|
919
|
-
database: "exulu",
|
|
920
|
-
// putting it into an own database that is not managed by exulu
|
|
921
|
-
password: process.env.POSTGRES_DB_PASSWORD || "",
|
|
922
|
-
ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
|
|
923
|
-
}),
|
|
924
|
-
...config?.memory.vector ? {
|
|
925
|
-
vector: new import_pg.PgVector({
|
|
926
|
-
connectionString
|
|
927
|
-
})
|
|
928
|
-
} : {},
|
|
929
|
-
options: {
|
|
930
|
-
lastMessages: config?.memory.lastMessages || 10,
|
|
931
|
-
semanticRecall: {
|
|
932
|
-
topK: config?.memory.semanticRecall.topK || 3,
|
|
933
|
-
messageRange: config?.memory.semanticRecall.messageRange || 2
|
|
934
|
-
}
|
|
935
|
-
}
|
|
1062
|
+
if (this.config.outputSchema) {
|
|
1063
|
+
if (stream) {
|
|
1064
|
+
}
|
|
1065
|
+
const { object } = await (0, import_ai.generateObject)({
|
|
1066
|
+
model: this.model,
|
|
1067
|
+
schema: this.config.outputSchema,
|
|
1068
|
+
prompt
|
|
936
1069
|
});
|
|
1070
|
+
return object;
|
|
937
1071
|
}
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1072
|
+
if (stream) {
|
|
1073
|
+
const result = (0, import_ai.streamText)({
|
|
1074
|
+
model: this.model,
|
|
1075
|
+
prompt
|
|
1076
|
+
});
|
|
1077
|
+
const text2 = await result.text;
|
|
1078
|
+
return text2;
|
|
942
1079
|
}
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
type: STATISTICS_TYPE_ENUM.AGENT_RUN,
|
|
947
|
-
trigger: "agent"
|
|
1080
|
+
const { text } = await (0, import_ai.generateText)({
|
|
1081
|
+
model: this.model,
|
|
1082
|
+
prompt
|
|
948
1083
|
});
|
|
949
|
-
return
|
|
1084
|
+
return text;
|
|
950
1085
|
};
|
|
951
1086
|
};
|
|
952
1087
|
var ExuluEmbedder = class {
|
|
@@ -1142,35 +1277,117 @@ var ExuluLogger = class {
|
|
|
1142
1277
|
}
|
|
1143
1278
|
}
|
|
1144
1279
|
};
|
|
1280
|
+
var ExuluEval = class {
|
|
1281
|
+
name;
|
|
1282
|
+
description;
|
|
1283
|
+
constructor({ name, description }) {
|
|
1284
|
+
this.name = name;
|
|
1285
|
+
this.description = description;
|
|
1286
|
+
}
|
|
1287
|
+
create = {
|
|
1288
|
+
LlmAsAJudge: {
|
|
1289
|
+
niah: ({ label, model, needles, testDocument, contextlengths }) => {
|
|
1290
|
+
return {
|
|
1291
|
+
name: this.name,
|
|
1292
|
+
description: this.description,
|
|
1293
|
+
testcases: ExuluEvalUtils.niahTestSet({
|
|
1294
|
+
label,
|
|
1295
|
+
contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3],
|
|
1296
|
+
needles,
|
|
1297
|
+
testDocument
|
|
1298
|
+
}),
|
|
1299
|
+
run: async ({ data, runner }) => {
|
|
1300
|
+
if (runner.workflow) {
|
|
1301
|
+
throw new Error("Workflows are not supported for the needle in a haystack eval.");
|
|
1302
|
+
}
|
|
1303
|
+
if (!runner.agent) {
|
|
1304
|
+
throw new Error("Agent is required for the needle in a haystack eval.");
|
|
1305
|
+
}
|
|
1306
|
+
if (!data.result) {
|
|
1307
|
+
if (!data.prompt) {
|
|
1308
|
+
throw new Error("Prompt is required for running an agent.");
|
|
1309
|
+
}
|
|
1310
|
+
const result = await runner.agent.generate({
|
|
1311
|
+
prompt: data.prompt,
|
|
1312
|
+
stream: false
|
|
1313
|
+
});
|
|
1314
|
+
data.result = result;
|
|
1315
|
+
}
|
|
1316
|
+
const { object } = await (0, import_ai.generateObject)({
|
|
1317
|
+
model,
|
|
1318
|
+
maxRetries: 3,
|
|
1319
|
+
schema: import_zod2.z.object({
|
|
1320
|
+
correctnessScore: import_zod2.z.number(),
|
|
1321
|
+
comment: import_zod2.z.string()
|
|
1322
|
+
}),
|
|
1323
|
+
prompt: `You are checking if the below "actual_answers" contain the correct information as
|
|
1324
|
+
presented in the "correct_answers" section to calculate the correctness score.
|
|
1325
|
+
|
|
1326
|
+
The correctness score should be a number between 0 and 1. 1 is the highest score.
|
|
1327
|
+
|
|
1328
|
+
For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the
|
|
1329
|
+
score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the
|
|
1330
|
+
score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the
|
|
1331
|
+
score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.
|
|
1332
|
+
|
|
1333
|
+
You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,
|
|
1334
|
+
punctuation, etc., if the content of the actual answer is still correct.
|
|
1335
|
+
|
|
1336
|
+
Also provide a comment on how you came to your conclusion.
|
|
1337
|
+
|
|
1338
|
+
<actual_answers>
|
|
1339
|
+
${data.result}
|
|
1340
|
+
</actual_answers>
|
|
1341
|
+
|
|
1342
|
+
<correct_answers>
|
|
1343
|
+
${needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n")}
|
|
1344
|
+
</correct_answers>`
|
|
1345
|
+
});
|
|
1346
|
+
console.log("[EXULU] eval result", object);
|
|
1347
|
+
const { db: db2 } = await postgresClient();
|
|
1348
|
+
await db2("eval_results").insert({
|
|
1349
|
+
input: data.prompt,
|
|
1350
|
+
output: data.result,
|
|
1351
|
+
duration: data.duration,
|
|
1352
|
+
result: object.correctnessScore,
|
|
1353
|
+
agent_id: runner.agent.id || void 0,
|
|
1354
|
+
eval_type: EVAL_TYPES_ENUM.llm_as_judge,
|
|
1355
|
+
eval_name: this.name,
|
|
1356
|
+
comment: object.comment,
|
|
1357
|
+
category: data.category,
|
|
1358
|
+
metadata: data.metadata,
|
|
1359
|
+
createdAt: db2.fn.now(),
|
|
1360
|
+
updatedAt: db2.fn.now()
|
|
1361
|
+
});
|
|
1362
|
+
return {
|
|
1363
|
+
score: object.correctnessScore,
|
|
1364
|
+
comment: object.comment
|
|
1365
|
+
};
|
|
1366
|
+
}
|
|
1367
|
+
};
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1370
|
+
};
|
|
1371
|
+
};
|
|
1145
1372
|
var ExuluTool = class {
|
|
1146
1373
|
id;
|
|
1147
1374
|
name;
|
|
1148
1375
|
description;
|
|
1149
|
-
|
|
1150
|
-
outputSchema;
|
|
1376
|
+
parameters;
|
|
1151
1377
|
type;
|
|
1152
|
-
|
|
1153
|
-
constructor({ id, name, description,
|
|
1378
|
+
tool;
|
|
1379
|
+
constructor({ id, name, description, parameters, type, execute: execute2 }) {
|
|
1154
1380
|
this.id = id;
|
|
1155
1381
|
this.name = name;
|
|
1156
1382
|
this.description = description;
|
|
1157
|
-
this.
|
|
1158
|
-
this.outputSchema = outputSchema;
|
|
1383
|
+
this.parameters = parameters;
|
|
1159
1384
|
this.type = type;
|
|
1160
|
-
this.
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
throw new Error("Tool has no execute function.");
|
|
1165
|
-
}
|
|
1166
|
-
updateStatistic({
|
|
1167
|
-
name: "count",
|
|
1168
|
-
label: this.name,
|
|
1169
|
-
type: STATISTICS_TYPE_ENUM.TOOL_CALL,
|
|
1170
|
-
trigger: "agent"
|
|
1385
|
+
this.tool = (0, import_ai.tool)({
|
|
1386
|
+
description,
|
|
1387
|
+
parameters,
|
|
1388
|
+
execute: execute2
|
|
1171
1389
|
});
|
|
1172
|
-
|
|
1173
|
-
};
|
|
1390
|
+
}
|
|
1174
1391
|
};
|
|
1175
1392
|
var ExuluContext = class {
|
|
1176
1393
|
id;
|
|
@@ -1572,21 +1789,9 @@ var ExuluContext = class {
|
|
|
1572
1789
|
id: this.id,
|
|
1573
1790
|
name: `${this.name} context`,
|
|
1574
1791
|
type: "context",
|
|
1575
|
-
|
|
1792
|
+
parameters: import_zod2.z.object({
|
|
1576
1793
|
query: import_zod2.z.string()
|
|
1577
1794
|
}),
|
|
1578
|
-
outputSchema: import_zod2.z.object({
|
|
1579
|
-
// todo check if result format is still correct based on above getItems function
|
|
1580
|
-
results: import_zod2.z.array(import_zod2.z.object({
|
|
1581
|
-
count: import_zod2.z.number(),
|
|
1582
|
-
results: import_zod2.z.array(import_zod2.z.object({
|
|
1583
|
-
id: import_zod2.z.string(),
|
|
1584
|
-
content: import_zod2.z.string(),
|
|
1585
|
-
metadata: import_zod2.z.record(import_zod2.z.any())
|
|
1586
|
-
})),
|
|
1587
|
-
errors: import_zod2.z.array(import_zod2.z.string()).optional()
|
|
1588
|
-
}))
|
|
1589
|
-
}),
|
|
1590
1795
|
description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
|
|
1591
1796
|
execute: async ({ context }) => {
|
|
1592
1797
|
return await this.getItems({
|
|
@@ -1641,16 +1846,7 @@ var ExuluSource = class {
|
|
|
1641
1846
|
}
|
|
1642
1847
|
};
|
|
1643
1848
|
var updateStatistic = async (statistic) => {
|
|
1644
|
-
|
|
1645
|
-
const { db: db2 } = await postgresClient();
|
|
1646
|
-
await db2.from("statistics").update({
|
|
1647
|
-
total: db2.raw("total + ?", [statistic.count ?? 1]),
|
|
1648
|
-
timeseries: db2.raw('CASE WHEN "createdAt" = ? THEN array_append(timeseries, ?) ELSE timeseries END', [currentDate, { date: currentDate, count: statistic.count ?? 1 }])
|
|
1649
|
-
}).where({
|
|
1650
|
-
name: statistic.name,
|
|
1651
|
-
label: statistic.label,
|
|
1652
|
-
type: statistic.type
|
|
1653
|
-
}).onConflict("name").merge();
|
|
1849
|
+
return;
|
|
1654
1850
|
};
|
|
1655
1851
|
|
|
1656
1852
|
// src/registry/index.ts
|
|
@@ -2755,7 +2951,7 @@ var createUppyRoutes = async (app) => {
|
|
|
2755
2951
|
};
|
|
2756
2952
|
|
|
2757
2953
|
// src/registry/routes.ts
|
|
2758
|
-
var
|
|
2954
|
+
var import_utils2 = require("@apollo/utils.keyvaluecache");
|
|
2759
2955
|
var global_queues = {
|
|
2760
2956
|
logs_cleaner: "logs-cleaner"
|
|
2761
2957
|
};
|
|
@@ -2853,10 +3049,10 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2853
3049
|
} else {
|
|
2854
3050
|
console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
|
|
2855
3051
|
}
|
|
2856
|
-
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
|
|
3052
|
+
const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
|
|
2857
3053
|
console.log("[EXULU] graphql server");
|
|
2858
3054
|
const server = new import_server3.ApolloServer({
|
|
2859
|
-
cache: new
|
|
3055
|
+
cache: new import_utils2.InMemoryLRUCache(),
|
|
2860
3056
|
schema,
|
|
2861
3057
|
introspection: true
|
|
2862
3058
|
});
|
|
@@ -2921,13 +3117,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2921
3117
|
});
|
|
2922
3118
|
});
|
|
2923
3119
|
app.get("/tools", async (req, res) => {
|
|
2924
|
-
res.status(200).json(tools.map((
|
|
2925
|
-
id:
|
|
2926
|
-
name:
|
|
2927
|
-
description:
|
|
2928
|
-
type:
|
|
2929
|
-
inputSchema:
|
|
2930
|
-
outputSchema:
|
|
3120
|
+
res.status(200).json(tools.map((tool2) => ({
|
|
3121
|
+
id: tool2.id,
|
|
3122
|
+
name: tool2.name,
|
|
3123
|
+
description: tool2.description,
|
|
3124
|
+
type: tool2.type || "tool",
|
|
3125
|
+
inputSchema: tool2.inputSchema ? (0, import_zodex.zerialize)(tool2.inputSchema) : null,
|
|
3126
|
+
outputSchema: tool2.outputSchema ? (0, import_zodex.zerialize)(tool2.outputSchema) : null
|
|
2931
3127
|
})));
|
|
2932
3128
|
});
|
|
2933
3129
|
app.get("/tools/:id", async (req, res) => {
|
|
@@ -2938,14 +3134,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
|
|
|
2938
3134
|
});
|
|
2939
3135
|
return;
|
|
2940
3136
|
}
|
|
2941
|
-
const
|
|
2942
|
-
if (!
|
|
3137
|
+
const tool2 = tools.find((tool3) => tool3.id === id);
|
|
3138
|
+
if (!tool2) {
|
|
2943
3139
|
res.status(400).json({
|
|
2944
3140
|
message: "Tool not found."
|
|
2945
3141
|
});
|
|
2946
3142
|
return;
|
|
2947
3143
|
}
|
|
2948
|
-
res.status(200).json(
|
|
3144
|
+
res.status(200).json(tool2);
|
|
2949
3145
|
});
|
|
2950
3146
|
const deleteItem = async ({
|
|
2951
3147
|
id,
|
|
@@ -4038,6 +4234,198 @@ var ExuluApp = class {
|
|
|
4038
4234
|
|
|
4039
4235
|
// src/index.ts
|
|
4040
4236
|
var import_chonkie = require("chonkie");
|
|
4237
|
+
|
|
4238
|
+
// src/cli/index.tsx
|
|
4239
|
+
var import_react2 = require("react");
|
|
4240
|
+
var import_ink4 = require("ink");
|
|
4241
|
+
var import_ui5 = require("@inkjs/ui");
|
|
4242
|
+
var import_patch_console = __toESM(require("patch-console"), 1);
|
|
4243
|
+
|
|
4244
|
+
// src/cli/components/nav.tsx
|
|
4245
|
+
var import_ui = require("@inkjs/ui");
|
|
4246
|
+
var import_ink = require("ink");
|
|
4247
|
+
var import_jsx_runtime = require("react/jsx-runtime");
|
|
4248
|
+
var nav = [
|
|
4249
|
+
{
|
|
4250
|
+
label: "Agents",
|
|
4251
|
+
value: "agents"
|
|
4252
|
+
},
|
|
4253
|
+
{
|
|
4254
|
+
label: "Exit",
|
|
4255
|
+
value: "exit"
|
|
4256
|
+
}
|
|
4257
|
+
];
|
|
4258
|
+
var Nav = ({ setView }) => {
|
|
4259
|
+
const { exit } = (0, import_ink.useApp)();
|
|
4260
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(import_ui.Select, { options: nav, onChange: (value) => {
|
|
4261
|
+
if (value === "exit") {
|
|
4262
|
+
exit();
|
|
4263
|
+
}
|
|
4264
|
+
setView(value);
|
|
4265
|
+
} });
|
|
4266
|
+
};
|
|
4267
|
+
var nav_default = Nav;
|
|
4268
|
+
|
|
4269
|
+
// src/cli/components/agent-selector.tsx
|
|
4270
|
+
var import_ink2 = require("ink");
|
|
4271
|
+
var import_ui2 = require("@inkjs/ui");
|
|
4272
|
+
var import_jsx_runtime2 = require("react/jsx-runtime");
|
|
4273
|
+
var AgentSelector = ({ exulu, setAgent, setEvaluations }) => {
|
|
4274
|
+
const agents = exulu.agents.map((agent) => ({
|
|
4275
|
+
label: agent.name,
|
|
4276
|
+
value: agent.id
|
|
4277
|
+
}));
|
|
4278
|
+
return /* @__PURE__ */ (0, import_jsx_runtime2.jsxs)(import_jsx_runtime2.Fragment, { children: [
|
|
4279
|
+
/* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ink2.Text, { children: "Please select an agent:" }),
|
|
4280
|
+
/* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ui2.Select, { options: agents, onChange: (value) => {
|
|
4281
|
+
console.log("selected agent", value);
|
|
4282
|
+
const agent = exulu.agent(value);
|
|
4283
|
+
if (!agent) {
|
|
4284
|
+
console.error("Agent not found", value);
|
|
4285
|
+
return;
|
|
4286
|
+
}
|
|
4287
|
+
setAgent(agent);
|
|
4288
|
+
if (agent) {
|
|
4289
|
+
setEvaluations(agent.evals || []);
|
|
4290
|
+
}
|
|
4291
|
+
} })
|
|
4292
|
+
] });
|
|
4293
|
+
};
|
|
4294
|
+
var agent_selector_default = AgentSelector;
|
|
4295
|
+
|
|
4296
|
+
// src/cli/components/eval-selector.tsx
|
|
4297
|
+
var import_ui3 = require("@inkjs/ui");
|
|
4298
|
+
var import_jsx_runtime3 = require("react/jsx-runtime");
|
|
4299
|
+
var EvalSelector = ({ evaluations, setEvaluation }) => {
|
|
4300
|
+
return /* @__PURE__ */ (0, import_jsx_runtime3.jsx)(import_ui3.Select, { options: evaluations.map((evaluation) => ({
|
|
4301
|
+
label: evaluation.runner.name,
|
|
4302
|
+
value: evaluation.runner.name
|
|
4303
|
+
})), onChange: (value) => {
|
|
4304
|
+
console.log("selected eval", value);
|
|
4305
|
+
const evaluation = evaluations?.find((evaluation2) => evaluation2.runner.name === value);
|
|
4306
|
+
if (evaluation) {
|
|
4307
|
+
setEvaluation(evaluation);
|
|
4308
|
+
}
|
|
4309
|
+
} });
|
|
4310
|
+
};
|
|
4311
|
+
var eval_selector_default = EvalSelector;
|
|
4312
|
+
|
|
4313
|
+
// src/cli/components/eval-actions.tsx
|
|
4314
|
+
var import_react = require("react");
|
|
4315
|
+
var import_ui4 = require("@inkjs/ui");
|
|
4316
|
+
var import_ink3 = require("ink");
|
|
4317
|
+
var import_jsx_runtime4 = require("react/jsx-runtime");
|
|
4318
|
+
var EvalActions = ({ agent, evaluation, setEvaluation }) => {
|
|
4319
|
+
const [progress, setProgress] = (0, import_react.useState)(0);
|
|
4320
|
+
const [results, setResults] = (0, import_react.useState)([]);
|
|
4321
|
+
const [running, setRunning] = (0, import_react.useState)();
|
|
4322
|
+
const run = async (evaluation2) => {
|
|
4323
|
+
setRunning({
|
|
4324
|
+
label: evaluation2.runner.name
|
|
4325
|
+
});
|
|
4326
|
+
const testCases = evaluation2.runner.testcases;
|
|
4327
|
+
const total = testCases.length;
|
|
4328
|
+
if (!testCases) {
|
|
4329
|
+
throw new Error("No test cases found");
|
|
4330
|
+
}
|
|
4331
|
+
let i = 0;
|
|
4332
|
+
for (const testCase of testCases) {
|
|
4333
|
+
i++;
|
|
4334
|
+
const result = await evaluation2.runner.run({
|
|
4335
|
+
data: testCase,
|
|
4336
|
+
runner: {
|
|
4337
|
+
agent
|
|
4338
|
+
}
|
|
4339
|
+
});
|
|
4340
|
+
setProgress(Math.round(i / total * 100));
|
|
4341
|
+
setResults([...results, {
|
|
4342
|
+
name: evaluation2.runner.name,
|
|
4343
|
+
prompt: testCase.prompt?.slice(0, 100) + "...",
|
|
4344
|
+
score: result.score,
|
|
4345
|
+
comment: result.comment
|
|
4346
|
+
}]);
|
|
4347
|
+
}
|
|
4348
|
+
setRunning(void 0);
|
|
4349
|
+
};
|
|
4350
|
+
if (progress === 100) {
|
|
4351
|
+
return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
|
|
4352
|
+
/* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ink3.Text, { children: "Evaluations completed." }),
|
|
4353
|
+
/* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList, { children: results.map((result) => /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
|
|
4354
|
+
result.name,
|
|
4355
|
+
": ",
|
|
4356
|
+
result.score,
|
|
4357
|
+
" - ",
|
|
4358
|
+
result.comment
|
|
4359
|
+
] }) })) })
|
|
4360
|
+
] });
|
|
4361
|
+
}
|
|
4362
|
+
if (running) {
|
|
4363
|
+
return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
|
|
4364
|
+
/* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
|
|
4365
|
+
"Running ",
|
|
4366
|
+
running.label,
|
|
4367
|
+
"..."
|
|
4368
|
+
] }),
|
|
4369
|
+
/* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.ProgressBar, { value: progress })
|
|
4370
|
+
] });
|
|
4371
|
+
}
|
|
4372
|
+
return /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.Select, { options: [{
|
|
4373
|
+
label: "Run evaluation",
|
|
4374
|
+
value: "run"
|
|
4375
|
+
}, {
|
|
4376
|
+
label: "Go back",
|
|
4377
|
+
value: "back"
|
|
4378
|
+
}], onChange: (value) => {
|
|
4379
|
+
if (value === "back") {
|
|
4380
|
+
setEvaluation(void 0);
|
|
4381
|
+
}
|
|
4382
|
+
if (value === "run") {
|
|
4383
|
+
run(evaluation);
|
|
4384
|
+
}
|
|
4385
|
+
} });
|
|
4386
|
+
};
|
|
4387
|
+
var eval_actions_default = EvalActions;
|
|
4388
|
+
|
|
4389
|
+
// src/cli/index.tsx
|
|
4390
|
+
var import_jsx_runtime5 = require("react/jsx-runtime");
|
|
4391
|
+
var Main = ({ exulu }) => {
|
|
4392
|
+
(0, import_patch_console.default)((stream, data) => {
|
|
4393
|
+
setLogs([...logs, data]);
|
|
4394
|
+
});
|
|
4395
|
+
const [logs, setLogs] = (0, import_react2.useState)([]);
|
|
4396
|
+
const [view, setView] = (0, import_react2.useState)();
|
|
4397
|
+
const [agent, setAgent] = (0, import_react2.useState)();
|
|
4398
|
+
const [evaluations, setEvaluations] = (0, import_react2.useState)([]);
|
|
4399
|
+
const [evaluation, setEvaluation] = (0, import_react2.useState)();
|
|
4400
|
+
return /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Box, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
|
|
4401
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: "Logs:" }),
|
|
4402
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList, { children: logs.map((log, index) => /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: log }) })) }),
|
|
4403
|
+
!view && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(nav_default, { setView }),
|
|
4404
|
+
view === "agents" && !agent && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(agent_selector_default, { exulu, setAgent, setEvaluations }),
|
|
4405
|
+
view === "agents" && agent && !evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
|
|
4406
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
|
|
4407
|
+
'Selected agent "',
|
|
4408
|
+
agent.name,
|
|
4409
|
+
'". Please select an evaluation:'
|
|
4410
|
+
] }),
|
|
4411
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_selector_default, { evaluations, setEvaluation })
|
|
4412
|
+
] }),
|
|
4413
|
+
view === "agents" && agent && evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
|
|
4414
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
|
|
4415
|
+
"Selected evaluation: ",
|
|
4416
|
+
evaluation.runner.name
|
|
4417
|
+
] }),
|
|
4418
|
+
/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_actions_default, { agent, evaluation, setEvaluation })
|
|
4419
|
+
] })
|
|
4420
|
+
] });
|
|
4421
|
+
};
|
|
4422
|
+
var cli_default = {
|
|
4423
|
+
run: (exulu) => {
|
|
4424
|
+
(0, import_ink4.render)(/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(Main, { exulu }));
|
|
4425
|
+
}
|
|
4426
|
+
};
|
|
4427
|
+
|
|
4428
|
+
// src/index.ts
|
|
4041
4429
|
var ExuluJobs = {
|
|
4042
4430
|
redis: redisClient,
|
|
4043
4431
|
jobs: {
|
|
@@ -4069,9 +4457,11 @@ var ExuluDatabase = {
|
|
|
4069
4457
|
ExuluApp,
|
|
4070
4458
|
ExuluAuthentication,
|
|
4071
4459
|
ExuluChunkers,
|
|
4460
|
+
ExuluCli,
|
|
4072
4461
|
ExuluContext,
|
|
4073
4462
|
ExuluDatabase,
|
|
4074
4463
|
ExuluEmbedder,
|
|
4464
|
+
ExuluEval,
|
|
4075
4465
|
ExuluJobs,
|
|
4076
4466
|
ExuluLogger,
|
|
4077
4467
|
ExuluQueues,
|