@exulu/backend 0.3.0 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -36,9 +36,11 @@ __export(index_exports, {
36
36
  ExuluApp: () => ExuluApp,
37
37
  ExuluAuthentication: () => authentication,
38
38
  ExuluChunkers: () => ExuluChunkers,
39
+ ExuluCli: () => cli_default,
39
40
  ExuluContext: () => ExuluContext,
40
41
  ExuluDatabase: () => ExuluDatabase,
41
42
  ExuluEmbedder: () => ExuluEmbedder,
43
+ ExuluEval: () => ExuluEval,
42
44
  ExuluJobs: () => ExuluJobs,
43
45
  ExuluLogger: () => ExuluLogger,
44
46
  ExuluQueues: () => queues,
@@ -286,6 +288,58 @@ var workflowSchema = {
286
288
  }
287
289
  ]
288
290
  };
291
// Schema descriptor for stored evaluation results: plural/singular entity names plus the
// typed field list. The `fields` array is iterated when the "eval_results" table is created,
// and the whole descriptor is passed to createSDL alongside the other schemas.
+ var evalResultsSchema = {
292
+ name: {
293
+ plural: "eval_results",
294
+ singular: "eval_result"
295
+ },
296
+ fields: [
297
+ {
298
+ name: "input",
299
+ type: "longText"
300
+ },
301
+ {
302
+ name: "output",
303
+ type: "longText"
304
+ },
305
+ {
306
+ name: "duration",
307
+ type: "number"
308
+ },
309
+ {
310
+ name: "category",
311
+ type: "text"
312
+ },
313
+ {
314
+ name: "metadata",
315
+ type: "json"
316
+ },
317
+ {
318
+ name: "result",
319
+ type: "number"
320
+ },
321
+ {
322
+ name: "agent_id",
323
+ type: "text"
324
+ },
325
+ {
326
+ name: "workflow_id",
327
+ type: "text"
328
+ },
329
+ {
330
+ name: "eval_type",
331
+ type: "text"
332
+ },
333
+ {
334
+ name: "eval_name",
335
+ type: "text"
336
+ },
337
+ {
338
+ name: "comment",
339
+ type: "longText"
340
+ }
341
+ ]
342
+ };
289
343
  var threadsSchema = {
290
344
  name: {
291
345
  plural: "threads",
@@ -559,6 +613,27 @@ var up = async function(knex) {
559
613
  }
560
614
  });
561
615
  }
616
// Create the "eval_results" table if it does not exist yet. Besides the fixed id/timestamp
// columns, one column is added per entry in evalResultsSchema.fields.
if (!await knex.schema.hasTable("eval_results")) {
  await knex.schema.createTable("eval_results", (table) => {
    table.uuid("id").primary().defaultTo(knex.fn.uuid());
    table.date("createdAt").defaultTo(knex.fn.now());
    table.date("updatedAt").defaultTo(knex.fn.now());
    for (const field of evalResultsSchema.fields) {
      const { type, name, references, default: defaultValue } = field;
      // Field definitions without a type or a name cannot be mapped to a column.
      if (!type || !name) {
        continue;
      }
      if (type === "reference") {
        if (!references) {
          throw new Error("Field with type reference must have a reference definition.");
        }
        table.uuid(name).references(references.field).inTable(references.table);
        // Must be `continue`, not `return`: returning here would leave the table-builder
        // callback and silently skip every field defined after the first reference.
        continue;
      }
      mapType(table, type, sanitizeName(name), defaultValue);
    }
  });
}
562
637
  if (!await knex.schema.hasTable("statistics")) {
563
638
  await knex.schema.createTable("statistics", (table) => {
564
639
  table.uuid("id").primary().defaultTo(knex.fn.uuid());
@@ -730,12 +805,10 @@ var execute = async () => {
730
805
  // src/registry/classes.ts
731
806
  var import_zod = require("zod");
732
807
  var import_bullmq2 = require("bullmq");
733
- var import_core = require("@mastra/core");
734
808
  var import_zod2 = require("zod");
735
809
  var fs = __toESM(require("fs"), 1);
736
810
  var path = __toESM(require("path"), 1);
737
- var import_memory = require("@mastra/memory");
738
- var import_pg = require("@mastra/pg");
811
+ var import_ai = require("ai");
739
812
 
740
813
  // types/enums/statistics.ts
741
814
  var STATISTICS_TYPE_ENUM = {
@@ -750,6 +823,11 @@ var STATISTICS_TYPE_ENUM = {
750
823
  AGENT_RUN: "agent.run"
751
824
  };
752
825
 
826
+ // types/enums/eval-types.ts
827
// Identifiers for the supported kinds of evaluation. ExuluEval writes `llm_as_judge`
// into the `eval_type` column when it inserts a row into "eval_results".
+ var EVAL_TYPES_ENUM = {
828
+ llm_as_judge: "llm_as_judge"
829
+ };
830
+
753
831
  // src/registry/classes.ts
754
832
  var import_knex4 = __toESM(require("pgvector/knex"), 1);
755
833
 
@@ -853,6 +931,83 @@ var JOB_STATUS_ENUM = {
853
931
  stuck: "stuck"
854
932
  };
855
933
 
934
+ // src/evals/utils/index.tsx
935
/**
 * Helpers for building evaluation test sets.
 */
var ExuluEvalUtils = {
  /**
   * Builds "needle in a haystack" test cases.
   *
   * For every context length (in tokens, converted at 4 characters per token) the test
   * document is truncated to that budget and split into depth ranges of 5000 tokens. One
   * test case is produced per depth range; in each, every needle answer is inserted at a
   * random position inside that range.
   *
   * @param {object} options
   * @param {string} options.label - Prefix used in each test case's `category`.
   * @param {number[]} options.contextlengths - Context sizes in tokens.
   * @param {{question: string, answer: string}[]} options.needles - Facts to hide and ask about.
   * @param {string} options.testDocument - Haystack text the needles are inserted into.
   * @returns {{prompt: string, category: string, metadata: object}[]} Flat list of test cases.
   */
  niahTestSet: ({
    label,
    contextlengths,
    needles,
    testDocument
  }) => {
    const charsPerToken = 4;
    const depthInterval = 5e3 * charsPerToken;
    // Characters of haystack budget given up per needle so the prompt stays near the target size.
    const needleReserve = 200;
    const questionList = needles.map((needle, index) => `- ${index + 1}: ${needle.question}`).join("\n");
    const testCases = contextlengths.map((contextlength) => {
      const totalChars = contextlength * charsPerToken;
      // Clamp at 0: a negative end index would make slice() count from the END of the
      // document and return almost all of it instead of an empty haystack.
      const testText = testDocument.slice(0, Math.max(0, totalChars - needles.length * needleReserve));
      // Always keep at least one depth range, otherwise context lengths below one
      // interval (5000 tokens) would silently produce no test cases at all.
      const rangeCount = Math.max(1, Math.floor(totalChars / depthInterval));
      const depths = Array.from(
        { length: rangeCount },
        (_, i) => Math.min((i + 1) * depthInterval, totalChars)
      );
      console.log("[EXULU] contextlength: ", {
        tokens: contextlength,
        chars: totalChars,
        depths
      });
      return depths.map((depth, index) => {
        const first = index === 0;
        const last = index === depths.length - 1;
        const start = first ? 0 : depths[index - 1];
        // The last range absorbs any remainder up to the full context size.
        const end = last ? totalChars : depths[index];
        console.log("[EXULU] Niah positions: ", {
          start,
          end,
          depth,
          index
        });
        const insertions = needles.map((needle) => ({
          position: start + Math.floor(Math.random() * (end - start)),
          needle: needle.answer
        }));
        // Insert from the back so earlier positions are not shifted by later insertions.
        insertions.sort((a, b) => b.position - a.position);
        console.log("[EXULU] Niah insertions: ", insertions);
        const modifiedTestText = insertions.reduce((text, { position, needle }) => {
          const insertionPosition = Math.min(position, text.length);
          return text.slice(0, insertionPosition) + needle + text.slice(insertionPosition);
        }, testText);
        const prompt = [
          "You are a helpful assistant.",
          "",
          "You are given a text.",
          "",
          "You need to answer the following question, using only the information from the text provided below. Do not hallucinate",
          `or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".`,
          "",
          questionList,
          "",
          "The text is:",
          "",
          modifiedTestText,
          ""
        ].join("\n");
        return {
          prompt,
          category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`,
          metadata: {
            contextLength: contextlength,
            depth,
            needles
          }
        };
      });
    });
    const flattenedTestCases = testCases.flat();
    console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
    console.table(flattenedTestCases.map((data) => ({
      chars: data.prompt.length,
      tokens: data.prompt.length / 4,
      category: data.category,
      metadata: data.metadata
    })));
    return flattenedTestCases;
  }
};
1010
+
856
1011
  // src/registry/classes.ts
857
1012
  function generateSlug(name) {
858
1013
  const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
@@ -881,72 +1036,52 @@ var ExuluAgent = class {
881
1036
  description = "";
882
1037
  slug = "";
883
1038
  streaming = false;
884
- type;
885
- outputSchema;
886
1039
  rateLimit;
887
1040
  config;
888
- memory;
1041
+ // private memory: Memory | undefined; // TODO remove mastra and do own implementation
889
1042
  tools;
890
- agent;
1043
+ evals;
1044
+ model;
891
1045
  capabilities;
892
- constructor({ id, name, description, outputSchema, config, rateLimit, type, capabilities, tools }) {
1046
+ constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
893
1047
  this.id = id;
894
1048
  this.name = name;
895
- this.type = type;
1049
+ this.evals = evals;
896
1050
  this.description = description;
897
- this.outputSchema = outputSchema;
898
1051
  this.rateLimit = rateLimit;
899
1052
  this.tools = tools;
900
1053
  this.config = config;
901
1054
  this.capabilities = capabilities;
902
1055
  this.slug = `/agents/${generateSlug(this.name)}/run`;
903
- if (this.type === "agent") {
904
- this.agent = new import_core.Agent({
905
- name: this.config.name,
906
- instructions: this.config.instructions,
907
- model: this.config.model,
908
- memory: this.memory ? this.memory : void 0
909
- });
1056
+ this.model = this.config.model;
1057
+ }
1058
+ generate = async ({ prompt, stream }) => {
1059
+ if (!this.model) {
1060
+ throw new Error("Model is required for streaming.");
910
1061
  }
911
- if (config?.memory) {
912
- console.log("[EXULU] Initializing memory for agent " + this.name);
913
- const connectionString = `postgresql://${process.env.POSTGRES_DB_USER}:${process.env.POSTGRES_DB_PASSWORD}@${process.env.POSTGRES_DB_HOST}:${process.env.POSTGRES_DB_PORT}/exulu`;
914
- this.memory = new import_memory.Memory({
915
- storage: new import_pg.PostgresStore({
916
- host: process.env.POSTGRES_DB_HOST || "",
917
- port: parseInt(process.env.POSTGRES_DB_PORT || "5432"),
918
- user: process.env.POSTGRES_DB_USER || "",
919
- database: "exulu",
920
- // putting it into an own database that is not managed by exulu
921
- password: process.env.POSTGRES_DB_PASSWORD || "",
922
- ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
923
- }),
924
- ...config?.memory.vector ? {
925
- vector: new import_pg.PgVector({
926
- connectionString
927
- })
928
- } : {},
929
- options: {
930
- lastMessages: config?.memory.lastMessages || 10,
931
- semanticRecall: {
932
- topK: config?.memory.semanticRecall.topK || 3,
933
- messageRange: config?.memory.semanticRecall.messageRange || 2
934
- }
935
- }
1062
+ if (this.config.outputSchema) {
1063
+ if (stream) {
1064
+ }
1065
+ const { object } = await (0, import_ai.generateObject)({
1066
+ model: this.model,
1067
+ schema: this.config.outputSchema,
1068
+ prompt
936
1069
  });
1070
+ return object;
937
1071
  }
938
- }
939
- chat = () => {
940
- if (!this.agent) {
941
- throw new Error("Agent not found");
1072
+ if (stream) {
1073
+ const result = (0, import_ai.streamText)({
1074
+ model: this.model,
1075
+ prompt
1076
+ });
1077
+ const text2 = await result.text;
1078
+ return text2;
942
1079
  }
943
- updateStatistic({
944
- name: "count",
945
- label: this.name,
946
- type: STATISTICS_TYPE_ENUM.AGENT_RUN,
947
- trigger: "agent"
1080
+ const { text } = await (0, import_ai.generateText)({
1081
+ model: this.model,
1082
+ prompt
948
1083
  });
949
- return this.agent;
1084
+ return text;
950
1085
  };
951
1086
  };
952
1087
  var ExuluEmbedder = class {
@@ -1142,35 +1277,117 @@ var ExuluLogger = class {
1142
1277
  }
1143
1278
  }
1144
1279
  };
1280
/**
 * A named evaluation. `create` holds factories that turn the evaluation into a runnable
 * definition: `{ name, description, testcases, run }`.
 */
var ExuluEval = class {
  name;
  description;
  /**
   * @param {object} options
   * @param {string} options.name - Stored as `eval_name` with every result row.
   * @param {string} options.description - Description exposed on the created runner.
   */
  constructor({ name, description }) {
    this.name = name;
    this.description = description;
  }
  create = {
    LlmAsAJudge: {
      /**
       * Creates a "needle in a haystack" evaluation that is scored by a judge model.
       *
       * @param {object} options
       * @param {string} options.label - Prefix for the generated test case categories.
       * @param {*} options.model - Model handed to `generateObject` for judging.
       * @param {{question: string, answer: string}[]} options.needles - Facts to hide and ask about.
       * @param {string} options.testDocument - Haystack text.
       * @param {number[]} [options.contextlengths] - Context sizes in tokens; defaults to
       *   [5000, 30000, 50000, 128000].
       * @returns {{name: string, description: string, testcases: object[], run: Function}}
       */
      niah: ({ label, model, needles, testDocument, contextlengths }) => {
        return {
          name: this.name,
          description: this.description,
          testcases: ExuluEvalUtils.niahTestSet({
            label,
            contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3],
            needles,
            testDocument
          }),
          /**
           * Runs one test case: obtains the agent's answer (unless `data.result` is already
           * set), lets the judge model score it, stores the outcome in "eval_results" and
           * returns `{ score, comment }`.
           *
           * @throws {Error} When a workflow runner is passed, no agent is passed, or the
           *   test case has neither a result nor a prompt.
           */
          run: async ({ data, runner }) => {
            if (runner.workflow) {
              throw new Error("Workflows are not supported for the needle in a haystack eval.");
            }
            if (!runner.agent) {
              throw new Error("Agent is required for the needle in a haystack eval.");
            }
            if (!data.result) {
              if (!data.prompt) {
                throw new Error("Prompt is required for running an agent.");
              }
              const startedAt = Date.now();
              data.result = await runner.agent.generate({
                prompt: data.prompt,
                stream: false
              });
              // Nothing else populates `duration`, so record the elapsed wall-clock time of
              // the agent call unless the caller supplied one.
              // NOTE(review): milliseconds assumed - confirm the unit expected for eval_results.duration.
              if (data.duration == null) {
                data.duration = Date.now() - startedAt;
              }
            }
            // The agent's generate() may resolve to a structured object instead of a string;
            // plain interpolation would turn that into "[object Object]".
            const answerText = typeof data.result === "string" ? data.result : JSON.stringify(data.result);
            const correctAnswers = needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n");
            const judgePrompt = [
              `You are checking if the below "actual_answers" contain the correct information as`,
              `presented in the "correct_answers" section to calculate the correctness score.`,
              "",
              "The correctness score should be a number between 0 and 1. 1 is the highest score.",
              "",
              `For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the`,
              `score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the`,
              `score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the`,
              "score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.",
              "",
              "You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,",
              "punctuation, etc., if the content of the actual answer is still correct.",
              "",
              "Also provide a comment on how you came to your conclusion.",
              "",
              "<actual_answers>",
              answerText,
              "</actual_answers>",
              "",
              "<correct_answers>",
              correctAnswers,
              "</correct_answers>"
            ].join("\n");
            const { object } = await (0, import_ai.generateObject)({
              model,
              maxRetries: 3,
              schema: import_zod2.z.object({
                correctnessScore: import_zod2.z.number(),
                comment: import_zod2.z.string()
              }),
              prompt: judgePrompt
            });
            console.log("[EXULU] eval result", object);
            const { db: db2 } = await postgresClient();
            await db2("eval_results").insert({
              input: data.prompt,
              output: answerText,
              duration: data.duration,
              result: object.correctnessScore,
              agent_id: runner.agent.id || void 0,
              eval_type: EVAL_TYPES_ENUM.llm_as_judge,
              eval_name: this.name,
              comment: object.comment,
              category: data.category,
              metadata: data.metadata,
              createdAt: db2.fn.now(),
              updatedAt: db2.fn.now()
            });
            return {
              score: object.correctnessScore,
              comment: object.comment
            };
          }
        };
      }
    }
  };
};
1145
1372
  var ExuluTool = class {
1146
1373
  id;
1147
1374
  name;
1148
1375
  description;
1149
- inputSchema;
1150
- outputSchema;
1376
+ parameters;
1151
1377
  type;
1152
- _execute;
1153
- constructor({ id, name, description, inputSchema, outputSchema, type, execute: execute2 }) {
1378
+ tool;
1379
+ constructor({ id, name, description, parameters, type, execute: execute2 }) {
1154
1380
  this.id = id;
1155
1381
  this.name = name;
1156
1382
  this.description = description;
1157
- this.inputSchema = inputSchema;
1158
- this.outputSchema = outputSchema;
1383
+ this.parameters = parameters;
1159
1384
  this.type = type;
1160
- this._execute = execute2;
1161
- }
1162
- execute = async (inputs) => {
1163
- if (!this._execute) {
1164
- throw new Error("Tool has no execute function.");
1165
- }
1166
- updateStatistic({
1167
- name: "count",
1168
- label: this.name,
1169
- type: STATISTICS_TYPE_ENUM.TOOL_CALL,
1170
- trigger: "agent"
1385
+ this.tool = (0, import_ai.tool)({
1386
+ description,
1387
+ parameters,
1388
+ execute: execute2
1171
1389
  });
1172
- return await this._execute(inputs);
1173
- };
1390
+ }
1174
1391
  };
1175
1392
  var ExuluContext = class {
1176
1393
  id;
@@ -1572,21 +1789,9 @@ var ExuluContext = class {
1572
1789
  id: this.id,
1573
1790
  name: `${this.name} context`,
1574
1791
  type: "context",
1575
- inputSchema: import_zod2.z.object({
1792
+ parameters: import_zod2.z.object({
1576
1793
  query: import_zod2.z.string()
1577
1794
  }),
1578
- outputSchema: import_zod2.z.object({
1579
- // todo check if result format is still correct based on above getItems function
1580
- results: import_zod2.z.array(import_zod2.z.object({
1581
- count: import_zod2.z.number(),
1582
- results: import_zod2.z.array(import_zod2.z.object({
1583
- id: import_zod2.z.string(),
1584
- content: import_zod2.z.string(),
1585
- metadata: import_zod2.z.record(import_zod2.z.any())
1586
- })),
1587
- errors: import_zod2.z.array(import_zod2.z.string()).optional()
1588
- }))
1589
- }),
1590
1795
  description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
1591
1796
  execute: async ({ context }) => {
1592
1797
  return await this.getItems({
@@ -2746,7 +2951,7 @@ var createUppyRoutes = async (app) => {
2746
2951
  };
2747
2952
 
2748
2953
  // src/registry/routes.ts
2749
- var import_utils = require("@apollo/utils.keyvaluecache");
2954
+ var import_utils2 = require("@apollo/utils.keyvaluecache");
2750
2955
  var global_queues = {
2751
2956
  logs_cleaner: "logs-cleaner"
2752
2957
  };
@@ -2844,10 +3049,10 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2844
3049
  } else {
2845
3050
  console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
2846
3051
  }
2847
- const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
3052
+ const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
2848
3053
  console.log("[EXULU] graphql server");
2849
3054
  const server = new import_server3.ApolloServer({
2850
- cache: new import_utils.InMemoryLRUCache(),
3055
+ cache: new import_utils2.InMemoryLRUCache(),
2851
3056
  schema,
2852
3057
  introspection: true
2853
3058
  });
@@ -2912,13 +3117,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2912
3117
  });
2913
3118
  });
2914
3119
  app.get("/tools", async (req, res) => {
2915
- res.status(200).json(tools.map((tool) => ({
2916
- id: tool.id,
2917
- name: tool.name,
2918
- description: tool.description,
2919
- type: tool.type || "tool",
2920
- inputSchema: tool.inputSchema ? (0, import_zodex.zerialize)(tool.inputSchema) : null,
2921
- outputSchema: tool.outputSchema ? (0, import_zodex.zerialize)(tool.outputSchema) : null
3120
+ res.status(200).json(tools.map((tool2) => ({
3121
+ id: tool2.id,
3122
+ name: tool2.name,
3123
+ description: tool2.description,
3124
+ type: tool2.type || "tool",
3125
+ inputSchema: tool2.inputSchema ? (0, import_zodex.zerialize)(tool2.inputSchema) : null,
3126
+ outputSchema: tool2.outputSchema ? (0, import_zodex.zerialize)(tool2.outputSchema) : null
2922
3127
  })));
2923
3128
  });
2924
3129
  app.get("/tools/:id", async (req, res) => {
@@ -2929,14 +3134,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2929
3134
  });
2930
3135
  return;
2931
3136
  }
2932
- const tool = tools.find((tool2) => tool2.id === id);
2933
- if (!tool) {
3137
+ const tool2 = tools.find((tool3) => tool3.id === id);
3138
+ if (!tool2) {
2934
3139
  res.status(400).json({
2935
3140
  message: "Tool not found."
2936
3141
  });
2937
3142
  return;
2938
3143
  }
2939
- res.status(200).json(tool);
3144
+ res.status(200).json(tool2);
2940
3145
  });
2941
3146
  const deleteItem = async ({
2942
3147
  id,
@@ -4029,6 +4234,198 @@ var ExuluApp = class {
4029
4234
 
4030
4235
  // src/index.ts
4031
4236
  var import_chonkie = require("chonkie");
4237
+
4238
+ // src/cli/index.tsx
4239
+ var import_react2 = require("react");
4240
+ var import_ink4 = require("ink");
4241
+ var import_ui5 = require("@inkjs/ui");
4242
+ var import_patch_console = __toESM(require("patch-console"), 1);
4243
+
4244
+ // src/cli/components/nav.tsx
4245
+ var import_ui = require("@inkjs/ui");
4246
+ var import_ink = require("ink");
4247
+ var import_jsx_runtime = require("react/jsx-runtime");
4248
// Options shown in the top-level CLI menu.
var nav = [
  {
    label: "Agents",
    value: "agents"
  },
  {
    label: "Exit",
    value: "exit"
  }
];
/**
 * Top-level CLI menu. Selecting "Exit" quits the ink app; any other option is
 * forwarded to `setView`.
 *
 * @param {object} props
 * @param {(view: string) => void} props.setView - Receives the selected view value.
 */
var Nav = ({ setView }) => {
  const { exit } = (0, import_ink.useApp)();
  const handleChange = (value) => {
    if (value === "exit") {
      exit();
      // Stop here: "exit" is not a view, and the app is shutting down.
      return;
    }
    setView(value);
  };
  return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(import_ui.Select, { options: nav, onChange: handleChange });
};
var nav_default = Nav;
4268
+
4269
+ // src/cli/components/agent-selector.tsx
4270
+ var import_ink2 = require("ink");
4271
+ var import_ui2 = require("@inkjs/ui");
4272
+ var import_jsx_runtime2 = require("react/jsx-runtime");
4273
/**
 * Lets the user pick one of `exulu.agents`. The chosen agent is looked up through
 * `exulu.agent(id)` and passed to `setAgent`; its `evals` (or an empty list) go to
 * `setEvaluations`.
 *
 * @param {object} props
 * @param {object} props.exulu - App instance exposing `agents` and `agent(id)`.
 * @param {Function} props.setAgent - Receives the selected agent.
 * @param {Function} props.setEvaluations - Receives the selected agent's evaluations.
 */
var AgentSelector = ({ exulu, setAgent, setEvaluations }) => {
  const agents = exulu.agents.map((agent) => ({
    label: agent.name,
    value: agent.id
  }));
  const handleChange = (value) => {
    console.log("selected agent", value);
    const agent = exulu.agent(value);
    if (!agent) {
      console.error("Agent not found", value);
      return;
    }
    // The guard above already guarantees an agent, so no second check is needed.
    setAgent(agent);
    setEvaluations(agent.evals || []);
  };
  return /* @__PURE__ */ (0, import_jsx_runtime2.jsxs)(import_jsx_runtime2.Fragment, { children: [
    /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ink2.Text, { children: "Please select an agent:" }),
    /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ui2.Select, { options: agents, onChange: handleChange })
  ] });
};
var agent_selector_default = AgentSelector;
4295
+
4296
+ // src/cli/components/eval-selector.tsx
4297
+ var import_ui3 = require("@inkjs/ui");
4298
+ var import_jsx_runtime3 = require("react/jsx-runtime");
4299
/**
 * Lets the user pick one evaluation, identified by `evaluation.runner.name`.
 *
 * @param {object} props
 * @param {object[]} [props.evaluations] - Available evaluations; a missing list is
 *   treated as empty so rendering and the change handler behave consistently.
 * @param {Function} props.setEvaluation - Receives the selected evaluation.
 */
var EvalSelector = ({ evaluations = [], setEvaluation }) => {
  const options = evaluations.map((evaluation) => ({
    label: evaluation.runner.name,
    value: evaluation.runner.name
  }));
  const handleChange = (value) => {
    console.log("selected eval", value);
    const evaluation = evaluations.find((candidate) => candidate.runner.name === value);
    if (evaluation) {
      setEvaluation(evaluation);
    }
  };
  return /* @__PURE__ */ (0, import_jsx_runtime3.jsx)(import_ui3.Select, { options, onChange: handleChange });
};
var eval_selector_default = EvalSelector;
4312
+
4313
+ // src/cli/components/eval-actions.tsx
4314
+ var import_react = require("react");
4315
+ var import_ui4 = require("@inkjs/ui");
4316
+ var import_ink3 = require("ink");
4317
+ var import_jsx_runtime4 = require("react/jsx-runtime");
4318
/**
 * Action menu for a selected evaluation: run it against the agent or go back.
 * While running it shows a progress bar; once progress reaches 100 it lists the results.
 *
 * @param {object} props
 * @param {object} props.agent - Agent handed to the evaluation runner.
 * @param {object} props.evaluation - Selected evaluation (`runner.name`, `runner.testcases`, `runner.run`).
 * @param {Function} props.setEvaluation - Called with `undefined` to go back.
 */
var EvalActions = ({ agent, evaluation, setEvaluation }) => {
  const [progress, setProgress] = (0, import_react.useState)(0);
  const [results, setResults] = (0, import_react.useState)([]);
  const [running, setRunning] = (0, import_react.useState)();
  // Runs every test case sequentially and collects the scores.
  const run = async (evaluation2) => {
    const testCases = evaluation2.runner.testcases;
    // Validate before touching `.length`, otherwise a missing list surfaces as a TypeError.
    if (!testCases) {
      throw new Error("No test cases found");
    }
    const total = testCases.length;
    setRunning({
      label: evaluation2.runner.name
    });
    try {
      let completed = 0;
      for (const testCase of testCases) {
        const result = await evaluation2.runner.run({
          data: testCase,
          runner: {
            agent
          }
        });
        completed++;
        setProgress(Math.round(completed / total * 100));
        const entry = {
          name: evaluation2.runner.name,
          prompt: testCase.prompt?.slice(0, 100) + "...",
          score: result.score,
          comment: result.comment
        };
        // Functional update: `results` captured by this closure is the value from the render
        // that started the run, so spreading it would keep only the most recent entry.
        setResults((previous) => [...previous, entry]);
      }
    } finally {
      // Always leave the "running" state, even when a test case throws.
      setRunning(void 0);
    }
  };
  if (progress === 100) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ink3.Text, { children: "Evaluations completed." }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList, { children: results.map((result, index) => /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        result.name,
        ": ",
        result.score,
        " - ",
        result.comment
      ] }) }, index)) })
    ] });
  }
  if (running) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        "Running ",
        running.label,
        "..."
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.ProgressBar, { value: progress })
    ] });
  }
  return /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.Select, { options: [{
    label: "Run evaluation",
    value: "run"
  }, {
    label: "Go back",
    value: "back"
  }], onChange: (value) => {
    if (value === "back") {
      setEvaluation(void 0);
    }
    if (value === "run") {
      // The handler cannot await, so report failures instead of leaving the promise floating.
      run(evaluation).catch((error) => {
        console.error("[EXULU] evaluation run failed", error);
      });
    }
  } });
};
var eval_actions_default = EvalActions;
4388
+
4389
+ // src/cli/index.tsx
4390
+ var import_jsx_runtime5 = require("react/jsx-runtime");
4391
/**
 * Root component of the CLI. Shows captured console output, then walks the user through
 * menu -> agent selection -> evaluation selection -> evaluation actions.
 *
 * @param {object} props
 * @param {object} props.exulu - App instance passed on to the agent selector.
 */
var Main = ({ exulu }) => {
  const [logs, setLogs] = (0, import_react2.useState)([]);
  const [view, setView] = (0, import_react2.useState)();
  const [agent, setAgent] = (0, import_react2.useState)();
  const [evaluations, setEvaluations] = (0, import_react2.useState)([]);
  const [evaluation, setEvaluation] = (0, import_react2.useState)();
  // Patch the console once on mount and restore it on unmount. Patching in the render body
  // would install a fresh patch on every render and never undo any of them.
  (0, import_react2.useEffect)(() => {
    const restore = (0, import_patch_console.default)((stream, data) => {
      // Functional update so that several writes between renders are all kept.
      setLogs((previous) => [...previous, data]);
    });
    return restore;
  }, []);
  return /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Box, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: "Logs:" }),
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList, { children: logs.map((log, index) => /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: log }) }, index)) }),
    !view && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(nav_default, { setView }),
    view === "agents" && !agent && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(agent_selector_default, { exulu, setAgent, setEvaluations }),
    view === "agents" && agent && !evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        'Selected agent "',
        agent.name,
        '". Please select an evaluation:'
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_selector_default, { evaluations, setEvaluation })
    ] }),
    view === "agents" && agent && evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        "Selected evaluation: ",
        evaluation.runner.name
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_actions_default, { agent, evaluation, setEvaluation })
    ] })
  ] });
};
4422
// CLI entry point (exported from the package as `ExuluCli`): `run(exulu)` renders the
// Main component with ink, passing the given exulu instance through as a prop.
+ var cli_default = {
4423
+ run: (exulu) => {
4424
+ (0, import_ink4.render)(/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(Main, { exulu }));
4425
+ }
4426
+ };
4426
+ };
4427
+
4428
+ // src/index.ts
4032
4429
  var ExuluJobs = {
4033
4430
  redis: redisClient,
4034
4431
  jobs: {
@@ -4060,9 +4457,11 @@ var ExuluDatabase = {
4060
4457
  ExuluApp,
4061
4458
  ExuluAuthentication,
4062
4459
  ExuluChunkers,
4460
+ ExuluCli,
4063
4461
  ExuluContext,
4064
4462
  ExuluDatabase,
4065
4463
  ExuluEmbedder,
4464
+ ExuluEval,
4066
4465
  ExuluJobs,
4067
4466
  ExuluLogger,
4068
4467
  ExuluQueues,