@exulu/backend 0.2.9 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -36,9 +36,11 @@ __export(index_exports, {
36
36
  ExuluApp: () => ExuluApp,
37
37
  ExuluAuthentication: () => authentication,
38
38
  ExuluChunkers: () => ExuluChunkers,
39
+ ExuluCli: () => cli_default,
39
40
  ExuluContext: () => ExuluContext,
40
41
  ExuluDatabase: () => ExuluDatabase,
41
42
  ExuluEmbedder: () => ExuluEmbedder,
43
+ ExuluEval: () => ExuluEval,
42
44
  ExuluJobs: () => ExuluJobs,
43
45
  ExuluLogger: () => ExuluLogger,
44
46
  ExuluQueues: () => queues,
@@ -286,6 +288,58 @@ var workflowSchema = {
286
288
  }
287
289
  ]
288
290
  };
291
+ var evalResultsSchema = {
292
+ name: {
293
+ plural: "eval_results",
294
+ singular: "eval_result"
295
+ },
296
+ fields: [
297
+ {
298
+ name: "input",
299
+ type: "longText"
300
+ },
301
+ {
302
+ name: "output",
303
+ type: "longText"
304
+ },
305
+ {
306
+ name: "duration",
307
+ type: "number"
308
+ },
309
+ {
310
+ name: "category",
311
+ type: "text"
312
+ },
313
+ {
314
+ name: "metadata",
315
+ type: "json"
316
+ },
317
+ {
318
+ name: "result",
319
+ type: "number"
320
+ },
321
+ {
322
+ name: "agent_id",
323
+ type: "text"
324
+ },
325
+ {
326
+ name: "workflow_id",
327
+ type: "text"
328
+ },
329
+ {
330
+ name: "eval_type",
331
+ type: "text"
332
+ },
333
+ {
334
+ name: "eval_name",
335
+ type: "text"
336
+ },
337
+ {
338
+ name: "comment",
339
+ type: "longText"
340
+ }
341
+ ]
342
+ };
289
343
  var threadsSchema = {
290
344
  name: {
291
345
  plural: "threads",
@@ -559,6 +613,27 @@ var up = async function(knex) {
559
613
  }
560
614
  });
561
615
  }
616
// Create the "eval_results" table if it does not exist yet. Besides the fixed
// id / createdAt / updatedAt columns, one column is added per entry in
// evalResultsSchema.fields.
if (!await knex.schema.hasTable("eval_results")) {
  await knex.schema.createTable("eval_results", (table) => {
    table.uuid("id").primary().defaultTo(knex.fn.uuid());
    table.date("createdAt").defaultTo(knex.fn.now());
    table.date("updatedAt").defaultTo(knex.fn.now());
    for (const field of evalResultsSchema.fields) {
      const { type, name, references, default: defaultValue } = field;
      // Field definitions without a type or a name cannot be mapped to a column.
      if (!type || !name) {
        continue;
      }
      if (type === "reference") {
        if (!references) {
          throw new Error("Field with type reference must have a reference definition.");
        }
        table.uuid(name).references(references.field).inTable(references.table);
        // Move on to the next field. A `return` here would leave the whole
        // createTable callback and silently drop every field that follows
        // the first reference field.
        continue;
      }
      mapType(table, type, sanitizeName(name), defaultValue);
    }
  });
}
562
637
  if (!await knex.schema.hasTable("statistics")) {
563
638
  await knex.schema.createTable("statistics", (table) => {
564
639
  table.uuid("id").primary().defaultTo(knex.fn.uuid());
@@ -730,12 +805,10 @@ var execute = async () => {
730
805
  // src/registry/classes.ts
731
806
  var import_zod = require("zod");
732
807
  var import_bullmq2 = require("bullmq");
733
- var import_core = require("@mastra/core");
734
808
  var import_zod2 = require("zod");
735
809
  var fs = __toESM(require("fs"), 1);
736
810
  var path = __toESM(require("path"), 1);
737
- var import_memory = require("@mastra/memory");
738
- var import_pg = require("@mastra/pg");
811
+ var import_ai = require("ai");
739
812
 
740
813
  // types/enums/statistics.ts
741
814
  var STATISTICS_TYPE_ENUM = {
@@ -750,6 +823,11 @@ var STATISTICS_TYPE_ENUM = {
750
823
  AGENT_RUN: "agent.run"
751
824
  };
752
825
 
826
+ // types/enums/eval-types.ts
827
+ var EVAL_TYPES_ENUM = {
828
+ llm_as_judge: "llm_as_judge"
829
+ };
830
+
753
831
  // src/registry/classes.ts
754
832
  var import_knex4 = __toESM(require("pgvector/knex"), 1);
755
833
 
@@ -853,6 +931,83 @@ var JOB_STATUS_ENUM = {
853
931
  stuck: "stuck"
854
932
  };
855
933
 
934
+ // src/evals/utils/index.tsx
935
/**
 * Helpers for building evaluation test sets.
 */
var ExuluEvalUtils = {
  /**
   * Builds a "needle in a haystack" test set.
   *
   * For every context length (in tokens, converted to characters with a fixed
   * factor of 4) the haystack is split into segments of 5000 tokens. One test
   * case is produced per segment; in each test case every needle answer is
   * inserted at a random position inside that segment.
   *
   * @param {object} options
   * @param {string} options.label - Prefix used in each test case category.
   * @param {number[]} options.contextlengths - Context sizes in tokens.
   * @param {{question: string, answer: string}[]} options.needles - Questions
   *   listed in the prompt and the answers hidden in the text.
   * @param {string} options.testDocument - Source text used as the haystack.
   * @param {() => number} [options.random=Math.random] - Source of numbers in
   *   [0, 1); injectable so that a test set can be built deterministically.
   * @returns {{prompt: string, category: string, metadata: object}[]} Flat list
   *   of test cases across all context lengths.
   */
  niahTestSet: ({
    label,
    contextlengths,
    needles,
    testDocument,
    random = Math.random
  }) => {
    const charsPerToken = 4;
    const depthInterval = 5e3 * charsPerToken;
    const testCases = contextlengths.map((contextlength) => {
      const totalChars = contextlength * charsPerToken;
      // Reserve 200 characters per needle. Clamp at 0: a negative slice end
      // would trim from the END of the document instead of yielding no text.
      const testText = testDocument.slice(0, Math.max(0, totalChars - needles.length * 200));
      // Always build at least one segment so context lengths below one
      // interval still produce a test case.
      const segmentCount = Math.max(1, Math.floor(totalChars / depthInterval));
      const depths = Array.from(
        { length: segmentCount },
        (_, i) => Math.min((i + 1) * depthInterval, totalChars)
      );
      console.log("[EXULU] contextlength: ", {
        tokens: contextlength,
        chars: totalChars,
        depths
      });
      return depths.map((depth, index) => {
        const first = index === 0;
        const last = index === depths.length - 1;
        const start = first ? 0 : depths[index - 1];
        // The last segment absorbs whatever remains of the context.
        const end = last ? totalChars : depths[index];
        console.log("[EXULU] Niah positions: ", {
          start,
          end,
          depth,
          index
        });
        const insertions = needles.map((needle) => ({
          position: start + Math.floor(random() * (end - start)),
          needle: needle.answer
        }));
        // Insert from the back so earlier positions are not shifted by
        // needles that were already inserted.
        insertions.sort((a, b) => b.position - a.position);
        console.log("[EXULU] Niah insertions: ", insertions);
        let modifiedTestText = testText;
        for (const { position, needle } of insertions) {
          const insertionPosition = Math.min(position, modifiedTestText.length);
          const beforeNeedle = modifiedTestText.slice(0, insertionPosition);
          const afterNeedle = modifiedTestText.slice(insertionPosition);
          modifiedTestText = beforeNeedle + needle + afterNeedle;
        }
        return {
          prompt: `You are a helpful assistant.

You are given a text.

You need to answer the following question, using only the information from the text provided below. Do not hallucinate
or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".

${needles.map((needle, index2) => `- ${index2 + 1}: ${needle.question}`).join("\n")}

The text is:

${modifiedTestText}
`,
          category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`,
          metadata: {
            contextLength: contextlength,
            depth,
            needles
          }
        };
      });
    });
    const flattenedTestCases = testCases.flat();
    console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
    console.table(flattenedTestCases.map((data) => ({
      chars: data.prompt?.length || 0,
      tokens: data.prompt?.length / 4 || 0,
      category: data.category,
      metadata: data.metadata
    })));
    return flattenedTestCases;
  }
};
1010
+
856
1011
  // src/registry/classes.ts
857
1012
  function generateSlug(name) {
858
1013
  const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
@@ -881,72 +1036,52 @@ var ExuluAgent = class {
881
1036
  description = "";
882
1037
  slug = "";
883
1038
  streaming = false;
884
- type;
885
- outputSchema;
886
1039
  rateLimit;
887
1040
  config;
888
- memory;
1041
+ // private memory: Memory | undefined; // TODO remove mastra and do own implementation
889
1042
  tools;
890
- agent;
1043
+ evals;
1044
+ model;
891
1045
  capabilities;
892
- constructor({ id, name, description, outputSchema, config, rateLimit, type, capabilities, tools }) {
1046
+ constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
893
1047
  this.id = id;
894
1048
  this.name = name;
895
- this.type = type;
1049
+ this.evals = evals;
896
1050
  this.description = description;
897
- this.outputSchema = outputSchema;
898
1051
  this.rateLimit = rateLimit;
899
1052
  this.tools = tools;
900
1053
  this.config = config;
901
1054
  this.capabilities = capabilities;
902
1055
  this.slug = `/agents/${generateSlug(this.name)}/run`;
903
- if (this.type === "agent") {
904
- this.agent = new import_core.Agent({
905
- name: this.config.name,
906
- instructions: this.config.instructions,
907
- model: this.config.model,
908
- memory: this.memory ? this.memory : void 0
909
- });
1056
+ this.model = this.config.model;
1057
+ }
1058
+ generate = async ({ prompt, stream }) => {
1059
+ if (!this.model) {
1060
+ throw new Error("Model is required for streaming.");
910
1061
  }
911
- if (config?.memory) {
912
- console.log("[EXULU] Initializing memory for agent " + this.name);
913
- const connectionString = `postgresql://${process.env.POSTGRES_DB_USER}:${process.env.POSTGRES_DB_PASSWORD}@${process.env.POSTGRES_DB_HOST}:${process.env.POSTGRES_DB_PORT}/exulu`;
914
- this.memory = new import_memory.Memory({
915
- storage: new import_pg.PostgresStore({
916
- host: process.env.POSTGRES_DB_HOST || "",
917
- port: parseInt(process.env.POSTGRES_DB_PORT || "5432"),
918
- user: process.env.POSTGRES_DB_USER || "",
919
- database: "exulu",
920
- // putting it into an own database that is not managed by exulu
921
- password: process.env.POSTGRES_DB_PASSWORD || "",
922
- ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
923
- }),
924
- ...config?.memory.vector ? {
925
- vector: new import_pg.PgVector({
926
- connectionString
927
- })
928
- } : {},
929
- options: {
930
- lastMessages: config?.memory.lastMessages || 10,
931
- semanticRecall: {
932
- topK: config?.memory.semanticRecall.topK || 3,
933
- messageRange: config?.memory.semanticRecall.messageRange || 2
934
- }
935
- }
1062
+ if (this.config.outputSchema) {
1063
+ if (stream) {
1064
+ }
1065
+ const { object } = await (0, import_ai.generateObject)({
1066
+ model: this.model,
1067
+ schema: this.config.outputSchema,
1068
+ prompt
936
1069
  });
1070
+ return object;
937
1071
  }
938
- }
939
- chat = () => {
940
- if (!this.agent) {
941
- throw new Error("Agent not found");
1072
+ if (stream) {
1073
+ const result = (0, import_ai.streamText)({
1074
+ model: this.model,
1075
+ prompt
1076
+ });
1077
+ const text2 = await result.text;
1078
+ return text2;
942
1079
  }
943
- updateStatistic({
944
- name: "count",
945
- label: this.name,
946
- type: STATISTICS_TYPE_ENUM.AGENT_RUN,
947
- trigger: "agent"
1080
+ const { text } = await (0, import_ai.generateText)({
1081
+ model: this.model,
1082
+ prompt
948
1083
  });
949
- return this.agent;
1084
+ return text;
950
1085
  };
951
1086
  };
952
1087
  var ExuluEmbedder = class {
@@ -1142,35 +1277,117 @@ var ExuluLogger = class {
1142
1277
  }
1143
1278
  }
1144
1279
  };
1280
/**
 * Named evaluation definition. `create` exposes factories that turn the
 * definition into a runnable evaluation (test cases plus a `run` function).
 */
var ExuluEval = class {
  name;
  description;
  constructor({ name, description }) {
    this.name = name;
    this.description = description;
  }
  create = {
    LlmAsAJudge: {
      /**
       * Needle-in-a-haystack evaluation scored by a judge model.
       *
       * @param {object} options
       * @param {string} options.label - Prefix for the test case categories.
       * @param {*} options.model - Model handed to generateObject for judging.
       * @param {{question: string, answer: string}[]} options.needles
       * @param {string} options.testDocument - Haystack text.
       * @param {number[]} [options.contextlengths] - Token sizes; defaults to
       *   [5000, 30000, 50000, 128000].
       * @returns {{name, description, testcases, run}} Runnable evaluation.
       */
      niah: ({ label, model, needles, testDocument, contextlengths }) => {
        return {
          name: this.name,
          description: this.description,
          testcases: ExuluEvalUtils.niahTestSet({
            label,
            contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3],
            needles,
            testDocument
          }),
          /**
           * Runs one test case: generates the agent answer when `data.result`
           * is not set yet, lets the judge model score it, stores a row in
           * "eval_results" and returns the score with the judge's comment.
           *
           * @throws {Error} When a workflow runner is passed, when no agent is
           *   passed, or when an answer must be generated but no prompt exists.
           */
          run: async ({ data, runner }) => {
            if (runner.workflow) {
              throw new Error("Workflows are not supported for the needle in a haystack eval.");
            }
            if (!runner.agent) {
              throw new Error("Agent is required for the needle in a haystack eval.");
            }
            // A duration supplied by the caller wins; otherwise it is measured
            // below when the agent is invoked here.
            let duration = data.duration;
            if (!data.result) {
              if (!data.prompt) {
                throw new Error("Prompt is required for running an agent.");
              }
              const startedAt = Date.now();
              const result = await runner.agent.generate({
                prompt: data.prompt,
                stream: false
              });
              // Generated test cases carry no duration, so without this the
              // "duration" column would always stay empty. Measured in
              // milliseconds. NOTE(review): confirm the unit consumers expect.
              duration = duration ?? (Date.now() - startedAt);
              // Stored on the test case so a repeated run reuses the answer.
              data.result = result;
            }
            const { object } = await (0, import_ai.generateObject)({
              model,
              maxRetries: 3,
              schema: import_zod2.z.object({
                correctnessScore: import_zod2.z.number(),
                comment: import_zod2.z.string()
              }),
              prompt: `You are checking if the below "actual_answers" contain the correct information as
presented in the "correct_answers" section to calculate the correctness score.

The correctness score should be a number between 0 and 1. 1 is the highest score.

For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the
score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the
score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the
score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.

You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,
punctuation, etc., if the content of the actual answer is still correct.

Also provide a comment on how you came to your conclusion.

<actual_answers>
${data.result}
</actual_answers>

<correct_answers>
${needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n")}
</correct_answers>`
            });
            console.log("[EXULU] eval result", object);
            const { db: db2 } = await postgresClient();
            await db2("eval_results").insert({
              input: data.prompt,
              output: data.result,
              duration,
              result: object.correctnessScore,
              agent_id: runner.agent.id || void 0,
              eval_type: EVAL_TYPES_ENUM.llm_as_judge,
              eval_name: this.name,
              comment: object.comment,
              category: data.category,
              metadata: data.metadata,
              createdAt: db2.fn.now(),
              updatedAt: db2.fn.now()
            });
            return {
              score: object.correctnessScore,
              comment: object.comment
            };
          }
        };
      }
    }
  };
};
1145
1372
  var ExuluTool = class {
1146
1373
  id;
1147
1374
  name;
1148
1375
  description;
1149
- inputSchema;
1150
- outputSchema;
1376
+ parameters;
1151
1377
  type;
1152
- _execute;
1153
- constructor({ id, name, description, inputSchema, outputSchema, type, execute: execute2 }) {
1378
+ tool;
1379
+ constructor({ id, name, description, parameters, type, execute: execute2 }) {
1154
1380
  this.id = id;
1155
1381
  this.name = name;
1156
1382
  this.description = description;
1157
- this.inputSchema = inputSchema;
1158
- this.outputSchema = outputSchema;
1383
+ this.parameters = parameters;
1159
1384
  this.type = type;
1160
- this._execute = execute2;
1161
- }
1162
- execute = async (inputs) => {
1163
- if (!this._execute) {
1164
- throw new Error("Tool has no execute function.");
1165
- }
1166
- updateStatistic({
1167
- name: "count",
1168
- label: this.name,
1169
- type: STATISTICS_TYPE_ENUM.TOOL_CALL,
1170
- trigger: "agent"
1385
+ this.tool = (0, import_ai.tool)({
1386
+ description,
1387
+ parameters,
1388
+ execute: execute2
1171
1389
  });
1172
- return await this._execute(inputs);
1173
- };
1390
+ }
1174
1391
  };
1175
1392
  var ExuluContext = class {
1176
1393
  id;
@@ -1572,21 +1789,9 @@ var ExuluContext = class {
1572
1789
  id: this.id,
1573
1790
  name: `${this.name} context`,
1574
1791
  type: "context",
1575
- inputSchema: import_zod2.z.object({
1792
+ parameters: import_zod2.z.object({
1576
1793
  query: import_zod2.z.string()
1577
1794
  }),
1578
- outputSchema: import_zod2.z.object({
1579
- // todo check if result format is still correct based on above getItems function
1580
- results: import_zod2.z.array(import_zod2.z.object({
1581
- count: import_zod2.z.number(),
1582
- results: import_zod2.z.array(import_zod2.z.object({
1583
- id: import_zod2.z.string(),
1584
- content: import_zod2.z.string(),
1585
- metadata: import_zod2.z.record(import_zod2.z.any())
1586
- })),
1587
- errors: import_zod2.z.array(import_zod2.z.string()).optional()
1588
- }))
1589
- }),
1590
1795
  description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
1591
1796
  execute: async ({ context }) => {
1592
1797
  return await this.getItems({
@@ -1641,16 +1846,7 @@ var ExuluSource = class {
1641
1846
  }
1642
1847
  };
1643
1848
  var updateStatistic = async (statistic) => {
1644
- const currentDate = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
1645
- const { db: db2 } = await postgresClient();
1646
- await db2.from("statistics").update({
1647
- total: db2.raw("total + ?", [statistic.count ?? 1]),
1648
- timeseries: db2.raw('CASE WHEN "createdAt" = ? THEN array_append(timeseries, ?) ELSE timeseries END', [currentDate, { date: currentDate, count: statistic.count ?? 1 }])
1649
- }).where({
1650
- name: statistic.name,
1651
- label: statistic.label,
1652
- type: statistic.type
1653
- }).onConflict("name").merge();
1849
+ return;
1654
1850
  };
1655
1851
 
1656
1852
  // src/registry/index.ts
@@ -2755,7 +2951,7 @@ var createUppyRoutes = async (app) => {
2755
2951
  };
2756
2952
 
2757
2953
  // src/registry/routes.ts
2758
- var import_utils = require("@apollo/utils.keyvaluecache");
2954
+ var import_utils2 = require("@apollo/utils.keyvaluecache");
2759
2955
  var global_queues = {
2760
2956
  logs_cleaner: "logs-cleaner"
2761
2957
  };
@@ -2853,10 +3049,10 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2853
3049
  } else {
2854
3050
  console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
2855
3051
  }
2856
- const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
3052
+ const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
2857
3053
  console.log("[EXULU] graphql server");
2858
3054
  const server = new import_server3.ApolloServer({
2859
- cache: new import_utils.InMemoryLRUCache(),
3055
+ cache: new import_utils2.InMemoryLRUCache(),
2860
3056
  schema,
2861
3057
  introspection: true
2862
3058
  });
@@ -2921,13 +3117,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2921
3117
  });
2922
3118
  });
2923
3119
  app.get("/tools", async (req, res) => {
2924
- res.status(200).json(tools.map((tool) => ({
2925
- id: tool.id,
2926
- name: tool.name,
2927
- description: tool.description,
2928
- type: tool.type || "tool",
2929
- inputSchema: tool.inputSchema ? (0, import_zodex.zerialize)(tool.inputSchema) : null,
2930
- outputSchema: tool.outputSchema ? (0, import_zodex.zerialize)(tool.outputSchema) : null
3120
+ res.status(200).json(tools.map((tool2) => ({
3121
+ id: tool2.id,
3122
+ name: tool2.name,
3123
+ description: tool2.description,
3124
+ type: tool2.type || "tool",
3125
+ inputSchema: tool2.inputSchema ? (0, import_zodex.zerialize)(tool2.inputSchema) : null,
3126
+ outputSchema: tool2.outputSchema ? (0, import_zodex.zerialize)(tool2.outputSchema) : null
2931
3127
  })));
2932
3128
  });
2933
3129
  app.get("/tools/:id", async (req, res) => {
@@ -2938,14 +3134,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
2938
3134
  });
2939
3135
  return;
2940
3136
  }
2941
- const tool = tools.find((tool2) => tool2.id === id);
2942
- if (!tool) {
3137
+ const tool2 = tools.find((tool3) => tool3.id === id);
3138
+ if (!tool2) {
2943
3139
  res.status(400).json({
2944
3140
  message: "Tool not found."
2945
3141
  });
2946
3142
  return;
2947
3143
  }
2948
- res.status(200).json(tool);
3144
+ res.status(200).json(tool2);
2949
3145
  });
2950
3146
  const deleteItem = async ({
2951
3147
  id,
@@ -4038,6 +4234,198 @@ var ExuluApp = class {
4038
4234
 
4039
4235
  // src/index.ts
4040
4236
  var import_chonkie = require("chonkie");
4237
+
4238
+ // src/cli/index.tsx
4239
+ var import_react2 = require("react");
4240
+ var import_ink4 = require("ink");
4241
+ var import_ui5 = require("@inkjs/ui");
4242
+ var import_patch_console = __toESM(require("patch-console"), 1);
4243
+
4244
+ // src/cli/components/nav.tsx
4245
+ var import_ui = require("@inkjs/ui");
4246
+ var import_ink = require("ink");
4247
+ var import_jsx_runtime = require("react/jsx-runtime");
4248
+ var nav = [
4249
+ {
4250
+ label: "Agents",
4251
+ value: "agents"
4252
+ },
4253
+ {
4254
+ label: "Exit",
4255
+ value: "exit"
4256
+ }
4257
+ ];
4258
+ var Nav = ({ setView }) => {
4259
+ const { exit } = (0, import_ink.useApp)();
4260
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(import_ui.Select, { options: nav, onChange: (value) => {
4261
+ if (value === "exit") {
4262
+ exit();
4263
+ }
4264
+ setView(value);
4265
+ } });
4266
+ };
4267
+ var nav_default = Nav;
4268
+
4269
+ // src/cli/components/agent-selector.tsx
4270
+ var import_ink2 = require("ink");
4271
+ var import_ui2 = require("@inkjs/ui");
4272
+ var import_jsx_runtime2 = require("react/jsx-runtime");
4273
+ var AgentSelector = ({ exulu, setAgent, setEvaluations }) => {
4274
+ const agents = exulu.agents.map((agent) => ({
4275
+ label: agent.name,
4276
+ value: agent.id
4277
+ }));
4278
+ return /* @__PURE__ */ (0, import_jsx_runtime2.jsxs)(import_jsx_runtime2.Fragment, { children: [
4279
+ /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ink2.Text, { children: "Please select an agent:" }),
4280
+ /* @__PURE__ */ (0, import_jsx_runtime2.jsx)(import_ui2.Select, { options: agents, onChange: (value) => {
4281
+ console.log("selected agent", value);
4282
+ const agent = exulu.agent(value);
4283
+ if (!agent) {
4284
+ console.error("Agent not found", value);
4285
+ return;
4286
+ }
4287
+ setAgent(agent);
4288
+ if (agent) {
4289
+ setEvaluations(agent.evals || []);
4290
+ }
4291
+ } })
4292
+ ] });
4293
+ };
4294
+ var agent_selector_default = AgentSelector;
4295
+
4296
+ // src/cli/components/eval-selector.tsx
4297
+ var import_ui3 = require("@inkjs/ui");
4298
+ var import_jsx_runtime3 = require("react/jsx-runtime");
4299
+ var EvalSelector = ({ evaluations, setEvaluation }) => {
4300
+ return /* @__PURE__ */ (0, import_jsx_runtime3.jsx)(import_ui3.Select, { options: evaluations.map((evaluation) => ({
4301
+ label: evaluation.runner.name,
4302
+ value: evaluation.runner.name
4303
+ })), onChange: (value) => {
4304
+ console.log("selected eval", value);
4305
+ const evaluation = evaluations?.find((evaluation2) => evaluation2.runner.name === value);
4306
+ if (evaluation) {
4307
+ setEvaluation(evaluation);
4308
+ }
4309
+ } });
4310
+ };
4311
+ var eval_selector_default = EvalSelector;
4312
+
4313
+ // src/cli/components/eval-actions.tsx
4314
+ var import_react = require("react");
4315
+ var import_ui4 = require("@inkjs/ui");
4316
+ var import_ink3 = require("ink");
4317
+ var import_jsx_runtime4 = require("react/jsx-runtime");
4318
/**
 * CLI view for a selected evaluation: offers "Run evaluation" / "Go back",
 * shows a progress bar while the test cases run and lists the scores once
 * all of them have finished.
 *
 * Props: `agent` (passed to the runner), `evaluation` (its `runner` provides
 * `name`, `testcases` and `run`), `setEvaluation` (called with undefined to
 * go back).
 */
var EvalActions = ({ agent, evaluation, setEvaluation }) => {
  const [progress, setProgress] = (0, import_react.useState)(0);
  const [results, setResults] = (0, import_react.useState)([]);
  const [running, setRunning] = (0, import_react.useState)();
  const run = async (evaluation2) => {
    const testCases = evaluation2.runner.testcases;
    // Validate before touching `.length`, otherwise a missing test set
    // surfaces as a TypeError instead of this message.
    if (!testCases) {
      throw new Error("No test cases found");
    }
    const total = testCases.length;
    setRunning({
      label: evaluation2.runner.name
    });
    try {
      let i = 0;
      for (const testCase of testCases) {
        i++;
        const result = await evaluation2.runner.run({
          data: testCase,
          runner: {
            agent
          }
        });
        setProgress(Math.round(i / total * 100));
        const entry = {
          name: evaluation2.runner.name,
          prompt: testCase.prompt?.slice(0, 100) + "...",
          score: result.score,
          comment: result.comment
        };
        // Updater form: `results` captured by this closure is the array from
        // the render that started the run, so spreading it would discard
        // every previously collected result.
        setResults((previous) => [...previous, entry]);
      }
      // Also covers an empty test set, which would otherwise never complete.
      setProgress(100);
    } finally {
      // Leave the "Running ..." view even when a test case throws.
      setRunning(void 0);
    }
  };
  if (progress === 100) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ink3.Text, { children: "Evaluations completed." }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList, { children: results.map((result, index) => /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        result.name,
        ": ",
        result.score,
        " - ",
        result.comment
      ] }) }, index)) })
    ] });
  }
  if (running) {
    return /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_jsx_runtime4.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime4.jsxs)(import_ink3.Text, { children: [
        "Running ",
        running.label,
        "..."
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.ProgressBar, { value: progress })
    ] });
  }
  return /* @__PURE__ */ (0, import_jsx_runtime4.jsx)(import_ui4.Select, { options: [{
    label: "Run evaluation",
    value: "run"
  }, {
    label: "Go back",
    value: "back"
  }], onChange: (value) => {
    if (value === "back") {
      setEvaluation(void 0);
    }
    if (value === "run") {
      // The handler cannot await, so report failures instead of leaving an
      // unhandled promise rejection.
      run(evaluation).catch((error) => {
        console.error("[EXULU] evaluation run failed", error);
      });
    }
  } });
};
4387
+ var eval_actions_default = EvalActions;
4388
+
4389
+ // src/cli/index.tsx
4390
+ var import_jsx_runtime5 = require("react/jsx-runtime");
4391
/**
 * Root component of the CLI. Shows captured console output followed by the
 * current step: navigation, agent selection, evaluation selection, or the
 * actions for the selected evaluation.
 *
 * Props: `exulu` - the app instance handed on to the agent selector.
 */
var Main = ({ exulu }) => {
  const [logs, setLogs] = (0, import_react2.useState)([]);
  const [view, setView] = (0, import_react2.useState)();
  const [agent, setAgent] = (0, import_react2.useState)();
  const [evaluations, setEvaluations] = (0, import_react2.useState)([]);
  const [evaluation, setEvaluation] = (0, import_react2.useState)();
  // Install the console patch once per mount and undo it on unmount. Doing
  // this in the render body stacks a new patch on every render, and each of
  // those closes over a stale `logs` array.
  (0, import_react2.useEffect)(() => {
    const restore = (0, import_patch_console.default)((stream, data) => {
      setLogs((previous) => [...previous, data]);
    });
    return restore;
  }, []);
  return /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Box, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: "Logs:" }),
    /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList, { children: logs.map((log, index) => /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ui5.UnorderedList.Item, { children: /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(import_ink4.Text, { children: log }) }, index)) }),
    !view && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(nav_default, { setView }),
    view === "agents" && !agent && /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(agent_selector_default, { exulu, setAgent, setEvaluations }),
    view === "agents" && agent && !evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        'Selected agent "',
        agent.name,
        '". Please select an evaluation:'
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_selector_default, { evaluations, setEvaluation })
    ] }),
    view === "agents" && agent && evaluation && /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_jsx_runtime5.Fragment, { children: [
      /* @__PURE__ */ (0, import_jsx_runtime5.jsxs)(import_ink4.Text, { children: [
        "Selected evaluation: ",
        evaluation.runner.name
      ] }),
      /* @__PURE__ */ (0, import_jsx_runtime5.jsx)(eval_actions_default, { agent, evaluation, setEvaluation })
    ] })
  ] });
};
4422
+ var cli_default = {
4423
+ run: (exulu) => {
4424
+ (0, import_ink4.render)(/* @__PURE__ */ (0, import_jsx_runtime5.jsx)(Main, { exulu }));
4425
+ }
4426
+ };
4427
+
4428
+ // src/index.ts
4041
4429
  var ExuluJobs = {
4042
4430
  redis: redisClient,
4043
4431
  jobs: {
@@ -4069,9 +4457,11 @@ var ExuluDatabase = {
4069
4457
  ExuluApp,
4070
4458
  ExuluAuthentication,
4071
4459
  ExuluChunkers,
4460
+ ExuluCli,
4072
4461
  ExuluContext,
4073
4462
  ExuluDatabase,
4074
4463
  ExuluEmbedder,
4464
+ ExuluEval,
4075
4465
  ExuluJobs,
4076
4466
  ExuluLogger,
4077
4467
  ExuluQueues,