npm - @exulu/backend - Versions diffs - 0.3.0 → 0.3.2 - Mend

@exulu/backend 0.3.0 → 0.3.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (6) hide show

package/dist/index.js CHANGED Viewed

@@ -244,6 +244,58 @@ var workflowSchema = {
     }
   ]
 };
+var evalResultsSchema = { // Schema of the "eval_results" table; also passed to createSDL alongside the other schemas.
+  name: {
+    plural: "eval_results",
+    singular: "eval_result"
+  },
+  fields: [ // the migration iterates this list and hands each entry to mapType to create a column
+    {
+      name: "input", // prompt of the evaluated test case
+      type: "longText"
+    },
+    {
+      name: "output", // answer that was scored
+      type: "longText"
+    },
+    {
+      name: "duration", // NOTE(review): unit is not established anywhere in view — confirm
+      type: "number"
+    },
+    {
+      name: "category", // category label of the test case
+      type: "text"
+    },
+    {
+      name: "metadata", // free-form test case metadata
+      type: "json"
+    },
+    {
+      name: "result", // numeric score; the NIAH eval stores the judge's correctnessScore here
+      type: "number"
+    },
+    {
+      name: "agent_id", // id of the agent that was evaluated
+      type: "text"
+    },
+    {
+      name: "workflow_id", // NOTE(review): no visible code writes this column — presumably reserved for workflow evals
+      type: "text"
+    },
+    {
+      name: "eval_type", // one of the EVAL_TYPES_ENUM values
+      type: "text"
+    },
+    {
+      name: "eval_name", // name of the ExuluEval that produced the row
+      type: "text"
+    },
+    {
+      name: "comment", // judge's explanation of the score
+      type: "longText"
+    }
+  ]
+};
 var threadsSchema = {
   name: {
     plural: "threads",
@@ -517,6 +569,27 @@ var up = async function(knex) {
       }
     });
   }
+  if (!await knex.schema.hasTable("eval_results")) {
+    await knex.schema.createTable("eval_results", (table) => {
+      table.uuid("id").primary().defaultTo(knex.fn.uuid());
+      table.date("createdAt").defaultTo(knex.fn.now());
+      table.date("updatedAt").defaultTo(knex.fn.now());
+      for (const field of evalResultsSchema.fields) {
+        const { type, name, references, default: defaultValue } = field;
+        if (!type || !name) {
+          continue;
+        }
+        if (type === "reference") {
+          if (!references) {
+            throw new Error("Field with type reference must have a reference definition.");
+          }
+          table.uuid(name).references(references.field).inTable(references.table);
+          return;
+        }
+        mapType(table, type, sanitizeName(name), defaultValue);
+      }
+    });
+  }
   if (!await knex.schema.hasTable("statistics")) {
     await knex.schema.createTable("statistics", (table) => {
       table.uuid("id").primary().defaultTo(knex.fn.uuid());
@@ -688,12 +761,10 @@ var execute = async () => {
 // src/registry/classes.ts
 import "zod";
 import "bullmq";
-import { Agent as MastraAgent } from "@mastra/core";
 import { z } from "zod";
 import * as fs from "fs";
 import * as path from "path";
-import { Memory } from "@mastra/memory";
-import { PostgresStore, PgVector } from "@mastra/pg";
+import { generateObject, generateText, streamText, tool } from "ai";
 // types/enums/statistics.ts
 var STATISTICS_TYPE_ENUM = {
@@ -708,6 +779,11 @@ var STATISTICS_TYPE_ENUM = {
   AGENT_RUN: "agent.run"
 };
+// types/enums/eval-types.ts
+var EVAL_TYPES_ENUM = { // Values written to the "eval_type" column of eval_results.
+  llm_as_judge: "llm_as_judge" // used by the needle-in-a-haystack eval, which scores answers with a judge model
+};
 // src/registry/classes.ts
 import pgvector2 from "pgvector/knex";
@@ -811,6 +887,83 @@ var JOB_STATUS_ENUM = {
   stuck: "stuck"
 };
+// src/evals/utils/index.tsx
+var ExuluEvalUtils = { // Helpers for building evaluation test sets.
+  niahTestSet: ({ // Builds needle-in-a-haystack test cases, one per (context length, depth window); returns them as one flat array.
+    label, // prefix of each test case's category string
+    contextlengths, // array of context sizes; logged as "tokens" and converted to chars with a fixed factor of 4
+    needles, // array of { question, answer }: answers are inserted into the text, questions are listed in the prompt
+    testDocument // haystack text; cut down to fit each context length
+  }) => {
+    const testCases = contextlengths.map((contextlength) => {
+      let testText = testDocument.slice(0, contextlength * 4 - needles.length * 200); // leaves 200 chars of room per needle; NOTE(review): presumably an assumed maximum needle length — confirm
+      const depthInterval = 5e3 * 4; // width of one depth window: 5,000 tokens expressed in chars
+      const depths = Array.from({ length: contextlength * 4 / depthInterval }, (_, i) => (i + 1) * depthInterval); // upper bound (chars) of each window; a fractional length is truncated, so context lengths below 5,000 produce no windows
+      console.log("[EXULU] contextlength: ", {
+        tokens: contextlength,
+        chars: contextlength * 4,
+        depths
+      });
+      return depths.map((depth, index) => { // one test case per depth window
+        const first = index === 0;
+        const last = index === depths.length - 1;
+        const start = first ? 0 : depths[index - 1]; // window starts where the previous one ended
+        const end = last ? contextlength * 4 : depths[index]; // the last window is stretched to the full context size
+        console.log("[EXULU] Niah positions: ", {
+          start,
+          end,
+          depth,
+          index
+        });
+        let modifiedTestText = testText;
+        const insertions = [];
+        needles.forEach((needle, index2) => { // pick one random position inside [start, end) per needle
+          const basePosition = start + Math.floor(Math.random() * (end - start));
+          insertions.push({ position: basePosition, needle: needle.answer });
+        });
+        insertions.sort((a, b) => b.position - a.position); // insert back to front so earlier positions are not shifted
+        console.log("[EXULU] Niah insertions: ", insertions);
+        insertions.forEach(({ position, needle }) => {
+          const insertionPosition = Math.min(position, modifiedTestText.length); // clamp to the end when the text is shorter than the window
+          const beforeNeedle = modifiedTestText.slice(0, insertionPosition);
+          const afterNeedle = modifiedTestText.slice(insertionPosition);
+          modifiedTestText = beforeNeedle + needle + afterNeedle;
+        });
+        return {
+          prompt: `You are a helpful assistant.
+                You are given a text.
+                You need to answer the following question, using only the information from the text provided below. Do not hallucinate
+                or come up with an answer that is not in the text. If the text does not contain the answer, you should say "I don't know".
+                ${needles.map((needle, index2) => `- ${index2 + 1}: ${needle.question}`).join("\n")}
+                The text is:
+                ${modifiedTestText}
+                `,
+          category: `${label}-context-length-[${contextlength}]-depth-[from-${start ? start / 4 : 0}-to-${end ? end / 4 : 0}]-niah-test`, // window bounds are reported in tokens (chars / 4)
+          metadata: {
+            contextLength: contextlength,
+            depth,
+            needles
+          }
+        };
+      });
+    });
+    const flattenedTestCases = testCases.flat(); // merge the per-context-length arrays
+    console.log("[EXULU] Niah test cases: ", flattenedTestCases.length);
+    console.table(flattenedTestCases.map((data) => ({ // summary of the generated cases, for inspection only
+      chars: data.prompt?.length || 0,
+      tokens: data.prompt?.length / 4 || 0,
+      category: data.category,
+      metadata: data.metadata
+    })));
+    return flattenedTestCases;
+  }
+};
 // src/registry/classes.ts
 function generateSlug(name) {
   const normalized = name.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
@@ -839,72 +992,52 @@ var ExuluAgent = class {
   description = "";
   slug = "";
   streaming = false;
-  type;
-  outputSchema;
   rateLimit;
   config;
-  memory;
+  // private memory: Memory | undefined; // TODO remove mastra and do own implementation
   tools;
-  agent;
+  evals;
+  model;
   capabilities;
-  constructor({ id, name, description, outputSchema, config, rateLimit, type, capabilities, tools }) {
+  constructor({ id, name, description, config, rateLimit, capabilities, tools, evals }) {
     this.id = id;
     this.name = name;
-    this.type = type;
+    this.evals = evals;
     this.description = description;
-    this.outputSchema = outputSchema;
     this.rateLimit = rateLimit;
     this.tools = tools;
     this.config = config;
     this.capabilities = capabilities;
     this.slug = `/agents/${generateSlug(this.name)}/run`;
-    if (this.type === "agent") {
-      this.agent = new MastraAgent({
-        name: this.config.name,
-        instructions: this.config.instructions,
-        model: this.config.model,
-        memory: this.memory ? this.memory : void 0
-      });
+    this.model = this.config.model;
+  }
+  generate = async ({ prompt, stream }) => {
+    if (!this.model) {
+      throw new Error("Model is required for streaming.");
     }
-    if (config?.memory) {
-      console.log("[EXULU] Initializing memory for agent " + this.name);
-      const connectionString = `postgresql://${process.env.POSTGRES_DB_USER}:${process.env.POSTGRES_DB_PASSWORD}@${process.env.POSTGRES_DB_HOST}:${process.env.POSTGRES_DB_PORT}/exulu`;
-      this.memory = new Memory({
-        storage: new PostgresStore({
-          host: process.env.POSTGRES_DB_HOST || "",
-          port: parseInt(process.env.POSTGRES_DB_PORT || "5432"),
-          user: process.env.POSTGRES_DB_USER || "",
-          database: "exulu",
-          // putting it into an own database that is not managed by exulu
-          password: process.env.POSTGRES_DB_PASSWORD || "",
-          ssl: process.env.POSTGRES_DB_SSL === "true" ? { rejectUnauthorized: false } : false
-        }),
-        ...config?.memory.vector ? {
-          vector: new PgVector({
-            connectionString
-          })
-        } : {},
-        options: {
-          lastMessages: config?.memory.lastMessages || 10,
-          semanticRecall: {
-            topK: config?.memory.semanticRecall.topK || 3,
-            messageRange: config?.memory.semanticRecall.messageRange || 2
-          }
-        }
+    if (this.config.outputSchema) {
+      if (stream) {
+      }
+      const { object } = await generateObject({
+        model: this.model,
+        schema: this.config.outputSchema,
+        prompt
       });
+      return object;
     }
-  }
-  chat = () => {
-    if (!this.agent) {
-      throw new Error("Agent not found");
+    if (stream) {
+      const result = streamText({
+        model: this.model,
+        prompt
+      });
+      const text2 = await result.text;
+      return text2;
     }
-    updateStatistic({
-      name: "count",
-      label: this.name,
-      type: STATISTICS_TYPE_ENUM.AGENT_RUN,
-      trigger: "agent"
+    const { text } = await generateText({
+      model: this.model,
+      prompt
     });
-    return this.agent;
+    return text;
   };
 };
 var ExuluEmbedder = class {
@@ -1100,35 +1233,117 @@ var ExuluLogger = class {
     }
   }
 };
+var ExuluEval = class { // Named evaluation definition; `create` holds factories that build runnable evals.
+  name;
+  description;
+  constructor({ name, description }) {
+    this.name = name;
+    this.description = description;
+  }
+  create = {
+    LlmAsAJudge: {
+      niah: ({ label, model, needles, testDocument, contextlengths }) => { // Needle-in-a-haystack eval; `model` is the judge model passed to generateObject below.
+        return {
+          name: this.name,
+          description: this.description,
+          testcases: ExuluEvalUtils.niahTestSet({ // test cases are generated once, when the eval is created
+            label,
+            contextlengths: contextlengths || [5e3, 3e4, 5e4, 128e3], // default context lengths when none are supplied
+            needles,
+            testDocument
+          }),
+          run: async ({ data, runner }) => { // Scores one test case, stores a row in "eval_results" and returns { score, comment }.
+            if (runner.workflow) {
+              throw new Error("Workflows are not supported for the needle in a haystack eval.");
+            }
+            if (!runner.agent) {
+              throw new Error("Agent is required for the needle in a haystack eval.");
+            }
+            if (!data.result) { // no precomputed answer: ask the agent (non-streaming) and keep the answer on `data`
+              if (!data.prompt) {
+                throw new Error("Prompt is required for running an agent.");
+              }
+              const result = await runner.agent.generate({
+                prompt: data.prompt,
+                stream: false
+              });
+              data.result = result;
+            }
+            const { object } = await generateObject({ // judge call: structured output with a score and a comment
+              model,
+              maxRetries: 3,
+              schema: z.object({
+                correctnessScore: z.number(),
+                comment: z.string()
+              }),
+              prompt: `You are checking if the below "actual_answers" contain the correct information as
+                            presented in the "correct_answers" section to calculate the correctness score.
+                            The correctness score should be a number between 0 and 1. 1 is the highest score.
+                            For example if the actual_answers contains 1 answer of the ${needles.length} correct_answers, the
+                            score should be ${1 / needles.length}. If the actual_answers contain 2 correct answers, the
+                            score should be ${2 / needles.length} etc.. if the actual_answers contains all the correct answers, the
+                            score should be 1 and if the actual_answers contains none of the correct answers, the score should be 0.
+                            You can ignore small differences in the actual_answers and the correct_answers such as spelling mistakes,
+                            punctuation, etc., if the content of the actual answer is still correct.
+                            Also provide a comment on how you came to your conclusion.
+                            <actual_answers>
+                            ${data.result}
+                            </actual_answers>
+                            <correct_answers>
+                            ${needles.map((needle, index) => `- ${index + 1}: ${needle.answer}`).join("\n")}
+                            </correct_answers>`
+            });
+            console.log("[EXULU] eval result", object);
+            const { db: db2 } = await postgresClient();
+            await db2("eval_results").insert({ // persist the outcome; keys match evalResultsSchema field names
+              input: data.prompt,
+              output: data.result,
+              duration: data.duration, // NOTE(review): nothing in this block sets data.duration — confirm callers provide it
+              result: object.correctnessScore,
+              agent_id: runner.agent.id || void 0,
+              eval_type: EVAL_TYPES_ENUM.llm_as_judge,
+              eval_name: this.name,
+              comment: object.comment,
+              category: data.category,
+              metadata: data.metadata,
+              createdAt: db2.fn.now(),
+              updatedAt: db2.fn.now()
+            });
+            return {
+              score: object.correctnessScore,
+              comment: object.comment
+            };
+          }
+        };
+      }
+    }
+  };
+};
 var ExuluTool = class { // Tool definition; the added lines build a tool() object from "ai" instead of exposing an execute() method.
   id;
   name;
   description;
-  inputSchema;
-  outputSchema;
+  parameters; // schema handed to tool() as `parameters`
   type;
-  _execute;
-  constructor({ id, name, description, inputSchema, outputSchema, type, execute: execute2 }) {
+  tool; // value returned by tool(), created in the constructor
+  constructor({ id, name, description, parameters, type, execute: execute2 }) {
     this.id = id;
     this.name = name;
     this.description = description;
-    this.inputSchema = inputSchema;
-    this.outputSchema = outputSchema;
+    this.parameters = parameters;
     this.type = type;
-    this._execute = execute2;
-  }
-  execute = async (inputs) => {
-    if (!this._execute) {
-      throw new Error("Tool has no execute function.");
-    }
-    updateStatistic({
-      name: "count",
-      label: this.name,
-      type: STATISTICS_TYPE_ENUM.TOOL_CALL,
-      trigger: "agent"
+    this.tool = tool({ // NOTE(review): the removed execute() wrapper recorded a TOOL_CALL statistic via updateStatistic; nothing here does — confirm that is intended
+      description,
+      parameters,
+      execute: execute2
     });
-    return await this._execute(inputs);
-  };
+  }
 };
 var ExuluContext = class {
   id;
@@ -1530,21 +1745,9 @@ var ExuluContext = class {
       id: this.id,
       name: `${this.name} context`,
       type: "context",
-      inputSchema: z.object({
+      parameters: z.object({
         query: z.string()
       }),
-      outputSchema: z.object({
-        // todo check if result format is still correct based on above getItems function
-        results: z.array(z.object({
-          count: z.number(),
-          results: z.array(z.object({
-            id: z.string(),
-            content: z.string(),
-            metadata: z.record(z.any())
-          })),
-          errors: z.array(z.string()).optional()
-        }))
-      }),
       description: `Gets information from the context called: ${this.name}. The context description is: ${this.description}.`,
       execute: async ({ context }) => {
         return await this.getItems({
@@ -2802,7 +3005,7 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
   } else {
     console.log("===========================", "[EXULU] no redis server configured, not setting up recurring jobs.", "===========================");
   }
-  const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, threadsSchema, messagesSchema]);
+  const schema = createSDL([usersSchema, rolesSchema, agentsSchema, jobsSchema, workflowSchema, evalResultsSchema, threadsSchema, messagesSchema]);
   console.log("[EXULU] graphql server");
   const server = new ApolloServer({
     cache: new InMemoryLRUCache(),
@@ -2870,13 +3073,13 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
     });
   });
   app.get("/tools", async (req, res) => {
-    res.status(200).json(tools.map((tool) => ({
-      id: tool.id,
-      name: tool.name,
-      description: tool.description,
-      type: tool.type || "tool",
-      inputSchema: tool.inputSchema ? zerialize(tool.inputSchema) : null,
-      outputSchema: tool.outputSchema ? zerialize(tool.outputSchema) : null
+    res.status(200).json(tools.map((tool2) => ({
+      id: tool2.id,
+      name: tool2.name,
+      description: tool2.description,
+      type: tool2.type || "tool",
+      inputSchema: tool2.inputSchema ? zerialize(tool2.inputSchema) : null,
+      outputSchema: tool2.outputSchema ? zerialize(tool2.outputSchema) : null
     })));
   });
   app.get("/tools/:id", async (req, res) => {
@@ -2887,14 +3090,14 @@ var createExpressRoutes = async (app, agents, embedders, tools, workflows, conte
       });
       return;
     }
-    const tool = tools.find((tool2) => tool2.id === id);
-    if (!tool) {
+    const tool2 = tools.find((tool3) => tool3.id === id);
+    if (!tool2) {
       res.status(400).json({
         message: "Tool not found."
       });
       return;
     }
-    res.status(200).json(tool);
+    res.status(200).json(tool2);
   });
   const deleteItem = async ({
     id,
@@ -3987,6 +4190,198 @@ var ExuluApp = class {
 // src/index.ts
 import { RecursiveChunker, SentenceChunker, RecursiveRules } from "chonkie";
+// src/cli/index.tsx
+import { useEffect, useState as useState2 } from "react";
+import { Box as Box2, Text as Text4, render as render2 } from "ink";
+import { UnorderedList as UnorderedList3 } from "@inkjs/ui";
+import patchConsole from "patch-console";
+// src/cli/components/nav.tsx
+import { Select } from "@inkjs/ui";
+import { useApp } from "ink";
+import { jsx } from "react/jsx-runtime";
+var nav = [ // Options of the top-level CLI menu rendered by Nav.
+  {
+    label: "Agents",
+    value: "agents" // matches the `view === "agents"` checks in Main
+  },
+  {
+    label: "Exit",
+    value: "exit" // handled by Nav itself, which calls exit() from useApp
+  }
+];
+var Nav = ({ setView }) => {
+  const { exit } = useApp();
+  return /* @__PURE__ */ jsx(Select, { options: nav, onChange: (value) => {
+    if (value === "exit") {
+      exit();
+    }
+    setView(value);
+  } });
+};
+var nav_default = Nav;
+// src/cli/components/agent-selector.tsx
+import { Text as Text2 } from "ink";
+import { Select as Select2 } from "@inkjs/ui";
+import { Fragment, jsx as jsx2, jsxs } from "react/jsx-runtime";
+var AgentSelector = ({ exulu, setAgent, setEvaluations }) => { // Lists the agents of `exulu` and reports the chosen one, plus its evals, to the parent.
+  const agents = exulu.agents.map((agent) => ({ // select options: agent name as label, agent id as value
+    label: agent.name,
+    value: agent.id
+  }));
+  return /* @__PURE__ */ jsxs(Fragment, { children: [
+    /* @__PURE__ */ jsx2(Text2, { children: "Please select an agent:" }),
+    /* @__PURE__ */ jsx2(Select2, { options: agents, onChange: (value) => {
+      console.log("selected agent", value);
+      const agent = exulu.agent(value); // look the agent up again by id
+      if (!agent) {
+        console.error("Agent not found", value);
+        return;
+      }
+      setAgent(agent);
+      if (agent) { // always true here because of the guard above
+        setEvaluations(agent.evals || []); // agents without evals yield an empty list
+      }
+    } })
+  ] });
+};
+var agent_selector_default = AgentSelector;
+// src/cli/components/eval-selector.tsx
+import { Select as Select3 } from "@inkjs/ui";
+import { jsx as jsx3 } from "react/jsx-runtime";
+var EvalSelector = ({ evaluations, setEvaluation }) => { // Select list over `evaluations`; reports the chosen evaluation through setEvaluation.
+  return /* @__PURE__ */ jsx3(Select3, { options: evaluations.map((evaluation) => ({ // the runner name serves as both label and value
+    label: evaluation.runner.name,
+    value: evaluation.runner.name
+  })), onChange: (value) => {
+    console.log("selected eval", value);
+    const evaluation = evaluations?.find((evaluation2) => evaluation2.runner.name === value); // first evaluation whose runner name matches the selection
+    if (evaluation) {
+      setEvaluation(evaluation);
+    }
+  } });
+};
+var eval_selector_default = EvalSelector;
+// src/cli/components/eval-actions.tsx
+import { useState } from "react";
+import { ProgressBar as ProgressBar2, Select as Select4, UnorderedList as UnorderedList2 } from "@inkjs/ui";
+import { Text as Text3 } from "ink";
+import { Fragment as Fragment2, jsx as jsx4, jsxs as jsxs2 } from "react/jsx-runtime";
+var EvalActions = ({ agent, evaluation, setEvaluation }) => {
+  const [progress, setProgress] = useState(0);
+  const [results, setResults] = useState([]);
+  const [running, setRunning] = useState();
+  const run = async (evaluation2) => {
+    setRunning({
+      label: evaluation2.runner.name
+    });
+    const testCases = evaluation2.runner.testcases;
+    const total = testCases.length;
+    if (!testCases) {
+      throw new Error("No test cases found");
+    }
+    let i = 0;
+    for (const testCase of testCases) {
+      i++;
+      const result = await evaluation2.runner.run({
+        data: testCase,
+        runner: {
+          agent
+        }
+      });
+      setProgress(Math.round(i / total * 100));
+      setResults([...results, {
+        name: evaluation2.runner.name,
+        prompt: testCase.prompt?.slice(0, 100) + "...",
+        score: result.score,
+        comment: result.comment
+      }]);
+    }
+    setRunning(void 0);
+  };
+  if (progress === 100) {
+    return /* @__PURE__ */ jsxs2(Fragment2, { children: [
+      /* @__PURE__ */ jsx4(Text3, { children: "Evaluations completed." }),
+      /* @__PURE__ */ jsx4(UnorderedList2, { children: results.map((result) => /* @__PURE__ */ jsx4(UnorderedList2.Item, { children: /* @__PURE__ */ jsxs2(Text3, { children: [
+        result.name,
+        ": ",
+        result.score,
+        " - ",
+        result.comment
+      ] }) })) })
+    ] });
+  }
+  if (running) {
+    return /* @__PURE__ */ jsxs2(Fragment2, { children: [
+      /* @__PURE__ */ jsxs2(Text3, { children: [
+        "Running ",
+        running.label,
+        "..."
+      ] }),
+      /* @__PURE__ */ jsx4(ProgressBar2, { value: progress })
+    ] });
+  }
+  return /* @__PURE__ */ jsx4(Select4, { options: [{
+    label: "Run evaluation",
+    value: "run"
+  }, {
+    label: "Go back",
+    value: "back"
+  }], onChange: (value) => {
+    if (value === "back") {
+      setEvaluation(void 0);
+    }
+    if (value === "run") {
+      run(evaluation);
+    }
+  } });
+};
+var eval_actions_default = EvalActions;
+// src/cli/index.tsx
+import { Fragment as Fragment3, jsx as jsx5, jsxs as jsxs3 } from "react/jsx-runtime";
+var Main = ({ exulu }) => {
+  patchConsole((stream, data) => {
+    setLogs([...logs, data]);
+  });
+  const [logs, setLogs] = useState2([]);
+  const [view, setView] = useState2();
+  const [agent, setAgent] = useState2();
+  const [evaluations, setEvaluations] = useState2([]);
+  const [evaluation, setEvaluation] = useState2();
+  return /* @__PURE__ */ jsxs3(Box2, { borderStyle: "round", borderColor: "cyan", padding: 1, flexDirection: "column", width: "70%", children: [
+    /* @__PURE__ */ jsx5(Text4, { children: "Logs:" }),
+    /* @__PURE__ */ jsx5(UnorderedList3, { children: logs.map((log, index) => /* @__PURE__ */ jsx5(UnorderedList3.Item, { children: /* @__PURE__ */ jsx5(Text4, { children: log }) })) }),
+    !view && /* @__PURE__ */ jsx5(nav_default, { setView }),
+    view === "agents" && !agent && /* @__PURE__ */ jsx5(agent_selector_default, { exulu, setAgent, setEvaluations }),
+    view === "agents" && agent && !evaluation && /* @__PURE__ */ jsxs3(Fragment3, { children: [
+      /* @__PURE__ */ jsxs3(Text4, { children: [
+        'Selected agent "',
+        agent.name,
+        '". Please select an evaluation:'
+      ] }),
+      /* @__PURE__ */ jsx5(eval_selector_default, { evaluations, setEvaluation })
+    ] }),
+    view === "agents" && agent && evaluation && /* @__PURE__ */ jsxs3(Fragment3, { children: [
+      /* @__PURE__ */ jsxs3(Text4, { children: [
+        "Selected evaluation: ",
+        evaluation.runner.name
+      ] }),
+      /* @__PURE__ */ jsx5(eval_actions_default, { agent, evaluation, setEvaluation })
+    ] })
+  ] });
+};
+var cli_default = { // CLI entry point; exported from the package as ExuluCli.
+  run: (exulu) => { // renders the Main component with the given app instance via ink's render()
+    render2(/* @__PURE__ */ jsx5(Main, { exulu }));
+  }
+};
+// src/index.ts
 var ExuluJobs = {
   redis: redisClient,
   jobs: {
@@ -4017,9 +4412,11 @@ export {
   ExuluApp,
   authentication as ExuluAuthentication,
   ExuluChunkers,
+  cli_default as ExuluCli,
   ExuluContext,
   ExuluDatabase,
   ExuluEmbedder,
+  ExuluEval,
   ExuluJobs,
   ExuluLogger,
   queues as ExuluQueues,