npm - @axiom-lattice/agent-eval - Versions diffs - 2.1.9 → 2.1.10 - Mend

@axiom-lattice/agent-eval 2.1.9 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/.turbo/turbo-build.log +10 -10
package/CHANGELOG.md +9 -0
package/dist/index.d.mts +16 -1
package/dist/index.d.ts +16 -1
package/dist/index.js +78 -10
package/dist/index.js.map +1 -1
package/dist/index.mjs +78 -10
package/dist/index.mjs.map +1 -1
package/package.json +3 -3
package/src/LatticeEval.ts +66 -2
package/src/LatticeEvalProject.ts +19 -10
package/src/LatticeEvalSuite.ts +3 -0
package/src/types.ts +0 -1

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @axiom-lattice/agent-eval@2.1.9 build /home/runner/work/agentic/agentic/packages/agent-eval
+> @axiom-lattice/agent-eval@2.1.10 build /home/runner/work/agentic/agentic/packages/agent-eval
 > tsup src/index.ts --format cjs,esm --dts --sourcemap
 [34mCLI[39m Building entry: src/index.ts
@@ -8,13 +8,13 @@
 [34mCLI[39m Target: es2020
 [34mCJS[39m Build start
 [34mESM[39m Build start
-[32mCJS[39m [1mdist/index.js     [22m[32m39.11 KB[39m
-[32mCJS[39m [1mdist/index.js.map [22m[32m67.54 KB[39m
-[32mCJS[39m ⚡️ Build success in 186ms
-[32mESM[39m [1mdist/index.mjs     [22m[32m36.92 KB[39m
-[32mESM[39m [1mdist/index.mjs.map [22m[32m67.28 KB[39m
-[32mESM[39m ⚡️ Build success in 191ms
+[32mCJS[39m [1mdist/index.js     [22m[32m41.54 KB[39m
+[32mCJS[39m [1mdist/index.js.map [22m[32m72.12 KB[39m
+[32mCJS[39m ⚡️ Build success in 155ms
+[32mESM[39m [1mdist/index.mjs     [22m[32m39.35 KB[39m
+[32mESM[39m [1mdist/index.mjs.map [22m[32m71.85 KB[39m
+[32mESM[39m ⚡️ Build success in 159ms
 [34mDTS[39m Build start
-[32mDTS[39m ⚡️ Build success in 8789ms
-[32mDTS[39m [1mdist/index.d.ts  [22m[32m10.68 KB[39m
-[32mDTS[39m [1mdist/index.d.mts [22m[32m10.68 KB[39m
+[32mDTS[39m ⚡️ Build success in 9956ms
+[32mDTS[39m [1mdist/index.d.ts  [22m[32m10.99 KB[39m
+[32mDTS[39m [1mdist/index.d.mts [22m[32m10.99 KB[39m

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,14 @@
 # @axiom-lattice/agent-eval
+## 2.1.10
+### Patch Changes
+- 773c03f: add skills
+- Updated dependencies [773c03f]
+  - @axiom-lattice/protocols@2.1.10
+  - @axiom-lattice/core@2.1.16
 ## 2.1.9
 ### Patch Changes

package/dist/index.d.mts CHANGED Viewed

@@ -10,7 +10,6 @@ type OutputFileContent = {
 };
 type OutputMessageContent = {
     type: "message_content";
-    message: string;
 };
 type OutputType = OutputFileContent | OutputMessageContent;
 interface LatticeEvalProjectType {
@@ -170,6 +169,11 @@ interface LatticeEvalCaseRunResult {
     judge_thread_id?: string;
     test_prompt?: string;
     final_output?: string;
+    messages?: Array<{
+        role: string;
+        content: string;
+        id?: string;
+    }>;
     logs: LatticeEvalLogEvent[];
 }
 /**
@@ -185,12 +189,18 @@ declare class LatticeEval {
     private lastTestPrompt?;
     private lastFinalOutput?;
     private lastDurationMs;
+    private lastMessages;
     getLastRunMeta(): {
         duration_ms: number;
         thread_id: string | undefined;
         judge_thread_id: string | undefined;
         test_prompt: string | undefined;
         final_output: string | undefined;
+        messages: {
+            role: string;
+            content: string;
+            id?: string;
+        }[];
     };
     /**
      * Create a new LatticeEval instance
@@ -255,6 +265,11 @@ interface CaseRunResult {
     judge_thread_id?: string;
     test_prompt?: string;
     final_output?: string;
+    messages?: Array<{
+        role: string;
+        content: string;
+        id?: string;
+    }>;
     error_stack?: string;
 }
 /**

package/dist/index.d.ts CHANGED Viewed

@@ -10,7 +10,6 @@ type OutputFileContent = {
 };
 type OutputMessageContent = {
     type: "message_content";
-    message: string;
 };
 type OutputType = OutputFileContent | OutputMessageContent;
 interface LatticeEvalProjectType {
@@ -170,6 +169,11 @@ interface LatticeEvalCaseRunResult {
     judge_thread_id?: string;
     test_prompt?: string;
     final_output?: string;
+    messages?: Array<{
+        role: string;
+        content: string;
+        id?: string;
+    }>;
     logs: LatticeEvalLogEvent[];
 }
 /**
@@ -185,12 +189,18 @@ declare class LatticeEval {
     private lastTestPrompt?;
     private lastFinalOutput?;
     private lastDurationMs;
+    private lastMessages;
     getLastRunMeta(): {
         duration_ms: number;
         thread_id: string | undefined;
         judge_thread_id: string | undefined;
         test_prompt: string | undefined;
         final_output: string | undefined;
+        messages: {
+            role: string;
+            content: string;
+            id?: string;
+        }[];
     };
     /**
      * Create a new LatticeEval instance
@@ -255,6 +265,11 @@ interface CaseRunResult {
     judge_thread_id?: string;
     test_prompt?: string;
     final_output?: string;
+    messages?: Array<{
+        role: string;
+        content: string;
+        id?: string;
+    }>;
     error_stack?: string;
 }
 /**

package/dist/index.js CHANGED Viewed

@@ -50,6 +50,7 @@ var LatticeEval = class {
   constructor(config) {
     this.inMemoryLogs = [];
     this.lastDurationMs = 0;
+    this.lastMessages = [];
     this.config = config;
     this.baseUrl = this.config.base_url;
     this.verbose = this.config.verbose ?? true;
@@ -60,7 +61,8 @@ var LatticeEval = class {
       thread_id: this.lastThreadId,
       judge_thread_id: this.lastJudgeThreadId,
       test_prompt: this.lastTestPrompt,
-      final_output: this.lastFinalOutput
+      final_output: this.lastFinalOutput,
+      messages: this.lastMessages
     };
   }
   getInMemoryLogs() {
@@ -78,9 +80,19 @@ var LatticeEval = class {
     if (level === "error") {
       const keyInfo = this.getKeyInfo(data);
       console.log(`  \u2717 ${message}${keyInfo ? ` ${keyInfo}` : ""}`);
-    } else if (message.includes("Starting case") || message.includes("Case evaluation completed")) {
+    } else if (message.startsWith("Starting")) {
       const keyInfo = this.getKeyInfo(data);
       console.log(`  ${message}${keyInfo ? ` ${keyInfo}` : ""}`);
+    } else if (message.includes("Case evaluation completed")) {
+      const keyInfo = this.getKeyInfo(data);
+      const pass = data?.pass;
+      const summary = data?.summary;
+      const status = pass ? "\u2713 PASS" : "\u2717 FAIL";
+      const reason = summary || (pass ? "Test passed" : "Test failed");
+      console.log(`  ${status} ${message}${keyInfo ? ` ${keyInfo}` : ""}`);
+      if (reason) {
+        console.log(`    Reason: ${reason}`);
+      }
     }
   }
   log(message, data) {
@@ -216,11 +228,15 @@ var LatticeEval = class {
     this.lastTestPrompt = void 0;
     this.lastFinalOutput = void 0;
     this.lastDurationMs = 0;
-    this.log("Starting case evaluation", {
+    this.lastMessages = [];
+    const contentAssertion = evalCase.eval?.content_assertion || "";
+    const message = contentAssertion ? `Starting: ${contentAssertion}` : "Starting";
+    this.log(message, {
       case_id: evalCase.caseId,
       thread_id: threadId,
       steps_count: evalCase.steps?.length,
-      output_type: evalCase.output?.type
+      output_type: evalCase.output?.type,
+      content_assertion: contentAssertion
     });
     let currentThreadId = threadId;
     let lastResponseData = null;
@@ -233,6 +249,38 @@ var LatticeEval = class {
       );
       currentThreadId = result.threadId;
       lastResponseData = result.responseData;
+      const existingIds = new Set(this.lastMessages.map((m) => m.id).filter(Boolean));
+      if (result.responseData?.messages && Array.isArray(result.responseData.messages)) {
+        for (const msg of result.responseData.messages) {
+          if (msg && typeof msg === "object") {
+            const msgId = msg.id || msg.lc_id || msg._id;
+            if (msgId && existingIds.has(msgId)) {
+              continue;
+            }
+            const role = msg.role || msg.getType?.() || msg.type || "unknown";
+            let content = "";
+            if (typeof msg.content === "string") {
+              content = msg.content;
+            } else if (Array.isArray(msg.content)) {
+              content = msg.content.map(
+                (c) => typeof c === "string" ? c : c?.text || (typeof c === "object" ? JSON.stringify(c) : String(c))
+              ).join("\n");
+            } else if (msg.content && typeof msg.content === "object") {
+              content = msg.content.text || JSON.stringify(msg.content);
+            } else {
+              content = String(msg.content || "");
+            }
+            this.lastMessages.push({
+              role,
+              content,
+              id: msgId
+            });
+            if (msgId) {
+              existingIds.add(msgId);
+            }
+          }
+        }
+      }
     }
     const finalAgentId = evalCase.steps[evalCase.steps.length - 1]?.agent_id || "";
     this.log("All agent steps completed", {
@@ -424,10 +472,12 @@ ${rubricsSection}
         pass
       });
     }
+    const summary = parsedResult.summary || testResultContent || "";
     this.log("Case evaluation completed", {
       case_id: evalCase.caseId,
       pass,
-      final_score: finalScore
+      final_score: finalScore,
+      summary
     });
     const finishedAt = Date.now();
     this.lastDurationMs = finishedAt - startedAt;
@@ -466,6 +516,7 @@ async function evaluateLatticeCaseWithLogs(evalCase, config) {
       judge_thread_id: meta.judge_thread_id,
       test_prompt: meta.test_prompt,
       final_output: meta.final_output,
+      messages: meta.messages,
       logs: evaluator.getInMemoryLogs()
     };
   } catch (error) {
@@ -485,6 +536,7 @@ async function evaluateLatticeCaseWithLogs(evalCase, config) {
       judge_thread_id: meta.judge_thread_id,
       test_prompt: meta.test_prompt,
       final_output: meta.final_output,
+      messages: meta.messages,
       logs: evaluator.getInMemoryLogs()
     };
   }
@@ -632,6 +684,7 @@ var LatticeEvalSuite = class {
         judge_thread_id: run.judge_thread_id,
         test_prompt: run.test_prompt,
         final_output: run.final_output,
+        messages: run.messages,
         logs: run.logs
       };
     } catch (error) {
@@ -668,6 +721,7 @@ var LatticeEvalSuite = class {
           judge_thread_id: run.judge_thread_id,
           test_prompt: run.test_prompt,
           final_output: run.final_output,
+          messages: run.messages,
           logs: run.logs
         };
       } catch (error) {
@@ -936,12 +990,26 @@ Results saved to: ${batch_dir}`);
       lines.push(`\`\`\``);
       lines.push(``);
     }
+    if (payload.messages && Array.isArray(payload.messages) && payload.messages.length > 0) {
+      lines.push(`## Conversation Messages`);
+      lines.push(``);
+      for (let i = 0; i < payload.messages.length; i++) {
+        const msg = payload.messages[i];
+        const role = msg.role || "unknown";
+        const content = msg.content || "";
+        lines.push(`### Message ${i + 1} (${role})`);
+        lines.push(``);
+        lines.push(`\`\`\``);
+        lines.push(content);
+        lines.push(`\`\`\``);
+        lines.push(``);
+      }
+    }
     if (payload.finalOutput) {
       lines.push(`## Final Output`);
       lines.push(``);
       lines.push(`\`\`\``);
-      const output = payload.finalOutput.length > 5e3 ? payload.finalOutput.substring(0, 5e3) + "\n\n... (truncated, see JSON for full output)" : payload.finalOutput;
-      lines.push(output);
+      lines.push(payload.finalOutput);
       lines.push(`\`\`\``);
       lines.push(``);
     }
@@ -949,8 +1017,7 @@ Results saved to: ${batch_dir}`);
       lines.push(`## Test Prompt`);
       lines.push(``);
       lines.push(`\`\`\``);
-      const prompt = payload.testPrompt.length > 5e3 ? payload.testPrompt.substring(0, 5e3) + "\n\n... (truncated, see JSON for full prompt)" : payload.testPrompt;
-      lines.push(prompt);
+      lines.push(payload.testPrompt);
       lines.push(`\`\`\``);
       lines.push(``);
     }
@@ -1056,7 +1123,8 @@ Results saved to: ${batch_dir}`);
           threadId: r.thread_id,
           judgeThreadId: r.judge_thread_id,
           finalOutput: r.final_output,
-          testPrompt: r.test_prompt
+          testPrompt: r.test_prompt,
+          messages: r.messages
         };
         await (0, import_promises.writeFile)(jsonPath, JSON.stringify(payload, null, 2), "utf-8");
         const mdPath = import_path.default.join(individualDir, `${baseFilename}.md`);