npm - @agentica/benchmark - Version diff - 0.8.3 → 0.9.0-dev.20250302 - Mend

@agentica/benchmark 0.8.3 → 0.9.0-dev.20250302

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/LICENSE +21 -21
package/README.md +326 -324
package/lib/AgenticaCallBenchmark.d.ts +7 -6
package/lib/AgenticaCallBenchmark.js.map +1 -1
package/lib/AgenticaSelectBenchmark.d.ts +7 -6
package/lib/AgenticaSelectBenchmark.js.map +1 -1
package/lib/index.mjs +46 -1
package/lib/index.mjs.map +1 -1
package/lib/internal/AgenticaBenchmarkPredicator.d.ts +5 -4
package/lib/internal/AgenticaBenchmarkPredicator.js +74 -2
package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
package/lib/internal/AgenticaBenchmarkUtil.d.ts +2 -1
package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +2 -1
package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
package/lib/internal/AgenticaPromptReporter.d.ts +2 -1
package/lib/internal/AgenticaPromptReporter.js.map +1 -1
package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
package/lib/structures/IAgenticaBenchmarkExpected.d.ts +10 -9
package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +8 -7
package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +6 -5
package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +3 -2
package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -8
package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +6 -5
package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +3 -2
package/package.json +5 -5
package/src/AgenticaCallBenchmark.ts +268 -265
package/src/AgenticaSelectBenchmark.ts +256 -254
package/src/index.ts +3 -3
package/src/internal/AgenticaBenchmarkPredicator.ts +224 -216
package/src/internal/AgenticaBenchmarkUtil.ts +44 -40
package/src/internal/AgenticaCallBenchmarkReporter.ts +183 -180
package/src/internal/AgenticaPromptReporter.ts +46 -43
package/src/internal/AgenticaSelectBenchmarkReporter.ts +213 -210
package/src/structures/IAgenticaBenchmarkExpected.ts +68 -58
package/src/structures/IAgenticaCallBenchmarkEvent.ts +113 -109
package/src/structures/IAgenticaCallBenchmarkResult.ts +70 -69
package/src/structures/IAgenticaCallBenchmarkScenario.ts +43 -39
package/src/structures/IAgenticaSelectBenchmarkEvent.ts +114 -110
package/src/structures/IAgenticaSelectBenchmarkResult.ts +72 -69
package/src/structures/IAgenticaSelectBenchmarkScenario.ts +43 -39
package/src/utils/MathUtil.ts +3 -3

package/src/internal/AgenticaCallBenchmarkReporter.ts CHANGED

@@ -1,180 +1,183 @@
-import { IAgenticaTokenUsage } from "@agentica/core";
-import { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
-import { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
-import { MathUtil } from "../utils/MathUtil";
-import { AgenticaBenchmarkUtil } from "./AgenticaBenchmarkUtil";
-import { AgenticaPromptReporter } from "./AgenticaPromptReporter";
-export namespace AgenticaCallBenchmarkReporter {
-  export const markdown = (
-    result: IAgenticaCallBenchmarkResult,
-  ): Record<string, string> =>
-    Object.fromEntries([
-      ["./README.md", writeIndex(result)],
-      ...result.experiments
-        .map((exp) => [
-          [`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
-          ...exp.events.map((event, i) => [
-            `./${exp.scenario.name}/${i + 1}.${event.type}.md`,
-            writeExperimentEvent(event, i),
-          ]),
-        ])
-        .flat(),
-    ]);
-  const writeIndex = (result: IAgenticaCallBenchmarkResult): string => {
-    const events: IAgenticaCallBenchmarkEvent[] = result.experiments
-      .map((r) => r.events)
-      .flat();
-    const average: number =
-      events
-        .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-        .reduce((a, b) => a + b, 0) / events.length;
-    const aggregate: IAgenticaTokenUsage.IComponent = result.usage.aggregate;
-    return [
-      "# LLM Function Call Benchmark",
-      "## Summary",
-      `  - Aggregation:`,
-      `    - Scenarios: #${result.experiments.length.toLocaleString()}`,
-      `    - Trial: ${events.length}`,
-      `    - Success: ${events.filter((e) => e.type === "success").length}`,
-      `    - Failure: ${events.filter((e) => e.type === "failure").length}`,
-      `    - Average Time: ${MathUtil.round(average).toLocaleString()} ms`,
-      `  - Token Usage`,
-      `    - Total: ${aggregate.total.toLocaleString()}`,
-      `    - Input`,
-      `      - Total: ${aggregate.input.total.toLocaleString()}`,
-      `      - Cached: ${aggregate.input.cached.toLocaleString()}`,
-      `    - Output:`,
-      `      - Total: ${aggregate.output.total.toLocaleString()}`,
-      `      - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
-      `      - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
-      `      - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
-      "",
-      "## Experiments",
-      " Name | Select | Call | Time/Avg ",
-      ":-----|:-------|:-----|----------:",
-      ...result.experiments.map((exp) =>
-        [
-          `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`,
-          drawStatus(
-            exp.events,
-            (e) => e.type !== "error" && e.select === true,
-          ),
-          drawStatus(exp.events, (e) => e.type !== "error" && e.call === true),
-          `${MathUtil.round(
-            exp.events
-              .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-              .reduce((a, b) => a + b, 0) / exp.events.length,
-          ).toLocaleString()} ms`,
-        ].join(" | "),
-      ),
-    ].join("\n");
-  };
-  const writeExperimentIndex = (
-    exp: IAgenticaCallBenchmarkResult.IExperiment,
-  ): string => {
-    return [
-      `# ${exp.scenario.name}`,
-      "## Summary",
-      `  - Scenarios: #${exp.events.length.toLocaleString()}`,
-      `  - Success: ${exp.events.filter((e) => e.type === "success").length}`,
-      `  - Failure: ${exp.events.filter((e) => e.type === "failure").length}`,
-      `  - Average Time: ${MathUtil.round(
-        exp.events
-          .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-          .reduce((a, b) => a + b, 0) / exp.events.length,
-      ).toLocaleString()} ms`,
-      "",
-      "## Events",
-      " Name | Type | Time",
-      ":-----|:-----|----:",
-      ...exp.events.map((e, i) =>
-        [
-          `[${i + 1}.](./${i + 1}.${e.type}.md)`,
-          e.type,
-          `${MathUtil.round(e.completed_at.getTime() - e.started_at.getTime())} ms`,
-        ].join(" | "),
-      ),
-      "",
-      "## Scenario",
-      "### User Prompt",
-      exp.scenario.text,
-      "",
-      "### Expected",
-      "```json",
-      JSON.stringify(
-        AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected),
-        null,
-        2,
-      ),
-      "```",
-    ].join("\n");
-  };
-  const writeExperimentEvent = (
-    event: IAgenticaCallBenchmarkEvent,
-    index: number,
-  ): string => {
-    return [
-      `# ${index + 1}. ${event.type}`,
-      "## Summary",
-      `  - Name: ${event.scenario.name}`,
-      `  - Type: ${event.type}`,
-      `  - Time: ${MathUtil.round(
-        event.completed_at.getTime() - event.started_at.getTime(),
-      ).toLocaleString()} ms`,
-      ...(event.type !== "error"
-        ? [
-            `  - Select: ${event.select ? "✅" : "❌"}`,
-            `  - Call: ${event.call ? "✅" : "❌"}`,
-          ]
-        : []),
-      `  - Token Usage: ${event.usage.toLocaleString()}`,
-      "",
-      "## Scenario",
-      "### User Prompt",
-      event.scenario.text,
-      "",
-      "### Expected",
-      "```json",
-      JSON.stringify(
-        AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected),
-        null,
-        2,
-      ),
-      "```",
-      "",
-      "## Prompt Histories",
-      ...event.prompts.map(AgenticaPromptReporter.markdown),
-      "",
-      ...(event.type === "error"
-        ? [
-            "## Error",
-            "```json",
-            JSON.stringify(
-              AgenticaBenchmarkUtil.errorToJson(event.error),
-              null,
-              2,
-            ),
-            "```",
-          ]
-        : []),
-    ].join("\n");
-  };
-  const drawStatus = (
-    events: IAgenticaCallBenchmarkEvent[],
-    success: (e: IAgenticaCallBenchmarkEvent) => boolean,
-  ): string => {
-    const count: number = Math.floor(
-      (events.filter(success).length / events.length) * 10,
-    );
-    return (
-      new Array(count).fill("■").join("") +
-      new Array(10 - count).fill("□").join("")
-    );
-  };
-}
+import { IAgenticaTokenUsage } from "@agentica/core";
+import { ILlmSchema } from "@samchon/openapi";
+import { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
+import { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
+import { MathUtil } from "../utils/MathUtil";
+import { AgenticaBenchmarkUtil } from "./AgenticaBenchmarkUtil";
+import { AgenticaPromptReporter } from "./AgenticaPromptReporter";
+export namespace AgenticaCallBenchmarkReporter {
+  export const markdown = <Model extends ILlmSchema.Model>(
+    result: IAgenticaCallBenchmarkResult<Model>,
+  ): Record<string, string> =>
+    Object.fromEntries([
+      ["./README.md", writeIndex<Model>(result)],
+      ...result.experiments
+        .map((exp) => [
+          [`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
+          ...exp.events.map((event, i) => [
+            `./${exp.scenario.name}/${i + 1}.${event.type}.md`,
+            writeExperimentEvent(event, i),
+          ]),
+        ])
+        .flat(),
+    ]);
+  const writeIndex = <Model extends ILlmSchema.Model>(
+    result: IAgenticaCallBenchmarkResult<Model>,
+  ): string => {
+    const events: IAgenticaCallBenchmarkEvent<Model>[] = result.experiments
+      .map((r) => r.events)
+      .flat();
+    const average: number =
+      events
+        .map((e) => e.completed_at.getTime() - e.started_at.getTime())
+        .reduce((a, b) => a + b, 0) / events.length;
+    const aggregate: IAgenticaTokenUsage.IComponent = result.usage.aggregate;
+    return [
+      "# LLM Function Call Benchmark",
+      "## Summary",
+      `  - Aggregation:`,
+      `    - Scenarios: #${result.experiments.length.toLocaleString()}`,
+      `    - Trial: ${events.length}`,
+      `    - Success: ${events.filter((e) => e.type === "success").length}`,
+      `    - Failure: ${events.filter((e) => e.type === "failure").length}`,
+      `    - Average Time: ${MathUtil.round(average).toLocaleString()} ms`,
+      `  - Token Usage`,
+      `    - Total: ${aggregate.total.toLocaleString()}`,
+      `    - Input`,
+      `      - Total: ${aggregate.input.total.toLocaleString()}`,
+      `      - Cached: ${aggregate.input.cached.toLocaleString()}`,
+      `    - Output:`,
+      `      - Total: ${aggregate.output.total.toLocaleString()}`,
+      `      - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
+      `      - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
+      `      - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
+      "",
+      "## Experiments",
+      " Name | Select | Call | Time/Avg ",
+      ":-----|:-------|:-----|----------:",
+      ...result.experiments.map((exp) =>
+        [
+          `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`,
+          drawStatus(
+            exp.events,
+            (e) => e.type !== "error" && e.select === true,
+          ),
+          drawStatus(exp.events, (e) => e.type !== "error" && e.call === true),
+          `${MathUtil.round(
+            exp.events
+              .map((e) => e.completed_at.getTime() - e.started_at.getTime())
+              .reduce((a, b) => a + b, 0) / exp.events.length,
+          ).toLocaleString()} ms`,
+        ].join(" | "),
+      ),
+    ].join("\n");
+  };
+  const writeExperimentIndex = <Model extends ILlmSchema.Model>(
+    exp: IAgenticaCallBenchmarkResult.IExperiment<Model>,
+  ): string => {
+    return [
+      `# ${exp.scenario.name}`,
+      "## Summary",
+      `  - Scenarios: #${exp.events.length.toLocaleString()}`,
+      `  - Success: ${exp.events.filter((e) => e.type === "success").length}`,
+      `  - Failure: ${exp.events.filter((e) => e.type === "failure").length}`,
+      `  - Average Time: ${MathUtil.round(
+        exp.events
+          .map((e) => e.completed_at.getTime() - e.started_at.getTime())
+          .reduce((a, b) => a + b, 0) / exp.events.length,
+      ).toLocaleString()} ms`,
+      "",
+      "## Events",
+      " Name | Type | Time",
+      ":-----|:-----|----:",
+      ...exp.events.map((e, i) =>
+        [
+          `[${i + 1}.](./${i + 1}.${e.type}.md)`,
+          e.type,
+          `${MathUtil.round(e.completed_at.getTime() - e.started_at.getTime())} ms`,
+        ].join(" | "),
+      ),
+      "",
+      "## Scenario",
+      "### User Prompt",
+      exp.scenario.text,
+      "",
+      "### Expected",
+      "```json",
+      JSON.stringify(
+        AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected),
+        null,
+        2,
+      ),
+      "```",
+    ].join("\n");
+  };
+  const writeExperimentEvent = <Model extends ILlmSchema.Model>(
+    event: IAgenticaCallBenchmarkEvent<Model>,
+    index: number,
+  ): string => {
+    return [
+      `# ${index + 1}. ${event.type}`,
+      "## Summary",
+      `  - Name: ${event.scenario.name}`,
+      `  - Type: ${event.type}`,
+      `  - Time: ${MathUtil.round(
+        event.completed_at.getTime() - event.started_at.getTime(),
+      ).toLocaleString()} ms`,
+      ...(event.type !== "error"
+        ? [
+            `  - Select: ${event.select ? "✅" : "❌"}`,
+            `  - Call: ${event.call ? "✅" : "❌"}`,
+          ]
+        : []),
+      `  - Token Usage: ${event.usage.toLocaleString()}`,
+      "",
+      "## Scenario",
+      "### User Prompt",
+      event.scenario.text,
+      "",
+      "### Expected",
+      "```json",
+      JSON.stringify(
+        AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected),
+        null,
+        2,
+      ),
+      "```",
+      "",
+      "## Prompt Histories",
+      ...event.prompts.map(AgenticaPromptReporter.markdown),
+      "",
+      ...(event.type === "error"
+        ? [
+            "## Error",
+            "```json",
+            JSON.stringify(
+              AgenticaBenchmarkUtil.errorToJson(event.error),
+              null,
+              2,
+            ),
+            "```",
+          ]
+        : []),
+    ].join("\n");
+  };
+  const drawStatus = <Model extends ILlmSchema.Model>(
+    events: IAgenticaCallBenchmarkEvent<Model>[],
+    success: (e: IAgenticaCallBenchmarkEvent<Model>) => boolean,
+  ): string => {
+    const count: number = Math.floor(
+      (events.filter(success).length / events.length) * 10,
+    );
+    return (
+      new Array(count).fill("■").join("") +
+      new Array(10 - count).fill("□").join("")
+    );
+  };
+}

package/src/internal/AgenticaPromptReporter.ts CHANGED

@@ -1,43 +1,46 @@
-import { IAgenticaPrompt } from "@agentica/core";
-export namespace AgenticaPromptReporter {
-  export const markdown = (p: IAgenticaPrompt): string => {
-    if (p.type === "text")
-      return [`### Text (${p.role})`, p.text, ""].join("\n");
-    else if (p.type === "select" || p.type === "cancel")
-      return [
-        `### ${p.type === "select" ? "Select" : "Cancel"}`,
-        ...p.operations
-          .map((op) => [
-            `#### ${op.name}`,
-            `  - controller: ${op.controller.name}`,
-            `  - function: ${op.function.name}`,
-            `  - reason: ${op.reason}`,
-            "",
-            ...(!!op.function.description?.length
-              ? [op.function.description, ""]
-              : []),
-          ])
-          .flat(),
-      ].join("\n");
-    else if (p.type === "describe")
-      return [
-        "### Describe",
-        ...p.executions.map((e) => `  - ${e.name}`),
-        "",
-        ...p.text.split("\n").map((s) => `> ${s}`),
-        "",
-      ].join("\n");
-    return [
-      "### Execute",
-      `  - name: ${p.name}`,
-      `  - controller: ${p.controller.name}`,
-      `  - function: ${p.function.name}`,
-      "",
-      "```json",
-      JSON.stringify(p.arguments, null, 2),
-      "```",
-      "",
-    ].join("\n");
-  };
-}
+import { IAgenticaPrompt } from "@agentica/core";
+import { ILlmSchema } from "@samchon/openapi";
+export namespace AgenticaPromptReporter {
+  export const markdown = <Model extends ILlmSchema.Model>(
+    p: IAgenticaPrompt<Model>,
+  ): string => {
+    if (p.type === "text")
+      return [`### Text (${p.role})`, p.text, ""].join("\n");
+    else if (p.type === "select" || p.type === "cancel")
+      return [
+        `### ${p.type === "select" ? "Select" : "Cancel"}`,
+        ...p.operations
+          .map((op) => [
+            `#### ${op.name}`,
+            `  - controller: ${op.controller.name}`,
+            `  - function: ${op.function.name}`,
+            `  - reason: ${op.reason}`,
+            "",
+            ...(!!op.function.description?.length
+              ? [op.function.description, ""]
+              : []),
+          ])
+          .flat(),
+      ].join("\n");
+    else if (p.type === "describe")
+      return [
+        "### Describe",
+        ...p.executions.map((e) => `  - ${e.name}`),
+        "",
+        ...p.text.split("\n").map((s) => `> ${s}`),
+        "",
+      ].join("\n");
+    return [
+      "### Execute",
+      `  - name: ${p.name}`,
+      `  - controller: ${p.controller.name}`,
+      `  - function: ${p.function.name}`,
+      "",
+      "```json",
+      JSON.stringify(p.arguments, null, 2),
+      "```",
+      "",
+    ].join("\n");
+  };
+}