npm - @agentica/benchmark - Versions diffs - 0.12.21 → 0.13.0 - Mend

@agentica/benchmark 0.12.21 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/README.md +39 -33
package/lib/AgenticaCallBenchmark.d.ts +12 -6
package/lib/AgenticaCallBenchmark.js +24 -18
package/lib/AgenticaCallBenchmark.js.map +1 -1
package/lib/AgenticaSelectBenchmark.d.ts +12 -6
package/lib/AgenticaSelectBenchmark.js +14 -12
package/lib/AgenticaSelectBenchmark.js.map +1 -1
package/lib/index.mjs +315 -236
package/lib/index.mjs.map +1 -1
package/lib/internal/AgenticaBenchmarkPredicator.d.ts +38 -29
package/lib/internal/AgenticaBenchmarkPredicator.js +100 -84
package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
package/lib/internal/AgenticaBenchmarkUtil.d.ts +21 -6
package/lib/internal/AgenticaBenchmarkUtil.js +39 -33
package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +6 -5
package/lib/internal/AgenticaCallBenchmarkReporter.js +130 -126
package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
package/lib/internal/AgenticaPromptReporter.d.ts +13 -5
package/lib/internal/AgenticaPromptReporter.js +45 -41
package/lib/internal/AgenticaPromptReporter.js.map +1 -1
package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +3 -1
package/lib/internal/AgenticaSelectBenchmarkReporter.js +153 -150
package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
package/lib/structures/IAgenticaBenchmarkExpected.d.ts +8 -2
package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +9 -3
package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +10 -4
package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +8 -2
package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -3
package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +10 -4
package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +8 -2
package/lib/utils/MathUtil.d.ts +15 -3
package/lib/utils/MathUtil.js +15 -4
package/lib/utils/MathUtil.js.map +1 -1
package/package.json +12 -10
package/src/AgenticaCallBenchmark.ts +64 -45
package/src/AgenticaSelectBenchmark.ts +42 -30
package/src/internal/AgenticaBenchmarkPredicator.ts +208 -186
package/src/internal/AgenticaBenchmarkUtil.ts +58 -40
package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -182
package/src/internal/AgenticaPromptReporter.ts +46 -33
package/src/internal/AgenticaSelectBenchmarkReporter.ts +205 -203
package/src/structures/IAgenticaBenchmarkExpected.ts +9 -2
package/src/structures/IAgenticaCallBenchmarkEvent.ts +9 -3
package/src/structures/IAgenticaCallBenchmarkResult.ts +10 -4
package/src/structures/IAgenticaCallBenchmarkScenario.ts +8 -2
package/src/structures/IAgenticaSelectBenchmarkEvent.ts +9 -3
package/src/structures/IAgenticaSelectBenchmarkResult.ts +10 -4
package/src/structures/IAgenticaSelectBenchmarkScenario.ts +8 -2
package/src/utils/MathUtil.ts +16 -3

package/src/internal/AgenticaCallBenchmarkReporter.ts CHANGED

@@ -1,193 +1,191 @@
-import { AgenticaTokenUsage } from "@agentica/core";
-import { ILlmSchema } from "@samchon/openapi";
+/**
+ * @module
+ * This file contains functions to work with AgenticaCallBenchmarkReporter.
+ *
+ * @author Wrtn Technologies
+ */
+import type { AgenticaTokenUsage } from "@agentica/core";
+import type { ILlmSchema } from "@samchon/openapi";
-import { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
-import { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
+import type { IAgenticaCallBenchmarkEvent } from "../structures/IAgenticaCallBenchmarkEvent";
+import type { IAgenticaCallBenchmarkResult } from "../structures/IAgenticaCallBenchmarkResult";
 import { MathUtil } from "../utils/MathUtil";
 import { AgenticaBenchmarkUtil } from "./AgenticaBenchmarkUtil";
 import { AgenticaPromptReporter } from "./AgenticaPromptReporter";
-export namespace AgenticaCallBenchmarkReporter {
-  export const markdown = <Model extends ILlmSchema.Model>(
-    result: IAgenticaCallBenchmarkResult<Model>,
-  ): Record<string, string> =>
-    Object.fromEntries([
-      ["./README.md", writeIndex<Model>(result)],
-      ...result.experiments
-        .map((exp) => [
-          [`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
-          ...exp.events.map((event, i) => [
-            `./${exp.scenario.name}/${i + 1}.${event.type}.md`,
-            writeExperimentEvent(event, i),
-          ]),
-        ])
-        .flat(),
-    ]);
+export const AgenticaCallBenchmarkReporter = {
+  markdown,
+};
-  const writeIndex = <Model extends ILlmSchema.Model>(
-    result: IAgenticaCallBenchmarkResult<Model>,
-  ): string => {
-    const events: IAgenticaCallBenchmarkEvent<Model>[] = result.experiments
-      .map((r) => r.events)
-      .flat();
-    const average: number =
-      events
-        .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-        .reduce((a, b) => a + b, 0) / events.length;
-    const aggregate: AgenticaTokenUsage.IComponent = result.usage.aggregate;
-    return [
-      "# LLM Function Call Benchmark",
-      "## Summary",
-      `  - Aggregation:`,
-      `    - Scenarios: #${result.experiments.length.toLocaleString()}`,
-      `    - Trial: ${events.length}`,
-      `    - Success: ${events.filter((e) => e.type === "success").length}`,
-      `    - Failure: ${events.filter((e) => e.type === "failure").length}`,
-      `    - Average Time: ${MathUtil.round(average).toLocaleString()} ms`,
-      `  - Token Usage`,
-      `    - Total: ${aggregate.total.toLocaleString()}`,
-      `    - Input`,
-      `      - Total: ${aggregate.input.total.toLocaleString()}`,
-      `      - Cached: ${aggregate.input.cached.toLocaleString()}`,
-      `    - Output:`,
-      `      - Total: ${aggregate.output.total.toLocaleString()}`,
-      `      - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
-      `      - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
-      `      - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
-      "",
-      "## Experiments",
-      " Name | Select | Call | Time/Avg ",
-      ":-----|:-------|:-----|----------:",
-      ...result.experiments.map((exp) =>
-        [
-          `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`,
-          drawStatus(
-            exp.events,
-            (e) => e.type !== "error" && e.select === true,
-          ),
-          drawStatus(exp.events, (e) => e.type !== "error" && e.call === true),
-          `${MathUtil.round(
-            exp.events
-              .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-              .reduce((a, b) => a + b, 0) / exp.events.length,
-          ).toLocaleString()} ms`,
-        ].join(" | "),
-      ),
-    ].join("\n");
-  };
+export function markdown<Model extends ILlmSchema.Model>(result: IAgenticaCallBenchmarkResult<Model>): Record<string, string> {
+  return Object.fromEntries([
+    ["./README.md", writeIndex<Model>(result)],
+    ...result.experiments
+      .map<[string, string][]>(exp => [
+        [`./${exp.scenario.name}/README.md`, writeExperimentIndex(exp)],
+        ...exp.events.map<[string, string]>((event, i) => [
+          `./${exp.scenario.name}/${i + 1}.${event.type}.md`,
+          writeExperimentEvent(event, i),
+        ]),
+      ])
+      .flat(),
+  ]);
+}
-  const writeExperimentIndex = <Model extends ILlmSchema.Model>(
-    exp: IAgenticaCallBenchmarkResult.IExperiment<Model>,
-  ): string => {
-    return [
-      `# ${exp.scenario.name}`,
-      "## Summary",
-      `  - Scenarios: #${exp.events.length.toLocaleString()}`,
-      `  - Success: ${exp.events.filter((e) => e.type === "success").length}`,
-      `  - Failure: ${exp.events.filter((e) => e.type === "failure").length}`,
-      `  - Average Time: ${MathUtil.round(
-        exp.events
-          .map((e) => e.completed_at.getTime() - e.started_at.getTime())
-          .reduce((a, b) => a + b, 0) / exp.events.length,
-      ).toLocaleString()} ms`,
-      "",
-      "## Events",
-      " Name | Type | Time",
-      ":-----|:-----|----:",
-      ...exp.events.map((e, i) =>
-        [
-          `[${i + 1}.](./${i + 1}.${e.type}.md)`,
-          e.type,
-          `${MathUtil.round(e.completed_at.getTime() - e.started_at.getTime())} ms`,
-        ].join(" | "),
-      ),
-      "",
-      "## Scenario",
-      "### User Prompt",
-      exp.scenario.text,
-      "",
-      "### Expected",
-      "```json",
-      JSON.stringify(
-        AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected),
-        null,
-        2,
-      ),
-      "```",
-    ].join("\n");
-  };
+function writeIndex<Model extends ILlmSchema.Model>(result: IAgenticaCallBenchmarkResult<Model>): string {
+  const events: IAgenticaCallBenchmarkEvent<Model>[] = result.experiments
+    .map(r => r.events)
+    .flat();
+  const average: number
+    = events
+      .map(e => e.completed_at.getTime() - e.started_at.getTime())
+      .reduce((a, b) => a + b, 0) / events.length;
+  const aggregate: AgenticaTokenUsage.IComponent = result.usage.aggregate;
+  return [
+    "# LLM Function Call Benchmark",
+    "## Summary",
+    `  - Aggregation:`,
+    `    - Scenarios: #${result.experiments.length.toLocaleString()}`,
+    `    - Trial: ${events.length}`,
+    `    - Success: ${events.filter(e => e.type === "success").length}`,
+    `    - Failure: ${events.filter(e => e.type === "failure").length}`,
+    `    - Average Time: ${MathUtil.round(average).toLocaleString()} ms`,
+    `  - Token Usage`,
+    `    - Total: ${aggregate.total.toLocaleString()}`,
+    `    - Input`,
+    `      - Total: ${aggregate.input.total.toLocaleString()}`,
+    `      - Cached: ${aggregate.input.cached.toLocaleString()}`,
+    `    - Output:`,
+    `      - Total: ${aggregate.output.total.toLocaleString()}`,
+    `      - Reasoning: ${aggregate.output.reasoning.toLocaleString()}`,
+    `      - Accepted Prediction: ${aggregate.output.accepted_prediction.toLocaleString()}`,
+    `      - Rejected Prediction: ${aggregate.output.rejected_prediction.toLocaleString()}`,
+    "",
+    "## Experiments",
+    " Name | Select | Call | Time/Avg ",
+    ":-----|:-------|:-----|----------:",
+    ...result.experiments.map(exp =>
+      [
+        `[${exp.scenario.name}](./${exp.scenario.name}/README.md)`,
+        drawStatus(
+          exp.events,
+          e => e.type !== "error" && e.select === true,
+        ),
+        drawStatus(exp.events, e => e.type !== "error" && e.call === true),
+        `${MathUtil.round(
+          exp.events
+            .map(e => e.completed_at.getTime() - e.started_at.getTime())
+            .reduce((a, b) => a + b, 0) / exp.events.length,
+        ).toLocaleString()} ms`,
+      ].join(" | "),
+    ),
+  ].join("\n");
+}
-  const writeExperimentEvent = <Model extends ILlmSchema.Model>(
-    event: IAgenticaCallBenchmarkEvent<Model>,
-    index: number,
-  ): string => {
-    return [
-      `# ${index + 1}. ${event.type}`,
-      "## Summary",
-      `  - Name: ${event.scenario.name}`,
-      `  - Type: ${event.type}`,
-      `  - Time: ${MathUtil.round(
-        event.completed_at.getTime() - event.started_at.getTime(),
-      ).toLocaleString()} ms`,
-      ...(event.type !== "error"
-        ? [
-            `  - Select: ${event.select ? "✅" : "❌"}`,
-            `  - Call: ${event.call ? "✅" : "❌"}`,
-          ]
-        : []),
-      `  - Token Usage:`,
-      `    - Total: ${JSON.stringify(event.usage.aggregate.total)}`,
-      `    - Input`,
-      `      - Total: ${event.usage.aggregate.input.total}`,
-      `      - Cached: ${event.usage.aggregate.input.cached}`,
-      `    - Output:`,
-      `      - Total: ${event.usage.aggregate.output.total}`,
-      `      - Accepted Prediction: ${event.usage.aggregate.output.accepted_prediction}`,
-      `      - Reasoning: ${event.usage.aggregate.output.reasoning}`,
-      `      - Rejected Prediction: ${event.usage.aggregate.output.rejected_prediction}`,
+function writeExperimentIndex<Model extends ILlmSchema.Model>(exp: IAgenticaCallBenchmarkResult.IExperiment<Model>): string {
+  return [
+    `# ${exp.scenario.name}`,
+    "## Summary",
+    `  - Scenarios: #${exp.events.length.toLocaleString()}`,
+    `  - Success: ${exp.events.filter(e => e.type === "success").length}`,
+    `  - Failure: ${exp.events.filter(e => e.type === "failure").length}`,
+    `  - Average Time: ${MathUtil.round(
+      exp.events
+        .map(e => e.completed_at.getTime() - e.started_at.getTime())
+        .reduce((a, b) => a + b, 0) / exp.events.length,
+    ).toLocaleString()} ms`,
+    "",
+    "## Events",
+    " Name | Type | Time",
+    ":-----|:-----|----:",
+    ...exp.events.map((e, i) =>
+      [
+        `[${i + 1}.](./${i + 1}.${e.type}.md)`,
+        e.type,
+        `${MathUtil.round(e.completed_at.getTime() - e.started_at.getTime())} ms`,
+      ].join(" | "),
+    ),
+    "",
+    "## Scenario",
+    "### User Prompt",
+    exp.scenario.text,
+    "",
+    "### Expected",
+    "```json",
+    JSON.stringify(
+      AgenticaBenchmarkUtil.expectedToJson(exp.scenario.expected),
+      null,
+      2,
+    ),
+    "```",
+  ].join("\n");
+}
-      "",
-      "## Scenario",
-      "### User Prompt",
-      event.scenario.text,
-      "",
-      "### Expected",
-      "```json",
-      JSON.stringify(
-        AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected),
-        null,
-        2,
-      ),
-      "```",
-      "",
-      "## Prompt Histories",
-      ...event.prompts.map(AgenticaPromptReporter.markdown),
-      "",
-      ...(event.type === "error"
-        ? [
-            "## Error",
-            "```json",
-            JSON.stringify(
-              AgenticaBenchmarkUtil.errorToJson(event.error),
-              null,
-              2,
-            ),
-            "```",
-          ]
-        : []),
-    ].join("\n");
-  };
+function writeExperimentEvent<Model extends ILlmSchema.Model>(event: IAgenticaCallBenchmarkEvent<Model>, index: number): string {
+  return [
+    `# ${index + 1}. ${event.type}`,
+    "## Summary",
+    `  - Name: ${event.scenario.name}`,
+    `  - Type: ${event.type}`,
+    `  - Time: ${MathUtil.round(
+      event.completed_at.getTime() - event.started_at.getTime(),
+    ).toLocaleString()} ms`,
+    ...(event.type !== "error"
+      ? [
+          `  - Select: ${event.select ? "✅" : "❌"}`,
+          `  - Call: ${event.call ? "✅" : "❌"}`,
+        ]
+      : []),
+    `  - Token Usage:`,
+    `    - Total: ${JSON.stringify(event.usage.aggregate.total)}`,
+    `    - Input`,
+    `      - Total: ${event.usage.aggregate.input.total}`,
+    `      - Cached: ${event.usage.aggregate.input.cached}`,
+    `    - Output:`,
+    `      - Total: ${event.usage.aggregate.output.total}`,
+    `      - Accepted Prediction: ${event.usage.aggregate.output.accepted_prediction}`,
+    `      - Reasoning: ${event.usage.aggregate.output.reasoning}`,
+    `      - Rejected Prediction: ${event.usage.aggregate.output.rejected_prediction}`,
+    "",
+    "## Scenario",
+    "### User Prompt",
+    event.scenario.text,
+    "",
+    "### Expected",
+    "```json",
+    JSON.stringify(
+      AgenticaBenchmarkUtil.expectedToJson(event.scenario.expected),
+      null,
+      2,
+    ),
+    "```",
+    "",
+    "## Prompt Histories",
+    ...event.prompts.map(AgenticaPromptReporter.markdown),
+    "",
+    ...(event.type === "error"
+      ? [
+          "## Error",
+          "```json",
+          JSON.stringify(
+            AgenticaBenchmarkUtil.errorToJson(event.error),
+            null,
+            2,
+          ),
+          "```",
+        ]
+      : []),
+  ].join("\n");
+}
-  const drawStatus = <Model extends ILlmSchema.Model>(
-    events: IAgenticaCallBenchmarkEvent<Model>[],
-    success: (e: IAgenticaCallBenchmarkEvent<Model>) => boolean,
-  ): string => {
-    const count: number = Math.floor(
-      (events.filter(success).length / events.length) * 10,
-    );
-    return (
-      new Array(count).fill("■").join("") +
-      new Array(10 - count).fill("□").join("")
-    );
-  };
+function drawStatus<Model extends ILlmSchema.Model>(events: IAgenticaCallBenchmarkEvent<Model>[], success: (e: IAgenticaCallBenchmarkEvent<Model>) => boolean): string {
+  const count: number = Math.floor(
+    (events.filter(success).length / events.length) * 10,
+  );
+  // @TODO use String.prototype.padStart, padEnd or String.prototype.repeat
+  return (
+    Array.from({ length: count }).fill("■").join("")
+    + Array.from({ length: 10 - count }).fill("□").join("")
+  );
 }

package/src/internal/AgenticaPromptReporter.ts CHANGED

@@ -1,46 +1,59 @@
-import { AgenticaPrompt } from "@agentica/core";
-import { ILlmSchema } from "@samchon/openapi";
+/**
+ * @module
+ * This file contains functions to work with AgenticaPromptReporter.
+ *
+ * @author Wrtn Technologies
+ */
+import type { AgenticaPrompt } from "@agentica/core";
+import type { ILlmSchema } from "@samchon/openapi";
-export namespace AgenticaPromptReporter {
-  export const markdown = <Model extends ILlmSchema.Model>(
-    p: AgenticaPrompt<Model>,
-  ): string => {
-    if (p.type === "text")
-      return [`### Text (${p.role})`, p.text, ""].join("\n");
-    else if (p.type === "select" || p.type === "cancel")
-      return [
-        `### ${p.type === "select" ? "Select" : "Cancel"}`,
-        ...p.selections
-          .map((s) => [
+export const AgenticaPromptReporter = {
+  markdown,
+};
+function markdown<Model extends ILlmSchema.Model>(p: AgenticaPrompt<Model>): string {
+  // @TODO use switch statement
+  if (p.type === "text") {
+    return [`### Text (${p.role})`, p.text, ""].join("\n");
+  }
+  else if (p.type === "select" || p.type === "cancel") {
+    return [
+      `### ${p.type === "select" ? "Select" : "Cancel"}`,
+      ...p.selections
+        .flatMap((s) => {
+          const functionDescriptionCount = s.operation.function.description?.length ?? 0;
+          return [
             `#### ${s.operation.name}`,
             `  - controller: ${s.operation.controller.name}`,
             `  - function: ${s.operation.function.name}`,
             `  - reason: ${s.reason}`,
             "",
-            ...(!!s.operation.function.description?.length
+            ...(functionDescriptionCount > 0
               ? [s.operation.function.description, ""]
               : []),
-          ])
-          .flat(),
-      ].join("\n");
-    else if (p.type === "describe")
-      return [
-        "### Describe",
-        ...p.executes.map((e) => `  - ${e.operation.name}`),
-        "",
-        ...p.text.split("\n").map((s) => `> ${s}`),
-        "",
-      ].join("\n");
+          ];
+        }),
+    ].join("\n");
+  }
+  else if (p.type === "describe") {
     return [
-      "### Execute",
-      `  - name: ${p.operation.name}`,
-      `  - controller: ${p.operation.controller.name}`,
-      `  - function: ${p.operation.function.name}`,
+      "### Describe",
+      ...p.executes.map(e => `  - ${e.operation.name}`),
       "",
-      "```json",
-      JSON.stringify(p.arguments, null, 2),
-      "```",
+      ...p.text.split("\n").map(s => `> ${s}`),
       "",
     ].join("\n");
-  };
+  }
+  return [
+    "### Execute",
+    `  - name: ${p.operation.name}`,
+    `  - controller: ${p.operation.controller.name}`,
+    `  - function: ${p.operation.function.name}`,
+    "",
+    "```json",
+    JSON.stringify(p.arguments, null, 2),
+    "```",
+    "",
+  ].join("\n");
 }