npm - @agentica/benchmark - Versions diffs - 0.12.21 → 0.13.0 - Mend

@agentica/benchmark 0.12.21 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/README.md +39 -33
package/lib/AgenticaCallBenchmark.d.ts +12 -6
package/lib/AgenticaCallBenchmark.js +24 -18
package/lib/AgenticaCallBenchmark.js.map +1 -1
package/lib/AgenticaSelectBenchmark.d.ts +12 -6
package/lib/AgenticaSelectBenchmark.js +14 -12
package/lib/AgenticaSelectBenchmark.js.map +1 -1
package/lib/index.mjs +315 -236
package/lib/index.mjs.map +1 -1
package/lib/internal/AgenticaBenchmarkPredicator.d.ts +38 -29
package/lib/internal/AgenticaBenchmarkPredicator.js +100 -84
package/lib/internal/AgenticaBenchmarkPredicator.js.map +1 -1
package/lib/internal/AgenticaBenchmarkUtil.d.ts +21 -6
package/lib/internal/AgenticaBenchmarkUtil.js +39 -33
package/lib/internal/AgenticaBenchmarkUtil.js.map +1 -1
package/lib/internal/AgenticaCallBenchmarkReporter.d.ts +6 -5
package/lib/internal/AgenticaCallBenchmarkReporter.js +130 -126
package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
package/lib/internal/AgenticaPromptReporter.d.ts +13 -5
package/lib/internal/AgenticaPromptReporter.js +45 -41
package/lib/internal/AgenticaPromptReporter.js.map +1 -1
package/lib/internal/AgenticaSelectBenchmarkReporter.d.ts +3 -1
package/lib/internal/AgenticaSelectBenchmarkReporter.js +153 -150
package/lib/internal/AgenticaSelectBenchmarkReporter.js.map +1 -1
package/lib/structures/IAgenticaBenchmarkExpected.d.ts +8 -2
package/lib/structures/IAgenticaCallBenchmarkEvent.d.ts +9 -3
package/lib/structures/IAgenticaCallBenchmarkResult.d.ts +10 -4
package/lib/structures/IAgenticaCallBenchmarkScenario.d.ts +8 -2
package/lib/structures/IAgenticaSelectBenchmarkEvent.d.ts +9 -3
package/lib/structures/IAgenticaSelectBenchmarkResult.d.ts +10 -4
package/lib/structures/IAgenticaSelectBenchmarkScenario.d.ts +8 -2
package/lib/utils/MathUtil.d.ts +15 -3
package/lib/utils/MathUtil.js +15 -4
package/lib/utils/MathUtil.js.map +1 -1
package/package.json +12 -10
package/src/AgenticaCallBenchmark.ts +64 -45
package/src/AgenticaSelectBenchmark.ts +42 -30
package/src/internal/AgenticaBenchmarkPredicator.ts +208 -186
package/src/internal/AgenticaBenchmarkUtil.ts +58 -40
package/src/internal/AgenticaCallBenchmarkReporter.ts +180 -182
package/src/internal/AgenticaPromptReporter.ts +46 -33
package/src/internal/AgenticaSelectBenchmarkReporter.ts +205 -203
package/src/structures/IAgenticaBenchmarkExpected.ts +9 -2
package/src/structures/IAgenticaCallBenchmarkEvent.ts +9 -3
package/src/structures/IAgenticaCallBenchmarkResult.ts +10 -4
package/src/structures/IAgenticaCallBenchmarkScenario.ts +8 -2
package/src/structures/IAgenticaSelectBenchmarkEvent.ts +9 -3
package/src/structures/IAgenticaSelectBenchmarkResult.ts +10 -4
package/src/structures/IAgenticaSelectBenchmarkScenario.ts +8 -2
package/src/utils/MathUtil.ts +16 -3

package/src/AgenticaCallBenchmark.ts CHANGED

@@ -1,13 +1,20 @@
-import { Agentica, AgenticaTokenUsage } from "@agentica/core";
-import { ILlmSchema } from "@samchon/openapi";
-import { Semaphore } from "tstl";
-import { tags } from "typia";
+/**
+ * @module
+ * This file contains the implementation of the AgenticaCallBenchmark class.
+ *
+ * @author Wrtn Technologies
+ */
+import type { Agentica } from "@agentica/core";
+import type { ILlmSchema } from "@samchon/openapi";
+import type { tags } from "typia";
+import type { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
+import type { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
+import type { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
+import { AgenticaTokenUsage } from "@agentica/core";
+import { Semaphore } from "tstl";
 import { AgenticaBenchmarkPredicator } from "./internal/AgenticaBenchmarkPredicator";
 import { AgenticaCallBenchmarkReporter } from "./internal/AgenticaCallBenchmarkReporter";
-import { IAgenticaCallBenchmarkEvent } from "./structures/IAgenticaCallBenchmarkEvent";
-import { IAgenticaCallBenchmarkResult } from "./structures/IAgenticaCallBenchmarkResult";
-import { IAgenticaCallBenchmarkScenario } from "./structures/IAgenticaCallBenchmarkScenario";
 /**
  * LLM function calling selection benchmark.
@@ -72,37 +79,40 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
   ): Promise<IAgenticaCallBenchmarkResult<Model>> {
     const started_at: Date = new Date();
     const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
-    const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[] =
-      await Promise.all(
-        this.scenarios_.map(async (scenario) => {
-          const events: IAgenticaCallBenchmarkEvent<Model>[] =
-            await Promise.all(
-              new Array(this.config_.repeat).fill(0).map(async () => {
-                await semaphore.acquire();
-                const e: IAgenticaCallBenchmarkEvent<Model> =
-                  await this.step(scenario);
-                await semaphore.release();
-                if (listener !== undefined) listener(e);
-                return e;
-              }),
-            );
-          return {
-            scenario,
-            events,
-            usage: events
-              .filter((e) => e.type !== "error")
-              .map((e) => e.usage)
-              .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
-          };
-        }),
-      );
+    const task = this.scenarios_.map(async (scenario) => {
+      const events: IAgenticaCallBenchmarkEvent<Model>[]
+        = await Promise.all(
+          Array.from({ length: this.config_.repeat }).map(async () => {
+            await semaphore.acquire();
+            const e: IAgenticaCallBenchmarkEvent<Model>
+              = await this.step(scenario);
+            await semaphore.release();
+            if (listener !== undefined) {
+              listener(e);
+            }
+            return e;
+          }),
+        );
+      return {
+        scenario,
+        events,
+        usage: events
+          .filter(e => e.type !== "error")
+          .map(e => e.usage)
+          .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
+      };
+    });
+    const experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[]
+      = await Promise.all(task);
     return (this.result_ = {
       experiments,
       started_at,
       completed_at: new Date(),
       usage: experiments
-        .map((p) => p.usage)
-        .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
+        .map(p => p.usage)
+        .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
     });
   }
@@ -125,8 +135,9 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
    * @returns Dictionary of markdown files.
    */
   public report(): Record<string, string> {
-    if (this.result_ === null)
+    if (this.result_ === null) {
       throw new Error("Benchmark is not executed yet.");
+    }
     return AgenticaCallBenchmarkReporter.markdown(this.result_);
   }
@@ -140,8 +151,8 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
         expected: scenario.expected,
         operations: agent
           .getPromptHistories()
-          .filter((p) => p.type === "execute")
-          .map((p) => p.operation),
+          .filter(p => p.type === "execute")
+          .map(p => p.operation),
         strict: false,
       });
     const out = (): IAgenticaCallBenchmarkEvent<Model> => {
@@ -149,10 +160,10 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
         expected: scenario.expected,
         operations: agent
           .getPromptHistories()
-          .filter((p) => p.type === "select")
-          .map((p) => p.selections)
+          .filter(p => p.type === "select")
+          .map(p => p.selections)
           .flat()
-          .map((p) => p.operation),
+          .map(p => p.operation),
         strict: false,
       });
       const call = success();
@@ -170,17 +181,25 @@ export class AgenticaCallBenchmark<Model extends ILlmSchema.Model> {
     try {
       await agent.conversate(scenario.text);
-      if (success()) return out();
+      if (success()) {
+        return out();
+      }
       for (let i: number = 0; i < this.config_.consent; ++i) {
-        const next: string | null =
-          await AgenticaBenchmarkPredicator.isNext(agent);
-        if (next === null) break;
+        const next: string | null
+          = await AgenticaBenchmarkPredicator.isNext(agent);
+        if (next === null) {
+          break;
+        }
         await agent.conversate(next);
-        if (success()) return out();
+        if (success()) {
+          return out();
+        }
       }
       return out();
-    } catch (error) {
+    }
+    catch (error) {
       return {
         type: "error",
         scenario,

package/src/AgenticaSelectBenchmark.ts CHANGED

@@ -1,21 +1,29 @@
-import {
+/**
+ * @module
+ * This file contains the implementation of the AgenticaSelectBenchmark class.
+ *
+ * @author Wrtn Technologies
+ */
+import type {
   Agentica,
   AgenticaContext,
   AgenticaOperationSelection,
   AgenticaPrompt,
+} from "@agentica/core";
+import type { ILlmSchema } from "@samchon/openapi";
+import type { tags } from "typia";
+import type { IAgenticaSelectBenchmarkEvent } from "./structures/IAgenticaSelectBenchmarkEvent";
+import type { IAgenticaSelectBenchmarkResult } from "./structures/IAgenticaSelectBenchmarkResult";
+import type { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBenchmarkScenario";
+import {
   AgenticaTextPrompt,
   AgenticaTokenUsage,
 } from "@agentica/core";
 import { ChatGptSelectFunctionAgent } from "@agentica/core/src/chatgpt/ChatGptSelectFunctionAgent";
-import { ILlmSchema } from "@samchon/openapi";
 import { Semaphore } from "tstl";
-import { tags } from "typia";
 import { AgenticaBenchmarkPredicator } from "./internal/AgenticaBenchmarkPredicator";
 import { AgenticaSelectBenchmarkReporter } from "./internal/AgenticaSelectBenchmarkReporter";
-import { IAgenticaSelectBenchmarkEvent } from "./structures/IAgenticaSelectBenchmarkEvent";
-import { IAgenticaSelectBenchmarkResult } from "./structures/IAgenticaSelectBenchmarkResult";
-import { IAgenticaSelectBenchmarkScenario } from "./structures/IAgenticaSelectBenchmarkScenario";
 /**
  * LLM function calling selection benchmark.
@@ -78,17 +86,19 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
   ): Promise<IAgenticaSelectBenchmarkResult<Model>> {
     const started_at: Date = new Date();
     const semaphore: Semaphore = new Semaphore(this.config_.simultaneous);
-    const experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[] =
-      await Promise.all(
+    const experiments: IAgenticaSelectBenchmarkResult.IExperiment<Model>[]
+      = await Promise.all(
         this.scenarios_.map(async (scenario) => {
-          const events: IAgenticaSelectBenchmarkEvent<Model>[] =
-            await Promise.all(
-              new Array(this.config_.repeat).fill(0).map(async () => {
+          const events: IAgenticaSelectBenchmarkEvent<Model>[]
+            = await Promise.all(
+              Array.from({ length: this.config_.repeat }).map(async () => {
                 await semaphore.acquire();
-                const e: IAgenticaSelectBenchmarkEvent<Model> =
-                  await this.step(scenario);
+                const e: IAgenticaSelectBenchmarkEvent<Model>
+                  = await this.step(scenario);
                 await semaphore.release();
-                if (listener !== undefined) listener(e);
+                if (listener !== undefined) {
+                  listener(e);
+                }
                 return e;
               }),
             );
@@ -96,9 +106,9 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
             scenario,
             events,
             usage: events
-              .filter((e) => e.type !== "error")
-              .map((e) => e.usage)
-              .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
+              .filter(e => e.type !== "error")
+              .map(e => e.usage)
+              .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
           };
         }),
       );
@@ -107,8 +117,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
       started_at,
       completed_at: new Date(),
       usage: experiments
-        .map((p) => p.usage)
-        .reduce(AgenticaTokenUsage.plus, AgenticaTokenUsage.zero()),
+        .map(p => p.usage)
+        .reduce((acc, cur) => AgenticaTokenUsage.plus(acc, cur), AgenticaTokenUsage.zero()),
     });
   }
@@ -132,8 +142,9 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
    * @returns Dictionary of markdown files.
    */
   public report(): Record<string, string> {
-    if (this.result_ === null)
+    if (this.result_ === null) {
       throw new Error("Benchmark is not executed yet.");
+    }
     return AgenticaSelectBenchmarkReporter.markdown(this.result_);
   }
@@ -143,8 +154,8 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
     const started_at: Date = new Date();
     try {
       const usage: AgenticaTokenUsage = AgenticaTokenUsage.zero();
-      const prompts: AgenticaPrompt<Model>[] =
-        await ChatGptSelectFunctionAgent.execute({
+      const prompts: AgenticaPrompt<Model>[]
+        = await ChatGptSelectFunctionAgent.execute({
           ...this.agent_.getContext({
             prompt: new AgenticaTextPrompt({
               role: "user",
@@ -158,13 +169,13 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
           dispatch: async () => {},
         } satisfies AgenticaContext<Model>);
       const selected: AgenticaOperationSelection<Model>[] = prompts
-        .filter((p) => p.type === "select")
-        .map((p) => p.selections)
+        .filter(p => p.type === "select")
+        .map(p => p.selections)
         .flat();
       return {
         type: AgenticaBenchmarkPredicator.success({
           expected: scenario.expected,
-          operations: selected.map((s) => s.operation),
+          operations: selected.map(s => s.operation),
         })
           ? "success"
           : "failure",
@@ -172,16 +183,17 @@ export class AgenticaSelectBenchmark<Model extends ILlmSchema.Model> {
         selected,
         usage,
         assistantPrompts: prompts
-          .filter((p) => p.type === "text")
+          .filter(p => p.type === "text")
           .filter(
             (p): p is AgenticaTextPrompt<"assistant"> => p.role === "assistant",
           ),
         started_at,
         completed_at: new Date(),
       } satisfies
-        | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
-        | IAgenticaSelectBenchmarkEvent.IFailure<Model>;
-    } catch (error) {
+      | IAgenticaSelectBenchmarkEvent.ISuccess<Model>
+      | IAgenticaSelectBenchmarkEvent.IFailure<Model>;
+    }
+    catch (error) {
       return {
         type: "error",
         scenario,