npm - @agentica/benchmark - Versions diffs - 0.12.0 → 0.12.2-dev.20250314 - Mend

@agentica/benchmark 0.12.0 → 0.12.2-dev.20250314

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/LICENSE +21 -21
package/README.md +326 -326
package/lib/index.mjs +1 -1
package/lib/index.mjs.map +1 -1
package/lib/internal/AgenticaCallBenchmarkReporter.js +10 -1
package/lib/internal/AgenticaCallBenchmarkReporter.js.map +1 -1
package/package.json +2 -2
package/src/AgenticaCallBenchmark.ts +263 -263
package/src/AgenticaSelectBenchmark.ts +248 -248
package/src/index.ts +3 -3
package/src/internal/AgenticaBenchmarkPredicator.ts +220 -220
package/src/internal/AgenticaBenchmarkUtil.ts +44 -44
package/src/internal/AgenticaCallBenchmarkReporter.ts +193 -183
package/src/internal/AgenticaPromptReporter.ts +46 -46
package/src/internal/AgenticaSelectBenchmarkReporter.ts +215 -215
package/src/structures/IAgenticaBenchmarkExpected.ts +68 -68
package/src/structures/IAgenticaCallBenchmarkEvent.ts +113 -113
package/src/structures/IAgenticaCallBenchmarkResult.ts +70 -70
package/src/structures/IAgenticaCallBenchmarkScenario.ts +43 -43
package/src/structures/IAgenticaSelectBenchmarkEvent.ts +120 -120
package/src/structures/IAgenticaSelectBenchmarkResult.ts +72 -72
package/src/structures/IAgenticaSelectBenchmarkScenario.ts +43 -43
package/src/utils/MathUtil.ts +3 -3

package/src/structures/IAgenticaCallBenchmarkEvent.ts CHANGED Viewed

@@ -1,113 +1,113 @@
-import { AgenticaPrompt, AgenticaTokenUsage } from "@agentica/core";
-import { ILlmSchema } from "@samchon/openapi";
-import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
-/**
- * Event of LLM function selection benchmark.
- *
- * `IAgenticaCallBenchmarkEvent` is an union type of the events occurred
- * during the LLM function calling benchmark, representing one phase of
- * the benchmark testing about a scenario.
- *
- * In other words, when {@link AgenticaCallBenchmark} executes the
- * benchmark, it will run the benchmark will test a scenario repeately with
- * the given configuration {@link AgenticaCallBenchmark.IConfig.repeat}.
- * And in the repeated benchmark about a scenario,
- * `IAgenticaCallBenchmarkEvent` is one of the repeated testing.
- *
- * For reference, `IAgenticaCallBenchmarkEvent` is categorized into three
- * types: `success`, `failure`, and `error`. The `success` means the
- * benchmark testing is fully meet the expected scenario, and `failure`
- * means that the `selector` or `caller` agents had not selected or
- * called the expected operations. The last type, `error`, means that
- * an error had been occurred during the benchmark testing.
- *
- * @author Samchon
- */
-export type IAgenticaCallBenchmarkEvent<Model extends ILlmSchema.Model> =
-  | IAgenticaCallBenchmarkEvent.ISuccess<Model>
-  | IAgenticaCallBenchmarkEvent.IFailure<Model>
-  | IAgenticaCallBenchmarkEvent.IError<Model>;
-export namespace IAgenticaCallBenchmarkEvent {
-  /**
-   * Success event type.
-   *
-   * The `success` event type represents that the benchmark
-   * testing is fully meet the expected scenario.
-   */
-  export interface ISuccess<Model extends ILlmSchema.Model>
-    extends IEventBase<"success", Model> {
-    /**
-     * Whether succeeded to function selection.
-     */
-    select: true;
-    /**
-     * Whether succeeded to function call.
-     */
-    call: true;
-  }
-  /**
-   * Failure event type.
-   *
-   * The `failure` event type represents that the `selector`
-   * or `caller` agents have not selected or called following the
-   * expected scenario in the benchmark testing.
-   */
-  export interface IFailure<Model extends ILlmSchema.Model>
-    extends IEventBase<"failure", Model> {
-    /**
-     * Whether succeeded to function selection.
-     */
-    select: boolean;
-    /**
-     * Whether succeeded to function call.
-     */
-    call: boolean;
-  }
-  export interface IError<Model extends ILlmSchema.Model>
-    extends IEventBase<"error", Model> {
-    /**
-     * Error occurred during the benchmark.
-     */
-    error: unknown;
-  }
-  interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
-    /**
-     * Discriminant type.
-     */
-    type: Type;
-    /**
-     * Expected scenario.
-     */
-    scenario: IAgenticaCallBenchmarkScenario<Model>;
-    /**
-     * Prompt histories.
-     *
-     * List of prompts occurred during the benchmark testing.
-     */
-    prompts: AgenticaPrompt<Model>[];
-    /**
-     * Usage of the token during the benchmark.
-     */
-    usage: AgenticaTokenUsage;
-    /**
-     * When the benchmark testing started.
-     */
-    started_at: Date;
-    /**
-     * When the benchmark testing completed.
-     */
-    completed_at: Date;
-  }
-}
+import { AgenticaPrompt, AgenticaTokenUsage } from "@agentica/core";
+import { ILlmSchema } from "@samchon/openapi";
+import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
+/**
+ * Event of LLM function calling benchmark.
+ *
+ * `IAgenticaCallBenchmarkEvent` is a union type of the events that occur
+ * during the LLM function calling benchmark, representing one phase of
+ * the benchmark testing about a scenario.
+ *
+ * In other words, when {@link AgenticaCallBenchmark} executes the
+ * benchmark, it will test a scenario repeatedly with
+ * the given configuration {@link AgenticaCallBenchmark.IConfig.repeat}.
+ * And in the repeated benchmark about a scenario,
+ * `IAgenticaCallBenchmarkEvent` is one of the repeated tests.
+ *
+ * For reference, `IAgenticaCallBenchmarkEvent` is categorized into three
+ * types: `success`, `failure`, and `error`. The `success` means the
+ * benchmark testing fully meets the expected scenario, and `failure`
+ * means that the `selector` or `caller` agents did not select or
+ * call the expected operations. The last type, `error`, means that
+ * an error occurred during the benchmark testing.
+ *
+ * @author Samchon
+ */
+export type IAgenticaCallBenchmarkEvent<Model extends ILlmSchema.Model> =
+  | IAgenticaCallBenchmarkEvent.ISuccess<Model>
+  | IAgenticaCallBenchmarkEvent.IFailure<Model>
+  | IAgenticaCallBenchmarkEvent.IError<Model>;
+export namespace IAgenticaCallBenchmarkEvent {
+  /**
+   * Success event type.
+   *
+   * The `success` event type represents that the benchmark
+   * testing fully meets the expected scenario.
+   */
+  export interface ISuccess<Model extends ILlmSchema.Model>
+    extends IEventBase<"success", Model> {
+    /**
+     * Whether the function selection succeeded; always `true` here.
+     */
+    select: true;
+    /**
+     * Whether the function call succeeded; always `true` here.
+     */
+    call: true;
+  }
+  /**
+   * Failure event type.
+   *
+   * The `failure` event type represents that the `selector`
+   * or `caller` agents have not selected or called following the
+   * expected scenario in the benchmark testing.
+   */
+  export interface IFailure<Model extends ILlmSchema.Model>
+    extends IEventBase<"failure", Model> {
+    /**
+     * Whether the function selection succeeded.
+     */
+    select: boolean;
+    /**
+     * Whether the function call succeeded.
+     */
+    call: boolean;
+  }
+  export interface IError<Model extends ILlmSchema.Model>
+    extends IEventBase<"error", Model> {
+    /**
+     * Error that occurred during the benchmark.
+     */
+    error: unknown;
+  }
+  interface IEventBase<Type extends string, Model extends ILlmSchema.Model> {
+    /**
+     * Discriminant type.
+     */
+    type: Type;
+    /**
+     * Expected scenario.
+     */
+    scenario: IAgenticaCallBenchmarkScenario<Model>;
+    /**
+     * Prompt histories.
+     *
+     * List of prompts that occurred during the benchmark testing.
+     */
+    prompts: AgenticaPrompt<Model>[];
+    /**
+     * Token usage during the benchmark.
+     */
+    usage: AgenticaTokenUsage;
+    /**
+     * When the benchmark testing started.
+     */
+    started_at: Date;
+    /**
+     * When the benchmark testing completed.
+     */
+    completed_at: Date;
+  }
+}

package/src/structures/IAgenticaCallBenchmarkResult.ts CHANGED Viewed

@@ -1,70 +1,70 @@
-import { AgenticaTokenUsage } from "@agentica/core";
-import { ILlmSchema } from "@samchon/openapi";
-import { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
-import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
-/**
- * Result of the LLM function calling benchmark.
- *
- * `IAgenticaCallBenchmarkResult` is a structure representing the result
- * of the LLM function calling benchmark executed by the
- * {@link AgenticaCallBenchmark.execute execute} function.
- *
- * It contains every experiment results for each scenario, and aggregated
- * LLM token cost in the benchmark process.
- *
- * In each scenario, as the benchmark program experiments multiple times
- * about a scenario, it will contain multiple events. Also, because of the
- * characteristics of the LLM which is not predictable, the result can be
- * different in each event.
- *
- * @author Samchon
- */
-export interface IAgenticaCallBenchmarkResult<Model extends ILlmSchema.Model> {
-  /**
-   * Experiments for each scenario.
-   */
-  experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[];
-  /**
-   * Aggregated token usage information.
-   */
-  usage: AgenticaTokenUsage;
-  /**
-   * Start time of the benchmark.
-   */
-  started_at: Date;
-  /**
-   * End time of the benchmark.
-   */
-  completed_at: Date;
-}
-export namespace IAgenticaCallBenchmarkResult {
-  /**
-   * Experiment result about a scenario.
-   */
-  export interface IExperiment<Model extends ILlmSchema.Model> {
-    /**
-     * Scenario of the experiment.
-     */
-    scenario: IAgenticaCallBenchmarkScenario<Model>;
-    /**
-     * Events occurred during the benchmark in the scenario.
-     *
-     * When benchmarking a scenario, {@link AgenticaCallBenchmark} will
-     * test a scenario multiple times with the given
-     * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
-     * And the event is one of the repeated benchmark results.
-     */
-    events: IAgenticaCallBenchmarkEvent<Model>[];
-    /**
-     * LLM token usage information.
-     */
-    usage: AgenticaTokenUsage;
-  }
-}
+import { AgenticaTokenUsage } from "@agentica/core";
+import { ILlmSchema } from "@samchon/openapi";
+import { IAgenticaCallBenchmarkEvent } from "./IAgenticaCallBenchmarkEvent";
+import { IAgenticaCallBenchmarkScenario } from "./IAgenticaCallBenchmarkScenario";
+/**
+ * Result of the LLM function calling benchmark.
+ *
+ * `IAgenticaCallBenchmarkResult` is a structure representing the result
+ * of the LLM function calling benchmark executed by the
+ * {@link AgenticaCallBenchmark.execute execute} function.
+ *
+ * It contains the experiment results of every scenario, and the aggregated
+ * LLM token usage of the benchmark process.
+ *
+ * In each scenario, as the benchmark program experiments multiple times
+ * about a scenario, it will contain multiple events. Also, because the
+ * output of an LLM is not predictable, the result can be
+ * different in each event.
+ *
+ * @author Samchon
+ */
+export interface IAgenticaCallBenchmarkResult<Model extends ILlmSchema.Model> {
+  /**
+   * Experiments for each scenario.
+   */
+  experiments: IAgenticaCallBenchmarkResult.IExperiment<Model>[];
+  /**
+   * Aggregated token usage information.
+   */
+  usage: AgenticaTokenUsage;
+  /**
+   * Start time of the benchmark.
+   */
+  started_at: Date;
+  /**
+   * End time of the benchmark.
+   */
+  completed_at: Date;
+}
+export namespace IAgenticaCallBenchmarkResult {
+  /**
+   * Experiment result about a scenario.
+   */
+  export interface IExperiment<Model extends ILlmSchema.Model> {
+    /**
+     * Scenario of the experiment.
+     */
+    scenario: IAgenticaCallBenchmarkScenario<Model>;
+    /**
+     * Events that occurred during the benchmark of the scenario.
+     *
+     * When benchmarking a scenario, {@link AgenticaCallBenchmark} will
+     * test a scenario multiple times with the given
+     * {@link AgenticaCallBenchmark.IConfig.repeat repeat} count.
+     * Each event is the result of one of those repeated runs.
+     */
+    events: IAgenticaCallBenchmarkEvent<Model>[];
+    /**
+     * LLM token usage information.
+     */
+    usage: AgenticaTokenUsage;
+  }
+}

package/src/structures/IAgenticaCallBenchmarkScenario.ts CHANGED Viewed

@@ -1,43 +1,43 @@
-import { ILlmSchema } from "@samchon/openapi";
-import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
-/**
- * Scenario of function calling.
- *
- * `IAgenticaCallBenchmarkScenario` is a data structure which
- * represents a function calling benchmark scenario. It contains two
- * properties; {@linkk text} and {@link operations}.
- *
- * The {@link text} means the conversation text from the user, and
- * the other {@link operations} are the expected operations that
- * should be selected by the `caller` agent through the {@link text}
- * conversation.
- *
- * @author Samchon
- */
-export interface IAgenticaCallBenchmarkScenario<
-  Model extends ILlmSchema.Model,
-> {
-  /**
-   * Name of the scenario.
-   *
-   * It must be unique within the benchmark scenarios.
-   */
-  name: string;
-  /**
-   * The prompt text from user.
-   */
-  text: string;
-  /**
-   * Expected function calling sequence.
-   *
-   * Sequence of operations (API operation or class function) that
-   * should be called by both `selector` and `caller` agents from
-   * the user's {@link text} conversation for the LLM
-   * (Large Language Model) function calling.
-   */
-  expected: IAgenticaBenchmarkExpected<Model>;
-}
+import { ILlmSchema } from "@samchon/openapi";
+import { IAgenticaBenchmarkExpected } from "./IAgenticaBenchmarkExpected";
+/**
+ * Scenario of function calling.
+ *
+ * `IAgenticaCallBenchmarkScenario` is a data structure which
+ * represents a function calling benchmark scenario. Besides its unique
+ * {@link name}, it contains two properties; {@link text} and {@link expected}.
+ *
+ * The {@link text} means the conversation text from the user, and
+ * the other {@link expected} describes the expected operations that
+ * should be called by the agents through the {@link text}
+ * conversation.
+ *
+ * @author Samchon
+ */
+export interface IAgenticaCallBenchmarkScenario<
+  Model extends ILlmSchema.Model,
+> {
+  /**
+   * Name of the scenario.
+   *
+   * It must be unique within the benchmark scenarios.
+   */
+  name: string;
+  /**
+   * The prompt text from user.
+   */
+  text: string;
+  /**
+   * Expected function calling sequence.
+   *
+   * Sequence of operations (API operation or class function) that
+   * should be called by both `selector` and `caller` agents from
+   * the user's {@link text} conversation for the LLM
+   * (Large Language Model) function calling.
+   */
+  expected: IAgenticaBenchmarkExpected<Model>;
+}