@iqai/adk 0.1.21 → 0.2.0

package/dist/index.mjs CHANGED
@@ -53,7 +53,7 @@ var init_logger = __esm({
  }
  info(message, ...args) {
  const time = (/* @__PURE__ */ new Date()).toLocaleTimeString();
- console.info(
+ console.debug(
  this.colorize(`[${time}] \u2139\uFE0F [${this.name}] ${message}`),
  ...args
  );
@@ -229,7 +229,7 @@ var init_base_tool = __esm({
  * @param context The context of the tool
  * @returns The result of running the tool
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  throw new Error(`${this.constructor.name} runAsync is not implemented`);
  }
  /**
@@ -253,6 +253,12 @@ var init_base_tool = __esm({
  if (!toolWithFunctionDeclarations.functionDeclarations) {
  toolWithFunctionDeclarations.functionDeclarations = [];
  }
+ const alreadyExists = toolWithFunctionDeclarations.functionDeclarations.some(
+ (fd) => fd?.name === functionDeclaration.name
+ );
+ if (alreadyExists) {
+ return;
+ }
  toolWithFunctionDeclarations.functionDeclarations.push(
  functionDeclaration
  );
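The new guard makes repeated tool registration idempotent: a second `processLlmRequest` pass can no longer push a duplicate entry into `functionDeclarations`. A minimal sketch of the behavior, with an illustrative declaration object (not from the package):

```js
// Hypothetical stand-in for the request's tool entry.
const toolWithFunctionDeclarations = { functionDeclarations: [] };

function addDeclaration(functionDeclaration) {
  const alreadyExists = toolWithFunctionDeclarations.functionDeclarations.some(
    (fd) => fd?.name === functionDeclaration.name
  );
  if (alreadyExists) return; // a repeat registration is now a no-op
  toolWithFunctionDeclarations.functionDeclarations.push(functionDeclaration);
}

addDeclaration({ name: "get_weather" });
addDeclaration({ name: "get_weather" }); // ignored
console.log(toolWithFunctionDeclarations.functionDeclarations.length); // 1
```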
@@ -281,7 +287,7 @@ var init_base_tool = __esm({
  * @param context Tool execution context
  * @returns Result of the tool execution or error information
  */
- async safeExecute(args, context) {
+ async safeExecute(args, context4) {
  if (!this.validateArguments(args)) {
  return {
  error: "Invalid arguments",
@@ -302,7 +308,7 @@ var init_base_tool = __esm({
  );
  await new Promise((resolve) => setTimeout(resolve, delay));
  }
- const result = await this.runAsync(args, context);
+ const result = await this.runAsync(args, context4);
  return { result };
  } catch (error) {
  lastError = error instanceof Error ? error : new Error(String(error));
@@ -500,7 +506,7 @@ var init_function_tool = __esm({
  /**
  * Executes the wrapped function with the provided arguments.
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  try {
  const missingArgs = this.getMissingMandatoryArgs(args);
  if (missingArgs.length > 0) {
@@ -513,13 +519,13 @@ You could retry calling this tool, but it is IMPORTANT for you to provide all th
  }
  const argsToCall = { ...args };
  if (this.functionAcceptsToolContext()) {
- argsToCall.toolContext = context;
+ argsToCall.toolContext = context4;
  }
  const funcParams = this.getFunctionParameters();
  const argValues = [];
  for (const paramName of funcParams) {
  if (paramName === "toolContext" && this.functionAcceptsToolContext()) {
- argValues.push(context);
+ argValues.push(context4);
  } else if (paramName in argsToCall) {
  const convertedValue = this.convertArgumentType(
  argsToCall[paramName],
@@ -827,70 +833,23 @@ ${instructions.join("\n\n")}`;

  // src/models/llm-response.ts
  var LlmResponse = class _LlmResponse {
- /**
- * Unique identifier for the response.
- */
  id;
- /**
- * The content generated by the model.
- */
+ text;
  content;
- /**
- * The grounding metadata of the response.
- */
  groundingMetadata;
- /**
- * Indicates whether the text content is part of an unfinished text stream.
- */
  partial;
- /**
- * Indicates whether the response from the model is complete.
- */
  turnComplete;
- /**
- * Error code if the response is an error.
- */
  errorCode;
- /**
- * Error message if the response is an error.
- */
  errorMessage;
- /**
- * Flag indicating that LLM was interrupted when generating the content.
- */
  interrupted;
- /**
- * The custom metadata of the LlmResponse.
- */
  customMetadata;
- /**
- * The usage metadata of the LlmResponse.
- */
  usageMetadata;
- /**
- * Index of the candidate response.
- */
  candidateIndex;
- /**
- * Reason why the model finished generating.
- */
  finishReason;
- /**
- * Error object if the response is an error.
- */
  error;
- /**
- * Creates a new LlmResponse.
- */
  constructor(data = {}) {
  Object.assign(this, data);
  }
- /**
- * Creates an LlmResponse from a GenerateContentResponse.
- *
- * @param generateContentResponse The GenerateContentResponse to create the LlmResponse from.
- * @returns The LlmResponse.
- */
  static create(generateContentResponse) {
  const usageMetadata = generateContentResponse.usageMetadata;
  if (generateContentResponse.candidates && generateContentResponse.candidates.length > 0) {
@@ -922,15 +881,6 @@ var LlmResponse = class _LlmResponse {
  usageMetadata
  });
  }
- /**
- * Creates an LlmResponse from an error.
- *
- * @param error The error object or message.
- * @param options Additional options for the error response.
- * @param options.errorCode A specific error code for the response.
- * @param options.model The model that was being used when the error occurred.
- * @returns The LlmResponse.
- */
  static fromError(error, options = {}) {
  const errorMessage = error instanceof Error ? error.message : String(error);
  const errorCode = options.errorCode || "UNKNOWN_ERROR";
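Only the JSDoc around `fromError` was dropped; the signature is unchanged. A usage sketch, assuming `LlmResponse` is exported from the package root (the error code value is illustrative):

```js
import { LlmResponse } from "@iqai/adk";

const response = LlmResponse.fromError(new Error("quota exceeded"), {
  errorCode: "RATE_LIMITED", // optional; defaults to "UNKNOWN_ERROR"
  model: "gemini-2.5-flash",
});
console.log(response.errorMessage); // "quota exceeded"
```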
@@ -954,6 +904,7 @@ init_logger();
  import {
  DiagConsoleLogger,
  DiagLogLevel,
+ context,
  diag,
  trace
  } from "@opentelemetry/api";
@@ -994,13 +945,24 @@ var TelemetryService = class {
  this.sdk = new NodeSDK({
  resource,
  traceExporter,
- instrumentations: [getNodeAutoInstrumentations()]
+ instrumentations: [
+ getNodeAutoInstrumentations({
+ // Follow Python ADK approach: let all HTTP instrumentation through.
+ // This provides transparency and aligns with standard OpenTelemetry behavior.
+ // High-level LLM tracing is provided through dedicated ADK spans.
+ "@opentelemetry/instrumentation-http": {
+ ignoreIncomingRequestHook: (req) => {
+ return true;
+ }
+ }
+ })
+ ]
  });
  try {
  this.sdk.start();
  this.isInitialized = true;
  this.tracer = trace.getTracer("iqai-adk", config.appVersion || "0.1.0");
- diag.info("OpenTelemetry SDK started successfully.");
+ diag.debug("OpenTelemetry SDK started successfully.");
  } catch (error) {
  diag.error("Error starting OpenTelemetry SDK:", error);
  throw error;
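Note the semantics of the new hook: in `@opentelemetry/instrumentation-http`, returning `true` from `ignoreIncomingRequestHook` tells the instrumentation to skip that incoming request, so the configuration above suppresses spans for all inbound HTTP while outbound calls (for example to LLM APIs) remain traced. A sketch of a more selective variant, with an illustrative health-check path:

```js
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";

const instrumentations = [
  getNodeAutoInstrumentations({
    "@opentelemetry/instrumentation-http": {
      // true = do not create a span for this incoming request
      ignoreIncomingRequestHook: (req) => req.url === "/healthz",
    },
  }),
];
```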
@@ -1043,7 +1005,7 @@ var TelemetryService = class {
  });
  await Promise.race([this.sdk.shutdown(), timeoutPromise]);
  this.isInitialized = false;
- diag.info("Telemetry terminated successfully.");
+ diag.debug("Telemetry terminated successfully.");
  } catch (error) {
  if (error instanceof Error && error.message.includes("timeout")) {
  diag.warn("Telemetry shutdown timed out, some traces may be lost");
@@ -1071,7 +1033,7 @@ var TelemetryService = class {
  }
  }
  span.setAttributes({
- "gen_ai.system.name": "iqai-adk",
+ "gen_ai.system": "iqai-adk",
  "gen_ai.operation.name": "execute_tool",
  "gen_ai.tool.name": tool.name,
  "gen_ai.tool.description": tool.description,
@@ -1085,7 +1047,7 @@ var TelemetryService = class {
  ...process.env.NODE_ENV && {
  "deployment.environment.name": process.env.NODE_ENV
  },
- // Tool-specific data
+ // ADK-specific attributes (matching Python namespace pattern)
  "adk.tool_call_args": this._safeJsonStringify(args),
  "adk.event_id": functionResponseEvent.invocationId,
  "adk.tool_response": this._safeJsonStringify(toolResponse),
@@ -1101,9 +1063,8 @@ var TelemetryService = class {
  if (!span) return;
  const requestData = this._buildLlmRequestForTrace(llmRequest);
  span.setAttributes({
- // Standard OpenTelemetry attributes
- "gen_ai.system.name": "iqai-adk",
- "gen_ai.operation.name": "generate",
+ // Standard OpenTelemetry attributes (following Python pattern)
+ "gen_ai.system": "iqai-adk",
  "gen_ai.request.model": llmRequest.model,
  // Session and user tracking (maps to Langfuse sessionId, userId)
  "session.id": invocationContext.session.id,
@@ -1116,15 +1077,21 @@ var TelemetryService = class {
  "gen_ai.request.max_tokens": llmRequest.config.maxOutputTokens || 0,
  "gen_ai.request.temperature": llmRequest.config.temperature || 0,
  "gen_ai.request.top_p": llmRequest.config.topP || 0,
- // Legacy ADK attributes (keep for backward compatibility)
  "adk.system_name": "iqai-adk",
  "adk.request_model": llmRequest.model,
- "adk.invocation_id": invocationContext.session.id,
+ // ADK-specific attributes (matching Python namespace pattern)
+ "adk.invocation_id": invocationContext.invocationId,
  "adk.session_id": invocationContext.session.id,
  "adk.event_id": eventId,
  "adk.llm_request": this._safeJsonStringify(requestData),
  "adk.llm_response": this._safeJsonStringify(llmResponse)
  });
+ if (llmResponse.usageMetadata) {
+ span.setAttributes({
+ "gen_ai.usage.input_tokens": llmResponse.usageMetadata.promptTokenCount || 0,
+ "gen_ai.usage.output_tokens": llmResponse.usageMetadata.candidatesTokenCount || 0
+ });
+ }
  span.addEvent("gen_ai.content.prompt", {
  "gen_ai.prompt": this._safeJsonStringify(requestData.messages)
  });
@@ -1137,9 +1104,14 @@ var TelemetryService = class {
  */
  async *traceAsyncGenerator(spanName, generator) {
  const span = this.tracer.startSpan(spanName);
+ const spanContext = trace.setSpan(context.active(), span);
  try {
- for await (const item of generator) {
- yield item;
+ while (true) {
+ const result = await context.with(spanContext, () => generator.next());
+ if (result.done) {
+ break;
+ }
+ yield result.value;
  }
  } catch (error) {
  span.recordException(error);
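An async generator's body only runs when the consumer calls `next()`, so a plain `for await` would execute the wrapped generator under whatever context is active at the call site, not under the new span. Driving the iterator manually lets every `next()` run inside `context.with(spanContext, ...)`, so spans created within the generator become children of `spanName`. A self-contained sketch of the same pattern, assuming only `@opentelemetry/api`:

```js
import { context, trace } from "@opentelemetry/api";

async function* withSpan(tracer, name, generator) {
  const span = tracer.startSpan(name);
  const spanContext = trace.setSpan(context.active(), span);
  try {
    while (true) {
      // Each step of the inner generator sees `span` as the active span.
      const result = await context.with(spanContext, () => generator.next());
      if (result.done) break;
      yield result.value;
    }
  } finally {
    span.end();
  }
}
```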
@@ -1226,7 +1198,7 @@ var traceLlmCall = (invocationContext, eventId, llmRequest, llmResponse) => tele
  // src/models/base-llm.ts
  var BaseLlm = class {
  /**
- * The name of the LLM, e.g. gemini-1.5-flash or gemini-1.5-flash-001.
+ * The name of the LLM, e.g. gemini-2.5-flash or gemini-2.5-flash-001.
  */
  model;
  logger = new Logger({ name: "BaseLlm" });
@@ -1915,7 +1887,7 @@ var GoogleLlm = class extends BaseLlm {
  /**
  * Constructor for Gemini
  */
- constructor(model = "gemini-1.5-flash") {
+ constructor(model = "gemini-2.5-flash") {
  super(model);
  }
  /**
@@ -2647,30 +2619,16 @@ var OpenAiLlm = class extends BaseLlm {
  // src/models/llm-registry.ts
  init_logger();
  var LLMRegistry = class _LLMRegistry {
- /**
- * Map of model name regex to LLM class
- */
  static llmRegistry = /* @__PURE__ */ new Map();
+ static modelInstances = /* @__PURE__ */ new Map();
  static logger = new Logger({ name: "LLMRegistry" });
- /**
- * Creates a new LLM instance
- *
- * @param model The model name
- * @returns The LLM instance
- */
  static newLLM(model) {
  const llmClass = _LLMRegistry.resolve(model);
  if (!llmClass) {
- throw new Error(`No LLM found for model: ${model}`);
+ throw new Error(`No LLM class found for model: ${model}`);
  }
  return new llmClass(model);
  }
- /**
- * Resolves the LLM class from the model name
- *
- * @param model The model name
- * @returns The LLM class
- */
  static resolve(model) {
  for (const [regex, llmClass] of _LLMRegistry.llmRegistry.entries()) {
  if (regex.test(model)) {
@@ -2679,34 +2637,54 @@ var LLMRegistry = class _LLMRegistry {
  }
  return null;
  }
- /**
- * Registers a new LLM class
- *
- * @param modelNameRegex The regex to match model names
- * @param llmClass The LLM class
- */
  static register(modelNameRegex, llmClass) {
  _LLMRegistry.llmRegistry.set(new RegExp(modelNameRegex), llmClass);
  }
- /**
- * Registers all model patterns from an LLM class
- *
- * @param llmClass The LLM class
- */
  static registerLLM(llmClass) {
  const modelPatterns = llmClass.supportedModels();
  for (const pattern of modelPatterns) {
  _LLMRegistry.register(pattern, llmClass);
  }
  }
- /**
- * Logs all registered models for debugging
- */
+ static registerModel(name, model) {
+ _LLMRegistry.modelInstances.set(name, model);
+ }
+ static getModel(name) {
+ const model = _LLMRegistry.modelInstances.get(name);
+ if (!model) {
+ throw new Error(`Model '${name}' not found in registry`);
+ }
+ return model;
+ }
+ static hasModel(name) {
+ return _LLMRegistry.modelInstances.has(name);
+ }
+ static unregisterModel(name) {
+ _LLMRegistry.modelInstances.delete(name);
+ }
+ static getModelOrCreate(name) {
+ if (_LLMRegistry.hasModel(name)) {
+ return _LLMRegistry.getModel(name);
+ }
+ return _LLMRegistry.newLLM(name);
+ }
+ static clear() {
+ _LLMRegistry.llmRegistry.clear();
+ _LLMRegistry.modelInstances.clear();
+ }
+ static clearModels() {
+ _LLMRegistry.modelInstances.clear();
+ }
+ static clearClasses() {
+ _LLMRegistry.llmRegistry.clear();
+ }
  static logRegisteredModels() {
- _LLMRegistry.logger.debug(
- "Registered LLM models:",
- [..._LLMRegistry.llmRegistry.entries()].map(([regex]) => regex.toString())
+ const classPatterns = [..._LLMRegistry.llmRegistry.entries()].map(
+ ([regex]) => regex.toString()
  );
+ const instanceNames = [..._LLMRegistry.modelInstances.keys()];
+ _LLMRegistry.logger.debug("Registered LLM class patterns:", classPatterns);
+ _LLMRegistry.logger.debug("Registered LLM instances:", instanceNames);
  }
  };
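The registry now holds named instances alongside the class patterns, and `getModelOrCreate` checks the instance map before falling back to pattern-based construction. A usage sketch, assuming `LLMRegistry` and `GoogleLlm` are exported from the package root (the custom name is illustrative):

```js
import { LLMRegistry, GoogleLlm } from "@iqai/adk";

// Register a pre-configured instance under a custom name...
LLMRegistry.registerModel("tuned-gemini", new GoogleLlm("gemini-2.5-flash"));

// ...and resolve it later; unknown names fall back to the class patterns.
const llm = LLMRegistry.getModelOrCreate("tuned-gemini");
console.log(LLMRegistry.hasModel("tuned-gemini")); // true
LLMRegistry.unregisterModel("tuned-gemini");
```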

@@ -3954,10 +3932,10 @@ var CreatedTool = class extends BaseTool {
  /**
  * Executes the tool function with validation
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  try {
  const validatedArgs = this.schema.parse(args);
- const result = await Promise.resolve(this.func(validatedArgs, context));
+ const result = await Promise.resolve(this.func(validatedArgs, context4));
  return result ?? {};
  } catch (error) {
  if (error instanceof z.ZodError) {
@@ -4215,7 +4193,7 @@ var AgentTool = class extends BaseTool {
  /**
  * Execute the tool by running the agent with the provided input
  */
- async runAsync(params, context) {
+ async runAsync(params, context4) {
  try {
  const input = params.input || Object.values(params)[0];
  if (!isLlmAgent(this.agent)) {
@@ -4223,7 +4201,7 @@ var AgentTool = class extends BaseTool {
  `Agent ${this.name} does not support running as a tool`
  );
  }
- const parentInvocation = context._invocationContext;
+ const parentInvocation = context4._invocationContext;
  const childInvocationContext = new InvocationContext({
  invocationId: uuidv42(),
  agent: this.agent,
@@ -4260,8 +4238,8 @@ var AgentTool = class extends BaseTool {
  } catch {
  toolResult = mergedText;
  }
- if (this.outputKey && context?.state) {
- context.state[this.outputKey] = toolResult;
+ if (this.outputKey && context4?.state) {
+ context4.state[this.outputKey] = toolResult;
  }
  return toolResult;
  } catch (error) {
@@ -4809,9 +4787,9 @@ var UserInteractionTool = class extends BaseTool {
  /**
  * Execute the user interaction
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  try {
- const actions = context.actions;
+ const actions = context4.actions;
  if (!actions || !actions.promptUser) {
  return {
  success: false,
@@ -4859,9 +4837,9 @@ var ExitLoopTool = class extends BaseTool {
  /**
  * Execute the exit loop action
  */
- async runAsync(_args, context) {
+ async runAsync(_args, context4) {
  this.logger.debug("Executing exit loop tool");
- context.actions.escalate = true;
+ context4.actions.escalate = true;
  }
  };

@@ -4912,14 +4890,14 @@ var GetUserChoiceTool = class extends BaseTool {
  * This is a long running operation that will return null initially
  * and the actual choice will be provided asynchronously
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  this.logger.debug(
  `Executing get_user_choice with options: ${args.options.join(", ")}`
  );
  if (args.question) {
  this.logger.debug(`Question: ${args.question}`);
  }
- context.actions.skipSummarization = true;
+ context4.actions.skipSummarization = true;
  return null;
  }
  };
@@ -4961,9 +4939,9 @@ var TransferToAgentTool = class extends BaseTool {
  /**
  * Execute the transfer to agent action
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  this.logger.debug(`Executing transfer to agent: ${args.agent_name}`);
- context.actions.transferToAgent = args.agent_name;
+ context4.actions.transferToAgent = args.agent_name;
  }
  };

@@ -5004,10 +4982,10 @@ var LoadMemoryTool = class extends BaseTool {
  /**
  * Execute the memory loading action
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  this.logger.debug(`Executing load_memory with query: ${args.query}`);
  try {
- const searchResult = await context.searchMemory(args.query);
+ const searchResult = await context4.searchMemory(args.query);
  return {
  memories: searchResult.memories || [],
  count: searchResult.memories?.length || 0
@@ -5057,7 +5035,7 @@ var LoadArtifactsTool = class extends BaseTool {
  /**
  * Execute the load artifacts operation
  */
- async runAsync(args, context) {
+ async runAsync(args, context4) {
  const artifactNames = args.artifact_names || [];
  return { artifact_names: artifactNames };
  }
@@ -6088,12 +6066,12 @@ var McpToolset = class {
  * Checks if a tool should be included based on the tool filter.
  * Similar to Python's _is_selected method.
  */
- isSelected(tool, context) {
+ isSelected(tool, context4) {
  if (!this.toolFilter) {
  return true;
  }
  if (typeof this.toolFilter === "function") {
- return this.toolFilter(tool, context);
+ return this.toolFilter(tool, context4);
  }
  if (Array.isArray(this.toolFilter)) {
  return this.toolFilter.includes(tool.name);
@@ -6146,7 +6124,7 @@ var McpToolset = class {
  * Retrieves tools from the MCP server and converts them to BaseTool instances.
  * Similar to Python's get_tools method.
  */
- async getTools(context) {
+ async getTools(context4) {
  try {
  if (this.isClosing) {
  throw new McpError(
@@ -6168,7 +6146,7 @@ var McpToolset = class {
  }
  const tools = [];
  for (const mcpTool of toolsResponse.tools) {
- if (this.isSelected(mcpTool, context)) {
+ if (this.isSelected(mcpTool, context4)) {
  try {
  const tool = await createTool2(mcpTool, client);
  tools.push(tool);
@@ -6205,9 +6183,9 @@ var McpToolset = class {
  /**
  * Refreshes the tool cache by clearing it and fetching tools again
  */
- async refreshTools(context) {
+ async refreshTools(context4) {
  this.tools = [];
- return this.getTools(context);
+ return this.getTools(context4);
  }
  /**
  * Closes the connection to the MCP server.
@@ -6251,6 +6229,7 @@ async function getMcpTools(config, toolFilter) {
  }

  // src/flows/llm-flows/functions.ts
+ import { context as context2, trace as trace2 } from "@opentelemetry/api";
  var AF_FUNCTION_CALL_ID_PREFIX = "adk-";
  var REQUEST_EUC_FUNCTION_CALL_NAME = "adk_request_credential";
  function generateClientFunctionCallId() {
@@ -6340,23 +6319,40 @@ async function handleFunctionCallsAsync(invocationContext, functionCallEvent, to
  toolsDict
  );
  const functionArgs = functionCall.args || {};
- const functionResponse = await callToolAsync(
- tool,
- functionArgs,
- toolContext
- );
- if (tool.isLongRunning) {
+ const tracer2 = telemetryService.getTracer();
+ const span = tracer2.startSpan(`execute_tool ${tool.name}`);
+ const spanContext = trace2.setSpan(context2.active(), span);
+ try {
+ const functionResponse = await context2.with(spanContext, async () => {
+ const result = await callToolAsync(tool, functionArgs, toolContext);
+ if (tool.isLongRunning && !result) {
+ return null;
+ }
+ const functionResponseEvent = buildResponseEvent(
+ tool,
+ result,
+ toolContext,
+ invocationContext
+ );
+ telemetryService.traceToolCall(
+ tool,
+ functionArgs,
+ functionResponseEvent
+ );
+ return { result, event: functionResponseEvent };
+ });
  if (!functionResponse) {
  continue;
  }
+ functionResponseEvents.push(functionResponse.event);
+ span.setStatus({ code: 1 });
+ } catch (error) {
+ span.recordException(error);
+ span.setStatus({ code: 2, message: error.message });
+ throw error;
+ } finally {
+ span.end();
  }
- const functionResponseEvent = buildResponseEvent(
- tool,
- functionResponse,
- toolContext,
- invocationContext
- );
- functionResponseEvents.push(functionResponseEvent);
  }
  if (!functionResponseEvents.length) {
  return null;
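Each function call now runs inside its own `execute_tool <name>` span, and because `callToolAsync` executes under `context2.with(spanContext, ...)`, any HTTP or LLM spans the tool produces nest beneath it. The literals `1` and `2` correspond to `SpanStatusCode.OK` and `SpanStatusCode.ERROR` from `@opentelemetry/api`. A condensed sketch of the wrapping, with a stubbed `tool.run` in place of the real call chain:

```js
import { context, trace, SpanStatusCode } from "@opentelemetry/api";

async function runToolWithSpan(tracer, tool, args) {
  const span = tracer.startSpan(`execute_tool ${tool.name}`);
  const spanContext = trace.setSpan(context.active(), span);
  try {
    // Work done here parents its spans under `execute_tool <name>`.
    const result = await context.with(spanContext, () => tool.run(args));
    span.setStatus({ code: SpanStatusCode.OK }); // code 1 in the bundle
    return result;
  } catch (error) {
    span.recordException(error);
    span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); // code 2
    throw error;
  } finally {
    span.end();
  }
}
```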
@@ -6456,7 +6452,7 @@ var BaseLlmFlow = class {
  responseProcessors = [];
  logger = new Logger({ name: "BaseLlmFlow" });
  async *runAsync(invocationContext) {
- this.logger.info(`Agent '${invocationContext.agent.name}' started.`);
+ this.logger.debug(`Agent '${invocationContext.agent.name}' started.`);
  let stepCount = 0;
  while (true) {
  stepCount++;
@@ -6466,7 +6462,7 @@ var BaseLlmFlow = class {
  yield event;
  }
  if (!lastEvent || lastEvent.isFinalResponse()) {
- this.logger.info(
+ this.logger.debug(
  `Agent '${invocationContext.agent.name}' finished after ${stepCount} steps.`
  );
  break;
@@ -6496,7 +6492,7 @@ var BaseLlmFlow = class {
  yield event;
  }
  if (invocationContext.endInvocation) {
- this.logger.info("Invocation ended during preprocessing.");
+ this.logger.debug("Invocation ended during preprocessing.");
  return;
  }
  const modelResponseEvent = new Event({
@@ -6536,9 +6532,23 @@ var BaseLlmFlow = class {
  yield event;
  }
  }
- const tools = await agent.canonicalTools(
+ let tools = await agent.canonicalTools(
  new ReadonlyContext(invocationContext)
  );
+ if (tools.length > 1) {
+ const seen = /* @__PURE__ */ new Set();
+ const filtered = [];
+ for (const t of tools) {
+ const name = t?.name;
+ if (!name) continue;
+ if (seen.has(name)) {
+ continue;
+ }
+ seen.add(name);
+ filtered.push(t);
+ }
+ tools = filtered;
+ }
  for (const tool of tools) {
  const toolContext = new ToolContext(invocationContext);
  await tool.processLlmRequest(toolContext, llmRequest);
@@ -6611,7 +6621,7 @@ var BaseLlmFlow = class {
  yield functionResponseEvent;
  const transferToAgent = functionResponseEvent.actions?.transferToAgent;
  if (transferToAgent) {
- this.logger.info(`\u{1F504} Live transfer to agent '${transferToAgent}'`);
+ this.logger.debug(`\u{1F504} Live transfer to agent '${transferToAgent}'`);
  const agentToRun = this._getAgentToRun(
  invocationContext,
  transferToAgent
@@ -6650,7 +6660,7 @@ var BaseLlmFlow = class {
  yield functionResponseEvent;
  const transferToAgent = functionResponseEvent.actions?.transferToAgent;
  if (transferToAgent) {
- this.logger.info(`\u{1F504} Transferring to agent '${transferToAgent}'`);
+ this.logger.debug(`\u{1F504} Transferring to agent '${transferToAgent}'`);
  const agentToRun = this._getAgentToRun(
  invocationContext,
  transferToAgent
@@ -6694,7 +6704,42 @@ var BaseLlmFlow = class {
  }
  invocationContext.incrementLlmCallCount();
  const isStreaming = invocationContext.runConfig.streamingMode === "sse" /* SSE */;
- const tools = llmRequest.config?.tools || [];
+ let tools = llmRequest.config?.tools || [];
+ if (tools.length) {
+ const deduped = [];
+ const seenFn = /* @__PURE__ */ new Set();
+ for (const t of tools) {
+ const tool = t;
+ if (tool && Array.isArray(tool.functionDeclarations)) {
+ const newFds = tool.functionDeclarations.filter(
+ (fd) => {
+ if (fd?.name) {
+ if (seenFn.has(fd.name)) {
+ return false;
+ }
+ seenFn.add(fd.name);
+ }
+ return true;
+ }
+ );
+ if (newFds.length) {
+ deduped.push({ ...tool, functionDeclarations: newFds });
+ }
+ } else if (tool?.name) {
+ if (seenFn.has(tool.name)) continue;
+ seenFn.add(tool.name);
+ deduped.push(tool);
+ } else {
+ deduped.push(tool);
+ }
+ }
+ if (deduped.length !== tools.length) {
+ this.logger.debug(
+ `\u{1F501} Deduplicated tool/function declarations: ${tools.length} -> ${deduped.length}`
+ );
+ }
+ llmRequest.config.tools = tools = deduped;
+ }
  const toolNames = tools.map((tool) => {
  if (tool.functionDeclarations && Array.isArray(tool.functionDeclarations)) {
  return tool.functionDeclarations.map((fn) => fn.name).join(", ");
@@ -7074,8 +7119,6 @@ var BasicLlmRequestProcessor = class extends BaseLlmRequestProcessor {
  llmRequest.liveConnectConfig.realtimeInputConfig = runConfig.realtimeInputConfig;
  llmRequest.liveConnectConfig.enableAffectiveDialog = runConfig.enableAffectiveDialog;
  llmRequest.liveConnectConfig.proactivity = runConfig.proactivity;
- const tools = await agent.canonicalTools();
- llmRequest.appendTools(tools);
  for await (const _ of []) {
  yield _;
  }
@@ -9069,19 +9112,19 @@ var LlmAgent = class _LlmAgent extends BaseAgent {
  * Core logic to run this agent via text-based conversation
  * This matches the Python implementation's _run_async_impl
  */
- async *runAsyncImpl(context) {
+ async *runAsyncImpl(context4) {
  this.logger.debug(`Starting LlmAgent execution for "${this.name}"`);
  try {
- for await (const event of this.llmFlow.runAsync(context)) {
+ for await (const event of this.llmFlow.runAsync(context4)) {
  this.maybeSaveOutputToState(event);
  yield event;
  }
  } catch (error) {
  this.logger.error("Error in LlmAgent execution:", error);
  const errorEvent = new Event({
- invocationId: context.invocationId,
+ invocationId: context4.invocationId,
  author: this.name,
- branch: context.branch,
+ branch: context4.branch,
  content: {
  parts: [
  {
@@ -9349,7 +9392,7 @@ var LangGraphAgent = class extends BaseAgent {
  /**
  * Gets the next nodes to execute based on the current node and its result
  */
- async getNextNodes(currentNode, lastEvent, context) {
+ async getNextNodes(currentNode, lastEvent, context4) {
  if (!currentNode.targets || currentNode.targets.length === 0) {
  return [];
  }
@@ -9361,7 +9404,7 @@ var LangGraphAgent = class extends BaseAgent {
  continue;
  }
  if (targetNode.condition) {
- const shouldExecute = await targetNode.condition(lastEvent, context);
+ const shouldExecute = await targetNode.condition(lastEvent, context4);
  if (!shouldExecute) {
  this.logger.debug(`Skipping node "${targetName}" due to condition`);
  continue;
@@ -9374,7 +9417,7 @@ var LangGraphAgent = class extends BaseAgent {
  /**
  * Core logic to run this agent via text-based conversation.
  */
- async *runAsyncImpl(context) {
+ async *runAsyncImpl(context4) {
  this.logger.debug(
  `Starting graph execution from root node "${this.rootNode}"`
  );
@@ -9396,7 +9439,7 @@ var LangGraphAgent = class extends BaseAgent {
  return;
  }
  let stepCount = 0;
- const nodesToExecute = [{ node: rootNode, context }];
+ const nodesToExecute = [{ node: rootNode, context: context4 }];
  const executedNodes = [];
  let lastEvent = null;
  while (nodesToExecute.length > 0 && stepCount < this.maxSteps) {
@@ -9404,7 +9447,7 @@
  const { node } = nodesToExecute.shift();
  this.logger.debug(`Step ${stepCount}: Executing node "${node.name}"`);
  executedNodes.push(node.name);
- const childContext = context.createChildContext(node.agent);
+ const childContext = context4.createChildContext(node.agent);
  try {
  const nodeEvents = [];
  for await (const event of node.agent.runAsync(childContext)) {
@@ -9417,7 +9460,7 @@
  events: nodeEvents
  });
  if (lastEvent) {
- const nextNodes = await this.getNextNodes(node, lastEvent, context);
+ const nextNodes = await this.getNextNodes(node, lastEvent, context4);
  for (const nextNode of nextNodes) {
  nodesToExecute.push({
  node: nextNode,
@@ -9460,8 +9503,8 @@ var LangGraphAgent = class extends BaseAgent {
  * Core logic to run this agent via video/audio-based conversation.
  * For LangGraph, this follows the same execution pattern as text-based.
  */
- async *runLiveImpl(context) {
- yield* this.runAsyncImpl(context);
+ async *runLiveImpl(context4) {
+ yield* this.runAsyncImpl(context4);
  }
  /**
  * Gets the execution results from the last run
@@ -9511,10 +9554,11 @@ var LangGraphAgent = class extends BaseAgent {
  };

  // src/agents/agent-builder.ts
+ init_logger();
  import { generateId } from "ai";

  // src/runners.ts
- import { SpanStatusCode } from "@opentelemetry/api";
+ import { SpanStatusCode, context as context3, trace as trace3 } from "@opentelemetry/api";

  // src/agents/run-config.ts
  var StreamingMode = /* @__PURE__ */ ((StreamingMode2) => {
@@ -9624,19 +9668,19 @@ var InMemoryArtifactService = class {
  }
  async saveArtifact(args) {
  const { appName, userId, sessionId, filename, artifact } = args;
- const path2 = this.getArtifactPath(appName, userId, sessionId, filename);
- if (!this.artifacts.has(path2)) {
- this.artifacts.set(path2, []);
+ const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
+ if (!this.artifacts.has(path3)) {
+ this.artifacts.set(path3, []);
  }
- const versions = this.artifacts.get(path2);
+ const versions = this.artifacts.get(path3);
  const version = versions.length;
  versions.push(artifact);
  return version;
  }
  async loadArtifact(args) {
  const { appName, userId, sessionId, filename, version } = args;
- const path2 = this.getArtifactPath(appName, userId, sessionId, filename);
- const versions = this.artifacts.get(path2);
+ const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
+ const versions = this.artifacts.get(path3);
  if (!versions || versions.length === 0) {
  return null;
  }
@@ -9657,12 +9701,12 @@ var InMemoryArtifactService = class {
  const sessionPrefix = `${appName}/${userId}/${sessionId}/`;
  const userNamespacePrefix = `${appName}/${userId}/user/`;
  const filenames = [];
- for (const path2 of this.artifacts.keys()) {
- if (path2.startsWith(sessionPrefix)) {
- const filename = path2.substring(sessionPrefix.length);
+ for (const path3 of this.artifacts.keys()) {
+ if (path3.startsWith(sessionPrefix)) {
+ const filename = path3.substring(sessionPrefix.length);
  filenames.push(filename);
- } else if (path2.startsWith(userNamespacePrefix)) {
- const filename = path2.substring(userNamespacePrefix.length);
+ } else if (path3.startsWith(userNamespacePrefix)) {
+ const filename = path3.substring(userNamespacePrefix.length);
  filenames.push(filename);
  }
  }
@@ -9670,16 +9714,16 @@ var InMemoryArtifactService = class {
  }
  async deleteArtifact(args) {
  const { appName, userId, sessionId, filename } = args;
- const path2 = this.getArtifactPath(appName, userId, sessionId, filename);
- if (!this.artifacts.has(path2)) {
+ const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
+ if (!this.artifacts.has(path3)) {
  return;
  }
- this.artifacts.delete(path2);
+ this.artifacts.delete(path3);
  }
  async listVersions(args) {
  const { appName, userId, sessionId, filename } = args;
- const path2 = this.getArtifactPath(appName, userId, sessionId, filename);
- const versions = this.artifacts.get(path2);
+ const path3 = this.getArtifactPath(appName, userId, sessionId, filename);
+ const versions = this.artifacts.get(path3);
  if (!versions || versions.length === 0) {
  return [];
  }
@@ -10149,7 +10193,7 @@ var Runner = class {
  }
  };
  invokeRunAsync();
- return function* () {
+ return (function* () {
  while (true) {
  while (queueIndex >= eventQueue.length && !asyncCompleted) {
  }
@@ -10162,7 +10206,7 @@ var Runner = class {
  }
  }
  yield event;
  }
- }();
+ })();
  }
  /**
  * Main entry method to run the agent in this runner.
@@ -10174,11 +10218,11 @@
  runConfig = new RunConfig()
  }) {
  const span = tracer.startSpan("invocation");
+ const spanContext = trace3.setSpan(context3.active(), span);
  try {
- const session = await this.sessionService.getSession(
- this.appName,
- userId,
- sessionId
+ const session = await context3.with(
+ spanContext,
+ () => this.sessionService.getSession(this.appName, userId, sessionId)
  );
  if (!session) {
  throw new Error(`Session not found: ${sessionId}`);
@@ -10188,22 +10232,34 @@ var Runner = class {
  runConfig
  });
  if (newMessage) {
- await this._appendNewMessageToSession(
- session,
- newMessage,
- invocationContext,
- runConfig.saveInputBlobsAsArtifacts || false
+ await context3.with(
+ spanContext,
+ () => this._appendNewMessageToSession(
+ session,
+ newMessage,
+ invocationContext,
+ runConfig.saveInputBlobsAsArtifacts || false
+ )
  );
  }
  invocationContext.agent = this._findAgentToRun(session, this.agent);
- for await (const event of invocationContext.agent.runAsync(
- invocationContext
- )) {
+ const agentGenerator = invocationContext.agent.runAsync(invocationContext);
+ while (true) {
+ const result = await context3.with(
+ spanContext,
+ () => agentGenerator.next()
+ );
+ if (result.done) {
+ break;
+ }
+ const event = result.value;
  if (!event.partial) {
- await this.sessionService.appendEvent(session, event);
- if (this.memoryService) {
- await this.memoryService.addSessionToMemory(session);
- }
+ await context3.with(spanContext, async () => {
+ await this.sessionService.appendEvent(session, event);
+ if (this.memoryService) {
+ await this.memoryService.addSessionToMemory(session);
+ }
+ });
  }
  yield event;
  }
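From the caller's side `runAsync` behaves as before; the rewrite only changes which OpenTelemetry context is active while the agent generator runs and while non-partial events are persisted. Consumption still looks like this (the `runner` construction and the message shape are illustrative, not taken from this diff):

```js
for await (const event of runner.runAsync({
  userId: "user-1",
  sessionId: "session-1",
  newMessage: { parts: [{ text: "hello" }] },
})) {
  if (!event.partial) console.log(event);
}
```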
@@ -10350,6 +10406,12 @@ var AgentBuilder = class _AgentBuilder {
  artifactService;
  agentType = "llm";
  existingSession;
+ existingAgent;
+ // If provided, reuse directly
+ definitionLocked = false;
+ // Lock further definition mutation after withAgent
+ warnedMethods = /* @__PURE__ */ new Set();
+ logger = new Logger({ name: "AgentBuilder" });
  /**
  * Private constructor - use static create() method
  */
@@ -10378,6 +10440,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withModel(model) {
+ this.warnIfLocked("withModel");
  this.config.model = model;
  return this;
  }
@@ -10387,6 +10450,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withDescription(description) {
+ this.warnIfLocked("withDescription");
  this.config.description = description;
  return this;
  }
@@ -10396,14 +10460,17 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withInstruction(instruction) {
+ this.warnIfLocked("withInstruction");
  this.config.instruction = instruction;
  return this;
  }
  withInputSchema(schema) {
+ this.warnIfLocked("withInputSchema");
  this.config.inputSchema = schema;
  return this;
  }
  withOutputSchema(schema) {
+ this.warnIfLocked("withOutputSchema");
  this.config.outputSchema = schema;
  return this;
  }
@@ -10413,6 +10480,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withTools(...tools) {
+ this.warnIfLocked("withTools");
  this.config.tools = [...this.config.tools || [], ...tools];
  return this;
  }
@@ -10422,6 +10490,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withPlanner(planner) {
+ this.warnIfLocked("withPlanner");
  this.config.planner = planner;
  return this;
  }
@@ -10431,6 +10500,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withCodeExecutor(codeExecutor) {
+ this.warnIfLocked("withCodeExecutor");
  this.config.codeExecutor = codeExecutor;
  return this;
  }
@@ -10440,6 +10510,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withOutputKey(outputKey) {
+ this.warnIfLocked("withOutputKey");
  this.config.outputKey = outputKey;
  return this;
  }
@@ -10449,6 +10520,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withSubAgents(subAgents) {
+ this.warnIfLocked("withSubAgents");
  this.config.subAgents = subAgents;
  return this;
  }
@@ -10458,6 +10530,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withBeforeAgentCallback(callback) {
+ this.warnIfLocked("withBeforeAgentCallback");
  this.config.beforeAgentCallback = callback;
  return this;
  }
@@ -10467,15 +10540,29 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  withAfterAgentCallback(callback) {
+ this.warnIfLocked("withAfterAgentCallback");
  this.config.afterAgentCallback = callback;
  return this;
  }
+ /**
+ * Provide an already constructed agent instance. Further definition-mutating calls
+ * (model/tools/instruction/etc.) will be ignored with a dev warning.
+ */
+ withAgent(agent) {
+ this.existingAgent = agent;
+ this.definitionLocked = true;
+ if (this.config.name === "default_agent" && agent.name) {
+ this.config.name = agent.name;
+ }
+ return this;
+ }
  /**
  * Configure as a sequential agent
  * @param subAgents Sub-agents to execute in sequence
  * @returns This builder instance for chaining
  */
  asSequential(subAgents) {
+ this.warnIfLocked("asSequential");
  this.agentType = "sequential";
  this.config.subAgents = subAgents;
  return this;
@@ -10486,6 +10573,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  asParallel(subAgents) {
+ this.warnIfLocked("asParallel");
  this.agentType = "parallel";
  this.config.subAgents = subAgents;
  return this;
@@ -10497,6 +10585,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  asLoop(subAgents, maxIterations = 3) {
+ this.warnIfLocked("asLoop");
  this.agentType = "loop";
  this.config.subAgents = subAgents;
  this.config.maxIterations = maxIterations;
@@ -10509,6 +10598,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns This builder instance for chaining
  */
  asLangGraph(nodes, rootNode) {
+ this.warnIfLocked("asLangGraph");
  this.agentType = "langgraph";
  this.config.nodes = nodes;
  this.config.rootNode = rootNode;
@@ -10635,6 +10725,7 @@ var AgentBuilder = class _AgentBuilder {
  * @returns Created agent instance
  */
  createAgent() {
+ if (this.existingAgent) return this.existingAgent;
  switch (this.agentType) {
  case "llm": {
  if (!this.config.model) {
@@ -10765,6 +10856,22 @@ var AgentBuilder = class _AgentBuilder {
  }
  };
  }
+ /**
+ * Warn (once per method) if the definition has been locked by withAgent().
+ */
+ warnIfLocked(method) {
+ if (!this.definitionLocked) return;
+ if (this.warnedMethods.has(method)) return;
+ this.warnedMethods.add(method);
+ if (process.env.NODE_ENV !== "production") {
+ const msg = `AgentBuilder: attempted to call ${method} after withAgent(); ignoring. (Wrap the agent first OR configure before withAgent).`;
+ if (this.logger && typeof this.logger.warn === "function") {
+ this.logger.warn(msg);
+ } else {
+ console.warn(msg);
+ }
+ }
+ }
  };
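`withAgent` short-circuits the builder: `createAgent` returns the supplied instance, and any later definition call is dropped with a one-time warning outside production. A usage sketch, assuming `AgentBuilder` and `LlmAgent` are exported from the package root and that `create` accepts a name (both assumptions; the agent configuration is illustrative):

```js
import { AgentBuilder, LlmAgent } from "@iqai/adk";

const agent = new LlmAgent({ name: "helper", model: "gemini-2.5-flash" });

const builder = AgentBuilder.create("helper")
  .withAgent(agent) // locks the definition; the builder adopts the agent's name
  .withModel("gemini-2.5-flash-001"); // ignored, warns once in dev
```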
10769
10876
 
10770
10877
  // src/memory/index.ts
@@ -10818,7 +10925,7 @@ var VertexAiSessionService = class extends BaseSessionService {
10818
10925
  path: `reasoningEngines/${reasoningEngineId}/sessions`,
10819
10926
  request_dict: sessionJsonDict
10820
10927
  });
10821
- console.info("Create Session response", apiResponse);
10928
+ console.debug("Create Session response", apiResponse);
10822
10929
  const createdSessionId = apiResponse.name.split("/").slice(-3, -2)[0];
10823
10930
  const operationId = apiResponse.name.split("/").pop();
10824
10931
  let maxRetryAttempt = 5;
@@ -10929,14 +11036,14 @@ var VertexAiSessionService = class extends BaseSessionService {
10929
11036
  async listSessions(appName, userId) {
10930
11037
  const reasoningEngineId = this.getReasoningEngineId(appName);
10931
11038
  const apiClient = this.getApiClient();
10932
- let path2 = `reasoningEngines/${reasoningEngineId}/sessions`;
11039
+ let path3 = `reasoningEngines/${reasoningEngineId}/sessions`;
10933
11040
  if (userId) {
10934
11041
  const parsedUserId = encodeURIComponent(`"${userId}"`);
10935
- path2 = `${path2}?filter=user_id=${parsedUserId}`;
11042
+ path3 = `${path3}?filter=user_id=${parsedUserId}`;
10936
11043
  }
10937
11044
  const apiResponse = await apiClient.async_request({
10938
11045
  http_method: "GET",
10939
- path: path2,
11046
+ path: path3,
10940
11047
  request_dict: {}
10941
11048
  });
10942
11049
  if (apiResponse.httpHeaders) {
@@ -11752,12 +11859,1299 @@ __export(flows_exports, {
11752
11859
  removeClientFunctionCallId: () => removeClientFunctionCallId
11753
11860
  });
11754
11861
 
11862
+ // src/evaluation/index.ts
11863
+ var evaluation_exports = {};
11864
+ __export(evaluation_exports, {
11865
+ AgentEvaluator: () => AgentEvaluator,
11866
+ EvalResult: () => EvalResult,
11867
+ EvalStatus: () => EvalStatus,
11868
+ Evaluator: () => Evaluator,
11869
+ FinalResponseMatchV2Evaluator: () => FinalResponseMatchV2Evaluator,
11870
+ LocalEvalService: () => LocalEvalService,
11871
+ PrebuiltMetrics: () => PrebuiltMetrics,
11872
+ RougeEvaluator: () => RougeEvaluator,
11873
+ SafetyEvaluatorV1: () => SafetyEvaluatorV1,
11874
+ TrajectoryEvaluator: () => TrajectoryEvaluator
11875
+ });
11876
+
11877
+ // src/evaluation/evaluator.ts
11878
+ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
11879
+ EvalStatus2[EvalStatus2["PASSED"] = 1] = "PASSED";
11880
+ EvalStatus2[EvalStatus2["FAILED"] = 2] = "FAILED";
11881
+ EvalStatus2[EvalStatus2["NOT_EVALUATED"] = 3] = "NOT_EVALUATED";
11882
+ return EvalStatus2;
11883
+ })(EvalStatus || {});
11884
+ var Evaluator = class {
11885
+ constructor(metric) {
11886
+ this.metric = metric;
11887
+ }
11888
+ static getMetricInfo(metricName) {
11889
+ throw new Error("getMetricInfo() must be implemented by subclass");
11890
+ }
11891
+ };
11892
+
11893
+ // src/evaluation/eval-metrics.ts
11894
+ var PrebuiltMetrics = /* @__PURE__ */ ((PrebuiltMetrics2) => {
11895
+ PrebuiltMetrics2["TOOL_TRAJECTORY_AVG_SCORE"] = "tool_trajectory_avg_score";
11896
+ PrebuiltMetrics2["RESPONSE_EVALUATION_SCORE"] = "response_evaluation_score";
11897
+ PrebuiltMetrics2["RESPONSE_MATCH_SCORE"] = "response_match_score";
11898
+ PrebuiltMetrics2["SAFETY_V1"] = "safety_v1";
11899
+ PrebuiltMetrics2["FINAL_RESPONSE_MATCH_V2"] = "final_response_match_v2";
11900
+ PrebuiltMetrics2["TOOL_TRAJECTORY_SCORE"] = "tool_trajectory_score";
11901
+ PrebuiltMetrics2["SAFETY"] = "safety";
11902
+ PrebuiltMetrics2["RESPONSE_MATCH"] = "response_match";
11903
+ return PrebuiltMetrics2;
11904
+ })(PrebuiltMetrics || {});
11905
+
11906
+ // src/evaluation/eval-result.ts
11907
+ var EvalResult = class {
11908
+ evalSetResultId;
11909
+ evalSetResultName;
11910
+ evalSetId;
11911
+ evalCaseResults;
11912
+ creationTimestamp;
11913
+ constructor(init) {
11914
+ this.evalSetResultId = init.evalSetResultId || "";
11915
+ this.evalSetResultName = init.evalSetResultName;
11916
+ this.evalSetId = init.evalSetId || "";
11917
+ this.evalCaseResults = init.evalCaseResults || [];
11918
+ this.creationTimestamp = init.creationTimestamp || Date.now() / 1e3;
11919
+ }
11920
+ };
11921
+
11922
+ // src/evaluation/agent-evaluator.ts
11923
+ import * as fs2 from "fs/promises";
11924
+ import * as path2 from "path";
11925
+
11926
+ // src/evaluation/base-eval-service.ts
11927
+ var BaseEvalService = class {
11928
+ async *evaluateSession(session) {
11929
+ const inferenceResults = [];
11930
+ for await (const result of this.performInference({
11931
+ evalSetId: session.evalSetId,
11932
+ evalCases: session.evalCases
11933
+ })) {
11934
+ inferenceResults.push(result);
11935
+ }
11936
+ for await (const result of this.evaluate({
11937
+ inferenceResults,
11938
+ evaluateConfig: session.evaluateConfig
11939
+ })) {
11940
+ yield result;
11941
+ }
11942
+ }
11943
+ };
11944
+
11945
+ // src/evaluation/vertex-ai-eval-facade.ts
11946
+ var ERROR_MESSAGE_SUFFIX = `
11947
+ You should specify both project id and location. This metric uses Vertex Gen AI
11948
+ Eval SDK, and it requires google cloud credentials.
11949
+
11950
+ If using an .env file add the values there, or explicitly set in the code using
11951
+ the template below:
11952
+
11953
+ process.env.GOOGLE_CLOUD_LOCATION = <LOCATION>
11954
+ process.env.GOOGLE_CLOUD_PROJECT = <PROJECT ID>
11955
+ `;
11956
+ var VertexAiEvalFacade = class _VertexAiEvalFacade {
11957
+ threshold;
11958
+ metricName;
11959
+ constructor(config) {
11960
+ this.threshold = config.threshold;
11961
+ this.metricName = config.metricName;
11962
+ }
11963
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
11964
+ let totalScore = 0;
11965
+ let numInvocations = 0;
11966
+ const perInvocationResults = [];
11967
+ for (let i = 0; i < actualInvocations.length; i++) {
11968
+ const actual = actualInvocations[i];
11969
+ const expected = expectedInvocations[i];
11970
+ const prompt = this._getText(expected.userContent);
11971
+ const reference = this._getText(expected.finalResponse);
11972
+ const response = this._getText(actual.finalResponse);
11973
+ const evalCase = {
11974
+ prompt,
11975
+ reference,
11976
+ response
11977
+ };
11978
+ try {
11979
+ const evalCaseResult = await _VertexAiEvalFacade._performEval(
11980
+ [evalCase],
11981
+ [this.metricName]
11982
+ );
11983
+ const score = this._getScore(evalCaseResult);
11984
+ perInvocationResults.push({
11985
+ actualInvocation: actual,
11986
+ expectedInvocation: expected,
11987
+ score,
11988
+ evalStatus: this._getEvalStatus(score)
11989
+ });
11990
+ if (score !== null && score !== void 0) {
11991
+ totalScore += score;
11992
+ numInvocations++;
11993
+ }
11994
+ } catch (error) {
11995
+ console.error("Error evaluating invocation:", error);
11996
+ perInvocationResults.push({
11997
+ actualInvocation: actual,
11998
+ expectedInvocation: expected,
11999
+ score: void 0,
12000
+ evalStatus: 3 /* NOT_EVALUATED */
12001
+ });
12002
+ }
12003
+ }
12004
+ if (perInvocationResults.length > 0) {
12005
+ const overallScore = numInvocations > 0 ? totalScore / numInvocations : void 0;
12006
+ return {
12007
+ overallScore,
12008
+ overallEvalStatus: this._getEvalStatus(overallScore),
12009
+ perInvocationResults
12010
+ };
12011
+ }
12012
+ return {
12013
+ overallScore: void 0,
12014
+ overallEvalStatus: 3 /* NOT_EVALUATED */,
12015
+ perInvocationResults: []
12016
+ };
12017
+ }
12018
+ _getText(content) {
12019
+ if (content?.parts) {
12020
+ return content.parts.map((p) => p.text || "").filter((text) => text.length > 0).join("\n");
12021
+ }
12022
+ return "";
12023
+ }
12024
+ _getScore(evalResult) {
12025
+ if (evalResult?.summaryMetrics?.[0]?.meanScore !== void 0 && typeof evalResult.summaryMetrics[0].meanScore === "number" && !Number.isNaN(evalResult.summaryMetrics[0].meanScore)) {
12026
+ return evalResult.summaryMetrics[0].meanScore;
12027
+ }
12028
+ return void 0;
12029
+ }
12030
+ _getEvalStatus(score) {
12031
+ if (score !== null && score !== void 0) {
12032
+ return score >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */;
12033
+ }
12034
+ return 3 /* NOT_EVALUATED */;
12035
+ }
12036
+ static async _performEval(dataset, metrics) {
12037
+ const projectId = process.env.GOOGLE_CLOUD_PROJECT;
12038
+ const location = process.env.GOOGLE_CLOUD_LOCATION;
12039
+ if (!projectId) {
12040
+ throw new Error(`Missing project id. ${ERROR_MESSAGE_SUFFIX}`);
12041
+ }
12042
+ if (!location) {
12043
+ throw new Error(`Missing location. ${ERROR_MESSAGE_SUFFIX}`);
12044
+ }
12045
+ console.warn(
12046
+ "Vertex AI evaluation is not fully implemented. Using mock response."
12047
+ );
12048
+ return {
12049
+ summaryMetrics: [
12050
+ {
12051
+ meanScore: Math.random() * 0.5 + 0.5
12052
+ }
12053
+ ]
12054
+ };
12055
+ }
12056
+ };
12057
+
12058
+ // src/evaluation/response-evaluator.ts
12059
+ var ResponseEvaluator = class extends Evaluator {
12060
+ metricName;
12061
+ threshold;
12062
+ constructor(evalMetric) {
12063
+ super(evalMetric);
12064
+ if (evalMetric.metricName === "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */) {
12065
+ this.metricName = "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */;
12066
+ } else if (evalMetric.metricName === "response_match_score" /* RESPONSE_MATCH_SCORE */) {
12067
+ this.metricName = "response_match_score" /* RESPONSE_MATCH_SCORE */;
12068
+ } else {
12069
+ throw new Error(`Metric ${evalMetric.metricName} is not supported.`);
12070
+ }
12071
+ this.threshold = evalMetric.threshold;
12072
+ }
12073
+ static getMetricInfo(metricName) {
12074
+ if (metricName === "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */) {
12075
+ return {
12076
+ metricName: "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */,
12077
+ description: "This metric evaluates how coherent agent's response was. Value range of this metric is [1,5], with values closer to 5 more desirable.",
12078
+ metricValueInfo: {
12079
+ interval: {
12080
+ minValue: 1,
12081
+ maxValue: 5,
12082
+ openAtMin: false,
12083
+ openAtMax: false
12084
+ }
12085
+ }
12086
+ };
12087
+ }
12088
+ if (metricName === "response_match_score" /* RESPONSE_MATCH_SCORE */) {
12089
+ return {
12090
+ metricName: "response_match_score" /* RESPONSE_MATCH_SCORE */,
12091
+ description: "This metric evaluates if agent's final response matches a golden/expected final response using Rouge_1 metric. Value range for this metric is [0,1], with values closer to 1 more desirable.",
12092
+ metricValueInfo: {
12093
+ interval: {
12094
+ minValue: 0,
12095
+ maxValue: 1,
12096
+ openAtMin: false,
12097
+ openAtMax: false
12098
+ }
12099
+ }
12100
+ };
12101
+ }
12102
+ throw new Error(`Metric ${metricName} is not supported.`);
12103
+ }
12104
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
12105
+ if (this.metricName === "response_match_score" /* RESPONSE_MATCH_SCORE */) {
12106
+ return this.evaluateRougeScore(actualInvocations, expectedInvocations);
12107
+ }
12108
+ const vertexAiFacade = new VertexAiEvalFacade({
12109
+ threshold: this.threshold,
12110
+ metricName: this.metricName
12111
+ });
12112
+ return vertexAiFacade.evaluateInvocations(
12113
+ actualInvocations,
12114
+ expectedInvocations
12115
+ );
12116
+ }
12117
+ async evaluateRougeScore(actualInvocations, expectedInvocations) {
12118
+ if (actualInvocations.length !== expectedInvocations.length) {
12119
+ throw new Error("Number of actual and expected invocations must match");
12120
+ }
12121
+ const results = [];
12122
+ for (let i = 0; i < actualInvocations.length; i++) {
12123
+ const actual = actualInvocations[i];
12124
+ const expected = expectedInvocations[i];
12125
+ const result = await this.evaluateInvocation(actual, expected);
12126
+ results.push(result);
12127
+ }
12128
+ const scores = results.map((r) => r.score).filter((s) => s !== void 0);
12129
+ const overallScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : void 0;
12130
+ const overallStatus = overallScore !== void 0 && overallScore >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */;
12131
+ return {
12132
+ overallScore,
12133
+ overallEvalStatus: overallStatus,
12134
+ perInvocationResults: results
12135
+ };
12136
+ }
12137
+ async evaluateInvocation(actual, expected) {
12138
+ if (!actual.finalResponse || !expected.finalResponse) {
12139
+ return {
12140
+ actualInvocation: actual,
12141
+ expectedInvocation: expected,
12142
+ evalStatus: 3 /* NOT_EVALUATED */
12143
+ };
12144
+ }
12145
+ const score = await this.computeRougeScore(
12146
+ actual.finalResponse,
12147
+ expected.finalResponse
12148
+ );
12149
+ return {
12150
+ actualInvocation: actual,
12151
+ expectedInvocation: expected,
12152
+ score,
12153
+ evalStatus: score >= this.threshold ? 1 /* PASSED */ : 2 /* FAILED */
12154
+ };
12155
+ }
12156
+ async computeRougeScore(actual, expected) {
12157
+ const actualText = this.extractText(actual);
12158
+ const expectedText = this.extractText(expected);
12159
+ if (!actualText.trim() || !expectedText.trim()) {
12160
+ return 0;
12161
+ }
12162
+ const actualTokens = this.tokenizeText(actualText);
12163
+ const expectedTokens = this.tokenizeText(expectedText);
12164
+ const actualUnigrams = new Set(actualTokens);
12165
+ const expectedUnigrams = new Set(expectedTokens);
12166
+ const commonUnigrams = new Set(
12167
+ [...actualUnigrams].filter((token) => expectedUnigrams.has(token))
12168
+ );
12169
+ const precision = actualUnigrams.size > 0 ? commonUnigrams.size / actualUnigrams.size : 0;
12170
+ const recall = expectedUnigrams.size > 0 ? commonUnigrams.size / expectedUnigrams.size : 0;
12171
+ const fmeasure = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
12172
+ return fmeasure;
12173
+ }
12174
+ extractText(content) {
12175
+ if (content?.parts) {
12176
+ return content.parts.map((p) => p.text || "").filter((text) => text.length > 0).join(" ");
12177
+ }
12178
+ return "";
12179
+ }
12180
+ tokenizeText(text) {
12181
+ return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((token) => token.length > 0);
12182
+ }
12183
+ };
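The ROUGE-1 implementation above is set-based: duplicate tokens collapse, precision is measured against the agent response's unigrams, recall against the reference's, and the two combine as an F-measure. A worked example whose numbers follow directly from the code (the threshold is illustrative):

const evaluator = new ResponseEvaluator({
  metricName: "response_match_score",
  threshold: 0.5, // illustrative
});
const score = await evaluator.computeRougeScore(
  { parts: [{ text: "The cat sat on the mat" }] }, // unigram set {the, cat, sat, on, mat}
  { parts: [{ text: "The cat ran" }] } // unigram set {the, cat, ran}
);
// overlap {the, cat}: precision 2/5, recall 2/3, F1 = 2pr/(p+r) = 0.5
console.log(score); // 0.5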
12184
+
12185
+ // src/evaluation/trajectory-evaluator.ts
12186
+ var TrajectoryEvaluator = class extends Evaluator {
12187
+ static getMetricInfo() {
12188
+ return {
12189
+ metricName: "tool_trajectory_avg_score" /* TOOL_TRAJECTORY_AVG_SCORE */,
12190
+ description: "This metric compares two tool call trajectories (expected vs. actual) for the same user interaction. It performs an exact match on the tool name and arguments for each step in the trajectory. A score of 1.0 indicates a perfect match, while 0.0 indicates a mismatch. Higher values are better.",
12191
+ metricValueInfo: {
12192
+ interval: {
12193
+ minValue: 0,
12194
+ maxValue: 1,
12195
+ openAtMin: false,
12196
+ openAtMax: false
12197
+ }
12198
+ }
12199
+ };
12200
+ }
12201
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
12202
+ let totalToolUseAccuracy = 0;
12203
+ let numInvocations = 0;
12204
+ const perInvocationResults = [];
12205
+ for (let i = 0; i < actualInvocations.length; i++) {
12206
+ const actual = actualInvocations[i];
12207
+ const expected = expectedInvocations[i];
12208
+ if (!actual.intermediateData?.toolUses || !expected.intermediateData?.toolUses) {
12209
+ perInvocationResults.push({
12210
+ actualInvocation: actual,
12211
+ expectedInvocation: expected,
12212
+ evalStatus: 3 /* NOT_EVALUATED */
12213
+ });
12214
+ continue;
12215
+ }
12216
+ const toolUseAccuracy = this.areToolCallsEqual(
12217
+ actual.intermediateData.toolUses,
12218
+ expected.intermediateData.toolUses
12219
+ ) ? 1 : 0;
12220
+ perInvocationResults.push({
12221
+ actualInvocation: actual,
12222
+ expectedInvocation: expected,
12223
+ score: toolUseAccuracy,
12224
+ evalStatus: toolUseAccuracy >= this.metric.threshold ? 1 /* PASSED */ : 2 /* FAILED */
12225
+ });
12226
+ totalToolUseAccuracy += toolUseAccuracy;
12227
+ numInvocations++;
12228
+ }
12229
+ const overallScore = numInvocations > 0 ? totalToolUseAccuracy / numInvocations : 0;
12230
+ return {
12231
+ overallScore,
12232
+ overallEvalStatus: overallScore >= this.metric.threshold ? 1 /* PASSED */ : 2 /* FAILED */,
12233
+ perInvocationResults
12234
+ };
12235
+ }
12236
+ areToolCallsEqual(actual, expected) {
12237
+ if (actual.length !== expected.length) {
12238
+ return false;
12239
+ }
12240
+ return actual.every((actualCall, index) => {
12241
+ const expectedCall = expected[index];
12242
+ return this.isToolCallEqual(actualCall, expectedCall);
12243
+ });
12244
+ }
12245
+ isToolCallEqual(actual, expected) {
12246
+ if (actual.name !== expected.name) {
12247
+ return false;
12248
+ }
12249
+ const actualArgs = actual.args || {};
12250
+ const expectedArgs = expected.args || {};
12251
+ const actualKeys = Object.keys(actualArgs).sort();
12252
+ const expectedKeys = Object.keys(expectedArgs).sort();
12253
+ if (actualKeys.length !== expectedKeys.length) {
12254
+ return false;
12255
+ }
12256
+ return actualKeys.every((key, index) => {
12257
+ const expectedKey = expectedKeys[index];
12258
+ if (key !== expectedKey) {
12259
+ return false;
12260
+ }
12261
+ return JSON.stringify(actualArgs[key]) === JSON.stringify(expectedArgs[key]);
12262
+ });
12263
+ }
12264
+ };
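Trajectory matching is exact and order-sensitive: both lists must have the same length, and each step must agree on the tool name and on JSON.stringify-equal arguments, so an extra, missing, reordered, or re-cased call scores 0 for that invocation. A small sketch (tool names, arguments, and threshold are illustrative):

const trajectoryEvaluator = new TrajectoryEvaluator({
  metricName: "tool_trajectory_avg_score",
  threshold: 1, // illustrative
});
trajectoryEvaluator.areToolCallsEqual(
  [{ name: "get_weather", args: { city: "Paris" } }],
  [{ name: "get_weather", args: { city: "Paris" } }]
); // true
trajectoryEvaluator.areToolCallsEqual(
  [{ name: "get_weather", args: { city: "Paris" } }],
  [{ name: "get_weather", args: { city: "paris" } }]
); // false -- the JSON.stringify comparison is case- and type-sensitive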
12265
+
12266
+ // src/evaluation/safety-evaluator.ts
12267
+ var SafetyEvaluatorV1 = class extends Evaluator {
12268
+ static getMetricInfo() {
12269
+ return {
12270
+ metricName: "safety_v1" /* SAFETY_V1 */,
12271
+ description: "This metric evaluates the safety (harmlessness) of an Agent's Response. Value range of the metric is [0, 1], with values closer to 1 to be more desirable (safe).",
12272
+ metricValueInfo: {
12273
+ interval: {
12274
+ minValue: 0,
12275
+ maxValue: 1,
12276
+ openAtMin: false,
12277
+ openAtMax: false
12278
+ }
12279
+ }
12280
+ };
12281
+ }
12282
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
12283
+ const facade = new VertexAiEvalFacade({
12284
+ threshold: this.metric.threshold,
12285
+ metricName: "safety_v1" /* SAFETY_V1 */
12286
+ });
12287
+ return await facade.evaluateInvocations(
12288
+ actualInvocations,
12289
+ expectedInvocations
12290
+ );
12291
+ }
12292
+ };
12293
+
12294
+ // src/evaluation/llm-as-judge-utils.ts
12295
+ function getTextFromContent(content) {
12296
+ if (content?.parts) {
12297
+ return content.parts.map((part) => part.text).filter(Boolean).join("\n");
12298
+ }
12299
+ return "";
12300
+ }
12301
+ function getEvalStatus(score, threshold) {
12302
+ return score >= threshold ? 1 /* PASSED */ : 2 /* FAILED */;
12303
+ }
12304
+
12305
+ // src/evaluation/llm-as-judge.ts
12306
+ var LlmAsJudge = class {
12307
+ async sampleJudge(prompt, numSamples, critiqueParser, judgeModelOptions) {
12308
+ const modelName = judgeModelOptions?.judgeModel || "gemini-2.5-flash";
12309
+ const model = LLMRegistry.getModelOrCreate(modelName);
12310
+ const config = judgeModelOptions?.judgeModelConfig || {};
12311
+ const samples = [];
12312
+ for (let i = 0; i < numSamples; i++) {
12313
+ try {
12314
+ const response = await model.generateContent({
12315
+ prompt,
12316
+ ...config
12317
+ });
12318
+ const label = critiqueParser(response.text);
12319
+ if (label !== "not_found" /* NOT_FOUND */) {
12320
+ samples.push(label);
12321
+ }
12322
+ } catch (error) {
12323
+ console.error("Error sampling judge model:", error);
12324
+ }
12325
+ }
12326
+ return samples;
12327
+ }
12328
+ };
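sampleJudge calls the judge model once per sample and keeps only the labels the parser could extract, so callers receive between 0 and numSamples votes. A hedged sketch with an inline parser (the prompt text and parser are illustrative; gemini-2.5-flash is the default used above):

const judge = new LlmAsJudge();
const labels = await judge.sampleJudge(
  "Is this response valid? Answer with exactly 'valid' or 'invalid'.", // illustrative prompt
  3, // numSamples
  (text) => {
    const t = text.trim().toLowerCase();
    return t === "valid" || t === "invalid" ? t : "not_found"; // NOT_FOUND samples are dropped
  },
  { judgeModel: "gemini-2.5-flash" }
);
// labels might be, e.g., ["valid", "invalid"] if one of the three samples failed to parse.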
12329
+
12330
+ // src/evaluation/final-response-match-v2.ts
12331
+ var FINAL_RESPONSE_MATCH_V2_PROMPT = `You are an expert rater for an AI agent. The AI agent is going to call an API to answer the user query and generate API tool-use code based on the choice of the API and API arguments. The ideal model response should be a function call that fulfills the user query, or a natural language response that hedges or asks the user for further clarification if a function call does not apply.
12332
+ The primary focus of this rating task is to check correctness of the model responses.
12333
+
12334
+ The data consists of:
12335
+ - A user query.
12336
+ - A model generated response for the prompt. The responses can consist of:
12337
+ - Natural language, when the model is asking for clarification, or tells the user it does not possess the requested functionality / option.
12338
+ - Code, in the form of one or multiple python function calls, and additional code as needed, for when the model is fulfilling the user request.
12339
+ You can use the help from a reference response annotated by a human rater. This reference response is of high quality. You can compare the agent's response with the reference response and decide if the agent's response is valid.
12340
+ Note sometimes the reference response only contains the key entities of the correct answer and you need to be flexible to allow the agent response to contain more information than the reference response, or to present the key entities in a different format or structure or in shorter or longer format.
12341
+ When the agent response is provided in the form of tables/dataframes or should be best provided in the form of tables/dataframes: focus on the key entities and main components requested in the user query and check whether you can retrieve those from the agent response. Likewise, if you have the reference response, then find out the key entities and main components in them and check whether you can retrieve those from the agent response. If the prompt does not specify any format instructions and the main items/components are included in the response then tolerate the differences in the formatting of those tables/dataframes.
12342
+
12343
+ You should follow the constitutions below very carefully to rate the model response:
12344
+ - Allow flexibility of format even when reference code only uses one of the possible format, unless API spec or user prompt has explicit format requirement
12345
+ - e.g. For state name, allow both abbreviation and full name unless API spec has explicit requirement. e.g. both 'tx' and 'Texas' should be allowed in the agent response even when reference code only uses one of them.
12346
+ - e.g. If a reference response list outputs in a list format, the agent response is allowed to use sentence format and vice versa unless user prompt explicitly asks for a specific format.
12347
+ - e.g. For numbers, allow flexibility of formatting, e.g. 1000000 vs 1,000,000.
12348
+ - The model shouldn't assume that it doesn't have access to the relevant data or is incapable of answering the question if the reference response is able to find a legitimate answer.
12349
+ - If the model response contains the correct final answer, rate it as valid even when the model response contains more information than the reference response.
12350
+ - If the user prompt has csv or other table format data, don't read it yourself. Trust the reference response final answer instead.
12351
+ - When the validation needs maths, date calculations, do not use your own calculator. Trust the reference response final answer instead.
12352
+ - Be mindful about unit of numbers. For example, if the reference response says 100 miles, but the model response says 100 km, it is invalid.
12353
+ - When the agent response or the reference response is provided in the form of tables/dataframes: focus on the key entities and main components requested in the user query and check whether you can retrieve those from the agent response and whether those match the reference response. If the user query does not specify any format instructions and the main items/components are included in the response then tolerate the differences in the formatting of those tables/dataframes.
12354
+ - When the answer is in numeric format, check whether there are any format requirements in the numeric format, rounding, precision, number of decimals, etc. specified in the user query and the prompt. If there are no such instructions, then tolerate different numerical formats.
12355
+ - When the answer is in numeric format and there are rounding or precision differences between the agent response and the reference response, if no further instructions are provided evaluate if the rounding strategy or precision in the agent response follows the standards for that entity. For instance, model accuracy scores must be reported with at least two decimal places (e.g., 0.798 \u2192 0.80 is acceptable, but 0.7 is not).
12356
+
12357
+ Below are the inputs:
12358
+ {{
12359
+ "User prompt": {prompt},
12360
+ "Agent response": {response},
12361
+ "Reference response": {golden_response},
12362
+ }}
12363
+
12364
+ The answer should be a json alone which follows the json structure below:
12365
+ {{
12366
+ "reasoning": [reasoning],
12367
+ "is_the_agent_response_valid": [valid or invalid],
12368
+ }}
12369
+ Answer with assertiveness:
12370
+ `;
12371
+ var DEFAULT_NUM_SAMPLES = 5;
12372
+ function parseCritique(response) {
12373
+ const labelMatchIsResponseValid = response.match(
12374
+ /"is_the_agent_response_valid":\s*\[*[\n\s]*"*([^"^\]^\s]*)"*[\n\s]*\]*\s*[,\n\}]/
12375
+ );
12376
+ if (labelMatchIsResponseValid?.[1]) {
12377
+ const label = labelMatchIsResponseValid[1].toLowerCase();
12378
+ return label === "valid" ? "valid" /* VALID */ : "invalid" /* INVALID */;
12379
+ }
12380
+ return "not_found" /* NOT_FOUND */;
12381
+ }
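parseCritique only needs the verdict token: the regex tolerates optional brackets, quotes, and whitespace around it, lowercases it, and treats anything other than "valid" as invalid, while a missing key yields the not-found sentinel. Concrete behavior (inputs are illustrative):

parseCritique('{"reasoning": ["ok"], "is_the_agent_response_valid": ["valid"],}'); // "valid"
parseCritique('{"is_the_agent_response_valid": "Invalid"}'); // "invalid"
parseCritique("no structured verdict here"); // "not_found"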
12382
+ var FinalResponseMatchV2Evaluator = class extends Evaluator {
12383
+ constructor(evalMetric, llmAsJudge = new LlmAsJudge()) {
12384
+ super(evalMetric);
12385
+ this.llmAsJudge = llmAsJudge;
12386
+ }
12387
+ static getMetricInfo() {
12388
+ return {
12389
+ metricName: "final_response_match_v2" /* FINAL_RESPONSE_MATCH_V2 */,
12390
+ description: "This metric evaluates if the agent's final response matches a golden/expected final response using an LLM judge. Value range for this metric is [0,1], with values closer to 1 more desirable.",
12391
+ metricValueInfo: {
12392
+ interval: {
12393
+ minValue: 0,
12394
+ maxValue: 1,
12395
+ openAtMin: false,
12396
+ openAtMax: false
12397
+ }
12398
+ }
12399
+ };
12400
+ }
12401
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
12402
+ const perInvocationResults = [];
12403
+ let totalScore = 0;
12404
+ let numInvocations = 0;
12405
+ if (!actualInvocations.length) {
12406
+ return {
12407
+ overallEvalStatus: 3 /* NOT_EVALUATED */,
12408
+ perInvocationResults: []
12409
+ };
12410
+ }
12411
+ for (let i = 0; i < actualInvocations.length; i++) {
12412
+ const actual = actualInvocations[i];
12413
+ const expected = expectedInvocations[i];
12414
+ const prompt = getTextFromContent(expected.userContent);
12415
+ const response = getTextFromContent(actual.finalResponse);
12416
+ const goldenResponse = getTextFromContent(expected.finalResponse);
12417
+ const formattedPrompt = FINAL_RESPONSE_MATCH_V2_PROMPT.replace(
12418
+ "{prompt}",
12419
+ prompt
12420
+ ).replace("{response}", response).replace("{golden_response}", goldenResponse);
12421
+ const numSamples = this.metric.judgeModelOptions?.numSamples ?? DEFAULT_NUM_SAMPLES;
12422
+ const labels = await this.llmAsJudge.sampleJudge(
12423
+ formattedPrompt,
12424
+ numSamples,
12425
+ parseCritique,
12426
+ this.metric.judgeModelOptions
12427
+ );
12428
+ const score = labels.length > 0 ? labels.filter((l) => l === "valid" /* VALID */).length / labels.length : 0;
12429
+ perInvocationResults.push({
12430
+ actualInvocation: actual,
12431
+ expectedInvocation: expected,
12432
+ score,
12433
+ evalStatus: getEvalStatus(score, this.metric.threshold)
12434
+ });
12435
+ totalScore += score;
12436
+ numInvocations++;
12437
+ }
12438
+ const overallScore = totalScore / numInvocations;
12439
+ return {
12440
+ overallScore,
12441
+ overallEvalStatus: getEvalStatus(overallScore, this.metric.threshold),
12442
+ perInvocationResults
12443
+ };
12444
+ }
12445
+ };
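Each invocation's score is the fraction of parsed judge samples that voted "valid"; with the default of five samples, three "valid" votes give 0.6, and the overall score is the mean across invocations. A hedged usage sketch (threshold and options are illustrative):

const v2Evaluator = new FinalResponseMatchV2Evaluator({
  metricName: "final_response_match_v2",
  threshold: 0.5, // illustrative
  judgeModelOptions: { judgeModel: "gemini-2.5-flash", numSamples: 5 },
});
const { overallScore, overallEvalStatus } = await v2Evaluator.evaluateInvocations(
  actualInvocations,
  expectedInvocations
);
// e.g. 3 of 5 "valid" votes per invocation => overallScore 0.6 => PASSED at threshold 0.5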
12446
+
12447
+ // src/evaluation/metric-evaluator-registry.ts
12448
+ var MetricEvaluatorRegistry = class {
12449
+ registry = /* @__PURE__ */ new Map();
12450
+ getEvaluator(evalMetric) {
12451
+ const entry = this.registry.get(evalMetric.metricName);
12452
+ if (!entry) {
12453
+ throw new Error(`${evalMetric.metricName} not found in registry.`);
12454
+ }
12455
+ return new entry.evaluator(evalMetric);
12456
+ }
12457
+ registerEvaluator(metricInfo, evaluator) {
12458
+ const metricName = metricInfo.metricName;
12459
+ if (this.registry.has(metricName)) {
12460
+ console.info(
12461
+ `Updating Evaluator class for ${metricName} from ${this.registry.get(metricName)?.evaluator.name} to ${evaluator.name}`
12462
+ );
12463
+ }
12464
+ this.registry.set(metricName, {
12465
+ evaluator,
12466
+ metricInfo: { ...metricInfo }
12467
+ });
12468
+ }
12469
+ getRegisteredMetrics() {
12470
+ return Array.from(this.registry.values()).map((entry) => ({
12471
+ ...entry.metricInfo
12472
+ }));
12473
+ }
12474
+ };
12475
+ function getDefaultMetricEvaluatorRegistry() {
12476
+ const registry = new MetricEvaluatorRegistry();
12477
+ registry.registerEvaluator(
12478
+ TrajectoryEvaluator.getMetricInfo(),
12479
+ TrajectoryEvaluator
12480
+ );
12481
+ registry.registerEvaluator(
12482
+ ResponseEvaluator.getMetricInfo("response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */),
12483
+ ResponseEvaluator
12484
+ );
12485
+ registry.registerEvaluator(
12486
+ ResponseEvaluator.getMetricInfo("response_match_score" /* RESPONSE_MATCH_SCORE */),
12487
+ ResponseEvaluator
12488
+ );
12489
+ registry.registerEvaluator(
12490
+ SafetyEvaluatorV1.getMetricInfo(),
12491
+ SafetyEvaluatorV1
12492
+ );
12493
+ registry.registerEvaluator(
12494
+ FinalResponseMatchV2Evaluator.getMetricInfo(),
12495
+ FinalResponseMatchV2Evaluator
12496
+ );
12497
+ return registry;
12498
+ }
12499
+ var DEFAULT_METRIC_EVALUATOR_REGISTRY = getDefaultMetricEvaluatorRegistry();
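The registry maps a metric name to an evaluator constructor; registering a name twice replaces the entry (with an informational log), and getEvaluator instantiates the registered class with the eval metric. A sketch of wiring in a hypothetical custom metric (the name and class below are illustrative, not part of the package):

class AlwaysPassEvaluator extends Evaluator {
  async evaluateInvocations(actual, expected) {
    return { overallScore: 1, overallEvalStatus: 1 /* PASSED */, perInvocationResults: [] };
  }
}
DEFAULT_METRIC_EVALUATOR_REGISTRY.registerEvaluator(
  {
    metricName: "always_pass", // illustrative
    description: "Always passes (illustrative).",
    metricValueInfo: {
      interval: { minValue: 0, maxValue: 1, openAtMin: false, openAtMax: false }
    }
  },
  AlwaysPassEvaluator
);
const custom = DEFAULT_METRIC_EVALUATOR_REGISTRY.getEvaluator({
  metricName: "always_pass",
  threshold: 0.5
});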
12500
+
12501
+ // src/evaluation/local-eval-service.ts
12502
+ var LocalEvalService = class extends BaseEvalService {
12503
+ constructor(agent, parallelism = 4) {
12504
+ super();
12505
+ this.agent = agent;
12506
+ this.parallelism = parallelism;
12507
+ this.initializeRunner();
12508
+ }
12509
+ runner;
12510
+ async initializeRunner() {
12511
+ if ("ask" in this.agent) {
12512
+ this.runner = this.agent;
12513
+ } else {
12514
+ try {
12515
+ const { runner } = await AgentBuilder.create("eval_agent").withModel("gemini-2.5-flash").withDescription("Agent for evaluation purposes").build();
12516
+ this.runner = {
12517
+ ask: async (message) => {
12518
+ return await runner.ask(message);
12519
+ }
12520
+ };
12521
+ } catch (error) {
12522
+ console.warn(
12523
+ "Failed to create AgentBuilder runner, falling back to mock:",
12524
+ error
12525
+ );
12526
+ this.runner = {
12527
+ ask: async (message) => {
12528
+ return `Mock response to: ${message}`;
12529
+ }
12530
+ };
12531
+ }
12532
+ }
12533
+ }
12534
+ async *performInference(request) {
12535
+ for (const evalSet of request.evalCases) {
12536
+ for (const evalCase of evalSet.evalCases) {
12537
+ const expected = [];
12538
+ for (const convo of evalCase.conversation) {
12539
+ if (convo.finalResponse) {
12540
+ expected.push({
12541
+ invocationId: `${evalCase.evalId}-expected-${expected.length}`,
12542
+ userContent: convo.userContent,
12543
+ finalResponse: convo.finalResponse,
12544
+ intermediateData: convo.intermediateData,
12545
+ creationTimestamp: convo.creationTimestamp
12546
+ });
12547
+ }
12548
+ }
12549
+ const actual = await this.runInference(evalCase);
12550
+ yield [...expected, ...actual];
12551
+ }
12552
+ }
12553
+ }
12554
+ async *evaluate(request) {
12555
+ const { inferenceResults, evaluateConfig } = request;
12556
+ const resultsByCase = /* @__PURE__ */ new Map();
12557
+ for (const result of inferenceResults) {
12558
+ const invocationId = result[0].invocationId;
12559
+ if (!invocationId) continue;
12560
+ const lastHyphenIndex = invocationId.lastIndexOf("-");
12561
+ const evalId = lastHyphenIndex !== -1 ? invocationId.substring(0, lastHyphenIndex) : invocationId;
12562
+ const existing = resultsByCase.get(evalId) || [];
12563
+ resultsByCase.set(evalId, [...existing, ...result]);
12564
+ }
12565
+ for (const [evalId, results] of resultsByCase) {
12566
+ const evalResult = {
12567
+ evalSetResultId: `${evalId}-result-${Date.now()}`,
12568
+ evalSetId: evalId,
12569
+ evalCaseResults: [],
12570
+ creationTimestamp: Date.now()
12571
+ };
12572
+ for (const evalMetric of evaluateConfig.evalMetrics) {
12573
+ const evaluator = DEFAULT_METRIC_EVALUATOR_REGISTRY.getEvaluator(evalMetric);
12574
+ const actual = results.filter(
12575
+ (r) => !r.invocationId?.includes("expected")
12576
+ );
12577
+ const expected = results.filter(
12578
+ (r) => r.invocationId?.includes("expected")
12579
+ );
12580
+ const result = await evaluator.evaluateInvocations(actual, expected);
12581
+ evalResult.evalCaseResults.push({
12582
+ evalSetId: evalId,
12583
+ evalId,
12584
+ finalEvalStatus: result.perInvocationResults.length > 0 ? result.perInvocationResults[0].evalStatus : 3 /* NOT_EVALUATED */,
12585
+ overallEvalMetricResults: [],
12586
+ sessionId: evalId,
12587
+ evalMetricResultPerInvocation: result.perInvocationResults.map(
12588
+ (r) => ({
12589
+ actualInvocation: r.actualInvocation,
12590
+ expectedInvocation: r.expectedInvocation,
12591
+ evalMetricResults: [
12592
+ {
12593
+ metricName: evalMetric.metricName,
12594
+ threshold: evalMetric.threshold,
12595
+ score: r.score,
12596
+ evalStatus: r.evalStatus
12597
+ }
12598
+ ]
12599
+ })
12600
+ )
12601
+ });
12602
+ }
12603
+ yield evalResult;
12604
+ }
12605
+ }
12606
+ async runInference(evalCase) {
12607
+ const results = [];
12608
+ if (!this.runner) {
12609
+ await this.initializeRunner();
12610
+ }
12611
+ if (evalCase.sessionInput) {
12612
+ try {
12613
+ if (this.runner.initializeSession) {
12614
+ await this.runner.initializeSession(evalCase.sessionInput);
12615
+ } else if (this.runner.setSessionState) {
12616
+ await this.runner.setSessionState(evalCase.sessionInput);
12617
+ } else {
12618
+ console.log(
12619
+ `Session input provided for ${evalCase.evalId}:`,
12620
+ evalCase.sessionInput
12621
+ );
12622
+ }
12623
+ } catch (error) {
12624
+ console.warn(
12625
+ `Failed to initialize session for ${evalCase.evalId}:`,
12626
+ error
12627
+ );
12628
+ }
12629
+ }
12630
+ for (const invocation of evalCase.conversation) {
12631
+ try {
12632
+ const response = await this.runner.ask(invocation.userContent);
12633
+ results.push({
12634
+ invocationId: `${evalCase.evalId}-${results.length}`,
12635
+ userContent: invocation.userContent,
12636
+ finalResponse: {
12637
+ role: "model",
12638
+ parts: [{ text: response || "" }]
12639
+ },
12640
+ intermediateData: {
12641
+ toolUses: [],
12642
+ intermediateResponses: []
12643
+ },
12644
+ creationTimestamp: Date.now()
12645
+ });
12646
+ } catch (error) {
12647
+ console.error(`Error running inference for ${evalCase.evalId}:`, error);
12648
+ results.push({
12649
+ invocationId: `${evalCase.evalId}-${results.length}`,
12650
+ userContent: invocation.userContent,
12651
+ finalResponse: {
12652
+ role: "model",
12653
+ parts: [
12654
+ {
12655
+ text: `Error: ${error instanceof Error ? error.message : "Unknown error"}`
12656
+ }
12657
+ ]
12658
+ },
12659
+ intermediateData: {
12660
+ toolUses: [],
12661
+ intermediateResponses: []
12662
+ },
12663
+ creationTimestamp: Date.now()
12664
+ });
12665
+ }
12666
+ }
12667
+ return results;
12668
+ }
12669
+ };
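Both performInference and evaluate are async generators, so a typical run collects inference results first and then streams per-case eval results. A minimal end-to-end sketch (myAgent and the metric/threshold are illustrative; anything exposing ask() is accepted as a runner):

const service = new LocalEvalService(myAgent);
const inferenceResults = [];
for await (const result of service.performInference({
  evalSetId: evalSet.evalSetId,
  evalCases: [evalSet]
})) {
  inferenceResults.push(result);
}
for await (const evalResult of service.evaluate({
  inferenceResults,
  evaluateConfig: {
    evalMetrics: [{ metricName: "response_match_score", threshold: 0.8 }]
  }
})) {
  console.log(evalResult.evalCaseResults.map((r) => r.finalEvalStatus));
}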
12670
+
12671
+ // src/evaluation/agent-evaluator.ts
12672
+ var NUM_RUNS = 2;
12673
+ var TOOL_TRAJECTORY_SCORE_KEY = "tool_trajectory_avg_score" /* TOOL_TRAJECTORY_AVG_SCORE */;
12674
+ var RESPONSE_EVALUATION_SCORE_KEY = "response_evaluation_score" /* RESPONSE_EVALUATION_SCORE */;
12675
+ var RESPONSE_MATCH_SCORE_KEY = "response_match_score" /* RESPONSE_MATCH_SCORE */;
12676
+ var SAFETY_V1_KEY = "safety_v1" /* SAFETY_V1 */;
12677
+ var ALLOWED_CRITERIA = [
12678
+ TOOL_TRAJECTORY_SCORE_KEY,
12679
+ RESPONSE_EVALUATION_SCORE_KEY,
12680
+ RESPONSE_MATCH_SCORE_KEY,
12681
+ SAFETY_V1_KEY
12682
+ ];
12683
+ var QUERY_COLUMN = "query";
12684
+ var REFERENCE_COLUMN = "reference";
12685
+ var EXPECTED_TOOL_USE_COLUMN = "expected_tool_use";
12686
+ var DEFAULT_CRITERIA = {
12687
+ [TOOL_TRAJECTORY_SCORE_KEY]: 1,
12688
+ [RESPONSE_MATCH_SCORE_KEY]: 0.8
12689
+ };
12690
+ var loadJson = async (filePath) => {
12691
+ try {
12692
+ const fileContent = await fs2.readFile(filePath, "utf-8");
12693
+ return JSON.parse(fileContent);
12694
+ } catch (error) {
12695
+ throw new Error(`Failed to load JSON from ${filePath}: ${error}`);
12696
+ }
12697
+ };
12698
+ var AgentEvaluator = class _AgentEvaluator {
12699
+ static async findConfigForTestFile(testFile) {
12700
+ const testFolder = path2.dirname(testFile);
12701
+ const configPath = path2.join(testFolder, "test_config.json");
12702
+ try {
12703
+ await fs2.access(configPath);
12704
+ const configData = await loadJson(configPath);
12705
+ if ("criteria" in configData && typeof configData.criteria === "object") {
12706
+ return configData.criteria;
12707
+ }
12708
+ throw new Error(
12709
+ `Invalid format for test_config.json at ${configPath}. Expected a 'criteria' dictionary.`
12710
+ );
12711
+ } catch (error) {
12712
+ return DEFAULT_CRITERIA;
12713
+ }
12714
+ }
12715
+ static async evaluateEvalSet(agent, evalSet, criteria, numRuns = NUM_RUNS, printDetailedResults = false) {
12716
+ const evalMetrics = Object.entries(criteria).map(
12717
+ ([metricName, threshold]) => ({
12718
+ metricName,
12719
+ threshold
12720
+ })
12721
+ );
12722
+ const evalResultsByEvalId = await _AgentEvaluator._getEvalResultsByEvalId(
12723
+ agent,
12724
+ evalSet,
12725
+ evalMetrics,
12726
+ numRuns
12727
+ );
12728
+ const failures = [];
12729
+ for (const [_, evalResultsPerEvalId] of evalResultsByEvalId) {
12730
+ const evalMetricResults = _AgentEvaluator._getEvalMetricResultsWithInvocation(
12731
+ evalResultsPerEvalId
12732
+ );
12733
+ const failuresPerEvalCase = _AgentEvaluator._processMetricsAndGetFailures(
12734
+ evalMetricResults,
12735
+ printDetailedResults,
12736
+ agent.name || "Unknown Agent"
12737
+ );
12738
+ failures.push(...failuresPerEvalCase);
12739
+ }
12740
+ if (failures.length > 0) {
12741
+ throw new Error(
12742
+ `Following are all the test failures. If you are looking to get more details on the failures, then please re-run this test with \`printDetailedResults\` set to \`true\`.
12743
+ ${failures.join(
12744
+ "\n"
12745
+ )}`
12746
+ );
12747
+ }
12748
+ }
12749
+ static async evaluate(agent, evalDatasetFilePathOrDir, numRuns = NUM_RUNS, initialSessionFile) {
12750
+ const testFiles = [];
12751
+ try {
12752
+ const stat2 = await fs2.stat(evalDatasetFilePathOrDir);
12753
+ if (stat2.isDirectory()) {
12754
+ const files = await this._findTestFilesRecursively(
12755
+ evalDatasetFilePathOrDir
12756
+ );
12757
+ testFiles.push(...files);
12758
+ } else {
12759
+ testFiles.push(evalDatasetFilePathOrDir);
12760
+ }
12761
+ } catch (error) {
12762
+ throw new Error(`Invalid path: ${evalDatasetFilePathOrDir}`);
12763
+ }
12764
+ const initialSession = await _AgentEvaluator._getInitialSession(initialSessionFile);
12765
+ for (const testFile of testFiles) {
12766
+ const criteria = await _AgentEvaluator.findConfigForTestFile(testFile);
12767
+ const evalSet = await _AgentEvaluator._loadEvalSetFromFile(
12768
+ testFile,
12769
+ criteria,
12770
+ initialSession
12771
+ );
12772
+ await _AgentEvaluator.evaluateEvalSet(agent, evalSet, criteria, numRuns);
12773
+ }
12774
+ }
12775
+ static async migrateEvalDataToNewSchema(oldEvalDataFile, newEvalDataFile, initialSessionFile) {
12776
+ if (!oldEvalDataFile || !newEvalDataFile) {
12777
+ throw new Error("One of oldEvalDataFile or newEvalDataFile is empty.");
12778
+ }
12779
+ const criteria = await _AgentEvaluator.findConfigForTestFile(oldEvalDataFile);
12780
+ const initialSession = await _AgentEvaluator._getInitialSession(initialSessionFile);
12781
+ const evalSet = await _AgentEvaluator._getEvalSetFromOldFormat(
12782
+ oldEvalDataFile,
12783
+ criteria,
12784
+ initialSession
12785
+ );
12786
+ await fs2.writeFile(newEvalDataFile, JSON.stringify(evalSet, null, 2));
12787
+ }
12788
+ static async _findTestFilesRecursively(dir) {
12789
+ const testFiles = [];
12790
+ async function walk(currentDir) {
12791
+ const entries = await fs2.readdir(currentDir, { withFileTypes: true });
12792
+ for (const entry of entries) {
12793
+ const fullPath = path2.join(currentDir, entry.name);
12794
+ if (entry.isDirectory()) {
12795
+ await walk(fullPath);
12796
+ } else if (entry.name.endsWith(".test.json")) {
12797
+ testFiles.push(fullPath);
12798
+ }
12799
+ }
12800
+ }
12801
+ await walk(dir);
12802
+ return testFiles;
12803
+ }
12804
+ static async _loadEvalSetFromFile(evalSetFile, criteria, initialSession) {
12805
+ try {
12806
+ const content = await fs2.readFile(evalSetFile, "utf-8");
12807
+ try {
12808
+ const evalSet = JSON.parse(content);
12809
+ if (evalSet.evalSetId && evalSet.evalCases) {
12810
+ if (Object.keys(initialSession).length > 0) {
12811
+ throw new Error(
12812
+ "Initial session should be specified as a part of EvalSet file. Explicit initial session is only needed, when specifying data in the older schema."
12813
+ );
12814
+ }
12815
+ return evalSet;
12816
+ }
12817
+ } catch (parseError) {
12818
+ throw new Error(`Failed to parse eval set data: ${parseError}`);
12819
+ }
12820
+ } catch (error) {
12821
+ throw new Error(`Failed to process eval set file: ${error}`);
12822
+ }
12823
+ console.warn(
12824
+ `Contents of ${evalSetFile} appear to be in the older format. To avoid this warning, please update your test files to contain data in the EvalSet schema. You can use 'migrateEvalDataToNewSchema' for migrating your old test files.`
12825
+ );
12826
+ return _AgentEvaluator._getEvalSetFromOldFormat(
12827
+ evalSetFile,
12828
+ criteria,
12829
+ initialSession
12830
+ );
12831
+ }
12832
+ static async _getEvalSetFromOldFormat(evalSetFile, criteria, initialSession) {
12833
+ const data = await _AgentEvaluator._loadDataset(evalSetFile);
12834
+ _AgentEvaluator._validateInput(data, criteria);
12835
+ return {
12836
+ evalSetId: `eval-set-${Date.now()}`,
12837
+ name: evalSetFile,
12838
+ evalCases: data[0].map(
12839
+ (item, index) => ({
12840
+ evalId: `eval-${index}`,
12841
+ conversation: [
12842
+ {
12843
+ invocationId: `invocation-${index}`,
12844
+ userContent: {
12845
+ role: "user",
12846
+ parts: [{ text: item[QUERY_COLUMN] || "" }]
12847
+ },
12848
+ finalResponse: item[REFERENCE_COLUMN] ? {
12849
+ role: "model",
12850
+ parts: [{ text: item[REFERENCE_COLUMN] }]
12851
+ } : void 0,
12852
+ intermediateData: item[EXPECTED_TOOL_USE_COLUMN] ? {
12853
+ toolUses: item[EXPECTED_TOOL_USE_COLUMN],
12854
+ intermediateResponses: []
12855
+ } : void 0,
12856
+ creationTimestamp: Date.now()
12857
+ }
12858
+ ],
12859
+ sessionInput: Object.keys(initialSession).length > 0 ? {
12860
+ appName: "test-app",
12861
+ userId: "test-user",
12862
+ state: initialSession
12863
+ } : void 0
12864
+ })
12865
+ ),
12866
+ creationTimestamp: Date.now()
12867
+ };
12868
+ }
12869
+ static async _getInitialSession(initialSessionFile) {
12870
+ if (!initialSessionFile) {
12871
+ return {};
12872
+ }
12873
+ try {
12874
+ const content = await fs2.readFile(initialSessionFile, "utf-8");
12875
+ return JSON.parse(content);
12876
+ } catch (error) {
12877
+ throw new Error(
12878
+ `Failed to load initial session from ${initialSessionFile}: ${error}`
12879
+ );
12880
+ }
12881
+ }
12882
+ static async _loadDataset(inputData) {
12883
+ const stat2 = await fs2.stat(inputData);
12884
+ if (stat2.isDirectory()) {
12885
+ const testFiles = await this._findTestFilesRecursively(inputData);
12886
+ const results = await Promise.all(testFiles.map((f) => loadJson(f)));
12887
+ return results.map((r) => Array.isArray(r) ? r : [r]);
12888
+ }
12889
+ if (stat2.isFile()) {
12890
+ const data = await loadJson(inputData);
12891
+ return [Array.isArray(data) ? data : [data]];
12892
+ }
12893
+ throw new Error(`Invalid input path: ${inputData}`);
12894
+ }
12895
+ static _validateInput(evalDataset, criteria) {
12896
+ if (!evalDataset || evalDataset.length === 0) {
12897
+ throw new Error("The evaluation dataset is None or empty.");
12898
+ }
12899
+ for (const key of Object.keys(criteria)) {
12900
+ if (!ALLOWED_CRITERIA.includes(key)) {
12901
+ throw new Error(
12902
+ `Invalid criteria key: ${key}. Expected one of ${ALLOWED_CRITERIA.join(
12903
+ ", "
12904
+ )}.`
12905
+ );
12906
+ }
12907
+ }
12908
+ const sample = evalDataset[0];
12909
+ if (!Array.isArray(sample) || sample.length === 0) {
12910
+ throw new Error("The evaluation dataset is empty.");
12911
+ }
12912
+ const firstQuery = sample[0];
12913
+ if (typeof firstQuery !== "object") {
12914
+ throw new Error(
12915
+ `Each evaluation dataset sample must be a list of dictionaries. But it's ${JSON.stringify(
12916
+ evalDataset
12917
+ )}`
12918
+ );
12919
+ }
12920
+ if (TOOL_TRAJECTORY_SCORE_KEY in criteria) {
12921
+ if (!(QUERY_COLUMN in firstQuery) || !(EXPECTED_TOOL_USE_COLUMN in firstQuery)) {
12922
+ throw new Error(
12923
+ `Samples for ${TOOL_TRAJECTORY_SCORE_KEY} must include '${QUERY_COLUMN}' and '${EXPECTED_TOOL_USE_COLUMN}' keys. The sample is ${JSON.stringify(sample)}.`
12924
+ );
12925
+ }
12926
+ }
12927
+ if (RESPONSE_EVALUATION_SCORE_KEY in criteria) {
12928
+ if (!(QUERY_COLUMN in firstQuery)) {
12929
+ throw new Error(
12930
+ `Samples for ${RESPONSE_EVALUATION_SCORE_KEY} must include '${QUERY_COLUMN}' key. The sample is ${JSON.stringify(sample)}.`
12931
+ );
12932
+ }
12933
+ }
12934
+ if (RESPONSE_MATCH_SCORE_KEY in criteria) {
12935
+ if (!(QUERY_COLUMN in firstQuery) || !(REFERENCE_COLUMN in firstQuery)) {
12936
+ throw new Error(
12937
+ `Samples for ${RESPONSE_MATCH_SCORE_KEY} must include '${QUERY_COLUMN}' and '${REFERENCE_COLUMN}' keys. The sample is ${JSON.stringify(sample)}.`
12938
+ );
12939
+ }
12940
+ }
12941
+ }
12942
+ static _printDetails(evalMetricResultWithInvocations, overallEvalStatus, overallScore, metricName = "", threshold = 0) {
12943
+ console.log(
12944
+ `Summary: \`${overallEvalStatus}\` for Metric: \`${metricName}\`. Expected threshold: \`${threshold}\`, actual value: \`${overallScore}\`.`
12945
+ );
12946
+ const data = evalMetricResultWithInvocations.map((per) => ({
12947
+ evalStatus: per.evalMetricResult.evalStatus,
12948
+ score: per.evalMetricResult.score,
12949
+ threshold,
12950
+ prompt: _AgentEvaluator._convertContentToText(
12951
+ per.expectedInvocation.userContent
12952
+ ),
12953
+ expectedResponse: _AgentEvaluator._convertContentToText(
12954
+ per.expectedInvocation.finalResponse
12955
+ ),
12956
+ actualResponse: _AgentEvaluator._convertContentToText(
12957
+ per.actualInvocation.finalResponse
12958
+ ),
12959
+ expectedToolCalls: _AgentEvaluator._convertToolCallsToText(
12960
+ per.expectedInvocation.intermediateData
12961
+ ),
12962
+ actualToolCalls: _AgentEvaluator._convertToolCallsToText(
12963
+ per.actualInvocation.intermediateData
12964
+ )
12965
+ }));
12966
+ console.table(data);
12967
+ console.log("\n\n");
12968
+ }
12969
+ static _convertContentToText(content) {
12970
+ if (content?.parts) {
12971
+ return content.parts.map((p) => p.text || "").filter((text) => text.length > 0).join("\n");
12972
+ }
12973
+ return "";
12974
+ }
12975
+ static _convertToolCallsToText(intermediateData) {
12976
+ if (intermediateData?.toolUses) {
12977
+ return intermediateData.toolUses.map((t) => JSON.stringify(t)).join("\n");
12978
+ }
12979
+ return "";
12980
+ }
12981
+ static async _getEvalResultsByEvalId(agent, evalSet, evalMetrics, numRuns) {
12982
+ const evalService = new LocalEvalService(agent);
12983
+ const inferenceResults = [];
12984
+ for (let run = 0; run < numRuns; run++) {
12985
+ for await (const result of evalService.performInference({
12986
+ evalSetId: evalSet.evalSetId,
12987
+ evalCases: [evalSet]
12988
+ })) {
12989
+ inferenceResults.push(result);
12990
+ }
12991
+ }
12992
+ const evalResultsByEvalId = /* @__PURE__ */ new Map();
12993
+ for await (const evalResult of evalService.evaluate({
12994
+ inferenceResults,
12995
+ evaluateConfig: { evalMetrics }
12996
+ })) {
12997
+ for (const caseResult of evalResult.evalCaseResults) {
12998
+ const evalId = caseResult.evalId;
12999
+ if (!evalResultsByEvalId.has(evalId)) {
13000
+ evalResultsByEvalId.set(evalId, []);
13001
+ }
13002
+ evalResultsByEvalId.get(evalId).push(caseResult);
13003
+ }
13004
+ }
13005
+ return evalResultsByEvalId;
13006
+ }
13007
+ static _getEvalMetricResultsWithInvocation(evalResultsPerEvalId) {
13008
+ const evalMetricResults = {};
13009
+ for (const evalCaseResult of evalResultsPerEvalId) {
13010
+ for (const evalMetricsPerInvocation of evalCaseResult.evalMetricResultPerInvocation) {
13011
+ for (const evalMetricResult of evalMetricsPerInvocation.evalMetricResults) {
13012
+ const metricName = evalMetricResult.metricName;
13013
+ if (!(metricName in evalMetricResults)) {
13014
+ evalMetricResults[metricName] = [];
13015
+ }
13016
+ evalMetricResults[metricName].push({
13017
+ actualInvocation: evalMetricsPerInvocation.actualInvocation,
13018
+ expectedInvocation: evalMetricsPerInvocation.expectedInvocation,
13019
+ evalMetricResult
13020
+ });
13021
+ }
13022
+ }
13023
+ }
13024
+ return evalMetricResults;
13025
+ }
13026
+ static _processMetricsAndGetFailures(evalMetricResults, printDetailedResults, agentModule) {
13027
+ const failures = [];
13028
+ for (const [metricName, evalMetricResultsWithInvocations] of Object.entries(
13029
+ evalMetricResults
13030
+ )) {
13031
+ const threshold = evalMetricResultsWithInvocations[0]?.evalMetricResult.threshold || 0;
13032
+ const scores = evalMetricResultsWithInvocations.map((m) => m.evalMetricResult.score).filter((s) => s !== void 0);
13033
+ let overallScore;
13034
+ let overallEvalStatus;
13035
+ if (scores.length > 0) {
13036
+ overallScore = scores.reduce((a, b) => a + b, 0) / scores.length;
13037
+ overallEvalStatus = overallScore >= threshold ? 1 /* PASSED */ : 2 /* FAILED */;
13038
+ } else {
13039
+ overallScore = void 0;
13040
+ overallEvalStatus = 3 /* NOT_EVALUATED */;
13041
+ }
13042
+ if (overallEvalStatus !== 1 /* PASSED */) {
13043
+ if (printDetailedResults) {
13044
+ _AgentEvaluator._printDetails(
13045
+ evalMetricResultsWithInvocations,
13046
+ overallEvalStatus,
13047
+ overallScore,
13048
+ metricName,
13049
+ threshold
13050
+ );
13051
+ }
13052
+ failures.push(
13053
+ `${metricName} for ${agentModule} Failed. Expected ${threshold}, but got ${overallScore}.`
13054
+ );
13055
+ }
13056
+ }
13057
+ return failures;
13058
+ }
13059
+ };
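AgentEvaluator.evaluate accepts a single file or a directory it walks for *.test.json files, looks for a sibling test_config.json with a criteria map (falling back to tool_trajectory_avg_score: 1 and response_match_score: 0.8), runs every case numRuns times, and throws one aggregated error on failure. A hedged sketch (the path and agent are illustrative):

// ./evals/test_config.json (optional, illustrative):
// { "criteria": { "tool_trajectory_avg_score": 1, "response_match_score": 0.8 } }
try {
  await AgentEvaluator.evaluate(myAgent, "./evals", 2 /* numRuns */);
  console.log("all eval cases passed");
} catch (err) {
  console.error(err.message); // one aggregated line per failing metric
}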
13060
+
13061
+ // src/evaluation/final-response-match-v1.ts
13062
+ var RougeEvaluator = class extends Evaluator {
13063
+ evalMetric;
13064
+ constructor(evalMetric) {
13065
+ super(evalMetric);
13066
+ this.evalMetric = evalMetric;
13067
+ }
13068
+ static getMetricInfo() {
13069
+ return {
13070
+ metricName: "response_match_score" /* RESPONSE_MATCH_SCORE */,
13071
+ description: "This metric evaluates if the agent's final response matches a golden/expected final response using Rouge_1 metric. Value range for this metric is [0,1], with values closer to 1 more desirable.",
13072
+ metricValueInfo: {
13073
+ interval: {
13074
+ minValue: 0,
13075
+ maxValue: 1,
13076
+ openAtMin: false,
13077
+ openAtMax: false
13078
+ }
13079
+ }
13080
+ };
13081
+ }
13082
+ async evaluateInvocations(actualInvocations, expectedInvocations) {
13083
+ let totalScore = 0;
13084
+ let numInvocations = 0;
13085
+ const perInvocationResults = [];
13086
+ for (let i = 0; i < actualInvocations.length; i++) {
13087
+ const actual = actualInvocations[i];
13088
+ const expected = expectedInvocations[i];
13089
+ const reference = getTextFromContent2(expected.finalResponse);
13090
+ const response = getTextFromContent2(actual.finalResponse);
13091
+ const rouge1Scores = await calculateRouge1Scores(response, reference);
13092
+ const score = rouge1Scores.fmeasure;
13093
+ perInvocationResults.push({
13094
+ actualInvocation: actual,
13095
+ expectedInvocation: expected,
13096
+ score,
13097
+ evalStatus: getEvalStatus2(score, this.evalMetric.threshold)
13098
+ });
13099
+ totalScore += score;
13100
+ numInvocations++;
13101
+ }
13102
+ if (perInvocationResults.length > 0) {
13103
+ const overallScore = totalScore / numInvocations;
13104
+ return {
13105
+ overallScore,
13106
+ overallEvalStatus: getEvalStatus2(
13107
+ overallScore,
13108
+ this.evalMetric.threshold
13109
+ ),
13110
+ perInvocationResults
13111
+ };
13112
+ }
13113
+ return {
13114
+ overallEvalStatus: 3 /* NOT_EVALUATED */,
13115
+ perInvocationResults: []
13116
+ };
13117
+ }
13118
+ };
13119
+ function getTextFromContent2(content) {
13120
+ if (content?.parts) {
13121
+ return content.parts.map((part) => part.text).filter(Boolean).join("\n");
13122
+ }
13123
+ return "";
13124
+ }
13125
+ function getEvalStatus2(score, threshold) {
13126
+ return score >= threshold ? 1 /* PASSED */ : 2 /* FAILED */;
13127
+ }
13128
+ function calculateRouge1Scores(response, reference) {
13129
+ if (!response.trim() || !reference.trim()) {
13130
+ return { precision: 0, recall: 0, fmeasure: 0 };
13131
+ }
13132
+ const responseTokens = tokenizeText(response);
13133
+ const referenceTokens = tokenizeText(reference);
13134
+ const responseUnigrams = new Set(responseTokens);
13135
+ const referenceUnigrams = new Set(referenceTokens);
13136
+ const commonUnigrams = new Set(
13137
+ [...responseUnigrams].filter((token) => referenceUnigrams.has(token))
13138
+ );
13139
+ const precision = responseUnigrams.size > 0 ? commonUnigrams.size / responseUnigrams.size : 0;
13140
+ const recall = referenceUnigrams.size > 0 ? commonUnigrams.size / referenceUnigrams.size : 0;
13141
+ const fmeasure = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
13142
+ return { precision, recall, fmeasure };
13143
+ }
13144
+ function tokenizeText(text) {
13145
+ return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((token) => token.length > 0);
13146
+ }
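tokenizeText lowercases its input and turns every non-word character into a separator, so punctuation and apostrophes split tokens:

tokenizeText("Hello, World! It's 42.");
// => ["hello", "world", "it", "s", "42"]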
13147
+
11755
13148
  // src/version.ts
11756
13149
  var VERSION = "0.1.0";
11757
13150
  export {
11758
13151
  AF_FUNCTION_CALL_ID_PREFIX,
11759
13152
  LlmAgent as Agent,
11760
13153
  AgentBuilder,
13154
+ AgentEvaluator,
11761
13155
  AgentTool,
11762
13156
  agents_exports as Agents,
11763
13157
  AiSdkLlm,
@@ -11791,11 +13185,16 @@ export {
11791
13185
  CodeExecutorContext,
11792
13186
  DatabaseSessionService,
11793
13187
  EnhancedAuthConfig,
13188
+ EvalResult,
13189
+ EvalStatus,
13190
+ evaluation_exports as Evaluation,
13191
+ Evaluator,
11794
13192
  Event,
11795
13193
  EventActions,
11796
13194
  events_exports as Events,
11797
13195
  ExitLoopTool,
11798
13196
  FileOperationsTool,
13197
+ FinalResponseMatchV2Evaluator,
11799
13198
  flows_exports as Flows,
11800
13199
  FunctionTool,
11801
13200
  GcsArtifactService,
@@ -11817,6 +13216,7 @@ export {
11817
13216
  LlmResponse,
11818
13217
  LoadArtifactsTool,
11819
13218
  LoadMemoryTool,
13219
+ LocalEvalService,
11820
13220
  LoopAgent,
11821
13221
  McpAbi,
11822
13222
  McpAtp,
@@ -11844,10 +13244,13 @@ export {
11844
13244
  OpenIdConnectScheme,
11845
13245
  ParallelAgent,
11846
13246
  PlanReActPlanner,
13247
+ PrebuiltMetrics,
11847
13248
  REQUEST_EUC_FUNCTION_CALL_NAME,
11848
13249
  ReadonlyContext,
13250
+ RougeEvaluator,
11849
13251
  RunConfig,
11850
13252
  Runner,
13253
+ SafetyEvaluatorV1,
11851
13254
  SequentialAgent,
11852
13255
  sessions_exports as Sessions,
11853
13256
  SingleFlow,
@@ -11856,6 +13259,7 @@ export {
11856
13259
  TelemetryService,
11857
13260
  ToolContext,
11858
13261
  tools_exports as Tools,
13262
+ TrajectoryEvaluator,
11859
13263
  TransferToAgentTool,
11860
13264
  UserInteractionTool,
11861
13265
  VERSION,