npm - openai - Versions diffs - 4.95.0 → 4.96.0 - Mend

openai 4.95.0 → 4.96.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/CHANGELOG.md +26 -0
package/_vendor/zod-to-json-schema/parsers/object.d.ts.map +1 -1
package/_vendor/zod-to-json-schema/parsers/object.js +6 -2
package/_vendor/zod-to-json-schema/parsers/object.js.map +1 -1
package/_vendor/zod-to-json-schema/parsers/object.mjs +6 -2
package/_vendor/zod-to-json-schema/parsers/object.mjs.map +1 -1
package/package.json +1 -1
package/resources/beta/assistants.d.ts +2 -0
package/resources/beta/assistants.d.ts.map +1 -1
package/resources/beta/assistants.js +1 -0
package/resources/beta/assistants.js.map +1 -1
package/resources/beta/assistants.mjs +1 -0
package/resources/beta/assistants.mjs.map +1 -1
package/resources/beta/realtime/realtime.d.ts +83 -2
package/resources/beta/realtime/realtime.d.ts.map +1 -1
package/resources/beta/realtime/realtime.js.map +1 -1
package/resources/beta/realtime/realtime.mjs.map +1 -1
package/resources/beta/threads/threads.d.ts +2 -1
package/resources/beta/threads/threads.d.ts.map +1 -1
package/resources/beta/threads/threads.js.map +1 -1
package/resources/beta/threads/threads.mjs.map +1 -1
package/resources/evals/evals.d.ts +546 -90
package/resources/evals/evals.d.ts.map +1 -1
package/resources/evals/evals.js.map +1 -1
package/resources/evals/evals.mjs.map +1 -1
package/resources/evals/runs/runs.d.ts +1111 -147
package/resources/evals/runs/runs.d.ts.map +1 -1
package/resources/evals/runs/runs.js.map +1 -1
package/resources/evals/runs/runs.mjs.map +1 -1
package/resources/fine-tuning/checkpoints/permissions.d.ts +1 -1
package/resources/fine-tuning/checkpoints/permissions.d.ts.map +1 -1
package/resources/fine-tuning/checkpoints/permissions.js +2 -2
package/resources/fine-tuning/checkpoints/permissions.js.map +1 -1
package/resources/fine-tuning/checkpoints/permissions.mjs +2 -2
package/resources/fine-tuning/checkpoints/permissions.mjs.map +1 -1
package/resources/images.d.ts +141 -40
package/resources/images.d.ts.map +1 -1
package/resources/images.js +4 -2
package/resources/images.js.map +1 -1
package/resources/images.mjs +4 -2
package/resources/images.mjs.map +1 -1
package/resources/responses/responses.d.ts +132 -2
package/resources/responses/responses.d.ts.map +1 -1
package/resources/responses/responses.js.map +1 -1
package/resources/responses/responses.mjs.map +1 -1
package/src/_vendor/zod-to-json-schema/parsers/object.ts +10 -2
package/src/resources/beta/assistants.ts +3 -0
package/src/resources/beta/realtime/realtime.ts +97 -1
package/src/resources/beta/threads/threads.ts +3 -3
package/src/resources/evals/evals.ts +652 -97
package/src/resources/evals/runs/runs.ts +1433 -266
package/src/resources/fine-tuning/checkpoints/permissions.ts +5 -1
package/src/resources/images.ts +162 -40
package/src/resources/responses/responses.ts +162 -0
package/src/version.ts +1 -1
package/version.d.ts +1 -1
package/version.js +1 -1
package/version.mjs +1 -1

package/src/resources/evals/runs/runs.ts — CHANGED

@@ -4,6 +4,7 @@ import { APIResource } from '../../../resource';
 import { isRequestOptions } from '../../../core';
 import * as Core from '../../../core';
 import * as Shared from '../../shared';
+import * as ResponsesAPI from '../../responses/responses';
 import * as OutputItemsAPI from './output-items';
 import {
   OutputItemListParams,
@@ -83,15 +84,6 @@ export class RunListResponsesPage extends CursorPage<RunListResponse> {}
  * A CompletionsRunDataSource object describing a model sampling configuration.
  */
 export interface CreateEvalCompletionsRunDataSource {
-  input_messages:
-    | CreateEvalCompletionsRunDataSource.Template
-    | CreateEvalCompletionsRunDataSource.ItemReference;
-  /**
-   * The name of the model to use for generating completions (e.g. "o3-mini").
-   */
-  model: string;
   /**
    * A StoredCompletionsRunDataSource configuration describing a set of filters
    */
@@ -105,105 +97,19 @@ export interface CreateEvalCompletionsRunDataSource {
    */
   type: 'completions';
+  input_messages?:
+    | CreateEvalCompletionsRunDataSource.Template
+    | CreateEvalCompletionsRunDataSource.ItemReference;
+  /**
+   * The name of the model to use for generating completions (e.g. "o3-mini").
+   */
+  model?: string;
   sampling_params?: CreateEvalCompletionsRunDataSource.SamplingParams;
 }
 export namespace CreateEvalCompletionsRunDataSource {
-  export interface Template {
-    /**
-     * A list of chat messages forming the prompt or context. May include variable
-     * references to the "item" namespace, ie {{item.name}}.
-     */
-    template: Array<Template.ChatMessage | Template.InputMessage | Template.OutputMessage>;
-    /**
-     * The type of input messages. Always `template`.
-     */
-    type: 'template';
-  }
-  export namespace Template {
-    export interface ChatMessage {
-      /**
-       * The content of the message.
-       */
-      content: string;
-      /**
-       * The role of the message (e.g. "system", "assistant", "user").
-       */
-      role: string;
-    }
-    export interface InputMessage {
-      content: InputMessage.Content;
-      /**
-       * The role of the message. One of `user`, `system`, or `developer`.
-       */
-      role: 'user' | 'system' | 'developer';
-      /**
-       * The type of item, which is always `message`.
-       */
-      type: 'message';
-    }
-    export namespace InputMessage {
-      export interface Content {
-        /**
-         * The text content.
-         */
-        text: string;
-        /**
-         * The type of content, which is always `input_text`.
-         */
-        type: 'input_text';
-      }
-    }
-    export interface OutputMessage {
-      content: OutputMessage.Content;
-      /**
-       * The role of the message. Must be `assistant` for output.
-       */
-      role: 'assistant';
-      /**
-       * The type of item, which is always `message`.
-       */
-      type: 'message';
-    }
-    export namespace OutputMessage {
-      export interface Content {
-        /**
-         * The text content.
-         */
-        text: string;
-        /**
-         * The type of content, which is always `output_text`.
-         */
-        type: 'output_text';
-      }
-    }
-  }
-  export interface ItemReference {
-    /**
-     * A reference to a variable in the "item" namespace. Ie, "item.name"
-     */
-    item_reference: string;
-    /**
-     * The type of input messages. Always `item_reference`.
-     */
-    type: 'item_reference';
-  }
   export interface FileContent {
     /**
      * The content of the jsonl file.
@@ -240,20 +146,25 @@ export namespace CreateEvalCompletionsRunDataSource {
    * A StoredCompletionsRunDataSource configuration describing a set of filters
    */
   export interface StoredCompletions {
+    /**
+     * The type of source. Always `stored_completions`.
+     */
+    type: 'stored_completions';
     /**
      * An optional Unix timestamp to filter items created after this time.
      */
-    created_after: number | null;
+    created_after?: number | null;
     /**
      * An optional Unix timestamp to filter items created before this time.
      */
-    created_before: number | null;
+    created_before?: number | null;
     /**
      * An optional maximum number of items to return.
      */
-    limit: number | null;
+    limit?: number | null;
     /**
      * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -263,17 +174,81 @@ export namespace CreateEvalCompletionsRunDataSource {
      * Keys are strings with a maximum length of 64 characters. Values are strings with
      * a maximum length of 512 characters.
      */
-    metadata: Shared.Metadata | null;
+    metadata?: Shared.Metadata | null;
     /**
      * An optional model to filter by (e.g., 'gpt-4o').
      */
-    model: string | null;
+    model?: string | null;
+  }
+  export interface Template {
     /**
-     * The type of source. Always `stored_completions`.
+     * A list of chat messages forming the prompt or context. May include variable
+     * references to the "item" namespace, ie {{item.name}}.
      */
-    type: 'stored_completions';
+    template: Array<ResponsesAPI.EasyInputMessage | Template.Message>;
+    /**
+     * The type of input messages. Always `template`.
+     */
+    type: 'template';
+  }
+  export namespace Template {
+    /**
+     * A message input to the model with a role indicating instruction following
+     * hierarchy. Instructions given with the `developer` or `system` role take
+     * precedence over instructions given with the `user` role. Messages with the
+     * `assistant` role are presumed to have been generated by the model in previous
+     * interactions.
+     */
+    export interface Message {
+      /**
+       * Text inputs to the model - can contain template strings.
+       */
+      content: string | ResponsesAPI.ResponseInputText | Message.OutputText;
+      /**
+       * The role of the message input. One of `user`, `assistant`, `system`, or
+       * `developer`.
+       */
+      role: 'user' | 'assistant' | 'system' | 'developer';
+      /**
+       * The type of the message input. Always `message`.
+       */
+      type?: 'message';
+    }
+    export namespace Message {
+      /**
+       * A text output from the model.
+       */
+      export interface OutputText {
+        /**
+         * The text output from the model.
+         */
+        text: string;
+        /**
+         * The type of the output text. Always `output_text`.
+         */
+        type: 'output_text';
+      }
+    }
+  }
+  export interface ItemReference {
+    /**
+     * A reference to a variable in the "item" namespace. Ie, "item.name"
+     */
+    item_reference: string;
+    /**
+     * The type of input messages. Always `item_reference`.
+     */
+    type: 'item_reference';
   }
   export interface SamplingParams {
@@ -378,7 +353,10 @@ export interface RunCreateResponse {
   /**
    * Information about the run's data source.
    */
-  data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+  data_source:
+    | CreateEvalJSONLRunDataSource
+    | CreateEvalCompletionsRunDataSource
+    | RunCreateResponse.Completions;
   /**
    * An object representing an error response from the Eval API.
@@ -442,162 +420,240 @@ export interface RunCreateResponse {
 }
 export namespace RunCreateResponse {
-  export interface PerModelUsage {
-    /**
-     * The number of tokens retrieved from cache.
-     */
-    cached_tokens: number;
+  /**
+   * A ResponsesRunDataSource object describing a model sampling configuration.
+   */
+  export interface Completions {
     /**
-     * The number of completion tokens generated.
+     * A EvalResponsesSource object describing a run data source configuration.
      */
-    completion_tokens: number;
+    source: Completions.FileContent | Completions.FileID | Completions.Responses;
     /**
-     * The number of invocations.
+     * The type of run data source. Always `completions`.
      */
-    invocation_count: number;
+    type: 'completions';
-    /**
-     * The name of the model.
-     */
-    model_name: string;
+    input_messages?: Completions.Template | Completions.ItemReference;
     /**
-     * The number of prompt tokens used.
+     * The name of the model to use for generating completions (e.g. "o3-mini").
      */
-    prompt_tokens: number;
+    model?: string;
-    /**
-     * The total number of tokens used.
-     */
-    total_tokens: number;
+    sampling_params?: Completions.SamplingParams;
   }
-  export interface PerTestingCriteriaResult {
-    /**
-     * Number of tests failed for this criteria.
-     */
-    failed: number;
+  export namespace Completions {
+    export interface FileContent {
+      /**
+       * The content of the jsonl file.
+       */
+      content: Array<FileContent.Content>;
-    /**
-     * Number of tests passed for this criteria.
-     */
-    passed: number;
+      /**
+       * The type of jsonl source. Always `file_content`.
+       */
+      type: 'file_content';
+    }
-    /**
-     * A description of the testing criteria.
-     */
-    testing_criteria: string;
-  }
+    export namespace FileContent {
+      export interface Content {
+        item: Record<string, unknown>;
-  /**
-   * Counters summarizing the outcomes of the evaluation run.
-   */
-  export interface ResultCounts {
-    /**
-     * Number of output items that resulted in an error.
-     */
-    errored: number;
+        sample?: Record<string, unknown>;
+      }
+    }
-    /**
-     * Number of output items that failed to pass the evaluation.
-     */
-    failed: number;
+    export interface FileID {
+      /**
+       * The identifier of the file.
+       */
+      id: string;
-    /**
-     * Number of output items that passed the evaluation.
-     */
-    passed: number;
+      /**
+       * The type of jsonl source. Always `file_id`.
+       */
+      type: 'file_id';
+    }
     /**
-     * Total number of executed output items.
+     * A EvalResponsesSource object describing a run data source configuration.
      */
-    total: number;
-  }
-}
+    export interface Responses {
+      /**
+       * The type of run data source. Always `responses`.
+       */
+      type: 'responses';
-/**
- * A schema representing an evaluation run.
- */
-export interface RunRetrieveResponse {
-  /**
-   * Unique identifier for the evaluation run.
-   */
-  id: string;
+      /**
+       * Whether to allow parallel tool calls. This is a query parameter used to select
+       * responses.
+       */
+      allow_parallel_tool_calls?: boolean | null;
-  /**
-   * Unix timestamp (in seconds) when the evaluation run was created.
-   */
-  created_at: number;
+      /**
+       * Only include items created after this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_after?: number | null;
-  /**
-   * Information about the run's data source.
-   */
-  data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+      /**
+       * Only include items created before this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_before?: number | null;
-  /**
-   * An object representing an error response from the Eval API.
-   */
-  error: EvalAPIError;
+      /**
+       * Whether the response has tool calls. This is a query parameter used to select
+       * responses.
+       */
+      has_tool_calls?: boolean | null;
-  /**
-   * The identifier of the associated evaluation.
-   */
-  eval_id: string;
+      /**
+       * Optional search string for instructions. This is a query parameter used to
+       * select responses.
+       */
+      instructions_search?: string | null;
-  /**
-   * Set of 16 key-value pairs that can be attached to an object. This can be useful
-   * for storing additional information about the object in a structured format, and
-   * querying for objects via API or the dashboard.
-   *
-   * Keys are strings with a maximum length of 64 characters. Values are strings with
-   * a maximum length of 512 characters.
-   */
-  metadata: Shared.Metadata | null;
+      /**
+       * Metadata filter for the responses. This is a query parameter used to select
+       * responses.
+       */
+      metadata?: unknown | null;
-  /**
-   * The model that is evaluated, if applicable.
-   */
-  model: string;
+      /**
+       * The name of the model to find responses for. This is a query parameter used to
+       * select responses.
+       */
+      model?: string | null;
-  /**
-   * The name of the evaluation run.
-   */
-  name: string;
+      /**
+       * Optional reasoning effort parameter. This is a query parameter used to select
+       * responses.
+       */
+      reasoning_effort?: Shared.ReasoningEffort | null;
-  /**
-   * The type of the object. Always "eval.run".
-   */
-  object: 'eval.run';
+      /**
+       * Sampling temperature. This is a query parameter used to select responses.
+       */
+      temperature?: number | null;
-  /**
-   * Usage statistics for each model during the evaluation run.
-   */
-  per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
+      /**
+       * Nucleus sampling parameter. This is a query parameter used to select responses.
+       */
+      top_p?: number | null;
-  /**
-   * Results per testing criteria applied during the evaluation run.
-   */
-  per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
+      /**
+       * List of user identifiers. This is a query parameter used to select responses.
+       */
+      users?: Array<string> | null;
+    }
-  /**
-   * The URL to the rendered evaluation run report on the UI dashboard.
-   */
-  report_url: string;
+    export interface Template {
+      /**
+       * A list of chat messages forming the prompt or context. May include variable
+       * references to the "item" namespace, ie {{item.name}}.
+       */
+      template: Array<Template.ChatMessage | Template.EvalItem>;
-  /**
-   * Counters summarizing the outcomes of the evaluation run.
-   */
-  result_counts: RunRetrieveResponse.ResultCounts;
+      /**
+       * The type of input messages. Always `template`.
+       */
+      type: 'template';
+    }
-  /**
-   * The status of the evaluation run.
-   */
-  status: string;
-}
+    export namespace Template {
+      export interface ChatMessage {
+        /**
+         * The content of the message.
+         */
+        content: string;
+        /**
+         * The role of the message (e.g. "system", "assistant", "user").
+         */
+        role: string;
+      }
+      /**
+       * A message input to the model with a role indicating instruction following
+       * hierarchy. Instructions given with the `developer` or `system` role take
+       * precedence over instructions given with the `user` role. Messages with the
+       * `assistant` role are presumed to have been generated by the model in previous
+       * interactions.
+       */
+      export interface EvalItem {
+        /**
+         * Text inputs to the model - can contain template strings.
+         */
+        content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+        /**
+         * The role of the message input. One of `user`, `assistant`, `system`, or
+         * `developer`.
+         */
+        role: 'user' | 'assistant' | 'system' | 'developer';
+        /**
+         * The type of the message input. Always `message`.
+         */
+        type?: 'message';
+      }
+      export namespace EvalItem {
+        /**
+         * A text output from the model.
+         */
+        export interface OutputText {
+          /**
+           * The text output from the model.
+           */
+          text: string;
+          /**
+           * The type of the output text. Always `output_text`.
+           */
+          type: 'output_text';
+        }
+      }
+    }
+    export interface ItemReference {
+      /**
+       * A reference to a variable in the "item" namespace. Ie, "item.name"
+       */
+      item_reference: string;
+      /**
+       * The type of input messages. Always `item_reference`.
+       */
+      type: 'item_reference';
+    }
+    export interface SamplingParams {
+      /**
+       * The maximum number of tokens in the generated output.
+       */
+      max_completion_tokens?: number;
+      /**
+       * A seed value to initialize the randomness, during sampling.
+       */
+      seed?: number;
+      /**
+       * A higher temperature increases randomness in the outputs.
+       */
+      temperature?: number;
+      /**
+       * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+       */
+      top_p?: number;
+    }
+  }
-export namespace RunRetrieveResponse {
   export interface PerModelUsage {
     /**
      * The number of tokens retrieved from cache.
@@ -676,7 +732,7 @@ export namespace RunRetrieveResponse {
 /**
  * A schema representing an evaluation run.
  */
-export interface RunListResponse {
+export interface RunRetrieveResponse {
   /**
    * Unique identifier for the evaluation run.
    */
@@ -690,7 +746,10 @@ export interface RunListResponse {
   /**
    * Information about the run's data source.
    */
-  data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+  data_source:
+    | CreateEvalJSONLRunDataSource
+    | CreateEvalCompletionsRunDataSource
+    | RunRetrieveResponse.Completions;
   /**
    * An object representing an error response from the Eval API.
@@ -730,12 +789,12 @@ export interface RunListResponse {
   /**
    * Usage statistics for each model during the evaluation run.
    */
-  per_model_usage: Array<RunListResponse.PerModelUsage>;
+  per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
   /**
    * Results per testing criteria applied during the evaluation run.
    */
-  per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
+  per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
   /**
    * The URL to the rendered evaluation run report on the UI dashboard.
@@ -745,7 +804,7 @@ export interface RunListResponse {
   /**
    * Counters summarizing the outcomes of the evaluation run.
    */
-  result_counts: RunListResponse.ResultCounts;
+  result_counts: RunRetrieveResponse.ResultCounts;
   /**
    * The status of the evaluation run.
@@ -753,7 +812,241 @@ export interface RunListResponse {
   status: string;
 }
-export namespace RunListResponse {
+export namespace RunRetrieveResponse {
+  /**
+   * A ResponsesRunDataSource object describing a model sampling configuration.
+   */
+  export interface Completions {
+    /**
+     * A EvalResponsesSource object describing a run data source configuration.
+     */
+    source: Completions.FileContent | Completions.FileID | Completions.Responses;
+    /**
+     * The type of run data source. Always `completions`.
+     */
+    type: 'completions';
+    input_messages?: Completions.Template | Completions.ItemReference;
+    /**
+     * The name of the model to use for generating completions (e.g. "o3-mini").
+     */
+    model?: string;
+    sampling_params?: Completions.SamplingParams;
+  }
+  export namespace Completions {
+    export interface FileContent {
+      /**
+       * The content of the jsonl file.
+       */
+      content: Array<FileContent.Content>;
+      /**
+       * The type of jsonl source. Always `file_content`.
+       */
+      type: 'file_content';
+    }
+    export namespace FileContent {
+      export interface Content {
+        item: Record<string, unknown>;
+        sample?: Record<string, unknown>;
+      }
+    }
+    export interface FileID {
+      /**
+       * The identifier of the file.
+       */
+      id: string;
+      /**
+       * The type of jsonl source. Always `file_id`.
+       */
+      type: 'file_id';
+    }
+    /**
+     * A EvalResponsesSource object describing a run data source configuration.
+     */
+    export interface Responses {
+      /**
+       * The type of run data source. Always `responses`.
+       */
+      type: 'responses';
+      /**
+       * Whether to allow parallel tool calls. This is a query parameter used to select
+       * responses.
+       */
+      allow_parallel_tool_calls?: boolean | null;
+      /**
+       * Only include items created after this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_after?: number | null;
+      /**
+       * Only include items created before this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_before?: number | null;
+      /**
+       * Whether the response has tool calls. This is a query parameter used to select
+       * responses.
+       */
+      has_tool_calls?: boolean | null;
+      /**
+       * Optional search string for instructions. This is a query parameter used to
+       * select responses.
+       */
+      instructions_search?: string | null;
+      /**
+       * Metadata filter for the responses. This is a query parameter used to select
+       * responses.
+       */
+      metadata?: unknown | null;
+      /**
+       * The name of the model to find responses for. This is a query parameter used to
+       * select responses.
+       */
+      model?: string | null;
+      /**
+       * Optional reasoning effort parameter. This is a query parameter used to select
+       * responses.
+       */
+      reasoning_effort?: Shared.ReasoningEffort | null;
+      /**
+       * Sampling temperature. This is a query parameter used to select responses.
+       */
+      temperature?: number | null;
+      /**
+       * Nucleus sampling parameter. This is a query parameter used to select responses.
+       */
+      top_p?: number | null;
+      /**
+       * List of user identifiers. This is a query parameter used to select responses.
+       */
+      users?: Array<string> | null;
+    }
+    export interface Template {
+      /**
+       * A list of chat messages forming the prompt or context. May include variable
+       * references to the "item" namespace, ie {{item.name}}.
+       */
+      template: Array<Template.ChatMessage | Template.EvalItem>;
+      /**
+       * The type of input messages. Always `template`.
+       */
+      type: 'template';
+    }
+    export namespace Template {
+      export interface ChatMessage {
+        /**
+         * The content of the message.
+         */
+        content: string;
+        /**
+         * The role of the message (e.g. "system", "assistant", "user").
+         */
+        role: string;
+      }
+      /**
+       * A message input to the model with a role indicating instruction following
+       * hierarchy. Instructions given with the `developer` or `system` role take
+       * precedence over instructions given with the `user` role. Messages with the
+       * `assistant` role are presumed to have been generated by the model in previous
+       * interactions.
+       */
+      export interface EvalItem {
+        /**
+         * Text inputs to the model - can contain template strings.
+         */
+        content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+        /**
+         * The role of the message input. One of `user`, `assistant`, `system`, or
+         * `developer`.
+         */
+        role: 'user' | 'assistant' | 'system' | 'developer';
+        /**
+         * The type of the message input. Always `message`.
+         */
+        type?: 'message';
+      }
+      export namespace EvalItem {
+        /**
+         * A text output from the model.
+         */
+        export interface OutputText {
+          /**
+           * The text output from the model.
+           */
+          text: string;
+          /**
+           * The type of the output text. Always `output_text`.
+           */
+          type: 'output_text';
+        }
+      }
+    }
+    export interface ItemReference {
+      /**
+       * A reference to a variable in the "item" namespace. Ie, "item.name"
+       */
+      item_reference: string;
+      /**
+       * The type of input messages. Always `item_reference`.
+       */
+      type: 'item_reference';
+    }
+    export interface SamplingParams {
+      /**
+       * The maximum number of tokens in the generated output.
+       */
+      max_completion_tokens?: number;
+      /**
+       * A seed value to initialize the randomness, during sampling.
+       */
+      seed?: number;
+      /**
+       * A higher temperature increases randomness in the outputs.
+       */
+      temperature?: number;
+      /**
+       * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+       */
+      top_p?: number;
+    }
+  }
   export interface PerModelUsage {
     /**
      * The number of tokens retrieved from cache.
@@ -829,18 +1122,10 @@ export namespace RunListResponse {
   }
 }
-export interface RunDeleteResponse {
-  deleted?: boolean;
-  object?: string;
-  run_id?: string;
-}
 /**
  * A schema representing an evaluation run.
  */
-export interface RunCancelResponse {
+export interface RunListResponse {
   /**
    * Unique identifier for the evaluation run.
    */
@@ -854,7 +1139,10 @@ export interface RunCancelResponse {
   /**
    * Information about the run's data source.
    */
-  data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+  data_source:
+    | CreateEvalJSONLRunDataSource
+    | CreateEvalCompletionsRunDataSource
+    | RunListResponse.Completions;
   /**
    * An object representing an error response from the Eval API.
@@ -894,12 +1182,12 @@ export interface RunCancelResponse {
   /**
    * Usage statistics for each model during the evaluation run.
    */
-  per_model_usage: Array<RunCancelResponse.PerModelUsage>;
+  per_model_usage: Array<RunListResponse.PerModelUsage>;
   /**
    * Results per testing criteria applied during the evaluation run.
    */
-  per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
+  per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
   /**
    * The URL to the rendered evaluation run report on the UI dashboard.
@@ -909,7 +1197,7 @@ export interface RunCancelResponse {
   /**
    * Counters summarizing the outcomes of the evaluation run.
    */
-  result_counts: RunCancelResponse.ResultCounts;
+  result_counts: RunListResponse.ResultCounts;
   /**
    * The status of the evaluation run.
@@ -917,25 +1205,660 @@ export interface RunCancelResponse {
   status: string;
 }
-export namespace RunCancelResponse {
-  export interface PerModelUsage {
+export namespace RunListResponse {
+  /**
+   * A ResponsesRunDataSource object describing a model sampling configuration.
+   */
+  export interface Completions {
     /**
-     * The number of tokens retrieved from cache.
+     * An EvalResponsesSource object describing a run data source configuration.
      */
-    cached_tokens: number;
+    source: Completions.FileContent | Completions.FileID | Completions.Responses;
     /**
-     * The number of completion tokens generated.
+     * The type of run data source. Always `completions`.
      */
-    completion_tokens: number;
+    type: 'completions';
-    /**
-     * The number of invocations.
-     */
-    invocation_count: number;
+    input_messages?: Completions.Template | Completions.ItemReference;
     /**
-     * The name of the model.
+     * The name of the model to use for generating completions (e.g. "o3-mini").
+     */
+    model?: string;
+    sampling_params?: Completions.SamplingParams;
+  }
+  export namespace Completions {
+    export interface FileContent {
+      /**
+       * The content of the jsonl file.
+       */
+      content: Array<FileContent.Content>;
+      /**
+       * The type of jsonl source. Always `file_content`.
+       */
+      type: 'file_content';
+    }
+    export namespace FileContent {
+      export interface Content {
+        item: Record<string, unknown>;
+        sample?: Record<string, unknown>;
+      }
+    }
+    export interface FileID {
+      /**
+       * The identifier of the file.
+       */
+      id: string;
+      /**
+       * The type of jsonl source. Always `file_id`.
+       */
+      type: 'file_id';
+    }
+    /**
+     * An EvalResponsesSource object describing a run data source configuration.
+     */
+    export interface Responses {
+      /**
+       * The type of run data source. Always `responses`.
+       */
+      type: 'responses';
+      /**
+       * Whether to allow parallel tool calls. This is a query parameter used to select
+       * responses.
+       */
+      allow_parallel_tool_calls?: boolean | null;
+      /**
+       * Only include items created after this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_after?: number | null;
+      /**
+       * Only include items created before this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_before?: number | null;
+      /**
+       * Whether the response has tool calls. This is a query parameter used to select
+       * responses.
+       */
+      has_tool_calls?: boolean | null;
+      /**
+       * Optional search string for instructions. This is a query parameter used to
+       * select responses.
+       */
+      instructions_search?: string | null;
+      /**
+       * Metadata filter for the responses. This is a query parameter used to select
+       * responses.
+       */
+      metadata?: unknown | null;
+      /**
+       * The name of the model to find responses for. This is a query parameter used to
+       * select responses.
+       */
+      model?: string | null;
+      /**
+       * Optional reasoning effort parameter. This is a query parameter used to select
+       * responses.
+       */
+      reasoning_effort?: Shared.ReasoningEffort | null;
+      /**
+       * Sampling temperature. This is a query parameter used to select responses.
+       */
+      temperature?: number | null;
+      /**
+       * Nucleus sampling parameter. This is a query parameter used to select responses.
+       */
+      top_p?: number | null;
+      /**
+       * List of user identifiers. This is a query parameter used to select responses.
+       */
+      users?: Array<string> | null;
+    }
+    export interface Template {
+      /**
+       * A list of chat messages forming the prompt or context. May include variable
+       * references to the "item" namespace, e.g. {{item.name}}.
+       */
+      template: Array<Template.ChatMessage | Template.EvalItem>;
+      /**
+       * The type of input messages. Always `template`.
+       */
+      type: 'template';
+    }
+    export namespace Template {
+      export interface ChatMessage {
+        /**
+         * The content of the message.
+         */
+        content: string;
+        /**
+         * The role of the message (e.g. "system", "assistant", "user").
+         */
+        role: string;
+      }
+      /**
+       * A message input to the model with a role indicating instruction following
+       * hierarchy. Instructions given with the `developer` or `system` role take
+       * precedence over instructions given with the `user` role. Messages with the
+       * `assistant` role are presumed to have been generated by the model in previous
+       * interactions.
+       */
+      export interface EvalItem {
+        /**
+         * Text inputs to the model - can contain template strings.
+         */
+        content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+        /**
+         * The role of the message input. One of `user`, `assistant`, `system`, or
+         * `developer`.
+         */
+        role: 'user' | 'assistant' | 'system' | 'developer';
+        /**
+         * The type of the message input. Always `message`.
+         */
+        type?: 'message';
+      }
+      export namespace EvalItem {
+        /**
+         * A text output from the model.
+         */
+        export interface OutputText {
+          /**
+           * The text output from the model.
+           */
+          text: string;
+          /**
+           * The type of the output text. Always `output_text`.
+           */
+          type: 'output_text';
+        }
+      }
+    }
+    export interface ItemReference {
+      /**
+       * A reference to a variable in the "item" namespace, e.g. "item.name".
+       */
+      item_reference: string;
+      /**
+       * The type of input messages. Always `item_reference`.
+       */
+      type: 'item_reference';
+    }
+    export interface SamplingParams {
+      /**
+       * The maximum number of tokens in the generated output.
+       */
+      max_completion_tokens?: number;
+      /**
+       * A seed value to initialize the randomness during sampling.
+       */
+      seed?: number;
+      /**
+       * A higher temperature increases randomness in the outputs.
+       */
+      temperature?: number;
+      /**
+       * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+       */
+      top_p?: number;
+    }
+  }
+  export interface PerModelUsage {
+    /**
+     * The number of tokens retrieved from cache.
+     */
+    cached_tokens: number;
+    /**
+     * The number of completion tokens generated.
+     */
+    completion_tokens: number;
+    /**
+     * The number of invocations.
+     */
+    invocation_count: number;
+    /**
+     * The name of the model.
+     */
+    model_name: string;
+    /**
+     * The number of prompt tokens used.
+     */
+    prompt_tokens: number;
+    /**
+     * The total number of tokens used.
+     */
+    total_tokens: number;
+  }
+  export interface PerTestingCriteriaResult {
+    /**
+     * Number of tests failed for this criteria.
+     */
+    failed: number;
+    /**
+     * Number of tests passed for this criteria.
+     */
+    passed: number;
+    /**
+     * A description of the testing criteria.
+     */
+    testing_criteria: string;
+  }
+  /**
+   * Counters summarizing the outcomes of the evaluation run.
+   */
+  export interface ResultCounts {
+    /**
+     * Number of output items that resulted in an error.
+     */
+    errored: number;
+    /**
+     * Number of output items that failed to pass the evaluation.
+     */
+    failed: number;
+    /**
+     * Number of output items that passed the evaluation.
+     */
+    passed: number;
+    /**
+     * Total number of executed output items.
+     */
+    total: number;
+  }
+}
+export interface RunDeleteResponse {
+  deleted?: boolean;
+  object?: string;
+  run_id?: string;
+}
+/**
+ * A schema representing an evaluation run.
+ */
+export interface RunCancelResponse {
+  /**
+   * Unique identifier for the evaluation run.
+   */
+  id: string;
+  /**
+   * Unix timestamp (in seconds) when the evaluation run was created.
+   */
+  created_at: number;
+  /**
+   * Information about the run's data source.
+   */
+  data_source:
+    | CreateEvalJSONLRunDataSource
+    | CreateEvalCompletionsRunDataSource
+    | RunCancelResponse.Completions;
+  /**
+   * An object representing an error response from the Eval API.
+   */
+  error: EvalAPIError;
+  /**
+   * The identifier of the associated evaluation.
+   */
+  eval_id: string;
+  /**
+   * Set of 16 key-value pairs that can be attached to an object. This can be useful
+   * for storing additional information about the object in a structured format, and
+   * querying for objects via API or the dashboard.
+   *
+   * Keys are strings with a maximum length of 64 characters. Values are strings with
+   * a maximum length of 512 characters.
+   */
+  metadata: Shared.Metadata | null;
+  /**
+   * The model that is evaluated, if applicable.
+   */
+  model: string;
+  /**
+   * The name of the evaluation run.
+   */
+  name: string;
+  /**
+   * The type of the object. Always "eval.run".
+   */
+  object: 'eval.run';
+  /**
+   * Usage statistics for each model during the evaluation run.
+   */
+  per_model_usage: Array<RunCancelResponse.PerModelUsage>;
+  /**
+   * Results per testing criteria applied during the evaluation run.
+   */
+  per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
+  /**
+   * The URL to the rendered evaluation run report on the UI dashboard.
+   */
+  report_url: string;
+  /**
+   * Counters summarizing the outcomes of the evaluation run.
+   */
+  result_counts: RunCancelResponse.ResultCounts;
+  /**
+   * The status of the evaluation run.
+   */
+  status: string;
+}
+export namespace RunCancelResponse {
+  /**
+   * A ResponsesRunDataSource object describing a model sampling configuration.
+   */
+  export interface Completions {
+    /**
+     * An EvalResponsesSource object describing a run data source configuration.
+     */
+    source: Completions.FileContent | Completions.FileID | Completions.Responses;
+    /**
+     * The type of run data source. Always `completions`.
+     */
+    type: 'completions';
+    input_messages?: Completions.Template | Completions.ItemReference;
+    /**
+     * The name of the model to use for generating completions (e.g. "o3-mini").
+     */
+    model?: string;
+    sampling_params?: Completions.SamplingParams;
+  }
+  export namespace Completions {
+    export interface FileContent {
+      /**
+       * The content of the jsonl file.
+       */
+      content: Array<FileContent.Content>;
+      /**
+       * The type of jsonl source. Always `file_content`.
+       */
+      type: 'file_content';
+    }
+    export namespace FileContent {
+      export interface Content {
+        item: Record<string, unknown>;
+        sample?: Record<string, unknown>;
+      }
+    }
+    export interface FileID {
+      /**
+       * The identifier of the file.
+       */
+      id: string;
+      /**
+       * The type of jsonl source. Always `file_id`.
+       */
+      type: 'file_id';
+    }
+    /**
+     * An EvalResponsesSource object describing a run data source configuration.
+     */
+    export interface Responses {
+      /**
+       * The type of run data source. Always `responses`.
+       */
+      type: 'responses';
+      /**
+       * Whether to allow parallel tool calls. This is a query parameter used to select
+       * responses.
+       */
+      allow_parallel_tool_calls?: boolean | null;
+      /**
+       * Only include items created after this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_after?: number | null;
+      /**
+       * Only include items created before this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_before?: number | null;
+      /**
+       * Whether the response has tool calls. This is a query parameter used to select
+       * responses.
+       */
+      has_tool_calls?: boolean | null;
+      /**
+       * Optional search string for instructions. This is a query parameter used to
+       * select responses.
+       */
+      instructions_search?: string | null;
+      /**
+       * Metadata filter for the responses. This is a query parameter used to select
+       * responses.
+       */
+      metadata?: unknown | null;
+      /**
+       * The name of the model to find responses for. This is a query parameter used to
+       * select responses.
+       */
+      model?: string | null;
+      /**
+       * Optional reasoning effort parameter. This is a query parameter used to select
+       * responses.
+       */
+      reasoning_effort?: Shared.ReasoningEffort | null;
+      /**
+       * Sampling temperature. This is a query parameter used to select responses.
+       */
+      temperature?: number | null;
+      /**
+       * Nucleus sampling parameter. This is a query parameter used to select responses.
+       */
+      top_p?: number | null;
+      /**
+       * List of user identifiers. This is a query parameter used to select responses.
+       */
+      users?: Array<string> | null;
+    }
+    export interface Template {
+      /**
+       * A list of chat messages forming the prompt or context. May include variable
+       * references to the "item" namespace, e.g. {{item.name}}.
+       */
+      template: Array<Template.ChatMessage | Template.EvalItem>;
+      /**
+       * The type of input messages. Always `template`.
+       */
+      type: 'template';
+    }
+    export namespace Template {
+      export interface ChatMessage {
+        /**
+         * The content of the message.
+         */
+        content: string;
+        /**
+         * The role of the message (e.g. "system", "assistant", "user").
+         */
+        role: string;
+      }
+      /**
+       * A message input to the model with a role indicating instruction following
+       * hierarchy. Instructions given with the `developer` or `system` role take
+       * precedence over instructions given with the `user` role. Messages with the
+       * `assistant` role are presumed to have been generated by the model in previous
+       * interactions.
+       */
+      export interface EvalItem {
+        /**
+         * Text inputs to the model - can contain template strings.
+         */
+        content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+        /**
+         * The role of the message input. One of `user`, `assistant`, `system`, or
+         * `developer`.
+         */
+        role: 'user' | 'assistant' | 'system' | 'developer';
+        /**
+         * The type of the message input. Always `message`.
+         */
+        type?: 'message';
+      }
+      export namespace EvalItem {
+        /**
+         * A text output from the model.
+         */
+        export interface OutputText {
+          /**
+           * The text output from the model.
+           */
+          text: string;
+          /**
+           * The type of the output text. Always `output_text`.
+           */
+          type: 'output_text';
+        }
+      }
+    }
+    export interface ItemReference {
+      /**
+       * A reference to a variable in the "item" namespace, e.g. "item.name".
+       */
+      item_reference: string;
+      /**
+       * The type of input messages. Always `item_reference`.
+       */
+      type: 'item_reference';
+    }
+    export interface SamplingParams {
+      /**
+       * The maximum number of tokens in the generated output.
+       */
+      max_completion_tokens?: number;
+      /**
+       * A seed value to initialize the randomness during sampling.
+       */
+      seed?: number;
+      /**
+       * A higher temperature increases randomness in the outputs.
+       */
+      temperature?: number;
+      /**
+       * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+       */
+      top_p?: number;
+    }
+  }
+  export interface PerModelUsage {
+    /**
+     * The number of tokens retrieved from cache.
+     */
+    cached_tokens: number;
+    /**
+     * The number of completion tokens generated.
+     */
+    completion_tokens: number;
+    /**
+     * The number of invocations.
+     */
+    invocation_count: number;
+    /**
+     * The name of the model.
      */
     model_name: string;
@@ -997,7 +1920,10 @@ export interface RunCreateParams {
   /**
    * Details about the run's data source.
    */
-  data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+  data_source:
+    | CreateEvalJSONLRunDataSource
+    | CreateEvalCompletionsRunDataSource
+    | RunCreateParams.CreateEvalResponsesRunDataSource;
   /**
    * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1015,6 +1941,247 @@ export interface RunCreateParams {
   name?: string;
 }
+export namespace RunCreateParams {
+  /**
+   * A ResponsesRunDataSource object describing a model sampling configuration.
+   */
+  export interface CreateEvalResponsesRunDataSource {
+    /**
+     * An EvalResponsesSource object describing a run data source configuration.
+     */
+    source:
+      | CreateEvalResponsesRunDataSource.FileContent
+      | CreateEvalResponsesRunDataSource.FileID
+      | CreateEvalResponsesRunDataSource.Responses;
+    /**
+     * The type of run data source. Always `completions`.
+     */
+    type: 'completions';
+    input_messages?:
+      | CreateEvalResponsesRunDataSource.Template
+      | CreateEvalResponsesRunDataSource.ItemReference;
+    /**
+     * The name of the model to use for generating completions (e.g. "o3-mini").
+     */
+    model?: string;
+    sampling_params?: CreateEvalResponsesRunDataSource.SamplingParams;
+  }
+  export namespace CreateEvalResponsesRunDataSource {
+    export interface FileContent {
+      /**
+       * The content of the jsonl file.
+       */
+      content: Array<FileContent.Content>;
+      /**
+       * The type of jsonl source. Always `file_content`.
+       */
+      type: 'file_content';
+    }
+    export namespace FileContent {
+      export interface Content {
+        item: Record<string, unknown>;
+        sample?: Record<string, unknown>;
+      }
+    }
+    export interface FileID {
+      /**
+       * The identifier of the file.
+       */
+      id: string;
+      /**
+       * The type of jsonl source. Always `file_id`.
+       */
+      type: 'file_id';
+    }
+    /**
+     * An EvalResponsesSource object describing a run data source configuration.
+     */
+    export interface Responses {
+      /**
+       * The type of run data source. Always `responses`.
+       */
+      type: 'responses';
+      /**
+       * Whether to allow parallel tool calls. This is a query parameter used to select
+       * responses.
+       */
+      allow_parallel_tool_calls?: boolean | null;
+      /**
+       * Only include items created after this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_after?: number | null;
+      /**
+       * Only include items created before this timestamp (inclusive). This is a query
+       * parameter used to select responses.
+       */
+      created_before?: number | null;
+      /**
+       * Whether the response has tool calls. This is a query parameter used to select
+       * responses.
+       */
+      has_tool_calls?: boolean | null;
+      /**
+       * Optional search string for instructions. This is a query parameter used to
+       * select responses.
+       */
+      instructions_search?: string | null;
+      /**
+       * Metadata filter for the responses. This is a query parameter used to select
+       * responses.
+       */
+      metadata?: unknown | null;
+      /**
+       * The name of the model to find responses for. This is a query parameter used to
+       * select responses.
+       */
+      model?: string | null;
+      /**
+       * Optional reasoning effort parameter. This is a query parameter used to select
+       * responses.
+       */
+      reasoning_effort?: Shared.ReasoningEffort | null;
+      /**
+       * Sampling temperature. This is a query parameter used to select responses.
+       */
+      temperature?: number | null;
+      /**
+       * Nucleus sampling parameter. This is a query parameter used to select responses.
+       */
+      top_p?: number | null;
+      /**
+       * List of user identifiers. This is a query parameter used to select responses.
+       */
+      users?: Array<string> | null;
+    }
+    export interface Template {
+      /**
+       * A list of chat messages forming the prompt or context. May include variable
+       * references to the "item" namespace, e.g. {{item.name}}.
+       */
+      template: Array<Template.ChatMessage | Template.EvalItem>;
+      /**
+       * The type of input messages. Always `template`.
+       */
+      type: 'template';
+    }
+    export namespace Template {
+      export interface ChatMessage {
+        /**
+         * The content of the message.
+         */
+        content: string;
+        /**
+         * The role of the message (e.g. "system", "assistant", "user").
+         */
+        role: string;
+      }
+      /**
+       * A message input to the model with a role indicating instruction following
+       * hierarchy. Instructions given with the `developer` or `system` role take
+       * precedence over instructions given with the `user` role. Messages with the
+       * `assistant` role are presumed to have been generated by the model in previous
+       * interactions.
+       */
+      export interface EvalItem {
+        /**
+         * Text inputs to the model - can contain template strings.
+         */
+        content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
+        /**
+         * The role of the message input. One of `user`, `assistant`, `system`, or
+         * `developer`.
+         */
+        role: 'user' | 'assistant' | 'system' | 'developer';
+        /**
+         * The type of the message input. Always `message`.
+         */
+        type?: 'message';
+      }
+      export namespace EvalItem {
+        /**
+         * A text output from the model.
+         */
+        export interface OutputText {
+          /**
+           * The text output from the model.
+           */
+          text: string;
+          /**
+           * The type of the output text. Always `output_text`.
+           */
+          type: 'output_text';
+        }
+      }
+    }
+    export interface ItemReference {
+      /**
+       * A reference to a variable in the "item" namespace, e.g. "item.name".
+       */
+      item_reference: string;
+      /**
+       * The type of input messages. Always `item_reference`.
+       */
+      type: 'item_reference';
+    }
+    export interface SamplingParams {
+      /**
+       * The maximum number of tokens in the generated output.
+       */
+      max_completion_tokens?: number;
+      /**
+       * A seed value to initialize the randomness during sampling.
+       */
+      seed?: number;
+      /**
+       * A higher temperature increases randomness in the outputs.
+       */
+      temperature?: number;
+      /**
+       * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+       */
+      top_p?: number;
+    }
+  }
+}
 export interface RunListParams extends CursorPageParams {
   /**
    * Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
@@ -1023,8 +2190,8 @@ export interface RunListParams extends CursorPageParams {
   order?: 'asc' | 'desc';
   /**
-   * Filter runs by status. Use "queued" | "in_progress" | "failed" | "completed" |
-   * "canceled".
+   * Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed`
+   * | `canceled`.
    */
   status?: 'queued' | 'in_progress' | 'completed' | 'canceled' | 'failed';
 }