openai 4.95.0 → 4.96.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/CHANGELOG.md +26 -0
  2. package/_vendor/zod-to-json-schema/parsers/object.d.ts.map +1 -1
  3. package/_vendor/zod-to-json-schema/parsers/object.js +6 -2
  4. package/_vendor/zod-to-json-schema/parsers/object.js.map +1 -1
  5. package/_vendor/zod-to-json-schema/parsers/object.mjs +6 -2
  6. package/_vendor/zod-to-json-schema/parsers/object.mjs.map +1 -1
  7. package/package.json +1 -1
  8. package/resources/beta/assistants.d.ts +2 -0
  9. package/resources/beta/assistants.d.ts.map +1 -1
  10. package/resources/beta/assistants.js +1 -0
  11. package/resources/beta/assistants.js.map +1 -1
  12. package/resources/beta/assistants.mjs +1 -0
  13. package/resources/beta/assistants.mjs.map +1 -1
  14. package/resources/beta/realtime/realtime.d.ts +83 -2
  15. package/resources/beta/realtime/realtime.d.ts.map +1 -1
  16. package/resources/beta/realtime/realtime.js.map +1 -1
  17. package/resources/beta/realtime/realtime.mjs.map +1 -1
  18. package/resources/beta/threads/threads.d.ts +2 -1
  19. package/resources/beta/threads/threads.d.ts.map +1 -1
  20. package/resources/beta/threads/threads.js.map +1 -1
  21. package/resources/beta/threads/threads.mjs.map +1 -1
  22. package/resources/evals/evals.d.ts +546 -90
  23. package/resources/evals/evals.d.ts.map +1 -1
  24. package/resources/evals/evals.js.map +1 -1
  25. package/resources/evals/evals.mjs.map +1 -1
  26. package/resources/evals/runs/runs.d.ts +1111 -147
  27. package/resources/evals/runs/runs.d.ts.map +1 -1
  28. package/resources/evals/runs/runs.js.map +1 -1
  29. package/resources/evals/runs/runs.mjs.map +1 -1
  30. package/resources/fine-tuning/checkpoints/permissions.d.ts +1 -1
  31. package/resources/fine-tuning/checkpoints/permissions.d.ts.map +1 -1
  32. package/resources/fine-tuning/checkpoints/permissions.js +2 -2
  33. package/resources/fine-tuning/checkpoints/permissions.js.map +1 -1
  34. package/resources/fine-tuning/checkpoints/permissions.mjs +2 -2
  35. package/resources/fine-tuning/checkpoints/permissions.mjs.map +1 -1
  36. package/resources/images.d.ts +141 -40
  37. package/resources/images.d.ts.map +1 -1
  38. package/resources/images.js +4 -2
  39. package/resources/images.js.map +1 -1
  40. package/resources/images.mjs +4 -2
  41. package/resources/images.mjs.map +1 -1
  42. package/resources/responses/responses.d.ts +132 -2
  43. package/resources/responses/responses.d.ts.map +1 -1
  44. package/resources/responses/responses.js.map +1 -1
  45. package/resources/responses/responses.mjs.map +1 -1
  46. package/src/_vendor/zod-to-json-schema/parsers/object.ts +10 -2
  47. package/src/resources/beta/assistants.ts +3 -0
  48. package/src/resources/beta/realtime/realtime.ts +97 -1
  49. package/src/resources/beta/threads/threads.ts +3 -3
  50. package/src/resources/evals/evals.ts +652 -97
  51. package/src/resources/evals/runs/runs.ts +1433 -266
  52. package/src/resources/fine-tuning/checkpoints/permissions.ts +5 -1
  53. package/src/resources/images.ts +162 -40
  54. package/src/resources/responses/responses.ts +162 -0
  55. package/src/version.ts +1 -1
  56. package/version.d.ts +1 -1
  57. package/version.js +1 -1
  58. package/version.mjs +1 -1
@@ -4,6 +4,7 @@ import { APIResource } from '../../../resource';
4
4
  import { isRequestOptions } from '../../../core';
5
5
  import * as Core from '../../../core';
6
6
  import * as Shared from '../../shared';
7
+ import * as ResponsesAPI from '../../responses/responses';
7
8
  import * as OutputItemsAPI from './output-items';
8
9
  import {
9
10
  OutputItemListParams,
@@ -83,15 +84,6 @@ export class RunListResponsesPage extends CursorPage<RunListResponse> {}
83
84
  * A CompletionsRunDataSource object describing a model sampling configuration.
84
85
  */
85
86
  export interface CreateEvalCompletionsRunDataSource {
86
- input_messages:
87
- | CreateEvalCompletionsRunDataSource.Template
88
- | CreateEvalCompletionsRunDataSource.ItemReference;
89
-
90
- /**
91
- * The name of the model to use for generating completions (e.g. "o3-mini").
92
- */
93
- model: string;
94
-
95
87
  /**
96
88
  * A StoredCompletionsRunDataSource configuration describing a set of filters
97
89
  */
@@ -105,105 +97,19 @@ export interface CreateEvalCompletionsRunDataSource {
105
97
  */
106
98
  type: 'completions';
107
99
 
100
+ input_messages?:
101
+ | CreateEvalCompletionsRunDataSource.Template
102
+ | CreateEvalCompletionsRunDataSource.ItemReference;
103
+
104
+ /**
105
+ * The name of the model to use for generating completions (e.g. "o3-mini").
106
+ */
107
+ model?: string;
108
+
108
109
  sampling_params?: CreateEvalCompletionsRunDataSource.SamplingParams;
109
110
  }
110
111
 
111
112
  export namespace CreateEvalCompletionsRunDataSource {
112
- export interface Template {
113
- /**
114
- * A list of chat messages forming the prompt or context. May include variable
115
- * references to the "item" namespace, ie {{item.name}}.
116
- */
117
- template: Array<Template.ChatMessage | Template.InputMessage | Template.OutputMessage>;
118
-
119
- /**
120
- * The type of input messages. Always `template`.
121
- */
122
- type: 'template';
123
- }
124
-
125
- export namespace Template {
126
- export interface ChatMessage {
127
- /**
128
- * The content of the message.
129
- */
130
- content: string;
131
-
132
- /**
133
- * The role of the message (e.g. "system", "assistant", "user").
134
- */
135
- role: string;
136
- }
137
-
138
- export interface InputMessage {
139
- content: InputMessage.Content;
140
-
141
- /**
142
- * The role of the message. One of `user`, `system`, or `developer`.
143
- */
144
- role: 'user' | 'system' | 'developer';
145
-
146
- /**
147
- * The type of item, which is always `message`.
148
- */
149
- type: 'message';
150
- }
151
-
152
- export namespace InputMessage {
153
- export interface Content {
154
- /**
155
- * The text content.
156
- */
157
- text: string;
158
-
159
- /**
160
- * The type of content, which is always `input_text`.
161
- */
162
- type: 'input_text';
163
- }
164
- }
165
-
166
- export interface OutputMessage {
167
- content: OutputMessage.Content;
168
-
169
- /**
170
- * The role of the message. Must be `assistant` for output.
171
- */
172
- role: 'assistant';
173
-
174
- /**
175
- * The type of item, which is always `message`.
176
- */
177
- type: 'message';
178
- }
179
-
180
- export namespace OutputMessage {
181
- export interface Content {
182
- /**
183
- * The text content.
184
- */
185
- text: string;
186
-
187
- /**
188
- * The type of content, which is always `output_text`.
189
- */
190
- type: 'output_text';
191
- }
192
- }
193
- }
194
-
195
- export interface ItemReference {
196
- /**
197
- * A reference to a variable in the "item" namespace. Ie, "item.name"
198
- */
199
- item_reference: string;
200
-
201
- /**
202
- * The type of input messages. Always `item_reference`.
203
- */
204
- type: 'item_reference';
205
- }
206
-
207
113
  export interface FileContent {
208
114
  /**
209
115
  * The content of the jsonl file.
@@ -240,20 +146,25 @@ export namespace CreateEvalCompletionsRunDataSource {
240
146
  * A StoredCompletionsRunDataSource configuration describing a set of filters
241
147
  */
242
148
  export interface StoredCompletions {
149
+ /**
150
+ * The type of source. Always `stored_completions`.
151
+ */
152
+ type: 'stored_completions';
153
+
243
154
  /**
244
155
  * An optional Unix timestamp to filter items created after this time.
245
156
  */
246
- created_after: number | null;
157
+ created_after?: number | null;
247
158
 
248
159
  /**
249
160
  * An optional Unix timestamp to filter items created before this time.
250
161
  */
251
- created_before: number | null;
162
+ created_before?: number | null;
252
163
 
253
164
  /**
254
165
  * An optional maximum number of items to return.
255
166
  */
256
- limit: number | null;
167
+ limit?: number | null;
257
168
 
258
169
  /**
259
170
  * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -263,17 +174,81 @@ export namespace CreateEvalCompletionsRunDataSource {
263
174
  * Keys are strings with a maximum length of 64 characters. Values are strings with
264
175
  * a maximum length of 512 characters.
265
176
  */
266
- metadata: Shared.Metadata | null;
177
+ metadata?: Shared.Metadata | null;
267
178
 
268
179
  /**
269
180
  * An optional model to filter by (e.g., 'gpt-4o').
270
181
  */
271
- model: string | null;
182
+ model?: string | null;
183
+ }
272
184
 
185
+ export interface Template {
273
186
  /**
274
- * The type of source. Always `stored_completions`.
187
+ * A list of chat messages forming the prompt or context. May include variable
188
+ * references to the "item" namespace, ie {{item.name}}.
275
189
  */
276
- type: 'stored_completions';
190
+ template: Array<ResponsesAPI.EasyInputMessage | Template.Message>;
191
+
192
+ /**
193
+ * The type of input messages. Always `template`.
194
+ */
195
+ type: 'template';
196
+ }
197
+
198
+ export namespace Template {
199
+ /**
200
+ * A message input to the model with a role indicating instruction following
201
+ * hierarchy. Instructions given with the `developer` or `system` role take
202
+ * precedence over instructions given with the `user` role. Messages with the
203
+ * `assistant` role are presumed to have been generated by the model in previous
204
+ * interactions.
205
+ */
206
+ export interface Message {
207
+ /**
208
+ * Text inputs to the model - can contain template strings.
209
+ */
210
+ content: string | ResponsesAPI.ResponseInputText | Message.OutputText;
211
+
212
+ /**
213
+ * The role of the message input. One of `user`, `assistant`, `system`, or
214
+ * `developer`.
215
+ */
216
+ role: 'user' | 'assistant' | 'system' | 'developer';
217
+
218
+ /**
219
+ * The type of the message input. Always `message`.
220
+ */
221
+ type?: 'message';
222
+ }
223
+
224
+ export namespace Message {
225
+ /**
226
+ * A text output from the model.
227
+ */
228
+ export interface OutputText {
229
+ /**
230
+ * The text output from the model.
231
+ */
232
+ text: string;
233
+
234
+ /**
235
+ * The type of the output text. Always `output_text`.
236
+ */
237
+ type: 'output_text';
238
+ }
239
+ }
240
+ }
241
+
242
+ export interface ItemReference {
243
+ /**
244
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
245
+ */
246
+ item_reference: string;
247
+
248
+ /**
249
+ * The type of input messages. Always `item_reference`.
250
+ */
251
+ type: 'item_reference';
277
252
  }
278
253
 
279
254
  export interface SamplingParams {
@@ -378,7 +353,10 @@ export interface RunCreateResponse {
378
353
  /**
379
354
  * Information about the run's data source.
380
355
  */
381
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
356
+ data_source:
357
+ | CreateEvalJSONLRunDataSource
358
+ | CreateEvalCompletionsRunDataSource
359
+ | RunCreateResponse.Completions;
382
360
 
383
361
  /**
384
362
  * An object representing an error response from the Eval API.
@@ -442,162 +420,240 @@ export interface RunCreateResponse {
442
420
  }
443
421
 
444
422
  export namespace RunCreateResponse {
445
- export interface PerModelUsage {
446
- /**
447
- * The number of tokens retrieved from cache.
448
- */
449
- cached_tokens: number;
450
-
423
+ /**
424
+ * A ResponsesRunDataSource object describing a model sampling configuration.
425
+ */
426
+ export interface Completions {
451
427
  /**
452
- * The number of completion tokens generated.
428
+ * A EvalResponsesSource object describing a run data source configuration.
453
429
  */
454
- completion_tokens: number;
430
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
455
431
 
456
432
  /**
457
- * The number of invocations.
433
+ * The type of run data source. Always `completions`.
458
434
  */
459
- invocation_count: number;
435
+ type: 'completions';
460
436
 
461
- /**
462
- * The name of the model.
463
- */
464
- model_name: string;
437
+ input_messages?: Completions.Template | Completions.ItemReference;
465
438
 
466
439
  /**
467
- * The number of prompt tokens used.
440
+ * The name of the model to use for generating completions (e.g. "o3-mini").
468
441
  */
469
- prompt_tokens: number;
442
+ model?: string;
470
443
 
471
- /**
472
- * The total number of tokens used.
473
- */
474
- total_tokens: number;
444
+ sampling_params?: Completions.SamplingParams;
475
445
  }
476
446
 
477
- export interface PerTestingCriteriaResult {
478
- /**
479
- * Number of tests failed for this criteria.
480
- */
481
- failed: number;
447
+ export namespace Completions {
448
+ export interface FileContent {
449
+ /**
450
+ * The content of the jsonl file.
451
+ */
452
+ content: Array<FileContent.Content>;
482
453
 
483
- /**
484
- * Number of tests passed for this criteria.
485
- */
486
- passed: number;
454
+ /**
455
+ * The type of jsonl source. Always `file_content`.
456
+ */
457
+ type: 'file_content';
458
+ }
487
459
 
488
- /**
489
- * A description of the testing criteria.
490
- */
491
- testing_criteria: string;
492
- }
460
+ export namespace FileContent {
461
+ export interface Content {
462
+ item: Record<string, unknown>;
493
463
 
494
- /**
495
- * Counters summarizing the outcomes of the evaluation run.
496
- */
497
- export interface ResultCounts {
498
- /**
499
- * Number of output items that resulted in an error.
500
- */
501
- errored: number;
464
+ sample?: Record<string, unknown>;
465
+ }
466
+ }
502
467
 
503
- /**
504
- * Number of output items that failed to pass the evaluation.
505
- */
506
- failed: number;
468
+ export interface FileID {
469
+ /**
470
+ * The identifier of the file.
471
+ */
472
+ id: string;
507
473
 
508
- /**
509
- * Number of output items that passed the evaluation.
510
- */
511
- passed: number;
474
+ /**
475
+ * The type of jsonl source. Always `file_id`.
476
+ */
477
+ type: 'file_id';
478
+ }
512
479
 
513
480
  /**
514
- * Total number of executed output items.
481
+ * A EvalResponsesSource object describing a run data source configuration.
515
482
  */
516
- total: number;
517
- }
518
- }
483
+ export interface Responses {
484
+ /**
485
+ * The type of run data source. Always `responses`.
486
+ */
487
+ type: 'responses';
519
488
 
520
- /**
521
- * A schema representing an evaluation run.
522
- */
523
- export interface RunRetrieveResponse {
524
- /**
525
- * Unique identifier for the evaluation run.
526
- */
527
- id: string;
489
+ /**
490
+ * Whether to allow parallel tool calls. This is a query parameter used to select
491
+ * responses.
492
+ */
493
+ allow_parallel_tool_calls?: boolean | null;
528
494
 
529
- /**
530
- * Unix timestamp (in seconds) when the evaluation run was created.
531
- */
532
- created_at: number;
495
+ /**
496
+ * Only include items created after this timestamp (inclusive). This is a query
497
+ * parameter used to select responses.
498
+ */
499
+ created_after?: number | null;
533
500
 
534
- /**
535
- * Information about the run's data source.
536
- */
537
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
501
+ /**
502
+ * Only include items created before this timestamp (inclusive). This is a query
503
+ * parameter used to select responses.
504
+ */
505
+ created_before?: number | null;
538
506
 
539
- /**
540
- * An object representing an error response from the Eval API.
541
- */
542
- error: EvalAPIError;
507
+ /**
508
+ * Whether the response has tool calls. This is a query parameter used to select
509
+ * responses.
510
+ */
511
+ has_tool_calls?: boolean | null;
543
512
 
544
- /**
545
- * The identifier of the associated evaluation.
546
- */
547
- eval_id: string;
513
+ /**
514
+ * Optional search string for instructions. This is a query parameter used to
515
+ * select responses.
516
+ */
517
+ instructions_search?: string | null;
548
518
 
549
- /**
550
- * Set of 16 key-value pairs that can be attached to an object. This can be useful
551
- * for storing additional information about the object in a structured format, and
552
- * querying for objects via API or the dashboard.
553
- *
554
- * Keys are strings with a maximum length of 64 characters. Values are strings with
555
- * a maximum length of 512 characters.
556
- */
557
- metadata: Shared.Metadata | null;
519
+ /**
520
+ * Metadata filter for the responses. This is a query parameter used to select
521
+ * responses.
522
+ */
523
+ metadata?: unknown | null;
558
524
 
559
- /**
560
- * The model that is evaluated, if applicable.
561
- */
562
- model: string;
525
+ /**
526
+ * The name of the model to find responses for. This is a query parameter used to
527
+ * select responses.
528
+ */
529
+ model?: string | null;
563
530
 
564
- /**
565
- * The name of the evaluation run.
566
- */
567
- name: string;
531
+ /**
532
+ * Optional reasoning effort parameter. This is a query parameter used to select
533
+ * responses.
534
+ */
535
+ reasoning_effort?: Shared.ReasoningEffort | null;
568
536
 
569
- /**
570
- * The type of the object. Always "eval.run".
571
- */
572
- object: 'eval.run';
537
+ /**
538
+ * Sampling temperature. This is a query parameter used to select responses.
539
+ */
540
+ temperature?: number | null;
573
541
 
574
- /**
575
- * Usage statistics for each model during the evaluation run.
576
- */
577
- per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
542
+ /**
543
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
544
+ */
545
+ top_p?: number | null;
578
546
 
579
- /**
580
- * Results per testing criteria applied during the evaluation run.
581
- */
582
- per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
547
+ /**
548
+ * List of user identifiers. This is a query parameter used to select responses.
549
+ */
550
+ users?: Array<string> | null;
551
+ }
583
552
 
584
- /**
585
- * The URL to the rendered evaluation run report on the UI dashboard.
586
- */
587
- report_url: string;
553
+ export interface Template {
554
+ /**
555
+ * A list of chat messages forming the prompt or context. May include variable
556
+ * references to the "item" namespace, ie {{item.name}}.
557
+ */
558
+ template: Array<Template.ChatMessage | Template.EvalItem>;
588
559
 
589
- /**
590
- * Counters summarizing the outcomes of the evaluation run.
591
- */
592
- result_counts: RunRetrieveResponse.ResultCounts;
560
+ /**
561
+ * The type of input messages. Always `template`.
562
+ */
563
+ type: 'template';
564
+ }
593
565
 
594
- /**
595
- * The status of the evaluation run.
596
- */
597
- status: string;
598
- }
566
+ export namespace Template {
567
+ export interface ChatMessage {
568
+ /**
569
+ * The content of the message.
570
+ */
571
+ content: string;
572
+
573
+ /**
574
+ * The role of the message (e.g. "system", "assistant", "user").
575
+ */
576
+ role: string;
577
+ }
578
+
579
+ /**
580
+ * A message input to the model with a role indicating instruction following
581
+ * hierarchy. Instructions given with the `developer` or `system` role take
582
+ * precedence over instructions given with the `user` role. Messages with the
583
+ * `assistant` role are presumed to have been generated by the model in previous
584
+ * interactions.
585
+ */
586
+ export interface EvalItem {
587
+ /**
588
+ * Text inputs to the model - can contain template strings.
589
+ */
590
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
591
+
592
+ /**
593
+ * The role of the message input. One of `user`, `assistant`, `system`, or
594
+ * `developer`.
595
+ */
596
+ role: 'user' | 'assistant' | 'system' | 'developer';
597
+
598
+ /**
599
+ * The type of the message input. Always `message`.
600
+ */
601
+ type?: 'message';
602
+ }
603
+
604
+ export namespace EvalItem {
605
+ /**
606
+ * A text output from the model.
607
+ */
608
+ export interface OutputText {
609
+ /**
610
+ * The text output from the model.
611
+ */
612
+ text: string;
613
+
614
+ /**
615
+ * The type of the output text. Always `output_text`.
616
+ */
617
+ type: 'output_text';
618
+ }
619
+ }
620
+ }
621
+
622
+ export interface ItemReference {
623
+ /**
624
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
625
+ */
626
+ item_reference: string;
627
+
628
+ /**
629
+ * The type of input messages. Always `item_reference`.
630
+ */
631
+ type: 'item_reference';
632
+ }
633
+
634
+ export interface SamplingParams {
635
+ /**
636
+ * The maximum number of tokens in the generated output.
637
+ */
638
+ max_completion_tokens?: number;
639
+
640
+ /**
641
+ * A seed value to initialize the randomness, during sampling.
642
+ */
643
+ seed?: number;
644
+
645
+ /**
646
+ * A higher temperature increases randomness in the outputs.
647
+ */
648
+ temperature?: number;
649
+
650
+ /**
651
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
652
+ */
653
+ top_p?: number;
654
+ }
655
+ }
599
656
 
600
- export namespace RunRetrieveResponse {
601
657
  export interface PerModelUsage {
602
658
  /**
603
659
  * The number of tokens retrieved from cache.
@@ -676,7 +732,7 @@ export namespace RunRetrieveResponse {
676
732
  /**
677
733
  * A schema representing an evaluation run.
678
734
  */
679
- export interface RunListResponse {
735
+ export interface RunRetrieveResponse {
680
736
  /**
681
737
  * Unique identifier for the evaluation run.
682
738
  */
@@ -690,7 +746,10 @@ export interface RunListResponse {
690
746
  /**
691
747
  * Information about the run's data source.
692
748
  */
693
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
749
+ data_source:
750
+ | CreateEvalJSONLRunDataSource
751
+ | CreateEvalCompletionsRunDataSource
752
+ | RunRetrieveResponse.Completions;
694
753
 
695
754
  /**
696
755
  * An object representing an error response from the Eval API.
@@ -730,12 +789,12 @@ export interface RunListResponse {
730
789
  /**
731
790
  * Usage statistics for each model during the evaluation run.
732
791
  */
733
- per_model_usage: Array<RunListResponse.PerModelUsage>;
792
+ per_model_usage: Array<RunRetrieveResponse.PerModelUsage>;
734
793
 
735
794
  /**
736
795
  * Results per testing criteria applied during the evaluation run.
737
796
  */
738
- per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
797
+ per_testing_criteria_results: Array<RunRetrieveResponse.PerTestingCriteriaResult>;
739
798
 
740
799
  /**
741
800
  * The URL to the rendered evaluation run report on the UI dashboard.
@@ -745,7 +804,7 @@ export interface RunListResponse {
745
804
  /**
746
805
  * Counters summarizing the outcomes of the evaluation run.
747
806
  */
748
- result_counts: RunListResponse.ResultCounts;
807
+ result_counts: RunRetrieveResponse.ResultCounts;
749
808
 
750
809
  /**
751
810
  * The status of the evaluation run.
@@ -753,7 +812,241 @@ export interface RunListResponse {
753
812
  status: string;
754
813
  }
755
814
 
756
- export namespace RunListResponse {
815
+ export namespace RunRetrieveResponse {
816
+ /**
817
+ * A ResponsesRunDataSource object describing a model sampling configuration.
818
+ */
819
+ export interface Completions {
820
+ /**
821
+ * A EvalResponsesSource object describing a run data source configuration.
822
+ */
823
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
824
+
825
+ /**
826
+ * The type of run data source. Always `completions`.
827
+ */
828
+ type: 'completions';
829
+
830
+ input_messages?: Completions.Template | Completions.ItemReference;
831
+
832
+ /**
833
+ * The name of the model to use for generating completions (e.g. "o3-mini").
834
+ */
835
+ model?: string;
836
+
837
+ sampling_params?: Completions.SamplingParams;
838
+ }
839
+
840
+ export namespace Completions {
841
+ export interface FileContent {
842
+ /**
843
+ * The content of the jsonl file.
844
+ */
845
+ content: Array<FileContent.Content>;
846
+
847
+ /**
848
+ * The type of jsonl source. Always `file_content`.
849
+ */
850
+ type: 'file_content';
851
+ }
852
+
853
+ export namespace FileContent {
854
+ export interface Content {
855
+ item: Record<string, unknown>;
856
+
857
+ sample?: Record<string, unknown>;
858
+ }
859
+ }
860
+
861
+ export interface FileID {
862
+ /**
863
+ * The identifier of the file.
864
+ */
865
+ id: string;
866
+
867
+ /**
868
+ * The type of jsonl source. Always `file_id`.
869
+ */
870
+ type: 'file_id';
871
+ }
872
+
873
+ /**
874
+ * A EvalResponsesSource object describing a run data source configuration.
875
+ */
876
+ export interface Responses {
877
+ /**
878
+ * The type of run data source. Always `responses`.
879
+ */
880
+ type: 'responses';
881
+
882
+ /**
883
+ * Whether to allow parallel tool calls. This is a query parameter used to select
884
+ * responses.
885
+ */
886
+ allow_parallel_tool_calls?: boolean | null;
887
+
888
+ /**
889
+ * Only include items created after this timestamp (inclusive). This is a query
890
+ * parameter used to select responses.
891
+ */
892
+ created_after?: number | null;
893
+
894
+ /**
895
+ * Only include items created before this timestamp (inclusive). This is a query
896
+ * parameter used to select responses.
897
+ */
898
+ created_before?: number | null;
899
+
900
+ /**
901
+ * Whether the response has tool calls. This is a query parameter used to select
902
+ * responses.
903
+ */
904
+ has_tool_calls?: boolean | null;
905
+
906
+ /**
907
+ * Optional search string for instructions. This is a query parameter used to
908
+ * select responses.
909
+ */
910
+ instructions_search?: string | null;
911
+
912
+ /**
913
+ * Metadata filter for the responses. This is a query parameter used to select
914
+ * responses.
915
+ */
916
+ metadata?: unknown | null;
917
+
918
+ /**
919
+ * The name of the model to find responses for. This is a query parameter used to
920
+ * select responses.
921
+ */
922
+ model?: string | null;
923
+
924
+ /**
925
+ * Optional reasoning effort parameter. This is a query parameter used to select
926
+ * responses.
927
+ */
928
+ reasoning_effort?: Shared.ReasoningEffort | null;
929
+
930
+ /**
931
+ * Sampling temperature. This is a query parameter used to select responses.
932
+ */
933
+ temperature?: number | null;
934
+
935
+ /**
936
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
937
+ */
938
+ top_p?: number | null;
939
+
940
+ /**
941
+ * List of user identifiers. This is a query parameter used to select responses.
942
+ */
943
+ users?: Array<string> | null;
944
+ }
945
+
946
+ export interface Template {
947
+ /**
948
+ * A list of chat messages forming the prompt or context. May include variable
949
+ * references to the "item" namespace, ie {{item.name}}.
950
+ */
951
+ template: Array<Template.ChatMessage | Template.EvalItem>;
952
+
953
+ /**
954
+ * The type of input messages. Always `template`.
955
+ */
956
+ type: 'template';
957
+ }
958
+
959
+ export namespace Template {
960
+ export interface ChatMessage {
961
+ /**
962
+ * The content of the message.
963
+ */
964
+ content: string;
965
+
966
+ /**
967
+ * The role of the message (e.g. "system", "assistant", "user").
968
+ */
969
+ role: string;
970
+ }
971
+
972
+ /**
973
+ * A message input to the model with a role indicating instruction following
974
+ * hierarchy. Instructions given with the `developer` or `system` role take
975
+ * precedence over instructions given with the `user` role. Messages with the
976
+ * `assistant` role are presumed to have been generated by the model in previous
977
+ * interactions.
978
+ */
979
+ export interface EvalItem {
980
+ /**
981
+ * Text inputs to the model - can contain template strings.
982
+ */
983
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
984
+
985
+ /**
986
+ * The role of the message input. One of `user`, `assistant`, `system`, or
987
+ * `developer`.
988
+ */
989
+ role: 'user' | 'assistant' | 'system' | 'developer';
990
+
991
+ /**
992
+ * The type of the message input. Always `message`.
993
+ */
994
+ type?: 'message';
995
+ }
996
+
997
+ export namespace EvalItem {
998
+ /**
999
+ * A text output from the model.
1000
+ */
1001
+ export interface OutputText {
1002
+ /**
1003
+ * The text output from the model.
1004
+ */
1005
+ text: string;
1006
+
1007
+ /**
1008
+ * The type of the output text. Always `output_text`.
1009
+ */
1010
+ type: 'output_text';
1011
+ }
1012
+ }
1013
+ }
1014
+
1015
+ export interface ItemReference {
1016
+ /**
1017
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
1018
+ */
1019
+ item_reference: string;
1020
+
1021
+ /**
1022
+ * The type of input messages. Always `item_reference`.
1023
+ */
1024
+ type: 'item_reference';
1025
+ }
1026
+
1027
+ export interface SamplingParams {
1028
+ /**
1029
+ * The maximum number of tokens in the generated output.
1030
+ */
1031
+ max_completion_tokens?: number;
1032
+
1033
+ /**
1034
+ * A seed value to initialize the randomness, during sampling.
1035
+ */
1036
+ seed?: number;
1037
+
1038
+ /**
1039
+ * A higher temperature increases randomness in the outputs.
1040
+ */
1041
+ temperature?: number;
1042
+
1043
+ /**
1044
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
1045
+ */
1046
+ top_p?: number;
1047
+ }
1048
+ }
1049
+
757
1050
  export interface PerModelUsage {
758
1051
  /**
759
1052
  * The number of tokens retrieved from cache.
@@ -829,18 +1122,10 @@ export namespace RunListResponse {
829
1122
  }
830
1123
  }
831
1124
 
832
- export interface RunDeleteResponse {
833
- deleted?: boolean;
834
-
835
- object?: string;
836
-
837
- run_id?: string;
838
- }
839
-
840
1125
  /**
841
1126
  * A schema representing an evaluation run.
842
1127
  */
843
- export interface RunCancelResponse {
1128
+ export interface RunListResponse {
844
1129
  /**
845
1130
  * Unique identifier for the evaluation run.
846
1131
  */
@@ -854,7 +1139,10 @@ export interface RunCancelResponse {
854
1139
  /**
855
1140
  * Information about the run's data source.
856
1141
  */
857
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
1142
+ data_source:
1143
+ | CreateEvalJSONLRunDataSource
1144
+ | CreateEvalCompletionsRunDataSource
1145
+ | RunListResponse.Completions;
858
1146
 
859
1147
  /**
860
1148
  * An object representing an error response from the Eval API.
@@ -894,12 +1182,12 @@ export interface RunCancelResponse {
894
1182
  /**
895
1183
  * Usage statistics for each model during the evaluation run.
896
1184
  */
897
- per_model_usage: Array<RunCancelResponse.PerModelUsage>;
1185
+ per_model_usage: Array<RunListResponse.PerModelUsage>;
898
1186
 
899
1187
  /**
900
1188
  * Results per testing criteria applied during the evaluation run.
901
1189
  */
902
- per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
1190
+ per_testing_criteria_results: Array<RunListResponse.PerTestingCriteriaResult>;
903
1191
 
904
1192
  /**
905
1193
  * The URL to the rendered evaluation run report on the UI dashboard.
@@ -909,7 +1197,7 @@ export interface RunCancelResponse {
909
1197
  /**
910
1198
  * Counters summarizing the outcomes of the evaluation run.
911
1199
  */
912
- result_counts: RunCancelResponse.ResultCounts;
1200
+ result_counts: RunListResponse.ResultCounts;
913
1201
 
914
1202
  /**
915
1203
  * The status of the evaluation run.
@@ -917,25 +1205,660 @@ export interface RunCancelResponse {
917
1205
  status: string;
918
1206
  }
919
1207
 
920
- export namespace RunCancelResponse {
921
- export interface PerModelUsage {
1208
+ export namespace RunListResponse {
1209
+ /**
1210
+ * A ResponsesRunDataSource object describing a model sampling configuration.
1211
+ */
1212
+ export interface Completions {
922
1213
  /**
923
- * The number of tokens retrieved from cache.
1214
+ * A EvalResponsesSource object describing a run data source configuration.
924
1215
  */
925
- cached_tokens: number;
1216
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
926
1217
 
927
1218
  /**
928
- * The number of completion tokens generated.
1219
+ * The type of run data source. Always `completions`.
929
1220
  */
930
- completion_tokens: number;
1221
+ type: 'completions';
931
1222
 
932
- /**
933
- * The number of invocations.
934
- */
935
- invocation_count: number;
1223
+ input_messages?: Completions.Template | Completions.ItemReference;
936
1224
 
937
1225
  /**
938
- * The name of the model.
1226
+ * The name of the model to use for generating completions (e.g. "o3-mini").
1227
+ */
1228
+ model?: string;
1229
+
1230
+ sampling_params?: Completions.SamplingParams;
1231
+ }
1232
+
1233
+ export namespace Completions {
1234
+ export interface FileContent {
1235
+ /**
1236
+ * The content of the jsonl file.
1237
+ */
1238
+ content: Array<FileContent.Content>;
1239
+
1240
+ /**
1241
+ * The type of jsonl source. Always `file_content`.
1242
+ */
1243
+ type: 'file_content';
1244
+ }
1245
+
1246
+ export namespace FileContent {
1247
+ export interface Content {
1248
+ item: Record<string, unknown>;
1249
+
1250
+ sample?: Record<string, unknown>;
1251
+ }
1252
+ }
1253
+
1254
+ export interface FileID {
1255
+ /**
1256
+ * The identifier of the file.
1257
+ */
1258
+ id: string;
1259
+
1260
+ /**
1261
+ * The type of jsonl source. Always `file_id`.
1262
+ */
1263
+ type: 'file_id';
1264
+ }
1265
+
1266
+ /**
1267
+ * A EvalResponsesSource object describing a run data source configuration.
1268
+ */
1269
+ export interface Responses {
1270
+ /**
1271
+ * The type of run data source. Always `responses`.
1272
+ */
1273
+ type: 'responses';
1274
+
1275
+ /**
1276
+ * Whether to allow parallel tool calls. This is a query parameter used to select
1277
+ * responses.
1278
+ */
1279
+ allow_parallel_tool_calls?: boolean | null;
1280
+
1281
+ /**
1282
+ * Only include items created after this timestamp (inclusive). This is a query
1283
+ * parameter used to select responses.
1284
+ */
1285
+ created_after?: number | null;
1286
+
1287
+ /**
1288
+ * Only include items created before this timestamp (inclusive). This is a query
1289
+ * parameter used to select responses.
1290
+ */
1291
+ created_before?: number | null;
1292
+
1293
+ /**
1294
+ * Whether the response has tool calls. This is a query parameter used to select
1295
+ * responses.
1296
+ */
1297
+ has_tool_calls?: boolean | null;
1298
+
1299
+ /**
1300
+ * Optional search string for instructions. This is a query parameter used to
1301
+ * select responses.
1302
+ */
1303
+ instructions_search?: string | null;
1304
+
1305
+ /**
1306
+ * Metadata filter for the responses. This is a query parameter used to select
1307
+ * responses.
1308
+ */
1309
+ metadata?: unknown | null;
1310
+
1311
+ /**
1312
+ * The name of the model to find responses for. This is a query parameter used to
1313
+ * select responses.
1314
+ */
1315
+ model?: string | null;
1316
+
1317
+ /**
1318
+ * Optional reasoning effort parameter. This is a query parameter used to select
1319
+ * responses.
1320
+ */
1321
+ reasoning_effort?: Shared.ReasoningEffort | null;
1322
+
1323
+ /**
1324
+ * Sampling temperature. This is a query parameter used to select responses.
1325
+ */
1326
+ temperature?: number | null;
1327
+
1328
+ /**
1329
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
1330
+ */
1331
+ top_p?: number | null;
1332
+
1333
+ /**
1334
+ * List of user identifiers. This is a query parameter used to select responses.
1335
+ */
1336
+ users?: Array<string> | null;
1337
+ }
1338
+
1339
+ export interface Template {
1340
+ /**
1341
+ * A list of chat messages forming the prompt or context. May include variable
1342
+ * references to the "item" namespace, ie {{item.name}}.
1343
+ */
1344
+ template: Array<Template.ChatMessage | Template.EvalItem>;
1345
+
1346
+ /**
1347
+ * The type of input messages. Always `template`.
1348
+ */
1349
+ type: 'template';
1350
+ }
1351
+
1352
+ export namespace Template {
1353
+ export interface ChatMessage {
1354
+ /**
1355
+ * The content of the message.
1356
+ */
1357
+ content: string;
1358
+
1359
+ /**
1360
+ * The role of the message (e.g. "system", "assistant", "user").
1361
+ */
1362
+ role: string;
1363
+ }
1364
+
1365
+ /**
1366
+ * A message input to the model with a role indicating instruction following
1367
+ * hierarchy. Instructions given with the `developer` or `system` role take
1368
+ * precedence over instructions given with the `user` role. Messages with the
1369
+ * `assistant` role are presumed to have been generated by the model in previous
1370
+ * interactions.
1371
+ */
1372
+ export interface EvalItem {
1373
+ /**
1374
+ * Text inputs to the model - can contain template strings.
1375
+ */
1376
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
1377
+
1378
+ /**
1379
+ * The role of the message input. One of `user`, `assistant`, `system`, or
1380
+ * `developer`.
1381
+ */
1382
+ role: 'user' | 'assistant' | 'system' | 'developer';
1383
+
1384
+ /**
1385
+ * The type of the message input. Always `message`.
1386
+ */
1387
+ type?: 'message';
1388
+ }
1389
+
1390
+ export namespace EvalItem {
1391
+ /**
1392
+ * A text output from the model.
1393
+ */
1394
+ export interface OutputText {
1395
+ /**
1396
+ * The text output from the model.
1397
+ */
1398
+ text: string;
1399
+
1400
+ /**
1401
+ * The type of the output text. Always `output_text`.
1402
+ */
1403
+ type: 'output_text';
1404
+ }
1405
+ }
1406
+ }
1407
+
1408
+ export interface ItemReference {
1409
+ /**
1410
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
1411
+ */
1412
+ item_reference: string;
1413
+
1414
+ /**
1415
+ * The type of input messages. Always `item_reference`.
1416
+ */
1417
+ type: 'item_reference';
1418
+ }
1419
+
1420
+ export interface SamplingParams {
1421
+ /**
1422
+ * The maximum number of tokens in the generated output.
1423
+ */
1424
+ max_completion_tokens?: number;
1425
+
1426
+ /**
1427
+ * A seed value to initialize the randomness, during sampling.
1428
+ */
1429
+ seed?: number;
1430
+
1431
+ /**
1432
+ * A higher temperature increases randomness in the outputs.
1433
+ */
1434
+ temperature?: number;
1435
+
1436
+ /**
1437
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
1438
+ */
1439
+ top_p?: number;
1440
+ }
1441
+ }
1442
+
1443
+ export interface PerModelUsage {
1444
+ /**
1445
+ * The number of tokens retrieved from cache.
1446
+ */
1447
+ cached_tokens: number;
1448
+
1449
+ /**
1450
+ * The number of completion tokens generated.
1451
+ */
1452
+ completion_tokens: number;
1453
+
1454
+ /**
1455
+ * The number of invocations.
1456
+ */
1457
+ invocation_count: number;
1458
+
1459
+ /**
1460
+ * The name of the model.
1461
+ */
1462
+ model_name: string;
1463
+
1464
+ /**
1465
+ * The number of prompt tokens used.
1466
+ */
1467
+ prompt_tokens: number;
1468
+
1469
+ /**
1470
+ * The total number of tokens used.
1471
+ */
1472
+ total_tokens: number;
1473
+ }
1474
+
1475
+ export interface PerTestingCriteriaResult {
1476
+ /**
1477
+ * Number of tests failed for this criteria.
1478
+ */
1479
+ failed: number;
1480
+
1481
+ /**
1482
+ * Number of tests passed for this criteria.
1483
+ */
1484
+ passed: number;
1485
+
1486
+ /**
1487
+ * A description of the testing criteria.
1488
+ */
1489
+ testing_criteria: string;
1490
+ }
1491
+
1492
+ /**
1493
+ * Counters summarizing the outcomes of the evaluation run.
1494
+ */
1495
+ export interface ResultCounts {
1496
+ /**
1497
+ * Number of output items that resulted in an error.
1498
+ */
1499
+ errored: number;
1500
+
1501
+ /**
1502
+ * Number of output items that failed to pass the evaluation.
1503
+ */
1504
+ failed: number;
1505
+
1506
+ /**
1507
+ * Number of output items that passed the evaluation.
1508
+ */
1509
+ passed: number;
1510
+
1511
+ /**
1512
+ * Total number of executed output items.
1513
+ */
1514
+ total: number;
1515
+ }
1516
+ }
1517
+
1518
+ export interface RunDeleteResponse {
1519
+ deleted?: boolean;
1520
+
1521
+ object?: string;
1522
+
1523
+ run_id?: string;
1524
+ }
1525
+
1526
+ /**
1527
+ * A schema representing an evaluation run.
1528
+ */
1529
+ export interface RunCancelResponse {
1530
+ /**
1531
+ * Unique identifier for the evaluation run.
1532
+ */
1533
+ id: string;
1534
+
1535
+ /**
1536
+ * Unix timestamp (in seconds) when the evaluation run was created.
1537
+ */
1538
+ created_at: number;
1539
+
1540
+ /**
1541
+ * Information about the run's data source.
1542
+ */
1543
+ data_source:
1544
+ | CreateEvalJSONLRunDataSource
1545
+ | CreateEvalCompletionsRunDataSource
1546
+ | RunCancelResponse.Completions;
1547
+
1548
+ /**
1549
+ * An object representing an error response from the Eval API.
1550
+ */
1551
+ error: EvalAPIError;
1552
+
1553
+ /**
1554
+ * The identifier of the associated evaluation.
1555
+ */
1556
+ eval_id: string;
1557
+
1558
+ /**
1559
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
1560
+ * for storing additional information about the object in a structured format, and
1561
+ * querying for objects via API or the dashboard.
1562
+ *
1563
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
1564
+ * a maximum length of 512 characters.
1565
+ */
1566
+ metadata: Shared.Metadata | null;
1567
+
1568
+ /**
1569
+ * The model that is evaluated, if applicable.
1570
+ */
1571
+ model: string;
1572
+
1573
+ /**
1574
+ * The name of the evaluation run.
1575
+ */
1576
+ name: string;
1577
+
1578
+ /**
1579
+ * The type of the object. Always "eval.run".
1580
+ */
1581
+ object: 'eval.run';
1582
+
1583
+ /**
1584
+ * Usage statistics for each model during the evaluation run.
1585
+ */
1586
+ per_model_usage: Array<RunCancelResponse.PerModelUsage>;
1587
+
1588
+ /**
1589
+ * Results per testing criteria applied during the evaluation run.
1590
+ */
1591
+ per_testing_criteria_results: Array<RunCancelResponse.PerTestingCriteriaResult>;
1592
+
1593
+ /**
1594
+ * The URL to the rendered evaluation run report on the UI dashboard.
1595
+ */
1596
+ report_url: string;
1597
+
1598
+ /**
1599
+ * Counters summarizing the outcomes of the evaluation run.
1600
+ */
1601
+ result_counts: RunCancelResponse.ResultCounts;
1602
+
1603
+ /**
1604
+ * The status of the evaluation run.
1605
+ */
1606
+ status: string;
1607
+ }
1608
+
1609
+ export namespace RunCancelResponse {
1610
+ /**
1611
+ * A ResponsesRunDataSource object describing a model sampling configuration.
1612
+ */
1613
+ export interface Completions {
1614
+ /**
1615
+ * A EvalResponsesSource object describing a run data source configuration.
1616
+ */
1617
+ source: Completions.FileContent | Completions.FileID | Completions.Responses;
1618
+
1619
+ /**
1620
+ * The type of run data source. Always `completions`.
1621
+ */
1622
+ type: 'completions';
1623
+
1624
+ input_messages?: Completions.Template | Completions.ItemReference;
1625
+
1626
+ /**
1627
+ * The name of the model to use for generating completions (e.g. "o3-mini").
1628
+ */
1629
+ model?: string;
1630
+
1631
+ sampling_params?: Completions.SamplingParams;
1632
+ }
1633
+
1634
+ export namespace Completions {
1635
+ export interface FileContent {
1636
+ /**
1637
+ * The content of the jsonl file.
1638
+ */
1639
+ content: Array<FileContent.Content>;
1640
+
1641
+ /**
1642
+ * The type of jsonl source. Always `file_content`.
1643
+ */
1644
+ type: 'file_content';
1645
+ }
1646
+
1647
+ export namespace FileContent {
1648
+ export interface Content {
1649
+ item: Record<string, unknown>;
1650
+
1651
+ sample?: Record<string, unknown>;
1652
+ }
1653
+ }
1654
+
1655
+ export interface FileID {
1656
+ /**
1657
+ * The identifier of the file.
1658
+ */
1659
+ id: string;
1660
+
1661
+ /**
1662
+ * The type of jsonl source. Always `file_id`.
1663
+ */
1664
+ type: 'file_id';
1665
+ }
1666
+
1667
+ /**
1668
+ * A EvalResponsesSource object describing a run data source configuration.
1669
+ */
1670
+ export interface Responses {
1671
+ /**
1672
+ * The type of run data source. Always `responses`.
1673
+ */
1674
+ type: 'responses';
1675
+
1676
+ /**
1677
+ * Whether to allow parallel tool calls. This is a query parameter used to select
1678
+ * responses.
1679
+ */
1680
+ allow_parallel_tool_calls?: boolean | null;
1681
+
1682
+ /**
1683
+ * Only include items created after this timestamp (inclusive). This is a query
1684
+ * parameter used to select responses.
1685
+ */
1686
+ created_after?: number | null;
1687
+
1688
+ /**
1689
+ * Only include items created before this timestamp (inclusive). This is a query
1690
+ * parameter used to select responses.
1691
+ */
1692
+ created_before?: number | null;
1693
+
1694
+ /**
1695
+ * Whether the response has tool calls. This is a query parameter used to select
1696
+ * responses.
1697
+ */
1698
+ has_tool_calls?: boolean | null;
1699
+
1700
+ /**
1701
+ * Optional search string for instructions. This is a query parameter used to
1702
+ * select responses.
1703
+ */
1704
+ instructions_search?: string | null;
1705
+
1706
+ /**
1707
+ * Metadata filter for the responses. This is a query parameter used to select
1708
+ * responses.
1709
+ */
1710
+ metadata?: unknown | null;
1711
+
1712
+ /**
1713
+ * The name of the model to find responses for. This is a query parameter used to
1714
+ * select responses.
1715
+ */
1716
+ model?: string | null;
1717
+
1718
+ /**
1719
+ * Optional reasoning effort parameter. This is a query parameter used to select
1720
+ * responses.
1721
+ */
1722
+ reasoning_effort?: Shared.ReasoningEffort | null;
1723
+
1724
+ /**
1725
+ * Sampling temperature. This is a query parameter used to select responses.
1726
+ */
1727
+ temperature?: number | null;
1728
+
1729
+ /**
1730
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
1731
+ */
1732
+ top_p?: number | null;
1733
+
1734
+ /**
1735
+ * List of user identifiers. This is a query parameter used to select responses.
1736
+ */
1737
+ users?: Array<string> | null;
1738
+ }
1739
+
1740
+ export interface Template {
1741
+ /**
1742
+ * A list of chat messages forming the prompt or context. May include variable
1743
+ * references to the "item" namespace, ie {{item.name}}.
1744
+ */
1745
+ template: Array<Template.ChatMessage | Template.EvalItem>;
1746
+
1747
+ /**
1748
+ * The type of input messages. Always `template`.
1749
+ */
1750
+ type: 'template';
1751
+ }
1752
+
1753
+ export namespace Template {
1754
+ export interface ChatMessage {
1755
+ /**
1756
+ * The content of the message.
1757
+ */
1758
+ content: string;
1759
+
1760
+ /**
1761
+ * The role of the message (e.g. "system", "assistant", "user").
1762
+ */
1763
+ role: string;
1764
+ }
1765
+
1766
+ /**
1767
+ * A message input to the model with a role indicating instruction following
1768
+ * hierarchy. Instructions given with the `developer` or `system` role take
1769
+ * precedence over instructions given with the `user` role. Messages with the
1770
+ * `assistant` role are presumed to have been generated by the model in previous
1771
+ * interactions.
1772
+ */
1773
+ export interface EvalItem {
1774
+ /**
1775
+ * Text inputs to the model - can contain template strings.
1776
+ */
1777
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
1778
+
1779
+ /**
1780
+ * The role of the message input. One of `user`, `assistant`, `system`, or
1781
+ * `developer`.
1782
+ */
1783
+ role: 'user' | 'assistant' | 'system' | 'developer';
1784
+
1785
+ /**
1786
+ * The type of the message input. Always `message`.
1787
+ */
1788
+ type?: 'message';
1789
+ }
1790
+
1791
+ export namespace EvalItem {
1792
+ /**
1793
+ * A text output from the model.
1794
+ */
1795
+ export interface OutputText {
1796
+ /**
1797
+ * The text output from the model.
1798
+ */
1799
+ text: string;
1800
+
1801
+ /**
1802
+ * The type of the output text. Always `output_text`.
1803
+ */
1804
+ type: 'output_text';
1805
+ }
1806
+ }
1807
+ }
1808
+
1809
+ export interface ItemReference {
1810
+ /**
1811
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
1812
+ */
1813
+ item_reference: string;
1814
+
1815
+ /**
1816
+ * The type of input messages. Always `item_reference`.
1817
+ */
1818
+ type: 'item_reference';
1819
+ }
1820
+
1821
+ export interface SamplingParams {
1822
+ /**
1823
+ * The maximum number of tokens in the generated output.
1824
+ */
1825
+ max_completion_tokens?: number;
1826
+
1827
+ /**
1828
+ * A seed value to initialize the randomness, during sampling.
1829
+ */
1830
+ seed?: number;
1831
+
1832
+ /**
1833
+ * A higher temperature increases randomness in the outputs.
1834
+ */
1835
+ temperature?: number;
1836
+
1837
+ /**
1838
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
1839
+ */
1840
+ top_p?: number;
1841
+ }
1842
+ }
1843
+
1844
+ export interface PerModelUsage {
1845
+ /**
1846
+ * The number of tokens retrieved from cache.
1847
+ */
1848
+ cached_tokens: number;
1849
+
1850
+ /**
1851
+ * The number of completion tokens generated.
1852
+ */
1853
+ completion_tokens: number;
1854
+
1855
+ /**
1856
+ * The number of invocations.
1857
+ */
1858
+ invocation_count: number;
1859
+
1860
+ /**
1861
+ * The name of the model.
939
1862
  */
940
1863
  model_name: string;
941
1864
 
@@ -997,7 +1920,10 @@ export interface RunCreateParams {
997
1920
  /**
998
1921
  * Details about the run's data source.
999
1922
  */
1000
- data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
1923
+ data_source:
1924
+ | CreateEvalJSONLRunDataSource
1925
+ | CreateEvalCompletionsRunDataSource
1926
+ | RunCreateParams.CreateEvalResponsesRunDataSource;
1001
1927
 
1002
1928
  /**
1003
1929
  * Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1015,6 +1941,247 @@ export interface RunCreateParams {
1015
1941
  name?: string;
1016
1942
  }
1017
1943
 
1944
+ export namespace RunCreateParams {
1945
+ /**
1946
+ * A ResponsesRunDataSource object describing a model sampling configuration.
1947
+ */
1948
+ export interface CreateEvalResponsesRunDataSource {
1949
+ /**
1950
+ * A EvalResponsesSource object describing a run data source configuration.
1951
+ */
1952
+ source:
1953
+ | CreateEvalResponsesRunDataSource.FileContent
1954
+ | CreateEvalResponsesRunDataSource.FileID
1955
+ | CreateEvalResponsesRunDataSource.Responses;
1956
+
1957
+ /**
1958
+ * The type of run data source. Always `completions`.
1959
+ */
1960
+ type: 'completions';
1961
+
1962
+ input_messages?:
1963
+ | CreateEvalResponsesRunDataSource.Template
1964
+ | CreateEvalResponsesRunDataSource.ItemReference;
1965
+
1966
+ /**
1967
+ * The name of the model to use for generating completions (e.g. "o3-mini").
1968
+ */
1969
+ model?: string;
1970
+
1971
+ sampling_params?: CreateEvalResponsesRunDataSource.SamplingParams;
1972
+ }
1973
+
1974
+ export namespace CreateEvalResponsesRunDataSource {
1975
+ export interface FileContent {
1976
+ /**
1977
+ * The content of the jsonl file.
1978
+ */
1979
+ content: Array<FileContent.Content>;
1980
+
1981
+ /**
1982
+ * The type of jsonl source. Always `file_content`.
1983
+ */
1984
+ type: 'file_content';
1985
+ }
1986
+
1987
+ export namespace FileContent {
1988
+ export interface Content {
1989
+ item: Record<string, unknown>;
1990
+
1991
+ sample?: Record<string, unknown>;
1992
+ }
1993
+ }
1994
+
1995
+ export interface FileID {
1996
+ /**
1997
+ * The identifier of the file.
1998
+ */
1999
+ id: string;
2000
+
2001
+ /**
2002
+ * The type of jsonl source. Always `file_id`.
2003
+ */
2004
+ type: 'file_id';
2005
+ }
2006
+
2007
+ /**
2008
+ * A EvalResponsesSource object describing a run data source configuration.
2009
+ */
2010
+ export interface Responses {
2011
+ /**
2012
+ * The type of run data source. Always `responses`.
2013
+ */
2014
+ type: 'responses';
2015
+
2016
+ /**
2017
+ * Whether to allow parallel tool calls. This is a query parameter used to select
2018
+ * responses.
2019
+ */
2020
+ allow_parallel_tool_calls?: boolean | null;
2021
+
2022
+ /**
2023
+ * Only include items created after this timestamp (inclusive). This is a query
2024
+ * parameter used to select responses.
2025
+ */
2026
+ created_after?: number | null;
2027
+
2028
+ /**
2029
+ * Only include items created before this timestamp (inclusive). This is a query
2030
+ * parameter used to select responses.
2031
+ */
2032
+ created_before?: number | null;
2033
+
2034
+ /**
2035
+ * Whether the response has tool calls. This is a query parameter used to select
2036
+ * responses.
2037
+ */
2038
+ has_tool_calls?: boolean | null;
2039
+
2040
+ /**
2041
+ * Optional search string for instructions. This is a query parameter used to
2042
+ * select responses.
2043
+ */
2044
+ instructions_search?: string | null;
2045
+
2046
+ /**
2047
+ * Metadata filter for the responses. This is a query parameter used to select
2048
+ * responses.
2049
+ */
2050
+ metadata?: unknown | null;
2051
+
2052
+ /**
2053
+ * The name of the model to find responses for. This is a query parameter used to
2054
+ * select responses.
2055
+ */
2056
+ model?: string | null;
2057
+
2058
+ /**
2059
+ * Optional reasoning effort parameter. This is a query parameter used to select
2060
+ * responses.
2061
+ */
2062
+ reasoning_effort?: Shared.ReasoningEffort | null;
2063
+
2064
+ /**
2065
+ * Sampling temperature. This is a query parameter used to select responses.
2066
+ */
2067
+ temperature?: number | null;
2068
+
2069
+ /**
2070
+ * Nucleus sampling parameter. This is a query parameter used to select responses.
2071
+ */
2072
+ top_p?: number | null;
2073
+
2074
+ /**
2075
+ * List of user identifiers. This is a query parameter used to select responses.
2076
+ */
2077
+ users?: Array<string> | null;
2078
+ }
2079
+
2080
+ export interface Template {
2081
+ /**
2082
+ * A list of chat messages forming the prompt or context. May include variable
2083
+ * references to the "item" namespace, ie {{item.name}}.
2084
+ */
2085
+ template: Array<Template.ChatMessage | Template.EvalItem>;
2086
+
2087
+ /**
2088
+ * The type of input messages. Always `template`.
2089
+ */
2090
+ type: 'template';
2091
+ }
2092
+
2093
+ export namespace Template {
2094
+ export interface ChatMessage {
2095
+ /**
2096
+ * The content of the message.
2097
+ */
2098
+ content: string;
2099
+
2100
+ /**
2101
+ * The role of the message (e.g. "system", "assistant", "user").
2102
+ */
2103
+ role: string;
2104
+ }
2105
+
2106
+ /**
2107
+ * A message input to the model with a role indicating instruction following
2108
+ * hierarchy. Instructions given with the `developer` or `system` role take
2109
+ * precedence over instructions given with the `user` role. Messages with the
2110
+ * `assistant` role are presumed to have been generated by the model in previous
2111
+ * interactions.
2112
+ */
2113
+ export interface EvalItem {
2114
+ /**
2115
+ * Text inputs to the model - can contain template strings.
2116
+ */
2117
+ content: string | ResponsesAPI.ResponseInputText | EvalItem.OutputText;
2118
+
2119
+ /**
2120
+ * The role of the message input. One of `user`, `assistant`, `system`, or
2121
+ * `developer`.
2122
+ */
2123
+ role: 'user' | 'assistant' | 'system' | 'developer';
2124
+
2125
+ /**
2126
+ * The type of the message input. Always `message`.
2127
+ */
2128
+ type?: 'message';
2129
+ }
2130
+
2131
+ export namespace EvalItem {
2132
+ /**
2133
+ * A text output from the model.
2134
+ */
2135
+ export interface OutputText {
2136
+ /**
2137
+ * The text output from the model.
2138
+ */
2139
+ text: string;
2140
+
2141
+ /**
2142
+ * The type of the output text. Always `output_text`.
2143
+ */
2144
+ type: 'output_text';
2145
+ }
2146
+ }
2147
+ }
2148
+
2149
+ export interface ItemReference {
2150
+ /**
2151
+ * A reference to a variable in the "item" namespace. Ie, "item.name"
2152
+ */
2153
+ item_reference: string;
2154
+
2155
+ /**
2156
+ * The type of input messages. Always `item_reference`.
2157
+ */
2158
+ type: 'item_reference';
2159
+ }
2160
+
2161
+ export interface SamplingParams {
2162
+ /**
2163
+ * The maximum number of tokens in the generated output.
2164
+ */
2165
+ max_completion_tokens?: number;
2166
+
2167
+ /**
2168
+ * A seed value to initialize the randomness, during sampling.
2169
+ */
2170
+ seed?: number;
2171
+
2172
+ /**
2173
+ * A higher temperature increases randomness in the outputs.
2174
+ */
2175
+ temperature?: number;
2176
+
2177
+ /**
2178
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
2179
+ */
2180
+ top_p?: number;
2181
+ }
2182
+ }
2183
+ }
2184
+
1018
2185
  export interface RunListParams extends CursorPageParams {
1019
2186
  /**
1020
2187
  * Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
@@ -1023,8 +2190,8 @@ export interface RunListParams extends CursorPageParams {
1023
2190
  order?: 'asc' | 'desc';
1024
2191
 
1025
2192
  /**
1026
- * Filter runs by status. Use "queued" | "in_progress" | "failed" | "completed" |
1027
- * "canceled".
2193
+ * Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed`
2194
+ * | `canceled`.
1028
2195
  */
1029
2196
  status?: 'queued' | 'in_progress' | 'completed' | 'canceled' | 'failed';
1030
2197
  }