opik 1.10.13 → 1.10.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -3001,6 +3001,8 @@ interface DatasetItem {
3001
3001
  source: DatasetItemSource;
3002
3002
  data: JsonNode;
3003
3003
  tags?: string[];
3004
+ evaluators?: EvaluatorItem[];
3005
+ executionPolicy?: ExecutionPolicy;
3004
3006
  experimentItems?: ExperimentItem[];
3005
3007
  datasetId?: string;
3006
3008
  createdAt?: Date;
@@ -3019,6 +3021,8 @@ interface DatasetItemCompare {
3019
3021
  source: DatasetItemCompareSource;
3020
3022
  data: JsonNode;
3021
3023
  tags?: string[];
3024
+ evaluators?: EvaluatorItemCompare[];
3025
+ executionPolicy?: ExecutionPolicyCompare;
3022
3026
  experimentItems?: ExperimentItemCompare[];
3023
3027
  datasetId?: string;
3024
3028
  createdAt?: Date;
@@ -3087,6 +3091,8 @@ interface DatasetItemPublic {
3087
3091
  source: DatasetItemPublicSource;
3088
3092
  data: JsonNode;
3089
3093
  tags?: string[];
3094
+ evaluators?: EvaluatorItemPublic[];
3095
+ executionPolicy?: ExecutionPolicyPublic;
3090
3096
  experimentItems?: ExperimentItemPublic[];
3091
3097
  datasetId?: string;
3092
3098
  createdAt?: Date;
@@ -3123,6 +3129,9 @@ interface DatasetItemUpdate {
3123
3129
  data?: JsonNode;
3124
3130
  /** Tags */
3125
3131
  tags?: string[];
3132
+ /** Evaluators */
3133
+ evaluators?: EvaluatorItem[];
3134
+ executionPolicy?: ExecutionPolicy;
3126
3135
  }
3127
3136
 
3128
3137
  interface DatasetItemWrite {
@@ -3132,6 +3141,8 @@ interface DatasetItemWrite {
3132
3141
  source: DatasetItemWriteSource;
3133
3142
  data: JsonNode;
3134
3143
  tags?: string[];
3144
+ evaluators?: EvaluatorItemWrite[];
3145
+ executionPolicy?: ExecutionPolicyWrite;
3135
3146
  }
3136
3147
 
3137
3148
  declare const DatasetItemWriteSource: {
@@ -3299,6 +3310,74 @@ interface ErrorInfoWrite {
3299
3310
  traceback: string;
3300
3311
  }
3301
3312
 
3313
+ interface EvaluatorItem {
3314
+ name: string;
3315
+ type: EvaluatorItemType;
3316
+ config: JsonNode;
3317
+ }
3318
+
3319
+ interface EvaluatorItemCompare {
3320
+ name: string;
3321
+ type: EvaluatorItemCompareType;
3322
+ config: JsonNodeCompare;
3323
+ }
3324
+
3325
+ declare const EvaluatorItemCompareType: {
3326
+ readonly LlmJudge: "llm_judge";
3327
+ readonly CodeMetric: "code_metric";
3328
+ };
3329
+ type EvaluatorItemCompareType = (typeof EvaluatorItemCompareType)[keyof typeof EvaluatorItemCompareType];
3330
+
3331
+ interface EvaluatorItemPublic {
3332
+ name: string;
3333
+ type: EvaluatorItemPublicType;
3334
+ config: JsonNodePublic;
3335
+ }
3336
+
3337
+ declare const EvaluatorItemPublicType: {
3338
+ readonly LlmJudge: "llm_judge";
3339
+ readonly CodeMetric: "code_metric";
3340
+ };
3341
+ type EvaluatorItemPublicType = (typeof EvaluatorItemPublicType)[keyof typeof EvaluatorItemPublicType];
3342
+
3343
+ declare const EvaluatorItemType: {
3344
+ readonly LlmJudge: "llm_judge";
3345
+ readonly CodeMetric: "code_metric";
3346
+ };
3347
+ type EvaluatorItemType = (typeof EvaluatorItemType)[keyof typeof EvaluatorItemType];
3348
+
3349
+ interface EvaluatorItemWrite {
3350
+ name: string;
3351
+ type: EvaluatorItemWriteType;
3352
+ config: JsonNodeWrite;
3353
+ }
3354
+
3355
+ declare const EvaluatorItemWriteType: {
3356
+ readonly LlmJudge: "llm_judge";
3357
+ readonly CodeMetric: "code_metric";
3358
+ };
3359
+ type EvaluatorItemWriteType = (typeof EvaluatorItemWriteType)[keyof typeof EvaluatorItemWriteType];
3360
+
3361
+ interface ExecutionPolicy {
3362
+ runsPerItem?: number;
3363
+ passThreshold?: number;
3364
+ }
3365
+
3366
+ interface ExecutionPolicyCompare {
3367
+ runsPerItem?: number;
3368
+ passThreshold?: number;
3369
+ }
3370
+
3371
+ interface ExecutionPolicyPublic {
3372
+ runsPerItem?: number;
3373
+ passThreshold?: number;
3374
+ }
3375
+
3376
+ interface ExecutionPolicyWrite {
3377
+ runsPerItem?: number;
3378
+ passThreshold?: number;
3379
+ }
3380
+
3302
3381
  interface ExperimentGroupAggregationsResponse {
3303
3382
  content?: Record<string, GroupContentWithAggregations>;
3304
3383
  }
@@ -3313,6 +3392,8 @@ interface ExperimentItem {
3313
3392
  experimentId: string;
3314
3393
  datasetItemId: string;
3315
3394
  traceId: string;
3395
+ projectId?: string;
3396
+ projectName?: string;
3316
3397
  input?: JsonListString;
3317
3398
  output?: JsonListString;
3318
3399
  feedbackScores?: FeedbackScore[];
@@ -3340,6 +3421,7 @@ interface ExperimentItemCompare {
3340
3421
  experimentId: string;
3341
3422
  datasetItemId: string;
3342
3423
  traceId: string;
3424
+ projectId?: string;
3343
3425
  input?: JsonListStringCompare;
3344
3426
  output?: JsonListStringCompare;
3345
3427
  feedbackScores?: FeedbackScoreCompare[];
@@ -3365,6 +3447,7 @@ interface ExperimentItemPublic {
3365
3447
  experimentId: string;
3366
3448
  datasetItemId: string;
3367
3449
  traceId: string;
3450
+ projectId?: string;
3368
3451
  createdAt?: Date;
3369
3452
  lastUpdatedAt?: Date;
3370
3453
  createdBy?: string;
@@ -3841,6 +3924,8 @@ type JsonListStringWrite = Record<string, unknown> | Record<string, unknown>[] |
3841
3924
 
3842
3925
  type JsonNode = Record<string, unknown>;
3843
3926
 
3927
+ type JsonNodeCompare = Record<string, unknown>;
3928
+
3844
3929
  type JsonNodeDetail = Record<string, unknown>;
3845
3930
 
3846
3931
  type JsonNodePublic = Record<string, unknown>;
@@ -4634,6 +4719,8 @@ interface Span$1 {
4634
4719
  totalEstimatedCostVersion?: string;
4635
4720
  /** Duration in milliseconds as a decimal number to support sub-millisecond precision */
4636
4721
  duration?: number;
4722
+ /** Time to first token in milliseconds */
4723
+ ttft?: number;
4637
4724
  }
4638
4725
 
4639
4726
  /**
@@ -4665,6 +4752,8 @@ interface SpanExperimentItemBulkWriteView {
4665
4752
  lastUpdatedAt?: Date;
4666
4753
  totalEstimatedCost?: number;
4667
4754
  totalEstimatedCostVersion?: string;
4755
+ /** Time to first token in milliseconds */
4756
+ ttft?: number;
4668
4757
  }
4669
4758
 
4670
4759
  declare const SpanExperimentItemBulkWriteViewType: {
@@ -4802,6 +4891,8 @@ interface SpanPublic {
4802
4891
  totalEstimatedCostVersion?: string;
4803
4892
  /** Duration in milliseconds as a decimal number to support sub-millisecond precision */
4804
4893
  duration?: number;
4894
+ /** Time to first token in milliseconds */
4895
+ ttft?: number;
4805
4896
  }
4806
4897
 
4807
4898
  declare const SpanPublicType: {
@@ -4843,6 +4934,7 @@ interface SpanUpdate$1 {
4843
4934
  usage?: Record<string, number>;
4844
4935
  totalEstimatedCost?: number;
4845
4936
  errorInfo?: ErrorInfo;
4937
+ ttft?: number;
4846
4938
  }
4847
4939
 
4848
4940
  declare const SpanUpdateType: {
@@ -4889,6 +4981,8 @@ interface SpanWrite {
4889
4981
  lastUpdatedAt?: Date;
4890
4982
  totalEstimatedCost?: number;
4891
4983
  totalEstimatedCostVersion?: string;
4984
+ /** Time to first token in milliseconds */
4985
+ ttft?: number;
4892
4986
  }
4893
4987
 
4894
4988
  declare const SpanWriteType: {
@@ -5017,6 +5111,8 @@ interface Trace$1 {
5017
5111
  spanCount?: number;
5018
5112
  /** Duration in milliseconds as a decimal number to support sub-millisecond precision */
5019
5113
  duration?: number;
5114
+ /** Time to first token in milliseconds */
5115
+ ttft?: number;
5020
5116
  threadId?: string;
5021
5117
  visibilityMode?: TraceVisibilityMode;
5022
5118
  llmSpanCount?: number;
@@ -5058,6 +5154,8 @@ interface TraceExperimentItemBulkWriteView {
5058
5154
  tags?: string[];
5059
5155
  errorInfo?: ErrorInfoExperimentItemBulkWriteView;
5060
5156
  lastUpdatedAt?: Date;
5157
+ /** Time to first token in milliseconds */
5158
+ ttft?: number;
5061
5159
  threadId?: string;
5062
5160
  }
5063
5161
 
@@ -5163,6 +5261,8 @@ interface TracePublic {
5163
5261
  spanCount?: number;
5164
5262
  /** Duration in milliseconds as a decimal number to support sub-millisecond precision */
5165
5263
  duration?: number;
5264
+ /** Time to first token in milliseconds */
5265
+ ttft?: number;
5166
5266
  threadId?: string;
5167
5267
  visibilityMode?: TracePublicVisibilityMode;
5168
5268
  llmSpanCount?: number;
@@ -5337,6 +5437,7 @@ interface TraceUpdate {
5337
5437
  tags?: string[];
5338
5438
  errorInfo?: ErrorInfo;
5339
5439
  threadId?: string;
5440
+ ttft?: number;
5340
5441
  }
5341
5442
 
5342
5443
  declare const TraceVisibilityMode: {
@@ -5358,6 +5459,8 @@ interface TraceWrite {
5358
5459
  tags?: string[];
5359
5460
  errorInfo?: ErrorInfoWrite;
5360
5461
  lastUpdatedAt?: Date;
5462
+ /** Time to first token in milliseconds */
5463
+ ttft?: number;
5361
5464
  threadId?: string;
5362
5465
  }
5363
5466
 
@@ -9782,9 +9885,11 @@ declare class Dataset<T extends DatasetItemData = DatasetItemData> {
9782
9885
  declare class ExperimentItemReferences {
9783
9886
  readonly datasetItemId: string;
9784
9887
  readonly traceId: string;
9888
+ readonly projectName?: string;
9785
9889
  constructor(params: {
9786
9890
  datasetItemId: string;
9787
9891
  traceId: string;
9892
+ projectName?: string;
9788
9893
  });
9789
9894
  }
9790
9895
  /**
@@ -10331,15 +10436,18 @@ declare class OpikClient {
10331
10436
  * Supported OQL format: `<COLUMN> <OPERATOR> <VALUE> [AND <COLUMN> <OPERATOR> <VALUE>]*`
10332
10437
  *
10333
10438
  * Supported columns:
10334
- * - `id`, `name`: String fields
10439
+ * - `id`, `name`, `description`: String fields
10440
+ * - `created_by`, `last_updated_by`: String fields
10441
+ * - `template_structure`: String field (e.g., "text" or "chat")
10442
+ * - `created_at`, `last_updated_at`: Date/time fields (ISO 8601 format)
10335
10443
  * - `tags`: List field (use "contains" operator only)
10336
- * - `created_by`: String field
10444
+ * - `version_count`: Number field
10337
10445
  *
10338
10446
  * Supported operators by column:
10339
- * - `id`: =, !=, contains, not_contains, starts_with, ends_with, >, <
10340
- * - `name`: =, !=, contains, not_contains, starts_with, ends_with, >, <
10341
- * - `created_by`: =, !=, contains, not_contains, starts_with, ends_with, >, <
10342
- * - `tags`: contains (only)
10447
+ * - String fields (`id`, `name`, `description`, `created_by`, `last_updated_by`, `template_structure`): =, !=, contains, not_contains, starts_with, ends_with, >, <
10448
+ * - Date/time fields (`created_at`, `last_updated_at`): =, >, <, >=, <=
10449
+ * - Number fields (`version_count`): =, !=, >, <, >=, <=
10450
+ * - List fields (`tags`): contains
10343
10451
  *
10344
10452
  * @returns Promise resolving to array of matching latest prompt versions
10345
10453
  * @throws Error if OQL filter syntax is invalid
@@ -10359,6 +10467,15 @@ declare class OpikClient {
10359
10467
  *
10360
10468
  * // Filter by creator
10361
10469
  * const prompts = await client.searchPrompts('created_by = "user@example.com"');
10470
+ *
10471
+ * // Filter by template structure
10472
+ * const chatPrompts = await client.searchPrompts('template_structure = "chat"');
10473
+ *
10474
+ * // Filter by date range
10475
+ * const recentPrompts = await client.searchPrompts('created_at >= "2024-01-01T00:00:00Z"');
10476
+ *
10477
+ * // Filter by version count
10478
+ * const multiVersion = await client.searchPrompts('version_count > 5');
10362
10479
  * ```
10363
10480
  */
10364
10481
  searchPrompts: (filterString?: string) => Promise<(Prompt | ChatPrompt)[]>;
@@ -10465,6 +10582,63 @@ declare class OpikClient {
10465
10582
  waitForAtLeast?: number;
10466
10583
  waitForTimeout?: number;
10467
10584
  }) => Promise<TraceThread[]>;
10585
+ /**
10586
+ * Search for spans in a project with optional filtering.
10587
+ *
10588
+ * Spans represent individual operations or steps within traces, such as LLM calls or function executions.
10589
+ * This method allows you to search and filter spans using Opik Query Language (OQL).
10590
+ *
10591
+ * @param options - Search options
10592
+ * @param options.projectName - Name of the project to search in. Defaults to the client's configured project.
10593
+ * @param options.filterString - Filter string using Opik Query Language (OQL).
10594
+ * Supports filtering by: model, provider, type, metadata, feedback_scores, usage, duration, etc.
10595
+ * Examples: 'model = "gpt-4"', 'provider = "openai"', 'type = "llm"', 'metadata.version = "1.0"'
10596
+ * @param options.maxResults - Maximum number of spans to return (default: 1000)
10597
+ * @param options.truncate - Whether to truncate large fields in the response (default: true)
10598
+ * @param options.waitForAtLeast - If specified, polls until at least this many spans are found
10599
+ * @param options.waitForTimeout - Timeout in seconds when using waitForAtLeast (default: 60)
10600
+ * @returns Promise resolving to an array of spans
10601
+ * @throws {SearchTimeoutError} If waitForAtLeast is specified and timeout is reached
10602
+ *
10603
+ * @example
10604
+ * ```typescript
10605
+ * // Get all spans in a project
10606
+ * const spans = await client.searchSpans({ projectName: "My Project" });
10607
+ *
10608
+ * // Filter by model
10609
+ * const gpt4Spans = await client.searchSpans({
10610
+ * projectName: "My Project",
10611
+ * filterString: 'model = "gpt-4"'
10612
+ * });
10613
+ *
10614
+ * // Filter by provider and type
10615
+ * const openaiLLMSpans = await client.searchSpans({
10616
+ * projectName: "My Project",
10617
+ * filterString: 'provider = "openai" and type = "llm"'
10618
+ * });
10619
+ *
10620
+ * // Filter by metadata
10621
+ * const prodSpans = await client.searchSpans({
10622
+ * projectName: "My Project",
10623
+ * filterString: 'metadata.environment = "production"'
10624
+ * });
10625
+ *
10626
+ * // Wait for at least 5 spans
10627
+ * const spans = await client.searchSpans({
10628
+ * projectName: "My Project",
10629
+ * waitForAtLeast: 5,
10630
+ * waitForTimeout: 30
10631
+ * });
10632
+ * ```
10633
+ */
10634
+ searchSpans: (options?: {
10635
+ projectName?: string;
10636
+ filterString?: string;
10637
+ maxResults?: number;
10638
+ truncate?: boolean;
10639
+ waitForAtLeast?: number;
10640
+ waitForTimeout?: number;
10641
+ }) => Promise<SpanPublic[]>;
10468
10642
  private logFeedbackScores;
10469
10643
  /**
10470
10644
  * Log feedback scores to existing traces in batch.
@@ -10748,7 +10922,7 @@ declare abstract class OpikBaseModel {
10748
10922
  /**
10749
10923
  * Creates a new model instance.
10750
10924
  *
10751
- * @param modelName - The name of the model (e.g., 'gpt-4o', 'claude-3-opus')
10925
+ * @param modelName - The name of the model (e.g., 'gpt-5-nano', 'claude-3-opus')
10752
10926
  */
10753
10927
  constructor(modelName: string);
10754
10928
  /**
@@ -10763,7 +10937,7 @@ declare abstract class OpikBaseModel {
10763
10937
  *
10764
10938
  * @example
10765
10939
  * ```typescript
10766
- * const model = new VercelAIChatModel('gpt-4o');
10940
+ * const model = new VercelAIChatModel('gpt-5-nano');
10767
10941
  *
10768
10942
  * // Simple text generation
10769
10943
  * const response = await model.generateString('What is 2+2?');
@@ -10791,7 +10965,7 @@ declare abstract class OpikBaseModel {
10791
10965
  *
10792
10966
  * @example
10793
10967
  * ```typescript
10794
- * const model = new VercelAIChatModel('gpt-4o');
10968
+ * const model = new VercelAIChatModel('gpt-5-nano');
10795
10969
  * const response = await model.generateProviderResponse([
10796
10970
  * { role: 'user', content: 'Hello!' }
10797
10971
  * ]);
@@ -10807,7 +10981,7 @@ declare abstract class OpikBaseModel {
10807
10981
  * @example
10808
10982
  * ```typescript
10809
10983
  * // Valid model IDs
10810
- * const model1: SupportedModelId = "gpt-4o";
10984
+ * const model1: SupportedModelId = "gpt-5-nano";
10811
10985
  * const model2: SupportedModelId = "claude-3-5-sonnet-latest";
10812
10986
  * const model3: SupportedModelId = "gemini-2.0-flash";
10813
10987
  * ```
@@ -10848,7 +11022,7 @@ type AllProviderOptions = OpenAIProviderOptions | AnthropicProviderOptions | Goo
10848
11022
  * @example
10849
11023
  * ```typescript
10850
11024
  * // TypeScript infers OpenAIProviderOptions
10851
- * const options1: ProviderOptionsForModel<"gpt-4o"> = {
11025
+ * const options1: ProviderOptionsForModel<"gpt-5-nano"> = {
10852
11026
  * apiKey: "sk-...",
10853
11027
  * organization: "org-123" // ✅ Valid OpenAI option
10854
11028
  * };
@@ -10864,7 +11038,7 @@ type ProviderOptionsForModel<T extends SupportedModelId> = T extends OpenAIChatM
10864
11038
  * Detects the provider from the model ID and creates the appropriate provider instance.
10865
11039
  * Uses pattern matching to automatically determine which provider to use.
10866
11040
  *
10867
- * @param modelId - Model ID (e.g., "gpt-4o", "claude-3-5-sonnet-latest", "gemini-2.0-flash")
11041
+ * @param modelId - Model ID (e.g., "gpt-5-nano", "claude-3-5-sonnet-latest", "gemini-2.0-flash")
10868
11042
  * @param options - Provider-specific configuration options
10869
11043
  * @returns Provider-specific model instance ready for use with Vercel AI SDK
10870
11044
  *
@@ -10873,7 +11047,7 @@ type ProviderOptionsForModel<T extends SupportedModelId> = T extends OpenAIChatM
10873
11047
  * @example
10874
11048
  * ```typescript
10875
11049
  * // OpenAI with organization
10876
- * const openaiModel = detectProvider("gpt-4o", {
11050
+ * const openaiModel = detectProvider("gpt-5-nano", {
10877
11051
  * apiKey: "sk-...",
10878
11052
  * organization: "org-123"
10879
11053
  * });
@@ -10903,20 +11077,20 @@ type VercelAIChatModelOptions = {
10903
11077
  * Supports:
10904
11078
  * - Direct LanguageModel instances for maximum flexibility
10905
11079
  * - Typed model IDs with automatic provider detection:
10906
- * - OpenAI: `"gpt-4o"`, `"gpt-4-turbo"`, `"o1"`, etc.
11080
+ * - OpenAI: `"gpt-5-nano"`, `"gpt-5"`, `"o1"`, etc.
10907
11081
  * - Anthropic: `"claude-3-5-sonnet-latest"`, `"claude-3-opus"`, etc.
10908
11082
  * - Google Gemini: `"gemini-2.0-flash"`, `"gemini-1.5-pro"`, etc.
10909
11083
  *
10910
11084
  * @example
10911
11085
  * ```typescript
10912
11086
  * // Using typed model ID
10913
- * const model1 = new VercelAIChatModel("gpt-4o", {
11087
+ * const model1 = new VercelAIChatModel("gpt-5-nano", {
10914
11088
  * apiKey: "sk-...",
10915
11089
  * organization: "org-123"
10916
11090
  * });
10917
11091
  *
10918
11092
  * // Using LanguageModel instance directly
10919
- * const customModel = openai("gpt-4o");
11093
+ * const customModel = openai("gpt-5-nano");
10920
11094
  * const model2 = new VercelAIChatModel(customModel);
10921
11095
  * ```
10922
11096
  */
@@ -10938,7 +11112,7 @@ declare class VercelAIChatModel extends OpikBaseModel {
10938
11112
  /**
10939
11113
  * Creates a new VercelAIChatModel instance with a typed model ID.
10940
11114
  *
10941
- * @param modelId - The model ID (e.g., 'gpt-4o', 'claude-3-5-sonnet-latest', 'gemini-2.0-flash')
11115
+ * @param modelId - The model ID (e.g., 'gpt-5-nano', 'claude-3-5-sonnet-latest', 'gemini-2.0-flash')
10942
11116
  * @param options - Provider-specific configuration options
10943
11117
  */
10944
11118
  constructor(modelId: SupportedModelId, options?: VercelAIChatModelOptions);
@@ -10954,7 +11128,7 @@ declare class VercelAIChatModel extends OpikBaseModel {
10954
11128
  *
10955
11129
  * @example
10956
11130
  * ```typescript
10957
- * const model = new VercelAIChatModel("gpt-4o");
11131
+ * const model = new VercelAIChatModel("gpt-5-nano");
10958
11132
  *
10959
11133
  * // Simple text generation
10960
11134
  * const response = await model.generateString("What is 2+2?");
@@ -10982,7 +11156,7 @@ declare class VercelAIChatModel extends OpikBaseModel {
10982
11156
  *
10983
11157
  * @example
10984
11158
  * ```typescript
10985
- * const model = new VercelAIChatModel("gpt-4o");
11159
+ * const model = new VercelAIChatModel("gpt-5-nano");
10986
11160
  * const response = await model.generateProviderResponse([
10987
11161
  * { role: 'user', content: 'Hello!' }
10988
11162
  * ]);
@@ -10999,14 +11173,14 @@ declare class VercelAIChatModel extends OpikBaseModel {
10999
11173
  * Supports multiple providers (OpenAI, Anthropic, Google Gemini) with automatic
11000
11174
  * provider detection based on model ID patterns.
11001
11175
  *
11002
- * @param modelId - Model ID (e.g., 'gpt-4o', 'claude-3-5-sonnet-latest', 'gemini-2.0-flash')
11176
+ * @param modelId - Model ID (e.g., 'gpt-5-nano', 'claude-3-5-sonnet-latest', 'gemini-2.0-flash')
11003
11177
  * @param options - Optional provider-specific configuration options
11004
11178
  * @returns An OpikBaseModel instance
11005
11179
  *
11006
11180
  * @example
11007
11181
  * ```typescript
11008
11182
  * // OpenAI with organization
11009
- * const model1 = createModel('gpt-4o', {
11183
+ * const model1 = createModel('gpt-5-nano', {
11010
11184
  * apiKey: 'sk-...',
11011
11185
  * organization: 'org-123'
11012
11186
  * });
@@ -11039,7 +11213,7 @@ declare function createModel(modelId: SupportedModelId, options?: VercelAIChatMo
11039
11213
  * import { anthropic } from '@ai-sdk/anthropic';
11040
11214
  *
11041
11215
  * // OpenAI with custom settings
11042
- * const customOpenAI = openai('gpt-4o', {
11216
+ * const customOpenAI = openai('gpt-5-nano', {
11043
11217
  * structuredOutputs: true,
11044
11218
  * });
11045
11219
  * const model1 = createModelFromInstance(customOpenAI);
@@ -11056,7 +11230,7 @@ declare function createModelFromInstance(languageModel: LanguageModel, options?:
11056
11230
  * Resolves a model identifier to an OpikBaseModel instance.
11057
11231
  *
11058
11232
  * This function implements a resolution strategy that handles multiple input types:
11059
- * 1. undefined/null → Creates default model (gpt-4o)
11233
+ * 1. undefined/null → Creates default model (gpt-5-nano)
11060
11234
  * 2. string → Creates model from model ID
11061
11235
  * 3. OpikBaseModel → Returns as-is
11062
11236
  * 4. LanguageModel → Wraps in OpikBaseModel adapter
@@ -11074,13 +11248,13 @@ declare function createModelFromInstance(languageModel: LanguageModel, options?:
11074
11248
  * const model1 = resolveModel();
11075
11249
  *
11076
11250
  * // Using model ID
11077
- * const model2 = resolveModel('gpt-4o');
11251
+ * const model2 = resolveModel('gpt-5-nano');
11078
11252
  *
11079
11253
  * // Using OpikBaseModel instance
11080
- * const model3 = resolveModel(new VercelAIChatModel('gpt-4o'));
11254
+ * const model3 = resolveModel(new VercelAIChatModel('gpt-5-nano'));
11081
11255
  *
11082
11256
  * // Using LanguageModel instance
11083
- * const model4 = resolveModel(openai('gpt-4o'));
11257
+ * const model4 = resolveModel(openai('gpt-5-nano'));
11084
11258
  * ```
11085
11259
  */
11086
11260
  declare function resolveModel(model?: SupportedModelId | LanguageModel | OpikBaseModel, options?: VercelAIChatModelOptions): OpikBaseModel;
@@ -11120,7 +11294,7 @@ declare class ModelConfigurationError extends ModelError {
11120
11294
  interface EvaluatePromptOptions extends Omit<EvaluateOptions, "task"> {
11121
11295
  /** Message templates with {{placeholders}} to be formatted with dataset variables */
11122
11296
  messages: OpikMessage[];
11123
- /** Model to use for generation. Can be model ID string, LanguageModel instance, or OpikBaseModel instance. Defaults to gpt-4o */
11297
+ /** Model to use for generation. Can be model ID string, LanguageModel instance, or OpikBaseModel instance. Defaults to gpt-5-nano */
11124
11298
  model?: SupportedModelId | LanguageModel | OpikBaseModel;
11125
11299
  /** Template engine type for variable substitution. Defaults to mustache */
11126
11300
  templateType?: PromptType;
@@ -11153,7 +11327,7 @@ interface EvaluatePromptOptions extends Omit<EvaluateOptions, "task"> {
11153
11327
  * messages: [
11154
11328
  * { role: 'user', content: 'Translate to {{language}}: {{text}}' }
11155
11329
  * ],
11156
- * model: 'gpt-4o', // or omit to use default model
11330
+ * model: 'gpt-5-nano', // or omit to use default model
11157
11331
  * temperature: 0.7,
11158
11332
  * seed: 42,
11159
11333
  * scoringMetrics: [new Equals()],
@@ -11161,7 +11335,7 @@ interface EvaluatePromptOptions extends Omit<EvaluateOptions, "task"> {
11161
11335
  *
11162
11336
  * // Using pre-configured LanguageModel instance
11163
11337
  * import { openai } from '@ai-sdk/openai';
11164
- * const customModel = openai('gpt-4o', { structuredOutputs: true });
11338
+ * const customModel = openai('gpt-5-nano', { structuredOutputs: true });
11165
11339
  * const result2 = await evaluatePrompt({
11166
11340
  * dataset,
11167
11341
  * messages: [
@@ -11407,7 +11581,7 @@ declare abstract class BaseLLMJudgeMetric extends BaseMetric {
11407
11581
  *
11408
11582
  * @param name - The name of the metric
11409
11583
  * @param options - Configuration options
11410
- * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-4o'.
11584
+ * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-5-nano'.
11411
11585
  * @param options.trackMetric - Whether to track the metric. Defaults to true.
11412
11586
  * @param options.temperature - Temperature setting (0.0-2.0). Controls randomness. Lower values make output more focused and deterministic.
11413
11587
  * @param options.seed - Random seed for reproducible outputs. Useful for testing and debugging.
@@ -11548,7 +11722,7 @@ type Input$4 = z.infer<typeof validationSchema$4>;
11548
11722
  * ```typescript
11549
11723
  * import { Moderation } from 'opik/evaluation/metrics';
11550
11724
  *
11551
- * // Using default model (gpt-4o)
11725
+ * // Using default model (gpt-5-nano)
11552
11726
  * const metric = new Moderation();
11553
11727
  * const result = await metric.score({ output: "Hello, how can I help you?" });
11554
11728
  * console.log(result.value); // 0.0 (safe content)
@@ -11556,14 +11730,14 @@ type Input$4 = z.infer<typeof validationSchema$4>;
11556
11730
  *
11557
11731
  * // Using custom model with temperature and seed
11558
11732
  * const customMetric = new Moderation({
11559
- * model: 'gpt-4-turbo',
11733
+ * model: 'gpt-5',
11560
11734
  * temperature: 0.3,
11561
11735
  * seed: 42
11562
11736
  * });
11563
11737
  *
11564
11738
  * // Using custom model instance
11565
11739
  * import { openai } from '@ai-sdk/openai';
11566
- * const customModel = openai('gpt-4o');
11740
+ * const customModel = openai('gpt-5-nano');
11567
11741
  * const instanceMetric = new Moderation({ model: customModel });
11568
11742
  *
11569
11743
  * // With advanced settings
@@ -11583,7 +11757,7 @@ declare class Moderation extends BaseLLMJudgeMetric {
11583
11757
  * Creates a new Moderation metric.
11584
11758
  *
11585
11759
  * @param options - Configuration options
11586
- * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-4o'.
11760
+ * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-5-nano'.
11587
11761
  * @param options.name - The name of the metric. Defaults to "moderation_metric".
11588
11762
  * @param options.fewShotExamples - Optional few-shot examples to guide the model
11589
11763
  * @param options.trackMetric - Whether to track the metric. Defaults to true.
@@ -11658,7 +11832,7 @@ type Input$3 = z.infer<typeof validationSchema$3>;
11658
11832
  * ```typescript
11659
11833
  * import { Usefulness } from 'opik/evaluation/metrics';
11660
11834
  *
11661
- * // Using default model (gpt-4o)
11835
+ * // Using default model (gpt-5-nano)
11662
11836
  * const metric = new Usefulness();
11663
11837
  * const result = await metric.score({
11664
11838
  * input: "What's the capital of France?",
@@ -11669,14 +11843,14 @@ type Input$3 = z.infer<typeof validationSchema$3>;
11669
11843
  *
11670
11844
  * // Using custom model with temperature and seed
11671
11845
  * const customMetric = new Usefulness({
11672
- * model: 'gpt-4-turbo',
11846
+ * model: 'gpt-5',
11673
11847
  * temperature: 0.7,
11674
11848
  * seed: 42
11675
11849
  * });
11676
11850
  *
11677
11851
  * // Using custom model instance
11678
11852
  * import { openai } from '@ai-sdk/openai';
11679
- * const customModel = openai('gpt-4o');
11853
+ * const customModel = openai('gpt-5-nano');
11680
11854
  * const instanceMetric = new Usefulness({ model: customModel });
11681
11855
  *
11682
11856
  * // With advanced settings
@@ -11695,7 +11869,7 @@ declare class Usefulness extends BaseLLMJudgeMetric {
11695
11869
  * Creates a new Usefulness metric.
11696
11870
  *
11697
11871
  * @param options - Configuration options
11698
- * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-4o'.
11872
+ * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-5-nano'.
11699
11873
  * @param options.name - The name of the metric. Defaults to "usefulness_metric".
11700
11874
  * @param options.trackMetric - Whether to track the metric. Defaults to true.
11701
11875
  * @param options.temperature - Temperature setting (0.0-2.0). Controls randomness. Lower values make output more focused and deterministic. See https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature
@@ -11776,7 +11950,7 @@ type Input$2 = z.infer<typeof validationSchema$2>;
11776
11950
  * ```typescript
11777
11951
  * import { Hallucination } from 'opik/evaluation/metrics';
11778
11952
  *
11779
- * // Using default model (gpt-4o)
11953
+ * // Using default model (gpt-5-nano)
11780
11954
  * const metric = new Hallucination();
11781
11955
  *
11782
11956
  * // With context
@@ -11797,7 +11971,7 @@ type Input$2 = z.infer<typeof validationSchema$2>;
11797
11971
  *
11798
11972
  * // Using custom model with few-shot examples
11799
11973
  * const customMetric = new Hallucination({
11800
- * model: 'gpt-4-turbo',
11974
+ * model: 'gpt-5',
11801
11975
  * temperature: 0.3,
11802
11976
  * seed: 42,
11803
11977
  * fewShotExamples: [
@@ -11828,7 +12002,7 @@ declare class Hallucination extends BaseLLMJudgeMetric {
11828
12002
  * Creates a new Hallucination metric.
11829
12003
  *
11830
12004
  * @param options - Configuration options
11831
- * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-4o'.
12005
+ * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-5-nano'.
11832
12006
  * @param options.name - The name of the metric. Defaults to "hallucination_metric".
11833
12007
  * @param options.fewShotExamples - Optional few-shot examples to guide the model
11834
12008
  * @param options.trackMetric - Whether to track the metric. Defaults to true.
@@ -11924,7 +12098,7 @@ type Input$1 = z.infer<typeof validationSchema$1>;
11924
12098
  * ```typescript
11925
12099
  * import { AnswerRelevance } from 'opik/evaluation/metrics';
11926
12100
  *
11927
- * // Using default model (gpt-4o)
12101
+ * // Using default model (gpt-5-nano)
11928
12102
  * const metric = new AnswerRelevance();
11929
12103
  *
11930
12104
  * // With context (default behavior)
@@ -11946,7 +12120,7 @@ type Input$1 = z.infer<typeof validationSchema$1>;
11946
12120
  *
11947
12121
  * // Using custom model with few-shot examples
11948
12122
  * const customMetric = new AnswerRelevance({
11949
- * model: 'gpt-4-turbo',
12123
+ * model: 'gpt-5',
11950
12124
  * temperature: 0.3,
11951
12125
  * seed: 42,
11952
12126
  * fewShotExamples: [
@@ -11981,7 +12155,7 @@ declare class AnswerRelevance extends BaseLLMJudgeMetric {
11981
12155
  * Creates a new AnswerRelevance metric.
11982
12156
  *
11983
12157
  * @param options - Configuration options
11984
- * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-4o'.
12158
+ * @param options.model - The language model to use. Can be a string (model ID), LanguageModel instance, or OpikBaseModel instance. Defaults to 'gpt-5-nano'.
11985
12159
  * @param options.name - The name of the metric. Defaults to "answer_relevance_metric".
11986
12160
  * @param options.fewShotExamples - Optional few-shot examples with context to guide the model. If not provided, default examples will be used.
11987
12161
  * @param options.fewShotExamplesNoContext - Optional few-shot examples without context for no-context mode. If not provided, default examples will be used.
@@ -12392,7 +12566,7 @@ declare class DatasetVersionNotFoundError extends OpikError {
12392
12566
  /**
12393
12567
  * Supported column types in OQL queries
12394
12568
  */
12395
- type ColumnType = "string" | "date_time" | "dictionary" | "feedback_scores_number" | "list" | "number";
12569
+ type ColumnType = "string" | "date_time" | "dictionary" | "feedback_scores_number" | "list" | "number" | "error_container" | "enum" | "map";
12396
12570
  /**
12397
12571
  * Parsed filter expression structure
12398
12572
  */
@@ -12400,10 +12574,33 @@ interface FilterExpression {
12400
12574
  field: string;
12401
12575
  key?: string;
12402
12576
  operator: string;
12403
- value: string;
12577
+ value: string | null;
12404
12578
  type?: ColumnType;
12405
12579
  }
12406
12580
 
12581
+ /**
12582
+ * Abstract base class for OQL (Opik Query Language) configuration
12583
+ */
12584
+
12585
+ declare abstract class OQLConfig {
12586
+ /**
12587
+ * Map of supported fields to their types
12588
+ */
12589
+ abstract get columns(): Record<string, ColumnType>;
12590
+ /**
12591
+ * Map of fields to their supported operators
12592
+ */
12593
+ abstract get supportedOperators(): Record<string, readonly string[]>;
12594
+ /**
12595
+ * Fields that support nested key access via dot notation
12596
+ */
12597
+ get nestedFields(): readonly string[];
12598
+ /**
12599
+ * Keys supported for the usage field
12600
+ */
12601
+ get usageKeys(): readonly string[];
12602
+ }
12603
+
12407
12604
  /**
12408
12605
  * This file contains the OQL parser and validator. It is currently limited in scope to only support
12409
12606
  * simple filters without "and" or "or" operators.
@@ -12431,7 +12628,24 @@ interface FilterExpression {
12431
12628
  declare class OpikQueryLanguage {
12432
12629
  private readonly filterExpressions;
12433
12630
  readonly parsedFilters: string | null;
12434
- constructor(queryString?: string);
12631
+ private readonly config;
12632
+ constructor(queryString?: string, config?: OQLConfig);
12633
+ /**
12634
+ * Create an OpikQueryLanguage instance for trace filtering
12635
+ */
12636
+ static forTraces(queryString?: string): OpikQueryLanguage;
12637
+ /**
12638
+ * Create an OpikQueryLanguage instance for span filtering
12639
+ */
12640
+ static forSpans(queryString?: string): OpikQueryLanguage;
12641
+ /**
12642
+ * Create an OpikQueryLanguage instance for trace thread filtering
12643
+ */
12644
+ static forThreads(queryString?: string): OpikQueryLanguage;
12645
+ /**
12646
+ * Create an OpikQueryLanguage instance for prompt filtering
12647
+ */
12648
+ static forPrompts(queryString?: string): OpikQueryLanguage;
12435
12649
  /**
12436
12650
  * Returns the parsed filter expressions
12437
12651
  */