opik 2.0.7 → 2.0.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -12672,6 +12672,28 @@ type EvaluationTestResult = {
12672
12672
  * Mirrors the Fern-generated ExecutionPolicyWrite type.
12673
12673
  */
12674
12674
  type ExecutionPolicy = ExecutionPolicyWrite;
12675
+ /**
12676
+ * A raw test suite item as returned by {@link TestSuite.getRawItems}.
12677
+ *
12678
+ * Unlike the view returned by {@link TestSuite.getItems}, this preserves:
12679
+ * - `evaluators` as raw {@link EvaluatorItemWrite} objects (not decoded to assertion strings)
12680
+ * - `executionPolicy` as the item-level value only (not merged with the suite-level default)
12681
+ *
12682
+ * `data` is the full stored payload exactly as returned by the dataset
12683
+ * (so if an item was stored with a `description`, it will be present in
12684
+ * `data` as well). `description` is additionally exposed as a top-level
12685
+ * field for ergonomic access.
12686
+ *
12687
+ * Use this when you need to introspect or forward the stored evaluator
12688
+ * config or per-item execution policy verbatim.
12689
+ */
12690
+ interface RawTestSuiteItem {
12691
+ id: string;
12692
+ data: DatasetItemData;
12693
+ description?: string;
12694
+ evaluators?: EvaluatorItemWrite[];
12695
+ executionPolicy?: ExecutionPolicy;
12696
+ }
12675
12697
  declare const DEFAULT_EXECUTION_POLICY: Required<ExecutionPolicy>;
12676
12698
  /**
12677
12699
  * A single item to be inserted into a test suite via `insert()`.
@@ -12854,6 +12876,24 @@ declare class TestSuite {
12854
12876
  assertions: string[];
12855
12877
  executionPolicy: Required<ExecutionPolicy>;
12856
12878
  }>>;
12879
+ /**
12880
+ * Retrieve items with full suite-specific metadata preserved verbatim.
12881
+ *
12882
+ * Unlike {@link getItems}, this does NOT:
12883
+ * - Decode `evaluators` into assertion strings (you get raw {@link EvaluatorItemWrite}[])
12884
+ * - Merge the item-level `executionPolicy` with the suite-level default
12885
+ *
12886
+ * `data` mirrors the stored payload (includes `description` if present);
12887
+ * `description` is additionally exposed at the top level for convenience.
12888
+ *
12889
+ * Use this when you need to inspect or forward the stored evaluator
12890
+ * config or per-item execution policy as-is.
12891
+ *
12892
+ * @param nbSamples Max items to retrieve. Omit to return all items.
12893
+ * @param lastRetrievedId Opaque cursor for pagination (last `id` from a previous call).
12894
+ * @returns Array of {@link RawTestSuiteItem} objects.
12895
+ */
12896
+ getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<RawTestSuiteItem[]>;
12857
12897
  getGlobalAssertions(): Promise<string[]>;
12858
12898
  getTags(): Promise<string[]>;
12859
12899
  getItemsCount(): Promise<number | undefined>;
@@ -13151,6 +13191,25 @@ declare class Experiment {
13151
13191
  getUrl(): Promise<string>;
13152
13192
  }
13153
13193
 
13194
+ interface TestSuiteExperimentData extends ExperimentData {
13195
+ passRate?: number;
13196
+ passedCount?: number;
13197
+ totalCount?: number;
13198
+ assertionScores?: AssertionScoreAveragePublic[];
13199
+ }
13200
+ /**
13201
+ * Represents an experiment run against a test suite. Extends `Experiment`
13202
+ * with the aggregate assertion statistics the backend populates only for
13203
+ * evaluation-suite experiments (null/undefined for regular dataset experiments).
13204
+ */
13205
+ declare class TestSuiteExperiment extends Experiment {
13206
+ readonly passRate?: number;
13207
+ readonly passedCount?: number;
13208
+ readonly totalCount?: number;
13209
+ readonly assertionScores?: AssertionScoreAveragePublic[];
13210
+ constructor(data: TestSuiteExperimentData, opik: OpikClient);
13211
+ }
13212
+
13154
13213
  interface ChatPromptData extends BasePromptData {
13155
13214
  messages: ChatMessage[];
13156
13215
  }
@@ -13596,6 +13655,25 @@ declare class OpikClient {
13596
13655
  * @throws {DatasetNotFoundError} If the dataset doesn't exist
13597
13656
  */
13598
13657
  getDatasetExperiments: (datasetName: string, maxResults?: number, projectName?: string) => Promise<Experiment[]>;
13658
+ /**
13659
+ * Retrieves all experiments associated with a test suite.
13660
+ *
13661
+ * @param name The name of the test suite
13662
+ * @param maxResults Maximum number of experiments to return (default: 100)
13663
+ * @param projectName Optional project name to scope the suite lookup. If not provided, uses the client's configured project.
13664
+ * @returns A list of TestSuiteExperiment objects associated with the test suite,
13665
+ * each carrying the suite-specific assertion aggregates (`passRate`, `passedCount`,
13666
+ * `totalCount`, `assertionScores`) populated by the backend.
13667
+ * @throws {DatasetNotFoundError} If the test suite doesn't exist
13668
+ */
13669
+ getTestSuiteExperiments: (name: string, maxResults?: number, projectName?: string) => Promise<TestSuiteExperiment[]>;
13670
+ /**
13671
+ * Paginated fetch of experiments for a given dataset ID, mapping each raw
13672
+ * `ExperimentPublic` row to a caller-chosen entity. Used internally by
13673
+ * `getDatasetExperiments` and `getTestSuiteExperiments` to share the
13674
+ * loop shape and only differ on the constructed type.
13675
+ */
13676
+ private findExperimentsByDatasetId;
13599
13677
  /**
13600
13678
  * Deletes an experiment by ID
13601
13679
  *
@@ -16013,4 +16091,4 @@ declare class ConfigMismatchError extends OpikError {
16013
16091
 
16014
16092
  declare function activateRunner(): void;
16015
16093
 
16016
- export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, 
Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getGlobalClient, getTrackContext, logger, resetGlobalClient, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setGlobalClient, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
16094
+ export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, type RawTestSuiteItem, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, 
ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getGlobalClient, getTrackContext, logger, resetGlobalClient, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setGlobalClient, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
package/dist/index.d.ts CHANGED
@@ -12672,6 +12672,28 @@ type EvaluationTestResult = {
12672
12672
  * Mirrors the Fern-generated ExecutionPolicyWrite type.
12673
12673
  */
12674
12674
  type ExecutionPolicy = ExecutionPolicyWrite;
12675
+ /**
12676
+ * A raw test suite item as returned by {@link TestSuite.getRawItems}.
12677
+ *
12678
+ * Unlike the view returned by {@link TestSuite.getItems}, this preserves:
12679
+ * - `evaluators` as raw {@link EvaluatorItemWrite} objects (not decoded to assertion strings)
12680
+ * - `executionPolicy` as the item-level value only (not merged with the suite-level default)
12681
+ *
12682
+ * `data` is the full stored payload exactly as returned by the dataset
12683
+ * (so if an item was stored with a `description`, it will be present in
12684
+ * `data` as well). `description` is additionally exposed as a top-level
12685
+ * field for ergonomic access.
12686
+ *
12687
+ * Use this when you need to introspect or forward the stored evaluator
12688
+ * config or per-item execution policy verbatim.
12689
+ */
12690
+ interface RawTestSuiteItem {
12691
+ id: string;
12692
+ data: DatasetItemData;
12693
+ description?: string;
12694
+ evaluators?: EvaluatorItemWrite[];
12695
+ executionPolicy?: ExecutionPolicy;
12696
+ }
12675
12697
  declare const DEFAULT_EXECUTION_POLICY: Required<ExecutionPolicy>;
12676
12698
  /**
12677
12699
  * A single item to be inserted into a test suite via `insert()`.
@@ -12854,6 +12876,24 @@ declare class TestSuite {
12854
12876
  assertions: string[];
12855
12877
  executionPolicy: Required<ExecutionPolicy>;
12856
12878
  }>>;
12879
+ /**
12880
+ * Retrieve items with full suite-specific metadata preserved verbatim.
12881
+ *
12882
+ * Unlike {@link getItems}, this does NOT:
12883
+ * - Decode `evaluators` into assertion strings (you get raw {@link EvaluatorItemWrite}[])
12884
+ * - Merge the item-level `executionPolicy` with the suite-level default
12885
+ *
12886
+ * `data` mirrors the stored payload (includes `description` if present);
12887
+ * `description` is additionally exposed at the top level for convenience.
12888
+ *
12889
+ * Use this when you need to inspect or forward the stored evaluator
12890
+ * config or per-item execution policy as-is.
12891
+ *
12892
+ * @param nbSamples Max items to retrieve. Omit to return all items.
12893
+ * @param lastRetrievedId Opaque cursor for pagination (last `id` from a previous call).
12894
+ * @returns Array of {@link RawTestSuiteItem} objects.
12895
+ */
12896
+ getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<RawTestSuiteItem[]>;
12857
12897
  getGlobalAssertions(): Promise<string[]>;
12858
12898
  getTags(): Promise<string[]>;
12859
12899
  getItemsCount(): Promise<number | undefined>;
@@ -13151,6 +13191,25 @@ declare class Experiment {
13151
13191
  getUrl(): Promise<string>;
13152
13192
  }
13153
13193
 
13194
+ interface TestSuiteExperimentData extends ExperimentData {
13195
+ passRate?: number;
13196
+ passedCount?: number;
13197
+ totalCount?: number;
13198
+ assertionScores?: AssertionScoreAveragePublic[];
13199
+ }
13200
+ /**
13201
+ * Represents an experiment run against a test suite. Extends `Experiment`
13202
+ * with the aggregate assertion statistics the backend populates only for
13203
+ * evaluation-suite experiments (null/undefined for regular dataset experiments).
13204
+ */
13205
+ declare class TestSuiteExperiment extends Experiment {
13206
+ readonly passRate?: number;
13207
+ readonly passedCount?: number;
13208
+ readonly totalCount?: number;
13209
+ readonly assertionScores?: AssertionScoreAveragePublic[];
13210
+ constructor(data: TestSuiteExperimentData, opik: OpikClient);
13211
+ }
13212
+
13154
13213
  interface ChatPromptData extends BasePromptData {
13155
13214
  messages: ChatMessage[];
13156
13215
  }
@@ -13596,6 +13655,25 @@ declare class OpikClient {
13596
13655
  * @throws {DatasetNotFoundError} If the dataset doesn't exist
13597
13656
  */
13598
13657
  getDatasetExperiments: (datasetName: string, maxResults?: number, projectName?: string) => Promise<Experiment[]>;
13658
+ /**
13659
+ * Retrieves all experiments associated with a test suite.
13660
+ *
13661
+ * @param name The name of the test suite
13662
+ * @param maxResults Maximum number of experiments to return (default: 100)
13663
+ * @param projectName Optional project name to scope the suite lookup. If not provided, uses the client's configured project.
13664
+ * @returns A list of TestSuiteExperiment objects associated with the test suite,
13665
+ * each carrying the suite-specific assertion aggregates (`passRate`, `passedCount`,
13666
+ * `totalCount`, `assertionScores`) populated by the backend.
13667
+ * @throws {DatasetNotFoundError} If the test suite doesn't exist
13668
+ */
13669
+ getTestSuiteExperiments: (name: string, maxResults?: number, projectName?: string) => Promise<TestSuiteExperiment[]>;
13670
+ /**
13671
+ * Paginated fetch of experiments for a given dataset ID, mapping each raw
13672
+ * `ExperimentPublic` row to a caller-chosen entity. Used internally by
13673
+ * `getDatasetExperiments` and `getTestSuiteExperiments` to share the
13674
+ * loop shape and only differ on the constructed type.
13675
+ */
13676
+ private findExperimentsByDatasetId;
13599
13677
  /**
13600
13678
  * Deletes an experiment by ID
13601
13679
  *
@@ -16013,4 +16091,4 @@ declare class ConfigMismatchError extends OpikError {
16013
16091
 
16014
16092
  declare function activateRunner(): void;
16015
16093
 
16016
- export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, 
Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getGlobalClient, getTrackContext, logger, resetGlobalClient, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setGlobalClient, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
16094
+ export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, type RawTestSuiteItem, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, 
ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getGlobalClient, getTrackContext, logger, resetGlobalClient, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setGlobalClient, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- import {Ba}from'./chunk-FTTQJ4TO.js';export{ra as AgentTaskCompletionJudge,qa as AgentToolCorrectnessJudge,ea as AnswerRelevance,aa as BaseLLMJudgeMetric,B as BaseMetric,C as BaseSuiteEvaluator,p as ChatPrompt,ta as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,Z as Contains,y as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,la as DemographicBiasJudge,ja as DialogueHelpfulnessJudge,Y as ExactMatch,fa as GEval,ga as GEvalPreset,na as GenderBiasJudge,da as Hallucination,$ as IsJson,P as LLMJudge,G as ModelConfigurationError,E as ModelError,F as ModelGenerationError,ba as Moderation,za as Opik,D as OpikBaseModel,q as OpikQueryLanguage,d as OpikSpanType,ma as PoliticalBiasJudge,o as Prompt,k as PromptType,sa as PromptUncertaintyJudge,ka as QARelevanceJudge,_ as RegexMatch,pa as RegionalBiasJudge,oa as ReligiousBiasJudge,O as ResponseSchema,M as SYSTEM_PROMPT,ia as SummarizationCoherenceJudge,ha as SummarizationConsistencyJudge,xa as TestSuite,z as TestSuiteResult,s as ThreadsAnnotationQueue,r as TracesAnnotationQueue,N as USER_PROMPT_TEMPLATE,ca as Usefulness,I as VercelAIChatModel,v as activateRunner,t as agentConfigContext,A as buildSuiteResult,J as createModel,K as createModelFromInstance,R as deserializeEvaluators,H as detectProvider,c as disableLogger,W as evaluate,X as evaluatePrompt,U as evaluateTestSuite,u as flushAll,e as generateId,l as getGlobalClient,w as getTrackContext,a as logger,n as resetGlobalClient,ua as resolveEvaluators,S as resolveExecutionPolicy,T as resolveItemExecutionPolicy,L as resolveModel,V as runTests,Q as serializeEvaluators,m as setGlobalClient,b as setLoggerLevel,x as track,va as validateEvaluators,wa as validateExecutionPolicy,Aa as z}from'./chunk-FTTQJ4TO.js';Ba();
1
+ import {Ba}from'./chunk-GRBNDL4U.js';export{ra as AgentTaskCompletionJudge,qa as AgentToolCorrectnessJudge,ea as AnswerRelevance,aa as BaseLLMJudgeMetric,B as BaseMetric,C as BaseSuiteEvaluator,p as ChatPrompt,ta as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,Z as Contains,y as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,la as DemographicBiasJudge,ja as DialogueHelpfulnessJudge,Y as ExactMatch,fa as GEval,ga as GEvalPreset,na as GenderBiasJudge,da as Hallucination,$ as IsJson,P as LLMJudge,G as ModelConfigurationError,E as ModelError,F as ModelGenerationError,ba as Moderation,za as Opik,D as OpikBaseModel,q as OpikQueryLanguage,d as OpikSpanType,ma as PoliticalBiasJudge,o as Prompt,k as PromptType,sa as PromptUncertaintyJudge,ka as QARelevanceJudge,_ as RegexMatch,pa as RegionalBiasJudge,oa as ReligiousBiasJudge,O as ResponseSchema,M as SYSTEM_PROMPT,ia as SummarizationCoherenceJudge,ha as SummarizationConsistencyJudge,xa as TestSuite,z as TestSuiteResult,s as ThreadsAnnotationQueue,r as TracesAnnotationQueue,N as USER_PROMPT_TEMPLATE,ca as Usefulness,I as VercelAIChatModel,v as activateRunner,t as agentConfigContext,A as buildSuiteResult,J as createModel,K as createModelFromInstance,R as deserializeEvaluators,H as detectProvider,c as disableLogger,W as evaluate,X as evaluatePrompt,U as evaluateTestSuite,u as flushAll,e as generateId,l as getGlobalClient,w as getTrackContext,a as logger,n as resetGlobalClient,ua as resolveEvaluators,S as resolveExecutionPolicy,T as resolveItemExecutionPolicy,L as resolveModel,V as runTests,Q as serializeEvaluators,m as setGlobalClient,b as setLoggerLevel,x as track,va as validateEvaluators,wa as validateExecutionPolicy,Aa as z}from'./chunk-GRBNDL4U.js';Ba();
@@ -1 +1 @@
1
- import {ya}from'./chunk-FTTQJ4TO.js';export{y as DEFAULT_EXECUTION_POLICY,xa as TestSuite,z as TestSuiteResult,A as buildSuiteResult,R as deserializeEvaluators,U as evaluateTestSuite,S as resolveExecutionPolicy,T as resolveItemExecutionPolicy,V as runTests,Q as serializeEvaluators}from'./chunk-FTTQJ4TO.js';ya();
1
+ import {ya}from'./chunk-GRBNDL4U.js';export{y as DEFAULT_EXECUTION_POLICY,xa as TestSuite,z as TestSuiteResult,A as buildSuiteResult,R as deserializeEvaluators,U as evaluateTestSuite,S as resolveExecutionPolicy,T as resolveItemExecutionPolicy,V as runTests,Q as serializeEvaluators}from'./chunk-GRBNDL4U.js';ya();
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "opik",
3
3
  "description": "Opik TypeScript and JavaScript SDK",
4
- "version": "2.0.7",
4
+ "version": "2.0.9",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/comet-ml/opik.git",