opik 1.11.13 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MHMIE52N.js → chunk-2AOEXUQ4.js} +35 -35
- package/dist/index.cjs +36 -36
- package/dist/index.d.cts +44 -13
- package/dist/index.d.ts +44 -13
- package/dist/index.js +1 -1
- package/dist/suite-DE3AOLJG.js +1 -0
- package/package.json +1 -1
- package/dist/suite-6XFUV2Y7.js +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -12681,22 +12681,50 @@ type ItemResult = {
|
|
|
12681
12681
|
hasAssertions: boolean;
|
|
12682
12682
|
runsPassed: number;
|
|
12683
12683
|
runsTotal: number;
|
|
12684
|
+
/** Configured runsPerItem from the execution policy. */
|
|
12685
|
+
configuredRunsPerItem: number;
|
|
12684
12686
|
passThreshold: number;
|
|
12685
12687
|
testResults: EvaluationTestResult[];
|
|
12686
12688
|
};
|
|
12687
12689
|
/**
|
|
12688
12690
|
* Result of a test suite run.
|
|
12689
|
-
|
|
12690
|
-
|
|
12691
|
-
|
|
12692
|
-
|
|
12693
|
-
|
|
12694
|
-
|
|
12695
|
-
|
|
12696
|
-
|
|
12697
|
-
|
|
12698
|
-
|
|
12699
|
-
|
|
12691
|
+
*
|
|
12692
|
+
* Contains pass/fail status for each item based on execution policy,
|
|
12693
|
+
* as well as overall suite pass/fail status.
|
|
12694
|
+
*/
|
|
12695
|
+
declare class TestSuiteResult {
|
|
12696
|
+
readonly allItemsPassed: boolean;
|
|
12697
|
+
readonly itemsPassed: number;
|
|
12698
|
+
readonly itemsTotal: number;
|
|
12699
|
+
readonly passRate: number | undefined;
|
|
12700
|
+
readonly itemResults: Map<string, ItemResult>;
|
|
12701
|
+
readonly experimentId: string;
|
|
12702
|
+
readonly experimentName?: string;
|
|
12703
|
+
readonly experimentUrl?: string;
|
|
12704
|
+
readonly suiteName?: string;
|
|
12705
|
+
readonly totalTime?: number;
|
|
12706
|
+
constructor(data: {
|
|
12707
|
+
allItemsPassed: boolean;
|
|
12708
|
+
itemsPassed: number;
|
|
12709
|
+
itemsTotal: number;
|
|
12710
|
+
passRate: number | undefined;
|
|
12711
|
+
itemResults: Map<string, ItemResult>;
|
|
12712
|
+
experimentId: string;
|
|
12713
|
+
experimentName?: string;
|
|
12714
|
+
experimentUrl?: string;
|
|
12715
|
+
suiteName?: string;
|
|
12716
|
+
totalTime?: number;
|
|
12717
|
+
});
|
|
12718
|
+
/**
|
|
12719
|
+
* Convert the result to a structured report dictionary.
|
|
12720
|
+
*
|
|
12721
|
+
* The returned object mirrors the structure produced by the Python SDK's
|
|
12722
|
+
* `to_report_dict()` method (with camelCase keys per TypeScript conventions).
|
|
12723
|
+
*/
|
|
12724
|
+
toReportDict(): Record<string, unknown>;
|
|
12725
|
+
/** Alias for {@link toReportDict}. */
|
|
12726
|
+
toDict(): Record<string, unknown>;
|
|
12727
|
+
}
|
|
12700
12728
|
|
|
12701
12729
|
/**
|
|
12702
12730
|
* Builds a TestSuiteResult from an EvaluationResult and execution policies.
|
|
@@ -12711,7 +12739,10 @@ type TestSuiteResult = {
|
|
|
12711
12739
|
* - allItemsPassed = itemsPassed === itemsTotal
|
|
12712
12740
|
* - passRate = itemsPassed / itemsWithAssertions (undefined if none have assertions)
|
|
12713
12741
|
*/
|
|
12714
|
-
declare function buildSuiteResult(evalResult: EvaluationResult
|
|
12742
|
+
declare function buildSuiteResult(evalResult: EvaluationResult, options?: {
|
|
12743
|
+
suiteName?: string;
|
|
12744
|
+
totalTime?: number;
|
|
12745
|
+
}): TestSuiteResult;
|
|
12715
12746
|
|
|
12716
12747
|
interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
12717
12748
|
/** The dataset to evaluate against */
|
|
@@ -15948,4 +15979,4 @@ declare class ConfigMismatchError extends OpikError {
|
|
|
15948
15979
|
|
|
15949
15980
|
declare function activateRunner(): void;
|
|
15950
15981
|
|
|
15951
|
-
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem,
|
|
15982
|
+
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getTrackContext, logger, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
|
package/dist/index.d.ts
CHANGED
|
@@ -12681,22 +12681,50 @@ type ItemResult = {
|
|
|
12681
12681
|
hasAssertions: boolean;
|
|
12682
12682
|
runsPassed: number;
|
|
12683
12683
|
runsTotal: number;
|
|
12684
|
+
/** Configured runsPerItem from the execution policy. */
|
|
12685
|
+
configuredRunsPerItem: number;
|
|
12684
12686
|
passThreshold: number;
|
|
12685
12687
|
testResults: EvaluationTestResult[];
|
|
12686
12688
|
};
|
|
12687
12689
|
/**
|
|
12688
12690
|
* Result of a test suite run.
|
|
12689
|
-
|
|
12690
|
-
|
|
12691
|
-
|
|
12692
|
-
|
|
12693
|
-
|
|
12694
|
-
|
|
12695
|
-
|
|
12696
|
-
|
|
12697
|
-
|
|
12698
|
-
|
|
12699
|
-
|
|
12691
|
+
*
|
|
12692
|
+
* Contains pass/fail status for each item based on execution policy,
|
|
12693
|
+
* as well as overall suite pass/fail status.
|
|
12694
|
+
*/
|
|
12695
|
+
declare class TestSuiteResult {
|
|
12696
|
+
readonly allItemsPassed: boolean;
|
|
12697
|
+
readonly itemsPassed: number;
|
|
12698
|
+
readonly itemsTotal: number;
|
|
12699
|
+
readonly passRate: number | undefined;
|
|
12700
|
+
readonly itemResults: Map<string, ItemResult>;
|
|
12701
|
+
readonly experimentId: string;
|
|
12702
|
+
readonly experimentName?: string;
|
|
12703
|
+
readonly experimentUrl?: string;
|
|
12704
|
+
readonly suiteName?: string;
|
|
12705
|
+
readonly totalTime?: number;
|
|
12706
|
+
constructor(data: {
|
|
12707
|
+
allItemsPassed: boolean;
|
|
12708
|
+
itemsPassed: number;
|
|
12709
|
+
itemsTotal: number;
|
|
12710
|
+
passRate: number | undefined;
|
|
12711
|
+
itemResults: Map<string, ItemResult>;
|
|
12712
|
+
experimentId: string;
|
|
12713
|
+
experimentName?: string;
|
|
12714
|
+
experimentUrl?: string;
|
|
12715
|
+
suiteName?: string;
|
|
12716
|
+
totalTime?: number;
|
|
12717
|
+
});
|
|
12718
|
+
/**
|
|
12719
|
+
* Convert the result to a structured report dictionary.
|
|
12720
|
+
*
|
|
12721
|
+
* The returned object mirrors the structure produced by the Python SDK's
|
|
12722
|
+
* `to_report_dict()` method (with camelCase keys per TypeScript conventions).
|
|
12723
|
+
*/
|
|
12724
|
+
toReportDict(): Record<string, unknown>;
|
|
12725
|
+
/** Alias for {@link toReportDict}. */
|
|
12726
|
+
toDict(): Record<string, unknown>;
|
|
12727
|
+
}
|
|
12700
12728
|
|
|
12701
12729
|
/**
|
|
12702
12730
|
* Builds a TestSuiteResult from an EvaluationResult and execution policies.
|
|
@@ -12711,7 +12739,10 @@ type TestSuiteResult = {
|
|
|
12711
12739
|
* - allItemsPassed = itemsPassed === itemsTotal
|
|
12712
12740
|
* - passRate = itemsPassed / itemsWithAssertions (undefined if none have assertions)
|
|
12713
12741
|
*/
|
|
12714
|
-
declare function buildSuiteResult(evalResult: EvaluationResult
|
|
12742
|
+
declare function buildSuiteResult(evalResult: EvaluationResult, options?: {
|
|
12743
|
+
suiteName?: string;
|
|
12744
|
+
totalTime?: number;
|
|
12745
|
+
}): TestSuiteResult;
|
|
12715
12746
|
|
|
12716
12747
|
interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
12717
12748
|
/** The dataset to evaluate against */
|
|
@@ -15948,4 +15979,4 @@ declare class ConfigMismatchError extends OpikError {
|
|
|
15948
15979
|
|
|
15949
15980
|
declare function activateRunner(): void;
|
|
15950
15981
|
|
|
15951
|
-
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem,
|
|
15982
|
+
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getTrackContext, logger, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
|
package/dist/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export{
|
|
1
|
+
export{oa as AgentTaskCompletionJudge,na as AgentToolCorrectnessJudge,ba as AnswerRelevance,Z as BaseLLMJudgeMetric,y as BaseMetric,z as BaseSuiteEvaluator,m as ChatPrompt,qa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,W as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ia as DemographicBiasJudge,ga as DialogueHelpfulnessJudge,V as ExactMatch,ca as GEval,da as GEvalPreset,ka as GenderBiasJudge,aa as Hallucination,Y as IsJson,M as LLMJudge,D as ModelConfigurationError,B as ModelError,C as ModelGenerationError,_ as Moderation,va as Opik,A as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ja as PoliticalBiasJudge,l as Prompt,k as PromptType,pa as PromptUncertaintyJudge,ha as QARelevanceJudge,X as RegexMatch,ma as RegionalBiasJudge,la as ReligiousBiasJudge,L as ResponseSchema,J as SYSTEM_PROMPT,fa as SummarizationCoherenceJudge,ea as SummarizationConsistencyJudge,ua as TestSuite,w as TestSuiteResult,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,K as USER_PROMPT_TEMPLATE,$ as Usefulness,F as VercelAIChatModel,s as activateRunner,q as agentConfigContext,x as buildSuiteResult,G as createModel,H as createModelFromInstance,O as deserializeEvaluators,E as detectProvider,c as disableLogger,T as evaluate,U as evaluatePrompt,R as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,ra as resolveEvaluators,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,I as resolveModel,S as runTests,N as serializeEvaluators,b as setLoggerLevel,u as track,sa as validateEvaluators,ta as validateExecutionPolicy,wa as z}from'./chunk-2AOEXUQ4.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export{v as DEFAULT_EXECUTION_POLICY,ua as TestSuite,w as TestSuiteResult,x as buildSuiteResult,O as deserializeEvaluators,R as evaluateTestSuite,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,S as runTests,N as serializeEvaluators}from'./chunk-2AOEXUQ4.js';
|
package/package.json
CHANGED
package/dist/suite-6XFUV2Y7.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export{v as DEFAULT_EXECUTION_POLICY,ta as TestSuite,w as buildSuiteResult,N as deserializeEvaluators,Q as evaluateTestSuite,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,R as runTests,M as serializeEvaluators}from'./chunk-MHMIE52N.js';
|