opik 1.11.14 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MHMIE52N.js → chunk-HRNPUK4B.js} +35 -35
- package/dist/index.cjs +36 -36
- package/dist/index.d.cts +57 -16
- package/dist/index.d.ts +57 -16
- package/dist/index.js +1 -1
- package/dist/suite-RWWP3MWS.js +1 -0
- package/package.json +1 -1
- package/dist/suite-6XFUV2Y7.js +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -12503,9 +12503,10 @@ declare class Dataset<T extends DatasetItemData = DatasetItemData> {
|
|
|
12503
12503
|
* Retrieve raw DatasetItem objects with full metadata (evaluators, executionPolicy) preserved.
|
|
12504
12504
|
*
|
|
12505
12505
|
* @param nbSamples The number of samples to retrieve. If not set - all items are returned
|
|
12506
|
+
* @param lastRetrievedId Optional ID of the last retrieved item for pagination
|
|
12506
12507
|
* @returns A list of DatasetItem objects
|
|
12507
12508
|
*/
|
|
12508
|
-
getRawItems(nbSamples?: number): Promise<DatasetItem<T>[]>;
|
|
12509
|
+
getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<DatasetItem<T>[]>;
|
|
12509
12510
|
/**
|
|
12510
12511
|
* Insert items from a JSON string array into the dataset.
|
|
12511
12512
|
*
|
|
@@ -12681,22 +12682,50 @@ type ItemResult = {
|
|
|
12681
12682
|
hasAssertions: boolean;
|
|
12682
12683
|
runsPassed: number;
|
|
12683
12684
|
runsTotal: number;
|
|
12685
|
+
/** Configured runsPerItem from the execution policy. */
|
|
12686
|
+
configuredRunsPerItem: number;
|
|
12684
12687
|
passThreshold: number;
|
|
12685
12688
|
testResults: EvaluationTestResult[];
|
|
12686
12689
|
};
|
|
12687
12690
|
/**
|
|
12688
12691
|
* Result of a test suite run.
|
|
12689
|
-
|
|
12690
|
-
|
|
12691
|
-
|
|
12692
|
-
|
|
12693
|
-
|
|
12694
|
-
|
|
12695
|
-
|
|
12696
|
-
|
|
12697
|
-
|
|
12698
|
-
|
|
12699
|
-
|
|
12692
|
+
*
|
|
12693
|
+
* Contains pass/fail status for each item based on execution policy,
|
|
12694
|
+
* as well as overall suite pass/fail status.
|
|
12695
|
+
*/
|
|
12696
|
+
declare class TestSuiteResult {
|
|
12697
|
+
readonly allItemsPassed: boolean;
|
|
12698
|
+
readonly itemsPassed: number;
|
|
12699
|
+
readonly itemsTotal: number;
|
|
12700
|
+
readonly passRate: number | undefined;
|
|
12701
|
+
readonly itemResults: Map<string, ItemResult>;
|
|
12702
|
+
readonly experimentId: string;
|
|
12703
|
+
readonly experimentName?: string;
|
|
12704
|
+
readonly experimentUrl?: string;
|
|
12705
|
+
readonly suiteName?: string;
|
|
12706
|
+
readonly totalTime?: number;
|
|
12707
|
+
constructor(data: {
|
|
12708
|
+
allItemsPassed: boolean;
|
|
12709
|
+
itemsPassed: number;
|
|
12710
|
+
itemsTotal: number;
|
|
12711
|
+
passRate: number | undefined;
|
|
12712
|
+
itemResults: Map<string, ItemResult>;
|
|
12713
|
+
experimentId: string;
|
|
12714
|
+
experimentName?: string;
|
|
12715
|
+
experimentUrl?: string;
|
|
12716
|
+
suiteName?: string;
|
|
12717
|
+
totalTime?: number;
|
|
12718
|
+
});
|
|
12719
|
+
/**
|
|
12720
|
+
* Convert the result to a structured report dictionary.
|
|
12721
|
+
*
|
|
12722
|
+
* The returned object mirrors the structure produced by the Python SDK's
|
|
12723
|
+
* `to_report_dict()` method (with camelCase keys per TypeScript conventions).
|
|
12724
|
+
*/
|
|
12725
|
+
toReportDict(): Record<string, unknown>;
|
|
12726
|
+
/** Alias for {@link toReportDict}. */
|
|
12727
|
+
toDict(): Record<string, unknown>;
|
|
12728
|
+
}
|
|
12700
12729
|
|
|
12701
12730
|
/**
|
|
12702
12731
|
* Builds a TestSuiteResult from an EvaluationResult and execution policies.
|
|
@@ -12711,7 +12740,10 @@ type TestSuiteResult = {
|
|
|
12711
12740
|
* - allItemsPassed = itemsPassed === itemsTotal
|
|
12712
12741
|
* - passRate = itemsPassed / itemsWithAssertions (undefined if none have assertions)
|
|
12713
12742
|
*/
|
|
12714
|
-
declare function buildSuiteResult(evalResult: EvaluationResult
|
|
12743
|
+
declare function buildSuiteResult(evalResult: EvaluationResult, options?: {
|
|
12744
|
+
suiteName?: string;
|
|
12745
|
+
totalTime?: number;
|
|
12746
|
+
}): TestSuiteResult;
|
|
12715
12747
|
|
|
12716
12748
|
interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
12717
12749
|
/** The dataset to evaluate against */
|
|
@@ -12732,6 +12764,10 @@ interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
|
12732
12764
|
client?: OpikClient;
|
|
12733
12765
|
/** Optional list of tags to associate with the experiment */
|
|
12734
12766
|
tags?: string[];
|
|
12767
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
12768
|
+
taskThreads?: number;
|
|
12769
|
+
/** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
|
|
12770
|
+
nbSamples?: number;
|
|
12735
12771
|
}
|
|
12736
12772
|
/**
|
|
12737
12773
|
* Run a test suite using evaluators and execution policy stored in the dataset version metadata.
|
|
@@ -12752,7 +12788,6 @@ interface CreateTestSuiteOptions {
|
|
|
12752
12788
|
interface UpdateTestSuiteOptions {
|
|
12753
12789
|
globalAssertions?: string[];
|
|
12754
12790
|
globalExecutionPolicy?: ExecutionPolicy;
|
|
12755
|
-
tags?: string[];
|
|
12756
12791
|
}
|
|
12757
12792
|
declare class TestSuite {
|
|
12758
12793
|
private readonly _dataset;
|
|
@@ -12789,7 +12824,7 @@ declare class TestSuite {
|
|
|
12789
12824
|
* @throws Error if any item is missing an `id`
|
|
12790
12825
|
*/
|
|
12791
12826
|
update(items: UpdateTestSuiteItem[]): Promise<void>;
|
|
12792
|
-
getItems(): Promise<Array<{
|
|
12827
|
+
getItems(nbSamples?: number, lastRetrievedId?: string): Promise<Array<{
|
|
12793
12828
|
id: string;
|
|
12794
12829
|
data: Record<string, unknown>;
|
|
12795
12830
|
description?: string;
|
|
@@ -12858,6 +12893,10 @@ interface RunTestsOptions {
|
|
|
12858
12893
|
experimentTags?: string[];
|
|
12859
12894
|
/** Optional model name override for LLMJudge evaluators */
|
|
12860
12895
|
model?: string;
|
|
12896
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
12897
|
+
taskThreads?: number;
|
|
12898
|
+
/** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
|
|
12899
|
+
nbSamples?: number;
|
|
12861
12900
|
}
|
|
12862
12901
|
/**
|
|
12863
12902
|
* Run a test suite evaluation against a task function.
|
|
@@ -14087,6 +14126,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
|
|
|
14087
14126
|
scoringKeyMapping?: ScoringKeyMappingType;
|
|
14088
14127
|
/** Optional list of tags to associate with the experiment */
|
|
14089
14128
|
tags?: string[];
|
|
14129
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
14130
|
+
taskThreads?: number;
|
|
14090
14131
|
/** Optional agent configuration blueprint ID to link with the experiment */
|
|
14091
14132
|
blueprintId?: string;
|
|
14092
14133
|
}
|
|
@@ -15948,4 +15989,4 @@ declare class ConfigMismatchError extends OpikError {
|
|
|
15948
15989
|
|
|
15949
15990
|
declare function activateRunner(): void;
|
|
15950
15991
|
|
|
15951
|
-
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem,
|
|
15992
|
+
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getTrackContext, logger, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
|
package/dist/index.d.ts
CHANGED
|
@@ -12503,9 +12503,10 @@ declare class Dataset<T extends DatasetItemData = DatasetItemData> {
|
|
|
12503
12503
|
* Retrieve raw DatasetItem objects with full metadata (evaluators, executionPolicy) preserved.
|
|
12504
12504
|
*
|
|
12505
12505
|
* @param nbSamples The number of samples to retrieve. If not set - all items are returned
|
|
12506
|
+
* @param lastRetrievedId Optional ID of the last retrieved item for pagination
|
|
12506
12507
|
* @returns A list of DatasetItem objects
|
|
12507
12508
|
*/
|
|
12508
|
-
getRawItems(nbSamples?: number): Promise<DatasetItem<T>[]>;
|
|
12509
|
+
getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<DatasetItem<T>[]>;
|
|
12509
12510
|
/**
|
|
12510
12511
|
* Insert items from a JSON string array into the dataset.
|
|
12511
12512
|
*
|
|
@@ -12681,22 +12682,50 @@ type ItemResult = {
|
|
|
12681
12682
|
hasAssertions: boolean;
|
|
12682
12683
|
runsPassed: number;
|
|
12683
12684
|
runsTotal: number;
|
|
12685
|
+
/** Configured runsPerItem from the execution policy. */
|
|
12686
|
+
configuredRunsPerItem: number;
|
|
12684
12687
|
passThreshold: number;
|
|
12685
12688
|
testResults: EvaluationTestResult[];
|
|
12686
12689
|
};
|
|
12687
12690
|
/**
|
|
12688
12691
|
* Result of a test suite run.
|
|
12689
|
-
|
|
12690
|
-
|
|
12691
|
-
|
|
12692
|
-
|
|
12693
|
-
|
|
12694
|
-
|
|
12695
|
-
|
|
12696
|
-
|
|
12697
|
-
|
|
12698
|
-
|
|
12699
|
-
|
|
12692
|
+
*
|
|
12693
|
+
* Contains pass/fail status for each item based on execution policy,
|
|
12694
|
+
* as well as overall suite pass/fail status.
|
|
12695
|
+
*/
|
|
12696
|
+
declare class TestSuiteResult {
|
|
12697
|
+
readonly allItemsPassed: boolean;
|
|
12698
|
+
readonly itemsPassed: number;
|
|
12699
|
+
readonly itemsTotal: number;
|
|
12700
|
+
readonly passRate: number | undefined;
|
|
12701
|
+
readonly itemResults: Map<string, ItemResult>;
|
|
12702
|
+
readonly experimentId: string;
|
|
12703
|
+
readonly experimentName?: string;
|
|
12704
|
+
readonly experimentUrl?: string;
|
|
12705
|
+
readonly suiteName?: string;
|
|
12706
|
+
readonly totalTime?: number;
|
|
12707
|
+
constructor(data: {
|
|
12708
|
+
allItemsPassed: boolean;
|
|
12709
|
+
itemsPassed: number;
|
|
12710
|
+
itemsTotal: number;
|
|
12711
|
+
passRate: number | undefined;
|
|
12712
|
+
itemResults: Map<string, ItemResult>;
|
|
12713
|
+
experimentId: string;
|
|
12714
|
+
experimentName?: string;
|
|
12715
|
+
experimentUrl?: string;
|
|
12716
|
+
suiteName?: string;
|
|
12717
|
+
totalTime?: number;
|
|
12718
|
+
});
|
|
12719
|
+
/**
|
|
12720
|
+
* Convert the result to a structured report dictionary.
|
|
12721
|
+
*
|
|
12722
|
+
* The returned object mirrors the structure produced by the Python SDK's
|
|
12723
|
+
* `to_report_dict()` method (with camelCase keys per TypeScript conventions).
|
|
12724
|
+
*/
|
|
12725
|
+
toReportDict(): Record<string, unknown>;
|
|
12726
|
+
/** Alias for {@link toReportDict}. */
|
|
12727
|
+
toDict(): Record<string, unknown>;
|
|
12728
|
+
}
|
|
12700
12729
|
|
|
12701
12730
|
/**
|
|
12702
12731
|
* Builds a TestSuiteResult from an EvaluationResult and execution policies.
|
|
@@ -12711,7 +12740,10 @@ type TestSuiteResult = {
|
|
|
12711
12740
|
* - allItemsPassed = itemsPassed === itemsTotal
|
|
12712
12741
|
* - passRate = itemsPassed / itemsWithAssertions (undefined if none have assertions)
|
|
12713
12742
|
*/
|
|
12714
|
-
declare function buildSuiteResult(evalResult: EvaluationResult
|
|
12743
|
+
declare function buildSuiteResult(evalResult: EvaluationResult, options?: {
|
|
12744
|
+
suiteName?: string;
|
|
12745
|
+
totalTime?: number;
|
|
12746
|
+
}): TestSuiteResult;
|
|
12715
12747
|
|
|
12716
12748
|
interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
12717
12749
|
/** The dataset to evaluate against */
|
|
@@ -12732,6 +12764,10 @@ interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
|
|
|
12732
12764
|
client?: OpikClient;
|
|
12733
12765
|
/** Optional list of tags to associate with the experiment */
|
|
12734
12766
|
tags?: string[];
|
|
12767
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
12768
|
+
taskThreads?: number;
|
|
12769
|
+
/** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
|
|
12770
|
+
nbSamples?: number;
|
|
12735
12771
|
}
|
|
12736
12772
|
/**
|
|
12737
12773
|
* Run a test suite using evaluators and execution policy stored in the dataset version metadata.
|
|
@@ -12752,7 +12788,6 @@ interface CreateTestSuiteOptions {
|
|
|
12752
12788
|
interface UpdateTestSuiteOptions {
|
|
12753
12789
|
globalAssertions?: string[];
|
|
12754
12790
|
globalExecutionPolicy?: ExecutionPolicy;
|
|
12755
|
-
tags?: string[];
|
|
12756
12791
|
}
|
|
12757
12792
|
declare class TestSuite {
|
|
12758
12793
|
private readonly _dataset;
|
|
@@ -12789,7 +12824,7 @@ declare class TestSuite {
|
|
|
12789
12824
|
* @throws Error if any item is missing an `id`
|
|
12790
12825
|
*/
|
|
12791
12826
|
update(items: UpdateTestSuiteItem[]): Promise<void>;
|
|
12792
|
-
getItems(): Promise<Array<{
|
|
12827
|
+
getItems(nbSamples?: number, lastRetrievedId?: string): Promise<Array<{
|
|
12793
12828
|
id: string;
|
|
12794
12829
|
data: Record<string, unknown>;
|
|
12795
12830
|
description?: string;
|
|
@@ -12858,6 +12893,10 @@ interface RunTestsOptions {
|
|
|
12858
12893
|
experimentTags?: string[];
|
|
12859
12894
|
/** Optional model name override for LLMJudge evaluators */
|
|
12860
12895
|
model?: string;
|
|
12896
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
12897
|
+
taskThreads?: number;
|
|
12898
|
+
/** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
|
|
12899
|
+
nbSamples?: number;
|
|
12861
12900
|
}
|
|
12862
12901
|
/**
|
|
12863
12902
|
* Run a test suite evaluation against a task function.
|
|
@@ -14087,6 +14126,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
|
|
|
14087
14126
|
scoringKeyMapping?: ScoringKeyMappingType;
|
|
14088
14127
|
/** Optional list of tags to associate with the experiment */
|
|
14089
14128
|
tags?: string[];
|
|
14129
|
+
/** Number of concurrent task executions (default: 16, matching Python SDK) */
|
|
14130
|
+
taskThreads?: number;
|
|
14090
14131
|
/** Optional agent configuration blueprint ID to link with the experiment */
|
|
14091
14132
|
blueprintId?: string;
|
|
14092
14133
|
}
|
|
@@ -15948,4 +15989,4 @@ declare class ConfigMismatchError extends OpikError {
|
|
|
15948
15989
|
|
|
15949
15990
|
declare function activateRunner(): void;
|
|
15950
15991
|
|
|
15951
|
-
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem,
|
|
15992
|
+
export { AgentTaskCompletionJudge, AgentToolCorrectnessJudge, type AllProviderOptions, AnnotationQueuePublicScope as AnnotationQueueScope, AnswerRelevance, type AnthropicProviderOptions, BaseLLMJudgeMetric, BaseMetric, BaseSuiteEvaluator, ChatPrompt, ComplianceRiskJudge, type Config, ConfigMismatchError, ConfigNotFoundError, Contains, type CreateTestSuiteOptions, DEFAULT_EXECUTION_POLICY, Dataset, type DatasetPublic, DatasetVersion, DatasetVersionNotFoundError, type DatasetVersionPublic, DemographicBiasJudge, DialogueHelpfulnessJudge, type ErrorInfo, type EvaluateOptions, type EvaluatePromptOptions, type EvaluateTestSuiteOptions, type EvaluationError, type EvaluationResult, type EvaluationScoreResult, type EvaluationTask, type EvaluationTestCase, type EvaluationTestResult, ExactMatch, type ExecutionPolicy, type FeedbackScoreData, type FewShotExampleAnswerRelevanceNoContext, type FewShotExampleAnswerRelevanceWithContext, type FewShotExampleHallucination, type FewShotExampleModeration, type FilterExpression, GEval, GEvalPreset, GenderBiasJudge, type GoogleProviderOptions, Hallucination, IsJson, type ItemResult, LLMJudge, type LLMJudgeConfig, type LLMJudgeModelSettings, type LLMJudgeOptions, type LLMJudgeResponseFormat, ModelConfigurationError, ModelError, ModelGenerationError, Moderation, type OpenAIProviderOptions, OpikClient as Opik, type OpikAssistantMessage, OpikBaseModel, type OpikConfig, type OpikMessage, OpikQueryLanguage, SpanType as OpikSpanType, type OpikSystemMessage, type OpikToolMessage, type OpikUserMessage, type Param, PoliticalBiasJudge, Prompt, PromptType, PromptUncertaintyJudge, type ProviderOptionsForModel, QARelevanceJudge, RegexMatch, RegionalBiasJudge, type RegistryEntry, ReligiousBiasJudge, ResponseSchema, type RunTestsOptions, SYSTEM_PROMPT, type ScoringKeyMappingType, Span, SpanType, SummarizationCoherenceJudge, SummarizationConsistencyJudge, type SupportedModelId, TestSuite, type TestSuiteItem, TestSuiteResult, ThreadsAnnotationQueue, Trace, TracesAnnotationQueue, USER_PROMPT_TEMPLATE, type UpdateTestSuiteItem, type UpdateTestSuiteOptions, Usefulness, VercelAIChatModel, activateRunner, agentConfigContext, buildSuiteResult, createModel, createModelFromInstance, deserializeEvaluators, detectProvider, disableLogger, evaluate, evaluatePrompt, evaluateTestSuite, flushAll, generateId, getTrackContext, logger, resolveEvaluators, resolveExecutionPolicy, resolveItemExecutionPolicy, resolveModel, runTests, serializeEvaluators, setLoggerLevel, track, validateEvaluators, validateExecutionPolicy };
|
package/dist/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export{
|
|
1
|
+
export{oa as AgentTaskCompletionJudge,na as AgentToolCorrectnessJudge,ba as AnswerRelevance,Z as BaseLLMJudgeMetric,y as BaseMetric,z as BaseSuiteEvaluator,m as ChatPrompt,qa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,W as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ia as DemographicBiasJudge,ga as DialogueHelpfulnessJudge,V as ExactMatch,ca as GEval,da as GEvalPreset,ka as GenderBiasJudge,aa as Hallucination,Y as IsJson,M as LLMJudge,D as ModelConfigurationError,B as ModelError,C as ModelGenerationError,_ as Moderation,va as Opik,A as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ja as PoliticalBiasJudge,l as Prompt,k as PromptType,pa as PromptUncertaintyJudge,ha as QARelevanceJudge,X as RegexMatch,ma as RegionalBiasJudge,la as ReligiousBiasJudge,L as ResponseSchema,J as SYSTEM_PROMPT,fa as SummarizationCoherenceJudge,ea as SummarizationConsistencyJudge,ua as TestSuite,w as TestSuiteResult,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,K as USER_PROMPT_TEMPLATE,$ as Usefulness,F as VercelAIChatModel,s as activateRunner,q as agentConfigContext,x as buildSuiteResult,G as createModel,H as createModelFromInstance,O as deserializeEvaluators,E as detectProvider,c as disableLogger,T as evaluate,U as evaluatePrompt,R as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,ra as resolveEvaluators,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,I as resolveModel,S as runTests,N as serializeEvaluators,b as setLoggerLevel,u as track,sa as validateEvaluators,ta as validateExecutionPolicy,wa as z}from'./chunk-HRNPUK4B.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export{v as DEFAULT_EXECUTION_POLICY,ua as TestSuite,w as TestSuiteResult,x as buildSuiteResult,O as deserializeEvaluators,R as evaluateTestSuite,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,S as runTests,N as serializeEvaluators}from'./chunk-HRNPUK4B.js';
|
package/package.json
CHANGED
package/dist/suite-6XFUV2Y7.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export{v as DEFAULT_EXECUTION_POLICY,ta as TestSuite,w as buildSuiteResult,N as deserializeEvaluators,Q as evaluateTestSuite,O as resolveExecutionPolicy,P as resolveItemExecutionPolicy,R as runTests,M as serializeEvaluators}from'./chunk-MHMIE52N.js';
|