opik 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -12503,9 +12503,10 @@ declare class Dataset<T extends DatasetItemData = DatasetItemData> {
12503
12503
  * Retrieve raw DatasetItem objects with full metadata (evaluators, executionPolicy) preserved.
12504
12504
  *
12505
12505
  * @param nbSamples The number of samples to retrieve. If not set - all items are returned
12506
+ * @param lastRetrievedId Optional ID of the last retrieved item for pagination
12506
12507
  * @returns A list of DatasetItem objects
12507
12508
  */
12508
- getRawItems(nbSamples?: number): Promise<DatasetItem<T>[]>;
12509
+ getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<DatasetItem<T>[]>;
12509
12510
  /**
12510
12511
  * Insert items from a JSON string array into the dataset.
12511
12512
  *
@@ -12763,6 +12764,10 @@ interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
12763
12764
  client?: OpikClient;
12764
12765
  /** Optional list of tags to associate with the experiment */
12765
12766
  tags?: string[];
12767
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
12768
+ taskThreads?: number;
12769
+ /** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
12770
+ nbSamples?: number;
12766
12771
  }
12767
12772
  /**
12768
12773
  * Run a test suite using evaluators and execution policy stored in the dataset version metadata.
@@ -12783,7 +12788,6 @@ interface CreateTestSuiteOptions {
12783
12788
  interface UpdateTestSuiteOptions {
12784
12789
  globalAssertions?: string[];
12785
12790
  globalExecutionPolicy?: ExecutionPolicy;
12786
- tags?: string[];
12787
12791
  }
12788
12792
  declare class TestSuite {
12789
12793
  private readonly _dataset;
@@ -12820,7 +12824,7 @@ declare class TestSuite {
12820
12824
  * @throws Error if any item is missing an `id`
12821
12825
  */
12822
12826
  update(items: UpdateTestSuiteItem[]): Promise<void>;
12823
- getItems(): Promise<Array<{
12827
+ getItems(nbSamples?: number, lastRetrievedId?: string): Promise<Array<{
12824
12828
  id: string;
12825
12829
  data: Record<string, unknown>;
12826
12830
  description?: string;
@@ -12889,6 +12893,10 @@ interface RunTestsOptions {
12889
12893
  experimentTags?: string[];
12890
12894
  /** Optional model name override for LLMJudge evaluators */
12891
12895
  model?: string;
12896
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
12897
+ taskThreads?: number;
12898
+ /** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
12899
+ nbSamples?: number;
12892
12900
  }
12893
12901
  /**
12894
12902
  * Run a test suite evaluation against a task function.
@@ -14118,6 +14126,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
14118
14126
  scoringKeyMapping?: ScoringKeyMappingType;
14119
14127
  /** Optional list of tags to associate with the experiment */
14120
14128
  tags?: string[];
14129
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
14130
+ taskThreads?: number;
14121
14131
  /** Optional agent configuration blueprint ID to link with the experiment */
14122
14132
  blueprintId?: string;
14123
14133
  }
package/dist/index.d.ts CHANGED
@@ -12503,9 +12503,10 @@ declare class Dataset<T extends DatasetItemData = DatasetItemData> {
12503
12503
  * Retrieve raw DatasetItem objects with full metadata (evaluators, executionPolicy) preserved.
12504
12504
  *
12505
12505
  * @param nbSamples The number of samples to retrieve. If not set - all items are returned
12506
+ * @param lastRetrievedId Optional ID of the last retrieved item for pagination
12506
12507
  * @returns A list of DatasetItem objects
12507
12508
  */
12508
- getRawItems(nbSamples?: number): Promise<DatasetItem<T>[]>;
12509
+ getRawItems(nbSamples?: number, lastRetrievedId?: string): Promise<DatasetItem<T>[]>;
12509
12510
  /**
12510
12511
  * Insert items from a JSON string array into the dataset.
12511
12512
  *
@@ -12763,6 +12764,10 @@ interface EvaluateTestSuiteOptions<T = Record<string, unknown>> {
12763
12764
  client?: OpikClient;
12764
12765
  /** Optional list of tags to associate with the experiment */
12765
12766
  tags?: string[];
12767
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
12768
+ taskThreads?: number;
12769
+ /** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
12770
+ nbSamples?: number;
12766
12771
  }
12767
12772
  /**
12768
12773
  * Run a test suite using evaluators and execution policy stored in the dataset version metadata.
@@ -12783,7 +12788,6 @@ interface CreateTestSuiteOptions {
12783
12788
  interface UpdateTestSuiteOptions {
12784
12789
  globalAssertions?: string[];
12785
12790
  globalExecutionPolicy?: ExecutionPolicy;
12786
- tags?: string[];
12787
12791
  }
12788
12792
  declare class TestSuite {
12789
12793
  private readonly _dataset;
@@ -12820,7 +12824,7 @@ declare class TestSuite {
12820
12824
  * @throws Error if any item is missing an `id`
12821
12825
  */
12822
12826
  update(items: UpdateTestSuiteItem[]): Promise<void>;
12823
- getItems(): Promise<Array<{
12827
+ getItems(nbSamples?: number, lastRetrievedId?: string): Promise<Array<{
12824
12828
  id: string;
12825
12829
  data: Record<string, unknown>;
12826
12830
  description?: string;
@@ -12889,6 +12893,10 @@ interface RunTestsOptions {
12889
12893
  experimentTags?: string[];
12890
12894
  /** Optional model name override for LLMJudge evaluators */
12891
12895
  model?: string;
12896
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
12897
+ taskThreads?: number;
12898
+ /** Limit the number of dataset items to evaluate. If not set, all items are evaluated. */
12899
+ nbSamples?: number;
12892
12900
  }
12893
12901
  /**
12894
12902
  * Run a test suite evaluation against a task function.
@@ -14118,6 +14126,8 @@ interface EvaluateOptions<T = Record<string, unknown>> {
14118
14126
  scoringKeyMapping?: ScoringKeyMappingType;
14119
14127
  /** Optional list of tags to associate with the experiment */
14120
14128
  tags?: string[];
14129
+ /** Number of concurrent task executions (default: 16, matching Python SDK) */
14130
+ taskThreads?: number;
14121
14131
  /** Optional agent configuration blueprint ID to link with the experiment */
14122
14132
  blueprintId?: string;
14123
14133
  }
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- export{oa as AgentTaskCompletionJudge,na as AgentToolCorrectnessJudge,ba as AnswerRelevance,Z as BaseLLMJudgeMetric,y as BaseMetric,z as BaseSuiteEvaluator,m as ChatPrompt,qa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,W as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ia as DemographicBiasJudge,ga as DialogueHelpfulnessJudge,V as ExactMatch,ca as GEval,da as GEvalPreset,ka as GenderBiasJudge,aa as Hallucination,Y as IsJson,M as LLMJudge,D as ModelConfigurationError,B as ModelError,C as ModelGenerationError,_ as Moderation,va as Opik,A as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ja as PoliticalBiasJudge,l as Prompt,k as PromptType,pa as PromptUncertaintyJudge,ha as QARelevanceJudge,X as RegexMatch,ma as RegionalBiasJudge,la as ReligiousBiasJudge,L as ResponseSchema,J as SYSTEM_PROMPT,fa as SummarizationCoherenceJudge,ea as SummarizationConsistencyJudge,ua as TestSuite,w as TestSuiteResult,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,K as USER_PROMPT_TEMPLATE,$ as Usefulness,F as VercelAIChatModel,s as activateRunner,q as agentConfigContext,x as buildSuiteResult,G as createModel,H as createModelFromInstance,O as deserializeEvaluators,E as detectProvider,c as disableLogger,T as evaluate,U as evaluatePrompt,R as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,ra as resolveEvaluators,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,I as resolveModel,S as runTests,N as serializeEvaluators,b as setLoggerLevel,u as track,sa as validateEvaluators,ta as validateExecutionPolicy,wa as z}from'./chunk-2AOEXUQ4.js';
1
+ export{oa as AgentTaskCompletionJudge,na as AgentToolCorrectnessJudge,ba as AnswerRelevance,Z as BaseLLMJudgeMetric,y as BaseMetric,z as BaseSuiteEvaluator,m as ChatPrompt,qa as ComplianceRiskJudge,i as ConfigMismatchError,h as ConfigNotFoundError,W as Contains,v as DEFAULT_EXECUTION_POLICY,j as Dataset,f as DatasetVersion,g as DatasetVersionNotFoundError,ia as DemographicBiasJudge,ga as DialogueHelpfulnessJudge,V as ExactMatch,ca as GEval,da as GEvalPreset,ka as GenderBiasJudge,aa as Hallucination,Y as IsJson,M as LLMJudge,D as ModelConfigurationError,B as ModelError,C as ModelGenerationError,_ as Moderation,va as Opik,A as OpikBaseModel,n as OpikQueryLanguage,d as OpikSpanType,ja as PoliticalBiasJudge,l as Prompt,k as PromptType,pa as PromptUncertaintyJudge,ha as QARelevanceJudge,X as RegexMatch,ma as RegionalBiasJudge,la as ReligiousBiasJudge,L as ResponseSchema,J as SYSTEM_PROMPT,fa as SummarizationCoherenceJudge,ea as SummarizationConsistencyJudge,ua as TestSuite,w as TestSuiteResult,p as ThreadsAnnotationQueue,o as TracesAnnotationQueue,K as USER_PROMPT_TEMPLATE,$ as Usefulness,F as VercelAIChatModel,s as activateRunner,q as agentConfigContext,x as buildSuiteResult,G as createModel,H as createModelFromInstance,O as deserializeEvaluators,E as detectProvider,c as disableLogger,T as evaluate,U as evaluatePrompt,R as evaluateTestSuite,r as flushAll,e as generateId,t as getTrackContext,a as logger,ra as resolveEvaluators,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,I as resolveModel,S as runTests,N as serializeEvaluators,b as setLoggerLevel,u as track,sa as validateEvaluators,ta as validateExecutionPolicy,wa as z}from'./chunk-HRNPUK4B.js';
@@ -1 +1 @@
1
- export{v as DEFAULT_EXECUTION_POLICY,ua as TestSuite,w as TestSuiteResult,x as buildSuiteResult,O as deserializeEvaluators,R as evaluateTestSuite,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,S as runTests,N as serializeEvaluators}from'./chunk-2AOEXUQ4.js';
1
+ export{v as DEFAULT_EXECUTION_POLICY,ua as TestSuite,w as TestSuiteResult,x as buildSuiteResult,O as deserializeEvaluators,R as evaluateTestSuite,P as resolveExecutionPolicy,Q as resolveItemExecutionPolicy,S as runTests,N as serializeEvaluators}from'./chunk-HRNPUK4B.js';
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "opik",
3
3
  "description": "Opik TypeScript and JavaScript SDK",
4
- "version": "2.0.0",
4
+ "version": "2.0.1",
5
5
  "repository": {
6
6
  "type": "git",
7
7
  "url": "git+https://github.com/comet-ml/opik.git",