@assay-ai/core 0.2.0-beta → 0.3.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +576 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +69 -1
- package/dist/index.d.ts +69 -1
- package/dist/index.js +568 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -19,6 +19,7 @@ interface LLMTestCase {
|
|
|
19
19
|
completionTime?: number;
|
|
20
20
|
name?: string;
|
|
21
21
|
tags?: string[];
|
|
22
|
+
conversation?: ConversationalTestCase;
|
|
22
23
|
}
|
|
23
24
|
interface ConversationalTestCase {
|
|
24
25
|
turns: Array<{
|
|
@@ -238,6 +239,49 @@ declare class JsonCorrectnessMetric extends BaseMetric {
|
|
|
238
239
|
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
239
240
|
}
|
|
240
241
|
|
|
242
|
+
interface ToolCorrectnessConfig extends MetricConfig {
|
|
243
|
+
/** Whether to also compare tool input parameters (default: false) */
|
|
244
|
+
matchParameters?: boolean;
|
|
245
|
+
}
|
|
246
|
+
declare class ToolCorrectnessMetric extends BaseMetric {
|
|
247
|
+
readonly name = "Tool Correctness";
|
|
248
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
249
|
+
readonly requiresProvider = false;
|
|
250
|
+
private readonly matchParameters;
|
|
251
|
+
constructor(config?: ToolCorrectnessConfig);
|
|
252
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
declare class TaskCompletionMetric extends BaseMetric {
|
|
256
|
+
readonly name = "Task Completion";
|
|
257
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
258
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
declare class GoalAccuracyMetric extends BaseMetric {
|
|
262
|
+
readonly name = "Goal Accuracy";
|
|
263
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
264
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
declare class ConversationCompletenessMetric extends BaseMetric {
|
|
268
|
+
readonly name = "Conversation Completeness";
|
|
269
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
270
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
declare class KnowledgeRetentionMetric extends BaseMetric {
|
|
274
|
+
readonly name = "Knowledge Retention";
|
|
275
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
276
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
declare class RoleAdherenceMetric extends BaseMetric {
|
|
280
|
+
readonly name = "Role Adherence";
|
|
281
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
282
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
283
|
+
}
|
|
284
|
+
|
|
241
285
|
declare class OpenAIProvider extends BaseLLMProvider {
|
|
242
286
|
private client;
|
|
243
287
|
constructor(config?: ProviderConfig);
|
|
@@ -259,6 +303,30 @@ declare class OllamaProvider extends BaseLLMProvider {
|
|
|
259
303
|
generate(prompt: string): Promise<string>;
|
|
260
304
|
}
|
|
261
305
|
|
|
306
|
+
declare class GeminiProvider extends BaseLLMProvider {
|
|
307
|
+
private readonly apiKey;
|
|
308
|
+
constructor(config?: ProviderConfig);
|
|
309
|
+
get providerName(): string;
|
|
310
|
+
generate(prompt: string): Promise<string>;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
interface AzureOpenAIConfig extends ProviderConfig {
|
|
314
|
+
endpoint?: string;
|
|
315
|
+
deploymentName?: string;
|
|
316
|
+
apiVersion?: string;
|
|
317
|
+
}
|
|
318
|
+
declare class AzureOpenAIProvider extends BaseLLMProvider {
|
|
319
|
+
private readonly azureConfig;
|
|
320
|
+
private client;
|
|
321
|
+
private readonly endpoint;
|
|
322
|
+
private readonly apiKey;
|
|
323
|
+
private readonly deploymentName;
|
|
324
|
+
private readonly apiVersion;
|
|
325
|
+
constructor(azureConfig?: AzureOpenAIConfig);
|
|
326
|
+
get providerName(): string;
|
|
327
|
+
generate(prompt: string): Promise<string>;
|
|
328
|
+
}
|
|
329
|
+
|
|
262
330
|
/**
|
|
263
331
|
* Resolve a provider from a string name, provider instance, or auto-detect from env vars.
|
|
264
332
|
* Returns a noop provider if undefined (for non-LLM metrics).
|
|
@@ -457,4 +525,4 @@ declare function weightedAverage(values: number[], weights: number[]): number;
|
|
|
457
525
|
*/
|
|
458
526
|
declare function meanAveragePrecision(relevances: boolean[]): number;
|
|
459
527
|
|
|
460
|
-
export { AnswerRelevancyMetric, AnthropicProvider, type AssayConfig, type AssertEvalOptions, type AssertEvalResult, BaseLLMProvider, BaseMetric, BiasMetric, ConsoleReporter, ContextualPrecisionMetric, ContextualRecallMetric, ContextualRelevancyMetric, type ConversationalTestCase, type EvaluateConfig, type EvaluateResult, type EvaluationDataset, type EvaluationSummary, type ExactMatchConfig, ExactMatchMetric, FaithfulnessMetric, GEval, type GEvalConfig, type Golden, HallucinationMetric, type JsonCorrectnessConfig, JsonCorrectnessMetric, type LLMTestCase, type Limiter, type MetricConfig, type MetricResult, OllamaProvider, OpenAIProvider, type ParseJsonOptions, type ProviderConfig, SummarizationMetric, type TestCaseResult, type ToolCall, ToxicityMetric, assertEval, createLimiter, evaluate, meanAveragePrecision, parseJson, ratio, resetConfigCache, resolveConfig, resolveProvider, tryParseJson, weightedAverage };
|
|
528
|
+
export { AnswerRelevancyMetric, AnthropicProvider, type AssayConfig, type AssertEvalOptions, type AssertEvalResult, type AzureOpenAIConfig, AzureOpenAIProvider, BaseLLMProvider, BaseMetric, BiasMetric, ConsoleReporter, ContextualPrecisionMetric, ContextualRecallMetric, ContextualRelevancyMetric, ConversationCompletenessMetric, type ConversationalTestCase, type EvaluateConfig, type EvaluateResult, type EvaluationDataset, type EvaluationSummary, type ExactMatchConfig, ExactMatchMetric, FaithfulnessMetric, GEval, type GEvalConfig, GeminiProvider, GoalAccuracyMetric, type Golden, HallucinationMetric, type JsonCorrectnessConfig, JsonCorrectnessMetric, KnowledgeRetentionMetric, type LLMTestCase, type Limiter, type MetricConfig, type MetricResult, OllamaProvider, OpenAIProvider, type ParseJsonOptions, type ProviderConfig, RoleAdherenceMetric, SummarizationMetric, TaskCompletionMetric, type TestCaseResult, type ToolCall, type ToolCorrectnessConfig, ToolCorrectnessMetric, ToxicityMetric, assertEval, createLimiter, evaluate, meanAveragePrecision, parseJson, ratio, resetConfigCache, resolveConfig, resolveProvider, tryParseJson, weightedAverage };
|
package/dist/index.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ interface LLMTestCase {
|
|
|
19
19
|
completionTime?: number;
|
|
20
20
|
name?: string;
|
|
21
21
|
tags?: string[];
|
|
22
|
+
conversation?: ConversationalTestCase;
|
|
22
23
|
}
|
|
23
24
|
interface ConversationalTestCase {
|
|
24
25
|
turns: Array<{
|
|
@@ -238,6 +239,49 @@ declare class JsonCorrectnessMetric extends BaseMetric {
|
|
|
238
239
|
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
239
240
|
}
|
|
240
241
|
|
|
242
|
+
interface ToolCorrectnessConfig extends MetricConfig {
|
|
243
|
+
/** Whether to also compare tool input parameters (default: false) */
|
|
244
|
+
matchParameters?: boolean;
|
|
245
|
+
}
|
|
246
|
+
declare class ToolCorrectnessMetric extends BaseMetric {
|
|
247
|
+
readonly name = "Tool Correctness";
|
|
248
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
249
|
+
readonly requiresProvider = false;
|
|
250
|
+
private readonly matchParameters;
|
|
251
|
+
constructor(config?: ToolCorrectnessConfig);
|
|
252
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
declare class TaskCompletionMetric extends BaseMetric {
|
|
256
|
+
readonly name = "Task Completion";
|
|
257
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
258
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
declare class GoalAccuracyMetric extends BaseMetric {
|
|
262
|
+
readonly name = "Goal Accuracy";
|
|
263
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
264
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
declare class ConversationCompletenessMetric extends BaseMetric {
|
|
268
|
+
readonly name = "Conversation Completeness";
|
|
269
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
270
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
declare class KnowledgeRetentionMetric extends BaseMetric {
|
|
274
|
+
readonly name = "Knowledge Retention";
|
|
275
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
276
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
declare class RoleAdherenceMetric extends BaseMetric {
|
|
280
|
+
readonly name = "Role Adherence";
|
|
281
|
+
readonly requiredFields: (keyof LLMTestCase)[];
|
|
282
|
+
measure(testCase: LLMTestCase): Promise<MetricResult>;
|
|
283
|
+
}
|
|
284
|
+
|
|
241
285
|
declare class OpenAIProvider extends BaseLLMProvider {
|
|
242
286
|
private client;
|
|
243
287
|
constructor(config?: ProviderConfig);
|
|
@@ -259,6 +303,30 @@ declare class OllamaProvider extends BaseLLMProvider {
|
|
|
259
303
|
generate(prompt: string): Promise<string>;
|
|
260
304
|
}
|
|
261
305
|
|
|
306
|
+
declare class GeminiProvider extends BaseLLMProvider {
|
|
307
|
+
private readonly apiKey;
|
|
308
|
+
constructor(config?: ProviderConfig);
|
|
309
|
+
get providerName(): string;
|
|
310
|
+
generate(prompt: string): Promise<string>;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
interface AzureOpenAIConfig extends ProviderConfig {
|
|
314
|
+
endpoint?: string;
|
|
315
|
+
deploymentName?: string;
|
|
316
|
+
apiVersion?: string;
|
|
317
|
+
}
|
|
318
|
+
declare class AzureOpenAIProvider extends BaseLLMProvider {
|
|
319
|
+
private readonly azureConfig;
|
|
320
|
+
private client;
|
|
321
|
+
private readonly endpoint;
|
|
322
|
+
private readonly apiKey;
|
|
323
|
+
private readonly deploymentName;
|
|
324
|
+
private readonly apiVersion;
|
|
325
|
+
constructor(azureConfig?: AzureOpenAIConfig);
|
|
326
|
+
get providerName(): string;
|
|
327
|
+
generate(prompt: string): Promise<string>;
|
|
328
|
+
}
|
|
329
|
+
|
|
262
330
|
/**
|
|
263
331
|
* Resolve a provider from a string name, provider instance, or auto-detect from env vars.
|
|
264
332
|
* Returns a noop provider if undefined (for non-LLM metrics).
|
|
@@ -457,4 +525,4 @@ declare function weightedAverage(values: number[], weights: number[]): number;
|
|
|
457
525
|
*/
|
|
458
526
|
declare function meanAveragePrecision(relevances: boolean[]): number;
|
|
459
527
|
|
|
460
|
-
export { AnswerRelevancyMetric, AnthropicProvider, type AssayConfig, type AssertEvalOptions, type AssertEvalResult, BaseLLMProvider, BaseMetric, BiasMetric, ConsoleReporter, ContextualPrecisionMetric, ContextualRecallMetric, ContextualRelevancyMetric, type ConversationalTestCase, type EvaluateConfig, type EvaluateResult, type EvaluationDataset, type EvaluationSummary, type ExactMatchConfig, ExactMatchMetric, FaithfulnessMetric, GEval, type GEvalConfig, type Golden, HallucinationMetric, type JsonCorrectnessConfig, JsonCorrectnessMetric, type LLMTestCase, type Limiter, type MetricConfig, type MetricResult, OllamaProvider, OpenAIProvider, type ParseJsonOptions, type ProviderConfig, SummarizationMetric, type TestCaseResult, type ToolCall, ToxicityMetric, assertEval, createLimiter, evaluate, meanAveragePrecision, parseJson, ratio, resetConfigCache, resolveConfig, resolveProvider, tryParseJson, weightedAverage };
|
|
528
|
+
export { AnswerRelevancyMetric, AnthropicProvider, type AssayConfig, type AssertEvalOptions, type AssertEvalResult, type AzureOpenAIConfig, AzureOpenAIProvider, BaseLLMProvider, BaseMetric, BiasMetric, ConsoleReporter, ContextualPrecisionMetric, ContextualRecallMetric, ContextualRelevancyMetric, ConversationCompletenessMetric, type ConversationalTestCase, type EvaluateConfig, type EvaluateResult, type EvaluationDataset, type EvaluationSummary, type ExactMatchConfig, ExactMatchMetric, FaithfulnessMetric, GEval, type GEvalConfig, GeminiProvider, GoalAccuracyMetric, type Golden, HallucinationMetric, type JsonCorrectnessConfig, JsonCorrectnessMetric, KnowledgeRetentionMetric, type LLMTestCase, type Limiter, type MetricConfig, type MetricResult, OllamaProvider, OpenAIProvider, type ParseJsonOptions, type ProviderConfig, RoleAdherenceMetric, SummarizationMetric, TaskCompletionMetric, type TestCaseResult, type ToolCall, type ToolCorrectnessConfig, ToolCorrectnessMetric, ToxicityMetric, assertEval, createLimiter, evaluate, meanAveragePrecision, parseJson, ratio, resetConfigCache, resolveConfig, resolveProvider, tryParseJson, weightedAverage };
|