@arizeai/phoenix-evals 0.5.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +37 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +43 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/index.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/index.js +4 -0
- package/dist/esm/__generated__/default_templates/index.js.map +1 -0
- package/dist/esm/__generated__/types.d.ts +9 -0
- package/dist/esm/__generated__/types.d.ts.map +1 -0
- package/dist/esm/__generated__/types.js +3 -0
- package/dist/esm/__generated__/types.js.map +1 -0
- package/dist/esm/core/EvaluatorBase.d.ts +19 -0
- package/dist/esm/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/esm/core/EvaluatorBase.js +18 -0
- package/dist/esm/core/EvaluatorBase.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.d.ts +19 -2
- package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.js +29 -2
- package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/LLMEvaluator.d.ts +4 -7
- package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/LLMEvaluator.js +4 -7
- package/dist/esm/llm/LLMEvaluator.js.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts +44 -0
- package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -0
- package/dist/esm/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +11 -12
- package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +6 -7
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +3 -4
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/llm/index.d.ts +1 -1
- package/dist/esm/llm/index.js +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/data.d.ts +194 -0
- package/dist/esm/types/data.d.ts.map +1 -0
- package/dist/esm/types/data.js +2 -0
- package/dist/esm/types/data.js.map +1 -0
- package/dist/esm/types/evals.d.ts +17 -3
- package/dist/esm/types/evals.d.ts.map +1 -1
- package/dist/esm/types/index.d.ts +1 -0
- package/dist/esm/types/index.d.ts.map +1 -1
- package/dist/esm/types/index.js +1 -0
- package/dist/esm/types/index.js.map +1 -1
- package/dist/esm/types/otel.d.ts +14 -13
- package/dist/esm/types/otel.d.ts.map +1 -1
- package/dist/esm/utils/bindEvaluator.d.ts +219 -0
- package/dist/esm/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/esm/utils/bindEvaluator.js +163 -0
- package/dist/esm/utils/bindEvaluator.js.map +1 -0
- package/dist/esm/utils/index.d.ts +2 -0
- package/dist/esm/utils/index.d.ts.map +1 -0
- package/dist/esm/utils/index.js +2 -0
- package/dist/esm/utils/index.js.map +1 -0
- package/dist/esm/utils/objectMappingUtils.d.ts +166 -0
- package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/esm/utils/objectMappingUtils.js +191 -0
- package/dist/esm/utils/objectMappingUtils.js.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +40 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +46 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/index.d.ts +3 -0
- package/dist/src/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/index.js +9 -0
- package/dist/src/__generated__/default_templates/index.js.map +1 -0
- package/dist/src/__generated__/types.d.ts +9 -0
- package/dist/src/__generated__/types.d.ts.map +1 -0
- package/dist/src/__generated__/types.js +4 -0
- package/dist/src/__generated__/types.js.map +1 -0
- package/dist/src/core/EvaluatorBase.d.ts +19 -0
- package/dist/src/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/src/core/EvaluatorBase.js +17 -0
- package/dist/src/core/EvaluatorBase.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.d.ts +19 -2
- package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.js +13 -2
- package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/LLMEvaluator.d.ts +4 -7
- package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/src/llm/LLMEvaluator.js +16 -5
- package/dist/src/llm/LLMEvaluator.js.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts +44 -0
- package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -0
- package/dist/src/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +13 -13
- package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts +6 -7
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js +4 -4
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/llm/index.d.ts +1 -1
- package/dist/src/llm/index.js +1 -1
- package/dist/src/types/data.d.ts +194 -0
- package/dist/src/types/data.d.ts.map +1 -0
- package/dist/src/types/data.js +3 -0
- package/dist/src/types/data.js.map +1 -0
- package/dist/src/types/evals.d.ts +17 -3
- package/dist/src/types/evals.d.ts.map +1 -1
- package/dist/src/types/index.d.ts +1 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/otel.d.ts +14 -13
- package/dist/src/types/otel.d.ts.map +1 -1
- package/dist/src/utils/bindEvaluator.d.ts +219 -0
- package/dist/src/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/src/utils/bindEvaluator.js +166 -0
- package/dist/src/utils/bindEvaluator.js.map +1 -0
- package/dist/src/utils/index.d.ts +2 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/{default_templates → utils}/index.js +1 -2
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/objectMappingUtils.d.ts +166 -0
- package/dist/src/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/src/utils/objectMappingUtils.js +191 -0
- package/dist/src/utils/objectMappingUtils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -1
- package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +39 -0
- package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +45 -0
- package/src/__generated__/default_templates/index.ts +4 -0
- package/src/__generated__/types.ts +11 -0
- package/src/core/EvaluatorBase.ts +43 -0
- package/src/index.ts +1 -0
- package/src/llm/ClassificationEvaluator.ts +39 -3
- package/src/llm/LLMEvaluator.ts +7 -16
- package/src/llm/createClassificationEvaluator.ts +1 -1
- package/src/llm/createDocumentRelevanceEvaluator.ts +79 -0
- package/src/llm/createHallucinationEvaluator.ts +17 -19
- package/src/llm/index.ts +1 -1
- package/src/types/data.ts +200 -0
- package/src/types/evals.ts +25 -5
- package/src/types/index.ts +1 -0
- package/src/types/otel.ts +15 -13
- package/src/utils/bindEvaluator.ts +229 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/objectMappingUtils.ts +202 -0
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -25
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +0 -31
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/index.d.ts +0 -3
- package/dist/esm/default_templates/index.d.ts.map +0 -1
- package/dist/esm/default_templates/index.js +0 -3
- package/dist/esm/default_templates/index.js.map +0 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +0 -43
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +0 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -28
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +0 -34
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/index.d.ts +0 -3
- package/dist/src/default_templates/index.d.ts.map +0 -1
- package/dist/src/default_templates/index.js.map +0 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +0 -43
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +0 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts +0 -25
- package/src/default_templates/HALLUCINATION_TEMPLATE.ts +0 -31
- package/src/default_templates/index.ts +0 -2
- package/src/llm/createDocumentRelevancyEvaluator.ts +0 -77
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
|
|
3
|
+
import type { ClassificationEvaluatorConfig } from "../types";
|
|
4
|
+
|
|
5
|
+
export const HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG: ClassificationEvaluatorConfig = {
|
|
6
|
+
name: "hallucination",
|
|
7
|
+
description: "A specialized evaluator for detecting hallucinations in grounded LLM responses.",
|
|
8
|
+
optimizationDirection: "MINIMIZE",
|
|
9
|
+
template: [
|
|
10
|
+
{
|
|
11
|
+
role: "user",
|
|
12
|
+
content: `
|
|
13
|
+
In this task, you will be presented with a query, some context and a response. The response
|
|
14
|
+
is generated to the question based on the context. The response may contain false
|
|
15
|
+
information. You must use the context to determine if the response to the question
|
|
16
|
+
contains false information, if the response is a hallucination of facts. Your objective is
|
|
17
|
+
to determine whether the response text contains factual information and is not a
|
|
18
|
+
hallucination. A 'hallucination' refers to a response that is not based on the context or
|
|
19
|
+
assumes information that is not available in the context. Your response should be a single
|
|
20
|
+
word: either 'factual' or 'hallucinated', and it should not include any other text or
|
|
21
|
+
characters. 'hallucinated' indicates that the response provides factually inaccurate
|
|
22
|
+
information to the query based on the context. 'factual' indicates that the response to
|
|
23
|
+
the question is correct relative to the context, and does not contain made up
|
|
24
|
+
information. Please read the query and context carefully before determining your
|
|
25
|
+
response.
|
|
26
|
+
|
|
27
|
+
[BEGIN DATA]
|
|
28
|
+
************
|
|
29
|
+
[Query]: {{input}}
|
|
30
|
+
************
|
|
31
|
+
[Context]: {{context}}
|
|
32
|
+
************
|
|
33
|
+
[Response]: {{output}}
|
|
34
|
+
************
|
|
35
|
+
[END DATA]
|
|
36
|
+
|
|
37
|
+
Is the response above factual or hallucinated based on the query and context?
|
|
38
|
+
`,
|
|
39
|
+
},
|
|
40
|
+
],
|
|
41
|
+
choices: {
|
|
42
|
+
"hallucinated": 1,
|
|
43
|
+
"factual": 0
|
|
44
|
+
},
|
|
45
|
+
};
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
|
|
3
|
+
export { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "./DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG";
|
|
4
|
+
export { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "./HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG";
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
|
|
3
|
+
import type { PromptTemplate } from "../types/templating";
|
|
4
|
+
|
|
5
|
+
export type ClassificationEvaluatorConfig = {
|
|
6
|
+
name: string;
|
|
7
|
+
description: string;
|
|
8
|
+
optimizationDirection: "MINIMIZE" | "MAXIMIZE";
|
|
9
|
+
template: PromptTemplate;
|
|
10
|
+
choices: Record<string, number>;
|
|
11
|
+
};
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import {
|
|
2
|
+
CreateEvaluatorArgs,
|
|
3
|
+
EvaluationKind,
|
|
4
|
+
EvaluationResult,
|
|
5
|
+
EvaluatorInterface,
|
|
6
|
+
OptimizationDirection,
|
|
7
|
+
TelemetryConfig,
|
|
8
|
+
} from "../types";
|
|
9
|
+
import { ObjectMapping } from "../types/data";
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Base class for all evaluators
|
|
13
|
+
*/
|
|
14
|
+
export abstract class EvaluatorBase<RecordType extends Record<string, unknown>>
|
|
15
|
+
implements EvaluatorInterface<RecordType>
|
|
16
|
+
{
|
|
17
|
+
readonly name: string;
|
|
18
|
+
readonly kind: EvaluationKind;
|
|
19
|
+
readonly optimizationDirection?: OptimizationDirection;
|
|
20
|
+
readonly inputMapping?: ObjectMapping<RecordType>;
|
|
21
|
+
readonly telemetry?: TelemetryConfig;
|
|
22
|
+
constructor({
|
|
23
|
+
name,
|
|
24
|
+
kind,
|
|
25
|
+
optimizationDirection,
|
|
26
|
+
inputMapping,
|
|
27
|
+
telemetry,
|
|
28
|
+
}: CreateEvaluatorArgs<RecordType>) {
|
|
29
|
+
this.name = name;
|
|
30
|
+
this.kind = kind;
|
|
31
|
+
this.optimizationDirection = optimizationDirection;
|
|
32
|
+
this.inputMapping = inputMapping;
|
|
33
|
+
this.telemetry = telemetry;
|
|
34
|
+
}
|
|
35
|
+
abstract evaluate(_example: RecordType): Promise<EvaluationResult>;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
|
|
39
|
+
*/
|
|
40
|
+
abstract bindInputMapping(
|
|
41
|
+
inputMapping: ObjectMapping<RecordType>
|
|
42
|
+
): EvaluatorBase<RecordType>;
|
|
43
|
+
}
|
package/src/index.ts
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
import { getTemplateVariables } from "../template";
|
|
2
2
|
import {
|
|
3
|
+
ClassificationChoicesMap,
|
|
3
4
|
CreateClassificationEvaluatorArgs,
|
|
4
5
|
EvaluatorFn,
|
|
5
6
|
PromptTemplate,
|
|
6
7
|
WithPromptTemplate,
|
|
7
8
|
} from "../types";
|
|
9
|
+
import { ObjectMapping } from "../types/data";
|
|
10
|
+
import { remapObject } from "../utils/objectMappingUtils";
|
|
8
11
|
|
|
9
12
|
import { createClassifierFn } from "./createClassifierFn";
|
|
10
13
|
import { LLMEvaluator } from "./LLMEvaluator";
|
|
11
14
|
|
|
15
|
+
import { LanguageModel } from "ai";
|
|
16
|
+
|
|
12
17
|
/**
|
|
13
18
|
* An LLM evaluator that performs evaluation via classification
|
|
14
19
|
*/
|
|
@@ -18,14 +23,34 @@ export class ClassificationEvaluator<RecordType extends Record<string, unknown>>
|
|
|
18
23
|
{
|
|
19
24
|
readonly evaluatorFn: EvaluatorFn<RecordType>;
|
|
20
25
|
readonly promptTemplate: PromptTemplate;
|
|
26
|
+
/**
|
|
27
|
+
* A dynamically computed set of prompt template variables
|
|
28
|
+
*/
|
|
21
29
|
private _promptTemplateVariables: string[] | undefined;
|
|
22
|
-
|
|
30
|
+
/**
|
|
31
|
+
* The model to use for classification
|
|
32
|
+
*/
|
|
33
|
+
readonly model: LanguageModel;
|
|
34
|
+
/**
|
|
35
|
+
* The choices to classify the example into
|
|
36
|
+
*/
|
|
37
|
+
readonly choices: ClassificationChoicesMap;
|
|
38
|
+
|
|
39
|
+
constructor(args: CreateClassificationEvaluatorArgs<RecordType>) {
|
|
23
40
|
super(args);
|
|
24
41
|
this.promptTemplate = args.promptTemplate;
|
|
25
|
-
this.
|
|
42
|
+
this.model = args.model;
|
|
43
|
+
this.choices = args.choices;
|
|
44
|
+
this.evaluatorFn = createClassifierFn<RecordType>({
|
|
45
|
+
...args,
|
|
46
|
+
});
|
|
26
47
|
}
|
|
27
48
|
evaluate = (example: RecordType) => {
|
|
28
|
-
return this.evaluatorFn(
|
|
49
|
+
return this.evaluatorFn(
|
|
50
|
+
this.inputMapping
|
|
51
|
+
? remapObject<RecordType>(example, this.inputMapping)
|
|
52
|
+
: example
|
|
53
|
+
);
|
|
29
54
|
};
|
|
30
55
|
/**
|
|
31
56
|
* List out the prompt template variables needed to perform evaluation
|
|
@@ -40,4 +65,15 @@ export class ClassificationEvaluator<RecordType extends Record<string, unknown>>
|
|
|
40
65
|
// Give a copy of the variables
|
|
41
66
|
return [...this._promptTemplateVariables];
|
|
42
67
|
}
|
|
68
|
+
/**
|
|
69
|
+
* Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
|
|
70
|
+
*/
|
|
71
|
+
bindInputMapping(
|
|
72
|
+
inputMapping: ObjectMapping<RecordType>
|
|
73
|
+
): ClassificationEvaluator<RecordType> {
|
|
74
|
+
return new ClassificationEvaluator({
|
|
75
|
+
...this,
|
|
76
|
+
inputMapping,
|
|
77
|
+
});
|
|
78
|
+
}
|
|
43
79
|
}
|
package/src/llm/LLMEvaluator.ts
CHANGED
|
@@ -1,22 +1,13 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
EvaluationResult,
|
|
4
|
-
Evaluator,
|
|
5
|
-
OptimizationDirection,
|
|
6
|
-
} from "../types";
|
|
1
|
+
import { EvaluatorBase } from "../core/EvaluatorBase";
|
|
2
|
+
import { CreateLLMEvaluatorArgs } from "../types";
|
|
7
3
|
|
|
8
4
|
/**
|
|
9
5
|
* Base class for llm evaluation metrics / scores
|
|
10
6
|
*/
|
|
11
|
-
export abstract class LLMEvaluator<
|
|
12
|
-
|
|
13
|
-
{
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
readonly optimizationDirection?: OptimizationDirection;
|
|
17
|
-
constructor({ name, optimizationDirection }: CreateEvaluatorArgs) {
|
|
18
|
-
this.name = name;
|
|
19
|
-
this.optimizationDirection = optimizationDirection;
|
|
7
|
+
export abstract class LLMEvaluator<
|
|
8
|
+
RecordType extends Record<string, unknown>,
|
|
9
|
+
> extends EvaluatorBase<RecordType> {
|
|
10
|
+
constructor({ ...args }: CreateLLMEvaluatorArgs<RecordType>) {
|
|
11
|
+
super({ kind: "LLM", ...args });
|
|
20
12
|
}
|
|
21
|
-
abstract evaluate(_example: RecordType): Promise<EvaluationResult>;
|
|
22
13
|
}
|
|
@@ -5,7 +5,7 @@ import { ClassificationEvaluator } from "./ClassificationEvaluator";
|
|
|
5
5
|
export function createClassificationEvaluator<
|
|
6
6
|
RecordType extends Record<string, unknown>,
|
|
7
7
|
>(
|
|
8
|
-
args: CreateClassificationEvaluatorArgs
|
|
8
|
+
args: CreateClassificationEvaluatorArgs<RecordType>
|
|
9
9
|
): ClassificationEvaluator<RecordType> {
|
|
10
10
|
return new ClassificationEvaluator<RecordType>(args);
|
|
11
11
|
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
|
|
2
|
+
import { CreateClassificationEvaluatorArgs } from "../types/evals";
|
|
3
|
+
|
|
4
|
+
import { ClassificationEvaluator } from "./ClassificationEvaluator";
|
|
5
|
+
import { createClassificationEvaluator } from "./createClassificationEvaluator";
|
|
6
|
+
|
|
7
|
+
export interface DocumentRelevanceEvaluatorArgs<
|
|
8
|
+
RecordType extends Record<
|
|
9
|
+
string,
|
|
10
|
+
unknown
|
|
11
|
+
> = DocumentRelevanceEvaluationRecord,
|
|
12
|
+
> extends Omit<
|
|
13
|
+
CreateClassificationEvaluatorArgs<RecordType>,
|
|
14
|
+
"promptTemplate" | "choices" | "optimizationDirection" | "name"
|
|
15
|
+
> {
|
|
16
|
+
optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
|
|
17
|
+
name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
|
|
18
|
+
choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
|
|
19
|
+
promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* A record to be evaluated by the document relevance evaluator.
|
|
24
|
+
*/
|
|
25
|
+
export interface DocumentRelevanceEvaluationRecord {
|
|
26
|
+
input: string;
|
|
27
|
+
documentText: string;
|
|
28
|
+
[key: string]: unknown;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Creates a document relevance evaluator function.
|
|
33
|
+
*
|
|
34
|
+
* This function returns an evaluator that determines whether a given document text
|
|
35
|
+
* is relevant to a provided input question. The evaluator uses a classification model
|
|
36
|
+
* and a prompt template to make its determination.
|
|
37
|
+
*
|
|
38
|
+
* @param args - The arguments for creating the document relevance evaluator.
|
|
39
|
+
* @param args.model - The model to use for classification.
|
|
40
|
+
* @param args.choices - The possible classification choices (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices).
|
|
41
|
+
* @param args.promptTemplate - The prompt template to use (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template).
|
|
42
|
+
* @param args.telemetry - The telemetry to use for the evaluator.
|
|
43
|
+
*
|
|
44
|
+
* @returns An evaluator that takes a {@link DocumentRelevanceEvaluationRecord} and returns a classification result
|
|
45
|
+
* indicating whether the document is relevant to the input question.
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```ts
|
|
49
|
+
* const evaluator = createDocumentRelevanceEvaluator({ model: openai("gpt-4o-mini") });
|
|
50
|
+
* const result = await evaluator.evaluate({
|
|
51
|
+
* input: "What is the capital of France?",
|
|
52
|
+
* documentText: "Paris is the capital and most populous city of France.",
|
|
53
|
+
* });
|
|
54
|
+
* console.log(result.label); // "relevant" or "unrelated"
|
|
55
|
+
* ```
|
|
56
|
+
*/
|
|
57
|
+
export function createDocumentRelevanceEvaluator<
|
|
58
|
+
RecordType extends Record<
|
|
59
|
+
string,
|
|
60
|
+
unknown
|
|
61
|
+
> = DocumentRelevanceEvaluationRecord,
|
|
62
|
+
>(
|
|
63
|
+
args: DocumentRelevanceEvaluatorArgs<RecordType>
|
|
64
|
+
): ClassificationEvaluator<RecordType> {
|
|
65
|
+
const {
|
|
66
|
+
choices = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices,
|
|
67
|
+
promptTemplate = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template,
|
|
68
|
+
optimizationDirection = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
|
|
69
|
+
name = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.name,
|
|
70
|
+
...rest
|
|
71
|
+
} = args;
|
|
72
|
+
return createClassificationEvaluator<RecordType>({
|
|
73
|
+
...rest,
|
|
74
|
+
promptTemplate,
|
|
75
|
+
choices,
|
|
76
|
+
optimizationDirection,
|
|
77
|
+
name,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
@@ -1,21 +1,19 @@
|
|
|
1
|
-
import {
|
|
2
|
-
HALLUCINATION_CHOICES,
|
|
3
|
-
HALLUCINATION_TEMPLATE,
|
|
4
|
-
} from "../default_templates/HALLUCINATION_TEMPLATE";
|
|
1
|
+
import { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
|
|
5
2
|
import { CreateClassificationEvaluatorArgs } from "../types/evals";
|
|
6
3
|
|
|
7
4
|
import { ClassificationEvaluator } from "./ClassificationEvaluator";
|
|
8
5
|
import { createClassificationEvaluator } from "./createClassificationEvaluator";
|
|
9
6
|
|
|
10
|
-
export interface HallucinationEvaluatorArgs
|
|
11
|
-
extends
|
|
12
|
-
|
|
7
|
+
export interface HallucinationEvaluatorArgs<
|
|
8
|
+
RecordType extends Record<string, unknown> = HallucinationEvaluationRecord,
|
|
9
|
+
> extends Omit<
|
|
10
|
+
CreateClassificationEvaluatorArgs<RecordType>,
|
|
13
11
|
"promptTemplate" | "choices" | "optimizationDirection" | "name"
|
|
14
12
|
> {
|
|
15
|
-
optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
|
|
16
|
-
name?: CreateClassificationEvaluatorArgs["name"];
|
|
17
|
-
choices?: CreateClassificationEvaluatorArgs["choices"];
|
|
18
|
-
promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
|
|
13
|
+
optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
|
|
14
|
+
name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
|
|
15
|
+
choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
|
|
16
|
+
promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
|
|
19
17
|
}
|
|
20
18
|
|
|
21
19
|
/**
|
|
@@ -24,7 +22,6 @@ export interface HallucinationEvaluatorArgs
|
|
|
24
22
|
export type HallucinationEvaluationRecord = {
|
|
25
23
|
input: string;
|
|
26
24
|
output: string;
|
|
27
|
-
reference?: string;
|
|
28
25
|
context?: string;
|
|
29
26
|
};
|
|
30
27
|
/**
|
|
@@ -35,20 +32,21 @@ export type HallucinationEvaluationRecord = {
|
|
|
35
32
|
*/
|
|
36
33
|
export function createHallucinationEvaluator<
|
|
37
34
|
RecordType extends Record<string, unknown> = HallucinationEvaluationRecord,
|
|
38
|
-
>(
|
|
35
|
+
>(
|
|
36
|
+
args: HallucinationEvaluatorArgs<RecordType>
|
|
37
|
+
): ClassificationEvaluator<RecordType> {
|
|
39
38
|
const {
|
|
40
|
-
choices =
|
|
41
|
-
promptTemplate =
|
|
42
|
-
optimizationDirection =
|
|
43
|
-
name =
|
|
39
|
+
choices = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.choices,
|
|
40
|
+
promptTemplate = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.template,
|
|
41
|
+
optimizationDirection = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
|
|
42
|
+
name = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.name,
|
|
44
43
|
...rest
|
|
45
44
|
} = args;
|
|
46
45
|
return createClassificationEvaluator<RecordType>({
|
|
47
|
-
...
|
|
46
|
+
...rest,
|
|
48
47
|
promptTemplate,
|
|
49
48
|
choices,
|
|
50
49
|
optimizationDirection,
|
|
51
50
|
name,
|
|
52
|
-
...rest,
|
|
53
51
|
});
|
|
54
52
|
}
|
package/src/llm/index.ts
CHANGED
|
@@ -2,6 +2,6 @@ export * from "./generateClassification";
|
|
|
2
2
|
export * from "./createClassifierFn";
|
|
3
3
|
export * from "./createClassificationEvaluator";
|
|
4
4
|
export * from "./createHallucinationEvaluator";
|
|
5
|
-
export * from "./
|
|
5
|
+
export * from "./createDocumentRelevanceEvaluator";
|
|
6
6
|
export * from "./ClassificationEvaluator";
|
|
7
7
|
export * from "./LLMEvaluator";
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A value extractor that can retrieve data from an object using various methods.
|
|
3
|
+
*
|
|
4
|
+
* This type supports multiple ways to extract values from your data structure:
|
|
5
|
+
* - **String paths**: Simple property names, dot notation, or JSONPath expressions
|
|
6
|
+
* - **Function extractors**: Custom transformation functions
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* **Simple property access:**
|
|
10
|
+
* ```typescript
|
|
11
|
+
* const getter: ValueGetter<{ name: string }> = "name";
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* **Dot notation for nested properties:**
|
|
16
|
+
* ```typescript
|
|
17
|
+
* const getter: ValueGetter<{ user: { profile: { name: string } } }> = "user.profile.name";
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* **Array element access:**
|
|
22
|
+
* ```typescript
|
|
23
|
+
* const getter: ValueGetter<{ items: string[] }> = "items[0]";
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* **JSONPath expression:**
|
|
28
|
+
* ```typescript
|
|
29
|
+
* const getter: ValueGetter<{ items: Array<{ id: number }> }> = "$.items[*].id";
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* **Function-based extraction:**
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const getter: ValueGetter<{ firstName: string; lastName: string }> =
|
|
36
|
+
* (data) => `${data.firstName} ${data.lastName}`;
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* **Complex transformation:**
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const getter: ValueGetter<{ scores: number[] }> =
|
|
43
|
+
* (data) => data.scores.reduce((a, b) => a + b, 0) / data.scores.length;
|
|
44
|
+
* ```
|
|
45
|
+
*
|
|
46
|
+
* @typeParam DataType - The type of the data object to extract values from
|
|
47
|
+
*/
|
|
48
|
+
export type ValueGetter<DataType extends Record<string, unknown>> =
|
|
49
|
+
| string
|
|
50
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
51
|
+
| ((data: DataType) => any);
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* A mapping configuration that transforms data from one structure to another.
|
|
55
|
+
*
|
|
56
|
+
* This type defines how to map fields from your data structure to the fields
|
|
57
|
+
* expected by an evaluator or other component. The mapping is flexible and
|
|
58
|
+
* supports multiple extraction methods.
|
|
59
|
+
*
|
|
60
|
+
* **Key Features:**
|
|
61
|
+
* - Preserves original data fields
|
|
62
|
+
* - Adds/overrides fields with mapped values
|
|
63
|
+
* - Supports nested property access
|
|
64
|
+
* - Supports array element access
|
|
65
|
+
* - Supports JSONPath expressions for complex queries
|
|
66
|
+
* - Supports function-based transformations
|
|
67
|
+
*
|
|
68
|
+
* @example
|
|
69
|
+
* **Basic field mapping:**
|
|
70
|
+
* ```typescript
|
|
71
|
+
* type MyData = {
|
|
72
|
+
* userQuery: string;
|
|
73
|
+
* context: string;
|
|
74
|
+
* response: string;
|
|
75
|
+
* };
|
|
76
|
+
*
|
|
77
|
+
* const mapping: ObjectMapping<MyData> = {
|
|
78
|
+
* input: "userQuery", // Map "input" to "userQuery"
|
|
79
|
+
* reference: "context", // Map "reference" to "context"
|
|
80
|
+
* output: "response", // Map "output" to "response"
|
|
81
|
+
* };
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
84
|
+
* @example
|
|
85
|
+
* **Nested property mapping:**
|
|
86
|
+
* ```typescript
|
|
87
|
+
* type ApiData = {
|
|
88
|
+
* request: {
|
|
89
|
+
* body: {
|
|
90
|
+
* query: string;
|
|
91
|
+
* context: string;
|
|
92
|
+
* };
|
|
93
|
+
* };
|
|
94
|
+
* response: {
|
|
95
|
+
* data: {
|
|
96
|
+
* text: string;
|
|
97
|
+
* };
|
|
98
|
+
* };
|
|
99
|
+
* };
|
|
100
|
+
*
|
|
101
|
+
* const mapping: ObjectMapping<ApiData> = {
|
|
102
|
+
* input: "request.body.query",
|
|
103
|
+
* reference: "request.body.context",
|
|
104
|
+
* output: "response.data.text",
|
|
105
|
+
* };
|
|
106
|
+
* ```
|
|
107
|
+
*
|
|
108
|
+
* @example
|
|
109
|
+
* **Array element access:**
|
|
110
|
+
* ```typescript
|
|
111
|
+
* type DataWithArrays = {
|
|
112
|
+
* messages: Array<{ role: string; content: string }>;
|
|
113
|
+
* sources: string[];
|
|
114
|
+
* };
|
|
115
|
+
*
|
|
116
|
+
* const mapping: ObjectMapping<DataWithArrays> = {
|
|
117
|
+
* firstMessage: "messages[0].content",
|
|
118
|
+
* lastSource: "sources[-1]", // Last element
|
|
119
|
+
* allRoles: "$.messages[*].role", // JSONPath for all roles
|
|
120
|
+
* };
|
|
121
|
+
* ```
|
|
122
|
+
*
|
|
123
|
+
* @example
|
|
124
|
+
* **Function-based transformations:**
|
|
125
|
+
* ```typescript
|
|
126
|
+
* type RawData = {
|
|
127
|
+
* firstName: string;
|
|
128
|
+
* lastName: string;
|
|
129
|
+
* contexts: string[];
|
|
130
|
+
* scores: number[];
|
|
131
|
+
* };
|
|
132
|
+
*
|
|
133
|
+
* const mapping: ObjectMapping<RawData> = {
|
|
134
|
+
* // Combine fields
|
|
135
|
+
* fullName: (data) => `${data.firstName} ${data.lastName}`,
|
|
136
|
+
* // Transform array to string
|
|
137
|
+
* contextText: (data) => data.contexts.join("\n\n"),
|
|
138
|
+
* // Calculate derived value
|
|
139
|
+
* averageScore: (data) =>
|
|
140
|
+
* data.scores.reduce((a, b) => a + b, 0) / data.scores.length,
|
|
141
|
+
* // Conditional logic
|
|
142
|
+
* status: (data) => data.scores.length > 0 ? "active" : "inactive",
|
|
143
|
+
* };
|
|
144
|
+
* ```
|
|
145
|
+
*
|
|
146
|
+
* @example
|
|
147
|
+
* **Mixed mapping types:**
|
|
148
|
+
* ```typescript
|
|
149
|
+
* type ComplexData = {
|
|
150
|
+
* user: {
|
|
151
|
+
* name: string;
|
|
152
|
+
* email: string;
|
|
153
|
+
* };
|
|
154
|
+
* items: Array<{ id: number; name: string }>;
|
|
155
|
+
* metadata: {
|
|
156
|
+
* tags: string[];
|
|
157
|
+
* };
|
|
158
|
+
* };
|
|
159
|
+
*
|
|
160
|
+
* const mapping: ObjectMapping<ComplexData> = {
|
|
161
|
+
* // Simple dot notation
|
|
162
|
+
* userName: "user.name",
|
|
163
|
+
* // Array access
|
|
164
|
+
* firstItemId: "items[0].id",
|
|
165
|
+
* // JSONPath for complex query
|
|
166
|
+
* allItemNames: "$.items[*].name",
|
|
167
|
+
* // Function for transformation
|
|
168
|
+
* formattedTags: (data) => data.metadata.tags.map(t => `#${t}`).join(" "),
|
|
169
|
+
* };
|
|
170
|
+
* ```
|
|
171
|
+
*
|
|
172
|
+
* @example
|
|
173
|
+
* **Real-world evaluator binding:**
|
|
174
|
+
* ```typescript
|
|
175
|
+
* import { bindEvaluator, createHallucinationEvaluator } from "@arizeai/phoenix-evals";
|
|
176
|
+
*
|
|
177
|
+
* type QAData = {
|
|
178
|
+
* question: string;
|
|
179
|
+
* context: string;
|
|
180
|
+
* answer: string;
|
|
181
|
+
* };
|
|
182
|
+
*
|
|
183
|
+
* const mapping: ObjectMapping<QAData> = {
|
|
184
|
+
* input: "question", // Evaluator expects "input"
|
|
185
|
+
* reference: "context", // Evaluator expects "reference"
|
|
186
|
+
* output: "answer", // Evaluator expects "output"
|
|
187
|
+
* };
|
|
188
|
+
*
|
|
189
|
+
* const evaluator = bindEvaluator(
|
|
190
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
191
|
+
* { inputMapping: mapping }
|
|
192
|
+
* );
|
|
193
|
+
* ```
|
|
194
|
+
*
|
|
195
|
+
* @typeParam DataType - The type of the data object being mapped
|
|
196
|
+
*/
|
|
197
|
+
export type ObjectMapping<DataType extends Record<string, unknown>> = Record<
|
|
198
|
+
string,
|
|
199
|
+
ValueGetter<DataType>
|
|
200
|
+
>;
|
package/src/types/evals.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ObjectMapping } from "./data";
|
|
1
2
|
import { WithTelemetry } from "./otel";
|
|
2
3
|
import { PromptTemplate } from "./templating";
|
|
3
4
|
|
|
@@ -81,22 +82,41 @@ export interface CreateClassifierArgs extends WithTelemetry {
|
|
|
81
82
|
promptTemplate: PromptTemplate;
|
|
82
83
|
}
|
|
83
84
|
|
|
84
|
-
export interface CreateEvaluatorArgs
|
|
85
|
+
export interface CreateEvaluatorArgs<
|
|
86
|
+
ExampleType extends Record<string, unknown> = Record<string, unknown>,
|
|
87
|
+
> extends WithTelemetry {
|
|
85
88
|
/**
|
|
86
89
|
* The name of the metric that the evaluator produces
|
|
87
90
|
* E.g. "correctness"
|
|
88
91
|
*/
|
|
89
92
|
name: string;
|
|
93
|
+
/**
|
|
94
|
+
* The kind of the evaluation, i.e. the "kind" of evaluator that produces it.
|
|
95
|
+
*/
|
|
96
|
+
kind: EvaluationKind;
|
|
90
97
|
/**
|
|
91
98
|
* If present, represents the direction in which you want the metric to be optimized
|
|
92
99
|
* E.g. "MAXIMIZE" means you want the number to be higher.
|
|
93
100
|
*/
|
|
94
101
|
optimizationDirection?: OptimizationDirection;
|
|
102
|
+
/**
|
|
103
|
+
* The mapping of the input to evaluate to the shape that the evaluator expects
|
|
104
|
+
*/
|
|
105
|
+
inputMapping?: ObjectMapping<ExampleType>;
|
|
95
106
|
}
|
|
96
107
|
|
|
97
|
-
export
|
|
98
|
-
|
|
99
|
-
|
|
108
|
+
export type CreateLLMEvaluatorArgs<RecordType extends Record<string, unknown>> =
|
|
109
|
+
Omit<CreateEvaluatorArgs<RecordType>, "kind">;
|
|
110
|
+
|
|
111
|
+
export interface CreateClassificationEvaluatorArgs<
|
|
112
|
+
RecordType extends Record<string, unknown>,
|
|
113
|
+
> extends CreateClassifierArgs,
|
|
114
|
+
CreateLLMEvaluatorArgs<RecordType> {
|
|
115
|
+
/**
|
|
116
|
+
* The prompt template to use for classification
|
|
117
|
+
*/
|
|
118
|
+
promptTemplate: PromptTemplate;
|
|
119
|
+
}
|
|
100
120
|
|
|
101
121
|
export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
|
|
102
122
|
args: ExampleType
|
|
@@ -136,7 +156,7 @@ interface EvaluatorDescription {
|
|
|
136
156
|
* The Base Evaluator interface
|
|
137
157
|
* This is the interface that all evaluators must implement
|
|
138
158
|
*/
|
|
139
|
-
export interface
|
|
159
|
+
export interface EvaluatorInterface<ExampleType extends Record<string, unknown>>
|
|
140
160
|
extends EvaluatorDescription {
|
|
141
161
|
/**
|
|
142
162
|
* The function that evaluates the example
|
package/src/types/index.ts
CHANGED
package/src/types/otel.ts
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
import { Tracer } from "@opentelemetry/api";
|
|
2
2
|
|
|
3
|
+
export type TelemetryConfig = {
|
|
4
|
+
/**
|
|
5
|
+
* Whether OpenTelemetry is enabled on the call.
|
|
6
|
+
* Defaults to true for visibility into the evals calls.
|
|
7
|
+
* @default true
|
|
8
|
+
*/
|
|
9
|
+
isEnabled?: boolean;
|
|
10
|
+
/**
|
|
11
|
+
* The tracer to use for the call.
|
|
12
|
+
* If not provided, the traces will get picked up by the global tracer.
|
|
13
|
+
*/
|
|
14
|
+
tracer?: Tracer;
|
|
15
|
+
};
|
|
16
|
+
|
|
3
17
|
export type WithTelemetry = {
|
|
4
|
-
telemetry?:
|
|
5
|
-
/**
|
|
6
|
-
* Whether OpenTelemetry is enabled on the call.
|
|
7
|
-
* Defaults to true for visibility into the evals calls.
|
|
8
|
-
* @default true
|
|
9
|
-
*/
|
|
10
|
-
isEnabled?: boolean;
|
|
11
|
-
/**
|
|
12
|
-
* The tracer to use for the call.
|
|
13
|
-
* If not provided, the traces will get picked up by the global tracer.
|
|
14
|
-
*/
|
|
15
|
-
tracer?: Tracer;
|
|
16
|
-
};
|
|
18
|
+
telemetry?: TelemetryConfig;
|
|
17
19
|
};
|