@arizeai/phoenix-evals 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +6 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +1 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +31 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -0
- package/dist/esm/default_templates/index.d.ts +2 -0
- package/dist/esm/default_templates/index.d.ts.map +1 -0
- package/dist/esm/default_templates/index.js +2 -0
- package/dist/esm/default_templates/index.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/createClassifier.d.ts +6 -0
- package/dist/esm/llm/createClassifier.d.ts.map +1 -0
- package/dist/esm/llm/createClassifier.js +40 -0
- package/dist/esm/llm/createClassifier.js.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +14 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.js +18 -0
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -0
- package/dist/esm/llm/generateClassification.d.ts +22 -0
- package/dist/esm/llm/generateClassification.d.ts.map +1 -0
- package/dist/esm/llm/generateClassification.js +23 -0
- package/dist/esm/llm/generateClassification.js.map +1 -0
- package/dist/esm/llm/index.d.ts +4 -0
- package/dist/esm/llm/index.d.ts.map +1 -0
- package/dist/esm/llm/index.js +4 -0
- package/dist/esm/llm/index.js.map +1 -0
- package/dist/esm/template/applyTemplate.d.ts +10 -0
- package/dist/esm/template/applyTemplate.d.ts.map +1 -0
- package/dist/esm/template/applyTemplate.js +10 -0
- package/dist/esm/template/applyTemplate.js.map +1 -0
- package/dist/esm/template/index.d.ts +2 -0
- package/dist/esm/template/index.d.ts.map +1 -0
- package/dist/esm/template/index.js +2 -0
- package/dist/esm/template/index.js.map +1 -0
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/evals.d.ts +73 -0
- package/dist/esm/types/evals.d.ts.map +1 -0
- package/dist/esm/types/evals.js +2 -0
- package/dist/esm/types/evals.js.map +1 -0
- package/dist/esm/types/index.d.ts +3 -0
- package/dist/esm/types/index.d.ts.map +1 -0
- package/dist/esm/types/index.js +3 -0
- package/dist/esm/types/index.js.map +1 -0
- package/dist/esm/types/prompts.d.ts +21 -0
- package/dist/esm/types/prompts.d.ts.map +1 -0
- package/dist/esm/types/prompts.js +2 -0
- package/dist/esm/types/prompts.js.map +1 -0
- package/dist/esm/types/templating.d.ts +3 -0
- package/dist/esm/types/templating.d.ts.map +1 -0
- package/dist/esm/types/templating.js +2 -0
- package/dist/esm/types/templating.js.map +1 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +6 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +1 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +34 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -0
- package/dist/src/default_templates/index.d.ts +2 -0
- package/dist/src/default_templates/index.d.ts.map +1 -0
- package/dist/src/default_templates/index.js +18 -0
- package/dist/src/default_templates/index.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +16 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/createClassifier.d.ts +6 -0
- package/dist/src/llm/createClassifier.d.ts.map +1 -0
- package/dist/src/llm/createClassifier.js +47 -0
- package/dist/src/llm/createClassifier.js.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts +14 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.js +18 -0
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -0
- package/dist/src/llm/generateClassification.d.ts +22 -0
- package/dist/src/llm/generateClassification.d.ts.map +1 -0
- package/dist/src/llm/generateClassification.js +44 -0
- package/dist/src/llm/generateClassification.js.map +1 -0
- package/dist/src/llm/index.d.ts +4 -0
- package/dist/src/llm/index.d.ts.map +1 -0
- package/dist/src/llm/index.js +20 -0
- package/dist/src/llm/index.js.map +1 -0
- package/dist/src/template/applyTemplate.d.ts +10 -0
- package/dist/src/template/applyTemplate.d.ts.map +1 -0
- package/dist/src/template/applyTemplate.js +16 -0
- package/dist/src/template/applyTemplate.js.map +1 -0
- package/dist/src/template/index.d.ts +2 -0
- package/dist/src/template/index.d.ts.map +1 -0
- package/dist/src/template/index.js +18 -0
- package/dist/src/template/index.js.map +1 -0
- package/dist/src/types/evals.d.ts +73 -0
- package/dist/src/types/evals.d.ts.map +1 -0
- package/dist/src/types/evals.js +3 -0
- package/dist/src/types/evals.js.map +1 -0
- package/dist/src/types/index.d.ts +3 -0
- package/dist/src/types/index.d.ts.map +1 -0
- package/dist/src/types/index.js +19 -0
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/types/prompts.d.ts +21 -0
- package/dist/src/types/prompts.d.ts.map +1 -0
- package/dist/src/types/prompts.js +3 -0
- package/dist/src/types/prompts.js.map +1 -0
- package/dist/src/types/templating.d.ts +3 -0
- package/dist/src/types/templating.d.ts.map +1 -0
- package/dist/src/types/templating.js +3 -0
- package/dist/src/types/templating.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +42 -5
- package/src/default_templates/HALLUCINATION_TEMPLATE.ts +31 -0
- package/src/default_templates/index.ts +1 -0
- package/src/index.ts +1 -0
- package/src/llm/createClassifier.ts +59 -0
- package/src/llm/createHallucinationEvaluator.ts +32 -0
- package/src/llm/generateClassification.ts +42 -0
- package/src/llm/index.ts +3 -0
- package/src/template/applyTemplate.ts +14 -0
- package/src/template/index.ts +1 -0
- package/src/types/evals.ts +84 -0
- package/src/types/index.ts +2 -0
- package/src/types/prompts.ts +21 -0
- package/src/types/templating.ts +2 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { ClassificationResult, WithLLM } from "../types/evals";
|
|
2
|
+
import type { WithPrompt } from "../types/prompts";
|
|
3
|
+
import { generateObject } from "ai";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
|
|
6
|
+
interface ClassifyArgs extends WithLLM, WithPrompt {
|
|
7
|
+
/**
|
|
8
|
+
* The labels to classify the example into. E.g. ["correct", "incorrect"]
|
|
9
|
+
*/
|
|
10
|
+
labels: [string, ...string[]];
|
|
11
|
+
/**
|
|
12
|
+
* The name of the schema for generating the label and explanation.
|
|
13
|
+
*/
|
|
14
|
+
schemaName?: string;
|
|
15
|
+
/**
|
|
16
|
+
* The description of the schema for generating the label and explanation.
|
|
17
|
+
*/
|
|
18
|
+
schemaDescription?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* A function that leverages an LLM to perform a classification
|
|
22
|
+
*/
|
|
23
|
+
export async function generateClassification(
|
|
24
|
+
args: ClassifyArgs
|
|
25
|
+
): Promise<ClassificationResult> {
|
|
26
|
+
const { labels, model, schemaName, schemaDescription, ...prompt } = args;
|
|
27
|
+
|
|
28
|
+
const result = await generateObject({
|
|
29
|
+
model,
|
|
30
|
+
schemaName,
|
|
31
|
+
schemaDescription,
|
|
32
|
+
schema: z.object({
|
|
33
|
+
explanation: z.string(), // We place the explanation in hopes it uses reasoning to explain the label.
|
|
34
|
+
label: z.enum(labels),
|
|
35
|
+
}),
|
|
36
|
+
...prompt,
|
|
37
|
+
});
|
|
38
|
+
return {
|
|
39
|
+
label: result.object.label,
|
|
40
|
+
explanation: result.object.explanation,
|
|
41
|
+
};
|
|
42
|
+
}
|
package/src/llm/index.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Template } from "../types/templating";
|
|
2
|
+
import Mustache from "mustache";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* A function that applies a set of variables to a template (e.g. a prompt)
|
|
6
|
+
* Uses the Mustache library to apply the variables to the template
|
|
7
|
+
*/
|
|
8
|
+
export function formatTemplate(args: {
|
|
9
|
+
template: Template;
|
|
10
|
+
variables: Record<string, unknown>;
|
|
11
|
+
}) {
|
|
12
|
+
const { template, variables } = args;
|
|
13
|
+
return Mustache.render(template, variables);
|
|
14
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./applyTemplate";
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { LanguageModel } from "ai";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* The arguments for an evaluation
|
|
5
|
+
*/
|
|
6
|
+
export interface EvaluationArgs<OutputType, InputType> {
|
|
7
|
+
output: OutputType;
|
|
8
|
+
expected?: OutputType;
|
|
9
|
+
input?: InputType;
|
|
10
|
+
[key: string]: unknown;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface WithLLM {
|
|
14
|
+
model: LanguageModel;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export interface LLMEvaluationArgs<OutputType, InputType>
|
|
18
|
+
extends EvaluationArgs<OutputType, InputType>,
|
|
19
|
+
WithLLM {}
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* The result of an evaluation
|
|
23
|
+
*/
|
|
24
|
+
export interface EvaluationResult {
|
|
25
|
+
/**
|
|
26
|
+
* The score of the evaluation.
|
|
27
|
+
* @example 0.95
|
|
28
|
+
*/
|
|
29
|
+
score?: number;
|
|
30
|
+
/**
|
|
31
|
+
* The label of the evaluation.
|
|
32
|
+
* @example "correct"
|
|
33
|
+
*/
|
|
34
|
+
label?: string;
|
|
35
|
+
/**
|
|
36
|
+
* The explanation of the evaluation.
|
|
37
|
+
* @example "The model correctly identified the sentiment of the text."
|
|
38
|
+
*/
|
|
39
|
+
explanation?: string;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* The result of a classification
|
|
44
|
+
*/
|
|
45
|
+
export interface ClassificationResult {
|
|
46
|
+
label: string;
|
|
47
|
+
explanation?: string;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* The choice (e.g. the label and score mapping) of a classification-based evaluation
|
|
52
|
+
*/
|
|
53
|
+
export interface ClassificationChoice {
|
|
54
|
+
label: string;
|
|
55
|
+
score: number;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* A mapping of labels to scores
|
|
60
|
+
*/
|
|
61
|
+
export type ClassificationChoicesMap = Record<string, number>;
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* The arguments for creating a classification-based evaluator
|
|
65
|
+
*/
|
|
66
|
+
export interface CreateClassifierArgs {
|
|
67
|
+
/**
|
|
68
|
+
* The LLM to use for classification / evaluation
|
|
69
|
+
*/
|
|
70
|
+
model: LanguageModel;
|
|
71
|
+
/**
|
|
72
|
+
* The choices to classify the example into.
|
|
73
|
+
* e.g. { "correct": 1, "incorrect": 0 }
|
|
74
|
+
*/
|
|
75
|
+
choices: ClassificationChoicesMap;
|
|
76
|
+
/**
|
|
77
|
+
* The prompt template to use for classification
|
|
78
|
+
*/
|
|
79
|
+
promptTemplate: string;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export type EvaluatorFn<OutputType, InputType> = (
|
|
83
|
+
args: EvaluationArgs<OutputType, InputType>
|
|
84
|
+
) => Promise<EvaluationResult>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { CoreMessage, Message } from "ai";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Prompt part of the AI function options.
|
|
5
|
+
* It contains a system message, a simple text prompt, or a list of messages.
|
|
6
|
+
* Note: this is pulled from the `ai` package and is used as a compatibility type.
|
|
7
|
+
*/
|
|
8
|
+
export interface WithPrompt {
|
|
9
|
+
/**
|
|
10
|
+
* System message to include in the prompt. Can be used with `prompt` or `messages`.
|
|
11
|
+
*/
|
|
12
|
+
system?: string;
|
|
13
|
+
/**
|
|
14
|
+
* A simple text prompt. You can either use `prompt` or `messages` but not both.
|
|
15
|
+
*/
|
|
16
|
+
prompt?: string;
|
|
17
|
+
/**
|
|
18
|
+
* A list of messages. You can either use `prompt` or `messages` but not both.
|
|
19
|
+
*/
|
|
20
|
+
messages?: Array<CoreMessage> | Array<Omit<Message, "id">>;
|
|
21
|
+
}
|