@arizeai/phoenix-evals 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +6 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +1 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +31 -0
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -0
- package/dist/esm/default_templates/index.d.ts +2 -0
- package/dist/esm/default_templates/index.d.ts.map +1 -0
- package/dist/esm/default_templates/index.js +2 -0
- package/dist/esm/default_templates/index.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -1
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/createClassifier.d.ts +6 -0
- package/dist/esm/llm/createClassifier.d.ts.map +1 -0
- package/dist/esm/llm/createClassifier.js +40 -0
- package/dist/esm/llm/createClassifier.js.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +14 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.js +18 -0
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -0
- package/dist/esm/llm/generateClassification.d.ts +22 -0
- package/dist/esm/llm/generateClassification.d.ts.map +1 -0
- package/dist/esm/llm/generateClassification.js +23 -0
- package/dist/esm/llm/generateClassification.js.map +1 -0
- package/dist/esm/llm/index.d.ts +4 -0
- package/dist/esm/llm/index.d.ts.map +1 -0
- package/dist/esm/llm/index.js +4 -0
- package/dist/esm/llm/index.js.map +1 -0
- package/dist/esm/template/applyTemplate.d.ts +10 -0
- package/dist/esm/template/applyTemplate.d.ts.map +1 -0
- package/dist/esm/template/applyTemplate.js +10 -0
- package/dist/esm/template/applyTemplate.js.map +1 -0
- package/dist/esm/template/index.d.ts +2 -0
- package/dist/esm/template/index.d.ts.map +1 -0
- package/dist/esm/template/index.js +2 -0
- package/dist/esm/template/index.js.map +1 -0
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/evals.d.ts +73 -0
- package/dist/esm/types/evals.d.ts.map +1 -0
- package/dist/esm/types/evals.js +2 -0
- package/dist/esm/types/evals.js.map +1 -0
- package/dist/esm/types/index.d.ts +3 -0
- package/dist/esm/types/index.d.ts.map +1 -0
- package/dist/esm/types/index.js +3 -0
- package/dist/esm/types/index.js.map +1 -0
- package/dist/esm/types/prompts.d.ts +21 -0
- package/dist/esm/types/prompts.d.ts.map +1 -0
- package/dist/esm/types/prompts.js +2 -0
- package/dist/esm/types/prompts.js.map +1 -0
- package/dist/esm/types/templating.d.ts +3 -0
- package/dist/esm/types/templating.d.ts.map +1 -0
- package/dist/esm/types/templating.js +2 -0
- package/dist/esm/types/templating.js.map +1 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +6 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +1 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +34 -0
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -0
- package/dist/src/default_templates/index.d.ts +2 -0
- package/dist/src/default_templates/index.d.ts.map +1 -0
- package/dist/src/default_templates/index.js +18 -0
- package/dist/src/default_templates/index.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +16 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/createClassifier.d.ts +6 -0
- package/dist/src/llm/createClassifier.d.ts.map +1 -0
- package/dist/src/llm/createClassifier.js +47 -0
- package/dist/src/llm/createClassifier.js.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts +14 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.js +18 -0
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -0
- package/dist/src/llm/generateClassification.d.ts +22 -0
- package/dist/src/llm/generateClassification.d.ts.map +1 -0
- package/dist/src/llm/generateClassification.js +44 -0
- package/dist/src/llm/generateClassification.js.map +1 -0
- package/dist/src/llm/index.d.ts +4 -0
- package/dist/src/llm/index.d.ts.map +1 -0
- package/dist/src/llm/index.js +20 -0
- package/dist/src/llm/index.js.map +1 -0
- package/dist/src/template/applyTemplate.d.ts +10 -0
- package/dist/src/template/applyTemplate.d.ts.map +1 -0
- package/dist/src/template/applyTemplate.js +16 -0
- package/dist/src/template/applyTemplate.js.map +1 -0
- package/dist/src/template/index.d.ts +2 -0
- package/dist/src/template/index.d.ts.map +1 -0
- package/dist/src/template/index.js +18 -0
- package/dist/src/template/index.js.map +1 -0
- package/dist/src/types/evals.d.ts +73 -0
- package/dist/src/types/evals.d.ts.map +1 -0
- package/dist/src/types/evals.js +3 -0
- package/dist/src/types/evals.js.map +1 -0
- package/dist/src/types/index.d.ts +3 -0
- package/dist/src/types/index.d.ts.map +1 -0
- package/dist/src/types/index.js +19 -0
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/types/prompts.d.ts +21 -0
- package/dist/src/types/prompts.d.ts.map +1 -0
- package/dist/src/types/prompts.js +3 -0
- package/dist/src/types/prompts.js.map +1 -0
- package/dist/src/types/templating.d.ts +3 -0
- package/dist/src/types/templating.d.ts.map +1 -0
- package/dist/src/types/templating.js +3 -0
- package/dist/src/types/templating.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +42 -5
- package/src/default_templates/HALLUCINATION_TEMPLATE.ts +31 -0
- package/src/default_templates/index.ts +1 -0
- package/src/index.ts +1 -0
- package/src/llm/createClassifier.ts +59 -0
- package/src/llm/createHallucinationEvaluator.ts +32 -0
- package/src/llm/generateClassification.ts +42 -0
- package/src/llm/index.ts +3 -0
- package/src/template/applyTemplate.ts +14 -0
- package/src/template/index.ts +1 -0
- package/src/types/evals.ts +84 -0
- package/src/types/index.ts +2 -0
- package/src/types/prompts.ts +21 -0
- package/src/types/templating.ts +2 -0
package/README.md
CHANGED
|
@@ -22,3 +22,59 @@ This package provides a TypeScript evaluation library. It is vendor agnostic and
|
|
|
22
22
|
# or yarn, pnpm, bun, etc...
|
|
23
23
|
npm install @arizeai/phoenix-evals
|
|
24
24
|
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
### Creating a Classifier
|
|
29
|
+
|
|
30
|
+
The library provides a `createClassifier` function that allows you to create custom evaluators for different tasks like hallucination detection, relevance scoring, or any binary/multi-class classification.
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { createClassifier } from "@arizeai/phoenix-evals/llm";
|
|
34
|
+
import { openai } from "@ai-sdk/openai";
|
|
35
|
+
|
|
36
|
+
const model = openai("gpt-4o-mini");
|
|
37
|
+
|
|
38
|
+
const promptTemplate = `
|
|
39
|
+
In this task, you will be presented with a query, a reference text and an answer. The answer is
|
|
40
|
+
generated to the question based on the reference text. The answer may contain false information. You
|
|
41
|
+
must use the reference text to determine if the answer to the question contains false information,
|
|
42
|
+
if the answer is a hallucination of facts. Your objective is to determine whether the answer text
|
|
43
|
+
contains factual information and is not a hallucination. A 'hallucination' refers to
|
|
44
|
+
an answer that is not based on the reference text or assumes information that is not available in
|
|
45
|
+
the reference text. Your response should be a single word: either "factual" or "hallucinated", and
|
|
46
|
+
it should not include any other text or characters.
|
|
47
|
+
|
|
48
|
+
[BEGIN DATA]
|
|
49
|
+
************
|
|
50
|
+
[Query]: {{input}}
|
|
51
|
+
************
|
|
52
|
+
[Reference text]: {{reference}}
|
|
53
|
+
************
|
|
54
|
+
[Answer]: {{output}}
|
|
55
|
+
************
|
|
56
|
+
[END DATA]
|
|
57
|
+
|
|
58
|
+
Is the answer above factual or hallucinated based on the query and reference text?
|
|
59
|
+
`;
|
|
60
|
+
|
|
61
|
+
// Create the classifier
|
|
62
|
+
const evaluator = createClassifier({
|
|
63
|
+
model,
|
|
64
|
+
choices: { factual: 1, hallucinated: 0 },
|
|
65
|
+
promptTemplate: promptTemplate,
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// Use the classifier
|
|
69
|
+
const result = await evaluator({
|
|
70
|
+
output: "Arize is not open source.",
|
|
71
|
+
input: "Is Arize Phoenix Open Source?",
|
|
72
|
+
reference:
|
|
73
|
+
"Arize Phoenix is a platform for building and deploying AI applications. It is open source.",
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
console.log(result);
|
|
77
|
+
// Example output: { score: 0, label: "hallucinated", explanation: "..." }
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
See the complete example in [`examples/classifier_example.ts`](examples/classifier_example.ts).
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export declare const HALLUCINATION_TEMPLATE = "\nIn this task, you will be presented with a query, a reference text and an answer. The answer is\ngenerated to the question based on the reference text. The answer may contain false information. You\nmust use the reference text to determine if the answer to the question contains false information,\nif the answer is a hallucination of facts. Your objective is to determine whether the answer text\ncontains factual information and is not a hallucination. A 'hallucination' refers to\nan answer that is not based on the reference text or assumes information that is not available in\nthe reference text. Your response should be a single word: either \"factual\" or \"hallucinated\", and\nit should not include any other text or characters. \"hallucinated\" indicates that the answer\nprovides factually inaccurate information to the query based on the reference text. \"factual\"\nindicates that the answer to the question is correct relative to the reference text, and does not\ncontain made up information. Please read the query and reference text carefully before determining\nyour response.\n\n [BEGIN DATA]\n ************\n [Query]: {{input}}\n ************\n [Reference text]: {{reference}}\n ************\n [Answer]: {{output}}\n ************\n [END DATA]\n\nIs the answer above factual or hallucinated based on the query and reference text?\n";
|
|
2
|
+
export declare const HALLUCINATION_CHOICES: {
|
|
3
|
+
factual: number;
|
|
4
|
+
hallucinated: number;
|
|
5
|
+
};
|
|
6
|
+
//# sourceMappingURL=HALLUCINATION_TEMPLATE.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_TEMPLATE.d.ts","sourceRoot":"","sources":["../../../src/default_templates/HALLUCINATION_TEMPLATE.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,sBAAsB,u2CAyBlC,CAAC;AAEF,eAAO,MAAM,qBAAqB;;;CAGjC,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
export const HALLUCINATION_TEMPLATE = `
|
|
2
|
+
In this task, you will be presented with a query, a reference text and an answer. The answer is
|
|
3
|
+
generated to the question based on the reference text. The answer may contain false information. You
|
|
4
|
+
must use the reference text to determine if the answer to the question contains false information,
|
|
5
|
+
if the answer is a hallucination of facts. Your objective is to determine whether the answer text
|
|
6
|
+
contains factual information and is not a hallucination. A 'hallucination' refers to
|
|
7
|
+
an answer that is not based on the reference text or assumes information that is not available in
|
|
8
|
+
the reference text. Your response should be a single word: either "factual" or "hallucinated", and
|
|
9
|
+
it should not include any other text or characters. "hallucinated" indicates that the answer
|
|
10
|
+
provides factually inaccurate information to the query based on the reference text. "factual"
|
|
11
|
+
indicates that the answer to the question is correct relative to the reference text, and does not
|
|
12
|
+
contain made up information. Please read the query and reference text carefully before determining
|
|
13
|
+
your response.
|
|
14
|
+
|
|
15
|
+
[BEGIN DATA]
|
|
16
|
+
************
|
|
17
|
+
[Query]: {{input}}
|
|
18
|
+
************
|
|
19
|
+
[Reference text]: {{reference}}
|
|
20
|
+
************
|
|
21
|
+
[Answer]: {{output}}
|
|
22
|
+
************
|
|
23
|
+
[END DATA]
|
|
24
|
+
|
|
25
|
+
Is the answer above factual or hallucinated based on the query and reference text?
|
|
26
|
+
`;
|
|
27
|
+
export const HALLUCINATION_CHOICES = {
|
|
28
|
+
factual: 1,
|
|
29
|
+
hallucinated: 0,
|
|
30
|
+
};
|
|
31
|
+
//# sourceMappingURL=HALLUCINATION_TEMPLATE.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_TEMPLATE.js","sourceRoot":"","sources":["../../../src/default_templates/HALLUCINATION_TEMPLATE.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;CAyBrC,CAAC;AAEF,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACnC,OAAO,EAAE,CAAC;IACV,YAAY,EAAE,CAAC;CAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/default_templates/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/default_templates/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAC"}
|
package/dist/esm/index.d.ts
CHANGED
package/dist/esm/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC"}
|
package/dist/esm/index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
export * from "./llm/index.js";
|
|
2
2
|
//# sourceMappingURL=index.js.map
|
package/dist/esm/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { CreateClassifierArgs, EvaluatorFn } from "../types/evals.js";
|
|
2
|
+
/**
|
|
3
|
+
* A function that serves as a factory that will output a classification evaluator
|
|
4
|
+
*/
|
|
5
|
+
export declare function createClassifier<OutputType, InputType>(args: CreateClassifierArgs): EvaluatorFn<OutputType, InputType>;
|
|
6
|
+
//# sourceMappingURL=createClassifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createClassifier.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,oBAAoB,EACpB,WAAW,EACZ,MAAM,gBAAgB,CAAC;AAkBxB;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,SAAS,EACpD,IAAI,EAAE,oBAAoB,GACzB,WAAW,CAAC,UAAU,EAAE,SAAS,CAAC,CA6BpC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { generateClassification } from "./generateClassification.js";
|
|
2
|
+
import { formatTemplate } from "../template/index.js";
|
|
3
|
+
/**
|
|
4
|
+
* Convert a mapping of choices to labels
|
|
5
|
+
* Asserts that the choices are valid
|
|
6
|
+
*/
|
|
7
|
+
function choicesToLabels(choices) {
|
|
8
|
+
const labels = Object.keys(choices);
|
|
9
|
+
if (labels.length < 1) {
|
|
10
|
+
throw new Error("No choices provided");
|
|
11
|
+
}
|
|
12
|
+
return labels;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* A function that serves as a factory that will output a classification evaluator
|
|
16
|
+
*/
|
|
17
|
+
export function createClassifier(args) {
|
|
18
|
+
const { model, choices, promptTemplate } = args;
|
|
19
|
+
return async (args) => {
|
|
20
|
+
const templateVariables = {
|
|
21
|
+
...args,
|
|
22
|
+
};
|
|
23
|
+
const prompt = formatTemplate({
|
|
24
|
+
template: promptTemplate,
|
|
25
|
+
variables: templateVariables,
|
|
26
|
+
});
|
|
27
|
+
const classification = await generateClassification({
|
|
28
|
+
model,
|
|
29
|
+
labels: choicesToLabels(choices),
|
|
30
|
+
prompt,
|
|
31
|
+
});
|
|
32
|
+
// Post-process the classification result and map it to the choices
|
|
33
|
+
const score = choices[classification.label];
|
|
34
|
+
return {
|
|
35
|
+
score,
|
|
36
|
+
...classification,
|
|
37
|
+
};
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
//# sourceMappingURL=createClassifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createClassifier.js","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C;;;GAGG;AACH,SAAS,eAAe,CACtB,OAAiC;IAEjC,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,MAA+B,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,IAA0B;IAE1B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,IAAI,CAAC;IAEhD,OAAO,KAAK,EACV,IAA2C,EAChB,EAAE;QAC7B,MAAM,iBAAiB,GAAG;YACxB,GAAG,IAAI;SACR,CAAC;QAEF,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,QAAQ,EAAE,cAAc;YACxB,SAAS,EAAE,iBAAiB;SAC7B,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,sBAAsB,CAAC;YAClD,KAAK;YACL,MAAM,EAAE,eAAe,CAAC,OAAO,CAAC;YAChC,MAAM;SACP,CAAC,CAAC;QAEH,mEAAmE;QACnE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAE5C,OAAO;YACL,KAAK;YACL,GAAG,cAAc;SAClB,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { CreateClassifierArgs, EvaluatorFn } from "../types/evals.js";
|
|
2
|
+
interface HallucinationEvaluatorArgs extends Omit<CreateClassifierArgs, "promptTemplate" | "choices"> {
|
|
3
|
+
choices?: CreateClassifierArgs["choices"];
|
|
4
|
+
promptTemplate?: CreateClassifierArgs["promptTemplate"];
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Creates a function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
8
|
+
*
|
|
9
|
+
* @param args - The arguments for creating the hallucination evaluator.
|
|
10
|
+
* @returns A function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
11
|
+
*/
|
|
12
|
+
export declare function createHallucinationEvaluator(args: HallucinationEvaluatorArgs): EvaluatorFn<string, string>;
|
|
13
|
+
export {};
|
|
14
|
+
//# sourceMappingURL=createHallucinationEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createHallucinationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAMnE,UAAU,0BACR,SAAQ,IAAI,CAAC,oBAAoB,EAAE,gBAAgB,GAAG,SAAS,CAAC;IAChE,OAAO,CAAC,EAAE,oBAAoB,CAAC,SAAS,CAAC,CAAC;IAC1C,cAAc,CAAC,EAAE,oBAAoB,CAAC,gBAAgB,CAAC,CAAC;CACzD;AACD;;;;;GAKG;AACH,wBAAgB,4BAA4B,CAC1C,IAAI,EAAE,0BAA0B,GAC/B,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAW7B"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { createClassifier } from "./createClassifier.js";
|
|
2
|
+
import { HALLUCINATION_TEMPLATE, HALLUCINATION_CHOICES, } from "../default_templates/HALLUCINATION_TEMPLATE.js";
|
|
3
|
+
/**
|
|
4
|
+
* Creates a function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
5
|
+
*
|
|
6
|
+
* @param args - The arguments for creating the hallucination evaluator.
|
|
7
|
+
* @returns A function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
8
|
+
*/
|
|
9
|
+
export function createHallucinationEvaluator(args) {
|
|
10
|
+
const { choices = HALLUCINATION_CHOICES, promptTemplate = HALLUCINATION_TEMPLATE, } = args;
|
|
11
|
+
const hallucinationEvaluatorFn = createClassifier({
|
|
12
|
+
...args,
|
|
13
|
+
promptTemplate,
|
|
14
|
+
choices,
|
|
15
|
+
});
|
|
16
|
+
return hallucinationEvaluatorFn;
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=createHallucinationEvaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createHallucinationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAEtD,OAAO,EACL,sBAAsB,EACtB,qBAAqB,GACtB,MAAM,6CAA6C,CAAC;AAOrD;;;;;GAKG;AACH,MAAM,UAAU,4BAA4B,CAC1C,IAAgC;IAEhC,MAAM,EACJ,OAAO,GAAG,qBAAqB,EAC/B,cAAc,GAAG,sBAAsB,GACxC,GAAG,IAAI,CAAC;IACT,MAAM,wBAAwB,GAAG,gBAAgB,CAAiB;QAChE,GAAG,IAAI;QACP,cAAc;QACd,OAAO;KACR,CAAC,CAAC;IACH,OAAO,wBAAwB,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { ClassificationResult, WithLLM } from "../types/evals.js";
|
|
2
|
+
import type { WithPrompt } from "../types/prompts.js";
|
|
3
|
+
interface ClassifyArgs extends WithLLM, WithPrompt {
|
|
4
|
+
/**
|
|
5
|
+
* The labels to classify the example into. E.g. ["correct", "incorrect"]
|
|
6
|
+
*/
|
|
7
|
+
labels: [string, ...string[]];
|
|
8
|
+
/**
|
|
9
|
+
* The name of the schema for generating the label and explanation.
|
|
10
|
+
*/
|
|
11
|
+
schemaName?: string;
|
|
12
|
+
/**
|
|
13
|
+
* The description of the schema for generating the label and explanation.
|
|
14
|
+
*/
|
|
15
|
+
schemaDescription?: string;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* A function that leverages an llm to perform a classification
|
|
19
|
+
*/
|
|
20
|
+
export declare function generateClassification(args: ClassifyArgs): Promise<ClassificationResult>;
|
|
21
|
+
export {};
|
|
22
|
+
//# sourceMappingURL=generateClassification.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generateClassification.d.ts","sourceRoot":"","sources":["../../../src/llm/generateClassification.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAInD,UAAU,YAAa,SAAQ,OAAO,EAAE,UAAU;IAChD;;OAEG;IACH,MAAM,EAAE,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC;IAC9B;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AACD;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,YAAY,GACjB,OAAO,CAAC,oBAAoB,CAAC,CAiB/B"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { generateObject } from "ai";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
/**
|
|
4
|
+
* A function that leverages an llm to perform a classification
|
|
5
|
+
*/
|
|
6
|
+
export async function generateClassification(args) {
|
|
7
|
+
const { labels, model, schemaName, schemaDescription, ...prompt } = args;
|
|
8
|
+
const result = await generateObject({
|
|
9
|
+
model,
|
|
10
|
+
schemaName,
|
|
11
|
+
schemaDescription,
|
|
12
|
+
schema: z.object({
|
|
13
|
+
explanation: z.string(), // We place the explanation in hopes it uses reasoning to explain the label.
|
|
14
|
+
label: z.enum(labels),
|
|
15
|
+
}),
|
|
16
|
+
...prompt,
|
|
17
|
+
});
|
|
18
|
+
return {
|
|
19
|
+
label: result.object.label,
|
|
20
|
+
explanation: result.object.explanation,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
//# sourceMappingURL=generateClassification.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generateClassification.js","sourceRoot":"","sources":["../../../src/llm/generateClassification.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAgBxB;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,IAAkB;IAElB,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,iBAAiB,EAAE,GAAG,MAAM,EAAE,GAAG,IAAI,CAAC;IAEzE,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;QAClC,KAAK;QACL,UAAU;QACV,iBAAiB;QACjB,MAAM,EAAE,CAAC,CAAC,MAAM,CAAC;YACf,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,EAAE,4EAA4E;YACrG,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;SACtB,CAAC;QACF,GAAG,MAAM;KACV,CAAC,CAAC;IACH,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK;QAC1B,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW;KACvC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/llm/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAC;AACzC,cAAc,oBAAoB,CAAC;AACnC,cAAc,gCAAgC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/llm/index.ts"],"names":[],"mappings":"AAAA,cAAc,0BAA0B,CAAC;AACzC,cAAc,oBAAoB,CAAC;AACnC,cAAc,gCAAgC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { Template } from "../types/templating.js";
|
|
2
|
+
/**
|
|
3
|
+
* A function that applies a set of variables to a template (e.g. a prompt)
|
|
4
|
+
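* Uses the Mustache library to apply the variables to the template. Note: Mustache is called with its default settings, so values substituted via `{{name}}` tags are HTML-escaped.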
|
|
5
|
+
*/
|
|
6
|
+
export declare function formatTemplate(args: {
|
|
7
|
+
template: Template;
|
|
8
|
+
variables: Record<string, unknown>;
|
|
9
|
+
}): string;
|
|
10
|
+
//# sourceMappingURL=applyTemplate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"applyTemplate.d.ts","sourceRoot":"","sources":["../../../src/template/applyTemplate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAG/C;;;GAGG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE;IACnC,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC,UAGA"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import Mustache from "mustache";
|
|
2
|
+
/**
|
|
3
|
+
* A function that applies a set of variables to a template (e.g. a prompt)
|
|
4
|
+
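* Uses the Mustache library to apply the variables to the template. Note: Mustache is called with its default settings, so values substituted via `{{name}}` tags are HTML-escaped.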
|
|
5
|
+
*/
|
|
6
|
+
export function formatTemplate(args) {
|
|
7
|
+
const { template, variables } = args;
|
|
8
|
+
return Mustache.render(template, variables);
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=applyTemplate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"applyTemplate.js","sourceRoot":"","sources":["../../../src/template/applyTemplate.ts"],"names":[],"mappings":"AACA,OAAO,QAAQ,MAAM,UAAU,CAAC;AAEhC;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,IAG9B;IACC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;IACrC,OAAO,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;AAC9C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/template/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/template/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC"}
|