@arizeai/phoenix-evals 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -22,3 +22,59 @@ This package provides a TypeScript evaluation library. It is vendor agnostic and
|
|
|
22
22
|
# or yarn, pnpm, bun, etc...
|
|
23
23
|
npm install @arizeai/phoenix-evals
|
|
24
24
|
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
### Creating a Classifier
|
|
29
|
+
|
|
30
|
+
The library provides a `createClassifier` function that allows you to create custom evaluators for different tasks like hallucination detection, relevance scoring, or any binary/multi-class classification.
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { createClassifier } from "@arizeai/phoenix-evals/llm";
|
|
34
|
+
import { openai } from "@ai-sdk/openai";
|
|
35
|
+
|
|
36
|
+
const model = openai("gpt-4o-mini");
|
|
37
|
+
|
|
38
|
+
const promptTemplate = `
|
|
39
|
+
In this task, you will be presented with a query, a reference text and an answer. The answer is
|
|
40
|
+
generated to the question based on the reference text. The answer may contain false information. You
|
|
41
|
+
must use the reference text to determine if the answer to the question contains false information,
|
|
42
|
+
if the answer is a hallucination of facts. Your objective is to determine whether the answer text
|
|
43
|
+
contains factual information and is not a hallucination. A 'hallucination' refers to
|
|
44
|
+
an answer that is not based on the reference text or assumes information that is not available in
|
|
45
|
+
the reference text. Your response should be a single word: either "factual" or "hallucinated", and
|
|
46
|
+
it should not include any other text or characters.
|
|
47
|
+
|
|
48
|
+
[BEGIN DATA]
|
|
49
|
+
************
|
|
50
|
+
[Query]: {{input}}
|
|
51
|
+
************
|
|
52
|
+
[Reference text]: {{reference}}
|
|
53
|
+
************
|
|
54
|
+
[Answer]: {{output}}
|
|
55
|
+
************
|
|
56
|
+
[END DATA]
|
|
57
|
+
|
|
58
|
+
Is the answer above factual or hallucinated based on the query and reference text?
|
|
59
|
+
`;
|
|
60
|
+
|
|
61
|
+
// Create the classifier
|
|
62
|
+
const evaluator = await createClassifier({
|
|
63
|
+
model,
|
|
64
|
+
choices: { factual: 1, hallucinated: 0 },
|
|
65
|
+
promptTemplate: promptTemplate,
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// Use the classifier
|
|
69
|
+
const result = await evaluator({
|
|
70
|
+
output: "Arize is not open source.",
|
|
71
|
+
input: "Is Arize Phoenix Open Source?",
|
|
72
|
+
reference:
|
|
73
|
+
"Arize Phoenix is a platform for building and deploying AI applications. It is open source.",
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
console.log(result);
|
|
77
|
+
// Output: { label: "hallucinated", score: 0 }
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
See the complete example in [`examples/classifier_example.ts`](examples/classifier_example.ts).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arizeai/phoenix-evals",
|
|
3
|
-
"version": "0.0.2",
|
|
3
|
+
"version": "0.0.3",
|
|
4
4
|
"description": "A library for running evaluations for AI use cases",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
"typedoc": "^0.27.9",
|
|
50
50
|
"vitest": "^2.1.9",
|
|
51
51
|
"typescript": "^5.8.2",
|
|
52
|
-
"@arizeai/phoenix-client": "2.3.
|
|
52
|
+
"@arizeai/phoenix-client": "2.3.2"
|
|
53
53
|
},
|
|
54
54
|
"engines": {
|
|
55
55
|
"node": ">=18"
|