@arizeai/phoenix-evals 0.5.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +37 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +43 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/index.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/index.js +4 -0
- package/dist/esm/__generated__/default_templates/index.js.map +1 -0
- package/dist/esm/__generated__/types.d.ts +9 -0
- package/dist/esm/__generated__/types.d.ts.map +1 -0
- package/dist/esm/__generated__/types.js +3 -0
- package/dist/esm/__generated__/types.js.map +1 -0
- package/dist/esm/core/EvaluatorBase.d.ts +19 -0
- package/dist/esm/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/esm/core/EvaluatorBase.js +18 -0
- package/dist/esm/core/EvaluatorBase.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.d.ts +19 -2
- package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.js +29 -2
- package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/LLMEvaluator.d.ts +4 -7
- package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/LLMEvaluator.js +4 -7
- package/dist/esm/llm/LLMEvaluator.js.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts +44 -0
- package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -0
- package/dist/esm/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +11 -12
- package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +6 -7
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +3 -4
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/llm/index.d.ts +1 -1
- package/dist/esm/llm/index.js +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/data.d.ts +194 -0
- package/dist/esm/types/data.d.ts.map +1 -0
- package/dist/esm/types/data.js +2 -0
- package/dist/esm/types/data.js.map +1 -0
- package/dist/esm/types/evals.d.ts +17 -3
- package/dist/esm/types/evals.d.ts.map +1 -1
- package/dist/esm/types/index.d.ts +1 -0
- package/dist/esm/types/index.d.ts.map +1 -1
- package/dist/esm/types/index.js +1 -0
- package/dist/esm/types/index.js.map +1 -1
- package/dist/esm/types/otel.d.ts +14 -13
- package/dist/esm/types/otel.d.ts.map +1 -1
- package/dist/esm/utils/bindEvaluator.d.ts +219 -0
- package/dist/esm/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/esm/utils/bindEvaluator.js +163 -0
- package/dist/esm/utils/bindEvaluator.js.map +1 -0
- package/dist/esm/utils/index.d.ts +2 -0
- package/dist/esm/utils/index.d.ts.map +1 -0
- package/dist/esm/utils/index.js +2 -0
- package/dist/esm/utils/index.js.map +1 -0
- package/dist/esm/utils/objectMappingUtils.d.ts +166 -0
- package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/esm/utils/objectMappingUtils.js +191 -0
- package/dist/esm/utils/objectMappingUtils.js.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +40 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +46 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/index.d.ts +3 -0
- package/dist/src/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/index.js +9 -0
- package/dist/src/__generated__/default_templates/index.js.map +1 -0
- package/dist/src/__generated__/types.d.ts +9 -0
- package/dist/src/__generated__/types.d.ts.map +1 -0
- package/dist/src/__generated__/types.js +4 -0
- package/dist/src/__generated__/types.js.map +1 -0
- package/dist/src/core/EvaluatorBase.d.ts +19 -0
- package/dist/src/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/src/core/EvaluatorBase.js +17 -0
- package/dist/src/core/EvaluatorBase.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.d.ts +19 -2
- package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.js +13 -2
- package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/LLMEvaluator.d.ts +4 -7
- package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/src/llm/LLMEvaluator.js +16 -5
- package/dist/src/llm/LLMEvaluator.js.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts +44 -0
- package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -0
- package/dist/src/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +13 -13
- package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts +6 -7
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js +4 -4
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/llm/index.d.ts +1 -1
- package/dist/src/llm/index.js +1 -1
- package/dist/src/types/data.d.ts +194 -0
- package/dist/src/types/data.d.ts.map +1 -0
- package/dist/src/types/data.js +3 -0
- package/dist/src/types/data.js.map +1 -0
- package/dist/src/types/evals.d.ts +17 -3
- package/dist/src/types/evals.d.ts.map +1 -1
- package/dist/src/types/index.d.ts +1 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/otel.d.ts +14 -13
- package/dist/src/types/otel.d.ts.map +1 -1
- package/dist/src/utils/bindEvaluator.d.ts +219 -0
- package/dist/src/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/src/utils/bindEvaluator.js +166 -0
- package/dist/src/utils/bindEvaluator.js.map +1 -0
- package/dist/src/utils/index.d.ts +2 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/{default_templates → utils}/index.js +1 -2
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/objectMappingUtils.d.ts +166 -0
- package/dist/src/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/src/utils/objectMappingUtils.js +191 -0
- package/dist/src/utils/objectMappingUtils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -1
- package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +39 -0
- package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +45 -0
- package/src/__generated__/default_templates/index.ts +4 -0
- package/src/__generated__/types.ts +11 -0
- package/src/core/EvaluatorBase.ts +43 -0
- package/src/index.ts +1 -0
- package/src/llm/ClassificationEvaluator.ts +39 -3
- package/src/llm/LLMEvaluator.ts +7 -16
- package/src/llm/createClassificationEvaluator.ts +1 -1
- package/src/llm/createDocumentRelevanceEvaluator.ts +79 -0
- package/src/llm/createHallucinationEvaluator.ts +17 -19
- package/src/llm/index.ts +1 -1
- package/src/types/data.ts +200 -0
- package/src/types/evals.ts +25 -5
- package/src/types/index.ts +1 -0
- package/src/types/otel.ts +15 -13
- package/src/utils/bindEvaluator.ts +229 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/objectMappingUtils.ts +202 -0
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -25
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +0 -31
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/index.d.ts +0 -3
- package/dist/esm/default_templates/index.d.ts.map +0 -1
- package/dist/esm/default_templates/index.js +0 -3
- package/dist/esm/default_templates/index.js.map +0 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +0 -43
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +0 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -28
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +0 -34
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/index.d.ts +0 -3
- package/dist/src/default_templates/index.d.ts.map +0 -1
- package/dist/src/default_templates/index.js.map +0 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +0 -43
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +0 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts +0 -25
- package/src/default_templates/HALLUCINATION_TEMPLATE.ts +0 -31
- package/src/default_templates/index.ts +0 -2
- package/src/llm/createDocumentRelevancyEvaluator.ts +0 -77
package/README.md
CHANGED
|
@@ -111,6 +111,48 @@ console.log(result);
|
|
|
111
111
|
// Output: { label: "hallucinated", score: 0, explanation: "..." }
|
|
112
112
|
```
|
|
113
113
|
|
|
114
|
+
### Data Mapping
|
|
115
|
+
|
|
116
|
+
When your data structure doesn't match what an evaluator expects, use `bindEvaluator` to map your fields to the evaluator's expected input format:
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
import {
|
|
120
|
+
bindEvaluator,
|
|
121
|
+
createHallucinationEvaluator,
|
|
122
|
+
} from "@arizeai/phoenix-evals";
|
|
123
|
+
import { openai } from "@ai-sdk/openai";
|
|
124
|
+
|
|
125
|
+
const model = openai("gpt-4o-mini");
|
|
126
|
+
|
|
127
|
+
type ExampleType = {
|
|
128
|
+
question: string;
|
|
129
|
+
context: string;
|
|
130
|
+
answer: string;
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
const evaluator = bindEvaluator<ExampleType>(
|
|
134
|
+
createHallucinationEvaluator({ model }),
|
|
135
|
+
{
|
|
136
|
+
inputMapping: {
|
|
137
|
+
input: "question", // Map "input" from "question"
|
|
138
|
+
reference: "context", // Map "reference" from "context"
|
|
139
|
+
output: "answer", // Map "output" from "answer"
|
|
140
|
+
},
|
|
141
|
+
}
|
|
142
|
+
);
|
|
143
|
+
|
|
144
|
+
const result = await evaluator.evaluate({
|
|
145
|
+
question: "Is Arize Phoenix Open Source?",
|
|
146
|
+
context:
|
|
147
|
+
"Arize Phoenix is a platform for building and deploying AI applications. It is open source.",
|
|
148
|
+
answer: "Arize is not open source.",
|
|
149
|
+
});
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Mapping supports simple properties (`"fieldName"`), dot notation (`"user.profile.name"`), array access (`"items[0].id"`), JSONPath expressions (`"$.items[*].id"`), and function extractors (`(data) => data.customField`).
|
|
153
|
+
|
|
154
|
+
See the complete example in [`examples/bind_evaluator_example.ts`](examples/bind_evaluator_example.ts).
|
|
155
|
+
|
|
114
156
|
## Experimentation with Phoenix
|
|
115
157
|
|
|
116
158
|
This package works seamlessly with [`@arizeai/phoenix-client`](https://www.npmjs.com/package/@arizeai/phoenix-client) to enable experimentation workflows. You can create datasets, run experiments, and trace evaluation calls for analysis and debugging.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,UAAU,CAAC;AAE9D,eAAO,MAAM,kDAAkD,EAAE,6BAkChE,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
export const DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG = {
|
|
3
|
+
name: "document_relevance",
|
|
4
|
+
description: "A specialized evaluator for determining document relevance to a given question.",
|
|
5
|
+
optimizationDirection: "MAXIMIZE",
|
|
6
|
+
template: [
|
|
7
|
+
{
|
|
8
|
+
role: "user",
|
|
9
|
+
content: `
|
|
10
|
+
You are comparing a document to a question and trying to determine
|
|
11
|
+
if the document text contains information relevant to answering the
|
|
12
|
+
question. Here is the data:
|
|
13
|
+
|
|
14
|
+
[BEGIN DATA]
|
|
15
|
+
************
|
|
16
|
+
[Question]: {{input}}
|
|
17
|
+
************
|
|
18
|
+
[Document text]: {{documentText}}
|
|
19
|
+
************
|
|
20
|
+
[END DATA]
|
|
21
|
+
|
|
22
|
+
Compare the question above to the document text. You must determine
|
|
23
|
+
whether the document text contains information that can answer the
|
|
24
|
+
question. Please focus on whether the very specific question can be
|
|
25
|
+
answered by the information in the document text. Your response must be
|
|
26
|
+
either "relevant" or "unrelated". "unrelated" means that the document
|
|
27
|
+
text does not contain an answer to the question. "relevant" means the
|
|
28
|
+
document text contains an answer to the question.
|
|
29
|
+
`,
|
|
30
|
+
},
|
|
31
|
+
],
|
|
32
|
+
choices: {
|
|
33
|
+
"relevant": 1,
|
|
34
|
+
"unrelated": 0
|
|
35
|
+
},
|
|
36
|
+
};
|
|
37
|
+
//# sourceMappingURL=DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAI/C,MAAM,CAAC,MAAM,kDAAkD,GAAkC;IAC/F,IAAI,EAAE,oBAAoB;IAC1B,WAAW,EAAE,iFAAiF;IAC9F,qBAAqB,EAAE,UAAU;IACjC,QAAQ,EAAE;QACR;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;CAoBd;SACI;KACF;IACD,OAAO,EAAE;QACT,UAAU,EAAE,CAAC;QACb,WAAW,EAAE,CAAC;KACf;CACA,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,UAAU,CAAC;AAE9D,eAAO,MAAM,6CAA6C,EAAE,6BAwC3D,CAAC"}
|
package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
export const HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG = {
|
|
3
|
+
name: "hallucination",
|
|
4
|
+
description: "A specialized evaluator for detecting hallucinations in grounded LLM responses.",
|
|
5
|
+
optimizationDirection: "MINIMIZE",
|
|
6
|
+
template: [
|
|
7
|
+
{
|
|
8
|
+
role: "user",
|
|
9
|
+
content: `
|
|
10
|
+
In this task, you will be presented with a query, some context and a response. The response
|
|
11
|
+
is generated to the question based on the context. The response may contain false
|
|
12
|
+
information. You must use the context to determine if the response to the question
|
|
13
|
+
contains false information, if the response is a hallucination of facts. Your objective is
|
|
14
|
+
to determine whether the response text contains factual information and is not a
|
|
15
|
+
hallucination. A 'hallucination' refers to a response that is not based on the context or
|
|
16
|
+
assumes information that is not available in the context. Your response should be a single
|
|
17
|
+
word: either 'factual' or 'hallucinated', and it should not include any other text or
|
|
18
|
+
characters. 'hallucinated' indicates that the response provides factually inaccurate
|
|
19
|
+
information to the query based on the context. 'factual' indicates that the response to
|
|
20
|
+
the question is correct relative to the context, and does not contain made up
|
|
21
|
+
information. Please read the query and context carefully before determining your
|
|
22
|
+
response.
|
|
23
|
+
|
|
24
|
+
[BEGIN DATA]
|
|
25
|
+
************
|
|
26
|
+
[Query]: {{input}}
|
|
27
|
+
************
|
|
28
|
+
[Context]: {{context}}
|
|
29
|
+
************
|
|
30
|
+
[Response]: {{output}}
|
|
31
|
+
************
|
|
32
|
+
[END DATA]
|
|
33
|
+
|
|
34
|
+
Is the response above factual or hallucinated based on the query and context?
|
|
35
|
+
`,
|
|
36
|
+
},
|
|
37
|
+
],
|
|
38
|
+
choices: {
|
|
39
|
+
"hallucinated": 1,
|
|
40
|
+
"factual": 0
|
|
41
|
+
},
|
|
42
|
+
};
|
|
43
|
+
//# sourceMappingURL=HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAI/C,MAAM,CAAC,MAAM,6CAA6C,GAAkC;IAC1F,IAAI,EAAE,eAAe;IACrB,WAAW,EAAE,iFAAiF;IAC9F,qBAAqB,EAAE,UAAU;IACjC,QAAQ,EAAE;QACR;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;CA0Bd;SACI;KACF;IACD,OAAO,EAAE;QACT,cAAc,EAAE,CAAC;QACjB,SAAS,EAAE,CAAC;KACb;CACA,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "./DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js";
|
|
2
|
+
export { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "./HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js";
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kDAAkD,EAAE,MAAM,sDAAsD,CAAC;AAC1H,OAAO,EAAE,6CAA6C,EAAE,MAAM,iDAAiD,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
// This file is generated. Do not edit by hand.
|
|
2
|
+
export { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "./DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js";
|
|
3
|
+
export { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "./HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js";
|
|
4
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,kDAAkD,EAAE,MAAM,sDAAsD,CAAC;AAC1H,OAAO,EAAE,6CAA6C,EAAE,MAAM,iDAAiD,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { PromptTemplate } from "../types/templating.js";
|
|
2
|
+
export type ClassificationEvaluatorConfig = {
|
|
3
|
+
name: string;
|
|
4
|
+
description: string;
|
|
5
|
+
optimizationDirection: "MINIMIZE" | "MAXIMIZE";
|
|
6
|
+
template: PromptTemplate;
|
|
7
|
+
choices: Record<string, number>;
|
|
8
|
+
};
|
|
9
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/__generated__/types.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE1D,MAAM,MAAM,6BAA6B,GAAG;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,qBAAqB,EAAE,UAAU,GAAG,UAAU,CAAC;IAC/C,QAAQ,EAAE,cAAc,CAAC;IACzB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/__generated__/types.ts"],"names":[],"mappings":"AAAA,+CAA+C"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { CreateEvaluatorArgs, EvaluationKind, EvaluationResult, EvaluatorInterface, OptimizationDirection, TelemetryConfig } from "../types/index.js";
|
|
2
|
+
import { ObjectMapping } from "../types/data.js";
|
|
3
|
+
/**
|
|
4
|
+
* Base class for all evaluators
|
|
5
|
+
*/
|
|
6
|
+
export declare abstract class EvaluatorBase<RecordType extends Record<string, unknown>> implements EvaluatorInterface<RecordType> {
|
|
7
|
+
readonly name: string;
|
|
8
|
+
readonly kind: EvaluationKind;
|
|
9
|
+
readonly optimizationDirection?: OptimizationDirection;
|
|
10
|
+
readonly inputMapping?: ObjectMapping<RecordType>;
|
|
11
|
+
readonly telemetry?: TelemetryConfig;
|
|
12
|
+
constructor({ name, kind, optimizationDirection, inputMapping, telemetry, }: CreateEvaluatorArgs<RecordType>);
|
|
13
|
+
abstract evaluate(_example: RecordType): Promise<EvaluationResult>;
|
|
14
|
+
/**
|
|
15
|
+
* Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
|
|
16
|
+
*/
|
|
17
|
+
abstract bindInputMapping(inputMapping: ObjectMapping<RecordType>): EvaluatorBase<RecordType>;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=EvaluatorBase.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvaluatorBase.d.ts","sourceRoot":"","sources":["../../../src/core/EvaluatorBase.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,EACrB,eAAe,EAChB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE9C;;GAEG;AACH,8BAAsB,aAAa,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC5E,YAAW,kBAAkB,CAAC,UAAU,CAAC;IAEzC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC;IAC9B,QAAQ,CAAC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IACvD,QAAQ,CAAC,YAAY,CAAC,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;IAClD,QAAQ,CAAC,SAAS,CAAC,EAAE,eAAe,CAAC;gBACzB,EACV,IAAI,EACJ,IAAI,EACJ,qBAAqB,EACrB,YAAY,EACZ,SAAS,GACV,EAAE,mBAAmB,CAAC,UAAU,CAAC;IAOlC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAElE;;OAEG;IACH,QAAQ,CAAC,gBAAgB,CACvB,YAAY,EAAE,aAAa,CAAC,UAAU,CAAC,GACtC,aAAa,CAAC,UAAU,CAAC;CAC7B"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base class for all evaluators
|
|
3
|
+
*/
|
|
4
|
+
export class EvaluatorBase {
|
|
5
|
+
name;
|
|
6
|
+
kind;
|
|
7
|
+
optimizationDirection;
|
|
8
|
+
inputMapping;
|
|
9
|
+
telemetry;
|
|
10
|
+
constructor({ name, kind, optimizationDirection, inputMapping, telemetry, }) {
|
|
11
|
+
this.name = name;
|
|
12
|
+
this.kind = kind;
|
|
13
|
+
this.optimizationDirection = optimizationDirection;
|
|
14
|
+
this.inputMapping = inputMapping;
|
|
15
|
+
this.telemetry = telemetry;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=EvaluatorBase.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvaluatorBase.js","sourceRoot":"","sources":["../../../src/core/EvaluatorBase.ts"],"names":[],"mappings":"AAUA;;GAEG;AACH,MAAM,OAAgB,aAAa;IAGxB,IAAI,CAAS;IACb,IAAI,CAAiB;IACrB,qBAAqB,CAAyB;IAC9C,YAAY,CAA6B;IACzC,SAAS,CAAmB;IACrC,YAAY,EACV,IAAI,EACJ,IAAI,EACJ,qBAAqB,EACrB,YAAY,EACZ,SAAS,GACuB;QAChC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,qBAAqB,GAAG,qBAAqB,CAAC;QACnD,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;CASF"}
|
package/dist/esm/index.d.ts
CHANGED
package/dist/esm/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC;AACxB,cAAc,SAAS,CAAC"}
|
package/dist/esm/index.js
CHANGED
package/dist/esm/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,OAAO,CAAC;AACtB,cAAc,YAAY,CAAC;AAC3B,cAAc,SAAS,CAAC;AACxB,cAAc,SAAS,CAAC"}
|
|
@@ -1,17 +1,34 @@
|
|
|
1
|
-
import { CreateClassificationEvaluatorArgs, EvaluatorFn, PromptTemplate, WithPromptTemplate } from "../types/index.js";
|
|
1
|
+
import { ClassificationChoicesMap, CreateClassificationEvaluatorArgs, EvaluatorFn, PromptTemplate, WithPromptTemplate } from "../types/index.js";
|
|
2
|
+
import { ObjectMapping } from "../types/data.js";
|
|
2
3
|
import { LLMEvaluator } from "./LLMEvaluator.js";
|
|
4
|
+
import { LanguageModel } from "ai";
|
|
3
5
|
/**
|
|
4
6
|
* An LLM evaluator that performs evaluation via classification
|
|
5
7
|
*/
|
|
6
8
|
export declare class ClassificationEvaluator<RecordType extends Record<string, unknown>> extends LLMEvaluator<RecordType> implements WithPromptTemplate {
|
|
7
9
|
readonly evaluatorFn: EvaluatorFn<RecordType>;
|
|
8
10
|
readonly promptTemplate: PromptTemplate;
|
|
11
|
+
/**
|
|
12
|
+
* A dynamically computed set of prompt template variables
|
|
13
|
+
*/
|
|
9
14
|
private _promptTemplateVariables;
|
|
10
|
-
|
|
15
|
+
/**
|
|
16
|
+
* The model to use for classification
|
|
17
|
+
*/
|
|
18
|
+
readonly model: LanguageModel;
|
|
19
|
+
/**
|
|
20
|
+
* The choices to classify the example into
|
|
21
|
+
*/
|
|
22
|
+
readonly choices: ClassificationChoicesMap;
|
|
23
|
+
constructor(args: CreateClassificationEvaluatorArgs<RecordType>);
|
|
11
24
|
evaluate: (example: RecordType) => Promise<import("../types/index.js").EvaluationResult>;
|
|
12
25
|
/**
|
|
13
26
|
* List out the prompt template variables needed to perform evaluation
|
|
14
27
|
*/
|
|
15
28
|
get promptTemplateVariables(): string[];
|
|
29
|
+
/**
|
|
30
|
+
* Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
|
|
31
|
+
*/
|
|
32
|
+
bindInputMapping(inputMapping: ObjectMapping<RecordType>): ClassificationEvaluator<RecordType>;
|
|
16
33
|
}
|
|
17
34
|
//# sourceMappingURL=ClassificationEvaluator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ClassificationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/ClassificationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EACL,iCAAiC,EACjC,WAAW,EACX,cAAc,EACd,kBAAkB,EACnB,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"ClassificationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/ClassificationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wBAAwB,EACxB,iCAAiC,EACjC,WAAW,EACX,cAAc,EACd,kBAAkB,EACnB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAI9C,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAEnC;;GAEG;AACH,qBAAa,uBAAuB,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC7E,SAAQ,YAAY,CAAC,UAAU,CAC/B,YAAW,kBAAkB;IAE7B,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAC,UAAU,CAAC,CAAC;IAC9C,QAAQ,CAAC,cAAc,EAAE,cAAc,CAAC;IACxC;;OAEG;IACH,OAAO,CAAC,wBAAwB,CAAuB;IACvD;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,aAAa,CAAC;IAC9B;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE,wBAAwB,CAAC;gBAE/B,IAAI,EAAE,iCAAiC,CAAC,UAAU,CAAC;IAS/D,QAAQ,GAAI,SAAS,UAAU,kDAM7B;IACF;;OAEG;IACH,IAAI,uBAAuB,IAAI,MAAM,EAAE,CAStC;IACD;;OAEG;IACH,gBAAgB,CACd,YAAY,EAAE,aAAa,CAAC,UAAU,CAAC,GACtC,uBAAuB,CAAC,UAAU,CAAC;CAMvC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { getTemplateVariables } from "../template/index.js";
|
|
2
|
+
import { remapObject } from "../utils/objectMappingUtils.js";
|
|
2
3
|
import { createClassifierFn } from "./createClassifierFn.js";
|
|
3
4
|
import { LLMEvaluator } from "./LLMEvaluator.js";
|
|
4
5
|
/**
|
|
@@ -7,14 +8,31 @@ import { LLMEvaluator } from "./LLMEvaluator.js";
|
|
|
7
8
|
export class ClassificationEvaluator extends LLMEvaluator {
|
|
8
9
|
evaluatorFn;
|
|
9
10
|
promptTemplate;
|
|
11
|
+
/**
|
|
12
|
+
* A dynamically computed set of prompt template variables
|
|
13
|
+
*/
|
|
10
14
|
_promptTemplateVariables;
|
|
15
|
+
/**
|
|
16
|
+
* The model to use for classification
|
|
17
|
+
*/
|
|
18
|
+
model;
|
|
19
|
+
/**
|
|
20
|
+
* The choices to classify the example into
|
|
21
|
+
*/
|
|
22
|
+
choices;
|
|
11
23
|
constructor(args) {
|
|
12
24
|
super(args);
|
|
13
25
|
this.promptTemplate = args.promptTemplate;
|
|
14
|
-
this.
|
|
26
|
+
this.model = args.model;
|
|
27
|
+
this.choices = args.choices;
|
|
28
|
+
this.evaluatorFn = createClassifierFn({
|
|
29
|
+
...args,
|
|
30
|
+
});
|
|
15
31
|
}
|
|
16
32
|
evaluate = (example) => {
|
|
17
|
-
return this.evaluatorFn(
|
|
33
|
+
return this.evaluatorFn(this.inputMapping
|
|
34
|
+
? remapObject(example, this.inputMapping)
|
|
35
|
+
: example);
|
|
18
36
|
};
|
|
19
37
|
/**
|
|
20
38
|
* List out the prompt template variables needed to perform evaluation
|
|
@@ -29,5 +47,14 @@ export class ClassificationEvaluator extends LLMEvaluator {
|
|
|
29
47
|
// Give a copy of the variables
|
|
30
48
|
return [...this._promptTemplateVariables];
|
|
31
49
|
}
|
|
50
|
+
/**
|
|
51
|
+
* Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
|
|
52
|
+
*/
|
|
53
|
+
bindInputMapping(inputMapping) {
|
|
54
|
+
return new ClassificationEvaluator({
|
|
55
|
+
...this,
|
|
56
|
+
inputMapping,
|
|
57
|
+
});
|
|
58
|
+
}
|
|
32
59
|
}
|
|
33
60
|
//# sourceMappingURL=ClassificationEvaluator.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ClassificationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/ClassificationEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"ClassificationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/ClassificationEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AASnD,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAE1D,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAI9C;;GAEG;AACH,MAAM,OAAO,uBACX,SAAQ,YAAwB;IAGvB,WAAW,CAA0B;IACrC,cAAc,CAAiB;IACxC;;OAEG;IACK,wBAAwB,CAAuB;IACvD;;OAEG;IACM,KAAK,CAAgB;IAC9B;;OAEG;IACM,OAAO,CAA2B;IAE3C,YAAY,IAAmD;QAC7D,KAAK,CAAC,IAAI,CAAC,CAAC;QACZ,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,CAAC;QAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACxB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,WAAW,GAAG,kBAAkB,CAAa;YAChD,GAAG,IAAI;SACR,CAAC,CAAC;IACL,CAAC;IACD,QAAQ,GAAG,CAAC,OAAmB,EAAE,EAAE;QACjC,OAAO,IAAI,CAAC,WAAW,CACrB,IAAI,CAAC,YAAY;YACf,CAAC,CAAC,WAAW,CAAa,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC;YACrD,CAAC,CAAC,OAAO,CACZ,CAAC;IACJ,CAAC,CAAC;IACF;;OAEG;IACH,IAAI,uBAAuB;QACzB,0DAA0D;QAC1D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,wBAAwB,GAAG,oBAAoB,CAAC;gBACnD,QAAQ,EAAE,IAAI,CAAC,cAAc;aAC9B,CAAC,CAAC;QACL,CAAC;QACD,+BAA+B;QAC/B,OAAO,CAAC,GAAG,IAAI,CAAC,wBAAwB,CAAC,CAAC;IAC5C,CAAC;IACD;;OAEG;IACH,gBAAgB,CACd,YAAuC;QAEvC,OAAO,IAAI,uBAAuB,CAAC;YACjC,GAAG,IAAI;YACP,YAAY;SACb,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { EvaluatorBase } from "../core/EvaluatorBase.js";
|
|
2
|
+
import { CreateLLMEvaluatorArgs } from "../types/index.js";
|
|
2
3
|
/**
|
|
3
4
|
* Base class for llm evaluation metrics / scores
|
|
4
5
|
*/
|
|
5
|
-
export declare abstract class LLMEvaluator<RecordType extends Record<string, unknown>>
|
|
6
|
-
|
|
7
|
-
readonly kind: "LLM";
|
|
8
|
-
readonly optimizationDirection?: OptimizationDirection;
|
|
9
|
-
constructor({ name, optimizationDirection }: CreateEvaluatorArgs);
|
|
10
|
-
abstract evaluate(_example: RecordType): Promise<EvaluationResult>;
|
|
6
|
+
export declare abstract class LLMEvaluator<RecordType extends Record<string, unknown>> extends EvaluatorBase<RecordType> {
|
|
7
|
+
constructor({ ...args }: CreateLLMEvaluatorArgs<RecordType>);
|
|
11
8
|
}
|
|
12
9
|
//# sourceMappingURL=LLMEvaluator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LLMEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/LLMEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"LLMEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/LLMEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,sBAAsB,EAAE,MAAM,UAAU,CAAC;AAElD;;GAEG;AACH,8BAAsB,YAAY,CAChC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC1C,SAAQ,aAAa,CAAC,UAAU,CAAC;gBACrB,EAAE,GAAG,IAAI,EAAE,EAAE,sBAAsB,CAAC,UAAU,CAAC;CAG5D"}
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
+
import { EvaluatorBase } from "../core/EvaluatorBase.js";
|
|
1
2
|
/**
|
|
2
3
|
* Base class for llm evaluation metrics / scores
|
|
3
4
|
*/
|
|
4
|
-
export class LLMEvaluator {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
optimizationDirection;
|
|
8
|
-
constructor({ name, optimizationDirection }) {
|
|
9
|
-
this.name = name;
|
|
10
|
-
this.optimizationDirection = optimizationDirection;
|
|
5
|
+
export class LLMEvaluator extends EvaluatorBase {
|
|
6
|
+
constructor({ ...args }) {
|
|
7
|
+
super({ kind: "LLM", ...args });
|
|
11
8
|
}
|
|
12
9
|
}
|
|
13
10
|
//# sourceMappingURL=LLMEvaluator.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LLMEvaluator.js","sourceRoot":"","sources":["../../../src/llm/LLMEvaluator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"LLMEvaluator.js","sourceRoot":"","sources":["../../../src/llm/LLMEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAGtD;;GAEG;AACH,MAAM,OAAgB,YAEpB,SAAQ,aAAyB;IACjC,YAAY,EAAE,GAAG,IAAI,EAAsC;QACzD,KAAK,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,IAAI,EAAE,CAAC,CAAC;IAClC,CAAC;CACF"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { CreateClassificationEvaluatorArgs } from "../types/evals.js";
|
|
2
2
|
import { ClassificationEvaluator } from "./ClassificationEvaluator.js";
|
|
3
|
-
export declare function createClassificationEvaluator<RecordType extends Record<string, unknown>>(args: CreateClassificationEvaluatorArgs): ClassificationEvaluator<RecordType>;
|
|
3
|
+
export declare function createClassificationEvaluator<RecordType extends Record<string, unknown>>(args: CreateClassificationEvaluatorArgs<RecordType>): ClassificationEvaluator<RecordType>;
|
|
4
4
|
//# sourceMappingURL=createClassificationEvaluator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createClassificationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassificationEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iCAAiC,EAAE,MAAM,gBAAgB,CAAC;AAEnE,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,wBAAgB,6BAA6B,CAC3C,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAE1C,IAAI,EAAE,iCAAiC,
|
|
1
|
+
{"version":3,"file":"createClassificationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassificationEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iCAAiC,EAAE,MAAM,gBAAgB,CAAC;AAEnE,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,wBAAgB,6BAA6B,CAC3C,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAE1C,IAAI,EAAE,iCAAiC,CAAC,UAAU,CAAC,GAClD,uBAAuB,CAAC,UAAU,CAAC,CAErC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createClassificationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createClassificationEvaluator.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,MAAM,UAAU,6BAA6B,CAG3C,
|
|
1
|
+
{"version":3,"file":"createClassificationEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createClassificationEvaluator.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,MAAM,UAAU,6BAA6B,CAG3C,IAAmD;IAEnD,OAAO,IAAI,uBAAuB,CAAa,IAAI,CAAC,CAAC;AACvD,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { CreateClassificationEvaluatorArgs } from "../types/evals.js";
|
|
2
|
+
import { ClassificationEvaluator } from "./ClassificationEvaluator.js";
|
|
3
|
+
export interface DocumentRelevanceEvaluatorArgs<RecordType extends Record<string, unknown> = DocumentRelevanceEvaluationRecord> extends Omit<CreateClassificationEvaluatorArgs<RecordType>, "promptTemplate" | "choices" | "optimizationDirection" | "name"> {
|
|
4
|
+
optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
|
|
5
|
+
name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
|
|
6
|
+
choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
|
|
7
|
+
promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* A record to be evaluated by the document relevance evaluator.
|
|
11
|
+
*/
|
|
12
|
+
export interface DocumentRelevanceEvaluationRecord {
|
|
13
|
+
input: string;
|
|
14
|
+
documentText: string;
|
|
15
|
+
[key: string]: unknown;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Creates a document relevance evaluator function.
|
|
19
|
+
*
|
|
20
|
+
* This function returns an evaluator that determines whether a given document text
|
|
21
|
+
* is relevant to a provided input question. The evaluator uses a classification model
|
|
22
|
+
* and a prompt template to make its determination.
|
|
23
|
+
*
|
|
24
|
+
* @param args - The arguments for creating the document relevance evaluator.
|
|
25
|
+
* @param args.model - The model to use for classification.
|
|
26
|
+
* @param args.choices - The possible classification choices (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices).
|
|
27
|
+
* @param args.promptTemplate - The prompt template to use (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template).
|
|
28
|
+
* @param args.telemetry - The telemetry to use for the evaluator.
|
|
29
|
+
*
|
|
30
|
+
* @returns An evaluator function that takes a {@link DocumentRelevanceEvaluationRecord} and returns a classification result
|
|
31
|
+
* indicating whether the document is relevant to the input question.
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```ts
|
|
35
|
+
* const evaluator = createDocumentRelevanceEvaluator({ model: openai("gpt-4o-mini") });
|
|
36
|
+
* const result = await evaluator.evaluate({
|
|
37
|
+
* input: "What is the capital of France?",
|
|
38
|
+
* documentText: "Paris is the capital and most populous city of France.",
|
|
39
|
+
* });
|
|
40
|
+
* console.log(result.label); // "relevant" or "unrelated"
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export declare function createDocumentRelevanceEvaluator<RecordType extends Record<string, unknown> = DocumentRelevanceEvaluationRecord>(args: DocumentRelevanceEvaluatorArgs<RecordType>): ClassificationEvaluator<RecordType>;
|
|
44
|
+
//# sourceMappingURL=createDocumentRelevanceEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createDocumentRelevanceEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createDocumentRelevanceEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iCAAiC,EAAE,MAAM,gBAAgB,CAAC;AAEnE,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAGpE,MAAM,WAAW,8BAA8B,CAC7C,UAAU,SAAS,MAAM,CACvB,MAAM,EACN,OAAO,CACR,GAAG,iCAAiC,CACrC,SAAQ,IAAI,CACV,iCAAiC,CAAC,UAAU,CAAC,EAC7C,gBAAgB,GAAG,SAAS,GAAG,uBAAuB,GAAG,MAAM,CAChE;IACD,qBAAqB,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC;IAC/F,IAAI,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;IAC7D,OAAO,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,SAAS,CAAC,CAAC;IACnE,cAAc,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,gBAAgB,CAAC,CAAC;CAClF;AAED;;GAEG;AACH,MAAM,WAAW,iCAAiC;IAChD,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,CAAC;IACrB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,wBAAgB,gCAAgC,CAC9C,UAAU,SAAS,MAAM,CACvB,MAAM,EACN,OAAO,CACR,GAAG,iCAAiC,EAErC,IAAI,EAAE,8BAA8B,CAAC,UAAU,CAAC,GAC/C,uBAAuB,CAAC,UAAU,CAAC,CAerC"}
|
package/dist/esm/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js}
RENAMED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates/index.js";
|
|
2
2
|
import { createClassificationEvaluator } from "./createClassificationEvaluator.js";
|
|
3
3
|
/**
|
|
4
|
-
* Creates a document
|
|
4
|
+
* Creates a document relevance evaluator function.
|
|
5
5
|
*
|
|
6
6
|
* This function returns an evaluator that determines whether a given document text
|
|
7
7
|
* is relevant to a provided input question. The evaluator uses a classification model
|
|
8
8
|
* and a prompt template to make its determination.
|
|
9
9
|
*
|
|
10
|
-
* @param args - The arguments for creating the document
|
|
10
|
+
* @param args - The arguments for creating the document relevance evaluator.
|
|
11
11
|
* @param args.model - The model to use for classification.
|
|
12
|
-
* @param args.choices - The possible classification choices (defaults to
|
|
13
|
-
* @param args.promptTemplate - The prompt template to use (defaults to
|
|
12
|
+
* @param args.choices - The possible classification choices (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices).
|
|
13
|
+
* @param args.promptTemplate - The prompt template to use (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template).
|
|
14
14
|
* @param args.telemetry - The telemetry to use for the evaluator.
|
|
15
15
|
*
|
|
16
|
-
* @returns An evaluator function that takes a {@link
|
|
16
|
+
* @returns An evaluator function that takes a {@link DocumentRelevanceEvaluationRecord} and returns a classification result
|
|
17
17
|
* indicating whether the document is relevant to the input question.
|
|
18
18
|
*
|
|
19
19
|
* @example
|
|
20
20
|
* ```ts
|
|
21
|
-
* const evaluator =
|
|
21
|
+
* const evaluator = createDocumentRelevanceEvaluator({ model: openai("gpt-4o-mini") });
|
|
22
22
|
* const result = await evaluator.evaluate({
|
|
23
23
|
* input: "What is the capital of France?",
|
|
24
24
|
* documentText: "Paris is the capital and most populous city of France.",
|
|
@@ -26,15 +26,14 @@ import { createClassificationEvaluator } from "./createClassificationEvaluator.j
|
|
|
26
26
|
* console.log(result.label); // "relevant" or "unrelated"
|
|
27
27
|
* ```
|
|
28
28
|
*/
|
|
29
|
-
export function
|
|
30
|
-
const { choices =
|
|
29
|
+
export function createDocumentRelevanceEvaluator(args) {
|
|
30
|
+
const { choices = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices, promptTemplate = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template, optimizationDirection = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection, name = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.name, ...rest } = args;
|
|
31
31
|
return createClassificationEvaluator({
|
|
32
|
-
...
|
|
32
|
+
...rest,
|
|
33
33
|
promptTemplate,
|
|
34
34
|
choices,
|
|
35
35
|
optimizationDirection,
|
|
36
36
|
name,
|
|
37
|
-
...rest,
|
|
38
37
|
});
|
|
39
38
|
}
|
|
40
|
-
//# sourceMappingURL=
|
|
39
|
+
//# sourceMappingURL=createDocumentRelevanceEvaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createDocumentRelevanceEvaluator.js","sourceRoot":"","sources":["../../../src/llm/createDocumentRelevanceEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kDAAkD,EAAE,MAAM,oCAAoC,CAAC;AAIxG,OAAO,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AA0BhF;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,UAAU,gCAAgC,CAM9C,IAAgD;IAEhD,MAAM,EACJ,OAAO,GAAG,kDAAkD,CAAC,OAAO,EACpE,cAAc,GAAG,kDAAkD,CAAC,QAAQ,EAC5E,qBAAqB,GAAG,kDAAkD,CAAC,qBAAqB,EAChG,IAAI,GAAG,kDAAkD,CAAC,IAAI,EAC9D,GAAG,IAAI,EACR,GAAG,IAAI,CAAC;IACT,OAAO,6BAA6B,CAAa;QAC/C,GAAG,IAAI;QACP,cAAc;QACd,OAAO;QACP,qBAAqB;QACrB,IAAI;KACL,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { CreateClassificationEvaluatorArgs } from "../types/evals.js";
|
|
2
2
|
import { ClassificationEvaluator } from "./ClassificationEvaluator.js";
|
|
3
|
-
export interface HallucinationEvaluatorArgs extends Omit<CreateClassificationEvaluatorArgs
|
|
4
|
-
optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
|
|
5
|
-
name?: CreateClassificationEvaluatorArgs["name"];
|
|
6
|
-
choices?: CreateClassificationEvaluatorArgs["choices"];
|
|
7
|
-
promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
|
|
3
|
+
export interface HallucinationEvaluatorArgs<RecordType extends Record<string, unknown> = HallucinationEvaluationRecord> extends Omit<CreateClassificationEvaluatorArgs<RecordType>, "promptTemplate" | "choices" | "optimizationDirection" | "name"> {
|
|
4
|
+
optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
|
|
5
|
+
name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
|
|
6
|
+
choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
|
|
7
|
+
promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
|
|
8
8
|
}
|
|
9
9
|
/**
|
|
10
10
|
* A record to be evaluated by the hallucination evaluator.
|
|
@@ -12,7 +12,6 @@ export interface HallucinationEvaluatorArgs extends Omit<CreateClassificationEva
|
|
|
12
12
|
export type HallucinationEvaluationRecord = {
|
|
13
13
|
input: string;
|
|
14
14
|
output: string;
|
|
15
|
-
reference?: string;
|
|
16
15
|
context?: string;
|
|
17
16
|
};
|
|
18
17
|
/**
|
|
@@ -21,5 +20,5 @@ export type HallucinationEvaluationRecord = {
|
|
|
21
20
|
* @param args - The arguments for creating the hallucination evaluator.
|
|
22
21
|
* @returns A function that evaluates whether an answer is factual or hallucinated based on a query and reference text.
|
|
23
22
|
*/
|
|
24
|
-
export declare function createHallucinationEvaluator<RecordType extends Record<string, unknown> = HallucinationEvaluationRecord>(args: HallucinationEvaluatorArgs): ClassificationEvaluator<RecordType>;
|
|
23
|
+
export declare function createHallucinationEvaluator<RecordType extends Record<string, unknown> = HallucinationEvaluationRecord>(args: HallucinationEvaluatorArgs<RecordType>): ClassificationEvaluator<RecordType>;
|
|
25
24
|
//# sourceMappingURL=createHallucinationEvaluator.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"createHallucinationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"createHallucinationEvaluator.d.ts","sourceRoot":"","sources":["../../../src/llm/createHallucinationEvaluator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iCAAiC,EAAE,MAAM,gBAAgB,CAAC;AAEnE,OAAO,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAC;AAGpE,MAAM,WAAW,0BAA0B,CACzC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,6BAA6B,CAC1E,SAAQ,IAAI,CACV,iCAAiC,CAAC,UAAU,CAAC,EAC7C,gBAAgB,GAAG,SAAS,GAAG,uBAAuB,GAAG,MAAM,CAChE;IACD,qBAAqB,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC;IAC/F,IAAI,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;IAC7D,OAAO,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,SAAS,CAAC,CAAC;IACnE,cAAc,CAAC,EAAE,iCAAiC,CAAC,UAAU,CAAC,CAAC,gBAAgB,CAAC,CAAC;CAClF;AAED;;GAEG;AACH,MAAM,MAAM,6BAA6B,GAAG;IAC1C,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,CAAC;AACF;;;;;GAKG;AACH,wBAAgB,4BAA4B,CAC1C,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,6BAA6B,EAE1E,IAAI,EAAE,0BAA0B,CAAC,UAAU,CAAC,GAC3C,uBAAuB,CAAC,UAAU,CAAC,CAerC"}
|