@arizeai/phoenix-evals 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +37 -0
- package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +43 -0
- package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/esm/__generated__/default_templates/index.d.ts +3 -0
- package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/esm/__generated__/default_templates/index.js +4 -0
- package/dist/esm/__generated__/default_templates/index.js.map +1 -0
- package/dist/esm/__generated__/types.d.ts +9 -0
- package/dist/esm/__generated__/types.d.ts.map +1 -0
- package/dist/esm/__generated__/types.js +3 -0
- package/dist/esm/__generated__/types.js.map +1 -0
- package/dist/esm/core/FunctionEvaluator.d.ts +16 -0
- package/dist/esm/core/FunctionEvaluator.d.ts.map +1 -0
- package/dist/esm/core/FunctionEvaluator.js +18 -0
- package/dist/esm/core/FunctionEvaluator.js.map +1 -0
- package/dist/esm/helpers/asEvaluatorFn.d.ts +6 -0
- package/dist/esm/helpers/asEvaluatorFn.d.ts.map +1 -0
- package/dist/esm/helpers/asEvaluatorFn.js +15 -0
- package/dist/esm/helpers/asEvaluatorFn.js.map +1 -0
- package/dist/esm/helpers/createEvaluator.d.ts +141 -0
- package/dist/esm/helpers/createEvaluator.d.ts.map +1 -0
- package/dist/esm/helpers/createEvaluator.js +96 -0
- package/dist/esm/helpers/createEvaluator.js.map +1 -0
- package/dist/esm/helpers/index.d.ts +2 -0
- package/dist/esm/helpers/index.d.ts.map +1 -0
- package/dist/esm/helpers/index.js +2 -0
- package/dist/esm/helpers/index.js.map +1 -0
- package/dist/esm/helpers/toEvaluationResult.d.ts +67 -0
- package/dist/esm/helpers/toEvaluationResult.d.ts.map +1 -0
- package/dist/esm/helpers/toEvaluationResult.js +133 -0
- package/dist/esm/helpers/toEvaluationResult.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/{createDocumentRelevancyEvaluator.d.ts → createDocumentRelevanceEvaluator.d.ts} +11 -11
- package/dist/esm/llm/{createDocumentRelevancyEvaluator.d.ts.map → createDocumentRelevanceEvaluator.d.ts.map} +1 -1
- package/dist/esm/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +10 -10
- package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +0 -1
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +2 -2
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/llm/index.d.ts +1 -1
- package/dist/esm/llm/index.js +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/base.d.ts +2 -0
- package/dist/esm/types/base.d.ts.map +1 -0
- package/dist/esm/types/base.js +2 -0
- package/dist/esm/types/base.js.map +1 -0
- package/dist/esm/types/index.d.ts +1 -0
- package/dist/esm/types/index.d.ts.map +1 -1
- package/dist/esm/types/index.js +1 -0
- package/dist/esm/types/index.js.map +1 -1
- package/dist/esm/utils/bindEvaluator.d.ts +1 -1
- package/dist/esm/utils/bindEvaluator.js +1 -1
- package/dist/esm/utils/typeUtils.d.ts +7 -0
- package/dist/esm/utils/typeUtils.d.ts.map +1 -0
- package/dist/esm/utils/typeUtils.js +13 -0
- package/dist/esm/utils/typeUtils.js.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +40 -0
- package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +46 -0
- package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
- package/dist/src/__generated__/default_templates/index.d.ts +3 -0
- package/dist/src/__generated__/default_templates/index.d.ts.map +1 -0
- package/dist/src/__generated__/default_templates/index.js +9 -0
- package/dist/src/__generated__/default_templates/index.js.map +1 -0
- package/dist/src/__generated__/types.d.ts +9 -0
- package/dist/src/__generated__/types.d.ts.map +1 -0
- package/dist/src/__generated__/types.js +4 -0
- package/dist/src/__generated__/types.js.map +1 -0
- package/dist/src/core/FunctionEvaluator.d.ts +16 -0
- package/dist/src/core/FunctionEvaluator.d.ts.map +1 -0
- package/dist/src/core/FunctionEvaluator.js +33 -0
- package/dist/src/core/FunctionEvaluator.js.map +1 -0
- package/dist/src/helpers/asEvaluatorFn.d.ts +6 -0
- package/dist/src/helpers/asEvaluatorFn.d.ts.map +1 -0
- package/dist/src/helpers/asEvaluatorFn.js +18 -0
- package/dist/src/helpers/asEvaluatorFn.js.map +1 -0
- package/dist/src/helpers/createEvaluator.d.ts +141 -0
- package/dist/src/helpers/createEvaluator.d.ts.map +1 -0
- package/dist/src/helpers/createEvaluator.js +99 -0
- package/dist/src/helpers/createEvaluator.js.map +1 -0
- package/dist/src/helpers/index.d.ts +2 -0
- package/dist/src/helpers/index.d.ts.map +1 -0
- package/dist/src/{default_templates → helpers}/index.js +1 -2
- package/dist/src/helpers/index.js.map +1 -0
- package/dist/src/helpers/toEvaluationResult.d.ts +67 -0
- package/dist/src/helpers/toEvaluationResult.d.ts.map +1 -0
- package/dist/src/helpers/toEvaluationResult.js +136 -0
- package/dist/src/helpers/toEvaluationResult.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/{createDocumentRelevancyEvaluator.d.ts → createDocumentRelevanceEvaluator.d.ts} +11 -11
- package/dist/src/llm/{createDocumentRelevancyEvaluator.d.ts.map → createDocumentRelevanceEvaluator.d.ts.map} +1 -1
- package/dist/src/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +11 -11
- package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -0
- package/dist/src/llm/createHallucinationEvaluator.d.ts +0 -1
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js +2 -2
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/llm/index.d.ts +1 -1
- package/dist/src/llm/index.js +1 -1
- package/dist/src/types/base.d.ts +2 -0
- package/dist/src/types/base.d.ts.map +1 -0
- package/dist/src/types/base.js +3 -0
- package/dist/src/types/base.js.map +1 -0
- package/dist/src/types/index.d.ts +1 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/utils/bindEvaluator.d.ts +1 -1
- package/dist/src/utils/bindEvaluator.js +1 -1
- package/dist/src/utils/typeUtils.d.ts +7 -0
- package/dist/src/utils/typeUtils.d.ts.map +1 -0
- package/dist/src/utils/typeUtils.js +16 -0
- package/dist/src/utils/typeUtils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +7 -1
- package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +39 -0
- package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +45 -0
- package/src/__generated__/default_templates/index.ts +4 -0
- package/src/__generated__/types.ts +11 -0
- package/src/core/FunctionEvaluator.ts +28 -0
- package/src/helpers/asEvaluatorFn.ts +19 -0
- package/src/helpers/createEvaluator.ts +184 -0
- package/src/helpers/index.ts +1 -0
- package/src/helpers/toEvaluationResult.ts +145 -0
- package/src/index.ts +1 -0
- package/src/llm/{createDocumentRelevancyEvaluator.ts → createDocumentRelevanceEvaluator.ts} +18 -21
- package/src/llm/createHallucinationEvaluator.ts +5 -9
- package/src/llm/index.ts +1 -1
- package/src/types/base.ts +2 -0
- package/src/types/index.ts +1 -0
- package/src/utils/bindEvaluator.ts +1 -1
- package/src/utils/typeUtils.ts +14 -0
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -25
- package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +0 -31
- package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/esm/default_templates/index.d.ts +0 -3
- package/dist/esm/default_templates/index.d.ts.map +0 -1
- package/dist/esm/default_templates/index.js +0 -3
- package/dist/esm/default_templates/index.js.map +0 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -28
- package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +0 -34
- package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
- package/dist/src/default_templates/index.d.ts +0 -3
- package/dist/src/default_templates/index.d.ts.map +0 -1
- package/dist/src/default_templates/index.js.map +0 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +0 -1
- package/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts +0 -25
- package/src/default_templates/HALLUCINATION_TEMPLATE.ts +0 -31
- package/src/default_templates/index.ts +0 -2
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"typeUtils.d.ts","sourceRoot":"","sources":["../../../src/utils/typeUtils.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,CAAC,GAAG,OAAO,EAAE,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,OAAO,CAAC,CAAC,CAAC,CAQ1E"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * Type guard that checks whether a value is a Promise
|
|
3
|
+
* @param value
|
|
4
|
+
* @returns true if it is a Promise
|
|
5
|
+
*/
|
|
6
|
+
export function isPromise(value) {
|
|
7
|
+
return (!!value &&
|
|
8
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
9
|
+
typeof value?.then === "function" &&
|
|
10
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
11
|
+
typeof value?.catch === "function");
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=typeUtils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"typeUtils.js","sourceRoot":"","sources":["../../../src/utils/typeUtils.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,MAAM,UAAU,SAAS,CAAc,KAAc;IACnD,OAAO,CACL,CAAC,CAAC,KAAK;QACP,8DAA8D;QAC9D,OAAQ,KAAa,EAAE,IAAI,KAAK,UAAU;QAC1C,8DAA8D;QAC9D,OAAQ,KAAa,EAAE,KAAK,KAAK,UAAU,CAC5C,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,UAAU,CAAC;AAE9D,eAAO,MAAM,kDAAkD,EAAE,6BAkChE,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// This file is generated. Do not edit by hand.
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG = void 0;
|
|
5
|
+
exports.DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG = {
|
|
6
|
+
name: "document_relevance",
|
|
7
|
+
description: "A specialized evaluator for determining document relevance to a given question.",
|
|
8
|
+
optimizationDirection: "MAXIMIZE",
|
|
9
|
+
template: [
|
|
10
|
+
{
|
|
11
|
+
role: "user",
|
|
12
|
+
content: `
|
|
13
|
+
You are comparing a document to a question and trying to determine
|
|
14
|
+
if the document text contains information relevant to answering the
|
|
15
|
+
question. Here is the data:
|
|
16
|
+
|
|
17
|
+
[BEGIN DATA]
|
|
18
|
+
************
|
|
19
|
+
[Question]: {{input}}
|
|
20
|
+
************
|
|
21
|
+
[Document text]: {{documentText}}
|
|
22
|
+
************
|
|
23
|
+
[END DATA]
|
|
24
|
+
|
|
25
|
+
Compare the question above to the document text. You must determine
|
|
26
|
+
whether the document text contains information that can answer the
|
|
27
|
+
question. Please focus on whether the very specific question can be
|
|
28
|
+
answered by the information in the document text. Your response must be
|
|
29
|
+
either "relevant" or "unrelated". "unrelated" means that the document
|
|
30
|
+
text does not contain an answer to the question. "relevant" means the
|
|
31
|
+
document text contains an answer to the question.
|
|
32
|
+
`,
|
|
33
|
+
},
|
|
34
|
+
],
|
|
35
|
+
choices: {
|
|
36
|
+
"relevant": 1,
|
|
37
|
+
"unrelated": 0
|
|
38
|
+
},
|
|
39
|
+
};
|
|
40
|
+
//# sourceMappingURL=DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAIlC,QAAA,kDAAkD,GAAkC;IAC/F,IAAI,EAAE,oBAAoB;IAC1B,WAAW,EAAE,iFAAiF;IAC9F,qBAAqB,EAAE,UAAU;IACjC,QAAQ,EAAE;QACR;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;CAoBd;SACI;KACF;IACD,OAAO,EAAE;QACT,UAAU,EAAE,CAAC;QACb,WAAW,EAAE,CAAC;KACf;CACA,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,UAAU,CAAC;AAE9D,eAAO,MAAM,6CAA6C,EAAE,6BAwC3D,CAAC"}
|
package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// This file is generated. Do not edit by hand.
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG = void 0;
|
|
5
|
+
exports.HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG = {
|
|
6
|
+
name: "hallucination",
|
|
7
|
+
description: "A specialized evaluator for detecting hallucinations in grounded LLM responses.",
|
|
8
|
+
optimizationDirection: "MINIMIZE",
|
|
9
|
+
template: [
|
|
10
|
+
{
|
|
11
|
+
role: "user",
|
|
12
|
+
content: `
|
|
13
|
+
In this task, you will be presented with a query, some context and a response. The response
|
|
14
|
+
is generated to the question based on the context. The response may contain false
|
|
15
|
+
information. You must use the context to determine if the response to the question
|
|
16
|
+
contains false information, if the response is a hallucination of facts. Your objective is
|
|
17
|
+
to determine whether the response text contains factual information and is not a
|
|
18
|
+
hallucination. A 'hallucination' refers to a response that is not based on the context or
|
|
19
|
+
assumes information that is not available in the context. Your response should be a single
|
|
20
|
+
word: either 'factual' or 'hallucinated', and it should not include any other text or
|
|
21
|
+
characters. 'hallucinated' indicates that the response provides factually inaccurate
|
|
22
|
+
information to the query based on the context. 'factual' indicates that the response to
|
|
23
|
+
the question is correct relative to the context, and does not contain made up
|
|
24
|
+
information. Please read the query and context carefully before determining your
|
|
25
|
+
response.
|
|
26
|
+
|
|
27
|
+
[BEGIN DATA]
|
|
28
|
+
************
|
|
29
|
+
[Query]: {{input}}
|
|
30
|
+
************
|
|
31
|
+
[Context]: {{context}}
|
|
32
|
+
************
|
|
33
|
+
[Response]: {{output}}
|
|
34
|
+
************
|
|
35
|
+
[END DATA]
|
|
36
|
+
|
|
37
|
+
Is the response above factual or hallucinated based on the query and context?
|
|
38
|
+
`,
|
|
39
|
+
},
|
|
40
|
+
],
|
|
41
|
+
choices: {
|
|
42
|
+
"hallucinated": 1,
|
|
43
|
+
"factual": 0
|
|
44
|
+
},
|
|
45
|
+
};
|
|
46
|
+
//# sourceMappingURL=HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAIlC,QAAA,6CAA6C,GAAkC;IAC1F,IAAI,EAAE,eAAe;IACrB,WAAW,EAAE,iFAAiF;IAC9F,qBAAqB,EAAE,UAAU;IACjC,QAAQ,EAAE;QACR;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;CA0Bd;SACI;KACF;IACD,OAAO,EAAE;QACT,cAAc,EAAE,CAAC;QACjB,SAAS,EAAE,CAAC;KACb;CACA,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kDAAkD,EAAE,MAAM,sDAAsD,CAAC;AAC1H,OAAO,EAAE,6CAA6C,EAAE,MAAM,iDAAiD,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// This file is generated. Do not edit by hand.
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG = exports.DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG = void 0;
|
|
5
|
+
var DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG_1 = require("./DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG");
|
|
6
|
+
Object.defineProperty(exports, "DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG", { enumerable: true, get: function () { return DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG_1.DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG; } });
|
|
7
|
+
var HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG_1 = require("./HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG");
|
|
8
|
+
Object.defineProperty(exports, "HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG", { enumerable: true, get: function () { return HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG_1.HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG; } });
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/__generated__/default_templates/index.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAE/C,2HAA0H;AAAjH,wLAAA,kDAAkD,OAAA;AAC3D,iHAAgH;AAAvG,8KAAA,6CAA6C,OAAA"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { PromptTemplate } from "../types/templating";
|
|
2
|
+
export type ClassificationEvaluatorConfig = {
|
|
3
|
+
name: string;
|
|
4
|
+
description: string;
|
|
5
|
+
optimizationDirection: "MINIMIZE" | "MAXIMIZE";
|
|
6
|
+
template: PromptTemplate;
|
|
7
|
+
choices: Record<string, number>;
|
|
8
|
+
};
|
|
9
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/__generated__/types.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE1D,MAAM,MAAM,6BAA6B,GAAG;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,qBAAqB,EAAE,UAAU,GAAG,UAAU,CAAC;IAC/C,QAAQ,EAAE,cAAc,CAAC;IACzB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/__generated__/types.ts"],"names":[],"mappings":";AAAA,+CAA+C"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { CreateEvaluatorArgs, EvaluationResult, ObjectMapping } from "../types";
|
|
2
|
+
import { EvaluatorBase } from "./EvaluatorBase";
|
|
3
|
+
type FunctionEvaluatorArgs<RecordType extends Record<string, unknown>> = CreateEvaluatorArgs<RecordType> & {
|
|
4
|
+
evaluateFn: (args: RecordType) => Promise<EvaluationResult>;
|
|
5
|
+
};
|
|
6
|
+
/**
|
|
7
|
+
* A class that constructs an evaluator based on an evaluate function.
|
|
8
|
+
*/
|
|
9
|
+
export declare class FunctionEvaluator<RecordType extends Record<string, unknown>> extends EvaluatorBase<RecordType> {
|
|
10
|
+
readonly evaluateFn: (args: RecordType) => Promise<EvaluationResult>;
|
|
11
|
+
constructor({ evaluateFn, ...args }: FunctionEvaluatorArgs<RecordType>);
|
|
12
|
+
evaluate(args: RecordType): Promise<EvaluationResult>;
|
|
13
|
+
bindInputMapping(inputMapping: ObjectMapping<RecordType>): FunctionEvaluator<RecordType>;
|
|
14
|
+
}
|
|
15
|
+
export {};
|
|
16
|
+
//# sourceMappingURL=FunctionEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FunctionEvaluator.d.ts","sourceRoot":"","sources":["../../../src/core/FunctionEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEhF,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,KAAK,qBAAqB,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IACnE,mBAAmB,CAAC,UAAU,CAAC,GAAG;IAChC,UAAU,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;CAC7D,CAAC;AACJ;;GAEG;AACH,qBAAa,iBAAiB,CAC5B,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC1C,SAAQ,aAAa,CAAC,UAAU,CAAC;IACjC,QAAQ,CAAC,UAAU,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;gBACzD,EAAE,UAAU,EAAE,GAAG,IAAI,EAAE,EAAE,qBAAqB,CAAC,UAAU,CAAC;IAIhE,QAAQ,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAG3D,gBAAgB,CACd,YAAY,EAAE,aAAa,CAAC,UAAU,CAAC,GACtC,iBAAiB,CAAC,UAAU,CAAC;CAGjC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __rest = (this && this.__rest) || function (s, e) {
|
|
3
|
+
var t = {};
|
|
4
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)
|
|
5
|
+
t[p] = s[p];
|
|
6
|
+
if (s != null && typeof Object.getOwnPropertySymbols === "function")
|
|
7
|
+
for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {
|
|
8
|
+
if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))
|
|
9
|
+
t[p[i]] = s[p[i]];
|
|
10
|
+
}
|
|
11
|
+
return t;
|
|
12
|
+
};
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.FunctionEvaluator = void 0;
|
|
15
|
+
const EvaluatorBase_1 = require("./EvaluatorBase");
|
|
16
|
+
/**
|
|
17
|
+
* A class that constructs an evaluator based on an evaluate function.
|
|
18
|
+
*/
|
|
19
|
+
class FunctionEvaluator extends EvaluatorBase_1.EvaluatorBase {
|
|
20
|
+
constructor(_a) {
|
|
21
|
+
var { evaluateFn } = _a, args = __rest(_a, ["evaluateFn"]);
|
|
22
|
+
super(Object.assign({}, args));
|
|
23
|
+
this.evaluateFn = evaluateFn;
|
|
24
|
+
}
|
|
25
|
+
async evaluate(args) {
|
|
26
|
+
return this.evaluateFn(args);
|
|
27
|
+
}
|
|
28
|
+
bindInputMapping(inputMapping) {
|
|
29
|
+
return new FunctionEvaluator(Object.assign(Object.assign({}, this), { inputMapping }));
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
exports.FunctionEvaluator = FunctionEvaluator;
|
|
33
|
+
//# sourceMappingURL=FunctionEvaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FunctionEvaluator.js","sourceRoot":"","sources":["../../../src/core/FunctionEvaluator.ts"],"names":[],"mappings":";;;;;;;;;;;;;;AAEA,mDAAgD;AAMhD;;GAEG;AACH,MAAa,iBAEX,SAAQ,6BAAyB;IAEjC,YAAY,EAA0D;YAA1D,EAAE,UAAU,OAA8C,EAAzC,IAAI,cAArB,cAAuB,CAAF;QAC/B,KAAK,mBAAM,IAAI,EAAG,CAAC;QACnB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IACD,KAAK,CAAC,QAAQ,CAAC,IAAgB;QAC7B,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;IAC/B,CAAC;IACD,gBAAgB,CACd,YAAuC;QAEvC,OAAO,IAAI,iBAAiB,iCAAM,IAAI,KAAE,YAAY,IAAG,CAAC;IAC1D,CAAC;CACF;AAhBD,8CAgBC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { AnyFn, EvaluatorFn } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* A function that converts a generic function into an evaluator function
|
|
4
|
+
*/
|
|
5
|
+
export declare function asEvaluatorFn<RecordType extends Record<string, unknown>>(fn: AnyFn): EvaluatorFn<RecordType>;
|
|
6
|
+
//# sourceMappingURL=asEvaluatorFn.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"asEvaluatorFn.d.ts","sourceRoot":"","sources":["../../../src/helpers/asEvaluatorFn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAK9C;;GAEG;AACH,wBAAgB,aAAa,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACtE,EAAE,EAAE,KAAK,GACR,WAAW,CAAC,UAAU,CAAC,CAQzB"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.asEvaluatorFn = asEvaluatorFn;
|
|
4
|
+
const typeUtils_1 = require("../utils/typeUtils");
|
|
5
|
+
const toEvaluationResult_1 = require("./toEvaluationResult");
|
|
6
|
+
/**
|
|
7
|
+
* A function that converts a generic function into an evaluator function
|
|
8
|
+
*/
|
|
9
|
+
function asEvaluatorFn(fn) {
|
|
10
|
+
return async (...args) => {
|
|
11
|
+
let result = fn(...args);
|
|
12
|
+
if ((0, typeUtils_1.isPromise)(result)) {
|
|
13
|
+
result = await result;
|
|
14
|
+
}
|
|
15
|
+
return (0, toEvaluationResult_1.toEvaluationResult)(result);
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=asEvaluatorFn.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"asEvaluatorFn.js","sourceRoot":"","sources":["../../../src/helpers/asEvaluatorFn.ts"],"names":[],"mappings":";;AAQA,sCAUC;AAjBD,kDAA+C;AAE/C,6DAA0D;AAE1D;;GAEG;AACH,SAAgB,aAAa,CAC3B,EAAS;IAET,OAAO,KAAK,EAAE,GAAG,IAAI,EAAE,EAAE;QACvB,IAAI,MAAM,GAAG,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;QACzB,IAAI,IAAA,qBAAS,EAAC,MAAM,CAAC,EAAE,CAAC;YACtB,MAAM,GAAG,MAAM,MAAM,CAAC;QACxB,CAAC;QACD,OAAO,IAAA,uCAAkB,EAAC,MAAM,CAAC,CAAC;IACpC,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { EvaluatorBase } from "../core/EvaluatorBase";
|
|
2
|
+
import { EvaluationKind, OptimizationDirection, TelemetryConfig } from "../types";
|
|
3
|
+
type AnyFn = (...args: any[]) => any;
|
|
4
|
+
/**
|
|
5
|
+
 * Options for creating a custom evaluator using {@link createEvaluator}.
|
|
6
|
+
*
|
|
7
|
+
* @public
|
|
8
|
+
*/
|
|
9
|
+
export type CreateEvaluatorOptions = {
|
|
10
|
+
/**
|
|
11
|
+
* The name of the evaluator / metric that it measures.
|
|
12
|
+
*
|
|
13
|
+
* If not provided, the function will attempt to infer the name from the function's `name` property.
|
|
14
|
+
* If the function has no name, a unique name will be generated.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
 * const evaluator = createEvaluator(myFunction, { name: "custom-metric" });
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
name?: string;
|
|
22
|
+
/**
|
|
23
|
+
* The kind of the evaluation.
|
|
24
|
+
*
|
|
25
|
+
* - `"CODE"`: Code-based evaluator that runs custom logic
|
|
26
|
+
* - `"LLM"`: LLM-based evaluator that uses a language model
|
|
27
|
+
*
|
|
28
|
+
* @defaultValue `"CODE"`
|
|
29
|
+
*
|
|
30
|
+
* @example
|
|
31
|
+
* ```typescript
|
|
32
|
+
 * const evaluator = createEvaluator(myFunction, { kind: "CODE" });
|
|
33
|
+
* ```
|
|
34
|
+
*/
|
|
35
|
+
kind?: EvaluationKind;
|
|
36
|
+
/**
|
|
37
|
+
* The direction to optimize the numeric evaluation score.
|
|
38
|
+
*
|
|
39
|
+
* - `"MAXIMIZE"`: Higher scores are better (e.g., accuracy, F1 score)
|
|
40
|
+
* - `"MINIMIZE"`: Lower scores are better (e.g., error rate, latency)
|
|
41
|
+
*
|
|
42
|
+
* @defaultValue `"MAXIMIZE"`
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* ```typescript
|
|
46
|
+
 * const evaluator = createEvaluator(myFunction, {
|
|
47
|
+
* optimizationDirection: "MAXIMIZE"
|
|
48
|
+
* });
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
optimizationDirection?: OptimizationDirection;
|
|
52
|
+
/**
|
|
53
|
+
* The telemetry configuration for the evaluator.
|
|
54
|
+
*
|
|
55
|
+
* When enabled, the evaluator will automatically create OpenTelemetry spans
|
|
56
|
+
* for tracing and observability. This allows you to track evaluator performance
|
|
57
|
+
* and debug issues in distributed systems.
|
|
58
|
+
*
|
|
59
|
+
* @defaultValue `{ isEnabled: true }`
|
|
60
|
+
*
|
|
61
|
+
* @example
|
|
62
|
+
* ```typescript
|
|
63
|
+
 * const evaluator = createEvaluator(myFunction, {
|
|
64
|
+
* telemetry: { isEnabled: true, tracer: myTracer }
|
|
65
|
+
* });
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
telemetry?: TelemetryConfig;
|
|
69
|
+
};
|
|
70
|
+
/**
|
|
71
|
+
* A factory function for creating a custom evaluator from any function.
|
|
72
|
+
*
|
|
73
|
+
* This function wraps a user-provided function into an evaluator that can be used
|
|
74
|
+
* with Phoenix experiments and evaluations. The function can be synchronous or
|
|
75
|
+
* asynchronous, and can return a number, an {@link EvaluationResult} object, or
|
|
76
|
+
* a value that will be automatically converted to an evaluation result.
|
|
77
|
+
*
|
|
78
|
+
* The evaluator will automatically:
|
|
79
|
+
* - Convert the function's return value to an {@link EvaluationResult}
|
|
80
|
+
* - Handle both sync and async functions
|
|
81
|
+
* - Wrap the function with OpenTelemetry spans if telemetry is enabled
|
|
82
|
+
* - Infer the evaluator name from the function name if not provided
|
|
83
|
+
*
|
|
84
|
+
* @typeParam RecordType - The type of the input record that the evaluator expects.
|
|
85
|
+
* Must extend `Record<string, unknown>`.
|
|
86
|
+
* @typeParam Fn - The type of the function being wrapped. Must be a function that
|
|
87
|
+
* accepts the record type and returns a value compatible with {@link EvaluationResult}.
|
|
88
|
+
*
|
|
89
|
+
* @param fn - The function to wrap as an evaluator. Can be synchronous or asynchronous.
|
|
90
|
+
* The function should accept a record of type `RecordType` and return either:
|
|
91
|
+
* - A number (will be converted to `{ score: number }`)
|
|
92
|
+
* - An {@link EvaluationResult} object
|
|
93
|
+
* - Any value that can be converted to an evaluation result
|
|
94
|
+
*
|
|
95
|
+
* @param options - Optional configuration for the evaluator. See {@link CreateEvaluatorOptions}
|
|
96
|
+
* for details on available options.
|
|
97
|
+
*
|
|
98
|
+
* @returns An {@link EvaluatorInterface} that can be used with Phoenix experiments
|
|
99
|
+
* and evaluation workflows.
|
|
100
|
+
*
|
|
101
|
+
* @example
|
|
102
|
+
* Basic usage with a simple scoring function:
|
|
103
|
+
* ```typescript
|
|
104
|
+
 * const accuracyEvaluator = createEvaluator(
|
|
105
|
+
* ({ output, expected }) => {
|
|
106
|
+
* return output === expected ? 1 : 0;
|
|
107
|
+
* },
|
|
108
|
+
* {
|
|
109
|
+
* name: "accuracy",
|
|
110
|
+
* kind: "CODE",
|
|
111
|
+
* optimizationDirection: "MAXIMIZE"
|
|
112
|
+
* }
|
|
113
|
+
* );
|
|
114
|
+
*
|
|
115
|
+
* const result = await accuracyEvaluator.evaluate({
|
|
116
|
+
* output: "correct answer",
|
|
117
|
+
* expected: "correct answer"
|
|
118
|
+
* });
|
|
119
|
+
* // result: { score: 1 }
|
|
120
|
+
* ```
|
|
121
|
+
*
|
|
122
|
+
*
|
|
123
|
+
* @example
|
|
124
|
+
* Returning a full EvaluationResult:
|
|
125
|
+
* ```typescript
|
|
126
|
+
 * const qualityEvaluator = createEvaluator(
|
|
127
|
+
* ({ output }) => {
|
|
128
|
+
* const score = calculateQuality(output);
|
|
129
|
+
* return {
|
|
130
|
+
* score,
|
|
131
|
+
* label: score > 0.8 ? "high" : "low",
|
|
132
|
+
* explanation: `Quality score: ${score}`
|
|
133
|
+
* };
|
|
134
|
+
* },
|
|
135
|
+
* { name: "quality" }
|
|
136
|
+
* );
|
|
137
|
+
* ```
|
|
138
|
+
*/
|
|
139
|
+
export declare function createEvaluator<RecordType extends Record<string, unknown> = Record<string, unknown>, Fn extends AnyFn = AnyFn>(fn: Fn, options?: CreateEvaluatorOptions): EvaluatorBase<RecordType>;
|
|
140
|
+
export {};
|
|
141
|
+
//# sourceMappingURL=createEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createEvaluator.d.ts","sourceRoot":"","sources":["../../../src/helpers/createEvaluator.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EACL,cAAc,EACd,qBAAqB,EACrB,eAAe,EAChB,MAAM,UAAU,CAAC;AAKlB,KAAK,KAAK,GAAG,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,GAAG,CAAC;AAMrC;;;;GAIG;AACH,MAAM,MAAM,sBAAsB,GAAG;IACnC;;;;;;;;;;OAUG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;;;;;;;;;OAYG;IACH,IAAI,CAAC,EAAE,cAAc,CAAC;IACtB;;;;;;;;;;;;;;OAcG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAC9C;;;;;;;;;;;;;;;OAeG;IACH,SAAS,CAAC,EAAE,eAAe,CAAC;CAC7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoEG;AACH,wBAAgB,eAAe,CAC7B,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACpE,EAAE,SAAS,KAAK,GAAG,KAAK,EACxB,EAAE,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,sBAAsB,GAAG,aAAa,CAAC,UAAU,CAAC,CAyBrE"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createEvaluator = createEvaluator;
|
|
4
|
+
const openinference_core_1 = require("@arizeai/openinference-core");
|
|
5
|
+
const FunctionEvaluator_1 = require("../core/FunctionEvaluator");
|
|
6
|
+
const asEvaluatorFn_1 = require("./asEvaluatorFn");
|
|
7
|
+
function generateUniqueName() {
|
|
8
|
+
return `evaluator-${Math.random().toString(36).substring(2, 15)}`;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* A factory function for creating a custom evaluator from any function.
|
|
12
|
+
*
|
|
13
|
+
* This function wraps a user-provided function into an evaluator that can be used
|
|
14
|
+
* with Phoenix experiments and evaluations. The function can be synchronous or
|
|
15
|
+
* asynchronous, and can return a number, an {@link EvaluationResult} object, or
|
|
16
|
+
* a value that will be automatically converted to an evaluation result.
|
|
17
|
+
*
|
|
18
|
+
* The evaluator will automatically:
|
|
19
|
+
* - Convert the function's return value to an {@link EvaluationResult}
|
|
20
|
+
* - Handle both sync and async functions
|
|
21
|
+
* - Wrap the function with OpenTelemetry spans if telemetry is enabled
|
|
22
|
+
* - Infer the evaluator name from the function name if not provided
|
|
23
|
+
*
|
|
24
|
+
* @typeParam RecordType - The type of the input record that the evaluator expects.
|
|
25
|
+
* Must extend `Record<string, unknown>`.
|
|
26
|
+
* @typeParam Fn - The type of the function being wrapped. Must be a function that
|
|
27
|
+
* accepts the record type and returns a value compatible with {@link EvaluationResult}.
|
|
28
|
+
*
|
|
29
|
+
* @param fn - The function to wrap as an evaluator. Can be synchronous or asynchronous.
|
|
30
|
+
* The function should accept a record of type `RecordType` and return either:
|
|
31
|
+
* - A number (will be converted to `{ score: number }`)
|
|
32
|
+
* - An {@link EvaluationResult} object
|
|
33
|
+
* - Any value that can be converted to an evaluation result
|
|
34
|
+
*
|
|
35
|
+
* @param options - Optional configuration for the evaluator. See {@link CreateEvaluatorOptions}
|
|
36
|
+
* for details on available options.
|
|
37
|
+
*
|
|
38
|
+
* @returns An {@link EvaluatorInterface} that can be used with Phoenix experiments
|
|
39
|
+
* and evaluation workflows.
|
|
40
|
+
*
|
|
41
|
+
* @example
|
|
42
|
+
* Basic usage with a simple scoring function:
|
|
43
|
+
* ```typescript
|
|
44
|
+
* const accuracyEvaluator = createEvaluator(
|
|
45
|
+
* ({ output, expected }) => {
|
|
46
|
+
* return output === expected ? 1 : 0;
|
|
47
|
+
* },
|
|
48
|
+
* {
|
|
49
|
+
* name: "accuracy",
|
|
50
|
+
* kind: "CODE",
|
|
51
|
+
* optimizationDirection: "MAXIMIZE"
|
|
52
|
+
* }
|
|
53
|
+
* );
|
|
54
|
+
*
|
|
55
|
+
* const result = await accuracyEvaluator.evaluate({
|
|
56
|
+
* output: "correct answer",
|
|
57
|
+
* expected: "correct answer"
|
|
58
|
+
* });
|
|
59
|
+
* // result: { score: 1 }
|
|
60
|
+
* ```
|
|
61
|
+
*
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* Returning a full EvaluationResult:
|
|
65
|
+
* ```typescript
|
|
66
|
+
* const qualityEvaluator = createEvaluator(
|
|
67
|
+
* ({ output }) => {
|
|
68
|
+
* const score = calculateQuality(output);
|
|
69
|
+
* return {
|
|
70
|
+
* score,
|
|
71
|
+
* label: score > 0.8 ? "high" : "low",
|
|
72
|
+
* explanation: `Quality score: ${score}`
|
|
73
|
+
* };
|
|
74
|
+
* },
|
|
75
|
+
* { name: "quality" }
|
|
76
|
+
* );
|
|
77
|
+
* ```
|
|
78
|
+
*/
|
|
79
|
+
function createEvaluator(fn, options) {
|
|
80
|
+
const { name, kind, optimizationDirection, telemetry = { isEnabled: true }, } = options || {};
|
|
81
|
+
const evaluatorName = name || fn.name || generateUniqueName();
|
|
82
|
+
let evaluateFn = (0, asEvaluatorFn_1.asEvaluatorFn)(fn);
|
|
83
|
+
// Add OpenTelemetry span wrapping if telemetry is enabled
|
|
84
|
+
if (telemetry && telemetry.isEnabled) {
|
|
85
|
+
evaluateFn = (0, openinference_core_1.withSpan)(evaluateFn, {
|
|
86
|
+
tracer: telemetry.tracer,
|
|
87
|
+
name: evaluatorName,
|
|
88
|
+
kind: "EVALUATOR",
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
return new FunctionEvaluator_1.FunctionEvaluator({
|
|
92
|
+
evaluateFn,
|
|
93
|
+
name: evaluatorName,
|
|
94
|
+
kind: kind || "CODE",
|
|
95
|
+
optimizationDirection: optimizationDirection || "MAXIMIZE",
|
|
96
|
+
telemetry,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=createEvaluator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"createEvaluator.js","sourceRoot":"","sources":["../../../src/helpers/createEvaluator.ts"],"names":[],"mappings":";;AA2JA,0CA4BC;AAvLD,oEAAuD;AAGvD,iEAA8D;AAO9D,mDAAgD;AAKhD,SAAS,kBAAkB;IACzB,OAAO,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AACpE,CAAC;AAqED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoEG;AACH,SAAgB,eAAe,CAG7B,EAAM,EAAE,OAAgC;IACxC,MAAM,EACJ,IAAI,EACJ,IAAI,EACJ,qBAAqB,EACrB,SAAS,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,GAChC,GAAG,OAAO,IAAI,EAAE,CAAC;IAClB,MAAM,aAAa,GAAG,IAAI,IAAI,EAAE,CAAC,IAAI,IAAI,kBAAkB,EAAE,CAAC;IAC9D,IAAI,UAAU,GAAG,IAAA,6BAAa,EAAa,EAAE,CAAC,CAAC;IAE/C,0DAA0D;IAC1D,IAAI,SAAS,IAAI,SAAS,CAAC,SAAS,EAAE,CAAC;QACrC,UAAU,GAAG,IAAA,6BAAQ,EAAC,UAAU,EAAE;YAChC,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,IAAI,EAAE,aAAa;YACnB,IAAI,EAAE,WAAW;SAClB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,IAAI,qCAAiB,CAAa;QACvC,UAAU;QACV,IAAI,EAAE,aAAa;QACnB,IAAI,EAAE,IAAI,IAAI,MAAM;QACpB,qBAAqB,EAAE,qBAAqB,IAAI,UAAU;QAC1D,SAAS;KACV,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/helpers/index.ts"],"names":[],"mappings":"AAAA,cAAc,mBAAmB,CAAC"}
|
|
@@ -14,6 +14,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
__exportStar(require("./
|
|
18
|
-
__exportStar(require("./DOCUMENT_RELEVANCY_TEMPLATE"), exports);
|
|
17
|
+
__exportStar(require("./createEvaluator"), exports);
|
|
19
18
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/helpers/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,oDAAkC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { EvaluationResult } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* Converts an unknown value to an {@link EvaluationResult}.
|
|
4
|
+
*
|
|
5
|
+
* This function provides a flexible way to normalize various return types from
|
|
6
|
+
* evaluator functions into a standardized `EvaluationResult` format. It handles
|
|
7
|
+
* multiple input types:
|
|
8
|
+
*
|
|
9
|
+
* - **Numbers**: Converted to `{ score: number }`
|
|
10
|
+
* - **Strings**: Converted to `{ label: string }`
|
|
11
|
+
* - **Objects**: Extracts `score`, `label`, and `explanation` properties if present
|
|
12
|
+
* - **Other types**: Returns an empty `EvaluationResult` object
|
|
13
|
+
*
|
|
14
|
+
* This is particularly useful when creating evaluators from functions that may
|
|
15
|
+
* return different types, ensuring consistent evaluation result formatting.
|
|
16
|
+
*
|
|
17
|
+
* @param result - The value to convert to an EvaluationResult. Can be:
|
|
18
|
+
* - A number (converted to score)
|
|
19
|
+
* - A string (converted to label)
|
|
20
|
+
* - An object with optional `score`, `label`, and/or `explanation` properties
|
|
21
|
+
* - Any other value (returns empty object)
|
|
22
|
+
*
|
|
23
|
+
* @returns An {@link EvaluationResult} object with extracted properties
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* Convert a number to an EvaluationResult:
|
|
27
|
+
* ```typescript
|
|
28
|
+
* const result = toEvaluationResult(0.95);
|
|
29
|
+
* // Returns: { score: 0.95 }
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* Convert a string to an EvaluationResult:
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const result = toEvaluationResult("correct");
|
|
36
|
+
* // Returns: { label: "correct" }
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* Convert an object with all properties:
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const result = toEvaluationResult({
|
|
43
|
+
* score: 0.9,
|
|
44
|
+
* label: "high",
|
|
45
|
+
* explanation: "High quality output"
|
|
46
|
+
* });
|
|
47
|
+
* // Returns: { score: 0.9, label: "high", explanation: "High quality output" }
|
|
48
|
+
* ```
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* Convert an object with partial properties:
|
|
52
|
+
* ```typescript
|
|
53
|
+
* const result = toEvaluationResult({ score: 0.8 });
|
|
54
|
+
* // Returns: { score: 0.8 }
|
|
55
|
+
* ```
|
|
56
|
+
*
|
|
57
|
+
* @example
|
|
58
|
+
* Handle null or undefined:
|
|
59
|
+
* ```typescript
|
|
60
|
+
* const result = toEvaluationResult(null);
|
|
61
|
+
* // Returns: {}
|
|
62
|
+
* ```
|
|
63
|
+
*
|
|
64
|
+
* @public
|
|
65
|
+
*/
|
|
66
|
+
export declare function toEvaluationResult(result: unknown): EvaluationResult;
|
|
67
|
+
//# sourceMappingURL=toEvaluationResult.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"toEvaluationResult.d.ts","sourceRoot":"","sources":["../../../src/helpers/toEvaluationResult.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAuD5C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,OAAO,GAAG,gBAAgB,CAyBpE"}
|