@arizeai/phoenix-evals 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  2. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  3. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +37 -0
  4. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  5. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  6. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  7. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +43 -0
  8. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  9. package/dist/esm/__generated__/default_templates/index.d.ts +3 -0
  10. package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -0
  11. package/dist/esm/__generated__/default_templates/index.js +4 -0
  12. package/dist/esm/__generated__/default_templates/index.js.map +1 -0
  13. package/dist/esm/__generated__/types.d.ts +9 -0
  14. package/dist/esm/__generated__/types.d.ts.map +1 -0
  15. package/dist/esm/__generated__/types.js +3 -0
  16. package/dist/esm/__generated__/types.js.map +1 -0
  17. package/dist/esm/core/FunctionEvaluator.d.ts +16 -0
  18. package/dist/esm/core/FunctionEvaluator.d.ts.map +1 -0
  19. package/dist/esm/core/FunctionEvaluator.js +18 -0
  20. package/dist/esm/core/FunctionEvaluator.js.map +1 -0
  21. package/dist/esm/helpers/asEvaluatorFn.d.ts +6 -0
  22. package/dist/esm/helpers/asEvaluatorFn.d.ts.map +1 -0
  23. package/dist/esm/helpers/asEvaluatorFn.js +15 -0
  24. package/dist/esm/helpers/asEvaluatorFn.js.map +1 -0
  25. package/dist/esm/helpers/createEvaluator.d.ts +141 -0
  26. package/dist/esm/helpers/createEvaluator.d.ts.map +1 -0
  27. package/dist/esm/helpers/createEvaluator.js +96 -0
  28. package/dist/esm/helpers/createEvaluator.js.map +1 -0
  29. package/dist/esm/helpers/index.d.ts +2 -0
  30. package/dist/esm/helpers/index.d.ts.map +1 -0
  31. package/dist/esm/helpers/index.js +2 -0
  32. package/dist/esm/helpers/index.js.map +1 -0
  33. package/dist/esm/helpers/toEvaluationResult.d.ts +67 -0
  34. package/dist/esm/helpers/toEvaluationResult.d.ts.map +1 -0
  35. package/dist/esm/helpers/toEvaluationResult.js +133 -0
  36. package/dist/esm/helpers/toEvaluationResult.js.map +1 -0
  37. package/dist/esm/index.d.ts +1 -0
  38. package/dist/esm/index.d.ts.map +1 -1
  39. package/dist/esm/index.js +1 -0
  40. package/dist/esm/index.js.map +1 -1
  41. package/dist/esm/llm/{createDocumentRelevancyEvaluator.d.ts → createDocumentRelevanceEvaluator.d.ts} +11 -11
  42. package/dist/esm/llm/{createDocumentRelevancyEvaluator.d.ts.map → createDocumentRelevanceEvaluator.d.ts.map} +1 -1
  43. package/dist/esm/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +10 -10
  44. package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -0
  45. package/dist/esm/llm/createHallucinationEvaluator.d.ts +0 -1
  46. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  47. package/dist/esm/llm/createHallucinationEvaluator.js +2 -2
  48. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  49. package/dist/esm/llm/index.d.ts +1 -1
  50. package/dist/esm/llm/index.js +1 -1
  51. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  52. package/dist/esm/types/base.d.ts +2 -0
  53. package/dist/esm/types/base.d.ts.map +1 -0
  54. package/dist/esm/types/base.js +2 -0
  55. package/dist/esm/types/base.js.map +1 -0
  56. package/dist/esm/types/index.d.ts +1 -0
  57. package/dist/esm/types/index.d.ts.map +1 -1
  58. package/dist/esm/types/index.js +1 -0
  59. package/dist/esm/types/index.js.map +1 -1
  60. package/dist/esm/utils/bindEvaluator.d.ts +1 -1
  61. package/dist/esm/utils/bindEvaluator.js +1 -1
  62. package/dist/esm/utils/typeUtils.d.ts +7 -0
  63. package/dist/esm/utils/typeUtils.d.ts.map +1 -0
  64. package/dist/esm/utils/typeUtils.js +13 -0
  65. package/dist/esm/utils/typeUtils.js.map +1 -0
  66. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  67. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  68. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +40 -0
  69. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  70. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  71. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  72. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +46 -0
  73. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  74. package/dist/src/__generated__/default_templates/index.d.ts +3 -0
  75. package/dist/src/__generated__/default_templates/index.d.ts.map +1 -0
  76. package/dist/src/__generated__/default_templates/index.js +9 -0
  77. package/dist/src/__generated__/default_templates/index.js.map +1 -0
  78. package/dist/src/__generated__/types.d.ts +9 -0
  79. package/dist/src/__generated__/types.d.ts.map +1 -0
  80. package/dist/src/__generated__/types.js +4 -0
  81. package/dist/src/__generated__/types.js.map +1 -0
  82. package/dist/src/core/FunctionEvaluator.d.ts +16 -0
  83. package/dist/src/core/FunctionEvaluator.d.ts.map +1 -0
  84. package/dist/src/core/FunctionEvaluator.js +33 -0
  85. package/dist/src/core/FunctionEvaluator.js.map +1 -0
  86. package/dist/src/helpers/asEvaluatorFn.d.ts +6 -0
  87. package/dist/src/helpers/asEvaluatorFn.d.ts.map +1 -0
  88. package/dist/src/helpers/asEvaluatorFn.js +18 -0
  89. package/dist/src/helpers/asEvaluatorFn.js.map +1 -0
  90. package/dist/src/helpers/createEvaluator.d.ts +141 -0
  91. package/dist/src/helpers/createEvaluator.d.ts.map +1 -0
  92. package/dist/src/helpers/createEvaluator.js +99 -0
  93. package/dist/src/helpers/createEvaluator.js.map +1 -0
  94. package/dist/src/helpers/index.d.ts +2 -0
  95. package/dist/src/helpers/index.d.ts.map +1 -0
  96. package/dist/src/{default_templates → helpers}/index.js +1 -2
  97. package/dist/src/helpers/index.js.map +1 -0
  98. package/dist/src/helpers/toEvaluationResult.d.ts +67 -0
  99. package/dist/src/helpers/toEvaluationResult.d.ts.map +1 -0
  100. package/dist/src/helpers/toEvaluationResult.js +136 -0
  101. package/dist/src/helpers/toEvaluationResult.js.map +1 -0
  102. package/dist/src/index.d.ts +1 -0
  103. package/dist/src/index.d.ts.map +1 -1
  104. package/dist/src/index.js +1 -0
  105. package/dist/src/index.js.map +1 -1
  106. package/dist/src/llm/{createDocumentRelevancyEvaluator.d.ts → createDocumentRelevanceEvaluator.d.ts} +11 -11
  107. package/dist/src/llm/{createDocumentRelevancyEvaluator.d.ts.map → createDocumentRelevanceEvaluator.d.ts.map} +1 -1
  108. package/dist/src/llm/{createDocumentRelevancyEvaluator.js → createDocumentRelevanceEvaluator.js} +11 -11
  109. package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -0
  110. package/dist/src/llm/createHallucinationEvaluator.d.ts +0 -1
  111. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  112. package/dist/src/llm/createHallucinationEvaluator.js +2 -2
  113. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  114. package/dist/src/llm/index.d.ts +1 -1
  115. package/dist/src/llm/index.js +1 -1
  116. package/dist/src/types/base.d.ts +2 -0
  117. package/dist/src/types/base.d.ts.map +1 -0
  118. package/dist/src/types/base.js +3 -0
  119. package/dist/src/types/base.js.map +1 -0
  120. package/dist/src/types/index.d.ts +1 -0
  121. package/dist/src/types/index.d.ts.map +1 -1
  122. package/dist/src/types/index.js +1 -0
  123. package/dist/src/types/index.js.map +1 -1
  124. package/dist/src/utils/bindEvaluator.d.ts +1 -1
  125. package/dist/src/utils/bindEvaluator.js +1 -1
  126. package/dist/src/utils/typeUtils.d.ts +7 -0
  127. package/dist/src/utils/typeUtils.d.ts.map +1 -0
  128. package/dist/src/utils/typeUtils.js +16 -0
  129. package/dist/src/utils/typeUtils.js.map +1 -0
  130. package/dist/tsconfig.tsbuildinfo +1 -1
  131. package/package.json +7 -1
  132. package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +39 -0
  133. package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +45 -0
  134. package/src/__generated__/default_templates/index.ts +4 -0
  135. package/src/__generated__/types.ts +11 -0
  136. package/src/core/FunctionEvaluator.ts +28 -0
  137. package/src/helpers/asEvaluatorFn.ts +19 -0
  138. package/src/helpers/createEvaluator.ts +184 -0
  139. package/src/helpers/index.ts +1 -0
  140. package/src/helpers/toEvaluationResult.ts +145 -0
  141. package/src/index.ts +1 -0
  142. package/src/llm/{createDocumentRelevancyEvaluator.ts → createDocumentRelevanceEvaluator.ts} +18 -21
  143. package/src/llm/createHallucinationEvaluator.ts +5 -9
  144. package/src/llm/index.ts +1 -1
  145. package/src/types/base.ts +2 -0
  146. package/src/types/index.ts +1 -0
  147. package/src/utils/bindEvaluator.ts +1 -1
  148. package/src/utils/typeUtils.ts +14 -0
  149. package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
  150. package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
  151. package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -25
  152. package/dist/esm/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
  153. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
  154. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
  155. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +0 -31
  156. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
  157. package/dist/esm/default_templates/index.d.ts +0 -3
  158. package/dist/esm/default_templates/index.d.ts.map +0 -1
  159. package/dist/esm/default_templates/index.js +0 -3
  160. package/dist/esm/default_templates/index.js.map +0 -1
  161. package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +0 -1
  162. package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts +0 -6
  163. package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map +0 -1
  164. package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js +0 -28
  165. package/dist/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.js.map +0 -1
  166. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +0 -6
  167. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts.map +0 -1
  168. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +0 -34
  169. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +0 -1
  170. package/dist/src/default_templates/index.d.ts +0 -3
  171. package/dist/src/default_templates/index.d.ts.map +0 -1
  172. package/dist/src/default_templates/index.js.map +0 -1
  173. package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +0 -1
  174. package/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts +0 -25
  175. package/src/default_templates/HALLUCINATION_TEMPLATE.ts +0 -31
  176. package/src/default_templates/index.ts +0 -2
@@ -0,0 +1,39 @@
1
+ // This file is generated. Do not edit by hand.
2
+
3
+ import type { ClassificationEvaluatorConfig } from "../types";
4
+
5
+ export const DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG: ClassificationEvaluatorConfig = {
6
+ name: "document_relevance",
7
+ description: "A specialized evaluator for determining document relevance to a given question.",
8
+ optimizationDirection: "MAXIMIZE",
9
+ template: [
10
+ {
11
+ role: "user",
12
+ content: `
13
+ You are comparing a document to a question and trying to determine
14
+ if the document text contains information relevant to answering the
15
+ question. Here is the data:
16
+
17
+ [BEGIN DATA]
18
+ ************
19
+ [Question]: {{input}}
20
+ ************
21
+ [Document text]: {{documentText}}
22
+ ************
23
+ [END DATA]
24
+
25
+ Compare the question above to the document text. You must determine
26
+ whether the document text contains information that can answer the
27
+ question. Please focus on whether the very specific question can be
28
+ answered by the information in the document text. Your response must be
29
+ either "relevant" or "unrelated". "unrelated" means that the document
30
+ text does not contain an answer to the question. "relevant" means the
31
+ document text contains an answer to the question.
32
+ `,
33
+ },
34
+ ],
35
+ choices: {
36
+ "relevant": 1,
37
+ "unrelated": 0
38
+ },
39
+ };
@@ -0,0 +1,45 @@
1
+ // This file is generated. Do not edit by hand.
2
+
3
+ import type { ClassificationEvaluatorConfig } from "../types";
4
+
5
+ export const HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG: ClassificationEvaluatorConfig = {
6
+ name: "hallucination",
7
+ description: "A specialized evaluator for detecting hallucinations in grounded LLM responses.",
8
+ optimizationDirection: "MINIMIZE",
9
+ template: [
10
+ {
11
+ role: "user",
12
+ content: `
13
+ In this task, you will be presented with a query, some context and a response. The response
14
+ is generated to the question based on the context. The response may contain false
15
+ information. You must use the context to determine if the response to the question
16
+ contains false information, if the response is a hallucination of facts. Your objective is
17
+ to determine whether the response text contains factual information and is not a
18
+ hallucination. A 'hallucination' refers to a response that is not based on the context or
19
+ assumes information that is not available in the context. Your response should be a single
20
+ word: either 'factual' or 'hallucinated', and it should not include any other text or
21
+ characters. 'hallucinated' indicates that the response provides factually inaccurate
22
+ information to the query based on the context. 'factual' indicates that the response to
23
+ the question is correct relative to the context, and does not contain made up
24
+ information. Please read the query and context carefully before determining your
25
+ response.
26
+
27
+ [BEGIN DATA]
28
+ ************
29
+ [Query]: {{input}}
30
+ ************
31
+ [Context]: {{context}}
32
+ ************
33
+ [Response]: {{output}}
34
+ ************
35
+ [END DATA]
36
+
37
+ Is the response above factual or hallucinated based on the query and context?
38
+ `,
39
+ },
40
+ ],
41
+ choices: {
42
+ "hallucinated": 1,
43
+ "factual": 0
44
+ },
45
+ };
@@ -0,0 +1,4 @@
1
+ // This file is generated. Do not edit by hand.
2
+
3
+ export { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "./DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG";
4
+ export { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "./HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG";
@@ -0,0 +1,11 @@
1
+ // This file is generated. Do not edit by hand.
2
+
3
+ import type { PromptTemplate } from "../types/templating";
4
+
5
+ export type ClassificationEvaluatorConfig = {
6
+ name: string;
7
+ description: string;
8
+ optimizationDirection: "MINIMIZE" | "MAXIMIZE";
9
+ template: PromptTemplate;
10
+ choices: Record<string, number>;
11
+ };
@@ -0,0 +1,28 @@
1
+ import { CreateEvaluatorArgs, EvaluationResult, ObjectMapping } from "../types";
2
+
3
+ import { EvaluatorBase } from "./EvaluatorBase";
4
+
5
+ type FunctionEvaluatorArgs<RecordType extends Record<string, unknown>> =
6
+ CreateEvaluatorArgs<RecordType> & {
7
+ evaluateFn: (args: RecordType) => Promise<EvaluationResult>;
8
+ };
9
+ /**
10
+ * A class that constructs an evaluator based on an evaluate function.
11
+ */
12
+ export class FunctionEvaluator<
13
+ RecordType extends Record<string, unknown>,
14
+ > extends EvaluatorBase<RecordType> {
15
+ readonly evaluateFn: (args: RecordType) => Promise<EvaluationResult>;
16
+ constructor({ evaluateFn, ...args }: FunctionEvaluatorArgs<RecordType>) {
17
+ super({ ...args });
18
+ this.evaluateFn = evaluateFn;
19
+ }
20
+ async evaluate(args: RecordType): Promise<EvaluationResult> {
21
+ return this.evaluateFn(args);
22
+ }
23
+ bindInputMapping(
24
+ inputMapping: ObjectMapping<RecordType>
25
+ ): FunctionEvaluator<RecordType> {
26
+ return new FunctionEvaluator({ ...this, inputMapping });
27
+ }
28
+ }
@@ -0,0 +1,19 @@
1
+ import { AnyFn, EvaluatorFn } from "../types";
2
+ import { isPromise } from "../utils/typeUtils";
3
+
4
+ import { toEvaluationResult } from "./toEvaluationResult";
5
+
6
+ /**
7
+ * A function that converts a generic function into an evaluator function
8
+ */
9
+ export function asEvaluatorFn<RecordType extends Record<string, unknown>>(
10
+ fn: AnyFn
11
+ ): EvaluatorFn<RecordType> {
12
+ return async (...args) => {
13
+ let result = fn(...args);
14
+ if (isPromise(result)) {
15
+ result = await result;
16
+ }
17
+ return toEvaluationResult(result);
18
+ };
19
+ }
@@ -0,0 +1,184 @@
1
+ import { withSpan } from "@arizeai/openinference-core";
2
+
3
+ import { EvaluatorBase } from "../core/EvaluatorBase";
4
+ import { FunctionEvaluator } from "../core/FunctionEvaluator";
5
+ import {
6
+ EvaluationKind,
7
+ OptimizationDirection,
8
+ TelemetryConfig,
9
+ } from "../types";
10
+
11
+ import { asEvaluatorFn } from "./asEvaluatorFn";
12
+
13
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
14
+ type AnyFn = (...args: any[]) => any;
15
+
16
+ function generateUniqueName(): string {
17
+ return `evaluator-${Math.random().toString(36).substring(2, 15)}`;
18
+ }
19
+
20
+ /**
21
+ * Options for creating a custom evaluator using {@link createEvaluator}.
22
+ *
23
+ * @public
24
+ */
25
+ export type CreateEvaluatorOptions = {
26
+ /**
27
+ * The name of the evaluator / metric that it measures.
28
+ *
29
+ * If not provided, the function will attempt to infer the name from the function's `name` property.
30
+ * If the function has no name, a unique name will be generated.
31
+ *
32
+ * @example
33
+ * ```typescript
34
+ * const evaluator = createEvaluator(myFunction, { name: "custom-metric" });
35
+ * ```
36
+ */
37
+ name?: string;
38
+ /**
39
+ * The kind of the evaluation.
40
+ *
41
+ * - `"CODE"`: Code-based evaluator that runs custom logic
42
+ * - `"LLM"`: LLM-based evaluator that uses a language model
43
+ *
44
+ * @defaultValue `"CODE"`
45
+ *
46
+ * @example
47
+ * ```typescript
48
+ * const evaluator = createEvaluator(myFunction, { kind: "CODE" });
49
+ * ```
50
+ */
51
+ kind?: EvaluationKind;
52
+ /**
53
+ * The direction to optimize the numeric evaluation score.
54
+ *
55
+ * - `"MAXIMIZE"`: Higher scores are better (e.g., accuracy, F1 score)
56
+ * - `"MINIMIZE"`: Lower scores are better (e.g., error rate, latency)
57
+ *
58
+ * @defaultValue `"MAXIMIZE"`
59
+ *
60
+ * @example
61
+ * ```typescript
62
+ * const evaluator = createEvaluator(myFunction, {
63
+ * optimizationDirection: "MAXIMIZE"
64
+ * });
65
+ * ```
66
+ */
67
+ optimizationDirection?: OptimizationDirection;
68
+ /**
69
+ * The telemetry configuration for the evaluator.
70
+ *
71
+ * When enabled, the evaluator will automatically create OpenTelemetry spans
72
+ * for tracing and observability. This allows you to track evaluator performance
73
+ * and debug issues in distributed systems.
74
+ *
75
+ * @defaultValue `{ isEnabled: true }`
76
+ *
77
+ * @example
78
+ * ```typescript
79
+ * const evaluator = createEvaluator(myFunction, {
80
+ * telemetry: { isEnabled: true, tracer: myTracer }
81
+ * });
82
+ * ```
83
+ */
84
+ telemetry?: TelemetryConfig;
85
+ };
86
+
87
+ /**
88
+ * A factory function for creating a custom evaluator from any function.
89
+ *
90
+ * This function wraps a user-provided function into an evaluator that can be used
91
+ * with Phoenix experiments and evaluations. The function can be synchronous or
92
+ * asynchronous, and can return a number, an {@link EvaluationResult} object, or
93
+ * a value that will be automatically converted to an evaluation result.
94
+ *
95
+ * The evaluator will automatically:
96
+ * - Convert the function's return value to an {@link EvaluationResult}
97
+ * - Handle both sync and async functions
98
+ * - Wrap the function with OpenTelemetry spans if telemetry is enabled
99
+ * - Infer the evaluator name from the function name if not provided
100
+ *
101
+ * @typeParam RecordType - The type of the input record that the evaluator expects.
102
+ * Must extend `Record<string, unknown>`.
103
+ * @typeParam Fn - The type of the function being wrapped. Must be a function that
104
+ * accepts the record type and returns a value compatible with {@link EvaluationResult}.
105
+ *
106
+ * @param fn - The function to wrap as an evaluator. Can be synchronous or asynchronous.
107
+ * The function should accept a record of type `RecordType` and return either:
108
+ * - A number (will be converted to `{ score: number }`)
109
+ * - An {@link EvaluationResult} object
110
+ * - Any value that can be converted to an evaluation result
111
+ *
112
+ * @param options - Optional configuration for the evaluator. See {@link CreateEvaluatorOptions}
113
+ * for details on available options.
114
+ *
115
+ * @returns An {@link EvaluatorInterface} that can be used with Phoenix experiments
116
+ * and evaluation workflows.
117
+ *
118
+ * @example
119
+ * Basic usage with a simple scoring function:
120
+ * ```typescript
121
+ * const accuracyEvaluator = createEvaluator(
122
+ * ({ output, expected }) => {
123
+ * return output === expected ? 1 : 0;
124
+ * },
125
+ * {
126
+ * name: "accuracy",
127
+ * kind: "CODE",
128
+ * optimizationDirection: "MAXIMIZE"
129
+ * }
130
+ * );
131
+ *
132
+ * const result = await accuracyEvaluator.evaluate({
133
+ * output: "correct answer",
134
+ * expected: "correct answer"
135
+ * });
136
+ * // result: { score: 1 }
137
+ * ```
138
+ *
139
+ *
140
+ * @example
141
+ * Returning a full EvaluationResult:
142
+ * ```typescript
143
+ * const qualityEvaluator = createEvaluator(
144
+ * ({ output }) => {
145
+ * const score = calculateQuality(output);
146
+ * return {
147
+ * score,
148
+ * label: score > 0.8 ? "high" : "low",
149
+ * explanation: `Quality score: ${score}`
150
+ * };
151
+ * },
152
+ * { name: "quality" }
153
+ * );
154
+ * ```
155
+ */
156
+ export function createEvaluator<
157
+ RecordType extends Record<string, unknown> = Record<string, unknown>,
158
+ Fn extends AnyFn = AnyFn,
159
+ >(fn: Fn, options?: CreateEvaluatorOptions): EvaluatorBase<RecordType> {
160
+ const {
161
+ name,
162
+ kind,
163
+ optimizationDirection,
164
+ telemetry = { isEnabled: true },
165
+ } = options || {};
166
+ const evaluatorName = name || fn.name || generateUniqueName();
167
+ let evaluateFn = asEvaluatorFn<RecordType>(fn);
168
+
169
+ // Add OpenTelemetry span wrapping if telemetry is enabled
170
+ if (telemetry && telemetry.isEnabled) {
171
+ evaluateFn = withSpan(evaluateFn, {
172
+ tracer: telemetry.tracer,
173
+ name: evaluatorName,
174
+ kind: "EVALUATOR",
175
+ });
176
+ }
177
+ return new FunctionEvaluator<RecordType>({
178
+ evaluateFn,
179
+ name: evaluatorName,
180
+ kind: kind || "CODE",
181
+ optimizationDirection: optimizationDirection || "MAXIMIZE",
182
+ telemetry,
183
+ });
184
+ }
@@ -0,0 +1 @@
1
+ export * from "./createEvaluator";
@@ -0,0 +1,145 @@
1
+ import { EvaluationResult } from "../types";
2
+
3
+ /**
4
+ * Type guard to check if a value has a score property.
5
+ *
6
+ * @param result - The value to check
7
+ * @returns True if the value is an object with a numeric score property
8
+ *
9
+ * @internal
10
+ */
11
+ function resultHasScore(result: unknown): result is { score: number } {
12
+ return (
13
+ typeof result === "object" &&
14
+ result !== null &&
15
+ "score" in result &&
16
+ typeof result.score === "number"
17
+ );
18
+ }
19
+
20
+ /**
21
+ * Type guard to check if a value has a label property.
22
+ *
23
+ * @param result - The value to check
24
+ * @returns True if the value is an object with a string label property
25
+ *
26
+ * @internal
27
+ */
28
+ function resultHasLabel(result: unknown): result is { label: string } {
29
+ return (
30
+ typeof result === "object" &&
31
+ result !== null &&
32
+ "label" in result &&
33
+ typeof result.label === "string"
34
+ );
35
+ }
36
+
37
+ /**
38
+ * Type guard to check if a value has an explanation property.
39
+ *
40
+ * @param result - The value to check
41
+ * @returns True if the value is an object with a string explanation property
42
+ *
43
+ * @internal
44
+ */
45
+ function resultHasExplanation(
46
+ result: unknown
47
+ ): result is { explanation: string } {
48
+ return (
49
+ typeof result === "object" &&
50
+ result !== null &&
51
+ "explanation" in result &&
52
+ typeof result.explanation === "string"
53
+ );
54
+ }
55
+
56
+ /**
57
+ * Converts an unknown value to an {@link EvaluationResult}.
58
+ *
59
+ * This function provides a flexible way to normalize various return types from
60
+ * evaluator functions into a standardized `EvaluationResult` format. It handles
61
+ * multiple input types:
62
+ *
63
+ * - **Numbers**: Converted to `{ score: number }`
64
+ * - **Strings**: Converted to `{ label: string }`
65
+ * - **Objects**: Extracts `score`, `label`, and `explanation` properties if present
66
+ * - **Other types**: Returns an empty `EvaluationResult` object
67
+ *
68
+ * This is particularly useful when creating evaluators from functions that may
69
+ * return different types, ensuring consistent evaluation result formatting.
70
+ *
71
+ * @param result - The value to convert to an EvaluationResult. Can be:
72
+ * - A number (converted to score)
73
+ * - A string (converted to label)
74
+ * - An object with optional `score`, `label`, and/or `explanation` properties
75
+ * - Any other value (returns empty object)
76
+ *
77
+ * @returns An {@link EvaluationResult} object with extracted properties
78
+ *
79
+ * @example
80
+ * Convert a number to an EvaluationResult:
81
+ * ```typescript
82
+ * const result = toEvaluationResult(0.95);
83
+ * // Returns: { score: 0.95 }
84
+ * ```
85
+ *
86
+ * @example
87
+ * Convert a string to an EvaluationResult:
88
+ * ```typescript
89
+ * const result = toEvaluationResult("correct");
90
+ * // Returns: { label: "correct" }
91
+ * ```
92
+ *
93
+ * @example
94
+ * Convert an object with all properties:
95
+ * ```typescript
96
+ * const result = toEvaluationResult({
97
+ * score: 0.9,
98
+ * label: "high",
99
+ * explanation: "High quality output"
100
+ * });
101
+ * // Returns: { score: 0.9, label: "high", explanation: "High quality output" }
102
+ * ```
103
+ *
104
+ * @example
105
+ * Convert an object with partial properties:
106
+ * ```typescript
107
+ * const result = toEvaluationResult({ score: 0.8 });
108
+ * // Returns: { score: 0.8 }
109
+ * ```
110
+ *
111
+ * @example
112
+ * Handle null or undefined:
113
+ * ```typescript
114
+ * const result = toEvaluationResult(null);
115
+ * // Returns: {}
116
+ * ```
117
+ *
118
+ * @public
119
+ */
120
+ export function toEvaluationResult(result: unknown): EvaluationResult {
121
+ if (typeof result === "number") {
122
+ return {
123
+ score: result,
124
+ };
125
+ }
126
+ if (typeof result === "string") {
127
+ return {
128
+ label: result,
129
+ };
130
+ }
131
+ if (typeof result === "object" && result !== null) {
132
+ const evaluationResult: EvaluationResult = {};
133
+ if (resultHasScore(result)) {
134
+ evaluationResult.score = result.score;
135
+ }
136
+ if (resultHasLabel(result)) {
137
+ evaluationResult.label = result.label;
138
+ }
139
+ if (resultHasExplanation(result)) {
140
+ evaluationResult.explanation = result.explanation;
141
+ }
142
+ return evaluationResult;
143
+ }
144
+ return {};
145
+ }
package/src/index.ts CHANGED
@@ -2,3 +2,4 @@ export * from "./llm";
2
2
  export * from "./template";
3
3
  export * from "./types";
4
4
  export * from "./utils";
5
+ export * from "./helpers";
@@ -1,17 +1,14 @@
1
- import {
2
- DOCUMENT_RELEVANCY_CHOICES,
3
- DOCUMENT_RELEVANCY_TEMPLATE,
4
- } from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
1
+ import { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
5
2
  import { CreateClassificationEvaluatorArgs } from "../types/evals";
6
3
 
7
4
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
8
5
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
9
6
 
10
- export interface DocumentRelevancyEvaluatorArgs<
7
+ export interface DocumentRelevanceEvaluatorArgs<
11
8
  RecordType extends Record<
12
9
  string,
13
10
  unknown
14
- > = DocumentRelevancyEvaluationRecord,
11
+ > = DocumentRelevanceEvaluationRecord,
15
12
  > extends Omit<
16
13
  CreateClassificationEvaluatorArgs<RecordType>,
17
14
  "promptTemplate" | "choices" | "optimizationDirection" | "name"
@@ -23,33 +20,33 @@ export interface DocumentRelevancyEvaluatorArgs<
23
20
  }
24
21
 
25
22
  /**
26
- * A record to be evaluated by the document relevancy evaluator.
23
+ * A record to be evaluated by the document relevance evaluator.
27
24
  */
28
- export interface DocumentRelevancyEvaluationRecord {
25
+ export interface DocumentRelevanceEvaluationRecord {
29
26
  input: string;
30
27
  documentText: string;
31
28
  [key: string]: unknown;
32
29
  }
33
30
 
34
31
  /**
35
- * Creates a document relevancy evaluator function.
32
+ * Creates a document relevance evaluator function.
36
33
  *
37
34
  * This function returns an evaluator that determines whether a given document text
38
35
  * is relevant to a provided input question. The evaluator uses a classification model
39
36
  * and a prompt template to make its determination.
40
37
  *
41
- * @param args - The arguments for creating the document relevancy evaluator.
38
+ * @param args - The arguments for creating the document relevance evaluator.
42
39
  * @param args.model - The model to use for classification.
43
- * @param args.choices - The possible classification choices (defaults to DOCUMENT_RELEVANCY_CHOICES).
44
- * @param args.promptTemplate - The prompt template to use (defaults to DOCUMENT_RELEVANCY_TEMPLATE).
40
+ * @param args.choices - The possible classification choices (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices).
41
+ * @param args.promptTemplate - The prompt template to use (defaults to DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template).
45
42
  * @param args.telemetry - The telemetry to use for the evaluator.
46
43
  *
47
- * @returns An evaluator function that takes a {@link DocumentRelevancyExample} and returns a classification result
44
+ * @returns An evaluator function that takes a {@link DocumentRelevanceEvaluationRecord} and returns a classification result
48
45
  * indicating whether the document is relevant to the input question.
49
46
  *
50
47
  * @example
51
48
  * ```ts
52
- * const evaluator = createDocumentRelevancyEvaluator({ model: openai("gpt-4o-mini") });
49
+ * const evaluator = createDocumentRelevanceEvaluator({ model: openai("gpt-4o-mini") });
53
50
  * const result = await evaluator.evaluate({
54
51
  * input: "What is the capital of France?",
55
52
  * documentText: "Paris is the capital and most populous city of France.",
@@ -57,19 +54,19 @@ export interface DocumentRelevancyEvaluationRecord {
57
54
  * console.log(result.label); // "relevant" or "unrelated"
58
55
  * ```
59
56
  */
60
- export function createDocumentRelevancyEvaluator<
57
+ export function createDocumentRelevanceEvaluator<
61
58
  RecordType extends Record<
62
59
  string,
63
60
  unknown
64
- > = DocumentRelevancyEvaluationRecord,
61
+ > = DocumentRelevanceEvaluationRecord,
65
62
  >(
66
- args: DocumentRelevancyEvaluatorArgs<RecordType>
63
+ args: DocumentRelevanceEvaluatorArgs<RecordType>
67
64
  ): ClassificationEvaluator<RecordType> {
68
65
  const {
69
- choices = DOCUMENT_RELEVANCY_CHOICES,
70
- promptTemplate = DOCUMENT_RELEVANCY_TEMPLATE,
71
- optimizationDirection = "MAXIMIZE",
72
- name = "document_relevancy",
66
+ choices = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.choices,
67
+ promptTemplate = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.template,
68
+ optimizationDirection = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
69
+ name = DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.name,
73
70
  ...rest
74
71
  } = args;
75
72
  return createClassificationEvaluator<RecordType>({
@@ -1,7 +1,4 @@
1
- import {
2
- HALLUCINATION_CHOICES,
3
- HALLUCINATION_TEMPLATE,
4
- } from "../default_templates/HALLUCINATION_TEMPLATE";
1
+ import { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
5
2
  import { CreateClassificationEvaluatorArgs } from "../types/evals";
6
3
 
7
4
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
@@ -25,7 +22,6 @@ export interface HallucinationEvaluatorArgs<
25
22
  export type HallucinationEvaluationRecord = {
26
23
  input: string;
27
24
  output: string;
28
- reference?: string;
29
25
  context?: string;
30
26
  };
31
27
  /**
@@ -40,10 +36,10 @@ export function createHallucinationEvaluator<
40
36
  args: HallucinationEvaluatorArgs<RecordType>
41
37
  ): ClassificationEvaluator<RecordType> {
42
38
  const {
43
- choices = HALLUCINATION_CHOICES,
44
- promptTemplate = HALLUCINATION_TEMPLATE,
45
- optimizationDirection = "MINIMIZE",
46
- name = "hallucination",
39
+ choices = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.choices,
40
+ promptTemplate = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.template,
41
+ optimizationDirection = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
42
+ name = HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.name,
47
43
  ...rest
48
44
  } = args;
49
45
  return createClassificationEvaluator<RecordType>({
package/src/llm/index.ts CHANGED
@@ -2,6 +2,6 @@ export * from "./generateClassification";
2
2
  export * from "./createClassifierFn";
3
3
  export * from "./createClassificationEvaluator";
4
4
  export * from "./createHallucinationEvaluator";
5
- export * from "./createDocumentRelevancyEvaluator";
5
+ export * from "./createDocumentRelevanceEvaluator";
6
6
  export * from "./ClassificationEvaluator";
7
7
  export * from "./LLMEvaluator";
@@ -0,0 +1,2 @@
1
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
2
+ export type AnyFn = (...args: any[]) => any;
@@ -2,3 +2,4 @@ export * from "./evals";
2
2
  export * from "./templating";
3
3
  export * from "./otel";
4
4
  export * from "./data";
5
+ export * from "./base";
@@ -206,7 +206,7 @@ export type BindingContext<RecordType extends Record<string, unknown>> = {
206
206
  *
207
207
  * // Document relevance evaluator (if it exists)
208
208
  * const relevancyEvaluator = bindEvaluator<EvaluationData>(
209
- * createDocumentRelevancyEvaluator({ model: openai("gpt-4") }),
209
+ * createDocumentRelevanceEvaluator({ model: openai("gpt-4") }),
210
210
  * {
211
211
  * inputMapping: {
212
212
  * query: "userQuery",
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Type guard that checks whether a value is Promise-like (has callable `then` and `catch` methods)
3
+ * @param value - The value to test
4
+ * @returns true if the value is truthy and exposes both `then` and `catch` as functions
5
+ */
6
+ export function isPromise<T = unknown>(value: unknown): value is Promise<T> {
7
+ return (
8
+ !!value &&
9
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
10
+ typeof (value as any)?.then === "function" &&
11
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
12
+ typeof (value as any)?.catch === "function"
13
+ );
14
+ }
@@ -1,6 +0,0 @@
1
- export declare const DOCUMENT_RELEVANCY_TEMPLATE = "\nYou are comparing a document to a question and trying to determine if the document text\ncontains information relevant to answering the question. Here is the data:\n\n [BEGIN DATA]\n ************\n [Question]: {{input}}\n ************\n [Document text]: {{documentText}}\n ************\n [END DATA]\n\nCompare the Question above to the Document text. You must determine whether the Document text\ncontains information that can answer the Question. Please focus on whether the very specific\nquestion can be answered by the information in the Document text.\nYour response must be single word, either \"relevant\" or \"unrelated\",\nand should not contain any text or characters aside from that word.\n\"unrelated\" means that the document text does not contain an answer to the Question.\n\"relevant\" means the document text contains an answer to the Question.\n";
2
- export declare const DOCUMENT_RELEVANCY_CHOICES: {
3
- relevant: number;
4
- unrelated: number;
5
- };
6
- //# sourceMappingURL=DOCUMENT_RELEVANCY_TEMPLATE.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"DOCUMENT_RELEVANCY_TEMPLATE.d.ts","sourceRoot":"","sources":["../../../src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,2BAA2B,23BAmBvC,CAAC;AAEF,eAAO,MAAM,0BAA0B;;;CAGtC,CAAC"}