@arizeai/phoenix-evals 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. package/README.md +23 -23
  2. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  3. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  4. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js +58 -0
  5. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  6. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  7. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +9 -1
  8. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  9. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  10. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +14 -3
  11. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  12. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  13. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +16 -2
  14. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  15. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  16. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +15 -1
  17. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  18. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  19. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -11
  20. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  21. package/dist/esm/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  22. package/dist/esm/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  23. package/dist/esm/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.js +86 -0
  24. package/dist/esm/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  25. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  26. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js +44 -8
  27. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  28. package/dist/esm/__generated__/default_templates/index.d.ts +2 -0
  29. package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -1
  30. package/dist/esm/__generated__/default_templates/index.js +2 -0
  31. package/dist/esm/__generated__/default_templates/index.js.map +1 -1
  32. package/dist/esm/core/EvaluatorBase.d.ts +2 -2
  33. package/dist/esm/core/EvaluatorBase.d.ts.map +1 -1
  34. package/dist/esm/core/FunctionEvaluator.d.ts +1 -1
  35. package/dist/esm/core/FunctionEvaluator.d.ts.map +1 -1
  36. package/dist/esm/core/FunctionEvaluator.js.map +1 -1
  37. package/dist/esm/helpers/asEvaluatorFn.d.ts +1 -1
  38. package/dist/esm/helpers/asEvaluatorFn.d.ts.map +1 -1
  39. package/dist/esm/helpers/asEvaluatorFn.js.map +1 -1
  40. package/dist/esm/helpers/createEvaluator.d.ts +2 -2
  41. package/dist/esm/helpers/createEvaluator.d.ts.map +1 -1
  42. package/dist/esm/helpers/createEvaluator.js.map +1 -1
  43. package/dist/esm/helpers/toEvaluationResult.d.ts +1 -1
  44. package/dist/esm/helpers/toEvaluationResult.d.ts.map +1 -1
  45. package/dist/esm/llm/ClassificationEvaluator.d.ts +3 -3
  46. package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
  47. package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
  48. package/dist/esm/llm/LLMEvaluator.d.ts +1 -1
  49. package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
  50. package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
  51. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
  52. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
  53. package/dist/esm/llm/createClassifierFn.d.ts +1 -1
  54. package/dist/esm/llm/createClassifierFn.d.ts.map +1 -1
  55. package/dist/esm/llm/createClassifierFn.js.map +1 -1
  56. package/dist/esm/llm/createConcisenessEvaluator.d.ts +43 -0
  57. package/dist/esm/llm/createConcisenessEvaluator.d.ts.map +1 -0
  58. package/dist/esm/llm/createConcisenessEvaluator.js +39 -0
  59. package/dist/esm/llm/createConcisenessEvaluator.js.map +1 -0
  60. package/dist/esm/llm/createCorrectnessEvaluator.d.ts +2 -2
  61. package/dist/esm/llm/createCorrectnessEvaluator.d.ts.map +1 -1
  62. package/dist/esm/llm/createCorrectnessEvaluator.js.map +1 -1
  63. package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts +2 -2
  64. package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -1
  65. package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -1
  66. package/dist/esm/llm/createFaithfulnessEvaluator.d.ts +2 -2
  67. package/dist/esm/llm/createFaithfulnessEvaluator.d.ts.map +1 -1
  68. package/dist/esm/llm/createFaithfulnessEvaluator.js.map +1 -1
  69. package/dist/esm/llm/createHallucinationEvaluator.d.ts +2 -2
  70. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  71. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  72. package/dist/esm/llm/createToolInvocationEvaluator.d.ts +2 -2
  73. package/dist/esm/llm/createToolInvocationEvaluator.d.ts.map +1 -1
  74. package/dist/esm/llm/createToolInvocationEvaluator.js.map +1 -1
  75. package/dist/esm/llm/createToolResponseHandlingEvaluator.d.ts +78 -0
  76. package/dist/esm/llm/createToolResponseHandlingEvaluator.d.ts.map +1 -0
  77. package/dist/esm/llm/createToolResponseHandlingEvaluator.js +59 -0
  78. package/dist/esm/llm/createToolResponseHandlingEvaluator.js.map +1 -0
  79. package/dist/esm/llm/createToolSelectionEvaluator.d.ts +64 -0
  80. package/dist/esm/llm/createToolSelectionEvaluator.d.ts.map +1 -0
  81. package/dist/esm/llm/createToolSelectionEvaluator.js +50 -0
  82. package/dist/esm/llm/createToolSelectionEvaluator.js.map +1 -0
  83. package/dist/esm/llm/generateClassification.d.ts +2 -2
  84. package/dist/esm/llm/generateClassification.d.ts.map +1 -1
  85. package/dist/esm/llm/generateClassification.js +1 -1
  86. package/dist/esm/llm/generateClassification.js.map +1 -1
  87. package/dist/esm/llm/index.d.ts +3 -0
  88. package/dist/esm/llm/index.d.ts.map +1 -1
  89. package/dist/esm/llm/index.js +3 -0
  90. package/dist/esm/llm/index.js.map +1 -1
  91. package/dist/esm/template/applyTemplate.d.ts +1 -1
  92. package/dist/esm/template/applyTemplate.d.ts.map +1 -1
  93. package/dist/esm/template/applyTemplate.js +1 -1
  94. package/dist/esm/template/applyTemplate.js.map +1 -1
  95. package/dist/esm/template/getTemplateVariables.d.ts +1 -1
  96. package/dist/esm/template/getTemplateVariables.d.ts.map +1 -1
  97. package/dist/esm/template/getTemplateVariables.js.map +1 -1
  98. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  99. package/dist/esm/types/evals.d.ts +4 -4
  100. package/dist/esm/types/evals.d.ts.map +1 -1
  101. package/dist/esm/types/otel.d.ts +1 -1
  102. package/dist/esm/types/otel.d.ts.map +1 -1
  103. package/dist/esm/utils/bindEvaluator.d.ts +2 -2
  104. package/dist/esm/utils/bindEvaluator.d.ts.map +1 -1
  105. package/dist/esm/utils/objectMappingUtils.d.ts +1 -1
  106. package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -1
  107. package/dist/esm/utils/objectMappingUtils.js.map +1 -1
  108. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  109. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  110. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -0
  111. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  112. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  113. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +9 -1
  114. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  115. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  116. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +14 -3
  117. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  118. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  119. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +16 -2
  120. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  121. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  122. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +15 -1
  123. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  124. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  125. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -11
  126. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  127. package/dist/src/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  128. package/dist/src/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  129. package/dist/src/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.js +89 -0
  130. package/dist/src/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  131. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  132. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js +44 -8
  133. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  134. package/dist/src/__generated__/default_templates/index.d.ts +2 -0
  135. package/dist/src/__generated__/default_templates/index.d.ts.map +1 -1
  136. package/dist/src/__generated__/default_templates/index.js +5 -1
  137. package/dist/src/__generated__/default_templates/index.js.map +1 -1
  138. package/dist/src/core/EvaluatorBase.d.ts +2 -2
  139. package/dist/src/core/EvaluatorBase.d.ts.map +1 -1
  140. package/dist/src/core/FunctionEvaluator.d.ts +1 -1
  141. package/dist/src/core/FunctionEvaluator.d.ts.map +1 -1
  142. package/dist/src/core/FunctionEvaluator.js.map +1 -1
  143. package/dist/src/helpers/asEvaluatorFn.d.ts +1 -1
  144. package/dist/src/helpers/asEvaluatorFn.d.ts.map +1 -1
  145. package/dist/src/helpers/asEvaluatorFn.js.map +1 -1
  146. package/dist/src/helpers/createEvaluator.d.ts +2 -2
  147. package/dist/src/helpers/createEvaluator.d.ts.map +1 -1
  148. package/dist/src/helpers/createEvaluator.js.map +1 -1
  149. package/dist/src/helpers/toEvaluationResult.d.ts +1 -1
  150. package/dist/src/helpers/toEvaluationResult.d.ts.map +1 -1
  151. package/dist/src/llm/ClassificationEvaluator.d.ts +3 -3
  152. package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
  153. package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
  154. package/dist/src/llm/LLMEvaluator.d.ts +1 -1
  155. package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
  156. package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
  157. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
  158. package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
  159. package/dist/src/llm/createClassifierFn.d.ts +1 -1
  160. package/dist/src/llm/createClassifierFn.d.ts.map +1 -1
  161. package/dist/src/llm/createClassifierFn.js.map +1 -1
  162. package/dist/src/llm/createConcisenessEvaluator.d.ts +43 -0
  163. package/dist/src/llm/createConcisenessEvaluator.d.ts.map +1 -0
  164. package/dist/src/llm/createConcisenessEvaluator.js +50 -0
  165. package/dist/src/llm/createConcisenessEvaluator.js.map +1 -0
  166. package/dist/src/llm/createCorrectnessEvaluator.d.ts +2 -2
  167. package/dist/src/llm/createCorrectnessEvaluator.d.ts.map +1 -1
  168. package/dist/src/llm/createCorrectnessEvaluator.js.map +1 -1
  169. package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts +2 -2
  170. package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -1
  171. package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -1
  172. package/dist/src/llm/createFaithfulnessEvaluator.d.ts +2 -2
  173. package/dist/src/llm/createFaithfulnessEvaluator.d.ts.map +1 -1
  174. package/dist/src/llm/createFaithfulnessEvaluator.js.map +1 -1
  175. package/dist/src/llm/createHallucinationEvaluator.d.ts +2 -2
  176. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  177. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  178. package/dist/src/llm/createToolInvocationEvaluator.d.ts +2 -2
  179. package/dist/src/llm/createToolInvocationEvaluator.d.ts.map +1 -1
  180. package/dist/src/llm/createToolInvocationEvaluator.js.map +1 -1
  181. package/dist/src/llm/createToolResponseHandlingEvaluator.d.ts +78 -0
  182. package/dist/src/llm/createToolResponseHandlingEvaluator.d.ts.map +1 -0
  183. package/dist/src/llm/createToolResponseHandlingEvaluator.js +70 -0
  184. package/dist/src/llm/createToolResponseHandlingEvaluator.js.map +1 -0
  185. package/dist/src/llm/createToolSelectionEvaluator.d.ts +64 -0
  186. package/dist/src/llm/createToolSelectionEvaluator.d.ts.map +1 -0
  187. package/dist/src/llm/createToolSelectionEvaluator.js +61 -0
  188. package/dist/src/llm/createToolSelectionEvaluator.js.map +1 -0
  189. package/dist/src/llm/generateClassification.d.ts +2 -2
  190. package/dist/src/llm/generateClassification.d.ts.map +1 -1
  191. package/dist/src/llm/generateClassification.js +1 -1
  192. package/dist/src/llm/generateClassification.js.map +1 -1
  193. package/dist/src/llm/index.d.ts +3 -0
  194. package/dist/src/llm/index.d.ts.map +1 -1
  195. package/dist/src/llm/index.js +3 -0
  196. package/dist/src/llm/index.js.map +1 -1
  197. package/dist/src/template/applyTemplate.d.ts +1 -1
  198. package/dist/src/template/applyTemplate.d.ts.map +1 -1
  199. package/dist/src/template/applyTemplate.js +1 -1
  200. package/dist/src/template/applyTemplate.js.map +1 -1
  201. package/dist/src/template/getTemplateVariables.d.ts +1 -1
  202. package/dist/src/template/getTemplateVariables.d.ts.map +1 -1
  203. package/dist/src/template/getTemplateVariables.js.map +1 -1
  204. package/dist/src/types/evals.d.ts +4 -4
  205. package/dist/src/types/evals.d.ts.map +1 -1
  206. package/dist/src/types/otel.d.ts +1 -1
  207. package/dist/src/types/otel.d.ts.map +1 -1
  208. package/dist/src/utils/bindEvaluator.d.ts +2 -2
  209. package/dist/src/utils/bindEvaluator.d.ts.map +1 -1
  210. package/dist/src/utils/objectMappingUtils.d.ts +1 -1
  211. package/dist/src/utils/objectMappingUtils.d.ts.map +1 -1
  212. package/dist/src/utils/objectMappingUtils.js.map +1 -1
  213. package/dist/tsconfig.tsbuildinfo +1 -1
  214. package/package.json +37 -38
  215. package/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +60 -0
  216. package/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +9 -1
  217. package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +14 -3
  218. package/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +16 -2
  219. package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +15 -1
  220. package/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +61 -11
  221. package/src/__generated__/default_templates/TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.ts +88 -0
  222. package/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.ts +44 -8
  223. package/src/__generated__/default_templates/index.ts +2 -0
  224. package/src/core/EvaluatorBase.ts +2 -2
  225. package/src/core/FunctionEvaluator.ts +5 -2
  226. package/src/helpers/asEvaluatorFn.ts +1 -2
  227. package/src/helpers/createEvaluator.ts +2 -3
  228. package/src/helpers/toEvaluationResult.ts +1 -1
  229. package/src/llm/ClassificationEvaluator.ts +4 -5
  230. package/src/llm/LLMEvaluator.ts +1 -1
  231. package/src/llm/createClassificationEvaluator.ts +1 -2
  232. package/src/llm/createClassifierFn.ts +1 -2
  233. package/src/llm/createConcisenessEvaluator.ts +71 -0
  234. package/src/llm/createCorrectnessEvaluator.ts +2 -3
  235. package/src/llm/createDocumentRelevanceEvaluator.ts +2 -3
  236. package/src/llm/createFaithfulnessEvaluator.ts +2 -3
  237. package/src/llm/createHallucinationEvaluator.ts +2 -3
  238. package/src/llm/createToolInvocationEvaluator.ts +2 -3
  239. package/src/llm/createToolResponseHandlingEvaluator.ts +108 -0
  240. package/src/llm/createToolSelectionEvaluator.ts +92 -0
  241. package/src/llm/generateClassification.ts +5 -5
  242. package/src/llm/index.ts +3 -0
  243. package/src/template/applyTemplate.ts +2 -3
  244. package/src/template/getTemplateVariables.ts +2 -2
  245. package/src/types/evals.ts +4 -4
  246. package/src/types/otel.ts +1 -1
  247. package/src/utils/bindEvaluator.ts +2 -2
  248. package/src/utils/objectMappingUtils.ts +2 -2
@@ -1,19 +1,18 @@
1
+ import type { LanguageModel } from "ai";
2
+
1
3
  import { getTemplateVariables } from "../template";
2
- import {
4
+ import type {
3
5
  ClassificationChoicesMap,
4
6
  CreateClassificationEvaluatorArgs,
5
7
  EvaluatorFn,
6
8
  PromptTemplate,
7
9
  WithPromptTemplate,
8
10
  } from "../types";
9
- import { ObjectMapping } from "../types/data";
11
+ import type { ObjectMapping } from "../types/data";
10
12
  import { remapObject } from "../utils/objectMappingUtils";
11
-
12
13
  import { createClassifierFn } from "./createClassifierFn";
13
14
  import { LLMEvaluator } from "./LLMEvaluator";
14
15
 
15
- import { LanguageModel } from "ai";
16
-
17
16
  /**
18
17
  * An LLM evaluator that performs evaluation via classification
19
18
  */
@@ -1,5 +1,5 @@
1
1
  import { EvaluatorBase } from "../core/EvaluatorBase";
2
- import { CreateLLMEvaluatorArgs } from "../types";
2
+ import type { CreateLLMEvaluatorArgs } from "../types";
3
3
 
4
4
  /**
5
5
  * Base class for llm evaluation metrics / scores
@@ -1,5 +1,4 @@
1
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
2
-
1
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
2
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
4
3
 
5
4
  export function createClassificationEvaluator<
@@ -1,11 +1,10 @@
1
1
  import { formatTemplate } from "../template";
2
- import {
2
+ import type {
3
3
  ClassificationChoicesMap,
4
4
  CreateClassifierArgs,
5
5
  EvaluationResult,
6
6
  EvaluatorFn,
7
7
  } from "../types/evals";
8
-
9
8
  import { generateClassification } from "./generateClassification";
10
9
 
11
10
  /**
@@ -0,0 +1,71 @@
1
+ import { CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
4
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+
6
+ export interface ConcisenessEvaluatorArgs<
7
+ RecordType extends Record<string, unknown> = ConcisenessEvaluationRecord,
8
+ > extends Omit<
9
+ CreateClassificationEvaluatorArgs<RecordType>,
10
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
11
+ > {
12
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
13
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
14
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
15
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
16
+ }
17
+
18
+ /**
19
+ * A record to be evaluated by the conciseness evaluator.
20
+ */
21
+ export type ConcisenessEvaluationRecord = {
22
+ input: string;
23
+ output: string;
24
+ };
25
+
26
+ /**
27
+ * Creates a conciseness evaluator function.
28
+ *
29
+ * This function returns an evaluator that determines whether a given output
30
+ * is concise and free of unnecessary content such as pleasantries, hedging,
31
+ * meta-commentary, or redundant information.
32
+ *
33
+ * @param args - The arguments for creating the conciseness evaluator.
34
+ * @param args.model - The model to use for classification.
35
+ * @param args.choices - The possible classification choices (defaults to CONCISENESS_CHOICES).
36
+ * @param args.promptTemplate - The prompt template to use (defaults to CONCISENESS_TEMPLATE).
37
+ * @param args.telemetry - The telemetry to use for the evaluator.
38
+ *
39
+ * @returns An evaluator function that takes a {@link ConcisenessEvaluationRecord} and returns a classification result
40
+ * indicating whether the output is concise or verbose.
41
+ *
42
+ * @example
43
+ * ```ts
44
+ * const evaluator = createConcisenessEvaluator({ model: openai("gpt-4o-mini") });
45
+ * const result = await evaluator.evaluate({
46
+ * input: "What is the capital of France?",
47
+ * output: "Paris.",
48
+ * });
49
+ * console.log(result.label); // "concise" or "verbose"
50
+ * ```
51
+ */
52
+ export function createConcisenessEvaluator<
53
+ RecordType extends Record<string, unknown> = ConcisenessEvaluationRecord,
54
+ >(
55
+ args: ConcisenessEvaluatorArgs<RecordType>
56
+ ): ClassificationEvaluator<RecordType> {
57
+ const {
58
+ choices = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.choices,
59
+ promptTemplate = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.template,
60
+ optimizationDirection = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
61
+ name = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.name,
62
+ ...rest
63
+ } = args;
64
+ return createClassificationEvaluator<RecordType>({
65
+ ...rest,
66
+ promptTemplate,
67
+ choices,
68
+ optimizationDirection,
69
+ name,
70
+ });
71
+ }
@@ -1,7 +1,6 @@
1
1
  import { CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface CorrectnessEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface DocumentRelevanceEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface FaithfulnessEvaluatorArgs<
@@ -6,9 +6,8 @@
6
6
  */
7
7
 
8
8
  import { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
9
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
10
-
11
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
9
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
10
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
12
11
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
13
12
 
14
13
  export interface HallucinationEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface ToolInvocationEvaluatorArgs<
@@ -0,0 +1,108 @@
1
+ import { TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
4
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+
6
+ export interface ToolResponseHandlingEvaluatorArgs<
7
+ RecordType extends Record<string, unknown> =
8
+ ToolResponseHandlingEvaluationRecord,
9
+ > extends Omit<
10
+ CreateClassificationEvaluatorArgs<RecordType>,
11
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
12
+ > {
13
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
14
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
15
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
16
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
17
+ }
18
+
19
+ /**
20
+ * A record to be evaluated by the tool response handling evaluator.
21
+ */
22
+ export type ToolResponseHandlingEvaluationRecord = {
23
+ /**
24
+ * The user query or conversation context.
25
+ */
26
+ input: string;
27
+ /**
28
+ * The tool invocation(s) made by the agent, including arguments.
29
+ */
30
+ toolCall: string;
31
+ /**
32
+ * The tool's response (data, errors, or partial results).
33
+ */
34
+ toolResult: string;
35
+ /**
36
+ * The agent's handling after receiving the tool result
37
+ * (may include retries, follow-ups, or final response).
38
+ */
39
+ output: string;
40
+ };
41
+
42
+ /**
43
+ * Creates a tool response handling evaluator function.
44
+ *
45
+ * This function returns an evaluator that determines whether an AI agent properly
46
+ * handled a tool's response, including error handling, data extraction,
47
+ * transformation, and safe information disclosure.
48
+ *
49
+ * @param args - The arguments for creating the tool response handling evaluator.
50
+ * @param args.model - The model to use for classification.
51
+ * @param args.choices - The possible classification choices (defaults to correct/incorrect).
52
+ * @param args.promptTemplate - The prompt template to use.
53
+ * @param args.telemetry - The telemetry to use for the evaluator.
54
+ *
55
+ * @returns An evaluator function that takes a {@link ToolResponseHandlingEvaluationRecord}
56
+ * and returns a classification result indicating whether the tool response handling
57
+ * is correct or incorrect.
58
+ *
59
+ * @example
60
+ * ```ts
61
+ * const evaluator = createToolResponseHandlingEvaluator({ model: openai("gpt-4o-mini") });
62
+ *
63
+ * // Example: Correct extraction from tool result
64
+ * const result = await evaluator.evaluate({
65
+ * input: "What's the weather in Seattle?",
66
+ * toolCall: 'get_weather(location="Seattle")',
67
+ * toolResult: JSON.stringify({
68
+ * temperature: 58,
69
+ * unit: "fahrenheit",
70
+ * conditions: "partly cloudy"
71
+ * }),
72
+ * output: "The weather in Seattle is 58°F and partly cloudy."
73
+ * });
74
+ * console.log(result.label); // "correct"
75
+ *
76
+ * // Example: Hallucinated data (incorrect)
77
+ * const resultHallucinated = await evaluator.evaluate({
78
+ * input: "What restaurants are nearby?",
79
+ * toolCall: 'search_restaurants(location="downtown")',
80
+ * toolResult: JSON.stringify({
81
+ * results: [{ name: "Cafe Luna", rating: 4.2 }]
82
+ * }),
83
+ * output: "I found Cafe Luna (4.2 stars) and Mario's Italian (4.8 stars) nearby."
84
+ * });
85
+ * console.log(resultHallucinated.label); // "incorrect" - Mario's was hallucinated
86
+ * ```
87
+ */
88
+ export function createToolResponseHandlingEvaluator<
89
+ RecordType extends Record<string, unknown> =
90
+ ToolResponseHandlingEvaluationRecord,
91
+ >(
92
+ args: ToolResponseHandlingEvaluatorArgs<RecordType>
93
+ ): ClassificationEvaluator<RecordType> {
94
+ const {
95
+ choices = TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.choices,
96
+ promptTemplate = TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.template,
97
+ optimizationDirection = TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
98
+ name = TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG.name,
99
+ ...rest
100
+ } = args;
101
+ return createClassificationEvaluator<RecordType>({
102
+ ...rest,
103
+ promptTemplate,
104
+ choices,
105
+ optimizationDirection,
106
+ name,
107
+ });
108
+ }
@@ -0,0 +1,92 @@
1
+ import { TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
4
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+
6
+ export interface ToolSelectionEvaluatorArgs<
7
+ RecordType extends Record<string, unknown> = ToolSelectionEvaluationRecord,
8
+ > extends Omit<
9
+ CreateClassificationEvaluatorArgs<RecordType>,
10
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
11
+ > {
12
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
13
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
14
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
15
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
16
+ }
17
+
18
+ /**
19
+ * A record to be evaluated by the tool selection evaluator.
20
+ */
21
+ export type ToolSelectionEvaluationRecord = {
22
+ /**
23
+ * The input query or conversation context.
24
+ */
25
+ input: string;
26
+ /**
27
+ * The available tools that the LLM could use.
28
+ */
29
+ availableTools: string;
30
+ /**
31
+ * The tool or tools selected by the LLM.
32
+ */
33
+ toolSelection: string;
34
+ };
35
+
36
+ /**
37
+ * Creates a tool selection evaluator function.
38
+ *
39
+ * This function returns an evaluator that determines whether the correct tool
40
+ * was selected for a given context. Unlike the tool invocation evaluator which
41
+ * checks if the tool was called correctly with proper arguments, this evaluator
42
+ * focuses on whether the right tool was chosen in the first place.
43
+ *
44
+ * The evaluator checks for:
45
+ * - Whether the LLM chose the best available tool for the user query
46
+ * - Whether the tool name exists in the available tools list
47
+ * - Whether the correct number of tools were selected for the task
48
+ * - Whether the tool selection is safe and appropriate
49
+ *
50
+ * @param args - The arguments for creating the tool selection evaluator.
51
+ * @param args.model - The model to use for classification.
52
+ * @param args.choices - The possible classification choices (defaults to correct/incorrect).
53
+ * @param args.promptTemplate - The prompt template to use (defaults to TOOL_SELECTION_TEMPLATE).
54
+ * @param args.telemetry - The telemetry to use for the evaluator.
55
+ *
56
+ * @returns An evaluator function that takes a {@link ToolSelectionEvaluationRecord} and returns
57
+ * a classification result indicating whether the tool selection is correct or incorrect.
58
+ *
59
+ * @example
60
+ * ```ts
61
+ * const evaluator = createToolSelectionEvaluator({ model: openai("gpt-4o-mini") });
62
+ *
63
+ * const result = await evaluator.evaluate({
64
+ * input: "User: What is the weather in San Francisco?",
65
+ * availableTools: `WeatherTool: Get the current weather for a location.
66
+ * NewsTool: Stay connected to global events with our up-to-date news around the world.
67
+ * MusicTool: Create playlists, search for music, and check the latest music trends.`,
68
+ * toolSelection: "WeatherTool"
69
+ * });
70
+ * console.log(result.label); // "correct" or "incorrect"
71
+ * ```
72
+ */
73
+ export function createToolSelectionEvaluator<
74
+ RecordType extends Record<string, unknown> = ToolSelectionEvaluationRecord,
75
+ >(
76
+ args: ToolSelectionEvaluatorArgs<RecordType>
77
+ ): ClassificationEvaluator<RecordType> {
78
+ const {
79
+ choices = TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.choices,
80
+ promptTemplate = TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.template,
81
+ optimizationDirection = TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
82
+ name = TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.name,
83
+ ...rest
84
+ } = args;
85
+ return createClassificationEvaluator<RecordType>({
86
+ ...rest,
87
+ promptTemplate,
88
+ choices,
89
+ optimizationDirection,
90
+ name,
91
+ });
92
+ }
@@ -1,10 +1,10 @@
1
- import { tracer } from "../telemetry";
2
- import { ClassificationResult, WithLLM } from "../types/evals";
3
- import { WithTelemetry } from "../types/otel";
4
- import type { WithPrompt } from "../types/prompts";
5
-
6
1
  import { generateObject } from "ai";
7
2
  import { z } from "zod";
3
+
4
+ import { tracer } from "../telemetry";
5
+ import type { ClassificationResult, WithLLM } from "../types/evals";
6
+ import type { WithTelemetry } from "../types/otel";
7
+ import type { WithPrompt } from "../types/prompts";
8
8
  export type ClassifyArgs = WithLLM &
9
9
  WithTelemetry &
10
10
  WithPrompt & {
package/src/llm/index.ts CHANGED
@@ -1,10 +1,13 @@
1
1
  export * from "./ClassificationEvaluator";
2
2
  export * from "./createClassificationEvaluator";
3
3
  export * from "./createClassifierFn";
4
+ export * from "./createConcisenessEvaluator";
4
5
  export * from "./createCorrectnessEvaluator";
5
6
  export * from "./createDocumentRelevanceEvaluator";
6
7
  export * from "./createFaithfulnessEvaluator";
7
8
  export * from "./createHallucinationEvaluator"; // Deprecated: use createFaithfulnessEvaluator
8
9
  export * from "./createToolInvocationEvaluator";
10
+ export * from "./createToolResponseHandlingEvaluator";
11
+ export * from "./createToolSelectionEvaluator";
9
12
  export * from "./generateClassification";
10
13
  export * from "./LLMEvaluator";
@@ -1,9 +1,8 @@
1
- import { PromptTemplate, RenderedPrompt } from "../types/templating";
1
+ import Mustache from "mustache";
2
2
 
3
+ import type { PromptTemplate, RenderedPrompt } from "../types/templating";
3
4
  import { createTemplateVariablesProxy } from "./createTemplateVariablesProxy";
4
5
 
5
- import Mustache from "mustache";
6
-
7
6
  /**
8
7
  * A function that applies a set of variables to a template (e.g. a prompt)
9
8
  * Uses the Mustache library to apply the variables to the template
@@ -1,7 +1,7 @@
1
- import { PromptTemplate } from "../types/templating";
2
-
3
1
  import Mustache from "mustache";
4
2
 
3
+ import type { PromptTemplate } from "../types/templating";
4
+
5
5
  type GetTemplateVariableArgs = {
6
6
  template: PromptTemplate;
7
7
  };
@@ -1,8 +1,8 @@
1
- import { ObjectMapping } from "./data";
2
- import { WithTelemetry } from "./otel";
3
- import { PromptTemplate } from "./templating";
1
+ import type { LanguageModel } from "ai";
4
2
 
5
- import { LanguageModel } from "ai";
3
+ import type { ObjectMapping } from "./data";
4
+ import type { WithTelemetry } from "./otel";
5
+ import type { PromptTemplate } from "./templating";
6
6
 
7
7
  /**
8
8
  * A specific AI example that is under evaluation
package/src/types/otel.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Tracer } from "@opentelemetry/api";
1
+ import type { Tracer } from "@opentelemetry/api";
2
2
 
3
3
  export type TelemetryConfig = {
4
4
  /**
@@ -1,5 +1,5 @@
1
- import { EvaluatorBase } from "../core/EvaluatorBase";
2
- import { ObjectMapping } from "../types/data";
1
+ import type { EvaluatorBase } from "../core/EvaluatorBase";
2
+ import type { ObjectMapping } from "../types/data";
3
3
 
4
4
  /**
5
5
  * Context for binding an evaluator with input mapping configuration.
@@ -1,7 +1,7 @@
1
- import { ObjectMapping, ValueGetter } from "../types/data";
2
-
3
1
  import { JSONPath } from "jsonpath-plus";
4
2
 
3
+ import type { ObjectMapping, ValueGetter } from "../types/data";
4
+
5
5
  /**
6
6
  * Remaps an object by applying field mappings while preserving original data.
7
7
  *