@arizeai/phoenix-evals 0.9.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/README.md +23 -23
  2. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  3. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  4. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js +58 -0
  5. package/dist/esm/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  6. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  7. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +9 -1
  8. package/dist/esm/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  9. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  10. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +14 -3
  11. package/dist/esm/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  12. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  13. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +16 -2
  14. package/dist/esm/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  15. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  16. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +15 -1
  17. package/dist/esm/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  18. package/dist/esm/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  19. package/dist/esm/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  20. package/dist/esm/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.js +59 -0
  21. package/dist/esm/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  22. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  23. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -11
  24. package/dist/esm/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  25. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  26. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js +44 -8
  27. package/dist/esm/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  28. package/dist/esm/__generated__/default_templates/index.d.ts +2 -0
  29. package/dist/esm/__generated__/default_templates/index.d.ts.map +1 -1
  30. package/dist/esm/__generated__/default_templates/index.js +2 -0
  31. package/dist/esm/__generated__/default_templates/index.js.map +1 -1
  32. package/dist/esm/__generated__/types.d.ts +1 -1
  33. package/dist/esm/__generated__/types.d.ts.map +1 -1
  34. package/dist/esm/core/EvaluatorBase.d.ts +2 -2
  35. package/dist/esm/core/EvaluatorBase.d.ts.map +1 -1
  36. package/dist/esm/core/FunctionEvaluator.d.ts +1 -1
  37. package/dist/esm/core/FunctionEvaluator.d.ts.map +1 -1
  38. package/dist/esm/core/FunctionEvaluator.js.map +1 -1
  39. package/dist/esm/helpers/asEvaluatorFn.d.ts +1 -1
  40. package/dist/esm/helpers/asEvaluatorFn.d.ts.map +1 -1
  41. package/dist/esm/helpers/asEvaluatorFn.js.map +1 -1
  42. package/dist/esm/helpers/createEvaluator.d.ts +2 -2
  43. package/dist/esm/helpers/createEvaluator.d.ts.map +1 -1
  44. package/dist/esm/helpers/createEvaluator.js.map +1 -1
  45. package/dist/esm/helpers/toEvaluationResult.d.ts +1 -1
  46. package/dist/esm/helpers/toEvaluationResult.d.ts.map +1 -1
  47. package/dist/esm/llm/ClassificationEvaluator.d.ts +3 -3
  48. package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
  49. package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
  50. package/dist/esm/llm/LLMEvaluator.d.ts +1 -1
  51. package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
  52. package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
  53. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
  54. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
  55. package/dist/esm/llm/createClassifierFn.d.ts +1 -1
  56. package/dist/esm/llm/createClassifierFn.d.ts.map +1 -1
  57. package/dist/esm/llm/createClassifierFn.js.map +1 -1
  58. package/dist/esm/llm/createConcisenessEvaluator.d.ts +43 -0
  59. package/dist/esm/llm/createConcisenessEvaluator.d.ts.map +1 -0
  60. package/dist/esm/llm/createConcisenessEvaluator.js +39 -0
  61. package/dist/esm/llm/createConcisenessEvaluator.js.map +1 -0
  62. package/dist/esm/llm/createCorrectnessEvaluator.d.ts +2 -2
  63. package/dist/esm/llm/createCorrectnessEvaluator.d.ts.map +1 -1
  64. package/dist/esm/llm/createCorrectnessEvaluator.js.map +1 -1
  65. package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts +2 -2
  66. package/dist/esm/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -1
  67. package/dist/esm/llm/createDocumentRelevanceEvaluator.js.map +1 -1
  68. package/dist/esm/llm/createFaithfulnessEvaluator.d.ts +2 -2
  69. package/dist/esm/llm/createFaithfulnessEvaluator.d.ts.map +1 -1
  70. package/dist/esm/llm/createFaithfulnessEvaluator.js.map +1 -1
  71. package/dist/esm/llm/createHallucinationEvaluator.d.ts +2 -2
  72. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  73. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  74. package/dist/esm/llm/createRefusalEvaluator.d.ts +44 -0
  75. package/dist/esm/llm/createRefusalEvaluator.d.ts.map +1 -0
  76. package/dist/esm/llm/createRefusalEvaluator.js +40 -0
  77. package/dist/esm/llm/createRefusalEvaluator.js.map +1 -0
  78. package/dist/esm/llm/createToolInvocationEvaluator.d.ts +2 -2
  79. package/dist/esm/llm/createToolInvocationEvaluator.d.ts.map +1 -1
  80. package/dist/esm/llm/createToolInvocationEvaluator.js.map +1 -1
  81. package/dist/esm/llm/createToolResponseHandlingEvaluator.d.ts +2 -2
  82. package/dist/esm/llm/createToolResponseHandlingEvaluator.d.ts.map +1 -1
  83. package/dist/esm/llm/createToolResponseHandlingEvaluator.js.map +1 -1
  84. package/dist/esm/llm/createToolSelectionEvaluator.d.ts +2 -2
  85. package/dist/esm/llm/createToolSelectionEvaluator.d.ts.map +1 -1
  86. package/dist/esm/llm/createToolSelectionEvaluator.js.map +1 -1
  87. package/dist/esm/llm/generateClassification.d.ts +2 -2
  88. package/dist/esm/llm/generateClassification.d.ts.map +1 -1
  89. package/dist/esm/llm/generateClassification.js +1 -1
  90. package/dist/esm/llm/generateClassification.js.map +1 -1
  91. package/dist/esm/llm/index.d.ts +2 -0
  92. package/dist/esm/llm/index.d.ts.map +1 -1
  93. package/dist/esm/llm/index.js +2 -0
  94. package/dist/esm/llm/index.js.map +1 -1
  95. package/dist/esm/template/applyTemplate.d.ts +1 -1
  96. package/dist/esm/template/applyTemplate.d.ts.map +1 -1
  97. package/dist/esm/template/applyTemplate.js +1 -1
  98. package/dist/esm/template/applyTemplate.js.map +1 -1
  99. package/dist/esm/template/getTemplateVariables.d.ts +1 -1
  100. package/dist/esm/template/getTemplateVariables.d.ts.map +1 -1
  101. package/dist/esm/template/getTemplateVariables.js.map +1 -1
  102. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  103. package/dist/esm/types/evals.d.ts +5 -5
  104. package/dist/esm/types/evals.d.ts.map +1 -1
  105. package/dist/esm/types/otel.d.ts +1 -1
  106. package/dist/esm/types/otel.d.ts.map +1 -1
  107. package/dist/esm/utils/bindEvaluator.d.ts +2 -2
  108. package/dist/esm/utils/bindEvaluator.d.ts.map +1 -1
  109. package/dist/esm/utils/objectMappingUtils.d.ts +1 -1
  110. package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -1
  111. package/dist/esm/utils/objectMappingUtils.js.map +1 -1
  112. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  113. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  114. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -0
  115. package/dist/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  116. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  117. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +9 -1
  118. package/dist/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  119. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  120. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js +14 -3
  121. package/dist/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  122. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  123. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js +16 -2
  124. package/dist/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  125. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  126. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js +15 -1
  127. package/dist/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  128. package/dist/src/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.d.ts +3 -0
  129. package/dist/src/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -0
  130. package/dist/src/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.js +62 -0
  131. package/dist/src/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -0
  132. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  133. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js +61 -11
  134. package/dist/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  135. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.d.ts.map +1 -1
  136. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js +44 -8
  137. package/dist/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.js.map +1 -1
  138. package/dist/src/__generated__/default_templates/index.d.ts +2 -0
  139. package/dist/src/__generated__/default_templates/index.d.ts.map +1 -1
  140. package/dist/src/__generated__/default_templates/index.js +5 -1
  141. package/dist/src/__generated__/default_templates/index.js.map +1 -1
  142. package/dist/src/__generated__/types.d.ts +1 -1
  143. package/dist/src/__generated__/types.d.ts.map +1 -1
  144. package/dist/src/core/EvaluatorBase.d.ts +2 -2
  145. package/dist/src/core/EvaluatorBase.d.ts.map +1 -1
  146. package/dist/src/core/FunctionEvaluator.d.ts +1 -1
  147. package/dist/src/core/FunctionEvaluator.d.ts.map +1 -1
  148. package/dist/src/core/FunctionEvaluator.js.map +1 -1
  149. package/dist/src/helpers/asEvaluatorFn.d.ts +1 -1
  150. package/dist/src/helpers/asEvaluatorFn.d.ts.map +1 -1
  151. package/dist/src/helpers/asEvaluatorFn.js.map +1 -1
  152. package/dist/src/helpers/createEvaluator.d.ts +2 -2
  153. package/dist/src/helpers/createEvaluator.d.ts.map +1 -1
  154. package/dist/src/helpers/createEvaluator.js.map +1 -1
  155. package/dist/src/helpers/toEvaluationResult.d.ts +1 -1
  156. package/dist/src/helpers/toEvaluationResult.d.ts.map +1 -1
  157. package/dist/src/llm/ClassificationEvaluator.d.ts +3 -3
  158. package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
  159. package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
  160. package/dist/src/llm/LLMEvaluator.d.ts +1 -1
  161. package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
  162. package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
  163. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
  164. package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
  165. package/dist/src/llm/createClassifierFn.d.ts +1 -1
  166. package/dist/src/llm/createClassifierFn.d.ts.map +1 -1
  167. package/dist/src/llm/createClassifierFn.js.map +1 -1
  168. package/dist/src/llm/createConcisenessEvaluator.d.ts +43 -0
  169. package/dist/src/llm/createConcisenessEvaluator.d.ts.map +1 -0
  170. package/dist/src/llm/createConcisenessEvaluator.js +50 -0
  171. package/dist/src/llm/createConcisenessEvaluator.js.map +1 -0
  172. package/dist/src/llm/createCorrectnessEvaluator.d.ts +2 -2
  173. package/dist/src/llm/createCorrectnessEvaluator.d.ts.map +1 -1
  174. package/dist/src/llm/createCorrectnessEvaluator.js.map +1 -1
  175. package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts +2 -2
  176. package/dist/src/llm/createDocumentRelevanceEvaluator.d.ts.map +1 -1
  177. package/dist/src/llm/createDocumentRelevanceEvaluator.js.map +1 -1
  178. package/dist/src/llm/createFaithfulnessEvaluator.d.ts +2 -2
  179. package/dist/src/llm/createFaithfulnessEvaluator.d.ts.map +1 -1
  180. package/dist/src/llm/createFaithfulnessEvaluator.js.map +1 -1
  181. package/dist/src/llm/createHallucinationEvaluator.d.ts +2 -2
  182. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  183. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  184. package/dist/src/llm/createRefusalEvaluator.d.ts +44 -0
  185. package/dist/src/llm/createRefusalEvaluator.d.ts.map +1 -0
  186. package/dist/src/llm/createRefusalEvaluator.js +51 -0
  187. package/dist/src/llm/createRefusalEvaluator.js.map +1 -0
  188. package/dist/src/llm/createToolInvocationEvaluator.d.ts +2 -2
  189. package/dist/src/llm/createToolInvocationEvaluator.d.ts.map +1 -1
  190. package/dist/src/llm/createToolInvocationEvaluator.js.map +1 -1
  191. package/dist/src/llm/createToolResponseHandlingEvaluator.d.ts +2 -2
  192. package/dist/src/llm/createToolResponseHandlingEvaluator.d.ts.map +1 -1
  193. package/dist/src/llm/createToolResponseHandlingEvaluator.js.map +1 -1
  194. package/dist/src/llm/createToolSelectionEvaluator.d.ts +2 -2
  195. package/dist/src/llm/createToolSelectionEvaluator.d.ts.map +1 -1
  196. package/dist/src/llm/createToolSelectionEvaluator.js.map +1 -1
  197. package/dist/src/llm/generateClassification.d.ts +2 -2
  198. package/dist/src/llm/generateClassification.d.ts.map +1 -1
  199. package/dist/src/llm/generateClassification.js +1 -1
  200. package/dist/src/llm/generateClassification.js.map +1 -1
  201. package/dist/src/llm/index.d.ts +2 -0
  202. package/dist/src/llm/index.d.ts.map +1 -1
  203. package/dist/src/llm/index.js +2 -0
  204. package/dist/src/llm/index.js.map +1 -1
  205. package/dist/src/template/applyTemplate.d.ts +1 -1
  206. package/dist/src/template/applyTemplate.d.ts.map +1 -1
  207. package/dist/src/template/applyTemplate.js +1 -1
  208. package/dist/src/template/applyTemplate.js.map +1 -1
  209. package/dist/src/template/getTemplateVariables.d.ts +1 -1
  210. package/dist/src/template/getTemplateVariables.d.ts.map +1 -1
  211. package/dist/src/template/getTemplateVariables.js.map +1 -1
  212. package/dist/src/types/evals.d.ts +5 -5
  213. package/dist/src/types/evals.d.ts.map +1 -1
  214. package/dist/src/types/otel.d.ts +1 -1
  215. package/dist/src/types/otel.d.ts.map +1 -1
  216. package/dist/src/utils/bindEvaluator.d.ts +2 -2
  217. package/dist/src/utils/bindEvaluator.d.ts.map +1 -1
  218. package/dist/src/utils/objectMappingUtils.d.ts +1 -1
  219. package/dist/src/utils/objectMappingUtils.d.ts.map +1 -1
  220. package/dist/src/utils/objectMappingUtils.js.map +1 -1
  221. package/dist/tsconfig.tsbuildinfo +1 -1
  222. package/package.json +38 -39
  223. package/src/__generated__/default_templates/CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +60 -0
  224. package/src/__generated__/default_templates/CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +9 -1
  225. package/src/__generated__/default_templates/DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG.ts +14 -3
  226. package/src/__generated__/default_templates/FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG.ts +16 -2
  227. package/src/__generated__/default_templates/HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +15 -1
  228. package/src/__generated__/default_templates/REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.ts +61 -0
  229. package/src/__generated__/default_templates/TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG.ts +61 -11
  230. package/src/__generated__/default_templates/TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG.ts +44 -8
  231. package/src/__generated__/default_templates/index.ts +2 -0
  232. package/src/__generated__/types.ts +1 -1
  233. package/src/core/EvaluatorBase.ts +2 -2
  234. package/src/core/FunctionEvaluator.ts +5 -2
  235. package/src/helpers/asEvaluatorFn.ts +1 -2
  236. package/src/helpers/createEvaluator.ts +2 -3
  237. package/src/helpers/toEvaluationResult.ts +1 -1
  238. package/src/llm/ClassificationEvaluator.ts +4 -5
  239. package/src/llm/LLMEvaluator.ts +1 -1
  240. package/src/llm/createClassificationEvaluator.ts +1 -2
  241. package/src/llm/createClassifierFn.ts +1 -2
  242. package/src/llm/createConcisenessEvaluator.ts +71 -0
  243. package/src/llm/createCorrectnessEvaluator.ts +2 -3
  244. package/src/llm/createDocumentRelevanceEvaluator.ts +2 -3
  245. package/src/llm/createFaithfulnessEvaluator.ts +2 -3
  246. package/src/llm/createHallucinationEvaluator.ts +2 -3
  247. package/src/llm/createRefusalEvaluator.ts +70 -0
  248. package/src/llm/createToolInvocationEvaluator.ts +2 -3
  249. package/src/llm/createToolResponseHandlingEvaluator.ts +2 -3
  250. package/src/llm/createToolSelectionEvaluator.ts +2 -3
  251. package/src/llm/generateClassification.ts +5 -5
  252. package/src/llm/index.ts +2 -0
  253. package/src/template/applyTemplate.ts +2 -3
  254. package/src/template/getTemplateVariables.ts +2 -2
  255. package/src/types/evals.ts +5 -5
  256. package/src/types/otel.ts +1 -1
  257. package/src/utils/bindEvaluator.ts +2 -2
  258. package/src/utils/objectMappingUtils.ts +2 -2
@@ -1,19 +1,18 @@
1
+ import type { LanguageModel } from "ai";
2
+
1
3
  import { getTemplateVariables } from "../template";
2
- import {
4
+ import type {
3
5
  ClassificationChoicesMap,
4
6
  CreateClassificationEvaluatorArgs,
5
7
  EvaluatorFn,
6
8
  PromptTemplate,
7
9
  WithPromptTemplate,
8
10
  } from "../types";
9
- import { ObjectMapping } from "../types/data";
11
+ import type { ObjectMapping } from "../types/data";
10
12
  import { remapObject } from "../utils/objectMappingUtils";
11
-
12
13
  import { createClassifierFn } from "./createClassifierFn";
13
14
  import { LLMEvaluator } from "./LLMEvaluator";
14
15
 
15
- import { LanguageModel } from "ai";
16
-
17
16
  /**
18
17
  * An LLM evaluator that performs evaluation via classification
19
18
  */
@@ -1,5 +1,5 @@
1
1
  import { EvaluatorBase } from "../core/EvaluatorBase";
2
- import { CreateLLMEvaluatorArgs } from "../types";
2
+ import type { CreateLLMEvaluatorArgs } from "../types";
3
3
 
4
4
  /**
5
5
  * Base class for llm evaluation metrics / scores
@@ -1,5 +1,4 @@
1
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
2
-
1
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
2
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
4
3
 
5
4
  export function createClassificationEvaluator<
@@ -1,11 +1,10 @@
1
1
  import { formatTemplate } from "../template";
2
- import {
2
+ import type {
3
3
  ClassificationChoicesMap,
4
4
  CreateClassifierArgs,
5
5
  EvaluationResult,
6
6
  EvaluatorFn,
7
7
  } from "../types/evals";
8
-
9
8
  import { generateClassification } from "./generateClassification";
10
9
 
11
10
  /**
@@ -0,0 +1,71 @@
1
+ import { CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
4
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+
6
+ export interface ConcisenessEvaluatorArgs<
7
+ RecordType extends Record<string, unknown> = ConcisenessEvaluationRecord,
8
+ > extends Omit<
9
+ CreateClassificationEvaluatorArgs<RecordType>,
10
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
11
+ > {
12
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
13
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
14
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
15
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
16
+ }
17
+
18
+ /**
19
+ * A record to be evaluated by the conciseness evaluator.
20
+ */
21
+ export type ConcisenessEvaluationRecord = {
22
+ input: string;
23
+ output: string;
24
+ };
25
+
26
+ /**
27
+ * Creates a conciseness evaluator function.
28
+ *
29
+ * This function returns an evaluator that determines whether a given output
30
+ * is concise and free of unnecessary content such as pleasantries, hedging,
31
+ * meta-commentary, or redundant information.
32
+ *
33
+ * @param args - The arguments for creating the conciseness evaluator.
34
+ * @param args.model - The model to use for classification.
35
+ * @param args.choices - The possible classification choices (defaults to CONCISENESS_CHOICES).
36
+ * @param args.promptTemplate - The prompt template to use (defaults to CONCISENESS_TEMPLATE).
37
+ * @param args.telemetry - The telemetry to use for the evaluator.
38
+ *
39
+ * @returns An evaluator function that takes a {@link ConcisenessEvaluationRecord} and returns a classification result
40
+ * indicating whether the output is concise or verbose.
41
+ *
42
+ * @example
43
+ * ```ts
44
+ * const evaluator = createConcisenessEvaluator({ model: openai("gpt-4o-mini") });
45
+ * const result = await evaluator.evaluate({
46
+ * input: "What is the capital of France?",
47
+ * output: "Paris.",
48
+ * });
49
+ * console.log(result.label); // "concise" or "verbose"
50
+ * ```
51
+ */
52
+ export function createConcisenessEvaluator<
53
+ RecordType extends Record<string, unknown> = ConcisenessEvaluationRecord,
54
+ >(
55
+ args: ConcisenessEvaluatorArgs<RecordType>
56
+ ): ClassificationEvaluator<RecordType> {
57
+ const {
58
+ choices = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.choices,
59
+ promptTemplate = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.template,
60
+ optimizationDirection = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
61
+ name = CONCISENESS_CLASSIFICATION_EVALUATOR_CONFIG.name,
62
+ ...rest
63
+ } = args;
64
+ return createClassificationEvaluator<RecordType>({
65
+ ...rest,
66
+ promptTemplate,
67
+ choices,
68
+ optimizationDirection,
69
+ name,
70
+ });
71
+ }
@@ -1,7 +1,6 @@
1
1
  import { CORRECTNESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface CorrectnessEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { DOCUMENT_RELEVANCE_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface DocumentRelevanceEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { FAITHFULNESS_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface FaithfulnessEvaluatorArgs<
@@ -6,9 +6,8 @@
6
6
  */
7
7
 
8
8
  import { HALLUCINATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
9
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
10
-
11
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
9
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
10
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
12
11
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
13
12
 
14
13
  export interface HallucinationEvaluatorArgs<
@@ -0,0 +1,70 @@
1
+ import { REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
4
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+
6
+ export interface RefusalEvaluatorArgs<
7
+ RecordType extends Record<string, unknown> = RefusalEvaluationRecord,
8
+ > extends Omit<
9
+ CreateClassificationEvaluatorArgs<RecordType>,
10
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
11
+ > {
12
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
13
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
14
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
15
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
16
+ }
17
+
18
+ /**
19
+ * A record to be evaluated by the refusal evaluator.
20
+ */
21
+ export type RefusalEvaluationRecord = {
22
+ input: string;
23
+ output: string;
24
+ };
25
+
26
+ /**
27
+ * Creates a refusal evaluator function.
28
+ *
29
+ * This function returns an evaluator that detects when an LLM refuses,
30
+ * declines, or avoids answering a user query. It is use-case agnostic:
31
+ * it only detects whether a refusal occurred, not whether the refusal
32
+ * was appropriate.
33
+ *
34
+ * @param args - The arguments for creating the refusal evaluator.
35
+ * @param args.model - The model to use for classification.
36
+ * @param args.choices - The possible classification choices (defaults to REFUSAL_CHOICES).
37
+ * @param args.promptTemplate - The prompt template to use (defaults to REFUSAL_TEMPLATE).
38
+ * @param args.telemetry - The telemetry to use for the evaluator.
39
+ *
40
+ * @returns An evaluator function that takes a {@link RefusalEvaluationRecord} and returns a classification result
41
+ * indicating whether the output is a refusal or an answer.
42
+ *
43
+ * @example
44
+ * ```ts
45
+ * const evaluator = createRefusalEvaluator({ model: openai("gpt-4o-mini") });
46
+ * const result = await evaluator.evaluate({
47
+ * input: "What is the capital of France?",
48
+ * output: "I'm sorry, I can only help with technical questions.",
49
+ * });
50
+ * console.log(result.label); // "refused" or "answered"
51
+ * ```
52
+ */
53
+ export function createRefusalEvaluator<
54
+ RecordType extends Record<string, unknown> = RefusalEvaluationRecord,
55
+ >(args: RefusalEvaluatorArgs<RecordType>): ClassificationEvaluator<RecordType> {
56
+ const {
57
+ choices = REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.choices,
58
+ promptTemplate = REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.template,
59
+ optimizationDirection = REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.optimizationDirection,
60
+ name = REFUSAL_CLASSIFICATION_EVALUATOR_CONFIG.name,
61
+ ...rest
62
+ } = args;
63
+ return createClassificationEvaluator<RecordType>({
64
+ ...rest,
65
+ promptTemplate,
66
+ choices,
67
+ optimizationDirection,
68
+ name,
69
+ });
70
+ }
@@ -1,7 +1,6 @@
1
1
  import { TOOL_INVOCATION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface ToolInvocationEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { TOOL_RESPONSE_HANDLING_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface ToolResponseHandlingEvaluatorArgs<
@@ -1,7 +1,6 @@
1
1
  import { TOOL_SELECTION_CLASSIFICATION_EVALUATOR_CONFIG } from "../__generated__/default_templates";
2
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
3
-
4
- import { ClassificationEvaluator } from "./ClassificationEvaluator";
2
+ import type { CreateClassificationEvaluatorArgs } from "../types/evals";
3
+ import type { ClassificationEvaluator } from "./ClassificationEvaluator";
5
4
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
6
5
 
7
6
  export interface ToolSelectionEvaluatorArgs<
@@ -1,10 +1,10 @@
1
- import { tracer } from "../telemetry";
2
- import { ClassificationResult, WithLLM } from "../types/evals";
3
- import { WithTelemetry } from "../types/otel";
4
- import type { WithPrompt } from "../types/prompts";
5
-
6
1
  import { generateObject } from "ai";
7
2
  import { z } from "zod";
3
+
4
+ import { tracer } from "../telemetry";
5
+ import type { ClassificationResult, WithLLM } from "../types/evals";
6
+ import type { WithTelemetry } from "../types/otel";
7
+ import type { WithPrompt } from "../types/prompts";
8
8
  export type ClassifyArgs = WithLLM &
9
9
  WithTelemetry &
10
10
  WithPrompt & {
package/src/llm/index.ts CHANGED
@@ -1,10 +1,12 @@
1
1
  export * from "./ClassificationEvaluator";
2
2
  export * from "./createClassificationEvaluator";
3
3
  export * from "./createClassifierFn";
4
+ export * from "./createConcisenessEvaluator";
4
5
  export * from "./createCorrectnessEvaluator";
5
6
  export * from "./createDocumentRelevanceEvaluator";
6
7
  export * from "./createFaithfulnessEvaluator";
7
8
  export * from "./createHallucinationEvaluator"; // Deprecated: use createFaithfulnessEvaluator
9
+ export * from "./createRefusalEvaluator";
8
10
  export * from "./createToolInvocationEvaluator";
9
11
  export * from "./createToolResponseHandlingEvaluator";
10
12
  export * from "./createToolSelectionEvaluator";
@@ -1,9 +1,8 @@
1
- import { PromptTemplate, RenderedPrompt } from "../types/templating";
1
+ import Mustache from "mustache";
2
2
 
3
+ import type { PromptTemplate, RenderedPrompt } from "../types/templating";
3
4
  import { createTemplateVariablesProxy } from "./createTemplateVariablesProxy";
4
5
 
5
- import Mustache from "mustache";
6
-
7
6
  /**
8
7
  * A function that applies a set of variables to a template (e.g. a prompt)
9
8
  * Uses the Mustache library to apply the variables to the template
@@ -1,7 +1,7 @@
1
- import { PromptTemplate } from "../types/templating";
2
-
3
1
  import Mustache from "mustache";
4
2
 
3
+ import type { PromptTemplate } from "../types/templating";
4
+
5
5
  type GetTemplateVariableArgs = {
6
6
  template: PromptTemplate;
7
7
  };
@@ -1,8 +1,8 @@
1
- import { ObjectMapping } from "./data";
2
- import { WithTelemetry } from "./otel";
3
- import { PromptTemplate } from "./templating";
1
+ import type { LanguageModel } from "ai";
4
2
 
5
- import { LanguageModel } from "ai";
3
+ import type { ObjectMapping } from "./data";
4
+ import type { WithTelemetry } from "./otel";
5
+ import type { PromptTemplate } from "./templating";
6
6
 
7
7
  /**
8
8
  * A specific AI example that is under evaluation
@@ -131,7 +131,7 @@ export type EvaluationKind = "LLM" | "CODE";
131
131
  * The direction to optimize the numeric evaluation score
132
132
  * E.x. "MAXIMIZE" means that the higher the score, the better the evaluation
133
133
  */
134
- export type OptimizationDirection = "MAXIMIZE" | "MINIMIZE";
134
+ export type OptimizationDirection = "MAXIMIZE" | "MINIMIZE" | "NEUTRAL";
135
135
 
136
136
  /**
137
137
  * The description of an evaluator
package/src/types/otel.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Tracer } from "@opentelemetry/api";
1
+ import type { Tracer } from "@opentelemetry/api";
2
2
 
3
3
  export type TelemetryConfig = {
4
4
  /**
@@ -1,5 +1,5 @@
1
- import { EvaluatorBase } from "../core/EvaluatorBase";
2
- import { ObjectMapping } from "../types/data";
1
+ import type { EvaluatorBase } from "../core/EvaluatorBase";
2
+ import type { ObjectMapping } from "../types/data";
3
3
 
4
4
  /**
5
5
  * Context for binding an evaluator with input mapping configuration.
@@ -1,7 +1,7 @@
1
- import { ObjectMapping, ValueGetter } from "../types/data";
2
-
3
1
  import { JSONPath } from "jsonpath-plus";
4
2
 
3
+ import type { ObjectMapping, ValueGetter } from "../types/data";
4
+
5
5
  /**
6
6
  * Remaps an object by applying field mappings while preserving original data.
7
7
  *