judgeval 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/dist/cjs/common/logger.js +28 -24
  2. package/dist/cjs/common/logger.js.map +1 -1
  3. package/dist/cjs/common/tracer.js +80 -130
  4. package/dist/cjs/common/tracer.js.map +1 -1
  5. package/dist/cjs/constants.js +2 -1
  6. package/dist/cjs/constants.js.map +1 -1
  7. package/dist/cjs/data/datasets/eval-dataset-client.js +45 -0
  8. package/dist/cjs/data/datasets/eval-dataset-client.js.map +1 -1
  9. package/dist/cjs/e2etests/eval-operations.test.js +3 -3
  10. package/dist/cjs/exporters/otel-exporter.js +352 -0
  11. package/dist/cjs/exporters/otel-exporter.js.map +1 -0
  12. package/dist/cjs/judges/index.js +217 -0
  13. package/dist/cjs/judges/index.js.map +1 -0
  14. package/dist/cjs/run-evaluation.js +13 -13
  15. package/dist/cjs/run-evaluation.js.map +1 -1
  16. package/dist/cjs/scorers/metrics/answer-correctness/answer-correctness.js +610 -0
  17. package/dist/cjs/scorers/metrics/answer-correctness/answer-correctness.js.map +1 -0
  18. package/dist/cjs/scorers/metrics/answer-correctness/index.js +19 -0
  19. package/dist/cjs/scorers/metrics/answer-correctness/index.js.map +1 -0
  20. package/dist/cjs/scorers/metrics/answer-correctness/prompts.js +175 -0
  21. package/dist/cjs/scorers/metrics/answer-correctness/prompts.js.map +1 -0
  22. package/dist/cjs/scorers/metrics/answer-relevancy/answer-relevancy.js +525 -0
  23. package/dist/cjs/scorers/metrics/answer-relevancy/answer-relevancy.js.map +1 -0
  24. package/dist/cjs/scorers/metrics/answer-relevancy/index.js +19 -0
  25. package/dist/cjs/scorers/metrics/answer-relevancy/index.js.map +1 -0
  26. package/dist/cjs/scorers/metrics/answer-relevancy/prompts.js +179 -0
  27. package/dist/cjs/scorers/metrics/answer-relevancy/prompts.js.map +1 -0
  28. package/dist/cjs/scorers/metrics/faithfulness/faithfulness.js +524 -0
  29. package/dist/cjs/scorers/metrics/faithfulness/faithfulness.js.map +1 -0
  30. package/dist/cjs/scorers/metrics/faithfulness/index.js +19 -0
  31. package/dist/cjs/scorers/metrics/faithfulness/index.js.map +1 -0
  32. package/dist/cjs/scorers/metrics/faithfulness/prompts.js +232 -0
  33. package/dist/cjs/scorers/metrics/faithfulness/prompts.js.map +1 -0
  34. package/dist/cjs/scorers/metrics/hallucination/hallucination.js +390 -0
  35. package/dist/cjs/scorers/metrics/hallucination/hallucination.js.map +1 -0
  36. package/dist/cjs/scorers/metrics/hallucination/index.js +11 -0
  37. package/dist/cjs/scorers/metrics/hallucination/index.js.map +1 -0
  38. package/dist/cjs/scorers/metrics/hallucination/prompts.js +106 -0
  39. package/dist/cjs/scorers/metrics/hallucination/prompts.js.map +1 -0
  40. package/dist/cjs/scorers/metrics/instruction-adherence/index.js +19 -0
  41. package/dist/cjs/scorers/metrics/instruction-adherence/index.js.map +1 -0
  42. package/dist/cjs/scorers/metrics/instruction-adherence/instruction-adherence.js +382 -0
  43. package/dist/cjs/scorers/metrics/instruction-adherence/instruction-adherence.js.map +1 -0
  44. package/dist/cjs/scorers/metrics/instruction-adherence/prompts.js +124 -0
  45. package/dist/cjs/scorers/metrics/instruction-adherence/prompts.js.map +1 -0
  46. package/dist/esm/common/logger.js +16 -11
  47. package/dist/esm/common/logger.js.map +1 -1
  48. package/dist/esm/common/tracer.js +78 -128
  49. package/dist/esm/common/tracer.js.map +1 -1
  50. package/dist/esm/constants.js +1 -0
  51. package/dist/esm/constants.js.map +1 -1
  52. package/dist/esm/data/datasets/eval-dataset-client.js +46 -1
  53. package/dist/esm/data/datasets/eval-dataset-client.js.map +1 -1
  54. package/dist/esm/e2etests/eval-operations.test.js +3 -3
  55. package/dist/esm/exporters/otel-exporter.js +348 -0
  56. package/dist/esm/exporters/otel-exporter.js.map +1 -0
  57. package/dist/esm/judges/index.js +185 -0
  58. package/dist/esm/judges/index.js.map +1 -0
  59. package/dist/esm/scorers/metrics/answer-correctness/answer-correctness.js +601 -0
  60. package/dist/esm/scorers/metrics/answer-correctness/answer-correctness.js.map +1 -0
  61. package/dist/esm/scorers/metrics/answer-correctness/index.js +3 -0
  62. package/dist/esm/scorers/metrics/answer-correctness/index.js.map +1 -0
  63. package/dist/esm/scorers/metrics/answer-correctness/prompts.js +171 -0
  64. package/dist/esm/scorers/metrics/answer-correctness/prompts.js.map +1 -0
  65. package/dist/esm/scorers/metrics/answer-relevancy/answer-relevancy.js +521 -0
  66. package/dist/esm/scorers/metrics/answer-relevancy/answer-relevancy.js.map +1 -0
  67. package/dist/esm/scorers/metrics/answer-relevancy/index.js +3 -0
  68. package/dist/esm/scorers/metrics/answer-relevancy/index.js.map +1 -0
  69. package/dist/esm/scorers/metrics/answer-relevancy/prompts.js +175 -0
  70. package/dist/esm/scorers/metrics/answer-relevancy/prompts.js.map +1 -0
  71. package/dist/esm/scorers/metrics/faithfulness/faithfulness.js +520 -0
  72. package/dist/esm/scorers/metrics/faithfulness/faithfulness.js.map +1 -0
  73. package/dist/esm/scorers/metrics/faithfulness/index.js +3 -0
  74. package/dist/esm/scorers/metrics/faithfulness/index.js.map +1 -0
  75. package/dist/esm/scorers/metrics/faithfulness/prompts.js +228 -0
  76. package/dist/esm/scorers/metrics/faithfulness/prompts.js.map +1 -0
  77. package/dist/esm/scorers/metrics/hallucination/hallucination.js +386 -0
  78. package/dist/esm/scorers/metrics/hallucination/hallucination.js.map +1 -0
  79. package/dist/esm/scorers/metrics/hallucination/index.js +3 -0
  80. package/dist/esm/scorers/metrics/hallucination/index.js.map +1 -0
  81. package/dist/esm/scorers/metrics/hallucination/prompts.js +102 -0
  82. package/dist/esm/scorers/metrics/hallucination/prompts.js.map +1 -0
  83. package/dist/esm/scorers/metrics/instruction-adherence/index.js +3 -0
  84. package/dist/esm/scorers/metrics/instruction-adherence/index.js.map +1 -0
  85. package/dist/esm/scorers/metrics/instruction-adherence/instruction-adherence.js +378 -0
  86. package/dist/esm/scorers/metrics/instruction-adherence/instruction-adherence.js.map +1 -0
  87. package/dist/esm/scorers/metrics/instruction-adherence/prompts.js +120 -0
  88. package/dist/esm/scorers/metrics/instruction-adherence/prompts.js.map +1 -0
  89. package/dist/types/common/logger.d.ts +1 -1
  90. package/dist/types/constants.d.ts +1 -0
  91. package/dist/types/data/datasets/eval-dataset-client.d.ts +5 -0
  92. package/dist/types/exporters/otel-exporter.d.ts +16 -0
  93. package/dist/types/judges/index.d.ts +50 -0
  94. package/dist/types/scorers/metrics/answer-correctness/answer-correctness.d.ts +99 -0
  95. package/dist/types/scorers/metrics/answer-correctness/index.d.ts +2 -0
  96. package/dist/types/scorers/metrics/answer-correctness/prompts.d.ts +71 -0
  97. package/dist/types/scorers/metrics/answer-relevancy/answer-relevancy.d.ts +78 -0
  98. package/dist/types/scorers/metrics/answer-relevancy/index.d.ts +2 -0
  99. package/dist/types/scorers/metrics/answer-relevancy/prompts.d.ts +71 -0
  100. package/dist/types/scorers/metrics/faithfulness/faithfulness.d.ts +77 -0
  101. package/dist/types/scorers/metrics/faithfulness/index.d.ts +2 -0
  102. package/dist/types/scorers/metrics/faithfulness/prompts.d.ts +94 -0
  103. package/dist/types/scorers/metrics/hallucination/hallucination.d.ts +67 -0
  104. package/dist/types/scorers/metrics/hallucination/index.d.ts +3 -0
  105. package/dist/types/scorers/metrics/hallucination/prompts.d.ts +63 -0
  106. package/dist/types/scorers/metrics/instruction-adherence/index.d.ts +2 -0
  107. package/dist/types/scorers/metrics/instruction-adherence/instruction-adherence.d.ts +67 -0
  108. package/dist/types/scorers/metrics/instruction-adherence/prompts.d.ts +78 -0
  109. package/package.json +32 -14
@@ -0,0 +1,232 @@
1
+ "use strict";
2
+ /**
3
+ * Utility prompts for FaithfulnessScorer
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.FaithfulnessTemplate = exports.ReasonSchema = exports.ClaimsSchema = exports.TruthsSchema = exports.VerdictsSchema = exports.FaithfulnessVerdictSchema = void 0;
7
+ const zod_1 = require("zod");
8
+ // Schema definitions for LLM responses
9
+ exports.FaithfulnessVerdictSchema = zod_1.z.object({
10
+ verdict: zod_1.z.string(),
11
+ reason: zod_1.z.string().optional()
12
+ });
13
+ exports.VerdictsSchema = zod_1.z.object({
14
+ verdicts: zod_1.z.array(exports.FaithfulnessVerdictSchema)
15
+ });
16
+ exports.TruthsSchema = zod_1.z.object({
17
+ truths: zod_1.z.array(zod_1.z.string())
18
+ });
19
+ exports.ClaimsSchema = zod_1.z.object({
20
+ claims: zod_1.z.array(zod_1.z.object({
21
+ claim: zod_1.z.string(),
22
+ quote: zod_1.z.string()
23
+ }))
24
+ });
25
+ exports.ReasonSchema = zod_1.z.object({
26
+ reason: zod_1.z.string()
27
+ });
28
/**
 * Template prompts for the FaithfulnessScorer.
 *
 * Every method is static and returns a prompt string; nothing here calls a model.
 */
class FaithfulnessTemplate {
    /**
     * Generate a prompt to extract claims from the actual output.
     *
     * @param text - Passage to extract claims from; interpolated verbatim into the prompt.
     * @param allClaims - Currently unused by this template; kept so existing callers keep working.
     * @returns The prompt string, ending with "JSON:".
     */
    static findClaims(text, allClaims = false) {
        return `==== TASK INSTRUCTIONS ====
You will be provided with a passage of text. Based on the text, your task is to generate a comprehensive list of ALL CLAIMS that can be inferred from the text.
For every claim that you derive from the text, provide the source of the claim via quoting the original text. Please try to extract EVERY CLAIM that is in the original text; priortize generating the most claims rather than being concise.
You should NOT include any prior knowledge, and take the text at face value when extracting claims.

==== FORMATTING YOUR ANSWER ====
Please return your answer in JSON format, with the "claims" key as a list of JSON objects with keys "claim" and "quote". No words or explanation beyond the output JSON is needed.

==== EXAMPLES ====

---- START OF EXAMPLE 1 ----
Example Text:
"Einstein won the nobel prize in 1968 for his discovery of the photoelectric effect."

Example JSON:
{
"claims": [
{
"claim": "Einstein won the nobel prize for his discovery of the photoelectric effect.",
"quote": "Einstein won the nobel prize in 1968 for his discovery of the photoelectric effect."
},
{
"claim": "Einstein won the nobel prize in 1968.",
"quote": "Einstein won the nobel prize in 1968 for his discovery of the photoelectric effect."
}
]
}
---- END OF EXAMPLE 1 ----

---- START OF EXAMPLE 2 ----
Example Text: "The Wright brothers successfully flew the first powered airplane on December 17, 1903, in Kitty Hawk, North Carolina."

{
"claims": [
{
"claim": "The Wright brothers flew the first powered airplane.",
"quote": "The Wright brothers successfully flew the first powered airplane on December 17, 1903, in Kitty Hawk, North Carolina."
},
{
"claim": "The Wright brothers made their flight in Kitty Hawk, North Carolina.",
"quote": "The Wright brothers successfully flew the first powered airplane on December 17, 1903, in Kitty Hawk, North Carolina."
},
{
"claim": "The first powered airplane flight occurred on December 17, 1903.",
"quote": "The Wright brothers successfully flew the first powered airplane on December 17, 1903, in Kitty Hawk, North Carolina."
}
]
}
---- END OF EXAMPLE 2 ----

---- START OF EXAMPLE 3 ----
Example Text:
"The Great Wall of China was built over many centuries by different Chinese dynasties. Construction began more than 2,000 years ago during the Warring States period. The most famous sections were built during the Ming Dynasty. The wall stretches for thousands of miles across northern China and was primarily built for military defense."

Example JSON:
{
"claims": [
{
"claim": "The Great Wall of China was built by multiple Chinese dynasties",
"quote": "The Great Wall of China was built over many centuries by different Chinese dynasties."
},
{
"claim": "Construction of the Great Wall began over 2,000 years ago",
"quote": "Construction began more than 2,000 years ago during the Warring States period."
},
{
"claim": "Construction started during the Warring States period",
"quote": "Construction began more than 2,000 years ago during the Warring States period."
},
{
"claim": "The most well-known parts of the wall were constructed during the Ming Dynasty",
"quote": "The most famous sections were built during the Ming Dynasty."
},
{
"claim": "The Great Wall extends for thousands of miles across northern China",
"quote": "The wall stretches for thousands of miles across northern China and was primarily built for military defense."
},
{
"claim": "The Great Wall was mainly constructed for defense purposes",
"quote": "The wall stretches for thousands of miles across northern China and was primarily built for military defense."
}
]
}
---- END OF EXAMPLE 3 ----

==== YOUR TURN ====
Text:
${text}

JSON:`;
    }
    /**
     * Generate a prompt to evaluate claims against the retrieval context.
     *
     * @param claims - Claims to judge. Each entry is either a string or an object
     *   with a string `claim` property (the shape ClaimsSchema describes); objects
     *   contribute their `claim` text instead of being stringified.
     * @param retrievalContext - Context text; interpolated verbatim into the prompt.
     * @returns The prompt string, ending with "JSON:".
     */
    static generateVerdicts(claims, retrievalContext) {
        const claimsStr = claims
            .map((claim, i) => {
            // Without this unwrapping a {claim, quote} object would render as "[object Object]".
            const isClaimObject = claim !== null && typeof claim === 'object' && typeof claim.claim === 'string';
            const claimText = isClaimObject ? claim.claim : claim;
            return `Claim ${i + 1}: ${claimText}`;
        })
            .join('\n');
        return `==== TASK INSTRUCTIONS ====
You will be provided with a set of claims and a retrieval context. Your task is to determine whether each claim is supported by the retrieval context.

For each claim, provide a verdict of "yes" if the claim is supported by the retrieval context, "no" if the claim is contradicted by the retrieval context, or "partially" if the claim is partially supported.

Also provide a reason for your verdict, citing evidence from the retrieval context.

==== FORMATTING YOUR ANSWER ====
Please return your answer in JSON format, with the "verdicts" key as a list of JSON objects with keys "verdict" and "reason". The verdict should be one of "yes", "no", or "partially". The reason should explain your verdict.

==== EXAMPLES ====

---- START OF EXAMPLE 1 ----
Example Claims:
Claim 1: Einstein won the Nobel Prize in 1921.
Claim 2: Einstein won the Nobel Prize for his work on relativity.

Example Retrieval Context:
"Albert Einstein was awarded the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect, not for his theories of relativity which were still controversial at the time."

Example JSON:
{
"verdicts": [
{
"verdict": "yes",
"reason": "The retrieval context explicitly states that 'Albert Einstein was awarded the Nobel Prize in Physics in 1921'."
},
{
"verdict": "no",
"reason": "The retrieval context contradicts this claim, stating that Einstein won the Nobel Prize 'for his explanation of the photoelectric effect, not for his theories of relativity'."
}
]
}
---- END OF EXAMPLE 1 ----

---- START OF EXAMPLE 2 ----
Example Claims:
Claim 1: The Great Wall of China is visible from space.
Claim 2: The Great Wall of China was built during the Ming Dynasty.

Example Retrieval Context:
"The Great Wall of China was built over many centuries by different Chinese dynasties, with the most famous sections built during the Ming Dynasty (1368-1644). Contrary to popular belief, the Great Wall is not visible from space with the naked eye, as confirmed by multiple astronauts."

Example JSON:
{
"verdicts": [
{
"verdict": "no",
"reason": "The retrieval context explicitly contradicts this claim, stating that 'the Great Wall is not visible from space with the naked eye, as confirmed by multiple astronauts'."
},
{
"verdict": "partially",
"reason": "The retrieval context states that 'the most famous sections [were] built during the Ming Dynasty', but also mentions that the wall 'was built over many centuries by different Chinese dynasties'. This suggests that while significant portions were built during the Ming Dynasty, the entire wall was not built during this period."
}
]
}
---- END OF EXAMPLE 2 ----

==== YOUR TURN ====
Claims:
${claimsStr}

Retrieval Context:
${retrievalContext}

JSON:`;
    }
    /**
     * Generate a prompt to explain the score based on verdicts.
     *
     * @param verdicts - Objects with a `verdict` and an optional `reason`; a missing
     *   or empty reason is rendered as "No reason provided".
     * @param score - Faithfulness score; interpolated verbatim into the prompt.
     * @returns The prompt string, ending with "JSON:".
     */
    static generateReason(verdicts, score) {
        const verdictsStr = verdicts
            .map((v, i) => `Verdict ${i + 1}: ${v.verdict}\nReason: ${v.reason || 'No reason provided'}\n------`)
            .join('\n');
        return `==== TASK INSTRUCTIONS ====
You will be provided with a faithfulness score and a list of verdicts for claims made in a model's output. Your task is to provide a CLEAR and CONCISE reason for the faithfulness score.

The faithfulness score represents how well the model's output is supported by the provided retrieval context. A score of 1.0 means all claims are fully supported, while a score of 0.0 means none of the claims are supported.

You should explain why the score is what it is, highlighting key supported and unsupported claims.

==== FORMATTING YOUR ANSWER ====
IMPORTANT: Please make sure to only return in JSON format, with the 'reason' key providing the reason.
Example JSON:
{
"reason": "The faithfulness score is <score> because <your_reason>."
}

==== YOUR TURN ====
---- FAITHFULNESS SCORE ----
${score}

---- VERDICTS ----
${verdictsStr}

---- YOUR RESPONSE ----
JSON:`;
    }
}
231
+ exports.FaithfulnessTemplate = FaithfulnessTemplate;
232
+ //# sourceMappingURL=prompts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../../../../src/scorers/metrics/faithfulness/prompts.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,6BAAwB;AAExB,uCAAuC;AAC1B,QAAA,yBAAyB,GAAG,OAAC,CAAC,MAAM,CAAC;IAChD,OAAO,EAAE,OAAC,CAAC,MAAM,EAAE;IACnB,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAIU,QAAA,cAAc,GAAG,OAAC,CAAC,MAAM,CAAC;IACrC,QAAQ,EAAE,OAAC,CAAC,KAAK,CAAC,iCAAyB,CAAC;CAC7C,CAAC,CAAC;AAIU,QAAA,YAAY,GAAG,OAAC,CAAC,MAAM,CAAC;IACnC,MAAM,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,EAAE,CAAC;CAC5B,CAAC,CAAC;AAIU,QAAA,YAAY,GAAG,OAAC,CAAC,MAAM,CAAC;IACnC,MAAM,EAAE,OAAC,CAAC,KAAK,CAAC,OAAC,CAAC,MAAM,CAAC;QACvB,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE;QACjB,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE;KAClB,CAAC,CAAC;CACJ,CAAC,CAAC;AAIU,QAAA,YAAY,GAAG,OAAC,CAAC,MAAM,CAAC;IACnC,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE;CACnB,CAAC,CAAC;AAIH;;GAEG;AACH,MAAa,oBAAoB;IAC/B;;OAEG;IACH,MAAM,CAAC,UAAU,CAAC,IAAY,EAAE,YAAqB,KAAK;QACxD,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAuFT,IAAI;;MAEA,CAAC;IACL,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,gBAAgB,CAAC,MAAgB,EAAE,gBAAwB;QAChE,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAC,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEhF,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA4DT,SAAS;;;EAGT,gBAAgB;;MAEZ,CAAC;IACL,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,cAAc,CAAC,QAA+B,EAAE,KAAa;QAClE,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACxC,OAAO,WAAW,CAAC,GAAC,CAAC,KAAK,CAAC,CAAC,OAAO,aAAa,CAAC,CAAC,MAAM,IAAI,oBAAoB,UAAU,CAAC;QAC7F,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEd,OAAO;;;;;;;;;;;;;;;;EAgBT,KAAK;;;EAGL,WAAW;;;MAGP,CAAC;IACL,CAAC;CACF;AA3MD,oDA2MC"}
@@ -0,0 +1,390 @@
1
+ "use strict";
2
// Compiler-emitted async helper: drives a generator function to completion,
// treating each yielded value as something to await, and returns a promise
// for the generator's return value. Reuses an existing this.__awaiter if present.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Promise implementation to use; falls back to the global Promise.
    const PromiseImpl = P || Promise;
    // Wrap plain values so every yielded value exposes .then().
    const toPromise = (value) => {
        if (value instanceof PromiseImpl) {
            return value;
        }
        return new PromiseImpl((resolve) => { resolve(value); });
    };
    return new PromiseImpl((resolve, reject) => {
        // Settle when the generator is done, otherwise wait on the yielded value.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator with the awaited value.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Throw the rejection reason into the generator so its try/catch can see it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator with the caller's `this` and arguments, then start it.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.HallucinationScorer = void 0;
13
+ const base_scorer_js_1 = require("../../base-scorer.js");
14
+ const constants_js_1 = require("../../../constants.js");
15
+ const logger_js_1 = require("../../../common/logger.js");
16
+ const prompts_js_1 = require("./prompts.js");
17
+ const index_js_1 = require("../../../judges/index.js");
18
// Required parameters for this scorer
const required_params = ['actualOutput', 'context'];
/**
 * Parse a verdicts response without schema validation.
 * Keeps only the `verdict` and `reason` fields of each entry.
 *
 * @throws Error("Failed to parse response: ...") when the text is not JSON or
 *   has no mappable `verdicts` array.
 */
function parseHallucinationVerdictsLenient(response) {
    try {
        const data = JSON.parse(response);
        return data.verdicts.map((item) => ({
            verdict: item.verdict,
            reason: item.reason
        }));
    }
    catch (error) {
        throw new Error(`Failed to parse response: ${error}`);
    }
}
/**
 * Parse a verdicts response and validate it against VerdictsSchema.
 *
 * @throws SyntaxError from JSON.parse, or Error("Invalid response format: ...")
 *   when validation fails.
 */
function parseHallucinationVerdictsStrict(response) {
    const result = prompts_js_1.VerdictsSchema.safeParse(JSON.parse(response));
    if (result.success) {
        return result.data.verdicts;
    }
    throw new Error(`Invalid response format: ${result.error}`);
}
/**
 * Parse a reason response without schema validation, substituting a
 * placeholder when the `reason` field is missing or empty.
 *
 * @throws Error("Failed to parse response: ...") when the text cannot be read.
 */
function parseHallucinationReasonLenient(response) {
    try {
        const data = JSON.parse(response);
        return data.reason || "No reason provided in response";
    }
    catch (error) {
        throw new Error(`Failed to parse response: ${error}`);
    }
}
/**
 * Parse a reason response and validate it against ReasonSchema.
 *
 * @throws SyntaxError from JSON.parse, or Error("Invalid response format: ...")
 *   when validation fails.
 */
function parseHallucinationReasonStrict(response) {
    const result = prompts_js_1.ReasonSchema.safeParse(JSON.parse(response));
    if (result.success) {
        return result.data.reason;
    }
    throw new Error(`Invalid response format: ${result.error}`);
}
/**
 * Synchronously run `prompt` through the scorer's model and parse the reply.
 *
 * With a native model the reply is parsed leniently once. Otherwise the reply
 * is parsed strictly, and on ANY failure (model call or parsing) the model is
 * called a second time with the same prompt and that reply is parsed leniently.
 */
function generateHallucinationParsedSync(scorer, prompt, strictParse, lenientParse) {
    if (scorer.using_native_model) {
        return lenientParse(scorer.model.generate(prompt));
    }
    try {
        return strictParse(scorer.model.generate(prompt));
    }
    catch (error) {
        // Deliberate best-effort retry: the first failure is discarded and only
        // the outcome of the second attempt reaches the caller.
        return lenientParse(scorer.model.generate(prompt));
    }
}
/**
 * Asynchronous counterpart of generateHallucinationParsedSync, using
 * `model.aGenerate` instead of `model.generate`.
 */
function generateHallucinationParsedAsync(scorer, prompt, strictParse, lenientParse) {
    return __awaiter(this, void 0, void 0, function* () {
        if (scorer.using_native_model) {
            return lenientParse(yield scorer.model.aGenerate(prompt));
        }
        try {
            return strictParse(yield scorer.model.aGenerate(prompt));
        }
        catch (error) {
            // Deliberate best-effort retry; see the synchronous variant.
            return lenientParse(yield scorer.model.aGenerate(prompt));
        }
    });
}
/**
 * Normalize `example.context` to an array: arrays pass through unchanged,
 * anything else becomes a single-element array (falsy values become '').
 */
function hallucinationContextsOf(example) {
    return Array.isArray(example.context) ? example.context : [example.context || ''];
}
/**
 * Build the result object returned after a successful scoring run from the
 * scorer's current state.
 */
function hallucinationSuccessResult(scorer, verbose_logs) {
    return {
        name: scorer.type,
        threshold: scorer.threshold,
        success: scorer.success,
        score: scorer.score,
        reason: scorer.reason,
        strict_mode: scorer.strict_mode,
        evaluation_model: scorer.evaluation_model || null,
        error: null,
        evaluation_cost: null,
        verbose_logs: verbose_logs,
        additional_metadata: {
            verdicts: scorer._verdicts
        }
    };
}
/**
 * Record `error` on the scorer and build the failure result object.
 *
 * NOTE(review): the failure result reports score 0, which the class comment
 * describes as "no hallucinations"; consumers should rely on `success`/`error`
 * rather than `score` here - confirm this is intended.
 */
function hallucinationErrorResult(scorer, error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    scorer.error = errorMessage;
    return {
        name: scorer.type,
        threshold: scorer.threshold,
        success: false,
        score: 0,
        reason: `Error during scoring: ${errorMessage}`,
        strict_mode: scorer.strict_mode,
        evaluation_model: scorer.evaluation_model || null,
        error: errorMessage,
        evaluation_cost: null,
        verbose_logs: null,
        additional_metadata: {}
    };
}
/**
 * HallucinationScorer evaluates whether an LLM's output contains hallucinations
 * by comparing it against provided context.
 *
 * The score is the fraction of verdicts equal to "no" (see _computeScore).
 * Lower scores are better (0 = no hallucinations, 1 = all contexts contradict the output).
 */
class HallucinationScorer extends base_scorer_js_1.JudgevalScorer {
    /**
     * Create a new HallucinationScorer
     *
     * @param threshold - Success threshold (default: 0.5); replaced by 1 when strict_mode is true
     * @param model - Model handed to createJudge to obtain the judge (default: undefined)
     * @param include_reason - Whether to include a reason for the score (default: true)
     * @param async_mode - Whether to use async mode (default: false)
     * @param strict_mode - Whether to use strict mode (default: false)
     * @param verbose_mode - Whether to include verbose logs (default: false)
     */
    constructor(threshold = 0.5, model = undefined, include_reason = true, async_mode = false, strict_mode = false, verbose_mode = false) {
        // NOTE(review): strict mode forces the threshold to 1 although lower scores
        // are better for this scorer; whether that is correct depends on the base
        // class's _successCheck, which is not visible here - confirm.
        super(constants_js_1.APIScorer.HALLUCINATION, strict_mode ? 1 : threshold, undefined, include_reason, async_mode, strict_mode, verbose_mode);
        this._verdicts = [];
        const { judge, usingNativeModel } = (0, index_js_1.createJudge)(model);
        this.model = judge;
        this.using_native_model = usingNativeModel;
        this.evaluation_model = this.model.getModelName();
    }
    /**
     * Generate verdicts for each context
     *
     * @returns Promise of the parsed list of verdict objects
     */
    _aGenerateVerdicts(actualOutput, contexts) {
        return __awaiter(this, void 0, void 0, function* () {
            const prompt = prompts_js_1.HallucinationTemplate.generateVerdicts(actualOutput, contexts);
            return yield generateHallucinationParsedAsync(this, prompt, parseHallucinationVerdictsStrict, parseHallucinationVerdictsLenient);
        });
    }
    /**
     * Generate verdicts for each context (synchronous)
     *
     * @returns The parsed list of verdict objects
     */
    _generateVerdicts(actualOutput, contexts) {
        const prompt = prompts_js_1.HallucinationTemplate.generateVerdicts(actualOutput, contexts);
        return generateHallucinationParsedSync(this, prompt, parseHallucinationVerdictsStrict, parseHallucinationVerdictsLenient);
    }
    /**
     * Generate a reason for the score
     *
     * @returns Promise of the reason text, or a fixed placeholder when
     *   include_reason is false (no model call is made in that case)
     */
    _aGenerateReason(actualOutput, contexts) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.include_reason) {
                return "No reason provided (include_reason is false)";
            }
            const prompt = prompts_js_1.HallucinationTemplate.generateReason(actualOutput, contexts);
            return yield generateHallucinationParsedAsync(this, prompt, parseHallucinationReasonStrict, parseHallucinationReasonLenient);
        });
    }
    /**
     * Generate a reason for the score (synchronous)
     *
     * @returns The reason text, or a fixed placeholder when include_reason is
     *   false (no model call is made in that case)
     */
    _generateReason(actualOutput, contexts) {
        if (!this.include_reason) {
            return "No reason provided (include_reason is false)";
        }
        const prompt = prompts_js_1.HallucinationTemplate.generateReason(actualOutput, contexts);
        return generateHallucinationParsedSync(this, prompt, parseHallucinationReasonStrict, parseHallucinationReasonLenient);
    }
    /**
     * Calculate the hallucination score
     *
     * @returns The fraction of stored verdicts whose trimmed, lower-cased
     *   verdict is "no"; 0 when there are no verdicts
     */
    _computeScore() {
        if (this._verdicts.length === 0) {
            return 0;
        }
        let contradictions = 0;
        for (const verdict of this._verdicts) {
            if (verdict.verdict.trim().toLowerCase() === "no") {
                contradictions += 1;
            }
        }
        return contradictions / this._verdicts.length;
    }
    /**
     * Create verbose logs for debugging
     *
     * @returns A text dump of verdicts, score and reason, or null when
     *   verbose_mode is off
     */
    _createVerboseLogs() {
        if (!this.verbose_mode) {
            return null;
        }
        const steps = [
            `Verdicts:\n${JSON.stringify(this._verdicts, null, 2)}`,
            `Score: ${this.score}\nReason: ${this.reason || "No reason provided"}`
        ];
        return steps.join('\n\n');
    }
    /**
     * Check if example has required parameters
     *
     * @throws Error naming the first entry of required_params that is falsy on the example
     */
    _checkExampleParams(example) {
        for (const param of required_params) {
            if (!example[param]) {
                throw new Error(`Example is missing required parameter: ${param}`);
            }
        }
    }
    /**
     * Score an example synchronously
     *
     * Any error thrown while scoring is caught and reported through the
     * returned object (success false, error set) instead of propagating.
     */
    syncScoreExample(example) {
        (0, logger_js_1.info)("Starting example scoring (sync mode)");
        try {
            // Check required parameters
            this._checkExampleParams(example);
            // Process example
            const contexts = hallucinationContextsOf(example);
            this._verdicts = this._generateVerdicts(example.actualOutput, contexts);
            // Calculate score
            this.score = this._computeScore();
            this.reason = this._generateReason(example.actualOutput, contexts) || '';
            this.success = this._successCheck();
            // Create verbose logs if enabled
            const verbose_logs = this._createVerboseLogs();
            (0, logger_js_1.info)(`Scoring completed with score: ${this.score}`);
            return hallucinationSuccessResult(this, verbose_logs);
        }
        catch (error) {
            return hallucinationErrorResult(this, error);
        }
    }
    /**
     * Score an example asynchronously
     *
     * Delegates to syncScoreExample when async_mode is off. Any error thrown
     * while scoring is caught and reported through the returned object.
     */
    scoreExample(example) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.async_mode) {
                return this.syncScoreExample(example);
            }
            (0, logger_js_1.info)("Starting example scoring (async mode)");
            try {
                // Check required parameters
                this._checkExampleParams(example);
                // Process example
                const contexts = hallucinationContextsOf(example);
                this._verdicts = yield this._aGenerateVerdicts(example.actualOutput, contexts);
                // Calculate score
                this.score = this._computeScore();
                this.reason = (yield this._aGenerateReason(example.actualOutput, contexts)) || '';
                this.success = this._successCheck();
                // Create verbose logs if enabled
                const verbose_logs = this._createVerboseLogs();
                (0, logger_js_1.info)(`Scoring completed with score: ${this.score}`);
                return hallucinationSuccessResult(this, verbose_logs);
            }
            catch (error) {
                return hallucinationErrorResult(this, error);
            }
        });
    }
    /**
     * Get the name of the scorer
     */
    get name() {
        return "Hallucination";
    }
}
389
+ exports.HallucinationScorer = HallucinationScorer;
390
+ //# sourceMappingURL=hallucination.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hallucination.js","sourceRoot":"","sources":["../../../../../src/scorers/metrics/hallucination/hallucination.ts"],"names":[],"mappings":";;;;;;;;;;;;AAEA,yDAAsD;AACtD,wDAAkD;AAClD,yDAAmE;AACnE,6CAKsB;AACtB,uDAA8D;AAE9D,sCAAsC;AACtC,MAAM,eAAe,GAAG,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;AAEpD;;;;;;GAMG;AACH,MAAa,mBAAoB,SAAQ,+BAAc;IAKrD;;;;;;;;;OASG;IACH,YACE,YAAoB,GAAG,EACvB,QAAoC,SAAS,EAC7C,iBAA0B,IAAI,EAC9B,aAAsB,KAAK,EAC3B,cAAuB,KAAK,EAC5B,eAAwB,KAAK;QAE7B,KAAK,CACH,wBAAS,CAAC,aAAa,EACvB,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EAC3B,SAAS,EACT,cAAc,EACd,UAAU,EACV,WAAW,EACX,YAAY,CACb,CAAC;QA5BI,cAAS,GAA2B,EAAE,CAAC;QA8B7C,MAAM,EAAE,KAAK,EAAE,gBAAgB,EAAE,GAAG,IAAA,sBAAW,EAAC,KAAK,CAAC,CAAC;QACvD,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,kBAAkB,GAAG,gBAAgB,CAAC;QAC3C,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;IACpD,CAAC;IAED;;OAEG;IACW,kBAAkB,CAAC,YAAoB,EAAE,QAAkB;;YACvE,MAAM,MAAM,GAAG,kCAAqB,CAAC,gBAAgB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;YAE9E,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBAC5B,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBAC/C,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC7B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;wBACvC,OAAO,EAAE,IAAI,CAAC,OAAO;wBACrB,MAAM,EAAE,IAAI,CAAC,MAAM;qBACpB,CAAC,CAAC,CAAC;gBACN,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;gBACxD,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC;oBACH,MAAM,qBAAqB,GAAG,CAAC,QAAgB,EAAwC,EAAE;wBACvF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;wBACpC,MAAM,MAAM,GAAG,2BAAc,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;wBAChD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;4BACnB,OAAO,MAAM,CAAC,IAAI,CAAC;wBACrB,CAAC;wBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBAC9D,CAAC,CAAC;oBAEF,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBAC/C,OAAO,qBAAqB,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;gBAC7C,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAA
M,CAAC,CAAC;oBAC/C,IAAI,CAAC;wBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;wBAC7B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;4BACvC,OAAO,EAAE,IAAI,CAAC,OAAO;4BACrB,MAAM,EAAE,IAAI,CAAC,MAAM;yBACpB,CAAC,CAAC,CAAC;oBACN,CAAC;oBAAC,OAAO,UAAU,EAAE,CAAC;wBACpB,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;oBAC7D,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;KAAA;IAED;;OAEG;IACK,iBAAiB,CAAC,YAAoB,EAAE,QAAkB;QAChE,MAAM,MAAM,GAAG,kCAAqB,CAAC,gBAAgB,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAE9E,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC7B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;oBACvC,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,MAAM,EAAE,IAAI,CAAC,MAAM;iBACpB,CAAC,CAAC,CAAC;YACN,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC;gBACH,MAAM,qBAAqB,GAAG,CAAC,QAAgB,EAAwC,EAAE;oBACvF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBACpC,MAAM,MAAM,GAAG,2BAAc,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBAChD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnB,OAAO,MAAM,CAAC,IAAI,CAAC;oBACrB,CAAC;oBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,CAAC,CAAC;gBAEF,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACxC,OAAO,qBAAqB,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YAC7C,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACxC,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC7B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,CAAC;wBACvC,OAAO,EAAE,IAAI,CAAC,OAAO;wBACrB,MAAM,EAAE,IAAI,CAAC,MAAM;qBACpB,CAAC,CAAC,CAAC;gBACN,CAAC;gBAAC,OAAO,UAAU,EAAE,CAAC;oBACpB,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACW,gBAAgB,CAAC,YAAoB,EAAE,QAAkB;;YACrE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;gBACzB,OAAO,8CAA8C,CAAC;YACxD,CAAC;YAED,MAAM,M
AAM,GAAG,kCAAqB,CAAC,cAAc,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;YAE5E,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBAC5B,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBAC/C,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC7B,OAAO,IAAI,CAAC,MAAM,IAAI,gCAAgC,CAAC;gBACzD,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;gBACxD,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC;oBACH,MAAM,mBAAmB,GAAG,CAAC,QAAgB,EAAsB,EAAE;wBACnE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;wBACpC,MAAM,MAAM,GAAG,yBAAY,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;wBAC9C,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;4BACnB,OAAO,MAAM,CAAC,IAAI,CAAC;wBACrB,CAAC;wBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBAC9D,CAAC,CAAC;oBAEF,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBAC/C,OAAO,mBAAmB,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;gBACzC,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBAC/C,IAAI,CAAC;wBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;wBAC7B,OAAO,IAAI,CAAC,MAAM,IAAI,gCAAgC,CAAC;oBACzD,CAAC;oBAAC,OAAO,UAAU,EAAE,CAAC;wBACpB,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;oBAC7D,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;KAAA;IAED;;OAEG;IACK,eAAe,CAAC,YAAoB,EAAE,QAAkB;QAC9D,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,OAAO,8CAA8C,CAAC;QACxD,CAAC;QAED,MAAM,MAAM,GAAG,kCAAqB,CAAC,cAAc,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAE5E,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC7B,OAAO,IAAI,CAAC,MAAM,IAAI,gCAAgC,CAAC;YACzD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,CAAC;gBACH,MAAM,mBAAmB,GAAG,CAAC,QAAgB,EAAsB,EAAE;oBACnE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBACpC,MAAM,MAAM,GAAG,yBAAY,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBAC9C,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;wBACnB,OAAO,MAAM,CAAC,IAAI,CAAC;o
BACrB,CAAC;oBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,CAAC,CAAC;gBAEF,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACxC,OAAO,mBAAmB,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;YACzC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACxC,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;oBAC7B,OAAO,IAAI,CAAC,MAAM,IAAI,gCAAgC,CAAC;gBACzD,CAAC;gBAAC,OAAO,UAAU,EAAE,CAAC;oBACpB,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa;QACnB,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,CAAC;QACX,CAAC;QAED,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACrC,IAAI,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC;gBAClD,cAAc,IAAI,CAAC,CAAC;YACtB,CAAC;QACH,CAAC;QAED,OAAO,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,kBAAkB;QACxB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG;YACZ,cAAc,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE;YACvD,UAAU,IAAI,CAAC,KAAK,aAAa,IAAI,CAAC,MAAM,IAAI,oBAAoB,EAAE;SACvE,CAAC;QAEF,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,OAAgB;QAC1C,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;YACpC,IAAI,KAAK,KAAK,cAAc,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;gBACtD,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAC;YACzE,CAAC;iBAAM,IAAI,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACnD,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,OAAgB;QAC/B,IAAA,gBAAI,EAAC,sCAAsC,CAAC,CAAC;QAE7C,IAAI,CAAC;YACH,4BAA4B;YAC5B,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;YAElC,kBAAkB;YAClB,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;YAC5F,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,YAAsB,EAAE,QAAQ,CAAC,CAAC;YAElF,kBAAkB;YAClB,IAAI,CAAC,KAAK
,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;YAClC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,YAAsB,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnF,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;YAEpC,iCAAiC;YACjC,MAAM,YAAY,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAE/C,IAAA,gBAAI,EAAC,iCAAiC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YAEpD,2BAA2B;YAC3B,OAAO;gBACL,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB,IAAI,IAAI;gBAC/C,KAAK,EAAE,IAAI;gBACX,eAAe,EAAE,IAAI;gBACrB,YAAY,EAAE,YAAY;gBAC1B,mBAAmB,EAAE;oBACnB,QAAQ,EAAE,IAAI,CAAC,SAAS;iBACzB;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,gBAAgB;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,IAAI,CAAC,KAAK,GAAG,YAAY,CAAC;YAE1B,OAAO;gBACL,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;gBAC/C,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB,IAAI,IAAI;gBAC/C,KAAK,EAAE,YAAY;gBACnB,eAAe,EAAE,IAAI;gBACrB,YAAY,EAAE,IAAI;gBAClB,mBAAmB,EAAE,EAAE;aACxB,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACG,YAAY,CAAC,OAAgB;;YACjC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;gBACrB,OAAO,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YACxC,CAAC;YAED,IAAA,gBAAI,EAAC,uCAAuC,CAAC,CAAC;YAE9C,IAAI,CAAC;gBACH,4BAA4B;gBAC5B,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC;gBAElC,kBAAkB;gBAClB,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;gBAC5F,IAAI,CAAC,SAAS,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,YAAsB,EAAE,QAAQ,CAAC,CAAC;gBAEzF,kBAAkB;gBAClB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;gBAClC,IAAI,CAAC,MAAM,GAAG,CAAA,MAAM,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,YAAsB,EAAE,QAAQ,CAAC,KAAI,EAAE,CAAC;gBAC1F,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;gBAEpC,iCAAiC;gBACjC,MAAM,YAAY,GAAG,IAAI,CAAC,kBAAkB,EAA
E,CAAC;gBAE/C,IAAA,gBAAI,EAAC,iCAAiC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;gBAEpD,2BAA2B;gBAC3B,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB,IAAI,IAAI;oBAC/C,KAAK,EAAE,IAAI;oBACX,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,YAAY;oBAC1B,mBAAmB,EAAE;wBACnB,QAAQ,EAAE,IAAI,CAAC,SAAS;qBACzB;iBACF,CAAC;YACJ,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,gBAAgB;gBAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC5E,IAAI,CAAC,KAAK,GAAG,YAAY,CAAC;gBAE1B,OAAO;oBACL,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAC;oBACR,MAAM,EAAE,yBAAyB,YAAY,EAAE;oBAC/C,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB,IAAI,IAAI;oBAC/C,KAAK,EAAE,YAAY;oBACnB,eAAe,EAAE,IAAI;oBACrB,YAAY,EAAE,IAAI;oBAClB,mBAAmB,EAAE,EAAE;iBACxB,CAAC;YACJ,CAAC;QACH,CAAC;KAAA;IAED;;OAEG;IACH,IAAI,IAAI;QACN,OAAO,eAAe,CAAC;IACzB,CAAC;CACF;AA1YD,kDA0YC"}
@@ -0,0 +1,11 @@
1
+ "use strict"; // Re-export entry point: this module only forwards names from ./hallucination.js and ./prompts.js.
2
+ Object.defineProperty(exports, "__esModule", { value: true }); // sets exports.__esModule = true (presumably the flag ES-module interop helpers look for)
3
+ exports.ReasonSchema = exports.VerdictsSchema = exports.HallucinationVerdictSchema = exports.HallucinationTemplate = exports.HallucinationScorer = void 0; // every exported name starts as undefined until its getter below replaces it
4
+ var hallucination_js_1 = require("./hallucination.js");
5
+ Object.defineProperty(exports, "HallucinationScorer", { enumerable: true, get: function () { return hallucination_js_1.HallucinationScorer; } }); // getter re-export: reads the value from ./hallucination.js on each access
6
+ var prompts_js_1 = require("./prompts.js");
7
+ Object.defineProperty(exports, "HallucinationTemplate", { enumerable: true, get: function () { return prompts_js_1.HallucinationTemplate; } }); // getter re-export from ./prompts.js
8
+ Object.defineProperty(exports, "HallucinationVerdictSchema", { enumerable: true, get: function () { return prompts_js_1.HallucinationVerdictSchema; } }); // getter re-export from ./prompts.js
9
+ Object.defineProperty(exports, "VerdictsSchema", { enumerable: true, get: function () { return prompts_js_1.VerdictsSchema; } }); // getter re-export from ./prompts.js
10
+ Object.defineProperty(exports, "ReasonSchema", { enumerable: true, get: function () { return prompts_js_1.ReasonSchema; } }); // getter re-export from ./prompts.js
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/scorers/metrics/hallucination/index.ts"],"names":[],"mappings":";;;AAAA,uDAAyD;AAAhD,uHAAA,mBAAmB,OAAA;AAC5B,2CAA+G;AAAtG,mHAAA,qBAAqB,OAAA;AAAE,wHAAA,0BAA0B,OAAA;AAAE,4GAAA,cAAc,OAAA;AAAE,0GAAA,YAAY,OAAA"}