judgeval 0.0.31__py3-none-any.whl → 0.0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. judgeval/__init__.py +3 -1
  2. judgeval/common/s3_storage.py +93 -0
  3. judgeval/common/tracer.py +869 -183
  4. judgeval/constants.py +1 -1
  5. judgeval/data/datasets/dataset.py +5 -1
  6. judgeval/data/datasets/eval_dataset_client.py +2 -2
  7. judgeval/data/sequence.py +16 -26
  8. judgeval/data/sequence_run.py +2 -0
  9. judgeval/judgment_client.py +44 -166
  10. judgeval/rules.py +4 -7
  11. judgeval/run_evaluation.py +2 -2
  12. judgeval/scorers/__init__.py +4 -4
  13. judgeval/scorers/judgeval_scorers/__init__.py +0 -176
  14. judgeval/version_check.py +22 -0
  15. {judgeval-0.0.31.dist-info → judgeval-0.0.34.dist-info}/METADATA +15 -2
  16. judgeval-0.0.34.dist-info/RECORD +63 -0
  17. judgeval/scorers/base_scorer.py +0 -58
  18. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
  19. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  20. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
  21. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  22. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  23. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  24. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  25. judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  26. judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
  27. judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
  28. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  29. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  30. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  31. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  32. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  33. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  34. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  35. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  36. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  37. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
  38. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
  39. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  40. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
  41. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  42. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  43. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
  44. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  45. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
  46. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
  47. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  48. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  49. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  50. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  51. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
  52. judgeval-0.0.31.dist-info/RECORD +0 -96
  53. {judgeval-0.0.31.dist-info → judgeval-0.0.34.dist-info}/WHEEL +0 -0
  54. {judgeval-0.0.31.dist-info → judgeval-0.0.34.dist-info}/licenses/LICENSE.md +0 -0
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py
@@ -1,551 +0,0 @@
- from typing import List, Optional, Union
- import asyncio
-
- from judgeval.constants import APIScorer
- from judgeval.scorers.utils import (
-     get_or_create_event_loop,
-     scorer_progress_meter,
-     create_verbose_logs,
-     parse_response_json,
-     check_example_params
- )
- from judgeval.scorers import JudgevalScorer
- from judgeval.judges import JudgevalJudge
- from judgeval.judges.utils import create_judge
- from judgeval.data import Example, ExampleParams
- from judgeval.scorers.judgeval_scorers.local_implementations.faithfulness.prompts import (
-     FaithfulnessTemplate,
-     Claims
- )
- from judgeval.scorers.judgeval_scorers.local_implementations.summarization.prompts import *
-
-
- required_params = [
-     ExampleParams.INPUT,
-     ExampleParams.ACTUAL_OUTPUT,
- ]
-
-
- class SummarizationScorer(JudgevalScorer):
-     def __init__(
-         self,
-         threshold: float = 0.5,
-         n: int = 5,
-         model: Optional[Union[str, JudgevalJudge]] = None,
-         assessment_questions: Optional[List[str]] = None,
-         include_reason: bool = True,
-         async_mode=True,
-         strict_mode: bool = False,
-         verbose_mode: bool = False,
-     ):
-         super().__init__(
-             score_type=APIScorer.SUMMARIZATION,
-             threshold=1 if strict_mode else threshold,
-             evaluation_model=None,
-             include_reason=include_reason,
-             async_mode=async_mode,
-             strict_mode=strict_mode,
-             verbose_mode=verbose_mode
-         )
-         self.model, self.using_native_model = create_judge(model)
-         self.evaluation_model = self.model.get_model_name()
-
-         if assessment_questions is not None and len(assessment_questions) == 0:
-             self.assessment_questions = None
-         else:
-             self.assessment_questions = assessment_questions
-
-         self.include_reason = include_reason
-         self.n = n
-         self.async_mode = async_mode
-         self.strict_mode = strict_mode
-         self.verbose_mode = verbose_mode
-
-     def score_example(
-         self,
-         example: Example,
-         _show_indicator: bool = True,
-     ) -> float:
-         check_example_params(example, required_params, self)
-         try:
-             with scorer_progress_meter(self, display_meter=_show_indicator):
-                 if self.async_mode:
-                     loop = get_or_create_event_loop()
-                     loop.run_until_complete(
-                         self.a_score_example(example, _show_indicator=False)
-                     )
-                 else:
-                     self.claims: List[str] = self._generate_claims(
-                         example.actual_output
-                     )
-
-                     self.info_coverage_verdicts: List[InfoCoverageVerdict] = (
-                         self._generate_info_coverage_verdicts(example)
-                     )
-
-                     self.contradiction_verdicts: List[ContradictionVerdict] = (
-                         self._generate_contradiction_verdicts(example)
-                     )
-
-                     contradiction_score = self._calculate_score(ScoreType.CONTRADICTION)
-                     info_coverage_score = self._calculate_score(ScoreType.INFO_COVERAGE)
-                     self.score_breakdown = {
-                         ScoreType.CONTRADICTION.value: contradiction_score,
-                         ScoreType.INFO_COVERAGE.value: info_coverage_score,
-                     }
-                     self.score = min(contradiction_score, info_coverage_score)
-                     self.reason = self._generate_reason()
-                     self.success = self.score >= self.threshold
-                     self.verbose_logs = create_verbose_logs(
-                         self,
-                         steps=[
-                             f"Claims:\n{self.claims}",
-                             f"Assessment Questions:\n{self.assessment_questions}",
-                             f"Info Coverage Verdicts:\n{[v.model_dump() for v in self.info_coverage_verdicts]}",
-                             f"Contradiction Verdicts:\n{[v.model_dump() for v in self.contradiction_verdicts]}",
-                             f"Score: {self.score}\nReason: {self.reason}",
-                         ],
-                     )
-
-                 return self.score
-         except Exception as e:
-             print(f"Error in SummarizationScorer score_example: {e}")
-             raise
-
-     async def a_score_example(
-         self,
-         example: Example,
-         _show_indicator: bool = True,
-     ) -> float:
-         """
-         To score, we take the following steps:
-         1. Generate claims from the actual output
-             Extract key factual claims from the summary text
-
-         2. Generate info coverage verdicts:
-             a. Generate assessment questions if not provided
-             b. Generate answers to the assessment questions for both summary and original text
-             c. Compare answers to determine if summary adequately covers key information
-             d. Calculate info coverage score based on matching answers
-
-         3. Generate contradiction verdicts:
-             a. Generate claims from the actual output
-             b. Verify each claim against the original text for factual accuracy
-             c. Calculate contradiction score based on verified claims
-
-         4. Calculate final score:
-             Take minimum of info coverage and contradiction scores
-             Generate reason explaining the scoring
-             Check if score meets threshold for success
-         """
-         check_example_params(example, required_params, self)
-         try:
-             with scorer_progress_meter(
-                 self,
-                 async_mode=True,
-                 display_meter=_show_indicator,
-             ):
-                 self.claims = await self._a_generate_claims(example.actual_output),
-
-                 self.info_coverage_verdicts, self.contradiction_verdicts = await asyncio.gather(
-                     self._a_generate_info_coverage_verdicts(example),
-                     self._a_generate_contradiction_verdicts(example),
-                 )
-
-                 contradiction_score = self._calculate_score(ScoreType.CONTRADICTION)
-                 info_coverage_score = self._calculate_score(ScoreType.INFO_COVERAGE)
-                 self.score_breakdown = {
-                     ScoreType.CONTRADICTION.value: contradiction_score,
-                     ScoreType.INFO_COVERAGE.value: info_coverage_score,
-                 }
-                 self.score = min(contradiction_score, info_coverage_score)
-                 self.reason = await self._a_generate_reason()
-                 self.success = self.score >= self.threshold
-                 self.verbose_logs = create_verbose_logs(
-                     self,
-                     steps=[
-                         f"Claims:\n{self.claims}",
-                         f"Assessment Questions:\n{self.assessment_questions}",
-                         f"Info Coverage Verdicts:\n{[v.model_dump() for v in self.info_coverage_verdicts]}",
-                         f"Contradiction Verdicts:\n{[v.model_dump() for v in self.contradiction_verdicts]}",
-                         f"Score: {self.score}\nReason: {self.reason}",
-                     ],
-                 )
-
-                 return self.score
-         except Exception as e:
-             print(f"Error in SummarizationScorer a_score_example: {e}")
-             raise
-
-     async def _a_generate_reason(self) -> str:
-         if self.include_reason is False:
-             return None
-
-         contradictions = []
-         redundancies = []
-         for verdict in self.contradiction_verdicts:
-             if verdict.verdict.strip().lower() == "no":
-                 contradictions.append(verdict.reason)
-             elif verdict.verdict.strip().lower() == "idk":
-                 redundancies.append(verdict.reason)
-
-         questions = []
-         if self.info_coverage_verdicts:
-             for verdict in self.info_coverage_verdicts:
-                 if (
-                     verdict.original_verdict.strip().lower() == "yes"
-                     and verdict.summary_verdict.strip().lower() == "no"
-                 ):
-                     questions.append(verdict.question)
-
-         prompt: dict = SummarizationTemplate.generate_reason(
-             contradictions=contradictions,
-             redundancies=redundancies,
-             questions=questions,
-             score=format(self.score, ".2f"),
-         )
-
-         if len(questions) > 0:
-             prompt += f"""Questions the original text can answer but not the summary:
- {questions}
-
- """
-         prompt += """JSON:
- """
-
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["reason"]
-         else:
-             try:
-                 res: Reason = await self.model.a_generate(prompt, schema=Reason)
-                 return res.reason
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["reason"]
-
-     def _generate_reason(self) -> str:
-         if self.include_reason is False:
-             return None
-
-         contradictions = []
-         redundancies = []
-         for verdict in self.contradiction_verdicts:
-             if verdict.verdict.strip().lower() == "no":
-                 contradictions.append(verdict.reason)
-             elif verdict.verdict.strip().lower() == "idk":
-                 redundancies.append(verdict.reason)
-
-         questions = []
-         if self.info_coverage_verdicts:
-             for verdict in self.info_coverage_verdicts:
-                 if (
-                     verdict.original_verdict.strip().lower() == "yes"
-                     and verdict.summary_verdict.strip().lower() == "no"
-                 ):
-                     questions.append(verdict.question)
-
-         prompt: dict = SummarizationTemplate.generate_reason(
-             contradictions=contradictions,
-             redundancies=redundancies,
-             questions=questions,
-             score=format(self.score, ".2f"),
-         )
-
-         if len(questions) > 0:
-             prompt += f"""Questions the original text can answer but not the summary:
- {questions}
-
- """
-         prompt += """JSON:
- """
-
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["reason"]
-         else:
-             try:
-                 res: Reason = self.model.generate(prompt, schema=Reason)
-                 return res.reason
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["reason"]
-
-     def _calculate_score(self, score_type: ScoreType) -> float:
-         if score_type == ScoreType.CONTRADICTION:
-             total = len(self.contradiction_verdicts)
-             if total == 0:
-                 return 0
-             faithfulness_count = 0
-             for verdict in self.contradiction_verdicts:
-                 # Different from the faithfulness score, this
-                 # penalizes 'idk' (full of fluff) summaries
-                 if verdict.verdict.strip().lower() == "yes":
-                     faithfulness_count += 1
-
-             score = faithfulness_count / total
-
-         else:
-             if self.assessment_questions is None:
-                 return 1
-             total = 0
-             coverage_count = 0
-             for verdict in self.info_coverage_verdicts:
-                 if verdict.original_verdict.strip().lower() == "yes":
-                     total += 1
-                     if verdict.summary_verdict.strip().lower() == "yes":
-                         coverage_count += 1
-
-             if total == 0:
-                 return 0
-
-             score = coverage_count / total
-
-         return 0 if self.strict_mode and score < self.threshold else score
-
-     async def _a_generate_answers(self, text: str) -> List[str]:
-         prompt = SummarizationTemplate.generate_answers(
-             questions=self.assessment_questions, text=text
-         )
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["answers"]
-         else:
-             try:
-                 res: Answers = await self.model.a_generate(
-                     prompt, schema=Answers
-                 )
-                 return res.answers
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["answers"]
-
-     def _generate_answers(self, text: str) -> List[str]:
-         prompt = SummarizationTemplate.generate_answers(
-             questions=self.assessment_questions, text=text
-         )
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["answers"]
-         else:
-             try:
-                 res: Answers = self.model.generate(prompt, schema=Answers)
-                 return res.answers
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["answers"]
-
-     async def _a_generate_assessment_questions(self, text: str):
-         prompt = SummarizationTemplate.generate_questions(text=text, n=self.n)
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["questions"]
-         else:
-             try:
-                 res: Questions = await self.model.a_generate(
-                     prompt, schema=Questions
-                 )
-                 return res.questions
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["questions"]
-
-     def _generate_assessment_questions(self, text: str):
-         prompt = SummarizationTemplate.generate_questions(text=text, n=self.n)
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["questions"]
-         else:
-             try:
-                 res: Questions = self.model.generate(prompt, schema=Questions)
-                 return res.questions
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["questions"]
-
-     async def _a_generate_info_coverage_verdicts(
-         self, example: Example
-     ) -> List[InfoCoverageVerdict]:
-         if self.assessment_questions is None:
-             self.assessment_questions = (
-                 await self._a_generate_assessment_questions(example.input)
-             )
-
-         tasks = [
-             self._a_generate_answers(example.input),
-             self._a_generate_answers(example.actual_output),
-         ]
-         results = await asyncio.gather(*tasks)
-         original_answers = results[0]
-         summary_answers = results[1]
-
-         if len(original_answers) != len(summary_answers):
-             raise ValueError("Number of verdicts generated does not equal.")
-
-         coverage_veridcts: List[InfoCoverageVerdict] = []
-         for i in range(len(original_answers)):
-             coverage_veridcts.append(
-                 InfoCoverageVerdict(
-                     summary_verdict=summary_answers[i],
-                     original_verdict=original_answers[i],
-                     question=self.assessment_questions[i],
-                 )
-             )
-         return coverage_veridcts
-
-     def _generate_info_coverage_verdicts(
-         self, example: Example
-     ) -> List[InfoCoverageVerdict]:
-         if self.assessment_questions is None:
-             self.assessment_questions = self._generate_assessment_questions(
-                 example.input
-             )
-
-         original_answers = self._generate_answers(example.input)
-         summary_answers = self._generate_answers(example.actual_output)
-
-         if len(original_answers) != len(summary_answers):
-             raise ValueError("Number of verdicts generated does not equal.")
-
-         coverage_veridcts: List[InfoCoverageVerdict] = []
-         for i in range(len(original_answers)):
-             coverage_veridcts.append(
-                 InfoCoverageVerdict(
-                     summary_verdict=summary_answers[i],
-                     original_verdict=original_answers[i],
-                     question=self.assessment_questions[i],
-                 )
-             )
-
-         return coverage_veridcts
-
-     async def _a_generate_contradiction_verdicts(
-         self,
-         example: Example,
-     ) -> List[ContradictionVerdict]:
-         if len(self.claims) == 0:
-             return []
-
-         verdicts: List[ContradictionVerdict] = []
-
-         prompt = SummarizationTemplate.generate_contradiction_verdicts(
-             original_text=example.input,
-             summary_claims=self.claims
-         )
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             verdicts = [
-                 ContradictionVerdict(**item)
-                 for item in data["verdicts"]
-             ]
-             return verdicts
-         else:
-             try:
-                 res: Verdicts = await self.model.a_generate(
-                     prompt, schema=Verdicts
-                 )
-                 verdicts = [item for item in res.verdicts]
-                 return verdicts
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 verdicts = [
-                     ContradictionVerdict(**item)
-                     for item in data["verdicts"]
-                 ]
-                 return verdicts
-
-     def _generate_contradiction_verdicts(
-         self,
-         example: Example,
-     ) -> List[ContradictionVerdict]:
-         if len(self.claims) == 0:
-             return []
-
-         verdicts: List[ContradictionVerdict] = []
-
-         prompt = SummarizationTemplate.generate_contradiction_verdicts(
-             original_text=example.input,
-             summary_claims=self.claims
-         )
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             verdicts = [
-                 ContradictionVerdict(**item)
-                 for item in data["verdicts"]
-             ]
-             return verdicts
-         else:
-             try:
-                 res: Verdicts = self.model.generate(prompt, schema=Verdicts)
-                 verdicts = [item for item in res.verdicts]
-                 return verdicts
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 verdicts = [
-                     ContradictionVerdict(**item)
-                     for item in data["verdicts"]
-                 ]
-                 return verdicts
-
-     async def _a_generate_claims(self, text: str) -> List[str]:
-         # Borrow faithfulness template since it already works
-         prompt = FaithfulnessTemplate.find_claims(text=text)
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["claims"]
-         else:
-             try:
-                 res: Claims = await self.model.a_generate(prompt, schema=Claims)
-                 return res.claims
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["claims"]
-
-     def _generate_claims(self, text: str) -> List[str]:
-         # Borrow faithfulness template
-         prompt = FaithfulnessTemplate.find_claims(text=text)
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["claims"]
-         else:
-             try:
-                 res: Claims = self.model.generate(prompt, schema=Claims)
-                 return res.claims
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["claims"]
-
-     def _success_check(self) -> bool:
-         if self.error is not None:
-             self.success = False
-         else:
-             try:
-                 self.success = self.score >= self.threshold
-             except:
-                 self.success = False
-         return self.success
-
-     @property
-     def __name__(self):
-         return "Summarization"
-
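The removed SummarizationScorer above scores a summary as the minimum of two sub-scores: a contradiction score over claims extracted from the summary and an info-coverage score over assessment questions. The sketch below is a simplified, hypothetical illustration of that aggregation only; the helper names and the plain yes/no verdict lists are assumptions for the example, and the real scorer obtains claims, questions, and verdicts from an LLM judge.

from typing import List, Tuple

def contradiction_score(claim_verdicts: List[str]) -> float:
    # Fraction of summary claims verified against the original text ("yes").
    # Unlike the faithfulness scorer, "idk" verdicts count against the summary.
    if not claim_verdicts:
        return 0.0
    return sum(v.strip().lower() == "yes" for v in claim_verdicts) / len(claim_verdicts)

def info_coverage_score(original_answers: List[str], summary_answers: List[str]) -> float:
    # Of the assessment questions the original text can answer ("yes"),
    # the fraction the summary can also answer.
    answerable = [(o, s) for o, s in zip(original_answers, summary_answers)
                  if o.strip().lower() == "yes"]
    if not answerable:
        return 0.0
    return sum(s.strip().lower() == "yes" for _, s in answerable) / len(answerable)

def summarization_score(claim_verdicts: List[str],
                        original_answers: List[str],
                        summary_answers: List[str],
                        threshold: float = 0.5) -> Tuple[float, bool]:
    # Final score is the minimum of the two sub-scores; success means meeting the threshold.
    score = min(contradiction_score(claim_verdicts),
                info_coverage_score(original_answers, summary_answers))
    return score, score >= threshold

# 3 of 4 claims verified, 2 of 3 answerable questions covered -> score = min(0.75, 0.667)
print(summarization_score(["yes", "yes", "no", "yes"], ["yes", "yes", "yes"], ["yes", "no", "yes"]))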
judgeval-0.0.31.dist-info/RECORD
@@ -1,96 +0,0 @@
- judgeval/__init__.py,sha256=dtXxsCmI4eEsZdGSUMy8P_pA0bc2-OSGAgb2C__yJoA,252
- judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
- judgeval/constants.py,sha256=XTqijsuuLEhUBXTjzNJVsee5U_Gl14ULLO5uQVW_nEE,5398
- judgeval/evaluation_run.py,sha256=WGzx-Ug2qhSmunFo8NrmSstBRsOUc5KpKq0Lc51rqsM,6739
- judgeval/judgment_client.py,sha256=FncHkjyFx2vfXv4cu4DzbOO0ideHNOWtHVbc8pSXNxk,29754
- judgeval/rules.py,sha256=B0ZL0pn72D4Jnlr0zMQ6CPHi7D8AQQRariXCVsiCMiI,20542
- judgeval/run_evaluation.py,sha256=2Mv1iLthJeFQZSVhjLOcJKRZ52Sy6OxLb2KyQ_yVwnA,28484
- judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
- judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
- judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
- judgeval/common/tracer.py,sha256=9Qga-7rLFlQK-oM5eK1O_8Mn1SewIrPtFwWbSZFtSII,59651
- judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
- judgeval/data/__init__.py,sha256=xuKx_KCVHGp6CXvQuVmKl3v7pJp-qDaz0NccKxwjtO0,481
- judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
- judgeval/data/example.py,sha256=cJrmPGLel_P2sy1UaRvuVSAi35EnA9XMR11Lhp4aDLo,5930
- judgeval/data/result.py,sha256=Gb9tiSDsk1amXgh0cFG6JmlW_BMKxS2kuTwNA0rrHjA,3184
- judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
- judgeval/data/sequence.py,sha256=DlQUjyWQJB6iNmiftDZ9N6C-nPtrOC1e0JZ57U00zZk,2387
- judgeval/data/sequence_run.py,sha256=GrnYSZBcZmt4tKQYA_1v09MFB8n3ccrkOJd4qyweHMg,1987
- judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
- judgeval/data/datasets/dataset.py,sha256=AFYjksV_wXx5CqFYJsl3aN8yZ6hC50O1myRuOJ8s8_E,12867
- judgeval/data/datasets/eval_dataset_client.py,sha256=xzXlBJRBEEmwsB79_eepm0Da-Bz8yRodX7ttk-u-BxU,14986
- judgeval/integrations/langgraph.py,sha256=J-cQfFP52TjJewdSTe-fcsUC4HDvjNbXoxmbmF0SgiE,11743
- judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
- judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
- judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
- judgeval/judges/mixture_of_judges.py,sha256=IJoi4Twk8ze1CJWVEp69k6TSqTCTGrmVYQ0qdffer60,15549
- judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
- judgeval/judges/utils.py,sha256=9lvUxziGV86ISvVFxYBWc09TWFyAQgUTyPf_a9mD5Rs,2686
- judgeval/scorers/__init__.py,sha256=Z_88Sr45gLFAIbMHzG1BF24TUQGCDiuP9QpmVFvSYJM,1204
- judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
- judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1mC0,2183
- judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
- judgeval/scorers/judgeval_scorer.py,sha256=79-JJurqHP-qTaWNWInx4SjvQYwXc9lvfPPNgwsh2yA,6773
- judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
- judgeval/scorers/score.py,sha256=r9QiT4-LIvivcJ6XxByrbswKSO8eQTtAD1UlXT_lcmo,18741
- judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
- judgeval/scorers/judgeval_scorers/__init__.py,sha256=kSmQWKeBvLeZMfLYNQSc2qbJYo1MFIQnf3P-D4ltuSM,6232
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=_sDUBxSG536KGqXNi6dFpaYKghjEAadxBxaaxV9HuuE,1764
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=Fnd9CVIOZ73sWEWymsU5eBrrZqPFjMZ0BKpeW-PDyTg,711
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=oETeN9K0HSIRdL2SDqn82Vskpwh5SlKnZvs5VDm2OBU,658
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=kuzf9OWvpY38yYSwlBgneLkUZwJNM4FQqvbS66keA90,1249
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=tpSuzFAaW8X9xqA0aLLKwh7qmBK0Pc_bJZMIe_q412U,770
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=pFVhk4pLtQ-FnNlbI-dFF-SIh69Jza7erHqiPkFWoBo,758
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=RQ6DZwEhChfecd89Ey-T7ke--7qTaXZlRsNxwH8gaME,823
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=V9WPuwNMm097V7IknKs8UkmAk0yjnBXTcJha_BHXxTA,475
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=Pb3CiNF2Ca826B92wJCVAi_68lJjLhqqCKwQKaflSUg,1294
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=-BwOapqjryYNKNydtdkUiKIij76dY0O1jBmdc6dKazQ,692
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=ntEEeTANEOsGlcbiTAF_3r6BeSJEaVDns8po8T0L6Vg,692
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=k5gDOki-8KXrZXydvdSqDt3NZqQ28hXoOCHQf6jNxr4,686
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=XnSGEkQfwVqaqnHEGMCsxNiHVzrsrej48uDbLoWc8CQ,678
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=mMKEuR87_yanEuZJ5YSGFMHDD_oLVZ6-rQuciFaDOMA,1095
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=QmWB8bVbDYHY5FcF0rYZE_3c2XXgMLRmR6aXJWfdMC4,655
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=k_t-THIAtsk7lNvm9faj0u24dPZjn7qRbZ8YGjQ21xs,1926
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=3Dpm8BIIe0Th2p0ccO5bb-le93lywjOLSo712HwEIUE,10196
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py,sha256=hBUqEd8Hy3g8peOVjpSmRb31fPtpodDzdRUonhKRl30,6686
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py,sha256=r6yae5iaWtlBL_cP8I-1SuhS9dulsy1e7W9Rcz82v6E,169
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py,sha256=qoeoFyXXDtqqc7ZSLajqexeSxw5STmrL-uPQIMY3zSw,10529
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py,sha256=-OO3QmkXqGCvdIRKsAuT4wQ1ZqWBQDdb1j3lc3W9q3w,6540
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py,sha256=QnwSTgYx_zyz6K27WTe89MoTcO12WYn_jqE_xj7_H2U,5497
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py,sha256=c49aCzyxCogjUTipQUmS13LemFC89X9xEuPNQ_LVHgw,31345
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py,sha256=J6tc-T60AVOEaNVuoVU0XIG6dvQri99Q0tnX_Tm-0vc,108
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py,sha256=tRgRyjGpc4Pe3nQ1c-5NeNYFvbulL7YEnoRa9zLp1gc,9649
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py,sha256=pN0AURDWSV3iGt11MtJIwzXMuKbM4oC3zdb9yqnjNdU,4875
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py,sha256=4kjfqD_95muHZFo75S8_fbTcC1DI1onNIfMmr8gMZaI,99
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py,sha256=hwAv_x3XwGDnSW3a75CTCgIW6eVg8ymdjDdJQvw5p0Y,9260
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py,sha256=k5xdCw8gnBvG1_dDSbtBftDDtOZ4qKL2-iQ9AQHsuUI,6541
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py,sha256=JPCvrekKLbl_xdD49evhtiFIVocuegCpCBkn1auzTSE,184
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=BtVgE7z-9PHfFRcvn96aEG5mXVcWBweVyty934hZdiU,8915
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=uO-8Uo7VrXu4xWpxjIx6_UI3aw5KuJxubSHb71Nzm6Q,4574
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py,sha256=DpOHbjYEhVmP-RiaTEa5PZHpoPvduNXG5p6k9lR0AS0,157
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py,sha256=y-Ag8YuzEvExUIj4qU7y53INVLH9L_TUTJLIxCIdAQo,5458
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=LPVTGHBBJSpE6TrgzZQS2_vw4P9HiUYmykrwo6UMdws,11251
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=vNLjF4NKZJSV4VNenHzoAUB2xVZz6tt_5AzryKmOVrI,11690
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py,sha256=fZk3UQxI9Nljf5qjCRLRkF0D-AERFHElI9cC83_cgV8,158
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=q9qdmwq96stbTRVA4Egv9eO1KI8zf77jwYkZXaOZePw,9511
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py,sha256=TJWKheQnaJ-pdVzkOTDr3BNZ9bfCrC81S3kvjm4Zjh8,4329
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py,sha256=keZRmLe5oyIVE98h1nOiW54__xcEv2QInwZcJ34ZKhs,8175
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py,sha256=jv-1Z7K1EhLjy4NGKS35cIcShh7ZDQCXVPqoJnAnDqk,3598
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py,sha256=xQDw7o9JQ6qajusPnBH0MWBRJ5ct_Ao3pJELXxxVMRo,175
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py,sha256=RkI-mARc2AYCw5dTb5OSY4UWXIwDcYS3ViiJOVIq0Nw,4339
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py,sha256=mv6-XeLSV5yj1H98YYV2iTYVd88zKftZJP42Lgl6R80,89
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py,sha256=6GnRz2h-6Fwt4sl__0RgQOyo3n3iDO4MNuHWxdu-rrM,10242
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
- judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
- judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
- judgeval-0.0.31.dist-info/METADATA,sha256=g9288fIE7NDwXuqUylqCV0mby5hAY7yEztR8TOn5sNk,5418
- judgeval-0.0.31.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- judgeval-0.0.31.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
- judgeval-0.0.31.dist-info/RECORD,,