judgeval 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
Files changed (48)
  1. judgeval/common/s3_storage.py +93 -0
  2. judgeval/common/tracer.py +612 -123
  3. judgeval/data/sequence.py +4 -10
  4. judgeval/judgment_client.py +25 -86
  5. judgeval/rules.py +4 -7
  6. judgeval/run_evaluation.py +1 -1
  7. judgeval/scorers/__init__.py +4 -4
  8. judgeval/scorers/judgeval_scorers/__init__.py +0 -176
  9. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/METADATA +15 -2
  10. judgeval-0.0.33.dist-info/RECORD +63 -0
  11. judgeval/scorers/base_scorer.py +0 -58
  12. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
  13. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  14. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
  15. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  16. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  17. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  18. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  19. judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
  20. judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
  21. judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
  22. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  23. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  24. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  25. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  26. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  27. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  28. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  29. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  30. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  31. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
  32. judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
  33. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  34. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
  35. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  36. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  37. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
  38. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  39. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
  40. judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
  41. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  42. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  43. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  44. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  45. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
  46. judgeval-0.0.32.dist-info/RECORD +0 -97
  47. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/WHEEL +0 -0
  48. {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/licenses/LICENSE.md +0 -0
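The file-level summary above can be reproduced locally by comparing the member lists of the two wheels. The sketch below is only illustrative: it assumes both wheel files have already been downloaded into the working directory under the hypothetical filenames shown, and it uses nothing beyond the Python standard library.

import zipfile

# Hypothetical local filenames for the two published wheels.
OLD_WHEEL = "judgeval-0.0.32-py3-none-any.whl"
NEW_WHEEL = "judgeval-0.0.33-py3-none-any.whl"

def wheel_members(path: str) -> set[str]:
    # A wheel is a zip archive; namelist() returns every packaged file path.
    with zipfile.ZipFile(path) as wheel:
        return set(wheel.namelist())

old_files = wheel_members(OLD_WHEEL)
new_files = wheel_members(NEW_WHEEL)

print("Removed in 0.0.33:", sorted(old_files - new_files))
print("Added in 0.0.33:  ", sorted(new_files - old_files))

Files present in both archives (for example judgeval/common/tracer.py) still need a content diff to show line-level changes such as the +612 -123 reported above.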
judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py (deleted)
@@ -1,551 +0,0 @@
- from typing import List, Optional, Union
- import asyncio
-
- from judgeval.constants import APIScorer
- from judgeval.scorers.utils import (
-     get_or_create_event_loop,
-     scorer_progress_meter,
-     create_verbose_logs,
-     parse_response_json,
-     check_example_params
- )
- from judgeval.scorers import JudgevalScorer
- from judgeval.judges import JudgevalJudge
- from judgeval.judges.utils import create_judge
- from judgeval.data import Example, ExampleParams
- from judgeval.scorers.judgeval_scorers.local_implementations.faithfulness.prompts import (
-     FaithfulnessTemplate,
-     Claims
- )
- from judgeval.scorers.judgeval_scorers.local_implementations.summarization.prompts import *
-
-
- required_params = [
-     ExampleParams.INPUT,
-     ExampleParams.ACTUAL_OUTPUT,
- ]
-
-
- class SummarizationScorer(JudgevalScorer):
-     def __init__(
-         self,
-         threshold: float = 0.5,
-         n: int = 5,
-         model: Optional[Union[str, JudgevalJudge]] = None,
-         assessment_questions: Optional[List[str]] = None,
-         include_reason: bool = True,
-         async_mode=True,
-         strict_mode: bool = False,
-         verbose_mode: bool = False,
-     ):
-         super().__init__(
-             score_type=APIScorer.SUMMARIZATION,
-             threshold=1 if strict_mode else threshold,
-             evaluation_model=None,
-             include_reason=include_reason,
-             async_mode=async_mode,
-             strict_mode=strict_mode,
-             verbose_mode=verbose_mode
-         )
-         self.model, self.using_native_model = create_judge(model)
-         self.evaluation_model = self.model.get_model_name()
-
-         if assessment_questions is not None and len(assessment_questions) == 0:
-             self.assessment_questions = None
-         else:
-             self.assessment_questions = assessment_questions
-
-         self.include_reason = include_reason
-         self.n = n
-         self.async_mode = async_mode
-         self.strict_mode = strict_mode
-         self.verbose_mode = verbose_mode
-
-     def score_example(
-         self,
-         example: Example,
-         _show_indicator: bool = True,
-     ) -> float:
-         check_example_params(example, required_params, self)
-         try:
-             with scorer_progress_meter(self, display_meter=_show_indicator):
-                 if self.async_mode:
-                     loop = get_or_create_event_loop()
-                     loop.run_until_complete(
-                         self.a_score_example(example, _show_indicator=False)
-                     )
-                 else:
-                     self.claims: List[str] = self._generate_claims(
-                         example.actual_output
-                     )
-
-                     self.info_coverage_verdicts: List[InfoCoverageVerdict] = (
-                         self._generate_info_coverage_verdicts(example)
-                     )
-
-                     self.contradiction_verdicts: List[ContradictionVerdict] = (
-                         self._generate_contradiction_verdicts(example)
-                     )
-
-                     contradiction_score = self._calculate_score(ScoreType.CONTRADICTION)
-                     info_coverage_score = self._calculate_score(ScoreType.INFO_COVERAGE)
-                     self.score_breakdown = {
-                         ScoreType.CONTRADICTION.value: contradiction_score,
-                         ScoreType.INFO_COVERAGE.value: info_coverage_score,
-                     }
-                     self.score = min(contradiction_score, info_coverage_score)
-                     self.reason = self._generate_reason()
-                     self.success = self.score >= self.threshold
-                     self.verbose_logs = create_verbose_logs(
-                         self,
-                         steps=[
-                             f"Claims:\n{self.claims}",
-                             f"Assessment Questions:\n{self.assessment_questions}",
-                             f"Info Coverage Verdicts:\n{[v.model_dump() for v in self.info_coverage_verdicts]}",
-                             f"Contradiction Verdicts:\n{[v.model_dump() for v in self.contradiction_verdicts]}",
-                             f"Score: {self.score}\nReason: {self.reason}",
-                         ],
-                     )
-
-                 return self.score
-         except Exception as e:
-             print(f"Error in SummarizationScorer score_example: {e}")
-             raise
-
-     async def a_score_example(
-         self,
-         example: Example,
-         _show_indicator: bool = True,
-     ) -> float:
-         """
-         To score, we take the following steps:
-         1. Generate claims from the actual output
-             Extract key factual claims from the summary text
-
-         2. Generate info coverage verdicts:
-             a. Generate assessment questions if not provided
-             b. Generate answers to the assessment questions for both summary and original text
-             c. Compare answers to determine if summary adequately covers key information
-             d. Calculate info coverage score based on matching answers
-
-         3. Generate contradiction verdicts:
-             a. Generate claims from the actual output
-             b. Verify each claim against the original text for factual accuracy
-             c. Calculate contradiction score based on verified claims
-
-         4. Calculate final score:
-             - Take minimum of info coverage and contradiction scores
-             - Generate reason explaining the scoring
-             - Check if score meets threshold for success
-         """
-         check_example_params(example, required_params, self)
-         try:
-             with scorer_progress_meter(
-                 self,
-                 async_mode=True,
-                 display_meter=_show_indicator,
-             ):
-                 self.claims = await self._a_generate_claims(example.actual_output),
-
-                 self.info_coverage_verdicts, self.contradiction_verdicts = await asyncio.gather(
-                     self._a_generate_info_coverage_verdicts(example),
-                     self._a_generate_contradiction_verdicts(example),
-                 )
-
-                 contradiction_score = self._calculate_score(ScoreType.CONTRADICTION)
-                 info_coverage_score = self._calculate_score(ScoreType.INFO_COVERAGE)
-                 self.score_breakdown = {
-                     ScoreType.CONTRADICTION.value: contradiction_score,
-                     ScoreType.INFO_COVERAGE.value: info_coverage_score,
-                 }
-                 self.score = min(contradiction_score, info_coverage_score)
-                 self.reason = await self._a_generate_reason()
-                 self.success = self.score >= self.threshold
-                 self.verbose_logs = create_verbose_logs(
-                     self,
-                     steps=[
-                         f"Claims:\n{self.claims}",
-                         f"Assessment Questions:\n{self.assessment_questions}",
-                         f"Info Coverage Verdicts:\n{[v.model_dump() for v in self.info_coverage_verdicts]}",
-                         f"Contradiction Verdicts:\n{[v.model_dump() for v in self.contradiction_verdicts]}",
-                         f"Score: {self.score}\nReason: {self.reason}",
-                     ],
-                 )
-
-                 return self.score
-         except Exception as e:
-             print(f"Error in SummarizationScorer a_score_example: {e}")
-             raise
-
-     async def _a_generate_reason(self) -> str:
-         if self.include_reason is False:
-             return None
-
-         contradictions = []
-         redundancies = []
-         for verdict in self.contradiction_verdicts:
-             if verdict.verdict.strip().lower() == "no":
-                 contradictions.append(verdict.reason)
-             elif verdict.verdict.strip().lower() == "idk":
-                 redundancies.append(verdict.reason)
-
-         questions = []
-         if self.info_coverage_verdicts:
-             for verdict in self.info_coverage_verdicts:
-                 if (
-                     verdict.original_verdict.strip().lower() == "yes"
-                     and verdict.summary_verdict.strip().lower() == "no"
-                 ):
-                     questions.append(verdict.question)
-
-         prompt: dict = SummarizationTemplate.generate_reason(
-             contradictions=contradictions,
-             redundancies=redundancies,
-             questions=questions,
-             score=format(self.score, ".2f"),
-         )
-
-         if len(questions) > 0:
-             prompt += f"""Questions the original text can answer but not the summary:
- {questions}
-
- """
-         prompt += """JSON:
- """
-
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["reason"]
-         else:
-             try:
-                 res: Reason = await self.model.a_generate(prompt, schema=Reason)
-                 return res.reason
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["reason"]
-
-     def _generate_reason(self) -> str:
-         if self.include_reason is False:
-             return None
-
-         contradictions = []
-         redundancies = []
-         for verdict in self.contradiction_verdicts:
-             if verdict.verdict.strip().lower() == "no":
-                 contradictions.append(verdict.reason)
-             elif verdict.verdict.strip().lower() == "idk":
-                 redundancies.append(verdict.reason)
-
-         questions = []
-         if self.info_coverage_verdicts:
-             for verdict in self.info_coverage_verdicts:
-                 if (
-                     verdict.original_verdict.strip().lower() == "yes"
-                     and verdict.summary_verdict.strip().lower() == "no"
-                 ):
-                     questions.append(verdict.question)
-
-         prompt: dict = SummarizationTemplate.generate_reason(
-             contradictions=contradictions,
-             redundancies=redundancies,
-             questions=questions,
-             score=format(self.score, ".2f"),
-         )
-
-         if len(questions) > 0:
-             prompt += f"""Questions the original text can answer but not the summary:
- {questions}
-
- """
-         prompt += """JSON:
- """
-
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["reason"]
-         else:
-             try:
-                 res: Reason = self.model.generate(prompt, schema=Reason)
-                 return res.reason
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["reason"]
-
-     def _calculate_score(self, score_type: ScoreType) -> float:
-         if score_type == ScoreType.CONTRADICTION:
-             total = len(self.contradiction_verdicts)
-             if total == 0:
-                 return 0
-             faithfulness_count = 0
-             for verdict in self.contradiction_verdicts:
-                 # Different from the faithfulness score, this
-                 # penalizes 'idk' (full of fluff) summaries
-                 if verdict.verdict.strip().lower() == "yes":
-                     faithfulness_count += 1
-
-             score = faithfulness_count / total
-
-         else:
-             if self.assessment_questions is None:
-                 return 1
-             total = 0
-             coverage_count = 0
-             for verdict in self.info_coverage_verdicts:
-                 if verdict.original_verdict.strip().lower() == "yes":
-                     total += 1
-                     if verdict.summary_verdict.strip().lower() == "yes":
-                         coverage_count += 1
-
-             if total == 0:
-                 return 0
-
-             score = coverage_count / total
-
-         return 0 if self.strict_mode and score < self.threshold else score
-
-     async def _a_generate_answers(self, text: str) -> List[str]:
-         prompt = SummarizationTemplate.generate_answers(
-             questions=self.assessment_questions, text=text
-         )
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["answers"]
-         else:
-             try:
-                 res: Answers = await self.model.a_generate(
-                     prompt, schema=Answers
-                 )
-                 return res.answers
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["answers"]
-
-     def _generate_answers(self, text: str) -> List[str]:
-         prompt = SummarizationTemplate.generate_answers(
-             questions=self.assessment_questions, text=text
-         )
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["answers"]
-         else:
-             try:
-                 res: Answers = self.model.generate(prompt, schema=Answers)
-                 return res.answers
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["answers"]
-
-     async def _a_generate_assessment_questions(self, text: str):
-         prompt = SummarizationTemplate.generate_questions(text=text, n=self.n)
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["questions"]
-         else:
-             try:
-                 res: Questions = await self.model.a_generate(
-                     prompt, schema=Questions
-                 )
-                 return res.questions
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["questions"]
-
-     def _generate_assessment_questions(self, text: str):
-         prompt = SummarizationTemplate.generate_questions(text=text, n=self.n)
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["questions"]
-         else:
-             try:
-                 res: Questions = self.model.generate(prompt, schema=Questions)
-                 return res.questions
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["questions"]
-
-     async def _a_generate_info_coverage_verdicts(
-         self, example: Example
-     ) -> List[InfoCoverageVerdict]:
-         if self.assessment_questions is None:
-             self.assessment_questions = (
-                 await self._a_generate_assessment_questions(example.input)
-             )
-
-         tasks = [
-             self._a_generate_answers(example.input),
-             self._a_generate_answers(example.actual_output),
-         ]
-         results = await asyncio.gather(*tasks)
-         original_answers = results[0]
-         summary_answers = results[1]
-
-         if len(original_answers) != len(summary_answers):
-             raise ValueError("Number of verdicts generated does not equal.")
-
-         coverage_veridcts: List[InfoCoverageVerdict] = []
-         for i in range(len(original_answers)):
-             coverage_veridcts.append(
-                 InfoCoverageVerdict(
-                     summary_verdict=summary_answers[i],
-                     original_verdict=original_answers[i],
-                     question=self.assessment_questions[i],
-                 )
-             )
-         return coverage_veridcts
-
-     def _generate_info_coverage_verdicts(
-         self, example: Example
-     ) -> List[InfoCoverageVerdict]:
-         if self.assessment_questions is None:
-             self.assessment_questions = self._generate_assessment_questions(
-                 example.input
-             )
-
-         original_answers = self._generate_answers(example.input)
-         summary_answers = self._generate_answers(example.actual_output)
-
-         if len(original_answers) != len(summary_answers):
-             raise ValueError("Number of verdicts generated does not equal.")
-
-         coverage_veridcts: List[InfoCoverageVerdict] = []
-         for i in range(len(original_answers)):
-             coverage_veridcts.append(
-                 InfoCoverageVerdict(
-                     summary_verdict=summary_answers[i],
-                     original_verdict=original_answers[i],
-                     question=self.assessment_questions[i],
-                 )
-             )
-
-         return coverage_veridcts
-
-     async def _a_generate_contradiction_verdicts(
-         self,
-         example: Example,
-     ) -> List[ContradictionVerdict]:
-         if len(self.claims) == 0:
-             return []
-
-         verdicts: List[ContradictionVerdict] = []
-
-         prompt = SummarizationTemplate.generate_contradiction_verdicts(
-             original_text=example.input,
-             summary_claims=self.claims
-         )
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             verdicts = [
-                 ContradictionVerdict(**item)
-                 for item in data["verdicts"]
-             ]
-             return verdicts
-         else:
-             try:
-                 res: Verdicts = await self.model.a_generate(
-                     prompt, schema=Verdicts
-                 )
-                 verdicts = [item for item in res.verdicts]
-                 return verdicts
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 verdicts = [
-                     ContradictionVerdict(**item)
-                     for item in data["verdicts"]
-                 ]
-                 return verdicts
-
-     def _generate_contradiction_verdicts(
-         self,
-         example: Example,
-     ) -> List[ContradictionVerdict]:
-         if len(self.claims) == 0:
-             return []
-
-         verdicts: List[ContradictionVerdict] = []
-
-         prompt = SummarizationTemplate.generate_contradiction_verdicts(
-             original_text=example.input,
-             summary_claims=self.claims
-         )
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             verdicts = [
-                 ContradictionVerdict(**item)
-                 for item in data["verdicts"]
-             ]
-             return verdicts
-         else:
-             try:
-                 res: Verdicts = self.model.generate(prompt, schema=Verdicts)
-                 verdicts = [item for item in res.verdicts]
-                 return verdicts
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 verdicts = [
-                     ContradictionVerdict(**item)
-                     for item in data["verdicts"]
-                 ]
-                 return verdicts
-
-     async def _a_generate_claims(self, text: str) -> List[str]:
-         # Borrow faithfulness template since it already works
-         prompt = FaithfulnessTemplate.find_claims(text=text)
-         if self.using_native_model:
-             res = await self.model.a_generate(prompt)
-             data = parse_response_json(res, self)
-             return data["claims"]
-         else:
-             try:
-                 res: Claims = await self.model.a_generate(prompt, schema=Claims)
-                 return res.claims
-             except TypeError:
-                 res = await self.model.a_generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["claims"]
-
-     def _generate_claims(self, text: str) -> List[str]:
-         # Borrow faithfulness template
-         prompt = FaithfulnessTemplate.find_claims(text=text)
-         if self.using_native_model:
-             res = self.model.generate(prompt)
-             data = parse_response_json(res, self)
-             return data["claims"]
-         else:
-             try:
-                 res: Claims = self.model.generate(prompt, schema=Claims)
-                 return res.claims
-             except TypeError:
-                 res = self.model.generate(prompt)
-                 data = parse_response_json(res, self)
-                 return data["claims"]
-
-     def _success_check(self) -> bool:
-         if self.error is not None:
-             self.success = False
-         else:
-             try:
-                 self.success = self.score >= self.threshold
-             except:
-                 self.success = False
-         return self.success
-
-     @property
-     def __name__(self):
-         return "Summarization"
-
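For readers skimming the removed implementation above: its final score is the minimum of a contradiction score (the fraction of summary claims with a "yes" verdict, i.e. supported by the original text) and an info-coverage score (among assessment questions the original text can answer, the fraction the summary can also answer), exactly as computed in _calculate_score. The snippet below restates that arithmetic outside the class as a minimal sketch; the verdict values are invented purely to illustrate the computation, and edge cases such as strict_mode or missing assessment questions are omitted.

def contradiction_score(claim_verdicts: list[str]) -> float:
    # "yes" means the original text supports the claim; "no" and "idk" both count against it.
    if not claim_verdicts:
        return 0.0
    return sum(v.strip().lower() == "yes" for v in claim_verdicts) / len(claim_verdicts)

def info_coverage_score(question_verdicts: list[tuple[str, str]]) -> float:
    # Each pair is (original_verdict, summary_verdict) for one assessment question.
    answerable = [pair for pair in question_verdicts if pair[0].strip().lower() == "yes"]
    if not answerable:
        return 0.0
    return sum(s.strip().lower() == "yes" for _, s in answerable) / len(answerable)

claims = ["yes", "yes", "no", "idk"]                        # 2/4 supported -> 0.50
questions = [("yes", "yes"), ("yes", "no"), ("no", "idk")]  # 1/2 covered   -> 0.50
print(min(contradiction_score(claims), info_coverage_score(questions)))    # 0.5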
judgeval-0.0.32.dist-info/RECORD (deleted)
@@ -1,97 +0,0 @@
- judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
- judgeval/clients.py,sha256=6VQmEqmfCngUdS2MuPBIpHvtDFqOENm8-_BmMvjLyRQ,944
- judgeval/constants.py,sha256=_XmVAkebMyGrDvvanAVlMgVd4p6MLHdEVsTQFI0kz1k,5411
- judgeval/evaluation_run.py,sha256=WGzx-Ug2qhSmunFo8NrmSstBRsOUc5KpKq0Lc51rqsM,6739
- judgeval/judgment_client.py,sha256=k0q2s5A0RkhF9ElD9o-KWN10H36t3Of2PrvNF-silf8,26141
- judgeval/rules.py,sha256=B0ZL0pn72D4Jnlr0zMQ6CPHi7D8AQQRariXCVsiCMiI,20542
- judgeval/run_evaluation.py,sha256=hnEY8QckEviXYNJutf-6tLFq2DWCzqWV1EVyPvrVXyA,28512
- judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
- judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
- judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
- judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
- judgeval/common/tracer.py,sha256=owRRfIZXPUOVCCn0macygnf18mcp8am1eULGnZXD0Kk,68876
- judgeval/common/utils.py,sha256=LUQV5JfDr6wj7xHAJoNq-gofNZ6mjXbeKrGKzBME1KM,33533
- judgeval/data/__init__.py,sha256=xuKx_KCVHGp6CXvQuVmKl3v7pJp-qDaz0NccKxwjtO0,481
- judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
- judgeval/data/example.py,sha256=cJrmPGLel_P2sy1UaRvuVSAi35EnA9XMR11Lhp4aDLo,5930
- judgeval/data/result.py,sha256=Gb9tiSDsk1amXgh0cFG6JmlW_BMKxS2kuTwNA0rrHjA,3184
- judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
- judgeval/data/sequence.py,sha256=Fkk2HJGnPboH-Fvwgxub_ryG0eUXa3cbsj7ZD0qkeBo,2204
- judgeval/data/sequence_run.py,sha256=RmYjfWKMWg-pcF5PLeiWfrhuDkjDZi5VEmAIEXN3Ib0,2104
- judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
- judgeval/data/datasets/dataset.py,sha256=dhLo30hvpmmOK2R6O5wDs_neawUJ4lS8bb4S42SufNQ,13034
- judgeval/data/datasets/eval_dataset_client.py,sha256=xjj66BO9Es9IxXqzQe1RT_e0kpeKlt7OrhRoSuj4KHM,15085
- judgeval/integrations/langgraph.py,sha256=J-cQfFP52TjJewdSTe-fcsUC4HDvjNbXoxmbmF0SgiE,11743
- judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
- judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
- judgeval/judges/litellm_judge.py,sha256=EIL58Teptv8DzZUO3yP2RDQCDq-aoBB6HPZzPdK6KTg,2424
- judgeval/judges/mixture_of_judges.py,sha256=IJoi4Twk8ze1CJWVEp69k6TSqTCTGrmVYQ0qdffer60,15549
- judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
- judgeval/judges/utils.py,sha256=9lvUxziGV86ISvVFxYBWc09TWFyAQgUTyPf_a9mD5Rs,2686
- judgeval/scorers/__init__.py,sha256=Z_88Sr45gLFAIbMHzG1BF24TUQGCDiuP9QpmVFvSYJM,1204
- judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
- judgeval/scorers/base_scorer.py,sha256=xdUlY3CnLdCQ1Z5iUeY22Bim5v-OQruZmaVF_4Y1mC0,2183
- judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
- judgeval/scorers/judgeval_scorer.py,sha256=79-JJurqHP-qTaWNWInx4SjvQYwXc9lvfPPNgwsh2yA,6773
- judgeval/scorers/prompt_scorer.py,sha256=PaAs2qRolw1P3_I061Xvk9qzvF4O-JR8g_39RqXnHcM,17728
- judgeval/scorers/score.py,sha256=r9QiT4-LIvivcJ6XxByrbswKSO8eQTtAD1UlXT_lcmo,18741
- judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
- judgeval/scorers/judgeval_scorers/__init__.py,sha256=kSmQWKeBvLeZMfLYNQSc2qbJYo1MFIQnf3P-D4ltuSM,6232
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=_sDUBxSG536KGqXNi6dFpaYKghjEAadxBxaaxV9HuuE,1764
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=Fnd9CVIOZ73sWEWymsU5eBrrZqPFjMZ0BKpeW-PDyTg,711
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=oETeN9K0HSIRdL2SDqn82Vskpwh5SlKnZvs5VDm2OBU,658
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=kuzf9OWvpY38yYSwlBgneLkUZwJNM4FQqvbS66keA90,1249
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=tpSuzFAaW8X9xqA0aLLKwh7qmBK0Pc_bJZMIe_q412U,770
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=pFVhk4pLtQ-FnNlbI-dFF-SIh69Jza7erHqiPkFWoBo,758
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=RQ6DZwEhChfecd89Ey-T7ke--7qTaXZlRsNxwH8gaME,823
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=V9WPuwNMm097V7IknKs8UkmAk0yjnBXTcJha_BHXxTA,475
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=Pb3CiNF2Ca826B92wJCVAi_68lJjLhqqCKwQKaflSUg,1294
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=-BwOapqjryYNKNydtdkUiKIij76dY0O1jBmdc6dKazQ,692
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=ntEEeTANEOsGlcbiTAF_3r6BeSJEaVDns8po8T0L6Vg,692
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=k5gDOki-8KXrZXydvdSqDt3NZqQ28hXoOCHQf6jNxr4,686
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=XnSGEkQfwVqaqnHEGMCsxNiHVzrsrej48uDbLoWc8CQ,678
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=mMKEuR87_yanEuZJ5YSGFMHDD_oLVZ6-rQuciFaDOMA,1095
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=QmWB8bVbDYHY5FcF0rYZE_3c2XXgMLRmR6aXJWfdMC4,655
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=ly72Z7s_c8NID6-nQnuW8qEGEW2MqdvpJ-5WfXzbAQg,2579
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py,sha256=k_t-THIAtsk7lNvm9faj0u24dPZjn7qRbZ8YGjQ21xs,1926
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py,sha256=cxxUEspgoIdSzJbwIIioamC0-xDqhYVfYAWxaYF-D_Y,177
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py,sha256=3Dpm8BIIe0Th2p0ccO5bb-le93lywjOLSo712HwEIUE,10196
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py,sha256=hBUqEd8Hy3g8peOVjpSmRb31fPtpodDzdRUonhKRl30,6686
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py,sha256=r6yae5iaWtlBL_cP8I-1SuhS9dulsy1e7W9Rcz82v6E,169
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py,sha256=qoeoFyXXDtqqc7ZSLajqexeSxw5STmrL-uPQIMY3zSw,10529
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py,sha256=-OO3QmkXqGCvdIRKsAuT4wQ1ZqWBQDdb1j3lc3W9q3w,6540
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py,sha256=QnwSTgYx_zyz6K27WTe89MoTcO12WYn_jqE_xj7_H2U,5497
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py,sha256=c49aCzyxCogjUTipQUmS13LemFC89X9xEuPNQ_LVHgw,31345
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py,sha256=J6tc-T60AVOEaNVuoVU0XIG6dvQri99Q0tnX_Tm-0vc,108
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py,sha256=tRgRyjGpc4Pe3nQ1c-5NeNYFvbulL7YEnoRa9zLp1gc,9649
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py,sha256=pN0AURDWSV3iGt11MtJIwzXMuKbM4oC3zdb9yqnjNdU,4875
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py,sha256=4kjfqD_95muHZFo75S8_fbTcC1DI1onNIfMmr8gMZaI,99
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py,sha256=hwAv_x3XwGDnSW3a75CTCgIW6eVg8ymdjDdJQvw5p0Y,9260
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py,sha256=k5xdCw8gnBvG1_dDSbtBftDDtOZ4qKL2-iQ9AQHsuUI,6541
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py,sha256=JPCvrekKLbl_xdD49evhtiFIVocuegCpCBkn1auzTSE,184
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py,sha256=BtVgE7z-9PHfFRcvn96aEG5mXVcWBweVyty934hZdiU,8915
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py,sha256=uO-8Uo7VrXu4xWpxjIx6_UI3aw5KuJxubSHb71Nzm6Q,4574
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py,sha256=DpOHbjYEhVmP-RiaTEa5PZHpoPvduNXG5p6k9lR0AS0,157
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py,sha256=y-Ag8YuzEvExUIj4qU7y53INVLH9L_TUTJLIxCIdAQo,5458
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py,sha256=NbkSqPwxgF4T8KsvuIWhVyRwdOlo7mNHMFuRStTFnvk,154
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py,sha256=LPVTGHBBJSpE6TrgzZQS2_vw4P9HiUYmykrwo6UMdws,11251
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py,sha256=vNLjF4NKZJSV4VNenHzoAUB2xVZz6tt_5AzryKmOVrI,11690
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py,sha256=fZk3UQxI9Nljf5qjCRLRkF0D-AERFHElI9cC83_cgV8,158
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py,sha256=q9qdmwq96stbTRVA4Egv9eO1KI8zf77jwYkZXaOZePw,9511
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py,sha256=TJWKheQnaJ-pdVzkOTDr3BNZ9bfCrC81S3kvjm4Zjh8,4329
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py,sha256=keZRmLe5oyIVE98h1nOiW54__xcEv2QInwZcJ34ZKhs,8175
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py,sha256=jv-1Z7K1EhLjy4NGKS35cIcShh7ZDQCXVPqoJnAnDqk,3598
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py,sha256=xQDw7o9JQ6qajusPnBH0MWBRJ5ct_Ao3pJELXxxVMRo,175
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py,sha256=RkI-mARc2AYCw5dTb5OSY4UWXIwDcYS3ViiJOVIq0Nw,4339
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py,sha256=mv6-XeLSV5yj1H98YYV2iTYVd88zKftZJP42Lgl6R80,89
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py,sha256=6GnRz2h-6Fwt4sl__0RgQOyo3n3iDO4MNuHWxdu-rrM,10242
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py,sha256=Qk7lwHgRPYeGoxTOyclAh1VfGItfvHJ6l1t7Nk3SWFM,20927
- judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
- judgeval/utils/alerts.py,sha256=O19Xj7DA0YVjl8PWiuH4zfdZeu3yiLVvHfY8ah2wG0g,2759
- judgeval-0.0.32.dist-info/METADATA,sha256=RJzqlHJwfYiOXEcyEEO5WQBM0DC1zQDuoN-Plix6U38,5418
- judgeval-0.0.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- judgeval-0.0.32.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
- judgeval-0.0.32.dist-info/RECORD,,
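Each entry in the RECORD file removed above follows the standard wheel layout: the file path, a sha256 digest encoded as URL-safe base64 with the trailing padding stripped, and the file size in bytes. The snippet below is a generic sketch of how one such entry is derived; the path is a placeholder, not a file from this package.

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = hashlib.sha256(data).digest()
    # Wheel RECORDs use URL-safe base64 with the trailing "=" padding removed.
    encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"{path},sha256={encoded},{len(data)}"

print(record_entry("example_module.py"))  # e.g. example_module.py,sha256=...,1234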