judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from judgeval.v1.internal.api.api_types import ScorerConfig
6
+ from judgeval.v1.scorers.base_scorer import BaseScorer
7
+
8
+
9
class APIScorer(BaseScorer):
    """Scorer described by a score type plus threshold/name/model settings.

    The instance stores its settings in private slots and exposes them
    through getter/setter methods. ``get_scorer_config`` packs the current
    settings into a ``ScorerConfig``.
    """

    __slots__ = (
        "_score_type",
        "_required_params",
        "_threshold",
        "_name",
        "_strict_mode",
        "_model",
        "_additional_properties",
    )

    def __init__(
        self,
        score_type: str,
        required_params: Optional[List[str]] = None,
        threshold: float = 0.5,
        name: Optional[str] = None,
        strict_mode: bool = False,
        model: Optional[str] = None,
        **additional_properties: Any,
    ):
        """Store the scorer settings.

        Args:
            score_type: Identifier of the score type.
            required_params: Names of the parameters this scorer requires;
                ``None`` or an empty sequence becomes an empty list.
            threshold: Threshold value. Unlike ``set_threshold``, the
                constructor does not range-check it.
            name: Display name; falls back to ``score_type`` when falsy.
            strict_mode: Strict-mode flag forwarded to the config.
            model: Optional model name.
            **additional_properties: Extra entries forwarded as config kwargs.
        """
        self._score_type = score_type
        # Take a private copy so that later mutation of the caller's list
        # cannot silently change this scorer's required parameters.
        self._required_params = list(required_params) if required_params else []
        self._threshold = threshold
        self._name = name or score_type
        self._strict_mode = strict_mode
        self._model = model
        self._additional_properties = additional_properties

    def get_name(self) -> str:
        """Return the scorer's name."""
        return self._name

    def get_score_type(self) -> str:
        """Return the score type given at construction."""
        return self._score_type

    def get_threshold(self) -> float:
        """Return the current threshold."""
        return self._threshold

    def get_strict_mode(self) -> bool:
        """Return the strict-mode flag."""
        return self._strict_mode

    def get_model(self) -> Optional[str]:
        """Return the model name, or ``None`` when unset."""
        return self._model

    def get_required_params(self) -> List[str]:
        """Return a copy of the required parameter names."""
        return self._required_params.copy()

    def set_threshold(self, threshold: float) -> None:
        """Replace the threshold.

        Raises:
            ValueError: If ``threshold`` is below 0 or above 1.
        """
        if threshold < 0 or threshold > 1:
            raise ValueError(f"Threshold must be between 0 and 1, got: {threshold}")
        self._threshold = threshold

    def set_name(self, name: str) -> None:
        """Replace the scorer's name."""
        self._name = name

    def set_strict_mode(self, strict_mode: bool) -> None:
        """Replace the strict-mode flag."""
        self._strict_mode = strict_mode

    def set_model(self, model: str) -> None:
        """Replace the model name."""
        self._model = model

    def get_scorer_config(self) -> ScorerConfig:
        """Build a ``ScorerConfig`` from the current settings.

        The config's ``kwargs`` start as a copy of the additional properties
        given at construction; ``"model"`` is added (overriding any entry of
        the same key) only when a truthy model is set.
        """
        kwargs: Dict[str, Any] = dict(self._additional_properties)
        if self._model:
            kwargs["model"] = self._model

        return ScorerConfig(
            score_type=self._score_type,
            threshold=self._threshold,
            name=self._name,
            strict_mode=self._strict_mode,
            # Hand out a copy, matching get_required_params, so the config
            # never aliases this scorer's internal list.
            required_params=list(self._required_params),
            kwargs=kwargs,
        )
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from judgeval.v1.internal.api.api_types import ScorerConfig
6
+
7
+
8
class BaseScorer(ABC):
    """Abstract interface that every scorer class implements.

    Declares no instance attributes of its own (empty ``__slots__``);
    concrete subclasses must provide both methods below.
    """

    __slots__ = ()

    @abstractmethod
    def get_name(self) -> str:
        """Return the scorer's name."""

    @abstractmethod
    def get_scorer_config(self) -> ScorerConfig:
        """Return the ``ScorerConfig`` describing this scorer."""
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.scorers.built_in.answer_correctness import AnswerCorrectnessScorer
4
+ from judgeval.v1.scorers.built_in.answer_relevancy import AnswerRelevancyScorer
5
+ from judgeval.v1.scorers.built_in.built_in_factory import BuiltInScorersFactory
6
+ from judgeval.v1.scorers.built_in.faithfulness import FaithfulnessScorer
7
+ from judgeval.v1.scorers.built_in.instruction_adherence import (
8
+ InstructionAdherenceScorer,
9
+ )
10
+
11
+ __all__ = [
12
+ "AnswerCorrectnessScorer",
13
+ "AnswerRelevancyScorer",
14
+ "FaithfulnessScorer",
15
+ "InstructionAdherenceScorer",
16
+ "BuiltInScorersFactory",
17
+ ]
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.constants import APIScorerType
6
+ from judgeval.v1.scorers.api_scorer import APIScorer
7
+
8
+
9
class AnswerCorrectnessScorer(APIScorer):
    """``APIScorer`` preconfigured with the ANSWER_CORRECTNESS score type.

    Requires the ``input``, ``actual_output`` and ``expected_output``
    parameters.
    """

    def __init__(
        self,
        threshold: float = 0.5,
        name: Optional[str] = None,
        strict_mode: bool = False,
        model: Optional[str] = None,
    ):
        """Forward the settings to ``APIScorer`` with this scorer's fixed
        score type and required parameters.

        Args:
            threshold: Threshold value.
            name: Optional display name.
            strict_mode: Strict-mode flag.
            model: Optional model name.
        """
        super().__init__(
            score_type=APIScorerType.ANSWER_CORRECTNESS.value,
            required_params=["input", "actual_output", "expected_output"],
            threshold=threshold,
            name=name,
            strict_mode=strict_mode,
            model=model,
        )

    @classmethod
    def create(cls, threshold: float = 0.5) -> AnswerCorrectnessScorer:
        """Alternate constructor taking only a threshold.

        Implemented as a classmethod so that calling ``create`` on a
        subclass yields an instance of that subclass rather than the base.
        """
        return cls(threshold=threshold)
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.constants import APIScorerType
6
+ from judgeval.v1.scorers.api_scorer import APIScorer
7
+
8
+
9
class AnswerRelevancyScorer(APIScorer):
    """``APIScorer`` preconfigured with the ANSWER_RELEVANCY score type.

    Requires the ``input`` and ``actual_output`` parameters.
    """

    def __init__(
        self,
        threshold: float = 0.5,
        name: Optional[str] = None,
        strict_mode: bool = False,
        model: Optional[str] = None,
    ):
        """Forward the settings to ``APIScorer`` with this scorer's fixed
        score type and required parameters.

        Args:
            threshold: Threshold value.
            name: Optional display name.
            strict_mode: Strict-mode flag.
            model: Optional model name.
        """
        super().__init__(
            score_type=APIScorerType.ANSWER_RELEVANCY.value,
            required_params=["input", "actual_output"],
            threshold=threshold,
            name=name,
            strict_mode=strict_mode,
            model=model,
        )

    @classmethod
    def create(cls, threshold: float = 0.5) -> AnswerRelevancyScorer:
        """Alternate constructor taking only a threshold.

        Implemented as a classmethod so that calling ``create`` on a
        subclass yields an instance of that subclass rather than the base.
        """
        return cls(threshold=threshold)
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.scorers.built_in.answer_correctness import AnswerCorrectnessScorer
4
+ from judgeval.v1.scorers.built_in.answer_relevancy import AnswerRelevancyScorer
5
+ from judgeval.v1.scorers.built_in.faithfulness import FaithfulnessScorer
6
+ from judgeval.v1.scorers.built_in.instruction_adherence import (
7
+ InstructionAdherenceScorer,
8
+ )
9
+
10
+
11
class BuiltInScorersFactory:
    """Stateless factory with one method per built-in scorer class.

    Every method delegates to the corresponding scorer's ``create``
    constructor, passing the threshold through.
    """

    __slots__ = ()

    def answer_correctness(self, threshold: float = 0.5) -> AnswerCorrectnessScorer:
        """Return a new ``AnswerCorrectnessScorer`` with ``threshold``."""
        scorer = AnswerCorrectnessScorer.create(threshold=threshold)
        return scorer

    def answer_relevancy(self, threshold: float = 0.5) -> AnswerRelevancyScorer:
        """Return a new ``AnswerRelevancyScorer`` with ``threshold``."""
        scorer = AnswerRelevancyScorer.create(threshold=threshold)
        return scorer

    def faithfulness(self, threshold: float = 0.5) -> FaithfulnessScorer:
        """Return a new ``FaithfulnessScorer`` with ``threshold``."""
        scorer = FaithfulnessScorer.create(threshold=threshold)
        return scorer

    def instruction_adherence(
        self, threshold: float = 0.5
    ) -> InstructionAdherenceScorer:
        """Return a new ``InstructionAdherenceScorer`` with ``threshold``."""
        scorer = InstructionAdherenceScorer.create(threshold=threshold)
        return scorer
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.constants import APIScorerType
6
+ from judgeval.v1.scorers.api_scorer import APIScorer
7
+
8
+
9
class FaithfulnessScorer(APIScorer):
    """``APIScorer`` preconfigured with the FAITHFULNESS score type.

    Requires the ``context`` and ``actual_output`` parameters.
    """

    def __init__(
        self,
        threshold: float = 0.5,
        name: Optional[str] = None,
        strict_mode: bool = False,
        model: Optional[str] = None,
    ):
        """Forward the settings to ``APIScorer`` with this scorer's fixed
        score type and required parameters.

        Args:
            threshold: Threshold value.
            name: Optional display name.
            strict_mode: Strict-mode flag.
            model: Optional model name.
        """
        super().__init__(
            score_type=APIScorerType.FAITHFULNESS.value,
            required_params=["context", "actual_output"],
            threshold=threshold,
            name=name,
            strict_mode=strict_mode,
            model=model,
        )

    @classmethod
    def create(cls, threshold: float = 0.5) -> FaithfulnessScorer:
        """Alternate constructor taking only a threshold.

        Implemented as a classmethod so that calling ``create`` on a
        subclass yields an instance of that subclass rather than the base.
        """
        return cls(threshold=threshold)
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.constants import APIScorerType
6
+ from judgeval.v1.scorers.api_scorer import APIScorer
7
+
8
+
9
class InstructionAdherenceScorer(APIScorer):
    """``APIScorer`` preconfigured with the INSTRUCTION_ADHERENCE score type.

    Requires the ``input`` and ``actual_output`` parameters.
    """

    def __init__(
        self,
        threshold: float = 0.5,
        name: Optional[str] = None,
        strict_mode: bool = False,
        model: Optional[str] = None,
    ):
        """Forward the settings to ``APIScorer`` with this scorer's fixed
        score type and required parameters.

        Args:
            threshold: Threshold value.
            name: Optional display name.
            strict_mode: Strict-mode flag.
            model: Optional model name.
        """
        super().__init__(
            score_type=APIScorerType.INSTRUCTION_ADHERENCE.value,
            required_params=["input", "actual_output"],
            threshold=threshold,
            name=name,
            strict_mode=strict_mode,
            model=model,
        )

    @classmethod
    def create(cls, threshold: float = 0.5) -> InstructionAdherenceScorer:
        """Alternate constructor taking only a threshold.

        Implemented as a classmethod so that calling ``create`` on a
        subclass yields an instance of that subclass rather than the base.
        """
        return cls(threshold=threshold)
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.scorers.custom_scorer.custom_scorer import CustomScorer
4
+ from judgeval.v1.scorers.custom_scorer.custom_scorer_factory import CustomScorerFactory
5
+
6
+ __all__ = ["CustomScorer", "CustomScorerFactory"]
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+ from judgeval.constants import APIScorerType
5
+
6
+ if TYPE_CHECKING:
7
+ from judgeval.v1.internal.api.api_types import (
8
+ BaseScorer as BaseScorerDict,
9
+ ScorerConfig,
10
+ )
11
+
12
+ from judgeval.v1.scorers.base_scorer import BaseScorer
13
+
14
+
15
class CustomScorer(BaseScorer):
    """Scorer identified by a name, a class name and a server-hosted flag.

    It is serialised with ``to_dict``; ``get_scorer_config`` is
    deliberately unsupported and always raises.
    """

    __slots__ = ("_name", "_class_name", "_server_hosted")

    def __init__(
        self,
        name: str,
        class_name: str = "",
        server_hosted: bool = True,
    ):
        """Store the identifying fields.

        Args:
            name: Scorer name.
            class_name: Class name; a falsy value falls back to ``name``.
            server_hosted: Flag stored as-is and emitted by ``to_dict``.
        """
        self._name = name
        self._class_name = class_name if class_name else name
        self._server_hosted = server_hosted

    def get_name(self) -> str:
        """Return the scorer's name."""
        return self._name

    def get_class_name(self) -> str:
        """Return the class name (equal to the name when none was given)."""
        return self._class_name

    def is_server_hosted(self) -> bool:
        """Return the server-hosted flag."""
        return self._server_hosted

    def get_scorer_config(self) -> ScorerConfig:
        """Always raise: this scorer type has no ``ScorerConfig`` form.

        Raises:
            NotImplementedError: Unconditionally.
        """
        raise NotImplementedError("CustomScorer does not use get_scorer_config")

    def to_dict(self) -> BaseScorerDict:
        """Return the dict form, with the CUSTOM score type."""
        payload: BaseScorerDict = {
            "score_type": APIScorerType.CUSTOM.value,
            "name": self._name,
            "class_name": self._class_name,
            "server_hosted": self._server_hosted,
        }
        return payload
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.v1.scorers.custom_scorer.custom_scorer import CustomScorer
6
+
7
+
8
class CustomScorerFactory:
    """Stateless factory producing ``CustomScorer`` instances."""

    __slots__ = ()

    def get(self, name: str, class_name: Optional[str] = None) -> CustomScorer:
        """Return a ``CustomScorer`` with ``server_hosted`` set to ``True``.

        Args:
            name: Scorer name.
            class_name: Optional class name; a falsy value falls back to
                ``name``.
        """
        resolved_class_name = class_name if class_name else name
        return CustomScorer(
            name=name,
            class_name=resolved_class_name,
            server_hosted=True,
        )
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.scorers.prompt_scorer.prompt_scorer import PromptScorer
4
+ from judgeval.v1.scorers.prompt_scorer.prompt_scorer_factory import PromptScorerFactory
5
+
6
+ __all__ = ["PromptScorer", "PromptScorerFactory"]
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+ from judgeval.constants import APIScorerType
6
+ from judgeval.v1.internal.api.api_types import ScorerConfig
7
+ from judgeval.v1.scorers.api_scorer import APIScorer
8
+
9
+
10
class PromptScorer(APIScorer):
    """``APIScorer`` driven by a prompt, with optional options and description.

    The score type is TRACE_PROMPT_SCORER when ``is_trace`` is true and
    PROMPT_SCORER otherwise.
    """

    __slots__ = (
        "_prompt",
        "_options",
        "_description",
        "_judgment_api_key",
        "_organization_id",
        "_is_trace",
    )

    def __init__(
        self,
        name: str,
        prompt: str,
        threshold: float = 0.5,
        options: Optional[Dict[str, float]] = None,
        model: Optional[str] = None,
        description: Optional[str] = None,
        judgment_api_key: str = "",
        organization_id: str = "",
        is_trace: bool = False,
    ):
        """Store the prompt scorer settings.

        Args:
            name: Scorer name.
            prompt: Prompt text.
            threshold: Threshold value.
            options: Optional mapping of option name to float; copied on
                input. A falsy value (``None`` or empty) is stored as ``None``.
            model: Optional model name.
            description: Optional description.
            judgment_api_key: Stored on the instance; not read elsewhere in
                this class.
            organization_id: Stored on the instance; not read elsewhere in
                this class.
            is_trace: Selects the trace variant of the score type.
        """
        score_type_member = (
            APIScorerType.TRACE_PROMPT_SCORER
            if is_trace
            else APIScorerType.PROMPT_SCORER
        )
        super().__init__(
            # Pass the enum's plain value, matching the ``str`` annotation of
            # ``APIScorer.__init__`` and the other scorers in this package.
            score_type=score_type_member.value,
            threshold=threshold,
            name=name,
            model=model,
        )
        self._prompt = prompt
        self._options = options.copy() if options else None
        self._description = description
        self._judgment_api_key = judgment_api_key
        self._organization_id = organization_id
        self._is_trace = is_trace

    def get_prompt(self) -> str:
        """Return the prompt text."""
        return self._prompt

    def get_options(self) -> Optional[Dict[str, float]]:
        """Return a copy of the options, or ``None`` when there are none."""
        return self._options.copy() if self._options else None

    def get_description(self) -> Optional[str]:
        """Return the description, or ``None`` when unset."""
        return self._description

    def set_prompt(self, prompt: str) -> None:
        """Replace the prompt text."""
        self._prompt = prompt

    def set_options(self, options: Dict[str, float]) -> None:
        """Replace the options with a copy of ``options``."""
        self._options = options.copy()

    def set_description(self, description: str) -> None:
        """Replace the description."""
        self._description = description

    def append_to_prompt(self, addition: str) -> None:
        """Append ``addition`` to the end of the prompt, with no separator."""
        self._prompt = self._prompt + addition

    def get_scorer_config(self) -> ScorerConfig:
        """Build a ``ScorerConfig`` for this prompt scorer.

        ``kwargs`` always contains ``"prompt"``; ``"options"``, ``"model"``
        and ``"description"`` are included only when truthy. Only
        ``score_type``, ``threshold``, ``name`` and ``kwargs`` are passed to
        ``ScorerConfig``.
        """
        kwargs: Dict[str, Any] = {"prompt": self._prompt}

        if self._options:
            # Copy, as get_options does, so the config never aliases the
            # scorer's internal dict.
            kwargs["options"] = self._options.copy()
        if self._model:
            kwargs["model"] = self._model
        if self._description:
            kwargs["description"] = self._description

        return ScorerConfig(
            score_type=self._score_type,
            threshold=self._threshold,
            name=self._name,
            kwargs=kwargs,
        )
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Tuple
4
+
5
+ from judgeval.v1.internal.api import JudgmentSyncClient
6
+ from judgeval.v1.internal.api.api_types import (
7
+ FetchPromptScorersRequest,
8
+ FetchPromptScorersResponse,
9
+ PromptScorer as APIPromptScorer,
10
+ )
11
+ from judgeval.exceptions import JudgmentAPIError
12
+ from judgeval.v1.scorers.prompt_scorer.prompt_scorer import PromptScorer
13
+
14
+
15
class PromptScorerFactory:
    """Build ``PromptScorer`` objects from definitions fetched via the client.

    Fetched definitions are memoised in a class-level cache, so they are
    shared by every factory instance in the process and are never evicted
    by this class.
    """

    __slots__ = ("_client", "_is_trace")
    # Keyed by (name, organization id, api key, is_trace).
    _cache: Dict[Tuple[str, str, str, bool], APIPromptScorer] = {}

    def __init__(
        self,
        client: JudgmentSyncClient,
        is_trace: bool,
    ):
        """Remember the client and which scorer variant this factory serves.

        Args:
            client: Client used to fetch scorer definitions; its
                ``organization_id`` and ``api_key`` are part of the cache key.
            is_trace: Whether this factory produces trace prompt scorers.
        """
        self._client = client
        self._is_trace = is_trace

    def get(self, name: str) -> PromptScorer:
        """Return a ``PromptScorer`` for the scorer called ``name``.

        The definition comes from the cache when present; otherwise it is
        fetched, validated and cached first.

        Raises:
            JudgmentAPIError: 404 when no scorer is returned, 400 when the
                returned scorer's trace flag does not match this factory,
                500 when fetching fails for any other reason.
        """
        cache_key = (
            name,
            self._client.organization_id,
            self._client.api_key,
            self._is_trace,
        )
        cached = self._cache.get(cache_key)

        if cached is None:
            cached = self._fetch(name)
            self._cache[cache_key] = cached

        return PromptScorer(
            name=name,
            prompt=cached.get("prompt", ""),
            threshold=cached.get("threshold", 0.5),
            options=cached.get("options"),
            model=cached.get("model"),
            description=cached.get("description"),
            is_trace=self._is_trace,
        )

    def _fetch(self, name: str) -> APIPromptScorer:
        """Fetch the definition for ``name`` and check its trace flag."""
        request: FetchPromptScorersRequest = {"names": [name]}
        if self._is_trace is not None:
            request["is_trace"] = self._is_trace

        try:
            response: FetchPromptScorersResponse = self._client.fetch_scorers(
                request
            )
            scorers = response.get("scorers", [])

            if not scorers:
                raise JudgmentAPIError(
                    404, f"Failed to fetch prompt scorer '{name}': not found", None
                )

            # Only the first returned scorer is used.
            scorer = scorers[0]
            scorer_is_trace = scorer.get("is_trace", False)

            if scorer_is_trace != self._is_trace:
                expected_type = (
                    "TracePromptScorer" if self._is_trace else "PromptScorer"
                )
                actual_type = (
                    "TracePromptScorer" if scorer_is_trace else "PromptScorer"
                )
                raise JudgmentAPIError(
                    400,
                    f"Scorer with name {name} is a {actual_type}, not a {expected_type}",
                    None,
                )

            return scorer
        except JudgmentAPIError:
            # Already in the shape callers expect; do not re-wrap.
            raise
        except Exception as e:
            # Chain explicitly so the original failure stays the stated cause.
            raise JudgmentAPIError(
                500, f"Failed to fetch prompt scorer '{name}': {e}", None
            ) from e
@@ -0,0 +1,49 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.internal.api import JudgmentSyncClient
4
+
5
+
6
class ScorersFactory:
    """Entry point exposing one sub-factory per scorer family.

    Each property imports its factory class lazily and returns a new
    factory object on every access.
    """

    # A one-element tuple rather than a bare string, so that code iterating
    # ``__slots__`` sees the slot name and not its individual characters.
    __slots__ = ("_client",)

    def __init__(
        self,
        client: JudgmentSyncClient,
    ):
        """Remember the client handed to the prompt-scorer factories."""
        self._client = client

    @property
    def prompt_scorer(self):
        """A new ``PromptScorerFactory`` with ``is_trace=False``."""
        from judgeval.v1.scorers.prompt_scorer.prompt_scorer_factory import (
            PromptScorerFactory,
        )

        return PromptScorerFactory(
            client=self._client,
            is_trace=False,
        )

    @property
    def trace_prompt_scorer(self):
        """A new ``PromptScorerFactory`` with ``is_trace=True``."""
        from judgeval.v1.scorers.prompt_scorer.prompt_scorer_factory import (
            PromptScorerFactory,
        )

        return PromptScorerFactory(
            client=self._client,
            is_trace=True,
        )

    @property
    def custom_scorer(self):
        """A new ``CustomScorerFactory``."""
        from judgeval.v1.scorers.custom_scorer.custom_scorer_factory import (
            CustomScorerFactory,
        )

        return CustomScorerFactory()

    @property
    def built_in(self):
        """A new ``BuiltInScorersFactory``."""
        from judgeval.v1.scorers.built_in.built_in_factory import BuiltInScorersFactory

        return BuiltInScorersFactory()
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.tracer.base_tracer import BaseTracer
4
+ from judgeval.v1.tracer.tracer import Tracer
5
+ from judgeval.v1.tracer.tracer_factory import TracerFactory
6
+
7
+ __all__ = ["BaseTracer", "Tracer", "TracerFactory"]