judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,21 +1,17 @@
1
- from judgeval.scorers.base_scorer import BaseScorer
2
- from judgeval.data import Trace
3
- from typing import List, Optional
4
- from abc import abstractmethod
1
+ # from judgeval.scorers.base_scorer import BaseScorer
2
+ # from judgeval.data.judgment_types import Trace as JudgmentTrace
3
+ # from typing import List, Optional
4
+ # from abc import abstractmethod
5
5
 
6
- from judgeval.common.logger import warning, error
7
6
 
8
-
9
- class AgentScorer(BaseScorer):
10
- @abstractmethod
11
- async def a_score_trace(
12
- self, trace: Trace, tools: Optional[List] = None, *args, **kwargs
13
- ) -> float:
14
- """
15
- Asynchronously measures the score on a trace
16
- """
17
- warning("Attempting to call unimplemented a_score_trace method")
18
- error("a_score_trace method not implemented")
19
- raise NotImplementedError(
20
- "You must implement the `a_score_trace` method in your custom scorer"
21
- )
7
+ # class TraceScorer(BaseScorer):
8
+ # @abstractmethod
9
+ # async def a_score_trace(
10
+ # self, trace: JudgmentTrace, tools: Optional[List] = None, *args, **kwargs
11
+ # ) -> float:
12
+ # """
13
+ # Asynchronously measures the score on a trace
14
+ # """
15
+ # raise NotImplementedError(
16
+ # "You must implement the `a_score_trace` method in your custom scorer"
17
+ # )
@@ -4,11 +4,13 @@ Judgment Scorer class.
4
4
  Scores `Example`s using ready-made Judgment evaluators.
5
5
  """
6
6
 
7
+ from __future__ import annotations
8
+
7
9
  from pydantic import BaseModel, field_validator
8
10
  from typing import List
9
- from judgeval.data import ExampleParams
10
- from judgeval.constants import APIScorerType, UNBOUNDED_SCORERS
11
- from judgeval.common.logger import judgeval_logger
11
+ from judgeval.constants import APIScorerType
12
+ from judgeval.data.example import ExampleParams
13
+ from judgeval.env import JUDGMENT_DEFAULT_GPT_MODEL
12
14
 
13
15
 
14
16
  class APIScorerConfig(BaseModel):
@@ -28,9 +30,10 @@ class APIScorerConfig(BaseModel):
28
30
  name: str = ""
29
31
  threshold: float = 0.5
30
32
  strict_mode: bool = False
31
- required_params: List[
32
- ExampleParams
33
- ] = [] # This is used to check if the example has the required parameters before running the scorer
33
+ model: str = JUDGMENT_DEFAULT_GPT_MODEL
34
+
35
+ required_params: List[ExampleParams] = []
36
+
34
37
  kwargs: dict = {}
35
38
 
36
39
  @field_validator("threshold")
@@ -40,31 +43,26 @@ class APIScorerConfig(BaseModel):
40
43
  Validates that the threshold is between 0 and 1 inclusive.
41
44
  """
42
45
  score_type = info.data.get("score_type")
43
- if score_type in UNBOUNDED_SCORERS:
44
- if v < 0:
45
- judgeval_logger.error(
46
- f"Threshold for {score_type} must be greater than 0, got: {v}"
47
- )
48
- raise ValueError(
49
- f"Threshold for {score_type} must be greater than 0, got: {v}"
50
- )
51
- else:
52
- if not 0 <= v <= 1:
53
- judgeval_logger.error(
54
- f"Threshold for {score_type} must be between 0 and 1, got: {v}"
55
- )
56
- raise ValueError(
57
- f"Threshold for {score_type} must be between 0 and 1, got: {v}"
58
- )
46
+ if not 0 <= v <= 1:
47
+ raise ValueError(
48
+ f"Threshold for {score_type} must be between 0 and 1, got: {v}"
49
+ )
59
50
  return v
60
51
 
61
52
  @field_validator("name", mode="after")
62
53
  @classmethod
63
54
  def set_name_to_score_type_if_none(cls, v, info):
64
- """Set name to score_type if not provided"""
65
55
  if v is None:
66
56
  return info.data.get("score_type")
67
57
  return v
68
58
 
69
59
  def __str__(self):
70
60
  return f"JudgmentScorer(score_type={self.score_type.value}, threshold={self.threshold})"
61
+
62
+
63
+ class ExampleAPIScorerConfig(APIScorerConfig):
64
+ pass
65
+
66
+
67
+ class TraceAPIScorerConfig(APIScorerConfig):
68
+ pass
@@ -2,6 +2,7 @@
2
2
  Base class for all scorers.
3
3
  """
4
4
 
5
+ from __future__ import annotations
5
6
  from typing import Dict, Optional
6
7
 
7
8
  from pydantic import BaseModel
@@ -19,45 +20,63 @@ class BaseScorer(BaseModel):
19
20
  where none of Judgment's scorers are suitable.
20
21
  """
21
22
 
22
- score_type: str # type of your scorer (Faithfulness, PromptScorer)
23
- threshold: float = (
24
- 0.5 # The threshold to pass a test while using this scorer as a scorer
25
- )
26
- name: Optional[str] = (
27
- None # name of your scorer (Faithfulness, PromptScorer-randomslug)
28
- )
29
- score: Optional[float] = None # The float score of the scorer run on the test case
23
+ # type of your scorer (Faithfulness, PromptScorer)
24
+ score_type: str
25
+
26
+ # The threshold to pass a test while using this scorer as a scorer
27
+ threshold: float = 0.5
28
+
29
+ # name of your scorer (Faithfulness, PromptScorer-randomslug)
30
+ name: str = ""
31
+
32
+ # The name of the class of the scorer
33
+ class_name: Optional[str] = None
34
+
35
+ # The float score of the scorer run on the test case
36
+ score: Optional[float] = None
37
+
30
38
  score_breakdown: Optional[Dict] = None
31
39
  reason: Optional[str] = ""
32
- using_native_model: Optional[bool] = None # Whether the model is a native model
33
- success: Optional[bool] = None # Whether the test case passed or failed
34
- model: Optional[str] = None # The name of the model used to evaluate the test case
35
- model_client: Optional[Any] = Field(
36
- default=None, exclude=True
37
- ) # The model used to evaluate the test case
38
- strict_mode: bool = False # Whether to run the scorer in strict mode
39
- error: Optional[str] = None # The error message if the scorer failed
40
- additional_metadata: Optional[Dict] = None # Additional metadata for the scorer
41
- user: Optional[str] = None # The user ID of the scorer
42
-
43
- @model_validator(mode="before")
44
- @classmethod
45
- def enforce_strict_threshold(cls, data: dict):
46
- if data.get("strict_mode"):
47
- data["threshold"] = 1.0
48
- return data
40
+
41
+ # Whether the model is a native model
42
+ using_native_model: Optional[bool] = None
43
+
44
+ # Whether the test case passed or failed
45
+ success: bool = False
46
+
47
+ # The name of the model used to evaluate the test case
48
+ model: Optional[str] = None
49
+
50
+ # The model used to evaluate the test case
51
+ model_client: Optional[Any] = Field(default=None, exclude=True)
52
+
53
+ # Whether to run the scorer in strict mode
54
+ strict_mode: bool = False
55
+
56
+ # The error message if the scorer failed
57
+ error: Optional[str] = None
58
+
59
+ # Additional metadata for the scorer
60
+ additional_metadata: Optional[Dict] = None
61
+
62
+ # The user ID of the scorer
63
+ user: Optional[str] = None
64
+
65
+ # Whether the scorer is hosted on the server
66
+ server_hosted: bool = False
67
+
68
+ @model_validator(mode="after")
69
+ def enforce_strict_threshold(self):
70
+ if self.strict_mode:
71
+ self.threshold = 1.0
72
+ return self
49
73
 
50
74
  @model_validator(mode="after")
51
- @classmethod
52
- def default_name(cls, m: "BaseScorer") -> "BaseScorer":
53
- if not m.name:
54
- # Try to use the class name if it exists and is not empty
55
- class_name = getattr(m, "__class__", None)
56
- if class_name and getattr(m.__class__, "__name__", None):
57
- m.name = m.__class__.__name__
58
- else:
59
- m.name = m.score_type
60
- return m
75
+ def default_name(self):
76
+ self.class_name = self.__class__.__name__
77
+ if not self.name:
78
+ self.name = self.class_name
79
+ return self
61
80
 
62
81
  def _add_model(self, model: str):
63
82
  """
@@ -66,7 +85,6 @@ class BaseScorer(BaseModel):
66
85
  This method is used at eval time
67
86
  """
68
87
  self.model_client, self.using_native_model = create_judge(model)
69
- self.model = self.model_client.get_model_name() or model
70
88
 
71
89
  def success_check(self) -> bool:
72
90
  """
@@ -2,18 +2,16 @@ from judgeval.scorers.base_scorer import BaseScorer
2
2
  from judgeval.data import Example
3
3
  from typing import List
4
4
  from pydantic import Field
5
- from judgeval.common.logger import judgeval_logger
6
5
 
7
6
 
8
7
  class ExampleScorer(BaseScorer):
9
- score_type: str = "Custom" # default to custom score type
8
+ score_type: str = "Custom"
10
9
  required_params: List[str] = Field(default_factory=list)
11
10
 
12
11
  async def a_score_example(self, example: Example, *args, **kwargs) -> float:
13
12
  """
14
13
  Asynchronously measures the score on a single example
15
14
  """
16
- judgeval_logger.error("a_score_example method not implemented")
17
15
  raise NotImplementedError(
18
16
  "You must implement the `a_score_example` method in your custom scorer"
19
17
  )
@@ -1,9 +1,3 @@
1
- from judgeval.scorers.judgeval_scorers.api_scorers.execution_order import (
2
- ExecutionOrderScorer,
3
- )
4
- from judgeval.scorers.judgeval_scorers.api_scorers.hallucination import (
5
- HallucinationScorer,
6
- )
7
1
  from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import (
8
2
  FaithfulnessScorer,
9
3
  )
@@ -16,32 +10,16 @@ from judgeval.scorers.judgeval_scorers.api_scorers.answer_correctness import (
16
10
  from judgeval.scorers.judgeval_scorers.api_scorers.instruction_adherence import (
17
11
  InstructionAdherenceScorer,
18
12
  )
19
- from judgeval.scorers.judgeval_scorers.api_scorers.derailment_scorer import (
20
- DerailmentScorer,
21
- )
22
- from judgeval.scorers.judgeval_scorers.api_scorers.tool_order import ToolOrderScorer
23
13
  from judgeval.scorers.judgeval_scorers.api_scorers.prompt_scorer import (
14
+ TracePromptScorer,
24
15
  PromptScorer,
25
16
  )
26
- from judgeval.scorers.judgeval_scorers.api_scorers.tool_dependency import (
27
- ToolDependencyScorer,
28
- )
29
17
 
30
18
  __all__ = [
31
- "ExecutionOrderScorer",
32
- "JSONCorrectnessScorer",
33
- "SummarizationScorer",
34
- "HallucinationScorer",
35
19
  "FaithfulnessScorer",
36
- "ContextualRelevancyScorer",
37
- "ContextualPrecisionScorer",
38
- "ContextualRecallScorer",
39
20
  "AnswerRelevancyScorer",
40
21
  "AnswerCorrectnessScorer",
41
22
  "InstructionAdherenceScorer",
42
- "GroundednessScorer",
43
- "DerailmentScorer",
44
- "ToolOrderScorer",
23
+ "TracePromptScorer",
45
24
  "PromptScorer",
46
- "ToolDependencyScorer",
47
25
  ]
@@ -1,18 +1,10 @@
1
- """
2
- `judgeval` answer relevancy scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIScorerConfig
1
+ from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
10
2
  from judgeval.constants import APIScorerType
11
3
  from judgeval.data import ExampleParams
12
4
  from typing import List
13
5
 
14
6
 
15
- class AnswerCorrectnessScorer(APIScorerConfig):
7
+ class AnswerCorrectnessScorer(ExampleAPIScorerConfig):
16
8
  score_type: APIScorerType = APIScorerType.ANSWER_CORRECTNESS
17
9
  required_params: List[ExampleParams] = [
18
10
  ExampleParams.INPUT,
@@ -1,10 +1,10 @@
1
- from judgeval.scorers.api_scorer import APIScorerConfig
1
+ from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
2
2
  from judgeval.constants import APIScorerType
3
3
  from judgeval.data import ExampleParams
4
4
  from typing import List
5
5
 
6
6
 
7
- class AnswerRelevancyScorer(APIScorerConfig):
7
+ class AnswerRelevancyScorer(ExampleAPIScorerConfig):
8
8
  score_type: APIScorerType = APIScorerType.ANSWER_RELEVANCY
9
9
  required_params: List[ExampleParams] = [
10
10
  ExampleParams.INPUT,
@@ -1,18 +1,10 @@
1
- """
2
- `judgeval` faithfulness scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIScorerConfig
1
+ from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
10
2
  from judgeval.constants import APIScorerType
11
3
  from judgeval.data import ExampleParams
12
4
  from typing import List
13
5
 
14
6
 
15
- class FaithfulnessScorer(APIScorerConfig):
7
+ class FaithfulnessScorer(ExampleAPIScorerConfig):
16
8
  score_type: APIScorerType = APIScorerType.FAITHFULNESS
17
9
  required_params: List[ExampleParams] = [
18
10
  ExampleParams.INPUT,
@@ -1,17 +1,9 @@
1
- """
2
- `judgeval` instruction adherence scorer
3
-
4
- TODO add link to docs page for this scorer
5
-
6
- """
7
-
8
- # Internal imports
9
- from judgeval.scorers.api_scorer import APIScorerConfig
1
+ from judgeval.scorers.api_scorer import ExampleAPIScorerConfig
10
2
  from judgeval.constants import APIScorerType
11
3
  from judgeval.data import ExampleParams
12
4
 
13
5
 
14
- class InstructionAdherenceScorer(APIScorerConfig):
6
+ class InstructionAdherenceScorer(ExampleAPIScorerConfig):
15
7
  def __init__(self, threshold: float):
16
8
  super().__init__(
17
9
  threshold=threshold,
@@ -21,7 +13,3 @@ class InstructionAdherenceScorer(APIScorerConfig):
21
13
  ExampleParams.ACTUAL_OUTPUT,
22
14
  ],
23
15
  )
24
-
25
- @property
26
- def __name__(self):
27
- return "Instruction Adherence"