judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic.

Files changed (171)
  1. judgeval/__init__.py +177 -12
  2. judgeval/api/__init__.py +519 -0
  3. judgeval/api/api_types.py +407 -0
  4. judgeval/cli.py +79 -0
  5. judgeval/constants.py +76 -47
  6. judgeval/data/__init__.py +3 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +15 -56
  9. judgeval/data/judgment_types.py +450 -0
  10. judgeval/data/result.py +29 -73
  11. judgeval/data/scorer_data.py +29 -62
  12. judgeval/data/scripts/fix_default_factory.py +23 -0
  13. judgeval/data/scripts/openapi_transform.py +123 -0
  14. judgeval/data/trace.py +121 -0
  15. judgeval/dataset/__init__.py +264 -0
  16. judgeval/env.py +52 -0
  17. judgeval/evaluation/__init__.py +344 -0
  18. judgeval/exceptions.py +27 -0
  19. judgeval/integrations/langgraph/__init__.py +13 -0
  20. judgeval/integrations/openlit/__init__.py +50 -0
  21. judgeval/judges/__init__.py +2 -3
  22. judgeval/judges/base_judge.py +2 -3
  23. judgeval/judges/litellm_judge.py +100 -20
  24. judgeval/judges/together_judge.py +101 -20
  25. judgeval/judges/utils.py +20 -24
  26. judgeval/logger.py +62 -0
  27. judgeval/prompt/__init__.py +330 -0
  28. judgeval/scorers/__init__.py +18 -25
  29. judgeval/scorers/agent_scorer.py +17 -0
  30. judgeval/scorers/api_scorer.py +45 -41
  31. judgeval/scorers/base_scorer.py +83 -38
  32. judgeval/scorers/example_scorer.py +17 -0
  33. judgeval/scorers/exceptions.py +1 -0
  34. judgeval/scorers/judgeval_scorers/__init__.py +0 -148
  35. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
  36. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
  37. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
  38. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
  40. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
  41. judgeval/scorers/score.py +77 -306
  42. judgeval/scorers/utils.py +4 -199
  43. judgeval/tracer/__init__.py +1122 -2
  44. judgeval/tracer/constants.py +1 -0
  45. judgeval/tracer/exporters/__init__.py +40 -0
  46. judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval/tracer/exporters/store.py +59 -0
  48. judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval/tracer/keys.py +63 -0
  50. judgeval/tracer/llm/__init__.py +7 -0
  51. judgeval/tracer/llm/config.py +78 -0
  52. judgeval/tracer/llm/constants.py +9 -0
  53. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  54. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  55. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  56. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  57. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  58. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  59. judgeval/tracer/llm/llm_google/config.py +6 -0
  60. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  61. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  62. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  63. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  64. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  65. judgeval/tracer/llm/llm_openai/config.py +6 -0
  66. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  67. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  68. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  69. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  70. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  71. judgeval/tracer/llm/llm_together/config.py +6 -0
  72. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  73. judgeval/tracer/llm/providers.py +19 -0
  74. judgeval/tracer/managers.py +167 -0
  75. judgeval/tracer/processors/__init__.py +220 -0
  76. judgeval/tracer/utils.py +19 -0
  77. judgeval/trainer/__init__.py +14 -0
  78. judgeval/trainer/base_trainer.py +122 -0
  79. judgeval/trainer/config.py +128 -0
  80. judgeval/trainer/console.py +144 -0
  81. judgeval/trainer/fireworks_trainer.py +396 -0
  82. judgeval/trainer/trainable_model.py +243 -0
  83. judgeval/trainer/trainer.py +70 -0
  84. judgeval/utils/async_utils.py +39 -0
  85. judgeval/utils/decorators/__init__.py +0 -0
  86. judgeval/utils/decorators/dont_throw.py +37 -0
  87. judgeval/utils/decorators/use_once.py +13 -0
  88. judgeval/utils/file_utils.py +97 -0
  89. judgeval/utils/guards.py +36 -0
  90. judgeval/utils/meta.py +27 -0
  91. judgeval/utils/project.py +15 -0
  92. judgeval/utils/serialize.py +253 -0
  93. judgeval/utils/testing.py +70 -0
  94. judgeval/utils/url.py +10 -0
  95. judgeval/utils/version_check.py +28 -0
  96. judgeval/utils/wrappers/README.md +3 -0
  97. judgeval/utils/wrappers/__init__.py +15 -0
  98. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  99. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  100. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  101. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  102. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  103. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  104. judgeval/utils/wrappers/py.typed +0 -0
  105. judgeval/utils/wrappers/utils.py +35 -0
  106. judgeval/version.py +5 -0
  107. judgeval/warnings.py +4 -0
  108. judgeval-0.22.2.dist-info/METADATA +265 -0
  109. judgeval-0.22.2.dist-info/RECORD +112 -0
  110. judgeval-0.22.2.dist-info/entry_points.txt +2 -0
  111. judgeval/clients.py +0 -39
  112. judgeval/common/__init__.py +0 -8
  113. judgeval/common/exceptions.py +0 -28
  114. judgeval/common/logger.py +0 -189
  115. judgeval/common/tracer.py +0 -798
  116. judgeval/common/utils.py +0 -763
  117. judgeval/data/api_example.py +0 -111
  118. judgeval/data/datasets/__init__.py +0 -5
  119. judgeval/data/datasets/dataset.py +0 -286
  120. judgeval/data/datasets/eval_dataset_client.py +0 -193
  121. judgeval/data/datasets/ground_truth.py +0 -54
  122. judgeval/data/datasets/utils.py +0 -74
  123. judgeval/evaluation_run.py +0 -132
  124. judgeval/judges/mixture_of_judges.py +0 -248
  125. judgeval/judgment_client.py +0 -354
  126. judgeval/run_evaluation.py +0 -439
  127. judgeval/scorers/judgeval_scorer.py +0 -140
  128. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
  129. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
  130. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
  131. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
  132. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
  133. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
  134. judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
  135. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  136. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  137. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
  138. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
  139. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  140. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
  141. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  142. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  143. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  144. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  145. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  146. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  147. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  148. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  149. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  150. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  151. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  152. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  153. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  154. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  155. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
  156. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  157. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  158. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
  159. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  160. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  161. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  162. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  163. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  164. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
  165. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
  166. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
  167. judgeval/scorers/prompt_scorer.py +0 -439
  168. judgeval-0.0.11.dist-info/METADATA +0 -36
  169. judgeval-0.0.11.dist-info/RECORD +0 -84
  170. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  171. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/trainer/trainer.py ADDED
@@ -0,0 +1,70 @@
+ from typing import Optional
+ from .config import TrainerConfig
+ from .base_trainer import BaseTrainer
+ from .fireworks_trainer import FireworksTrainer
+ from .trainable_model import TrainableModel
+ from judgeval.tracer import Tracer
+ from judgeval.exceptions import JudgmentRuntimeError
+
+
+ def JudgmentTrainer(
+     config: TrainerConfig,
+     trainable_model: TrainableModel,
+     tracer: Tracer,
+     project_name: Optional[str] = None,
+ ) -> BaseTrainer:
+     """
+     Factory function for creating reinforcement learning trainers.
+
+     This factory creates and returns provider-specific trainer implementations
+     (FireworksTrainer, VerifiersTrainer, etc.) based on the configured RFT provider.
+
+     The factory pattern allows for easy extension to support multiple training
+     providers without changing the client-facing API.
+
+     Example:
+         config = TrainerConfig(
+             deployment_id="my-deployment",
+             user_id="my-user",
+             model_id="my-model",
+             rft_provider="fireworks"  # or "verifiers" in the future
+         )
+
+         # User creates and configures the trainable model
+         trainable_model = TrainableModel(config)
+         tracer = Tracer()
+
+         # JudgmentTrainer automatically creates the appropriate provider-specific trainer
+         trainer = JudgmentTrainer(config, trainable_model, tracer)
+
+         # The returned trainer implements the BaseTrainer interface
+         model_config = await trainer.train(agent_function, scorers, prompts)
+
+     Args:
+         config: TrainerConfig instance with training parameters including rft_provider
+         trainable_model: Provider-specific trainable model instance (e.g., TrainableModel for Fireworks)
+         tracer: Tracer for observability
+         project_name: Project name for organizing training runs and evaluations
+
+     Returns:
+         Provider-specific trainer instance (FireworksTrainer, etc.) that implements
+         the BaseTrainer interface
+
+     Raises:
+         JudgmentRuntimeError: If the specified provider is not supported
+     """
+     provider = config.rft_provider.lower()
+
+     if provider == "fireworks":
+         return FireworksTrainer(config, trainable_model, tracer, project_name)
+     elif provider == "verifiers":
+         # Placeholder for future implementation
+         raise JudgmentRuntimeError(
+             "Verifiers provider is not yet implemented. "
+             "Currently supported providers: 'fireworks'"
+         )
+     else:
+         raise JudgmentRuntimeError(
+             f"Unsupported RFT provider: '{config.rft_provider}'. "
+             f"Currently supported providers: 'fireworks'"
+         )
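
A minimal usage sketch (not part of the diff) of the factory dispatch above. It assumes TrainerConfig, TrainableModel, and JudgmentTrainer are re-exported from judgeval.trainer and that the field names match the docstring example; an unsupported rft_provider raises JudgmentRuntimeError.

from judgeval.exceptions import JudgmentRuntimeError
from judgeval.tracer import Tracer
from judgeval.trainer import JudgmentTrainer, TrainableModel, TrainerConfig

# Field names mirror the docstring example and may differ from the real TrainerConfig.
config = TrainerConfig(
    deployment_id="my-deployment",
    user_id="my-user",
    model_id="my-model",
    rft_provider="verifiers",  # declared but not yet implemented
)

try:
    trainer = JudgmentTrainer(config, TrainableModel(config), Tracer())
except JudgmentRuntimeError as err:
    print(err)  # explains that only 'fireworks' is currently supported
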
judgeval/utils/async_utils.py ADDED
@@ -0,0 +1,39 @@
+ """Async utilities for judgeval."""
+
+ import asyncio
+ import concurrent.futures
+ from typing import Awaitable, TypeVar, Coroutine
+
+
+ T = TypeVar("T")
+
+
+ def safe_run_async(coro: Awaitable[T]) -> T:
+     """Safely execute an async *coro* from synchronous code.
+
+     This helper handles two common situations:
+
+     1. **No running event loop** - Simply delegates to ``asyncio.run``.
+     2. **Existing running loop** - Executes the coroutine in a separate
+        thread so that we don't attempt to nest event loops (which would raise
+        ``RuntimeError``).
+
+     Args:
+         coro: The coroutine to execute.
+
+     Returns:
+         The result returned by *coro*.
+     """
+     if not isinstance(coro, Coroutine):
+         raise TypeError("The provided awaitable must be a coroutine.")
+
+     try:
+         asyncio.get_running_loop()
+     except RuntimeError:
+         return asyncio.run(coro)
+
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+         future: concurrent.futures.Future[T] = executor.submit(
+             lambda: asyncio.run(coro)
+         )
+         return future.result()
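
A quick usage sketch (not part of the diff) showing safe_run_async in the two situations it handles: no running loop, and a loop that is already running.

import asyncio

from judgeval.utils.async_utils import safe_run_async

async def add(a: int, b: int) -> int:
    await asyncio.sleep(0)
    return a + b

# No running loop: delegates to asyncio.run.
assert safe_run_async(add(1, 2)) == 3

# Inside a running loop: the coroutine runs on a worker thread
# instead of nesting event loops.
async def main() -> int:
    return safe_run_async(add(3, 4))

assert asyncio.run(main()) == 7
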
judgeval/utils/decorators/__init__.py (file without changes)
judgeval/utils/decorators/dont_throw.py ADDED
@@ -0,0 +1,37 @@
+ from functools import wraps
+ from typing import Any, Callable, ParamSpec, TypeVar, overload
+
+ from judgeval.logger import judgeval_logger
+
+ T = TypeVar("T")
+ D = TypeVar("D")
+ P = ParamSpec("P")
+
+
+ @overload
+ def dont_throw(func: Callable[P, T], /) -> Callable[P, T | None]: ...
+
+
+ @overload
+ def dont_throw(
+     func: None = None, /, *, default: D
+ ) -> Callable[[Callable[P, T]], Callable[P, T | D]]: ...
+
+
+ def dont_throw(func: Callable[P, T] | None = None, /, *, default: Any = None):
+     def decorator(f: Callable[P, T]) -> Callable[P, T | Any]:
+         @wraps(f)
+         def wrapper(*args: P.args, **kwargs: P.kwargs) -> T | Any:
+             try:
+                 return f(*args, **kwargs)
+             except Exception as e:
+                 judgeval_logger.debug(
+                     f"[Caught] An exception was raised in {f.__name__}", exc_info=e
+                 )
+                 return default
+
+         return wrapper
+
+     if func is None:
+         return decorator
+     return decorator(func)
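
A short sketch (not part of the diff) of the two decorator forms defined above: the bare form returns None on error, the keyword form returns the supplied default.

from judgeval.utils.decorators.dont_throw import dont_throw

@dont_throw
def parse_int(value: str) -> int:
    return int(value)

@dont_throw(default=-1)
def parse_int_or_default(value: str) -> int:
    return int(value)

assert parse_int("41") == 41
assert parse_int("oops") is None           # exception is logged, None is returned
assert parse_int_or_default("oops") == -1  # exception is logged, default is returned
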
judgeval/utils/decorators/use_once.py ADDED
@@ -0,0 +1,13 @@
+ from functools import lru_cache, wraps
+ from typing import Callable, TypeVar
+
+ T = TypeVar("T")
+
+
+ def use_once(func: Callable[..., T]) -> Callable[..., T]:
+     @lru_cache(maxsize=1)
+     @wraps(func)
+     def wrapper(*args, **kwargs):
+         return func(*args, **kwargs)
+
+     return wrapper
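
use_once is a thin wrapper over functools.lru_cache(maxsize=1), so repeated identical calls reuse the first result. A minimal sketch (not part of the diff):

from judgeval.utils.decorators.use_once import use_once

calls = {"count": 0}

@use_once
def init_resource() -> str:
    calls["count"] += 1
    return "resource"

assert init_resource() == "resource"
assert init_resource() == "resource"  # cached, the body does not run again
assert calls["count"] == 1
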
judgeval/utils/file_utils.py ADDED
@@ -0,0 +1,97 @@
+ import importlib.util
+ import yaml
+ import orjson
+ from pathlib import Path
+ from typing import List
+ from judgeval.logger import judgeval_logger
+
+ from judgeval.data.example import Example
+
+
+ def get_examples_from_yaml(file_path: str) -> List[Example]:
+     """
+     Adds examples from a YAML file.
+
+     The YAML file is expected to have the following format:
+     - key_01: value_01
+       key_02: value_02
+     - key_11: value_11
+       key_12: value_12
+       key_13: value_13
+     ...
+     """
+     try:
+         with open(file_path, "r") as file:
+             payload = yaml.safe_load(file)
+             if payload is None:
+                 raise ValueError("The YAML file is empty.")
+     except FileNotFoundError:
+         judgeval_logger.error(f"YAML file not found: {file_path}")
+         raise FileNotFoundError(f"The file {file_path} was not found.")
+     except yaml.YAMLError:
+         judgeval_logger.error(f"Invalid YAML file: {file_path}")
+         raise ValueError(f"The file {file_path} is not a valid YAML file.")
+
+     new_examples = [Example(**e) for e in payload]
+     return new_examples
+
+
+ def get_examples_from_json(file_path: str) -> List[Example]:
+     """
+     Adds examples from a JSON file.
+
+     The JSON file is expected to have the following format:
+     [
+         {
+             "key_01": "value_01",
+             "key_02": "value_02"
+         },
+         {
+             "key_11": "value_11",
+             "key_12": "value_12",
+             "key_13": "value_13"
+         },
+         ...
+     ]
+     """
+     try:
+         with open(file_path, "rb") as file:
+             payload = orjson.loads(file.read())
+     except FileNotFoundError:
+         judgeval_logger.error(f"JSON file not found: {file_path}")
+         raise FileNotFoundError(f"The file {file_path} was not found.")
+     except orjson.JSONDecodeError:
+         judgeval_logger.error(f"Invalid JSON file: {file_path}")
+         raise ValueError(f"The file {file_path} is not a valid JSON file.")
+
+     new_examples = [Example(**e) for e in payload]
+     return new_examples
+
+
+ def extract_scorer_name(scorer_file_path: str) -> str:
+     try:
+         spec = importlib.util.spec_from_file_location("scorer_module", scorer_file_path)
+         if spec is None or spec.loader is None:
+             raise ImportError(f"Could not load spec from {scorer_file_path}")
+
+         module = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(module)
+
+         for attr_name in dir(module):
+             attr = getattr(module, attr_name)
+             if (
+                 isinstance(attr, type)
+                 and any("Scorer" in str(base) for base in attr.__mro__)
+                 and attr.__module__ == "scorer_module"
+             ):
+                 try:
+                     scorer_instance = attr()
+                     if hasattr(scorer_instance, "name"):
+                         return scorer_instance.name
+                 except Exception:
+                     continue
+
+         raise AttributeError("No scorer class found or could be instantiated")
+     except Exception as e:
+         judgeval_logger.warning(f"Could not extract scorer name: {e}")
+         return Path(scorer_file_path).stem
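
A sketch (not part of the diff) of loading examples from a JSON file in the documented shape; the keys passed through to Example are illustrative and must match the Example model's actual fields.

import json
import tempfile

from judgeval.utils.file_utils import get_examples_from_json

rows = [{"input": "What is 2 + 2?", "expected_output": "4"}]  # illustrative keys
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as handle:
    json.dump(rows, handle)

examples = get_examples_from_json(handle.name)
print(len(examples))  # 1
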
judgeval/utils/guards.py ADDED
@@ -0,0 +1,36 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+ from judgeval.logger import judgeval_logger
+
+ if TYPE_CHECKING:
+     from typing import TypeVar
+
+     T = TypeVar("T")
+
+
+ def expect_exists(value: T | None, message: str, default: T) -> T:
+     if not value:
+         judgeval_logger.error(message)
+         return default
+
+     return value
+
+
+ def expect_api_key(api_key: str | None) -> str | None:
+     return expect_exists(
+         api_key,
+         "API Key is not set, please set JUDGMENT_API_KEY in the environment variables or pass it as `api_key`",
+         default=None,
+     )
+
+
+ def expect_organization_id(organization_id: str | None) -> str | None:
+     return expect_exists(
+         organization_id,
+         "Organization ID is not set, please set JUDGMENT_ORG_ID in the environment variables or pass it as `organization_id`",
+         default=None,
+     )
+
+
+ __all__ = ("expect_exists", "expect_api_key", "expect_organization_id")
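
expect_exists logs the supplied message and returns the default for any falsy value, so both None and an empty string fall back. A small sketch (not part of the diff), with a placeholder key:

from judgeval.utils.guards import expect_api_key, expect_exists

assert expect_api_key("placeholder-key") == "placeholder-key"
assert expect_api_key(None) is None  # the missing-key error is logged
assert expect_exists("", "value is missing", default="fallback") == "fallback"
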
judgeval/utils/meta.py ADDED
@@ -0,0 +1,27 @@
+ from __future__ import annotations
+ from typing import TypeVar, Dict, cast, Type
+
+ T = TypeVar("T")
+
+
+ class SingletonMeta(type):
+     """
+     Metaclass for creating singleton classes.
+     """
+
+     _instances: Dict[type, object] = {}
+
+     def __call__(cls, *args, **kwargs):
+         if cls not in SingletonMeta._instances:
+             SingletonMeta._instances[cls] = super(SingletonMeta, cls).__call__(
+                 *args, **kwargs
+             )
+         return SingletonMeta._instances[cls]
+
+     def get_instance(cls: Type[T]) -> T | None:
+         """Get the singleton instance if it exists, otherwise return None"""
+         instance = SingletonMeta._instances.get(cls, None)
+         return cast(T, instance) if instance is not None else None
+
+
+ __all__ = ("SingletonMeta",)
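
A sketch (not part of the diff) of SingletonMeta: the second construction returns the first instance without re-running __init__, and get_instance exposes it (or None before anything has been built).

from judgeval.utils.meta import SingletonMeta

class Settings(metaclass=SingletonMeta):
    def __init__(self, env: str = "prod"):
        self.env = env

assert Settings.get_instance() is None  # nothing constructed yet
first = Settings(env="dev")
second = Settings(env="prod")           # ignored, the singleton already exists
assert first is second and second.env == "dev"
assert Settings.get_instance() is first
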
judgeval/utils/project.py ADDED
@@ -0,0 +1,15 @@
+ from judgeval.utils.decorators.dont_throw import dont_throw
+ import functools
+ from judgeval.api import JudgmentSyncClient
+
+
+ @dont_throw
+ @functools.lru_cache(maxsize=64)
+ def _resolve_project_id(project_name: str, api_key: str, organization_id: str) -> str:
+     """Resolve project_id from project_name using the API."""
+     client = JudgmentSyncClient(
+         api_key=api_key,
+         organization_id=organization_id,
+     )
+     response = client.projects_resolve({"project_name": project_name})
+     return response["project_id"]
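
_resolve_project_id is cached per (project_name, api_key, organization_id) and, because of @dont_throw, returns None instead of raising if the API call fails. A sketch (not part of the diff) with placeholder credentials:

from judgeval.utils.project import _resolve_project_id

project_id = _resolve_project_id("my-project", "<api-key>", "<org-id>")
if project_id is None:
    print("could not resolve the project (bad credentials or API error)")
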
judgeval/utils/serialize.py ADDED
@@ -0,0 +1,253 @@
+ """
+
+ This is a modified version of https://docs.powertools.aws.dev/lambda/python/2.35.1/api/event_handler/openapi/encoders.html
+
+ """
+
+ import dataclasses
+ import datetime
+ from collections import defaultdict, deque
+ from decimal import Decimal
+ from enum import Enum
+ from pathlib import Path, PurePath
+ from re import Pattern
+ from types import GeneratorType
+ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, Union
+ from uuid import UUID
+
+ from pydantic import BaseModel
+ from pydantic.types import SecretBytes, SecretStr
+ import orjson
+
+ from judgeval.logger import judgeval_logger
+
+
+ """
+ This module contains the encoders used by jsonable_encoder to convert Python objects to JSON serializable data types.
+ """
+
+
+ def _model_dump(
+     model: BaseModel, mode: Literal["json", "python"] = "json", **kwargs: Any
+ ) -> Any:
+     return model.model_dump(mode=mode, **kwargs)
+
+
+ def json_encoder(
+     obj: Any,
+     custom_serializer: Optional[Callable[[Any], str]] = None,
+ ) -> Any:
+     """
+     JSON encodes an arbitrary Python object into JSON serializable data types.
+
+     This is a modified version of fastapi.encoders.jsonable_encoder that supports
+     encoding of pydantic.BaseModel objects.
+
+     Parameters
+     ----------
+     obj : Any
+         The object to encode
+     custom_serializer : Callable, optional
+         A custom serializer to use for encoding the object, when everything else fails.
+
+     Returns
+     -------
+     Any
+         The JSON serializable data types
+     """
+     # Pydantic models
+     if isinstance(obj, BaseModel):
+         return _dump_base_model(
+             obj=obj,
+         )
+
+     # Dataclasses
+     if dataclasses.is_dataclass(obj):
+         obj_dict = dataclasses.asdict(obj)  # type: ignore[arg-type]
+         return json_encoder(
+             obj_dict,
+         )
+
+     # Enums
+     if isinstance(obj, Enum):
+         return obj.value
+
+     # Paths
+     if isinstance(obj, PurePath):
+         return str(obj)
+
+     # Scalars
+     if isinstance(obj, (str, int, float, type(None))):
+         return obj
+
+     # Dictionaries
+     if isinstance(obj, dict):
+         return _dump_dict(
+             obj=obj,
+         )
+
+     # Sequences
+     if isinstance(obj, (list, set, frozenset, tuple, deque)):
+         return _dump_sequence(
+             obj=obj,
+         )
+
+     # Other types
+     if type(obj) in ENCODERS_BY_TYPE:
+         return ENCODERS_BY_TYPE[type(obj)](obj)
+
+     for encoder, classes_tuple in encoders_by_class_tuples.items():
+         if isinstance(obj, classes_tuple):
+             return encoder(obj)
+
+     # Use custom serializer if present
+     if custom_serializer:
+         return custom_serializer(obj)
+
+     # Default
+     return _dump_other(
+         obj=obj,
+     )
+
+
+ def _dump_base_model(
+     *,
+     obj: Any,
+ ):
+     """
+     Dump a BaseModel object to a dict, using the same parameters as jsonable_encoder
+     """
+     obj_dict = _model_dump(
+         obj,
+         mode="json",
+     )
+     if "__root__" in obj_dict:
+         obj_dict = obj_dict["__root__"]
+
+     return json_encoder(
+         obj_dict,
+     )
+
+
+ def _dump_dict(
+     *,
+     obj: Any,
+ ) -> Dict[str, Any]:
+     """
+     Dump a dict to a dict, using the same parameters as jsonable_encoder
+     """
+     encoded_dict = {}
+     allowed_keys = set(obj.keys())
+     for key, value in obj.items():
+         if key in allowed_keys:
+             encoded_key = json_encoder(
+                 key,
+             )
+             encoded_value = json_encoder(
+                 value,
+             )
+             encoded_dict[encoded_key] = encoded_value
+     return encoded_dict
+
+
+ def _dump_sequence(
+     *,
+     obj: Any,
+ ) -> List[Any]:
+     """
+     Dump a sequence to a list, using the same parameters as jsonable_encoder
+     """
+     encoded_list = []
+     for item in obj:
+         encoded_list.append(
+             json_encoder(
+                 item,
+             ),
+         )
+     return encoded_list
+
+
+ def _dump_other(
+     *,
+     obj: Any,
+ ) -> Any:
+     """
+     Dump an object to a representation without iterating it.
+
+     Avoids calling dict(obj) which can consume iterators/generators or
+     invoke user-defined iteration protocols.
+     """
+     try:
+         return repr(obj)
+     except Exception:
+         return str(obj)
+
+
+ def iso_format(o: Union[datetime.date, datetime.time]) -> str:
+     """
+     ISO format for date and time
+     """
+     return o.isoformat()
+
+
+ def decimal_encoder(dec_value: Decimal) -> Union[int, float]:
+     """
+     Encodes a Decimal as int of there's no exponent, otherwise float
+
+     This is useful when we use ConstrainedDecimal to represent Numeric(x,0)
+     where an integer (but not int typed) is used. Encoding this as a float
+     results in failed round-tripping between encode and parse.
+
+     >>> decimal_encoder(Decimal("1.0"))
+     1.0
+
+     >>> decimal_encoder(Decimal("1"))
+     1
+     """
+     if dec_value.as_tuple().exponent >= 0:  # type: ignore[operator]
+         return int(dec_value)
+     else:
+         return float(dec_value)
+
+
+ ENCODERS_BY_TYPE: Dict[Type[Any], Callable[[Any], Any]] = {
+     bytes: lambda o: o.decode(),
+     datetime.date: iso_format,
+     datetime.datetime: iso_format,
+     datetime.time: iso_format,
+     datetime.timedelta: lambda td: td.total_seconds(),
+     Decimal: decimal_encoder,
+     Enum: lambda o: o.value,
+     frozenset: list,
+     deque: list,
+     GeneratorType: repr,
+     Path: str,
+     Pattern: lambda o: o.pattern,
+     SecretBytes: str,
+     SecretStr: str,
+     set: list,
+     UUID: str,
+ }
+
+
+ # Generates a mapping of encoders to a tuple of classes that they can encode
+ def generate_encoders_by_class_tuples(
+     type_encoder_map: Dict[Any, Callable[[Any], Any]],
+ ) -> Dict[Callable[[Any], Any], Tuple[Any, ...]]:
+     encoders: Dict[Callable[[Any], Any], Tuple[Any, ...]] = defaultdict(tuple)
+     for type_, encoder in type_encoder_map.items():
+         encoders[encoder] += (type_,)
+     return encoders
+
+
+ # Mapping of encoders to a tuple of classes that they can encode
+ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)
+
+
+ # Seralize arbitrary object to a json string
+ def safe_serialize(obj: Any) -> str:
+     try:
+         return orjson.dumps(json_encoder(obj), option=orjson.OPT_NON_STR_KEYS).decode()
+     except Exception as e:
+         judgeval_logger.warning(f"Error serializing object: {e}")
+         return repr(obj)
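
A sketch (not part of the diff) of safe_serialize over a dict that mixes several of the types covered by ENCODERS_BY_TYPE above.

import datetime
import uuid
from decimal import Decimal
from enum import Enum

from judgeval.utils.serialize import safe_serialize

class Status(Enum):
    OK = "ok"

payload = {
    "id": uuid.uuid4(),                         # UUID -> str
    "when": datetime.datetime(2024, 1, 1, 12),  # datetime -> ISO string
    "cost": Decimal("1.25"),                    # Decimal -> float
    "status": Status.OK,                        # Enum -> its value
    "tags": {"fast", "cheap"},                  # set -> list
}
print(safe_serialize(payload))
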
judgeval/utils/testing.py ADDED
@@ -0,0 +1,70 @@
+ from rich import print as rprint
+
+ from typing import List
+ from judgeval.evaluation import ScoringResult
+ from judgeval.data import ScorerData
+ from judgeval.exceptions import JudgmentTestError
+
+
+ def assert_test_results(scoring_results: List[ScoringResult]) -> None:
+     failed_cases: List[List[ScorerData]] = []
+     for result in scoring_results:
+         if not result.success:
+             test_case = []
+             if result.scorers_data:
+                 for scorer_data in result.scorers_data:
+                     if not scorer_data.success:
+                         test_case.append(scorer_data)
+             failed_cases.append(test_case)
+
+     if failed_cases:
+         error_msg = "The following test cases failed: \n"
+         for fail_case in failed_cases:
+             for fail_scorer in fail_case:
+                 error_msg += (
+                     f"\nScorer Name: {fail_scorer.name}\n"
+                     f"Threshold: {fail_scorer.threshold}\n"
+                     f"Success: {fail_scorer.success}\n"
+                     f"Score: {fail_scorer.score}\n"
+                     f"Reason: {fail_scorer.reason}\n"
+                     f"Strict Mode: {fail_scorer.strict_mode}\n"
+                     f"Evaluation Model: {fail_scorer.evaluation_model}\n"
+                     f"Error: {fail_scorer.error}\n"
+                     f"Additional Metadata: {fail_scorer.additional_metadata}\n"
+                 )
+             error_msg += "-" * 100
+
+     total_tests = len(scoring_results)
+     failed_tests = len(failed_cases)
+     passed_tests = total_tests - failed_tests
+
+     rprint("\n" + "=" * 80)
+     if failed_tests == 0:
+         rprint(
+             f"[bold green]🎉 ALL TESTS PASSED! {passed_tests}/{total_tests} tests successful[/bold green]"
+         )
+     else:
+         rprint(
+             f"[bold red]⚠️ TEST RESULTS: {passed_tests}/{total_tests} passed ({failed_tests} failed)[/bold red]"
+         )
+     rprint("=" * 80 + "\n")
+
+     for i, result in enumerate(scoring_results):
+         test_num = i + 1
+         if result.success:
+             rprint(f"[green]✓ Test {test_num}: PASSED[/green]")
+         else:
+             rprint(f"[red]✗ Test {test_num}: FAILED[/red]")
+             if result.scorers_data:
+                 for scorer_data in result.scorers_data:
+                     if not scorer_data.success:
+                         rprint(f" [yellow]Scorer: {scorer_data.name}[/yellow]")
+                         rprint(f" [red] Score: {scorer_data.score}[/red]")
+                         rprint(f" [red] Reason: {scorer_data.reason}[/red]")
+                         if scorer_data.error:
+                             rprint(f" [red] Error: {scorer_data.error}[/red]")
+                         rprint(" " + "-" * 40)
+
+     rprint("\n" + "=" * 80)
+     if failed_tests > 0:
+         raise JudgmentTestError(failed_cases)
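
assert_test_results only reads the success and scorers_data attributes used above, so this sketch (not part of the diff) uses lightweight stand-ins rather than real ScoringResult objects; in practice the list comes from an evaluation run.

from types import SimpleNamespace

from judgeval.exceptions import JudgmentTestError
from judgeval.utils.testing import assert_test_results

passing = SimpleNamespace(success=True, scorers_data=[])
failing_scorer = SimpleNamespace(
    name="Faithfulness", threshold=0.7, success=False, score=0.4,
    reason="claim not grounded in the retrieval context", strict_mode=False,
    evaluation_model="gpt-4o", error=None, additional_metadata=None,
)
failing = SimpleNamespace(success=False, scorers_data=[failing_scorer])

try:
    assert_test_results([passing, failing])  # prints the summary shown above
except JudgmentTestError:
    print("at least one test case failed")
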
judgeval/utils/url.py ADDED
@@ -0,0 +1,10 @@
+ from urllib.parse import urljoin
+
+ from judgeval.env import JUDGMENT_API_URL
+
+
+ def url_for(path: str, base: str = JUDGMENT_API_URL) -> str:
+     return urljoin(base, path)
+
+
+ __all__ = ("url_for",)
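
url_for is a plain urljoin against the configured base URL; passing an explicit base makes the behaviour easy to see in isolation (sketch, not part of the diff):

from judgeval.utils.url import url_for

assert url_for("otel/v1/traces", base="https://api.example.test/") == "https://api.example.test/otel/v1/traces"
print(url_for("/healthz"))  # joined onto the default JUDGMENT_API_URL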