judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/utils/url.py ADDED
@@ -0,0 +1,10 @@
1
+ from urllib.parse import urljoin
2
+
3
+ from judgeval.env import JUDGMENT_API_URL
4
+
5
+
6
def url_for(path: str, base: str = JUDGMENT_API_URL) -> str:
    """Resolve ``path`` against ``base`` and return the resulting URL.

    Args:
        path: URL or path fragment to resolve.
        base: Base URL to resolve against; defaults to ``JUDGMENT_API_URL``
            as imported from ``judgeval.env``.

    Returns:
        Whatever ``urllib.parse.urljoin(base, path)`` produces.
    """
    resolved = urljoin(base, path)
    return resolved


__all__ = ("url_for",)
@@ -1,14 +1,16 @@
1
1
  import importlib.metadata
2
- from judgeval.utils.requests import requests
2
+ import httpx
3
3
  import threading
4
- from judgeval.common.logger import judgeval_logger
4
+ from judgeval.logger import judgeval_logger
5
+ from judgeval.utils.decorators.use_once import use_once
5
6
 
6
7
 
8
+ @use_once
7
9
  def check_latest_version(package_name: str = "judgeval"):
8
10
  def _check():
9
11
  try:
10
12
  current_version = importlib.metadata.version(package_name)
11
- response = requests.get(
13
+ response = httpx.get(
12
14
  f"https://pypi.org/pypi/{package_name}/json", timeout=2
13
15
  )
14
16
  latest_version = response.json()["info"]["version"]
@@ -0,0 +1,3 @@
1
+ # Wrapper Utilities
2
+
3
+ Ensure 100% test coverage for all files in this folder
@@ -0,0 +1,15 @@
1
+ from .immutable_wrap_sync import immutable_wrap_sync
2
+ from .immutable_wrap_async import immutable_wrap_async
3
+ from .immutable_wrap_sync_iterator import immutable_wrap_sync_iterator
4
+ from .immutable_wrap_async_iterator import immutable_wrap_async_iterator
5
+ from .mutable_wrap_sync import mutable_wrap_sync
6
+ from .mutable_wrap_async import mutable_wrap_async
7
+
8
+ __all__ = [
9
+ "immutable_wrap_sync",
10
+ "immutable_wrap_async",
11
+ "immutable_wrap_sync_iterator",
12
+ "immutable_wrap_async_iterator",
13
+ "mutable_wrap_sync",
14
+ "mutable_wrap_async",
15
+ ]
@@ -0,0 +1,74 @@
1
+ from functools import wraps
2
+ from typing import (
3
+ Awaitable,
4
+ Callable,
5
+ TypeVar,
6
+ Any,
7
+ Dict,
8
+ ParamSpec,
9
+ Concatenate,
10
+ )
11
+
12
+ from judgeval.utils.decorators.dont_throw import dont_throw
13
+
14
P = ParamSpec("P")
R = TypeVar("R")
Ctx = Dict[str, Any]


def _void_pre_hook(ctx: Ctx, *args: Any, **kwargs: Any) -> None:
    """No-op default for ``pre_hook``."""
    return None


def _void_post_hook(ctx: Ctx, result: Any) -> None:
    """No-op default for ``post_hook``."""
    return None


def _void_error_hook(ctx: Ctx, error: Exception) -> None:
    """No-op default for ``error_hook``."""
    return None


def _void_finally_hook(ctx: Ctx) -> None:
    """No-op default for ``finally_hook``."""
    return None


def immutable_wrap_async(
    func: Callable[P, Awaitable[R]],
    /,
    *,
    pre_hook: Callable[Concatenate[Ctx, P], None] = _void_pre_hook,
    post_hook: Callable[[Ctx, R], None] = _void_post_hook,
    error_hook: Callable[[Ctx, Exception], None] = _void_error_hook,
    finally_hook: Callable[[Ctx], None] = _void_finally_hook,
) -> Callable[P, Awaitable[R]]:
    """
    Decorate an async callable with observation-only lifecycle hooks.

    A fresh, empty ``ctx`` dict is created for every call and handed to each
    hook, so hooks can pass state to one another.

    - pre_hook: runs before ``func`` with (ctx, *args, **kwargs)
    - post_hook: runs after ``func`` returns, with (ctx, result)
    - error_hook: runs when an ``Exception`` escapes, with (ctx, error)
    - finally_hook: runs last, from the ``finally`` clause, with (ctx)

    The awaited result is returned untouched and exceptions are re-raised.
    Every hook is passed through ``dont_throw`` first (imported from
    ``judgeval.utils.decorators.dont_throw``; presumably it keeps hook
    failures from reaching the caller - confirm against that module).
    """

    guarded_pre = dont_throw(pre_hook)
    guarded_post = dont_throw(post_hook)
    guarded_error = dont_throw(error_hook)
    guarded_finally = dont_throw(finally_hook)

    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        context: Ctx = {}
        guarded_pre(context, *args, **kwargs)
        try:
            outcome = await func(*args, **kwargs)
            guarded_post(context, outcome)
            return outcome
        except Exception as exc:
            guarded_error(context, exc)
            raise
        finally:
            guarded_finally(context)

    # Same effect as decorating ``wrapper`` with ``@wraps(func)``.
    return wraps(func)(wrapper)
@@ -0,0 +1,84 @@
1
+ from functools import wraps
2
+ from typing import (
3
+ Callable,
4
+ TypeVar,
5
+ Any,
6
+ Dict,
7
+ Mapping,
8
+ ParamSpec,
9
+ AsyncIterator,
10
+ Concatenate,
11
+ )
12
+
13
+ from judgeval.utils.decorators.dont_throw import dont_throw
14
+
15
P = ParamSpec("P")
Y = TypeVar("Y")
Ctx = Dict[str, Any]
ImmCtx = Mapping[str, Any]


def _void_pre_hook(ctx: Ctx, *args: Any, **kwargs: Any) -> None:
    """No-op default for ``pre_hook``."""
    return None


def _void_yield_hook(ctx: Ctx, value: Any) -> None:
    """No-op default for ``yield_hook``."""
    return None


def _void_post_hook(ctx: Ctx) -> None:
    """No-op default for ``post_hook``."""
    return None


def _void_error_hook(ctx: Ctx, error: Exception) -> None:
    """No-op default for ``error_hook``."""
    return None


def _void_finally_hook(ctx: Ctx) -> None:
    """No-op default for ``finally_hook``."""
    return None


def immutable_wrap_async_iterator(
    func: Callable[P, AsyncIterator[Y]],
    /,
    *,
    pre_hook: Callable[Concatenate[Ctx, P], None] = _void_pre_hook,
    yield_hook: Callable[[Ctx, Y], None] = _void_yield_hook,
    post_hook: Callable[[Ctx], None] = _void_post_hook,
    error_hook: Callable[[Ctx, Exception], None] = _void_error_hook,
    finally_hook: Callable[[Ctx], None] = _void_finally_hook,
) -> Callable[P, AsyncIterator[Y]]:
    """
    Decorate an async-iterator-returning callable with observation-only hooks.

    The returned wrapper is an async generator function, so nothing below
    runs until the consumer starts iterating. Each iteration run gets its own
    empty ``ctx`` dict, shared by all hooks.

    - pre_hook: runs once when iteration starts, with (ctx, *args, **kwargs)
    - yield_hook: runs for each value, just before it is yielded, with (ctx, value)
    - post_hook: runs once the source is exhausted normally, with (ctx)
    - error_hook: runs when an ``Exception`` escapes the loop, with (ctx, error)
    - finally_hook: runs last, from the ``finally`` clause, with (ctx)

    Values are yielded untouched and exceptions are re-raised. Every hook is
    passed through ``dont_throw`` first (imported from
    ``judgeval.utils.decorators.dont_throw``; presumably it keeps hook
    failures from reaching the consumer - confirm against that module).
    """

    on_start = dont_throw(pre_hook)
    on_value = dont_throw(yield_hook)
    on_exhausted = dont_throw(post_hook)
    on_error = dont_throw(error_hook)
    on_close = dont_throw(finally_hook)

    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> AsyncIterator[Y]:
        state: Ctx = {}
        on_start(state, *args, **kwargs)
        try:
            async for item in func(*args, **kwargs):
                on_value(state, item)
                yield item
            on_exhausted(state)
        except Exception as exc:
            on_error(state, exc)
            raise
        finally:
            on_close(state)

    # Same effect as decorating ``wrapper`` with ``@wraps(func)``.
    return wraps(func)(wrapper)
@@ -0,0 +1,66 @@
1
+ from functools import wraps
2
+ from typing import Callable, TypeVar, Any, Dict, ParamSpec, Concatenate
3
+
4
+ from judgeval.utils.decorators.dont_throw import dont_throw
5
+
6
P = ParamSpec("P")
R = TypeVar("R")
Ctx = Dict[str, Any]


def _void_pre_hook(ctx: Ctx, *args: Any, **kwargs: Any) -> None:
    """No-op default for ``pre_hook``."""
    return None


def _void_post_hook(ctx: Ctx, result: Any) -> None:
    """No-op default for ``post_hook``."""
    return None


def _void_error_hook(ctx: Ctx, error: Exception) -> None:
    """No-op default for ``error_hook``."""
    return None


def _void_finally_hook(ctx: Ctx) -> None:
    """No-op default for ``finally_hook``."""
    return None


def immutable_wrap_sync(
    func: Callable[P, R],
    /,
    *,
    pre_hook: Callable[Concatenate[Ctx, P], None] = _void_pre_hook,
    post_hook: Callable[[Ctx, R], None] = _void_post_hook,
    error_hook: Callable[[Ctx, Exception], None] = _void_error_hook,
    finally_hook: Callable[[Ctx], None] = _void_finally_hook,
) -> Callable[P, R]:
    """
    Decorate a synchronous callable with observation-only lifecycle hooks.

    A fresh, empty ``ctx`` dict is created for every call and handed to each
    hook, so hooks can pass state to one another.

    - pre_hook: runs before ``func`` with (ctx, *args, **kwargs)
    - post_hook: runs after ``func`` returns, with (ctx, result)
    - error_hook: runs when an ``Exception`` escapes, with (ctx, error)
    - finally_hook: runs last, from the ``finally`` clause, with (ctx)

    The result is returned untouched and exceptions are re-raised. Every hook
    is passed through ``dont_throw`` first (imported from
    ``judgeval.utils.decorators.dont_throw``; presumably it keeps hook
    failures from reaching the caller - confirm against that module).
    """

    guarded_pre = dont_throw(pre_hook)
    guarded_post = dont_throw(post_hook)
    guarded_error = dont_throw(error_hook)
    guarded_finally = dont_throw(finally_hook)

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        context: Ctx = {}
        guarded_pre(context, *args, **kwargs)
        try:
            outcome = func(*args, **kwargs)
            guarded_post(context, outcome)
            return outcome
        except Exception as exc:
            guarded_error(context, exc)
            raise
        finally:
            guarded_finally(context)

    # Same effect as decorating ``wrapper`` with ``@wraps(func)``.
    return wraps(func)(wrapper)
@@ -0,0 +1,84 @@
1
+ from functools import wraps
2
+ from typing import (
3
+ Callable,
4
+ TypeVar,
5
+ Any,
6
+ Dict,
7
+ Mapping,
8
+ ParamSpec,
9
+ Iterator,
10
+ Concatenate,
11
+ )
12
+
13
+ from judgeval.utils.decorators.dont_throw import dont_throw
14
+
15
P = ParamSpec("P")
Y = TypeVar("Y")
Ctx = Dict[str, Any]
ImmCtx = Mapping[str, Any]


def _void_pre_hook(ctx: Ctx, *args: Any, **kwargs: Any) -> None:
    """No-op default for ``pre_hook``."""
    return None


def _void_yield_hook(ctx: Ctx, value: Any) -> None:
    """No-op default for ``yield_hook``."""
    return None


def _void_post_hook(ctx: Ctx) -> None:
    """No-op default for ``post_hook``."""
    return None


def _void_error_hook(ctx: Ctx, error: Exception) -> None:
    """No-op default for ``error_hook``."""
    return None


def _void_finally_hook(ctx: Ctx) -> None:
    """No-op default for ``finally_hook``."""
    return None


def immutable_wrap_sync_iterator(
    func: Callable[P, Iterator[Y]],
    /,
    *,
    pre_hook: Callable[Concatenate[Ctx, P], None] = _void_pre_hook,
    yield_hook: Callable[[Ctx, Y], None] = _void_yield_hook,
    post_hook: Callable[[Ctx], None] = _void_post_hook,
    error_hook: Callable[[Ctx, Exception], None] = _void_error_hook,
    finally_hook: Callable[[Ctx], None] = _void_finally_hook,
) -> Callable[P, Iterator[Y]]:
    """
    Decorate an iterator-returning callable with observation-only hooks.

    The returned wrapper is a generator function, so nothing below runs until
    the consumer starts iterating. Each iteration run gets its own empty
    ``ctx`` dict, shared by all hooks.

    - pre_hook: runs once when iteration starts, with (ctx, *args, **kwargs)
    - yield_hook: runs for each value, just before it is yielded, with (ctx, value)
    - post_hook: runs once the source is exhausted normally, with (ctx)
    - error_hook: runs when an ``Exception`` escapes the loop, with (ctx, error)
    - finally_hook: runs last, from the ``finally`` clause, with (ctx)

    Values are yielded untouched and exceptions are re-raised. Every hook is
    passed through ``dont_throw`` first (imported from
    ``judgeval.utils.decorators.dont_throw``; presumably it keeps hook
    failures from reaching the consumer - confirm against that module).
    """

    on_start = dont_throw(pre_hook)
    on_value = dont_throw(yield_hook)
    on_exhausted = dont_throw(post_hook)
    on_error = dont_throw(error_hook)
    on_close = dont_throw(finally_hook)

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> Iterator[Y]:
        state: Ctx = {}
        on_start(state, *args, **kwargs)
        try:
            for item in func(*args, **kwargs):
                on_value(state, item)
                yield item
            on_exhausted(state)
        except Exception as exc:
            on_error(state, exc)
            raise
        finally:
            on_close(state)

    # Same effect as decorating ``wrapper`` with ``@wraps(func)``.
    return wraps(func)(wrapper)
@@ -0,0 +1,67 @@
1
+ from functools import wraps
2
+ from typing import Awaitable, Callable, TypeVar, Any, Dict, ParamSpec, Concatenate
3
+
4
+ from judgeval.utils.decorators.dont_throw import dont_throw
5
+ from judgeval.utils.wrappers.utils import identity_on_throw
6
+
7
+ P = ParamSpec("P")
8
+ R = TypeVar("R")
9
+ Ctx = Dict[str, Any]
10
+
11
+
12
+ def mutable_wrap_async(
13
+ func: Callable[P, Awaitable[R]],
14
+ /,
15
+ *,
16
+ pre_hook: Callable[Concatenate[Ctx, P], None] | None = None,
17
+ mutate_args_hook: Callable[[Ctx, tuple[Any, ...]], tuple[Any, ...]] | None = None,
18
+ mutate_kwargs_hook: Callable[[Ctx, dict[str, Any]], dict[str, Any]] | None = None,
19
+ post_hook: Callable[[Ctx, R], None] | None = None,
20
+ mutate_hook: Callable[[Ctx, R], R] | None = None,
21
+ error_hook: Callable[[Ctx, Exception], None] | None = None,
22
+ finally_hook: Callable[[Ctx], None] | None = None,
23
+ ) -> Callable[P, Awaitable[R]]:
24
+ """
25
+ Wraps an async function with lifecycle hooks that can mutate args, kwargs, and result.
26
+
27
+ - pre_hook: called before func with (ctx, *args, **kwargs) matching func's signature
28
+ - mutate_args_hook: called after pre_hook with (ctx, args), returns potentially modified args
29
+ - mutate_kwargs_hook: called after pre_hook with (ctx, kwargs), returns potentially modified kwargs
30
+ - post_hook: called after successful func execution with (ctx, result)
31
+ - mutate_hook: called after post_hook with (ctx, result), returns potentially modified result
32
+ - error_hook: called if func raises an exception with (ctx, error)
33
+ - finally_hook: called in finally block with (ctx)
34
+
35
+ The mutate hooks can transform args/kwargs/result. Exceptions are re-raised.
36
+ """
37
+
38
+ safe_pre_hook = dont_throw(pre_hook) if pre_hook else (lambda ctx, *a, **kw: None)
39
+ safe_post_hook = dont_throw(post_hook) if post_hook else (lambda ctx, r: None)
40
+ safe_error_hook = dont_throw(error_hook) if error_hook else (lambda ctx, e: None)
41
+ safe_finally_hook = dont_throw(finally_hook) if finally_hook else (lambda ctx: None)
42
+
43
+ safe_mutate_args = identity_on_throw(mutate_args_hook) if mutate_args_hook else None
44
+ safe_mutate_kwargs = (
45
+ identity_on_throw(mutate_kwargs_hook) if mutate_kwargs_hook else None
46
+ )
47
+ safe_mutate_hook = identity_on_throw(mutate_hook) if mutate_hook else None
48
+
49
+ @wraps(func)
50
+ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
51
+ ctx: Ctx = {}
52
+ safe_pre_hook(ctx, *args, **kwargs)
53
+
54
+ final_args = safe_mutate_args(ctx, args) if safe_mutate_args else args
55
+ final_kwargs = safe_mutate_kwargs(ctx, kwargs) if safe_mutate_kwargs else kwargs
56
+
57
+ try:
58
+ result = await func(*final_args, **final_kwargs)
59
+ safe_post_hook(ctx, result)
60
+ return safe_mutate_hook(ctx, result) if safe_mutate_hook else result
61
+ except Exception as e:
62
+ safe_error_hook(ctx, e)
63
+ raise
64
+ finally:
65
+ safe_finally_hook(ctx)
66
+
67
+ return wrapper
@@ -0,0 +1,67 @@
1
+ from functools import wraps
2
+ from typing import Callable, TypeVar, Any, Dict, ParamSpec, Concatenate
3
+
4
+ from judgeval.utils.decorators.dont_throw import dont_throw
5
+ from judgeval.utils.wrappers.utils import identity_on_throw
6
+
7
+ P = ParamSpec("P")
8
+ R = TypeVar("R")
9
+ Ctx = Dict[str, Any]
10
+
11
+
12
+ def mutable_wrap_sync(
13
+ func: Callable[P, R],
14
+ /,
15
+ *,
16
+ pre_hook: Callable[Concatenate[Ctx, P], None] | None = None,
17
+ mutate_args_hook: Callable[[Ctx, tuple[Any, ...]], tuple[Any, ...]] | None = None,
18
+ mutate_kwargs_hook: Callable[[Ctx, dict[str, Any]], dict[str, Any]] | None = None,
19
+ post_hook: Callable[[Ctx, R], None] | None = None,
20
+ mutate_hook: Callable[[Ctx, R], R] | None = None,
21
+ error_hook: Callable[[Ctx, Exception], None] | None = None,
22
+ finally_hook: Callable[[Ctx], None] | None = None,
23
+ ) -> Callable[P, R]:
24
+ """
25
+ Wraps a function with lifecycle hooks that can mutate args, kwargs, and result.
26
+
27
+ - pre_hook: called before func with (ctx, *args, **kwargs) matching func's signature
28
+ - mutate_args_hook: called after pre_hook with (ctx, args), returns potentially modified args
29
+ - mutate_kwargs_hook: called after pre_hook with (ctx, kwargs), returns potentially modified kwargs
30
+ - post_hook: called after successful func execution with (ctx, result)
31
+ - mutate_hook: called after post_hook with (ctx, result), returns potentially modified result
32
+ - error_hook: called if func raises an exception with (ctx, error)
33
+ - finally_hook: called in finally block with (ctx)
34
+
35
+ The mutate hooks can transform args/kwargs/result. Exceptions are re-raised.
36
+ """
37
+
38
+ safe_pre_hook = dont_throw(pre_hook) if pre_hook else (lambda ctx, *a, **kw: None)
39
+ safe_post_hook = dont_throw(post_hook) if post_hook else (lambda ctx, r: None)
40
+ safe_error_hook = dont_throw(error_hook) if error_hook else (lambda ctx, e: None)
41
+ safe_finally_hook = dont_throw(finally_hook) if finally_hook else (lambda ctx: None)
42
+
43
+ safe_mutate_args = identity_on_throw(mutate_args_hook) if mutate_args_hook else None
44
+ safe_mutate_kwargs = (
45
+ identity_on_throw(mutate_kwargs_hook) if mutate_kwargs_hook else None
46
+ )
47
+ safe_mutate_hook = identity_on_throw(mutate_hook) if mutate_hook else None
48
+
49
+ @wraps(func)
50
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
51
+ ctx: Ctx = {}
52
+ safe_pre_hook(ctx, *args, **kwargs)
53
+
54
+ final_args = safe_mutate_args(ctx, args) if safe_mutate_args else args
55
+ final_kwargs = safe_mutate_kwargs(ctx, kwargs) if safe_mutate_kwargs else kwargs
56
+
57
+ try:
58
+ result = func(*final_args, **final_kwargs)
59
+ safe_post_hook(ctx, result)
60
+ return safe_mutate_hook(ctx, result) if safe_mutate_hook else result
61
+ except Exception as e:
62
+ safe_error_hook(ctx, e)
63
+ raise
64
+ finally:
65
+ safe_finally_hook(ctx)
66
+
67
+ return wrapper
File without changes
@@ -0,0 +1,35 @@
1
+ from typing import Callable, TypeVar, ParamSpec
2
+
3
+ from judgeval.logger import judgeval_logger
4
+
5
+ P = ParamSpec("P")
6
+ T = TypeVar("T")
7
+
8
+
9
+ def identity_on_throw(func: Callable[P, T]) -> Callable[P, T]:
10
+ """
11
+ Wraps a mutation function to preserve the last argument (identity) if it fails.
12
+
13
+ This is used for mutation hooks where we want to fall back to the original value
14
+ if the mutation fails, ensuring the wrapper is always safe and non-breaking.
15
+
16
+ Args:
17
+ func: A mutation function where the last positional argument is the value to mutate.
18
+ The function should return a potentially modified version of this value.
19
+
20
+ Returns:
21
+ A wrapped function that returns the last positional argument (original value) if mutation fails
22
+ """
23
+
24
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
25
+ try:
26
+ return func(*args, **kwargs)
27
+ except Exception as e:
28
+ judgeval_logger.debug(
29
+ f"[Caught] Mutation function {func.__name__} failed, using identity",
30
+ exc_info=e,
31
+ )
32
+ # The last positional argument is always the value to mutate
33
+ return args[-1] # type: ignore[return-value]
34
+
35
+ return wrapper
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from judgeval.v1.internal.api import JudgmentSyncClient
6
+ from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_API_URL, JUDGMENT_ORG_ID
7
+
8
+
9
class Judgeval:
    """Entry point that holds credentials and hands out per-feature factories.

    Each factory property imports its factory class lazily and builds a new
    factory around the shared internal client on every access.
    """

    __slots__ = ("_api_key", "_organization_id", "_api_url", "_internal_client")

    def __init__(
        self,
        api_key: Optional[str] = None,
        organization_id: Optional[str] = None,
        api_url: Optional[str] = None,
    ):
        """Resolve credentials and build the internal sync client.

        Any argument that is falsy falls back to the matching value imported
        from ``judgeval.env``.

        Raises:
            ValueError: if, after the fallback, ``api_key``,
                ``organization_id`` or ``api_url`` is still falsy (checked in
                that order).
        """
        resolved_key = api_key or JUDGMENT_API_KEY
        resolved_org = organization_id or JUDGMENT_ORG_ID
        resolved_url = api_url or JUDGMENT_API_URL

        required = (
            (resolved_key, "api_key is required"),
            (resolved_org, "organization_id is required"),
            (resolved_url, "api_url is required"),
        )
        for value, message in required:
            if not value:
                raise ValueError(message)

        self._api_key = resolved_key
        self._organization_id = resolved_org
        self._api_url = resolved_url

        self._internal_client = JudgmentSyncClient(
            self._api_url,
            self._api_key,
            self._organization_id,
        )

    @property
    def tracer(self):
        """Return a new ``TracerFactory`` bound to the internal client."""
        from judgeval.v1.tracer.tracer_factory import TracerFactory

        factory = TracerFactory(client=self._internal_client)
        return factory

    @property
    def scorers(self):
        """Return a new ``ScorersFactory`` bound to the internal client."""
        from judgeval.v1.scorers.scorers_factory import ScorersFactory

        factory = ScorersFactory(client=self._internal_client)
        return factory

    @property
    def evaluation(self):
        """Return a new ``EvaluationFactory`` bound to the internal client."""
        from judgeval.v1.evaluation.evaluation_factory import EvaluationFactory

        factory = EvaluationFactory(client=self._internal_client)
        return factory

    @property
    def trainers(self):
        """Return a new ``TrainersFactory`` bound to the internal client."""
        from judgeval.v1.trainers.trainers_factory import TrainersFactory

        factory = TrainersFactory(client=self._internal_client)
        return factory

    @property
    def datasets(self):
        """Return a new ``DatasetFactory`` bound to the internal client."""
        from judgeval.v1.datasets.dataset_factory import DatasetFactory

        factory = DatasetFactory(client=self._internal_client)
        return factory

    @property
    def prompts(self):
        """Return a new ``PromptFactory`` bound to the internal client."""
        from judgeval.v1.prompts.prompt_factory import PromptFactory

        factory = PromptFactory(client=self._internal_client)
        return factory


__all__ = ["Judgeval"]
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.data.example import Example
4
+ from judgeval.v1.data.scorer_data import ScorerData
5
+ from judgeval.v1.data.scoring_result import ScoringResult
6
+
7
+ __all__ = ["Example", "ScorerData", "ScoringResult"]
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ import uuid
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+ from typing import Any, Dict, Optional
7
+
8
+ from judgeval.v1.internal.api.api_types import Example as APIExample
9
+
10
+
11
+ @dataclass(slots=True)
12
+ class Example:
13
+ example_id: str = field(default_factory=lambda: str(uuid.uuid4()))
14
+ created_at: str = field(default_factory=lambda: datetime.utcnow().isoformat())
15
+ name: Optional[str] = None
16
+ _properties: Dict[str, Any] = field(default_factory=dict)
17
+
18
+ def set_property(self, key: str, value: Any) -> Example:
19
+ self._properties[key] = value
20
+ return self
21
+
22
+ def get_property(self, key: str) -> Any:
23
+ return self._properties.get(key)
24
+
25
+ @classmethod
26
+ def create(cls, **kwargs: Any) -> Example:
27
+ example = cls()
28
+ for key, value in kwargs.items():
29
+ example.set_property(key, value)
30
+ return example
31
+
32
+ def to_dict(self) -> APIExample:
33
+ result: APIExample = {
34
+ "example_id": self.example_id,
35
+ "created_at": self.created_at,
36
+ "name": self.name,
37
+ }
38
+ for key, value in self._properties.items():
39
+ result[key] = value # type: ignore[literal-required]
40
+ return result
41
+
42
+ @property
43
+ def properties(self) -> Dict[str, Any]:
44
+ return self._properties.copy()