judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable, Optional
4
+
5
+ from opentelemetry.sdk.trace import TracerProvider
6
+ from opentelemetry.trace import Tracer, NoOpTracer
7
+ from opentelemetry.util.types import Attributes
8
+
9
+ from judgeval.logger import judgeval_logger
10
+ from judgeval.v1.tracer.base_tracer import BaseTracer
11
+
12
+ FilterTracerCallback = Callable[[str, Optional[str], Optional[str], Attributes], bool]
13
+
14
+
15
class JudgmentTracerProvider(TracerProvider):
    """``TracerProvider`` that can hand out no-op tracers to filtered callers.

    A request for the tracer named ``BaseTracer.TRACER_NAME`` is always served
    by the underlying ``TracerProvider``. Any other request is first shown to
    the ``filter_tracer`` callback; a falsy verdict yields a ``NoOpTracer``.
    """

    __slots__ = ("_filter_tracer",)

    def __init__(
        self,
        filter_tracer: Optional[FilterTracerCallback] = None,
        **kwargs,
    ):
        """Build the provider.

        Args:
            filter_tracer: Callback invoked with ``(name, version, schema_url,
                attributes)``; a truthy result allows a real tracer. When
                omitted, a callback that allows everything is installed.
            **kwargs: Passed through unchanged to ``TracerProvider.__init__``.
        """
        super().__init__(**kwargs)
        if filter_tracer is None:
            self._filter_tracer = lambda *_: True
        else:
            self._filter_tracer = filter_tracer

    def get_tracer(
        self,
        instrumenting_module_name: str,
        instrumenting_library_version: Optional[str] = None,
        schema_url: Optional[str] = None,
        attributes: Attributes = None,
    ) -> Tracer:
        """Return a tracer for the caller, or a ``NoOpTracer`` if it is filtered out.

        If the filter callback (or the tracer creation inside the guarded
        section) raises, the error is logged and a real tracer is returned.
        """
        # The same four values go to both the filter callback and the parent
        # provider, always positionally and in this order.
        request = (
            instrumenting_module_name,
            instrumenting_library_version,
            schema_url,
            attributes,
        )

        # The tracer named BaseTracer.TRACER_NAME never goes through the filter.
        if instrumenting_module_name == BaseTracer.TRACER_NAME:
            return super().get_tracer(*request)

        try:
            if not self._filter_tracer(*request):
                judgeval_logger.debug(
                    f"[JudgmentTracerProvider] Returning NoOpTracer for tracer {instrumenting_module_name} as it is disallowed by the filterTracer callback."
                )
                return NoOpTracer()
            return super().get_tracer(*request)
        except Exception as error:
            # Fail open: a broken filter must not disable tracing.
            judgeval_logger.error(
                f"[JudgmentTracerProvider] Failed to filter tracer {instrumenting_module_name}: {error}."
            )
            return super().get_tracer(*request)
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.tracer.processors.judgment_span_processor import JudgmentSpanProcessor
4
+ from judgeval.v1.tracer.processors.noop_span_processor import NoOpJudgmentSpanProcessor
5
+
6
+ __all__ = ["JudgmentSpanProcessor", "NoOpJudgmentSpanProcessor"]
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.tracer.processors._lifecycles.customer_id_processor import (
4
+ CustomerIdProcessor,
5
+ )
6
+ from judgeval.v1.tracer.processors._lifecycles.agent_id_processor import (
7
+ AgentIdProcessor,
8
+ )
9
+ from judgeval.v1.tracer.processors._lifecycles.registry import get_all, register
10
+ from judgeval.v1.tracer.processors._lifecycles.context_keys import (
11
+ CUSTOMER_ID_KEY,
12
+ AGENT_ID_KEY,
13
+ PARENT_AGENT_ID_KEY,
14
+ AGENT_CLASS_NAME_KEY,
15
+ AGENT_INSTANCE_NAME_KEY,
16
+ )
17
+
18
+ __all__ = [
19
+ "CustomerIdProcessor",
20
+ "AgentIdProcessor",
21
+ "get_all",
22
+ "register",
23
+ "CUSTOMER_ID_KEY",
24
+ "AGENT_ID_KEY",
25
+ "PARENT_AGENT_ID_KEY",
26
+ "AGENT_CLASS_NAME_KEY",
27
+ "AGENT_INSTANCE_NAME_KEY",
28
+ ]
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from opentelemetry.context import Context, get_value
6
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
7
+
8
+ from judgeval.v1.tracer.processors._lifecycles.registry import register
9
+ from judgeval.v1.tracer.processors._lifecycles.context_keys import (
10
+ AGENT_ID_KEY,
11
+ PARENT_AGENT_ID_KEY,
12
+ AGENT_CLASS_NAME_KEY,
13
+ AGENT_INSTANCE_NAME_KEY,
14
+ )
15
+ from judgeval.judgment_attribute_keys import AttributeKeys
16
+
17
+
18
class AgentIdProcessor(SpanProcessor):
    """Lifecycle processor that stamps agent identity onto spans as they start.

    Values are read from the span's parent context under the agent-related
    context keys and written, stringified, to the matching span attributes.
    """

    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Copy agent id, parent agent id, class name and instance name to ``span``.

        Also marks the span as an agent entry point when an agent id is present
        and differs from the parent agent id.
        """

        def _stamp(context_key, attribute_key):
            # Read one value from the parent context; write it only when set.
            found = get_value(context_key, context=parent_context)
            if found is not None:
                span.set_attribute(attribute_key, str(found))
            return found

        agent_id = _stamp(AGENT_ID_KEY, AttributeKeys.JUDGMENT_AGENT_ID)
        parent_agent_id = _stamp(
            PARENT_AGENT_ID_KEY, AttributeKeys.JUDGMENT_PARENT_AGENT_ID
        )
        _stamp(AGENT_CLASS_NAME_KEY, AttributeKeys.JUDGMENT_AGENT_CLASS_NAME)
        _stamp(AGENT_INSTANCE_NAME_KEY, AttributeKeys.JUDGMENT_AGENT_INSTANCE_NAME)

        # Compared on the raw context values, not their string forms.
        is_entry_point = agent_id is not None and agent_id != parent_agent_id
        if is_entry_point:
            span.set_attribute(AttributeKeys.JUDGMENT_IS_AGENT_ENTRY_POINT, True)

    def on_end(self, span: ReadableSpan) -> None:
        """Do nothing; this processor only acts when a span starts."""
        return None

    def shutdown(self) -> None:
        """Do nothing; the processor holds no resources."""
        return None

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Report success immediately; nothing is buffered."""
        return True


# Importing this module adds the processor class to the lifecycle registry.
register(AgentIdProcessor)
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from opentelemetry.context import create_key
4
+ from judgeval.judgment_attribute_keys import AttributeKeys
5
+
6
+
7
# Context keys used to carry Judgment identifiers through the OpenTelemetry
# context. Each key is created from the attribute name it is later written
# under; the creation order matches the order of the names on the left.
(
    CUSTOMER_ID_KEY,
    AGENT_ID_KEY,
    PARENT_AGENT_ID_KEY,
    AGENT_CLASS_NAME_KEY,
    AGENT_INSTANCE_NAME_KEY,
) = (
    create_key(attribute_name)
    for attribute_name in (
        AttributeKeys.JUDGMENT_CUSTOMER_ID,
        AttributeKeys.JUDGMENT_AGENT_ID,
        AttributeKeys.JUDGMENT_PARENT_AGENT_ID,
        AttributeKeys.JUDGMENT_AGENT_CLASS_NAME,
        AttributeKeys.JUDGMENT_AGENT_INSTANCE_NAME,
    )
)
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from opentelemetry.context import Context, get_value
6
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
7
+
8
+ from judgeval.v1.tracer.processors._lifecycles.registry import register
9
+ from judgeval.v1.tracer.processors._lifecycles.context_keys import CUSTOMER_ID_KEY
10
+ from judgeval.judgment_attribute_keys import AttributeKeys
11
+
12
+
13
class CustomerIdProcessor(SpanProcessor):
    """Lifecycle processor that stamps the customer id onto spans as they start."""

    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Copy the customer id from the parent context to ``span``, if one is set."""
        customer_id = get_value(CUSTOMER_ID_KEY, context=parent_context)
        if customer_id is None:
            return
        span.set_attribute(AttributeKeys.JUDGMENT_CUSTOMER_ID, str(customer_id))

    def on_end(self, span: ReadableSpan) -> None:
        """Do nothing; this processor only acts when a span starts."""
        return None

    def shutdown(self) -> None:
        """Do nothing; the processor holds no resources."""
        return None

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Report success immediately; nothing is buffered."""
        return True


# Importing this module adds the processor class to the lifecycle registry.
register(CustomerIdProcessor)
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable, List
4
+
5
+ from opentelemetry.sdk.trace import SpanProcessor
6
+
7
+
8
+ ProcessorFactory = Callable[[], SpanProcessor]
9
+
10
# Zero-argument factories for lifecycle span processors, in registration order.
_lifecycle_processors: List[ProcessorFactory] = []


def register(processor_class: ProcessorFactory) -> None:
    """Append a zero-argument processor factory (e.g. a class) to the registry.

    No de-duplication is performed: registering the same factory twice makes
    ``get_all`` build it twice.
    """
    _lifecycle_processors.append(processor_class)


def get_all() -> List[SpanProcessor]:
    """Build and return one new processor per registered factory.

    Every call invokes each factory again, so the returned objects are fresh
    instances in registration order.
    """
    built: List[SpanProcessor] = []
    for make_processor in _lifecycle_processors:
        built.append(make_processor())
    return built
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Optional
4
+
5
+ from collections import defaultdict
6
+
7
+ from opentelemetry.context import Context
8
+ from opentelemetry.sdk.trace import ReadableSpan, Span
9
+ from opentelemetry.trace import get_current_span
10
+ from opentelemetry.trace.span import SpanContext
11
+ from opentelemetry.sdk.trace.export import (
12
+ BatchSpanProcessor,
13
+ SpanExporter,
14
+ )
15
+
16
+ from judgeval.judgment_attribute_keys import AttributeKeys
17
+ from judgeval.tracer.keys import InternalAttributeKeys
18
+ from judgeval.utils.decorators.dont_throw import dont_throw
19
+ from judgeval.v1.tracer.processors._lifecycles import get_all
20
+
21
+
22
+ if TYPE_CHECKING:
23
+ from judgeval.v1.tracer import BaseTracer
24
+
25
+
26
class JudgmentSpanProcessor(BatchSpanProcessor):
    """Batch span processor with lifecycle hooks, per-span state and partial emits.

    On top of ``BatchSpanProcessor`` this class:

    * forwards ``on_start`` / ``on_end`` to every processor returned by
      ``get_all()`` from the lifecycle registry;
    * keeps a private attribute store keyed by ``(trace_id, span_id)`` that is
      never written onto the span itself;
    * can queue a snapshot of the still-running current span (``emit_partial``),
      tagged with an incrementing ``AttributeKeys.JUDGMENT_UPDATE_ID``.

    NOTE(review): ``resource_attributes`` is listed in ``__slots__`` but is not
    assigned anywhere in this class; confirm who is expected to set it.
    NOTE(review): ``dont_throw`` presumably swallows exceptions raised by the
    decorated hooks - confirm against its definition.
    """

    __slots__ = ("tracer", "resource_attributes", "_internal_attributes")

    def __init__(
        self,
        tracer: BaseTracer,
        exporter: SpanExporter,
        /,
        *,
        max_queue_size: int | None = None,
        schedule_delay_millis: float | None = None,
        max_export_batch_size: int | None = None,
        export_timeout_millis: float | None = None,
    ):
        """Create the processor.

        Args:
            tracer: Owning tracer; stored on ``self.tracer``.
            exporter: Exporter handed to ``BatchSpanProcessor``.
            max_queue_size: Forwarded to ``BatchSpanProcessor``.
            schedule_delay_millis: Forwarded to ``BatchSpanProcessor``.
            max_export_batch_size: Forwarded to ``BatchSpanProcessor``.
            export_timeout_millis: Forwarded to ``BatchSpanProcessor``.
        """
        self.tracer = tracer

        super().__init__(
            exporter,
            max_queue_size=max_queue_size,
            schedule_delay_millis=schedule_delay_millis,
            max_export_batch_size=max_export_batch_size,
            export_timeout_millis=export_timeout_millis,
        )
        # Per-span private state, keyed by (trace_id, span_id). Writes go
        # through the defaultdict factory; reads deliberately do not.
        self._internal_attributes: defaultdict[tuple[int, int], dict[str, Any]] = (
            defaultdict(dict)
        )

    def _get_span_key(self, span_context: SpanContext) -> tuple[int, int]:
        """Return the ``(trace_id, span_id)`` key identifying a span's state."""
        return (span_context.trace_id, span_context.span_id)

    def set_internal_attribute(
        self, span_context: SpanContext, key: str, value: Any
    ) -> None:
        """Store ``value`` under ``key`` in the private state of the given span."""
        span_key = self._get_span_key(span_context)
        self._internal_attributes[span_key][key] = value

    def get_internal_attribute(
        self, span_context: SpanContext, key: str, default: Any = None
    ) -> Any:
        """Return the private attribute ``key`` of the given span, or ``default``.

        Reading never creates state: indexing the ``defaultdict`` here would
        insert an empty dict for every span that is merely queried, and those
        entries would linger for spans that never reach a cleanup path.
        """
        span_state = self._internal_attributes.get(self._get_span_key(span_context))
        if span_state is None:
            return default
        return span_state.get(key, default)

    def increment_update_id(self, span_context: SpanContext) -> int:
        """Advance the span's update counter and return its value *before* the bump.

        The first call for a span therefore returns ``0``.
        """
        current_id = self.get_internal_attribute(
            span_context=span_context, key=AttributeKeys.JUDGMENT_UPDATE_ID, default=0
        )
        self.set_internal_attribute(
            span_context=span_context,
            key=AttributeKeys.JUDGMENT_UPDATE_ID,
            value=current_id + 1,
        )
        return current_id

    def _cleanup_span_state(self, span_key: tuple[int, int]) -> None:
        """Drop all private state held for ``span_key``; a no-op if none exists."""
        self._internal_attributes.pop(span_key, None)

    @staticmethod
    def _copy_span(
        span: ReadableSpan,
        context: SpanContext,
        attributes: dict[str, Any],
        end_time: Optional[int],
    ) -> ReadableSpan:
        """Build a ``ReadableSpan`` mirroring ``span`` with new attributes/end time."""
        return ReadableSpan(
            name=span.name,
            context=context,
            parent=span.parent,
            resource=span.resource,
            attributes=attributes,
            events=span.events,
            links=span.links,
            status=span.status,
            kind=span.kind,
            start_time=span.start_time,
            end_time=end_time,
            instrumentation_scope=span.instrumentation_scope,
        )

    @dont_throw
    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Forward the start event to every registered lifecycle processor."""
        for processor in get_all():
            processor.on_start(span, parent_context)

    @dont_throw
    def emit_partial(self) -> None:
        """Queue a snapshot of the current, still-recording span for export.

        Does nothing when there is no recording current span, when the span is
        not a ``ReadableSpan``, or when partial emission was disabled for it
        via ``InternalAttributeKeys.DISABLE_PARTIAL_EMIT``.
        """
        current_span = get_current_span()
        if (
            not current_span
            or not current_span.is_recording()
            or not isinstance(current_span, ReadableSpan)
        ):
            return

        span_context = current_span.get_span_context()
        if self.get_internal_attribute(
            span_context, InternalAttributeKeys.DISABLE_PARTIAL_EMIT, False
        ):
            return

        attributes = dict(current_span.attributes or {})
        attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = self.increment_update_id(
            span_context
        )

        # end_time=None marks the snapshot as not yet finished.
        partial_span = self._copy_span(current_span, span_context, attributes, None)

        # Use the parent's on_end directly so the snapshot is queued without
        # re-running lifecycle hooks or the state cleanup below.
        super().on_end(partial_span)

    @dont_throw
    def on_end(self, span: ReadableSpan) -> None:
        """Run lifecycle hooks, then queue the finished span unless it was cancelled.

        Spans flagged with ``InternalAttributeKeys.CANCELLED`` are dropped.
        Spans with an end time are re-created with a final
        ``AttributeKeys.JUDGMENT_UPDATE_ID`` and their private state is released.
        """
        for processor in get_all():
            processor.on_end(span)

        if not span.context:
            super().on_end(span)
            return

        span_key = self._get_span_key(span.context)

        if self.get_internal_attribute(
            span.context, InternalAttributeKeys.CANCELLED, False
        ):
            self._cleanup_span_state(span_key)
            return

        if span.end_time is not None:
            attributes = dict(span.attributes or {})
            # NOTE(review): fixed update id for the final span; presumably meant
            # to rank above the ids of partial emits (which count up from 0) -
            # confirm how the backend orders them beyond 20 partial emits.
            attributes[AttributeKeys.JUDGMENT_UPDATE_ID] = 20

            final_span = self._copy_span(
                span, span.context, attributes, span.end_time
            )

            self._cleanup_span_state(span_key)
            super().on_end(final_span)
        else:
            super().on_end(span)
@@ -0,0 +1,42 @@
1
+ from opentelemetry.sdk.trace import Span, ReadableSpan
2
+ from opentelemetry.context import Context
3
+ from typing import Any, Optional
4
+
5
+ from opentelemetry.trace import SpanContext
6
+
7
+ from judgeval.v1.tracer.processors.judgment_span_processor import JudgmentSpanProcessor
8
+
9
+
10
class NoOpJudgmentSpanProcessor(JudgmentSpanProcessor):
    """Stand-in for ``JudgmentSpanProcessor`` that ignores every call.

    ``__init__`` intentionally does not call the parent constructor, so none
    of the parent's setup runs; each public hook is overridden to do nothing
    and to return a neutral value.
    """

    __slots__ = ("resource_attributes",)

    def __init__(self):
        """Initialise with empty resource attributes and nothing else."""
        self.resource_attributes = {}

    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Ignore the span start."""
        pass

    def on_end(self, span: ReadableSpan) -> None:
        """Ignore the span end; nothing is queued or exported."""
        pass

    def shutdown(self) -> None:
        """Do nothing; there is nothing to shut down."""
        pass

    # This module has no `from __future__ import annotations`, so annotations
    # are evaluated when the function is defined. `Optional[int]` (already
    # imported and used above) avoids the `int | None` expression, which
    # raises TypeError on interpreters older than Python 3.10.
    def force_flush(self, timeout_millis: Optional[int] = 30000) -> bool:
        """Report success immediately; nothing is buffered."""
        return True

    def emit_partial(self) -> None:
        """Do nothing; partial spans are never emitted."""
        pass

    def set_internal_attribute(
        self, span_context: SpanContext, key: str, value: Any
    ) -> None:
        """Discard the value; no per-span state is kept."""
        pass

    def get_internal_attribute(
        self, span_context: SpanContext, key: str, default: Any = None
    ) -> Any:
        """Always return ``default``; no per-span state is kept."""
        return default

    def increment_update_id(self, span_context: SpanContext) -> int:
        """Always return ``0``; no counter is maintained."""
        return 0
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Callable, Optional
4
+
5
+ from opentelemetry import trace
6
+ from opentelemetry.sdk.resources import Resource
7
+ from opentelemetry.sdk.trace import TracerProvider
8
+
9
+ from judgeval.v1.internal.api import JudgmentSyncClient
10
+ from judgeval.logger import judgeval_logger
11
+ from judgeval.v1.tracer.judgment_tracer_provider import JudgmentTracerProvider
12
+ from judgeval.version import get_version
13
+ from judgeval.v1.tracer.base_tracer import BaseTracer
14
+ from judgeval.v1.tracer.judgment_tracer_provider import FilterTracerCallback
15
+
16
+
17
class Tracer(BaseTracer):
    """``BaseTracer`` that owns a ``JudgmentTracerProvider`` and installs it globally."""

    __slots__ = ("_tracer_provider", "_filter_tracer")

    def __init__(
        self,
        project_name: str,
        enable_evaluation: bool,
        api_client: JudgmentSyncClient,
        serializer: Callable[[Any], str],
        initialize: bool,
        filter_tracer: Optional[FilterTracerCallback] = None,
    ):
        """Create the tracer and, if ``initialize`` is true, set up the provider.

        Args:
            project_name: Forwarded to ``BaseTracer``.
            enable_evaluation: Forwarded to ``BaseTracer``.
            api_client: Forwarded to ``BaseTracer``.
            serializer: Forwarded to ``BaseTracer``.
            initialize: When true, ``initialize()`` is called before returning.
            filter_tracer: Optional callback handed to ``JudgmentTracerProvider``.
        """
        super().__init__(
            project_name=project_name,
            enable_evaluation=enable_evaluation,
            api_client=api_client,
            serializer=serializer,
        )
        self._filter_tracer = filter_tracer
        # Stays None until initialize() has run.
        self._tracer_provider: Optional[TracerProvider] = None

        if initialize:
            self.initialize()

    def initialize(self) -> None:
        """Create the tracer provider, attach the span processor, register it globally."""
        resource_attributes = {
            "service.name": self.project_name,
            "telemetry.sdk.name": self.TRACER_NAME,
            "telemetry.sdk.version": get_version(),
        }
        provider = JudgmentTracerProvider(
            resource=Resource.create(resource_attributes),
            filter_tracer=self._filter_tracer,
        )

        # Publish the provider on self before building the span processor.
        self._tracer_provider = provider
        provider.add_span_processor(self.get_span_processor())

        trace.set_tracer_provider(provider)

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Flush the provider; log an error and return ``False`` if uninitialised."""
        provider = self._tracer_provider
        if provider is not None:
            return provider.force_flush(timeout_millis)

        judgeval_logger.error("Cannot forceFlush: tracer not initialized")
        return False

    def shutdown(self, timeout_millis: int = 30000) -> None:
        """Shut the provider down; log an error if the tracer was never initialised.

        ``timeout_millis`` is accepted but not passed on to the provider.
        """
        provider = self._tracer_provider
        if provider is not None:
            provider.shutdown()
            return

        judgeval_logger.error("Cannot shutdown: tracer not initialized")
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Callable, Optional
4
+
5
+ from judgeval.utils.serialize import safe_serialize
6
+ from judgeval.v1.internal.api import JudgmentSyncClient
7
+ from judgeval.v1.tracer.judgment_tracer_provider import FilterTracerCallback
8
+ from judgeval.v1.tracer.tracer import Tracer
9
+
10
+
11
+ class TracerFactory:
12
+ __slots__ = "_client"
13
+
14
+ def __init__(
15
+ self,
16
+ client: JudgmentSyncClient,
17
+ ):
18
+ self._client = client
19
+
20
+ def create(
21
+ self,
22
+ project_name: str,
23
+ enable_evaluation: bool = True,
24
+ serializer: Optional[Callable[[Any], str]] = None,
25
+ filter_tracer: Optional[FilterTracerCallback] = None,
26
+ initialize: bool = True,
27
+ ) -> Tracer:
28
+ if serializer is None:
29
+ serializer = safe_serialize
30
+
31
+ return Tracer(
32
+ project_name=project_name,
33
+ enable_evaluation=enable_evaluation,
34
+ api_client=self._client,
35
+ serializer=serializer,
36
+ initialize=initialize,
37
+ filter_tracer=filter_tracer,
38
+ )
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from judgeval.v1.trainers.trainers_factory import TrainersFactory
4
+
5
+ __all__ = ["TrainersFactory"]
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
5
+
6
+ if TYPE_CHECKING:
7
+ from judgeval.v1.tracer.tracer import Tracer
8
+ from judgeval.v1.trainers.trainable_model import TrainableModel
9
+ from judgeval.v1.trainers.config import TrainerConfig, ModelConfig
10
+ from judgeval.v1.scorers.base_scorer import BaseScorer
11
+
12
+
13
class BaseTrainer(ABC):
    """Abstract base for trainers; stores the shared configuration and collaborators.

    Concrete subclasses must implement every abstract method declared below.
    """

    __slots__ = ("config", "trainable_model", "tracer", "project_name")

    def __init__(
        self,
        config: TrainerConfig,
        trainable_model: TrainableModel,
        tracer: Tracer,
        project_name: Optional[str] = None,
    ):
        """Store the trainer's collaborators.

        Args:
            config: Stored on ``self.config``.
            trainable_model: Stored on ``self.trainable_model``.
            tracer: Stored on ``self.tracer``.
            project_name: Stored on ``self.project_name``; any falsy value
                (``None`` or ``""``) is replaced by ``"judgment_training"``.
        """
        self.config = config
        self.trainable_model = trainable_model
        self.tracer = tracer
        self.project_name = project_name if project_name else "judgment_training"

    @abstractmethod
    async def generate_rollouts_and_rewards(
        self,
        agent_function: Callable[[Any], Any],
        scorers: List[BaseScorer],
        prompts: dict[int, dict[Any, Any]],
        num_prompts_per_step: Optional[int] = None,
        num_generations_per_prompt: Optional[int] = None,
        concurrency: Optional[int] = None,
    ) -> Any:
        """Abstract hook; the base implementation does nothing and returns ``None``."""

    @abstractmethod
    async def run_reinforcement_learning(
        self,
        agent_function: Callable[[Any], Any],
        scorers: List[BaseScorer],
        prompts: dict[int, dict[Any, Any]],
    ) -> "ModelConfig":
        """Abstract hook; the base implementation does nothing and returns ``None``."""

    @abstractmethod
    async def train(
        self,
        agent_function: Callable[[Any], Any],
        scorers: List[BaseScorer],
        prompts: dict[int, dict[Any, Any]],
    ) -> "ModelConfig":
        """Abstract hook; the base implementation does nothing and returns ``None``."""

    @abstractmethod
    def _extract_message_history_from_spans(
        self, trace_id: str
    ) -> List[Dict[str, str]]:
        """Abstract hook; the base implementation does nothing and returns ``None``."""