judgeval 0.0.42__tar.gz → 0.0.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. {judgeval-0.0.42 → judgeval-0.0.44}/PKG-INFO +1 -1
  2. {judgeval-0.0.42 → judgeval-0.0.44}/pyproject.toml +1 -1
  3. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/tracer.py +98 -76
  4. {judgeval-0.0.42 → judgeval-0.0.44}/.github/pull_request_template.md +0 -0
  5. {judgeval-0.0.42 → judgeval-0.0.44}/.github/workflows/blocked-pr.yaml +0 -0
  6. {judgeval-0.0.42 → judgeval-0.0.44}/.github/workflows/ci.yaml +0 -0
  7. {judgeval-0.0.42 → judgeval-0.0.44}/.github/workflows/merge-branch-check.yaml +0 -0
  8. {judgeval-0.0.42 → judgeval-0.0.44}/.github/workflows/release.yaml +0 -0
  9. {judgeval-0.0.42 → judgeval-0.0.44}/.github/workflows/validate-branch.yaml +0 -0
  10. {judgeval-0.0.42 → judgeval-0.0.44}/.gitignore +0 -0
  11. {judgeval-0.0.42 → judgeval-0.0.44}/LICENSE.md +0 -0
  12. {judgeval-0.0.42 → judgeval-0.0.44}/README.md +0 -0
  13. {judgeval-0.0.42 → judgeval-0.0.44}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  14. {judgeval-0.0.42 → judgeval-0.0.44}/assets/dataset_clustering_screenshot.png +0 -0
  15. {judgeval-0.0.42 → judgeval-0.0.44}/assets/dataset_clustering_screenshot_dm.png +0 -0
  16. {judgeval-0.0.42 → judgeval-0.0.44}/assets/datasets_preview_screenshot.png +0 -0
  17. {judgeval-0.0.42 → judgeval-0.0.44}/assets/experiments_dashboard_screenshot.png +0 -0
  18. {judgeval-0.0.42 → judgeval-0.0.44}/assets/experiments_page.png +0 -0
  19. {judgeval-0.0.42 → judgeval-0.0.44}/assets/experiments_pagev2.png +0 -0
  20. {judgeval-0.0.42 → judgeval-0.0.44}/assets/logo-dark.svg +0 -0
  21. {judgeval-0.0.42 → judgeval-0.0.44}/assets/logo-light.svg +0 -0
  22. {judgeval-0.0.42 → judgeval-0.0.44}/assets/monitoring_screenshot.png +0 -0
  23. {judgeval-0.0.42 → judgeval-0.0.44}/assets/new_darkmode.svg +0 -0
  24. {judgeval-0.0.42 → judgeval-0.0.44}/assets/new_lightmode.svg +0 -0
  25. {judgeval-0.0.42 → judgeval-0.0.44}/assets/trace_demo.png +0 -0
  26. {judgeval-0.0.42 → judgeval-0.0.44}/assets/trace_screenshot.png +0 -0
  27. {judgeval-0.0.42 → judgeval-0.0.44}/docs/README.md +0 -0
  28. {judgeval-0.0.42 → judgeval-0.0.44}/docs/alerts/notifications.mdx +0 -0
  29. {judgeval-0.0.42 → judgeval-0.0.44}/docs/alerts/platform_notifications.mdx +0 -0
  30. {judgeval-0.0.42 → judgeval-0.0.44}/docs/alerts/rules.mdx +0 -0
  31. {judgeval-0.0.42 → judgeval-0.0.44}/docs/api_reference/judgment_client.mdx +0 -0
  32. {judgeval-0.0.42 → judgeval-0.0.44}/docs/api_reference/trace.mdx +0 -0
  33. {judgeval-0.0.42 → judgeval-0.0.44}/docs/changelog/2025-04-21.mdx +0 -0
  34. {judgeval-0.0.42 → judgeval-0.0.44}/docs/clustering/clustering.mdx +0 -0
  35. {judgeval-0.0.42 → judgeval-0.0.44}/docs/compliance/certifications.mdx +0 -0
  36. {judgeval-0.0.42 → judgeval-0.0.44}/docs/development.mdx +0 -0
  37. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/code.mdx +0 -0
  38. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/images.mdx +0 -0
  39. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/markdown.mdx +0 -0
  40. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/navigation.mdx +0 -0
  41. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/reusable-snippets.mdx +0 -0
  42. {judgeval-0.0.42 → judgeval-0.0.44}/docs/essentials/settings.mdx +0 -0
  43. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/data_datasets.mdx +0 -0
  44. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/data_examples.mdx +0 -0
  45. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/data_sequences.mdx +0 -0
  46. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/experiment_comparisons.mdx +0 -0
  47. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/introduction.mdx +0 -0
  48. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/judges.mdx +0 -0
  49. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/agent/derailment.mdx +0 -0
  50. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  51. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  52. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/answer_correctness.mdx +0 -0
  53. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/answer_relevancy.mdx +0 -0
  54. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/comparison.mdx +0 -0
  55. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/contextual_precision.mdx +0 -0
  56. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/contextual_recall.mdx +0 -0
  57. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/contextual_relevancy.mdx +0 -0
  58. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/execution_order.mdx +0 -0
  59. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/faithfulness.mdx +0 -0
  60. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/groundedness.mdx +0 -0
  61. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/json_correctness.mdx +0 -0
  62. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/default/summarization.mdx +0 -0
  63. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/scorers/introduction.mdx +0 -0
  64. {judgeval-0.0.42 → judgeval-0.0.44}/docs/evaluation/unit_testing.mdx +0 -0
  65. {judgeval-0.0.42 → judgeval-0.0.44}/docs/favicon.svg +0 -0
  66. {judgeval-0.0.42 → judgeval-0.0.44}/docs/getting_started.mdx +0 -0
  67. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/annotation_queue_ui.png +0 -0
  68. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/basic_trace_example.png +0 -0
  69. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/checks-passed.png +0 -0
  70. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/cluster.png +0 -0
  71. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/cluster_button.png +0 -0
  72. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/create_aggressive_scorer.png +0 -0
  73. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/create_scorer.png +0 -0
  74. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/dashboard_annotation_queue_button.png +0 -0
  75. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/evaluation_diagram.png +0 -0
  76. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/experiment-comparison-page-2.png +0 -0
  77. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/experiment-page-comparison.png +0 -0
  78. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/experiment-popout-comparison.png +0 -0
  79. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/experiments-page-comparison-2.png +0 -0
  80. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/experiments-page-comparison.png +0 -0
  81. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/export-dataset.png +0 -0
  82. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/hero-dark.svg +0 -0
  83. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/hero-light.svg +0 -0
  84. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/notifications_page.png +0 -0
  85. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/online_eval_fault.png +0 -0
  86. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/reports_modal.png +0 -0
  87. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/synth_data_button.png +0 -0
  88. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/synth_data_window.png +0 -0
  89. {judgeval-0.0.42 → judgeval-0.0.44}/docs/images/trace_ss.png +0 -0
  90. {judgeval-0.0.42 → judgeval-0.0.44}/docs/integration/langgraph.mdx +0 -0
  91. {judgeval-0.0.42 → judgeval-0.0.44}/docs/introduction.mdx +0 -0
  92. {judgeval-0.0.42 → judgeval-0.0.44}/docs/judgment_cli/installation.mdx +0 -0
  93. {judgeval-0.0.42 → judgeval-0.0.44}/docs/judgment_cli/self-hosting.mdx +0 -0
  94. {judgeval-0.0.42 → judgeval-0.0.44}/docs/judgment_cli/supabase-org-id.png +0 -0
  95. {judgeval-0.0.42 → judgeval-0.0.44}/docs/logo/dark.svg +0 -0
  96. {judgeval-0.0.42 → judgeval-0.0.44}/docs/logo/light.svg +0 -0
  97. {judgeval-0.0.42 → judgeval-0.0.44}/docs/mint.json +0 -0
  98. {judgeval-0.0.42 → judgeval-0.0.44}/docs/monitoring/annotations.mdx +0 -0
  99. {judgeval-0.0.42 → judgeval-0.0.44}/docs/monitoring/introduction.mdx +0 -0
  100. {judgeval-0.0.42 → judgeval-0.0.44}/docs/monitoring/production_insights.mdx +0 -0
  101. {judgeval-0.0.42 → judgeval-0.0.44}/docs/monitoring/tracing.mdx +0 -0
  102. {judgeval-0.0.42 → judgeval-0.0.44}/docs/monitoring/tracing_s3.mdx +0 -0
  103. {judgeval-0.0.42 → judgeval-0.0.44}/docs/notebooks/create_dataset.ipynb +0 -0
  104. {judgeval-0.0.42 → judgeval-0.0.44}/docs/notebooks/create_scorer.ipynb +0 -0
  105. {judgeval-0.0.42 → judgeval-0.0.44}/docs/notebooks/demo.ipynb +0 -0
  106. {judgeval-0.0.42 → judgeval-0.0.44}/docs/notebooks/prompt_scorer.ipynb +0 -0
  107. {judgeval-0.0.42 → judgeval-0.0.44}/docs/notebooks/quickstart.ipynb +0 -0
  108. {judgeval-0.0.42 → judgeval-0.0.44}/docs/optimization/osiris_agent.mdx +0 -0
  109. {judgeval-0.0.42 → judgeval-0.0.44}/docs/quickstart.mdx +0 -0
  110. {judgeval-0.0.42 → judgeval-0.0.44}/docs/self_hosting/get_started.mdx +0 -0
  111. {judgeval-0.0.42 → judgeval-0.0.44}/docs/snippets/snippet-intro.mdx +0 -0
  112. {judgeval-0.0.42 → judgeval-0.0.44}/docs/synthetic_data/synthetic_data.mdx +0 -0
  113. {judgeval-0.0.42 → judgeval-0.0.44}/pytest.ini +0 -0
  114. {judgeval-0.0.42 → judgeval-0.0.44}/src/.coveragerc +0 -0
  115. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/__init__.py +0 -0
  116. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/clients.py +0 -0
  117. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/__init__.py +0 -0
  118. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/exceptions.py +0 -0
  119. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/logger.py +0 -0
  120. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/s3_storage.py +0 -0
  121. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/common/utils.py +0 -0
  122. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/constants.py +0 -0
  123. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/__init__.py +0 -0
  124. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/custom_example.py +0 -0
  125. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/datasets/__init__.py +0 -0
  126. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/datasets/dataset.py +0 -0
  127. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/datasets/eval_dataset_client.py +0 -0
  128. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/example.py +0 -0
  129. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/result.py +0 -0
  130. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/scorer_data.py +0 -0
  131. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/tool.py +0 -0
  132. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/trace.py +0 -0
  133. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/data/trace_run.py +0 -0
  134. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/evaluation_run.py +0 -0
  135. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/integrations/langgraph.py +0 -0
  136. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/__init__.py +0 -0
  137. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/base_judge.py +0 -0
  138. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/litellm_judge.py +0 -0
  139. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/mixture_of_judges.py +0 -0
  140. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/together_judge.py +0 -0
  141. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judges/utils.py +0 -0
  142. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/judgment_client.py +0 -0
  143. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/rules.py +0 -0
  144. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/run_evaluation.py +0 -0
  145. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/__init__.py +0 -0
  146. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/api_scorer.py +0 -0
  147. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/exceptions.py +0 -0
  148. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorer.py +0 -0
  149. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  150. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  151. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  152. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  153. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -0
  154. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -0
  155. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  156. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  157. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  158. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  159. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
  160. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  161. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -0
  162. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  163. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  164. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  165. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  166. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
  167. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
  168. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  169. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  170. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  171. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/prompt_scorer.py +0 -0
  172. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/score.py +0 -0
  173. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/scorers/utils.py +0 -0
  174. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/tracer/__init__.py +0 -0
  175. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/utils/alerts.py +0 -0
  176. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/utils/data_utils.py +0 -0
  177. {judgeval-0.0.42 → judgeval-0.0.44}/src/judgeval/version_check.py +0 -0
  178. {judgeval-0.0.42 → judgeval-0.0.44}/update_version.py +0 -0
  179. {judgeval-0.0.42 → judgeval-0.0.44}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.42
3
+ Version: 0.0.44
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.0.42"
3
+ version = "0.0.44"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -1557,7 +1557,6 @@ class _DeepTracer:
1557
1557
  # current_trace.record_output({"log": message})
1558
1558
 
1559
1559
  class Tracer:
1560
- _instance = None
1561
1560
 
1562
1561
  # Tracer.current_trace class variable is currently used in wrap()
1563
1562
  # TODO: Keep track of cross-context state for current trace and current span ID solely through class variables instead of instance variables?
@@ -1567,11 +1566,6 @@ class Tracer:
1567
1566
 
1568
1567
  trace_across_async_contexts: bool = False # BY default, we don't trace across async contexts
1569
1568
 
1570
- def __new__(cls, *args, **kwargs):
1571
- if cls._instance is None:
1572
- cls._instance = super(Tracer, cls).__new__(cls)
1573
- return cls._instance
1574
-
1575
1569
  def __init__(
1576
1570
  self,
1577
1571
  api_key: str = os.getenv("JUDGMENT_API_KEY"),
@@ -1595,66 +1589,56 @@ class Tracer:
1595
1589
  span_flush_interval: float = 1.0, # Time in seconds between automatic flushes
1596
1590
  span_num_workers: int = 10 # Number of worker threads for span processing
1597
1591
  ):
1598
- if not hasattr(self, 'initialized'):
1599
- if not api_key:
1600
- raise ValueError("Tracer must be configured with a Judgment API key")
1601
-
1602
- result, response = validate_api_key(api_key)
1603
- if not result:
1604
- raise JudgmentAPIError(f"Issue with passed in Judgment API key: {response}")
1605
-
1606
- if not organization_id:
1607
- raise ValueError("Tracer must be configured with an Organization ID")
1608
- if use_s3 and not s3_bucket_name:
1609
- raise ValueError("S3 bucket name must be provided when use_s3 is True")
1610
-
1611
- self.api_key: str = api_key
1612
- self.project_name: str = project_name or str(uuid.uuid4())
1613
- self.organization_id: str = organization_id
1614
- self.rules: List[Rule] = rules or [] # Store rules at tracer level
1615
- self.traces: List[Trace] = []
1616
- self.initialized: bool = True
1617
- self.enable_monitoring: bool = enable_monitoring
1618
- self.enable_evaluations: bool = enable_evaluations
1619
- self.class_identifiers: Dict[str, str] = {} # Dictionary to store class identifiers
1620
- self.span_id_to_previous_span_id: Dict[str, str] = {}
1621
- self.trace_id_to_previous_trace: Dict[str, TraceClient] = {}
1622
- self.current_span_id: Optional[str] = None
1623
- self.current_trace: Optional[TraceClient] = None
1624
- self.trace_across_async_contexts: bool = trace_across_async_contexts
1625
- Tracer.trace_across_async_contexts = trace_across_async_contexts
1626
-
1627
- # Initialize S3 storage if enabled
1628
- self.use_s3 = use_s3
1629
- if use_s3:
1630
- from judgeval.common.s3_storage import S3Storage
1631
- self.s3_storage = S3Storage(
1632
- bucket_name=s3_bucket_name,
1633
- aws_access_key_id=s3_aws_access_key_id,
1634
- aws_secret_access_key=s3_aws_secret_access_key,
1635
- region_name=s3_region_name
1636
- )
1637
- self.offline_mode: bool = offline_mode
1638
- self.deep_tracing: bool = deep_tracing # NEW: Store deep tracing setting
1639
-
1640
- # Initialize background span service
1641
- self.enable_background_spans: bool = enable_background_spans
1642
- self.background_span_service: Optional[BackgroundSpanService] = None
1643
- if enable_background_spans and not offline_mode:
1644
- self.background_span_service = BackgroundSpanService(
1645
- judgment_api_key=api_key,
1646
- organization_id=organization_id,
1647
- batch_size=span_batch_size,
1648
- flush_interval=span_flush_interval,
1649
- num_workers=span_num_workers
1650
- )
1651
-
1652
- elif hasattr(self, 'project_name') and self.project_name != project_name:
1653
- warnings.warn(
1654
- f"Attempting to initialize Tracer with project_name='{project_name}' but it was already initialized with "
1655
- f"project_name='{self.project_name}'. Due to the singleton pattern, the original project_name will be used. "
1656
- "To use a different project name, ensure the first Tracer initialization uses the desired project name.",
1657
- RuntimeWarning
1592
+ if not api_key:
1593
+ raise ValueError("Tracer must be configured with a Judgment API key")
1594
+
1595
+ result, response = validate_api_key(api_key)
1596
+ if not result:
1597
+ raise JudgmentAPIError(f"Issue with passed in Judgment API key: {response}")
1598
+
1599
+ if not organization_id:
1600
+ raise ValueError("Tracer must be configured with an Organization ID")
1601
+ if use_s3 and not s3_bucket_name:
1602
+ raise ValueError("S3 bucket name must be provided when use_s3 is True")
1603
+
1604
+ self.api_key: str = api_key
1605
+ self.project_name: str = project_name or str(uuid.uuid4())
1606
+ self.organization_id: str = organization_id
1607
+ self.rules: List[Rule] = rules or [] # Store rules at tracer level
1608
+ self.traces: List[Trace] = []
1609
+ self.enable_monitoring: bool = enable_monitoring
1610
+ self.enable_evaluations: bool = enable_evaluations
1611
+ self.class_identifiers: Dict[str, str] = {} # Dictionary to store class identifiers
1612
+ self.span_id_to_previous_span_id: Dict[str, str] = {}
1613
+ self.trace_id_to_previous_trace: Dict[str, TraceClient] = {}
1614
+ self.current_span_id: Optional[str] = None
1615
+ self.current_trace: Optional[TraceClient] = None
1616
+ self.trace_across_async_contexts: bool = trace_across_async_contexts
1617
+ Tracer.trace_across_async_contexts = trace_across_async_contexts
1618
+
1619
+ # Initialize S3 storage if enabled
1620
+ self.use_s3 = use_s3
1621
+ if use_s3:
1622
+ from judgeval.common.s3_storage import S3Storage
1623
+ self.s3_storage = S3Storage(
1624
+ bucket_name=s3_bucket_name,
1625
+ aws_access_key_id=s3_aws_access_key_id,
1626
+ aws_secret_access_key=s3_aws_secret_access_key,
1627
+ region_name=s3_region_name
1628
+ )
1629
+ self.offline_mode: bool = offline_mode
1630
+ self.deep_tracing: bool = deep_tracing # NEW: Store deep tracing setting
1631
+
1632
+ # Initialize background span service
1633
+ self.enable_background_spans: bool = enable_background_spans
1634
+ self.background_span_service: Optional[BackgroundSpanService] = None
1635
+ if enable_background_spans and not offline_mode:
1636
+ self.background_span_service = BackgroundSpanService(
1637
+ judgment_api_key=api_key,
1638
+ organization_id=organization_id,
1639
+ batch_size=span_batch_size,
1640
+ flush_interval=span_flush_interval,
1641
+ num_workers=span_num_workers
1658
1642
  )
1659
1643
 
1660
1644
  def set_current_span(self, span_id: str):
@@ -2237,7 +2221,7 @@ def wrap(client: Any, trace_across_async_contexts: bool = Tracer.trace_across_as
2237
2221
  Supports OpenAI, Together, Anthropic, and Google GenAI clients.
2238
2222
  Patches both '.create' and Anthropic's '.stream' methods using a wrapper class.
2239
2223
  """
2240
- span_name, original_create, original_responses_create, original_stream = _get_client_config(client)
2224
+ span_name, original_create, original_responses_create, original_stream, original_beta_parse = _get_client_config(client)
2241
2225
 
2242
2226
  def _get_current_trace():
2243
2227
  if trace_across_async_contexts:
@@ -2307,6 +2291,22 @@ def wrap(client: Any, trace_across_async_contexts: bool = Tracer.trace_across_as
2307
2291
  _capture_exception_for_trace(span, sys.exc_info())
2308
2292
  raise e
2309
2293
 
2294
+ async def traced_beta_parse_async(*args, **kwargs):
2295
+ current_trace = _get_current_trace()
2296
+ if not current_trace:
2297
+ return await original_beta_parse(*args, **kwargs)
2298
+
2299
+ with current_trace.span(span_name, span_type="llm") as span:
2300
+ is_streaming = _record_input_and_check_streaming(span, kwargs)
2301
+
2302
+ try:
2303
+ response_or_iterator = await original_beta_parse(*args, **kwargs)
2304
+ return _format_and_record_output(span, response_or_iterator, is_streaming, True, False)
2305
+ except Exception as e:
2306
+ _capture_exception_for_trace(span, sys.exc_info())
2307
+ raise e
2308
+
2309
+
2310
2310
  # Async responses for OpenAI clients
2311
2311
  async def traced_response_create_async(*args, **kwargs):
2312
2312
  current_trace = _get_current_trace()
@@ -2354,6 +2354,21 @@ def wrap(client: Any, trace_across_async_contexts: bool = Tracer.trace_across_as
2354
2354
  except Exception as e:
2355
2355
  _capture_exception_for_trace(span, sys.exc_info())
2356
2356
  raise e
2357
+
2358
+ def traced_beta_parse_sync(*args, **kwargs):
2359
+ current_trace = _get_current_trace()
2360
+ if not current_trace:
2361
+ return original_beta_parse(*args, **kwargs)
2362
+
2363
+ with current_trace.span(span_name, span_type="llm") as span:
2364
+ is_streaming = _record_input_and_check_streaming(span, kwargs)
2365
+
2366
+ try:
2367
+ response_or_iterator = original_beta_parse(*args, **kwargs)
2368
+ return _format_and_record_output(span, response_or_iterator, is_streaming, False, False)
2369
+ except Exception as e:
2370
+ _capture_exception_for_trace(span, sys.exc_info())
2371
+ raise e
2357
2372
 
2358
2373
  def traced_response_create_sync(*args, **kwargs):
2359
2374
  current_trace = _get_current_trace()
@@ -2392,7 +2407,7 @@ def wrap(client: Any, trace_across_async_contexts: bool = Tracer.trace_across_as
2392
2407
  if hasattr(client, "responses") and hasattr(client.responses, "create"):
2393
2408
  client.responses.create = traced_response_create_async
2394
2409
  if hasattr(client, "beta") and hasattr(client.beta, "chat") and hasattr(client.beta.chat, "completions") and hasattr(client.beta.chat.completions, "parse"):
2395
- client.beta.chat.completions.parse = traced_create_async
2410
+ client.beta.chat.completions.parse = traced_beta_parse_async
2396
2411
  elif isinstance(client, AsyncAnthropic):
2397
2412
  client.messages.create = traced_create_async
2398
2413
  if original_stream:
@@ -2404,7 +2419,7 @@ def wrap(client: Any, trace_across_async_contexts: bool = Tracer.trace_across_as
2404
2419
  if hasattr(client, "responses") and hasattr(client.responses, "create"):
2405
2420
  client.responses.create = traced_response_create_sync
2406
2421
  if hasattr(client, "beta") and hasattr(client.beta, "chat") and hasattr(client.beta.chat, "completions") and hasattr(client.beta.chat.completions, "parse"):
2407
- client.beta.chat.completions.parse = traced_create_sync
2422
+ client.beta.chat.completions.parse = traced_beta_parse_sync
2408
2423
  elif isinstance(client, Anthropic):
2409
2424
  client.messages.create = traced_create_sync
2410
2425
  if original_stream:
@@ -2423,23 +2438,24 @@ def _get_client_config(client: ApiClient) -> tuple[str, callable, Optional[calla
2423
2438
  client: An instance of OpenAI, Together, or Anthropic client
2424
2439
 
2425
2440
  Returns:
2426
- tuple: (span_name, create_method, stream_method)
2441
+ tuple: (span_name, create_method, responses_method, stream_method, beta_parse_method)
2427
2442
  - span_name: String identifier for tracing
2428
2443
  - create_method: Reference to the client's creation method
2429
2444
  - responses_method: Reference to the client's responses method (if applicable)
2430
2445
  - stream_method: Reference to the client's stream method (if applicable)
2446
+ - beta_parse_method: Reference to the client's beta parse method (if applicable)
2431
2447
 
2432
2448
  Raises:
2433
2449
  ValueError: If client type is not supported
2434
2450
  """
2435
2451
  if isinstance(client, (OpenAI, AsyncOpenAI)):
2436
- return "OPENAI_API_CALL", client.chat.completions.create, client.responses.create, None
2452
+ return "OPENAI_API_CALL", client.chat.completions.create, client.responses.create, None, client.beta.chat.completions.parse
2437
2453
  elif isinstance(client, (Together, AsyncTogether)):
2438
- return "TOGETHER_API_CALL", client.chat.completions.create, None, None
2454
+ return "TOGETHER_API_CALL", client.chat.completions.create, None, None, None
2439
2455
  elif isinstance(client, (Anthropic, AsyncAnthropic)):
2440
- return "ANTHROPIC_API_CALL", client.messages.create, None, client.messages.stream
2456
+ return "ANTHROPIC_API_CALL", client.messages.create, None, client.messages.stream, None
2441
2457
  elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
2442
- return "GOOGLE_API_CALL", client.models.generate_content, None, None
2458
+ return "GOOGLE_API_CALL", client.models.generate_content, None, None, None
2443
2459
  raise ValueError(f"Unsupported client type: {type(client)}")
2444
2460
 
2445
2461
  def _format_input_data(client: ApiClient, **kwargs) -> dict:
@@ -2449,10 +2465,13 @@ def _format_input_data(client: ApiClient, **kwargs) -> dict:
2449
2465
  to ensure consistent tracing across different APIs.
2450
2466
  """
2451
2467
  if isinstance(client, (OpenAI, Together, AsyncOpenAI, AsyncTogether)):
2452
- return {
2468
+ input_data = {
2453
2469
  "model": kwargs.get("model"),
2454
2470
  "messages": kwargs.get("messages"),
2455
2471
  }
2472
+ if kwargs.get("response_format"):
2473
+ input_data["response_format"] = kwargs.get("response_format")
2474
+ return input_data
2456
2475
  elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
2457
2476
  return {
2458
2477
  "model": kwargs.get("model"),
@@ -2522,7 +2541,10 @@ def _format_output_data(client: ApiClient, response: Any) -> dict:
2522
2541
  model_name = response.model
2523
2542
  prompt_tokens = response.usage.prompt_tokens
2524
2543
  completion_tokens = response.usage.completion_tokens
2525
- message_content = response.choices[0].message.content
2544
+ if hasattr(response.choices[0].message, "parsed") and response.choices[0].message.parsed:
2545
+ message_content = response.choices[0].message.parsed
2546
+ else:
2547
+ message_content = response.choices[0].message.content
2526
2548
  elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
2527
2549
  model_name = response.model_version
2528
2550
  prompt_tokens = response.usage_metadata.prompt_token_count
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes