judgeval 0.0.13__tar.gz → 0.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. {judgeval-0.0.13 → judgeval-0.0.14}/PKG-INFO +1 -1
  2. {judgeval-0.0.13 → judgeval-0.0.14}/pyproject.toml +1 -1
  3. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/openai_travel_agent/agent.py +2 -2
  4. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/common/tracer.py +19 -30
  5. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/datasets/dataset.py +3 -2
  6. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/datasets/eval_dataset_client.py +16 -9
  7. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/example.py +8 -1
  8. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/evaluation_run.py +1 -0
  9. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judgment_client.py +18 -12
  10. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/run_evaluation.py +10 -6
  11. {judgeval-0.0.13 → judgeval-0.0.14}/.github/workflows/ci.yaml +0 -0
  12. {judgeval-0.0.13 → judgeval-0.0.14}/.gitignore +0 -0
  13. {judgeval-0.0.13 → judgeval-0.0.14}/LICENSE.md +0 -0
  14. {judgeval-0.0.13 → judgeval-0.0.14}/Pipfile +0 -0
  15. {judgeval-0.0.13 → judgeval-0.0.14}/Pipfile.lock +0 -0
  16. {judgeval-0.0.13 → judgeval-0.0.14}/README.md +0 -0
  17. {judgeval-0.0.13 → judgeval-0.0.14}/docs/README.md +0 -0
  18. {judgeval-0.0.13 → judgeval-0.0.14}/docs/api_reference/judgment_client.mdx +0 -0
  19. {judgeval-0.0.13 → judgeval-0.0.14}/docs/api_reference/trace.mdx +0 -0
  20. {judgeval-0.0.13 → judgeval-0.0.14}/docs/development.mdx +0 -0
  21. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/code.mdx +0 -0
  22. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/images.mdx +0 -0
  23. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/markdown.mdx +0 -0
  24. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/navigation.mdx +0 -0
  25. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/reusable-snippets.mdx +0 -0
  26. {judgeval-0.0.13 → judgeval-0.0.14}/docs/essentials/settings.mdx +0 -0
  27. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/data_datasets.mdx +0 -0
  28. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/data_examples.mdx +0 -0
  29. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/introduction.mdx +0 -0
  30. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/judges.mdx +0 -0
  31. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/answer_correctness.mdx +0 -0
  32. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/answer_relevancy.mdx +0 -0
  33. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/classifier_scorer.mdx +0 -0
  34. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/contextual_precision.mdx +0 -0
  35. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/contextual_recall.mdx +0 -0
  36. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/contextual_relevancy.mdx +0 -0
  37. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/custom_scorers.mdx +0 -0
  38. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/faithfulness.mdx +0 -0
  39. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/hallucination.mdx +0 -0
  40. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/introduction.mdx +0 -0
  41. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/json_correctness.mdx +0 -0
  42. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/summarization.mdx +0 -0
  43. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/scorers/tool_correctness.mdx +0 -0
  44. {judgeval-0.0.13 → judgeval-0.0.14}/docs/evaluation/unit_testing.mdx +0 -0
  45. {judgeval-0.0.13 → judgeval-0.0.14}/docs/favicon.svg +0 -0
  46. {judgeval-0.0.13 → judgeval-0.0.14}/docs/getting_started.mdx +0 -0
  47. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/basic_trace_example.png +0 -0
  48. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/checks-passed.png +0 -0
  49. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/create_aggressive_scorer.png +0 -0
  50. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/create_scorer.png +0 -0
  51. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/evaluation_diagram.png +0 -0
  52. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/hero-dark.svg +0 -0
  53. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/hero-light.svg +0 -0
  54. {judgeval-0.0.13 → judgeval-0.0.14}/docs/images/trace_screenshot.png +0 -0
  55. {judgeval-0.0.13 → judgeval-0.0.14}/docs/introduction.mdx +0 -0
  56. {judgeval-0.0.13 → judgeval-0.0.14}/docs/judgment/introduction.mdx +0 -0
  57. {judgeval-0.0.13 → judgeval-0.0.14}/docs/logo/dark.svg +0 -0
  58. {judgeval-0.0.13 → judgeval-0.0.14}/docs/logo/light.svg +0 -0
  59. {judgeval-0.0.13 → judgeval-0.0.14}/docs/mint.json +0 -0
  60. {judgeval-0.0.13 → judgeval-0.0.14}/docs/monitoring/introduction.mdx +0 -0
  61. {judgeval-0.0.13 → judgeval-0.0.14}/docs/monitoring/production_insights.mdx +0 -0
  62. {judgeval-0.0.13 → judgeval-0.0.14}/docs/monitoring/tracing.mdx +0 -0
  63. {judgeval-0.0.13 → judgeval-0.0.14}/docs/notebooks/create_dataset.ipynb +0 -0
  64. {judgeval-0.0.13 → judgeval-0.0.14}/docs/notebooks/create_scorer.ipynb +0 -0
  65. {judgeval-0.0.13 → judgeval-0.0.14}/docs/notebooks/demo.ipynb +0 -0
  66. {judgeval-0.0.13 → judgeval-0.0.14}/docs/notebooks/prompt_scorer.ipynb +0 -0
  67. {judgeval-0.0.13 → judgeval-0.0.14}/docs/notebooks/quickstart.ipynb +0 -0
  68. {judgeval-0.0.13 → judgeval-0.0.14}/docs/quickstart.mdx +0 -0
  69. {judgeval-0.0.13 → judgeval-0.0.14}/docs/snippets/snippet-intro.mdx +0 -0
  70. {judgeval-0.0.13 → judgeval-0.0.14}/pytest.ini +0 -0
  71. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/ci_testing/ci_testing.py +0 -0
  72. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/ci_testing/travel_response.txt +0 -0
  73. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/custom_scorers/competitor_mentions.py +0 -0
  74. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/custom_scorers/text2sql.py +0 -0
  75. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/langchain_basic_rag/basic_agentic_rag.ipynb +0 -0
  76. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/langchain_basic_rag/tesla_q3.pdf +0 -0
  77. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/langchain_sales/example_product_price_id_mapping.json +0 -0
  78. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/langchain_sales/sales_agent_with_context.ipynb +0 -0
  79. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/langchain_sales/sample_product_catalog.txt +0 -0
  80. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/new_bot/basic_bot.py +0 -0
  81. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/openai_travel_agent/populate_db.py +0 -0
  82. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/cookbooks/openai_travel_agent/tools.py +0 -0
  83. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/basic_test.py +0 -0
  84. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/cstone_data.csv +0 -0
  85. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/data.csv +0 -0
  86. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/faithfulness_testing.py +0 -0
  87. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/galen_data.csv +0 -0
  88. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/playground.py +0 -0
  89. {judgeval-0.0.13 → judgeval-0.0.14}/src/demo/customer_use/cstone/results.csv +0 -0
  90. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/__init__.py +0 -0
  91. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/clients.py +0 -0
  92. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/common/__init__.py +0 -0
  93. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/common/exceptions.py +0 -0
  94. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/common/logger.py +0 -0
  95. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/common/utils.py +0 -0
  96. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/constants.py +0 -0
  97. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/__init__.py +0 -0
  98. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/api_example.py +0 -0
  99. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/datasets/__init__.py +0 -0
  100. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/datasets/ground_truth.py +0 -0
  101. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/datasets/utils.py +0 -0
  102. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/result.py +0 -0
  103. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/data/scorer_data.py +0 -0
  104. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/__init__.py +0 -0
  105. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/base_judge.py +0 -0
  106. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/litellm_judge.py +0 -0
  107. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/mixture_of_judges.py +0 -0
  108. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/together_judge.py +0 -0
  109. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/judges/utils.py +0 -0
  110. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/__init__.py +0 -0
  111. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/api_scorer.py +0 -0
  112. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/base_scorer.py +0 -0
  113. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/exceptions.py +0 -0
  114. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorer.py +0 -0
  115. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  116. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  117. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  118. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  119. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -0
  120. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -0
  121. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -0
  122. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  123. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
  124. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -0
  125. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -0
  126. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -0
  127. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
  128. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
  129. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
  130. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -0
  131. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -0
  132. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -0
  133. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -0
  134. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -0
  135. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -0
  136. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -0
  137. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -0
  138. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -0
  139. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -0
  140. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -0
  141. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -0
  142. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -0
  143. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -0
  144. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -0
  145. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -0
  146. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -0
  147. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -0
  148. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -0
  149. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -0
  150. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -0
  151. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -0
  152. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -0
  153. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -0
  154. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -0
  155. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -0
  156. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -0
  157. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -0
  158. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -0
  159. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/prompt_scorer.py +0 -0
  160. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/score.py +0 -0
  161. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/scorers/utils.py +0 -0
  162. {judgeval-0.0.13 → judgeval-0.0.14}/src/judgeval/tracer/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.13
3
+ Version: 0.0.14
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.0.13"
3
+ version = "0.0.14"
4
4
  authors = [
5
5
  { name="Andrew Li", email="andrew@judgmentlabs.ai" },
6
6
  { name="Alex Shan", email="alex@judgmentlabs.ai" },
@@ -50,7 +50,7 @@ async def get_flights(destination):
50
50
  judgment.get_current_trace().async_evaluate(
51
51
  scorers=[AnswerRelevancyScorer(threshold=0.5)],
52
52
  input=prompt,
53
- actual_output=flights_search,
53
+ actual_output=flights_search["results"],
54
54
  model="gpt-4",
55
55
  )
56
56
  return flights_search
@@ -63,7 +63,7 @@ async def get_weather(destination, start_date, end_date):
63
63
  judgment.get_current_trace().async_evaluate(
64
64
  scorers=[AnswerRelevancyScorer(threshold=0.5)],
65
65
  input=prompt,
66
- actual_output=weather_search,
66
+ actual_output=weather_search["results"],
67
67
  model="gpt-4",
68
68
  )
69
69
  return weather_search
@@ -188,8 +188,9 @@ class TraceManagerClient:
188
188
  - Saving a trace
189
189
  - Deleting a trace
190
190
  """
191
- def __init__(self, judgment_api_key: str):
191
+ def __init__(self, judgment_api_key: str, organization_id: str):
192
192
  self.judgment_api_key = judgment_api_key
193
+ self.organization_id = organization_id
193
194
 
194
195
  def fetch_trace(self, trace_id: str):
195
196
  """
@@ -199,11 +200,11 @@ class TraceManagerClient:
199
200
  JUDGMENT_TRACES_FETCH_API_URL,
200
201
  json={
201
202
  "trace_id": trace_id,
202
- # "judgment_api_key": self.judgment_api_key,
203
203
  },
204
204
  headers={
205
205
  "Content-Type": "application/json",
206
- "Authorization": f"Bearer {self.judgment_api_key}"
206
+ "Authorization": f"Bearer {self.judgment_api_key}",
207
+ "X-Organization-Id": self.organization_id
207
208
  }
208
209
  )
209
210
 
@@ -226,7 +227,8 @@ class TraceManagerClient:
226
227
  json=trace_data,
227
228
  headers={
228
229
  "Content-Type": "application/json",
229
- "Authorization": f"Bearer {self.judgment_api_key}"
230
+ "Authorization": f"Bearer {self.judgment_api_key}",
231
+ "X-Organization-Id": self.organization_id
230
232
  }
231
233
  )
232
234
 
@@ -245,12 +247,12 @@ class TraceManagerClient:
245
247
  response = requests.delete(
246
248
  JUDGMENT_TRACES_DELETE_API_URL,
247
249
  json={
248
- "judgment_api_key": self.judgment_api_key,
249
250
  "trace_ids": [trace_id],
250
251
  },
251
252
  headers={
252
253
  "Content-Type": "application/json",
253
- "Authorization": f"Bearer {self.judgment_api_key}"
254
+ "Authorization": f"Bearer {self.judgment_api_key}",
255
+ "X-Organization-Id": self.organization_id
254
256
  }
255
257
  )
256
258
 
@@ -266,12 +268,12 @@ class TraceManagerClient:
266
268
  response = requests.delete(
267
269
  JUDGMENT_TRACES_DELETE_API_URL,
268
270
  json={
269
- # "judgment_api_key": self.judgment_api_key,
270
271
  "trace_ids": trace_ids,
271
272
  },
272
273
  headers={
273
274
  "Content-Type": "application/json",
274
- "Authorization": f"Bearer {self.judgment_api_key}"
275
+ "Authorization": f"Bearer {self.judgment_api_key}",
276
+ "X-Organization-Id": self.organization_id
275
277
  }
276
278
  )
277
279
 
@@ -294,7 +296,7 @@ class TraceClient:
294
296
  self.span_type = None
295
297
  self._current_span: Optional[TraceEntry] = None
296
298
  self.overwrite = overwrite
297
- self.trace_manager_client = TraceManagerClient(tracer.api_key) # Manages DB operations for trace data
299
+ self.trace_manager_client = TraceManagerClient(tracer.api_key, tracer.organization_id) # Manages DB operations for trace data
298
300
 
299
301
  @contextmanager
300
302
  def span(self, name: str, span_type: SpanType = "span"):
@@ -371,6 +373,7 @@ class TraceClient:
371
373
  raise ValueError(f"Failed to load scorers: {str(e)}")
372
374
 
373
375
  eval_run = EvaluationRun(
376
+ organization_id=self.tracer.organization_id,
374
377
  log_results=log_results,
375
378
  project_name=self.project_name,
376
379
  eval_name=f"{self.name.capitalize()}-"
@@ -546,7 +549,6 @@ class TraceClient:
546
549
  # Create trace document
547
550
  trace_data = {
548
551
  "trace_id": self.trace_id,
549
- "api_key": self.tracer.api_key,
550
552
  "name": self.name,
551
553
  "project_name": self.project_name,
552
554
  "created_at": datetime.fromtimestamp(self.start_time).isoformat(),
@@ -568,6 +570,8 @@ class TraceClient:
568
570
  channel = connection.channel()
569
571
 
570
572
  channel.queue_declare(queue=RABBITMQ_QUEUE, durable=True)
573
+ trace_data["judgment_api_key"] = self.tracer.api_key
574
+ trace_data["organization_id"] = self.tracer.organization_id
571
575
 
572
576
  channel.basic_publish(
573
577
  exchange='',
@@ -580,25 +584,6 @@ class TraceClient:
580
584
 
581
585
  self.trace_manager_client.save_trace(trace_data, empty_save)
582
586
 
583
-
584
- # Save trace data by making POST request to API
585
- response = requests.post(
586
- JUDGMENT_TRACES_SAVE_API_URL,
587
- json=trace_data,
588
- headers={
589
- "Content-Type": "application/json",
590
- "Authorization": f"Bearer {self.tracer.api_key}" # Bearer token format
591
- }
592
- )
593
-
594
- if response.status_code == HTTPStatus.BAD_REQUEST:
595
- raise ValueError(f"Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: {response.text}")
596
- elif response.status_code != HTTPStatus.OK:
597
- raise ValueError(f"Failed to save trace data: {response.text}")
598
-
599
- if not empty_save and "ui_results_url" in response.json():
600
- rprint(f"\n🔍 You can view your trace data here: [rgb(106,0,255)]{response.json()['ui_results_url']}[/]\n")
601
-
602
587
  return self.trace_id, trace_data
603
588
 
604
589
  def delete(self):
@@ -612,14 +597,18 @@ class Tracer:
612
597
  cls._instance = super(Tracer, cls).__new__(cls)
613
598
  return cls._instance
614
599
 
615
- def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project"):
600
+ def __init__(self, api_key: str = os.getenv("JUDGMENT_API_KEY"), project_name: str = "default_project", organization_id: str = os.getenv("ORGANIZATION_ID")):
616
601
  if not hasattr(self, 'initialized'):
617
602
  if not api_key:
618
603
  raise ValueError("Tracer must be configured with a Judgment API key")
619
604
 
605
+ if not organization_id:
606
+ raise ValueError("Tracer must be configured with an Organization ID")
607
+
620
608
  self.api_key: str = api_key
621
609
  self.project_name: str = project_name
622
610
  self.client: JudgmentClient = JudgmentClient(judgment_api_key=api_key)
611
+ self.organization_id: str = organization_id
623
612
  self.depth: int = 0
624
613
  self._current_trace: Optional[str] = None
625
614
  self.initialized: bool = True
@@ -17,9 +17,10 @@ class EvalDataset:
17
17
  _alias: Union[str, None] = field(default=None)
18
18
  _id: Union[str, None] = field(default=None)
19
19
  judgment_api_key: str = field(default="")
20
-
20
+ organization_id: str = field(default="")
21
21
  def __init__(self,
22
22
  judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"),
23
+ organization_id: str = os.getenv("ORGANIZATION_ID"),
23
24
  ground_truths: List[GroundTruthExample] = [],
24
25
  examples: List[Example] = [],
25
26
  ):
@@ -31,7 +32,7 @@ class EvalDataset:
31
32
  self._alias = None
32
33
  self._id = None
33
34
  self.judgment_api_key = judgment_api_key
34
-
35
+ self.organization_id = organization_id
35
36
 
36
37
  def add_from_json(self, file_path: str) -> None:
37
38
  debug(f"Loading dataset from JSON file: {file_path}")
@@ -19,8 +19,9 @@ from judgeval.data.datasets.ground_truth import GroundTruthExample
19
19
 
20
20
 
21
21
  class EvalDatasetClient:
22
- def __init__(self, judgment_api_key: str):
22
+ def __init__(self, judgment_api_key: str, organization_id: str):
23
23
  self.judgment_api_key = judgment_api_key
24
+ self.organization_id = organization_id
24
25
 
25
26
  def create_dataset(self) -> EvalDataset:
26
27
  return EvalDataset(judgment_api_key=self.judgment_api_key)
@@ -58,7 +59,6 @@ class EvalDatasetClient:
58
59
  "ground_truths": [g.to_dict() for g in dataset.ground_truths],
59
60
  "examples": [e.to_dict() for e in dataset.examples],
60
61
  "overwrite": overwrite,
61
- # "judgment_api_key": dataset.judgment_api_key
62
62
  }
63
63
  try:
64
64
  response = requests.post(
@@ -66,7 +66,8 @@ class EvalDatasetClient:
66
66
  json=content,
67
67
  headers={
68
68
  "Content-Type": "application/json",
69
- "Authorization": f"Bearer {self.judgment_api_key}"
69
+ "Authorization": f"Bearer {self.judgment_api_key}",
70
+ "X-Organization-Id": self.organization_id
70
71
  }
71
72
  )
72
73
  if response.status_code == 500:
@@ -121,7 +122,6 @@ class EvalDatasetClient:
121
122
  )
122
123
  request_body = {
123
124
  "alias": alias,
124
- # "judgment_api_key": self.judgment_api_key
125
125
  }
126
126
 
127
127
  try:
@@ -130,7 +130,8 @@ class EvalDatasetClient:
130
130
  json=request_body,
131
131
  headers={
132
132
  "Content-Type": "application/json",
133
- "Authorization": f"Bearer {self.judgment_api_key}"
133
+ "Authorization": f"Bearer {self.judgment_api_key}",
134
+ "X-Organization-Id": self.organization_id
134
135
  }
135
136
  )
136
137
  response.raise_for_status()
@@ -179,7 +180,6 @@ class EvalDatasetClient:
179
180
  total=100,
180
181
  )
181
182
  request_body = {
182
- # "judgment_api_key": self.judgment_api_key
183
183
  }
184
184
 
185
185
  try:
@@ -188,7 +188,8 @@ class EvalDatasetClient:
188
188
  json=request_body,
189
189
  headers={
190
190
  "Content-Type": "application/json",
191
- "Authorization": f"Bearer {self.judgment_api_key}"
191
+ "Authorization": f"Bearer {self.judgment_api_key}",
192
+ "X-Organization-Id": self.organization_id
192
193
  }
193
194
  )
194
195
  response.raise_for_status()
@@ -238,7 +239,12 @@ class EvalDatasetClient:
238
239
  try:
239
240
  response = requests.post(
240
241
  JUDGMENT_DATASETS_EDIT_API_URL,
241
- json=content
242
+ json=content,
243
+ headers={
244
+ "Content-Type": "application/json",
245
+ "Authorization": f"Bearer {self.judgment_api_key}",
246
+ "X-Organization-Id": self.organization_id
247
+ }
242
248
  )
243
249
  response.raise_for_status()
244
250
  except requests.exceptions.RequestException as e:
@@ -266,7 +272,8 @@ class EvalDatasetClient:
266
272
  json={"alias": alias},
267
273
  headers={
268
274
  "Content-Type": "application/json",
269
- "Authorization": f"Bearer {self.judgment_api_key}"
275
+ "Authorization": f"Bearer {self.judgment_api_key}",
276
+ "X-Organization-Id": self.organization_id
270
277
  },
271
278
  stream=True
272
279
  )
@@ -5,7 +5,7 @@ Classes for representing examples in a dataset.
5
5
 
6
6
  from typing import TypeVar, Optional, Any, Dict, List
7
7
  from uuid import uuid4
8
- from pydantic import BaseModel, Field
8
+ from pydantic import BaseModel, Field, field_validator
9
9
  from enum import Enum
10
10
  from datetime import datetime
11
11
  import time
@@ -40,6 +40,13 @@ class Example(BaseModel):
40
40
  timestamp: Optional[str] = None
41
41
  trace_id: Optional[str] = None
42
42
 
43
+ @field_validator('input', 'actual_output', mode='before')
44
+ def convert_to_str(cls, value):
45
+ try:
46
+ return str(value)
47
+ except Exception:
48
+ return repr(value)
49
+
43
50
  def __init__(self, **data):
44
51
  if 'example_id' not in data:
45
52
  data['example_id'] = str(uuid4())
@@ -24,6 +24,7 @@ class EvaluationRun(BaseModel):
24
24
 
25
25
  # The user will specify whether they want log_results when they call run_eval
26
26
  log_results: bool = False # NOTE: log_results has to be set first because it is used to validate project_name and eval_name
27
+ organization_id: Optional[str] = None
27
28
  project_name: Optional[str] = None
28
29
  eval_name: Optional[str] = None
29
30
  examples: List[Example]
@@ -34,9 +34,10 @@ class EvalRunRequestBody(BaseModel):
34
34
 
35
35
 
36
36
  class JudgmentClient:
37
- def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY")):
37
+ def __init__(self, judgment_api_key: str = os.getenv("JUDGMENT_API_KEY"), organization_id: str = os.getenv("ORGANIZATION_ID")):
38
38
  self.judgment_api_key = judgment_api_key
39
- self.eval_dataset_client = EvalDatasetClient(judgment_api_key)
39
+ self.organization_id = organization_id
40
+ self.eval_dataset_client = EvalDatasetClient(judgment_api_key, organization_id)
40
41
 
41
42
  # Verify API key is valid
42
43
  result, response = self._validate_api_key()
@@ -78,7 +79,8 @@ class JudgmentClient:
78
79
  model=model,
79
80
  aggregator=aggregator,
80
81
  metadata=metadata,
81
- judgment_api_key=self.judgment_api_key
82
+ judgment_api_key=self.judgment_api_key,
83
+ organization_id=self.organization_id
82
84
  )
83
85
  return run_eval(eval, override)
84
86
  except ValueError as e:
@@ -115,7 +117,8 @@ class JudgmentClient:
115
117
  model=model,
116
118
  aggregator=aggregator,
117
119
  metadata=metadata,
118
- judgment_api_key=self.judgment_api_key
120
+ judgment_api_key=self.judgment_api_key,
121
+ organization_id=self.organization_id
119
122
  )
120
123
  return run_eval(evaluation_run)
121
124
  except ValueError as e:
@@ -189,7 +192,8 @@ class JudgmentClient:
189
192
  eval_run = requests.post(JUDGMENT_EVAL_FETCH_API_URL,
190
193
  headers={
191
194
  "Content-Type": "application/json",
192
- "Authorization": f"Bearer {self.judgment_api_key}"
195
+ "Authorization": f"Bearer {self.judgment_api_key}",
196
+ "X-Organization-Id": self.organization_id
193
197
  },
194
198
  json=eval_run_request_body.model_dump())
195
199
  if eval_run.status_code != requests.codes.ok:
@@ -222,7 +226,8 @@ class JudgmentClient:
222
226
  json=eval_run_request_body.model_dump(),
223
227
  headers={
224
228
  "Content-Type": "application/json",
225
- "Authorization": f"Bearer {self.judgment_api_key}"
229
+ "Authorization": f"Bearer {self.judgment_api_key}",
230
+ "X-Organization-Id": self.organization_id
226
231
  })
227
232
  if response.status_code != requests.codes.ok:
228
233
  raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -241,11 +246,12 @@ class JudgmentClient:
241
246
  response = requests.delete(JUDGMENT_EVAL_DELETE_PROJECT_API_URL,
242
247
  json={
243
248
  "project_name": project_name,
244
- "judgment_api_key": self.judgment_api_key
249
+ "judgment_api_key": self.judgment_api_key,
245
250
  },
246
251
  headers={
247
252
  "Content-Type": "application/json",
248
- "Authorization": f"Bearer {self.judgment_api_key}"
253
+ "Authorization": f"Bearer {self.judgment_api_key}",
254
+ "X-Organization-Id": self.organization_id
249
255
  })
250
256
  if response.status_code != requests.codes.ok:
251
257
  raise ValueError(f"Error deleting eval results: {response.json()}")
@@ -283,7 +289,6 @@ class JudgmentClient:
283
289
  """
284
290
  request_body = {
285
291
  "slug": slug,
286
- # "judgment_api_key": self.judgment_api_key
287
292
  }
288
293
 
289
294
  response = requests.post(
@@ -291,7 +296,8 @@ class JudgmentClient:
291
296
  json=request_body,
292
297
  headers={
293
298
  "Content-Type": "application/json",
294
- "Authorization": f"Bearer {self.judgment_api_key}"
299
+ "Authorization": f"Bearer {self.judgment_api_key}",
300
+ "X-Organization-Id": self.organization_id
295
301
  }
296
302
  )
297
303
 
@@ -325,7 +331,6 @@ class JudgmentClient:
325
331
  "name": scorer.name,
326
332
  "conversation": scorer.conversation,
327
333
  "options": scorer.options,
328
- # "judgment_api_key": self.judgment_api_key,
329
334
  "slug": slug
330
335
  }
331
336
 
@@ -334,7 +339,8 @@ class JudgmentClient:
334
339
  json=request_body,
335
340
  headers={
336
341
  "Content-Type": "application/json",
337
- "Authorization": f"Bearer {self.judgment_api_key}"
342
+ "Authorization": f"Bearer {self.judgment_api_key}",
343
+ "X-Organization-Id": self.organization_id
338
344
  }
339
345
  )
340
346
 
@@ -50,7 +50,8 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> List[Dict]:
50
50
  response = requests.post(
51
51
  JUDGMENT_EVAL_API_URL, headers={
52
52
  "Content-Type": "application/json",
53
- "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
53
+ "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
54
+ "X-Organization-Id": evaluation_run.organization_id
54
55
  },
55
56
  json=payload)
56
57
  response_data = response.json()
@@ -140,7 +141,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
140
141
  return results
141
142
 
142
143
 
143
- def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str) -> None:
144
+ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_key: str, organization_id: str) -> None:
144
145
  """
145
146
  Checks if an evaluation run name already exists for a given project.
146
147
 
@@ -158,7 +159,8 @@ def check_eval_run_name_exists(eval_name: str, project_name: str, judgment_api_k
158
159
  f"{ROOT_API}/eval-run-name-exists/",
159
160
  headers={
160
161
  "Content-Type": "application/json",
161
- "Authorization": f"Bearer {judgment_api_key}"
162
+ "Authorization": f"Bearer {judgment_api_key}",
163
+ "X-Organization-Id": organization_id
162
164
  },
163
165
  json={
164
166
  "eval_name": eval_name,
@@ -199,11 +201,11 @@ def log_evaluation_results(merged_results: List[ScoringResult], evaluation_run:
199
201
  JUDGMENT_EVAL_LOG_API_URL,
200
202
  headers={
201
203
  "Content-Type": "application/json",
202
- "Authorization": f"Bearer {evaluation_run.judgment_api_key}"
204
+ "Authorization": f"Bearer {evaluation_run.judgment_api_key}",
205
+ "X-Organization-Id": evaluation_run.organization_id
203
206
  },
204
207
  json={
205
208
  "results": [result.to_dict() for result in merged_results],
206
- "judgment_api_key": evaluation_run.judgment_api_key,
207
209
  "project_name": evaluation_run.project_name,
208
210
  "eval_name": evaluation_run.eval_name,
209
211
  }
@@ -254,7 +256,8 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
254
256
  check_eval_run_name_exists(
255
257
  evaluation_run.eval_name,
256
258
  evaluation_run.project_name,
257
- evaluation_run.judgment_api_key
259
+ evaluation_run.judgment_api_key,
260
+ evaluation_run.organization_id
258
261
  )
259
262
 
260
263
  # Set example IDs if not already set
@@ -312,6 +315,7 @@ def run_eval(evaluation_run: EvaluationRun, override: bool = False) -> List[Scor
312
315
  aggregator=evaluation_run.aggregator,
313
316
  metadata=evaluation_run.metadata,
314
317
  judgment_api_key=evaluation_run.judgment_api_key,
318
+ organization_id=evaluation_run.organization_id,
315
319
  log_results=evaluation_run.log_results
316
320
  )
317
321
  debug("Sending request to Judgment API")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes