judgeval 0.16.7__tar.gz → 0.16.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (178) hide show
  1. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/ci.yaml +3 -1
  2. {judgeval-0.16.7 → judgeval-0.16.8}/.pre-commit-config.yaml +2 -2
  3. {judgeval-0.16.7 → judgeval-0.16.8}/PKG-INFO +1 -1
  4. {judgeval-0.16.7 → judgeval-0.16.8}/pyproject.toml +2 -2
  5. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/api/api_types.py +1 -2
  6. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/judgment_types.py +1 -2
  7. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/__init__.py +7 -52
  8. judgeval-0.16.8/src/judgeval/tracer/llm/config.py +78 -0
  9. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/llm/constants.py +0 -1
  10. judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  11. judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/messages.py +440 -0
  12. judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  13. judgeval-0.16.8/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  14. judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/__init__.py +3 -0
  15. judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/config.py +6 -0
  16. judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/generate_content.py +125 -0
  17. judgeval-0.16.8/src/judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  18. judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +192 -0
  19. judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/chat_completions.py +437 -0
  20. judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/config.py +6 -0
  21. judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/responses.py +444 -0
  22. judgeval-0.16.8/src/judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  23. judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/__init__.py +3 -0
  24. judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/chat_completions.py +398 -0
  25. judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/config.py +6 -0
  26. judgeval-0.16.8/src/judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  27. judgeval-0.16.8/src/judgeval/tracer/llm/providers.py +19 -0
  28. judgeval-0.16.8/src/judgeval/utils/decorators/dont_throw.py +37 -0
  29. judgeval-0.16.8/src/judgeval/utils/wrappers/README.md +3 -0
  30. judgeval-0.16.8/src/judgeval/utils/wrappers/__init__.py +15 -0
  31. judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  32. judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  33. judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  34. judgeval-0.16.8/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  35. judgeval-0.16.8/src/judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  36. judgeval-0.16.8/src/judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  37. judgeval-0.16.8/src/judgeval/utils/wrappers/utils.py +35 -0
  38. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/version.py +1 -1
  39. {judgeval-0.16.7 → judgeval-0.16.8}/uv.lock +744 -626
  40. judgeval-0.16.7/src/judgeval/tracer/llm/config.py +0 -110
  41. judgeval-0.16.7/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -20
  42. judgeval-0.16.7/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -640
  43. judgeval-0.16.7/src/judgeval/tracer/llm/llm_google/config.py +0 -24
  44. judgeval-0.16.7/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -465
  45. judgeval-0.16.7/src/judgeval/tracer/llm/llm_groq/config.py +0 -23
  46. judgeval-0.16.7/src/judgeval/tracer/llm/llm_groq/wrapper.py +0 -498
  47. judgeval-0.16.7/src/judgeval/tracer/llm/llm_openai/config.py +0 -32
  48. judgeval-0.16.7/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -661
  49. judgeval-0.16.7/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
  50. judgeval-0.16.7/src/judgeval/tracer/llm/llm_together/config.py +0 -23
  51. judgeval-0.16.7/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -503
  52. judgeval-0.16.7/src/judgeval/tracer/llm/providers.py +0 -63
  53. judgeval-0.16.7/src/judgeval/tracer/local_eval_queue.py +0 -199
  54. judgeval-0.16.7/src/judgeval/utils/decorators/__init__.py +0 -0
  55. judgeval-0.16.7/src/judgeval/utils/decorators/dont_throw.py +0 -21
  56. {judgeval-0.16.7 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  57. {judgeval-0.16.7 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  58. {judgeval-0.16.7 → judgeval-0.16.8}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  59. {judgeval-0.16.7 → judgeval-0.16.8}/.github/pull_request_template.md +0 -0
  60. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/blocked-pr.yaml +0 -0
  61. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/claude-code-review.yml +0 -0
  62. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/claude.yml +0 -0
  63. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/lint.yaml +0 -0
  64. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/merge-branch-check.yaml +0 -0
  65. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/mypy.yaml +0 -0
  66. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  67. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/release.yaml +0 -0
  68. {judgeval-0.16.7 → judgeval-0.16.8}/.github/workflows/validate-branch.yaml +0 -0
  69. {judgeval-0.16.7 → judgeval-0.16.8}/.gitignore +0 -0
  70. {judgeval-0.16.7 → judgeval-0.16.8}/CONTRIBUTING.md +0 -0
  71. {judgeval-0.16.7 → judgeval-0.16.8}/LICENSE.md +0 -0
  72. {judgeval-0.16.7 → judgeval-0.16.8}/README.md +0 -0
  73. {judgeval-0.16.7 → judgeval-0.16.8}/"assets/Screenshot 2025-05-17 at 8.14.27\342\200\257PM.png" +0 -0
  74. {judgeval-0.16.7 → judgeval-0.16.8}/assets/agent.gif +0 -0
  75. {judgeval-0.16.7 → judgeval-0.16.8}/assets/agent_trace_example.png +0 -0
  76. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/company.jpg +0 -0
  77. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/company_banner.jpg +0 -0
  78. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/darkmode.svg +0 -0
  79. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/full_logo.png +0 -0
  80. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/icon.png +0 -0
  81. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/lightmode.svg +0 -0
  82. {judgeval-0.16.7 → judgeval-0.16.8}/assets/brand/white_background.png +0 -0
  83. {judgeval-0.16.7 → judgeval-0.16.8}/assets/custom_scorer_online_abm.png +0 -0
  84. {judgeval-0.16.7 → judgeval-0.16.8}/assets/data.gif +0 -0
  85. {judgeval-0.16.7 → judgeval-0.16.8}/assets/dataset_clustering_screenshot.png +0 -0
  86. {judgeval-0.16.7 → judgeval-0.16.8}/assets/dataset_clustering_screenshot_dm.png +0 -0
  87. {judgeval-0.16.7 → judgeval-0.16.8}/assets/datasets_preview_screenshot.png +0 -0
  88. {judgeval-0.16.7 → judgeval-0.16.8}/assets/document.gif +0 -0
  89. {judgeval-0.16.7 → judgeval-0.16.8}/assets/error_analysis_dashboard.png +0 -0
  90. {judgeval-0.16.7 → judgeval-0.16.8}/assets/errors.png +0 -0
  91. {judgeval-0.16.7 → judgeval-0.16.8}/assets/experiments_dashboard_screenshot.png +0 -0
  92. {judgeval-0.16.7 → judgeval-0.16.8}/assets/experiments_page.png +0 -0
  93. {judgeval-0.16.7 → judgeval-0.16.8}/assets/experiments_pagev2.png +0 -0
  94. {judgeval-0.16.7 → judgeval-0.16.8}/assets/logo_darkmode.svg +0 -0
  95. {judgeval-0.16.7 → judgeval-0.16.8}/assets/logo_lightmode.svg +0 -0
  96. {judgeval-0.16.7 → judgeval-0.16.8}/assets/monitoring_screenshot.png +0 -0
  97. {judgeval-0.16.7 → judgeval-0.16.8}/assets/online_eval.png +0 -0
  98. {judgeval-0.16.7 → judgeval-0.16.8}/assets/product_shot.png +0 -0
  99. {judgeval-0.16.7 → judgeval-0.16.8}/assets/quickstart_trajectory_ss.png +0 -0
  100. {judgeval-0.16.7 → judgeval-0.16.8}/assets/test.png +0 -0
  101. {judgeval-0.16.7 → judgeval-0.16.8}/assets/tests.png +0 -0
  102. {judgeval-0.16.7 → judgeval-0.16.8}/assets/trace.gif +0 -0
  103. {judgeval-0.16.7 → judgeval-0.16.8}/assets/trace_demo.png +0 -0
  104. {judgeval-0.16.7 → judgeval-0.16.8}/assets/trace_screenshot.png +0 -0
  105. {judgeval-0.16.7 → judgeval-0.16.8}/assets/trace_screenshot_old.png +0 -0
  106. {judgeval-0.16.7 → judgeval-0.16.8}/pytest.ini +0 -0
  107. {judgeval-0.16.7 → judgeval-0.16.8}/scripts/api_generator.py +0 -0
  108. {judgeval-0.16.7 → judgeval-0.16.8}/scripts/openapi_transform.py +0 -0
  109. {judgeval-0.16.7 → judgeval-0.16.8}/scripts/update_types.sh +0 -0
  110. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/__init__.py +0 -0
  111. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/api/__init__.py +0 -0
  112. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/cli.py +0 -0
  113. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/constants.py +0 -0
  114. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/__init__.py +0 -0
  115. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/evaluation_run.py +0 -0
  116. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/example.py +0 -0
  117. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/result.py +0 -0
  118. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/scorer_data.py +0 -0
  119. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  120. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  121. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/data/trace.py +0 -0
  122. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/dataset/__init__.py +0 -0
  123. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/env.py +0 -0
  124. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/evaluation/__init__.py +0 -0
  125. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/exceptions.py +0 -0
  126. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  127. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/integrations/openlit/__init__.py +0 -0
  128. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/judges/__init__.py +0 -0
  129. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/judges/base_judge.py +0 -0
  130. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/judges/litellm_judge.py +0 -0
  131. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/judges/together_judge.py +0 -0
  132. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/judges/utils.py +0 -0
  133. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/logger.py +0 -0
  134. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/__init__.py +0 -0
  135. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/agent_scorer.py +0 -0
  136. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/api_scorer.py +0 -0
  137. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/base_scorer.py +0 -0
  138. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/example_scorer.py +0 -0
  139. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/exceptions.py +0 -0
  140. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  141. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  142. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  143. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  144. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  145. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  146. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  147. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/score.py +0 -0
  148. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/scorers/utils.py +0 -0
  149. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/constants.py +0 -0
  150. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/exporters/__init__.py +0 -0
  151. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/exporters/s3.py +0 -0
  152. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/exporters/store.py +0 -0
  153. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/exporters/utils.py +0 -0
  154. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/keys.py +0 -0
  155. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/llm/__init__.py +0 -0
  156. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
  157. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
  158. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/managers.py +0 -0
  159. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/processors/__init__.py +0 -0
  160. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/tracer/utils.py +0 -0
  161. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/trainer/__init__.py +0 -0
  162. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/trainer/config.py +0 -0
  163. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/trainer/console.py +0 -0
  164. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/trainer/trainable_model.py +0 -0
  165. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/trainer/trainer.py +0 -0
  166. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/async_utils.py +0 -0
  167. {judgeval-0.16.7/src/judgeval/tracer/llm/llm_google → judgeval-0.16.8/src/judgeval/utils/decorators}/__init__.py +0 -0
  168. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/decorators/use_once.py +0 -0
  169. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/file_utils.py +0 -0
  170. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/guards.py +0 -0
  171. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/meta.py +0 -0
  172. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/serialize.py +0 -0
  173. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/testing.py +0 -0
  174. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/url.py +0 -0
  175. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/utils/version_check.py +0 -0
  176. /judgeval-0.16.7/src/judgeval/tracer/llm/llm_groq/__init__.py → /judgeval-0.16.8/src/judgeval/utils/wrappers/py.typed +0 -0
  177. {judgeval-0.16.7 → judgeval-0.16.8}/src/judgeval/warnings.py +0 -0
  178. {judgeval-0.16.7 → judgeval-0.16.8}/update_version.py +0 -0
@@ -28,6 +28,8 @@ jobs:
28
28
  PYTHONPATH: "."
29
29
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
30
30
  TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
31
+ GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
32
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
31
33
  JUDGMENT_DEV: true
32
34
 
33
35
  steps:
@@ -49,7 +51,7 @@ jobs:
49
51
  cd src
50
52
  export JUDGMENT_API_KEY="$JUDGEVAL_GH_JUDGMENT_API_KEY"
51
53
  export JUDGMENT_ORG_ID="$JUDGEVAL_GH_JUDGMENT_ORG_ID"
52
- uv run pytest tests
54
+ uv run pytest tests -n auto
53
55
 
54
56
  run-e2e-tests:
55
57
  needs: [validate-branch]
@@ -1,11 +1,11 @@
1
1
  repos:
2
2
  - repo: https://github.com/astral-sh/uv-pre-commit
3
- rev: 0.8.23
3
+ rev: 0.9.2
4
4
  hooks:
5
5
  - id: uv-lock
6
6
 
7
7
  - repo: https://github.com/astral-sh/ruff-pre-commit
8
- rev: v0.13.3
8
+ rev: v0.14.0
9
9
  hooks:
10
10
  - id: ruff
11
11
  name: ruff (linter)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.16.7
3
+ Version: 0.16.8
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.16.7"
3
+ version = "0.16.8"
4
4
  authors = [
5
5
  { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
6
6
  { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -19,7 +19,7 @@ license-files = ["LICENSE.md"]
19
19
  dependencies = [
20
20
  "dotenv",
21
21
  "httpx>=0.28.1",
22
- "litellm>=1.75.0",
22
+ "litellm>=1.75.0",
23
23
  "opentelemetry-exporter-otlp>=1.36.0",
24
24
  "opentelemetry-sdk>=1.36.0",
25
25
  "orjson>=3.9.0",
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: .openapi.json
3
- # timestamp: 2025-10-09T00:16:42+00:00
3
+ # timestamp: 2025-10-15T19:25:00+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
  from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -94,7 +94,6 @@ class ResolveProjectNameRequest(TypedDict):
94
94
 
95
95
  class ResolveProjectNameResponse(TypedDict):
96
96
  project_id: str
97
- project_created: bool
98
97
 
99
98
 
100
99
  class TraceIdRequest(TypedDict):
@@ -1,6 +1,6 @@
1
1
  # generated by datamodel-codegen:
2
2
  # filename: .openapi.json
3
- # timestamp: 2025-10-09T00:16:41+00:00
3
+ # timestamp: 2025-10-15T19:24:59+00:00
4
4
 
5
5
  from __future__ import annotations
6
6
  from typing import Annotated, Any, Dict, List, Optional, Union
@@ -101,7 +101,6 @@ class ResolveProjectNameRequest(BaseModel):
101
101
 
102
102
  class ResolveProjectNameResponse(BaseModel):
103
103
  project_id: Annotated[str, Field(title="Project Id")]
104
- project_created: Annotated[bool, Field(title="Project Created")]
105
104
 
106
105
 
107
106
  class TraceIdRequest(BaseModel):
@@ -66,7 +66,6 @@ from judgeval.tracer.keys import AttributeKeys, InternalAttributeKeys
66
66
  from judgeval.api import JudgmentSyncClient
67
67
  from judgeval.tracer.llm import wrap_provider
68
68
  from judgeval.utils.url import url_for
69
- from judgeval.tracer.local_eval_queue import LocalEvaluationQueue
70
69
  from judgeval.tracer.processors import (
71
70
  JudgmentSpanProcessor,
72
71
  NoOpJudgmentSpanProcessor,
@@ -99,7 +98,6 @@ class Tracer(metaclass=SingletonMeta):
99
98
  "enable_evaluation",
100
99
  "resource_attributes",
101
100
  "api_client",
102
- "local_eval_queue",
103
101
  "judgment_processor",
104
102
  "tracer",
105
103
  "agent_context",
@@ -113,7 +111,6 @@ class Tracer(metaclass=SingletonMeta):
113
111
  enable_evaluation: bool
114
112
  resource_attributes: Optional[Dict[str, Any]]
115
113
  api_client: JudgmentSyncClient
116
- local_eval_queue: LocalEvaluationQueue
117
114
  judgment_processor: JudgmentSpanProcessor
118
115
  tracer: ABCTracer
119
116
  agent_context: ContextVar[Optional[AgentContext]]
@@ -148,7 +145,6 @@ class Tracer(metaclass=SingletonMeta):
148
145
  api_key=self.api_key,
149
146
  organization_id=self.organization_id,
150
147
  )
151
- self.local_eval_queue = LocalEvaluationQueue()
152
148
 
153
149
  if initialize:
154
150
  self.initialize()
@@ -159,14 +155,10 @@ class Tracer(metaclass=SingletonMeta):
159
155
 
160
156
  self.judgment_processor = NoOpJudgmentSpanProcessor()
161
157
  if self.enable_monitoring:
162
- project_id, project_created = Tracer._resolve_project_id(
158
+ project_id = Tracer._resolve_project_id(
163
159
  self.project_name, self.api_key, self.organization_id
164
- ) or (None, False)
160
+ )
165
161
  if project_id:
166
- if project_created:
167
- judgeval_logger.info(
168
- f"Project {self.project_name} was autocreated successfully."
169
- )
170
162
  self.judgment_processor = self.get_processor(
171
163
  tracer=self,
172
164
  project_name=self.project_name,
@@ -190,9 +182,6 @@ class Tracer(metaclass=SingletonMeta):
190
182
  get_version(),
191
183
  )
192
184
 
193
- if self.enable_evaluation and self.enable_monitoring:
194
- self.local_eval_queue.start_workers()
195
-
196
185
  self._initialized = True
197
186
  atexit.register(self._atexit_flush)
198
187
  return self
@@ -240,14 +229,14 @@ class Tracer(metaclass=SingletonMeta):
240
229
  @staticmethod
241
230
  def _resolve_project_id(
242
231
  project_name: str, api_key: str, organization_id: str
243
- ) -> Tuple[str, bool]:
232
+ ) -> str:
244
233
  """Resolve project_id from project_name using the API."""
245
234
  client = JudgmentSyncClient(
246
235
  api_key=api_key,
247
236
  organization_id=organization_id,
248
237
  )
249
238
  response = client.projects_resolve({"project_name": project_name})
250
- return response["project_id"], response["project_created"]
239
+ return response["project_id"]
251
240
 
252
241
  def get_current_span(self):
253
242
  return get_current_span()
@@ -299,6 +288,7 @@ class Tracer(metaclass=SingletonMeta):
299
288
  )
300
289
  current_agent_context["is_agent_entry_point"] = False
301
290
 
291
+ @dont_throw
302
292
  def record_instance_state(self, record_point: Literal["before", "after"], span):
303
293
  current_agent_context = self.agent_context.get()
304
294
 
@@ -955,45 +945,10 @@ class Tracer(metaclass=SingletonMeta):
955
945
  eval_run.model_dump(warnings=False) # type: ignore
956
946
  )
957
947
  else:
958
- # Enqueue the evaluation run to the local evaluation queue
959
- self.local_eval_queue.enqueue(eval_run)
960
-
961
- def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
962
- """Wait for all evaluations and span processing to complete.
963
-
964
- This method blocks until all queued evaluations are processed and
965
- all pending spans are flushed to the server.
966
-
967
- Args:
968
- timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
969
- None means wait indefinitely.
970
-
971
- Returns:
972
- True if all processing completed within the timeout, False otherwise.
973
-
974
- """
975
- try:
976
- judgeval_logger.debug(
977
- "Waiting for all evaluations and spans to complete..."
948
+ judgeval_logger.warning(
949
+ "The scorer provided is not hosted, skipping evaluation."
978
950
  )
979
951
 
980
- # Wait for all queued evaluation work to complete
981
- eval_completed = self.local_eval_queue.wait_for_completion()
982
- if not eval_completed:
983
- judgeval_logger.warning(
984
- f"Local evaluation queue did not complete within {timeout} seconds"
985
- )
986
- return False
987
-
988
- self.force_flush()
989
-
990
- judgeval_logger.debug("All evaluations and spans completed successfully")
991
- return True
992
-
993
- except Exception as e:
994
- judgeval_logger.warning(f"Error while waiting for completion: {e}")
995
- return False
996
-
997
952
 
998
953
  def wrap(client: ApiClient) -> ApiClient:
999
954
  try:
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ from judgeval.logger import judgeval_logger
4
+
5
+ from judgeval.tracer.llm.constants import ProviderType
6
+ from judgeval.tracer.llm.providers import (
7
+ HAS_OPENAI,
8
+ HAS_TOGETHER,
9
+ HAS_ANTHROPIC,
10
+ HAS_GOOGLE_GENAI,
11
+ ApiClient,
12
+ )
13
+
14
+ if TYPE_CHECKING:
15
+ from judgeval.tracer import Tracer
16
+
17
+
18
def _detect_provider(client: ApiClient) -> ProviderType:
    """Classify *client* by probing each optional LLM SDK that is installed.

    Probes run in a fixed order (OpenAI, Anthropic, Together, Google GenAI);
    the first installed SDK whose client class matches wins. When no installed
    SDK recognizes the client, logs a warning and returns
    ``ProviderType.DEFAULT`` so the caller can fall back to OpenAI-compatible
    wrapping.
    """

    def is_one_of(*candidates: type) -> bool:
        # Local helper so each probe below reads as a single line.
        return isinstance(client, candidates)

    if HAS_OPENAI:
        from openai import OpenAI, AsyncOpenAI

        if is_one_of(OpenAI, AsyncOpenAI):
            return ProviderType.OPENAI

    if HAS_ANTHROPIC:
        from anthropic import Anthropic, AsyncAnthropic

        if is_one_of(Anthropic, AsyncAnthropic):
            return ProviderType.ANTHROPIC

    if HAS_TOGETHER:
        from together import Together, AsyncTogether  # type: ignore[import-untyped]

        if is_one_of(Together, AsyncTogether):
            return ProviderType.TOGETHER

    if HAS_GOOGLE_GENAI:
        from google.genai import Client as GoogleClient

        if is_one_of(GoogleClient):
            return ProviderType.GOOGLE

    # No probe matched: warn once and let the default (OpenAI-compatible)
    # wrapper take its best shot.
    judgeval_logger.warning(
        f"Unknown client type {type(client)}, Trying to wrap as OpenAI-compatible. "
        "If this is a mistake or you think we should support this client, please file an issue at https://github.com/JudgmentLabs/judgeval/issues!"
    )

    return ProviderType.DEFAULT
49
+
50
+
51
def wrap_provider(tracer: Tracer, client: ApiClient) -> ApiClient:
    """
    Wraps an API client to add tracing capabilities.
    Supports OpenAI, Together, Anthropic, and Google GenAI clients; anything
    unrecognized is wrapped as OpenAI-compatible.
    """
    detected = _detect_provider(client)

    # Guard-style dispatch; each branch imports its wrapper lazily so that
    # only the SDK actually in use pays the import cost.
    if detected == ProviderType.ANTHROPIC:
        from .llm_anthropic.wrapper import wrap_anthropic_client

        return wrap_anthropic_client(tracer, client)

    if detected == ProviderType.TOGETHER:
        from .llm_together.wrapper import wrap_together_client

        return wrap_together_client(tracer, client)

    if detected == ProviderType.GOOGLE:
        from .llm_google.wrapper import wrap_google_client

        return wrap_google_client(tracer, client)

    # OPENAI and DEFAULT (unknown, assumed OpenAI-compatible) share a wrapper.
    from .llm_openai.wrapper import wrap_openai_client

    return wrap_openai_client(tracer, client)
@@ -6,5 +6,4 @@ class ProviderType(Enum):
6
6
  ANTHROPIC = "anthropic"
7
7
  TOGETHER = "together"
8
8
  GOOGLE = "google"
9
- GROQ = "groq"
10
9
  DEFAULT = "default"
@@ -0,0 +1,6 @@
1
from __future__ import annotations

from importlib.util import find_spec

# True when the optional `anthropic` SDK is importable in this environment.
# Only probes for the distribution; nothing is actually imported here.
HAS_ANTHROPIC = find_spec("anthropic") is not None

__all__ = ["HAS_ANTHROPIC"]