judgeval 0.7.1__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. judgeval-0.9.0/.github/workflows/claude-code-review.yml +35 -0
  2. judgeval-0.9.0/.github/workflows/claude.yml +40 -0
  3. {judgeval-0.7.1 → judgeval-0.9.0}/PKG-INFO +12 -14
  4. judgeval-0.9.0/pyproject.toml +94 -0
  5. judgeval-0.9.0/scripts/api_generator.py +360 -0
  6. judgeval-0.9.0/scripts/openapi_transform.py +122 -0
  7. judgeval-0.9.0/scripts/update_types.sh +35 -0
  8. judgeval-0.9.0/src/judgeval/__init__.py +142 -0
  9. judgeval-0.9.0/src/judgeval/api/__init__.py +501 -0
  10. judgeval-0.9.0/src/judgeval/api/api_types.py +344 -0
  11. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/cli.py +2 -4
  12. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/constants.py +10 -26
  13. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/evaluation_run.py +49 -26
  14. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/example.py +2 -2
  15. judgeval-0.9.0/src/judgeval/data/judgment_types.py +398 -0
  16. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/result.py +4 -5
  17. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/scorer_data.py +4 -2
  18. judgeval-0.9.0/src/judgeval/data/tool.py +5 -0
  19. judgeval-0.9.0/src/judgeval/data/trace.py +40 -0
  20. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/trace_run.py +7 -4
  21. judgeval-0.7.1/src/judgeval/dataset.py → judgeval-0.9.0/src/judgeval/dataset/__init__.py +43 -28
  22. judgeval-0.9.0/src/judgeval/env.py +67 -0
  23. judgeval-0.7.1/src/judgeval/run_evaluation.py → judgeval-0.9.0/src/judgeval/evaluation/__init__.py +29 -95
  24. judgeval-0.9.0/src/judgeval/exceptions.py +27 -0
  25. judgeval-0.9.0/src/judgeval/integrations/langgraph/__init__.py +788 -0
  26. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/judges/__init__.py +2 -2
  27. judgeval-0.9.0/src/judgeval/judges/litellm_judge.py +129 -0
  28. judgeval-0.9.0/src/judgeval/judges/together_judge.py +136 -0
  29. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/judges/utils.py +7 -21
  30. {judgeval-0.7.1/src/judgeval/common → judgeval-0.9.0/src/judgeval}/logger.py +8 -6
  31. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/__init__.py +0 -4
  32. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/agent_scorer.py +3 -7
  33. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/api_scorer.py +8 -13
  34. judgeval-0.9.0/src/judgeval/scorers/base_scorer.py +98 -0
  35. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/example_scorer.py +1 -3
  36. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
  37. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
  38. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
  39. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
  40. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/score.py +21 -31
  41. judgeval-0.9.0/src/judgeval/scorers/trace_api_scorer.py +5 -0
  42. judgeval-0.9.0/src/judgeval/scorers/utils.py +17 -0
  43. judgeval-0.9.0/src/judgeval/tracer/__init__.py +1076 -0
  44. judgeval-0.9.0/src/judgeval/tracer/constants.py +1 -0
  45. judgeval-0.9.0/src/judgeval/tracer/exporters/__init__.py +37 -0
  46. judgeval-0.9.0/src/judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval-0.9.0/src/judgeval/tracer/exporters/store.py +43 -0
  48. judgeval-0.9.0/src/judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval-0.9.0/src/judgeval/tracer/keys.py +67 -0
  50. judgeval-0.9.0/src/judgeval/tracer/llm/__init__.py +1233 -0
  51. {judgeval-0.7.1/src/judgeval/common/tracer → judgeval-0.9.0/src/judgeval/tracer/llm}/providers.py +5 -10
  52. {judgeval-0.7.1/src/judgeval → judgeval-0.9.0/src/judgeval/tracer}/local_eval_queue.py +15 -10
  53. judgeval-0.9.0/src/judgeval/tracer/managers.py +188 -0
  54. judgeval-0.9.0/src/judgeval/tracer/processors/__init__.py +181 -0
  55. judgeval-0.9.0/src/judgeval/tracer/utils.py +20 -0
  56. judgeval-0.9.0/src/judgeval/trainer/__init__.py +5 -0
  57. {judgeval-0.7.1/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/config.py +12 -9
  58. {judgeval-0.7.1/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/console.py +2 -9
  59. {judgeval-0.7.1/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/trainable_model.py +12 -7
  60. {judgeval-0.7.1/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/trainer.py +119 -17
  61. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/utils/async_utils.py +2 -3
  62. judgeval-0.9.0/src/judgeval/utils/decorators.py +24 -0
  63. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/utils/file_utils.py +37 -4
  64. judgeval-0.9.0/src/judgeval/utils/guards.py +32 -0
  65. judgeval-0.9.0/src/judgeval/utils/meta.py +14 -0
  66. judgeval-0.7.1/src/judgeval/common/api/json_encoder.py → judgeval-0.9.0/src/judgeval/utils/serialize.py +7 -1
  67. judgeval-0.9.0/src/judgeval/utils/testing.py +88 -0
  68. judgeval-0.9.0/src/judgeval/utils/url.py +10 -0
  69. {judgeval-0.7.1/src/judgeval → judgeval-0.9.0/src/judgeval/utils}/version_check.py +3 -3
  70. judgeval-0.9.0/src/judgeval/version.py +5 -0
  71. judgeval-0.9.0/src/judgeval/warnings.py +4 -0
  72. judgeval-0.9.0/uv.lock +3941 -0
  73. judgeval-0.7.1/pyproject.toml +0 -109
  74. judgeval-0.7.1/src/.coveragerc +0 -4
  75. judgeval-0.7.1/src/judgeval/__init__.py +0 -15
  76. judgeval-0.7.1/src/judgeval/clients.py +0 -35
  77. judgeval-0.7.1/src/judgeval/common/__init__.py +0 -13
  78. judgeval-0.7.1/src/judgeval/common/api/__init__.py +0 -3
  79. judgeval-0.7.1/src/judgeval/common/api/api.py +0 -375
  80. judgeval-0.7.1/src/judgeval/common/api/constants.py +0 -186
  81. judgeval-0.7.1/src/judgeval/common/exceptions.py +0 -27
  82. judgeval-0.7.1/src/judgeval/common/storage/__init__.py +0 -6
  83. judgeval-0.7.1/src/judgeval/common/storage/s3_storage.py +0 -97
  84. judgeval-0.7.1/src/judgeval/common/tracer/__init__.py +0 -31
  85. judgeval-0.7.1/src/judgeval/common/tracer/constants.py +0 -22
  86. judgeval-0.7.1/src/judgeval/common/tracer/core.py +0 -2427
  87. judgeval-0.7.1/src/judgeval/common/tracer/otel_exporter.py +0 -108
  88. judgeval-0.7.1/src/judgeval/common/tracer/otel_span_processor.py +0 -188
  89. judgeval-0.7.1/src/judgeval/common/tracer/span_processor.py +0 -37
  90. judgeval-0.7.1/src/judgeval/common/tracer/span_transformer.py +0 -207
  91. judgeval-0.7.1/src/judgeval/common/tracer/trace_manager.py +0 -101
  92. judgeval-0.7.1/src/judgeval/common/trainer/__init__.py +0 -5
  93. judgeval-0.7.1/src/judgeval/common/utils.py +0 -948
  94. judgeval-0.7.1/src/judgeval/data/judgment_types.py +0 -214
  95. judgeval-0.7.1/src/judgeval/data/tool.py +0 -5
  96. judgeval-0.7.1/src/judgeval/data/trace.py +0 -83
  97. judgeval-0.7.1/src/judgeval/integrations/langgraph.py +0 -844
  98. judgeval-0.7.1/src/judgeval/judges/litellm_judge.py +0 -69
  99. judgeval-0.7.1/src/judgeval/judges/mixture_of_judges.py +0 -287
  100. judgeval-0.7.1/src/judgeval/judges/together_judge.py +0 -68
  101. judgeval-0.7.1/src/judgeval/judgment_client.py +0 -267
  102. judgeval-0.7.1/src/judgeval/rules.py +0 -521
  103. judgeval-0.7.1/src/judgeval/scorers/base_scorer.py +0 -78
  104. judgeval-0.7.1/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  105. judgeval-0.7.1/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  106. judgeval-0.7.1/src/judgeval/scorers/utils.py +0 -119
  107. judgeval-0.7.1/src/judgeval/tracer/__init__.py +0 -3
  108. judgeval-0.7.1/src/judgeval/utils/alerts.py +0 -93
  109. judgeval-0.7.1/src/judgeval/utils/requests.py +0 -50
  110. judgeval-0.7.1/src/update_types.sh +0 -14
  111. judgeval-0.7.1/uv.lock +0 -4562
  112. {judgeval-0.7.1 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  113. {judgeval-0.7.1 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  114. {judgeval-0.7.1 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  115. {judgeval-0.7.1 → judgeval-0.9.0}/.github/pull_request_template.md +0 -0
  116. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/blocked-pr.yaml +0 -0
  117. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/ci.yaml +0 -0
  118. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/lint.yaml +0 -0
  119. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/merge-branch-check.yaml +0 -0
  120. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/mypy.yaml +0 -0
  121. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  122. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/release.yaml +0 -0
  123. {judgeval-0.7.1 → judgeval-0.9.0}/.github/workflows/validate-branch.yaml +0 -0
  124. {judgeval-0.7.1 → judgeval-0.9.0}/.gitignore +0 -0
  125. {judgeval-0.7.1 → judgeval-0.9.0}/.pre-commit-config.yaml +0 -0
  126. {judgeval-0.7.1 → judgeval-0.9.0}/LICENSE.md +0 -0
  127. {judgeval-0.7.1 → judgeval-0.9.0}/README.md +0 -0
  128. {judgeval-0.7.1 → judgeval-0.9.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  129. {judgeval-0.7.1 → judgeval-0.9.0}/assets/agent.gif +0 -0
  130. {judgeval-0.7.1 → judgeval-0.9.0}/assets/agent_trace_example.png +0 -0
  131. {judgeval-0.7.1 → judgeval-0.9.0}/assets/data.gif +0 -0
  132. {judgeval-0.7.1 → judgeval-0.9.0}/assets/dataset_clustering_screenshot.png +0 -0
  133. {judgeval-0.7.1 → judgeval-0.9.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
  134. {judgeval-0.7.1 → judgeval-0.9.0}/assets/datasets_preview_screenshot.png +0 -0
  135. {judgeval-0.7.1 → judgeval-0.9.0}/assets/document.gif +0 -0
  136. {judgeval-0.7.1 → judgeval-0.9.0}/assets/error_analysis_dashboard.png +0 -0
  137. {judgeval-0.7.1 → judgeval-0.9.0}/assets/errors.png +0 -0
  138. {judgeval-0.7.1 → judgeval-0.9.0}/assets/experiments_dashboard_screenshot.png +0 -0
  139. {judgeval-0.7.1 → judgeval-0.9.0}/assets/experiments_page.png +0 -0
  140. {judgeval-0.7.1 → judgeval-0.9.0}/assets/experiments_pagev2.png +0 -0
  141. {judgeval-0.7.1 → judgeval-0.9.0}/assets/logo-dark.svg +0 -0
  142. {judgeval-0.7.1 → judgeval-0.9.0}/assets/logo-light.svg +0 -0
  143. {judgeval-0.7.1 → judgeval-0.9.0}/assets/monitoring_screenshot.png +0 -0
  144. {judgeval-0.7.1 → judgeval-0.9.0}/assets/new_darkmode.svg +0 -0
  145. {judgeval-0.7.1 → judgeval-0.9.0}/assets/new_lightmode.svg +0 -0
  146. {judgeval-0.7.1 → judgeval-0.9.0}/assets/online_eval.png +0 -0
  147. {judgeval-0.7.1 → judgeval-0.9.0}/assets/product_shot.png +0 -0
  148. {judgeval-0.7.1 → judgeval-0.9.0}/assets/test.png +0 -0
  149. {judgeval-0.7.1 → judgeval-0.9.0}/assets/tests.png +0 -0
  150. {judgeval-0.7.1 → judgeval-0.9.0}/assets/trace.gif +0 -0
  151. {judgeval-0.7.1 → judgeval-0.9.0}/assets/trace_demo.png +0 -0
  152. {judgeval-0.7.1 → judgeval-0.9.0}/assets/trace_screenshot.png +0 -0
  153. {judgeval-0.7.1 → judgeval-0.9.0}/assets/trace_screenshot_old.png +0 -0
  154. {judgeval-0.7.1 → judgeval-0.9.0}/pytest.ini +0 -0
  155. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/__init__.py +0 -0
  156. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  157. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  158. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/judges/base_judge.py +0 -0
  159. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/exceptions.py +0 -0
  160. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  161. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  162. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  163. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
  164. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  165. {judgeval-0.7.1 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  166. {judgeval-0.7.1 → judgeval-0.9.0}/update_version.py +0 -0
@@ -0,0 +1,35 @@
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ jobs:
7
+ claude-review:
8
+ if: github.event.issue.pull_request && contains(github.event.comment.body, '/claude review')
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: read
12
+ pull-requests: read
13
+ issues: read
14
+ id-token: write
15
+
16
+ steps:
17
+ - name: Checkout repository
18
+ uses: actions/checkout@v4
19
+ with:
20
+ fetch-depth: 1
21
+
22
+ - name: Run Claude Code Review
23
+ id: claude-review
24
+ uses: anthropics/claude-code-action@beta
25
+ with:
26
+ anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
27
+ direct_prompt: |
28
+ Please review this pull request and provide feedback on:
29
+ - Code quality and best practices
30
+ - Potential bugs or issues
31
+ - Performance considerations
32
+ - Security concerns
33
+ - Test coverage
34
+
35
+ Be constructive and helpful in your feedback.
@@ -0,0 +1,40 @@
1
+ name: Claude Code
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ pull_request_review_comment:
7
+ types: [created]
8
+ issues:
9
+ types: [opened, assigned]
10
+ pull_request_review:
11
+ types: [submitted]
12
+
13
+ jobs:
14
+ claude:
15
+ if: |
16
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20
+ runs-on: ubuntu-latest
21
+ permissions:
22
+ contents: read
23
+ pull-requests: read
24
+ issues: read
25
+ id-token: write
26
+ actions: read
27
+ steps:
28
+ - name: Checkout repository
29
+ uses: actions/checkout@v4
30
+ with:
31
+ fetch-depth: 1
32
+
33
+ - name: Run Claude Code
34
+ id: claude
35
+ uses: anthropics/claude-code-action@beta
36
+ with:
37
+ anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
38
+
39
+ additional_permissions: |
40
+ actions: read
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.7.1
3
+ Version: 0.9.0
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -10,27 +10,25 @@ License-File: LICENSE.md
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.11
13
- Requires-Dist: boto3
13
+ Requires-Dist: boto3>=1.40.11
14
14
  Requires-Dist: click<8.2.0
15
- Requires-Dist: fireworks-ai>=0.19.18
16
- Requires-Dist: langchain-anthropic
17
- Requires-Dist: langchain-core
18
- Requires-Dist: langchain-huggingface
19
- Requires-Dist: langchain-openai
20
- Requires-Dist: litellm>=1.61.15
21
- Requires-Dist: nest-asyncio>=1.6.0
22
- Requires-Dist: opentelemetry-api>=1.34.1
23
- Requires-Dist: opentelemetry-sdk>=1.34.1
15
+ Requires-Dist: dotenv
16
+ Requires-Dist: httpx>=0.28.1
17
+ Requires-Dist: litellm<1.75.0
18
+ Requires-Dist: opentelemetry-exporter-otlp>=1.36.0
19
+ Requires-Dist: opentelemetry-sdk>=1.36.0
20
+ Requires-Dist: opentelemetry-semantic-conventions>=0.57b0
24
21
  Requires-Dist: orjson>=3.9.0
25
- Requires-Dist: python-dotenv
26
- Requires-Dist: requests
27
- Requires-Dist: rich
28
22
  Requires-Dist: typer>=0.9.0
29
23
  Provides-Extra: langchain
30
24
  Requires-Dist: langchain-anthropic; extra == 'langchain'
31
25
  Requires-Dist: langchain-core; extra == 'langchain'
32
26
  Requires-Dist: langchain-huggingface; extra == 'langchain'
33
27
  Requires-Dist: langchain-openai; extra == 'langchain'
28
+ Provides-Extra: s3
29
+ Requires-Dist: boto3>=1.40.11; extra == 's3'
30
+ Provides-Extra: trainer
31
+ Requires-Dist: fireworks-ai>=0.19.18; extra == 'trainer'
34
32
  Description-Content-Type: text/markdown
35
33
 
36
34
  <div align="center">
@@ -0,0 +1,94 @@
1
+ [project]
2
+ name = "judgeval"
3
+ version = "0.9.0"
4
+ authors = [
5
+ { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
6
+ { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
7
+ { name = "Joseph Camyre", email = "joseph@judgmentlabs.ai" },
8
+ ]
9
+ description = "Judgeval Package"
10
+ readme = "README.md"
11
+ requires-python = ">=3.11"
12
+ classifiers = [
13
+ "Programming Language :: Python :: 3",
14
+ "Operating System :: OS Independent",
15
+ ]
16
+ license = "Apache-2.0"
17
+ license-files = ["LICENSE.md"]
18
+
19
+ dependencies = [
20
+ "dotenv",
21
+ "httpx>=0.28.1",
22
+ "litellm<1.75.0", # https://github.com/BerriAI/litellm/issues/13081
23
+ "opentelemetry-exporter-otlp>=1.36.0",
24
+ "opentelemetry-sdk>=1.36.0",
25
+ "opentelemetry-semantic-conventions>=0.57b0",
26
+ "orjson>=3.9.0",
27
+ "click<8.2.0",
28
+ "typer>=0.9.0",
29
+ "boto3>=1.40.11",
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/JudgmentLabs/judgeval"
34
+ Issues = "https://github.com/JudgmentLabs/judgeval/issues"
35
+
36
+ [project.scripts]
37
+ judgeval = "judgeval.cli:app"
38
+
39
+ [build-system]
40
+ requires = ["hatchling"]
41
+ build-backend = "hatchling.build"
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/judgeval"]
45
+ include = ["/src/judgeval", "/src/judgeval/**/*.py"]
46
+
47
+ [project.optional-dependencies]
48
+ langchain = [
49
+ "langchain-huggingface",
50
+ "langchain-openai",
51
+ "langchain-anthropic",
52
+ "langchain-core",
53
+ ]
54
+ s3 = ["boto3>=1.40.11"]
55
+ trainer = ["fireworks-ai>=0.19.18"]
56
+
57
+
58
+ [dependency-groups]
59
+ dev = [
60
+ "anthropic>=0.61.0",
61
+ "boto3-stubs[s3]>=1.40.11",
62
+ "datamodel-code-generator>=0.32.0",
63
+ "google-genai>=1.28.0",
64
+ "groq>=0.30.0",
65
+ "langchain-core>=0.3.72",
66
+ "langgraph>=0.6.4",
67
+ "mypy>=1.17.1",
68
+ "openai>=1.78.1",
69
+ "opentelemetry-instrumentation-openai>=0.44.1",
70
+ "ruff>=0.9.1,<0.10.0",
71
+ "together>=1.5.21",
72
+ "types-pyyaml>=6.0.12.20250516",
73
+ "pre-commit>=4.2.0",
74
+ "pytest>=8.4.1",
75
+ "pytest-cov>=6.2.1",
76
+ "types-tqdm>=4.67.0.20250809",
77
+ "pytest-asyncio>=1.1.0",
78
+ ]
79
+
80
+
81
+ [tool.hatch.build]
82
+ directory = "dist"
83
+ artifacts = ["src/judgeval/**/*.py"]
84
+ exclude = ["src/e2etests/*", "src/tests/*", "src/demo/*"]
85
+
86
+ [tool.ruff]
87
+ exclude = ["docs"]
88
+
89
+ [tool.ruff.lint]
90
+ ignore = [
91
+ "F403",
92
+ "F405",
93
+ "E402",
94
+ ] # F403: star import, F405: undefined name from star import, E402: module-level import not at top of file
@@ -0,0 +1,360 @@
1
+ from __future__ import annotations
2
+
3
+ import orjson
4
+ import sys
5
+ from typing import Any, Dict, List, Optional
6
+ import httpx
7
+ import re
8
+
9
# Location of the OpenAPI document: the first command-line argument when given,
# otherwise the OpenAPI endpoint of a server on localhost:8000.
spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"

# Only genuine HTTP(S) URLs are fetched over the network. Anything else is read
# from disk, including a local file whose name merely begins with "http".
if spec_file.startswith(("http://", "https://")):
    r = httpx.get(spec_file)
    r.raise_for_status()  # fail loudly instead of parsing an error page as a spec
    SPEC = r.json()
else:
    with open(spec_file, "rb") as f:
        SPEC = orjson.loads(f.read())
18
+
19
# OpenAPI paths for which client methods are generated. generate_api_file()
# keeps only the spec paths listed here and reports, on stderr, any listed path
# that is absent from the loaded spec. Entries must match the spec's path keys
# exactly (note that some end with a trailing slash and some do not).
JUDGEVAL_PATHS: List[str] = [
    "/traces/spans/batch/",
    "/traces/evaluation_runs/batch/",
    "/traces/fetch/",
    "/traces/upsert/",
    "/traces/add_to_dataset/",
    "/projects/add/",
    "/projects/delete_from_judgeval/",
    "/evaluate/traces",
    "/evaluate/examples",
    "/evaluate_trace/",
    "/log_eval_results/",
    "/fetch_experiment_run/",
    "/add_to_run_eval_queue/examples",
    "/add_to_run_eval_queue/traces",
    "/get_evaluation_status/",
    "/save_scorer/",
    "/fetch_scorer/",
    "/scorer_exists/",
    "/upload_custom_scorer/",
    "/datasets/push/",
    "/datasets/insert_examples/",
    "/datasets/pull_for_judgeval/",
    "/datasets/fetch_stats_by_project/",
    "/projects/resolve/",
    "/e2e_fetch_trace/",
    "/e2e_fetch_span_score/",
]
47
+
48
+
49
def resolve_ref(ref: str) -> str:
    """Return the schema name referenced by a local OpenAPI ``$ref``.

    Args:
        ref: A reference of the form ``#/components/schemas/<Name>``.

    Returns:
        The ``<Name>`` part of the reference.

    Raises:
        ValueError: If ``ref`` does not start with ``#/components/schemas/``.
    """
    prefix = "#/components/schemas/"
    # An explicit raise (rather than ``assert``) keeps the check active when
    # Python runs with ``-O``.
    if not ref.startswith(prefix):
        raise ValueError("Reference must start with #/components/schemas/")
    # Strip only the leading prefix; any later occurrence belongs to the name.
    return ref.removeprefix(prefix)
54
+
55
+
56
def to_snake_case(name: str) -> str:
    """Convert a CamelCase / mixedCase identifier to lower snake_case.

    Two passes are used: the first separates a capitalised word from whatever
    precedes it, the second splits a lowercase letter or digit from a following
    uppercase letter. The result is lower-cased.
    """
    words_split = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    boundaries_split = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", words_split)
    return boundaries_split.lower()
59
+
60
+
61
def get_method_name_from_path(path: str, method: str) -> str:
    """Derive a Python method name from an API path.

    Leading and trailing slashes are dropped, and every remaining ``/`` or
    ``-`` becomes ``_``. ``method`` is accepted but does not influence the
    result, so operations sharing a path share a name.
    """
    separators_to_underscore = str.maketrans("/-", "__")
    return path.strip("/").translate(separators_to_underscore)
63
+
64
+
65
def get_query_parameters(operation: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Collect the query-string parameters declared on an operation.

    Args:
        operation: An OpenAPI operation object.

    Returns:
        One dict per parameter whose ``in`` is ``"query"``, holding its
        ``name``, its ``required`` flag (``False`` when absent) and its schema
        ``type`` (``"str"`` when absent), in declaration order.
    """
    return [
        {
            "name": declared["name"],
            "required": declared.get("required", False),
            "type": declared.get("schema", {}).get("type", "str"),
        }
        for declared in operation.get("parameters", [])
        if declared.get("in") == "query"
    ]
80
+
81
+
82
def get_request_schema(operation: Dict[str, Any]) -> Optional[str]:
    """Return the schema name of an operation's JSON request body, if any.

    Returns ``None`` when the operation has no request body, when the body has
    no ``application/json`` content, or when that content's schema is not a
    ``$ref``.
    """
    body = operation.get("requestBody", {})
    if not body:
        return None

    content = body.get("content", {})
    if "application/json" not in content:
        return None

    schema = content["application/json"].get("schema", {})
    return resolve_ref(schema["$ref"]) if "$ref" in schema else None
94
+
95
+
96
def get_response_schema(operation: Dict[str, Any]) -> Optional[str]:
    """Return the schema name of an operation's successful JSON response.

    The ``200`` response is inspected first, then ``201``. The first one whose
    ``application/json`` schema is a ``$ref`` determines the result; ``None``
    is returned when neither qualifies.
    """
    responses = operation.get("responses", {})
    for status in ("200", "201"):
        if status not in responses:
            continue
        content = responses[status].get("content", {})
        if "application/json" not in content:
            continue
        schema = content["application/json"].get("schema", {})
        if "$ref" in schema:
            return resolve_ref(schema["$ref"])
    return None
108
+
109
+
110
def generate_method_signature(
    method_name: str,
    request_type: Optional[str],
    query_params: List[Dict[str, Any]],
    response_type: str,
    is_async: bool = False,
) -> str:
    """Build the ``def`` line of a generated client method.

    Parameter order is ``self``, required query parameters, ``payload`` (only
    when ``request_type`` is truthy), then optional query parameters defaulting
    to ``None``. Every query parameter is annotated as ``str`` regardless of
    its schema type.

    Returns:
        The signature line, ending with a colon and carrying no indentation.
    """
    required_args = [f"{qp['name']}: str" for qp in query_params if qp["required"]]
    optional_args = [
        f"{qp['name']}: Optional[str] = None"
        for qp in query_params
        if not qp["required"]
    ]
    payload_arg = [f"payload: {request_type}"] if request_type else []

    arguments = ", ".join(["self", *required_args, *payload_arg, *optional_args])
    keyword = "async def" if is_async else "def"
    return f"{keyword} {method_name}({arguments}) -> {response_type}:"
141
+
142
+
143
def generate_method_body(
    method_name: str,
    path: str,
    method: str,
    request_type: Optional[str],
    query_params: List[Dict[str, Any]],
    is_async: bool = False,
) -> str:
    """Build the body of a generated client method.

    The body optionally assembles a ``query_params`` dict and then returns the
    result of ``self._request(...)`` (awaited when ``is_async`` is true).

    Indentation contract: the first line of the returned text carries no
    indent (the caller supplies it); every following line is indented for a
    method body nested inside a class, i.e. 8 spaces plus any nesting.

    Args:
        method_name: Name of the method being generated (not used in the body).
        path: API path handed to ``url_for`` in the generated code.
        method: Upper-case HTTP method name.
        request_type: Name of the request body type, or ``None`` if there is none.
        query_params: Parameter dicts with ``name`` and ``required`` keys.
        is_async: Whether the generated call must be awaited.

    Returns:
        The generated statements as a single newline-joined string.
    """
    body_indent = " " * 8  # depth of a statement inside a method inside a class
    nested = " " * 4  # one extra level: `if` bodies and call arguments

    statements: List[str] = []
    if query_params:
        statements.append("query_params = {}")
        for param in query_params:
            param_name = param["name"]
            if param["required"]:
                statements.append(f"query_params['{param_name}'] = {param_name}")
            else:
                # Optional parameters are only sent when the caller provided one.
                statements.append(f"if {param_name} is not None:")
                statements.append(f"{nested}query_params['{param_name}'] = {param_name}")
        query_arg = "query_params"
    else:
        query_arg = "{}"

    if method == "GET":
        # GET carries its query dict in the payload slot of _request.
        call_args = [query_arg]
    else:
        call_args = ["payload" if request_type else "{}"]
        if query_params:
            call_args.append(f"params={query_arg}")

    awaited = "await " if is_async else ""
    statements.append(f"return {awaited}self._request(")
    statements.append(f'{nested}"{method}",')
    statements.append(f'{nested}url_for("{path}"),')
    statements.extend(f"{nested}{arg}," for arg in call_args)
    statements.append(")")

    return ("\n" + body_indent).join(statements)


def generate_client_class(
    class_name: str, methods: List[Dict[str, Any]], is_async: bool = False
) -> str:
    """Build the source of one generated API client class.

    The class has ``__slots__``, an ``__init__`` that stores the credentials
    and creates an ``httpx`` client with a 30 second timeout, a ``_request``
    helper, and one method per entry in ``methods``.

    Args:
        class_name: Name of the class to generate.
        methods: Dicts with the keys ``name``, ``path``, ``method``,
            ``request_type``, ``query_params`` and ``response_type``.
        is_async: Generate ``async`` methods on top of ``httpx.AsyncClient``
            instead of synchronous ones on top of ``httpx.Client``.

    Returns:
        The class source, indented with 4 spaces per level.
    """
    client_type = "httpx.AsyncClient" if is_async else "httpx.Client"
    request_def = "async def _request" if is_async else "def _request"
    # With the async client, `r` is a coroutine and is awaited on return.
    response_expr = "await r" if is_async else "r"

    lines = [
        f"class {class_name}:",
        '    __slots__ = ("api_key", "organization_id", "client")',
        "",
        "    def __init__(self, api_key: str, organization_id: str):",
        "        self.api_key = api_key",
        "        self.organization_id = organization_id",
        f"        self.client = {client_type}(timeout=30)",
        "",
        f"    {request_def}(",
        '        self, method: Literal["POST", "PATCH", "GET", "DELETE"], url: str, payload: Any, params: Optional[Dict[str, Any]] = None',
        "    ) -> Any:",
        '        if method == "GET":',
        "            r = self.client.request(",
        "                method,",
        "                url,",
        "                params=payload if params is None else params,",
        "                headers=_headers(self.api_key, self.organization_id),",
        "            )",
        "        else:",
        "            r = self.client.request(",
        "                method,",
        "                url,",
        "                json=json_encoder(payload),",
        "                params=params,",
        "                headers=_headers(self.api_key, self.organization_id),",
        "            )",
        f"        return _handle_response({response_expr})",
        "",
    ]

    for method_info in methods:
        method_name = method_info["name"]
        path = method_info["path"]
        http_method = method_info["method"]
        request_type = method_info["request_type"]
        query_params = method_info["query_params"]
        response_type = method_info["response_type"]

        signature = generate_method_signature(
            method_name, request_type, query_params, response_type, is_async
        )
        body = generate_method_body(
            method_name, path, http_method, request_type, query_params, is_async
        )
        # The signature sits at class level; the body's first line is indented
        # here and its continuation lines arrive already indented.
        lines.append(f"    {signature}")
        lines.append(f"        {body}")
        lines.append("")

    return "\n".join(lines)
252
+
253
+
254
def generate_api_file() -> str:
    """Render the complete source of the generated API client module.

    The output consists of the module imports, the ``_headers`` and
    ``_handle_response`` helpers, a synchronous and an asynchronous client
    class built from the same method descriptions, and an ``__all__`` list.

    Only spec paths listed in ``JUDGEVAL_PATHS`` are used, in the order the
    spec declares them. Diagnostics go to stderr: listed paths missing from
    the spec, a summary of each generated method, and a note when an operation
    has no resolvable request or response schema (the response type then falls
    back to ``Any``).

    Returns:
        The generated module source, indented with 4 spaces per level.
    """
    lines = [
        "from typing import List, Dict, Any, Mapping, Literal, Optional",
        "import httpx",
        "from httpx import Response",
        "from judgeval.exceptions import JudgmentAPIError",
        "from judgeval.utils.url import url_for",
        "from judgeval.utils.serialize import json_encoder",
        "from judgeval.api.api_types import *",
        "",
        "",
        "def _headers(api_key: str, organization_id: str) -> Mapping[str, str]:",
        "    return {",
        '        "Content-Type": "application/json",',
        '        "Authorization": f"Bearer {api_key}",',
        '        "X-Organization-Id": organization_id,',
        "    }",
        "",
        "",
        "def _handle_response(r: Response) -> Any:",
        "    if r.status_code >= 400:",
        "        try:",
        '            detail = r.json().get("detail", "")',
        "        except Exception:",
        "            detail = r.text",
        "        raise JudgmentAPIError(r.status_code, detail, r)",
        "    return r.json()",
        "",
        "",
    ]

    spec_paths = SPEC["paths"]
    filtered_paths = {
        path: spec_data
        for path, spec_data in spec_paths.items()
        if path in JUDGEVAL_PATHS
    }

    # Surface stale or misspelled entries instead of silently generating less.
    for path in JUDGEVAL_PATHS:
        if path not in spec_paths:
            print(f"Path {path} not found in OpenAPI spec", file=sys.stderr)

    # Both clients expose the same operations, so one description list serves
    # the sync and the async class alike.
    methods = []
    http_methods = ("GET", "POST", "PUT", "PATCH", "DELETE")

    for path, path_data in filtered_paths.items():
        for method, operation in path_data.items():
            http_method = method.upper()
            # Path items may hold non-operation keys; only HTTP methods count.
            if http_method not in http_methods:
                continue

            method_name = get_method_name_from_path(path, http_method)
            request_schema = get_request_schema(operation)
            response_schema = get_response_schema(operation)
            query_params = get_query_parameters(operation)

            print(
                method_name,
                request_schema,
                response_schema,
                query_params,
                file=sys.stderr,
            )

            if not request_schema:
                print(f"No request type found for {method_name}", file=sys.stderr)

            if not response_schema:
                print(f"No response schema found for {method_name}", file=sys.stderr)

            methods.append(
                {
                    "name": method_name,
                    "path": path,
                    "method": http_method,
                    "request_type": request_schema if request_schema else None,
                    "query_params": query_params,
                    "response_type": response_schema if response_schema else "Any",
                }
            )

    sync_client = generate_client_class("JudgmentSyncClient", methods, is_async=False)
    async_client = generate_client_class("JudgmentAsyncClient", methods, is_async=True)

    lines.extend(
        [
            sync_client,
            "",
            "",
            async_client,
            "",
            "",
            "__all__ = [",
            '    "JudgmentSyncClient",',
            '    "JudgmentAsyncClient",',
            "]",
        ]
    )

    return "\n".join(lines)
356
+
357
+
358
if __name__ == "__main__":
    # Write the generated client module to stdout; diagnostics produced while
    # generating go to stderr.
    print(generate_api_file())