judgeval-0.8.0.tar.gz → judgeval-0.9.0.tar.gz
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- judgeval-0.9.0/.github/workflows/claude-code-review.yml +35 -0
- judgeval-0.9.0/.github/workflows/claude.yml +40 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/PKG-INFO +12 -14
- judgeval-0.9.0/pyproject.toml +94 -0
- judgeval-0.9.0/scripts/api_generator.py +360 -0
- judgeval-0.9.0/scripts/openapi_transform.py +122 -0
- judgeval-0.9.0/scripts/update_types.sh +35 -0
- judgeval-0.9.0/src/judgeval/__init__.py +142 -0
- judgeval-0.9.0/src/judgeval/api/__init__.py +501 -0
- judgeval-0.9.0/src/judgeval/api/api_types.py +344 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/cli.py +2 -4
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/constants.py +10 -26
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/evaluation_run.py +49 -26
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/example.py +2 -2
- judgeval-0.9.0/src/judgeval/data/judgment_types.py +398 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/result.py +4 -5
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/scorer_data.py +4 -2
- judgeval-0.9.0/src/judgeval/data/tool.py +5 -0
- judgeval-0.9.0/src/judgeval/data/trace.py +40 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/trace_run.py +7 -4
- judgeval-0.8.0/src/judgeval/dataset.py → judgeval-0.9.0/src/judgeval/dataset/__init__.py +43 -28
- judgeval-0.9.0/src/judgeval/env.py +67 -0
- judgeval-0.8.0/src/judgeval/run_evaluation.py → judgeval-0.9.0/src/judgeval/evaluation/__init__.py +29 -95
- judgeval-0.9.0/src/judgeval/exceptions.py +27 -0
- judgeval-0.9.0/src/judgeval/integrations/langgraph/__init__.py +788 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/judges/__init__.py +2 -2
- judgeval-0.9.0/src/judgeval/judges/litellm_judge.py +129 -0
- judgeval-0.9.0/src/judgeval/judges/together_judge.py +136 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/judges/utils.py +7 -21
- {judgeval-0.8.0/src/judgeval/common → judgeval-0.9.0/src/judgeval}/logger.py +8 -6
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/__init__.py +0 -4
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/agent_scorer.py +3 -7
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/api_scorer.py +8 -13
- judgeval-0.9.0/src/judgeval/scorers/base_scorer.py +98 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/example_scorer.py +1 -3
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -14
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +45 -20
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +2 -2
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +3 -3
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/score.py +21 -31
- judgeval-0.9.0/src/judgeval/scorers/trace_api_scorer.py +5 -0
- judgeval-0.9.0/src/judgeval/scorers/utils.py +17 -0
- judgeval-0.9.0/src/judgeval/tracer/__init__.py +1076 -0
- judgeval-0.9.0/src/judgeval/tracer/constants.py +1 -0
- judgeval-0.9.0/src/judgeval/tracer/exporters/__init__.py +37 -0
- judgeval-0.9.0/src/judgeval/tracer/exporters/s3.py +119 -0
- judgeval-0.9.0/src/judgeval/tracer/exporters/store.py +43 -0
- judgeval-0.9.0/src/judgeval/tracer/exporters/utils.py +32 -0
- judgeval-0.9.0/src/judgeval/tracer/keys.py +67 -0
- judgeval-0.9.0/src/judgeval/tracer/llm/__init__.py +1233 -0
- {judgeval-0.8.0/src/judgeval/common/tracer → judgeval-0.9.0/src/judgeval/tracer/llm}/providers.py +5 -10
- {judgeval-0.8.0/src/judgeval → judgeval-0.9.0/src/judgeval/tracer}/local_eval_queue.py +15 -10
- judgeval-0.9.0/src/judgeval/tracer/managers.py +188 -0
- judgeval-0.9.0/src/judgeval/tracer/processors/__init__.py +181 -0
- judgeval-0.9.0/src/judgeval/tracer/utils.py +20 -0
- judgeval-0.9.0/src/judgeval/trainer/__init__.py +5 -0
- {judgeval-0.8.0/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/config.py +12 -9
- {judgeval-0.8.0/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/console.py +2 -9
- {judgeval-0.8.0/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/trainable_model.py +12 -7
- {judgeval-0.8.0/src/judgeval/common → judgeval-0.9.0/src/judgeval}/trainer/trainer.py +119 -17
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/utils/async_utils.py +2 -3
- judgeval-0.9.0/src/judgeval/utils/decorators.py +24 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/utils/file_utils.py +37 -4
- judgeval-0.9.0/src/judgeval/utils/guards.py +32 -0
- judgeval-0.9.0/src/judgeval/utils/meta.py +14 -0
- judgeval-0.8.0/src/judgeval/common/api/json_encoder.py → judgeval-0.9.0/src/judgeval/utils/serialize.py +7 -1
- judgeval-0.9.0/src/judgeval/utils/testing.py +88 -0
- judgeval-0.9.0/src/judgeval/utils/url.py +10 -0
- {judgeval-0.8.0/src/judgeval → judgeval-0.9.0/src/judgeval/utils}/version_check.py +3 -3
- judgeval-0.9.0/src/judgeval/version.py +5 -0
- judgeval-0.9.0/src/judgeval/warnings.py +4 -0
- judgeval-0.9.0/uv.lock +3941 -0
- judgeval-0.8.0/pyproject.toml +0 -109
- judgeval-0.8.0/src/.coveragerc +0 -4
- judgeval-0.8.0/src/judgeval/__init__.py +0 -15
- judgeval-0.8.0/src/judgeval/clients.py +0 -35
- judgeval-0.8.0/src/judgeval/common/__init__.py +0 -13
- judgeval-0.8.0/src/judgeval/common/api/__init__.py +0 -3
- judgeval-0.8.0/src/judgeval/common/api/api.py +0 -375
- judgeval-0.8.0/src/judgeval/common/api/constants.py +0 -186
- judgeval-0.8.0/src/judgeval/common/exceptions.py +0 -27
- judgeval-0.8.0/src/judgeval/common/storage/__init__.py +0 -6
- judgeval-0.8.0/src/judgeval/common/storage/s3_storage.py +0 -97
- judgeval-0.8.0/src/judgeval/common/tracer/__init__.py +0 -31
- judgeval-0.8.0/src/judgeval/common/tracer/constants.py +0 -22
- judgeval-0.8.0/src/judgeval/common/tracer/core.py +0 -2427
- judgeval-0.8.0/src/judgeval/common/tracer/otel_exporter.py +0 -108
- judgeval-0.8.0/src/judgeval/common/tracer/otel_span_processor.py +0 -188
- judgeval-0.8.0/src/judgeval/common/tracer/span_processor.py +0 -37
- judgeval-0.8.0/src/judgeval/common/tracer/span_transformer.py +0 -207
- judgeval-0.8.0/src/judgeval/common/tracer/trace_manager.py +0 -101
- judgeval-0.8.0/src/judgeval/common/trainer/__init__.py +0 -5
- judgeval-0.8.0/src/judgeval/common/utils.py +0 -948
- judgeval-0.8.0/src/judgeval/data/judgment_types.py +0 -214
- judgeval-0.8.0/src/judgeval/data/tool.py +0 -5
- judgeval-0.8.0/src/judgeval/data/trace.py +0 -83
- judgeval-0.8.0/src/judgeval/integrations/langgraph.py +0 -844
- judgeval-0.8.0/src/judgeval/judges/litellm_judge.py +0 -69
- judgeval-0.8.0/src/judgeval/judges/mixture_of_judges.py +0 -287
- judgeval-0.8.0/src/judgeval/judges/together_judge.py +0 -68
- judgeval-0.8.0/src/judgeval/judgment_client.py +0 -267
- judgeval-0.8.0/src/judgeval/rules.py +0 -521
- judgeval-0.8.0/src/judgeval/scorers/base_scorer.py +0 -78
- judgeval-0.8.0/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval-0.8.0/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval-0.8.0/src/judgeval/scorers/utils.py +0 -119
- judgeval-0.8.0/src/judgeval/tracer/__init__.py +0 -3
- judgeval-0.8.0/src/judgeval/utils/alerts.py +0 -93
- judgeval-0.8.0/src/judgeval/utils/requests.py +0 -50
- judgeval-0.8.0/src/update_types.sh +0 -14
- judgeval-0.8.0/uv.lock +0 -4562
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/pull_request_template.md +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/release.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.gitignore +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/.pre-commit-config.yaml +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/LICENSE.md +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/README.md +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/"Screenshot 2025-05-17 at 8.14.27\342\200\257PM.png" +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/agent.gif +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/data.gif +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/document.gif +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/errors.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/experiments_page.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/logo-dark.svg +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/logo-light.svg +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/new_darkmode.svg +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/new_lightmode.svg +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/online_eval.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/product_shot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/test.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/tests.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/trace.gif +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/trace_demo.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/pytest.ini +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.8.0 → judgeval-0.9.0}/update_version.py +0 -0
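The listing above flattens the old judgeval.common package into top-level modules. A minimal sketch of the module-path changes implied by those renames follows; the paths come straight from the listing, while which symbols each module still exports is not visible in this diff.

# Module-path changes implied by the renames above (a sketch, not an official
# migration guide; the contents of these modules may also have changed).
import judgeval.logger                    # 0.8.0: judgeval.common.logger
import judgeval.trainer                   # 0.8.0: judgeval.common.trainer
import judgeval.evaluation               # 0.8.0: judgeval.run_evaluation
import judgeval.dataset                  # dataset.py became the dataset/ package
import judgeval.tracer.llm.providers     # 0.8.0: judgeval.common.tracer.providers
import judgeval.tracer.local_eval_queue  # 0.8.0: judgeval.local_eval_queue
import judgeval.utils.version_check      # 0.8.0: judgeval.version_check
import judgeval.utils.serialize          # 0.8.0: judgeval.common.api.json_encoder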
--- /dev/null
+++ judgeval-0.9.0/.github/workflows/claude-code-review.yml
@@ -0,0 +1,35 @@
+name: Claude Code Review
+
+on:
+  issue_comment:
+    types: [created]
+jobs:
+  claude-review:
+    if: github.event.issue.pull_request && contains(github.event.comment.body, '/claude review')
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@beta
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          direct_prompt: |
+            Please review this pull request and provide feedback on:
+            - Code quality and best practices
+            - Potential bugs or issues
+            - Performance considerations
+            - Security concerns
+            - Test coverage
+
+            Be constructive and helpful in your feedback.
--- /dev/null
+++ judgeval-0.9.0/.github/workflows/claude.yml
@@ -0,0 +1,40 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@beta
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+
+          additional_permissions: |
+            actions: read
--- judgeval-0.8.0/PKG-INFO
+++ judgeval-0.9.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.8.0
+Version: 0.9.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -10,27 +10,25 @@ License-File: LICENSE.md
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
-Requires-Dist: boto3
+Requires-Dist: boto3>=1.40.11
 Requires-Dist: click<8.2.0
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: nest-asyncio>=1.6.0
-Requires-Dist: opentelemetry-api>=1.34.1
-Requires-Dist: opentelemetry-sdk>=1.34.1
+Requires-Dist: dotenv
+Requires-Dist: httpx>=0.28.1
+Requires-Dist: litellm<1.75.0
+Requires-Dist: opentelemetry-exporter-otlp>=1.36.0
+Requires-Dist: opentelemetry-sdk>=1.36.0
+Requires-Dist: opentelemetry-semantic-conventions>=0.57b0
 Requires-Dist: orjson>=3.9.0
-Requires-Dist: python-dotenv
-Requires-Dist: requests
-Requires-Dist: rich
 Requires-Dist: typer>=0.9.0
 Provides-Extra: langchain
 Requires-Dist: langchain-anthropic; extra == 'langchain'
 Requires-Dist: langchain-core; extra == 'langchain'
 Requires-Dist: langchain-huggingface; extra == 'langchain'
 Requires-Dist: langchain-openai; extra == 'langchain'
+Provides-Extra: s3
+Requires-Dist: boto3>=1.40.11; extra == 's3'
+Provides-Extra: trainer
+Requires-Dist: fireworks-ai>=0.19.18; extra == 'trainer'
 Description-Content-Type: text/markdown
 
 <div align="center">
--- /dev/null
+++ judgeval-0.9.0/pyproject.toml
@@ -0,0 +1,94 @@
+[project]
+name = "judgeval"
+version = "0.9.0"
+authors = [
+    { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
+    { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
+    { name = "Joseph Camyre", email = "joseph@judgmentlabs.ai" },
+]
+description = "Judgeval Package"
+readme = "README.md"
+requires-python = ">=3.11"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+]
+license = "Apache-2.0"
+license-files = ["LICENSE.md"]
+
+dependencies = [
+    "dotenv",
+    "httpx>=0.28.1",
+    "litellm<1.75.0", # https://github.com/BerriAI/litellm/issues/13081
+    "opentelemetry-exporter-otlp>=1.36.0",
+    "opentelemetry-sdk>=1.36.0",
+    "opentelemetry-semantic-conventions>=0.57b0",
+    "orjson>=3.9.0",
+    "click<8.2.0",
+    "typer>=0.9.0",
+    "boto3>=1.40.11",
+]
+
+[project.urls]
+Homepage = "https://github.com/JudgmentLabs/judgeval"
+Issues = "https://github.com/JudgmentLabs/judgeval/issues"
+
+[project.scripts]
+judgeval = "judgeval.cli:app"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/judgeval"]
+include = ["/src/judgeval", "/src/judgeval/**/*.py"]
+
+[project.optional-dependencies]
+langchain = [
+    "langchain-huggingface",
+    "langchain-openai",
+    "langchain-anthropic",
+    "langchain-core",
+]
+s3 = ["boto3>=1.40.11"]
+trainer = ["fireworks-ai>=0.19.18"]
+
+
+[dependency-groups]
+dev = [
+    "anthropic>=0.61.0",
+    "boto3-stubs[s3]>=1.40.11",
+    "datamodel-code-generator>=0.32.0",
+    "google-genai>=1.28.0",
+    "groq>=0.30.0",
+    "langchain-core>=0.3.72",
+    "langgraph>=0.6.4",
+    "mypy>=1.17.1",
+    "openai>=1.78.1",
+    "opentelemetry-instrumentation-openai>=0.44.1",
+    "ruff>=0.9.1,<0.10.0",
+    "together>=1.5.21",
+    "types-pyyaml>=6.0.12.20250516",
+    "pre-commit>=4.2.0",
+    "pytest>=8.4.1",
+    "pytest-cov>=6.2.1",
+    "types-tqdm>=4.67.0.20250809",
+    "pytest-asyncio>=1.1.0",
+]
+
+
+[tool.hatch.build]
+directory = "dist"
+artifacts = ["src/judgeval/**/*.py"]
+exclude = ["src/e2etests/*", "src/tests/*", "src/demo/*"]
+
+[tool.ruff]
+exclude = ["docs"]
+
+[tool.ruff.lint]
+ignore = [
+    "F403",
+    "F405",
+    "E402",
+] # F403: star import, F405: undefined name from star import
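The [project.scripts] entry above maps the judgeval console command to the object judgeval.cli:app. A minimal sketch of what that entry point resolves to at runtime, assuming app is a Typer application (typer is a declared dependency; the actual contents of judgeval/cli.py are not shown in this diff):

# Rough equivalent of the generated `judgeval` console script (a sketch, not
# the wrapper that the build backend actually emits).
from judgeval.cli import app  # the object named in [project.scripts]

if __name__ == "__main__":
    app()  # Typer apps are callable; invoking one runs the CLI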
--- /dev/null
+++ judgeval-0.9.0/scripts/api_generator.py
@@ -0,0 +1,360 @@
+from __future__ import annotations
+
+import orjson
+import sys
+from typing import Any, Dict, List, Optional
+import httpx
+import re
+
+spec_file = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000/openapi.json"
+
+if spec_file.startswith("http"):
+    r = httpx.get(spec_file)
+    r.raise_for_status()
+    SPEC = r.json()
+else:
+    with open(spec_file, "rb") as f:
+        SPEC = orjson.loads(f.read())
+
+JUDGEVAL_PATHS: List[str] = [
+    "/traces/spans/batch/",
+    "/traces/evaluation_runs/batch/",
+    "/traces/fetch/",
+    "/traces/upsert/",
+    "/traces/add_to_dataset/",
+    "/projects/add/",
+    "/projects/delete_from_judgeval/",
+    "/evaluate/traces",
+    "/evaluate/examples",
+    "/evaluate_trace/",
+    "/log_eval_results/",
+    "/fetch_experiment_run/",
+    "/add_to_run_eval_queue/examples",
+    "/add_to_run_eval_queue/traces",
+    "/get_evaluation_status/",
+    "/save_scorer/",
+    "/fetch_scorer/",
+    "/scorer_exists/",
+    "/upload_custom_scorer/",
+    "/datasets/push/",
+    "/datasets/insert_examples/",
+    "/datasets/pull_for_judgeval/",
+    "/datasets/fetch_stats_by_project/",
+    "/projects/resolve/",
+    "/e2e_fetch_trace/",
+    "/e2e_fetch_span_score/",
+]
+
+
+def resolve_ref(ref: str) -> str:
+    assert ref.startswith("#/components/schemas/"), (
+        "Reference must start with #/components/schemas/"
+    )
+    return ref.replace("#/components/schemas/", "")
+
+
+def to_snake_case(name: str) -> str:
+    name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
+    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()
+
+
+def get_method_name_from_path(path: str, method: str) -> str:
+    return path.strip("/").replace("/", "_").replace("-", "_")
+
+
+def get_query_parameters(operation: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Extract query parameters from the operation."""
+    parameters = operation.get("parameters", [])
+    query_params = []
+
+    for param in parameters:
+        if param.get("in") == "query":
+            param_info = {
+                "name": param["name"],
+                "required": param.get("required", False),
+                "type": param.get("schema", {}).get("type", "str"),
+            }
+            query_params.append(param_info)
+
+    return query_params
+
+
+def get_request_schema(operation: Dict[str, Any]) -> Optional[str]:
+    request_body = operation.get("requestBody", {})
+    if not request_body:
+        return None
+
+    content = request_body.get("content", {})
+    if "application/json" in content:
+        schema = content["application/json"].get("schema", {})
+        if "$ref" in schema:
+            return resolve_ref(schema["$ref"])
+
+    return None
+
+
+def get_response_schema(operation: Dict[str, Any]) -> Optional[str]:
+    responses = operation.get("responses", {})
+    for status_code in ["200", "201"]:
+        if status_code in responses:
+            response = responses[status_code]
+            content = response.get("content", {})
+            if "application/json" in content:
+                schema = content["application/json"].get("schema", {})
+                if "$ref" in schema:
+                    return resolve_ref(schema["$ref"])
+
+    return None
+
+
+def generate_method_signature(
+    method_name: str,
+    request_type: Optional[str],
+    query_params: List[Dict[str, Any]],
+    response_type: str,
+    is_async: bool = False,
+) -> str:
+    async_prefix = "async " if is_async else ""
+
+    params = ["self"]
+
+    # Add required query parameters first
+    for param in query_params:
+        if param["required"]:
+            param_name = param["name"]
+            param_type = "str"  # Default to str for simplicity
+            params.append(f"{param_name}: {param_type}")
+
+    # Add request body parameter if it exists
+    if request_type:
+        params.append(f"payload: {request_type}")
+
+    # Add optional query parameters last
+    for param in query_params:
+        if not param["required"]:
+            param_name = param["name"]
+            param_type = "str"  # Default to str for simplicity
+            params.append(f"{param_name}: Optional[{param_type}] = None")
+
+    params_str = ", ".join(params)
+    return f"{async_prefix}def {method_name}({params_str}) -> {response_type}:"
+
+
+def generate_method_body(
+    method_name: str,
+    path: str,
+    method: str,
+    request_type: Optional[str],
+    query_params: List[Dict[str, Any]],
+    is_async: bool = False,
+) -> str:
+    async_prefix = "await " if is_async else ""
+
+    # Build query parameters dict if they exist
+    if query_params:
+        query_lines = ["query_params = {}"]
+        for param in query_params:
+            param_name = param["name"]
+            if param["required"]:
+                query_lines.append(f"query_params['{param_name}'] = {param_name}")
+            else:
+                query_lines.append(f"if {param_name} is not None:")
+                query_lines.append(f"    query_params['{param_name}'] = {param_name}")
+        query_setup = "\n        ".join(query_lines)
+        query_param = "query_params"
+    else:
+        query_setup = ""
+        query_param = "{}"
+
+    if method == "GET":
+        if query_setup:
+            return f'{query_setup}\n        return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            {query_param},\n        )'
+        else:
+            return f'return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            {{}},\n        )'
+    else:
+        if request_type:
+            if query_setup:
+                return f'{query_setup}\n        return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            payload,\n            params={query_param},\n        )'
+            else:
+                return f'return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            payload,\n        )'
+        else:
+            if query_setup:
+                return f'{query_setup}\n        return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            {{}},\n            params={query_param},\n        )'
+            else:
+                return f'return {async_prefix}self._request(\n            "{method}",\n            url_for("{path}"),\n            {{}},\n        )'
+
+
+def generate_client_class(
+    class_name: str, methods: List[Dict[str, Any]], is_async: bool = False
+) -> str:
+    lines = [f"class {class_name}:"]
+    lines.append('    __slots__ = ("api_key", "organization_id", "client")')
+    lines.append("")
+
+    lines.append("    def __init__(self, api_key: str, organization_id: str):")
+    lines.append("        self.api_key = api_key")
+    lines.append("        self.organization_id = organization_id")
+    client_type = "httpx.AsyncClient" if is_async else "httpx.Client"
+    lines.append(f"        self.client = {client_type}(timeout=30)")
+    lines.append("")
+
+    request_method = "async def _request" if is_async else "def _request"
+    lines.append(f"    {request_method}(")
+    lines.append(
+        '        self, method: Literal["POST", "PATCH", "GET", "DELETE"], url: str, payload: Any, params: Optional[Dict[str, Any]] = None'
+    )
+    lines.append("    ) -> Any:")
+    lines.append('        if method == "GET":')
+    lines.append("            r = self.client.request(")
+    lines.append("                method,")
+    lines.append("                url,")
+    lines.append("                params=payload if params is None else params,")
+    lines.append(
+        "                headers=_headers(self.api_key, self.organization_id),"
+    )
+    lines.append("            )")
+    lines.append("        else:")
+    lines.append("            r = self.client.request(")
+    lines.append("                method,")
+    lines.append("                url,")
+    lines.append("                json=json_encoder(payload),")
+    lines.append("                params=params,")
+    lines.append(
+        "                headers=_headers(self.api_key, self.organization_id),"
+    )
+    lines.append("            )")
+    if is_async:
+        lines.append("        return _handle_response(await r)")
+    else:
+        lines.append("        return _handle_response(r)")
+    lines.append("")
+
+    for method_info in methods:
+        method_name = method_info["name"]
+        path = method_info["path"]
+        http_method = method_info["method"]
+        request_type = method_info["request_type"]
+        query_params = method_info["query_params"]
+        response_type = method_info["response_type"]
+
+        signature = generate_method_signature(
+            method_name, request_type, query_params, response_type, is_async
+        )
+        lines.append(f"    {signature}")
+
+        body = generate_method_body(
+            method_name, path, http_method, request_type, query_params, is_async
+        )
+        lines.append(f"        {body}")
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def generate_api_file() -> str:
+    lines = [
+        "from typing import List, Dict, Any, Mapping, Literal, Optional",
+        "import httpx",
+        "from httpx import Response",
+        "from judgeval.exceptions import JudgmentAPIError",
+        "from judgeval.utils.url import url_for",
+        "from judgeval.utils.serialize import json_encoder",
+        "from judgeval.api.api_types import *",
+        "",
+        "",
+        "def _headers(api_key: str, organization_id: str) -> Mapping[str, str]:",
+        "    return {",
+        '        "Content-Type": "application/json",',
+        '        "Authorization": f"Bearer {api_key}",',
+        '        "X-Organization-Id": organization_id,',
+        "    }",
+        "",
+        "",
+        "def _handle_response(r: Response) -> Any:",
+        "    if r.status_code >= 400:",
+        "        try:",
+        '            detail = r.json().get("detail", "")',
+        "        except Exception:",
+        "            detail = r.text",
+        "        raise JudgmentAPIError(r.status_code, detail, r)",
+        "    return r.json()",
+        "",
+        "",
+    ]
+
+    filtered_paths = {
+        path: spec_data
+        for path, spec_data in SPEC["paths"].items()
+        if path in JUDGEVAL_PATHS
+    }
+
+    for path in JUDGEVAL_PATHS:
+        if path not in SPEC["paths"]:
+            print(f"Path {path} not found in OpenAPI spec", file=sys.stderr)
+
+    sync_methods = []
+    async_methods = []
+
+    for path, path_data in filtered_paths.items():
+        for method, operation in path_data.items():
+            if method.upper() in ["GET", "POST", "PUT", "PATCH", "DELETE"]:
+                method_name = get_method_name_from_path(path, method.upper())
+                request_schema = get_request_schema(operation)
+                response_schema = get_response_schema(operation)
+                query_params = get_query_parameters(operation)
+
+                print(
+                    method_name,
+                    request_schema,
+                    response_schema,
+                    query_params,
+                    file=sys.stderr,
+                )
+
+                if not request_schema:
+                    print(f"No request type found for {method_name}", file=sys.stderr)
+
+                if not response_schema:
+                    print(
+                        f"No response schema found for {method_name}", file=sys.stderr
+                    )
+
+                request_type = request_schema if request_schema else None
+                response_type = response_schema if response_schema else "Any"
+
+                method_info = {
+                    "name": method_name,
+                    "path": path,
+                    "method": method.upper(),
+                    "request_type": request_type,
+                    "query_params": query_params,
+                    "response_type": response_type,
+                }
+
+                sync_methods.append(method_info)
+                async_methods.append(method_info)
+
+    sync_client = generate_client_class(
+        "JudgmentSyncClient", sync_methods, is_async=False
+    )
+    async_client = generate_client_class(
+        "JudgmentAsyncClient", async_methods, is_async=True
+    )
+
+    lines.append(sync_client)
+    lines.append("")
+    lines.append("")
+    lines.append(async_client)
+    lines.append("")
+    lines.append("")
+    lines.append("__all__ = [")
+    lines.append('    "JudgmentSyncClient",')
+    lines.append('    "JudgmentAsyncClient",')
+    lines.append("]")
+
+    return "\n".join(lines)
+
+
+if __name__ == "__main__":
+    api_code = generate_api_file()
+    print(api_code)
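The generator reads an OpenAPI spec from its first argument (a URL or file path, defaulting to http://localhost:8000/openapi.json), writes diagnostics to stderr, and prints the generated client module to stdout. A minimal sketch of driving it, assuming the script is saved as scripts/api_generator.py and that the output belongs in src/judgeval/api/__init__.py (that target path is an assumption; scripts/update_types.sh presumably wires this up in the repository):

# Regenerate the API client module from a locally served OpenAPI spec.
# The spec URL and the output path below are illustrative assumptions.
import subprocess

result = subprocess.run(
    ["python", "scripts/api_generator.py", "http://localhost:8000/openapi.json"],
    capture_output=True,
    text=True,
    check=True,
)

# The client source arrives on stdout; progress and warnings go to stderr.
with open("src/judgeval/api/__init__.py", "w") as fh:
    fh.write(result.stdout)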