judgeval 0.3.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/pull_request_template.md +1 -8
- {judgeval-0.3.2 → judgeval-0.5.0}/PKG-INFO +9 -12
- {judgeval-0.3.2 → judgeval-0.5.0}/pyproject.toml +28 -32
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/__init__.py +2 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/clients.py +2 -1
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/api/api.py +4 -18
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/api/constants.py +1 -1
- judgeval-0.5.0/src/judgeval/common/api/json_encoder.py +242 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/core.py +498 -215
- judgeval-0.5.0/src/judgeval/common/tracer/providers.py +119 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/span_transformer.py +14 -25
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/constants.py +1 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/judgment_types.py +2 -1
- judgeval-0.5.0/src/judgeval/data/trace.py +82 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/trace_run.py +2 -1
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/dataset.py +2 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/evaluation_run.py +6 -2
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/litellm_judge.py +2 -1
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/mixture_of_judges.py +2 -1
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/utils.py +2 -1
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judgment_client.py +11 -6
- judgeval-0.5.0/src/judgeval/local_eval_queue.py +192 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/run_evaluation.py +11 -6
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +18 -19
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/score.py +34 -11
- judgeval-0.5.0/src/judgeval/utils/async_utils.py +36 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/uv.lock +29 -652
- judgeval-0.3.2/src/judgeval/data/trace.py +0 -199
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/release.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.gitignore +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/.pre-commit-config.yaml +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/LICENSE.md +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/README.md +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/agent.gif +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/data.gif +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/document.gif +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/errors.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/experiments_page.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/logo-dark.svg +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/logo-light.svg +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/new_darkmode.svg +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/new_lightmode.svg +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/online_eval.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/product_shot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/test.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/tests.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/trace.gif +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/trace_demo.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/pytest.ini +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/.coveragerc +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/api/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/logger.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/storage/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/storage/s3_storage.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/constants.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/otel_exporter.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/otel_span_processor.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/span_processor.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/tracer/trace_manager.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/common/utils.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/example.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/result.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/data/tool.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/integrations/langgraph.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/rules.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/utils/alerts.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/utils/requests.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/judgeval/version_check.py +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/src/update_types.sh +0 -0
- {judgeval-0.3.2 → judgeval-0.5.0}/update_version.py +0 -0
@@ -10,14 +10,7 @@
|
|
10
10
|
-->
|
11
11
|
- [ ] 1. ...
|
12
12
|
|
13
|
-
## 🎥 Demo of Changes
|
14
|
-
|
15
|
-
<!-- Add a short 1-3 minute video describing/demoing the changes -->
|
16
|
-
|
17
13
|
## ✅ Checklist
|
18
14
|
|
19
|
-
- [ ] Tagged Linear ticket in PR title. Ie. PR Title (JUD-XXXX)
|
20
|
-
- [ ] Video demo of changes
|
21
|
-
- [ ] Reviewers assigned
|
22
15
|
- [ ] Docs updated ([if necessary](https://github.com/JudgmentLabs/docs))
|
23
|
-
- [ ]
|
16
|
+
- [ ] Changelogs are updated ([if necessary](https://github.com/JudgmentLabs/docs/tree/main/content/docs/changelog/%28weekly%29))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.3.2
|
3
|
+
Version: 0.5.0
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -10,27 +10,24 @@ License-File: LICENSE.md
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
12
|
Requires-Python: >=3.11
|
13
|
-
Requires-Dist: anthropic
|
14
13
|
Requires-Dist: boto3
|
15
|
-
Requires-Dist: datamodel-code-generator>=0.31.1
|
16
|
-
Requires-Dist: google-genai
|
17
|
-
Requires-Dist: groq>=0.30.0
|
18
14
|
Requires-Dist: langchain-anthropic
|
19
15
|
Requires-Dist: langchain-core
|
20
16
|
Requires-Dist: langchain-huggingface
|
21
17
|
Requires-Dist: langchain-openai
|
22
18
|
Requires-Dist: litellm>=1.61.15
|
23
|
-
Requires-Dist: matplotlib>=3.10.3
|
24
|
-
Requires-Dist: nest-asyncio
|
25
|
-
Requires-Dist: openai
|
19
|
+
Requires-Dist: nest-asyncio>=1.6.0
|
26
20
|
Requires-Dist: opentelemetry-api>=1.34.1
|
27
21
|
Requires-Dist: opentelemetry-sdk>=1.34.1
|
28
22
|
Requires-Dist: orjson>=3.9.0
|
29
|
-
Requires-Dist: pandas
|
30
|
-
Requires-Dist: python-dotenv==1.0.1
|
31
|
-
Requires-Dist: python-slugify>=8.0.4
|
23
|
+
Requires-Dist: python-dotenv
|
32
24
|
Requires-Dist: requests
|
33
|
-
Requires-Dist: together
|
25
|
+
Requires-Dist: rich
|
26
|
+
Provides-Extra: langchain
|
27
|
+
Requires-Dist: langchain-anthropic; extra == 'langchain'
|
28
|
+
Requires-Dist: langchain-core; extra == 'langchain'
|
29
|
+
Requires-Dist: langchain-huggingface; extra == 'langchain'
|
30
|
+
Requires-Dist: langchain-openai; extra == 'langchain'
|
34
31
|
Description-Content-Type: text/markdown
|
35
32
|
|
36
33
|
<div align="center">
|
@@ -1,10 +1,10 @@
|
|
1
1
|
[project]
|
2
2
|
name = "judgeval"
|
3
|
-
version = "0.3.2"
|
3
|
+
version = "0.5.0"
|
4
4
|
authors = [
|
5
|
-
{ name="Andrew Li", email="andrew@judgmentlabs.ai" },
|
6
|
-
{ name="Alex Shan", email="alex@judgmentlabs.ai" },
|
7
|
-
{ name="Joseph Camyre", email="joseph@judgmentlabs.ai" },
|
5
|
+
{ name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
|
6
|
+
{ name = "Alex Shan", email = "alex@judgmentlabs.ai" },
|
7
|
+
{ name = "Joseph Camyre", email = "joseph@judgmentlabs.ai" },
|
8
8
|
]
|
9
9
|
description = "Judgeval Package"
|
10
10
|
readme = "README.md"
|
@@ -16,27 +16,19 @@ classifiers = [
|
|
16
16
|
license = "Apache-2.0"
|
17
17
|
license-files = ["LICENSE.md"]
|
18
18
|
dependencies = [
|
19
|
+
"rich",
|
19
20
|
"litellm>=1.61.15",
|
20
|
-
"python-dotenv==1.0.1",
|
21
|
+
"python-dotenv",
|
21
22
|
"requests",
|
22
|
-
"pandas",
|
23
|
-
"openai",
|
24
|
-
"together",
|
25
|
-
"anthropic",
|
26
|
-
"nest-asyncio",
|
27
|
-
"langchain-huggingface",
|
28
|
-
"langchain-openai",
|
29
|
-
"langchain-anthropic",
|
30
|
-
"langchain-core",
|
31
|
-
"google-genai",
|
32
23
|
"boto3",
|
33
|
-
"matplotlib>=3.10.3",
|
34
|
-
"python-slugify>=8.0.4",
|
35
|
-
"datamodel-code-generator>=0.31.1",
|
36
|
-
"groq>=0.30.0",
|
37
24
|
"opentelemetry-api>=1.34.1",
|
38
25
|
"opentelemetry-sdk>=1.34.1",
|
39
26
|
"orjson>=3.9.0",
|
27
|
+
"nest-asyncio>=1.6.0",
|
28
|
+
"langchain-huggingface",
|
29
|
+
"langchain-openai",
|
30
|
+
"langchain-anthropic",
|
31
|
+
"langchain-core",
|
40
32
|
]
|
41
33
|
|
42
34
|
[project.urls]
|
@@ -49,21 +41,24 @@ build-backend = "hatchling.build"
|
|
49
41
|
|
50
42
|
[tool.hatch.build.targets.wheel]
|
51
43
|
packages = ["src/judgeval"]
|
52
|
-
include = [
|
53
|
-
|
54
|
-
|
44
|
+
include = ["/src/judgeval", "/src/judgeval/**/*.py"]
|
45
|
+
|
46
|
+
[project.optional-dependencies]
|
47
|
+
langchain = [
|
48
|
+
"langchain-huggingface",
|
49
|
+
"langchain-openai",
|
50
|
+
"langchain-anthropic",
|
51
|
+
"langchain-core",
|
55
52
|
]
|
56
53
|
|
57
54
|
[dependency-groups]
|
58
55
|
dev = [
|
59
56
|
"chromadb>=1.0.12",
|
60
|
-
"langchain-community>=0.3.24",
|
61
57
|
"pytest>=8.4.0",
|
62
58
|
"pytest-asyncio>=1.0.0",
|
63
59
|
"pytest-cov>=6.1.1",
|
64
60
|
"pytest-mock>=3.14.1",
|
65
61
|
"tavily-python>=0.7.5",
|
66
|
-
"langgraph>=0.4.3",
|
67
62
|
"pre-commit>=4.2.0",
|
68
63
|
"types-requests>=2.32.4.20250611",
|
69
64
|
"mypy>=1.17.0",
|
@@ -90,18 +85,19 @@ dev = [
|
|
90
85
|
"types-tqdm>=4.67.0.20250516",
|
91
86
|
"types-tree-sitter-languages>=1.10.0.20250530",
|
92
87
|
"types-xmltodict>=0.14.0.20241009",
|
88
|
+
"datamodel-code-generator>=0.31.2",
|
89
|
+
"openai",
|
90
|
+
"together",
|
91
|
+
"anthropic",
|
92
|
+
"google-genai",
|
93
|
+
"groq",
|
94
|
+
"langgraph>=0.4.3",
|
93
95
|
]
|
94
96
|
|
95
97
|
[tool.hatch.build]
|
96
98
|
directory = "dist"
|
97
|
-
artifacts = [
|
98
|
-
|
99
|
-
]
|
100
|
-
exclude = [
|
101
|
-
"src/e2etests/*",
|
102
|
-
"src/tests/*",
|
103
|
-
"src/demo/*"
|
104
|
-
]
|
99
|
+
artifacts = ["src/judgeval/**/*.py"]
|
100
|
+
exclude = ["src/e2etests/*", "src/tests/*", "src/demo/*"]
|
105
101
|
|
106
102
|
[tool.ruff]
|
107
103
|
exclude = ["docs"]
|
@@ -2,6 +2,7 @@
|
|
2
2
|
from judgeval.clients import client, together_client
|
3
3
|
from judgeval.judgment_client import JudgmentClient
|
4
4
|
from judgeval.version_check import check_latest_version
|
5
|
+
from judgeval.local_eval_queue import LocalEvaluationQueue
|
5
6
|
|
6
7
|
check_latest_version()
|
7
8
|
|
@@ -10,4 +11,5 @@ __all__ = [
|
|
10
11
|
"client",
|
11
12
|
"together_client",
|
12
13
|
"JudgmentClient",
|
14
|
+
"LocalEvaluationQueue",
|
13
15
|
]
|
@@ -2,7 +2,6 @@ import os
|
|
2
2
|
from dotenv import load_dotenv
|
3
3
|
from openai import OpenAI
|
4
4
|
from typing import Optional
|
5
|
-
from together import Together, AsyncTogether
|
6
5
|
|
7
6
|
PATH_TO_DOTENV = os.path.join(os.path.dirname(__file__), ".env")
|
8
7
|
load_dotenv(dotenv_path=PATH_TO_DOTENV)
|
@@ -28,6 +27,8 @@ async_together_client: Optional["AsyncTogether"] = None
|
|
28
27
|
together_api_key = os.getenv("TOGETHERAI_API_KEY") or os.getenv("TOGETHER_API_KEY")
|
29
28
|
if together_api_key:
|
30
29
|
try:
|
30
|
+
from together import Together, AsyncTogether
|
31
|
+
|
31
32
|
together_client = Together(api_key=together_api_key)
|
32
33
|
async_together_client = AsyncTogether(api_key=together_api_key)
|
33
34
|
except Exception:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Literal, List, Dict, Any, Union
|
1
|
+
from typing import Literal, List, Dict, Any, Union, Optional
|
2
2
|
from requests import exceptions
|
3
3
|
from judgeval.common.api.constants import (
|
4
4
|
JUDGMENT_TRACES_FETCH_API_URL,
|
@@ -53,8 +53,7 @@ from judgeval.common.api.constants import (
|
|
53
53
|
CheckExampleKeysPayload,
|
54
54
|
)
|
55
55
|
from judgeval.utils.requests import requests
|
56
|
-
|
57
|
-
import orjson
|
56
|
+
from judgeval.common.api.json_encoder import json_encoder
|
58
57
|
|
59
58
|
|
60
59
|
class JudgmentAPIException(exceptions.HTTPError):
|
@@ -111,7 +110,7 @@ class JudgmentApiClient:
|
|
111
110
|
r = requests.request(
|
112
111
|
method,
|
113
112
|
url,
|
114
|
-
|
113
|
+
json=json_encoder(payload),
|
115
114
|
headers=self._headers(),
|
116
115
|
**self._request_kwargs(),
|
117
116
|
)
|
@@ -238,7 +237,7 @@ class JudgmentApiClient:
|
|
238
237
|
}
|
239
238
|
return self._do_request("POST", JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL, payload)
|
240
239
|
|
241
|
-
def save_scorer(self, name: str, prompt: str, options: dict):
|
240
|
+
def save_scorer(self, name: str, prompt: str, options: Optional[dict] = None):
|
242
241
|
payload: ScorerSavePayload = {
|
243
242
|
"name": name,
|
244
243
|
"prompt": prompt,
|
@@ -368,16 +367,3 @@ class JudgmentApiClient:
|
|
368
367
|
"verify": True,
|
369
368
|
"timeout": 30,
|
370
369
|
}
|
371
|
-
|
372
|
-
def _serialize(self, data: Any) -> str:
|
373
|
-
def fallback_encoder(obj):
|
374
|
-
try:
|
375
|
-
return repr(obj)
|
376
|
-
except Exception:
|
377
|
-
try:
|
378
|
-
return str(obj)
|
379
|
-
except Exception as e:
|
380
|
-
return f"<Unserializable object of type {type(obj).__name__}: {e}>"
|
381
|
-
|
382
|
-
# orjson returns bytes, so we need to decode to str
|
383
|
-
return orjson.dumps(data, default=fallback_encoder).decode("utf-8")
|
@@ -0,0 +1,242 @@
|
|
1
|
+
"""
|
2
|
+
|
3
|
+
This is a modified version of https://docs.powertools.aws.dev/lambda/python/2.35.1/api/event_handler/openapi/encoders.html
|
4
|
+
|
5
|
+
"""
|
6
|
+
|
7
|
+
import dataclasses
|
8
|
+
import datetime
|
9
|
+
from collections import defaultdict, deque
|
10
|
+
from decimal import Decimal
|
11
|
+
from enum import Enum
|
12
|
+
from pathlib import Path, PurePath
|
13
|
+
from re import Pattern
|
14
|
+
from types import GeneratorType
|
15
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, Union
|
16
|
+
from uuid import UUID
|
17
|
+
|
18
|
+
from pydantic import BaseModel
|
19
|
+
from pydantic.types import SecretBytes, SecretStr
|
20
|
+
|
21
|
+
|
22
|
+
"""
|
23
|
+
This module contains the encoders used by jsonable_encoder to convert Python objects to JSON serializable data types.
|
24
|
+
"""
|
25
|
+
|
26
|
+
|
27
|
+
def _model_dump(
|
28
|
+
model: BaseModel, mode: Literal["json", "python"] = "json", **kwargs: Any
|
29
|
+
) -> Any:
|
30
|
+
return model.model_dump(mode=mode, **kwargs)
|
31
|
+
|
32
|
+
|
33
|
+
def json_encoder(
|
34
|
+
obj: Any,
|
35
|
+
custom_serializer: Optional[Callable[[Any], str]] = None,
|
36
|
+
) -> Any:
|
37
|
+
"""
|
38
|
+
JSON encodes an arbitrary Python object into JSON serializable data types.
|
39
|
+
|
40
|
+
This is a modified version of fastapi.encoders.jsonable_encoder that supports
|
41
|
+
encoding of pydantic.BaseModel objects.
|
42
|
+
|
43
|
+
Parameters
|
44
|
+
----------
|
45
|
+
obj : Any
|
46
|
+
The object to encode
|
47
|
+
custom_serializer : Callable, optional
|
48
|
+
A custom serializer to use for encoding the object, when everything else fails.
|
49
|
+
|
50
|
+
Returns
|
51
|
+
-------
|
52
|
+
Any
|
53
|
+
The JSON serializable data types
|
54
|
+
"""
|
55
|
+
# Pydantic models
|
56
|
+
if isinstance(obj, BaseModel):
|
57
|
+
return _dump_base_model(
|
58
|
+
obj=obj,
|
59
|
+
)
|
60
|
+
|
61
|
+
# Dataclasses
|
62
|
+
if dataclasses.is_dataclass(obj):
|
63
|
+
obj_dict = dataclasses.asdict(obj)
|
64
|
+
return json_encoder(
|
65
|
+
obj_dict,
|
66
|
+
)
|
67
|
+
|
68
|
+
# Enums
|
69
|
+
if isinstance(obj, Enum):
|
70
|
+
return obj.value
|
71
|
+
|
72
|
+
# Paths
|
73
|
+
if isinstance(obj, PurePath):
|
74
|
+
return str(obj)
|
75
|
+
|
76
|
+
# Scalars
|
77
|
+
if isinstance(obj, (str, int, float, type(None))):
|
78
|
+
return obj
|
79
|
+
|
80
|
+
# Dictionaries
|
81
|
+
if isinstance(obj, dict):
|
82
|
+
return _dump_dict(
|
83
|
+
obj=obj,
|
84
|
+
)
|
85
|
+
|
86
|
+
# Sequences
|
87
|
+
if isinstance(obj, (list, set, frozenset, GeneratorType, tuple, deque)):
|
88
|
+
return _dump_sequence(
|
89
|
+
obj=obj,
|
90
|
+
)
|
91
|
+
|
92
|
+
# Other types
|
93
|
+
if type(obj) in ENCODERS_BY_TYPE:
|
94
|
+
return ENCODERS_BY_TYPE[type(obj)](obj)
|
95
|
+
|
96
|
+
for encoder, classes_tuple in encoders_by_class_tuples.items():
|
97
|
+
if isinstance(obj, classes_tuple):
|
98
|
+
return encoder(obj)
|
99
|
+
|
100
|
+
# Use custom serializer if present
|
101
|
+
if custom_serializer:
|
102
|
+
return custom_serializer(obj)
|
103
|
+
|
104
|
+
# Default
|
105
|
+
return _dump_other(
|
106
|
+
obj=obj,
|
107
|
+
)
|
108
|
+
|
109
|
+
|
110
|
+
def _dump_base_model(
|
111
|
+
*,
|
112
|
+
obj: Any,
|
113
|
+
):
|
114
|
+
"""
|
115
|
+
Dump a BaseModel object to a dict, using the same parameters as jsonable_encoder
|
116
|
+
"""
|
117
|
+
obj_dict = _model_dump(
|
118
|
+
obj,
|
119
|
+
mode="json",
|
120
|
+
)
|
121
|
+
if "__root__" in obj_dict:
|
122
|
+
obj_dict = obj_dict["__root__"]
|
123
|
+
|
124
|
+
return json_encoder(
|
125
|
+
obj_dict,
|
126
|
+
)
|
127
|
+
|
128
|
+
|
129
|
+
def _dump_dict(
|
130
|
+
*,
|
131
|
+
obj: Any,
|
132
|
+
) -> Dict[str, Any]:
|
133
|
+
"""
|
134
|
+
Dump a dict to a dict, using the same parameters as jsonable_encoder
|
135
|
+
"""
|
136
|
+
encoded_dict = {}
|
137
|
+
allowed_keys = set(obj.keys())
|
138
|
+
for key, value in obj.items():
|
139
|
+
if key in allowed_keys:
|
140
|
+
encoded_key = json_encoder(
|
141
|
+
key,
|
142
|
+
)
|
143
|
+
encoded_value = json_encoder(
|
144
|
+
value,
|
145
|
+
)
|
146
|
+
encoded_dict[encoded_key] = encoded_value
|
147
|
+
return encoded_dict
|
148
|
+
|
149
|
+
|
150
|
+
def _dump_sequence(
|
151
|
+
*,
|
152
|
+
obj: Any,
|
153
|
+
) -> List[Any]:
|
154
|
+
"""
|
155
|
+
Dump a sequence to a list, using the same parameters as jsonable_encoder
|
156
|
+
"""
|
157
|
+
encoded_list = []
|
158
|
+
for item in obj:
|
159
|
+
encoded_list.append(
|
160
|
+
json_encoder(
|
161
|
+
item,
|
162
|
+
),
|
163
|
+
)
|
164
|
+
return encoded_list
|
165
|
+
|
166
|
+
|
167
|
+
def _dump_other(
|
168
|
+
*,
|
169
|
+
obj: Any,
|
170
|
+
) -> Any:
|
171
|
+
"""
|
172
|
+
Dump an object to a hashable object, using the same parameters as jsonable_encoder
|
173
|
+
"""
|
174
|
+
try:
|
175
|
+
data = dict(obj)
|
176
|
+
except Exception:
|
177
|
+
return repr(obj)
|
178
|
+
|
179
|
+
return json_encoder(
|
180
|
+
data,
|
181
|
+
)
|
182
|
+
|
183
|
+
|
184
|
+
def iso_format(o: Union[datetime.date, datetime.time]) -> str:
|
185
|
+
"""
|
186
|
+
ISO format for date and time
|
187
|
+
"""
|
188
|
+
return o.isoformat()
|
189
|
+
|
190
|
+
|
191
|
+
def decimal_encoder(dec_value: Decimal) -> Union[int, float]:
|
192
|
+
"""
|
193
|
+
Encodes a Decimal as int of there's no exponent, otherwise float
|
194
|
+
|
195
|
+
This is useful when we use ConstrainedDecimal to represent Numeric(x,0)
|
196
|
+
where an integer (but not int typed) is used. Encoding this as a float
|
197
|
+
results in failed round-tripping between encode and parse.
|
198
|
+
|
199
|
+
>>> decimal_encoder(Decimal("1.0"))
|
200
|
+
1.0
|
201
|
+
|
202
|
+
>>> decimal_encoder(Decimal("1"))
|
203
|
+
1
|
204
|
+
"""
|
205
|
+
if dec_value.as_tuple().exponent >= 0: # type: ignore[operator]
|
206
|
+
return int(dec_value)
|
207
|
+
else:
|
208
|
+
return float(dec_value)
|
209
|
+
|
210
|
+
|
211
|
+
ENCODERS_BY_TYPE: Dict[Type[Any], Callable[[Any], Any]] = {
|
212
|
+
bytes: lambda o: o.decode(),
|
213
|
+
datetime.date: iso_format,
|
214
|
+
datetime.datetime: iso_format,
|
215
|
+
datetime.time: iso_format,
|
216
|
+
datetime.timedelta: lambda td: td.total_seconds(),
|
217
|
+
Decimal: decimal_encoder,
|
218
|
+
Enum: lambda o: o.value,
|
219
|
+
frozenset: list,
|
220
|
+
deque: list,
|
221
|
+
GeneratorType: list,
|
222
|
+
Path: str,
|
223
|
+
Pattern: lambda o: o.pattern,
|
224
|
+
SecretBytes: str,
|
225
|
+
SecretStr: str,
|
226
|
+
set: list,
|
227
|
+
UUID: str,
|
228
|
+
}
|
229
|
+
|
230
|
+
|
231
|
+
# Generates a mapping of encoders to a tuple of classes that they can encode
|
232
|
+
def generate_encoders_by_class_tuples(
|
233
|
+
type_encoder_map: Dict[Any, Callable[[Any], Any]],
|
234
|
+
) -> Dict[Callable[[Any], Any], Tuple[Any, ...]]:
|
235
|
+
encoders: Dict[Callable[[Any], Any], Tuple[Any, ...]] = defaultdict(tuple)
|
236
|
+
for type_, encoder in type_encoder_map.items():
|
237
|
+
encoders[encoder] += (type_,)
|
238
|
+
return encoders
|
239
|
+
|
240
|
+
|
241
|
+
# Mapping of encoders to a tuple of classes that they can encode
|
242
|
+
encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)
|