judgeval 0.16.9__tar.gz → 0.18.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of judgeval might be problematic.
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/ci.yaml +1 -1
- {judgeval-0.16.9 → judgeval-0.18.0}/PKG-INFO +2 -3
- {judgeval-0.16.9 → judgeval-0.18.0}/README.md +1 -2
- {judgeval-0.16.9 → judgeval-0.18.0}/pyproject.toml +1 -1
- {judgeval-0.16.9 → judgeval-0.18.0}/scripts/api_generator.py +5 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/scripts/openapi_transform.py +5 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/__init__.py +29 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/api/__init__.py +108 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/api/api_types.py +56 -1
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/cli.py +7 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/judgment_types.py +56 -1
- judgeval-0.18.0/src/judgeval/prompts/prompt.py +320 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -12
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/__init__.py +71 -33
- judgeval-0.18.0/src/judgeval/tracer/exporters/store.py +59 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/keys.py +1 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/messages.py +4 -4
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +2 -2
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/generate_content.py +1 -1
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +2 -2
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/chat_completions.py +4 -4
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/responses.py +4 -4
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/chat_completions.py +4 -4
- judgeval-0.18.0/src/judgeval/trainer/__init__.py +14 -0
- judgeval-0.18.0/src/judgeval/trainer/base_trainer.py +122 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/config.py +1 -1
- judgeval-0.16.9/src/judgeval/trainer/trainer.py → judgeval-0.18.0/src/judgeval/trainer/fireworks_trainer.py +35 -44
- judgeval-0.18.0/src/judgeval/trainer/trainer.py +70 -0
- judgeval-0.18.0/src/judgeval/utils/project.py +15 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/version.py +1 -1
- judgeval-0.16.9/src/judgeval/tracer/exporters/store.py +0 -43
- judgeval-0.16.9/src/judgeval/trainer/__init__.py +0 -5
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/pull_request_template.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/claude-code-review.yml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/claude.yml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/release.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.gitignore +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/.pre-commit-config.yaml +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/CONTRIBUTING.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/LICENSE.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/agent.gif +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/agent_trace_example.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/company.jpg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/company_banner.jpg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/darkmode.svg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/full_logo.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/icon.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/lightmode.svg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/brand/white_background.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/custom_scorer_online_abm.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/data.gif +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/document.gif +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/errors.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_page.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/logo_darkmode.svg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/logo_lightmode.svg +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/online_eval.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/product_shot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/quickstart_trajectory_ss.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/test.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/tests.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace.gif +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_demo.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_screenshot.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/pytest.ini +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/scripts/update_types.sh +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/constants.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/evaluation_run.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/example.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/result.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/dataset/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/env.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/evaluation/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/exceptions.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/integrations/openlit/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/logger.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/constants.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/s3.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/exporters/utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/config.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/constants.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/config.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/config.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/config.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/llm/providers.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/managers.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/processors/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/tracer/utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/console.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/trainer/trainable_model.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/async_utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/dont_throw.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/decorators/use_once.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/guards.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/meta.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/serialize.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/testing.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/url.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/version_check.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/README.md +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/__init__.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_async.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_sync.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/mutable_wrap_async.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/mutable_wrap_sync.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/py.typed +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/utils/wrappers/utils.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/src/judgeval/warnings.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/update_version.py +0 -0
- {judgeval-0.16.9 → judgeval-0.18.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.16.9
+Version: 0.18.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -63,8 +63,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function, # entry point to your agent
     scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts
-    rft_provider="fireworks"
+    prompts=training_prompts # Tasks
 )
 ```

@@ -36,8 +36,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function, # entry point to your agent
     scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts
-    rft_provider="fireworks"
+    prompts=training_prompts # Tasks
 )
 ```

@@ -43,6 +43,11 @@ JUDGEVAL_PATHS: List[str] = [
     "/e2e_fetch_trace/",
     "/e2e_fetch_span_score/",
     "/e2e_fetch_trace_scorer_span_score/",
+    "/prompts/insert/",
+    "/prompts/fetch/",
+    "/prompts/tag/",
+    "/prompts/untag/",
+    "/prompts/get_prompt_versions/",
 ]


@@ -6,6 +6,7 @@ from judgeval.data.evaluation_run import ExampleEvaluationRun


 from typing import List, Optional, Union, Sequence
+import ast
 from judgeval.scorers import ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
@@ -81,6 +82,7 @@ class JudgmentClient(metaclass=SingletonMeta):
         scorer_file_path: str,
         requirements_file_path: Optional[str] = None,
         unique_name: Optional[str] = None,
+        overwrite: bool = False,
     ) -> bool:
         """
         Upload custom ExampleScorer from files to backend.
@@ -89,6 +91,7 @@ class JudgmentClient(metaclass=SingletonMeta):
             scorer_file_path: Path to Python file containing CustomScorer class
             requirements_file_path: Optional path to requirements.txt
             unique_name: Optional unique identifier (auto-detected from scorer.name if not provided)
+            overwrite: Whether to overwrite existing scorer if it already exists

         Returns:
             bool: True if upload successful
@@ -111,6 +114,31 @@ class JudgmentClient(metaclass=SingletonMeta):
         with open(scorer_file_path, "r") as f:
             scorer_code = f.read()

+        try:
+            tree = ast.parse(scorer_code, filename=scorer_file_path)
+        except SyntaxError as e:
+            error_msg = f"Invalid Python syntax in {scorer_file_path}: {e}"
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        scorer_classes = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                for base in node.bases:
+                    if (isinstance(base, ast.Name) and base.id == "ExampleScorer") or (
+                        isinstance(base, ast.Attribute) and base.attr == "ExampleScorer"
+                    ):
+                        scorer_classes.append(node.name)
+
+        if len(scorer_classes) > 1:
+            error_msg = f"Multiple ExampleScorer classes found in {scorer_file_path}: {scorer_classes}. Please only upload one scorer class per file."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+        elif len(scorer_classes) == 0:
+            error_msg = f"No ExampleScorer class was found in {scorer_file_path}. Please ensure the file contains a valid scorer class that inherits from ExampleScorer."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
         # Read requirements (optional)
         requirements_text = ""
         if requirements_file_path and os.path.exists(requirements_file_path):
@@ -127,6 +155,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 "scorer_name": unique_name,
                 "scorer_code": scorer_code,
                 "requirements_text": requirements_text,
+                "overwrite": overwrite,
             }
         )

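For orientation, here is a minimal sketch of a scorer file that the AST validation added above would accept: exactly one class inheriting from ExampleScorer. The file name, class name, and body are hypothetical; the check only inspects the inheritance. The new overwrite flag is simply forwarded in the upload payload and, per the help text, replaces an existing scorer with the same name.

```python
# my_scorer.py -- hypothetical example; names here are illustrative only
from judgeval.scorers.example_scorer import ExampleScorer


class RewardScorer(ExampleScorer):
    """The only ExampleScorer subclass in this file, so the upload validation passes."""

    # Scoring logic omitted; a file containing zero or more than one
    # ExampleScorer subclass is rejected with a ValueError before upload.
```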
@@ -189,6 +189,59 @@ class JudgmentSyncClient:
             payload,
         )

+    def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
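A rough usage sketch of the new synchronous prompt endpoints. The JudgmentSyncClient constructor arguments are not part of this diff, and the ids and names below are made up; the payload shapes follow the PromptInsertRequest and PromptFetchResponse types added in api_types.py.

```python
from judgeval.api import JudgmentSyncClient

# Assumption: the client picks up its API key/configuration elsewhere;
# its constructor is not shown in this diff.
client = JudgmentSyncClient()

# Insert a new prompt version (payload mirrors the PromptInsertRequest TypedDict).
inserted = client.prompts_insert(
    {
        "project_id": "proj_123",        # hypothetical project id
        "name": "support-agent-system",  # hypothetical prompt name
        "prompt": "You are a helpful support agent.",
        "tags": ["production"],
    }
)

# Fetch that exact version back by commit id, then list all versions.
fetched = client.prompts_fetch(
    project_id="proj_123",
    name="support-agent-system",
    commit_id=inserted["commit_id"],
)
versions = client.prompts_get_prompt_versions(
    project_id="proj_123", name="support-agent-system"
)
```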
@@ -381,6 +434,61 @@ class JudgmentAsyncClient:
             payload,
         )

+    async def prompts_insert(
+        self, payload: PromptInsertRequest
+    ) -> PromptInsertResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    async def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return await self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    async def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return await self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     async def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
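The asynchronous client mirrors the same surface; a brief sketch under the same assumptions as the synchronous example above:

```python
import asyncio

from judgeval.api import JudgmentAsyncClient


async def main() -> None:
    client = JudgmentAsyncClient()  # assumption: configuration handled outside this diff
    versions = await client.prompts_get_prompt_versions(
        project_id="proj_123",        # hypothetical project id
        name="support-agent-system",  # hypothetical prompt name
    )
    # PromptVersionsResponse is a TypedDict with a "versions" list of PromptCommitInfo entries.
    print(len(versions["versions"]))


asyncio.run(main())
```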
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-10-
+# timestamp: 2025-10-21T01:37:42+00:00

 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -80,6 +80,7 @@ class CustomScorerUploadPayload(TypedDict):
     scorer_name: str
     scorer_code: str
     requirements_text: str
+    overwrite: NotRequired[bool]


 class CustomScorerTemplateResponse(TypedDict):
@@ -88,6 +89,40 @@ class CustomScorerTemplateResponse(TypedDict):
     message: str


+class PromptInsertRequest(TypedDict):
+    project_id: str
+    name: str
+    prompt: str
+    tags: List[str]
+
+
+class PromptInsertResponse(TypedDict):
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+
+
+class PromptTagRequest(TypedDict):
+    project_id: str
+    name: str
+    commit_id: str
+    tags: List[str]
+
+
+class PromptTagResponse(TypedDict):
+    commit_id: str
+
+
+class PromptUntagRequest(TypedDict):
+    project_id: str
+    name: str
+    tags: List[str]
+
+
+class PromptUntagResponse(TypedDict):
+    commit_ids: List[str]
+
+
 class ResolveProjectNameRequest(TypedDict):
     project_name: str

@@ -169,6 +204,18 @@ class PromptScorer(TypedDict):
     is_trace: NotRequired[Optional[bool]]


+class PromptCommitInfo(TypedDict):
+    name: str
+    prompt: str
+    tags: List[str]
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+    first_name: str
+    last_name: str
+    user_email: str
+
+
 class ScorerData(TypedDict):
     id: NotRequired[str]
     name: str
@@ -265,6 +312,14 @@ class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]


+class PromptFetchResponse(TypedDict):
+    commit: NotRequired[Optional[PromptCommitInfo]]
+
+
+class PromptVersionsResponse(TypedDict):
+    versions: List[PromptCommitInfo]
+
+
 class ScoringResult(TypedDict):
     success: bool
     scorers_data: List[ScorerData]
@@ -26,6 +26,12 @@ def upload_scorer(
     unique_name: str = typer.Option(
         None, help="Custom name for the scorer (auto-detected if not provided)"
     ),
+    overwrite: bool = typer.Option(
+        False,
+        "--overwrite",
+        "-o",
+        help="Overwrite existing scorer if it already exists",
+    ),
 ):
     # Validate file paths
     if not Path(scorer_file_path).exists():
@@ -43,6 +49,7 @@ def upload_scorer(
         scorer_file_path=scorer_file_path,
         requirements_file_path=requirements_file_path,
         unique_name=unique_name,
+        overwrite=overwrite,
     )

     if not result:
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-10-
+# timestamp: 2025-10-21T01:37:41+00:00

 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -87,6 +87,7 @@ class CustomScorerUploadPayload(BaseModel):
     scorer_name: Annotated[str, Field(title="Scorer Name")]
     scorer_code: Annotated[str, Field(title="Scorer Code")]
     requirements_text: Annotated[str, Field(title="Requirements Text")]
+    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False


 class CustomScorerTemplateResponse(BaseModel):
@@ -95,6 +96,40 @@ class CustomScorerTemplateResponse(BaseModel):
     message: Annotated[str, Field(title="Message")]


+class PromptInsertRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptInsertResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+
+
+class PromptTagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptTagResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+
+
+class PromptUntagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptUntagResponse(BaseModel):
+    commit_ids: Annotated[List[str], Field(title="Commit Ids")]
+
+
 class ResolveProjectNameRequest(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]

@@ -187,6 +222,18 @@ class PromptScorer(BaseModel):
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False


+class PromptCommitInfo(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+    first_name: Annotated[str, Field(title="First Name")]
+    last_name: Annotated[str, Field(title="Last Name")]
+    user_email: Annotated[str, Field(title="User Email")]
+
+
 class ScorerData(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
     name: Annotated[str, Field(title="Name")]
@@ -299,6 +346,14 @@ class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]


+class PromptFetchResponse(BaseModel):
+    commit: Optional[PromptCommitInfo] = None
+
+
+class PromptVersionsResponse(BaseModel):
+    versions: Annotated[List[PromptCommitInfo], Field(title="Versions")]
+
+
 class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
     scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]