judgeval 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.3.0 → judgeval-0.3.2}/PKG-INFO +1 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/pyproject.toml +1 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/constants.py +1 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/trace_manager.py +5 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/constants.py +2 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/judgment_types.py +1 -2
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/together_judge.py +2 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/run_evaluation.py +1 -1
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/pull_request_template.md +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/mypy.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/release.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.gitignore +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/.pre-commit-config.yaml +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/LICENSE.md +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/README.md +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/agent.gif +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/agent_trace_example.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/data.gif +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/document.gif +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/errors.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_page.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/logo-dark.svg +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/logo-light.svg +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/new_darkmode.svg +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/new_lightmode.svg +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/online_eval.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/product_shot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/test.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/tests.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace.gif +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_demo.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_screenshot.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/pytest.ini +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/.coveragerc +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/clients.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/api/api.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/logger.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/storage/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/storage/s3_storage.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/constants.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/core.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/otel_exporter.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/otel_span_processor.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/span_processor.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/tracer/span_transformer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/common/utils.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/example.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/result.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/tool.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/data/trace_run.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/dataset.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/evaluation_run.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/integrations/langgraph.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/mixture_of_judges.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/judgment_client.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/rules.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/alerts.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/utils/requests.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/version_check.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/src/update_types.sh +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/update_version.py +0 -0
- {judgeval-0.3.0 → judgeval-0.3.2}/uv.lock +0 -0
@@ -142,7 +142,7 @@ class DatasetStatsPayload(TypedDict):
|
|
142
142
|
|
143
143
|
|
144
144
|
# Projects API
|
145
|
-
JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval"
|
145
|
+
JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete_from_judgeval/"
|
146
146
|
JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
|
147
147
|
|
148
148
|
|
@@ -54,7 +54,6 @@ class TraceManagerClient:
|
|
54
54
|
Returns:
|
55
55
|
dict: Server response containing UI URL and other metadata
|
56
56
|
"""
|
57
|
-
server_response = self.api_client.upsert_trace(trace_data)
|
58
57
|
|
59
58
|
if self.tracer and self.tracer.use_s3 and final_save:
|
60
59
|
try:
|
@@ -67,6 +66,11 @@ class TraceManagerClient:
|
|
67
66
|
except Exception as e:
|
68
67
|
judgeval_logger.warning(f"Failed to save trace to S3: {str(e)}")
|
69
68
|
|
69
|
+
trace_data.pop("trace_spans", None)
|
70
|
+
trace_data.pop("evaluation_runs", None)
|
71
|
+
|
72
|
+
server_response = self.api_client.upsert_trace(trace_data)
|
73
|
+
|
70
74
|
if not offline_mode and show_link and "ui_results_url" in server_response:
|
71
75
|
pretty_str = f"\n🔍 You can view your trace data here: [rgb(106,0,255)][link={server_response['ui_results_url']}]View Trace[/link]\n"
|
72
76
|
rprint(pretty_str)
|
@@ -104,6 +104,8 @@ TOGETHER_SUPPORTED_MODELS = [
|
|
104
104
|
"mistralai/Mistral-7B-Instruct-v0.1",
|
105
105
|
]
|
106
106
|
|
107
|
+
DEFAULT_TOGETHER_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct-Lite"
|
108
|
+
|
107
109
|
JUDGMENT_SUPPORTED_MODELS = {"osiris-large", "osiris-mini", "osiris"}
|
108
110
|
|
109
111
|
ACCEPTABLE_MODELS = (
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# generated by datamodel-codegen:
|
2
2
|
# filename: openapi_new.json
|
3
|
-
# timestamp: 2025-07-
|
3
|
+
# timestamp: 2025-07-29T18:13:07+00:00
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
@@ -135,7 +135,6 @@ class TraceJudgmentType(BaseModel):
|
|
135
135
|
created_at: Annotated[str, Field(title="Created At")]
|
136
136
|
duration: Annotated[float, Field(title="Duration")]
|
137
137
|
trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
|
138
|
-
overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
|
139
138
|
offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
|
140
139
|
rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
|
141
140
|
default_factory=dict
|
@@ -11,6 +11,7 @@ from judgeval.common.utils import (
|
|
11
11
|
afetch_together_api_response,
|
12
12
|
)
|
13
13
|
from judgeval.common.logger import judgeval_logger
|
14
|
+
from judgeval.constants import DEFAULT_TOGETHER_MODEL
|
14
15
|
|
15
16
|
BASE_CONVERSATION = [
|
16
17
|
{"role": "system", "content": "You are a helpful assistant."},
|
@@ -18,7 +19,7 @@ BASE_CONVERSATION = [
|
|
18
19
|
|
19
20
|
|
20
21
|
class TogetherJudge(JudgevalJudge):
|
21
|
-
def __init__(self, model: str =
|
22
|
+
def __init__(self, model: str = DEFAULT_TOGETHER_MODEL, **kwargs):
|
22
23
|
self.model = model
|
23
24
|
self.kwargs = kwargs
|
24
25
|
super().__init__(model_name=model)
|
@@ -414,7 +414,7 @@ def _poll_evaluation_until_complete(
|
|
414
414
|
expected_scorer_data_count: int,
|
415
415
|
poll_interval_seconds: float = 5,
|
416
416
|
max_failures: int = 5,
|
417
|
-
max_poll_count: int =
|
417
|
+
max_poll_count: int = 60, # This should be equivalent to 5 minutes
|
418
418
|
) -> Tuple[List[ScoringResult], str]:
|
419
419
|
"""
|
420
420
|
Polls until the evaluation is complete and returns the results.
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py
RENAMED
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py
RENAMED
File without changes
|
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py
RENAMED
File without changes
|
File without changes
|
{judgeval-0.3.0 → judgeval-0.3.2}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|