judgeval 0.0.53__tar.gz → 0.0.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {judgeval-0.0.53 → judgeval-0.0.54}/PKG-INFO +5 -5
- {judgeval-0.0.53 → judgeval-0.0.54}/README.md +4 -4
- {judgeval-0.0.53 → judgeval-0.0.54}/pyproject.toml +1 -1
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/pull_request_template.md +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/blocked-pr.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/ci.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/lint.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/merge-branch-check.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/release.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.github/workflows/validate-branch.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.gitignore +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/.pre-commit-config.yaml +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/LICENSE.md +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/agent.gif +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/data.gif +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/dataset_clustering_screenshot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/dataset_clustering_screenshot_dm.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/datasets_preview_screenshot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/document.gif +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/error_analysis_dashboard.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/experiments_dashboard_screenshot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/experiments_page.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/experiments_pagev2.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/logo-dark.svg +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/logo-light.svg +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/monitoring_screenshot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/new_darkmode.svg +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/new_lightmode.svg +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/product_shot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/trace.gif +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/trace_demo.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/trace_screenshot.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/assets/trace_screenshot_old.png +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/pytest.ini +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/.coveragerc +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/clients.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/exceptions.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/logger.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/s3_storage.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/tracer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/common/utils.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/constants.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/datasets/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/datasets/dataset.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/datasets/eval_dataset_client.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/example.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/judgment_types.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/result.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/scorer_data.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/scripts/openapi_transform.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/tool.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/trace.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/data/trace_run.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/evaluation_run.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/integrations/langgraph.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/base_judge.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/litellm_judge.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/mixture_of_judges.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/together_judge.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judges/utils.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/judgment_client.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/rules.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/run_evaluation.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/agent_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/api_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/base_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/example_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/exceptions.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/score.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/utils.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/tracer/__init__.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/utils/alerts.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/utils/file_utils.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/utils/requests.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/version_check.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/src/update_types.sh +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/update_version.py +0 -0
- {judgeval-0.0.53 → judgeval-0.0.54}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: judgeval
|
3
|
-
Version: 0.0.53
|
3
|
+
Version: 0.0.54
|
4
4
|
Summary: Judgeval Package
|
5
5
|
Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
|
6
6
|
Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
|
@@ -151,10 +151,10 @@ You'll see your trace exported to the Judgment Platform:
|
|
151
151
|
|
152
152
|
| | |
|
153
153
|
|:---|:---:|
|
154
|
-
| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic)
|
155
|
-
| <h3>🧪 Evals</h3>
|
156
|
-
| <h3>📡 Monitoring</h3>
|
157
|
-
| <h3>📊 Datasets</h3>Export
|
154
|
+
| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic). **Tracks inputs/outputs, agent tool calls, latency, cost, and custom metadata** at every step.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
|
155
|
+
| <h3>🧪 Evals</h3>Build custom evaluators on top of your agents. Judgeval supports LLM-as-a-judge, manual labeling, and code-based evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 A/B testing <br>• 🛡️ Online guardrails | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
|
156
|
+
| <h3>📡 Monitoring</h3>Get Slack alerts for agent failures in production. Add custom hooks to address production regressions.<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/error_analysis_dashboard.png" alt="Monitoring Dashboard" width="1200"/></p> |
|
157
|
+
| <h3>📊 Datasets</h3>Export traces and test cases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
|
158
158
|
|
159
159
|
## 🏢 Self-Hosting
|
160
160
|
|
@@ -121,10 +121,10 @@ You'll see your trace exported to the Judgment Platform:
|
|
121
121
|
|
122
122
|
| | |
|
123
123
|
|:---|:---:|
|
124
|
-
| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic)
|
125
|
-
| <h3>🧪 Evals</h3>
|
126
|
-
| <h3>📡 Monitoring</h3>
|
127
|
-
| <h3>📊 Datasets</h3>Export
|
124
|
+
| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic). **Tracks inputs/outputs, agent tool calls, latency, cost, and custom metadata** at every step.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
|
125
|
+
| <h3>🧪 Evals</h3>Build custom evaluators on top of your agents. Judgeval supports LLM-as-a-judge, manual labeling, and code-based evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 A/B testing <br>• 🛡️ Online guardrails | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
|
126
|
+
| <h3>📡 Monitoring</h3>Get Slack alerts for agent failures in production. Add custom hooks to address production regressions.<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/error_analysis_dashboard.png" alt="Monitoring Dashboard" width="1200"/></p> |
|
127
|
+
| <h3>📊 Datasets</h3>Export traces and test cases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
|
128
128
|
|
129
129
|
## 🏢 Self-Hosting
|
130
130
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.0.53 → judgeval-0.0.54}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py
RENAMED
File without changes
|
{judgeval-0.0.53 → judgeval-0.0.54}/src/judgeval/scorers/judgeval_scorers/classifiers/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|