judgeval 0.0.44__py3-none-any.whl → 0.0.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. judgeval/__init__.py +5 -4
  2. judgeval/clients.py +6 -6
  3. judgeval/common/__init__.py +7 -2
  4. judgeval/common/exceptions.py +2 -3
  5. judgeval/common/logger.py +74 -49
  6. judgeval/common/s3_storage.py +30 -23
  7. judgeval/common/tracer.py +1273 -939
  8. judgeval/common/utils.py +416 -244
  9. judgeval/constants.py +73 -61
  10. judgeval/data/__init__.py +1 -1
  11. judgeval/data/custom_example.py +3 -2
  12. judgeval/data/datasets/dataset.py +80 -54
  13. judgeval/data/datasets/eval_dataset_client.py +131 -181
  14. judgeval/data/example.py +67 -43
  15. judgeval/data/result.py +11 -9
  16. judgeval/data/scorer_data.py +4 -2
  17. judgeval/data/tool.py +25 -16
  18. judgeval/data/trace.py +57 -29
  19. judgeval/data/trace_run.py +5 -11
  20. judgeval/evaluation_run.py +22 -82
  21. judgeval/integrations/langgraph.py +546 -184
  22. judgeval/judges/base_judge.py +1 -2
  23. judgeval/judges/litellm_judge.py +33 -11
  24. judgeval/judges/mixture_of_judges.py +128 -78
  25. judgeval/judges/together_judge.py +22 -9
  26. judgeval/judges/utils.py +14 -5
  27. judgeval/judgment_client.py +259 -271
  28. judgeval/rules.py +169 -142
  29. judgeval/run_evaluation.py +462 -305
  30. judgeval/scorers/api_scorer.py +20 -11
  31. judgeval/scorers/exceptions.py +1 -0
  32. judgeval/scorers/judgeval_scorer.py +77 -58
  33. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +46 -15
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +3 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +3 -2
  36. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +12 -11
  37. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +7 -5
  38. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +3 -2
  39. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +3 -2
  40. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +5 -2
  41. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +2 -1
  42. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +17 -8
  43. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +3 -2
  44. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +3 -2
  45. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +3 -2
  46. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +3 -2
  47. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +8 -9
  48. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +4 -4
  49. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +5 -5
  50. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +5 -2
  51. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +9 -10
  52. judgeval/scorers/prompt_scorer.py +48 -37
  53. judgeval/scorers/score.py +86 -53
  54. judgeval/scorers/utils.py +11 -7
  55. judgeval/tracer/__init__.py +1 -1
  56. judgeval/utils/alerts.py +23 -12
  57. judgeval/utils/{data_utils.py → file_utils.py} +5 -9
  58. judgeval/utils/requests.py +29 -0
  59. judgeval/version_check.py +5 -2
  60. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/METADATA +79 -135
  61. judgeval-0.0.46.dist-info/RECORD +69 -0
  62. judgeval-0.0.44.dist-info/RECORD +0 -68
  63. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/WHEEL +0 -0
  64. {judgeval-0.0.44.dist-info → judgeval-0.0.46.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.0.44
3
+ Version: 0.0.46
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -34,57 +34,60 @@ Description-Content-Type: text/markdown
34
34
 
35
35
  <br>
36
36
  <div style="font-size: 1.5em;">
37
- Open source tracing, evals, and metrics to debug, test, and monitor LLM agents.
37
+ Enable self-learning agents with traces, evals, and environment data.
38
38
  </div>
39
39
 
40
- ## [Judgment Cloud](https://app.judgmentlabs.ai/register) • [Self-Host](https://docs.judgmentlabs.ai/self-hosting/get_started) • [Demo](https://www.youtube.com/watch?v=1S4LixpVbcc)
40
+ ## [Docs](https://docs.judgmentlabs.ai/) • [Judgment Cloud](https://app.judgmentlabs.ai/register) • [Self-Host](https://docs.judgmentlabs.ai/self-hosting/get_started)
41
41
 
42
- [Docs](https://docs.judgmentlabs.ai/introduction) • [Bug Reports](https://github.com/JudgmentLabs/judgeval/issues) • [Changelog](https://docs.judgmentlabs.ai/changelog/2025-04-21)
42
+ [Demo](https://www.youtube.com/watch?v=1S4LixpVbcc) • [Bug Reports](https://github.com/JudgmentLabs/judgeval/issues) • [Changelog](https://docs.judgmentlabs.ai/changelog/2025-04-21)
43
43
 
44
- We're hiring! Join us in our mission to unleash optimized agents.
44
+ We're hiring! Join us in our mission to enable self-learning agents by providing the data and signals needed for continuous improvement.
45
45
 
46
46
  [![X](https://img.shields.io/badge/-X/Twitter-000?logo=x&logoColor=white)](https://x.com/JudgmentLabs)
47
47
  [![LinkedIn](https://custom-icon-badges.demolab.com/badge/LinkedIn%20-0A66C2?logo=linkedin-white&logoColor=fff)](https://www.linkedin.com/company/judgmentlabs)
48
- [![Discord](https://img.shields.io/badge/-Discord-5865F2?logo=discord&logoColor=white)](https://discord.gg/ZCnSXYug)
48
+ [![Discord](https://img.shields.io/badge/-Discord-5865F2?logo=discord&logoColor=white)](https://discord.gg/tGVFf8UBUY)
49
49
 
50
- <img src="assets/experiments_pagev2.png" alt="Judgment Platform Experiments Page" width="800" />
50
+ <img src="assets/product_shot.png" alt="Judgment Platform" width="800" />
51
51
 
52
52
  </div>
53
53
 
54
-
55
- Judgeval offers **robust open-source tooling** for tracing, evaluating, and monitoring LLM agents. It helps AI teams effectively **test and monitor** agents in development and production, **closing the agent feedback loop**.
56
-
57
- Judgeval can be set up **(cloud-hosted or self-hosted) in 5 minutes**!
58
- > 🎁 Generous monthly [free tier](https://judgmentlabs.ai/pricing) (10k traces, 1k evals) - No credit card required!
59
-
60
- Judgeval is created and maintained by [Judgment Labs](https://judgmentlabs.ai/).
54
+ Judgeval offers **open-source tooling** for tracing, evaluating, and monitoring LLM agents. **Provides comprehensive data from agent-environment interactions** for continuous learning and self-improvement—**enabling the future of autonomous agents**.
55
+
56
+ ## 🎬 See Judgeval in Action
57
+
58
+ **[Multi-Agent System](https://github.com/JudgmentLabs/judgment-cookbook/tree/main/cookbooks/agents/multi-agent) with complete observability:** (1) A multi-agent system spawns agents to research topics on the internet. (2) With just **3 lines of code**, Judgeval traces every input/output + environment response across all agent tool calls for debugging. (3) After the agents complete their run, (4) all interaction data can be exported to enable further environment-specific learning and optimization.
59
+
60
+ <table style="width: 100%; max-width: 800px; table-layout: fixed;">
61
+ <tr>
62
+ <td align="center" style="padding: 8px; width: 50%;">
63
+ <img src="assets/agent.gif" alt="Agent Demo" style="width: 100%; max-width: 350px; height: auto;" />
64
+ <br><strong>🤖 Agents Running</strong>
65
+ </td>
66
+ <td align="center" style="padding: 8px; width: 50%;">
67
+ <img src="assets/trace.gif" alt="Trace Demo" style="width: 100%; max-width: 350px; height: auto;" />
68
+ <br><strong>📊 Real-time Tracing</strong>
69
+ </td>
70
+ </tr>
71
+ <tr>
72
+ <td align="center" style="padding: 8px; width: 50%;">
73
+ <img src="assets/document.gif" alt="Agent Completed Demo" style="width: 100%; max-width: 350px; height: auto;" />
74
+ <br><strong>✅ Agents Completed Running</strong>
75
+ </td>
76
+ <td align="center" style="padding: 8px; width: 50%;">
77
+ <img src="assets/data.gif" alt="Data Export Demo" style="width: 100%; max-width: 350px; height: auto;" />
78
+ <br><strong>📤 Exporting Agent Environment Data</strong>
79
+ </td>
80
+ </tr>
81
+
82
+ </table>
61
83
 
62
84
  ## 📋 Table of Contents
63
- - [✨ Features](#-features)
64
85
  - [🛠️ Installation](#️-installation)
65
86
  - [🏁 Quickstarts](#-quickstarts)
66
- - [🛰️ Tracing](#️-tracing)
67
- - [📝 Offline Evaluations](#-offline-evaluations)
68
- - [📡 Online Evaluations](#-online-evaluations)
87
+ - [Features](#features)
69
88
  - [🏢 Self-Hosting](#-self-hosting)
70
- - [Key Features](#key-features)
71
- - [Getting Started](#getting-started)
72
89
  - [📚 Cookbooks](#-cookbooks)
73
90
  - [💻 Development with Cursor](#-development-with-cursor)
74
- - [⭐ Star Us on GitHub](#-star-us-on-github)
75
- - [❤️ Contributors](#️-contributors)
76
-
77
- <!-- Created by https://github.com/ekalinin/github-markdown-toc -->
78
-
79
-
80
- ## ✨ Features
81
-
82
- | | |
83
- |:---|:---:|
84
- | <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic): **tracking inputs/outputs, agent tool calls, latency, and cost** at every step.<br><br>Online evals can be applied to traces to measure quality on production data in real-time.<br><br>Export trace data to the Judgment Platform or your own S3 buckets, {Parquet, JSON, YAML} files, or data warehouse.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 👤 Tracking user activity <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
85
- | <h3>🧪 Evals</h3>Evals are the key to regression testing for agents. Judgeval provides 15+ research-backed metrics including tool call accuracy, hallucinations, instruction adherence, and retrieval context recall.<br><br>Judgeval supports LLM-as-a-judge, manual labeling, and custom evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 Experimental prompt testing<br>• 🛡️ Online guardrails | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
86
- | <h3>📡 Monitoring</h3>Track all your agent metrics in production. **Catch production regressions early.**<br><br>Configure alerts to trigger automated actions when metric thresholds are exceeded (add agent trace to review queue/dataset, Slack notification, etc.).<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/monitoring_screenshot.png" alt="Monitoring Dashboard" width="1200"/></p> |
87
- | <h3>📊 Datasets</h3>Export trace data or import external testcases to datasets for scaled unit testing and structured experiments. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations. <br><br> **Useful for:**<br>• 🗃️ Filtered agent runtime data for fine tuning<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
88
91
 
89
92
  ## 🛠️ Installation
90
93
 
@@ -94,7 +97,7 @@ Get started with Judgeval by installing our SDK using pip:
94
97
  pip install judgeval
95
98
  ```
96
99
 
97
- Ensure you have your `JUDGMENT_API_KEY` and `JUDGMENT_ORG_ID` environment variables set to connect to the [Judgment platform](https://app.judgmentlabs.ai/).
100
+ Ensure you have your `JUDGMENT_API_KEY` and `JUDGMENT_ORG_ID` environment variables set to connect to the [Judgment Platform](https://app.judgmentlabs.ai/).
98
101
 
99
102
  ```bash
100
103
  export JUDGMENT_API_KEY=...
@@ -107,106 +110,50 @@ export JUDGMENT_ORG_ID=...
107
110
 
108
111
  ### 🛰️ Tracing
109
112
 
110
- Create a file named `traces.py` with the following code:
113
+ Create a file named `agent.py` with the following code:
111
114
 
112
115
  ```python
113
- from judgeval.common.tracer import Tracer, wrap
116
+ from judgeval.tracer import Tracer, wrap
114
117
  from openai import OpenAI
115
118
 
116
- client = wrap(OpenAI())
119
+ client = wrap(OpenAI()) # tracks all LLM calls
117
120
  judgment = Tracer(project_name="my_project")
118
121
 
119
122
  @judgment.observe(span_type="tool")
120
- def my_tool():
121
- return "What's the capital of the U.S.?"
123
+ def format_question(question: str) -> str:
124
+ # dummy tool
125
+ return f"Question : {question}"
122
126
 
123
127
  @judgment.observe(span_type="function")
124
- def main():
125
- task_input = my_tool()
126
- res = client.chat.completions.create(
128
+ def run_agent(prompt: str) -> str:
129
+ task = format_question(prompt)
130
+ response = client.chat.completions.create(
127
131
  model="gpt-4.1",
128
- messages=[{"role": "user", "content": f"{task_input}"}]
132
+ messages=[{"role": "user", "content": task}]
129
133
  )
130
- return res.choices[0].message.content
131
-
132
- main()
134
+ return response.choices[0].message.content
135
+
136
+ run_agent("What is the capital of the United States?")
133
137
  ```
134
138
  You'll see your trace exported to the Judgment Platform:
135
139
 
136
140
  <p align="center"><img src="assets/trace_demo.png" alt="Judgment Platform Trace Example" width="800" /></p>
137
141
 
138
142
 
139
- [Click here](https://docs.judgmentlabs.ai/getting-started#create-your-first-trace) for a more detailed explanation.
143
+ [Click here](https://docs.judgmentlabs.ai/tracing/introduction) for a more detailed explanation.
140
144
 
141
- ### 📝 Offline Evaluations
142
-
143
- Create a file named `evaluate.py` with the following code:
144
-
145
- ```python evaluate.py
146
- from judgeval import JudgmentClient
147
- from judgeval.data import Example
148
- from judgeval.scorers import FaithfulnessScorer
149
-
150
- client = JudgmentClient()
151
-
152
- example = Example(
153
- input="What if these shoes don't fit?",
154
- actual_output="We offer a 30-day full refund at no extra cost.",
155
- retrieval_context=["All customers are eligible for a 30 day full refund at no extra cost."],
156
- )
157
-
158
- scorer = FaithfulnessScorer(threshold=0.5) # Hallucination detector
159
- results = client.run_evaluation(
160
- examples=[example],
161
- scorers=[scorer],
162
- model="gpt-4.1",
163
- )
164
- print(results)
165
- ```
166
-
167
- [Click here](https://docs.judgmentlabs.ai/getting-started#create-your-first-experiment) for a more detailed explanation.
168
-
169
- ### 📡 Online Evaluations
170
-
171
- Attach performance monitoring on traces to measure the quality of your systems in production.
172
145
 
173
- Using the same `traces.py` file we created earlier, modify `main` function:
174
-
175
- ```python
176
- from judgeval.common.tracer import Tracer, wrap
177
- from judgeval.scorers import AnswerRelevancyScorer
178
- from openai import OpenAI
179
-
180
- client = wrap(OpenAI())
181
- judgment = Tracer(project_name="my_project")
182
-
183
- @judgment.observe(span_type="tool")
184
- def my_tool():
185
- return "Hello world!"
186
-
187
- @judgment.observe(span_type="function")
188
- def main():
189
- task_input = my_tool()
190
- res = client.chat.completions.create(
191
- model="gpt-4.1",
192
- messages=[{"role": "user", "content": f"{task_input}"}]
193
- ).choices[0].message.content
194
-
195
- judgment.async_evaluate(
196
- scorers=[AnswerRelevancyScorer(threshold=0.5)],
197
- input=task_input,
198
- actual_output=res,
199
- model="gpt-4.1"
200
- )
201
- print("Online evaluation submitted.")
202
- return res
146
+ <!-- Created by https://github.com/ekalinin/github-markdown-toc -->
203
147
 
204
- main()
205
- ```
206
148
 
207
- You should see an evaluation attached to your trace on the Judgment Platform.
149
+ ## Features
208
150
 
209
- [Click here](https://docs.judgmentlabs.ai/getting-started#create-your-first-online-evaluation) for a more detailed explanation.
151
+ | | |
152
+ |:---|:---:|
153
+ | <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic): **tracking inputs/outputs, agent tool calls, latency, and cost** at every step.<br><br>Online evals can be applied to traces to measure quality on production data in real-time. Export data per individual trace for detailed analysis.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
154
+ | <h3>🧪 Evals</h3>Evals are the key to regression testing for agents. Judgeval provides 15+ research-backed metrics including tool call accuracy, hallucinations, instruction adherence, and retrieval context recall.<br><br>Judgeval supports LLM-as-a-judge, manual labeling, and custom evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 Experimental prompt testing<br>• 🛡️ Online guardrails | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
155
+ | <h3>📡 Monitoring</h3>Track all your agent metrics in production. **Catch production regressions early.**<br><br>Configure alerts to trigger automated actions when metric thresholds are exceeded (add agent trace to review queue/dataset, Slack notification, etc.).<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/error_analysis_dashboard.png" alt="Monitoring Dashboard" width="1200"/></p> |
156
+ | <h3>📊 Datasets</h3>Export comprehensive agent-environment interaction data or import external testcases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
210
157
 
211
158
  ## 🏢 Self-Hosting
212
159
 
@@ -224,14 +171,9 @@ Run Judgment on your own infrastructure: we provide comprehensive self-hosting c
224
171
 
225
172
  ## 📚 Cookbooks
226
173
 
227
- Have your own? We're happy to feature it if you create a PR or message us on [Discord](https://discord.gg/taAufyhf).
174
+ Have your own? We're happy to feature it if you create a PR or message us on [Discord](https://discord.gg/tGVFf8UBUY).
228
175
 
229
- You can access our repo of cookbooks [here](https://github.com/JudgmentLabs/judgment-cookbook). Here are some highlights:
230
-
231
- ### Sample Agents
232
-
233
- #### [Multi-Agent System](https://github.com/JudgmentLabs/judgment-cookbook/tree/main/cookbooks/agents/multi-agent)
234
- A multi-agent system augmented with tool calls designed for general purpose tasks like financial research and math. Traced and evaluated on Faithfulness (factual adherence to retrieval context).
176
+ You can access our repo of cookbooks [here](https://github.com/JudgmentLabs/judgment-cookbook).
235
177
 
236
178
  ## 💻 Development with Cursor
237
179
  When building agents and LLM workflows in Cursor, providing proper context to your coding assistant helps ensure seamless integration with Judgment. This rule file supplies the essential context your coding assistant needs for successful implementation.
@@ -1243,10 +1185,10 @@ Judgeval is created and maintained by @Judgment Labs.
1243
1185
 
1244
1186
  | | |
1245
1187
  |:---|:---:|
1246
- | <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic): **tracking inputs/outputs, latency, and cost** at every step.<br><br>Online evals can be applied to traces to measure quality on production data in real-time.<br><br>Export trace data to the Judgment Platform or your own S3 buckets, {Parquet, JSON, YAML} files, or data warehouse.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 👤 Tracking user activity <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
1188
+ | <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic): **tracking inputs/outputs, agent tool calls, latency, and cost** at every step.<br><br>Online evals can be applied to traces to measure quality on production data in real-time.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/trace_screenshot.png" alt="Tracing visualization" width="1200"/></p> |
1247
1189
  | <h3>🧪 Evals</h3>15+ research-backed metrics including tool call accuracy, hallucinations, instruction adherence, and retrieval context recall.<br><br>Build custom evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 Experimental prompt testing<br>• 🛡️ Online guardrails <br><br> | <p align="center"><img src="assets/experiments_page.png" alt="Evaluation metrics" width="800"/></p> |
1248
1190
  | <h3>📡 Monitoring</h3>Real-time performance tracking of your agents in production environments. **Track all your metrics in one place.**<br><br>Set up **Slack/email alerts** for critical metrics and receive notifications when thresholds are exceeded.<br><br> **Useful for:** <br>•📉 Identifying degradation early <br>•📈 Visualizing performance trends across versions and time | <p align="center"><img src="assets/monitoring_screenshot.png" alt="Monitoring Dashboard" width="1200"/></p> |
1249
- | <h3>📊 Datasets</h3>Export trace data or import external testcases to datasets hosted on Judgment's Platform. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations. <br><br> **Useful for:**<br>• 🔄 Scaled analysis for A/B tests <br>• 🗃️ Filtered collections of agent runtime data| <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
1191
+ | <h3>📊 Datasets</h3>Export trace data or import external testcases to datasets for scaled unit testing and structured experiments. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations. <br><br> **Useful for:**<br>• 🗃️ Filtered agent runtime data for fine tuning<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
1250
1192
  | <h3>💡 Insights</h3>Cluster on your data to reveal common use cases and failure modes.<br><br>Trace failures to their exact source with Judgment's Osiris agent, which localizes errors to specific components for precise fixes.<br><br> **Useful for:**<br>•🔮 Surfacing common inputs that lead to error<br>•🤖 Investigating agent/user behavior for optimization <br>| <p align="center"><img src="assets/dataset_clustering_screenshot_dm.png" alt="Insights dashboard" width="1200"/></p> |
1251
1193
 
1252
1194
  ## 🛠️ Installation
@@ -1271,26 +1213,27 @@ Track your agent execution with full observability with just a few lines of code
1271
1213
  Create a file named `traces.py` with the following code:
1272
1214
 
1273
1215
  ```python
1274
- from judgeval.common.tracer import Tracer, wrap
1216
+ from judgeval.tracer import Tracer, wrap
1275
1217
  from openai import OpenAI
1276
1218
 
1277
- client = wrap(OpenAI())
1219
+ client = wrap(OpenAI()) # tracks all LLM calls
1278
1220
  judgment = Tracer(project_name="my_project")
1279
1221
 
1280
1222
  @judgment.observe(span_type="tool")
1281
- def my_tool():
1282
- return "What's the capital of the U.S.?"
1223
+ def format_question(question: str) -> str:
1224
+ # dummy tool
1225
+ return f"Question : {question}"
1283
1226
 
1284
1227
  @judgment.observe(span_type="function")
1285
- def main():
1286
- task_input = my_tool()
1287
- res = client.chat.completions.create(
1228
+ def run_agent(prompt: str) -> str:
1229
+ task = format_question(prompt)
1230
+ response = client.chat.completions.create(
1288
1231
  model="gpt-4.1",
1289
- messages=[{"role": "user", "content": f"{task_input}"}]
1232
+ messages=[{"role": "user", "content": task}]
1290
1233
  )
1291
- return res.choices[0].message.content
1234
+ return response.choices[0].message.content
1292
1235
 
1293
- main()
1236
+ run_agent("What is the capital of the United States?")
1294
1237
  ```
1295
1238
 
1296
1239
  @Click here for a more detailed explanation.
@@ -1418,13 +1361,11 @@ There are many ways to contribute to Judgeval:
1418
1361
  @![Contributors](https://github.com/JudgmentLabs/judgeval/graphs/contributors)
1419
1362
 
1420
1363
  ````
1421
-
1422
1364
  </details>
1423
1365
 
1424
1366
  ## ⭐ Star Us on GitHub
1425
1367
 
1426
- If you find Judgeval useful, please consider giving us a star on GitHub! Your support helps us grow our community and continue improving the product.
1427
-
1368
+ If you find Judgeval useful, please consider giving us a star on GitHub! Your support helps us grow our community and continue improving the repository.
1428
1369
 
1429
1370
  ## ❤️ Contributors
1430
1371
 
@@ -1437,3 +1378,6 @@ There are many ways to contribute to Judgeval:
1437
1378
  <!-- Contributors collage -->
1438
1379
  [![Contributors](https://contributors-img.web.app/image?repo=JudgmentLabs/judgeval)](https://github.com/JudgmentLabs/judgeval/graphs/contributors)
1439
1380
 
1381
+ ---
1382
+
1383
+ Judgeval is created and maintained by [Judgment Labs](https://judgmentlabs.ai/).
@@ -0,0 +1,69 @@
1
+ judgeval/__init__.py,sha256=HM1M8hmqRum6G554QKkXhB4DF4f5eh_xtYo0Kf-t3kw,332
2
+ judgeval/clients.py,sha256=JnB8n90GyXiYaGmSEYaA67mdJSnr3SIrzArao7NGebw,980
3
+ judgeval/constants.py,sha256=IwW428u2VxThczHiL6ZnRwrIzb6QwOE4kdKonktVFYA,6032
4
+ judgeval/evaluation_run.py,sha256=9fYFWJ2ZXtnNcRqxLjzKkZHAba2xi_f1uzOXDJ37Pgw,3233
5
+ judgeval/judgment_client.py,sha256=RGqjw6Q50DOaTPa5SfCzSSGjsm7zlkZ6N7LOvewCxVU,21510
6
+ judgeval/rules.py,sha256=TKI1K_Wlo3GDoSCztGcDoTioVKpvfG6zVkONyou8v5c,20465
7
+ judgeval/run_evaluation.py,sha256=JohxsU5EajwPgBhBGt_wTrNSGdVIbSJmMAR5ffCSg7c,51478
8
+ judgeval/version_check.py,sha256=FlKE8AQGwu50d3kdWSiBZYVW9sicnFInCZjakKt37w4,1003
9
+ judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
10
+ judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
11
+ judgeval/common/logger.py,sha256=_nNV4waaMB4NkjwAG0kYZ3cfBe19BY6b2vsCdKd2YR4,6112
12
+ judgeval/common/s3_storage.py,sha256=ukylTrBZ2QuT8BGbOY7D738RvHFAzVaPwmuWQ4R5xkE,3986
13
+ judgeval/common/tracer.py,sha256=7vvPY632z4ExsqIuNRjfpJfa6CpJKohz8kvBiSwbjFE,129624
14
+ judgeval/common/utils.py,sha256=p8C_BM0nNcIiVHTBH4BqsR106RNUlZ9wM0SxWY4IozE,35543
15
+ judgeval/data/__init__.py,sha256=Nuy_F6oll5c5qLOF2gGFWFYyXeOgXSh7R4Vm2kMiXDM,531
16
+ judgeval/data/custom_example.py,sha256=o4baSEeyNhS-k9PiOJdN4NfBFBGJMvhnU5RBvVRFRd8,734
17
+ judgeval/data/example.py,sha256=8wsqBJ98Nw7IaVNXZmUoz3UuQUWkBbnHI6t4_1pqmr8,7234
18
+ judgeval/data/result.py,sha256=4TfBPukRpF2iaF14zEU1RP-wHxsPWrX8PaXYnhxN8MM,3132
19
+ judgeval/data/scorer_data.py,sha256=FnePIXS-4oNqrM2Eo97-hL3g3ZKFIvEKLdkl0CnpHEI,3283
20
+ judgeval/data/tool.py,sha256=QMYJO8kyhGum8iiXxZZ_9pGcxcqp7Fjp0R0sh6i_9rU,1915
21
+ judgeval/data/trace.py,sha256=tn1ctv99UI_vG_1UmFlzvt2L20mviUSwbvVs8ow8X-o,5797
22
+ judgeval/data/trace_run.py,sha256=NMUkf5bxMW_jWXxZ-JI8-gOKSASldS7oAMH4MH4oSYE,1841
23
+ judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
24
+ judgeval/data/datasets/dataset.py,sha256=VDHQpOUoWUfaPmCeolKP-hhSzQcCHq1muRg3EtLRpf0,12933
25
+ judgeval/data/datasets/eval_dataset_client.py,sha256=93Pxb3aCgDwvi263N0CgugApIwKbHbPSfuz7j0IhHSY,12880
26
+ judgeval/integrations/langgraph.py,sha256=3fKMOhAjuDdH_q3F9OlW2T_fx_vzBg2Sz4WP4WFvBuw,35909
27
+ judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
28
+ judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY,986
29
+ judgeval/judges/litellm_judge.py,sha256=pHKdNkhdBMlrok3ZMTWaomGX6DKFXYV9zHqvCL7_2jo,2653
30
+ judgeval/judges/mixture_of_judges.py,sha256=jcE3H47bVMdqzYRuxa0LD8wudF1kxkRujEpbVV-rkcM,15913
31
+ judgeval/judges/together_judge.py,sha256=DZKlsij2ikmDiYbLZKWm8oqDRNNuvCBiGM0JcycwqWM,2424
32
+ judgeval/judges/utils.py,sha256=0CF9qtIUQUL3-W-qTGpmTjZbkUUBAM6TslDsrCHnTBU,2725
33
+ judgeval/scorers/__init__.py,sha256=VKPveyGCv5Rc0YtuT7iAxSv-M5EuikqAVeaGNnYMuWE,1340
34
+ judgeval/scorers/api_scorer.py,sha256=2LNqcwIMerb37WooGD-hw5WIVLcTXnxWxzwZ0h9CXq0,2795
35
+ judgeval/scorers/exceptions.py,sha256=ACDHK5-TWiF3NTk-wycaedpbrdobm-CvvC1JA_iP-Mk,179
36
+ judgeval/scorers/judgeval_scorer.py,sha256=VoiAQdJzgoiVyFYS9gLEGtQwfQY6tUBoWBBDyGBfo-Q,7321
37
+ judgeval/scorers/prompt_scorer.py,sha256=w0tW76J956smL4D8PsOHswjwYFb8W08E_0E9ad5_aQ8,12124
38
+ judgeval/scorers/score.py,sha256=_mKQuoZHEqrF9PaydPtzWN3zjE6PeKYETw_1UryzJ3s,19399
39
+ judgeval/scorers/utils.py,sha256=UKssYyqsJ_hckeqa1aGcXLLxiScRDzYilyuT1RqkVyo,6853
40
+ judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=mmGIBCWN2WByjSUn9o5-xmHV2W-fDNyRofNsEpSuqyQ,2248
42
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=xY7vY4uIfncEiCksGu5SFT8dUjzkY9suNgyvipQ1avU,712
43
+ judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=t2ClO5nL6rM_atKV9YFgOCrQEPI_KjNqs1tyF3WqQig,659
44
+ judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py,sha256=USeIQ1nozvQhMIrRLpST3nqNOekOFW5XJG4NSP7w0RI,4430
45
+ judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=H4K_NIMabYd_OPlMz3CNNMIM3vYk7PunTXygMnyp6sc,1240
46
+ judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=QldMhW7k16jPPiHQAeLH-2VilPTuNHVi6OMsWvWnycE,771
47
+ judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=GDxEljGD4E-8j6t9DpV2cve0gcKZiUYHn2bfyXChbu0,759
48
+ judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=4E6Sa1aaI1k9PvA8afzNwIdrBCxv4UOqMtmfnLlWeWs,826
49
+ judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=jiKi8EfwP_yuOwHhYStbIUQIn2LPwJEbkh8PQeOoDTs,475
50
+ judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=guG37tQm8m4Gs1bwYS1eaNau-RJYwteb1hwYQ0YMIbk,1357
51
+ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=6iK6Da0FWoyDe_OH7UMnc4gpnByNqfIx6BW8nUbvlC0,693
52
+ judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=RrGgBMgwVPpxb9cHm-yXQBgoh6CHUm_GkFYGSp-KcUc,693
53
+ judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=VbvEEawOZ1XA3SWS986cbR2m3Clyliv21nzHe9GrQxo,687
54
+ judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=nk4_lpq2eIe6v8GtBm2g6O1CLCg5sP7-wspye6qNuXE,679
55
+ judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=9gKX32g9dAvFdHXzQmR-CFabBPLIZHu8aCnICK3t7j8,1066
56
+ judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=Wz5wtpqeXMdK8oRXRKnWqow4s1mmqGFQqHK42wo6cNQ,648
57
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=wzgprwQ3hcsc9itHG0DkcXyNnvVVd-s1UpNyZxw49Sw,590
58
+ judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=462fR2m-67FR2TdHu6cCNZLRkIT_yTAOrMeb-1AuQe8,576
59
+ judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
60
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
61
+ judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=gloLzThkFsr8sHQargDAH8XaDrlF6OCuc_69hyNslFU,2589
62
+ judgeval/tracer/__init__.py,sha256=wkuXtOGDCrwgPPXlh_sSJmvGuWaAMHyNzk1TzB5f9aI,148
63
+ judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
64
+ judgeval/utils/file_utils.py,sha256=M6a_BPRGMwEFBPdF_Tbcbbk4YldHcOhuoU9oRlmninE,1858
65
+ judgeval/utils/requests.py,sha256=rbmZTaiyWI8t2YUkhk11SIe3dF7j2j25L1BuFp_1PII,770
66
+ judgeval-0.0.46.dist-info/METADATA,sha256=VZl8DWjZYO8FEejrrk9wSY-k0BbO4AyCgFHJCq5VC3M,54676
67
+ judgeval-0.0.46.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
+ judgeval-0.0.46.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
69
+ judgeval-0.0.46.dist-info/RECORD,,
@@ -1,68 +0,0 @@
1
- judgeval/__init__.py,sha256=x9HWt4waJwJMAqTuJSg2MezF9Zg-macEjeU-ajbly-8,330
2
- judgeval/clients.py,sha256=EiTmvvWksTPyWIuMC9jz06SPY2vFzokIJUIGoScpisA,989
3
- judgeval/constants.py,sha256=MmkgNXdwQOyYSVJc_I8EjX12OWZdFEzjaqXduRowuU4,6033
4
- judgeval/evaluation_run.py,sha256=KNGtaGAwD18pDNOKF7PCMlLnQe9SpRLTs0XWFMrCiLc,6684
5
- judgeval/judgment_client.py,sha256=JO3AkU-disPHQVK5g1SM-bs_EUSy8QZ3AaAj_Q2ag6s,24968
6
- judgeval/rules.py,sha256=LLojqmiKzQ90jAczccfaOoc3b9LBJCWX0hZ7p439no8,21110
7
- judgeval/run_evaluation.py,sha256=JI-BCyEVKW61JJ4qxFMk1ww4tams-1g_0aaCE4cHrU8,50252
8
- judgeval/version_check.py,sha256=bvJEidB7rAeXozoUbN9Yb97QOR_s2hgvpvj74jJ5HlY,943
9
- judgeval/common/__init__.py,sha256=7d24BRxtncpMj3AAJCj8RS7TqgjXmW777HVZH6-3sBs,289
10
- judgeval/common/exceptions.py,sha256=U-TxHLn7oVMezsMuoYouNDb2XuS8RCggfntYf5_6u4E,565
11
- judgeval/common/logger.py,sha256=KO75wWXCxhUHUMvLaTU31ZzOk6tkZBa7heQ7y0f-zFE,6062
12
- judgeval/common/s3_storage.py,sha256=UZZzQ8CP9_42SKDoKpPncJx8CL5Dchh4jFlKxDKi-cs,3938
13
- judgeval/common/tracer.py,sha256=I8qR6YYcjHDS5BVp9rEfGi_EOMnmcSVYk4ykHwuTBuA,127885
14
- judgeval/common/utils.py,sha256=l2nvm3-LeeScZ02H9TB2AcJh1gJSK1lNdi1Tu0p_fNQ,34276
15
- judgeval/data/__init__.py,sha256=GX_GloDtBB35mv3INWbSTP2r9cwCU2IeIYjzRT0SAd8,530
16
- judgeval/data/custom_example.py,sha256=QRBqiRiZS8UgVeTRHY0r1Jzm6yAYsyg6zmHxQGxdiQs,739
17
- judgeval/data/example.py,sha256=jcK78ff-TKNl9Qtxvbd1g61crpo-s4fWHaqyMIbQNq0,6877
18
- judgeval/data/result.py,sha256=KfU9lhAKG_Xo2eGDm2uKVVRZpf177IDASg1cIwedJwE,3184
19
- judgeval/data/scorer_data.py,sha256=JVlaTx1EP2jw2gh3Vgx1CSEsvIFABAN26IquKyxwiJQ,3273
20
- judgeval/data/tool.py,sha256=eEEvGDNNYWhcQiI6cjDv3rO1VoOJJS5LWGS76Gb_gtY,1813
21
- judgeval/data/trace.py,sha256=5HSJbCMvNTF4O8D_364dGv2cs-0oa4rCQcYR_hS5FG4,4881
22
- judgeval/data/trace_run.py,sha256=fiB5Z5il9U9XqvksdA2DbLNd96U_Wrz8K00RuFJBy38,2324
23
- judgeval/data/datasets/__init__.py,sha256=IdNKhQv9yYZ_op0rdBacrFaFVmiiYQ3JTzXzxOTsEVQ,176
24
- judgeval/data/datasets/dataset.py,sha256=pq9-A1mg2Brpjg1TufDU_eLo9sQhX0nw-UTGaf3jCXA,12952
25
- judgeval/data/datasets/eval_dataset_client.py,sha256=LJ1bf1sZAC4ZBCRTQ1Y4VrJuNSslYBQ1y9YKuhYxwqY,15176
26
- judgeval/integrations/langgraph.py,sha256=Ogk3MFE116WfRV4w_2c6mp3d27Uea7vmLstltML8VBM,31963
27
- judgeval/judges/__init__.py,sha256=6X7VSwrwsdxGBNxCyapVRWGghhKOy3MVxFNMQ62kCXM,308
28
- judgeval/judges/base_judge.py,sha256=ch_S7uBB7lyv44Lf1d7mIGFpveOO58zOkkpImKgd9_4,994
29
- judgeval/judges/litellm_judge.py,sha256=DhB6px9ELZL3gbMb2w4FkBliuTlaCVIcjE8v149G6NM,2425
30
- judgeval/judges/mixture_of_judges.py,sha256=D97h8L-6saPwwppVwitrIdlMAjizzxGWeVOfNyVnXZA,15550
31
- judgeval/judges/together_judge.py,sha256=l00hhPerAZXg3oYBd8cyMtWsOTNt_0FIqoxhKJKQe3k,2302
32
- judgeval/judges/utils.py,sha256=vL-15_udU94JHUAiyrAvHAKMj6Fqypg01ek4YH5zVCM,2687
33
- judgeval/scorers/__init__.py,sha256=VKPveyGCv5Rc0YtuT7iAxSv-M5EuikqAVeaGNnYMuWE,1340
34
- judgeval/scorers/api_scorer.py,sha256=NQ_CrrUPhSUk1k2Q8rKpCG_TU2FT32sFEqvb-Yi54B0,2688
35
- judgeval/scorers/exceptions.py,sha256=eGW5CuJgZ5YJBFrE4FHDSF651PO1dKAZ379mJ8gOsfo,178
36
- judgeval/scorers/judgeval_scorer.py,sha256=_qtXzl5aa1FH_50kVPnRfiwyCtuXPKyrGU71_3pOrBw,7288
37
- judgeval/scorers/prompt_scorer.py,sha256=Uf_QZhytd78cInKZv8wr66Angz5sxLklP5hEEcoabq4,12001
38
- judgeval/scorers/score.py,sha256=h4eVlbItqG8R0nQgSgeyicYSIraZV9MvV-RRaFu46mg,18762
39
- judgeval/scorers/utils.py,sha256=iHQVTlIANbmCTXz9kTeSdOytgUZ_T74Re61ajqsk_WQ,6827
40
- judgeval/scorers/judgeval_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py,sha256=knJr1fMqdisS2dt1caMyiMmVkP9QMZBTBTRgjoIRKdQ,2112
42
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=Fnd9CVIOZ73sWEWymsU5eBrrZqPFjMZ0BKpeW-PDyTg,711
43
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=oETeN9K0HSIRdL2SDqn82Vskpwh5SlKnZvs5VDm2OBU,658
44
- judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py,sha256=s5DKbvLgWN5kV00isu56A5U4R7w1ahlGVN1yxscqHHc,4515
45
- judgeval/scorers/judgeval_scorers/api_scorers/comparison.py,sha256=kuzf9OWvpY38yYSwlBgneLkUZwJNM4FQqvbS66keA90,1249
46
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py,sha256=tpSuzFAaW8X9xqA0aLLKwh7qmBK0Pc_bJZMIe_q412U,770
47
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py,sha256=pFVhk4pLtQ-FnNlbI-dFF-SIh69Jza7erHqiPkFWoBo,758
48
- judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py,sha256=RQ6DZwEhChfecd89Ey-T7ke--7qTaXZlRsNxwH8gaME,823
49
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py,sha256=V9WPuwNMm097V7IknKs8UkmAk0yjnBXTcJha_BHXxTA,475
50
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py,sha256=Pb3CiNF2Ca826B92wJCVAi_68lJjLhqqCKwQKaflSUg,1294
51
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=-BwOapqjryYNKNydtdkUiKIij76dY0O1jBmdc6dKazQ,692
52
- judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py,sha256=ntEEeTANEOsGlcbiTAF_3r6BeSJEaVDns8po8T0L6Vg,692
53
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py,sha256=k5gDOki-8KXrZXydvdSqDt3NZqQ28hXoOCHQf6jNxr4,686
54
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=XnSGEkQfwVqaqnHEGMCsxNiHVzrsrej48uDbLoWc8CQ,678
55
- judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py,sha256=mMKEuR87_yanEuZJ5YSGFMHDD_oLVZ6-rQuciFaDOMA,1095
56
- judgeval/scorers/judgeval_scorers/api_scorers/summarization.py,sha256=QmWB8bVbDYHY5FcF0rYZE_3c2XXgMLRmR6aXJWfdMC4,655
57
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py,sha256=xHt4NsPCOyQkI5mUnN35D-vBLLFu6ZCMaiIVc4RTlj8,620
58
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py,sha256=urm8LgkeZA7e-ePWo6AToKGheQYSp6MOpKon5NF5EJw,570
59
- judgeval/scorers/judgeval_scorers/classifiers/__init__.py,sha256=Qt81W5ZCwMvBAne0LfQDb8xvg5iOG1vEYP7WizgwAZo,67
60
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py,sha256=8iTzMvou1Dr8pybul6lZHKjc9Ye2-0_racRGYkhEdTY,74
61
- judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py,sha256=O9xq2Cxcg16pFNZwHTb_MDJ5ehFab6oDiiNtC47AnY4,2584
62
- judgeval/tracer/__init__.py,sha256=wy3DYpH8U_z0GO_K_gOSkK0tTTD-u5eLDo0T5xIBoAc,147
63
- judgeval/utils/alerts.py,sha256=7HO42fEskQpwocUU-lq6EX4LGPzpxbIhaiJ5pkH31-I,3278
64
- judgeval/utils/data_utils.py,sha256=pB4GBWi8XoM2zSR2NlLXH5kqcQ029BVhDxaVKkdmiBY,1860
65
- judgeval-0.0.44.dist-info/METADATA,sha256=qDopKywsOERUmD2Rjy8lxSEU1C9xrRhRfiTIwN5Vi40,55748
66
- judgeval-0.0.44.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
- judgeval-0.0.44.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
68
- judgeval-0.0.44.dist-info/RECORD,,