judgeval 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear there.

--- judgeval/judgment_client.py
+++ judgeval/judgment_client.py
@@ -7,12 +7,11 @@ import os
 import importlib.util
 from pathlib import Path
 from uuid import uuid4
-from typing import Optional, List, Dict, Any, Union, Callable, TYPE_CHECKING
+from typing import Optional, List, Dict, Union
 
 from judgeval.data import (
     ScoringResult,
     Example,
-    Trace,
 )
 from judgeval.scorers import (
     APIScorerConfig,
@@ -22,19 +21,14 @@ from judgeval.data.evaluation_run import EvaluationRun
 from judgeval.run_evaluation import (
     run_eval,
     assert_test,
-    run_trace_eval,
 )
-from judgeval.data.trace_run import TraceRun
 from judgeval.common.api import JudgmentApiClient
 from judgeval.common.exceptions import JudgmentAPIError
-from judgeval.common.tracer import Tracer
 from judgeval.common.utils import validate_api_key
 from pydantic import BaseModel
 from judgeval.common.logger import judgeval_logger
 
 
-if TYPE_CHECKING:
-    from judgeval.integrations.langgraph import JudgevalCallbackHandler
 from judgeval.constants import DEFAULT_GPT_MODEL
 
 
@@ -86,47 +80,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         else:
             judgeval_logger.info("Successfully initialized JudgmentClient!")
 
-    def run_trace_evaluation(
-        self,
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
-        examples: Optional[List[Example]] = None,
-        function: Optional[Callable] = None,
-        tracer: Optional[Union[Tracer, JudgevalCallbackHandler]] = None,
-        traces: Optional[List[Trace]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        project_name: str = "default_project",
-        eval_run_name: str = "default_eval_trace",
-        model: Optional[str] = DEFAULT_GPT_MODEL,
-    ) -> List[ScoringResult]:
-        try:
-            if examples and not function:
-                raise ValueError("Cannot pass in examples without a function")
-
-            if traces and function:
-                raise ValueError("Cannot pass in traces and function")
-
-            if examples and traces:
-                raise ValueError("Cannot pass in both examples and traces")
-
-            trace_run = TraceRun(
-                project_name=project_name,
-                eval_name=eval_run_name,
-                traces=traces,
-                scorers=scorers,
-                model=model,
-                organization_id=self.organization_id,
-                tools=tools,
-            )
-            return run_trace_eval(
-                trace_run, self.judgment_api_key, function, tracer, examples
-            )
-        except ValueError as e:
-            raise ValueError(
-                f"Please check your TraceRun object, one or more fields are invalid: \n{str(e)}"
-            )
-        except Exception as e:
-            raise Exception(f"An unexpected error occurred during evaluation: {str(e)}")
-
     def run_evaluation(
         self,
         examples: List[Example],
@@ -134,6 +87,7 @@ class JudgmentClient(metaclass=SingletonMeta):
         model: Optional[str] = DEFAULT_GPT_MODEL,
         project_name: str = "default_project",
         eval_run_name: str = "default_eval_run",
+        show_url: bool = True,
     ) -> List[ScoringResult]:
         """
         Executes an evaluation of `Example`s using one or more `Scorer`s
@@ -161,6 +115,7 @@ class JudgmentClient(metaclass=SingletonMeta):
             return run_eval(
                 eval,
                 self.judgment_api_key,
+                show_url=show_url,
             )
         except ValueError as e:
             raise ValueError(
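
The `show_url` flag introduced in the two hunks above is threaded from `JudgmentClient.run_evaluation` into `run_eval` (see the run_evaluation.py hunks further down), letting callers suppress the results-URL printout. A minimal usage sketch, not taken from the package itself: the `from judgeval import JudgmentClient` import path, the bare `JudgmentClient()` constructor, the `scorers` contents, and the `Example` field names are assumptions for illustration.

```python
# Hypothetical sketch of the 0.7.0 client-level API shown above.
from judgeval import JudgmentClient   # import path assumed
from judgeval.data import Example     # matches the imports in this file

client = JudgmentClient()  # assumed to pick up JUDGMENT_API_KEY / JUDGMENT_ORG_ID from the environment

example = Example(
    input="What is the capital of the United States?",
    actual_output="Washington, D.C.",  # field name assumed for illustration
)

results = client.run_evaluation(
    examples=[example],
    scorers=[...],                 # any APIScorerConfig / BaseScorer instances
    model="gpt-4.1",
    project_name="my_project",
    eval_run_name="nightly_regression",
    show_url=False,                # new in 0.7.0; defaults to True
)
```
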
@@ -217,57 +172,6 @@ class JudgmentClient(metaclass=SingletonMeta):
         )
         assert_test(results)
 
-    def assert_trace_test(
-        self,
-        scorers: List[Union[APIScorerConfig, BaseScorer]],
-        examples: Optional[List[Example]] = None,
-        function: Optional[Callable] = None,
-        tracer: Optional[Union[Tracer, JudgevalCallbackHandler]] = None,
-        traces: Optional[List[Trace]] = None,
-        tools: Optional[List[Dict[str, Any]]] = None,
-        model: Optional[str] = DEFAULT_GPT_MODEL,
-        project_name: str = "default_test",
-        eval_run_name: str = str(uuid4()),
-    ) -> None:
-        """
-        Asserts a test by running the evaluation and checking the results for success
-
-        Args:
-            examples (List[Example]): The examples to evaluate.
-            scorers (List[Union[APIScorerConfig, BaseScorer]]): A list of scorers to use for evaluation
-            model (str): The model used as a judge when using LLM as a Judge
-            project_name (str): The name of the project the evaluation results belong to
-            eval_run_name (str): A name for this evaluation run
-            function (Optional[Callable]): A function to use for evaluation
-            tracer (Optional[Union[Tracer, BaseCallbackHandler]]): A tracer to use for evaluation
-            tools (Optional[List[Dict[str, Any]]]): A list of tools to use for evaluation
-        """
-
-        # Check for enable_param_checking and tools
-        for scorer in scorers:
-            if hasattr(scorer, "kwargs") and scorer.kwargs is not None:
-                if scorer.kwargs.get("enable_param_checking") is True:
-                    if not tools:
-                        raise ValueError(
-                            f"You must provide the 'tools' argument to assert_test when using a scorer with enable_param_checking=True. If you do not want to do param checking, explicitly set enable_param_checking=False for the {scorer.__name__} scorer."
-                        )
-
-        results: List[ScoringResult]
-
-        results = self.run_trace_evaluation(
-            examples=examples,
-            traces=traces,
-            scorers=scorers,
-            model=model,
-            project_name=project_name,
-            eval_run_name=eval_run_name,
-            function=function,
-            tracer=tracer,
-            tools=tools,
-        )
-
-        assert_test(results)
-
     def _extract_scorer_name(self, scorer_file_path: str) -> str:
         """Extract scorer name from the scorer file by importing it."""
         try:
@@ -301,7 +205,7 @@ class JudgmentClient(metaclass=SingletonMeta):
             judgeval_logger.warning(f"Could not extract scorer name: {e}")
             return Path(scorer_file_path).stem
 
-    def save_custom_scorer(
+    def upload_custom_scorer(
        self,
        scorer_file_path: str,
        requirements_file_path: Optional[str] = None,
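
Only the method name changes in the hunk above (`save_custom_scorer` becomes `upload_custom_scorer`); its parameters are unchanged. A hedged call-site sketch, with an assumed import path and placeholder file paths:

```python
# Hypothetical call site after the 0.6.0 -> 0.7.0 rename; paths are placeholders.
from judgeval import JudgmentClient  # import path assumed

client = JudgmentClient()

# 0.6.0: client.save_custom_scorer(...)
client.upload_custom_scorer(
    scorer_file_path="scorers/my_scorer.py",
    requirements_file_path="scorers/requirements.txt",  # optional, per the signature above
)
```
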
@@ -342,10 +246,6 @@ class JudgmentClient(metaclass=SingletonMeta):
             with open(requirements_file_path, "r") as f:
                 requirements_text = f.read()
 
-        # Upload to backend
-        judgeval_logger.info(
-            f"Uploading custom scorer: {unique_name}, this can take a couple of minutes..."
-        )
         try:
             response = self.api_client.upload_custom_scorer(
                 scorer_name=unique_name,

--- judgeval/run_evaluation.py
+++ judgeval/run_evaluation.py
@@ -6,10 +6,10 @@ import time
 import orjson
 import sys
 import threading
-from typing import List, Dict, Union, Optional, Callable, Tuple, Any, TYPE_CHECKING
+from typing import List, Dict, Union, Tuple, Any, TYPE_CHECKING
 from rich import print as rprint
 
-from judgeval.data import ScorerData, ScoringResult, Example, Trace
+from judgeval.data import ScorerData, ScoringResult, Example
 from judgeval.scorers import BaseScorer, APIScorerConfig
 from judgeval.scorers.score import a_execute_scoring
 from judgeval.common.api import JudgmentApiClient
@@ -22,10 +22,7 @@ from judgeval.common.logger import judgeval_logger
 
 
 if TYPE_CHECKING:
-    from judgeval.common.tracer import Tracer
-    from judgeval.data.trace_run import TraceRun
     from judgeval.data.evaluation_run import EvaluationRun
-    from judgeval.integrations.langgraph import JudgevalCallbackHandler
 
 
 def safe_run_async(coro):
@@ -99,29 +96,6 @@ def execute_api_eval(evaluation_run: EvaluationRun) -> Dict:
         )
 
 
-def execute_api_trace_eval(trace_run: TraceRun, judgment_api_key: str) -> Dict:
-    """
-    Executes an evaluation of a list of `Trace`s using one or more `JudgmentScorer`s via the Judgment API.
-    """
-
-    try:
-        # submit API request to execute evals
-        if not judgment_api_key or not trace_run.organization_id:
-            raise ValueError("API key and organization ID are required")
-        api_client = JudgmentApiClient(judgment_api_key, trace_run.organization_id)
-        return api_client.run_trace_evaluation(trace_run.model_dump(warnings=False))
-    except Exception as e:
-        judgeval_logger.error(f"Error: {e}")
-
-        details = "An unknown error occurred."
-        if isinstance(e, JudgmentAPIException):
-            details = e.response_json.get("detail", "An unknown error occurred.")
-
-        raise JudgmentAPIError(
-            "An error occurred while executing the Judgment API request: " + details
-        )
-
-
 def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResult]:
     """
     Checks if any `ScoringResult` objects are missing `scorers_data`.
@@ -142,7 +116,7 @@ def check_missing_scorer_data(results: List[ScoringResult]) -> List[ScoringResul
 
 def log_evaluation_results(
     scoring_results: List[ScoringResult],
-    run: Union[EvaluationRun, TraceRun],
+    run: EvaluationRun,
     judgment_api_key: str,
 ) -> str:
     """
@@ -208,81 +182,6 @@ def check_examples(
         rprint("[green]Continuing...[/green]")
 
 
-def run_trace_eval(
-    trace_run: TraceRun,
-    judgment_api_key: str,
-    function: Optional[Callable] = None,
-    tracer: Optional[Union[Tracer, "JudgevalCallbackHandler"]] = None,
-    examples: Optional[List[Example]] = None,
-) -> List[ScoringResult]:
-    if function and tracer and examples is not None:
-        new_traces: List[Trace] = []
-
-        # Handle case where tracer is actually a callback handler
-        actual_tracer = tracer
-        if hasattr(tracer, "tracer") and hasattr(tracer.tracer, "traces"):
-            # This is a callback handler, get the underlying tracer
-            actual_tracer = tracer.tracer
-
-        if trace_run.project_name != actual_tracer.project_name:
-            raise ValueError(
-                f"Project name mismatch between run_trace_eval and tracer. "
-                f"Trace run: {trace_run.project_name}, "
-                f"Tracer: {actual_tracer.project_name}"
-            )
-
-        actual_tracer.offline_mode = True
-        actual_tracer.traces = []
-        judgeval_logger.info("Running agent function: ")
-        for example in examples:
-            if example.input:
-                if isinstance(example.input, str):
-                    function(example.input)
-                elif isinstance(example.input, dict):
-                    function(**example.input)
-                else:
-                    raise ValueError(
-                        f"Input must be string or dict, got {type(example.input)}"
-                    )
-            else:
-                function()
-
-        for i, trace in enumerate(actual_tracer.traces):
-            # We set the root-level trace span with the expected tools of the Trace
-            trace = Trace(**trace)
-            trace.trace_spans[0].expected_tools = examples[i].expected_tools
-            new_traces.append(trace)
-        trace_run.traces = new_traces
-        actual_tracer.traces = []
-
-    # Execute evaluation using Judgment API
-    try: # execute an EvaluationRun with just JudgmentScorers
-        judgeval_logger.info("Executing Trace Evaluation... ")
-        response_data: Dict = execute_api_trace_eval(trace_run, judgment_api_key)
-        scoring_results = [
-            ScoringResult(**result) for result in response_data["results"]
-        ]
-    except JudgmentAPIError as e:
-        raise JudgmentAPIError(
-            f"An error occurred while executing the Judgment API request: {str(e)}"
-        )
-    except ValueError as e:
-        raise ValueError(
-            f"Please check your TraceRun object, one or more fields are invalid: {str(e)}"
-        )
-
-    # Convert the response data to `ScoringResult` objects
-    # TODO: allow for custom scorer on traces
-
-    url = log_evaluation_results(
-        response_data["agent_results"], trace_run, judgment_api_key
-    )
-    rprint(
-        f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
-    )
-    return scoring_results
-
-
 def _poll_evaluation_until_complete(
     experiment_run_id: str,
     project_name: str,
@@ -387,12 +286,15 @@ def progress_logger(stop_event, msg="Working...", interval=5):
 def run_eval(
     evaluation_run: EvaluationRun,
     judgment_api_key: str,
+    show_url: bool = True,
 ) -> List[ScoringResult]:
     """
     Executes an evaluation of `Example`s using one or more `Scorer`s
 
     Args:
         evaluation_run (EvaluationRun): Stores example and evaluation together for running
+        judgment_api_key (str): API key for authentication
+        show_url (bool): Whether to display the evaluation results URL. Defaults to True.
 
     Returns:
         List[ScoringResult]: A list of ScoringResult objects
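
For code that calls `run_eval` directly (as `JudgmentClient.run_evaluation` does above), the new keyword is simply passed through. A sketch assuming an `EvaluationRun` is constructed elsewhere, since its fields are not part of this diff:

```python
# Hypothetical sketch: calling run_eval directly with the new show_url flag.
# build_evaluation_run() stands in for however you construct an EvaluationRun.
from judgeval.run_evaluation import run_eval

evaluation_run = build_evaluation_run()  # hypothetical helper
results = run_eval(
    evaluation_run,
    judgment_api_key="...",  # placeholder
    show_url=False,          # skip printing the results URL
)
```
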
@@ -481,9 +383,10 @@
         scoring_result.model_dump(warnings=False) for scoring_result in results
     ]
     url = log_evaluation_results(send_results, evaluation_run, judgment_api_key)
-    rprint(
-        f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
-    )
+    if show_url:
+        rprint(
+            f"\n🔍 You can view your evaluation results here: [rgb(106,0,255)][link={url}]View Results[/link]\n"
+        )
     return results
 
 

--- judgeval-0.6.0.dist-info/METADATA
+++ judgeval-0.7.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.6.0
+Version: 0.7.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.11
 Requires-Dist: boto3
 Requires-Dist: click<8.2.0
+Requires-Dist: fireworks-ai>=0.19.18
 Requires-Dist: langchain-anthropic
 Requires-Dist: langchain-core
 Requires-Dist: langchain-huggingface
@@ -39,7 +40,7 @@ Description-Content-Type: text/markdown
 
 <br>
 <div style="font-size: 1.5em;">
-Enable self-learning agents with traces, evals, and environment data.
+Enable self-learning agents with environment data and evals.
 </div>
 
 ## [Docs](https://docs.judgmentlabs.ai/) • [Judgment Cloud](https://app.judgmentlabs.ai/register) • [Self-Host](https://docs.judgmentlabs.ai/documentation/self-hosting/get-started) • [Landing Page](https://judgmentlabs.ai/)
@@ -56,11 +57,11 @@ We're hiring! Join us in our mission to enable self-learning agents by providing
 
 </div>
 
-Judgeval offers **open-source tooling** for tracing and evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
+Judgeval offers **open-source tooling** for evaluating autonomous, stateful agents. It **provides runtime data from agent-environment interactions** for continuous learning and self-improvement.
 
 ## 🎬 See Judgeval in Action
 
-**[Multi-Agent System](https://github.com/JudgmentLabs/judgment-cookbook/tree/main/cookbooks/agents/multi-agent) with complete observability:** (1) A multi-agent system spawns agents to research topics on the internet. (2) With just **3 lines of code**, Judgeval traces every input/output + environment response across all agent tool calls for debugging. (3) After completion, (4) export all interaction data to enable further environment-specific learning and optimization.
+**[Multi-Agent System](https://github.com/JudgmentLabs/judgment-cookbook/tree/main/cookbooks/agents/multi-agent) with complete observability:** (1) A multi-agent system spawns agents to research topics on the internet. (2) With just **3 lines of code**, Judgeval captures all environment responses across all agent tool calls for monitoring. (3) After completion, (4) export all interaction data to enable further environment-specific learning and optimization.
 
 <table style="width: 100%; max-width: 800px; table-layout: fixed;">
 <tr>
@@ -69,8 +70,8 @@ Judgeval offers **open-source tooling** for tracing and evaluating autonomous, s
 <br><strong>🤖 Agents Running</strong>
 </td>
 <td align="center" style="padding: 8px; width: 50%;">
-<img src="assets/trace.gif" alt="Trace Demo" style="width: 100%; max-width: 350px; height: auto;" />
-<br><strong>📊 Real-time Tracing</strong>
+<img src="assets/trace.gif" alt="Capturing Environment Data Demo" style="width: 100%; max-width: 350px; height: auto;" />
+<br><strong>📊 Capturing Environment Data </strong>
 </td>
 </tr>
 <tr>
@@ -111,54 +112,14 @@ export JUDGMENT_ORG_ID=...
 
 **If you don't have keys, [create an account](https://app.judgmentlabs.ai/register) on the platform!**
 
-## 🏁 Quickstarts
-
-### 🛰️ Tracing
-
-Create a file named `agent.py` with the following code:
-
-```python
-from judgeval.tracer import Tracer, wrap
-from openai import OpenAI
-
-client = wrap(OpenAI()) # tracks all LLM calls
-judgment = Tracer(project_name="my_project")
-
-@judgment.observe(span_type="tool")
-def format_question(question: str) -> str:
-    # dummy tool
-    return f"Question : {question}"
-
-@judgment.observe(span_type="function")
-def run_agent(prompt: str) -> str:
-    task = format_question(prompt)
-    response = client.chat.completions.create(
-        model="gpt-4.1",
-        messages=[{"role": "user", "content": task}]
-    )
-    return response.choices[0].message.content
-
-run_agent("What is the capital of the United States?")
-```
-You'll see your trace exported to the Judgment Platform:
-
-<p align="center"><img src="assets/online_eval.png" alt="Judgment Platform Trace Example" width="1500" /></p>
-
-
-[Click here](https://docs.judgmentlabs.ai/documentation/tracing/introduction) for a more detailed explanation.
-
-
-<!-- Created by https://github.com/ekalinin/github-markdown-toc -->
-
 
 ## ✨ Features
 
 | | |
 |:---|:---:|
-| <h3>🔍 Tracing</h3>Automatic agent tracing integrated with common frameworks (LangGraph, OpenAI, Anthropic). **Tracks inputs/outputs, agent tool calls, latency, cost, and custom metadata** at every step.<br><br>**Useful for:**<br>• 🐛 Debugging agent runs <br>• 📋 Collecting agent environment data <br>• 🔬 Pinpointing performance bottlenecks| <p align="center"><img src="assets/agent_trace_example.png" alt="Tracing visualization" width="1200"/></p> |
 | <h3>🧪 Evals</h3>Build custom evaluators on top of your agents. Judgeval supports LLM-as-a-judge, manual labeling, and code-based evaluators that connect with our metric-tracking infrastructure. <br><br>**Useful for:**<br>• ⚠️ Unit-testing <br>• 🔬 A/B testing <br>• 🛡️ Online guardrails | <p align="center"><img src="assets/test.png" alt="Evaluation metrics" width="800"/></p> |
 | <h3>📡 Monitoring</h3>Get Slack alerts for agent failures in production. Add custom hooks to address production regressions.<br><br> **Useful for:** <br>• 📉 Identifying degradation early <br>• 📈 Visualizing performance trends across agent versions and time | <p align="center"><img src="assets/errors.png" alt="Monitoring Dashboard" width="1200"/></p> |
-| <h3>📊 Datasets</h3>Export traces and test cases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
+| <h3>📊 Datasets</h3>Export environment interactions and test cases to datasets for scaled analysis and optimization. Move datasets to/from Parquet, S3, etc. <br><br>Run evals on datasets as unit tests or to A/B test different agent configurations, enabling continuous learning from production interactions. <br><br> **Useful for:**<br>• 🗃️ Agent environment interaction data for optimization<br>• 🔄 Scaled analysis for A/B tests | <p align="center"><img src="assets/datasets_preview_screenshot.png" alt="Dataset management" width="1200"/></p> |
 
 ## 🏢 Self-Hosting
 

--- judgeval-0.6.0.dist-info/RECORD
+++ judgeval-0.7.0.dist-info/RECORD
@@ -1,12 +1,12 @@
 judgeval/__init__.py,sha256=5Lm1JMYFREJGN_8X-Wpruu_ovwGLJ08gCzNAt-u-pQE,419
-judgeval/cli.py,sha256=IcL4_bGr9CtEeea1-AFqM_TEV_VomDlArlxh4IomiSQ,1754
+judgeval/cli.py,sha256=WTFTJKQ6LZI7K9o9KnCfTzsTEJnKfPuSURUpRFLiHp8,1756
 judgeval/clients.py,sha256=HHul68PV1om0dxsVZZu90TtCiy5zaqAwph16jXTQzQo,989
 judgeval/constants.py,sha256=UNoTLHgbpZHRInPM2ZaI3m0XokPkee5ILlg20reqhzo,4180
 judgeval/dataset.py,sha256=vOrDKam2I-K1WcVF5IBkQruCDvXTc8PRaFm4-dV0lXs,6220
-judgeval/judgment_client.py,sha256=-7xcBFowzXKedMINwfZCOL4FKucECWPNEY9QVMo_cys,13644
+judgeval/judgment_client.py,sha256=KxQP-EmhZUJOIFM2Zf_OJbxrgDpN1dRwxo4iVI9zLdA,9390
 judgeval/local_eval_queue.py,sha256=GmlXeZt7bfAJe1hPUjDg_irth4RkNqL2Zdi7VzboBzI,6984
 judgeval/rules.py,sha256=CoQjqmP8daEXewMkplmA-7urubDtweOr5O6z8klVwLI,20031
-judgeval/run_evaluation.py,sha256=gs-_v_i95LKlJj95G2RmQXvIyBfoldnd1pWCNO4UqsM,21985
+judgeval/run_evaluation.py,sha256=ETAP7srohMBAsRqvxHQHKsR5zt3Rzns_kNM_2ulxVdU,18084
 judgeval/version_check.py,sha256=FoLEtpCjDw2HuDQdpw5yT29UtwumSc6ZZN6AV_c9Mnw,1057
 judgeval/common/__init__.py,sha256=KH-QJyWtQ60R6yFIBDYS3WGRiNpEu1guynpxivZvpBQ,309
 judgeval/common/exceptions.py,sha256=OkgDznu2wpBQZMXiZarLJYNk1HIcC8qYW7VypDC3Ook,556
@@ -14,19 +14,24 @@ judgeval/common/logger.py,sha256=514eFLYWS_UL8VY-zAR2ePUlpQe4rbYlleLASFllLE4,151
 judgeval/common/utils.py,sha256=oxGDRVWOICKWeyGgsoc36_yAyHSYF4XtH842Mkznwis,34739
 judgeval/common/api/__init__.py,sha256=-E7lpZz1fG8puR_aYUMfPmQ-Vyhd0bgzoaU5EhIuFjQ,114
 judgeval/common/api/api.py,sha256=fWtMNln0o1wOhJ9wangWpyY_j3WF7P3at_LYPJEicP0,13670
-judgeval/common/api/constants.py,sha256=y0BDcQqHBZ7MwLd4gT5hLUF8UMs_GVwsJGC-ibfxCAw,4698
+judgeval/common/api/constants.py,sha256=N6rQZqMhFv2U8tOw-6pMH0uV7aGT9m8sw57ZkfDW97c,4689
 judgeval/common/api/json_encoder.py,sha256=QQgCe2FBmW1uWKx8yvuhr4U7_b4D0sG97GZtXHKnBdk,5881
 judgeval/common/storage/__init__.py,sha256=a-PI7OL-ydyzugGUKmJKRBASnK-Q-gs82L9K9rSyJP8,90
 judgeval/common/storage/s3_storage.py,sha256=0-bNKheqJJyBZ92KGrzQtd1zocIRWBlfn_58L4a-Ay0,3719
 judgeval/common/tracer/__init__.py,sha256=tJCJsmVmrL89Phv88gNCJ-j0ITPez6lh8vhMAAlLNSc,795
 judgeval/common/tracer/constants.py,sha256=yu5y8gMe5yb1AaBkPtAH-BNwIaAR3NwYCRoSf45wp5U,621
-judgeval/common/tracer/core.py,sha256=TQ80NODaJx7gzmntevDLA3evVJ3m2Zy2s0Pwd7APG9Y,84867
+judgeval/common/tracer/core.py,sha256=Vhh2LRgLdxa_yxUfMunv7l83tksuztm7F_oSwD92EXs,91681
 judgeval/common/tracer/otel_exporter.py,sha256=kZLlOQ6afQE4dmb9H1wgU4P3H5PG1D_zKyvnpWcT5Ak,3899
 judgeval/common/tracer/otel_span_processor.py,sha256=BD-FKXaZft5_3zqy1Qe_tpkudVOLop9AGhBjZUgp-Z8,6502
 judgeval/common/tracer/providers.py,sha256=3c3YOtKuoBjlTL0rc2HAGnUpppqvsyzrN5H6EKCqEi0,2733
 judgeval/common/tracer/span_processor.py,sha256=1NQxNSVWcb8qCFLmslSVMnaWdkOZmiFJnxeeN0i6vnU,1150
 judgeval/common/tracer/span_transformer.py,sha256=cfzz6RpTCOG9Io9knNlwtAW34p3wyK-u8jSNMu24p1w,7382
-judgeval/common/tracer/trace_manager.py,sha256=ltiXcWC-68DRc8uSa28qHiWRSIBf6NpYOPkZYooR8tg,3086
+judgeval/common/tracer/trace_manager.py,sha256=FAlkTNomb_TzSSnF7DnmP5nImBgHaA_SFNW1INzE1aI,3178
+judgeval/common/trainer/__init__.py,sha256=fkaBjtAynh1GZbvK2xbNTjuLFSDpPzj7u4Chf4vZsfs,209
+judgeval/common/trainer/config.py,sha256=kaWz0ni4ijtXpu8SF2jLEnw5yA2HqaUbvjiyqEnSrXE,4195
+judgeval/common/trainer/console.py,sha256=sZCoJqI6ZRArbJpxl3ZwNb9taYoEkgCpz9PF4IUbGjE,4818
+judgeval/common/trainer/trainable_model.py,sha256=tnhFH2Mp5hVht3utHVFPs2BxKoBQgRJrAzgzE5IfKEU,8842
+judgeval/common/trainer/trainer.py,sha256=dE-sOU26dNaWxPaN88XuN3f3XCizdHrRPNylrspCWQc,11815
 judgeval/data/__init__.py,sha256=1QagDcSQtfnJ632t9Dnq8d7XjAqhmY4mInOWt8qH9tM,455
 judgeval/data/evaluation_run.py,sha256=IirmYZ1_9N99eep7DDuoyshwjmpNK9bQCxCWXnnhhuI,4053
 judgeval/data/example.py,sha256=kRskIgsjwcvv2Y8jaPwV-PND7zlmMbFsvRVQ_b7SZY0,914
@@ -70,8 +75,8 @@ judgeval/utils/alerts.py,sha256=3w_AjQrgfmOZvfqCridW8WAnHVxHHXokX9jNzVFyGjA,3297
 judgeval/utils/async_utils.py,sha256=uNx1SopEc0quSjc8GBQqyba0SmCMAzv2NKIq6xYwttc,989
 judgeval/utils/file_utils.py,sha256=PWHRs8dUr8iDwpglSSk4Yjd7C6ZhDzUaO-jV3m7riHM,1987
 judgeval/utils/requests.py,sha256=K3gUKrwL6TvwYKVYO5OeLWdUHn9NiUPmnIXhZEiEaHU,1534
-judgeval-0.6.0.dist-info/METADATA,sha256=CulXMs0v5YrHjR3ntVX8xWKcZyxwEpo_nOYs_hkaeN8,10403
-judgeval-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.6.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.6.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.6.0.dist-info/RECORD,,
+judgeval-0.7.0.dist-info/METADATA,sha256=WvjnS9cY6RvmrLdtpJbNJN3AssRmIWp61dYr2ZUn0Bo,8877
+judgeval-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.7.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.7.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.7.0.dist-info/RECORD,,