judgeval 0.16.9__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

@@ -1,405 +1,70 @@
1
- import asyncio
2
- import json
3
- import time
4
- from typing import Optional, Callable, Any, List, Union, Dict
5
- from fireworks import Dataset # type: ignore[import-not-found]
6
- from .config import TrainerConfig, ModelConfig
1
+ from typing import Optional
2
+ from .config import TrainerConfig
3
+ from .base_trainer import BaseTrainer
4
+ from .fireworks_trainer import FireworksTrainer
7
5
  from .trainable_model import TrainableModel
8
6
  from judgeval.tracer import Tracer
9
- from judgeval.tracer.exporters.store import SpanStore
10
- from judgeval.tracer.exporters import InMemorySpanExporter
11
- from judgeval.tracer.keys import AttributeKeys
12
- from judgeval import JudgmentClient
13
- from judgeval.scorers import ExampleScorer, ExampleAPIScorerConfig
14
- from judgeval.data import Example
15
- from .console import _spinner_progress, _print_progress, _print_progress_update
16
7
  from judgeval.exceptions import JudgmentRuntimeError
17
8
 
18
9
 
19
- class JudgmentTrainer:
10
+ def JudgmentTrainer(
11
+ config: TrainerConfig,
12
+ trainable_model: TrainableModel,
13
+ tracer: Tracer,
14
+ project_name: Optional[str] = None,
15
+ ) -> BaseTrainer:
20
16
  """
21
- A reinforcement learning trainer for Judgment models using Fine-Tuning.
17
+ Factory function for creating reinforcement learning trainers.
22
18
 
23
- This class handles the iterative training process where models are improved
24
- through reinforcement learning fine-tuning based on generated rollouts and rewards.
25
- """
26
-
27
- def __init__(
28
- self,
29
- config: TrainerConfig,
30
- trainable_model: TrainableModel,
31
- tracer: Tracer,
32
- project_name: Optional[str] = None,
33
- ):
34
- """
35
- Initialize the JudgmentTrainer.
36
-
37
- Args:
38
- config: TrainerConfig instance with training parameters. If None, uses default config.
39
- tracer: Optional tracer for observability
40
- trainable_model: Optional trainable model instance
41
- project_name: Project name for organizing training runs and evaluations
42
- """
43
- try:
44
- self.config = config
45
- self.tracer = tracer
46
- self.project_name = project_name or "judgment_training"
47
- self.trainable_model = trainable_model
48
-
49
- self.judgment_client = JudgmentClient()
50
- self.span_store = SpanStore()
51
- self.span_exporter = InMemorySpanExporter(self.span_store)
52
- except Exception as e:
53
- raise JudgmentRuntimeError(
54
- f"Failed to initialize JudgmentTrainer: {str(e)}"
55
- ) from e
56
-
57
- def _extract_message_history_from_spans(self) -> List[Dict[str, str]]:
58
- """
59
- Extract message history from spans in the span store for training purposes.
60
-
61
- This method processes trace spans to reconstruct the conversation flow,
62
- extracting messages in chronological order from LLM, user, and tool spans.
63
-
64
- Returns:
65
- List of message dictionaries with 'role' and 'content' keys
66
- """
67
- spans = self.span_store.get_all()
68
- if not spans:
69
- return []
70
-
71
- messages = []
72
- first_found = False
73
-
74
- for span in sorted(spans, key=lambda s: getattr(s, "start_time", 0)):
75
- span_attributes = span.attributes or {}
76
- span_type = span_attributes.get(AttributeKeys.JUDGMENT_SPAN_KIND, "span")
77
-
78
- if (
79
- not span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
80
- and span_type != "llm"
81
- ):
82
- continue
83
-
84
- if span_type == "llm":
85
- if not first_found and span_attributes.get(
86
- AttributeKeys.JUDGMENT_INPUT
87
- ):
88
- input_data: Any = span_attributes.get(
89
- AttributeKeys.JUDGMENT_INPUT, {}
90
- )
91
- if isinstance(input_data, dict) and "messages" in input_data:
92
- input_messages = input_data["messages"]
93
- if input_messages:
94
- first_found = True
95
- for msg in input_messages:
96
- if (
97
- isinstance(msg, dict)
98
- and "role" in msg
99
- and "content" in msg
100
- ):
101
- messages.append(
102
- {"role": msg["role"], "content": msg["content"]}
103
- )
104
-
105
- # Add assistant response from span output
106
- output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
107
- if output is not None:
108
- content = str(output)
109
- try:
110
- parsed = json.loads(content)
111
- if isinstance(parsed, dict) and "messages" in parsed:
112
- # Extract the actual assistant message content
113
- for msg in parsed["messages"]:
114
- if (
115
- isinstance(msg, dict)
116
- and msg.get("role") == "assistant"
117
- ):
118
- content = msg.get("content", content)
119
- break
120
- except (json.JSONDecodeError, KeyError):
121
- pass
122
- messages.append({"role": "assistant", "content": content})
123
-
124
- elif span_type == "user":
125
- output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
126
- if output is not None:
127
- content = str(output)
128
- try:
129
- parsed = json.loads(content)
130
- if isinstance(parsed, dict) and "messages" in parsed:
131
- for msg in parsed["messages"]:
132
- if isinstance(msg, dict) and msg.get("role") == "user":
133
- content = msg.get("content", content)
134
- break
135
- except (json.JSONDecodeError, KeyError):
136
- pass
137
- messages.append({"role": "user", "content": content})
19
+ This factory creates and returns provider-specific trainer implementations
20
+ (FireworksTrainer, VerifiersTrainer, etc.) based on the configured RFT provider.
138
21
 
139
- elif span_type == "tool":
140
- output = span_attributes.get(AttributeKeys.JUDGMENT_OUTPUT)
141
- if output is not None:
142
- content = str(output)
143
- try:
144
- parsed = json.loads(content)
145
- if isinstance(parsed, dict) and "messages" in parsed:
146
- for msg in parsed["messages"]:
147
- if isinstance(msg, dict) and msg.get("role") == "user":
148
- content = msg.get("content", content)
149
- break
150
- except (json.JSONDecodeError, KeyError):
151
- pass
152
- messages.append({"role": "user", "content": content})
22
+ The factory pattern allows for easy extension to support multiple training
23
+ providers without changing the client-facing API.
153
24
 
154
- return messages
155
-
156
- async def generate_rollouts_and_rewards(
157
- self,
158
- agent_function: Callable[[Any], Any],
159
- scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
160
- prompts: List[Any],
161
- num_prompts_per_step: Optional[int] = None,
162
- num_generations_per_prompt: Optional[int] = None,
163
- concurrency: Optional[int] = None,
164
- ):
165
- """
166
- Generate rollouts and compute rewards using the current model snapshot.
167
- Each sample contains multiple generations for reinforcement learning optimization.
168
-
169
- Args:
170
- agent_function: Function/agent to call for generating responses
171
- scorers: List of scorer objects to evaluate responses
172
- prompts: List of prompts to use for training
173
- num_prompts_per_step: Number of prompts to use per step (defaults to config value, limited by prompts list length)
174
- num_generations_per_prompt: Generations per prompt (defaults to config value)
175
- concurrency: Concurrency limit (defaults to config value)
176
-
177
- Returns:
178
- List of dataset rows containing samples with messages and evaluations
179
- """
180
- num_prompts_per_step = min(
181
- num_prompts_per_step or self.config.num_prompts_per_step, len(prompts)
182
- )
183
- num_generations_per_prompt = (
184
- num_generations_per_prompt or self.config.num_generations_per_prompt
25
+ Example:
26
+ config = TrainerConfig(
27
+ deployment_id="my-deployment",
28
+ user_id="my-user",
29
+ model_id="my-model",
30
+ rft_provider="fireworks" # or "verifiers" in the future
185
31
  )
186
- concurrency = concurrency or self.config.concurrency
187
-
188
- semaphore = asyncio.Semaphore(concurrency)
189
-
190
- @self.tracer.observe(span_type="function")
191
- async def generate_single_response(prompt_id, generation_id):
192
- async with semaphore:
193
- prompt_input = prompts[prompt_id]
194
- response_data = await agent_function(**prompt_input)
195
- messages = response_data.get("messages", [])
196
-
197
- try:
198
- traced_messages = self._extract_message_history_from_spans()
199
- if traced_messages:
200
- messages = traced_messages
201
- except Exception as e:
202
- print(f"Warning: Failed to get message history from trace: {e}")
203
- pass
204
-
205
- finally:
206
- self.span_store.spans = []
207
-
208
- example = Example(
209
- input=prompt_input,
210
- messages=messages,
211
- actual_output=response_data,
212
- )
213
-
214
- scoring_results = self.judgment_client.run_evaluation(
215
- examples=[example],
216
- scorers=scorers,
217
- project_name=self.project_name,
218
- eval_run_name=f"training_step_{self.trainable_model.current_step}_prompt_{prompt_id}_gen_{generation_id}",
219
- )
220
-
221
- if scoring_results and scoring_results[0].scorers_data:
222
- scores = [
223
- scorer_data.score
224
- for scorer_data in scoring_results[0].scorers_data
225
- if scorer_data.score is not None
226
- ]
227
- reward = sum(scores) / len(scores) if scores else 0.0
228
- else:
229
- reward = 0.0
230
-
231
- return {
232
- "prompt_id": prompt_id,
233
- "generation_id": generation_id,
234
- "messages": messages,
235
- "evals": {"score": reward},
236
- }
237
-
238
- coros = []
239
- for prompt_id in range(num_prompts_per_step):
240
- for generation_id in range(num_generations_per_prompt):
241
- coro = generate_single_response(prompt_id, generation_id)
242
- coros.append(coro)
243
32
 
244
- with _spinner_progress(f"Generating {len(coros)} rollouts..."):
245
- num_completed = 0
246
- results = []
33
+ # User creates and configures the trainable model
34
+ trainable_model = TrainableModel(config)
35
+ tracer = Tracer()
247
36
 
248
- for coro in asyncio.as_completed(coros):
249
- result = await coro
250
- results.append(result)
251
- num_completed += 1
37
+ # JudgmentTrainer automatically creates the appropriate provider-specific trainer
38
+ trainer = JudgmentTrainer(config, trainable_model, tracer)
252
39
 
253
- _print_progress(f"Generated {len(results)} rollouts successfully")
254
-
255
- dataset_rows = []
256
- for prompt_id in range(num_prompts_per_step):
257
- prompt_generations = [r for r in results if r["prompt_id"] == prompt_id]
258
- sample_generations = [
259
- {"messages": gen["messages"], "evals": gen["evals"]}
260
- for gen in prompt_generations
261
- ]
262
- dataset_rows.append({"samples": sample_generations})
263
-
264
- return dataset_rows
265
-
266
- async def run_reinforcement_learning(
267
- self,
268
- agent_function: Callable[[Any], Any],
269
- scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
270
- prompts: List[Any],
271
- ) -> ModelConfig:
272
- """
273
- Run the iterative reinforcement learning fine-tuning loop.
274
-
275
- This method performs multiple steps of reinforcement learning, where each step:
276
- 1. Advances to the appropriate model snapshot
277
- 2. Generates rollouts and computes rewards using scorers
278
- 3. Trains a new model using reinforcement learning
279
- 4. Waits for training completion
40
+ # The returned trainer implements the BaseTrainer interface
41
+ model_config = await trainer.train(agent_function, scorers, prompts)
280
42
 
281
43
  Args:
282
- agent_function: Function/agent to call for generating responses
283
- scorers: List of scorer objects to evaluate responses
284
- prompts: List of prompts to use for training
44
+ config: TrainerConfig instance with training parameters including rft_provider
45
+ trainable_model: Provider-specific trainable model instance (e.g., TrainableModel for Fireworks)
46
+ tracer: Tracer for observability
47
+ project_name: Project name for organizing training runs and evaluations
285
48
 
286
49
  Returns:
287
- ModelConfig: Configuration of the trained model for inference and future training
288
- """
289
-
290
- _print_progress("Starting reinforcement learning training")
291
-
292
- training_params = {
293
- "num_steps": self.config.num_steps,
294
- "num_prompts_per_step": self.config.num_prompts_per_step,
295
- "num_generations_per_prompt": self.config.num_generations_per_prompt,
296
- "epochs": self.config.epochs,
297
- "learning_rate": self.config.learning_rate,
298
- "accelerator_count": self.config.accelerator_count,
299
- "accelerator_type": self.config.accelerator_type,
300
- "temperature": self.config.temperature,
301
- "max_tokens": self.config.max_tokens,
302
- }
303
-
304
- start_step = self.trainable_model.current_step
305
-
306
- for step in range(start_step, self.config.num_steps):
307
- step_num = step + 1
308
- _print_progress(
309
- f"Starting training step {step_num}", step_num, self.config.num_steps
310
- )
311
-
312
- self.trainable_model.advance_to_next_step(step)
313
-
314
- dataset_rows = await self.generate_rollouts_and_rewards(
315
- agent_function, scorers, prompts
316
- )
317
-
318
- with _spinner_progress(
319
- "Preparing training dataset", step_num, self.config.num_steps
320
- ):
321
- dataset = Dataset.from_list(dataset_rows)
322
- dataset.sync()
323
-
324
- _print_progress(
325
- "Starting reinforcement training", step_num, self.config.num_steps
326
- )
327
- job = self.trainable_model.perform_reinforcement_step(dataset, step)
328
-
329
- last_state = None
330
- with _spinner_progress(
331
- "Training job in progress", step_num, self.config.num_steps
332
- ):
333
- while not job.is_completed:
334
- job.raise_if_bad_state()
335
- current_state = job.state
336
-
337
- if current_state != last_state:
338
- if current_state in ["uploading", "validating"]:
339
- _print_progress_update(
340
- f"Training job: {current_state} data"
341
- )
342
- elif current_state == "training":
343
- _print_progress_update(
344
- "Training job: model training in progress"
345
- )
346
- else:
347
- _print_progress_update(f"Training job: {current_state}")
348
- last_state = current_state
349
-
350
- time.sleep(10)
351
- job = job.get()
352
- if job is None:
353
- raise JudgmentRuntimeError(
354
- "Training job was deleted while waiting for completion"
355
- )
50
+ Provider-specific trainer instance (FireworksTrainer, etc.) that implements
51
+ the BaseTrainer interface
356
52
 
357
- _print_progress(
358
- f"Training completed! New model: {job.output_model}",
359
- step_num,
360
- self.config.num_steps,
361
- )
362
-
363
- dataset.delete()
364
-
365
- _print_progress("All training steps completed!")
366
-
367
- with _spinner_progress("Deploying final trained model"):
368
- self.trainable_model.advance_to_next_step(self.config.num_steps)
369
-
370
- return self.trainable_model.get_model_config(training_params)
371
-
372
- async def train(
373
- self,
374
- agent_function: Callable[[Any], Any],
375
- scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
376
- prompts: List[Any],
377
- rft_provider: Optional[str] = None,
378
- ) -> ModelConfig:
379
- """
380
- Start the reinforcement learning fine-tuning process.
381
-
382
- This is the main entry point for running the reinforcement learning training.
383
-
384
- Args:
385
- agent_function: Function/agent to call for generating responses.
386
- scorers: List of scorer objects to evaluate responses
387
- prompts: List of prompts to use for training
388
- rft_provider: RFT provider to use for training. Currently only "fireworks" is supported.
389
- Support for other providers is planned for future releases.
390
-
391
- Returns:
392
- ModelConfig: Configuration of the trained model for future loading
393
- """
394
- try:
395
- if rft_provider is not None:
396
- self.config.rft_provider = rft_provider
397
-
398
- return await self.run_reinforcement_learning(
399
- agent_function, scorers, prompts
400
- )
401
- except JudgmentRuntimeError:
402
- # Re-raise JudgmentAPIError as-is
403
- raise
404
- except Exception as e:
405
- raise JudgmentRuntimeError(f"Training process failed: {str(e)}") from e
53
+ Raises:
54
+ JudgmentRuntimeError: If the specified provider is not supported
55
+ """
56
+ provider = config.rft_provider.lower()
57
+
58
+ if provider == "fireworks":
59
+ return FireworksTrainer(config, trainable_model, tracer, project_name)
60
+ elif provider == "verifiers":
61
+ # Placeholder for future implementation
62
+ raise JudgmentRuntimeError(
63
+ "Verifiers provider is not yet implemented. "
64
+ "Currently supported providers: 'fireworks'"
65
+ )
66
+ else:
67
+ raise JudgmentRuntimeError(
68
+ f"Unsupported RFT provider: '{config.rft_provider}'. "
69
+ f"Currently supported providers: 'fireworks'"
70
+ )
@@ -0,0 +1,15 @@
1
+ from judgeval.utils.decorators.dont_throw import dont_throw
2
+ import functools
3
+ from judgeval.api import JudgmentSyncClient
4
+
5
+
6
+ @dont_throw
7
+ @functools.lru_cache(maxsize=64)
8
+ def _resolve_project_id(project_name: str, api_key: str, organization_id: str) -> str:
9
+ """Resolve project_id from project_name using the API."""
10
+ client = JudgmentSyncClient(
11
+ api_key=api_key,
12
+ organization_id=organization_id,
13
+ )
14
+ response = client.projects_resolve({"project_name": project_name})
15
+ return response["project_id"]
judgeval/version.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.16.9"
1
+ __version__ = "0.18.0"
2
2
 
3
3
 
4
4
  def get_version() -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.16.9
3
+ Version: 0.18.0
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -63,8 +63,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
63
63
  await trainer.train(
64
64
  agent_function=your_agent_function, # entry point to your agent
65
65
  scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward
66
- prompts=training_prompts, # Tasks
67
- rft_provider="fireworks"
66
+ prompts=training_prompts # Tasks
68
67
  )
69
68
  ```
70
69
 
@@ -1,17 +1,17 @@
1
- judgeval/__init__.py,sha256=MyRK0G0khifeEoYMUkkOH722h_TOSdApoNmXtwnEi0w,5179
2
- judgeval/cli.py,sha256=C7QfElL1sys6hjRpI8rNkFgGqmnsLQGk3lU-9OGbXT0,1612
1
+ judgeval/__init__.py,sha256=74WyDtb9SisWwYZ8juQSUJpa6c0KTI6zzkiTX7Wvalc,6601
2
+ judgeval/cli.py,sha256=bkwsDqX0sdfChLxm9aTLAIw0sPYv-fUbjmaFeBgPgk8,1803
3
3
  judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
4
4
  judgeval/env.py,sha256=37Mn4g0OkpFxXCZGlO_CLqKJnyX-jx_R24tC28XJzig,2112
5
5
  judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
6
6
  judgeval/logger.py,sha256=VP5blbsJ53mvJbNHfBf5p2KrARUrkrErpPkB-__Hh3U,1562
7
- judgeval/version.py,sha256=na4SICn1_ldveglTM2Suf3pZLRnw2qbMJMUmIhGkh0Q,74
7
+ judgeval/version.py,sha256=CybtPmbwRv_x6bsmmn5cZhdYjBHKkklFsk3eOsP-fMs,74
8
8
  judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
9
- judgeval/api/__init__.py,sha256=ho8L4wC9y-STYEpk5zHwc2mZJhC4ezW8jiGgOIERBVY,12058
10
- judgeval/api/api_types.py,sha256=xOHcgK8NTHMuBr1HBHlCvoSYldVOtG8DQsXeo23-YQk,8874
9
+ judgeval/api/__init__.py,sha256=dGZm9KtgLMnmbiyDEJ_D7suuVqmsibR_Cd0YZRJ7qHI,15210
10
+ judgeval/api/api_types.py,sha256=PvwRVxP0_vCXg_ii7jo4SzbB_kbZcL8tiVnX7qotJA8,9878
11
11
  judgeval/data/__init__.py,sha256=1tU0EN0ThIfQ1fad5I3dKxAfTcZ5U8cvTLcQ6qLVLU0,407
12
12
  judgeval/data/evaluation_run.py,sha256=O41p99wNAuCAf6lsLNKzkZ6W-kL9LlzCYxVls7IcKkA,4727
13
13
  judgeval/data/example.py,sha256=eGJpF-lyUH734Cg90B7WtU9f8iKoS3VFGeV6R-GVCCc,1039
14
- judgeval/data/judgment_types.py,sha256=zrO41lBma66C_Iiz_VCyXLoI2CJjV7fIXAGDlf2fQ-g,16577
14
+ judgeval/data/judgment_types.py,sha256=_LUqYW-fXQcEfa1RQzqTNETnqdNQQ3eH21qBcfJnObU,18542
15
15
  judgeval/data/result.py,sha256=XufFGSAkBDfevPUmzSgsR9HEqytISkM0U5HkhJmsjpY,2102
16
16
  judgeval/data/scorer_data.py,sha256=HeP15ZgftFTJCF8JmDJCLWXRnZJIaGDJCzl7Hg6gWwE,2006
17
17
  judgeval/data/trace.py,sha256=zSiR3o6xt8Z46XA3M9fJBtViF0BsPO6yKp9jxdscOSc,3881
@@ -26,6 +26,7 @@ judgeval/judges/base_judge.py,sha256=_dz0qWsKRxzXxpRY9l6mrxTRYPSF2FE4ZXkrzhZ4gbY
26
26
  judgeval/judges/litellm_judge.py,sha256=5vEF0IUo7HVWnOF2ww-DMke8Xkarnz32B_qbgKjc0-I,4182
27
27
  judgeval/judges/together_judge.py,sha256=GzwlXZJzle8hT-vWKmq39JyIeanJqJfHDOkrksUbzk0,4398
28
28
  judgeval/judges/utils.py,sha256=ITbYwvjU3o9-FIAReFvxh24yJrx9LV3l9BnSBgKUpxg,2068
29
+ judgeval/prompts/prompt.py,sha256=N6G7ncVsmeXgTXzYNDrMw2NESzBJjSKvp4h-BACpX_8,10220
29
30
  judgeval/scorers/__init__.py,sha256=pomKzEy4YNFyygYp8vbS3co8iB5CMstRkQwdUgi1u4g,744
30
31
  judgeval/scorers/agent_scorer.py,sha256=-qcNSkY6i7ur2LXkM7H1jTKuuFbDuXbjTq42o3vjeQ8,595
31
32
  judgeval/scorers/api_scorer.py,sha256=jPBQUBs_T3Xq33QoIbIXDzUaXinz56qeDfo96dfdX0g,2036
@@ -40,15 +41,15 @@ judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py,sha256=WUeFy
40
41
  judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py,sha256=ciiFBQQC4UDsk9qou9OiKbAR31s82eRUY1ZTt1gdM-0,407
41
42
  judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjfoYmcgTDzN8u5RrehlVqrkeLEfss9b1fk,441
42
43
  judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
43
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=SBIjL9oe1YWjVijrC6tkNxFf1M9h31Gq6sAXD2VvnoQ,11360
44
- judgeval/tracer/__init__.py,sha256=7Axz7nHXCeSyiGqwAHo_Gd4oe4cJaAeRvI8BCbgXDEc,34890
44
+ judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=YdGr2bO5miAtF7fDn2t488RFRi0oYmycqkvm69qCrWs,10754
45
+ judgeval/tracer/__init__.py,sha256=U5RRAYuL_vg1SlWuaaufnobdoLS-J8ovM3FedP_vny4,36398
45
46
  judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
46
- judgeval/tracer/keys.py,sha256=8HsLASFPfqNtXwdXpihIbmFONpUdUGpYvHQK9jqrHvQ,2220
47
+ judgeval/tracer/keys.py,sha256=G2Qgb5ZlFsZvXPMylh-OLhHSnWYQ23g0GdtY9n9XuoE,2280
47
48
  judgeval/tracer/managers.py,sha256=NEkovnK8Qaod1U5msT0_hyHUqo9C2uFro2IzNlC8jCo,5071
48
49
  judgeval/tracer/utils.py,sha256=xWha5iwC733wCf2HKbNqzxOPS1ovO1OymWIUFLz-UpQ,537
49
50
  judgeval/tracer/exporters/__init__.py,sha256=3WDXC28iY5gYMM5s7ejmy7P-DVDQ_iIuzwovZxUKJXg,1295
50
51
  judgeval/tracer/exporters/s3.py,sha256=N9gmw17cnR0VkfAQQkLsNj5BksgNRETThR5qYhWRjP4,4360
51
- judgeval/tracer/exporters/store.py,sha256=KQV3cyqteesByQjR-9VdPXT9OlUZ-6F08ogqj837_c0,1012
52
+ judgeval/tracer/exporters/store.py,sha256=pA_KINcm0amO0WEDYmMFU05SSsMOgJ5ogIRaevSX1sk,1885
52
53
  judgeval/tracer/exporters/utils.py,sha256=JRcoSQuEHxMDJbXfyrUIfA2SHBVkZM82h4bTbYGxkNw,1154
53
54
  judgeval/tracer/llm/__init__.py,sha256=ENxApieKSktYrIviofXWP9GU0WnhBm0Q9mlGe_m_gMY,139
54
55
  judgeval/tracer/llm/config.py,sha256=J8-bTL82bgDqdTJPN-Px3Epvoa9FG7L-X329kitwBTc,2525
@@ -56,33 +57,36 @@ judgeval/tracer/llm/constants.py,sha256=IWa3CMes8wIt_UG7jrGEOztg2sHz54fdOMWIOOr-
56
57
  judgeval/tracer/llm/providers.py,sha256=VAimkmChOOjhC1cUv-0iG8pa5PhOw1HIOyt3zrIrbcM,628
57
58
  judgeval/tracer/llm/llm_anthropic/__init__.py,sha256=HG0gIlTgaRt-Y0u1ERPQ19pUgb4YHkTh7tZQPeyR4oM,80
58
59
  judgeval/tracer/llm/llm_anthropic/config.py,sha256=ICfKODPQvZsRxpK4xWQ-YE79pmWJTmY2wryddxpNdpM,153
59
- judgeval/tracer/llm/llm_anthropic/messages.py,sha256=dwiXr3g-Cv0zow5aA19EDxTUyW1_6LFkKRdoaMYVnOo,15168
60
- judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=20VwyDMLeaECw_Gf_FTXj-TgaqGvRMCKY-ZuzLcQK0I,12148
60
+ judgeval/tracer/llm/llm_anthropic/messages.py,sha256=U11364nrTt6M58K218uj8AxGPrNwzJ4idhEmZQtFuik,15152
61
+ judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=ZhHigQujU-zHhklgwSVoQYtSsL_7yC5Rwpq9vozekMc,12140
61
62
  judgeval/tracer/llm/llm_anthropic/wrapper.py,sha256=JILcyC4NvjXZSqlFoZp-VB-JsCYZkQPMFEYaB4AysrA,1849
62
63
  judgeval/tracer/llm/llm_google/__init__.py,sha256=otBZETsAfVZjtZaN5N36Ln0kw-I9jVB4tFGrV6novHo,74
63
64
  judgeval/tracer/llm/llm_google/config.py,sha256=S3yCAE9oHbXjLVYiz5mGD16yIgXMBBUu5UN4lBjoCNQ,162
64
- judgeval/tracer/llm/llm_google/generate_content.py,sha256=Ilo-TJYg4_G0u_8XgFCC63Lwr6B1JQMC8kdBRQTGy_M,3977
65
+ judgeval/tracer/llm/llm_google/generate_content.py,sha256=meLWeoZ7J2JtSkpt2Lt8qapYi_mxv0204cXWaFZ0FKs,3973
65
66
  judgeval/tracer/llm/llm_google/wrapper.py,sha256=jqaMXGoM9dlPBbCFadMI5EqFrNHzBt0h9VkNn7KPVLk,901
66
67
  judgeval/tracer/llm/llm_openai/__init__.py,sha256=CyzwhY0-zmqWKlEno7JPBcvO7G_hI8dp6-_5_KEzFqg,74
67
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=Uj5L2twn4mp5Br8T3X85h7CS4cUpSYCvP2BnYdm-M-g,6498
68
- judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=4ZvZGzmma-2WS4-aJcEWYyIyMiyjI7BCyPGaynRtqDI,15641
68
+ judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=KwetlVexleDSSyRBEezC7Fk5do1Vub3FwLbRhCPgktc,6490
69
+ judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=NWPE_BQTGfTRfsqhzXtNlQAv1Cr2GymolrTGzIbr9Ok,15625
69
70
  judgeval/tracer/llm/llm_openai/config.py,sha256=NE0ixKhd4WVeAVjY8jNTncuKYH6R4MQDLPmcCsd3zWY,144
70
- judgeval/tracer/llm/llm_openai/responses.py,sha256=fLTzvrpyJ6Be6DvlpN3PzC0JmSnPH_3eUOfIGH2QcC8,15891
71
+ judgeval/tracer/llm/llm_openai/responses.py,sha256=lhs4yS-rJU255vo5gsJkGFRloYurlfnXIkstjMwR3vA,15875
71
72
  judgeval/tracer/llm/llm_openai/wrapper.py,sha256=Z5Ndib228yd1pXEQ4xIu7_CJHxpW_t0ofZAC6FLc5eU,2055
72
73
  judgeval/tracer/llm/llm_together/__init__.py,sha256=MEnsF77IgFD4h73hNCMpo-9a1PHHdm-OxPlOalXOMac,78
73
- judgeval/tracer/llm/llm_together/chat_completions.py,sha256=YxVL1zqG7Tjoss0BH3hm74UEHq-BaYuPHcxGaSJVdhM,14172
74
+ judgeval/tracer/llm/llm_together/chat_completions.py,sha256=KC8sk40l1VDuFStuVGIV1GLLx3vrtPDk5Y2vJsnRe70,14156
74
75
  judgeval/tracer/llm/llm_together/config.py,sha256=jCJY0KQcHJZZJk2vq038GKIDUMusqgvRjQ0B6OV5uEc,150
75
76
  judgeval/tracer/llm/llm_together/wrapper.py,sha256=HFqy_MabQeSq8oj2diZhEuk1SDt_hDfk5MFdPn9MFhg,1733
76
77
  judgeval/tracer/processors/__init__.py,sha256=BdOOPOD1RfMI5YHW76DNPKR07EAev-JxoolZ3KaXNNU,7100
77
- judgeval/trainer/__init__.py,sha256=h_DDVV7HFF7HUPAJFpt2d9wjqgnmEVcHxqZyB1k7pPQ,257
78
- judgeval/trainer/config.py,sha256=sAAVBgeoFDJWYjGIgOvoQoiO0gtqNAOI6MHncwdN_mk,4292
78
+ judgeval/trainer/__init__.py,sha256=nJo913vFdss3E_PR-M1OUjznS0SYgNZ-MP-Y_6Mj5PA,437
79
+ judgeval/trainer/base_trainer.py,sha256=Lxm6OxJpifonLKofNIRG3TU7n_jZWQZ0I_f_jwtb_WU,4018
80
+ judgeval/trainer/config.py,sha256=7ZSwr6p7vq0MRadh9axm6XB-RAotdWqULZ5yDl0xGbQ,4340
79
81
  judgeval/trainer/console.py,sha256=SvokkFEU-K1vLV4Rd1m6YJJ7HyYwTr4Azdzwx_JPZUY,4351
82
+ judgeval/trainer/fireworks_trainer.py,sha256=_B-fWovdhIpxh1RbXU0W5BlFGc9ZzuYtFw7CBtKTRO8,16074
80
83
  judgeval/trainer/trainable_model.py,sha256=T-Sioi_sXtfYlcu3lE0cd60PHs8DrYaZ-Kxb4h1nU04,8993
81
- judgeval/trainer/trainer.py,sha256=FBhHq2YPooKADDCC_IEKex81L6a5quCmAMyl9mn3QLk,16675
84
+ judgeval/trainer/trainer.py,sha256=twLEHNaomelTg6ZYG6veI9OpB3wzhPCtPVQMTnDZWx4,2626
82
85
  judgeval/utils/async_utils.py,sha256=AF1xdu8Ao5GyhFvfaLOaKJHn1RISyXZ4U70UZe9zfBA,1083
83
86
  judgeval/utils/file_utils.py,sha256=vq-n5WZEZjVbZ5S9QTkW8nSH6Pvw-Jx0ttsQ1t0wnPQ,3140
84
87
  judgeval/utils/guards.py,sha256=QBb6m6KElxdvt2bskLZCKh_zGHbBcqV-VfGzT63o3hY,807
85
88
  judgeval/utils/meta.py,sha256=RAqZuvOlymqMwFoS0joBW_r65lcN9bY8BpNYHoytKps,773
89
+ judgeval/utils/project.py,sha256=kGpYmp6QGTD6h-GjQ-ovT7kBmGnyb99MWDJmRGFQHOg,527
86
90
  judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6256
87
91
  judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
88
92
  judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
@@ -100,8 +104,8 @@ judgeval/utils/wrappers/mutable_wrap_async.py,sha256=stHISOUCGFUJXY8seXmxUo4ZpMF
100
104
  judgeval/utils/wrappers/mutable_wrap_sync.py,sha256=t5jygAQ1vqhy8s1GfiLeYygYgaLTgfoYASN47U5JiPs,2888
101
105
  judgeval/utils/wrappers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
106
  judgeval/utils/wrappers/utils.py,sha256=j18vaa6JWDw2s3nQy1z5PfV_9Xxio-bVARaHG_0XyL0,1228
103
- judgeval-0.16.9.dist-info/METADATA,sha256=OiLnf6tEWwnFyLkEjqBbqORUSfcTgjJSyK9nFr6dxHo,11513
104
- judgeval-0.16.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
105
- judgeval-0.16.9.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
106
- judgeval-0.16.9.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
107
- judgeval-0.16.9.dist-info/RECORD,,
107
+ judgeval-0.18.0.dist-info/METADATA,sha256=rkPsc8z-trMM27wunxLLI_3CGJNb1UXjuByMomklKIU,11483
108
+ judgeval-0.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
109
+ judgeval-0.18.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
110
+ judgeval-0.18.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
111
+ judgeval-0.18.0.dist-info/RECORD,,