judgeval 0.16.9__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (37) hide show
  1. judgeval/__init__.py +32 -2
  2. judgeval/api/__init__.py +108 -0
  3. judgeval/api/api_types.py +76 -15
  4. judgeval/cli.py +16 -1
  5. judgeval/data/judgment_types.py +76 -20
  6. judgeval/dataset/__init__.py +11 -2
  7. judgeval/env.py +2 -11
  8. judgeval/evaluation/__init__.py +4 -0
  9. judgeval/prompt/__init__.py +330 -0
  10. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +1 -13
  11. judgeval/tracer/__init__.py +371 -257
  12. judgeval/tracer/constants.py +1 -1
  13. judgeval/tracer/exporters/store.py +32 -16
  14. judgeval/tracer/keys.py +11 -9
  15. judgeval/tracer/llm/llm_anthropic/messages.py +38 -26
  16. judgeval/tracer/llm/llm_anthropic/messages_stream.py +14 -14
  17. judgeval/tracer/llm/llm_google/generate_content.py +9 -7
  18. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +38 -14
  19. judgeval/tracer/llm/llm_openai/chat_completions.py +90 -26
  20. judgeval/tracer/llm/llm_openai/responses.py +88 -26
  21. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  22. judgeval/tracer/llm/llm_together/chat_completions.py +26 -18
  23. judgeval/tracer/managers.py +4 -0
  24. judgeval/trainer/__init__.py +10 -1
  25. judgeval/trainer/base_trainer.py +122 -0
  26. judgeval/trainer/config.py +1 -1
  27. judgeval/trainer/fireworks_trainer.py +396 -0
  28. judgeval/trainer/trainer.py +52 -387
  29. judgeval/utils/guards.py +9 -5
  30. judgeval/utils/project.py +15 -0
  31. judgeval/utils/serialize.py +2 -2
  32. judgeval/version.py +1 -1
  33. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/METADATA +2 -3
  34. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/RECORD +37 -32
  35. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  36. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/entry_points.txt +0 -0
  37. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,330 @@
1
+ from typing import List, Optional, Dict
2
+ from judgeval.api import JudgmentSyncClient
3
+ from judgeval.exceptions import JudgmentAPIError
4
+ from judgeval.api.api_types import (
5
+ PromptCommitInfo,
6
+ PromptTagResponse,
7
+ PromptUntagResponse,
8
+ PromptVersionsResponse,
9
+ )
10
+ from dataclasses import dataclass, field
11
+ import re
12
+ from string import Template
13
+ from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
14
+ from judgeval.utils.project import _resolve_project_id
15
+
16
+
17
def push_prompt(
    project_name: str,
    name: str,
    prompt: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> tuple[str, Optional[str], str]:
    """Create a new prompt version on the Judgment server.

    Args:
        project_name: Name of the project the prompt belongs to.
        name: Prompt name.
        prompt: Prompt template text.
        tags: Tags to attach to the new version.
        judgment_api_key: API key; defaults to the JUDGMENT_API_KEY env value.
        organization_id: Organization id; defaults to the JUDGMENT_ORG_ID env value.

    Returns:
        Tuple of ``(commit_id, parent_commit_id, created_at)``.

    Raises:
        ValueError: If the API key or organization id is missing.
        JudgmentAPIError: If the project cannot be resolved or the insert fails.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        r = client.prompts_insert(
            payload={
                "project_id": project_id,
                "name": name,
                "prompt": prompt,
                "tags": tags,
            }
        )
        return r["commit_id"], r.get("parent_commit_id"), r["created_at"]
    except JudgmentAPIError as e:
        # Explicitly chain the original error as the cause (B904) so the
        # traceback reads "direct cause" rather than "during handling of".
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to save prompt: {e.detail}",
            response=e.response,
        ) from e
53
+
54
+
55
def fetch_prompt(
    project_name: str,
    name: str,
    commit_id: Optional[str] = None,
    tag: Optional[str] = None,
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> Optional[PromptCommitInfo]:
    """Fetch a single prompt commit, optionally by commit id or tag.

    Args:
        project_name: Name of the project the prompt belongs to.
        name: Prompt name.
        commit_id: Specific commit to fetch; latest when omitted.
        tag: Tag to resolve to a commit; mutually exclusive with commit_id
            (enforced by the caller, e.g. ``Prompt.get``).
        judgment_api_key: API key; defaults to the JUDGMENT_API_KEY env value.
        organization_id: Organization id; defaults to the JUDGMENT_ORG_ID env value.

    Returns:
        The commit info dict, or ``None`` if the server returns no commit.

    Raises:
        ValueError: If the API key or organization id is missing.
        JudgmentAPIError: If the project cannot be resolved or the fetch fails.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_fetch(
            name=name,
            project_id=project_id,
            commit_id=commit_id,
            tag=tag,
        )
        return prompt_config["commit"]
    except JudgmentAPIError as e:
        # Explicitly chain the original error as the cause (B904).
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to fetch prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
89
+
90
+
91
def tag_prompt(
    project_name: str,
    name: str,
    commit_id: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptTagResponse:
    """Attach tags to an existing prompt commit.

    Args:
        project_name: Name of the project the prompt belongs to.
        name: Prompt name.
        commit_id: Commit the tags should point at.
        tags: Tags to apply.
        judgment_api_key: API key; defaults to the JUDGMENT_API_KEY env value.
        organization_id: Organization id; defaults to the JUDGMENT_ORG_ID env value.

    Returns:
        The server's tag response payload.

    Raises:
        ValueError: If the API key or organization id is missing.
        JudgmentAPIError: If the project cannot be resolved or tagging fails.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_tag(
            payload={
                "project_id": project_id,
                "name": name,
                "commit_id": commit_id,
                "tags": tags,
            }
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Explicitly chain the original error as the cause (B904).
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to tag prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
127
+
128
+
129
def untag_prompt(
    project_name: str,
    name: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptUntagResponse:
    """Remove tags from a prompt.

    Args:
        project_name: Name of the project the prompt belongs to.
        name: Prompt name.
        tags: Tags to remove.
        judgment_api_key: API key; defaults to the JUDGMENT_API_KEY env value.
        organization_id: Organization id; defaults to the JUDGMENT_ORG_ID env value.

    Returns:
        The server's untag response payload.

    Raises:
        ValueError: If the API key or organization id is missing.
        JudgmentAPIError: If the project cannot be resolved or untagging fails.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_untag(
            payload={"project_id": project_id, "name": name, "tags": tags}
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Explicitly chain the original error as the cause (B904).
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to untag prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
159
+
160
+
161
def list_prompt(
    project_name: str,
    name: str,
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptVersionsResponse:
    """List every stored version of a prompt.

    Args:
        project_name: Name of the project the prompt belongs to.
        name: Prompt name.
        judgment_api_key: API key; defaults to the JUDGMENT_API_KEY env value.
        organization_id: Organization id; defaults to the JUDGMENT_ORG_ID env value.

    Returns:
        The server's versions response payload.

    Raises:
        ValueError: If the API key or organization id is missing.
        JudgmentAPIError: If the project cannot be resolved or listing fails.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_get_prompt_versions(
            project_id=project_id, name=name
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Explicitly chain the original error as the cause (B904).
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to list prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
190
+
191
+
192
@dataclass
class Prompt:
    """A versioned prompt template stored in a Judgment project.

    Template variables use ``{{var}}`` syntax in :attr:`prompt`; they are
    converted to :class:`string.Template` ``$var`` placeholders once at
    construction and substituted by :meth:`compile`.
    """

    name: str  # prompt name within the project
    prompt: str  # raw template text with {{var}} placeholders
    created_at: str  # server-provided creation timestamp (string form)
    tags: List[str]  # tags attached to this commit
    commit_id: str  # id of this prompt version
    parent_commit_id: Optional[str] = None  # previous version, if any
    metadata: Dict[str, str] = field(default_factory=dict)  # creator info etc.
    _template: Template = field(init=False, repr=False)  # compiled substitution template

    def __post_init__(self):
        # Convert "{{var}}" placeholders into string.Template "$var" syntax.
        template_str = re.sub(r"\{\{([^}]+)\}\}", r"$\1", self.prompt)
        self._template = Template(template_str)

    @classmethod
    def create(
        cls,
        project_name: str,
        name: str,
        prompt: str,
        tags: Optional[List[str]] = None,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Push a new prompt version to the server and return it as a Prompt."""
        if tags is None:
            tags = []
        commit_id, parent_commit_id, created_at = push_prompt(
            project_name, name, prompt, tags, judgment_api_key, organization_id
        )
        return cls(
            name=name,
            prompt=prompt,
            created_at=created_at,
            tags=tags,
            commit_id=commit_id,
            parent_commit_id=parent_commit_id,
        )

    @classmethod
    def get(
        cls,
        project_name: str,
        name: str,
        commit_id: Optional[str] = None,
        tag: Optional[str] = None,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Fetch a prompt version by commit id, by tag, or latest.

        Raises:
            ValueError: If both ``commit_id`` and ``tag`` are given.
            JudgmentAPIError: If the prompt cannot be found or fetched.
        """
        if commit_id is not None and tag is not None:
            raise ValueError(
                "You cannot fetch a prompt by both commit_id and tag at the same time"
            )
        prompt_config = fetch_prompt(
            project_name, name, commit_id, tag, judgment_api_key, organization_id
        )
        if prompt_config is None:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Prompt '{name}' not found in project '{project_name}'",
                response=None,  # type: ignore
            )
        return cls(
            name=prompt_config["name"],
            prompt=prompt_config["prompt"],
            created_at=prompt_config["created_at"],
            tags=prompt_config["tags"],
            commit_id=prompt_config["commit_id"],
            parent_commit_id=prompt_config.get("parent_commit_id"),
            metadata={
                "creator_first_name": prompt_config["first_name"],
                "creator_last_name": prompt_config["last_name"],
                "creator_email": prompt_config["user_email"],
            },
        )

    @classmethod
    def tag(
        cls,
        project_name: str,
        name: str,
        commit_id: str,
        tags: List[str],
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Attach tags to a prompt commit; returns the tagged commit id."""
        prompt_config = tag_prompt(
            project_name, name, commit_id, tags, judgment_api_key, organization_id
        )
        return prompt_config["commit_id"]

    @classmethod
    def untag(
        cls,
        project_name: str,
        name: str,
        tags: List[str],
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Remove tags from a prompt; returns the affected commit ids."""
        prompt_config = untag_prompt(
            project_name, name, tags, judgment_api_key, organization_id
        )
        return prompt_config["commit_ids"]

    @classmethod
    def list(
        cls,
        project_name: str,
        name: str,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Return every stored version of the prompt as Prompt instances."""
        prompt_configs = list_prompt(
            project_name, name, judgment_api_key, organization_id
        )["versions"]
        return [
            cls(
                name=prompt_config["name"],
                prompt=prompt_config["prompt"],
                tags=prompt_config["tags"],
                created_at=prompt_config["created_at"],
                commit_id=prompt_config["commit_id"],
                parent_commit_id=prompt_config.get("parent_commit_id"),
                metadata={
                    "creator_first_name": prompt_config["first_name"],
                    "creator_last_name": prompt_config["last_name"],
                    "creator_email": prompt_config["user_email"],
                },
            )
            for prompt_config in prompt_configs
        ]

    def compile(self, **kwargs) -> str:
        """Substitute template variables and return the rendered prompt text.

        Raises:
            ValueError: If a required template variable is missing from kwargs.
        """
        try:
            return self._template.substitute(**kwargs)
        except KeyError as e:
            # Template.substitute raises KeyError("'var'"); strip the quotes
            # and chain the cause explicitly (B904).
            missing_var = str(e).strip("'")
            raise ValueError(f"Missing required variable: {missing_var}") from e
@@ -40,18 +40,12 @@ def push_prompt_scorer(
40
40
  }
41
41
  )
42
42
  except JudgmentAPIError as e:
43
- if e.status_code == 500:
44
- raise JudgmentAPIError(
45
- status_code=e.status_code,
46
- detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
47
- response=e.response,
48
- )
49
43
  raise JudgmentAPIError(
50
44
  status_code=e.status_code,
51
45
  detail=f"Failed to save prompt scorer: {e.detail}",
52
46
  response=e.response,
53
47
  )
54
- return r["name"]
48
+ return r["scorer_response"]["name"]
55
49
 
56
50
 
57
51
  def fetch_prompt_scorer(
@@ -75,12 +69,6 @@ def fetch_prompt_scorer(
75
69
  scorer_config.pop("updated_at")
76
70
  return scorer_config
77
71
  except JudgmentAPIError as e:
78
- if e.status_code == 500:
79
- raise JudgmentAPIError(
80
- status_code=e.status_code,
81
- detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
82
- response=e.response,
83
- )
84
72
  raise JudgmentAPIError(
85
73
  status_code=e.status_code,
86
74
  detail=f"Failed to fetch prompt scorer '{name}': {e.detail}",