judgeval 0.16.9__py3-none-any.whl → 0.22.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of judgeval might be problematic. Click here for more details.
- judgeval/__init__.py +32 -2
- judgeval/api/__init__.py +108 -0
- judgeval/api/api_types.py +76 -15
- judgeval/cli.py +16 -1
- judgeval/data/judgment_types.py +76 -20
- judgeval/dataset/__init__.py +11 -2
- judgeval/env.py +2 -11
- judgeval/evaluation/__init__.py +4 -0
- judgeval/prompt/__init__.py +330 -0
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +1 -13
- judgeval/tracer/__init__.py +371 -257
- judgeval/tracer/constants.py +1 -1
- judgeval/tracer/exporters/store.py +32 -16
- judgeval/tracer/keys.py +11 -9
- judgeval/tracer/llm/llm_anthropic/messages.py +38 -26
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +14 -14
- judgeval/tracer/llm/llm_google/generate_content.py +9 -7
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +38 -14
- judgeval/tracer/llm/llm_openai/chat_completions.py +90 -26
- judgeval/tracer/llm/llm_openai/responses.py +88 -26
- judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +26 -18
- judgeval/tracer/managers.py +4 -0
- judgeval/trainer/__init__.py +10 -1
- judgeval/trainer/base_trainer.py +122 -0
- judgeval/trainer/config.py +1 -1
- judgeval/trainer/fireworks_trainer.py +396 -0
- judgeval/trainer/trainer.py +52 -387
- judgeval/utils/guards.py +9 -5
- judgeval/utils/project.py +15 -0
- judgeval/utils/serialize.py +2 -2
- judgeval/version.py +1 -1
- {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/METADATA +2 -3
- {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/RECORD +37 -32
- {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
- {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/entry_points.txt +0 -0
- {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
from typing import List, Optional, Dict
|
|
2
|
+
from judgeval.api import JudgmentSyncClient
|
|
3
|
+
from judgeval.exceptions import JudgmentAPIError
|
|
4
|
+
from judgeval.api.api_types import (
|
|
5
|
+
PromptCommitInfo,
|
|
6
|
+
PromptTagResponse,
|
|
7
|
+
PromptUntagResponse,
|
|
8
|
+
PromptVersionsResponse,
|
|
9
|
+
)
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
import re
|
|
12
|
+
from string import Template
|
|
13
|
+
from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
|
|
14
|
+
from judgeval.utils.project import _resolve_project_id
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def push_prompt(
    project_name: str,
    name: str,
    prompt: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> tuple[str, Optional[str], str]:
    """Save a prompt version in a project via ``client.prompts_insert``.

    Args:
        project_name: Name of the project; resolved to a project id first.
        name: Name of the prompt.
        prompt: Prompt text to store.
        tags: Tags sent along with the prompt.
        judgment_api_key: API key used to build the client.
        organization_id: Organization id used to build the client.

    Returns:
        ``(commit_id, parent_commit_id, created_at)`` read from the API
        response. ``parent_commit_id`` is ``None`` when the response has no
        such key.

    Raises:
        ValueError: If the API key or the organization id is missing/empty.
        JudgmentAPIError: If the project cannot be resolved or the API call
            fails. The detail is prefixed with ``"Failed to save prompt: "``
            and the original error is attached as ``__cause__``.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            # Raised inside the ``try``, so the handler below re-wraps it with
            # the "Failed to save prompt" prefix like any other API error.
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        r = client.prompts_insert(
            payload={
                "project_id": project_id,
                "name": name,
                "prompt": prompt,
                "tags": tags,
            }
        )
        return r["commit_id"], r.get("parent_commit_id"), r["created_at"]
    except JudgmentAPIError as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause of the wrapped error.
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to save prompt: {e.detail}",
            response=e.response,
        ) from e
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def fetch_prompt(
    project_name: str,
    name: str,
    commit_id: Optional[str] = None,
    tag: Optional[str] = None,
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> Optional[PromptCommitInfo]:
    """Fetch one prompt version via ``client.prompts_fetch``.

    Args:
        project_name: Name of the project; resolved to a project id first.
        name: Name of the prompt.
        commit_id: Optional commit id forwarded to the API.
        tag: Optional tag forwarded to the API.
        judgment_api_key: API key used to build the client.
        organization_id: Organization id used to build the client.

    Returns:
        The ``"commit"`` entry of the API response.
        NOTE(review): the annotation allows ``None`` -- presumably the API
        returns a null commit when nothing matches; confirm against the
        service contract.

    Raises:
        ValueError: If the API key or the organization id is missing/empty.
        JudgmentAPIError: If the project cannot be resolved or the API call
            fails. The detail is prefixed with
            ``"Failed to fetch prompt '<name>': "`` and the original error is
            attached as ``__cause__``.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            # Raised inside the ``try``, so the handler below re-wraps it with
            # the "Failed to fetch prompt" prefix like any other API error.
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_fetch(
            name=name,
            project_id=project_id,
            commit_id=commit_id,
            tag=tag,
        )
        return prompt_config["commit"]
    except JudgmentAPIError as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause of the wrapped error.
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to fetch prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def tag_prompt(
    project_name: str,
    name: str,
    commit_id: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptTagResponse:
    """Attach tags to a prompt commit via ``client.prompts_tag``.

    Args:
        project_name: Name of the project; resolved to a project id first.
        name: Name of the prompt.
        commit_id: Commit the tags are applied to.
        tags: Tags to apply.
        judgment_api_key: API key used to build the client.
        organization_id: Organization id used to build the client.

    Returns:
        The API response, unmodified.

    Raises:
        ValueError: If the API key or the organization id is missing/empty.
        JudgmentAPIError: If the project cannot be resolved or the API call
            fails. The detail is prefixed with
            ``"Failed to tag prompt '<name>': "`` and the original error is
            attached as ``__cause__``.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            # Raised inside the ``try``, so the handler below re-wraps it with
            # the "Failed to tag prompt" prefix like any other API error.
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_tag(
            payload={
                "project_id": project_id,
                "name": name,
                "commit_id": commit_id,
                "tags": tags,
            }
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause of the wrapped error.
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to tag prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def untag_prompt(
    project_name: str,
    name: str,
    tags: List[str],
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptUntagResponse:
    """Remove tags from a prompt via ``client.prompts_untag``.

    Args:
        project_name: Name of the project; resolved to a project id first.
        name: Name of the prompt.
        tags: Tags to remove.
        judgment_api_key: API key used to build the client.
        organization_id: Organization id used to build the client.

    Returns:
        The API response, unmodified.

    Raises:
        ValueError: If the API key or the organization id is missing/empty.
        JudgmentAPIError: If the project cannot be resolved or the API call
            fails. The detail is prefixed with
            ``"Failed to untag prompt '<name>': "`` and the original error is
            attached as ``__cause__``.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            # Raised inside the ``try``, so the handler below re-wraps it with
            # the "Failed to untag prompt" prefix like any other API error.
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_untag(
            payload={"project_id": project_id, "name": name, "tags": tags}
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause of the wrapped error.
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to untag prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def list_prompt(
    project_name: str,
    name: str,
    judgment_api_key: str | None = JUDGMENT_API_KEY,
    organization_id: str | None = JUDGMENT_ORG_ID,
) -> PromptVersionsResponse:
    """List a prompt's versions via ``client.prompts_get_prompt_versions``.

    Args:
        project_name: Name of the project; resolved to a project id first.
        name: Name of the prompt.
        judgment_api_key: API key used to build the client.
        organization_id: Organization id used to build the client.

    Returns:
        The API response, unmodified.

    Raises:
        ValueError: If the API key or the organization id is missing/empty.
        JudgmentAPIError: If the project cannot be resolved or the API call
            fails. The detail is prefixed with
            ``"Failed to list prompt '<name>': "`` and the original error is
            attached as ``__cause__``.
    """
    if not judgment_api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    client = JudgmentSyncClient(judgment_api_key, organization_id)
    try:
        project_id = _resolve_project_id(
            project_name, judgment_api_key, organization_id
        )
        if not project_id:
            # Raised inside the ``try``, so the handler below re-wraps it with
            # the "Failed to list prompt" prefix like any other API error.
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Project '{project_name}' not found",
                response=None,  # type: ignore
            )
        prompt_config = client.prompts_get_prompt_versions(
            project_id=project_id, name=name
        )
        return prompt_config
    except JudgmentAPIError as e:
        # Chain explicitly so the traceback shows the original failure as the
        # direct cause of the wrapped error.
        raise JudgmentAPIError(
            status_code=e.status_code,
            detail=f"Failed to list prompt '{name}': {e.detail}",
            response=e.response,
        ) from e
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@dataclass
class Prompt:
    """A single prompt version plus a compiled template for its text.

    ``{{variable}}`` markers in ``prompt`` are turned into ``string.Template``
    placeholders when the instance is created; ``compile`` fills them in.
    """

    name: str
    prompt: str
    created_at: str
    tags: List[str]
    commit_id: str
    parent_commit_id: Optional[str] = None
    # Filled by ``get``/``list`` with creator_first_name, creator_last_name and
    # creator_email; left empty by ``create``.
    metadata: Dict[str, str] = field(default_factory=dict)
    # Derived from ``prompt``. Excluded from comparison because ``Template``
    # objects compare by identity, which would make two field-identical
    # prompts unequal.
    _template: Template = field(init=False, repr=False, compare=False)

    def __post_init__(self):
        """Build the ``string.Template`` used by ``compile``."""

        def _to_template_syntax(match: "re.Match[str]") -> str:
            variable = match.group(1)
            if variable is None:
                # A bare "$" in the prompt text is literal, not a placeholder.
                return "$$"
            # Braced form keeps "{{name}}s" from being read as "$names";
            # strip() accepts "{{ name }}".
            return "${" + variable.strip() + "}"

        template_str = re.sub(
            r"\{\{([^}]+)\}\}|\$", _to_template_syntax, self.prompt
        )
        self._template = Template(template_str)

    @classmethod
    def _from_commit_info(cls, commit_info) -> "Prompt":
        """Build an instance from one commit record returned by the API."""
        return cls(
            name=commit_info["name"],
            prompt=commit_info["prompt"],
            created_at=commit_info["created_at"],
            tags=commit_info["tags"],
            commit_id=commit_info["commit_id"],
            parent_commit_id=commit_info.get("parent_commit_id"),
            metadata={
                "creator_first_name": commit_info["first_name"],
                "creator_last_name": commit_info["last_name"],
                "creator_email": commit_info["user_email"],
            },
        )

    @classmethod
    def create(
        cls,
        project_name: str,
        name: str,
        prompt: str,
        tags: Optional[List[str]] = None,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ) -> "Prompt":
        """Push a prompt with ``push_prompt`` and return it as a ``Prompt``.

        ``tags`` defaults to an empty list. The returned instance carries the
        commit id, parent commit id and creation time reported by
        ``push_prompt``; its ``metadata`` is empty.
        """
        if tags is None:
            tags = []
        commit_id, parent_commit_id, created_at = push_prompt(
            project_name, name, prompt, tags, judgment_api_key, organization_id
        )
        return cls(
            name=name,
            prompt=prompt,
            created_at=created_at,
            tags=tags,
            commit_id=commit_id,
            parent_commit_id=parent_commit_id,
        )

    @classmethod
    def get(
        cls,
        project_name: str,
        name: str,
        commit_id: Optional[str] = None,
        tag: Optional[str] = None,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ) -> "Prompt":
        """Fetch one prompt version with ``fetch_prompt``.

        Raises:
            ValueError: If both ``commit_id`` and ``tag`` are given.
            JudgmentAPIError: With status 404 if ``fetch_prompt`` returns
                ``None``; errors raised by ``fetch_prompt`` propagate as-is.
        """
        if commit_id is not None and tag is not None:
            raise ValueError(
                "You cannot fetch a prompt by both commit_id and tag at the same time"
            )
        prompt_config = fetch_prompt(
            project_name, name, commit_id, tag, judgment_api_key, organization_id
        )
        if prompt_config is None:
            raise JudgmentAPIError(
                status_code=404,
                detail=f"Prompt '{name}' not found in project '{project_name}'",
                response=None,  # type: ignore
            )
        return cls._from_commit_info(prompt_config)

    @classmethod
    def tag(
        cls,
        project_name: str,
        name: str,
        commit_id: str,
        tags: List[str],
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Tag a commit with ``tag_prompt``; return the response's ``commit_id``."""
        prompt_config = tag_prompt(
            project_name, name, commit_id, tags, judgment_api_key, organization_id
        )
        return prompt_config["commit_id"]

    @classmethod
    def untag(
        cls,
        project_name: str,
        name: str,
        tags: List[str],
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ):
        """Remove tags with ``untag_prompt``; return the response's ``commit_ids``."""
        prompt_config = untag_prompt(
            project_name, name, tags, judgment_api_key, organization_id
        )
        return prompt_config["commit_ids"]

    @classmethod
    def list(
        cls,
        project_name: str,
        name: str,
        judgment_api_key: str | None = JUDGMENT_API_KEY,
        organization_id: str | None = JUDGMENT_ORG_ID,
    ) -> List["Prompt"]:
        """Return one ``Prompt`` per entry in ``list_prompt(...)["versions"]``."""
        prompt_configs = list_prompt(
            project_name, name, judgment_api_key, organization_id
        )["versions"]
        return [
            cls._from_commit_info(prompt_config)
            for prompt_config in prompt_configs
        ]

    def compile(self, **kwargs) -> str:
        """Substitute ``kwargs`` into the prompt's ``{{variable}}`` markers.

        Raises:
            ValueError: If a variable used in the prompt is not supplied, or
                (raised by ``string.Template``) if a marker is not a valid
                identifier.
        """
        try:
            return self._template.substitute(**kwargs)
        except KeyError as e:
            # KeyError's str() is the quoted key; strip the quotes for the
            # user-facing message.
            missing_var = str(e).strip("'")
            raise ValueError(f"Missing required variable: {missing_var}") from e
|
|
@@ -40,18 +40,12 @@ def push_prompt_scorer(
|
|
|
40
40
|
}
|
|
41
41
|
)
|
|
42
42
|
except JudgmentAPIError as e:
|
|
43
|
-
if e.status_code == 500:
|
|
44
|
-
raise JudgmentAPIError(
|
|
45
|
-
status_code=e.status_code,
|
|
46
|
-
detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
|
|
47
|
-
response=e.response,
|
|
48
|
-
)
|
|
49
43
|
raise JudgmentAPIError(
|
|
50
44
|
status_code=e.status_code,
|
|
51
45
|
detail=f"Failed to save prompt scorer: {e.detail}",
|
|
52
46
|
response=e.response,
|
|
53
47
|
)
|
|
54
|
-
return r["name"]
|
|
48
|
+
return r["scorer_response"]["name"]
|
|
55
49
|
|
|
56
50
|
|
|
57
51
|
def fetch_prompt_scorer(
|
|
@@ -75,12 +69,6 @@ def fetch_prompt_scorer(
|
|
|
75
69
|
scorer_config.pop("updated_at")
|
|
76
70
|
return scorer_config
|
|
77
71
|
except JudgmentAPIError as e:
|
|
78
|
-
if e.status_code == 500:
|
|
79
|
-
raise JudgmentAPIError(
|
|
80
|
-
status_code=e.status_code,
|
|
81
|
-
detail=f"The server is temporarily unavailable. Please try your request again in a few moments. Error details: {e.detail}",
|
|
82
|
-
response=e.response,
|
|
83
|
-
)
|
|
84
72
|
raise JudgmentAPIError(
|
|
85
73
|
status_code=e.status_code,
|
|
86
74
|
detail=f"Failed to fetch prompt scorer '{name}': {e.detail}",
|