judgeval 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- judgeval/__init__.py +29 -0
- judgeval/api/__init__.py +108 -0
- judgeval/api/api_types.py +56 -1
- judgeval/cli.py +7 -0
- judgeval/data/judgment_types.py +56 -1
- judgeval/prompts/prompt.py +320 -0
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -12
- judgeval/tracer/__init__.py +71 -33
- judgeval/tracer/exporters/store.py +32 -16
- judgeval/tracer/keys.py +1 -0
- judgeval/tracer/llm/llm_anthropic/messages.py +4 -4
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +2 -2
- judgeval/tracer/llm/llm_google/generate_content.py +1 -1
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +2 -2
- judgeval/tracer/llm/llm_openai/chat_completions.py +4 -4
- judgeval/tracer/llm/llm_openai/responses.py +4 -4
- judgeval/tracer/llm/llm_together/chat_completions.py +4 -4
- judgeval/trainer/base_trainer.py +6 -1
- judgeval/trainer/fireworks_trainer.py +21 -6
- judgeval/utils/project.py +15 -0
- judgeval/version.py +1 -1
- {judgeval-0.17.0.dist-info → judgeval-0.19.0.dist-info}/METADATA +1 -1
- {judgeval-0.17.0.dist-info → judgeval-0.19.0.dist-info}/RECORD +26 -24
- {judgeval-0.17.0.dist-info → judgeval-0.19.0.dist-info}/WHEEL +0 -0
- {judgeval-0.17.0.dist-info → judgeval-0.19.0.dist-info}/entry_points.txt +0 -0
- {judgeval-0.17.0.dist-info → judgeval-0.19.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from judgeval.data.evaluation_run import ExampleEvaluationRun


 from typing import List, Optional, Union, Sequence
+import ast
 from judgeval.scorers import ExampleAPIScorerConfig
 from judgeval.scorers.example_scorer import ExampleScorer
 from judgeval.data.example import Example
@@ -81,6 +82,7 @@ class JudgmentClient(metaclass=SingletonMeta):
         scorer_file_path: str,
         requirements_file_path: Optional[str] = None,
         unique_name: Optional[str] = None,
+        overwrite: bool = False,
     ) -> bool:
         """
         Upload custom ExampleScorer from files to backend.
@@ -89,6 +91,7 @@ class JudgmentClient(metaclass=SingletonMeta):
             scorer_file_path: Path to Python file containing CustomScorer class
             requirements_file_path: Optional path to requirements.txt
             unique_name: Optional unique identifier (auto-detected from scorer.name if not provided)
+            overwrite: Whether to overwrite existing scorer if it already exists

         Returns:
             bool: True if upload successful
@@ -111,6 +114,31 @@ class JudgmentClient(metaclass=SingletonMeta):
         with open(scorer_file_path, "r") as f:
             scorer_code = f.read()

+        try:
+            tree = ast.parse(scorer_code, filename=scorer_file_path)
+        except SyntaxError as e:
+            error_msg = f"Invalid Python syntax in {scorer_file_path}: {e}"
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
+        scorer_classes = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                for base in node.bases:
+                    if (isinstance(base, ast.Name) and base.id == "ExampleScorer") or (
+                        isinstance(base, ast.Attribute) and base.attr == "ExampleScorer"
+                    ):
+                        scorer_classes.append(node.name)
+
+        if len(scorer_classes) > 1:
+            error_msg = f"Multiple ExampleScorer classes found in {scorer_file_path}: {scorer_classes}. Please only upload one scorer class per file."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+        elif len(scorer_classes) == 0:
+            error_msg = f"No ExampleScorer class was found in {scorer_file_path}. Please ensure the file contains a valid scorer class that inherits from ExampleScorer."
+            judgeval_logger.error(error_msg)
+            raise ValueError(error_msg)
+
         # Read requirements (optional)
         requirements_text = ""
         if requirements_file_path and os.path.exists(requirements_file_path):
@@ -127,6 +155,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 "scorer_name": unique_name,
                 "scorer_code": scorer_code,
                 "requirements_text": requirements_text,
+                "overwrite": overwrite,
             }
         )
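The new validation walks the uploaded file's AST and accepts it only when exactly one class inherits from ExampleScorer. A minimal sketch of a file that would pass this check, assuming a hypothetical my_scorer.py (the scoring logic is omitted; the validator only inspects class bases):

# my_scorer.py (hypothetical): must contain exactly one ExampleScorer subclass
from judgeval.scorers.example_scorer import ExampleScorer


class MyScorer(ExampleScorer):
    # unique_name is auto-detected from scorer.name when not passed explicitly
    name: str = "my-scorer"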
judgeval/api/__init__.py
CHANGED
@@ -189,6 +189,59 @@ class JudgmentSyncClient:
             payload,
         )

+    def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
@@ -381,6 +434,61 @@ class JudgmentAsyncClient:
             payload,
         )

+    async def prompts_insert(
+        self, payload: PromptInsertRequest
+    ) -> PromptInsertResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/insert/"),
+            payload,
+        )
+
+    async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/tag/"),
+            payload,
+        )
+
+    async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+        return await self._request(
+            "POST",
+            url_for("/prompts/untag/"),
+            payload,
+        )
+
+    async def prompts_fetch(
+        self,
+        project_id: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+    ) -> PromptFetchResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        if commit_id is not None:
+            query_params["commit_id"] = commit_id
+        if tag is not None:
+            query_params["tag"] = tag
+        return await self._request(
+            "GET",
+            url_for("/prompts/fetch/"),
+            query_params,
+        )
+
+    async def prompts_get_prompt_versions(
+        self, project_id: str, name: str
+    ) -> PromptVersionsResponse:
+        query_params = {}
+        query_params["project_id"] = project_id
+        query_params["name"] = name
+        return await self._request(
+            "GET",
+            url_for("/prompts/get_prompt_versions/"),
+            query_params,
+        )
+
     async def projects_resolve(
         self, payload: ResolveProjectNameRequest
     ) -> ResolveProjectNameResponse:
judgeval/api/api_types.py
CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-10-
+# timestamp: 2025-10-21T01:37:42+00:00

 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -80,6 +80,7 @@ class CustomScorerUploadPayload(TypedDict):
     scorer_name: str
     scorer_code: str
     requirements_text: str
+    overwrite: NotRequired[bool]


 class CustomScorerTemplateResponse(TypedDict):
@@ -88,6 +89,40 @@ class CustomScorerTemplateResponse(TypedDict):
     message: str


+class PromptInsertRequest(TypedDict):
+    project_id: str
+    name: str
+    prompt: str
+    tags: List[str]
+
+
+class PromptInsertResponse(TypedDict):
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+
+
+class PromptTagRequest(TypedDict):
+    project_id: str
+    name: str
+    commit_id: str
+    tags: List[str]
+
+
+class PromptTagResponse(TypedDict):
+    commit_id: str
+
+
+class PromptUntagRequest(TypedDict):
+    project_id: str
+    name: str
+    tags: List[str]
+
+
+class PromptUntagResponse(TypedDict):
+    commit_ids: List[str]
+
+
 class ResolveProjectNameRequest(TypedDict):
     project_name: str

@@ -169,6 +204,18 @@ class PromptScorer(TypedDict):
     is_trace: NotRequired[Optional[bool]]


+class PromptCommitInfo(TypedDict):
+    name: str
+    prompt: str
+    tags: List[str]
+    commit_id: str
+    parent_commit_id: NotRequired[Optional[str]]
+    created_at: str
+    first_name: str
+    last_name: str
+    user_email: str
+
+
 class ScorerData(TypedDict):
     id: NotRequired[str]
     name: str
@@ -265,6 +312,14 @@ class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]


+class PromptFetchResponse(TypedDict):
+    commit: NotRequired[Optional[PromptCommitInfo]]
+
+
+class PromptVersionsResponse(TypedDict):
+    versions: List[PromptCommitInfo]
+
+
 class ScoringResult(TypedDict):
     success: bool
     scorers_data: List[ScorerData]
judgeval/cli.py
CHANGED
@@ -26,6 +26,12 @@ def upload_scorer(
     unique_name: str = typer.Option(
         None, help="Custom name for the scorer (auto-detected if not provided)"
     ),
+    overwrite: bool = typer.Option(
+        False,
+        "--overwrite",
+        "-o",
+        help="Overwrite existing scorer if it already exists",
+    ),
 ):
     # Validate file paths
     if not Path(scorer_file_path).exists():
@@ -43,6 +49,7 @@ def upload_scorer(
         scorer_file_path=scorer_file_path,
         requirements_file_path=requirements_file_path,
         unique_name=unique_name,
+        overwrite=overwrite,
     )

     if not result:
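Assuming the judgeval console script declared in entry_points.txt, and with the unchanged positional arguments of upload_scorer (not shown in this hunk) passed as before, the new opt-in flag would look like:

judgeval upload_scorer ./my_scorer.py --overwrite   # or the short form -o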
judgeval/data/judgment_types.py
CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-10-
+# timestamp: 2025-10-21T01:37:41+00:00

 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -87,6 +87,7 @@ class CustomScorerUploadPayload(BaseModel):
     scorer_name: Annotated[str, Field(title="Scorer Name")]
     scorer_code: Annotated[str, Field(title="Scorer Code")]
     requirements_text: Annotated[str, Field(title="Requirements Text")]
+    overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False


 class CustomScorerTemplateResponse(BaseModel):
@@ -95,6 +96,40 @@ class CustomScorerTemplateResponse(BaseModel):
     message: Annotated[str, Field(title="Message")]


+class PromptInsertRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptInsertResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+
+
+class PromptTagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptTagResponse(BaseModel):
+    commit_id: Annotated[str, Field(title="Commit Id")]
+
+
+class PromptUntagRequest(BaseModel):
+    project_id: Annotated[str, Field(title="Project Id")]
+    name: Annotated[str, Field(title="Name")]
+    tags: Annotated[List[str], Field(title="Tags")]
+
+
+class PromptUntagResponse(BaseModel):
+    commit_ids: Annotated[List[str], Field(title="Commit Ids")]
+
+
 class ResolveProjectNameRequest(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]

@@ -187,6 +222,18 @@ class PromptScorer(BaseModel):
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False


+class PromptCommitInfo(BaseModel):
+    name: Annotated[str, Field(title="Name")]
+    prompt: Annotated[str, Field(title="Prompt")]
+    tags: Annotated[List[str], Field(title="Tags")]
+    commit_id: Annotated[str, Field(title="Commit Id")]
+    parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
+    created_at: Annotated[str, Field(title="Created At")]
+    first_name: Annotated[str, Field(title="First Name")]
+    last_name: Annotated[str, Field(title="Last Name")]
+    user_email: Annotated[str, Field(title="User Email")]
+
+
 class ScorerData(BaseModel):
     id: Annotated[Optional[str], Field(title="Id")] = None
     name: Annotated[str, Field(title="Name")]
@@ -299,6 +346,14 @@ class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]


+class PromptFetchResponse(BaseModel):
+    commit: Optional[PromptCommitInfo] = None
+
+
+class PromptVersionsResponse(BaseModel):
+    versions: Annotated[List[PromptCommitInfo], Field(title="Versions")]
+
+
 class ScoringResult(BaseModel):
     success: Annotated[bool, Field(title="Success")]
     scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]
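These generated Pydantic models mirror the TypedDicts added to judgeval/api/api_types.py, giving runtime validation of the same prompt payloads. A small sketch with placeholder field values:

from judgeval.data.judgment_types import PromptCommitInfo

info = PromptCommitInfo(
    name="greeting",
    prompt="Hello, {{user}}!",
    tags=["prod"],
    commit_id="abc123",  # placeholder commit id
    created_at="2025-10-21T01:37:41+00:00",
    first_name="Ada",  # placeholder creator details
    last_name="Lovelace",
    user_email="ada@example.com",
)
assert info.parent_commit_id is None  # optional field, defaults to None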
judgeval/prompts/prompt.py
ADDED
@@ -0,0 +1,320 @@
+from typing import List, Optional, Dict
+from judgeval.api import JudgmentSyncClient
+from judgeval.exceptions import JudgmentAPIError
+from judgeval.api.api_types import (
+    PromptCommitInfo,
+    PromptTagResponse,
+    PromptUntagResponse,
+    PromptVersionsResponse,
+)
+from dataclasses import dataclass, field
+import re
+from string import Template
+from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
+from judgeval.utils.project import _resolve_project_id
+
+
+def push_prompt(
+    project_name: str,
+    name: str,
+    prompt: str,
+    tags: List[str],
+    judgment_api_key: str = JUDGMENT_API_KEY,
+    organization_id: str = JUDGMENT_ORG_ID,
+) -> tuple[str, Optional[str], str]:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        project_id = _resolve_project_id(
+            project_name, judgment_api_key, organization_id
+        )
+        if not project_id:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Project '{project_name}' not found",
+                response=None,  # type: ignore
+            )
+        r = client.prompts_insert(
+            payload={
+                "project_id": project_id,
+                "name": name,
+                "prompt": prompt,
+                "tags": tags,
+            }
+        )
+        return r["commit_id"], r.get("parent_commit_id"), r["created_at"]
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to save prompt: {e.detail}",
+            response=e.response,
+        )
+
+
+def fetch_prompt(
+    project_name: str,
+    name: str,
+    commit_id: Optional[str] = None,
+    tag: Optional[str] = None,
+    judgment_api_key: str = JUDGMENT_API_KEY,
+    organization_id: str = JUDGMENT_ORG_ID,
+) -> Optional[PromptCommitInfo]:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        project_id = _resolve_project_id(
+            project_name, judgment_api_key, organization_id
+        )
+        if not project_id:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Project '{project_name}' not found",
+                response=None,  # type: ignore
+            )
+        prompt_config = client.prompts_fetch(
+            name=name,
+            project_id=project_id,
+            commit_id=commit_id,
+            tag=tag,
+        )
+        return prompt_config["commit"]
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to fetch prompt '{name}': {e.detail}",
+            response=e.response,
+        )
+
+
+def tag_prompt(
+    project_name: str,
+    name: str,
+    commit_id: str,
+    tags: List[str],
+    judgment_api_key: str = JUDGMENT_API_KEY,
+    organization_id: str = JUDGMENT_ORG_ID,
+) -> PromptTagResponse:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        project_id = _resolve_project_id(
+            project_name, judgment_api_key, organization_id
+        )
+        if not project_id:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Project '{project_name}' not found",
+                response=None,  # type: ignore
+            )
+        prompt_config = client.prompts_tag(
+            payload={
+                "project_id": project_id,
+                "name": name,
+                "commit_id": commit_id,
+                "tags": tags,
+            }
+        )
+        return prompt_config
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to tag prompt '{name}': {e.detail}",
+            response=e.response,
+        )
+
+
+def untag_prompt(
+    project_name: str,
+    name: str,
+    tags: List[str],
+    judgment_api_key: str = JUDGMENT_API_KEY,
+    organization_id: str = JUDGMENT_ORG_ID,
+) -> PromptUntagResponse:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        project_id = _resolve_project_id(
+            project_name, judgment_api_key, organization_id
+        )
+        if not project_id:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Project '{project_name}' not found",
+                response=None,  # type: ignore
+            )
+        prompt_config = client.prompts_untag(
+            payload={"project_id": project_id, "name": name, "tags": tags}
+        )
+        return prompt_config
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to untag prompt '{name}': {e.detail}",
+            response=e.response,
+        )
+
+
+def list_prompt(
+    project_name: str,
+    name: str,
+    judgment_api_key: str = JUDGMENT_API_KEY,
+    organization_id: str = JUDGMENT_ORG_ID,
+) -> PromptVersionsResponse:
+    client = JudgmentSyncClient(judgment_api_key, organization_id)
+    try:
+        project_id = _resolve_project_id(
+            project_name, judgment_api_key, organization_id
+        )
+        if not project_id:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Project '{project_name}' not found",
+                response=None,  # type: ignore
+            )
+        prompt_config = client.prompts_get_prompt_versions(
+            project_id=project_id, name=name
+        )
+        return prompt_config
+    except JudgmentAPIError as e:
+        raise JudgmentAPIError(
+            status_code=e.status_code,
+            detail=f"Failed to list prompt '{name}': {e.detail}",
+            response=e.response,
+        )
+
+
+@dataclass
+class Prompt:
+    name: str
+    prompt: str
+    created_at: str
+    tags: List[str]
+    commit_id: str
+    parent_commit_id: Optional[str] = None
+    metadata: Dict[str, str] = field(default_factory=dict)
+    _template: Template = field(init=False, repr=False)
+
+    def __post_init__(self):
+        template_str = re.sub(r"\{\{([^}]+)\}\}", r"$\1", self.prompt)
+        self._template = Template(template_str)
+
+    @classmethod
+    def create(
+        cls,
+        project_name: str,
+        name: str,
+        prompt: str,
+        tags: Optional[List[str]] = None,
+        judgment_api_key: str = JUDGMENT_API_KEY,
+        organization_id: str = JUDGMENT_ORG_ID,
+    ):
+        if tags is None:
+            tags = []
+        commit_id, parent_commit_id, created_at = push_prompt(
+            project_name, name, prompt, tags, judgment_api_key, organization_id
+        )
+        return cls(
+            name=name,
+            prompt=prompt,
+            created_at=created_at,
+            tags=tags,
+            commit_id=commit_id,
+            parent_commit_id=parent_commit_id,
+        )
+
+    @classmethod
+    def get(
+        cls,
+        project_name: str,
+        name: str,
+        commit_id: Optional[str] = None,
+        tag: Optional[str] = None,
+        judgment_api_key: str = JUDGMENT_API_KEY,
+        organization_id: str = JUDGMENT_ORG_ID,
+    ):
+        if commit_id is not None and tag is not None:
+            raise ValueError(
+                "You cannot fetch a prompt by both commit_id and tag at the same time"
+            )
+        prompt_config = fetch_prompt(
+            project_name, name, commit_id, tag, judgment_api_key, organization_id
+        )
+        if prompt_config is None:
+            raise JudgmentAPIError(
+                status_code=404,
+                detail=f"Prompt '{name}' not found in project '{project_name}'",
+                response=None,  # type: ignore
+            )
+        return cls(
+            name=prompt_config["name"],
+            prompt=prompt_config["prompt"],
+            created_at=prompt_config["created_at"],
+            tags=prompt_config["tags"],
+            commit_id=prompt_config["commit_id"],
+            parent_commit_id=prompt_config.get("parent_commit_id"),
+            metadata={
+                "creator_first_name": prompt_config["first_name"],
+                "creator_last_name": prompt_config["last_name"],
+                "creator_email": prompt_config["user_email"],
+            },
+        )
+
+    @classmethod
+    def tag(
+        cls,
+        project_name: str,
+        name: str,
+        commit_id: str,
+        tags: List[str],
+        judgment_api_key: str = JUDGMENT_API_KEY,
+        organization_id: str = JUDGMENT_ORG_ID,
+    ):
+        prompt_config = tag_prompt(
+            project_name, name, commit_id, tags, judgment_api_key, organization_id
+        )
+        return prompt_config["commit_id"]
+
+    @classmethod
+    def untag(
+        cls,
+        project_name: str,
+        name: str,
+        tags: List[str],
+        judgment_api_key: str = JUDGMENT_API_KEY,
+        organization_id: str = JUDGMENT_ORG_ID,
+    ):
+        prompt_config = untag_prompt(
+            project_name, name, tags, judgment_api_key, organization_id
+        )
+        return prompt_config["commit_ids"]
+
+    @classmethod
+    def list(
+        cls,
+        project_name: str,
+        name: str,
+        judgment_api_key: str = JUDGMENT_API_KEY,
+        organization_id: str = JUDGMENT_ORG_ID,
+    ):
+        prompt_configs = list_prompt(
+            project_name, name, judgment_api_key, organization_id
+        )["versions"]
+        return [
+            cls(
+                name=prompt_config["name"],
+                prompt=prompt_config["prompt"],
+                tags=prompt_config["tags"],
+                created_at=prompt_config["created_at"],
+                commit_id=prompt_config["commit_id"],
+                parent_commit_id=prompt_config.get("parent_commit_id"),
+                metadata={
+                    "creator_first_name": prompt_config["first_name"],
+                    "creator_last_name": prompt_config["last_name"],
+                    "creator_email": prompt_config["user_email"],
+                },
+            )
+            for prompt_config in prompt_configs
+        ]
+
+    def compile(self, **kwargs) -> str:
+        try:
+            return self._template.substitute(**kwargs)
+        except KeyError as e:
+            missing_var = str(e).strip("'")
+            raise ValueError(f"Missing required variable: {missing_var}")