judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/v1/internal/api/__init__.py
@@ -0,0 +1,525 @@
+ from typing import Dict, Any, Mapping, Literal, Optional
+ import httpx
+ from httpx import Response
+ from judgeval.exceptions import JudgmentAPIError
+ from judgeval.utils.url import url_for
+ from judgeval.utils.serialize import json_encoder
+ from judgeval.v1.internal.api.api_types import *
+
+
+ def _headers(api_key: str, organization_id: str) -> Mapping[str, str]:
+     return {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {api_key}",
+         "X-Organization-Id": organization_id,
+     }
+
+
+ def _handle_response(r: Response) -> Any:
+     if r.status_code >= 400:
+         try:
+             detail = r.json().get("detail", "")
+         except Exception:
+             detail = r.text
+         raise JudgmentAPIError(r.status_code, detail, r)
+     return r.json()
+
+
+ class JudgmentSyncClient:
+     __slots__ = ("base_url", "api_key", "organization_id", "client")
+
+     def __init__(self, base_url: str, api_key: str, organization_id: str):
+         self.base_url = base_url
+         self.api_key = api_key
+         self.organization_id = organization_id
+         self.client = httpx.Client(timeout=30)
+
+     def _request(
+         self,
+         method: Literal["POST", "PATCH", "GET", "DELETE"],
+         url: str,
+         payload: Any,
+         params: Optional[Dict[str, Any]] = None,
+     ) -> Any:
+         if method == "GET":
+             r = self.client.request(
+                 method,
+                 url,
+                 params=payload if params is None else params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         else:
+             r = self.client.request(
+                 method,
+                 url,
+                 json=json_encoder(payload),
+                 params=params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         return _handle_response(r)
+
+     def add_to_run_eval_queue_examples(self, payload: ExampleEvaluationRun) -> Any:
+         return self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/examples", self.base_url),
+             payload,
+         )
+
+     def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
+         return self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/traces", self.base_url),
+             payload,
+         )
+
+     def evaluate_examples(
+         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return self._request(
+             "POST",
+             url_for("/evaluate/examples", self.base_url),
+             payload,
+             params=query_params,
+         )
+
+     def evaluate_traces(
+         self, payload: TraceEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return self._request(
+             "POST",
+             url_for("/evaluate/traces", self.base_url),
+             payload,
+             params=query_params,
+         )
+
+     def log_eval_results(self, payload: EvalResults) -> LogEvalResultsResponse:
+         return self._request(
+             "POST",
+             url_for("/log_eval_results/", self.base_url),
+             payload,
+         )
+
+     def fetch_experiment_run(
+         self, payload: EvalResultsFetch
+     ) -> FetchExperimentRunResponse:
+         return self._request(
+             "POST",
+             url_for("/fetch_experiment_run/", self.base_url),
+             payload,
+         )
+
+     def datasets_insert_examples_for_judgeval(
+         self, payload: DatasetInsertExamples
+     ) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/insert_examples_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
+         return self._request(
+             "POST",
+             url_for("/datasets/pull_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/pull_all_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/create_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
+         return self._request(
+             "POST",
+             url_for("/projects/add/", self.base_url),
+             payload,
+         )
+
+     def projects_delete_from_judgeval(
+         self, payload: ProjectDeleteFromJudgevalResponse
+     ) -> ProjectDeleteResponse:
+         return self._request(
+             "DELETE",
+             url_for("/projects/delete_from_judgeval/", self.base_url),
+             payload,
+         )
+
+     def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
+         return self._request(
+             "POST",
+             url_for("/scorer_exists/", self.base_url),
+             payload,
+         )
+
+     def save_scorer(self, payload: SavePromptScorerRequest) -> SavePromptScorerResponse:
+         return self._request(
+             "POST",
+             url_for("/save_scorer/", self.base_url),
+             payload,
+         )
+
+     def fetch_scorers(
+         self, payload: FetchPromptScorersRequest
+     ) -> FetchPromptScorersResponse:
+         return self._request(
+             "POST",
+             url_for("/fetch_scorers/", self.base_url),
+             payload,
+         )
+
+     def upload_custom_scorer(
+         self, payload: CustomScorerUploadPayload
+     ) -> CustomScorerTemplateResponse:
+         return self._request(
+             "POST",
+             url_for("/upload_custom_scorer/", self.base_url),
+             payload,
+         )
+
+     def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/insert/", self.base_url),
+             payload,
+         )
+
+     def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/tag/", self.base_url),
+             payload,
+         )
+
+     def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/untag/", self.base_url),
+             payload,
+         )
+
+     def prompts_fetch(
+         self,
+         project_id: str,
+         name: str,
+         commit_id: Optional[str] = None,
+         tag: Optional[str] = None,
+     ) -> PromptFetchResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         if commit_id is not None:
+             query_params["commit_id"] = commit_id
+         if tag is not None:
+             query_params["tag"] = tag
+         return self._request(
+             "GET",
+             url_for("/prompts/fetch/", self.base_url),
+             query_params,
+         )
+
+     def prompts_get_prompt_versions(
+         self, project_id: str, name: str
+     ) -> PromptVersionsResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         return self._request(
+             "GET",
+             url_for("/prompts/get_prompt_versions/", self.base_url),
+             query_params,
+         )
+
+     def projects_resolve(
+         self, payload: ResolveProjectNameRequest
+     ) -> ResolveProjectNameResponse:
+         return self._request(
+             "POST",
+             url_for("/projects/resolve/", self.base_url),
+             payload,
+         )
+
+     def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
+         return self._request(
+             "POST",
+             url_for("/e2e_fetch_trace/", self.base_url),
+             payload,
+         )
+
+     def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
+         return self._request(
+             "POST",
+             url_for("/e2e_fetch_span_score/", self.base_url),
+             payload,
+         )
+
+
+ class JudgmentAsyncClient:
+     __slots__ = ("base_url", "api_key", "organization_id", "client")
+
+     def __init__(self, base_url: str, api_key: str, organization_id: str):
+         self.base_url = base_url
+         self.api_key = api_key
+         self.organization_id = organization_id
+         self.client = httpx.AsyncClient(timeout=30)
+
+     async def _request(
+         self,
+         method: Literal["POST", "PATCH", "GET", "DELETE"],
+         url: str,
+         payload: Any,
+         params: Optional[Dict[str, Any]] = None,
+     ) -> Any:
+         if method == "GET":
+             r = self.client.request(
+                 method,
+                 url,
+                 params=payload if params is None else params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         else:
+             r = self.client.request(
+                 method,
+                 url,
+                 json=json_encoder(payload),
+                 params=params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         return _handle_response(await r)
+
+     async def add_to_run_eval_queue_examples(
+         self, payload: ExampleEvaluationRun
+     ) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/examples", self.base_url),
+             payload,
+         )
+
+     async def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/traces", self.base_url),
+             payload,
+         )
+
+     async def evaluate_examples(
+         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return await self._request(
+             "POST",
+             url_for("/evaluate/examples", self.base_url),
+             payload,
+             params=query_params,
+         )
+
+     async def evaluate_traces(
+         self, payload: TraceEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return await self._request(
+             "POST",
+             url_for("/evaluate/traces", self.base_url),
+             payload,
+             params=query_params,
+         )
+
+     async def log_eval_results(self, payload: EvalResults) -> LogEvalResultsResponse:
+         return await self._request(
+             "POST",
+             url_for("/log_eval_results/", self.base_url),
+             payload,
+         )
+
+     async def fetch_experiment_run(
+         self, payload: EvalResultsFetch
+     ) -> FetchExperimentRunResponse:
+         return await self._request(
+             "POST",
+             url_for("/fetch_experiment_run/", self.base_url),
+             payload,
+         )
+
+     async def datasets_insert_examples_for_judgeval(
+         self, payload: DatasetInsertExamples
+     ) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/insert_examples_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     async def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
+         return await self._request(
+             "POST",
+             url_for("/datasets/pull_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     async def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/pull_all_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     async def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/create_for_judgeval/", self.base_url),
+             payload,
+         )
+
+     async def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
+         return await self._request(
+             "POST",
+             url_for("/projects/add/", self.base_url),
+             payload,
+         )
+
+     async def projects_delete_from_judgeval(
+         self, payload: ProjectDeleteFromJudgevalResponse
+     ) -> ProjectDeleteResponse:
+         return await self._request(
+             "DELETE",
+             url_for("/projects/delete_from_judgeval/", self.base_url),
+             payload,
+         )
+
+     async def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
+         return await self._request(
+             "POST",
+             url_for("/scorer_exists/", self.base_url),
+             payload,
+         )
+
+     async def save_scorer(
+         self, payload: SavePromptScorerRequest
+     ) -> SavePromptScorerResponse:
+         return await self._request(
+             "POST",
+             url_for("/save_scorer/", self.base_url),
+             payload,
+         )
+
+     async def fetch_scorers(
+         self, payload: FetchPromptScorersRequest
+     ) -> FetchPromptScorersResponse:
+         return await self._request(
+             "POST",
+             url_for("/fetch_scorers/", self.base_url),
+             payload,
+         )
+
+     async def upload_custom_scorer(
+         self, payload: CustomScorerUploadPayload
+     ) -> CustomScorerTemplateResponse:
+         return await self._request(
+             "POST",
+             url_for("/upload_custom_scorer/", self.base_url),
+             payload,
+         )
+
+     async def prompts_insert(
+         self, payload: PromptInsertRequest
+     ) -> PromptInsertResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/insert/", self.base_url),
+             payload,
+         )
+
+     async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/tag/", self.base_url),
+             payload,
+         )
+
+     async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/untag/", self.base_url),
+             payload,
+         )
+
+     async def prompts_fetch(
+         self,
+         project_id: str,
+         name: str,
+         commit_id: Optional[str] = None,
+         tag: Optional[str] = None,
+     ) -> PromptFetchResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         if commit_id is not None:
+             query_params["commit_id"] = commit_id
+         if tag is not None:
+             query_params["tag"] = tag
+         return await self._request(
+             "GET",
+             url_for("/prompts/fetch/", self.base_url),
+             query_params,
+         )
+
+     async def prompts_get_prompt_versions(
+         self, project_id: str, name: str
+     ) -> PromptVersionsResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         return await self._request(
+             "GET",
+             url_for("/prompts/get_prompt_versions/", self.base_url),
+             query_params,
+         )
+
+     async def projects_resolve(
+         self, payload: ResolveProjectNameRequest
+     ) -> ResolveProjectNameResponse:
+         return await self._request(
+             "POST",
+             url_for("/projects/resolve/", self.base_url),
+             payload,
+         )
+
+     async def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/e2e_fetch_trace/", self.base_url),
+             payload,
+         )
+
+     async def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/e2e_fetch_span_score/", self.base_url),
+             payload,
+         )
+
+
+ __all__ = [
+     "JudgmentSyncClient",
+     "JudgmentAsyncClient",
+ ]
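
For orientation, here is a minimal usage sketch (not part of the package diff) of the JudgmentSyncClient added above. It assumes judgeval 0.23.0 is installed; the base URL, API key, and organization id are placeholders, and each generated method simply resolves its endpoint path against base_url via url_for before issuing the request.

# Hypothetical usage sketch; credentials and base URL below are placeholders.
from judgeval.v1.internal.api import JudgmentSyncClient

client = JudgmentSyncClient(
    base_url="https://api.example.com",  # placeholder; point at your Judgment backend
    api_key="YOUR_API_KEY",              # sent as the Bearer token
    organization_id="YOUR_ORG_ID",       # sent as the X-Organization-Id header
)

# GET endpoints take keyword arguments that become query parameters;
# POST/PATCH/DELETE endpoints take a typed payload serialized to JSON.
versions = client.prompts_get_prompt_versions(project_id="PROJECT_ID", name="my-prompt")

# Any HTTP status >= 400 is raised as JudgmentAPIError carrying the server's "detail" message.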