judgeval 0.0.11__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.
Files changed (171)
  1. judgeval/__init__.py +177 -12
  2. judgeval/api/__init__.py +519 -0
  3. judgeval/api/api_types.py +407 -0
  4. judgeval/cli.py +79 -0
  5. judgeval/constants.py +76 -47
  6. judgeval/data/__init__.py +3 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +15 -56
  9. judgeval/data/judgment_types.py +450 -0
  10. judgeval/data/result.py +29 -73
  11. judgeval/data/scorer_data.py +29 -62
  12. judgeval/data/scripts/fix_default_factory.py +23 -0
  13. judgeval/data/scripts/openapi_transform.py +123 -0
  14. judgeval/data/trace.py +121 -0
  15. judgeval/dataset/__init__.py +264 -0
  16. judgeval/env.py +52 -0
  17. judgeval/evaluation/__init__.py +344 -0
  18. judgeval/exceptions.py +27 -0
  19. judgeval/integrations/langgraph/__init__.py +13 -0
  20. judgeval/integrations/openlit/__init__.py +50 -0
  21. judgeval/judges/__init__.py +2 -3
  22. judgeval/judges/base_judge.py +2 -3
  23. judgeval/judges/litellm_judge.py +100 -20
  24. judgeval/judges/together_judge.py +101 -20
  25. judgeval/judges/utils.py +20 -24
  26. judgeval/logger.py +62 -0
  27. judgeval/prompt/__init__.py +330 -0
  28. judgeval/scorers/__init__.py +18 -25
  29. judgeval/scorers/agent_scorer.py +17 -0
  30. judgeval/scorers/api_scorer.py +45 -41
  31. judgeval/scorers/base_scorer.py +83 -38
  32. judgeval/scorers/example_scorer.py +17 -0
  33. judgeval/scorers/exceptions.py +1 -0
  34. judgeval/scorers/judgeval_scorers/__init__.py +0 -148
  35. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +19 -17
  36. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +13 -19
  37. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +12 -19
  38. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +13 -19
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +15 -0
  40. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +327 -0
  41. judgeval/scorers/score.py +77 -306
  42. judgeval/scorers/utils.py +4 -199
  43. judgeval/tracer/__init__.py +1122 -2
  44. judgeval/tracer/constants.py +1 -0
  45. judgeval/tracer/exporters/__init__.py +40 -0
  46. judgeval/tracer/exporters/s3.py +119 -0
  47. judgeval/tracer/exporters/store.py +59 -0
  48. judgeval/tracer/exporters/utils.py +32 -0
  49. judgeval/tracer/keys.py +63 -0
  50. judgeval/tracer/llm/__init__.py +7 -0
  51. judgeval/tracer/llm/config.py +78 -0
  52. judgeval/tracer/llm/constants.py +9 -0
  53. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  54. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  55. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  56. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  57. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  58. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  59. judgeval/tracer/llm/llm_google/config.py +6 -0
  60. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  61. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  62. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  63. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  64. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  65. judgeval/tracer/llm/llm_openai/config.py +6 -0
  66. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  67. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  68. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  69. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  70. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  71. judgeval/tracer/llm/llm_together/config.py +6 -0
  72. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  73. judgeval/tracer/llm/providers.py +19 -0
  74. judgeval/tracer/managers.py +167 -0
  75. judgeval/tracer/processors/__init__.py +220 -0
  76. judgeval/tracer/utils.py +19 -0
  77. judgeval/trainer/__init__.py +14 -0
  78. judgeval/trainer/base_trainer.py +122 -0
  79. judgeval/trainer/config.py +128 -0
  80. judgeval/trainer/console.py +144 -0
  81. judgeval/trainer/fireworks_trainer.py +396 -0
  82. judgeval/trainer/trainable_model.py +243 -0
  83. judgeval/trainer/trainer.py +70 -0
  84. judgeval/utils/async_utils.py +39 -0
  85. judgeval/utils/decorators/__init__.py +0 -0
  86. judgeval/utils/decorators/dont_throw.py +37 -0
  87. judgeval/utils/decorators/use_once.py +13 -0
  88. judgeval/utils/file_utils.py +97 -0
  89. judgeval/utils/guards.py +36 -0
  90. judgeval/utils/meta.py +27 -0
  91. judgeval/utils/project.py +15 -0
  92. judgeval/utils/serialize.py +253 -0
  93. judgeval/utils/testing.py +70 -0
  94. judgeval/utils/url.py +10 -0
  95. judgeval/utils/version_check.py +28 -0
  96. judgeval/utils/wrappers/README.md +3 -0
  97. judgeval/utils/wrappers/__init__.py +15 -0
  98. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  99. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  100. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  101. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  102. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  103. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  104. judgeval/utils/wrappers/py.typed +0 -0
  105. judgeval/utils/wrappers/utils.py +35 -0
  106. judgeval/version.py +5 -0
  107. judgeval/warnings.py +4 -0
  108. judgeval-0.22.2.dist-info/METADATA +265 -0
  109. judgeval-0.22.2.dist-info/RECORD +112 -0
  110. judgeval-0.22.2.dist-info/entry_points.txt +2 -0
  111. judgeval/clients.py +0 -39
  112. judgeval/common/__init__.py +0 -8
  113. judgeval/common/exceptions.py +0 -28
  114. judgeval/common/logger.py +0 -189
  115. judgeval/common/tracer.py +0 -798
  116. judgeval/common/utils.py +0 -763
  117. judgeval/data/api_example.py +0 -111
  118. judgeval/data/datasets/__init__.py +0 -5
  119. judgeval/data/datasets/dataset.py +0 -286
  120. judgeval/data/datasets/eval_dataset_client.py +0 -193
  121. judgeval/data/datasets/ground_truth.py +0 -54
  122. judgeval/data/datasets/utils.py +0 -74
  123. judgeval/evaluation_run.py +0 -132
  124. judgeval/judges/mixture_of_judges.py +0 -248
  125. judgeval/judgment_client.py +0 -354
  126. judgeval/run_evaluation.py +0 -439
  127. judgeval/scorers/judgeval_scorer.py +0 -140
  128. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -19
  129. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -19
  130. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -22
  131. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -19
  132. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -32
  133. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -20
  134. judgeval/scorers/judgeval_scorers/api_scorers/tool_correctness.py +0 -19
  135. judgeval/scorers/judgeval_scorers/classifiers/__init__.py +0 -3
  136. judgeval/scorers/judgeval_scorers/classifiers/text2sql/__init__.py +0 -3
  137. judgeval/scorers/judgeval_scorers/classifiers/text2sql/text2sql_scorer.py +0 -54
  138. judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -24
  139. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
  140. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -277
  141. judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
  142. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
  143. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
  144. judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
  145. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
  146. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
  147. judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
  148. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
  149. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
  150. judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
  151. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
  152. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
  153. judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
  154. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
  155. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -325
  156. judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
  157. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
  158. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -263
  159. judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
  160. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
  161. judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
  162. judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
  163. judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
  164. judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -550
  165. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/__init__.py +0 -3
  166. judgeval/scorers/judgeval_scorers/local_implementations/tool_correctness/tool_correctness_scorer.py +0 -157
  167. judgeval/scorers/prompt_scorer.py +0 -439
  168. judgeval-0.0.11.dist-info/METADATA +0 -36
  169. judgeval-0.0.11.dist-info/RECORD +0 -84
  170. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  171. {judgeval-0.0.11.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/__init__.py CHANGED
@@ -1,12 +1,177 @@
- # Import key components that should be publicly accessible
- from judgeval.clients import client, langfuse, together_client
- from judgeval.judgment_client import JudgmentClient
-
- __all__ = [
-     # Clients
-     'client',
-     'langfuse',
-     'together_client',
-
-     'JudgmentClient',
- ]
+ from __future__ import annotations
+
+ from judgeval.data.result import ScoringResult
+ from judgeval.evaluation import run_eval
+ from judgeval.data.evaluation_run import ExampleEvaluationRun
+
+
+ from typing import List, Optional, Union, Sequence
+ import ast
+ from judgeval.scorers import ExampleAPIScorerConfig
+ from judgeval.scorers.example_scorer import ExampleScorer
+ from judgeval.data.example import Example
+ from judgeval.logger import judgeval_logger
+ from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID
+ from judgeval.utils.meta import SingletonMeta
+ from judgeval.exceptions import JudgmentRuntimeError, JudgmentTestError
+ from judgeval.api import JudgmentSyncClient
+ from judgeval.utils.file_utils import extract_scorer_name
+ from judgeval.utils.guards import expect_api_key, expect_organization_id
+ from judgeval.utils.version_check import check_latest_version
+ from judgeval.utils.testing import assert_test_results
+
+ check_latest_version()
+
+
+ class JudgmentClient(metaclass=SingletonMeta):
+     __slots__ = ("api_key", "organization_id")
+
+     def __init__(
+         self,
+         api_key: Optional[str] = None,
+         organization_id: Optional[str] = None,
+     ):
+         _api_key = api_key or JUDGMENT_API_KEY
+         _organization_id = organization_id or JUDGMENT_ORG_ID
+
+         self.api_key = expect_api_key(_api_key)
+         self.organization_id = expect_organization_id(_organization_id)
+
+     def run_evaluation(
+         self,
+         examples: List[Example],
+         scorers: Sequence[Union[ExampleAPIScorerConfig, ExampleScorer, None]],
+         project_name: str = "default_project",
+         eval_run_name: str = "default_eval_run",
+         model: Optional[str] = None,
+         assert_test: bool = False,
+     ) -> List[ScoringResult]:
+         try:
+             for scorer in scorers:
+                 if scorer is None:
+                     raise ValueError(
+                         "Failed to run evaluation: At least one Prompt Scorer was not successfully retrieved."
+                     )
+             eval = ExampleEvaluationRun(
+                 project_name=project_name,
+                 eval_name=eval_run_name,
+                 examples=examples,
+                 scorers=scorers,  # type: ignore
+                 model=model,
+             )
+
+             results = run_eval(eval)
+             if assert_test:
+                 assert_test_results(results)
+
+             return results
+
+         except JudgmentTestError as e:
+             raise JudgmentTestError(e)
+         except ValueError as e:
+             raise ValueError(
+                 f"Please check your EvaluationRun object, one or more fields are invalid: \n{e}"
+             )
+         except Exception as e:
+             raise JudgmentRuntimeError(
+                 f"An unexpected error occurred during evaluation: {e}"
+             ) from e
+
+     def upload_custom_scorer(
+         self,
+         scorer_file_path: str,
+         requirements_file_path: Optional[str] = None,
+         unique_name: Optional[str] = None,
+         overwrite: bool = False,
+     ) -> bool:
+         """
+         Upload a custom ExampleScorer from files to the backend.
+
+         Args:
+             scorer_file_path: Path to the Python file containing the CustomScorer class
+             requirements_file_path: Optional path to requirements.txt
+             unique_name: Optional unique identifier (auto-detected from scorer.name if not provided)
+             overwrite: Whether to overwrite an existing scorer of the same name
+
+         Returns:
+             bool: True if the upload succeeded
+
+         Raises:
+             ValueError: If the scorer file is invalid
+             FileNotFoundError: If the scorer file doesn't exist
+         """
+         import os
+
+         if not os.path.exists(scorer_file_path):
+             raise FileNotFoundError(f"Scorer file not found: {scorer_file_path}")
+
+         # Auto-detect scorer name if not provided
+         if unique_name is None:
+             unique_name = extract_scorer_name(scorer_file_path)
+             judgeval_logger.info(f"Auto-detected scorer name: '{unique_name}'")
+
+         # Read scorer code
+         with open(scorer_file_path, "r") as f:
+             scorer_code = f.read()
+
+         try:
+             tree = ast.parse(scorer_code, filename=scorer_file_path)
+         except SyntaxError as e:
+             error_msg = f"Invalid Python syntax in {scorer_file_path}: {e}"
+             judgeval_logger.error(error_msg)
+             raise ValueError(error_msg)
+
+         scorer_classes = []
+         for node in ast.walk(tree):
+             if isinstance(node, ast.ClassDef):
+                 for base in node.bases:
+                     if (isinstance(base, ast.Name) and base.id == "ExampleScorer") or (
+                         isinstance(base, ast.Attribute) and base.attr == "ExampleScorer"
+                     ):
+                         scorer_classes.append(node.name)
+
+         if len(scorer_classes) > 1:
+             error_msg = f"Multiple ExampleScorer classes found in {scorer_file_path}: {scorer_classes}. Please only upload one scorer class per file."
+             judgeval_logger.error(error_msg)
+             raise ValueError(error_msg)
+         elif len(scorer_classes) == 0:
+             error_msg = f"No ExampleScorer class was found in {scorer_file_path}. Please ensure the file contains a valid scorer class that inherits from ExampleScorer."
+             judgeval_logger.error(error_msg)
+             raise ValueError(error_msg)
+
+         # Read requirements (optional)
+         requirements_text = ""
+         if requirements_file_path and os.path.exists(requirements_file_path):
+             with open(requirements_file_path, "r") as f:
+                 requirements_text = f.read()
+
+         try:
+             if not self.api_key or not self.organization_id:
+                 raise ValueError("Judgment API key and organization ID are required")
+             client = JudgmentSyncClient(
+                 api_key=self.api_key,
+                 organization_id=self.organization_id,
+             )
+             response = client.upload_custom_scorer(
+                 payload={
+                     "scorer_name": unique_name,
+                     "scorer_code": scorer_code,
+                     "requirements_text": requirements_text,
+                     "overwrite": overwrite,
+                 }
+             )
+
+             if response.get("status") == "success":
+                 judgeval_logger.info(
+                     f"Successfully uploaded custom scorer: {unique_name}"
+                 )
+                 return True
+             else:
+                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
+                 return False
+
+         except Exception:
+             raise
+
+
+ __all__ = ("JudgmentClient",)
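
For context on the breaking change above: 0.0.11 re-exported client, langfuse, and together_client at the package root, while 0.22.2 exposes only JudgmentClient, now a singleton that falls back to the JUDGMENT_API_KEY and JUDGMENT_ORG_ID environment variables. A minimal usage sketch of the new entry point, based only on the signatures in this diff; the FaithfulnessScorer class name and the Example constructor fields are assumptions, since neither is defined in the hunks shown here:

# Sketch only: Example's fields and the FaithfulnessScorer class name are
# assumed; this diff defines neither. Only the import paths appear in the
# file list above.
from judgeval import JudgmentClient
from judgeval.data.example import Example
from judgeval.scorers.judgeval_scorers.api_scorers.faithfulness import FaithfulnessScorer  # assumed class name

client = JudgmentClient()  # reads JUDGMENT_API_KEY / JUDGMENT_ORG_ID when args are omitted

results = client.run_evaluation(
    examples=[Example(input="What is 2 + 2?", actual_output="4")],  # assumed fields
    scorers=[FaithfulnessScorer()],
    project_name="demo_project",
    eval_run_name="demo_run",
    assert_test=True,  # raises JudgmentTestError when a scorer fails
)

# A custom scorer defined in a standalone file can be registered the same way
# (the file path is illustrative):
client.upload_custom_scorer("my_scorer.py", requirements_file_path="requirements.txt")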
judgeval/api/__init__.py ADDED
@@ -0,0 +1,519 @@
+ from typing import Dict, Any, Mapping, Literal, Optional
+ import httpx
+ from httpx import Response
+ from judgeval.exceptions import JudgmentAPIError
+ from judgeval.utils.url import url_for
+ from judgeval.utils.serialize import json_encoder
+ from judgeval.api.api_types import *
+
+
+ def _headers(api_key: str, organization_id: str) -> Mapping[str, str]:
+     return {
+         "Content-Type": "application/json",
+         "Authorization": f"Bearer {api_key}",
+         "X-Organization-Id": organization_id,
+     }
+
+
+ def _handle_response(r: Response) -> Any:
+     if r.status_code >= 400:
+         try:
+             detail = r.json().get("detail", "")
+         except Exception:
+             detail = r.text
+         raise JudgmentAPIError(r.status_code, detail, r)
+     return r.json()
+
+
+ class JudgmentSyncClient:
+     __slots__ = ("api_key", "organization_id", "client")
+
+     def __init__(self, api_key: str, organization_id: str):
+         self.api_key = api_key
+         self.organization_id = organization_id
+         self.client = httpx.Client(timeout=30)
+
+     def _request(
+         self,
+         method: Literal["POST", "PATCH", "GET", "DELETE"],
+         url: str,
+         payload: Any,
+         params: Optional[Dict[str, Any]] = None,
+     ) -> Any:
+         if method == "GET":
+             r = self.client.request(
+                 method,
+                 url,
+                 params=payload if params is None else params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         else:
+             r = self.client.request(
+                 method,
+                 url,
+                 json=json_encoder(payload),
+                 params=params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         return _handle_response(r)
+
+     def add_to_run_eval_queue_examples(self, payload: ExampleEvaluationRun) -> Any:
+         return self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/examples"),
+             payload,
+         )
+
+     def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
+         return self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/traces"),
+             payload,
+         )
+
+     def evaluate_examples(
+         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return self._request(
+             "POST",
+             url_for("/evaluate/examples"),
+             payload,
+             params=query_params,
+         )
+
+     def evaluate_traces(
+         self, payload: TraceEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return self._request(
+             "POST",
+             url_for("/evaluate/traces"),
+             payload,
+             params=query_params,
+         )
+
+     def log_eval_results(self, payload: EvalResults) -> Any:
+         return self._request(
+             "POST",
+             url_for("/log_eval_results/"),
+             payload,
+         )
+
+     def fetch_experiment_run(self, payload: EvalResultsFetch) -> Any:
+         return self._request(
+             "POST",
+             url_for("/fetch_experiment_run/"),
+             payload,
+         )
+
+     def datasets_insert_examples_for_judgeval(
+         self, payload: DatasetInsertExamples
+     ) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/insert_examples_for_judgeval/"),
+             payload,
+         )
+
+     def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
+         return self._request(
+             "POST",
+             url_for("/datasets/pull_for_judgeval/"),
+             payload,
+         )
+
+     def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/pull_all_for_judgeval/"),
+             payload,
+         )
+
+     def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
+         return self._request(
+             "POST",
+             url_for("/datasets/create_for_judgeval/"),
+             payload,
+         )
+
+     def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
+         return self._request(
+             "POST",
+             url_for("/projects/add/"),
+             payload,
+         )
+
+     def projects_delete_from_judgeval(
+         self, payload: ProjectDeleteFromJudgevalResponse
+     ) -> ProjectDeleteResponse:
+         return self._request(
+             "DELETE",
+             url_for("/projects/delete_from_judgeval/"),
+             payload,
+         )
+
+     def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
+         return self._request(
+             "POST",
+             url_for("/scorer_exists/"),
+             payload,
+         )
+
+     def save_scorer(self, payload: SavePromptScorerRequest) -> SavePromptScorerResponse:
+         return self._request(
+             "POST",
+             url_for("/save_scorer/"),
+             payload,
+         )
+
+     def fetch_scorers(
+         self, payload: FetchPromptScorersRequest
+     ) -> FetchPromptScorersResponse:
+         return self._request(
+             "POST",
+             url_for("/fetch_scorers/"),
+             payload,
+         )
+
+     def upload_custom_scorer(
+         self, payload: CustomScorerUploadPayload
+     ) -> CustomScorerTemplateResponse:
+         return self._request(
+             "POST",
+             url_for("/upload_custom_scorer/"),
+             payload,
+         )
+
+     def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/insert/"),
+             payload,
+         )
+
+     def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/tag/"),
+             payload,
+         )
+
+     def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+         return self._request(
+             "POST",
+             url_for("/prompts/untag/"),
+             payload,
+         )
+
+     def prompts_fetch(
+         self,
+         project_id: str,
+         name: str,
+         commit_id: Optional[str] = None,
+         tag: Optional[str] = None,
+     ) -> PromptFetchResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         if commit_id is not None:
+             query_params["commit_id"] = commit_id
+         if tag is not None:
+             query_params["tag"] = tag
+         return self._request(
+             "GET",
+             url_for("/prompts/fetch/"),
+             query_params,
+         )
+
+     def prompts_get_prompt_versions(
+         self, project_id: str, name: str
+     ) -> PromptVersionsResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         return self._request(
+             "GET",
+             url_for("/prompts/get_prompt_versions/"),
+             query_params,
+         )
+
+     def projects_resolve(
+         self, payload: ResolveProjectNameRequest
+     ) -> ResolveProjectNameResponse:
+         return self._request(
+             "POST",
+             url_for("/projects/resolve/"),
+             payload,
+         )
+
+     def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
+         return self._request(
+             "POST",
+             url_for("/e2e_fetch_trace/"),
+             payload,
+         )
+
+     def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
+         return self._request(
+             "POST",
+             url_for("/e2e_fetch_span_score/"),
+             payload,
+         )
+
+
+ class JudgmentAsyncClient:
+     __slots__ = ("api_key", "organization_id", "client")
+
+     def __init__(self, api_key: str, organization_id: str):
+         self.api_key = api_key
+         self.organization_id = organization_id
+         self.client = httpx.AsyncClient(timeout=30)
+
+     async def _request(
+         self,
+         method: Literal["POST", "PATCH", "GET", "DELETE"],
+         url: str,
+         payload: Any,
+         params: Optional[Dict[str, Any]] = None,
+     ) -> Any:
+         if method == "GET":
+             r = self.client.request(
+                 method,
+                 url,
+                 params=payload if params is None else params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         else:
+             r = self.client.request(
+                 method,
+                 url,
+                 json=json_encoder(payload),
+                 params=params,
+                 headers=_headers(self.api_key, self.organization_id),
+             )
+         return _handle_response(await r)
+
+     async def add_to_run_eval_queue_examples(
+         self, payload: ExampleEvaluationRun
+     ) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/examples"),
+             payload,
+         )
+
+     async def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/add_to_run_eval_queue/traces"),
+             payload,
+         )
+
+     async def evaluate_examples(
+         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return await self._request(
+             "POST",
+             url_for("/evaluate/examples"),
+             payload,
+             params=query_params,
+         )
+
+     async def evaluate_traces(
+         self, payload: TraceEvaluationRun, stream: Optional[str] = None
+     ) -> EvaluateResponse:
+         query_params = {}
+         if stream is not None:
+             query_params["stream"] = stream
+         return await self._request(
+             "POST",
+             url_for("/evaluate/traces"),
+             payload,
+             params=query_params,
+         )
+
+     async def log_eval_results(self, payload: EvalResults) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/log_eval_results/"),
+             payload,
+         )
+
+     async def fetch_experiment_run(self, payload: EvalResultsFetch) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/fetch_experiment_run/"),
+             payload,
+         )
+
+     async def datasets_insert_examples_for_judgeval(
+         self, payload: DatasetInsertExamples
+     ) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/insert_examples_for_judgeval/"),
+             payload,
+         )
+
+     async def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
+         return await self._request(
+             "POST",
+             url_for("/datasets/pull_for_judgeval/"),
+             payload,
+         )
+
+     async def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/pull_all_for_judgeval/"),
+             payload,
+         )
+
+     async def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/datasets/create_for_judgeval/"),
+             payload,
+         )
+
+     async def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
+         return await self._request(
+             "POST",
+             url_for("/projects/add/"),
+             payload,
+         )
+
+     async def projects_delete_from_judgeval(
+         self, payload: ProjectDeleteFromJudgevalResponse
+     ) -> ProjectDeleteResponse:
+         return await self._request(
+             "DELETE",
+             url_for("/projects/delete_from_judgeval/"),
+             payload,
+         )
+
+     async def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
+         return await self._request(
+             "POST",
+             url_for("/scorer_exists/"),
+             payload,
+         )
+
+     async def save_scorer(
+         self, payload: SavePromptScorerRequest
+     ) -> SavePromptScorerResponse:
+         return await self._request(
+             "POST",
+             url_for("/save_scorer/"),
+             payload,
+         )
+
+     async def fetch_scorers(
+         self, payload: FetchPromptScorersRequest
+     ) -> FetchPromptScorersResponse:
+         return await self._request(
+             "POST",
+             url_for("/fetch_scorers/"),
+             payload,
+         )
+
+     async def upload_custom_scorer(
+         self, payload: CustomScorerUploadPayload
+     ) -> CustomScorerTemplateResponse:
+         return await self._request(
+             "POST",
+             url_for("/upload_custom_scorer/"),
+             payload,
+         )
+
+     async def prompts_insert(
+         self, payload: PromptInsertRequest
+     ) -> PromptInsertResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/insert/"),
+             payload,
+         )
+
+     async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/tag/"),
+             payload,
+         )
+
+     async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
+         return await self._request(
+             "POST",
+             url_for("/prompts/untag/"),
+             payload,
+         )
+
+     async def prompts_fetch(
+         self,
+         project_id: str,
+         name: str,
+         commit_id: Optional[str] = None,
+         tag: Optional[str] = None,
+     ) -> PromptFetchResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         if commit_id is not None:
+             query_params["commit_id"] = commit_id
+         if tag is not None:
+             query_params["tag"] = tag
+         return await self._request(
+             "GET",
+             url_for("/prompts/fetch/"),
+             query_params,
+         )
+
+     async def prompts_get_prompt_versions(
+         self, project_id: str, name: str
+     ) -> PromptVersionsResponse:
+         query_params = {}
+         query_params["project_id"] = project_id
+         query_params["name"] = name
+         return await self._request(
+             "GET",
+             url_for("/prompts/get_prompt_versions/"),
+             query_params,
+         )
+
+     async def projects_resolve(
+         self, payload: ResolveProjectNameRequest
+     ) -> ResolveProjectNameResponse:
+         return await self._request(
+             "POST",
+             url_for("/projects/resolve/"),
+             payload,
+         )
+
+     async def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/e2e_fetch_trace/"),
+             payload,
+         )
+
+     async def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
+         return await self._request(
+             "POST",
+             url_for("/e2e_fetch_span_score/"),
+             payload,
+         )
+
+
+ __all__ = [
+     "JudgmentSyncClient",
+     "JudgmentAsyncClient",
+ ]
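
The sync and async clients are mirrors of the same endpoint list: every wrapper funnels into _request, which attaches the bearer-token headers built by _headers and lets _handle_response raise JudgmentAPIError for any status code >= 400 (GET payloads are routed into query params; everything else is JSON-encoded into the body). A hedged sketch of driving the sync client directly; the payload keys copy the upload_custom_scorer call in judgeval/__init__.py above, and real code would normally go through JudgmentClient instead:

# Sketch only: JudgmentClient normally wraps this. Payload keys mirror the
# upload_custom_scorer call shown in judgeval/__init__.py above.
from judgeval.api import JudgmentSyncClient
from judgeval.exceptions import JudgmentAPIError

api = JudgmentSyncClient(api_key="...", organization_id="...")
try:
    response = api.upload_custom_scorer(
        payload={
            "scorer_name": "my_scorer",
            "scorer_code": "...",  # file contents, as read by JudgmentClient
            "requirements_text": "",
            "overwrite": False,
        }
    )
    print(response.get("status"))
except JudgmentAPIError as err:
    # Raised by _handle_response for any status >= 400, constructed with the
    # status code, the response's "detail" field, and the httpx Response.
    print("API call failed:", err)

JudgmentAsyncClient exposes the same methods as coroutines over httpx.AsyncClient; the only caller-visible difference is the await.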