judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,206 +1,456 @@
1
1
  # generated by datamodel-codegen:
2
- # filename: openapi_new.json
3
- # timestamp: 2025-07-17T03:14:16+00:00
2
+ # filename: .openapi.json
3
+ # timestamp: 2025-11-18T18:52:09+00:00
4
4
 
5
5
  from __future__ import annotations
6
-
7
6
  from typing import Annotated, Any, Dict, List, Optional, Union
7
+ from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel
8
+ from enum import Enum
8
9
 
9
- from pydantic import BaseModel, Field
10
10
 
11
+ class TraceAndSpanId(RootModel[List]):
12
+ root: Annotated[List, Field(max_length=2, min_length=2)]
11
13
 
12
- class ValidationErrorJudgmentType(BaseModel):
13
- loc: Annotated[List[Union[str, int]], Field(title="Location")]
14
- msg: Annotated[str, Field(title="Message")]
15
- type: Annotated[str, Field(title="Error Type")]
14
+
15
+ class LogEvalResultsResponse(BaseModel):
16
+ ui_results_url: Annotated[str, Field(title="Ui Results Url")]
17
+
18
+
19
+ class EvalResultsFetch(BaseModel):
20
+ experiment_run_id: Annotated[str, Field(title="Experiment Run Id")]
21
+ project_name: Annotated[str, Field(title="Project Name")]
22
+
23
+
24
+ class FetchExperimentRunResponse(BaseModel):
25
+ results: Annotated[Optional[List], Field(title="Results")] = None
26
+ ui_results_url: Annotated[Optional[str], Field(title="Ui Results Url")] = None
27
+
28
+
29
+ class DatasetFetch(BaseModel):
30
+ dataset_name: Annotated[str, Field(title="Dataset Name")]
31
+ project_name: Annotated[str, Field(title="Project Name")]
32
+
33
+
34
+ class DatasetsFetch(BaseModel):
35
+ project_name: Annotated[str, Field(title="Project Name")]
36
+
37
+
38
+ class ProjectAdd(BaseModel):
39
+ project_name: Annotated[str, Field(title="Project Name")]
40
+
41
+
42
+ class ProjectAddResponse(BaseModel):
43
+ project_id: Annotated[str, Field(title="Project Id")]
44
+
45
+
46
+ class ProjectDeleteFromJudgevalResponse(BaseModel):
47
+ project_name: Annotated[str, Field(title="Project Name")]
48
+
49
+
50
+ class ProjectDeleteResponse(BaseModel):
51
+ message: Annotated[str, Field(title="Message")]
52
+
53
+
54
+ class ScorerExistsRequest(BaseModel):
55
+ name: Annotated[str, Field(title="Name")]
56
+
57
+
58
+ class ScorerExistsResponse(BaseModel):
59
+ exists: Annotated[bool, Field(title="Exists")]
16
60
 
17
61
 
18
- class ScorerDataJudgmentType(BaseModel):
62
+ class SavePromptScorerRequest(BaseModel):
19
63
  name: Annotated[str, Field(title="Name")]
64
+ prompt: Annotated[str, Field(title="Prompt")]
20
65
  threshold: Annotated[float, Field(title="Threshold")]
21
- success: Annotated[bool, Field(title="Success")]
66
+ model: Annotated[Optional[str], Field(title="Model")] = "gpt-5-mini"
67
+ is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
68
+ options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
69
+ description: Annotated[Optional[str], Field(title="Description")] = None
70
+
71
+
72
+ class FetchPromptScorersRequest(BaseModel):
73
+ names: Annotated[Optional[List[str]], Field(title="Names")] = None
74
+ is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = None
75
+
76
+
77
+ class CustomScorerUploadPayload(BaseModel):
78
+ scorer_name: Annotated[str, Field(title="Scorer Name")]
79
+ scorer_code: Annotated[str, Field(title="Scorer Code")]
80
+ requirements_text: Annotated[str, Field(title="Requirements Text")]
81
+ overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
82
+
83
+
84
+ class CustomScorerTemplateResponse(BaseModel):
85
+ scorer_name: Annotated[str, Field(title="Scorer Name")]
86
+ status: Annotated[str, Field(title="Status")]
87
+ message: Annotated[str, Field(title="Message")]
88
+
89
+
90
+ class PromptInsertRequest(BaseModel):
91
+ project_id: Annotated[str, Field(title="Project Id")]
92
+ name: Annotated[str, Field(title="Name")]
93
+ prompt: Annotated[str, Field(title="Prompt")]
94
+ tags: Annotated[List[str], Field(title="Tags")]
95
+
96
+
97
+ class PromptInsertResponse(BaseModel):
98
+ commit_id: Annotated[str, Field(title="Commit Id")]
99
+ parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
100
+ created_at: Annotated[str, Field(title="Created At")]
101
+
102
+
103
+ class PromptTagRequest(BaseModel):
104
+ project_id: Annotated[str, Field(title="Project Id")]
105
+ name: Annotated[str, Field(title="Name")]
106
+ commit_id: Annotated[str, Field(title="Commit Id")]
107
+ tags: Annotated[List[str], Field(title="Tags")]
108
+
109
+
110
+ class PromptTagResponse(BaseModel):
111
+ commit_id: Annotated[str, Field(title="Commit Id")]
112
+
113
+
114
+ class PromptUntagRequest(BaseModel):
115
+ project_id: Annotated[str, Field(title="Project Id")]
116
+ name: Annotated[str, Field(title="Name")]
117
+ tags: Annotated[List[str], Field(title="Tags")]
118
+
119
+
120
+ class PromptUntagResponse(BaseModel):
121
+ commit_ids: Annotated[List[str], Field(title="Commit Ids")]
122
+
123
+
124
+ class ResolveProjectNameRequest(BaseModel):
125
+ project_name: Annotated[str, Field(title="Project Name")]
126
+
127
+
128
+ class ResolveProjectNameResponse(BaseModel):
129
+ project_id: Annotated[str, Field(title="Project Id")]
130
+
131
+
132
+ class TraceIdRequest(BaseModel):
133
+ trace_id: Annotated[str, Field(title="Trace Id")]
134
+
135
+
136
+ class SpanScoreRequest(BaseModel):
137
+ span_id: Annotated[str, Field(title="Span Id")]
138
+ trace_id: Annotated[str, Field(title="Trace Id")]
139
+
140
+
141
+ class BaseScorer(BaseModel):
142
+ score_type: Annotated[str, Field(title="Score Type")]
143
+ threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
144
+ name: Annotated[Optional[str], Field(title="Name")] = None
145
+ class_name: Annotated[Optional[str], Field(title="Class Name")] = None
22
146
  score: Annotated[Optional[float], Field(title="Score")] = None
23
- reason: Annotated[Optional[str], Field(title="Reason")] = None
24
- strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
25
- evaluation_model: Annotated[
26
- Optional[Union[List[str], str]], Field(title="Evaluation Model")
147
+ score_breakdown: Annotated[
148
+ Optional[Dict[str, Any]], Field(title="Score Breakdown")
27
149
  ] = None
150
+ reason: Annotated[Optional[str], Field(title="Reason")] = ""
151
+ using_native_model: Annotated[Optional[bool], Field(title="Using Native Model")] = (
152
+ None
153
+ )
154
+ success: Annotated[Optional[bool], Field(title="Success")] = None
155
+ model: Annotated[Optional[str], Field(title="Model")] = None
156
+ model_client: Annotated[Any, Field(title="Model Client")] = None
157
+ strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
28
158
  error: Annotated[Optional[str], Field(title="Error")] = None
29
159
  additional_metadata: Annotated[
30
160
  Optional[Dict[str, Any]], Field(title="Additional Metadata")
31
161
  ] = None
162
+ user: Annotated[Optional[str], Field(title="User")] = None
163
+ server_hosted: Annotated[Optional[bool], Field(title="Server Hosted")] = False
32
164
 
33
165
 
34
- class ScorerConfigJudgmentType(BaseModel):
166
+ class ScorerConfig(BaseModel):
35
167
  score_type: Annotated[str, Field(title="Score Type")]
36
168
  name: Annotated[Optional[str], Field(title="Name")] = None
37
169
  threshold: Annotated[Optional[float], Field(title="Threshold")] = 0.5
38
170
  strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = False
39
- required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = (
40
- Field(default_factory=list)
41
- )
171
+ required_params: Annotated[Optional[List[str]], Field(title="Required Params")] = []
42
172
  kwargs: Annotated[Optional[Dict[str, Any]], Field(title="Kwargs")] = None
43
173
 
44
174
 
45
- class TraceUsageJudgmentType(BaseModel):
46
- prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
47
- completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
48
- cache_creation_input_tokens: Annotated[
49
- Optional[int], Field(title="Cache Creation Input Tokens")
50
- ] = None
51
- cache_read_input_tokens: Annotated[
52
- Optional[int], Field(title="Cache Read Input Tokens")
53
- ] = None
54
- total_tokens: Annotated[Optional[int], Field(title="Total Tokens")] = None
55
- prompt_tokens_cost_usd: Annotated[
56
- Optional[float], Field(title="Prompt Tokens Cost Usd")
57
- ] = None
58
- completion_tokens_cost_usd: Annotated[
59
- Optional[float], Field(title="Completion Tokens Cost Usd")
60
- ] = None
61
- total_cost_usd: Annotated[Optional[float], Field(title="Total Cost Usd")] = None
62
- model_name: Annotated[Optional[str], Field(title="Model Name")] = None
175
+ class Example(BaseModel):
176
+ model_config = ConfigDict(
177
+ extra="allow",
178
+ )
179
+ example_id: Annotated[Optional[str], Field(title="Example Id")] = None
180
+ created_at: Annotated[Optional[str], Field(title="Created At")] = None
181
+ name: Annotated[Optional[str], Field(title="Name")] = None
63
182
 
64
183
 
65
- class ToolJudgmentType(BaseModel):
66
- tool_name: Annotated[str, Field(title="Tool Name")]
67
- parameters: Annotated[Optional[Dict[str, Any]], Field(title="Parameters")] = None
68
- agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
69
- result_dependencies: Annotated[
70
- Optional[List[Dict[str, Any]]], Field(title="Result Dependencies")
71
- ] = None
72
- action_dependencies: Annotated[
73
- Optional[List[Dict[str, Any]]], Field(title="Action Dependencies")
74
- ] = None
75
- require_all: Annotated[Optional[bool], Field(title="Require All")] = None
184
+ class ValidationError(BaseModel):
185
+ loc: Annotated[List[Union[str, int]], Field(title="Location")]
186
+ msg: Annotated[str, Field(title="Message")]
187
+ type: Annotated[str, Field(title="Error Type")]
76
188
 
77
189
 
78
- class HTTPValidationErrorJudgmentType(BaseModel):
79
- detail: Annotated[
80
- Optional[List[ValidationErrorJudgmentType]], Field(title="Detail")
81
- ] = None
190
+ class UsageInfo(BaseModel):
191
+ total_judgees: Annotated[int, Field(title="Total Judgees")]
192
+ regular_use: Annotated[int, Field(title="Regular Use")]
193
+ pay_as_you_go_use: Annotated[int, Field(title="Pay As You Go Use")]
194
+ remaining_regular: Annotated[int, Field(title="Remaining Regular")]
195
+ remaining_after: Annotated[int, Field(title="Remaining After")]
82
196
 
83
197
 
84
- class TraceSpanJudgmentType(BaseModel):
85
- span_id: Annotated[str, Field(title="Span Id")]
86
- trace_id: Annotated[str, Field(title="Trace Id")]
87
- function: Annotated[str, Field(title="Function")]
88
- depth: Annotated[int, Field(title="Depth")]
89
- created_at: Annotated[Any, Field(title="Created At")] = None
90
- parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
91
- span_type: Annotated[Optional[str], Field(title="Span Type")] = "span"
92
- inputs: Annotated[Optional[Dict[str, Any]], Field(title="Inputs")] = None
93
- error: Annotated[Optional[Dict[str, Any]], Field(title="Error")] = None
94
- output: Annotated[Any, Field(title="Output")] = None
95
- usage: Optional[TraceUsageJudgmentType] = None
96
- duration: Annotated[Optional[float], Field(title="Duration")] = None
97
- expected_tools: Annotated[
98
- Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
99
- ] = None
198
+ class DatasetKind(Enum):
199
+ trace = "trace"
200
+ example = "example"
201
+
202
+
203
+ class PromptScorer(BaseModel):
204
+ id: Annotated[str, Field(title="Id")]
205
+ user_id: Annotated[str, Field(title="User Id")]
206
+ organization_id: Annotated[str, Field(title="Organization Id")]
207
+ name: Annotated[str, Field(title="Name")]
208
+ prompt: Annotated[str, Field(title="Prompt")]
209
+ threshold: Annotated[float, Field(title="Threshold")]
210
+ model: Annotated[Optional[str], Field(title="Model")] = "gpt-5-mini"
211
+ options: Annotated[Optional[Dict[str, float]], Field(title="Options")] = None
212
+ description: Annotated[Optional[str], Field(title="Description")] = None
213
+ created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
214
+ updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
215
+ is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
216
+ is_bucket_rubric: Annotated[Optional[bool], Field(title="Is Bucket Rubric")] = None
217
+
218
+
219
+ class PromptCommitInfo(BaseModel):
220
+ name: Annotated[str, Field(title="Name")]
221
+ prompt: Annotated[str, Field(title="Prompt")]
222
+ tags: Annotated[List[str], Field(title="Tags")]
223
+ commit_id: Annotated[str, Field(title="Commit Id")]
224
+ parent_commit_id: Annotated[Optional[str], Field(title="Parent Commit Id")] = None
225
+ created_at: Annotated[str, Field(title="Created At")]
226
+ first_name: Annotated[str, Field(title="First Name")]
227
+ last_name: Annotated[str, Field(title="Last Name")]
228
+ user_email: Annotated[str, Field(title="User Email")]
229
+
230
+
231
+ class ScorerData(BaseModel):
232
+ id: Annotated[Optional[str], Field(title="Id")] = None
233
+ name: Annotated[str, Field(title="Name")]
234
+ threshold: Annotated[float, Field(title="Threshold")]
235
+ success: Annotated[bool, Field(title="Success")]
236
+ score: Annotated[Optional[float], Field(title="Score")] = None
237
+ reason: Annotated[Optional[str], Field(title="Reason")] = None
238
+ strict_mode: Annotated[Optional[bool], Field(title="Strict Mode")] = None
239
+ evaluation_model: Annotated[Optional[str], Field(title="Evaluation Model")] = None
240
+ error: Annotated[Optional[str], Field(title="Error")] = None
100
241
  additional_metadata: Annotated[
101
242
  Optional[Dict[str, Any]], Field(title="Additional Metadata")
102
243
  ] = None
103
- has_evaluation: Annotated[Optional[bool], Field(title="Has Evaluation")] = False
104
- agent_name: Annotated[Optional[str], Field(title="Agent Name")] = None
105
- state_before: Annotated[Optional[Dict[str, Any]], Field(title="State Before")] = (
106
- None
107
- )
108
- state_after: Annotated[Optional[Dict[str, Any]], Field(title="State After")] = None
109
- update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
110
244
 
111
245
 
112
- class ExampleJudgmentType(BaseModel):
113
- input: Annotated[Optional[Union[str, Dict[str, Any]]], Field(title="Input")] = None
114
- actual_output: Annotated[
115
- Optional[Union[str, List[str]]], Field(title="Actual Output")
116
- ] = None
117
- expected_output: Annotated[
118
- Optional[Union[str, List[str]]], Field(title="Expected Output")
119
- ] = None
120
- context: Annotated[Optional[List[str]], Field(title="Context")] = None
121
- retrieval_context: Annotated[
122
- Optional[List[str]], Field(title="Retrieval Context")
246
+ class OtelTraceSpan(BaseModel):
247
+ organization_id: Annotated[str, Field(title="Organization Id")]
248
+ project_id: Annotated[Optional[str], Field(title="Project Id")] = None
249
+ user_id: Annotated[str, Field(title="User Id")]
250
+ timestamp: Annotated[str, Field(title="Timestamp")]
251
+ trace_id: Annotated[str, Field(title="Trace Id")]
252
+ span_id: Annotated[str, Field(title="Span Id")]
253
+ parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
254
+ trace_state: Annotated[Optional[str], Field(title="Trace State")] = None
255
+ span_name: Annotated[Optional[str], Field(title="Span Name")] = None
256
+ span_kind: Annotated[Optional[str], Field(title="Span Kind")] = None
257
+ service_name: Annotated[Optional[str], Field(title="Service Name")] = None
258
+ resource_attributes: Annotated[
259
+ Optional[Dict[str, Any]], Field(title="Resource Attributes")
123
260
  ] = None
124
- additional_metadata: Annotated[
125
- Optional[Dict[str, Any]], Field(title="Additional Metadata")
261
+ span_attributes: Annotated[
262
+ Optional[Dict[str, Any]], Field(title="Span Attributes")
126
263
  ] = None
127
- tools_called: Annotated[Optional[List[str]], Field(title="Tools Called")] = Field(
128
- default_factory=list
129
- )
130
- expected_tools: Annotated[
131
- Optional[List[ToolJudgmentType]], Field(title="Expected Tools")
132
- ] = Field(default_factory=list)
133
- name: Annotated[Optional[str], Field(title="Name")] = None
134
- example_id: Annotated[str, Field(title="Example Id")]
135
- example_index: Annotated[Optional[int], Field(title="Example Index")] = None
264
+ duration: Annotated[Optional[int], Field(title="Duration")] = None
265
+ status_code: Annotated[Optional[int], Field(title="Status Code")] = None
266
+ status_message: Annotated[Optional[str], Field(title="Status Message")] = None
267
+ events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
268
+ links: Annotated[Optional[List[Dict[str, Any]]], Field(title="Links")] = None
269
+
270
+
271
+ class OtelSpanListItemScores(BaseModel):
272
+ success: Annotated[bool, Field(title="Success")]
273
+ score: Annotated[float, Field(title="Score")]
274
+ reason: Annotated[Optional[str], Field(title="Reason")] = None
275
+ name: Annotated[str, Field(title="Name")]
276
+
277
+
278
+ class OtelSpanDetailScores(BaseModel):
279
+ success: Annotated[bool, Field(title="Success")]
280
+ score: Annotated[float, Field(title="Score")]
281
+ reason: Annotated[Optional[str], Field(title="Reason")] = None
282
+ name: Annotated[str, Field(title="Name")]
283
+ example_id: Annotated[Optional[str], Field(title="Example Id")] = None
284
+
285
+
286
+ class ExampleEvaluationRun(BaseModel):
287
+ id: Annotated[Optional[str], Field(title="Id")] = None
288
+ project_name: Annotated[str, Field(title="Project Name")]
289
+ eval_name: Annotated[str, Field(title="Eval Name")]
290
+ custom_scorers: Annotated[
291
+ Optional[List[BaseScorer]], Field(title="Custom Scorers")
292
+ ] = []
293
+ judgment_scorers: Annotated[
294
+ Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
295
+ ] = []
136
296
  created_at: Annotated[Optional[str], Field(title="Created At")] = None
137
- trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
297
+ examples: Annotated[List[Example], Field(title="Examples")]
138
298
  trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
139
- dataset_id: Annotated[Optional[str], Field(title="Dataset Id")] = None
299
+ trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
140
300
 
141
301
 
142
- class TraceJudgmentType(BaseModel):
143
- trace_id: Annotated[str, Field(title="Trace Id")]
302
+ class HTTPValidationError(BaseModel):
303
+ detail: Annotated[Optional[List[ValidationError]], Field(title="Detail")] = None
304
+
305
+
306
+ class TraceEvaluationRun(BaseModel):
307
+ id: Annotated[Optional[str], Field(title="Id")] = None
308
+ project_name: Annotated[str, Field(title="Project Name")]
309
+ eval_name: Annotated[str, Field(title="Eval Name")]
310
+ custom_scorers: Annotated[
311
+ Optional[List[BaseScorer]], Field(title="Custom Scorers")
312
+ ] = []
313
+ judgment_scorers: Annotated[
314
+ Optional[List[ScorerConfig]], Field(title="Judgment Scorers")
315
+ ] = []
316
+ created_at: Annotated[Optional[str], Field(title="Created At")] = None
317
+ trace_and_span_ids: Annotated[
318
+ List[TraceAndSpanId], Field(title="Trace And Span Ids")
319
+ ]
320
+ is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
321
+ is_bucket_run: Annotated[Optional[bool], Field(title="Is Bucket Run")] = False
322
+
323
+
324
+ class DatasetInsertExamples(BaseModel):
325
+ dataset_name: Annotated[str, Field(title="Dataset Name")]
326
+ examples: Annotated[List[Example], Field(title="Examples")]
327
+ project_name: Annotated[str, Field(title="Project Name")]
328
+
329
+
330
+ class DatasetInfo(BaseModel):
331
+ dataset_id: Annotated[str, Field(title="Dataset Id")]
144
332
  name: Annotated[str, Field(title="Name")]
145
333
  created_at: Annotated[str, Field(title="Created At")]
146
- duration: Annotated[float, Field(title="Duration")]
147
- trace_spans: Annotated[List[TraceSpanJudgmentType], Field(title="Trace Spans")]
148
- overwrite: Annotated[Optional[bool], Field(title="Overwrite")] = False
149
- offline_mode: Annotated[Optional[bool], Field(title="Offline Mode")] = False
150
- rules: Annotated[Optional[Dict[str, Any]], Field(title="Rules")] = Field(
151
- default_factory=dict
152
- )
153
- has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = False
154
- customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
155
- tags: Annotated[Optional[List[str]], Field(title="Tags")] = Field(
156
- default_factory=list
157
- )
158
- metadata: Annotated[Optional[Dict[str, Any]], Field(title="Metadata")] = Field(
159
- default_factory=dict
160
- )
161
- update_id: Annotated[Optional[int], Field(title="Update Id")] = 1
334
+ kind: DatasetKind
335
+ entries: Annotated[int, Field(title="Entries")]
336
+ creator: Annotated[str, Field(title="Creator")]
337
+
338
+
339
+ class DatasetCreate(BaseModel):
340
+ name: Annotated[str, Field(title="Name")]
341
+ dataset_kind: DatasetKind
342
+ project_name: Annotated[str, Field(title="Project Name")]
343
+ examples: Annotated[List[Example], Field(title="Examples")]
344
+ overwrite: Annotated[bool, Field(title="Overwrite")]
162
345
 
163
346
 
164
- class ScoringResultJudgmentType(BaseModel):
347
+ class SavePromptScorerResponse(BaseModel):
348
+ scorer_response: PromptScorer
349
+
350
+
351
+ class FetchPromptScorersResponse(BaseModel):
352
+ scorers: Annotated[List[PromptScorer], Field(title="Scorers")]
353
+
354
+
355
+ class PromptFetchResponse(BaseModel):
356
+ commit: Optional[PromptCommitInfo] = None
357
+
358
+
359
+ class PromptVersionsResponse(BaseModel):
360
+ versions: Annotated[List[PromptCommitInfo], Field(title="Versions")]
361
+
362
+
363
+ class ScoringResult(BaseModel):
165
364
  success: Annotated[bool, Field(title="Success")]
166
- scorers_data: Annotated[
167
- Optional[List[ScorerDataJudgmentType]], Field(title="Scorers Data")
168
- ]
365
+ scorers_data: Annotated[List[ScorerData], Field(title="Scorers Data")]
169
366
  name: Annotated[Optional[str], Field(title="Name")] = None
170
367
  data_object: Annotated[
171
- Optional[Union[TraceSpanJudgmentType, ExampleJudgmentType]],
172
- Field(title="Data Object"),
368
+ Optional[Union[OtelTraceSpan, Example]], Field(title="Data Object")
173
369
  ] = None
174
370
  trace_id: Annotated[Optional[str], Field(title="Trace Id")] = None
175
371
  run_duration: Annotated[Optional[float], Field(title="Run Duration")] = None
176
372
  evaluation_cost: Annotated[Optional[float], Field(title="Evaluation Cost")] = None
177
373
 
178
374
 
179
- class TraceRunJudgmentType(BaseModel):
180
- project_name: Annotated[Optional[str], Field(title="Project Name")] = None
181
- eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
182
- traces: Annotated[List[TraceJudgmentType], Field(title="Traces")]
183
- scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
184
- model: Annotated[str, Field(title="Model")]
185
- append: Annotated[Optional[bool], Field(title="Append")] = False
186
- override: Annotated[Optional[bool], Field(title="Override")] = False
187
- trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
188
- tools: Annotated[Optional[List[Dict[str, Any]]], Field(title="Tools")] = None
375
+ class OtelTraceListItem(BaseModel):
376
+ organization_id: Annotated[str, Field(title="Organization Id")]
377
+ project_id: Annotated[str, Field(title="Project Id")]
378
+ trace_id: Annotated[str, Field(title="Trace Id")]
379
+ created_at: Annotated[AwareDatetime, Field(title="Created At")]
380
+ duration: Annotated[Optional[int], Field(title="Duration")] = None
381
+ tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
382
+ experiment_run_id: Annotated[Optional[str], Field(title="Experiment Run Id")] = None
383
+ span_name: Annotated[Optional[str], Field(title="Span Name")] = None
384
+ llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
385
+ error: Annotated[Optional[str], Field(title="Error")] = ""
386
+ scores: Annotated[
387
+ Optional[List[OtelSpanListItemScores]], Field(title="Scores")
388
+ ] = []
389
+ rules_invoked: Annotated[Optional[List[str]], Field(title="Rules Invoked")] = []
390
+ customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
391
+ input: Annotated[Optional[str], Field(title="Input")] = None
392
+ output: Annotated[Optional[str], Field(title="Output")] = None
393
+ input_preview: Annotated[Optional[str], Field(title="Input Preview")] = None
394
+ output_preview: Annotated[Optional[str], Field(title="Output Preview")] = None
395
+ annotation_count: Annotated[Optional[int], Field(title="Annotation Count")] = 0
396
+ span_id: Annotated[str, Field(title="Span Id")]
397
+ rule_id: Annotated[Optional[str], Field(title="Rule Id")] = None
189
398
 
190
399
 
191
- class JudgmentEvalJudgmentType(BaseModel):
192
- project_name: Annotated[Optional[str], Field(title="Project Name")] = None
193
- eval_name: Annotated[Optional[str], Field(title="Eval Name")] = None
194
- examples: Annotated[List[ExampleJudgmentType], Field(title="Examples")]
195
- scorers: Annotated[List[ScorerConfigJudgmentType], Field(title="Scorers")]
196
- model: Annotated[str, Field(title="Model")]
197
- append: Annotated[Optional[bool], Field(title="Append")] = False
198
- override: Annotated[Optional[bool], Field(title="Override")] = False
199
- trace_span_id: Annotated[Optional[str], Field(title="Trace Span Id")] = None
400
+ class OtelSpanDetail(BaseModel):
401
+ organization_id: Annotated[str, Field(title="Organization Id")]
402
+ project_id: Annotated[str, Field(title="Project Id")]
403
+ timestamp: Annotated[AwareDatetime, Field(title="Timestamp")]
404
+ trace_id: Annotated[str, Field(title="Trace Id")]
405
+ span_id: Annotated[str, Field(title="Span Id")]
406
+ parent_span_id: Annotated[Optional[str], Field(title="Parent Span Id")] = None
407
+ trace_state: Annotated[Optional[str], Field(title="Trace State")] = None
408
+ span_name: Annotated[Optional[str], Field(title="Span Name")] = None
409
+ span_kind: Annotated[Optional[str], Field(title="Span Kind")] = None
410
+ service_name: Annotated[Optional[str], Field(title="Service Name")] = None
411
+ resource_attributes: Annotated[
412
+ Optional[Dict[str, Any]], Field(title="Resource Attributes")
413
+ ] = None
414
+ span_attributes: Annotated[
415
+ Optional[Dict[str, Any]], Field(title="Span Attributes")
416
+ ] = None
417
+ duration: Annotated[Optional[int], Field(title="Duration")] = None
418
+ status_code: Annotated[Optional[int], Field(title="Status Code")] = None
419
+ status_message: Annotated[Optional[str], Field(title="Status Message")] = None
420
+ events: Annotated[Optional[List[Dict[str, Any]]], Field(title="Events")] = None
421
+ links: Annotated[
422
+ Optional[Union[List[Dict[str, Any]], Dict[str, Any]]], Field(title="Links")
423
+ ] = None
424
+ llm_cost: Annotated[Optional[float], Field(title="Llm Cost")] = None
425
+ prompt_tokens: Annotated[Optional[int], Field(title="Prompt Tokens")] = None
426
+ completion_tokens: Annotated[Optional[int], Field(title="Completion Tokens")] = None
427
+ scores: Annotated[Optional[List[OtelSpanDetailScores]], Field(title="Scores")] = (
428
+ None
429
+ )
200
430
 
201
431
 
202
- class EvalResultsJudgmentType(BaseModel):
203
- results: Annotated[List[ScoringResultJudgmentType], Field(title="Results")]
204
- run: Annotated[
205
- Union[TraceRunJudgmentType, JudgmentEvalJudgmentType], Field(title="Run")
206
- ]
432
+ class EvaluateResponse(BaseModel):
433
+ status: Annotated[str, Field(title="Status")]
434
+ results: Annotated[List[ScoringResult], Field(title="Results")]
435
+ resource_usage: Optional[UsageInfo] = None
436
+
437
+
438
+ class EvalResults(BaseModel):
439
+ results: Annotated[List[ScoringResult], Field(title="Results")]
440
+ run: Annotated[Union[ExampleEvaluationRun, TraceEvaluationRun], Field(title="Run")]
441
+
442
+
443
+ class DatasetTraceWithSpans(BaseModel):
444
+ dataset_id: Annotated[str, Field(title="Dataset Id")]
445
+ trace_detail: OtelTraceListItem
446
+ spans: Annotated[List[OtelSpanDetail], Field(title="Spans")]
447
+
448
+
449
+ class DatasetReturn(BaseModel):
450
+ name: Annotated[str, Field(title="Name")]
451
+ project_name: Annotated[str, Field(title="Project Name")]
452
+ dataset_kind: DatasetKind
453
+ examples: Annotated[Optional[List[Example]], Field(title="Examples")] = None
454
+ traces: Annotated[Optional[List[DatasetTraceWithSpans]], Field(title="Traces")] = (
455
+ None
456
+ )