judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -1,165 +0,0 @@
1
- import os
2
- from typing import Optional, TypedDict, List, Dict, Any
3
-
4
- ROOT_API = os.getenv("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")
5
-
6
- # Traces API
7
- JUDGMENT_TRACES_FETCH_API_URL = f"{ROOT_API}/traces/fetch/"
8
- JUDGMENT_TRACES_SAVE_API_URL = f"{ROOT_API}/traces/save/"
9
- JUDGMENT_TRACES_UPSERT_API_URL = f"{ROOT_API}/traces/upsert/"
10
- JUDGMENT_TRACES_DELETE_API_URL = f"{ROOT_API}/traces/delete/"
11
- JUDGMENT_TRACES_SPANS_BATCH_API_URL = f"{ROOT_API}/traces/spans/batch/"
12
- JUDGMENT_TRACES_EVALUATION_RUNS_BATCH_API_URL = (
13
- f"{ROOT_API}/traces/evaluation_runs/batch/"
14
- )
15
-
16
-
17
- class TraceFetchPayload(TypedDict):
18
- trace_id: str
19
-
20
-
21
- class TraceDeletePayload(TypedDict):
22
- trace_ids: List[str]
23
-
24
-
25
- class SpansBatchPayload(TypedDict):
26
- spans: List[Dict[str, Any]]
27
- organization_id: str
28
-
29
-
30
- class EvaluationEntryResponse(TypedDict):
31
- evaluation_run: Dict[str, Any]
32
- associated_span: Dict[str, Any]
33
- queued_at: Optional[float]
34
-
35
-
36
- class EvaluationRunsBatchPayload(TypedDict):
37
- organization_id: str
38
- evaluation_entries: List[EvaluationEntryResponse]
39
-
40
-
41
- # Evaluation API
42
- JUDGMENT_EVAL_API_URL = f"{ROOT_API}/evaluate/"
43
- JUDGMENT_TRACE_EVAL_API_URL = f"{ROOT_API}/evaluate_trace/"
44
- JUDGMENT_EVAL_LOG_API_URL = f"{ROOT_API}/log_eval_results/"
45
- JUDGMENT_EVAL_FETCH_API_URL = f"{ROOT_API}/fetch_experiment_run/"
46
- JUDGMENT_EVAL_DELETE_API_URL = (
47
- f"{ROOT_API}/delete_eval_results_by_project_and_run_names/"
48
- )
49
- JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
50
- JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
51
- JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
52
- JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL = f"{ROOT_API}/check_experiment_type/"
53
- JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL = f"{ROOT_API}/eval-run-name-exists/"
54
-
55
-
56
- # Evaluation API Payloads
57
- class EvalRunRequestBody(TypedDict):
58
- eval_name: str
59
- project_name: str
60
- judgment_api_key: str
61
-
62
-
63
- class DeleteEvalRunRequestBody(TypedDict):
64
- eval_names: List[str]
65
- project_name: str
66
- judgment_api_key: str
67
-
68
-
69
- class EvalLogPayload(TypedDict):
70
- results: List[Dict[str, Any]]
71
- run: Dict[str, Any]
72
-
73
-
74
- class EvalStatusPayload(TypedDict):
75
- eval_name: str
76
- project_name: str
77
- judgment_api_key: str
78
-
79
-
80
- class CheckExperimentTypePayload(TypedDict):
81
- eval_name: str
82
- project_name: str
83
- judgment_api_key: str
84
- is_trace: bool
85
-
86
-
87
- class EvalRunNameExistsPayload(TypedDict):
88
- eval_name: str
89
- project_name: str
90
- judgment_api_key: str
91
-
92
-
93
- # Datasets API
94
- JUDGMENT_DATASETS_PUSH_API_URL = f"{ROOT_API}/datasets/push/"
95
- JUDGMENT_DATASETS_APPEND_EXAMPLES_API_URL = f"{ROOT_API}/datasets/insert_examples/"
96
- JUDGMENT_DATASETS_PULL_API_URL = f"{ROOT_API}/datasets/pull_for_judgeval/"
97
- JUDGMENT_DATASETS_DELETE_API_URL = f"{ROOT_API}/datasets/delete/"
98
- JUDGMENT_DATASETS_EXPORT_JSONL_API_URL = f"{ROOT_API}/datasets/export_jsonl/"
99
- JUDGMENT_DATASETS_PROJECT_STATS_API_URL = f"{ROOT_API}/datasets/fetch_stats_by_project/"
100
- JUDGMENT_DATASETS_INSERT_API_URL = f"{ROOT_API}/datasets/insert_examples/"
101
-
102
-
103
- class DatasetPushPayload(TypedDict):
104
- dataset_alias: str
105
- project_name: str
106
- examples: List[Dict[str, Any]]
107
- traces: List[Dict[str, Any]]
108
- overwrite: bool
109
-
110
-
111
- class DatasetAppendPayload(TypedDict):
112
- dataset_alias: str
113
- project_name: str
114
- examples: List[Dict[str, Any]]
115
-
116
-
117
- class DatasetPullPayload(TypedDict):
118
- dataset_alias: str
119
- project_name: str
120
-
121
-
122
- class DatasetDeletePayload(TypedDict):
123
- dataset_alias: str
124
- project_name: str
125
-
126
-
127
- class DatasetExportPayload(TypedDict):
128
- dataset_alias: str
129
- project_name: str
130
-
131
-
132
- class DatasetStatsPayload(TypedDict):
133
- project_name: str
134
-
135
-
136
- # Projects API
137
- JUDGMENT_PROJECT_DELETE_API_URL = f"{ROOT_API}/projects/delete/"
138
- JUDGMENT_PROJECT_CREATE_API_URL = f"{ROOT_API}/projects/add/"
139
-
140
-
141
- class ProjectDeletePayload(TypedDict):
142
- project_list: List[str]
143
-
144
-
145
- class ProjectCreatePayload(TypedDict):
146
- project_name: str
147
-
148
-
149
- JUDGMENT_SCORER_SAVE_API_URL = f"{ROOT_API}/save_scorer/"
150
- JUDGMENT_SCORER_FETCH_API_URL = f"{ROOT_API}/fetch_scorer/"
151
- JUDGMENT_SCORER_EXISTS_API_URL = f"{ROOT_API}/scorer_exists/"
152
-
153
-
154
- class ScorerSavePayload(TypedDict):
155
- name: str
156
- prompt: str
157
- options: dict
158
-
159
-
160
- class ScorerFetchPayload(TypedDict):
161
- name: str
162
-
163
-
164
- class ScorerExistsPayload(TypedDict):
165
- name: str
@@ -1,27 +0,0 @@
1
- """
2
- Common Exceptions in Judgeval
3
- """
4
-
5
-
6
- class MissingTestCaseParamsError(Exception):
7
- pass
8
-
9
-
10
- class JudgmentAPIError(Exception):
11
- """
12
- Exception raised when an error occurs while executing a Judgment API request
13
- """
14
-
15
- def __init__(self, message: str):
16
- super().__init__(message)
17
- self.message = message
18
-
19
-
20
- class InvalidJudgeModelError(Exception):
21
- """
22
- Exception raised when an invalid judge model is provided
23
- """
24
-
25
- def __init__(self, message: str):
26
- super().__init__(message)
27
- self.message = message
@@ -1,6 +0,0 @@
1
- from judgeval.common.storage.s3_storage import S3Storage
2
-
3
-
4
- __all__ = [
5
- "S3Storage",
6
- ]
@@ -1,98 +0,0 @@
1
- import os
2
- import json
3
- import boto3
4
- from typing import Optional
5
- from datetime import datetime, UTC
6
- from botocore.exceptions import ClientError
7
- from judgeval.common.logger import judgeval_logger
8
-
9
-
10
- class S3Storage:
11
- """Utility class for storing and retrieving trace data from S3."""
12
-
13
- def __init__(
14
- self,
15
- bucket_name: str,
16
- aws_access_key_id: Optional[str] = None,
17
- aws_secret_access_key: Optional[str] = None,
18
- region_name: Optional[str] = None,
19
- ):
20
- """Initialize S3 storage with credentials and bucket name.
21
-
22
- Args:
23
- bucket_name: Name of the S3 bucket to store traces in
24
- aws_access_key_id: AWS access key ID (optional, will use environment variables if not provided)
25
- aws_secret_access_key: AWS secret access key (optional, will use environment variables if not provided)
26
- region_name: AWS region name (optional, will use environment variables if not provided)
27
- """
28
- self.bucket_name = bucket_name
29
- self.s3_client = boto3.client(
30
- "s3",
31
- aws_access_key_id=aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"),
32
- aws_secret_access_key=aws_secret_access_key
33
- or os.getenv("AWS_SECRET_ACCESS_KEY"),
34
- region_name=region_name or os.getenv("AWS_REGION", "us-west-1"),
35
- )
36
-
37
- def _ensure_bucket_exists(self):
38
- """Ensure the S3 bucket exists, creating it if necessary."""
39
- try:
40
- self.s3_client.head_bucket(Bucket=self.bucket_name)
41
- except ClientError as e:
42
- error_code = e.response["Error"]["Code"]
43
- if error_code == "404":
44
- # Bucket doesn't exist, create it
45
- try:
46
- self.s3_client.create_bucket(
47
- Bucket=self.bucket_name,
48
- CreateBucketConfiguration={
49
- "LocationConstraint": self.s3_client.meta.region_name
50
- },
51
- ) if self.s3_client.meta.region_name != "us-east-1" else self.s3_client.create_bucket(
52
- Bucket=self.bucket_name
53
- )
54
- except ClientError as create_error:
55
- if (
56
- create_error.response["Error"]["Code"]
57
- == "BucketAlreadyOwnedByYou"
58
- ):
59
- # Bucket was just created by another process
60
- judgeval_logger.warning(
61
- f"Bucket {self.bucket_name} was just created by another process"
62
- )
63
- pass
64
- else:
65
- raise create_error
66
- else:
67
- # Some other error occurred
68
- raise e
69
-
70
- def save_trace(self, trace_data: dict, trace_id: str, project_name: str) -> str:
71
- """Save trace data to S3.
72
-
73
- Args:
74
- trace_data: The trace data to save
75
- trace_id: Unique identifier for the trace
76
- project_name: Name of the project the trace belongs to
77
-
78
- Returns:
79
- str: S3 key where the trace was saved
80
- """
81
- # Ensure bucket exists before saving
82
- self._ensure_bucket_exists()
83
-
84
- # Create a timestamped key for the trace
85
- timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
86
- s3_key = f"traces/{project_name}/{trace_id}_{timestamp}.json"
87
-
88
- # Convert trace data to JSON string
89
- trace_json = json.dumps(trace_data)
90
-
91
- self.s3_client.put_object(
92
- Bucket=self.bucket_name,
93
- Key=s3_key,
94
- Body=trace_json,
95
- ContentType="application/json",
96
- )
97
-
98
- return s3_key
@@ -1,31 +0,0 @@
1
- from judgeval.common.tracer.core import (
2
- TraceClient,
3
- _DeepTracer,
4
- Tracer,
5
- wrap,
6
- current_span_var,
7
- current_trace_var,
8
- SpanType,
9
- cost_per_token,
10
- )
11
- from judgeval.common.tracer.otel_exporter import JudgmentAPISpanExporter
12
- from judgeval.common.tracer.otel_span_processor import JudgmentSpanProcessor
13
- from judgeval.common.tracer.span_processor import SpanProcessorBase
14
- from judgeval.common.tracer.trace_manager import TraceManagerClient
15
- from judgeval.data import TraceSpan
16
-
17
- __all__ = [
18
- "_DeepTracer",
19
- "TraceClient",
20
- "Tracer",
21
- "wrap",
22
- "current_span_var",
23
- "current_trace_var",
24
- "TraceManagerClient",
25
- "JudgmentAPISpanExporter",
26
- "JudgmentSpanProcessor",
27
- "SpanProcessorBase",
28
- "SpanType",
29
- "cost_per_token",
30
- "TraceSpan",
31
- ]
@@ -1,22 +0,0 @@
1
- import os
2
- import site
3
- import sysconfig
4
-
5
-
6
- # NOTE: This builds once, can be tweaked if we are missing / capturing other unnecessary modules
7
- # @link https://docs.python.org/3.13/library/sysconfig.html
8
- _TRACE_FILEPATH_BLOCKLIST = tuple(
9
- os.path.realpath(p) + os.sep
10
- for p in {
11
- sysconfig.get_paths()["stdlib"],
12
- sysconfig.get_paths().get("platstdlib", ""),
13
- *site.getsitepackages(),
14
- site.getusersitepackages(),
15
- *(
16
- [os.path.join(os.path.dirname(__file__), "../../judgeval/")]
17
- if os.environ.get("JUDGMENT_DEV")
18
- else []
19
- ),
20
- }
21
- if p
22
- )