judgeval 0.0.32__py3-none-any.whl → 0.0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/common/s3_storage.py +93 -0
- judgeval/common/tracer.py +612 -123
- judgeval/data/sequence.py +4 -10
- judgeval/judgment_client.py +25 -86
- judgeval/rules.py +4 -7
- judgeval/run_evaluation.py +1 -1
- judgeval/scorers/__init__.py +4 -4
- judgeval/scorers/judgeval_scorers/__init__.py +0 -176
- {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/METADATA +15 -2
- judgeval-0.0.33.dist-info/RECORD +63 -0
- judgeval/scorers/base_scorer.py +0 -58
- judgeval/scorers/judgeval_scorers/local_implementations/__init__.py +0 -27
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/answer_correctness_scorer.py +0 -276
- judgeval/scorers/judgeval_scorers/local_implementations/answer_correctness/prompts.py +0 -169
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/__init__.py +0 -4
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/answer_relevancy_scorer.py +0 -298
- judgeval/scorers/judgeval_scorers/local_implementations/answer_relevancy/prompts.py +0 -174
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/__init__.py +0 -0
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/comparison_scorer.py +0 -161
- judgeval/scorers/judgeval_scorers/local_implementations/comparison/prompts.py +0 -222
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/contextual_precision_scorer.py +0 -264
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_precision/prompts.py +0 -106
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/contextual_recall_scorer.py +0 -254
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_recall/prompts.py +0 -142
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/contextual_relevancy_scorer.py +0 -245
- judgeval/scorers/judgeval_scorers/local_implementations/contextual_relevancy/prompts.py +0 -121
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/execution_order/execution_order.py +0 -156
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/faithfulness_scorer.py +0 -318
- judgeval/scorers/judgeval_scorers/local_implementations/faithfulness/prompts.py +0 -268
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/hallucination_scorer.py +0 -264
- judgeval/scorers/judgeval_scorers/local_implementations/hallucination/prompts.py +0 -104
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/instruction_adherence.py +0 -232
- judgeval/scorers/judgeval_scorers/local_implementations/instruction_adherence/prompt.py +0 -102
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/__init__.py +0 -5
- judgeval/scorers/judgeval_scorers/local_implementations/json_correctness/json_correctness_scorer.py +0 -134
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/__init__.py +0 -3
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/prompts.py +0 -247
- judgeval/scorers/judgeval_scorers/local_implementations/summarization/summarization_scorer.py +0 -551
- judgeval-0.0.32.dist-info/RECORD +0 -97
- {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/WHEEL +0 -0
- {judgeval-0.0.32.dist-info → judgeval-0.0.33.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import boto3
|
4
|
+
from typing import Optional
|
5
|
+
from datetime import datetime, UTC
|
6
|
+
from botocore.exceptions import ClientError
|
7
|
+
from judgeval.common.logger import warning, info
|
8
|
+
|
9
|
+
class S3Storage:
|
10
|
+
"""Utility class for storing and retrieving trace data from S3."""
|
11
|
+
|
12
|
+
def __init__(
|
13
|
+
self,
|
14
|
+
bucket_name: str,
|
15
|
+
aws_access_key_id: Optional[str] = None,
|
16
|
+
aws_secret_access_key: Optional[str] = None,
|
17
|
+
region_name: Optional[str] = None
|
18
|
+
):
|
19
|
+
"""Initialize S3 storage with credentials and bucket name.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
bucket_name: Name of the S3 bucket to store traces in
|
23
|
+
aws_access_key_id: AWS access key ID (optional, will use environment variables if not provided)
|
24
|
+
aws_secret_access_key: AWS secret access key (optional, will use environment variables if not provided)
|
25
|
+
region_name: AWS region name (optional, will use environment variables if not provided)
|
26
|
+
"""
|
27
|
+
self.bucket_name = bucket_name
|
28
|
+
self.s3_client = boto3.client(
|
29
|
+
's3',
|
30
|
+
aws_access_key_id=aws_access_key_id or os.getenv('AWS_ACCESS_KEY_ID'),
|
31
|
+
aws_secret_access_key=aws_secret_access_key or os.getenv('AWS_SECRET_ACCESS_KEY'),
|
32
|
+
region_name=region_name or os.getenv('AWS_REGION', 'us-west-1')
|
33
|
+
)
|
34
|
+
|
35
|
+
def _ensure_bucket_exists(self):
|
36
|
+
"""Ensure the S3 bucket exists, creating it if necessary."""
|
37
|
+
try:
|
38
|
+
self.s3_client.head_bucket(Bucket=self.bucket_name)
|
39
|
+
except ClientError as e:
|
40
|
+
error_code = e.response['Error']['Code']
|
41
|
+
if error_code == '404':
|
42
|
+
# Bucket doesn't exist, create it
|
43
|
+
info(f"Bucket {self.bucket_name} doesn't exist, creating it ...")
|
44
|
+
try:
|
45
|
+
self.s3_client.create_bucket(
|
46
|
+
Bucket=self.bucket_name,
|
47
|
+
CreateBucketConfiguration={
|
48
|
+
'LocationConstraint': self.s3_client.meta.region_name
|
49
|
+
}
|
50
|
+
)
|
51
|
+
info(f"Created S3 bucket: {self.bucket_name}")
|
52
|
+
except ClientError as create_error:
|
53
|
+
if create_error.response['Error']['Code'] == 'BucketAlreadyOwnedByYou':
|
54
|
+
# Bucket was just created by another process
|
55
|
+
warning(f"Bucket {self.bucket_name} was just created by another process")
|
56
|
+
pass
|
57
|
+
else:
|
58
|
+
raise create_error
|
59
|
+
else:
|
60
|
+
# Some other error occurred
|
61
|
+
raise e
|
62
|
+
|
63
|
+
def save_trace(self, trace_data: dict, trace_id: str, project_name: str) -> str:
|
64
|
+
"""Save trace data to S3.
|
65
|
+
|
66
|
+
Args:
|
67
|
+
trace_data: The trace data to save
|
68
|
+
trace_id: Unique identifier for the trace
|
69
|
+
project_name: Name of the project the trace belongs to
|
70
|
+
|
71
|
+
Returns:
|
72
|
+
str: S3 key where the trace was saved
|
73
|
+
"""
|
74
|
+
# Ensure bucket exists before saving
|
75
|
+
self._ensure_bucket_exists()
|
76
|
+
|
77
|
+
# Create a timestamped key for the trace
|
78
|
+
timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
|
79
|
+
s3_key = f"traces/{project_name}/{trace_id}_{timestamp}.json"
|
80
|
+
|
81
|
+
# Convert trace data to JSON string
|
82
|
+
trace_json = json.dumps(trace_data)
|
83
|
+
|
84
|
+
# Upload to S3
|
85
|
+
info(f"Uploading trace to S3 at key {s3_key}, in bucket {self.bucket_name} ...")
|
86
|
+
self.s3_client.put_object(
|
87
|
+
Bucket=self.bucket_name,
|
88
|
+
Key=s3_key,
|
89
|
+
Body=trace_json,
|
90
|
+
ContentType='application/json'
|
91
|
+
)
|
92
|
+
|
93
|
+
return s3_key
|