judgeval 0.0.51__py3-none-any.whl → 0.0.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. judgeval/common/logger.py +46 -199
  2. judgeval/common/s3_storage.py +2 -6
  3. judgeval/common/tracer.py +182 -262
  4. judgeval/common/utils.py +16 -36
  5. judgeval/constants.py +14 -20
  6. judgeval/data/__init__.py +0 -2
  7. judgeval/data/datasets/dataset.py +6 -10
  8. judgeval/data/datasets/eval_dataset_client.py +25 -27
  9. judgeval/data/example.py +5 -138
  10. judgeval/data/judgment_types.py +214 -0
  11. judgeval/data/result.py +7 -25
  12. judgeval/data/scorer_data.py +28 -40
  13. judgeval/data/scripts/fix_default_factory.py +23 -0
  14. judgeval/data/scripts/openapi_transform.py +123 -0
  15. judgeval/data/tool.py +3 -54
  16. judgeval/data/trace.py +31 -50
  17. judgeval/data/trace_run.py +3 -3
  18. judgeval/evaluation_run.py +16 -23
  19. judgeval/integrations/langgraph.py +11 -12
  20. judgeval/judges/litellm_judge.py +3 -6
  21. judgeval/judges/mixture_of_judges.py +8 -25
  22. judgeval/judges/together_judge.py +3 -6
  23. judgeval/judgment_client.py +22 -24
  24. judgeval/rules.py +7 -19
  25. judgeval/run_evaluation.py +79 -242
  26. judgeval/scorers/__init__.py +4 -20
  27. judgeval/scorers/agent_scorer.py +21 -0
  28. judgeval/scorers/api_scorer.py +28 -38
  29. judgeval/scorers/base_scorer.py +98 -0
  30. judgeval/scorers/example_scorer.py +19 -0
  31. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -20
  32. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +10 -17
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +9 -24
  34. judgeval/scorers/judgeval_scorers/api_scorers/classifier_scorer.py +16 -68
  35. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +4 -12
  36. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +4 -4
  37. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +10 -17
  38. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +4 -4
  39. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +4 -4
  40. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +4 -4
  41. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +18 -14
  42. judgeval/scorers/score.py +45 -330
  43. judgeval/scorers/utils.py +6 -88
  44. judgeval/utils/file_utils.py +4 -6
  45. judgeval/version_check.py +3 -2
  46. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/METADATA +3 -2
  47. judgeval-0.0.53.dist-info/RECORD +65 -0
  48. judgeval/data/custom_example.py +0 -19
  49. judgeval/scorers/judgeval_scorer.py +0 -177
  50. judgeval/scorers/judgeval_scorers/api_scorers/comparison.py +0 -45
  51. judgeval/scorers/judgeval_scorers/api_scorers/contextual_precision.py +0 -29
  52. judgeval/scorers/judgeval_scorers/api_scorers/contextual_recall.py +0 -29
  53. judgeval/scorers/judgeval_scorers/api_scorers/contextual_relevancy.py +0 -32
  54. judgeval/scorers/judgeval_scorers/api_scorers/groundedness.py +0 -28
  55. judgeval/scorers/judgeval_scorers/api_scorers/json_correctness.py +0 -38
  56. judgeval/scorers/judgeval_scorers/api_scorers/summarization.py +0 -27
  57. judgeval/scorers/prompt_scorer.py +0 -296
  58. judgeval-0.0.51.dist-info/RECORD +0 -69
  59. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/WHEEL +0 -0
  60. {judgeval-0.0.51.dist-info → judgeval-0.0.53.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/logger.py CHANGED
@@ -1,213 +1,60 @@
1
+ # logger.py
2
+
1
3
  import logging
2
- from logging.handlers import RotatingFileHandler
3
4
  import sys
4
- from pathlib import Path
5
- from contextlib import contextmanager
6
-
7
- # Global variables
8
- logger = None
9
-
10
-
11
- class LoggingState:
12
- enabled: bool = False
13
- path: str | None = None
14
-
15
-
16
- LOGGING_STATE = LoggingState()
5
+ import os
17
6
 
18
- # Add these as module-level variables
19
- current_example_id = None
20
- current_timestamp = None
21
-
22
-
23
- @contextmanager
24
- def enable_logging(
25
- name: str = "judgeval",
26
- path: str = "./logs",
27
- max_bytes: int = 1024 * 1024,
28
- backup_count: int = 5,
29
- ):
30
- """
31
- Context manager to temporarily enable logging for a specific block of code.
32
- """
33
- global logger
34
- LOGGING_STATE.enabled = True
35
- LOGGING_STATE.path = path
36
- # Initialize logger if not already initialized
37
- if logger is None:
38
- logger = _initialize_logger(
39
- name=name, path=path, max_bytes=max_bytes, backup_count=backup_count
40
- )
41
- try:
42
- logger.info("Logging enabled")
43
- yield
44
- finally:
45
- logger.info("Logging disabled")
46
- LOGGING_STATE.enabled = False
47
- LOGGING_STATE.path = None
7
+ # ANSI escape sequences
8
+ RESET = "\033[0m"
9
+ RED = "\033[31m"
10
+ YELLOW = "\033[33m"
11
+ BLUE = "\033[34m"
12
+ GRAY = "\033[90m"
48
13
 
49
14
 
50
- def _initialize_logger(
51
- name: str = "judgeval",
52
- max_bytes: int = 1024 * 1024, # 1MB
53
- backup_count: int = 5,
54
- path: str = "./logs", # Added path parameter with default
55
- ) -> logging.Logger:
15
+ class ColorFormatter(logging.Formatter):
56
16
  """
57
- Initialize the global logger instance if it doesn't exist.
58
- Returns the global logger instance.
17
+ Wrap the final formatted log record in ANSI color codes based on level.
59
18
  """
60
- global logger
61
19
 
62
- log_dir = Path(path)
63
- log_dir.mkdir(exist_ok=True, parents=True)
64
- log_file = log_dir / f"{name}.log"
65
- if log_file.exists():
66
- log_file.unlink() # Delete existing log file
67
-
68
- if logger is not None:
69
- return logger
70
-
71
- # Create logs directory if it doesn't exist
72
- log_dir = Path(path)
73
- log_dir.mkdir(exist_ok=True)
74
-
75
- # Create a custom formatter that includes example info when available
76
- class ExampleFormatter(logging.Formatter):
77
- def format(self, record):
78
- if current_example_id is not None and current_timestamp is not None:
79
- record.example_id = current_example_id
80
- record.timestamp = current_timestamp
81
- return logging.Formatter(
82
- "%(asctime)s - %(name)s - %(levelname)s - [Example_%(example_id)s][%(timestamp)s] %(message)s",
83
- datefmt="%Y-%m-%d %H:%M:%S",
84
- ).format(record)
85
- return logging.Formatter(
86
- "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
87
- datefmt="%Y-%m-%d %H:%M:%S",
88
- ).format(record)
89
-
90
- # Use the custom formatter
91
- console_handler = logging.StreamHandler(sys.stdout)
92
- console_handler.setFormatter(ExampleFormatter())
93
- console_handler.setLevel(logging.DEBUG)
94
-
95
- log_filename = f"{name}.log"
96
- file_handler = RotatingFileHandler(
97
- log_dir / log_filename, maxBytes=max_bytes, backupCount=backup_count, mode="a"
20
+ COLORS = {
21
+ logging.DEBUG: GRAY,
22
+ logging.INFO: GRAY,
23
+ logging.WARNING: YELLOW,
24
+ logging.ERROR: RED,
25
+ logging.CRITICAL: RED,
26
+ }
27
+
28
+ def __init__(self, fmt=None, datefmt=None, use_color=True):
29
+ super().__init__(fmt=fmt, datefmt=datefmt)
30
+ self.use_color = use_color and sys.stdout.isatty()
31
+
32
+ def format(self, record):
33
+ message = super().format(record)
34
+ if self.use_color:
35
+ color = self.COLORS.get(record.levelno, "")
36
+ if color:
37
+ message = f"{color}{message}{RESET}"
38
+ return message
39
+
40
+
41
+ def _setup_judgeval_logger():
42
+ use_color = sys.stdout.isatty() and os.getenv("NO_COLOR") is None
43
+ handler = logging.StreamHandler(sys.stdout)
44
+ handler.setLevel(logging.DEBUG)
45
+ handler.setFormatter(
46
+ ColorFormatter(
47
+ fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
48
+ datefmt="%Y-%m-%d %H:%M:%S",
49
+ use_color=use_color,
50
+ )
98
51
  )
99
- file_handler.setFormatter(ExampleFormatter())
100
- file_handler.setLevel(logging.DEBUG)
101
52
 
102
- # Get logger
103
- logger = logging.getLogger(name)
53
+ logger = logging.getLogger("judgeval")
104
54
  logger.setLevel(logging.DEBUG)
105
-
106
- # Prevent adding handlers multiple times
107
- if not logger.handlers:
108
- logger.addHandler(console_handler)
109
- logger.addHandler(file_handler)
110
-
55
+ logger.addHandler(handler)
111
56
  return logger
112
57
 
113
58
 
114
- # Initialize the global logger when module is imported
115
- # logger = _initialize_logger()
116
-
117
-
118
- def log_if_enabled(func):
119
- """Decorator to check if logging is enabled before executing logging statements"""
120
-
121
- def wrapper(*args, **kwargs):
122
- if LOGGING_STATE.enabled:
123
- return func(*args, **kwargs)
124
-
125
- return wrapper
126
-
127
-
128
- @log_if_enabled
129
- def debug(msg: str, example_idx: int | None = None):
130
- """Log debug message if logging is enabled"""
131
- if logger:
132
- logger.debug(msg)
133
-
134
-
135
- @log_if_enabled
136
- def info(msg: str, example_idx: int | None = None):
137
- """Log info message if logging is enabled"""
138
- if logger:
139
- logger.info(msg)
140
-
141
-
142
- @log_if_enabled
143
- def warning(msg: str, example_idx: int | None = None):
144
- """Log warning message if logging is enabled"""
145
- if logger:
146
- logger.warning(msg)
147
-
148
-
149
- @log_if_enabled
150
- def error(msg: str, example_idx: int | None = None):
151
- """Log error message if logging is enabled"""
152
- if logger:
153
- logger.error(msg)
154
-
155
-
156
- def create_example_handler(
157
- timestamp: str,
158
- example_idx: int,
159
- path: str = "./logs", # Added path parameter with default
160
- ) -> RotatingFileHandler:
161
- """Creates a file handler for a specific example"""
162
- debug(
163
- f"Creating example handler for timestamp={timestamp}, example_idx={example_idx}"
164
- )
165
- log_dir = Path(path) / "examples"
166
- log_dir.mkdir(exist_ok=True, parents=True)
167
-
168
- formatter = logging.Formatter(
169
- fmt="%(asctime)s - %(name)s - %(levelname)s - [Example_%(example_id)s][%(timestamp)s] %(message)s",
170
- datefmt="%Y-%m-%d %H:%M:%S",
171
- )
172
-
173
- # Create a unique file for each example
174
- file_handler = RotatingFileHandler(
175
- log_dir / f"{timestamp}_example_{example_idx}.log",
176
- maxBytes=1024 * 1024, # 1MB
177
- backupCount=5,
178
- mode="a",
179
- )
180
- file_handler.setFormatter(formatter)
181
- file_handler.setLevel(logging.DEBUG)
182
- info(f"Created example handler for example {example_idx}")
183
- return file_handler
184
-
185
-
186
- @contextmanager
187
- def example_logging_context(timestamp: str, example_idx: int):
188
- """Context manager for example-specific logging"""
189
- if not LOGGING_STATE.enabled:
190
- yield
191
- return
192
-
193
- global current_example_id, current_timestamp
194
-
195
- debug(f"Entering example logging context for example {example_idx}")
196
- current_example_id = example_idx
197
- current_timestamp = timestamp
198
-
199
- if LOGGING_STATE.path:
200
- handler = create_example_handler(
201
- timestamp, example_idx, path=LOGGING_STATE.path
202
- )
203
- if handler and logger:
204
- logger.addHandler(handler)
205
- try:
206
- yield
207
- finally:
208
- current_example_id = None
209
- current_timestamp = None
210
- if handler and logger:
211
- logger.removeHandler(handler)
212
- handler.close()
213
- debug(f"Closed example handler for example {example_idx}")
59
+ # Global logger you can import elsewhere
60
+ judgeval_logger = _setup_judgeval_logger()
judgeval/common/s3_storage.py CHANGED
@@ -4,7 +4,7 @@ import boto3
4
4
  from typing import Optional
5
5
  from datetime import datetime, UTC
6
6
  from botocore.exceptions import ClientError
7
- from judgeval.common.logger import warning, info
7
+ from judgeval.common.logger import judgeval_logger
8
8
 
9
9
 
10
10
  class S3Storage:
@@ -42,7 +42,6 @@ class S3Storage:
42
42
  error_code = e.response["Error"]["Code"]
43
43
  if error_code == "404":
44
44
  # Bucket doesn't exist, create it
45
- info(f"Bucket {self.bucket_name} doesn't exist, creating it ...")
46
45
  try:
47
46
  self.s3_client.create_bucket(
48
47
  Bucket=self.bucket_name,
@@ -52,14 +51,13 @@ class S3Storage:
52
51
  ) if self.s3_client.meta.region_name != "us-east-1" else self.s3_client.create_bucket(
53
52
  Bucket=self.bucket_name
54
53
  )
55
- info(f"Created S3 bucket: {self.bucket_name}")
56
54
  except ClientError as create_error:
57
55
  if (
58
56
  create_error.response["Error"]["Code"]
59
57
  == "BucketAlreadyOwnedByYou"
60
58
  ):
61
59
  # Bucket was just created by another process
62
- warning(
60
+ judgeval_logger.warning(
63
61
  f"Bucket {self.bucket_name} was just created by another process"
64
62
  )
65
63
  pass
@@ -90,8 +88,6 @@ class S3Storage:
90
88
  # Convert trace data to JSON string
91
89
  trace_json = json.dumps(trace_data)
92
90
 
93
- # Upload to S3
94
- info(f"Uploading trace to S3 at key {s3_key}, in bucket {self.bucket_name} ...")
95
91
  self.s3_client.put_object(
96
92
  Bucket=self.bucket_name,
97
93
  Key=s3_key,