judgeval 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. judgeval/__init__.py +83 -0
  2. judgeval/clients.py +19 -0
  3. judgeval/common/__init__.py +8 -0
  4. judgeval/common/exceptions.py +28 -0
  5. judgeval/common/logger.py +189 -0
  6. judgeval/common/tracer.py +587 -0
  7. judgeval/common/utils.py +763 -0
  8. judgeval/constants.py +55 -0
  9. judgeval/data/__init__.py +14 -0
  10. judgeval/data/api_example.py +111 -0
  11. judgeval/data/datasets/__init__.py +4 -0
  12. judgeval/data/datasets/dataset.py +407 -0
  13. judgeval/data/datasets/ground_truth.py +54 -0
  14. judgeval/data/datasets/utils.py +74 -0
  15. judgeval/data/example.py +76 -0
  16. judgeval/data/result.py +83 -0
  17. judgeval/data/scorer_data.py +86 -0
  18. judgeval/evaluation_run.py +130 -0
  19. judgeval/judges/__init__.py +7 -0
  20. judgeval/judges/base_judge.py +44 -0
  21. judgeval/judges/litellm_judge.py +49 -0
  22. judgeval/judges/mixture_of_judges.py +248 -0
  23. judgeval/judges/together_judge.py +55 -0
  24. judgeval/judges/utils.py +45 -0
  25. judgeval/judgment_client.py +244 -0
  26. judgeval/run_evaluation.py +355 -0
  27. judgeval/scorers/__init__.py +30 -0
  28. judgeval/scorers/base_scorer.py +51 -0
  29. judgeval/scorers/custom_scorer.py +134 -0
  30. judgeval/scorers/judgeval_scorers/__init__.py +21 -0
  31. judgeval/scorers/judgeval_scorers/answer_relevancy.py +19 -0
  32. judgeval/scorers/judgeval_scorers/contextual_precision.py +19 -0
  33. judgeval/scorers/judgeval_scorers/contextual_recall.py +19 -0
  34. judgeval/scorers/judgeval_scorers/contextual_relevancy.py +22 -0
  35. judgeval/scorers/judgeval_scorers/faithfulness.py +19 -0
  36. judgeval/scorers/judgeval_scorers/hallucination.py +19 -0
  37. judgeval/scorers/judgeval_scorers/json_correctness.py +32 -0
  38. judgeval/scorers/judgeval_scorers/summarization.py +20 -0
  39. judgeval/scorers/judgeval_scorers/tool_correctness.py +19 -0
  40. judgeval/scorers/prompt_scorer.py +439 -0
  41. judgeval/scorers/score.py +427 -0
  42. judgeval/scorers/utils.py +175 -0
  43. judgeval-0.0.1.dist-info/METADATA +40 -0
  44. judgeval-0.0.1.dist-info/RECORD +46 -0
  45. judgeval-0.0.1.dist-info/WHEEL +4 -0
  46. judgeval-0.0.1.dist-info/licenses/LICENSE.md +202 -0
judgeval/__init__.py ADDED
@@ -0,0 +1,83 @@
1
+ # Import key components that should be publicly accessible
2
+ from judgeval.common.utils import (
3
+ get_chat_completion,
4
+ aget_chat_completion,
5
+ get_completion_multiple_models,
6
+ aget_completion_multiple_models
7
+ )
8
+ from judgeval.data import (
9
+ Example,
10
+ ProcessExample,
11
+ ScorerData,
12
+ ScoringResult,
13
+ )
14
+ from judgeval.data.datasets import (
15
+ EvalDataset,
16
+ GroundTruthExample
17
+ )
18
+
19
+ from judgeval.judges import (
20
+ judgevalJudge,
21
+ LiteLLMJudge,
22
+ TogetherJudge,
23
+ MixtureOfJudges
24
+ )
25
+ from judgeval.scorers import (
26
+ JudgmentScorer,
27
+ CustomScorer,
28
+ PromptScorer,
29
+ ClassifierScorer,
30
+ ToolCorrectnessScorer,
31
+ JSONCorrectnessScorer,
32
+ SummarizationScorer,
33
+ HallucinationScorer,
34
+ FaithfulnessScorer,
35
+ ContextualRelevancyScorer,
36
+ ContextualPrecisionScorer,
37
+ ContextualRecallScorer,
38
+ AnswerRelevancyScorer
39
+ )
40
+ from judgeval.clients import client, langfuse, together_client
41
+ from judgeval.judgment_client import JudgmentClient
42
+
43
# Names exported by ``from judgeval import *``.
#
# Only the pre-built clients and JudgmentClient are listed.  The remaining
# names imported by this module stay importable by explicit name
# (e.g. ``from judgeval import Example``); their ``__all__`` entries are kept
# disabled here, grouped as in the original list:
#   Common utilities: get_chat_completion, aget_chat_completion,
#       get_completion_multiple_models, aget_completion_multiple_models
#   Data classes: Example, ProcessExample, ScorerData, ScoringResult
#   Judges: judgevalJudge, LiteLLMJudge, TogetherJudge, MixtureOfJudges
#   Scorers: JudgmentScorer, CustomScorer, PromptScorer, ClassifierScorer,
#       ToolCorrectnessScorer, JSONCorrectnessScorer, SummarizationScorer,
#       HallucinationScorer, FaithfulnessScorer, ContextualRelevancyScorer,
#       ContextualPrecisionScorer, ContextualRecallScorer,
#       AnswerRelevancyScorer
__all__ = [
    # Clients
    "client",
    "langfuse",
    "together_client",
    # High-level entry point
    "JudgmentClient",
]
judgeval/clients.py ADDED
@@ -0,0 +1,19 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from openai import OpenAI
4
+ from langfuse import Langfuse
5
+ from together import Together, AsyncTogether
6
+
7
# The .env file is looked up next to this module, not in the current working
# directory.
PATH_TO_DOTENV = os.path.join(os.path.dirname(__file__), ".env")
load_dotenv(dotenv_path=PATH_TO_DOTENV)

# Initialize clients
# NOTE: every client below is constructed at import time.

# No arguments are passed here; presumably the OpenAI SDK picks up its
# credentials from the environment -- confirm against the SDK docs.
client = OpenAI()

langfuse = Langfuse(
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    host=os.environ.get("LANGFUSE_HOST"),
)

# The sync and async Together clients are configured from the same variable.
together_client = Together(api_key=os.environ.get("TOGETHERAI_API_KEY"))
async_together_client = AsyncTogether(api_key=os.environ.get("TOGETHERAI_API_KEY"))
19
+
@@ -0,0 +1,8 @@
1
+ from judgeval.common.utils import (
2
+ get_chat_completion,
3
+ aget_chat_completion,
4
+ get_completion_multiple_models,
5
+ aget_completion_multiple_models
6
+ )
7
+
8
# Public names re-exported from judgeval.common.utils.
__all__ = [
    "get_chat_completion",
    "aget_chat_completion",
    "get_completion_multiple_models",
    "aget_completion_multiple_models",
]
@@ -0,0 +1,28 @@
1
+ """
2
+ Common Exceptions in Judgeval
3
+ """
4
+
5
+
6
class MissingTestCaseParamsError(Exception):
    """Plain ``Exception`` subclass that adds no behavior of its own.

    Presumably raised when a test case lacks required parameters -- the
    raising code is not part of this module; confirm at the call sites.
    """
8
+
9
+
10
class JudgmentAPIError(Exception):
    """Error raised when executing a Judgment API request fails.

    Attributes:
        message: The text given at construction; it is also forwarded to
            ``Exception`` so ``str(err)`` yields the same text.
    """

    def __init__(self, message: str):
        # Keep the text on a named attribute in addition to ``args``.
        self.message = message
        super().__init__(message)
18
+
19
+
20
class InvalidJudgeModelError(Exception):
    """Error raised when an invalid judge model is provided.

    Attributes:
        message: The text given at construction; it is also forwarded to
            ``Exception`` so ``str(err)`` yields the same text.
    """

    def __init__(self, message: str):
        # Keep the text on a named attribute in addition to ``args``.
        self.message = message
        super().__init__(message)
28
+
@@ -0,0 +1,189 @@
1
+ import logging
2
+ from logging.handlers import RotatingFileHandler
3
+ import sys
4
+ from pathlib import Path
5
+ from datetime import datetime
6
+ from contextlib import contextmanager
7
+
8
# Module-wide logging state

# Shared logger instance; None until a logger has been initialized.
logger = None


class LoggingState:
    """Holder for the on/off switch and output directory of logging."""

    enabled = False  # True while logging is switched on
    path = None  # directory for log files while logging is on


# Single shared state object consulted by the helpers in this module.
LOGGING_STATE = LoggingState()

# Example currently being processed (both None when no example is active).
current_example_id = current_timestamp = None
19
+
20
+
21
@contextmanager
def enable_logging(name: str = "judgeval", path: str = "./logs", max_bytes: int = 1024 * 1024, backup_count: int = 5):
    """Temporarily enable logging for the enclosed block of code.

    Args:
        name: Logger name, forwarded to ``_initialize_logger``.
        path: Directory for log files; also stored on ``LOGGING_STATE.path``
            for the duration of the block.
        max_bytes: Rotation threshold forwarded to ``_initialize_logger``.
        backup_count: Number of rotated files forwarded to
            ``_initialize_logger``.

    The logger is only created on first use; when a logger already exists,
    ``name``, ``max_bytes`` and ``backup_count`` are not applied again.

    On exit the logging state that was active *before* entering is restored
    (rather than being forced to "disabled"), so nested uses do not switch
    off an enclosing context.
    """
    global logger

    # Remember what we are about to overwrite so it can be put back on exit.
    previous_enabled = LOGGING_STATE.enabled
    previous_path = LOGGING_STATE.path

    LOGGING_STATE.enabled = True
    LOGGING_STATE.path = path
    try:
        # Initialize inside the try block: if setup fails, the finally clause
        # still rolls the state back instead of leaving logging "enabled"
        # with no logger behind it.
        if logger is None:
            logger = _initialize_logger(name=name, path=path, max_bytes=max_bytes, backup_count=backup_count)
        logger.info("Logging enabled")
        yield
    finally:
        if logger is not None:
            logger.info("Logging disabled")
        LOGGING_STATE.enabled = previous_enabled
        LOGGING_STATE.path = previous_path
39
+
40
def _initialize_logger(
    name: str = "judgeval",
    max_bytes: int = 1024 * 1024,  # 1MB
    backup_count: int = 5,
    path: str = "./logs",
) -> logging.Logger:
    """Create the module-wide logger on first call and return it.

    Args:
        name: Logger name; also used for the log file ``<path>/<name>.log``.
        max_bytes: Size at which the log file is rotated.
        backup_count: Number of rotated log files to keep.
        path: Directory that receives the log file (created if missing).

    Returns:
        The shared ``logging.Logger``. When one already exists it is returned
        untouched and the arguments are ignored.
    """
    global logger

    # Check for an existing logger BEFORE touching the filesystem: deleting
    # the log file while a live handler still has it open would silently
    # discard everything that handler writes afterwards.
    if logger is not None:
        return logger

    log_dir = Path(path)
    log_dir.mkdir(exist_ok=True, parents=True)
    log_file = log_dir / f"{name}.log"

    class ExampleFormatter(logging.Formatter):
        """Formatter that adds example id/timestamp tags when one is active."""

        def __init__(self):
            super().__init__()
            # Build both layouts once instead of once per log record.
            self._plain = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S',
            )
            self._tagged = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - [Example_%(example_id)s][%(timestamp)s] %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S',
            )

        def format(self, record):
            if current_example_id is not None and current_timestamp is not None:
                # The fields are stored on the record itself, so handlers
                # that run later for the same record can see them too.
                record.example_id = current_example_id
                record.timestamp = current_timestamp
                return self._tagged.format(record)
            return self._plain.format(record)

    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)

    # Only build handlers when they will actually be attached. Creating a
    # RotatingFileHandler opens the file, so building one and then dropping
    # it would leak an open file handle.
    if not logger.handlers:
        if log_file.exists():
            log_file.unlink()  # Start from a fresh log file

        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(ExampleFormatter())
        console_handler.setLevel(logging.DEBUG)

        file_handler = RotatingFileHandler(
            log_file,
            maxBytes=max_bytes,
            backupCount=backup_count,
            mode='a',
        )
        file_handler.setFormatter(ExampleFormatter())
        file_handler.setLevel(logging.DEBUG)

        logger.addHandler(console_handler)
        logger.addHandler(file_handler)

    return logger
107
+
108
+ # Initialize the global logger when module is imported
109
+ # logger = _initialize_logger()
110
+
111
def log_if_enabled(func):
    """Decorator that runs ``func`` only while logging is enabled.

    Args:
        func: The logging helper to guard.

    Returns:
        A wrapper that calls ``func`` and returns its result when
        ``LOGGING_STATE.enabled`` is true, and otherwise does nothing and
        returns ``None``. The wrapper keeps ``func``'s name and docstring.
    """
    # Imported locally so this block does not depend on the module's import
    # header being changed.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        if LOGGING_STATE.enabled:
            return func(*args, **kwargs)
        return None

    return wrapper
117
+
118
@log_if_enabled
def debug(msg: str, example_idx: int = None):
    """Emit ``msg`` at DEBUG level on the shared logger.

    ``example_idx`` is accepted but not used by this function.
    """
    logger.log(logging.DEBUG, msg)
122
+
123
@log_if_enabled
def info(msg: str, example_idx: int = None):
    """Emit ``msg`` at INFO level on the shared logger.

    ``example_idx`` is accepted but not used by this function.
    """
    logger.log(logging.INFO, msg)
127
+
128
@log_if_enabled
def warning(msg: str, example_idx: int = None):
    """Emit ``msg`` at WARNING level on the shared logger.

    ``example_idx`` is accepted but not used by this function.
    """
    logger.log(logging.WARNING, msg)
132
+
133
@log_if_enabled
def error(msg: str, example_idx: int = None):
    """Emit ``msg`` at ERROR level on the shared logger.

    ``example_idx`` is accepted but not used by this function.
    """
    logger.log(logging.ERROR, msg)
137
+
138
def create_example_handler(
    timestamp: str,
    example_idx: int,
    path: str = "./logs",
) -> RotatingFileHandler:
    """Create a rotating file handler dedicated to one example.

    Args:
        timestamp: Run timestamp; used in the file name and in each line.
        example_idx: Example index; used in the file name and in each line.
        path: Base log directory; the file is written to
            ``<path>/examples/<timestamp>_example_<example_idx>.log``.

    Returns:
        A DEBUG-level ``RotatingFileHandler`` (1MB per file, 5 backups). The
        caller is responsible for attaching it to a logger and closing it.
    """
    debug(f"Creating example handler for timestamp={timestamp}, example_idx={example_idx}")
    log_dir = Path(path) / "examples"
    log_dir.mkdir(exist_ok=True, parents=True)

    formatter = logging.Formatter(
        fmt='%(asctime)s - %(name)s - %(levelname)s - [Example_%(example_id)s][%(timestamp)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Create a unique file for each example
    file_handler = RotatingFileHandler(
        log_dir / f"{timestamp}_example_{example_idx}.log",
        maxBytes=1024 * 1024,  # 1MB
        backupCount=5,
        mode='a'
    )

    def _tag_record(record):
        # The format string above needs these two fields on every record.
        # Stamping them here makes the handler self-sufficient instead of
        # relying on another handler's formatter having set them first.
        record.example_id = example_idx
        record.timestamp = timestamp
        return True

    file_handler.addFilter(_tag_record)
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.DEBUG)
    info(f"Created example handler for example {example_idx}")
    return file_handler
164
+
165
@contextmanager
def example_logging_context(timestamp: str, example_idx: int):
    """Route log output for one example to its own file for the block's duration.

    Args:
        timestamp: Run timestamp, used to tag log lines and name the file.
        example_idx: Index of the example being processed.

    Does nothing (just runs the block) when logging is disabled. Otherwise it
    records the active example in the module-level ``current_example_id`` /
    ``current_timestamp``, attaches a per-example file handler to the shared
    logger, and on exit detaches and closes that handler and restores the
    previously active example (``None`` when there was none).
    """
    global current_example_id, current_timestamp

    if not LOGGING_STATE.enabled:
        yield
        return

    debug(f"Entering example logging context for example {example_idx}")

    # Saved so that a nested context hands control back to the outer example
    # instead of clearing it.
    previous_example_id = current_example_id
    previous_timestamp = current_timestamp
    handler = None
    try:
        # Everything that changes shared state happens inside the try block so
        # that a failure while creating the handler cannot leave stale values
        # behind.
        current_example_id = example_idx
        current_timestamp = timestamp
        handler = create_example_handler(timestamp, example_idx, path=LOGGING_STATE.path)
        logger.addHandler(handler)
        yield
    finally:
        current_example_id = previous_example_id
        current_timestamp = previous_timestamp
        if handler is not None:
            logger.removeHandler(handler)
            handler.close()
            debug(f"Closed example handler for example {example_idx}")