openlit 1.34.29__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +111 -24
- openlit/instrumentation/crewai/async_crewai.py +114 -0
- openlit/instrumentation/crewai/crewai.py +104 -131
- openlit/instrumentation/crewai/utils.py +615 -0
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +312 -101
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +660 -186
- openlit/instrumentation/openai_agents/__init__.py +6 -2
- openlit/instrumentation/openai_agents/processor.py +409 -537
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +101 -7
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.29.dist-info/RECORD +0 -166
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/__init__.py
CHANGED
@@ -137,7 +137,9 @@ def module_exists(module_name):
     return True
 
 
-def instrument_if_available(
+def instrument_if_available(
+    instrumentor_name, instrumentor_instance, config, disabled_instrumentors
+):
     """Instruments the specified instrumentor if its library is available."""
     if instrumentor_name in disabled_instrumentors:
         logger.info("Instrumentor %s is disabled", instrumentor_name)
@@ -242,13 +244,13 @@ def init(
 
     # Setup events based on the provided or default configuration.
     event_provider = setup_events(
- …
+        application_name=application_name,
+        environment=environment,
+        event_logger=event_logger,
+        otlp_endpoint=None,
+        otlp_headers=None,
+        disable_batch=disable_batch,
+    )
 
     if not event_provider:
         logger.error("OpenLIT events setup failed. Events will not be available")
@@ -268,8 +270,11 @@ def init(
         )
         return
 
-    if …
- …
+    if (
+        os.getenv("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "").lower()
+        == "false"
+    ):
+        capture_message_content = False
 
     # Update global configuration with the provided settings.
     config.update_config(
@@ -462,9 +467,7 @@ def trace(wrapped):
                     SERVICE_NAME,
                     OpenlitConfig.application_name,
                 )
-                span.set_attribute(
-                    DEPLOYMENT_ENVIRONMENT, OpenlitConfig.environment
-                )
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT, OpenlitConfig.environment)
             except Exception as meta_exception:
                 logging.error(
                     "Failed to set metadata for %s: %s",
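The `init()` hunks above reformat the `setup_events(...)` call and add an environment-variable override for message-content capture. Below is a minimal sketch of how that override would be exercised; `application_name`, `environment`, and `disable_batch` are taken from the parameters visible in the diff, while passing `capture_message_content` as a keyword (it appears in the hunk only as the local that `init()` overrides) is an assumption.

```python
import os

import openlit

# Assumption read off the added if-block: when this variable is "false",
# init() forces capture_message_content to False before updating the config.
os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "false"

openlit.init(
    application_name="demo-app",   # forwarded to setup_events() per the hunk above
    environment="dev",
    disable_batch=True,
    capture_message_content=True,  # expected to be overridden by the env check
)
```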
openlit/_instrumentors.py
CHANGED
@@ -1,6 +1,7 @@
 """
 OpenLIT Instrumentors
 """
+
 import importlib
 
 # Mapping of instrumentor names to their required Python packages
@@ -49,7 +50,7 @@ MODULE_NAME_MAP = {
     "letta": "letta",
     "together": "together",
     "openai-agents": "agents",
-    "pydantic_ai": "pydantic_ai"
+    "pydantic_ai": "pydantic_ai",
 }
 
 # Dictionary mapping instrumentor names to their full module paths
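Tying the two files together: `instrument_if_available()` skips any name found in `disabled_instrumentors`, and `MODULE_NAME_MAP` lists the instrumentor names the package recognises. A hedged sketch of opting out of one instrumentor at init time; the `disabled_instrumentors` keyword is assumed to be the public way these names reach the check shown above.

```python
import openlit

# "pydantic_ai" is one of the keys shown in MODULE_NAME_MAP above; passing it here
# should make instrument_if_available() log the instrumentor as disabled and skip it.
openlit.init(
    application_name="demo-app",
    environment="dev",
    disabled_instrumentors=["pydantic_ai"],
)
```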
openlit/evals/all.py
CHANGED
@@ -11,11 +11,14 @@ from openlit.evals.utils import (
     llm_response,
     parse_llm_response,
     eval_metrics,
-    eval_metric_attributes
+    eval_metric_attributes,
 )
 
- …
+
+def get_system_prompt(
+    custom_categories: Optional[Dict[str, str]] = None,
+    threshold_score: Optional[float] = 0.5,
+) -> str:
     """
     Returns the system prompt used for LLM analysis, including custom categories if provided.
 
@@ -51,8 +54,12 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
 
     if custom_categories:
-        custom_categories_str = "\n".join(
- …
+        custom_categories_str = "\n".join(
+            [f"- {key}: {value}" for key, value in custom_categories.items()]
+        )
+        base_prompt += (
+            f"\n Additional Hallucination Categories:\n{custom_categories_str}"
+        )
 
     base_prompt += """
 
@@ -96,6 +103,7 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
     return base_prompt
 
+
 class All:
     """
     A class to detect Bias, Hallucination and Toxicity in AI responses using LLM or custom categories.
@@ -108,11 +116,16 @@ class All:
         custom_categories (Optional[Dict[str, str]]): Additional categories for prompt injections.
     """
 
-    def __init__(
- …
+    def __init__(
+        self,
+        provider: Optional[str] = "openai",
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_categories: Optional[Dict[str, str]] = None,
+        collect_metrics: Optional[bool] = False,
+        threshold_score: Optional[float] = 0.5,
+    ):
         """
         Initializes the All Evals detector with specified LLM settings, custom rules, and categories.
 
@@ -130,15 +143,22 @@ class All:
         self.provider = provider
         if self.provider is None:
             raise ValueError("An LLM provider must be specified evaluation.")
-        self.api_key, self.model, self.base_url = setup_provider(
+        self.api_key, self.model, self.base_url = setup_provider(
+            provider, api_key, model, base_url
+        )
         self.collect_metrics = collect_metrics
         self.custom_categories = custom_categories
         self.threshold_score = threshold_score
-        self.system_prompt = get_system_prompt(
- …
+        self.system_prompt = get_system_prompt(
+            self.custom_categories, self.threshold_score
+        )
+
+    def measure(
+        self,
+        prompt: Optional[str] = "",
+        contexts: Optional[List[str]] = None,
+        text: Optional[str] = None,
+    ) -> JsonOutput:
         """
         Detects toxicity in AI output using LLM or custom rules.
 
@@ -156,14 +176,23 @@ class All:
         llm_result = parse_llm_response(response)
         result_verdict = "yes" if llm_result.score > self.threshold_score else "no"
 
-        result = JsonOutput(
- …
+        result = JsonOutput(
+            score=llm_result.score,
+            evaluation=llm_result.evaluation,
+            classification=llm_result.classification,
+            explanation=llm_result.explanation,
+            verdict=result_verdict,
+        )
 
         if self.collect_metrics:
             eval_counter = eval_metrics()
-            attributes = eval_metric_attributes(
- …
+            attributes = eval_metric_attributes(
+                result_verdict,
+                result.score,
+                result.evaluation,
+                result.classification,
+                result.explanation,
+            )
             eval_counter.add(1, attributes)
 
         return result
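The reformatted `__init__` and `measure` signatures spell out the evaluator's public surface. The sketch below is built only from the argument and field names visible in this diff; it assumes `All` is re-exported from `openlit.evals` (otherwise import it from `openlit.evals.all`) and that an `OPENAI_API_KEY` is available for the default `openai` provider. `BiasDetector`, `Hallucination`, and `ToxicityDetector` in the following sections follow the same pattern.

```python
from openlit.evals import All  # assumed re-export of the class defined in openlit/evals/all.py

detector = All(
    provider="openai",      # default per the signature above
    threshold_score=0.5,    # verdict is "yes" only when score > threshold_score
    collect_metrics=True,   # emits an eval counter via eval_metrics()
)

result = detector.measure(
    prompt="When was the Eiffel Tower completed?",
    contexts=["The Eiffel Tower was completed in 1889."],
    text="The Eiffel Tower was completed in 1889.",
)

# JsonOutput fields shown in the hunk above
print(result.verdict, result.score, result.classification, result.explanation)
```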
openlit/evals/bias_detection.py
CHANGED
@@ -11,11 +11,14 @@ from openlit.evals.utils import (
     llm_response,
     parse_llm_response,
     eval_metrics,
-    eval_metric_attributes
+    eval_metric_attributes,
 )
 
- …
+
+def get_system_prompt(
+    custom_categories: Optional[Dict[str, str]] = None,
+    threshold_score: Optional[float] = 0.5,
+) -> str:
     """
     Returns the system prompt used for LLM analysis, including custom categories if provided.
 
@@ -55,7 +58,9 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
 
     if custom_categories:
-        custom_categories_str = "\n".join(
+        custom_categories_str = "\n".join(
+            [f"- {key}: {value}" for key, value in custom_categories.items()]
+        )
         base_prompt += f"\n Additional Bias Categories:\n{custom_categories_str}"
 
     base_prompt += """
@@ -100,6 +105,7 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
     return base_prompt
 
+
 class BiasDetector:
     """
     A class to detect Bias in AI responses using LLM or custom categories.
@@ -112,11 +118,16 @@ class BiasDetector:
         custom_categories (Optional[Dict[str, str]]): Additional categories for prompt injections.
     """
 
-    def __init__(
- …
+    def __init__(
+        self,
+        provider: Optional[str] = "openai",
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_categories: Optional[Dict[str, str]] = None,
+        collect_metrics: Optional[bool] = False,
+        threshold_score: Optional[float] = 0.5,
+    ):
         """
         Initializes the Bias detector with specified LLM settings, custom rules, and categories.
 
@@ -134,15 +145,22 @@ class BiasDetector:
         self.provider = provider
         if self.provider is None:
             raise ValueError("An LLM provider must be specified for Bias detection.")
-        self.api_key, self.model, self.base_url = setup_provider(
+        self.api_key, self.model, self.base_url = setup_provider(
+            provider, api_key, model, base_url
+        )
         self.collect_metrics = collect_metrics
         self.custom_categories = custom_categories
         self.threshold_score = threshold_score
-        self.system_prompt = get_system_prompt(
- …
+        self.system_prompt = get_system_prompt(
+            self.custom_categories, self.threshold_score
+        )
+
+    def measure(
+        self,
+        prompt: Optional[str] = "",
+        contexts: Optional[List[str]] = None,
+        text: Optional[str] = None,
+    ) -> JsonOutput:
         """
         Detects toxicity in AI output using LLM or custom rules.
 
@@ -160,14 +178,23 @@ class BiasDetector:
         llm_result = parse_llm_response(response)
         result_verdict = "yes" if llm_result.score > self.threshold_score else "no"
 
-        result = JsonOutput(
- …
+        result = JsonOutput(
+            score=llm_result.score,
+            evaluation=llm_result.evaluation,
+            classification=llm_result.classification,
+            explanation=llm_result.explanation,
+            verdict=result_verdict,
+        )
 
         if self.collect_metrics:
             eval_counter = eval_metrics()
-            attributes = eval_metric_attributes(
- …
+            attributes = eval_metric_attributes(
+                result_verdict,
+                result.score,
+                result.evaluation,
+                result.classification,
+                result.explanation,
+            )
             eval_counter.add(1, attributes)
 
         return result
openlit/evals/hallucination.py
CHANGED
@@ -11,12 +11,15 @@ from openlit.evals.utils import (
     llm_response,
     parse_llm_response,
     eval_metrics,
-    eval_metric_attributes
+    eval_metric_attributes,
 )
 
+
 # pylint: disable=unused-argument
-def get_system_prompt(
- …
+def get_system_prompt(
+    custom_categories: Optional[Dict[str, str]] = None,
+    threshold_score: Optional[float] = 0.5,
+) -> str:
     """
     Returns the system prompt used for LLM analysis, including custom categories if provided.
 
@@ -49,8 +52,12 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
 
     if custom_categories:
-        custom_categories_str = "\n".join(
- …
+        custom_categories_str = "\n".join(
+            [f"- {key}: {value}" for key, value in custom_categories.items()]
+        )
+        base_prompt += (
+            f"\n Additional Hallucination Categories:\n{custom_categories_str}"
+        )
 
     base_prompt += """
 
@@ -98,6 +105,7 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
 
     return base_prompt
 
+
 class Hallucination:
     """
     A class to detect hallucinations in AI responses using LLM or custom categories.
@@ -110,11 +118,16 @@ class Hallucination:
         custom_categories (Optional[Dict[str, str]]): Additional categories for prompt injections.
     """
 
-    def __init__(
- …
+    def __init__(
+        self,
+        provider: Optional[str] = "openai",
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_categories: Optional[Dict[str, str]] = None,
+        collect_metrics: Optional[bool] = False,
+        threshold_score: Optional[float] = 0.5,
+    ):
         """
         Initializes the Hallucination detector with specified LLM settings, custom rules, and categories.
 
@@ -131,16 +144,25 @@ class Hallucination:
 
         self.provider = provider
         if self.provider is None:
-            raise ValueError(
- …
+            raise ValueError(
+                "An LLM provider must be specified for Hallucination detection."
+            )
+        self.api_key, self.model, self.base_url = setup_provider(
+            provider, api_key, model, base_url
+        )
         self.collect_metrics = collect_metrics
         self.custom_categories = custom_categories
         self.threshold_score = threshold_score
-        self.system_prompt = get_system_prompt(
- …
+        self.system_prompt = get_system_prompt(
+            self.custom_categories, self.threshold_score
+        )
+
+    def measure(
+        self,
+        prompt: Optional[str] = "",
+        contexts: Optional[List[str]] = None,
+        text: Optional[str] = None,
+    ) -> JsonOutput:
         """
         Detects hallucinations in AI output using LLM or custom rules.
 
@@ -157,14 +179,23 @@ class Hallucination:
         response = llm_response(self.provider, llm_prompt, self.model, self.base_url)
         llm_result = parse_llm_response(response)
         result_verdict = "yes" if llm_result.score > self.threshold_score else "no"
-        result = JsonOutput(
- …
+        result = JsonOutput(
+            score=llm_result.score,
+            evaluation=llm_result.evaluation,
+            classification=llm_result.classification,
+            explanation=llm_result.explanation,
+            verdict=result_verdict,
+        )
 
         if self.collect_metrics:
             eval_counter = eval_metrics()
-            attributes = eval_metric_attributes(
- …
+            attributes = eval_metric_attributes(
+                result_verdict,
+                result.score,
+                result.evaluation,
+                result.classification,
+                result.explanation,
+            )
             eval_counter.add(1, attributes)
 
         return result
openlit/evals/toxicity.py
CHANGED
@@ -11,11 +11,14 @@ from openlit.evals.utils import (
     llm_response,
     parse_llm_response,
     eval_metrics,
-    eval_metric_attributes
+    eval_metric_attributes,
 )
 
- …
+
+def get_system_prompt(
+    custom_categories: Optional[Dict[str, str]] = None,
+    threshold_score: Optional[float] = 0.5,
+) -> str:
     """
     Returns the system prompt used for LLM analysis, including custom categories if provided.
 
@@ -50,7 +53,9 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
 
     if custom_categories:
-        custom_categories_str = "\n".join(
+        custom_categories_str = "\n".join(
+            [f"- {key}: {value}" for key, value in custom_categories.items()]
+        )
         base_prompt += f"\n Additional Toxicity Categories:\n{custom_categories_str}"
 
     base_prompt += """
@@ -95,6 +100,7 @@ def get_system_prompt(custom_categories: Optional[Dict[str, str]] = None,
     """
     return base_prompt
 
+
 class ToxicityDetector:
     """
     A class to detect Toxicity in AI responses using LLM or custom categories.
@@ -107,11 +113,16 @@ class ToxicityDetector:
         custom_categories (Optional[Dict[str, str]]): Additional categories for prompt injections.
     """
 
-    def __init__(
- …
+    def __init__(
+        self,
+        provider: Optional[str] = "openai",
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        base_url: Optional[str] = None,
+        custom_categories: Optional[Dict[str, str]] = None,
+        collect_metrics: Optional[bool] = False,
+        threshold_score: Optional[float] = 0.5,
+    ):
         """
         Initializes the toxicity detector with specified LLM settings, custom rules, and categories.
 
@@ -128,16 +139,25 @@ class ToxicityDetector:
 
        self.provider = provider
        if self.provider is None:
-            raise ValueError(
- …
+            raise ValueError(
+                "An LLM provider must be specified for toxicity detection."
+            )
+        self.api_key, self.model, self.base_url = setup_provider(
+            provider, api_key, model, base_url
+        )
         self.collect_metrics = collect_metrics
         self.custom_categories = custom_categories
         self.threshold_score = threshold_score
-        self.system_prompt = get_system_prompt(
- …
+        self.system_prompt = get_system_prompt(
+            self.custom_categories, self.threshold_score
+        )
+
+    def measure(
+        self,
+        prompt: Optional[str] = "",
+        contexts: Optional[List[str]] = "",
+        text: Optional[str] = None,
+    ) -> JsonOutput:
         """
         Detects toxicity in AI output using LLM or custom rules.
 
@@ -155,14 +175,23 @@ class ToxicityDetector:
         llm_result = parse_llm_response(response)
         result_verdict = "yes" if llm_result.score > self.threshold_score else "no"
 
-        result = JsonOutput(
- …
+        result = JsonOutput(
+            score=llm_result.score,
+            evaluation=llm_result.evaluation,
+            classification=llm_result.classification,
+            explanation=llm_result.explanation,
+            verdict=result_verdict,
+        )
 
         if self.collect_metrics:
             eval_counter = eval_metrics()
-            attributes = eval_metric_attributes(
- …
+            attributes = eval_metric_attributes(
+                result_verdict,
+                result.score,
+                result.evaluation,
+                result.classification,
+                result.explanation,
+            )
             eval_counter.add(1, attributes)
 
         return result