rag-sentinel 0.1.2.tar.gz → 0.1.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rag_sentinel-0.1.2/src/rag_sentinel.egg-info → rag_sentinel-0.1.4}/PKG-INFO +1 -1
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/pyproject.toml +1 -1
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/__init__.py +1 -1
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/cli.py +44 -7
- rag_sentinel-0.1.4/src/rag_sentinel/evaluator.py +363 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4/src/rag_sentinel.egg-info}/PKG-INFO +1 -1
- rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py +0 -392
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/LICENSE +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/MANIFEST.in +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/README.md +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/setup.cfg +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/.env.template +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/config.ini.template +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/rag_eval_config.yaml +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/SOURCES.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/dependency_links.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/entry_points.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/requires.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/top_level.txt +0 -0
--- rag_sentinel-0.1.2/src/rag_sentinel/cli.py
+++ rag_sentinel-0.1.4/src/rag_sentinel/cli.py
@@ -20,7 +20,9 @@ import socket
 import subprocess
 import time
 import argparse
+import configparser
 from pathlib import Path
+from urllib.parse import urlparse


 # =============================================================================
@@ -34,6 +36,39 @@ TEMPLATES_DIR = Path(__file__).parent / "templates"
 # Helper Functions
 # =============================================================================

+def get_mlflow_host_port():
+    """
+    Read MLflow tracking_uri from config.ini and parse host and port.
+
+    Returns:
+        tuple: (host, port) - defaults to ("127.0.0.1", 5001) if not configured
+    """
+    default_host = "127.0.0.1"
+    default_port = 5000
+
+    config_path = Path("config.ini")
+    if not config_path.exists():
+        return default_host, default_port
+
+    try:
+        ini = configparser.ConfigParser()
+        ini.read(config_path)
+
+        tracking_uri = ini.get("mlflow", "tracking_uri", fallback=None)
+        if not tracking_uri:
+            return default_host, default_port
+
+        # Parse the URI (e.g., "http://192.168.1.100:5000")
+        parsed = urlparse(tracking_uri)
+
+        host = parsed.hostname or default_host
+        port = parsed.port or default_port
+
+        return host, port
+    except Exception:
+        return default_host, default_port
+
+
 def is_port_in_use(host, port):
     """
     Check if a port is already in use.
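As a sanity check on the new helper: `get_mlflow_host_port` is pure standard library, so its parsing step can be exercised in isolation. A minimal sketch (the `[mlflow]` section contents below are hypothetical; the helper only reads `tracking_uri`):

```python
import configparser
from urllib.parse import urlparse

# Hypothetical config.ini contents, mirroring the shape the helper expects
ini = configparser.ConfigParser()
ini.read_string("[mlflow]\ntracking_uri = http://192.168.1.100:5000\n")

parsed = urlparse(ini.get("mlflow", "tracking_uri"))
print(parsed.hostname, parsed.port)  # -> 192.168.1.100 5000
```

When the URI omits an explicit port, `urlparse(...).port` is `None`, which is why the helper falls back to its defaults via `or`. Note that the docstring added in this hunk says the default is port 5001 while the code sets `default_port = 5000`; that mismatch is present in the released 0.1.4 source.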
@@ -49,7 +84,7 @@ def is_port_in_use(host, port):
         return s.connect_ex((host, port)) == 0


-def start_mlflow_server(host="127.0.0.1", port=5001):
+def start_mlflow_server(host, port):
     """
     Start MLflow tracking server as a background process.

@@ -57,8 +92,8 @@ def start_mlflow_server(host="127.0.0.1", port=5001):
     will skip starting a new instance.

     Args:
-        host: The hostname to bind the server to
-        port: The port number for the server
+        host: The hostname to bind the server to
+        port: The port number for the server

     Returns:
         subprocess.Popen or None: The server process, or None if already running
@@ -144,8 +179,9 @@ def cmd_run(args):

     This command:
     1. Validates that all required config files exist
-    2.
-    3.
+    2. Reads MLflow host/port from config.ini
+    3. Starts MLflow server (unless --no-server is specified)
+    4. Runs the evaluation using the evaluator module

     Args:
         args: Parsed command-line arguments (includes --no-server flag)
@@ -161,9 +197,10 @@ def cmd_run(args):
     print("\nRun 'rag-sentinel init' first to create config files.")
     sys.exit(1)

-    # Start MLflow server if not disabled
+    # Start MLflow server if not disabled (uses host/port from config.ini)
     if not args.no_server:
-
+        host, port = get_mlflow_host_port()
+        start_mlflow_server(host, port)

     # Import and run the evaluation
     from rag_sentinel.evaluator import run_evaluation
--- /dev/null
+++ rag_sentinel-0.1.4/src/rag_sentinel/evaluator.py
@@ -0,0 +1,363 @@
+"""
+RAGSentinel Evaluator - Core evaluation logic.
+
+This module contains the main evaluation pipeline for RAGSentinel.
+It handles configuration loading, LLM initialization, API communication,
+Ragas metrics evaluation, and MLflow result logging.
+"""
+
+import os
+import re
+import yaml
+import configparser
+import requests
+import pandas as pd
+import mlflow
+from dotenv import load_dotenv
+from datasets import Dataset
+from ragas import evaluate
+from ragas.run_config import RunConfig
+from ragas.metrics import (
+    Faithfulness,
+    AnswerRelevancy,
+    ContextPrecision,
+    AnswerCorrectness,
+)
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings, AzureChatOpenAI, AzureOpenAIEmbeddings
+from langchain_ollama import ChatOllama, OllamaEmbeddings
+
+
+# =============================================================================
+# Configuration Loading
+# =============================================================================
+
+
+def load_config(yaml_file='rag_eval_config.yaml'):
+    """
+    Load configuration from YAML file with values resolved from .env and config.ini.
+
+    Returns:
+        dict: Fully resolved configuration dictionary
+    """
+    load_dotenv('.env')
+
+    ini = configparser.ConfigParser()
+    ini.read('config.ini')
+
+    def resolve(obj):
+        if isinstance(obj, dict):
+            return {k: resolve(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [resolve(i) for i in obj]
+        if isinstance(obj, str):
+            # Resolve ${ENV:VAR} and ${INI:section.key} placeholders
+            result = re.sub(r'\$\{ENV:([^}]+)\}', lambda m: os.getenv(m.group(1), ''), obj)
+            result = re.sub(r'\$\{INI:([^.]+)\.([^}]+)\}',
+                            lambda m: ini.get(m.group(1), m.group(2), fallback=''), result)
+            # Convert types
+            if result.lower() == 'true': return True
+            if result.lower() == 'false': return False
+            try:
+                if '.' in result: return float(result)
+            except ValueError:
+                pass
+            return result
+        return obj
+
+    with open(yaml_file, 'r') as f:
+        return resolve(yaml.safe_load(f))
+
+
+def get_llm(config):
+    """Initialize LLM based on config."""
+    provider = config['ragas']['llm']['provider']
+
+    if provider == "azure":
+        azure_config = config['ragas']['llm']['azure']
+        return AzureChatOpenAI(
+            azure_endpoint=azure_config['endpoint'],
+            api_key=azure_config['api_key'],
+            deployment_name=azure_config['deployment_name'],
+            model=azure_config['model'],
+            temperature=azure_config['temperature'],
+            api_version=azure_config['api_version']
+        )
+    elif provider == "openai":
+        openai_config = config['ragas']['llm']['openai']
+        return ChatOpenAI(
+            model=openai_config['model'],
+            temperature=openai_config['temperature'],
+            api_key=openai_config['api_key']
+        )
+    elif provider == "ollama":
+        ollama_config = config['ragas']['llm']['ollama']
+        return ChatOllama(
+            base_url=ollama_config['base_url'],
+            model=ollama_config['model'],
+            temperature=ollama_config['temperature']
+        )
+    else:
+        raise ValueError(f"Unsupported LLM provider: {provider}")
+
+
+def get_embeddings(config):
+    """Initialize embeddings based on config."""
+    provider = config['ragas']['embeddings']['provider']
+
+    if provider == "azure":
+        azure_config = config['ragas']['embeddings']['azure']
+        return AzureOpenAIEmbeddings(
+            azure_endpoint=azure_config['endpoint'],
+            api_key=azure_config['api_key'],
+            deployment=azure_config['deployment_name'],
+            api_version=azure_config['api_version']
+        )
+    elif provider == "openai":
+        openai_config = config['ragas']['embeddings']['openai']
+        return OpenAIEmbeddings(
+            model=openai_config['model'],
+            api_key=openai_config['api_key']
+        )
+    elif provider == "ollama":
+        ollama_config = config['ragas']['embeddings']['ollama']
+        return OllamaEmbeddings(
+            base_url=ollama_config['base_url'],
+            model=ollama_config['model']
+        )
+    else:
+        raise ValueError(f"Unsupported embeddings provider: {provider}")
+
+
+def get_metrics(config):
+    """Get Ragas metrics based on config."""
+    metric_map = {
+        "Faithfulness": Faithfulness(),
+        "AnswerRelevancy": AnswerRelevancy(),
+        "ContextPrecision": ContextPrecision(),
+        "AnswerCorrectness": AnswerCorrectness(),
+    }
+
+    metric_names = config['ragas']['metrics']
+    return [metric_map[name] for name in metric_names if name in metric_map]
+
+
+def get_auth_headers_and_cookies(config):
+    """Get authentication headers and cookies based on config."""
+    auth_config = config['backend']['auth']
+    auth_type = auth_config.get('type', 'none')
+
+    headers = {}
+    cookies = {}
+
+    if auth_type == "cookie":
+        cookies[auth_config['cookie_name']] = auth_config['cookie_value']
+    elif auth_type == "bearer":
+        headers['Authorization'] = f"Bearer {auth_config['bearer_token']}"
+    elif auth_type == "header":
+        headers[auth_config['header_name']] = auth_config['header_value']
+
+    return headers, cookies
+
+
+def extract_response_data(response, endpoint_config):
+    """Extract data from API response."""
+    if endpoint_config.get('stream', False):
+        return "".join(chunk.decode() for chunk in response.iter_content(chunk_size=None))
+
+    # Try to parse as JSON first
+    try:
+        data = response.json()
+        response_key = endpoint_config.get('response_key')
+        if response_key:
+            return data.get(response_key)
+        return data
+    except:
+        # If JSON parsing fails, return as plain text
+        return response.text
+
+
+def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
+    """Make API request to backend."""
+    url = base_url + endpoint_config['path']
+    method = endpoint_config.get('method', 'POST')
+
+    headers = {**endpoint_config.get('headers', {}), **auth_headers}
+
+    # Flexible body preparation
+    body = {}
+    for key, value in endpoint_config.get('body', {}).items():
+        if isinstance(value, str) and ("{query}" in value or "{chat_id}" in value):
+            body[key] = value.format(query=query, chat_id=chat_id)
+        elif key == "chat_id":
+            try:
+                body[key] = int(chat_id)
+            except (ValueError, TypeError):
+                body[key] = chat_id
+        else:
+            body[key] = value
+
+    if method.upper() == 'POST':
+        resp = requests.post(
+            url,
+            json=body,
+            headers=headers,
+            cookies=auth_cookies,
+            stream=endpoint_config.get('stream', False),
+            verify=verify_ssl
+        )
+    elif method.upper() == 'GET':
+        resp = requests.get(
+            url,
+            params=body,
+            headers=headers,
+            cookies=auth_cookies,
+            verify=verify_ssl
+        )
+    else:
+        raise ValueError(f"Unsupported HTTP method: {method}")
+
+    resp.raise_for_status()
+    return resp
+
+
+def get_context(config, query, chat_id, auth_headers, auth_cookies):
+    """Retrieve context from backend API."""
+    base_url = config['backend']['base_url']
+    endpoint_config = config['backend']['endpoints']['context']
+    verify_ssl = config['backend'].get('verify_ssl', True)
+
+    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+    context = extract_response_data(response, endpoint_config)
+
+    if isinstance(context, str):
+        return [context]
+    elif isinstance(context, list):
+        return context
+    else:
+        return [str(context)]
+
+
+def get_answer(config, query, chat_id, auth_headers, auth_cookies):
+    """Get answer from backend API."""
+    base_url = config['backend']['base_url']
+    endpoint_config = config['backend']['endpoints']['answer']
+    verify_ssl = config['backend'].get('verify_ssl', True)
+
+    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+    answer = extract_response_data(response, endpoint_config)
+
+    return str(answer)
+
+
+
+def run_evaluation():
+    """Main evaluation function."""
+    print("=" * 60)
+    print("RAGSentinel - RAG Evaluation Framework")
+    print("=" * 60)
+
+    print("\n📁 Loading configuration...")
+    config = load_config()
+
+    dataset_path = config['dataset']['path']
+    print(f"📊 Loading dataset from {dataset_path}...")
+    dataset = pd.read_csv(dataset_path)
+
+    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
+
+    results = []
+    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
+
+    for idx, row in dataset.iterrows():
+        chat_id = str(row['chat_id'])
+        query = row['query']
+        ground_truth = row['ground_truth']
+
+        try:
+            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
+            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
+
+            results.append({
+                'question': query,
+                'contexts': context,
+                'answer': answer,
+                'ground_truth': ground_truth
+            })
+            print(f"  ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
+        except Exception as e:
+            print(f"  ✗ Error processing query {idx + 1}: {e}")
+            continue
+
+    if not results:
+        print("\n❌ No results collected. Exiting.")
+        return
+
+    eval_df = pd.DataFrame(results)
+    print(f"\n✓ Collected {len(eval_df)} responses")
+
+    print("\n🤖 Initializing LLM and embeddings...")
+    llm = get_llm(config)
+    embeddings = get_embeddings(config)
+
+    metrics = get_metrics(config)
+    print(f"  Metrics: {', '.join(config['ragas']['metrics'])}")
+
+    print("\n📈 Preparing data for RAGAS evaluation...")
+    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
+
+    for _, row in eval_df.iterrows():
+        contexts = row.get("contexts", [])
+        if not isinstance(contexts, list):
+            contexts = [str(contexts)]
+        contexts = [str(c) for c in contexts if c and str(c).strip()]
+        if not contexts:
+            contexts = ["No context available."]
+
+        ragas_data["question"].append(str(row["question"]))
+        ragas_data["answer"].append(str(row["answer"]))
+        ragas_data["contexts"].append(contexts)
+        ragas_data["ground_truth"].append(str(row["ground_truth"]))
+
+    dataset = Dataset.from_dict(ragas_data)
+
+    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
+
+    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
+
+    ragas_result = evaluate(
+        dataset,
+        metrics=metrics,
+        llm=llm,
+        embeddings=embeddings,
+        batch_size=2,
+        run_config=run_config,
+        raise_exceptions=False
+    )
+
+    print("\n📊 Processing results...")
+    scores_df = ragas_result.to_pandas()
+    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
+    mean_scores = scores_df[numeric_columns].mean().to_dict()
+
+    mlflow_config = config['mlflow']
+    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
+    mlflow.set_experiment(mlflow_config['experiment_name'])
+
+    print("\n📤 Logging results to MLflow...")
+    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
+    with mlflow.start_run(run_name=run_name):
+        print("\n" + "=" * 40)
+        print("📊 EVALUATION RESULTS")
+        print("=" * 40)
+        for metric_name, value in mean_scores.items():
+            mlflow.log_metric(metric_name, value)
+            print(f"  {metric_name}: {value:.4f}")
+
+        mlflow.log_param("dataset_path", dataset_path)
+        mlflow.log_param("num_samples", len(eval_df))
+        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
+
+    print("\n" + "=" * 60)
+    print("✅ Evaluation complete!")
+    print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
+    print("=" * 60)
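The rewritten `load_config` above collapses 0.1.2's `resolve_placeholder`/`resolve_config` pair (removed below) into a single recursive `resolve` closure, and newly coerces `"true"`/`"false"` to booleans and dotted numerics to floats. A self-contained sketch of the placeholder syntax, reusing the exact regexes from the diff (the environment variable and INI contents are made up for illustration):

```python
import configparser
import os
import re

os.environ["API_KEY"] = "sk-example"  # hypothetical value
ini = configparser.ConfigParser()
ini.read_string("[mlflow]\ntracking_uri = http://127.0.0.1:5000\n")

raw = "key=${ENV:API_KEY} uri=${INI:mlflow.tracking_uri}"
out = re.sub(r'\$\{ENV:([^}]+)\}', lambda m: os.getenv(m.group(1), ''), raw)
out = re.sub(r'\$\{INI:([^.]+)\.([^}]+)\}',
             lambda m: ini.get(m.group(1), m.group(2), fallback=''), out)
print(out)  # -> key=sk-example uri=http://127.0.0.1:5000
```

One consequence of the new coercion: a fully resolved string such as `"0.2"` comes back as the float `0.2`, while a value like the URI above keeps its string type because `float(...)` raises `ValueError`, which the `resolve` closure swallows.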
--- rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py
+++ /dev/null
@@ -1,392 +0,0 @@
-"""
-RAGSentinel Evaluator - Core evaluation logic.
-
-This module contains the main evaluation pipeline for RAGSentinel.
-It handles configuration loading, LLM initialization, API communication,
-Ragas metrics evaluation, and MLflow result logging.
-"""
-
-import os
-import re
-import yaml
-import configparser
-import requests
-import pandas as pd
-import mlflow
-from dotenv import load_dotenv
-from datasets import Dataset
-from ragas import evaluate, RunConfig
-from ragas.metrics import faithfulness, answer_relevancy, context_precision, answer_correctness
-
-
-# =============================================================================
-# Configuration Loading
-# =============================================================================
-
-
-def resolve_placeholder(value, env_vars, ini_config):
-    """
-    Resolve ${ENV:...} and ${INI:...} placeholders in a string value.
-
-    Args:
-        value: String that may contain placeholders
-        env_vars: Dictionary of environment variables
-        ini_config: ConfigParser object with ini file contents
-
-    Returns:
-        str: Value with all placeholders resolved
-    """
-    if not isinstance(value, str):
-        return value
-
-    # Resolve ${ENV:VAR_NAME} - reads from environment variables
-    env_pattern = r'\$\{ENV:([^}]+)\}'
-    def env_replacer(match):
-        var_name = match.group(1)
-        return env_vars.get(var_name, '')
-    value = re.sub(env_pattern, env_replacer, value)
-
-    # Resolve ${INI:section.key} - reads from config.ini
-    ini_pattern = r'\$\{INI:([^}]+)\}'
-    def ini_replacer(match):
-        path = match.group(1)
-        parts = path.split('.')
-        if len(parts) == 2:
-            section, key = parts
-            if ini_config.has_option(section, key):
-                return ini_config.get(section, key)
-        return ''
-    value = re.sub(ini_pattern, ini_replacer, value)
-
-    return value
-
-
-def resolve_config(obj, env_vars, ini_config):
-    """
-    Recursively resolve all placeholders in a configuration object.
-
-    Args:
-        obj: Configuration object (dict, list, or str)
-        env_vars: Dictionary of environment variables
-        ini_config: ConfigParser object
-
-    Returns:
-        Configuration object with all placeholders resolved
-    """
-    if isinstance(obj, dict):
-        return {k: resolve_config(v, env_vars, ini_config) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [resolve_config(item, env_vars, ini_config) for item in obj]
-    elif isinstance(obj, str):
-        return resolve_placeholder(obj, env_vars, ini_config)
-    return obj
-
-
-def load_config():
-    """
-    Load and merge configuration from .env, config.ini, and rag_eval_config.yaml.
-
-    Returns:
-        dict: Fully resolved configuration dictionary
-    """
-    # Load environment variables from .env file
-    load_dotenv('.env')
-    env_vars = dict(os.environ)
-
-    # Load INI configuration
-    ini_config = configparser.ConfigParser()
-    ini_config.read('config.ini')
-
-    # Load YAML configuration and resolve all placeholders
-    with open('rag_eval_config.yaml', 'r') as f:
-        yaml_config = yaml.safe_load(f)
-
-    return resolve_config(yaml_config, env_vars, ini_config)
-
-
-def get_llm(config):
-    """Initialize LLM based on provider."""
-    llm_config = config['ragas']['llm']
-    provider = llm_config['provider'].lower()
-
-    if provider == 'azure':
-        from langchain_openai import AzureChatOpenAI
-        return AzureChatOpenAI(
-            azure_endpoint=llm_config['azure_endpoint'],
-            api_key=llm_config['api_key'],
-            api_version=llm_config.get('api_version', '2024-02-15-preview'),
-            deployment_name=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    elif provider == 'openai':
-        from langchain_openai import ChatOpenAI
-        return ChatOpenAI(
-            api_key=llm_config['api_key'],
-            model=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    elif provider == 'ollama':
-        from langchain_ollama import ChatOllama
-        return ChatOllama(
-            base_url=llm_config.get('base_url', 'http://localhost:11434'),
-            model=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    else:
-        raise ValueError(f"Unknown LLM provider: {provider}")
-
-
-def get_embeddings(config):
-    """Initialize embeddings based on provider."""
-    emb_config = config['ragas']['embeddings']
-    provider = emb_config['provider'].lower()
-
-    if provider == 'azure':
-        from langchain_openai import AzureOpenAIEmbeddings
-        return AzureOpenAIEmbeddings(
-            azure_endpoint=emb_config['azure_endpoint'],
-            api_key=emb_config['api_key'],
-            api_version=emb_config.get('api_version', '2024-02-15-preview'),
-            deployment=emb_config['model']
-        )
-    elif provider == 'openai':
-        from langchain_openai import OpenAIEmbeddings
-        return OpenAIEmbeddings(
-            api_key=emb_config['api_key'],
-            model=emb_config['model']
-        )
-    elif provider == 'ollama':
-        from langchain_ollama import OllamaEmbeddings
-        return OllamaEmbeddings(
-            base_url=emb_config.get('base_url', 'http://localhost:11434'),
-            model=emb_config['model']
-        )
-    else:
-        raise ValueError(f"Unknown embeddings provider: {provider}")
-
-
-def get_metrics(config):
-    """Get list of Ragas metrics."""
-    metric_map = {
-        'faithfulness': faithfulness,
-        'answer_relevancy': answer_relevancy,
-        'context_precision': context_precision,
-        'answer_correctness': answer_correctness
-    }
-    return [metric_map[m] for m in config['ragas']['metrics'] if m in metric_map]
-
-
-def get_auth_headers_and_cookies(config):
-    """
-    Get authentication headers and cookies from backend config.
-
-    Supports three authentication types:
-    - cookie: Session cookie authentication
-    - bearer: Bearer token authentication
-    - header: Custom header authentication
-
-    Args:
-        config: Full configuration dictionary
-
-    Returns:
-        tuple: (headers dict, cookies dict)
-    """
-    # Auth config is nested under backend.auth in the YAML
-    auth_config = config.get('backend', {}).get('auth', {})
-    auth_type = auth_config.get('type', 'none').lower()
-    headers = {}
-    cookies = {}
-
-    if auth_type == 'cookie':
-        cookie_name = auth_config.get('cookie_name', 'session')
-        cookie_value = auth_config.get('cookie_value', '')
-        if cookie_value:
-            cookies[cookie_name] = cookie_value
-    elif auth_type == 'bearer':
-        token = auth_config.get('bearer_token', '')
-        if token:
-            headers['Authorization'] = f'Bearer {token}'
-    elif auth_type == 'header':
-        header_name = auth_config.get('header_name', '')
-        header_value = auth_config.get('header_value', '')
-        if header_name and header_value:
-            headers[header_name] = header_value
-
-    return headers, cookies
-
-
-def extract_response_data(response, endpoint_config):
-    """Extract data from API response."""
-    data = response.json()
-    response_path = endpoint_config.get('response_path', '')
-
-    if response_path:
-        for key in response_path.split('.'):
-            if isinstance(data, dict) and key in data:
-                data = data[key]
-            elif isinstance(data, list) and key.isdigit():
-                data = data[int(key)]
-            else:
-                return data
-    return data
-
-
-def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
-    """Make API request to backend."""
-    url = base_url.rstrip('/') + endpoint_config['path']
-    method = endpoint_config.get('method', 'POST').upper()
-
-    body = endpoint_config.get('body', {}).copy()
-    body['query'] = query
-    body['chat_id'] = chat_id
-
-    headers = {'Content-Type': 'application/json'}
-    headers.update(auth_headers)
-
-    if method == 'POST':
-        response = requests.post(url, json=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-    else:
-        response = requests.get(url, params=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-
-    response.raise_for_status()
-    return response
-
-
-def get_context(config, query, chat_id, auth_headers, auth_cookies):
-    """Get context from backend API."""
-    base_url = config['backend']['base_url']
-    endpoint_config = config['backend']['endpoints']['context']
-    verify_ssl = config['backend'].get('verify_ssl', True)
-
-    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-    context = extract_response_data(response, endpoint_config)
-
-    if isinstance(context, list):
-        return [str(c) for c in context]
-    return [str(context)]
-
-
-def get_answer(config, query, chat_id, auth_headers, auth_cookies):
-    """Get answer from backend API."""
-    base_url = config['backend']['base_url']
-    endpoint_config = config['backend']['endpoints']['answer']
-    verify_ssl = config['backend'].get('verify_ssl', True)
-
-    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-    answer = extract_response_data(response, endpoint_config)
-
-    return str(answer)
-
-
-
-def run_evaluation():
-    """Main evaluation function."""
-    print("=" * 60)
-    print("RAGSentinel - RAG Evaluation Framework")
-    print("=" * 60)
-
-    print("\n📁 Loading configuration...")
-    config = load_config()
-
-    dataset_path = config['dataset']['path']
-    print(f"📊 Loading dataset from {dataset_path}...")
-    dataset = pd.read_csv(dataset_path)
-
-    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
-
-    results = []
-    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
-
-    for idx, row in dataset.iterrows():
-        chat_id = str(row['chat_id'])
-        query = row['query']
-        ground_truth = row['ground_truth']
-
-        try:
-            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
-            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
-
-            results.append({
-                'question': query,
-                'contexts': context,
-                'answer': answer,
-                'ground_truth': ground_truth
-            })
-            print(f"  ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
-        except Exception as e:
-            print(f"  ✗ Error processing query {idx + 1}: {e}")
-            continue
-
-    if not results:
-        print("\n❌ No results collected. Exiting.")
-        return
-
-    eval_df = pd.DataFrame(results)
-    print(f"\n✓ Collected {len(eval_df)} responses")
-
-    print("\n🤖 Initializing LLM and embeddings...")
-    llm = get_llm(config)
-    embeddings = get_embeddings(config)
-
-    metrics = get_metrics(config)
-    print(f"  Metrics: {', '.join(config['ragas']['metrics'])}")
-
-    print("\n📈 Preparing data for RAGAS evaluation...")
-    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
-
-    for _, row in eval_df.iterrows():
-        contexts = row.get("contexts", [])
-        if not isinstance(contexts, list):
-            contexts = [str(contexts)]
-        contexts = [str(c) for c in contexts if c and str(c).strip()]
-        if not contexts:
-            contexts = ["No context available."]
-
-        ragas_data["question"].append(str(row["question"]))
-        ragas_data["answer"].append(str(row["answer"]))
-        ragas_data["contexts"].append(contexts)
-        ragas_data["ground_truth"].append(str(row["ground_truth"]))
-
-    dataset = Dataset.from_dict(ragas_data)
-
-    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
-
-    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
-
-    ragas_result = evaluate(
-        dataset,
-        metrics=metrics,
-        llm=llm,
-        embeddings=embeddings,
-        batch_size=2,
-        run_config=run_config,
-        raise_exceptions=False
-    )
-
-    print("\n📊 Processing results...")
-    scores_df = ragas_result.to_pandas()
-    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
-    mean_scores = scores_df[numeric_columns].mean().to_dict()
-
-    mlflow_config = config['mlflow']
-    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
-    mlflow.set_experiment(mlflow_config['experiment_name'])
-
-    print("\n📤 Logging results to MLflow...")
-    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
-    with mlflow.start_run(run_name=run_name):
-        print("\n" + "=" * 40)
-        print("📊 EVALUATION RESULTS")
-        print("=" * 40)
-        for metric_name, value in mean_scores.items():
-            mlflow.log_metric(metric_name, value)
-            print(f"  {metric_name}: {value:.4f}")
-
-        mlflow.log_param("dataset_path", dataset_path)
-        mlflow.log_param("num_samples", len(eval_df))
-        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
-
-    print("\n" + "=" * 60)
-    print("✅ Evaluation complete!")
-    print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
-    print("=" * 60)
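One behavioral change between the two `extract_response_data` implementations deserves a callout: 0.1.2 walked a dotted `response_path` through nested JSON, while 0.1.4 reads a single flat `response_key` (and adds streaming support). A sketch with a hypothetical payload:

```python
data = {"result": {"text": "hello"}}

# 0.1.2 style: response_path = "result.text" traversed nested keys
value = data
for key in "result.text".split('.'):
    value = value[key]
print(value)               # -> hello

# 0.1.4 style: response_key = "result" reads one top-level key only
print(data.get("result"))  # -> {'text': 'hello'}
```

Configs that relied on a dotted `response_path` would presumably need to be flattened to a single `response_key` when upgrading.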