rag-sentinel 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rag_sentinel-0.1.2/src/rag_sentinel.egg-info → rag_sentinel-0.1.3}/PKG-INFO +1 -1
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/pyproject.toml +1 -1
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/__init__.py +1 -1
- rag_sentinel-0.1.3/src/rag_sentinel/evaluator.py +363 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3/src/rag_sentinel.egg-info}/PKG-INFO +1 -1
- rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py +0 -392
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/LICENSE +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/MANIFEST.in +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/README.md +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/setup.cfg +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/cli.py +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/.env.template +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/config.ini.template +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/rag_eval_config.yaml +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/SOURCES.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/dependency_links.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/entry_points.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/requires.txt +0 -0
- {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/top_level.txt +0 -0

--- /dev/null
+++ rag_sentinel-0.1.3/src/rag_sentinel/evaluator.py
@@ -0,0 +1,363 @@
+"""
+RAGSentinel Evaluator - Core evaluation logic.
+
+This module contains the main evaluation pipeline for RAGSentinel.
+It handles configuration loading, LLM initialization, API communication,
+Ragas metrics evaluation, and MLflow result logging.
+"""
+
+import os
+import re
+import yaml
+import configparser
+import requests
+import pandas as pd
+import mlflow
+from dotenv import load_dotenv
+from datasets import Dataset
+from ragas import evaluate
+from ragas.run_config import RunConfig
+from ragas.metrics import (
+    Faithfulness,
+    AnswerRelevancy,
+    ContextPrecision,
+    AnswerCorrectness,
+)
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings, AzureChatOpenAI, AzureOpenAIEmbeddings
+from langchain_ollama import ChatOllama, OllamaEmbeddings
+
+
+# =============================================================================
+# Configuration Loading
+# =============================================================================
+
+
+def load_config(yaml_file='rag_eval_config.yaml'):
+    """
+    Load configuration from YAML file with values resolved from .env and config.ini.
+
+    Returns:
+        dict: Fully resolved configuration dictionary
+    """
+    load_dotenv('.env')
+
+    ini = configparser.ConfigParser()
+    ini.read('config.ini')
+
+    def resolve(obj):
+        if isinstance(obj, dict):
+            return {k: resolve(v) for k, v in obj.items()}
+        if isinstance(obj, list):
+            return [resolve(i) for i in obj]
+        if isinstance(obj, str):
+            # Resolve ${ENV:VAR} and ${INI:section.key} placeholders
+            result = re.sub(r'\$\{ENV:([^}]+)\}', lambda m: os.getenv(m.group(1), ''), obj)
+            result = re.sub(r'\$\{INI:([^.]+)\.([^}]+)\}',
+                            lambda m: ini.get(m.group(1), m.group(2), fallback=''), result)
+            # Convert types
+            if result.lower() == 'true': return True
+            if result.lower() == 'false': return False
+            try:
+                if '.' in result: return float(result)
+            except ValueError:
+                pass
+            return result
+        return obj
+
+    with open(yaml_file, 'r') as f:
+        return resolve(yaml.safe_load(f))
+
+
+def get_llm(config):
+    """Initialize LLM based on config."""
+    provider = config['ragas']['llm']['provider']
+
+    if provider == "azure":
+        azure_config = config['ragas']['llm']['azure']
+        return AzureChatOpenAI(
+            azure_endpoint=azure_config['endpoint'],
+            api_key=azure_config['api_key'],
+            deployment_name=azure_config['deployment_name'],
+            model=azure_config['model'],
+            temperature=azure_config['temperature'],
+            api_version=azure_config['api_version']
+        )
+    elif provider == "openai":
+        openai_config = config['ragas']['llm']['openai']
+        return ChatOpenAI(
+            model=openai_config['model'],
+            temperature=openai_config['temperature'],
+            api_key=openai_config['api_key']
+        )
+    elif provider == "ollama":
+        ollama_config = config['ragas']['llm']['ollama']
+        return ChatOllama(
+            base_url=ollama_config['base_url'],
+            model=ollama_config['model'],
+            temperature=ollama_config['temperature']
+        )
+    else:
+        raise ValueError(f"Unsupported LLM provider: {provider}")
+
+
+def get_embeddings(config):
+    """Initialize embeddings based on config."""
+    provider = config['ragas']['embeddings']['provider']
+
+    if provider == "azure":
+        azure_config = config['ragas']['embeddings']['azure']
+        return AzureOpenAIEmbeddings(
+            azure_endpoint=azure_config['endpoint'],
+            api_key=azure_config['api_key'],
+            deployment=azure_config['deployment_name'],
+            api_version=azure_config['api_version']
+        )
+    elif provider == "openai":
+        openai_config = config['ragas']['embeddings']['openai']
+        return OpenAIEmbeddings(
+            model=openai_config['model'],
+            api_key=openai_config['api_key']
+        )
+    elif provider == "ollama":
+        ollama_config = config['ragas']['embeddings']['ollama']
+        return OllamaEmbeddings(
+            base_url=ollama_config['base_url'],
+            model=ollama_config['model']
+        )
+    else:
+        raise ValueError(f"Unsupported embeddings provider: {provider}")
+
+
+def get_metrics(config):
+    """Get Ragas metrics based on config."""
+    metric_map = {
+        "Faithfulness": Faithfulness(),
+        "AnswerRelevancy": AnswerRelevancy(),
+        "ContextPrecision": ContextPrecision(),
+        "AnswerCorrectness": AnswerCorrectness(),
+    }
+
+    metric_names = config['ragas']['metrics']
+    return [metric_map[name] for name in metric_names if name in metric_map]
+
+
+def get_auth_headers_and_cookies(config):
+    """Get authentication headers and cookies based on config."""
+    auth_config = config['backend']['auth']
+    auth_type = auth_config.get('type', 'none')
+
+    headers = {}
+    cookies = {}
+
+    if auth_type == "cookie":
+        cookies[auth_config['cookie_name']] = auth_config['cookie_value']
+    elif auth_type == "bearer":
+        headers['Authorization'] = f"Bearer {auth_config['bearer_token']}"
+    elif auth_type == "header":
+        headers[auth_config['header_name']] = auth_config['header_value']
+
+    return headers, cookies
+
+
+def extract_response_data(response, endpoint_config):
+    """Extract data from API response."""
+    if endpoint_config.get('stream', False):
+        return "".join(chunk.decode() for chunk in response.iter_content(chunk_size=None))
+
+    # Try to parse as JSON first
+    try:
+        data = response.json()
+        response_key = endpoint_config.get('response_key')
+        if response_key:
+            return data.get(response_key)
+        return data
+    except:
+        # If JSON parsing fails, return as plain text
+        return response.text
+
+
+def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
+    """Make API request to backend."""
+    url = base_url + endpoint_config['path']
+    method = endpoint_config.get('method', 'POST')
+
+    headers = {**endpoint_config.get('headers', {}), **auth_headers}
+
+    # Flexible body preparation
+    body = {}
+    for key, value in endpoint_config.get('body', {}).items():
+        if isinstance(value, str) and ("{query}" in value or "{chat_id}" in value):
+            body[key] = value.format(query=query, chat_id=chat_id)
+        elif key == "chat_id":
+            try:
+                body[key] = int(chat_id)
+            except (ValueError, TypeError):
+                body[key] = chat_id
+        else:
+            body[key] = value
+
+    if method.upper() == 'POST':
+        resp = requests.post(
+            url,
+            json=body,
+            headers=headers,
+            cookies=auth_cookies,
+            stream=endpoint_config.get('stream', False),
+            verify=verify_ssl
+        )
+    elif method.upper() == 'GET':
+        resp = requests.get(
+            url,
+            params=body,
+            headers=headers,
+            cookies=auth_cookies,
+            verify=verify_ssl
+        )
+    else:
+        raise ValueError(f"Unsupported HTTP method: {method}")
+
+    resp.raise_for_status()
+    return resp
+
+
+def get_context(config, query, chat_id, auth_headers, auth_cookies):
+    """Retrieve context from backend API."""
+    base_url = config['backend']['base_url']
+    endpoint_config = config['backend']['endpoints']['context']
+    verify_ssl = config['backend'].get('verify_ssl', True)
+
+    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+    context = extract_response_data(response, endpoint_config)
+
+    if isinstance(context, str):
+        return [context]
+    elif isinstance(context, list):
+        return context
+    else:
+        return [str(context)]
+
+
+def get_answer(config, query, chat_id, auth_headers, auth_cookies):
+    """Get answer from backend API."""
+    base_url = config['backend']['base_url']
+    endpoint_config = config['backend']['endpoints']['answer']
+    verify_ssl = config['backend'].get('verify_ssl', True)
+
+    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+    answer = extract_response_data(response, endpoint_config)
+
+    return str(answer)
+
+
+
+def run_evaluation():
+    """Main evaluation function."""
+    print("=" * 60)
+    print("RAGSentinel - RAG Evaluation Framework")
+    print("=" * 60)
+
+    print("\n📁 Loading configuration...")
+    config = load_config()
+
+    dataset_path = config['dataset']['path']
+    print(f"📊 Loading dataset from {dataset_path}...")
+    dataset = pd.read_csv(dataset_path)
+
+    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
+
+    results = []
+    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
+
+    for idx, row in dataset.iterrows():
+        chat_id = str(row['chat_id'])
+        query = row['query']
+        ground_truth = row['ground_truth']
+
+        try:
+            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
+            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
+
+            results.append({
+                'question': query,
+                'contexts': context,
+                'answer': answer,
+                'ground_truth': ground_truth
+            })
+            print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
+        except Exception as e:
+            print(f" ✗ Error processing query {idx + 1}: {e}")
+            continue
+
+    if not results:
+        print("\n❌ No results collected. Exiting.")
+        return
+
+    eval_df = pd.DataFrame(results)
+    print(f"\n✓ Collected {len(eval_df)} responses")
+
+    print("\n🤖 Initializing LLM and embeddings...")
+    llm = get_llm(config)
+    embeddings = get_embeddings(config)
+
+    metrics = get_metrics(config)
+    print(f" Metrics: {', '.join(config['ragas']['metrics'])}")
+
+    print("\n📈 Preparing data for RAGAS evaluation...")
+    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
+
+    for _, row in eval_df.iterrows():
+        contexts = row.get("contexts", [])
+        if not isinstance(contexts, list):
+            contexts = [str(contexts)]
+        contexts = [str(c) for c in contexts if c and str(c).strip()]
+        if not contexts:
+            contexts = ["No context available."]
+
+        ragas_data["question"].append(str(row["question"]))
+        ragas_data["answer"].append(str(row["answer"]))
+        ragas_data["contexts"].append(contexts)
+        ragas_data["ground_truth"].append(str(row["ground_truth"]))
+
+    dataset = Dataset.from_dict(ragas_data)
+
+    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
+
+    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
+
+    ragas_result = evaluate(
+        dataset,
+        metrics=metrics,
+        llm=llm,
+        embeddings=embeddings,
+        batch_size=2,
+        run_config=run_config,
+        raise_exceptions=False
+    )
+
+    print("\n📊 Processing results...")
+    scores_df = ragas_result.to_pandas()
+    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
+    mean_scores = scores_df[numeric_columns].mean().to_dict()
+
+    mlflow_config = config['mlflow']
+    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
+    mlflow.set_experiment(mlflow_config['experiment_name'])
+
+    print("\n📤 Logging results to MLflow...")
+    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
+    with mlflow.start_run(run_name=run_name):
+        print("\n" + "=" * 40)
+        print("📊 EVALUATION RESULTS")
+        print("=" * 40)
+        for metric_name, value in mean_scores.items():
+            mlflow.log_metric(metric_name, value)
+            print(f" {metric_name}: {value:.4f}")
+
+        mlflow.log_param("dataset_path", dataset_path)
+        mlflow.log_param("num_samples", len(eval_df))
+        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
+
+        print("\n" + "=" * 60)
+        print("✅ Evaluation complete!")
+        print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
+        print("=" * 60)
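For orientation, here is a minimal sketch (not shipped with the package) of the resolved configuration dict that the functions above read. Every key path mirrors a lookup in the 0.1.3 evaluator.py; the concrete values (URLs, model names, file names) are illustrative assumptions. In rag_eval_config.yaml itself, secret values would typically be written as ${ENV:VAR} or ${INI:section.key} placeholders, which load_config() substitutes and type-coerces ('true'/'false' to bool, dotted numbers to float).

```python
# Sketch of the config shape expected by evaluator.py 0.1.3.
# Key paths are taken from the code above; values are made up.
config = {
    "dataset": {"path": "eval_dataset.csv"},  # CSV with chat_id, query, ground_truth
    "backend": {
        "base_url": "http://localhost:8000",
        "verify_ssl": True,                   # optional; defaults to True
        "auth": {                             # type: cookie | bearer | header | none
            "type": "bearer",
            "bearer_token": "...",
        },
        "endpoints": {
            "context": {
                "path": "/api/context",
                "method": "POST",
                "body": {"query": "{query}", "chat_id": "{chat_id}"},  # placeholders are .format()-ed
                "response_key": "contexts",   # optional top-level JSON key to extract
            },
            "answer": {
                "path": "/api/answer",
                "method": "POST",
                "body": {"query": "{query}", "chat_id": "{chat_id}"},
                "response_key": "answer",
                "stream": False,              # optional; True concatenates streamed chunks
            },
        },
    },
    "ragas": {
        "metrics": ["Faithfulness", "AnswerRelevancy"],  # any of the four names in metric_map
        "llm": {
            "provider": "openai",             # azure | openai | ollama
            "openai": {"model": "gpt-4o-mini", "temperature": 0.0, "api_key": "sk-..."},
        },
        "embeddings": {
            "provider": "openai",
            "openai": {"model": "text-embedding-3-small", "api_key": "sk-..."},
        },
    },
    "mlflow": {
        "tracking_uri": "http://localhost:5000",
        "experiment_name": "rag-sentinel",
        "run_name": "baseline",               # optional; defaults to 'RAG Evaluation'
    },
}
```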
--- rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py
+++ /dev/null
@@ -1,392 +0,0 @@
-"""
-RAGSentinel Evaluator - Core evaluation logic.
-
-This module contains the main evaluation pipeline for RAGSentinel.
-It handles configuration loading, LLM initialization, API communication,
-Ragas metrics evaluation, and MLflow result logging.
-"""
-
-import os
-import re
-import yaml
-import configparser
-import requests
-import pandas as pd
-import mlflow
-from dotenv import load_dotenv
-from datasets import Dataset
-from ragas import evaluate, RunConfig
-from ragas.metrics import faithfulness, answer_relevancy, context_precision, answer_correctness
-
-
-# =============================================================================
-# Configuration Loading
-# =============================================================================
-
-
-def resolve_placeholder(value, env_vars, ini_config):
-    """
-    Resolve ${ENV:...} and ${INI:...} placeholders in a string value.
-
-    Args:
-        value: String that may contain placeholders
-        env_vars: Dictionary of environment variables
-        ini_config: ConfigParser object with ini file contents
-
-    Returns:
-        str: Value with all placeholders resolved
-    """
-    if not isinstance(value, str):
-        return value
-
-    # Resolve ${ENV:VAR_NAME} - reads from environment variables
-    env_pattern = r'\$\{ENV:([^}]+)\}'
-    def env_replacer(match):
-        var_name = match.group(1)
-        return env_vars.get(var_name, '')
-    value = re.sub(env_pattern, env_replacer, value)
-
-    # Resolve ${INI:section.key} - reads from config.ini
-    ini_pattern = r'\$\{INI:([^}]+)\}'
-    def ini_replacer(match):
-        path = match.group(1)
-        parts = path.split('.')
-        if len(parts) == 2:
-            section, key = parts
-            if ini_config.has_option(section, key):
-                return ini_config.get(section, key)
-        return ''
-    value = re.sub(ini_pattern, ini_replacer, value)
-
-    return value
-
-
-def resolve_config(obj, env_vars, ini_config):
-    """
-    Recursively resolve all placeholders in a configuration object.
-
-    Args:
-        obj: Configuration object (dict, list, or str)
-        env_vars: Dictionary of environment variables
-        ini_config: ConfigParser object
-
-    Returns:
-        Configuration object with all placeholders resolved
-    """
-    if isinstance(obj, dict):
-        return {k: resolve_config(v, env_vars, ini_config) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [resolve_config(item, env_vars, ini_config) for item in obj]
-    elif isinstance(obj, str):
-        return resolve_placeholder(obj, env_vars, ini_config)
-    return obj
-
-
-def load_config():
-    """
-    Load and merge configuration from .env, config.ini, and rag_eval_config.yaml.
-
-    Returns:
-        dict: Fully resolved configuration dictionary
-    """
-    # Load environment variables from .env file
-    load_dotenv('.env')
-    env_vars = dict(os.environ)
-
-    # Load INI configuration
-    ini_config = configparser.ConfigParser()
-    ini_config.read('config.ini')
-
-    # Load YAML configuration and resolve all placeholders
-    with open('rag_eval_config.yaml', 'r') as f:
-        yaml_config = yaml.safe_load(f)
-
-    return resolve_config(yaml_config, env_vars, ini_config)
-
-
-def get_llm(config):
-    """Initialize LLM based on provider."""
-    llm_config = config['ragas']['llm']
-    provider = llm_config['provider'].lower()
-
-    if provider == 'azure':
-        from langchain_openai import AzureChatOpenAI
-        return AzureChatOpenAI(
-            azure_endpoint=llm_config['azure_endpoint'],
-            api_key=llm_config['api_key'],
-            api_version=llm_config.get('api_version', '2024-02-15-preview'),
-            deployment_name=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    elif provider == 'openai':
-        from langchain_openai import ChatOpenAI
-        return ChatOpenAI(
-            api_key=llm_config['api_key'],
-            model=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    elif provider == 'ollama':
-        from langchain_ollama import ChatOllama
-        return ChatOllama(
-            base_url=llm_config.get('base_url', 'http://localhost:11434'),
-            model=llm_config['model'],
-            temperature=float(llm_config.get('temperature', 0.0))
-        )
-    else:
-        raise ValueError(f"Unknown LLM provider: {provider}")
-
-
-def get_embeddings(config):
-    """Initialize embeddings based on provider."""
-    emb_config = config['ragas']['embeddings']
-    provider = emb_config['provider'].lower()
-
-    if provider == 'azure':
-        from langchain_openai import AzureOpenAIEmbeddings
-        return AzureOpenAIEmbeddings(
-            azure_endpoint=emb_config['azure_endpoint'],
-            api_key=emb_config['api_key'],
-            api_version=emb_config.get('api_version', '2024-02-15-preview'),
-            deployment=emb_config['model']
-        )
-    elif provider == 'openai':
-        from langchain_openai import OpenAIEmbeddings
-        return OpenAIEmbeddings(
-            api_key=emb_config['api_key'],
-            model=emb_config['model']
-        )
-    elif provider == 'ollama':
-        from langchain_ollama import OllamaEmbeddings
-        return OllamaEmbeddings(
-            base_url=emb_config.get('base_url', 'http://localhost:11434'),
-            model=emb_config['model']
-        )
-    else:
-        raise ValueError(f"Unknown embeddings provider: {provider}")
-
-
-def get_metrics(config):
-    """Get list of Ragas metrics."""
-    metric_map = {
-        'faithfulness': faithfulness,
-        'answer_relevancy': answer_relevancy,
-        'context_precision': context_precision,
-        'answer_correctness': answer_correctness
-    }
-    return [metric_map[m] for m in config['ragas']['metrics'] if m in metric_map]
-
-
-def get_auth_headers_and_cookies(config):
-    """
-    Get authentication headers and cookies from backend config.
-
-    Supports three authentication types:
-    - cookie: Session cookie authentication
-    - bearer: Bearer token authentication
-    - header: Custom header authentication
-
-    Args:
-        config: Full configuration dictionary
-
-    Returns:
-        tuple: (headers dict, cookies dict)
-    """
-    # Auth config is nested under backend.auth in the YAML
-    auth_config = config.get('backend', {}).get('auth', {})
-    auth_type = auth_config.get('type', 'none').lower()
-    headers = {}
-    cookies = {}
-
-    if auth_type == 'cookie':
-        cookie_name = auth_config.get('cookie_name', 'session')
-        cookie_value = auth_config.get('cookie_value', '')
-        if cookie_value:
-            cookies[cookie_name] = cookie_value
-    elif auth_type == 'bearer':
-        token = auth_config.get('bearer_token', '')
-        if token:
-            headers['Authorization'] = f'Bearer {token}'
-    elif auth_type == 'header':
-        header_name = auth_config.get('header_name', '')
-        header_value = auth_config.get('header_value', '')
-        if header_name and header_value:
-            headers[header_name] = header_value
-
-    return headers, cookies
-
-
-def extract_response_data(response, endpoint_config):
-    """Extract data from API response."""
-    data = response.json()
-    response_path = endpoint_config.get('response_path', '')
-
-    if response_path:
-        for key in response_path.split('.'):
-            if isinstance(data, dict) and key in data:
-                data = data[key]
-            elif isinstance(data, list) and key.isdigit():
-                data = data[int(key)]
-            else:
-                return data
-    return data
-
-
-def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
-    """Make API request to backend."""
-    url = base_url.rstrip('/') + endpoint_config['path']
-    method = endpoint_config.get('method', 'POST').upper()
-
-    body = endpoint_config.get('body', {}).copy()
-    body['query'] = query
-    body['chat_id'] = chat_id
-
-    headers = {'Content-Type': 'application/json'}
-    headers.update(auth_headers)
-
-    if method == 'POST':
-        response = requests.post(url, json=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-    else:
-        response = requests.get(url, params=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-
-    response.raise_for_status()
-    return response
-
-
-def get_context(config, query, chat_id, auth_headers, auth_cookies):
-    """Get context from backend API."""
-    base_url = config['backend']['base_url']
-    endpoint_config = config['backend']['endpoints']['context']
-    verify_ssl = config['backend'].get('verify_ssl', True)
-
-    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-    context = extract_response_data(response, endpoint_config)
-
-    if isinstance(context, list):
-        return [str(c) for c in context]
-    return [str(context)]
-
-
-def get_answer(config, query, chat_id, auth_headers, auth_cookies):
-    """Get answer from backend API."""
-    base_url = config['backend']['base_url']
-    endpoint_config = config['backend']['endpoints']['answer']
-    verify_ssl = config['backend'].get('verify_ssl', True)
-
-    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-    answer = extract_response_data(response, endpoint_config)
-
-    return str(answer)
-
-
-
-def run_evaluation():
-    """Main evaluation function."""
-    print("=" * 60)
-    print("RAGSentinel - RAG Evaluation Framework")
-    print("=" * 60)
-
-    print("\n📁 Loading configuration...")
-    config = load_config()
-
-    dataset_path = config['dataset']['path']
-    print(f"📊 Loading dataset from {dataset_path}...")
-    dataset = pd.read_csv(dataset_path)
-
-    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
-
-    results = []
-    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
-
-    for idx, row in dataset.iterrows():
-        chat_id = str(row['chat_id'])
-        query = row['query']
-        ground_truth = row['ground_truth']
-
-        try:
-            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
-            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
-
-            results.append({
-                'question': query,
-                'contexts': context,
-                'answer': answer,
-                'ground_truth': ground_truth
-            })
-            print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
-        except Exception as e:
-            print(f" ✗ Error processing query {idx + 1}: {e}")
-            continue
-
-    if not results:
-        print("\n❌ No results collected. Exiting.")
-        return
-
-    eval_df = pd.DataFrame(results)
-    print(f"\n✓ Collected {len(eval_df)} responses")
-
-    print("\n🤖 Initializing LLM and embeddings...")
-    llm = get_llm(config)
-    embeddings = get_embeddings(config)
-
-    metrics = get_metrics(config)
-    print(f" Metrics: {', '.join(config['ragas']['metrics'])}")
-
-    print("\n📈 Preparing data for RAGAS evaluation...")
-    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
-
-    for _, row in eval_df.iterrows():
-        contexts = row.get("contexts", [])
-        if not isinstance(contexts, list):
-            contexts = [str(contexts)]
-        contexts = [str(c) for c in contexts if c and str(c).strip()]
-        if not contexts:
-            contexts = ["No context available."]
-
-        ragas_data["question"].append(str(row["question"]))
-        ragas_data["answer"].append(str(row["answer"]))
-        ragas_data["contexts"].append(contexts)
-        ragas_data["ground_truth"].append(str(row["ground_truth"]))
-
-    dataset = Dataset.from_dict(ragas_data)
-
-    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
-
-    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
-
-    ragas_result = evaluate(
-        dataset,
-        metrics=metrics,
-        llm=llm,
-        embeddings=embeddings,
-        batch_size=2,
-        run_config=run_config,
-        raise_exceptions=False
-    )
-
-    print("\n📊 Processing results...")
-    scores_df = ragas_result.to_pandas()
-    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
-    mean_scores = scores_df[numeric_columns].mean().to_dict()
-
-    mlflow_config = config['mlflow']
-    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
-    mlflow.set_experiment(mlflow_config['experiment_name'])
-
-    print("\n📤 Logging results to MLflow...")
-    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
-    with mlflow.start_run(run_name=run_name):
-        print("\n" + "=" * 40)
-        print("📊 EVALUATION RESULTS")
-        print("=" * 40)
-        for metric_name, value in mean_scores.items():
-            mlflow.log_metric(metric_name, value)
-            print(f" {metric_name}: {value:.4f}")
-
-        mlflow.log_param("dataset_path", dataset_path)
-        mlflow.log_param("num_samples", len(eval_df))
-        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
-
-        print("\n" + "=" * 60)
-        print("✅ Evaluation complete!")
-        print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
-        print("=" * 60)
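Finally, a minimal usage sketch. run_evaluation() takes no arguments; it reads .env, config.ini, and rag_eval_config.yaml from the current working directory and loads the CSV named in config['dataset']['path'], which must provide chat_id, query, and ground_truth columns. The column names come from the row lookups above; the CSV file name and row contents here are made up for illustration, and the sketch assumes the configured backend and MLflow server are reachable.

```python
# Minimal sketch: build a one-row dataset and run the pipeline.
# Assumes .env, config.ini, and rag_eval_config.yaml exist in the CWD
# and config['dataset']['path'] points at eval_dataset.csv.
import pandas as pd

from rag_sentinel.evaluator import run_evaluation

pd.DataFrame([{
    "chat_id": 1,                                         # sent to the backend with each request
    "query": "What does the refund policy say?",          # the question under evaluation
    "ground_truth": "Refunds are issued within 14 days.", # reference answer for Ragas
}]).to_csv("eval_dataset.csv", index=False)

run_evaluation()
```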