rag-sentinel 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19) hide show
  1. {rag_sentinel-0.1.2/src/rag_sentinel.egg-info → rag_sentinel-0.1.3}/PKG-INFO +1 -1
  2. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/pyproject.toml +1 -1
  3. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/__init__.py +1 -1
  4. rag_sentinel-0.1.3/src/rag_sentinel/evaluator.py +363 -0
  5. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3/src/rag_sentinel.egg-info}/PKG-INFO +1 -1
  6. rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py +0 -392
  7. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/LICENSE +0 -0
  8. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/MANIFEST.in +0 -0
  9. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/README.md +0 -0
  10. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/setup.cfg +0 -0
  11. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/cli.py +0 -0
  12. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/.env.template +0 -0
  13. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/config.ini.template +0 -0
  14. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel/templates/rag_eval_config.yaml +0 -0
  15. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/SOURCES.txt +0 -0
  16. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/dependency_links.txt +0 -0
  17. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/entry_points.txt +0 -0
  18. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/requires.txt +0 -0
  19. {rag_sentinel-0.1.2 → rag_sentinel-0.1.3}/src/rag_sentinel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rag-sentinel
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
5
5
  Author: RAGSentinel Team
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rag-sentinel"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  description = "RAG Evaluation Framework using Ragas metrics and MLflow tracking"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -20,5 +20,5 @@ Author: RAGSentinel Team
20
20
  License: MIT
21
21
  """
22
22
 
23
- __version__ = "0.1.0"
23
+ __version__ = "0.1.3"
24
24
 
@@ -0,0 +1,363 @@
1
+ """
2
+ RAGSentinel Evaluator - Core evaluation logic.
3
+
4
+ This module contains the main evaluation pipeline for RAGSentinel.
5
+ It handles configuration loading, LLM initialization, API communication,
6
+ Ragas metrics evaluation, and MLflow result logging.
7
+ """
8
+
9
+ import os
10
+ import re
11
+ import yaml
12
+ import configparser
13
+ import requests
14
+ import pandas as pd
15
+ import mlflow
16
+ from dotenv import load_dotenv
17
+ from datasets import Dataset
18
+ from ragas import evaluate
19
+ from ragas.run_config import RunConfig
20
+ from ragas.metrics import (
21
+ Faithfulness,
22
+ AnswerRelevancy,
23
+ ContextPrecision,
24
+ AnswerCorrectness,
25
+ )
26
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings, AzureChatOpenAI, AzureOpenAIEmbeddings
27
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
28
+
29
+
30
+ # =============================================================================
31
+ # Configuration Loading
32
+ # =============================================================================
33
+
34
+
35
def load_config(yaml_file='rag_eval_config.yaml'):
    """
    Load configuration from a YAML file with values resolved from .env and config.ini.

    Placeholders of the form ``${ENV:VAR}`` are substituted from environment
    variables (after loading ``.env``) and ``${INI:section.key}`` from
    ``config.ini``. Resolved strings equal to ``true``/``false`` (any case)
    become booleans, and strings containing a dot that parse as numbers
    become floats.

    Args:
        yaml_file: Path of the YAML configuration file to load.

    Returns:
        dict: Fully resolved configuration dictionary
    """
    load_dotenv('.env')

    ini = configparser.ConfigParser()
    ini.read('config.ini')

    env_pattern = re.compile(r'\$\{ENV:([^}]+)\}')
    ini_pattern = re.compile(r'\$\{INI:([^.]+)\.([^}]+)\}')

    def coerce(text):
        # Map boolean-ish strings to bool and dotted numerics to float;
        # everything else stays a string.
        lowered = text.lower()
        if lowered == 'true':
            return True
        if lowered == 'false':
            return False
        if '.' in text:
            try:
                return float(text)
            except ValueError:
                pass
        return text

    def expand(node):
        # Recursively walk dicts/lists; only string leaves carry placeholders.
        if isinstance(node, dict):
            return {key: expand(value) for key, value in node.items()}
        if isinstance(node, list):
            return [expand(item) for item in node]
        if isinstance(node, str):
            text = env_pattern.sub(lambda m: os.getenv(m.group(1), ''), node)
            text = ini_pattern.sub(
                lambda m: ini.get(m.group(1), m.group(2), fallback=''), text)
            return coerce(text)
        return node

    with open(yaml_file, 'r') as f:
        return expand(yaml.safe_load(f))
69
+
70
+
71
def get_llm(config):
    """Build the chat LLM declared under ``ragas.llm`` in the config.

    Supports the ``azure``, ``openai`` and ``ollama`` providers; each reads
    its own sub-section of the config for credentials and model settings.

    Raises:
        ValueError: If ``provider`` is not one of the supported values.
    """
    llm_cfg = config['ragas']['llm']
    provider = llm_cfg['provider']

    if provider == "azure":
        cfg = llm_cfg['azure']
        return AzureChatOpenAI(
            azure_endpoint=cfg['endpoint'],
            api_key=cfg['api_key'],
            deployment_name=cfg['deployment_name'],
            model=cfg['model'],
            temperature=cfg['temperature'],
            api_version=cfg['api_version'],
        )

    if provider == "openai":
        cfg = llm_cfg['openai']
        return ChatOpenAI(
            model=cfg['model'],
            temperature=cfg['temperature'],
            api_key=cfg['api_key'],
        )

    if provider == "ollama":
        cfg = llm_cfg['ollama']
        return ChatOllama(
            base_url=cfg['base_url'],
            model=cfg['model'],
            temperature=cfg['temperature'],
        )

    raise ValueError(f"Unsupported LLM provider: {provider}")
101
+
102
+
103
def get_embeddings(config):
    """Build the embeddings model declared under ``ragas.embeddings``.

    Supports the ``azure``, ``openai`` and ``ollama`` providers; each reads
    its own sub-section of the config.

    Raises:
        ValueError: If ``provider`` is not one of the supported values.
    """
    emb_cfg = config['ragas']['embeddings']
    provider = emb_cfg['provider']

    if provider == "azure":
        cfg = emb_cfg['azure']
        return AzureOpenAIEmbeddings(
            azure_endpoint=cfg['endpoint'],
            api_key=cfg['api_key'],
            deployment=cfg['deployment_name'],
            api_version=cfg['api_version'],
        )

    if provider == "openai":
        cfg = emb_cfg['openai']
        return OpenAIEmbeddings(
            model=cfg['model'],
            api_key=cfg['api_key'],
        )

    if provider == "ollama":
        cfg = emb_cfg['ollama']
        return OllamaEmbeddings(
            base_url=cfg['base_url'],
            model=cfg['model'],
        )

    raise ValueError(f"Unsupported embeddings provider: {provider}")
129
+
130
+
131
def get_metrics(config):
    """Return the Ragas metric instances selected in ``ragas.metrics``.

    Names that do not match a known metric are silently skipped, so a typo
    in the config reduces the metric set rather than failing.
    """
    available = {
        "Faithfulness": Faithfulness(),
        "AnswerRelevancy": AnswerRelevancy(),
        "ContextPrecision": ContextPrecision(),
        "AnswerCorrectness": AnswerCorrectness(),
    }

    selected = []
    for name in config['ragas']['metrics']:
        if name in available:
            selected.append(available[name])
    return selected
142
+
143
+
144
def get_auth_headers_and_cookies(config):
    """Build authentication headers and cookies from the ``backend.auth`` config.

    Supported ``type`` values (matched case-insensitively):
      - ``cookie``: sends ``cookie_name``/``cookie_value`` as a cookie
      - ``bearer``: sends ``Authorization: Bearer <bearer_token>``
      - ``header``: sends ``header_name``/``header_value`` as a custom header
      - anything else (including ``none``): no authentication

    Args:
        config: Fully resolved configuration dictionary.

    Returns:
        tuple: ``(headers, cookies)`` dicts suitable for a requests call.

    Raises:
        KeyError: If the selected auth type is missing its required fields.
    """
    auth_config = config['backend']['auth']
    # Normalize case so "Bearer"/"Cookie" in the YAML still match; previously
    # a mis-cased type silently disabled authentication.
    auth_type = auth_config.get('type', 'none').lower()

    headers = {}
    cookies = {}

    if auth_type == "cookie":
        cookies[auth_config['cookie_name']] = auth_config['cookie_value']
    elif auth_type == "bearer":
        headers['Authorization'] = f"Bearer {auth_config['bearer_token']}"
    elif auth_type == "header":
        headers[auth_config['header_name']] = auth_config['header_value']

    return headers, cookies
160
+
161
+
162
def extract_response_data(response, endpoint_config):
    """Extract the payload from an API response.

    Streaming endpoints are drained and decoded into a single string.
    Otherwise the body is parsed as JSON and, if ``response_key`` is
    configured, that key is looked up; when the body is not valid JSON (or
    the parsed value cannot take the key lookup) the raw text is returned.

    Args:
        response: A ``requests.Response``-like object.
        endpoint_config: The endpoint section of the backend config.

    Returns:
        The decoded stream, the parsed JSON (or the value under
        ``response_key``), or the plain response text as a fallback.
    """
    if endpoint_config.get('stream', False):
        # Drain the whole stream into one string.
        return "".join(chunk.decode() for chunk in response.iter_content(chunk_size=None))

    try:
        data = response.json()
        response_key = endpoint_config.get('response_key')
        if response_key:
            return data.get(response_key)
        return data
    except (ValueError, AttributeError):
        # ValueError: body is not JSON (requests raises a ValueError subclass).
        # AttributeError: JSON parsed to a non-dict while response_key is set.
        # The previous bare ``except:`` also swallowed KeyboardInterrupt and
        # SystemExit; keep only the parse/lookup failure fallback.
        return response.text
177
+
178
+
179
def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
    """Send one request to the backend and return the status-checked response.

    ``{query}``/``{chat_id}`` placeholders inside string body values are
    substituted, and a body field literally named ``chat_id`` is cast to int
    when possible (backends commonly expect a numeric id).

    Args:
        base_url: Backend base URL; the endpoint's ``path`` is appended.
        endpoint_config: Endpoint section (path, method, headers, body, stream).
        query: The user query to substitute into the body.
        chat_id: Conversation identifier to substitute into the body.
        auth_headers: Extra headers from the auth config (win over endpoint headers).
        auth_cookies: Cookies from the auth config.
        verify_ssl: Whether to verify TLS certificates.

    Raises:
        ValueError: For HTTP methods other than GET/POST.
        requests.HTTPError: When the backend answers with an error status.
    """
    url = base_url + endpoint_config['path']
    method = endpoint_config.get('method', 'POST')

    merged_headers = dict(endpoint_config.get('headers', {}))
    merged_headers.update(auth_headers)

    # Build the request body from the configured template.
    payload = {}
    for field, template in endpoint_config.get('body', {}).items():
        if isinstance(template, str) and ("{query}" in template or "{chat_id}" in template):
            payload[field] = template.format(query=query, chat_id=chat_id)
        elif field == "chat_id":
            try:
                payload[field] = int(chat_id)
            except (ValueError, TypeError):
                # Non-numeric ids are passed through unchanged.
                payload[field] = chat_id
        else:
            payload[field] = template

    verb = method.upper()
    if verb == 'POST':
        resp = requests.post(
            url,
            json=payload,
            headers=merged_headers,
            cookies=auth_cookies,
            stream=endpoint_config.get('stream', False),
            verify=verify_ssl,
        )
    elif verb == 'GET':
        resp = requests.get(
            url,
            params=payload,
            headers=merged_headers,
            cookies=auth_cookies,
            verify=verify_ssl,
        )
    else:
        raise ValueError(f"Unsupported HTTP method: {method}")

    resp.raise_for_status()
    return resp
221
+
222
+
223
def get_context(config, query, chat_id, auth_headers, auth_cookies):
    """Fetch retrieval context for *query* from the backend's context endpoint.

    Returns:
        list: The context as a list of passages; scalar responses are wrapped
        into a single-element list.
    """
    backend = config['backend']
    endpoint = backend['endpoints']['context']

    response = make_api_request(
        backend['base_url'], endpoint, query, chat_id,
        auth_headers, auth_cookies, backend.get('verify_ssl', True))
    payload = extract_response_data(response, endpoint)

    # Always hand back a list so downstream Ragas prep is uniform.
    if isinstance(payload, str):
        return [payload]
    if isinstance(payload, list):
        return payload
    return [str(payload)]
238
+
239
+
240
def get_answer(config, query, chat_id, auth_headers, auth_cookies):
    """Fetch the generated answer for *query* from the backend's answer endpoint.

    Returns:
        str: The answer, coerced to a string regardless of payload type.
    """
    backend = config['backend']
    endpoint = backend['endpoints']['answer']

    response = make_api_request(
        backend['base_url'], endpoint, query, chat_id,
        auth_headers, auth_cookies, backend.get('verify_ssl', True))
    return str(extract_response_data(response, endpoint))
250
+
251
+
252
+
253
def run_evaluation():
    """Run the full RAG evaluation pipeline.

    Steps: load config, read the CSV dataset (expects ``chat_id``, ``query``
    and ``ground_truth`` columns), collect context/answer pairs from the
    backend, score them with the configured Ragas metrics, and log the
    aggregate and per-sample results to MLflow.
    """
    print("=" * 60)
    print("RAGSentinel - RAG Evaluation Framework")
    print("=" * 60)

    print("\n📁 Loading configuration...")
    config = load_config()

    dataset_path = config['dataset']['path']
    print(f"📊 Loading dataset from {dataset_path}...")
    dataset = pd.read_csv(dataset_path)

    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)

    results = []
    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")

    # Query the backend row by row; failures are logged and skipped so one
    # bad query does not abort the whole collection pass.
    for idx, row in dataset.iterrows():
        chat_id = str(row['chat_id'])
        query = row['query']
        ground_truth = row['ground_truth']

        try:
            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)

            results.append({
                'question': query,
                'contexts': context,
                'answer': answer,
                'ground_truth': ground_truth
            })
            print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
        except Exception as e:
            print(f" ✗ Error processing query {idx + 1}: {e}")
            continue

    if not results:
        print("\n❌ No results collected. Exiting.")
        return

    eval_df = pd.DataFrame(results)
    print(f"\n✓ Collected {len(eval_df)} responses")

    print("\n🤖 Initializing LLM and embeddings...")
    llm = get_llm(config)
    embeddings = get_embeddings(config)

    metrics = get_metrics(config)
    print(f" Metrics: {', '.join(config['ragas']['metrics'])}")

    print("\n📈 Preparing data for RAGAS evaluation...")
    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}

    # Sanitize rows into the column layout Ragas expects: everything a str,
    # contexts a non-empty list of non-blank strings.
    for _, row in eval_df.iterrows():
        contexts = row.get("contexts", [])
        if not isinstance(contexts, list):
            contexts = [str(contexts)]
        contexts = [str(c) for c in contexts if c and str(c).strip()]
        if not contexts:
            # Placeholder so metrics that require at least one context can run.
            contexts = ["No context available."]

        ragas_data["question"].append(str(row["question"]))
        ragas_data["answer"].append(str(row["answer"]))
        ragas_data["contexts"].append(contexts)
        ragas_data["ground_truth"].append(str(row["ground_truth"]))

    # NOTE: rebinds `dataset` from the pandas DataFrame to a HF Dataset.
    dataset = Dataset.from_dict(ragas_data)

    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")

    # Generous timeouts/retries: metric evaluation makes many LLM calls.
    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)

    ragas_result = evaluate(
        dataset,
        metrics=metrics,
        llm=llm,
        embeddings=embeddings,
        batch_size=2,
        run_config=run_config,
        raise_exceptions=False
    )

    print("\n📊 Processing results...")
    scores_df = ragas_result.to_pandas()
    # Average only the numeric score columns (text columns are excluded).
    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
    mean_scores = scores_df[numeric_columns].mean().to_dict()

    mlflow_config = config['mlflow']
    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
    mlflow.set_experiment(mlflow_config['experiment_name'])

    print("\n📤 Logging results to MLflow...")
    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
    with mlflow.start_run(run_name=run_name):
        print("\n" + "=" * 40)
        print("📊 EVALUATION RESULTS")
        print("=" * 40)
        for metric_name, value in mean_scores.items():
            mlflow.log_metric(metric_name, value)
            print(f" {metric_name}: {value:.4f}")

        mlflow.log_param("dataset_path", dataset_path)
        mlflow.log_param("num_samples", len(eval_df))
        # Full per-sample score table as a run artifact.
        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")

    print("\n" + "=" * 60)
    print("✅ Evaluation complete!")
    print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
    print("=" * 60)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rag-sentinel
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
5
5
  Author: RAGSentinel Team
6
6
  License: MIT
@@ -1,392 +0,0 @@
1
- """
2
- RAGSentinel Evaluator - Core evaluation logic.
3
-
4
- This module contains the main evaluation pipeline for RAGSentinel.
5
- It handles configuration loading, LLM initialization, API communication,
6
- Ragas metrics evaluation, and MLflow result logging.
7
- """
8
-
9
- import os
10
- import re
11
- import yaml
12
- import configparser
13
- import requests
14
- import pandas as pd
15
- import mlflow
16
- from dotenv import load_dotenv
17
- from datasets import Dataset
18
- from ragas import evaluate, RunConfig
19
- from ragas.metrics import faithfulness, answer_relevancy, context_precision, answer_correctness
20
-
21
-
22
- # =============================================================================
23
- # Configuration Loading
24
- # =============================================================================
25
-
26
-
27
- def resolve_placeholder(value, env_vars, ini_config):
28
- """
29
- Resolve ${ENV:...} and ${INI:...} placeholders in a string value.
30
-
31
- Args:
32
- value: String that may contain placeholders
33
- env_vars: Dictionary of environment variables
34
- ini_config: ConfigParser object with ini file contents
35
-
36
- Returns:
37
- str: Value with all placeholders resolved
38
- """
39
- if not isinstance(value, str):
40
- return value
41
-
42
- # Resolve ${ENV:VAR_NAME} - reads from environment variables
43
- env_pattern = r'\$\{ENV:([^}]+)\}'
44
- def env_replacer(match):
45
- var_name = match.group(1)
46
- return env_vars.get(var_name, '')
47
- value = re.sub(env_pattern, env_replacer, value)
48
-
49
- # Resolve ${INI:section.key} - reads from config.ini
50
- ini_pattern = r'\$\{INI:([^}]+)\}'
51
- def ini_replacer(match):
52
- path = match.group(1)
53
- parts = path.split('.')
54
- if len(parts) == 2:
55
- section, key = parts
56
- if ini_config.has_option(section, key):
57
- return ini_config.get(section, key)
58
- return ''
59
- value = re.sub(ini_pattern, ini_replacer, value)
60
-
61
- return value
62
-
63
-
64
- def resolve_config(obj, env_vars, ini_config):
65
- """
66
- Recursively resolve all placeholders in a configuration object.
67
-
68
- Args:
69
- obj: Configuration object (dict, list, or str)
70
- env_vars: Dictionary of environment variables
71
- ini_config: ConfigParser object
72
-
73
- Returns:
74
- Configuration object with all placeholders resolved
75
- """
76
- if isinstance(obj, dict):
77
- return {k: resolve_config(v, env_vars, ini_config) for k, v in obj.items()}
78
- elif isinstance(obj, list):
79
- return [resolve_config(item, env_vars, ini_config) for item in obj]
80
- elif isinstance(obj, str):
81
- return resolve_placeholder(obj, env_vars, ini_config)
82
- return obj
83
-
84
-
85
- def load_config():
86
- """
87
- Load and merge configuration from .env, config.ini, and rag_eval_config.yaml.
88
-
89
- Returns:
90
- dict: Fully resolved configuration dictionary
91
- """
92
- # Load environment variables from .env file
93
- load_dotenv('.env')
94
- env_vars = dict(os.environ)
95
-
96
- # Load INI configuration
97
- ini_config = configparser.ConfigParser()
98
- ini_config.read('config.ini')
99
-
100
- # Load YAML configuration and resolve all placeholders
101
- with open('rag_eval_config.yaml', 'r') as f:
102
- yaml_config = yaml.safe_load(f)
103
-
104
- return resolve_config(yaml_config, env_vars, ini_config)
105
-
106
-
107
- def get_llm(config):
108
- """Initialize LLM based on provider."""
109
- llm_config = config['ragas']['llm']
110
- provider = llm_config['provider'].lower()
111
-
112
- if provider == 'azure':
113
- from langchain_openai import AzureChatOpenAI
114
- return AzureChatOpenAI(
115
- azure_endpoint=llm_config['azure_endpoint'],
116
- api_key=llm_config['api_key'],
117
- api_version=llm_config.get('api_version', '2024-02-15-preview'),
118
- deployment_name=llm_config['model'],
119
- temperature=float(llm_config.get('temperature', 0.0))
120
- )
121
- elif provider == 'openai':
122
- from langchain_openai import ChatOpenAI
123
- return ChatOpenAI(
124
- api_key=llm_config['api_key'],
125
- model=llm_config['model'],
126
- temperature=float(llm_config.get('temperature', 0.0))
127
- )
128
- elif provider == 'ollama':
129
- from langchain_ollama import ChatOllama
130
- return ChatOllama(
131
- base_url=llm_config.get('base_url', 'http://localhost:11434'),
132
- model=llm_config['model'],
133
- temperature=float(llm_config.get('temperature', 0.0))
134
- )
135
- else:
136
- raise ValueError(f"Unknown LLM provider: {provider}")
137
-
138
-
139
- def get_embeddings(config):
140
- """Initialize embeddings based on provider."""
141
- emb_config = config['ragas']['embeddings']
142
- provider = emb_config['provider'].lower()
143
-
144
- if provider == 'azure':
145
- from langchain_openai import AzureOpenAIEmbeddings
146
- return AzureOpenAIEmbeddings(
147
- azure_endpoint=emb_config['azure_endpoint'],
148
- api_key=emb_config['api_key'],
149
- api_version=emb_config.get('api_version', '2024-02-15-preview'),
150
- deployment=emb_config['model']
151
- )
152
- elif provider == 'openai':
153
- from langchain_openai import OpenAIEmbeddings
154
- return OpenAIEmbeddings(
155
- api_key=emb_config['api_key'],
156
- model=emb_config['model']
157
- )
158
- elif provider == 'ollama':
159
- from langchain_ollama import OllamaEmbeddings
160
- return OllamaEmbeddings(
161
- base_url=emb_config.get('base_url', 'http://localhost:11434'),
162
- model=emb_config['model']
163
- )
164
- else:
165
- raise ValueError(f"Unknown embeddings provider: {provider}")
166
-
167
-
168
- def get_metrics(config):
169
- """Get list of Ragas metrics."""
170
- metric_map = {
171
- 'faithfulness': faithfulness,
172
- 'answer_relevancy': answer_relevancy,
173
- 'context_precision': context_precision,
174
- 'answer_correctness': answer_correctness
175
- }
176
- return [metric_map[m] for m in config['ragas']['metrics'] if m in metric_map]
177
-
178
-
179
- def get_auth_headers_and_cookies(config):
180
- """
181
- Get authentication headers and cookies from backend config.
182
-
183
- Supports three authentication types:
184
- - cookie: Session cookie authentication
185
- - bearer: Bearer token authentication
186
- - header: Custom header authentication
187
-
188
- Args:
189
- config: Full configuration dictionary
190
-
191
- Returns:
192
- tuple: (headers dict, cookies dict)
193
- """
194
- # Auth config is nested under backend.auth in the YAML
195
- auth_config = config.get('backend', {}).get('auth', {})
196
- auth_type = auth_config.get('type', 'none').lower()
197
- headers = {}
198
- cookies = {}
199
-
200
- if auth_type == 'cookie':
201
- cookie_name = auth_config.get('cookie_name', 'session')
202
- cookie_value = auth_config.get('cookie_value', '')
203
- if cookie_value:
204
- cookies[cookie_name] = cookie_value
205
- elif auth_type == 'bearer':
206
- token = auth_config.get('bearer_token', '')
207
- if token:
208
- headers['Authorization'] = f'Bearer {token}'
209
- elif auth_type == 'header':
210
- header_name = auth_config.get('header_name', '')
211
- header_value = auth_config.get('header_value', '')
212
- if header_name and header_value:
213
- headers[header_name] = header_value
214
-
215
- return headers, cookies
216
-
217
-
218
- def extract_response_data(response, endpoint_config):
219
- """Extract data from API response."""
220
- data = response.json()
221
- response_path = endpoint_config.get('response_path', '')
222
-
223
- if response_path:
224
- for key in response_path.split('.'):
225
- if isinstance(data, dict) and key in data:
226
- data = data[key]
227
- elif isinstance(data, list) and key.isdigit():
228
- data = data[int(key)]
229
- else:
230
- return data
231
- return data
232
-
233
-
234
- def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
235
- """Make API request to backend."""
236
- url = base_url.rstrip('/') + endpoint_config['path']
237
- method = endpoint_config.get('method', 'POST').upper()
238
-
239
- body = endpoint_config.get('body', {}).copy()
240
- body['query'] = query
241
- body['chat_id'] = chat_id
242
-
243
- headers = {'Content-Type': 'application/json'}
244
- headers.update(auth_headers)
245
-
246
- if method == 'POST':
247
- response = requests.post(url, json=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
248
- else:
249
- response = requests.get(url, params=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
250
-
251
- response.raise_for_status()
252
- return response
253
-
254
-
255
- def get_context(config, query, chat_id, auth_headers, auth_cookies):
256
- """Get context from backend API."""
257
- base_url = config['backend']['base_url']
258
- endpoint_config = config['backend']['endpoints']['context']
259
- verify_ssl = config['backend'].get('verify_ssl', True)
260
-
261
- response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
262
- context = extract_response_data(response, endpoint_config)
263
-
264
- if isinstance(context, list):
265
- return [str(c) for c in context]
266
- return [str(context)]
267
-
268
-
269
- def get_answer(config, query, chat_id, auth_headers, auth_cookies):
270
- """Get answer from backend API."""
271
- base_url = config['backend']['base_url']
272
- endpoint_config = config['backend']['endpoints']['answer']
273
- verify_ssl = config['backend'].get('verify_ssl', True)
274
-
275
- response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
276
- answer = extract_response_data(response, endpoint_config)
277
-
278
- return str(answer)
279
-
280
-
281
-
282
- def run_evaluation():
283
- """Main evaluation function."""
284
- print("=" * 60)
285
- print("RAGSentinel - RAG Evaluation Framework")
286
- print("=" * 60)
287
-
288
- print("\n📁 Loading configuration...")
289
- config = load_config()
290
-
291
- dataset_path = config['dataset']['path']
292
- print(f"📊 Loading dataset from {dataset_path}...")
293
- dataset = pd.read_csv(dataset_path)
294
-
295
- auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
296
-
297
- results = []
298
- print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
299
-
300
- for idx, row in dataset.iterrows():
301
- chat_id = str(row['chat_id'])
302
- query = row['query']
303
- ground_truth = row['ground_truth']
304
-
305
- try:
306
- context = get_context(config, query, chat_id, auth_headers, auth_cookies)
307
- answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
308
-
309
- results.append({
310
- 'question': query,
311
- 'contexts': context,
312
- 'answer': answer,
313
- 'ground_truth': ground_truth
314
- })
315
- print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
316
- except Exception as e:
317
- print(f" ✗ Error processing query {idx + 1}: {e}")
318
- continue
319
-
320
- if not results:
321
- print("\n❌ No results collected. Exiting.")
322
- return
323
-
324
- eval_df = pd.DataFrame(results)
325
- print(f"\n✓ Collected {len(eval_df)} responses")
326
-
327
- print("\n🤖 Initializing LLM and embeddings...")
328
- llm = get_llm(config)
329
- embeddings = get_embeddings(config)
330
-
331
- metrics = get_metrics(config)
332
- print(f" Metrics: {', '.join(config['ragas']['metrics'])}")
333
-
334
- print("\n📈 Preparing data for RAGAS evaluation...")
335
- ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
336
-
337
- for _, row in eval_df.iterrows():
338
- contexts = row.get("contexts", [])
339
- if not isinstance(contexts, list):
340
- contexts = [str(contexts)]
341
- contexts = [str(c) for c in contexts if c and str(c).strip()]
342
- if not contexts:
343
- contexts = ["No context available."]
344
-
345
- ragas_data["question"].append(str(row["question"]))
346
- ragas_data["answer"].append(str(row["answer"]))
347
- ragas_data["contexts"].append(contexts)
348
- ragas_data["ground_truth"].append(str(row["ground_truth"]))
349
-
350
- dataset = Dataset.from_dict(ragas_data)
351
-
352
- print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
353
-
354
- run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
355
-
356
- ragas_result = evaluate(
357
- dataset,
358
- metrics=metrics,
359
- llm=llm,
360
- embeddings=embeddings,
361
- batch_size=2,
362
- run_config=run_config,
363
- raise_exceptions=False
364
- )
365
-
366
- print("\n📊 Processing results...")
367
- scores_df = ragas_result.to_pandas()
368
- numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
369
- mean_scores = scores_df[numeric_columns].mean().to_dict()
370
-
371
- mlflow_config = config['mlflow']
372
- mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
373
- mlflow.set_experiment(mlflow_config['experiment_name'])
374
-
375
- print("\n📤 Logging results to MLflow...")
376
- run_name = mlflow_config.get('run_name', 'RAG Evaluation')
377
- with mlflow.start_run(run_name=run_name):
378
- print("\n" + "=" * 40)
379
- print("📊 EVALUATION RESULTS")
380
- print("=" * 40)
381
- for metric_name, value in mean_scores.items():
382
- mlflow.log_metric(metric_name, value)
383
- print(f" {metric_name}: {value:.4f}")
384
-
385
- mlflow.log_param("dataset_path", dataset_path)
386
- mlflow.log_param("num_samples", len(eval_df))
387
- mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
388
-
389
- print("\n" + "=" * 60)
390
- print("✅ Evaluation complete!")
391
- print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
392
- print("=" * 60)
File without changes
File without changes
File without changes
File without changes