rag-sentinel 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {rag_sentinel-0.1.2/src/rag_sentinel.egg-info → rag_sentinel-0.1.4}/PKG-INFO +1 -1
  2. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/pyproject.toml +1 -1
  3. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/__init__.py +1 -1
  4. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/cli.py +44 -7
  5. rag_sentinel-0.1.4/src/rag_sentinel/evaluator.py +363 -0
  6. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4/src/rag_sentinel.egg-info}/PKG-INFO +1 -1
  7. rag_sentinel-0.1.2/src/rag_sentinel/evaluator.py +0 -392
  8. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/LICENSE +0 -0
  9. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/MANIFEST.in +0 -0
  10. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/README.md +0 -0
  11. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/setup.cfg +0 -0
  12. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/.env.template +0 -0
  13. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/config.ini.template +0 -0
  14. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel/templates/rag_eval_config.yaml +0 -0
  15. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/SOURCES.txt +0 -0
  16. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/dependency_links.txt +0 -0
  17. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/entry_points.txt +0 -0
  18. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/requires.txt +0 -0
  19. {rag_sentinel-0.1.2 → rag_sentinel-0.1.4}/src/rag_sentinel.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rag-sentinel
- Version: 0.1.2
+ Version: 0.1.4
  Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
  Author: RAGSentinel Team
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "rag-sentinel"
- version = "0.1.2"
+ version = "0.1.4"
  description = "RAG Evaluation Framework using Ragas metrics and MLflow tracking"
  readme = "README.md"
  license = {text = "MIT"}
@@ -20,5 +20,5 @@ Author: RAGSentinel Team
  License: MIT
  """

- __version__ = "0.1.0"
+ __version__ = "0.1.2"

@@ -20,7 +20,9 @@ import socket
  import subprocess
  import time
  import argparse
+ import configparser
  from pathlib import Path
+ from urllib.parse import urlparse


  # =============================================================================
@@ -34,6 +36,39 @@ TEMPLATES_DIR = Path(__file__).parent / "templates"
  # Helper Functions
  # =============================================================================

+ def get_mlflow_host_port():
+     """
+     Read MLflow tracking_uri from config.ini and parse host and port.
+
+     Returns:
+         tuple: (host, port) - defaults to ("127.0.0.1", 5000) if not configured
+     """
+     default_host = "127.0.0.1"
+     default_port = 5000
+
+     config_path = Path("config.ini")
+     if not config_path.exists():
+         return default_host, default_port
+
+     try:
+         ini = configparser.ConfigParser()
+         ini.read(config_path)
+
+         tracking_uri = ini.get("mlflow", "tracking_uri", fallback=None)
+         if not tracking_uri:
+             return default_host, default_port
+
+         # Parse the URI (e.g., "http://192.168.1.100:5000")
+         parsed = urlparse(tracking_uri)
+
+         host = parsed.hostname or default_host
+         port = parsed.port or default_port
+
+         return host, port
+     except Exception:
+         return default_host, default_port
+
+
  def is_port_in_use(host, port):
      """
      Check if a port is already in use.
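Side note on the helper added above: the host/port split comes straight from urllib.parse.urlparse. A minimal standalone sketch of that behavior (the first URI matches the example in the code comment; the second is hypothetical):

    from urllib.parse import urlparse

    parsed = urlparse("http://192.168.1.100:5000")
    print(parsed.hostname)  # "192.168.1.100"
    print(parsed.port)      # 5000

    # With no explicit port, urlparse returns None for .port,
    # so the helper falls back to the default port 5000.
    parsed = urlparse("http://mlflow.internal")
    print(parsed.port)      # None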
@@ -49,7 +84,7 @@ def is_port_in_use(host, port):
          return s.connect_ex((host, port)) == 0


- def start_mlflow_server(host="127.0.0.1", port=5001):
+ def start_mlflow_server(host, port):
      """
      Start MLflow tracking server as a background process.
@@ -57,8 +92,8 @@ def start_mlflow_server(host="127.0.0.1", port=5001):
      will skip starting a new instance.

      Args:
-         host: The hostname to bind the server to (default: "127.0.0.1")
-         port: The port number for the server (default: 5001)
+         host: The hostname to bind the server to
+         port: The port number for the server

      Returns:
          subprocess.Popen or None: The server process, or None if already running
@@ -144,8 +179,9 @@ def cmd_run(args):

      This command:
      1. Validates that all required config files exist
-     2. Starts MLflow server (unless --no-server is specified)
-     3. Runs the evaluation using the evaluator module
+     2. Reads MLflow host/port from config.ini
+     3. Starts MLflow server (unless --no-server is specified)
+     4. Runs the evaluation using the evaluator module

      Args:
          args: Parsed command-line arguments (includes --no-server flag)
@@ -161,9 +197,10 @@ def cmd_run(args):
          print("\nRun 'rag-sentinel init' first to create config files.")
          sys.exit(1)

-     # Start MLflow server if not disabled
+     # Start MLflow server if not disabled (uses host/port from config.ini)
      if not args.no_server:
-         start_mlflow_server()
+         host, port = get_mlflow_host_port()
+         start_mlflow_server(host, port)

      # Import and run the evaluation
      from rag_sentinel.evaluator import run_evaluation
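Taken together with the init hint printed above, the expected command sequence looks like this (a sketch; only the --no-server flag is visible in this diff, other flags may exist):

    rag-sentinel init              # create the config files from the bundled templates
    rag-sentinel run               # start MLflow on the host/port read from config.ini, then evaluate
    rag-sentinel run --no-server   # evaluate against an already-running MLflow server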
@@ -0,0 +1,363 @@
+ """
+ RAGSentinel Evaluator - Core evaluation logic.
+
+ This module contains the main evaluation pipeline for RAGSentinel.
+ It handles configuration loading, LLM initialization, API communication,
+ Ragas metrics evaluation, and MLflow result logging.
+ """
+
+ import os
+ import re
+ import yaml
+ import configparser
+ import requests
+ import pandas as pd
+ import mlflow
+ from dotenv import load_dotenv
+ from datasets import Dataset
+ from ragas import evaluate
+ from ragas.run_config import RunConfig
+ from ragas.metrics import (
+     Faithfulness,
+     AnswerRelevancy,
+     ContextPrecision,
+     AnswerCorrectness,
+ )
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings, AzureChatOpenAI, AzureOpenAIEmbeddings
+ from langchain_ollama import ChatOllama, OllamaEmbeddings
+
+
+ # =============================================================================
+ # Configuration Loading
+ # =============================================================================
+
+
+ def load_config(yaml_file='rag_eval_config.yaml'):
+     """
+     Load configuration from YAML file with values resolved from .env and config.ini.
+
+     Returns:
+         dict: Fully resolved configuration dictionary
+     """
+     load_dotenv('.env')
+
+     ini = configparser.ConfigParser()
+     ini.read('config.ini')
+
+     def resolve(obj):
+         if isinstance(obj, dict):
+             return {k: resolve(v) for k, v in obj.items()}
+         if isinstance(obj, list):
+             return [resolve(i) for i in obj]
+         if isinstance(obj, str):
+             # Resolve ${ENV:VAR} and ${INI:section.key} placeholders
+             result = re.sub(r'\$\{ENV:([^}]+)\}', lambda m: os.getenv(m.group(1), ''), obj)
+             result = re.sub(r'\$\{INI:([^.]+)\.([^}]+)\}',
+                             lambda m: ini.get(m.group(1), m.group(2), fallback=''), result)
+             # Convert types
+             if result.lower() == 'true': return True
+             if result.lower() == 'false': return False
+             try:
+                 if '.' in result: return float(result)
+             except ValueError:
+                 pass
+             return result
+         return obj
+
+     with open(yaml_file, 'r') as f:
+         return resolve(yaml.safe_load(f))
+
+
+ def get_llm(config):
+     """Initialize LLM based on config."""
+     provider = config['ragas']['llm']['provider']
+
+     if provider == "azure":
+         azure_config = config['ragas']['llm']['azure']
+         return AzureChatOpenAI(
+             azure_endpoint=azure_config['endpoint'],
+             api_key=azure_config['api_key'],
+             deployment_name=azure_config['deployment_name'],
+             model=azure_config['model'],
+             temperature=azure_config['temperature'],
+             api_version=azure_config['api_version']
+         )
+     elif provider == "openai":
+         openai_config = config['ragas']['llm']['openai']
+         return ChatOpenAI(
+             model=openai_config['model'],
+             temperature=openai_config['temperature'],
+             api_key=openai_config['api_key']
+         )
+     elif provider == "ollama":
+         ollama_config = config['ragas']['llm']['ollama']
+         return ChatOllama(
+             base_url=ollama_config['base_url'],
+             model=ollama_config['model'],
+             temperature=ollama_config['temperature']
+         )
+     else:
+         raise ValueError(f"Unsupported LLM provider: {provider}")
+
+
+ def get_embeddings(config):
+     """Initialize embeddings based on config."""
+     provider = config['ragas']['embeddings']['provider']
+
+     if provider == "azure":
+         azure_config = config['ragas']['embeddings']['azure']
+         return AzureOpenAIEmbeddings(
+             azure_endpoint=azure_config['endpoint'],
+             api_key=azure_config['api_key'],
+             deployment=azure_config['deployment_name'],
+             api_version=azure_config['api_version']
+         )
+     elif provider == "openai":
+         openai_config = config['ragas']['embeddings']['openai']
+         return OpenAIEmbeddings(
+             model=openai_config['model'],
+             api_key=openai_config['api_key']
+         )
+     elif provider == "ollama":
+         ollama_config = config['ragas']['embeddings']['ollama']
+         return OllamaEmbeddings(
+             base_url=ollama_config['base_url'],
+             model=ollama_config['model']
+         )
+     else:
+         raise ValueError(f"Unsupported embeddings provider: {provider}")
+
+
+ def get_metrics(config):
+     """Get Ragas metrics based on config."""
+     metric_map = {
+         "Faithfulness": Faithfulness(),
+         "AnswerRelevancy": AnswerRelevancy(),
+         "ContextPrecision": ContextPrecision(),
+         "AnswerCorrectness": AnswerCorrectness(),
+     }
+
+     metric_names = config['ragas']['metrics']
+     return [metric_map[name] for name in metric_names if name in metric_map]
+
+
+ def get_auth_headers_and_cookies(config):
+     """Get authentication headers and cookies based on config."""
+     auth_config = config['backend']['auth']
+     auth_type = auth_config.get('type', 'none')
+
+     headers = {}
+     cookies = {}
+
+     if auth_type == "cookie":
+         cookies[auth_config['cookie_name']] = auth_config['cookie_value']
+     elif auth_type == "bearer":
+         headers['Authorization'] = f"Bearer {auth_config['bearer_token']}"
+     elif auth_type == "header":
+         headers[auth_config['header_name']] = auth_config['header_value']
+
+     return headers, cookies
+
+
+ def extract_response_data(response, endpoint_config):
+     """Extract data from API response."""
+     if endpoint_config.get('stream', False):
+         return "".join(chunk.decode() for chunk in response.iter_content(chunk_size=None))
+
+     # Try to parse as JSON first
+     try:
+         data = response.json()
+         response_key = endpoint_config.get('response_key')
+         if response_key:
+             return data.get(response_key)
+         return data
+     except:
+         # If JSON parsing fails, return as plain text
+         return response.text
+
+
+ def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
+     """Make API request to backend."""
+     url = base_url + endpoint_config['path']
+     method = endpoint_config.get('method', 'POST')
+
+     headers = {**endpoint_config.get('headers', {}), **auth_headers}
+
+     # Flexible body preparation
+     body = {}
+     for key, value in endpoint_config.get('body', {}).items():
+         if isinstance(value, str) and ("{query}" in value or "{chat_id}" in value):
+             body[key] = value.format(query=query, chat_id=chat_id)
+         elif key == "chat_id":
+             try:
+                 body[key] = int(chat_id)
+             except (ValueError, TypeError):
+                 body[key] = chat_id
+         else:
+             body[key] = value
+
+     if method.upper() == 'POST':
+         resp = requests.post(
+             url,
+             json=body,
+             headers=headers,
+             cookies=auth_cookies,
+             stream=endpoint_config.get('stream', False),
+             verify=verify_ssl
+         )
+     elif method.upper() == 'GET':
+         resp = requests.get(
+             url,
+             params=body,
+             headers=headers,
+             cookies=auth_cookies,
+             verify=verify_ssl
+         )
+     else:
+         raise ValueError(f"Unsupported HTTP method: {method}")
+
+     resp.raise_for_status()
+     return resp
+
+
+ def get_context(config, query, chat_id, auth_headers, auth_cookies):
+     """Retrieve context from backend API."""
+     base_url = config['backend']['base_url']
+     endpoint_config = config['backend']['endpoints']['context']
+     verify_ssl = config['backend'].get('verify_ssl', True)
+
+     response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+     context = extract_response_data(response, endpoint_config)
+
+     if isinstance(context, str):
+         return [context]
+     elif isinstance(context, list):
+         return context
+     else:
+         return [str(context)]
+
+
+ def get_answer(config, query, chat_id, auth_headers, auth_cookies):
+     """Get answer from backend API."""
+     base_url = config['backend']['base_url']
+     endpoint_config = config['backend']['endpoints']['answer']
+     verify_ssl = config['backend'].get('verify_ssl', True)
+
+     response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
+     answer = extract_response_data(response, endpoint_config)
+
+     return str(answer)
+
+
+
+ def run_evaluation():
+     """Main evaluation function."""
+     print("=" * 60)
+     print("RAGSentinel - RAG Evaluation Framework")
+     print("=" * 60)
+
+     print("\n📁 Loading configuration...")
+     config = load_config()
+
+     dataset_path = config['dataset']['path']
+     print(f"📊 Loading dataset from {dataset_path}...")
+     dataset = pd.read_csv(dataset_path)
+
+     auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
+
+     results = []
+     print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
+
+     for idx, row in dataset.iterrows():
+         chat_id = str(row['chat_id'])
+         query = row['query']
+         ground_truth = row['ground_truth']
+
+         try:
+             context = get_context(config, query, chat_id, auth_headers, auth_cookies)
+             answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
+
+             results.append({
+                 'question': query,
+                 'contexts': context,
+                 'answer': answer,
+                 'ground_truth': ground_truth
+             })
+             print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
+         except Exception as e:
+             print(f" ✗ Error processing query {idx + 1}: {e}")
+             continue
+
+     if not results:
+         print("\n❌ No results collected. Exiting.")
+         return
+
+     eval_df = pd.DataFrame(results)
+     print(f"\n✓ Collected {len(eval_df)} responses")
+
+     print("\n🤖 Initializing LLM and embeddings...")
+     llm = get_llm(config)
+     embeddings = get_embeddings(config)
+
+     metrics = get_metrics(config)
+     print(f" Metrics: {', '.join(config['ragas']['metrics'])}")
+
+     print("\n📈 Preparing data for RAGAS evaluation...")
+     ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
+
+     for _, row in eval_df.iterrows():
+         contexts = row.get("contexts", [])
+         if not isinstance(contexts, list):
+             contexts = [str(contexts)]
+         contexts = [str(c) for c in contexts if c and str(c).strip()]
+         if not contexts:
+             contexts = ["No context available."]
+
+         ragas_data["question"].append(str(row["question"]))
+         ragas_data["answer"].append(str(row["answer"]))
+         ragas_data["contexts"].append(contexts)
+         ragas_data["ground_truth"].append(str(row["ground_truth"]))
+
+     dataset = Dataset.from_dict(ragas_data)
+
+     print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
+
+     run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
+
+     ragas_result = evaluate(
+         dataset,
+         metrics=metrics,
+         llm=llm,
+         embeddings=embeddings,
+         batch_size=2,
+         run_config=run_config,
+         raise_exceptions=False
+     )
+
+     print("\n📊 Processing results...")
+     scores_df = ragas_result.to_pandas()
+     numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
+     mean_scores = scores_df[numeric_columns].mean().to_dict()
+
+     mlflow_config = config['mlflow']
+     mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
+     mlflow.set_experiment(mlflow_config['experiment_name'])
+
+     print("\n📤 Logging results to MLflow...")
+     run_name = mlflow_config.get('run_name', 'RAG Evaluation')
+     with mlflow.start_run(run_name=run_name):
+         print("\n" + "=" * 40)
+         print("📊 EVALUATION RESULTS")
+         print("=" * 40)
+         for metric_name, value in mean_scores.items():
+             mlflow.log_metric(metric_name, value)
+             print(f" {metric_name}: {value:.4f}")
+
+         mlflow.log_param("dataset_path", dataset_path)
+         mlflow.log_param("num_samples", len(eval_df))
+         mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
+
+     print("\n" + "=" * 60)
+     print("✅ Evaluation complete!")
+     print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
+     print("=" * 60)
1
  Metadata-Version: 2.4
  Name: rag-sentinel
- Version: 0.1.2
+ Version: 0.1.4
  Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
  Author: RAGSentinel Team
  License: MIT
@@ -1,392 +0,0 @@
- """
- RAGSentinel Evaluator - Core evaluation logic.
-
- This module contains the main evaluation pipeline for RAGSentinel.
- It handles configuration loading, LLM initialization, API communication,
- Ragas metrics evaluation, and MLflow result logging.
- """
-
- import os
- import re
- import yaml
- import configparser
- import requests
- import pandas as pd
- import mlflow
- from dotenv import load_dotenv
- from datasets import Dataset
- from ragas import evaluate, RunConfig
- from ragas.metrics import faithfulness, answer_relevancy, context_precision, answer_correctness
-
-
- # =============================================================================
- # Configuration Loading
- # =============================================================================
-
-
- def resolve_placeholder(value, env_vars, ini_config):
-     """
-     Resolve ${ENV:...} and ${INI:...} placeholders in a string value.
-
-     Args:
-         value: String that may contain placeholders
-         env_vars: Dictionary of environment variables
-         ini_config: ConfigParser object with ini file contents
-
-     Returns:
-         str: Value with all placeholders resolved
-     """
-     if not isinstance(value, str):
-         return value
-
-     # Resolve ${ENV:VAR_NAME} - reads from environment variables
-     env_pattern = r'\$\{ENV:([^}]+)\}'
-     def env_replacer(match):
-         var_name = match.group(1)
-         return env_vars.get(var_name, '')
-     value = re.sub(env_pattern, env_replacer, value)
-
-     # Resolve ${INI:section.key} - reads from config.ini
-     ini_pattern = r'\$\{INI:([^}]+)\}'
-     def ini_replacer(match):
-         path = match.group(1)
-         parts = path.split('.')
-         if len(parts) == 2:
-             section, key = parts
-             if ini_config.has_option(section, key):
-                 return ini_config.get(section, key)
-         return ''
-     value = re.sub(ini_pattern, ini_replacer, value)
-
-     return value
-
-
- def resolve_config(obj, env_vars, ini_config):
-     """
-     Recursively resolve all placeholders in a configuration object.
-
-     Args:
-         obj: Configuration object (dict, list, or str)
-         env_vars: Dictionary of environment variables
-         ini_config: ConfigParser object
-
-     Returns:
-         Configuration object with all placeholders resolved
-     """
-     if isinstance(obj, dict):
-         return {k: resolve_config(v, env_vars, ini_config) for k, v in obj.items()}
-     elif isinstance(obj, list):
-         return [resolve_config(item, env_vars, ini_config) for item in obj]
-     elif isinstance(obj, str):
-         return resolve_placeholder(obj, env_vars, ini_config)
-     return obj
-
-
- def load_config():
-     """
-     Load and merge configuration from .env, config.ini, and rag_eval_config.yaml.
-
-     Returns:
-         dict: Fully resolved configuration dictionary
-     """
-     # Load environment variables from .env file
-     load_dotenv('.env')
-     env_vars = dict(os.environ)
-
-     # Load INI configuration
-     ini_config = configparser.ConfigParser()
-     ini_config.read('config.ini')
-
-     # Load YAML configuration and resolve all placeholders
-     with open('rag_eval_config.yaml', 'r') as f:
-         yaml_config = yaml.safe_load(f)
-
-     return resolve_config(yaml_config, env_vars, ini_config)
-
-
- def get_llm(config):
-     """Initialize LLM based on provider."""
-     llm_config = config['ragas']['llm']
-     provider = llm_config['provider'].lower()
-
-     if provider == 'azure':
-         from langchain_openai import AzureChatOpenAI
-         return AzureChatOpenAI(
-             azure_endpoint=llm_config['azure_endpoint'],
-             api_key=llm_config['api_key'],
-             api_version=llm_config.get('api_version', '2024-02-15-preview'),
-             deployment_name=llm_config['model'],
-             temperature=float(llm_config.get('temperature', 0.0))
-         )
-     elif provider == 'openai':
-         from langchain_openai import ChatOpenAI
-         return ChatOpenAI(
-             api_key=llm_config['api_key'],
-             model=llm_config['model'],
-             temperature=float(llm_config.get('temperature', 0.0))
-         )
-     elif provider == 'ollama':
-         from langchain_ollama import ChatOllama
-         return ChatOllama(
-             base_url=llm_config.get('base_url', 'http://localhost:11434'),
-             model=llm_config['model'],
-             temperature=float(llm_config.get('temperature', 0.0))
-         )
-     else:
-         raise ValueError(f"Unknown LLM provider: {provider}")
-
-
- def get_embeddings(config):
-     """Initialize embeddings based on provider."""
-     emb_config = config['ragas']['embeddings']
-     provider = emb_config['provider'].lower()
-
-     if provider == 'azure':
-         from langchain_openai import AzureOpenAIEmbeddings
-         return AzureOpenAIEmbeddings(
-             azure_endpoint=emb_config['azure_endpoint'],
-             api_key=emb_config['api_key'],
-             api_version=emb_config.get('api_version', '2024-02-15-preview'),
-             deployment=emb_config['model']
-         )
-     elif provider == 'openai':
-         from langchain_openai import OpenAIEmbeddings
-         return OpenAIEmbeddings(
-             api_key=emb_config['api_key'],
-             model=emb_config['model']
-         )
-     elif provider == 'ollama':
-         from langchain_ollama import OllamaEmbeddings
-         return OllamaEmbeddings(
-             base_url=emb_config.get('base_url', 'http://localhost:11434'),
-             model=emb_config['model']
-         )
-     else:
-         raise ValueError(f"Unknown embeddings provider: {provider}")
-
-
- def get_metrics(config):
-     """Get list of Ragas metrics."""
-     metric_map = {
-         'faithfulness': faithfulness,
-         'answer_relevancy': answer_relevancy,
-         'context_precision': context_precision,
-         'answer_correctness': answer_correctness
-     }
-     return [metric_map[m] for m in config['ragas']['metrics'] if m in metric_map]
-
-
- def get_auth_headers_and_cookies(config):
-     """
-     Get authentication headers and cookies from backend config.
-
-     Supports three authentication types:
-     - cookie: Session cookie authentication
-     - bearer: Bearer token authentication
-     - header: Custom header authentication
-
-     Args:
-         config: Full configuration dictionary
-
-     Returns:
-         tuple: (headers dict, cookies dict)
-     """
-     # Auth config is nested under backend.auth in the YAML
-     auth_config = config.get('backend', {}).get('auth', {})
-     auth_type = auth_config.get('type', 'none').lower()
-     headers = {}
-     cookies = {}
-
-     if auth_type == 'cookie':
-         cookie_name = auth_config.get('cookie_name', 'session')
-         cookie_value = auth_config.get('cookie_value', '')
-         if cookie_value:
-             cookies[cookie_name] = cookie_value
-     elif auth_type == 'bearer':
-         token = auth_config.get('bearer_token', '')
-         if token:
-             headers['Authorization'] = f'Bearer {token}'
-     elif auth_type == 'header':
-         header_name = auth_config.get('header_name', '')
-         header_value = auth_config.get('header_value', '')
-         if header_name and header_value:
-             headers[header_name] = header_value
-
-     return headers, cookies
-
-
- def extract_response_data(response, endpoint_config):
-     """Extract data from API response."""
-     data = response.json()
-     response_path = endpoint_config.get('response_path', '')
-
-     if response_path:
-         for key in response_path.split('.'):
-             if isinstance(data, dict) and key in data:
-                 data = data[key]
-             elif isinstance(data, list) and key.isdigit():
-                 data = data[int(key)]
-             else:
-                 return data
-     return data
-
-
- def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
-     """Make API request to backend."""
-     url = base_url.rstrip('/') + endpoint_config['path']
-     method = endpoint_config.get('method', 'POST').upper()
-
-     body = endpoint_config.get('body', {}).copy()
-     body['query'] = query
-     body['chat_id'] = chat_id
-
-     headers = {'Content-Type': 'application/json'}
-     headers.update(auth_headers)
-
-     if method == 'POST':
-         response = requests.post(url, json=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-     else:
-         response = requests.get(url, params=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
-
-     response.raise_for_status()
-     return response
-
-
- def get_context(config, query, chat_id, auth_headers, auth_cookies):
-     """Get context from backend API."""
-     base_url = config['backend']['base_url']
-     endpoint_config = config['backend']['endpoints']['context']
-     verify_ssl = config['backend'].get('verify_ssl', True)
-
-     response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-     context = extract_response_data(response, endpoint_config)
-
-     if isinstance(context, list):
-         return [str(c) for c in context]
-     return [str(context)]
-
-
- def get_answer(config, query, chat_id, auth_headers, auth_cookies):
-     """Get answer from backend API."""
-     base_url = config['backend']['base_url']
-     endpoint_config = config['backend']['endpoints']['answer']
-     verify_ssl = config['backend'].get('verify_ssl', True)
-
-     response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
-     answer = extract_response_data(response, endpoint_config)
-
-     return str(answer)
-
-
-
- def run_evaluation():
-     """Main evaluation function."""
-     print("=" * 60)
-     print("RAGSentinel - RAG Evaluation Framework")
-     print("=" * 60)
-
-     print("\n📁 Loading configuration...")
-     config = load_config()
-
-     dataset_path = config['dataset']['path']
-     print(f"📊 Loading dataset from {dataset_path}...")
-     dataset = pd.read_csv(dataset_path)
-
-     auth_headers, auth_cookies = get_auth_headers_and_cookies(config)
-
-     results = []
-     print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")
-
-     for idx, row in dataset.iterrows():
-         chat_id = str(row['chat_id'])
-         query = row['query']
-         ground_truth = row['ground_truth']
-
-         try:
-             context = get_context(config, query, chat_id, auth_headers, auth_cookies)
-             answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)
-
-             results.append({
-                 'question': query,
-                 'contexts': context,
-                 'answer': answer,
-                 'ground_truth': ground_truth
-             })
-             print(f" ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
-         except Exception as e:
-             print(f" ✗ Error processing query {idx + 1}: {e}")
-             continue
-
-     if not results:
-         print("\n❌ No results collected. Exiting.")
-         return
-
-     eval_df = pd.DataFrame(results)
-     print(f"\n✓ Collected {len(eval_df)} responses")
-
-     print("\n🤖 Initializing LLM and embeddings...")
-     llm = get_llm(config)
-     embeddings = get_embeddings(config)
-
-     metrics = get_metrics(config)
-     print(f" Metrics: {', '.join(config['ragas']['metrics'])}")
-
-     print("\n📈 Preparing data for RAGAS evaluation...")
-     ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}
-
-     for _, row in eval_df.iterrows():
-         contexts = row.get("contexts", [])
-         if not isinstance(contexts, list):
-             contexts = [str(contexts)]
-         contexts = [str(c) for c in contexts if c and str(c).strip()]
-         if not contexts:
-             contexts = ["No context available."]
-
-         ragas_data["question"].append(str(row["question"]))
-         ragas_data["answer"].append(str(row["answer"]))
-         ragas_data["contexts"].append(contexts)
-         ragas_data["ground_truth"].append(str(row["ground_truth"]))
-
-     dataset = Dataset.from_dict(ragas_data)
-
-     print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")
-
-     run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)
-
-     ragas_result = evaluate(
-         dataset,
-         metrics=metrics,
-         llm=llm,
-         embeddings=embeddings,
-         batch_size=2,
-         run_config=run_config,
-         raise_exceptions=False
-     )
-
-     print("\n📊 Processing results...")
-     scores_df = ragas_result.to_pandas()
-     numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
-     mean_scores = scores_df[numeric_columns].mean().to_dict()
-
-     mlflow_config = config['mlflow']
-     mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
-     mlflow.set_experiment(mlflow_config['experiment_name'])
-
-     print("\n📤 Logging results to MLflow...")
-     run_name = mlflow_config.get('run_name', 'RAG Evaluation')
-     with mlflow.start_run(run_name=run_name):
-         print("\n" + "=" * 40)
-         print("📊 EVALUATION RESULTS")
-         print("=" * 40)
-         for metric_name, value in mean_scores.items():
-             mlflow.log_metric(metric_name, value)
-             print(f" {metric_name}: {value:.4f}")
-
-         mlflow.log_param("dataset_path", dataset_path)
-         mlflow.log_param("num_samples", len(eval_df))
-         mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")
-
-     print("\n" + "=" * 60)
-     print("✅ Evaluation complete!")
-     print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
-     print("=" * 60)