rag-sentinel 0.1.0 (rag_sentinel-0.1.0.tar.gz)
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- rag_sentinel-0.1.0/LICENSE +22 -0
- rag_sentinel-0.1.0/MANIFEST.in +4 -0
- rag_sentinel-0.1.0/PKG-INFO +114 -0
- rag_sentinel-0.1.0/README.md +81 -0
- rag_sentinel-0.1.0/pyproject.toml +55 -0
- rag_sentinel-0.1.0/setup.cfg +4 -0
- rag_sentinel-0.1.0/src/rag_sentinel/__init__.py +6 -0
- rag_sentinel-0.1.0/src/rag_sentinel/cli.py +174 -0
- rag_sentinel-0.1.0/src/rag_sentinel/evaluator.py +341 -0
- rag_sentinel-0.1.0/src/rag_sentinel/templates/.env.template +52 -0
- rag_sentinel-0.1.0/src/rag_sentinel/templates/config.ini.template +39 -0
- rag_sentinel-0.1.0/src/rag_sentinel/templates/rag_eval_config.yaml +99 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/PKG-INFO +114 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/SOURCES.txt +16 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/dependency_links.txt +1 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/entry_points.txt +2 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/requires.txt +10 -0
- rag_sentinel-0.1.0/src/rag_sentinel.egg-info/top_level.txt +1 -0

rag_sentinel-0.1.0/LICENSE
@@ -0,0 +1,22 @@
MIT License

Copyright (c) 2026 RAGSentinel Team

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
rag_sentinel-0.1.0/PKG-INFO
@@ -0,0 +1,114 @@
Metadata-Version: 2.4
Name: rag-sentinel
Version: 0.1.0
Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
Author: RAGSentinel Team
License: MIT
Project-URL: Homepage, https://github.com/yourusername/rag-sentinel
Project-URL: Repository, https://github.com/yourusername/rag-sentinel
Keywords: rag,evaluation,ragas,mlflow,llm,ai
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: ragas>=0.2.0
Requires-Dist: mlflow>=2.9.0
Requires-Dist: pandas>=2.0.0
Requires-Dist: pyyaml>=6.0
Requires-Dist: requests>=2.31.0
Requires-Dist: python-dotenv>=1.0.0
Requires-Dist: langchain-openai>=0.0.5
Requires-Dist: langchain-ollama>=0.0.1
Requires-Dist: langchain-core>=0.1.0
Requires-Dist: datasets>=2.14.0
Dynamic: license-file

# RAGSentinel

RAG Evaluation Framework using Ragas metrics and MLflow tracking.

## Installation

```bash
pip install rag-sentinel
```

## Quick Start

### 1. Initialize Project

```bash
mkdir my-rag-eval
cd my-rag-eval
rag-sentinel init
```

This creates:
- `.env` - LLM/Embeddings API keys
- `config.ini` - App settings and authentication
- `rag_eval_config.yaml` - Master configuration

### 2. Configure

Edit `.env`:
```bash
LLM_PROVIDER=azure
AZURE_LLM_API_KEY=your-api-key
AZURE_LLM_ENDPOINT=https://your-resource.openai.azure.com/
```

Edit `config.ini`:
```ini
[app]
app_url = https://your-rag-app.com/backend

[auth]
type = cookie
cookie_name = session
cookie_value = your-session-cookie
```

### 3. Create Test Dataset

Create `test_dataset.csv`:
```csv
query,ground_truth,chat_id
Hello,Hello! How can I help you?,1
```

### 4. Run Evaluation

```bash
rag-sentinel run
```

View results at: http://127.0.0.1:5001

## CLI Commands

```bash
rag-sentinel init             # Initialize project
rag-sentinel run              # Run evaluation (auto-starts MLflow)
rag-sentinel run --no-server  # Run without starting MLflow
rag-sentinel validate         # Validate configuration
```

## Metrics

- **Faithfulness** - Factual consistency of answer with context
- **Answer Relevancy** - How relevant the answer is to the question
- **Context Precision** - Quality of retrieved context
- **Answer Correctness** - Comparison against ground truth

## License

MIT
rag_sentinel-0.1.0/README.md
@@ -0,0 +1,81 @@
# RAGSentinel

RAG Evaluation Framework using Ragas metrics and MLflow tracking.

## Installation

```bash
pip install rag-sentinel
```

## Quick Start

### 1. Initialize Project

```bash
mkdir my-rag-eval
cd my-rag-eval
rag-sentinel init
```

This creates:
- `.env` - LLM/Embeddings API keys
- `config.ini` - App settings and authentication
- `rag_eval_config.yaml` - Master configuration

### 2. Configure

Edit `.env`:
```bash
LLM_PROVIDER=azure
AZURE_LLM_API_KEY=your-api-key
AZURE_LLM_ENDPOINT=https://your-resource.openai.azure.com/
```

Edit `config.ini`:
```ini
[app]
app_url = https://your-rag-app.com/backend

[auth]
type = cookie
cookie_name = session
cookie_value = your-session-cookie
```

### 3. Create Test Dataset

Create `test_dataset.csv`:
```csv
query,ground_truth,chat_id
Hello,Hello! How can I help you?,1
```

### 4. Run Evaluation

```bash
rag-sentinel run
```

View results at: http://127.0.0.1:5001

## CLI Commands

```bash
rag-sentinel init             # Initialize project
rag-sentinel run              # Run evaluation (auto-starts MLflow)
rag-sentinel run --no-server  # Run without starting MLflow
rag-sentinel validate         # Validate configuration
```

## Metrics

- **Faithfulness** - Factual consistency of answer with context
- **Answer Relevancy** - How relevant the answer is to the question
- **Context Precision** - Quality of retrieved context
- **Answer Correctness** - Comparison against ground truth

## License

MIT
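The `test_dataset.csv` schema above (`query`, `ground_truth`, `chat_id`) is all the evaluator expects. As a minimal sketch, a larger dataset can be generated with pandas; the rows below are illustrative placeholders, not fixtures shipped with the package:

```python
# Illustrative only: write a test_dataset.csv matching the expected schema.
import pandas as pd

pd.DataFrame([
    {"query": "What is the refund policy?",
     "ground_truth": "Refunds are accepted within 30 days of purchase.",
     "chat_id": 1},
    {"query": "How do I reset my password?",
     "ground_truth": "Use the 'Forgot password' link on the sign-in page.",
     "chat_id": 2},
]).to_csv("test_dataset.csv", index=False)
```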
rag_sentinel-0.1.0/pyproject.toml
@@ -0,0 +1,55 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "rag-sentinel"
version = "0.1.0"
description = "RAG Evaluation Framework using Ragas metrics and MLflow tracking"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
authors = [
    {name = "RAGSentinel Team"}
]
keywords = ["rag", "evaluation", "ragas", "mlflow", "llm", "ai"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "ragas>=0.2.0",
    "mlflow>=2.9.0",
    "pandas>=2.0.0",
    "pyyaml>=6.0",
    "requests>=2.31.0",
    "python-dotenv>=1.0.0",
    "langchain-openai>=0.0.5",
    "langchain-ollama>=0.0.1",
    "langchain-core>=0.1.0",
    "datasets>=2.14.0",
]

[project.scripts]
rag-sentinel = "rag_sentinel.cli:main"

[project.urls]
Homepage = "https://github.com/yourusername/rag-sentinel"
Repository = "https://github.com/yourusername/rag-sentinel"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
rag_sentinel = ["templates/*"]
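The `[project.scripts]` table above is what wires the `rag-sentinel` command to `rag_sentinel.cli:main` at install time. A sketch of inspecting that wiring from Python (the `group`/`name` selection keywords require Python 3.10+):

```python
# Look up the installed console script and load the callable it points to.
from importlib.metadata import entry_points

(ep,) = entry_points(group="console_scripts", name="rag-sentinel")
print(ep.value)   # rag_sentinel.cli:main
main = ep.load()  # the same function the `rag-sentinel` command invokes
```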
rag_sentinel-0.1.0/src/rag_sentinel/cli.py
@@ -0,0 +1,174 @@
"""
RAGSentinel CLI - Command Line Interface
"""

import sys
import shutil
import socket
import subprocess
import time
import argparse
from pathlib import Path

# Get the templates directory path
TEMPLATES_DIR = Path(__file__).parent / "templates"


def is_port_in_use(host, port):
    """Check if a port is in use."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex((host, port)) == 0


def start_mlflow_server(host="127.0.0.1", port=5001):
    """Start MLflow server as a background process."""
    if is_port_in_use(host, port):
        print(f"✓ MLflow server already running at http://{host}:{port}")
        return None

    print(f"🚀 Starting MLflow server at http://{host}:{port}...")

    process = subprocess.Popen(
        [sys.executable, "-m", "mlflow", "server", "--host", host, "--port", str(port)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
    )

    # Wait for the server to come up, polling once per second.
    for _ in range(10):
        time.sleep(1)
        if is_port_in_use(host, port):
            print(f"✓ MLflow server started at http://{host}:{port}")
            return process

    print("⚠ MLflow server may not have started properly")
    return process
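`connect_ex` returns 0 only when something accepts the connection, which is the contract `start_mlflow_server` relies on to detect an already-running server. A minimal sketch of that behavior, assuming `is_port_in_use` is importable as defined above:

```python
import socket
from rag_sentinel.cli import is_port_in_use

# A listening socket makes the check return True...
srv = socket.socket()
srv.bind(("127.0.0.1", 0))          # kernel assigns a free ephemeral port
srv.listen(1)
host, port = srv.getsockname()
print(is_port_in_use(host, port))   # True

# ...and once it is closed, the same port reads as free again.
srv.close()
print(is_port_in_use(host, port))   # False
```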
def cmd_init(args):
    """Initialize project with config templates."""
    print("=" * 50)
    print("RAGSentinel - Project Initialization")
    print("=" * 50)

    files_to_copy = [
        (".env.template", ".env"),
        ("config.ini.template", "config.ini"),
        ("rag_eval_config.yaml", "rag_eval_config.yaml"),
    ]

    for src_name, dest_name in files_to_copy:
        src_path = TEMPLATES_DIR / src_name
        dest_path = Path.cwd() / dest_name

        if dest_path.exists() and not args.force:
            print(f"⚠ {dest_name} already exists (use --force to overwrite)")
        else:
            if src_path.exists():
                shutil.copy(src_path, dest_path)
                print(f"✓ Created {dest_name}")
            else:
                print(f"✗ Template not found: {src_name}")

    print("\n" + "=" * 50)
    print("Next steps:")
    print("  1. Edit .env with your LLM/Embeddings API keys")
    print("  2. Edit config.ini with your app settings")
    print("  3. Create test_dataset.csv with your test data")
    print("  4. Run: rag-sentinel run")
    print("=" * 50)


def cmd_run(args):
    """Run the RAG evaluation."""
    # Check that all config files exist before doing anything else.
    required_files = [".env", "config.ini", "rag_eval_config.yaml"]
    missing_files = [f for f in required_files if not Path(f).exists()]

    if missing_files:
        print("❌ Missing configuration files:")
        for f in missing_files:
            print(f"  - {f}")
        print("\nRun 'rag-sentinel init' first to create config files.")
        sys.exit(1)

    # Start the MLflow server if it is not already running.
    if not args.no_server:
        start_mlflow_server()

    # Import lazily so the CLI stays fast for init/validate.
    from rag_sentinel.evaluator import run_evaluation
    run_evaluation()


def cmd_validate(args):
    """Validate configuration files."""
    print("=" * 50)
    print("RAGSentinel - Configuration Validation")
    print("=" * 50)

    # Check that the files exist.
    files_to_check = [".env", "config.ini", "rag_eval_config.yaml"]
    all_exist = True

    for f in files_to_check:
        if Path(f).exists():
            print(f"✓ {f} exists")
        else:
            print(f"✗ {f} missing")
            all_exist = False

    if not all_exist:
        print("\n❌ Some files are missing. Run 'rag-sentinel init' first.")
        return

    # Try to load and resolve the configuration.
    try:
        from rag_sentinel.evaluator import load_config
        config = load_config()
        print("\n✓ Configuration loaded successfully")
        print(f"  - LLM Provider: {config['ragas']['llm']['provider']}")
        print(f"  - Embeddings Provider: {config['ragas']['embeddings']['provider']}")
        print(f"  - Backend URL: {config['backend']['base_url']}")
        print(f"  - Dataset: {config['dataset']['path']}")
    except Exception as e:
        print(f"\n❌ Configuration error: {e}")


def main():
    """Main entry point for the CLI."""
    parser = argparse.ArgumentParser(
        prog="rag-sentinel",
        description="RAGSentinel - RAG Evaluation Framework using Ragas and MLflow"
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # init command
    init_parser = subparsers.add_parser("init", help="Initialize project with config templates")
    init_parser.add_argument("--force", "-f", action="store_true", help="Overwrite existing files")

    # run command
    run_parser = subparsers.add_parser("run", help="Run RAG evaluation")
    run_parser.add_argument("--no-server", action="store_true", help="Don't start MLflow server")

    # validate command
    subparsers.add_parser("validate", help="Validate configuration files")

    args = parser.parse_args()

    if args.command == "init":
        cmd_init(args)
    elif args.command == "run":
        cmd_run(args)
    elif args.command == "validate":
        cmd_validate(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
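Because `main()` reads `sys.argv` through argparse, the CLI can also be driven in-process, e.g. as a quick smoke test; a hypothetical sketch, assuming the package is installed:

```python
# Hypothetical smoke test: run `rag-sentinel validate` without a subprocess.
import sys
from rag_sentinel.cli import main

sys.argv = ["rag-sentinel", "validate"]
main()  # reports which config files exist in the current directory
```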
rag_sentinel-0.1.0/src/rag_sentinel/evaluator.py
@@ -0,0 +1,341 @@
"""
RAGSentinel Evaluator - Core evaluation logic
"""

import os
import re
import yaml
import configparser
import requests
import pandas as pd
import mlflow
from dotenv import load_dotenv
from datasets import Dataset
from ragas import evaluate, RunConfig
from ragas.metrics import faithfulness, answer_relevancy, context_precision, answer_correctness


def resolve_placeholder(value, env_vars, ini_config):
    """Resolve ${ENV:...} and ${INI:...} placeholders in a single value."""
    if not isinstance(value, str):
        return value

    # Resolve ${ENV:VAR_NAME}; missing variables resolve to ''.
    env_pattern = r'\$\{ENV:([^}]+)\}'

    def env_replacer(match):
        var_name = match.group(1)
        return env_vars.get(var_name, '')

    value = re.sub(env_pattern, env_replacer, value)

    # Resolve ${INI:section.key}; missing options resolve to ''.
    ini_pattern = r'\$\{INI:([^}]+)\}'

    def ini_replacer(match):
        path = match.group(1)
        parts = path.split('.')
        if len(parts) == 2:
            section, key = parts
            if ini_config.has_option(section, key):
                return ini_config.get(section, key)
        return ''

    value = re.sub(ini_pattern, ini_replacer, value)

    return value


def resolve_config(obj, env_vars, ini_config):
    """Recursively resolve all placeholders in a config structure."""
    if isinstance(obj, dict):
        return {k: resolve_config(v, env_vars, ini_config) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [resolve_config(item, env_vars, ini_config) for item in obj]
    elif isinstance(obj, str):
        return resolve_placeholder(obj, env_vars, ini_config)
    return obj


def load_config():
    """Load configuration from .env, config.ini, and rag_eval_config.yaml."""
    load_dotenv('.env')
    env_vars = dict(os.environ)

    ini_config = configparser.ConfigParser()
    ini_config.read('config.ini')

    with open('rag_eval_config.yaml', 'r') as f:
        yaml_config = yaml.safe_load(f)

    return resolve_config(yaml_config, env_vars, ini_config)


def get_llm(config):
    """Initialize the evaluation LLM for the configured provider."""
    llm_config = config['ragas']['llm']
    provider = llm_config['provider'].lower()
    # Provider-specific settings live in a nested section of the YAML
    # template (ragas.llm.azure / .openai / .ollama), so read from there.
    provider_config = llm_config.get(provider, {})

    if provider == 'azure':
        from langchain_openai import AzureChatOpenAI
        return AzureChatOpenAI(
            azure_endpoint=provider_config['endpoint'],
            api_key=provider_config['api_key'],
            api_version=provider_config.get('api_version') or '2024-02-15-preview',
            deployment_name=provider_config['deployment_name'],
            # Missing env vars resolve to '', so fall back explicitly.
            temperature=float(provider_config.get('temperature') or 0.0)
        )
    elif provider == 'openai':
        from langchain_openai import ChatOpenAI
        return ChatOpenAI(
            api_key=provider_config['api_key'],
            model=provider_config['model'],
            temperature=float(provider_config.get('temperature') or 0.0)
        )
    elif provider == 'ollama':
        from langchain_ollama import ChatOllama
        return ChatOllama(
            base_url=provider_config.get('base_url') or 'http://localhost:11434',
            model=provider_config['model'],
            temperature=float(provider_config.get('temperature') or 0.0)
        )
    else:
        raise ValueError(f"Unknown LLM provider: {provider}")


def get_embeddings(config):
    """Initialize embeddings for the configured provider."""
    emb_config = config['ragas']['embeddings']
    provider = emb_config['provider'].lower()
    # Same nested layout as the LLM section of the YAML template.
    provider_config = emb_config.get(provider, {})

    if provider == 'azure':
        from langchain_openai import AzureOpenAIEmbeddings
        return AzureOpenAIEmbeddings(
            azure_endpoint=provider_config['endpoint'],
            api_key=provider_config['api_key'],
            api_version=provider_config.get('api_version') or '2024-02-15-preview',
            deployment=provider_config['deployment_name']
        )
    elif provider == 'openai':
        from langchain_openai import OpenAIEmbeddings
        return OpenAIEmbeddings(
            api_key=provider_config['api_key'],
            model=provider_config['model']
        )
    elif provider == 'ollama':
        from langchain_ollama import OllamaEmbeddings
        return OllamaEmbeddings(
            base_url=provider_config.get('base_url') or 'http://localhost:11434',
            model=provider_config['model']
        )
    else:
        raise ValueError(f"Unknown embeddings provider: {provider}")


def get_metrics(config):
    """Get the list of Ragas metric objects named in the config."""
    metric_map = {
        'faithfulness': faithfulness,
        'answer_relevancy': answer_relevancy,
        'context_precision': context_precision,
        'answer_correctness': answer_correctness
    }
    return [metric_map[m] for m in config['ragas']['metrics'] if m in metric_map]
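To make the placeholder scheme concrete, here is a sketch of `resolve_config` applied to a toy config; the URL and provider values are made up for illustration:

```python
import configparser
from rag_sentinel.evaluator import resolve_config

ini = configparser.ConfigParser()
ini.read_string("[app]\napp_url = https://rag.example.com/backend\n")
env = {"LLM_PROVIDER": "azure"}

cfg = {
    "backend": {"base_url": "${INI:app.app_url}"},
    "llm": {"provider": "${ENV:LLM_PROVIDER}"},
}
print(resolve_config(cfg, env, ini))
# {'backend': {'base_url': 'https://rag.example.com/backend'},
#  'llm': {'provider': 'azure'}}
```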
def get_auth_headers_and_cookies(config):
    """Get authentication headers and cookies."""
    auth_config = config.get('auth', {})
    auth_type = auth_config.get('type', 'none').lower()
    headers = {}
    cookies = {}

    if auth_type == 'cookie':
        cookie_name = auth_config.get('cookie_name', 'session')
        cookie_value = auth_config.get('cookie_value', '')
        if cookie_value:
            cookies[cookie_name] = cookie_value
    elif auth_type == 'bearer':
        token = auth_config.get('bearer_token', '')
        if token:
            headers['Authorization'] = f'Bearer {token}'
    elif auth_type == 'header':
        header_name = auth_config.get('header_name', '')
        header_value = auth_config.get('header_value', '')
        if header_name and header_value:
            headers[header_name] = header_value

    return headers, cookies


def extract_response_data(response, endpoint_config):
    """Extract data from an API response by walking a dotted key path."""
    data = response.json()
    # The YAML template names this key `response_key`; accept the older
    # `response_path` spelling as a fallback.
    response_path = endpoint_config.get('response_key') or endpoint_config.get('response_path', '')

    if response_path:
        for key in response_path.split('.'):
            if isinstance(data, dict) and key in data:
                data = data[key]
            elif isinstance(data, list) and key.isdigit():
                data = data[int(key)]
            else:
                return data
    return data


def make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl=True):
    """Make an API request to the backend."""
    url = base_url.rstrip('/') + endpoint_config['path']
    method = endpoint_config.get('method', 'POST').upper()

    # Substitute the {query} / {chat_id} placeholders used by the body
    # templates in rag_eval_config.yaml; other body fields pass through.
    body = {}
    for key, value in endpoint_config.get('body', {}).items():
        if isinstance(value, str):
            value = value.replace('{query}', query).replace('{chat_id}', chat_id)
        body[key] = value

    headers = {'Content-Type': 'application/json'}
    headers.update(auth_headers)

    if method == 'POST':
        response = requests.post(url, json=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)
    else:
        response = requests.get(url, params=body, headers=headers, cookies=auth_cookies, verify=verify_ssl)

    response.raise_for_status()
    return response


def get_context(config, query, chat_id, auth_headers, auth_cookies):
    """Get retrieved context from the backend API."""
    base_url = config['backend']['base_url']
    endpoint_config = config['backend']['endpoints']['context']
    verify_ssl = config['backend'].get('verify_ssl', True)

    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
    context = extract_response_data(response, endpoint_config)

    if isinstance(context, list):
        return [str(c) for c in context]
    return [str(context)]


def get_answer(config, query, chat_id, auth_headers, auth_cookies):
    """Get the generated answer from the backend API."""
    base_url = config['backend']['base_url']
    endpoint_config = config['backend']['endpoints']['answer']
    verify_ssl = config['backend'].get('verify_ssl', True)

    response = make_api_request(base_url, endpoint_config, query, chat_id, auth_headers, auth_cookies, verify_ssl)
    answer = extract_response_data(response, endpoint_config)

    return str(answer)
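A sketch of how `extract_response_data` walks a dotted `response_key`; the payload shape here is fabricated for illustration:

```python
from rag_sentinel.evaluator import extract_response_data

class FakeResponse:
    """Stand-in for requests.Response exposing only .json()."""
    def __init__(self, payload):
        self._payload = payload
    def json(self):
        return self._payload

resp = FakeResponse({"data": {"retrieved_contexts": ["chunk A", "chunk B"]}})
print(extract_response_data(resp, {"response_key": "data.retrieved_contexts"}))
# ['chunk A', 'chunk B']
```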
def run_evaluation():
    """Main evaluation function."""
    print("=" * 60)
    print("RAGSentinel - RAG Evaluation Framework")
    print("=" * 60)

    print("\n📁 Loading configuration...")
    config = load_config()

    dataset_path = config['dataset']['path']
    print(f"📊 Loading dataset from {dataset_path}...")
    dataset = pd.read_csv(dataset_path)

    auth_headers, auth_cookies = get_auth_headers_and_cookies(config)

    results = []
    print(f"\n🔗 Collecting responses from {config['backend']['base_url']}...")

    for idx, row in dataset.iterrows():
        chat_id = str(row['chat_id'])
        query = row['query']
        ground_truth = row['ground_truth']

        try:
            context = get_context(config, query, chat_id, auth_headers, auth_cookies)
            answer = get_answer(config, query, chat_id, auth_headers, auth_cookies)

            results.append({
                'question': query,
                'contexts': context,
                'answer': answer,
                'ground_truth': ground_truth
            })
            print(f"  ✓ Processed query {idx + 1}/{len(dataset)}: {query[:50]}...")
        except Exception as e:
            print(f"  ✗ Error processing query {idx + 1}: {e}")
            continue

    if not results:
        print("\n❌ No results collected. Exiting.")
        return

    eval_df = pd.DataFrame(results)
    print(f"\n✓ Collected {len(eval_df)} responses")

    print("\n🤖 Initializing LLM and embeddings...")
    llm = get_llm(config)
    embeddings = get_embeddings(config)

    metrics = get_metrics(config)
    print(f"   Metrics: {', '.join(config['ragas']['metrics'])}")

    print("\n📈 Preparing data for Ragas evaluation...")
    ragas_data = {"question": [], "answer": [], "contexts": [], "ground_truth": []}

    for _, row in eval_df.iterrows():
        contexts = row.get("contexts", [])
        if not isinstance(contexts, list):
            contexts = [str(contexts)]
        contexts = [str(c) for c in contexts if c and str(c).strip()]
        if not contexts:
            contexts = ["No context available."]

        ragas_data["question"].append(str(row["question"]))
        ragas_data["answer"].append(str(row["answer"]))
        ragas_data["contexts"].append(contexts)
        ragas_data["ground_truth"].append(str(row["ground_truth"]))

    ragas_dataset = Dataset.from_dict(ragas_data)

    print("\n⏳ Evaluating with Ragas metrics (this may take a while)...")

    run_config = RunConfig(timeout=300, max_retries=3, max_wait=600)

    ragas_result = evaluate(
        ragas_dataset,
        metrics=metrics,
        llm=llm,
        embeddings=embeddings,
        batch_size=2,
        run_config=run_config,
        raise_exceptions=False
    )

    print("\n📊 Processing results...")
    scores_df = ragas_result.to_pandas()
    numeric_columns = scores_df.select_dtypes(include=['float64', 'float32', 'int64', 'int32']).columns
    mean_scores = scores_df[numeric_columns].mean().to_dict()

    mlflow_config = config['mlflow']
    mlflow.set_tracking_uri(mlflow_config['tracking_uri'])
    mlflow.set_experiment(mlflow_config['experiment_name'])

    print("\n📤 Logging results to MLflow...")
    run_name = mlflow_config.get('run_name', 'RAG Evaluation')
    with mlflow.start_run(run_name=run_name):
        print("\n" + "=" * 40)
        print("📊 EVALUATION RESULTS")
        print("=" * 40)
        for metric_name, value in mean_scores.items():
            mlflow.log_metric(metric_name, value)
            print(f"  {metric_name}: {value:.4f}")

        mlflow.log_param("dataset_path", dataset_path)
        mlflow.log_param("num_samples", len(eval_df))
        mlflow.log_table(data=scores_df, artifact_file="ragas_detailed_results.json")

    print("\n" + "=" * 60)
    print("✅ Evaluation complete!")
    print(f"🔗 View results at: {mlflow_config['tracking_uri']}")
    print("=" * 60)
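Logged scores can be pulled back out of MLflow after a run; a sketch against the local tracking server from the default config (the `metrics.*` columns depend on which Ragas metrics were enabled and logged):

```python
import mlflow

mlflow.set_tracking_uri("http://127.0.0.1:5001")
exp = mlflow.get_experiment_by_name("RAG Evaluation")
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])
# One row per run; each logged metric appears as a metrics.<name> column.
print(runs[["run_id", "metrics.faithfulness"]].head())
```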
rag_sentinel-0.1.0/src/rag_sentinel/templates/.env.template
@@ -0,0 +1,52 @@
# =============================================================================
# RAGSentinel Environment Variables
# =============================================================================
# This file contains sensitive LLM and Embeddings configuration.
# Copy this file to .env and fill in your values.
# DO NOT commit .env to version control!
# =============================================================================

# =============================================================================
# LLM Configuration
# =============================================================================
# Provider options: azure, openai, ollama
LLM_PROVIDER=azure

# Azure OpenAI LLM Configuration
AZURE_LLM_API_KEY=
AZURE_LLM_ENDPOINT=
AZURE_LLM_DEPLOYMENT_NAME=
AZURE_LLM_MODEL=gpt-4
AZURE_LLM_TEMPERATURE=0.0
AZURE_LLM_API_VERSION=2024-02-15-preview

# OpenAI LLM Configuration
OPENAI_LLM_API_KEY=
OPENAI_LLM_MODEL=gpt-4
OPENAI_LLM_TEMPERATURE=0.0

# Ollama LLM Configuration
OLLAMA_LLM_BASE_URL=http://localhost:11434
OLLAMA_LLM_MODEL=llama3
OLLAMA_LLM_TEMPERATURE=0.0

# =============================================================================
# Embeddings Configuration
# =============================================================================
# Provider options: azure, openai, ollama
EMBEDDINGS_PROVIDER=azure

# Azure OpenAI Embeddings Configuration
AZURE_EMBEDDINGS_API_KEY=
AZURE_EMBEDDINGS_ENDPOINT=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_API_VERSION=2024-02-15-preview

# OpenAI Embeddings Configuration
OPENAI_EMBEDDINGS_API_KEY=
OPENAI_EMBEDDINGS_MODEL=text-embedding-3-small

# Ollama Embeddings Configuration
OLLAMA_EMBEDDINGS_BASE_URL=http://localhost:11434
OLLAMA_EMBEDDINGS_MODEL=nomic-embed-text
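`load_config()` reads this file through python-dotenv before substituting the `${ENV:...}` placeholders; a minimal sketch of that first step:

```python
import os
from dotenv import load_dotenv

load_dotenv(".env")                    # merge .env values into os.environ
print(os.environ.get("LLM_PROVIDER"))  # e.g. "azure"
```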
rag_sentinel-0.1.0/src/rag_sentinel/templates/config.ini.template
@@ -0,0 +1,39 @@
# =============================================================================
# RAGSentinel Configuration File
# =============================================================================
# This file contains non-sensitive configuration settings.
# Copy this file to config.ini and fill in your values.
# =============================================================================

[mlflow]
# MLflow tracking server URI
tracking_uri = http://127.0.0.1:5001

[app]
# Backend RAG application URL (e.g., https://your-app.com/backend)
app_url =

[endpoints]
# API endpoint paths for context retrieval and answer generation
context_path = /api/retrieve_context
answer_path = /api/respond

[dataset]
# Path to test dataset CSV file (columns: query, ground_truth, chat_id)
path = test_dataset.csv

[auth]
# =============================================================================
# Authentication Configuration
# =============================================================================
# type options: cookie, bearer, header, or none
# For cookie auth: set type=cookie, cookie_name, and cookie_value
# For bearer auth: set type=bearer and bearer_token
# For header auth: set type=header, header_name, and header_value
# =============================================================================
type = cookie
cookie_name = session
cookie_value =
bearer_token =
header_name =
header_value =
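The `[auth]` section feeds `get_auth_headers_and_cookies` in the evaluator; a sketch of what the cookie and bearer types produce, using made-up credentials:

```python
from rag_sentinel.evaluator import get_auth_headers_and_cookies

print(get_auth_headers_and_cookies(
    {"auth": {"type": "bearer", "bearer_token": "abc123"}}))
# ({'Authorization': 'Bearer abc123'}, {})

print(get_auth_headers_and_cookies(
    {"auth": {"type": "cookie", "cookie_name": "session",
              "cookie_value": "s3cr3t"}}))
# ({}, {'session': 's3cr3t'})
```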
rag_sentinel-0.1.0/src/rag_sentinel/templates/rag_eval_config.yaml
@@ -0,0 +1,99 @@
# =============================================================================
# RAG Evaluation Configuration
# =============================================================================
# This file contains the master configuration for RAG evaluation.
#
# Placeholders:
#   ${ENV:VARIABLE_NAME} - Gets value from .env file (LLM & Embeddings only)
#   ${INI:section.key}   - Gets value from config.ini file (all other settings)
#   Direct values        - Hardcoded defaults (rarely need to change)
# =============================================================================

# MLflow Configuration
mlflow:
  tracking_uri: "${INI:mlflow.tracking_uri}"
  experiment_name: "RAG Evaluation"
  run_name: "RAG Evaluation Run"

# Backend API Configuration
# (endpoints must stay nested under `backend`; the evaluator reads
# config['backend']['endpoints'])
backend:
  base_url: "${INI:app.app_url}"
  verify_ssl: false

  endpoints:
    context:
      path: "${INI:endpoints.context_path}"
      method: "POST"
      headers:
        Content-Type: "application/json"
      body:
        query: "{query}"
        chat_id: "{chat_id}"
        selected_option: "QPR Review"
      response_key: "retrieved_contexts"

    answer:
      path: "${INI:endpoints.answer_path}"
      method: "POST"
      headers:
        Content-Type: "application/json"
      body:
        prompt: "{query}"
        chat_id: "{chat_id}"
        selected_option: "QPR Review"
        stream: true
      response_key: "response"

# Authentication Configuration (top-level; the evaluator reads config['auth'])
auth:
  type: "${INI:auth.type}"
  cookie_name: "${INI:auth.cookie_name}"
  cookie_value: "${INI:auth.cookie_value}"
  bearer_token: "${INI:auth.bearer_token}"
  header_name: "${INI:auth.header_name}"
  header_value: "${INI:auth.header_value}"

# Dataset Configuration
dataset:
  path: "${INI:dataset.path}"

# Ragas Metrics Configuration
# (names must match the snake_case keys in evaluator.get_metrics())
ragas:
  metrics:
    - "faithfulness"
    - "answer_relevancy"
    - "context_precision"
    - "answer_correctness"

  # LLM Configuration (from .env)
  llm:
    provider: "${ENV:LLM_PROVIDER}"
    azure:
      api_key: "${ENV:AZURE_LLM_API_KEY}"
      endpoint: "${ENV:AZURE_LLM_ENDPOINT}"
      deployment_name: "${ENV:AZURE_LLM_DEPLOYMENT_NAME}"
      model: "${ENV:AZURE_LLM_MODEL}"
      temperature: "${ENV:AZURE_LLM_TEMPERATURE}"
      api_version: "${ENV:AZURE_LLM_API_VERSION}"
    openai:
      api_key: "${ENV:OPENAI_LLM_API_KEY}"
      model: "${ENV:OPENAI_LLM_MODEL}"
      temperature: "${ENV:OPENAI_LLM_TEMPERATURE}"
    ollama:
      base_url: "${ENV:OLLAMA_LLM_BASE_URL}"
      model: "${ENV:OLLAMA_LLM_MODEL}"
      temperature: "${ENV:OLLAMA_LLM_TEMPERATURE}"

  # Embeddings Configuration (from .env)
  embeddings:
    provider: "${ENV:EMBEDDINGS_PROVIDER}"
    azure:
      api_key: "${ENV:AZURE_EMBEDDINGS_API_KEY}"
      endpoint: "${ENV:AZURE_EMBEDDINGS_ENDPOINT}"
      deployment_name: "${ENV:AZURE_EMBEDDINGS_DEPLOYMENT_NAME}"
      api_version: "${ENV:AZURE_EMBEDDINGS_API_VERSION}"
    openai:
      api_key: "${ENV:OPENAI_EMBEDDINGS_API_KEY}"
      model: "${ENV:OPENAI_EMBEDDINGS_MODEL}"
    ollama:
      base_url: "${ENV:OLLAMA_EMBEDDINGS_BASE_URL}"
      model: "${ENV:OLLAMA_EMBEDDINGS_MODEL}"
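After `rag-sentinel init` and editing the three files, the fully resolved configuration and the metric objects it selects can be inspected directly; a sketch that assumes the config files exist in the working directory:

```python
from rag_sentinel.evaluator import load_config, get_metrics

cfg = load_config()
print(cfg["ragas"]["metrics"])
# ['faithfulness', 'answer_relevancy', 'context_precision', 'answer_correctness']
print([m.name for m in get_metrics(cfg)])
```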
rag_sentinel-0.1.0/src/rag_sentinel.egg-info/PKG-INFO
@@ -0,0 +1,114 @@
Metadata-Version: 2.4
Name: rag-sentinel
Version: 0.1.0
Summary: RAG Evaluation Framework using Ragas metrics and MLflow tracking
Author: RAGSentinel Team
License: MIT
Project-URL: Homepage, https://github.com/yourusername/rag-sentinel
Project-URL: Repository, https://github.com/yourusername/rag-sentinel
Keywords: rag,evaluation,ragas,mlflow,llm,ai
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: ragas>=0.2.0
Requires-Dist: mlflow>=2.9.0
Requires-Dist: pandas>=2.0.0
Requires-Dist: pyyaml>=6.0
Requires-Dist: requests>=2.31.0
Requires-Dist: python-dotenv>=1.0.0
Requires-Dist: langchain-openai>=0.0.5
Requires-Dist: langchain-ollama>=0.0.1
Requires-Dist: langchain-core>=0.1.0
Requires-Dist: datasets>=2.14.0
Dynamic: license-file

# RAGSentinel

RAG Evaluation Framework using Ragas metrics and MLflow tracking.

## Installation

```bash
pip install rag-sentinel
```

## Quick Start

### 1. Initialize Project

```bash
mkdir my-rag-eval
cd my-rag-eval
rag-sentinel init
```

This creates:
- `.env` - LLM/Embeddings API keys
- `config.ini` - App settings and authentication
- `rag_eval_config.yaml` - Master configuration

### 2. Configure

Edit `.env`:
```bash
LLM_PROVIDER=azure
AZURE_LLM_API_KEY=your-api-key
AZURE_LLM_ENDPOINT=https://your-resource.openai.azure.com/
```

Edit `config.ini`:
```ini
[app]
app_url = https://your-rag-app.com/backend

[auth]
type = cookie
cookie_name = session
cookie_value = your-session-cookie
```

### 3. Create Test Dataset

Create `test_dataset.csv`:
```csv
query,ground_truth,chat_id
Hello,Hello! How can I help you?,1
```

### 4. Run Evaluation

```bash
rag-sentinel run
```

View results at: http://127.0.0.1:5001

## CLI Commands

```bash
rag-sentinel init             # Initialize project
rag-sentinel run              # Run evaluation (auto-starts MLflow)
rag-sentinel run --no-server  # Run without starting MLflow
rag-sentinel validate         # Validate configuration
```

## Metrics

- **Faithfulness** - Factual consistency of answer with context
- **Answer Relevancy** - How relevant the answer is to the question
- **Context Precision** - Quality of retrieved context
- **Answer Correctness** - Comparison against ground truth

## License

MIT
rag_sentinel-0.1.0/src/rag_sentinel.egg-info/SOURCES.txt
@@ -0,0 +1,16 @@
LICENSE
MANIFEST.in
README.md
pyproject.toml
src/rag_sentinel/__init__.py
src/rag_sentinel/cli.py
src/rag_sentinel/evaluator.py
src/rag_sentinel.egg-info/PKG-INFO
src/rag_sentinel.egg-info/SOURCES.txt
src/rag_sentinel.egg-info/dependency_links.txt
src/rag_sentinel.egg-info/entry_points.txt
src/rag_sentinel.egg-info/requires.txt
src/rag_sentinel.egg-info/top_level.txt
src/rag_sentinel/templates/.env.template
src/rag_sentinel/templates/config.ini.template
src/rag_sentinel/templates/rag_eval_config.yaml
rag_sentinel-0.1.0/src/rag_sentinel.egg-info/dependency_links.txt
@@ -0,0 +1 @@

rag_sentinel-0.1.0/src/rag_sentinel.egg-info/top_level.txt
@@ -0,0 +1 @@
rag_sentinel