ragaai-catalyst 2.0.7b1__tar.gz → 2.0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ragaai_catalyst-2.0.7b1/ragaai_catalyst.egg-info → ragaai_catalyst-2.0.7.1}/PKG-INFO +2 -2
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/pyproject.toml +2 -2
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1/ragaai_catalyst.egg-info}/PKG-INFO +2 -2
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/SOURCES.txt +6 -1
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/requires.txt +1 -1
- ragaai_catalyst-2.0.7.1/test/test_catalyst/test_configuration.py +199 -0
- ragaai_catalyst-2.0.7.1/test/test_catalyst/test_dataset.py +170 -0
- ragaai_catalyst-2.0.7.1/test/test_catalyst/test_evaluation.py +503 -0
- ragaai_catalyst-2.0.7.1/test/test_catalyst/test_prompt_manager.py +88 -0
- ragaai_catalyst-2.0.7.1/test/test_catalyst/test_synthetic_data_generation.py +151 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/.gitignore +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/README.md +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/docs/dataset_management.md +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/docs/prompt_management.md +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/examples/prompt_management_litellm.ipynb +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/examples/prompt_management_openai.ipynb +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/_version.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/dataset.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/evaluation.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/experiment.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/guard_executor.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/guardrails_manager.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/internal_api_completion.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/prompt_manager.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/proxy_call.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/ragaai_catalyst.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/synthetic_data_generation.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/langchain.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/llamaindex.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/openai.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/llamaindex_callback.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/tracer.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/utils/__init__.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/utils/utils.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/utils.py +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/dependency_links.txt +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/top_level.txt +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/requirements.txt +0 -0
- {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/setup.cfg +0 -0
{ragaai_catalyst-2.0.7b1/ragaai_catalyst.egg-info → ragaai_catalyst-2.0.7.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragaai_catalyst
-Version: 2.0.
+Version: 2.0.7.1
 Summary: RAGA AI CATALYST
 Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: >=3.9
@@ -18,7 +18,7 @@ Requires-Dist: opentelemetry-instrumentation-langchain~=0.24.0
 Requires-Dist: opentelemetry-instrumentation-openai~=0.24.0
 Requires-Dist: langchain-core>=0.2.11
 Requires-Dist: langchain>=0.2.11
-Requires-Dist: openai>=1.
+Requires-Dist: openai>=1.57.0
 Requires-Dist: pandas>=2.1.1
 Requires-Dist: groq>=0.11.0
 Requires-Dist: PyPDF2>=3.0.1
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/pyproject.toml

@@ -8,7 +8,7 @@ description = "RAGA AI CATALYST"
 readme = "README.md"
 requires-python = ">=3.9"
 # license = {file = "LICENSE"}
-version = "2.0.7.
+version = "2.0.7.1"
 authors = [
     {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
     {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -32,7 +32,7 @@ dependencies = [
     "opentelemetry-instrumentation-openai~=0.24.0",
     "langchain-core>=0.2.11",
     "langchain>=0.2.11",
-    "openai>=1.
+    "openai>=1.57.0",
     "pandas>=2.1.1",
     "groq>=0.11.0",
     "PyPDF2>=3.0.1",
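The only dependency change in this release is the tightened `openai` floor (now `>=1.57.0`), alongside the version bump to 2.0.7.1. A minimal sketch, not part of the package, for checking that an installed environment satisfies the new constraints (assumes the third-party `packaging` module is available):

```python
# Hypothetical environment check, not shipped with ragaai-catalyst.
from importlib.metadata import version
from packaging.version import Version  # assumption: packaging is installed

assert version("ragaai_catalyst") == "2.0.7.1"
assert Version(version("openai")) >= Version("1.57.0")  # new floor in 2.0.7.1
```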
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1/ragaai_catalyst.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragaai_catalyst
-Version: 2.0.
+Version: 2.0.7.1
 Summary: RAGA AI CATALYST
 Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: >=3.9
@@ -18,7 +18,7 @@ Requires-Dist: opentelemetry-instrumentation-langchain~=0.24.0
 Requires-Dist: opentelemetry-instrumentation-openai~=0.24.0
 Requires-Dist: langchain-core>=0.2.11
 Requires-Dist: langchain>=0.2.11
-Requires-Dist: openai>=1.
+Requires-Dist: openai>=1.57.0
 Requires-Dist: pandas>=2.1.1
 Requires-Dist: groq>=0.11.0
 Requires-Dist: PyPDF2>=3.0.1
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/SOURCES.txt

@@ -36,4 +36,9 @@ ragaai_catalyst/tracers/instrumentators/langchain.py
 ragaai_catalyst/tracers/instrumentators/llamaindex.py
 ragaai_catalyst/tracers/instrumentators/openai.py
 ragaai_catalyst/tracers/utils/__init__.py
-ragaai_catalyst/tracers/utils/utils.py
+ragaai_catalyst/tracers/utils/utils.py
+test/test_catalyst/test_configuration.py
+test/test_catalyst/test_dataset.py
+test/test_catalyst/test_evaluation.py
+test/test_catalyst/test_prompt_manager.py
+test/test_catalyst/test_synthetic_data_generation.py
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_configuration.py

@@ -0,0 +1,199 @@
+import pytest
+import os
+import requests
+from unittest.mock import patch, MagicMock
+import dotenv
+dotenv.load_dotenv()
+import os
+
+from ragaai_catalyst import RagaAICatalyst
+
+
+# Mock environment variables for testing
+@pytest.fixture
+def mock_env_vars():
+    original_environ = os.environ.copy()
+    RAGAAI_CATALYST_ACCESS_KEY = os.getenv("RAGAAI_CATALYST_ACCESS_KEY")
+    RAGAAI_CATALYST_SECRET_KEY = os.getenv("RAGAAI_CATALYST_SECRET_KEY")
+    RAGAAI_CATALYST_BASE_URL = os.getenv("RAGAAI_CATALYST_BASE_URL")
+
+    yield
+
+    os.environ.clear()
+    os.environ.update(original_environ)
+
+@pytest.fixture
+def raga_catalyst(mock_env_vars):
+    with patch('ragaai_catalyst.RagaAICatalyst.get_token', return_value='test_token'):
+        catalyst = RagaAICatalyst(
+            os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+            os.getenv("RAGAAI_CATALYST_SECRET_KEY")
+        )
+    return catalyst
+
+
+
+def test_project_use_cases():
+    catalyst = RagaAICatalyst(
+        access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+        secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+        base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+    )
+    use_case = catalyst.project_use_cases()
+    assert use_case ==['Chatbot', 'Text2SQL', 'Q/A', 'Code Generation', 'Others']
+
+
+def test_list_project():
+    catalyst = RagaAICatalyst(
+        access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+        secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+        base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+    )
+    use_case = catalyst.list_projects()
+    assert use_case is not None # Check if the result is not None
+
+
+def test_existing_projectname():
+    with pytest.raises(ValueError, match="already exists. Please choose a different name."):
+        catalyst = RagaAICatalyst(
+            access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+            secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+            base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+        )
+        project = catalyst.create_project(
+            project_name="prompt_metric_dataset3",
+            usecase="Chatbot"
+        )
+
+def test_initialization_missing_credentials():
+    """Test initialization with missing credentials"""
+    with pytest.raises(ValueError, match="RAGAAI_CATALYST_ACCESS_KEY and RAGAAI_CATALYST_SECRET_KEY environment variables must be set"):
+        RagaAICatalyst('', '')
+
+@patch('requests.post')
+def test_get_token_success(mock_post, mock_env_vars):
+    """Test token retrieval success"""
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        'success': True,
+        'data': {'token': 'test_token'}
+    }
+    mock_post.return_value = mock_response
+
+    token = RagaAICatalyst.get_token()
+    assert token == 'test_token'
+    assert os.getenv('RAGAAI_CATALYST_TOKEN') == 'test_token'
+
+@patch('requests.post')
+def test_get_token_failure(mock_post, mock_env_vars):
+    """Test token retrieval failure"""
+    mock_response = MagicMock()
+    mock_response.status_code = 400
+    mock_response.json.return_value = {
+        'message': 'Please enter valid credentials'
+    }
+    mock_post.return_value = mock_response
+
+    with pytest.raises(Exception, match="Authentication failed"):
+        RagaAICatalyst.get_token()
+
+@patch('requests.get')
+def test_project_use_cases_success(mock_get, raga_catalyst):
+    """Test retrieving project use cases"""
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        'data': {'usecase': ['Q/A', 'Chatbot', 'Summarization']}
+    }
+    mock_get.return_value = mock_response
+
+    use_cases = raga_catalyst.project_use_cases()
+    assert use_cases == ['Q/A', 'Chatbot', 'Summarization']
+
+@patch('requests.get')
+def test_project_use_cases_failure(mock_get, raga_catalyst):
+    """Test project use cases retrieval failure"""
+    mock_get.side_effect = requests.exceptions.RequestException("Network Error")
+
+    use_cases = raga_catalyst.project_use_cases()
+    assert use_cases == []
+
+@patch('requests.post')
+@patch('ragaai_catalyst.RagaAICatalyst.list_projects')
+def test_create_project_success(mock_list_projects, mock_post, raga_catalyst):
+    """Test successful project creation"""
+    mock_list_projects.return_value = [] # No existing projects
+    mock_post_response = MagicMock()
+    mock_post_response.status_code = 200
+    mock_post_response.json.return_value = {
+        'data': {'name': 'TestProject'}
+    }
+    mock_post.return_value = mock_post_response
+
+    with patch('ragaai_catalyst.RagaAICatalyst.project_use_cases', return_value=['Q/A']):
+        result = raga_catalyst.create_project('TestProject')
+        assert 'Project Created Successfully' in result
+
+@patch('requests.post')
+@patch('ragaai_catalyst.RagaAICatalyst.list_projects')
+def test_create_project_duplicate(mock_list_projects, mock_post, raga_catalyst):
+    """Test project creation with duplicate name"""
+    mock_list_projects.return_value = ['TestProject']
+
+    with pytest.raises(ValueError, match="Project name 'TestProject' already exists"):
+        raga_catalyst.create_project('TestProject')
+
+@patch('requests.get')
+def test_list_projects_success(mock_get, raga_catalyst):
+    """Test successful project listing"""
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        'data': {
+            'content': [
+                {'name': 'Project1'},
+                {'name': 'Project2'}
+            ]
+        }
+    }
+    mock_get.return_value = mock_response
+
+    projects = raga_catalyst.list_projects()
+    assert projects == ['Project1', 'Project2']
+
+@patch('requests.get')
+def test_list_metrics_success(mock_get):
+    """Test successful metrics listing"""
+    with patch.dict(os.environ, {'RAGAAI_CATALYST_TOKEN': 'test_token'}):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            'data': {
+                'metrics': [
+                    {'name': 'hallucination', 'category': 'quality'},
+                    {'name': 'toxicity', 'category': 'safety'}
+                ]
+            }
+        }
+        mock_get.return_value = mock_response
+
+        metrics = RagaAICatalyst.list_metrics()
+        assert metrics == ['hallucination', 'toxicity']
+
+def test_initialization_invalid_credentials():
+    """Test initialization with invalid credentials"""
+    with pytest.raises(Exception, match="Authentication failed. Invalid credentials provided."):
+        RagaAICatalyst(
+            access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY")+"a",
+            secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+            base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+        )
+
+def test_initialization_invalid_base_url():
+    with pytest.raises(ConnectionError, match="The provided base_url is not accessible. Please re-check the base_url."):
+        RagaAICatalyst(
+            access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+            secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+            base_url=os.getenv("RAGAAI_CATALYST_BASE_URL") +"a",
+        )
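The new configuration tests exercise `RagaAICatalyst` credential handling, token retrieval, and project listing/creation. A minimal usage sketch distilled from those tests, assuming the three `RAGAAI_CATALYST_*` environment variables hold valid values (the project name below is illustrative):

```python
import os
from ragaai_catalyst import RagaAICatalyst

catalyst = RagaAICatalyst(
    access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
    secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
    base_url=os.getenv("RAGAAI_CATALYST_BASE_URL"),
)

print(catalyst.project_use_cases())  # e.g. ['Chatbot', 'Text2SQL', 'Q/A', ...]
print(catalyst.list_projects())      # names of existing projects

# Raises ValueError if the project name already exists (see tests above).
catalyst.create_project(project_name="my_new_project", usecase="Chatbot")
```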
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_dataset.py

@@ -0,0 +1,170 @@
+import pytest
+import os
+import dotenv
+dotenv.load_dotenv()
+import pandas as pd
+from datetime import datetime
+from typing import Dict, List
+from unittest.mock import patch, Mock
+import requests
+from ragaai_catalyst import Dataset,RagaAICatalyst
+
+
+@pytest.fixture
+def base_url():
+    return "https://catalyst.raga.ai/api"
+
+@pytest.fixture
+def access_keys():
+    return {
+        "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+        "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+@pytest.fixture
+def dataset(base_url, access_keys):
+    """Create evaluation instance with specific project and dataset"""
+    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+    catalyst = RagaAICatalyst(
+        access_key=access_keys["access_key"],
+        secret_key=access_keys["secret_key"]
+    )
+    return Dataset(project_name="prompt_metric_dataset")
+
+def test_list_dataset(dataset) -> List[str]:
+    datasets = dataset.list_datasets()
+    return datasets
+
+
+def test_get_dataset_columns(dataset) -> List[str]:
+    dataset_column = dataset.get_dataset_columns(dataset_name="ritika_dataset")
+    return dataset_column
+
+def test_incorrect_dataset(dataset):
+    with pytest.raises(ValueError, match="Please enter a valid dataset name"):
+        dataset.get_dataset_columns(dataset_name="ritika_datset")
+
+def test_get_schema_mapping(dataset):
+    schema_mapping_columns= dataset.get_schema_mapping()
+    return schema_mapping_columns
+
+
+def test_upload_csv(dataset):
+    project_name = 'prompt_metric_dataset'
+
+    schema_mapping = {
+        'Query': 'prompt',
+        'Response': 'response',
+        'Context': 'context',
+        'ExpectedResponse': 'expected_response',
+    }
+
+    csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    dataset_name = f"schema_metric_dataset_ritika_{timestamp}"
+
+
+
+    dataset.create_from_csv(
+        csv_path=csv_path,
+        dataset_name=dataset_name,
+        schema_mapping=schema_mapping
+    )
+
+def test_upload_csv_repeat_dataset(dataset):
+    with pytest.raises(ValueError, match="already exists"):
+        project_name = 'prompt_metric_dataset'
+
+        schema_mapping = {
+            'Query': 'prompt',
+            'Response': 'response',
+            'Context': 'context',
+            'ExpectedResponse': 'expected_response',
+        }
+
+        csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+        dataset.create_from_csv(
+            csv_path=csv_path,
+            dataset_name="schema_metric_dataset_ritika_3",
+            schema_mapping=schema_mapping
+        )
+
+
+def test_upload_csv_no_schema_mapping(dataset):
+    with pytest.raises(TypeError, match="missing 1 required positional argument"):
+        project_name = 'prompt_metric_dataset'
+
+        schema_mapping = {
+            'Query': 'prompt',
+            'Response': 'response',
+            'Context': 'context',
+            'ExpectedResponse': 'expected_response',
+        }
+
+        csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+        dataset.create_from_csv(
+            csv_path=csv_path,
+            dataset_name="schema_metric_dataset_ritika_3",
+        )
+
+def test_upload_csv_empty_csv_path(dataset):
+    with pytest.raises(FileNotFoundError, match="No such file or directory"):
+        project_name = 'prompt_metric_dataset'
+
+        schema_mapping = {
+            'Query': 'prompt',
+            'Response': 'response',
+            'Context': 'context',
+            'ExpectedResponse': 'expected_response',
+        }
+
+        csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+        dataset.create_from_csv(
+            csv_path="",
+            dataset_name="schema_metric_dataset_ritika_12",
+            schema_mapping=schema_mapping
+
+        )
+
+def test_upload_csv_empty_schema_mapping(dataset):
+    with pytest.raises(AttributeError):
+        project_name = 'prompt_metric_dataset'
+
+        schema_mapping = {
+            'Query': 'prompt',
+            'Response': 'response',
+            'Context': 'context',
+            'ExpectedResponse': 'expected_response',
+        }
+
+        csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+        dataset.create_from_csv(
+            csv_path=csv_path,
+            dataset_name="schema_metric_dataset_ritika_12",
+            schema_mapping=""
+
+        )
+
+
+def test_upload_csv_invalid_schema(dataset):
+    with pytest.raises(ValueError, match="Invalid schema mapping provided"):
+
+        project_name = 'prompt_metric_dataset'
+
+        schema_mapping={
+            'prompt': 'prompt',
+            'response': 'response',
+            'chatId': 'chatId',
+            'chatSequence': 'chatSequence'
+        }
+
+        csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+        dataset.create_from_csv(
+            csv_path=csv_path,
+            dataset_name="schema_metric_dataset_ritika_12",
+            schema_mapping=schema_mapping)
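The dataset tests center on `Dataset.create_from_csv`, which needs a dataset name and a schema mapping from CSV columns to Catalyst schema elements. A condensed sketch of the flow those tests exercise, assuming a `RagaAICatalyst` session has already been initialised as in the previous sketch (the CSV path and dataset name here are placeholders):

```python
from ragaai_catalyst import Dataset

dataset = Dataset(project_name="prompt_metric_dataset")
print(dataset.list_datasets())
print(dataset.get_schema_mapping())  # allowed schema elements

schema_mapping = {
    "Query": "prompt",
    "Response": "response",
    "Context": "context",
    "ExpectedResponse": "expected_response",
}
# Raises ValueError if the dataset name already exists or the mapping is invalid.
dataset.create_from_csv(
    csv_path="data/my_dataset.csv",   # placeholder path
    dataset_name="my_dataset_v1",     # placeholder name
    schema_mapping=schema_mapping,
)
```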
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_evaluation.py

@@ -0,0 +1,503 @@
+
+from unittest.mock import patch
+import time
+import pytest
+import os
+import dotenv
+dotenv.load_dotenv()
+import pandas as pd
+from datetime import datetime
+from typing import Dict, List
+from ragaai_catalyst import Evaluation, RagaAICatalyst
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+# Define model configurations
+MODEL_CONFIGS = [
+    # OpenAI Models
+    {
+        "provider": "openai",
+        "model": "gpt-4",
+        "suffix": "gpt4"
+    },
+    {
+        "provider": "openai",
+        "model": "gpt-4o",
+        "suffix": "gpt4o"
+    },
+    {
+        "provider": "openai",
+        "model": "gpt-4o-mini",
+        "suffix": "gpt4o_mini"
+    },
+    {
+        "provider": "openai",
+        "model": "gpt-3.5-turbo",
+        "suffix": "gpt35"
+    },
+    # Gemini Models
+    {
+        "provider": "gemini",
+        "model": "gemini-1.5-flash",
+        "suffix": "gemini15_flash"
+    },
+    {
+        "provider": "gemini",
+        "model": "gemini-1.5-pro",
+        "suffix": "gemini15_pro"
+    },
+    # Azure OpenAI Models
+    {
+        "provider": "azure",
+        "model": "gpt-4",
+        "suffix": "azure_gpt4"
+    },
+    {
+        "provider": "azure",
+        "model": "gpt-35-turbo",
+        "suffix": "azure_gpt35"
+    }
+]
+
+@pytest.fixture
+def base_url():
+    return "https://catalyst.raga.ai/api"
+
+@pytest.fixture
+def access_keys():
+    return {
+        "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+        "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+
+@pytest.fixture
+def evaluation(base_url, access_keys):
+    """Create evaluation instance with specific project and dataset"""
+    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+    catalyst = RagaAICatalyst(
+        access_key=access_keys["access_key"],
+        secret_key=access_keys["secret_key"]
+    )
+    return Evaluation(project_name="prompt_metric_dataset", dataset_name="ritika_dataset")
+
+@pytest.fixture
+def chat_evaluation(base_url, access_keys):
+    """Create evaluation instance with specific project and dataset"""
+    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+    catalyst = RagaAICatalyst(
+        access_key=access_keys["access_key"],
+        secret_key=access_keys["secret_key"]
+    )
+    return Evaluation(project_name="chat_demo_sk_v1", dataset_name="chat_metric_dataset_ritika")
+
+def test_evaluation_initialization(evaluation):
+    """Test if evaluation is initialized correctly"""
+    assert evaluation.project_name == "prompt_metric_dataset"
+    assert evaluation.dataset_name == "ritika_dataset"
+    assert evaluation.base_url == "https://catalyst.raga.ai/api"
+    assert evaluation.timeout == 10
+    assert evaluation.jobId is None
+
+def test_project_does_not_exist():
+    """Test initialization with non-existent project"""
+    with pytest.raises(ValueError, match="Project not found. Please enter a valid project name"):
+        Evaluation(project_name="non_existent_project", dataset_name="prompt_metric_dataset_v1")
+
+def test_dataset_does_not_exist():
+    """Test initialization with non-existent dataset"""
+    with pytest.raises(ValueError, match="Dataset not found. Please enter a valid dataset name"):
+        Evaluation(project_name="prompt_metric_dataset", dataset_name="non_existent_dataset")
+
+def test_list_metrics(evaluation) -> List[str]:
+    """Test if it lists all the metrics correctly"""
+    metrics = evaluation.list_metrics()
+    return metrics
+
+@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+def test_invalid_schema_mapping(evaluation, provider_config):
+    """Wrong schema mapping for different providers"""
+    with pytest.raises(ValueError, match="Map"):
+        schema_mapping={
+            'Query': 'Prompt',
+            'Context': 'Context',
+        }
+        metrics = [{
+            "name": "Hallucination",
+            "config": {
+                "model": provider_config["model"],
+                "provider": provider_config["provider"]
+            },
+            "column_name": f"Hallucination_{provider_config['suffix']}",
+            "schema_mapping": schema_mapping
+        }]
+        evaluation.add_metrics(metrics=metrics)
+
+@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+def test_missing_schema_mapping(evaluation, provider_config):
+    """schema_mapping not present for different providers"""
+    with pytest.raises(ValueError, match="{'schema_mapping'} required for each metric evaluation."):
+        metrics = [{
+            "name": "Hallucination",
+            "config": {
+                "model": provider_config["model"],
+                "provider": provider_config["provider"]
+            },
+            "column_name": f"Hallucination_{provider_config['suffix']}"
+        }]
+        evaluation.add_metrics(metrics=metrics)
+
+@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+def test_missing_column_name(evaluation, provider_config):
+    """column_name not present for different providers"""
+    with pytest.raises(ValueError, match="{'column_name'} required for each metric evaluation."):
+        schema_mapping={
+            'Query': 'Prompt',
+            'Response': 'Response',
+            'Context': 'Context',
+        }
+        metrics = [{
+            "name": "Hallucination",
+            "config": {
+                "model": provider_config["model"],
+                "provider": provider_config["provider"]
+            },
+            "schema_mapping": schema_mapping
+        }]
+        evaluation.add_metrics(metrics=metrics)
+
+@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+def test_missing_metric_name(evaluation, provider_config):
+    """metric name missing for different providers"""
+    with pytest.raises(ValueError, match="{'name'} required for each metric evaluation."):
+        schema_mapping={
+            'Query': 'Prompt',
+            'Response': 'Response',
+            'Context': 'Context',
+        }
+        metrics = [{
+            "config": {
+                "model": provider_config["model"],
+                "provider": provider_config["provider"]
+            },
+            "column_name": f"Hallucination_{provider_config['suffix']}",
+            "schema_mapping": schema_mapping
+        }]
+        evaluation.add_metrics(metrics=metrics)
+
+@pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+def test_column_name_already_exists(evaluation, provider_config):
+    """Column name already exists for different providers"""
+    with pytest.raises(ValueError, match="already exists."):
+        schema_mapping={
+            'Query': 'Prompt',
+            'Response': 'Response',
+            'Context': 'Context',
+        }
+        metrics = [{
+            "name": "Hallucination",
+            "config": {
+                "model": provider_config["model"],
+                "provider": provider_config["provider"]
+            },
+            "column_name": "Hallucination_column3",
+            "schema_mapping": schema_mapping
+        }]
+        evaluation.add_metrics(metrics=metrics)
+
+def test_missing_config(evaluation):
+    with pytest.raises(ValueError, match="{'config'} required for each metric evaluation."):
+        schema_mapping={
+            'Query': 'Prompt',
+            'Response': 'Response',
+            'Context': 'Context',
+        }
+        metrics = [{"name": "Hallucination", "column_name": "Hallucination5", "schema_mapping": schema_mapping}]
+        evaluation.add_metrics(metrics=metrics)
+
+
+
+@pytest.mark.parametrize("metric_name", ['Hallucination',
+    'Faithfulness',
+    'SQL Prompt Injection',
+    'Response Correctness',
+    'Response Completeness',
+    'False Refusal',
+    'Context Precision',
+    'Context Recall',
+    'Context Relevancy'
+    'SQL Response Correctness',
+    'SQL Prompt Ambiguity',
+    'SQL Context Sufficiency',
+    'SQL Context Ambiguity'])
+
+def test_metric_initialization_gemini(evaluation, metric_name: str,capfd):
+    """Test if adding each metric and tracking its completion works correctly"""
+    schema_mapping = {
+        'Query': 'prompt',
+        'Response': 'response',
+        'Context': 'context',
+        'ExpectedResponse': 'expectedresponse',
+    }
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+    metrics = [{
+        "name": metric_name,
+        "config": {
+            "model": "gemini-1.5-flash",
+            "provider": "gemini"
+        },
+        "column_name": f"{metric_name}_column_{timestamp}",
+        "schema_mapping": schema_mapping
+    }]
+
+    # Add metrics and capture the printed output
+    evaluation.add_metrics(metrics=metrics)
+    out, err = capfd.readouterr()
+    print(f"Add metrics output: {out}") # Debug print
+
+    # Verify the success message for metric addition
+    assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name}"
+
+    # Store the jobId for status checking
+    assert evaluation.jobId is not None, "Job ID was not set after adding metrics"
+    print(f"Job ID: {evaluation.jobId}") # Debug print
+
+    # Check job status with timeout
+    max_wait_time = 180 # Increased timeout to 3 minutes
+    poll_interval = 5 # Check every 5 seconds
+    start_time = time.time()
+    status_checked = False
+    last_status = None
+
+    print(f"Starting job status checks for {metric_name}...") # Debug print
+
+    while (time.time() - start_time) < max_wait_time:
+        try:
+            evaluation.get_status()
+            out, err = capfd.readouterr()
+            print(f"Status check output: {out}") # Debug print
+
+            if "Job completed" in out:
+                status_checked = True
+                print(f"Job completed for {metric_name}") # Debug print
+                break
+
+            if "Job failed" in out:
+                pytest.fail(f"Job failed for metric: {metric_name}")
+
+            last_status = out
+            time.sleep(poll_interval)
+
+        except Exception as e:
+            print(f"Error checking status: {str(e)}") # Debug print
+            time.sleep(poll_interval)
+
+    if not status_checked:
+        print(f"Last known status: {last_status}") # Debug print
+        if last_status and "In Progress" in last_status:
+            pytest.skip(f"Job still in progress after {max_wait_time} seconds for {metric_name}. This is not a failure, but took longer than expected.")
+        else:
+            assert False, f"Job did not complete within {max_wait_time} seconds for metric: {metric_name}. Last status: {last_status}"
+
+    # Only check results if the job completed successfully
+    if status_checked:
+        try:
+            results = evaluation.get_results()
+            assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+            assert not results.empty, "Results DataFrame should not be empty"
+            column_name = f"{metric_name}_column25"
+            assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+        except Exception as e:
+            pytest.fail(f"Error getting results for {metric_name} with provider: gemini and model: gemini-1.5-flash: {str(e)}")
+
+
+
+@pytest.mark.parametrize("metric_name", ['Hallucination',
+    'Faithfulness',
+    'SQL Prompt Injection',
+    'Response Correctness',
+    'Response Completeness',
+    'False Refusal',
+    'Context Precision',
+    'Context Recall',
+    'Context Relevancy',
+    'SQL Response Correctness',
+    'SQL Prompt Ambiguity',
+    'SQL Context Sufficiency',
+    'SQL Context Ambiguity'])
+
+def test_metric_initialization_openai(evaluation, metric_name: str,capfd):
+    """Test if adding each metric and tracking its completion works correctly"""
+    schema_mapping = {
+        'Query': 'prompt',
+        'Response': 'response',
+        'Context': 'context',
+        'ExpectedResponse': 'expectedresponse',
+    }
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+
+    metrics = [{
+        "name": metric_name,
+        "config": {
+            "model": "gpt-4o-mini",
+            "provider": "openai"
+        },
+        "column_name": f"{metric_name}_column_{timestamp}",
+        "schema_mapping": schema_mapping
+    }]
+
+    # Add metrics and capture the printed output
+    evaluation.add_metrics(metrics=metrics)
+    out, err = capfd.readouterr()
+    print(f"Add metrics output: {out}") # Debug print
+
+    # Verify the success message for metric addition
+    assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name}"
+
+    # Store the jobId for status checking
+    assert evaluation.jobId is not None, "Job ID was not set after adding metrics"
+    print(f"Job ID: {evaluation.jobId}") # Debug print
+
+    # Check job status with timeout
+    max_wait_time = 300 # Increased timeout to 3 minutes
+    poll_interval = 5 # Check every 5 seconds
+    start_time = time.time()
+    status_checked = False
+    last_status = None
+
+    print(f"Starting job status checks for {metric_name}...") # Debug print
+
+    while (time.time() - start_time) < max_wait_time:
+        try:
+            evaluation.get_status()
+            out, err = capfd.readouterr()
+            print(f"Status check output: {out}") # Debug print
+
+            if "Job completed" in out:
+                status_checked = True
+                print(f"Job completed for {metric_name}") # Debug print
+                break
+
+            if "Job failed" in out:
+                pytest.fail(f"Job failed for metric: {metric_name}")
+
+            last_status = out
+            time.sleep(poll_interval)
+
+        except Exception as e:
+            print(f"Error checking status: {str(e)}") # Debug print
+            time.sleep(poll_interval)
+
+    if not status_checked:
+        print(f"Last known status: {last_status}") # Debug print
+        if last_status and "In Progress" in last_status:
+            pytest.skip(f"Job still in progress after {max_wait_time} seconds for {metric_name}. This is not a failure, but took longer than expected.")
+        else:
+            assert False, f"Job did not complete within {max_wait_time} seconds for metric: {metric_name}. Last status: {last_status}"
+
+    # Only check results if the job completed successfully
+    if status_checked:
+        try:
+            results = evaluation.get_results()
+            assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+            assert not results.empty, "Results DataFrame should not be empty"
+            column_name = f"{metric_name}_column26"
+            assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+        except Exception as e:
+            pytest.fail(f"Error getting results for {metric_name} with provider: oprnai and model: gpt-4o-mini: {str(e)}")
+
+
+
+# Add a counter to keep track of the test iterations
+counter = 30
+
+@pytest.mark.parametrize("metric_name", ['Agent Quality',
+    'User Chat Quality',
+    'Instruction Adherence'])
+@pytest.mark.parametrize("model_config", [
+    {"model": "gpt-4o-mini", "provider": "openai"},
+    {"model": "gpt-4", "provider": "openai"},
+    {"model": "gpt-3.5-turbo", "provider": "openai"},
+    {"model":"gemini-1.5-flash", "provider": "gemini"}
+])
+def test_metric_initialization_openai_chatmetric(chat_evaluation, model_config, metric_name: str, capfd):
+    """Test if adding each metric and tracking its completion works correctly"""
+    global counter # Use the global counter
+    schema_mapping = {
+        'ChatID': 'ChatID',
+        'Chat': 'Chat',
+        'Instructions': 'Instructions',
+        'System Prompt': 'systemprompt',
+    }
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+    metrics = [{
+        "name": metric_name,
+        "config": model_config,
+        "column_name": f"{metric_name}_column_{timestamp}_{counter}", # Use counter for unique column name
+        "schema_mapping": schema_mapping
+    }]
+
+    # Increment the counter after each test
+    counter += 1
+
+    # Add metrics and capture the printed output
+    chat_evaluation.add_metrics(metrics=metrics)
+    out, err = capfd.readouterr()
+    print(f"Add metrics output: {out}") # Debug print
+
+    # Verify the success message for metric addition
+    assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name} and {model_config}"
+
+    # Store the jobId for status checking
+    assert chat_evaluation.jobId is not None, "Job ID was not set after adding metrics"
+    print(f"Job ID: {chat_evaluation.jobId}") # Debug print
+
+    # Check job status with timeout
+    max_wait_time = 600 # Increased timeout to 3 minutes
+    poll_interval = 5 # Check every 5 seconds
+    start_time = time.time()
+    status_checked = False
+    last_status = None
+
+    print(f"Starting job status checks for {metric_name}...") # Debug print
+
+    while (time.time() - start_time) < max_wait_time:
+        try:
+            chat_evaluation.get_status()
+            out, err = capfd.readouterr()
+            print(f"Status check output: {out}") # Debug print
+
+            if "Job completed" in out:
+                status_checked = True
+                print(f"Job completed for {metric_name}") # Debug print
+                break
+
+            if "Job failed" in out:
+                pytest.fail(f"Job failed for metric: {metric_name}{model_config}")
+
+            last_status = out
+            time.sleep(poll_interval)
+
+        except Exception as e:
+            print(f"Error checking status: {str(e)}") # Debug print
+            time.sleep(poll_interval)
+
+    if not status_checked:
+        print(f"Last known status: {last_status}") # Debug print
+        if last_status and "In Progress" in last_status:
+            pytest.skip(f"Job still in progress after {max_wait_time} seconds {model_config} for {metric_name}. This is not a failure, but took longer than expected.")
+        else:
+            assert False, f"Job did not complete within {max_wait_time} seconds {model_config} for metric: {metric_name}. Last status: {last_status}"
+
+    # Only check results if the job completed successfully
+    if status_checked:
+        try:
+            results = chat_evaluation.get_results()
+            assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+            assert not results.empty, "Results DataFrame should not be empty"
+            column_name = f"{metric_name}_column_{counter - 1}" # Use the last counter value
+            assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+        except Exception as e:
+            pytest.fail(f"Error getting results for {metric_name} with {model_config}: {str(e)}")
+
+
+
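The evaluation tests follow a schedule-poll-fetch pattern: `add_metrics` schedules a metric evaluation job, `get_status` is polled (its printed output is inspected for "Job completed" in the tests), and `get_results` returns a pandas DataFrame with one column per configured metric. A condensed sketch of that loop; the column name and the fixed polling budget are illustrative choices, not values from the package:

```python
import time
from ragaai_catalyst import Evaluation

evaluation = Evaluation(project_name="prompt_metric_dataset",
                        dataset_name="ritika_dataset")

evaluation.add_metrics(metrics=[{
    "name": "Hallucination",
    "config": {"model": "gpt-4o-mini", "provider": "openai"},
    "column_name": "Hallucination_example",  # illustrative column name
    "schema_mapping": {
        "Query": "prompt",
        "Response": "response",
        "Context": "context",
        "ExpectedResponse": "expectedresponse",
    },
}])

# get_status() prints the job state; the tests poll it every 5 seconds.
for _ in range(36):  # roughly 3 minutes, mirroring the test timeout
    evaluation.get_status()
    time.sleep(5)

results = evaluation.get_results()  # pandas DataFrame, one column per metric
```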
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_prompt_manager.py

@@ -0,0 +1,88 @@
+import os
+import pytest
+import copy
+from ragaai_catalyst import PromptManager, RagaAICatalyst
+import dotenv
+import openai
+dotenv.load_dotenv()
+
+
+@pytest.fixture
+def base_url():
+    return "https://catalyst.raga.ai/api"
+
+@pytest.fixture
+def access_keys():
+    return {
+        "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+        "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+
+@pytest.fixture
+def prompt_manager(base_url, access_keys):
+    """Create evaluation instance with specific project and dataset"""
+    os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+    catalyst = RagaAICatalyst(
+        access_key=access_keys["access_key"],
+        secret_key=access_keys["secret_key"]
+    )
+    return PromptManager(project_name="prompt_metric_dataset")
+
+def test_prompt_initialistaion(prompt_manager):
+    prompt_list= prompt_manager.list_prompts()
+    assert prompt_list ==['test','test2']
+
+def test_list_prompt_version(prompt_manager):
+    prompt_version_list = prompt_manager.list_prompt_versions(prompt_name="test2")
+    assert len(prompt_version_list.keys()) == 2
+
+def test_missing_prompt_name(prompt_manager):
+    with pytest.raises(ValueError, match="Please enter a valid prompt name"):
+        prompt = prompt_manager.get_prompt(prompt_name="", version="v1")
+
+def test_get_variable(prompt_manager):
+    prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+    prompt_variable = prompt.get_variables()
+    assert prompt_variable == ['system1', 'system2']
+
+def test_get_model_parameters(prompt_manager):
+    prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+    model_parameter = prompt.get_model_parameters()
+    assert model_parameter== {'frequency_penalty': 0.4,'max_tokens': 1038,'presence_penalty': 0.1,'temperature': 0.7,'model': 'gpt-4o-mini'}
+
+def test_compile_prompt(prompt_manager):
+    prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+    compiled_prompt = prompt.compile(
+        system1='What is chocolate?',
+        system2 = "How it is made")
+    def get_openai_response(prompt):
+        client = openai.OpenAI()
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=prompt
+        )
+        return response.choices[0].message.content
+    get_openai_response(compiled_prompt)
+
+def test_compile_prompt_no_modelname(prompt_manager):
+    with pytest.raises(openai.BadRequestError,match="you must provide a model parameter"):
+
+        prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+        compiled_prompt = prompt.compile(
+            system1='What is chocolate?',
+            system2 = "How it is made")
+        def get_openai_response(prompt):
+            client = openai.OpenAI()
+            response = client.chat.completions.create(
+                model="",
+                messages=prompt
+            )
+            return response.choices[0].message.content
+        get_openai_response(compiled_prompt)
+
+
+
+
+
+
+
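The prompt-manager tests retrieve a stored prompt version, inspect its variables and model parameters, and compile it into chat messages that are passed straight to the OpenAI client. A minimal sketch of that flow, using the `test2`/`v2` prompt referenced in the tests and assuming an `OPENAI_API_KEY` is configured:

```python
import openai
from ragaai_catalyst import PromptManager

prompt_manager = PromptManager(project_name="prompt_metric_dataset")
prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")

print(prompt.get_variables())         # ['system1', 'system2']
print(prompt.get_model_parameters())  # model, temperature, max_tokens, ...

compiled_prompt = prompt.compile(system1="What is chocolate?",
                                 system2="How it is made")

# The compiled prompt is used as the messages payload for a chat completion.
client = openai.OpenAI()
response = client.chat.completions.create(model="gpt-4o-mini",
                                          messages=compiled_prompt)
print(response.choices[0].message.content)
```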
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_synthetic_data_generation.py

@@ -0,0 +1,151 @@
+import sys
+# sys.path.append('/Users/ritikagoel/workspace/synthetic-catalyst-internal-api2/ragaai-catalyst')
+
+import pytest
+from ragaai_catalyst import SyntheticDataGeneration
+import os
+
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+@pytest.fixture
+def synthetic_gen():
+    return SyntheticDataGeneration()
+
+@pytest.fixture
+def sample_text(synthetic_gen):
+    text_file = "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/ai_document_061023_2.pdf" # Update this path as needed
+    return synthetic_gen.process_document(input_data=text_file)
+
+def test_invalid_csv_processing(synthetic_gen):
+    """Test processing an invalid CSV file"""
+    with pytest.raises(Exception):
+        synthetic_gen.process_document(input_data="/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/OG1.csv")
+
+def test_special_chars_csv_processing(synthetic_gen):
+    """Test processing CSV with special characters"""
+    with pytest.raises(Exception):
+        synthetic_gen.process_document(input_data="/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/OG1.csv")
+
+
+
+def test_missing_llm_proxy(synthetic_gen, sample_text):
+    """Test behavior when internal_llm_proxy is not provided"""
+    print('-'*10)
+    print(OPENAI_API_KEY)
+    print('-'*10)
+    with pytest.raises(ValueError, match="API key must be provided"):
+        synthetic_gen.generate_qna(
+            text=sample_text,
+            question_type='mcq',
+            model_config={"provider": "openai", "model": "gpt-4o-mini"},
+            n=20,
+            user_id="1"
+        )
+
+def test_llm_proxy(synthetic_gen, sample_text):
+    result = synthetic_gen.generate_qna(
+        text=sample_text,
+        question_type='mcq',
+        model_config={"provider": "gemini", "model": "gemini-1.5-flash"},
+        n=15,
+        internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+        user_id="1"
+    )
+    assert len(result) == 15
+
+
+
+def test_invalid_llm_proxy(synthetic_gen, sample_text):
+    """Test behavior with invalid internal_llm_proxy URL"""
+    with pytest.raises(Exception, match="No connection adapters were found for"):
+        synthetic_gen.generate_qna(
+            text=sample_text,
+            question_type='mcq',
+            model_config={"provider": "openai", "model": "gpt-4o-mini"},
+            n=2,
+            internal_llm_proxy="tp://invalid.url",
+            user_id="1"
+        )
+
+def test_missing_model_config(synthetic_gen, sample_text):
+    """Test behavior when model_config is not provided"""
+    with pytest.raises(ValueError, match="Model configuration must be provided with a valid provider and model"):
+        synthetic_gen.generate_qna(
+            text=sample_text,
+            question_type='mcq',
+            n=2,
+            internal_llm_proxy="http://20.244.126.4:4000/chat/completions",
+            user_id="1"
+        )
+
+def test_missing_api_key_for_external_provider(synthetic_gen, sample_text):
+    """Test behavior when API key is missing for external provider"""
+    with pytest.raises(ValueError, match="API key must be provided"):
+        synthetic_gen.generate_qna(
+            text=sample_text,
+            question_type='mcq',
+            model_config={"provider": "gemini", "model": "gemini/gemini-1.5-flash"},
+            n=5
+        )
+
+def test_invalid_api_key(synthetic_gen, sample_text):
+    """Test behavior with invalid API key"""
+    with pytest.raises(Exception, match="Failed to generate valid response after 3 attempts: Invalid API key provided"):
+        synthetic_gen.generate_qna(
+            text=sample_text,
+            question_type='mcq',
+            model_config={"provider": "gemini", "model": "gemini/gemini-1.5-flash"},
+            n=5,
+            api_key='invalid_key'
+        )
+
+def test_default_question_count(synthetic_gen, sample_text):
+    """Test default number of questions when n is not provided"""
+    result = synthetic_gen.generate_qna(
+        text=sample_text,
+        question_type='mcq',
+        model_config={"provider": "openai", "model": "gpt-4o-mini"},
+        internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+        user_id="1"
+    )
+    assert len(result) == 5 # Default should be 5 questions
+
+def test_default_question_type(synthetic_gen, sample_text):
+    """Test default question type when question_type is not provided"""
+    result = synthetic_gen.generate_qna(
+        text=sample_text,
+        model_config={"provider": "openai", "model": "gpt-4o-mini"},
+        n=5,
+        internal_llm_proxy="http://20.244.126.4:4000/chat/completions",
+        user_id="1"
+    )
+    # Verify result contains simple Q/A format without multiple choice options
+    assert all('options' not in qa for qa in result)
+
+def test_question_count_matches_n(synthetic_gen, sample_text):
+    """Test if number of generated questions matches n"""
+    n = 2
+    result = synthetic_gen.generate_qna(
+        text=sample_text,
+        question_type='mcq',
+        model_config={"provider": "openai", "model": "gpt-4o-mini"},
+        n=n,
+        internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+        user_id="1"
+    )
+    assert len(result) == n
+
+def test_proxy_call_check(synthetic_gen,sample_text):
+    """Test compatibility when proxy script called"""
+
+    result = synthetic_gen.generate_qna(
+        text=sample_text,
+        question_type='simple',
+        model_config={"provider": "gemini", "model": "gemini-1.5-flash", "api_base": "http://172.172.11.158:8000/v1alpha1/v1alpha1/predictions"},
+        n=5
+    )
+    assert len(result) == 5
+
+
+
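The synthetic-data tests build a text corpus with `process_document` and then call `generate_qna` with a question type, model config, and question count; per the tests, `n` defaults to 5 and the question type defaults to simple Q/A when omitted, and either an API key or an `internal_llm_proxy` must be supplied. A condensed sketch (the document path is a placeholder and an `OPENAI_API_KEY` is assumed to be set):

```python
import os
from ragaai_catalyst import SyntheticDataGeneration

synthetic_gen = SyntheticDataGeneration()
text = synthetic_gen.process_document(input_data="docs/source_document.pdf")  # placeholder path

qna = synthetic_gen.generate_qna(
    text=text,
    question_type="mcq",                                          # or 'simple'
    model_config={"provider": "openai", "model": "gpt-4o-mini"},
    n=5,
    api_key=os.getenv("OPENAI_API_KEY"),  # tests pass api_key or internal_llm_proxy
)
print(len(qna))  # 5; generate_qna defaults to 5 questions when n is omitted
```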