ragaai-catalyst 2.0.7b1__tar.gz → 2.0.7.1__tar.gz

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (46)
  1. {ragaai_catalyst-2.0.7b1/ragaai_catalyst.egg-info → ragaai_catalyst-2.0.7.1}/PKG-INFO +2 -2
  2. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/pyproject.toml +2 -2
  3. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1/ragaai_catalyst.egg-info}/PKG-INFO +2 -2
  4. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/SOURCES.txt +6 -1
  5. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/requires.txt +1 -1
  6. ragaai_catalyst-2.0.7.1/test/test_catalyst/test_configuration.py +199 -0
  7. ragaai_catalyst-2.0.7.1/test/test_catalyst/test_dataset.py +170 -0
  8. ragaai_catalyst-2.0.7.1/test/test_catalyst/test_evaluation.py +503 -0
  9. ragaai_catalyst-2.0.7.1/test/test_catalyst/test_prompt_manager.py +88 -0
  10. ragaai_catalyst-2.0.7.1/test/test_catalyst/test_synthetic_data_generation.py +151 -0
  11. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/.gitignore +0 -0
  12. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/README.md +0 -0
  13. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/__init__.py +0 -0
  14. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/docs/dataset_management.md +0 -0
  15. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/docs/prompt_management.md +0 -0
  16. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/examples/prompt_management_litellm.ipynb +0 -0
  17. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/examples/prompt_management_openai.ipynb +0 -0
  18. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/__init__.py +0 -0
  19. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/_version.py +0 -0
  20. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/dataset.py +0 -0
  21. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/evaluation.py +0 -0
  22. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/experiment.py +0 -0
  23. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/guard_executor.py +0 -0
  24. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/guardrails_manager.py +0 -0
  25. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/internal_api_completion.py +0 -0
  26. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/prompt_manager.py +0 -0
  27. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/proxy_call.py +0 -0
  28. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/ragaai_catalyst.py +0 -0
  29. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/synthetic_data_generation.py +0 -0
  30. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/__init__.py +0 -0
  31. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/__init__.py +0 -0
  32. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -0
  33. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -0
  34. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/__init__.py +0 -0
  35. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/langchain.py +0 -0
  36. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/llamaindex.py +0 -0
  37. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/instrumentators/openai.py +0 -0
  38. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/llamaindex_callback.py +0 -0
  39. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/tracer.py +0 -0
  40. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/utils/__init__.py +0 -0
  41. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/tracers/utils/utils.py +0 -0
  42. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst/utils.py +0 -0
  43. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/dependency_links.txt +0 -0
  44. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/top_level.txt +0 -0
  45. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/requirements.txt +0 -0
  46. {ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/setup.cfg +0 -0
{ragaai_catalyst-2.0.7b1/ragaai_catalyst.egg-info → ragaai_catalyst-2.0.7.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ragaai_catalyst
- Version: 2.0.7b1
+ Version: 2.0.7.1
  Summary: RAGA AI CATALYST
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
  Requires-Python: >=3.9
@@ -18,7 +18,7 @@ Requires-Dist: opentelemetry-instrumentation-langchain~=0.24.0
  Requires-Dist: opentelemetry-instrumentation-openai~=0.24.0
  Requires-Dist: langchain-core>=0.2.11
  Requires-Dist: langchain>=0.2.11
- Requires-Dist: openai>=1.35.10
+ Requires-Dist: openai>=1.57.0
  Requires-Dist: pandas>=2.1.1
  Requires-Dist: groq>=0.11.0
  Requires-Dist: PyPDF2>=3.0.1
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/pyproject.toml
@@ -8,7 +8,7 @@ description = "RAGA AI CATALYST"
  readme = "README.md"
  requires-python = ">=3.9"
  # license = {file = "LICENSE"}
- version = "2.0.7.beta.1"
+ version = "2.0.7.1"
  authors = [
  {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
  {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -32,7 +32,7 @@ dependencies = [
  "opentelemetry-instrumentation-openai~=0.24.0",
  "langchain-core>=0.2.11",
  "langchain>=0.2.11",
- "openai>=1.35.10",
+ "openai>=1.57.0",
  "pandas>=2.1.1",
  "groq>=0.11.0",
  "PyPDF2>=3.0.1",
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1/ragaai_catalyst.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ragaai_catalyst
- Version: 2.0.7b1
+ Version: 2.0.7.1
  Summary: RAGA AI CATALYST
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
  Requires-Python: >=3.9
@@ -18,7 +18,7 @@ Requires-Dist: opentelemetry-instrumentation-langchain~=0.24.0
  Requires-Dist: opentelemetry-instrumentation-openai~=0.24.0
  Requires-Dist: langchain-core>=0.2.11
  Requires-Dist: langchain>=0.2.11
- Requires-Dist: openai>=1.35.10
+ Requires-Dist: openai>=1.57.0
  Requires-Dist: pandas>=2.1.1
  Requires-Dist: groq>=0.11.0
  Requires-Dist: PyPDF2>=3.0.1
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/SOURCES.txt
@@ -36,4 +36,9 @@ ragaai_catalyst/tracers/instrumentators/langchain.py
  ragaai_catalyst/tracers/instrumentators/llamaindex.py
  ragaai_catalyst/tracers/instrumentators/openai.py
  ragaai_catalyst/tracers/utils/__init__.py
- ragaai_catalyst/tracers/utils/utils.py
+ ragaai_catalyst/tracers/utils/utils.py
+ test/test_catalyst/test_configuration.py
+ test/test_catalyst/test_dataset.py
+ test/test_catalyst/test_evaluation.py
+ test/test_catalyst/test_prompt_manager.py
+ test/test_catalyst/test_synthetic_data_generation.py
{ragaai_catalyst-2.0.7b1 → ragaai_catalyst-2.0.7.1}/ragaai_catalyst.egg-info/requires.txt
@@ -11,7 +11,7 @@ opentelemetry-instrumentation-langchain~=0.24.0
  opentelemetry-instrumentation-openai~=0.24.0
  langchain-core>=0.2.11
  langchain>=0.2.11
- openai>=1.35.10
+ openai>=1.57.0
  pandas>=2.1.1
  groq>=0.11.0
  PyPDF2>=3.0.1
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_configuration.py
@@ -0,0 +1,199 @@
+ import pytest
+ import os
+ import requests
+ from unittest.mock import patch, MagicMock
+ import dotenv
+ dotenv.load_dotenv()
+ import os
+
+ from ragaai_catalyst import RagaAICatalyst
+
+
+ # Mock environment variables for testing
+ @pytest.fixture
+ def mock_env_vars():
+     original_environ = os.environ.copy()
+     RAGAAI_CATALYST_ACCESS_KEY = os.getenv("RAGAAI_CATALYST_ACCESS_KEY")
+     RAGAAI_CATALYST_SECRET_KEY = os.getenv("RAGAAI_CATALYST_SECRET_KEY")
+     RAGAAI_CATALYST_BASE_URL = os.getenv("RAGAAI_CATALYST_BASE_URL")
+
+     yield
+
+     os.environ.clear()
+     os.environ.update(original_environ)
+
+ @pytest.fixture
+ def raga_catalyst(mock_env_vars):
+     with patch('ragaai_catalyst.RagaAICatalyst.get_token', return_value='test_token'):
+         catalyst = RagaAICatalyst(
+             os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+             os.getenv("RAGAAI_CATALYST_SECRET_KEY")
+         )
+         return catalyst
+
+
+
+ def test_project_use_cases():
+     catalyst = RagaAICatalyst(
+         access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+         secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+         base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+     )
+     use_case = catalyst.project_use_cases()
+     assert use_case ==['Chatbot', 'Text2SQL', 'Q/A', 'Code Generation', 'Others']
+
+
+ def test_list_project():
+     catalyst = RagaAICatalyst(
+         access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+         secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+         base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+     )
+     use_case = catalyst.list_projects()
+     assert use_case is not None # Check if the result is not None
+
+
+ def test_existing_projectname():
+     with pytest.raises(ValueError, match="already exists. Please choose a different name."):
+         catalyst = RagaAICatalyst(
+             access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+             secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+             base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+         )
+         project = catalyst.create_project(
+             project_name="prompt_metric_dataset3",
+             usecase="Chatbot"
+         )
+
+ def test_initialization_missing_credentials():
+     """Test initialization with missing credentials"""
+     with pytest.raises(ValueError, match="RAGAAI_CATALYST_ACCESS_KEY and RAGAAI_CATALYST_SECRET_KEY environment variables must be set"):
+         RagaAICatalyst('', '')
+
+ @patch('requests.post')
+ def test_get_token_success(mock_post, mock_env_vars):
+     """Test token retrieval success"""
+     mock_response = MagicMock()
+     mock_response.status_code = 200
+     mock_response.json.return_value = {
+         'success': True,
+         'data': {'token': 'test_token'}
+     }
+     mock_post.return_value = mock_response
+
+     token = RagaAICatalyst.get_token()
+     assert token == 'test_token'
+     assert os.getenv('RAGAAI_CATALYST_TOKEN') == 'test_token'
+
+ @patch('requests.post')
+ def test_get_token_failure(mock_post, mock_env_vars):
+     """Test token retrieval failure"""
+     mock_response = MagicMock()
+     mock_response.status_code = 400
+     mock_response.json.return_value = {
+         'message': 'Please enter valid credentials'
+     }
+     mock_post.return_value = mock_response
+
+     with pytest.raises(Exception, match="Authentication failed"):
+         RagaAICatalyst.get_token()
+
+ @patch('requests.get')
+ def test_project_use_cases_success(mock_get, raga_catalyst):
+     """Test retrieving project use cases"""
+     mock_response = MagicMock()
+     mock_response.status_code = 200
+     mock_response.json.return_value = {
+         'data': {'usecase': ['Q/A', 'Chatbot', 'Summarization']}
+     }
+     mock_get.return_value = mock_response
+
+     use_cases = raga_catalyst.project_use_cases()
+     assert use_cases == ['Q/A', 'Chatbot', 'Summarization']
+
+ @patch('requests.get')
+ def test_project_use_cases_failure(mock_get, raga_catalyst):
+     """Test project use cases retrieval failure"""
+     mock_get.side_effect = requests.exceptions.RequestException("Network Error")
+
+     use_cases = raga_catalyst.project_use_cases()
+     assert use_cases == []
+
+ @patch('requests.post')
+ @patch('ragaai_catalyst.RagaAICatalyst.list_projects')
+ def test_create_project_success(mock_list_projects, mock_post, raga_catalyst):
+     """Test successful project creation"""
+     mock_list_projects.return_value = [] # No existing projects
+     mock_post_response = MagicMock()
+     mock_post_response.status_code = 200
+     mock_post_response.json.return_value = {
+         'data': {'name': 'TestProject'}
+     }
+     mock_post.return_value = mock_post_response
+
+     with patch('ragaai_catalyst.RagaAICatalyst.project_use_cases', return_value=['Q/A']):
+         result = raga_catalyst.create_project('TestProject')
+         assert 'Project Created Successfully' in result
+
+ @patch('requests.post')
+ @patch('ragaai_catalyst.RagaAICatalyst.list_projects')
+ def test_create_project_duplicate(mock_list_projects, mock_post, raga_catalyst):
+     """Test project creation with duplicate name"""
+     mock_list_projects.return_value = ['TestProject']
+
+     with pytest.raises(ValueError, match="Project name 'TestProject' already exists"):
+         raga_catalyst.create_project('TestProject')
+
+ @patch('requests.get')
+ def test_list_projects_success(mock_get, raga_catalyst):
+     """Test successful project listing"""
+     mock_response = MagicMock()
+     mock_response.status_code = 200
+     mock_response.json.return_value = {
+         'data': {
+             'content': [
+                 {'name': 'Project1'},
+                 {'name': 'Project2'}
+             ]
+         }
+     }
+     mock_get.return_value = mock_response
+
+     projects = raga_catalyst.list_projects()
+     assert projects == ['Project1', 'Project2']
+
+ @patch('requests.get')
+ def test_list_metrics_success(mock_get):
+     """Test successful metrics listing"""
+     with patch.dict(os.environ, {'RAGAAI_CATALYST_TOKEN': 'test_token'}):
+         mock_response = MagicMock()
+         mock_response.status_code = 200
+         mock_response.json.return_value = {
+             'data': {
+                 'metrics': [
+                     {'name': 'hallucination', 'category': 'quality'},
+                     {'name': 'toxicity', 'category': 'safety'}
+                 ]
+             }
+         }
+         mock_get.return_value = mock_response
+
+         metrics = RagaAICatalyst.list_metrics()
+         assert metrics == ['hallucination', 'toxicity']
+
+ def test_initialization_invalid_credentials():
+     """Test initialization with invalid credentials"""
+     with pytest.raises(Exception, match="Authentication failed. Invalid credentials provided."):
+         RagaAICatalyst(
+             access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY")+"a",
+             secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+             base_url=os.getenv("RAGAAI_CATALYST_BASE_URL")
+         )
+
+ def test_initialization_invalid_base_url():
+     with pytest.raises(ConnectionError, match="The provided base_url is not accessible. Please re-check the base_url."):
+         RagaAICatalyst(
+             access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+             secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
+             base_url=os.getenv("RAGAAI_CATALYST_BASE_URL") +"a",
+         )
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_dataset.py
@@ -0,0 +1,170 @@
+ import pytest
+ import os
+ import dotenv
+ dotenv.load_dotenv()
+ import pandas as pd
+ from datetime import datetime
+ from typing import Dict, List
+ from unittest.mock import patch, Mock
+ import requests
+ from ragaai_catalyst import Dataset,RagaAICatalyst
+
+
+ @pytest.fixture
+ def base_url():
+     return "https://catalyst.raga.ai/api"
+
+ @pytest.fixture
+ def access_keys():
+     return {
+         "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+         "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+ @pytest.fixture
+ def dataset(base_url, access_keys):
+     """Create evaluation instance with specific project and dataset"""
+     os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+     catalyst = RagaAICatalyst(
+         access_key=access_keys["access_key"],
+         secret_key=access_keys["secret_key"]
+     )
+     return Dataset(project_name="prompt_metric_dataset")
+
+ def test_list_dataset(dataset) -> List[str]:
+     datasets = dataset.list_datasets()
+     return datasets
+
+
+ def test_get_dataset_columns(dataset) -> List[str]:
+     dataset_column = dataset.get_dataset_columns(dataset_name="ritika_dataset")
+     return dataset_column
+
+ def test_incorrect_dataset(dataset):
+     with pytest.raises(ValueError, match="Please enter a valid dataset name"):
+         dataset.get_dataset_columns(dataset_name="ritika_datset")
+
+ def test_get_schema_mapping(dataset):
+     schema_mapping_columns= dataset.get_schema_mapping()
+     return schema_mapping_columns
+
+
+ def test_upload_csv(dataset):
+     project_name = 'prompt_metric_dataset'
+
+     schema_mapping = {
+         'Query': 'prompt',
+         'Response': 'response',
+         'Context': 'context',
+         'ExpectedResponse': 'expected_response',
+     }
+
+     csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     dataset_name = f"schema_metric_dataset_ritika_{timestamp}"
+
+
+
+     dataset.create_from_csv(
+         csv_path=csv_path,
+         dataset_name=dataset_name,
+         schema_mapping=schema_mapping
+     )
+
+ def test_upload_csv_repeat_dataset(dataset):
+     with pytest.raises(ValueError, match="already exists"):
+         project_name = 'prompt_metric_dataset'
+
+         schema_mapping = {
+             'Query': 'prompt',
+             'Response': 'response',
+             'Context': 'context',
+             'ExpectedResponse': 'expected_response',
+         }
+
+         csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+         dataset.create_from_csv(
+             csv_path=csv_path,
+             dataset_name="schema_metric_dataset_ritika_3",
+             schema_mapping=schema_mapping
+         )
+
+
+ def test_upload_csv_no_schema_mapping(dataset):
+     with pytest.raises(TypeError, match="missing 1 required positional argument"):
+         project_name = 'prompt_metric_dataset'
+
+         schema_mapping = {
+             'Query': 'prompt',
+             'Response': 'response',
+             'Context': 'context',
+             'ExpectedResponse': 'expected_response',
+         }
+
+         csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+         dataset.create_from_csv(
+             csv_path=csv_path,
+             dataset_name="schema_metric_dataset_ritika_3",
+         )
+
+ def test_upload_csv_empty_csv_path(dataset):
+     with pytest.raises(FileNotFoundError, match="No such file or directory"):
+         project_name = 'prompt_metric_dataset'
+
+         schema_mapping = {
+             'Query': 'prompt',
+             'Response': 'response',
+             'Context': 'context',
+             'ExpectedResponse': 'expected_response',
+         }
+
+         csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+         dataset.create_from_csv(
+             csv_path="",
+             dataset_name="schema_metric_dataset_ritika_12",
+             schema_mapping=schema_mapping
+
+         )
+
+ def test_upload_csv_empty_schema_mapping(dataset):
+     with pytest.raises(AttributeError):
+         project_name = 'prompt_metric_dataset'
+
+         schema_mapping = {
+             'Query': 'prompt',
+             'Response': 'response',
+             'Context': 'context',
+             'ExpectedResponse': 'expected_response',
+         }
+
+         csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+         dataset.create_from_csv(
+             csv_path=csv_path,
+             dataset_name="schema_metric_dataset_ritika_12",
+             schema_mapping=""
+
+         )
+
+
+ def test_upload_csv_invalid_schema(dataset):
+     with pytest.raises(ValueError, match="Invalid schema mapping provided"):
+
+         project_name = 'prompt_metric_dataset'
+
+         schema_mapping={
+             'prompt': 'prompt',
+             'response': 'response',
+             'chatId': 'chatId',
+             'chatSequence': 'chatSequence'
+         }
+
+         csv_path= "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/prompt_metric_dataset_v1.csv"
+
+         dataset.create_from_csv(
+             csv_path=csv_path,
+             dataset_name="schema_metric_dataset_ritika_12",
+             schema_mapping=schema_mapping)
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_evaluation.py
@@ -0,0 +1,503 @@
+
+ from unittest.mock import patch
+ import time
+ import pytest
+ import os
+ import dotenv
+ dotenv.load_dotenv()
+ import pandas as pd
+ from datetime import datetime
+ from typing import Dict, List
+ from ragaai_catalyst import Evaluation, RagaAICatalyst
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+ # Define model configurations
+ MODEL_CONFIGS = [
+     # OpenAI Models
+     {
+         "provider": "openai",
+         "model": "gpt-4",
+         "suffix": "gpt4"
+     },
+     {
+         "provider": "openai",
+         "model": "gpt-4o",
+         "suffix": "gpt4o"
+     },
+     {
+         "provider": "openai",
+         "model": "gpt-4o-mini",
+         "suffix": "gpt4o_mini"
+     },
+     {
+         "provider": "openai",
+         "model": "gpt-3.5-turbo",
+         "suffix": "gpt35"
+     },
+     # Gemini Models
+     {
+         "provider": "gemini",
+         "model": "gemini-1.5-flash",
+         "suffix": "gemini15_flash"
+     },
+     {
+         "provider": "gemini",
+         "model": "gemini-1.5-pro",
+         "suffix": "gemini15_pro"
+     },
+     # Azure OpenAI Models
+     {
+         "provider": "azure",
+         "model": "gpt-4",
+         "suffix": "azure_gpt4"
+     },
+     {
+         "provider": "azure",
+         "model": "gpt-35-turbo",
+         "suffix": "azure_gpt35"
+     }
+ ]
+
+ @pytest.fixture
+ def base_url():
+     return "https://catalyst.raga.ai/api"
+
+ @pytest.fixture
+ def access_keys():
+     return {
+         "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+         "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+
+ @pytest.fixture
+ def evaluation(base_url, access_keys):
+     """Create evaluation instance with specific project and dataset"""
+     os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+     catalyst = RagaAICatalyst(
+         access_key=access_keys["access_key"],
+         secret_key=access_keys["secret_key"]
+     )
+     return Evaluation(project_name="prompt_metric_dataset", dataset_name="ritika_dataset")
+
+ @pytest.fixture
+ def chat_evaluation(base_url, access_keys):
+     """Create evaluation instance with specific project and dataset"""
+     os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+     catalyst = RagaAICatalyst(
+         access_key=access_keys["access_key"],
+         secret_key=access_keys["secret_key"]
+     )
+     return Evaluation(project_name="chat_demo_sk_v1", dataset_name="chat_metric_dataset_ritika")
+
+ def test_evaluation_initialization(evaluation):
+     """Test if evaluation is initialized correctly"""
+     assert evaluation.project_name == "prompt_metric_dataset"
+     assert evaluation.dataset_name == "ritika_dataset"
+     assert evaluation.base_url == "https://catalyst.raga.ai/api"
+     assert evaluation.timeout == 10
+     assert evaluation.jobId is None
+
+ def test_project_does_not_exist():
+     """Test initialization with non-existent project"""
+     with pytest.raises(ValueError, match="Project not found. Please enter a valid project name"):
+         Evaluation(project_name="non_existent_project", dataset_name="prompt_metric_dataset_v1")
+
+ def test_dataset_does_not_exist():
+     """Test initialization with non-existent dataset"""
+     with pytest.raises(ValueError, match="Dataset not found. Please enter a valid dataset name"):
+         Evaluation(project_name="prompt_metric_dataset", dataset_name="non_existent_dataset")
+
+ def test_list_metrics(evaluation) -> List[str]:
+     """Test if it lists all the metrics correctly"""
+     metrics = evaluation.list_metrics()
+     return metrics
+
+ @pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+ def test_invalid_schema_mapping(evaluation, provider_config):
+     """Wrong schema mapping for different providers"""
+     with pytest.raises(ValueError, match="Map"):
+         schema_mapping={
+             'Query': 'Prompt',
+             'Context': 'Context',
+         }
+         metrics = [{
+             "name": "Hallucination",
+             "config": {
+                 "model": provider_config["model"],
+                 "provider": provider_config["provider"]
+             },
+             "column_name": f"Hallucination_{provider_config['suffix']}",
+             "schema_mapping": schema_mapping
+         }]
+         evaluation.add_metrics(metrics=metrics)
+
+ @pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+ def test_missing_schema_mapping(evaluation, provider_config):
+     """schema_mapping not present for different providers"""
+     with pytest.raises(ValueError, match="{'schema_mapping'} required for each metric evaluation."):
+         metrics = [{
+             "name": "Hallucination",
+             "config": {
+                 "model": provider_config["model"],
+                 "provider": provider_config["provider"]
+             },
+             "column_name": f"Hallucination_{provider_config['suffix']}"
+         }]
+         evaluation.add_metrics(metrics=metrics)
+
+ @pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+ def test_missing_column_name(evaluation, provider_config):
+     """column_name not present for different providers"""
+     with pytest.raises(ValueError, match="{'column_name'} required for each metric evaluation."):
+         schema_mapping={
+             'Query': 'Prompt',
+             'Response': 'Response',
+             'Context': 'Context',
+         }
+         metrics = [{
+             "name": "Hallucination",
+             "config": {
+                 "model": provider_config["model"],
+                 "provider": provider_config["provider"]
+             },
+             "schema_mapping": schema_mapping
+         }]
+         evaluation.add_metrics(metrics=metrics)
+
+ @pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+ def test_missing_metric_name(evaluation, provider_config):
+     """metric name missing for different providers"""
+     with pytest.raises(ValueError, match="{'name'} required for each metric evaluation."):
+         schema_mapping={
+             'Query': 'Prompt',
+             'Response': 'Response',
+             'Context': 'Context',
+         }
+         metrics = [{
+             "config": {
+                 "model": provider_config["model"],
+                 "provider": provider_config["provider"]
+             },
+             "column_name": f"Hallucination_{provider_config['suffix']}",
+             "schema_mapping": schema_mapping
+         }]
+         evaluation.add_metrics(metrics=metrics)
+
+ @pytest.mark.parametrize("provider_config", MODEL_CONFIGS)
+ def test_column_name_already_exists(evaluation, provider_config):
+     """Column name already exists for different providers"""
+     with pytest.raises(ValueError, match="already exists."):
+         schema_mapping={
+             'Query': 'Prompt',
+             'Response': 'Response',
+             'Context': 'Context',
+         }
+         metrics = [{
+             "name": "Hallucination",
+             "config": {
+                 "model": provider_config["model"],
+                 "provider": provider_config["provider"]
+             },
+             "column_name": "Hallucination_column3",
+             "schema_mapping": schema_mapping
+         }]
+         evaluation.add_metrics(metrics=metrics)
+
+ def test_missing_config(evaluation):
+     with pytest.raises(ValueError, match="{'config'} required for each metric evaluation."):
+         schema_mapping={
+             'Query': 'Prompt',
+             'Response': 'Response',
+             'Context': 'Context',
+         }
+         metrics = [{"name": "Hallucination", "column_name": "Hallucination5", "schema_mapping": schema_mapping}]
+         evaluation.add_metrics(metrics=metrics)
+
+
+
+ @pytest.mark.parametrize("metric_name", ['Hallucination',
+     'Faithfulness',
+     'SQL Prompt Injection',
+     'Response Correctness',
+     'Response Completeness',
+     'False Refusal',
+     'Context Precision',
+     'Context Recall',
+     'Context Relevancy'
+     'SQL Response Correctness',
+     'SQL Prompt Ambiguity',
+     'SQL Context Sufficiency',
+     'SQL Context Ambiguity'])
+
+ def test_metric_initialization_gemini(evaluation, metric_name: str,capfd):
+     """Test if adding each metric and tracking its completion works correctly"""
+     schema_mapping = {
+         'Query': 'prompt',
+         'Response': 'response',
+         'Context': 'context',
+         'ExpectedResponse': 'expectedresponse',
+     }
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+     metrics = [{
+         "name": metric_name,
+         "config": {
+             "model": "gemini-1.5-flash",
+             "provider": "gemini"
+         },
+         "column_name": f"{metric_name}_column_{timestamp}",
+         "schema_mapping": schema_mapping
+     }]
+
+     # Add metrics and capture the printed output
+     evaluation.add_metrics(metrics=metrics)
+     out, err = capfd.readouterr()
+     print(f"Add metrics output: {out}") # Debug print
+
+     # Verify the success message for metric addition
+     assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name}"
+
+     # Store the jobId for status checking
+     assert evaluation.jobId is not None, "Job ID was not set after adding metrics"
+     print(f"Job ID: {evaluation.jobId}") # Debug print
+
+     # Check job status with timeout
+     max_wait_time = 180 # Increased timeout to 3 minutes
+     poll_interval = 5 # Check every 5 seconds
+     start_time = time.time()
+     status_checked = False
+     last_status = None
+
+     print(f"Starting job status checks for {metric_name}...") # Debug print
+
+     while (time.time() - start_time) < max_wait_time:
+         try:
+             evaluation.get_status()
+             out, err = capfd.readouterr()
+             print(f"Status check output: {out}") # Debug print
+
+             if "Job completed" in out:
+                 status_checked = True
+                 print(f"Job completed for {metric_name}") # Debug print
+                 break
+
+             if "Job failed" in out:
+                 pytest.fail(f"Job failed for metric: {metric_name}")
+
+             last_status = out
+             time.sleep(poll_interval)
+
+         except Exception as e:
+             print(f"Error checking status: {str(e)}") # Debug print
+             time.sleep(poll_interval)
+
+     if not status_checked:
+         print(f"Last known status: {last_status}") # Debug print
+         if last_status and "In Progress" in last_status:
+             pytest.skip(f"Job still in progress after {max_wait_time} seconds for {metric_name}. This is not a failure, but took longer than expected.")
+         else:
+             assert False, f"Job did not complete within {max_wait_time} seconds for metric: {metric_name}. Last status: {last_status}"
+
+     # Only check results if the job completed successfully
+     if status_checked:
+         try:
+             results = evaluation.get_results()
+             assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+             assert not results.empty, "Results DataFrame should not be empty"
+             column_name = f"{metric_name}_column25"
+             assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+         except Exception as e:
+             pytest.fail(f"Error getting results for {metric_name} with provider: gemini and model: gemini-1.5-flash: {str(e)}")
+
+
+
+ @pytest.mark.parametrize("metric_name", ['Hallucination',
+     'Faithfulness',
+     'SQL Prompt Injection',
+     'Response Correctness',
+     'Response Completeness',
+     'False Refusal',
+     'Context Precision',
+     'Context Recall',
+     'Context Relevancy',
+     'SQL Response Correctness',
+     'SQL Prompt Ambiguity',
+     'SQL Context Sufficiency',
+     'SQL Context Ambiguity'])
+
+ def test_metric_initialization_openai(evaluation, metric_name: str,capfd):
+     """Test if adding each metric and tracking its completion works correctly"""
+     schema_mapping = {
+         'Query': 'prompt',
+         'Response': 'response',
+         'Context': 'context',
+         'ExpectedResponse': 'expectedresponse',
+     }
+
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+
+     metrics = [{
+         "name": metric_name,
+         "config": {
+             "model": "gpt-4o-mini",
+             "provider": "openai"
+         },
+         "column_name": f"{metric_name}_column_{timestamp}",
+         "schema_mapping": schema_mapping
+     }]
+
+     # Add metrics and capture the printed output
+     evaluation.add_metrics(metrics=metrics)
+     out, err = capfd.readouterr()
+     print(f"Add metrics output: {out}") # Debug print
+
+     # Verify the success message for metric addition
+     assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name}"
+
+     # Store the jobId for status checking
+     assert evaluation.jobId is not None, "Job ID was not set after adding metrics"
+     print(f"Job ID: {evaluation.jobId}") # Debug print
+
+     # Check job status with timeout
+     max_wait_time = 300 # Increased timeout to 3 minutes
+     poll_interval = 5 # Check every 5 seconds
+     start_time = time.time()
+     status_checked = False
+     last_status = None
+
+     print(f"Starting job status checks for {metric_name}...") # Debug print
+
+     while (time.time() - start_time) < max_wait_time:
+         try:
+             evaluation.get_status()
+             out, err = capfd.readouterr()
+             print(f"Status check output: {out}") # Debug print
+
+             if "Job completed" in out:
+                 status_checked = True
+                 print(f"Job completed for {metric_name}") # Debug print
+                 break
+
+             if "Job failed" in out:
+                 pytest.fail(f"Job failed for metric: {metric_name}")
+
+             last_status = out
+             time.sleep(poll_interval)
+
+         except Exception as e:
+             print(f"Error checking status: {str(e)}") # Debug print
+             time.sleep(poll_interval)
+
+     if not status_checked:
+         print(f"Last known status: {last_status}") # Debug print
+         if last_status and "In Progress" in last_status:
+             pytest.skip(f"Job still in progress after {max_wait_time} seconds for {metric_name}. This is not a failure, but took longer than expected.")
+         else:
+             assert False, f"Job did not complete within {max_wait_time} seconds for metric: {metric_name}. Last status: {last_status}"
+
+     # Only check results if the job completed successfully
+     if status_checked:
+         try:
+             results = evaluation.get_results()
+             assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+             assert not results.empty, "Results DataFrame should not be empty"
+             column_name = f"{metric_name}_column26"
+             assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+         except Exception as e:
+             pytest.fail(f"Error getting results for {metric_name} with provider: oprnai and model: gpt-4o-mini: {str(e)}")
+
+
+
+ # Add a counter to keep track of the test iterations
+ counter = 30
+
+ @pytest.mark.parametrize("metric_name", ['Agent Quality',
+     'User Chat Quality',
+     'Instruction Adherence'])
+ @pytest.mark.parametrize("model_config", [
+     {"model": "gpt-4o-mini", "provider": "openai"},
+     {"model": "gpt-4", "provider": "openai"},
+     {"model": "gpt-3.5-turbo", "provider": "openai"},
+     {"model":"gemini-1.5-flash", "provider": "gemini"}
+ ])
+ def test_metric_initialization_openai_chatmetric(chat_evaluation, model_config, metric_name: str, capfd):
+     """Test if adding each metric and tracking its completion works correctly"""
+     global counter # Use the global counter
+     schema_mapping = {
+         'ChatID': 'ChatID',
+         'Chat': 'Chat',
+         'Instructions': 'Instructions',
+         'System Prompt': 'systemprompt',
+     }
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Format: YYYYMMDD_HHMMSS
+     metrics = [{
+         "name": metric_name,
+         "config": model_config,
+         "column_name": f"{metric_name}_column_{timestamp}_{counter}", # Use counter for unique column name
+         "schema_mapping": schema_mapping
+     }]
+
+     # Increment the counter after each test
+     counter += 1
+
+     # Add metrics and capture the printed output
+     chat_evaluation.add_metrics(metrics=metrics)
+     out, err = capfd.readouterr()
+     print(f"Add metrics output: {out}") # Debug print
+
+     # Verify the success message for metric addition
+     assert "Metric Evaluation Job scheduled successfully" in out, f"Failed to schedule job for metric: {metric_name} and {model_config}"
+
+     # Store the jobId for status checking
+     assert chat_evaluation.jobId is not None, "Job ID was not set after adding metrics"
+     print(f"Job ID: {chat_evaluation.jobId}") # Debug print
+
+     # Check job status with timeout
+     max_wait_time = 600 # Increased timeout to 3 minutes
+     poll_interval = 5 # Check every 5 seconds
+     start_time = time.time()
+     status_checked = False
+     last_status = None
+
+     print(f"Starting job status checks for {metric_name}...") # Debug print
+
+     while (time.time() - start_time) < max_wait_time:
+         try:
+             chat_evaluation.get_status()
+             out, err = capfd.readouterr()
+             print(f"Status check output: {out}") # Debug print
+
+             if "Job completed" in out:
+                 status_checked = True
+                 print(f"Job completed for {metric_name}") # Debug print
+                 break
+
+             if "Job failed" in out:
+                 pytest.fail(f"Job failed for metric: {metric_name}{model_config}")
+
+             last_status = out
+             time.sleep(poll_interval)
+
+         except Exception as e:
+             print(f"Error checking status: {str(e)}") # Debug print
+             time.sleep(poll_interval)
+
+     if not status_checked:
+         print(f"Last known status: {last_status}") # Debug print
+         if last_status and "In Progress" in last_status:
+             pytest.skip(f"Job still in progress after {max_wait_time} seconds {model_config} for {metric_name}. This is not a failure, but took longer than expected.")
+         else:
+             assert False, f"Job did not complete within {max_wait_time} seconds {model_config} for metric: {metric_name}. Last status: {last_status}"
+
+     # Only check results if the job completed successfully
+     if status_checked:
+         try:
+             results = chat_evaluation.get_results()
+             assert isinstance(results, pd.DataFrame), "Results should be returned as a DataFrame"
+             assert not results.empty, "Results DataFrame should not be empty"
+             column_name = f"{metric_name}_column_{counter - 1}" # Use the last counter value
+             assert column_name in results.columns, f"Expected column {column_name} not found in results. Available columns: {results.columns.tolist()}"
+         except Exception as e:
+             pytest.fail(f"Error getting results for {metric_name} with {model_config}: {str(e)}")
+
+
+
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_prompt_manager.py
@@ -0,0 +1,88 @@
+ import os
+ import pytest
+ import copy
+ from ragaai_catalyst import PromptManager, RagaAICatalyst
+ import dotenv
+ import openai
+ dotenv.load_dotenv()
+
+
+ @pytest.fixture
+ def base_url():
+     return "https://catalyst.raga.ai/api"
+
+ @pytest.fixture
+ def access_keys():
+     return {
+         "access_key": os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
+         "secret_key": os.getenv("RAGAAI_CATALYST_SECRET_KEY")}
+
+
+ @pytest.fixture
+ def prompt_manager(base_url, access_keys):
+     """Create evaluation instance with specific project and dataset"""
+     os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
+     catalyst = RagaAICatalyst(
+         access_key=access_keys["access_key"],
+         secret_key=access_keys["secret_key"]
+     )
+     return PromptManager(project_name="prompt_metric_dataset")
+
+ def test_prompt_initialistaion(prompt_manager):
+     prompt_list= prompt_manager.list_prompts()
+     assert prompt_list ==['test','test2']
+
+ def test_list_prompt_version(prompt_manager):
+     prompt_version_list = prompt_manager.list_prompt_versions(prompt_name="test2")
+     assert len(prompt_version_list.keys()) == 2
+
+ def test_missing_prompt_name(prompt_manager):
+     with pytest.raises(ValueError, match="Please enter a valid prompt name"):
+         prompt = prompt_manager.get_prompt(prompt_name="", version="v1")
+
+ def test_get_variable(prompt_manager):
+     prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+     prompt_variable = prompt.get_variables()
+     assert prompt_variable == ['system1', 'system2']
+
+ def test_get_model_parameters(prompt_manager):
+     prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+     model_parameter = prompt.get_model_parameters()
+     assert model_parameter== {'frequency_penalty': 0.4,'max_tokens': 1038,'presence_penalty': 0.1,'temperature': 0.7,'model': 'gpt-4o-mini'}
+
+ def test_compile_prompt(prompt_manager):
+     prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+     compiled_prompt = prompt.compile(
+         system1='What is chocolate?',
+         system2 = "How it is made")
+     def get_openai_response(prompt):
+         client = openai.OpenAI()
+         response = client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=prompt
+         )
+         return response.choices[0].message.content
+     get_openai_response(compiled_prompt)
+
+ def test_compile_prompt_no_modelname(prompt_manager):
+     with pytest.raises(openai.BadRequestError,match="you must provide a model parameter"):
+
+         prompt = prompt_manager.get_prompt(prompt_name="test2", version="v2")
+         compiled_prompt = prompt.compile(
+             system1='What is chocolate?',
+             system2 = "How it is made")
+         def get_openai_response(prompt):
+             client = openai.OpenAI()
+             response = client.chat.completions.create(
+                 model="",
+                 messages=prompt
+             )
+             return response.choices[0].message.content
+         get_openai_response(compiled_prompt)
+
+
+
+
+
+
+
ragaai_catalyst-2.0.7.1/test/test_catalyst/test_synthetic_data_generation.py
@@ -0,0 +1,151 @@
+ import sys
+ # sys.path.append('/Users/ritikagoel/workspace/synthetic-catalyst-internal-api2/ragaai-catalyst')
+
+ import pytest
+ from ragaai_catalyst import SyntheticDataGeneration
+ import os
+
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+ @pytest.fixture
+ def synthetic_gen():
+     return SyntheticDataGeneration()
+
+ @pytest.fixture
+ def sample_text(synthetic_gen):
+     text_file = "/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/ai_document_061023_2.pdf" # Update this path as needed
+     return synthetic_gen.process_document(input_data=text_file)
+
+ def test_invalid_csv_processing(synthetic_gen):
+     """Test processing an invalid CSV file"""
+     with pytest.raises(Exception):
+         synthetic_gen.process_document(input_data="/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/OG1.csv")
+
+ def test_special_chars_csv_processing(synthetic_gen):
+     """Test processing CSV with special characters"""
+     with pytest.raises(Exception):
+         synthetic_gen.process_document(input_data="/Users/siddharthakosti/Downloads/catalyst_error_handling/catalyst_v2/catalyst_v2_new_1/data/OG1.csv")
+
+
+
+ def test_missing_llm_proxy(synthetic_gen, sample_text):
+     """Test behavior when internal_llm_proxy is not provided"""
+     print('-'*10)
+     print(OPENAI_API_KEY)
+     print('-'*10)
+     with pytest.raises(ValueError, match="API key must be provided"):
+         synthetic_gen.generate_qna(
+             text=sample_text,
+             question_type='mcq',
+             model_config={"provider": "openai", "model": "gpt-4o-mini"},
+             n=20,
+             user_id="1"
+         )
+
+ def test_llm_proxy(synthetic_gen, sample_text):
+     result = synthetic_gen.generate_qna(
+         text=sample_text,
+         question_type='mcq',
+         model_config={"provider": "gemini", "model": "gemini-1.5-flash"},
+         n=15,
+         internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+         user_id="1"
+     )
+     assert len(result) == 15
+
+
+
+ def test_invalid_llm_proxy(synthetic_gen, sample_text):
+     """Test behavior with invalid internal_llm_proxy URL"""
+     with pytest.raises(Exception, match="No connection adapters were found for"):
+         synthetic_gen.generate_qna(
+             text=sample_text,
+             question_type='mcq',
+             model_config={"provider": "openai", "model": "gpt-4o-mini"},
+             n=2,
+             internal_llm_proxy="tp://invalid.url",
+             user_id="1"
+         )
+
+ def test_missing_model_config(synthetic_gen, sample_text):
+     """Test behavior when model_config is not provided"""
+     with pytest.raises(ValueError, match="Model configuration must be provided with a valid provider and model"):
+         synthetic_gen.generate_qna(
+             text=sample_text,
+             question_type='mcq',
+             n=2,
+             internal_llm_proxy="http://20.244.126.4:4000/chat/completions",
+             user_id="1"
+         )
+
+ def test_missing_api_key_for_external_provider(synthetic_gen, sample_text):
+     """Test behavior when API key is missing for external provider"""
+     with pytest.raises(ValueError, match="API key must be provided"):
+         synthetic_gen.generate_qna(
+             text=sample_text,
+             question_type='mcq',
+             model_config={"provider": "gemini", "model": "gemini/gemini-1.5-flash"},
+             n=5
+         )
+
+ def test_invalid_api_key(synthetic_gen, sample_text):
+     """Test behavior with invalid API key"""
+     with pytest.raises(Exception, match="Failed to generate valid response after 3 attempts: Invalid API key provided"):
+         synthetic_gen.generate_qna(
+             text=sample_text,
+             question_type='mcq',
+             model_config={"provider": "gemini", "model": "gemini/gemini-1.5-flash"},
+             n=5,
+             api_key='invalid_key'
+         )
+
+ def test_default_question_count(synthetic_gen, sample_text):
+     """Test default number of questions when n is not provided"""
+     result = synthetic_gen.generate_qna(
+         text=sample_text,
+         question_type='mcq',
+         model_config={"provider": "openai", "model": "gpt-4o-mini"},
+         internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+         user_id="1"
+     )
+     assert len(result) == 5 # Default should be 5 questions
+
+ def test_default_question_type(synthetic_gen, sample_text):
+     """Test default question type when question_type is not provided"""
+     result = synthetic_gen.generate_qna(
+         text=sample_text,
+         model_config={"provider": "openai", "model": "gpt-4o-mini"},
+         n=5,
+         internal_llm_proxy="http://20.244.126.4:4000/chat/completions",
+         user_id="1"
+     )
+     # Verify result contains simple Q/A format without multiple choice options
+     assert all('options' not in qa for qa in result)
+
+ def test_question_count_matches_n(synthetic_gen, sample_text):
+     """Test if number of generated questions matches n"""
+     n = 2
+     result = synthetic_gen.generate_qna(
+         text=sample_text,
+         question_type='mcq',
+         model_config={"provider": "openai", "model": "gpt-4o-mini"},
+         n=n,
+         internal_llm_proxy="http://4.247.138.221:4000/chat/completions",
+         user_id="1"
+     )
+     assert len(result) == n
+
+ def test_proxy_call_check(synthetic_gen,sample_text):
+     """Test compatibility when proxy script called"""
+
+     result = synthetic_gen.generate_qna(
+         text=sample_text,
+         question_type='simple',
+         model_config={"provider": "gemini", "model": "gemini-1.5-flash", "api_base": "http://172.172.11.158:8000/v1alpha1/v1alpha1/predictions"},
+         n=5
+     )
+     assert len(result) == 5
+
+