ragaai-catalyst 1.0.8.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,5 +4,6 @@ from .tracers import Tracer
4
4
  from .utils import response_checker
5
5
  from .dataset import Dataset
6
6
  from .prompt_manager import PromptManager
7
+ from .evaluation import Evaluation
7
8
 
8
- __all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager"]
9
+ __all__ = ["Experiment", "RagaAICatalyst", "Tracer", "PromptManager", "Evaluation"]
@@ -16,11 +16,38 @@ class Dataset:
16
16
 
17
17
def __init__(self, project_name):
    """Initialize a Dataset handle bound to *project_name*.

    Resolves the project's numeric id by listing projects from the
    catalyst backend.

    Args:
        project_name: Name of an existing catalyst project.

    Raises:
        ValueError: If no project with that name is visible to the token.
        requests.exceptions.RequestException: If the project listing fails.
    """
    self.project_name = project_name
    # Page size used when listing projects to resolve the project id.
    self.num_projects = 100
    Dataset.BASE_URL = (
        os.getenv("RAGAAI_CATALYST_BASE_URL")
        if os.getenv("RAGAAI_CATALYST_BASE_URL")
        else "https://catalyst.raga.ai/api"
    )
    headers = {
        "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
    }
    try:
        response = requests.get(
            f"{Dataset.BASE_URL}/v2/llm/projects?size={self.num_projects}",
            headers=headers,
            # Class-level TIMEOUT for consistency with the other requests
            # in this class (was self.TIMEOUT — same value).
            timeout=Dataset.TIMEOUT,
        )
        response.raise_for_status()
        logger.debug("Projects list retrieved successfully")

        # Parse the response body once instead of calling response.json()
        # twice (once for the name check, once for the id lookup).
        projects = response.json()["data"]["content"]
        project_id = next(
            (project["id"] for project in projects if project["name"] == project_name),
            None,
        )
        if project_id is None:
            raise ValueError("Project not found. Please enter a valid project name")
        self.project_id = project_id

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to retrieve projects list: {e}")
        raise
24
51
 
25
52
def list_datasets(self):
    """Retrieve the dataset names that exist in this project.

    Returns:
        list[str]: Names of the project's datasets on success, or a
        dict with ``status_code`` and ``message`` keys when the backend
        answers with a non-200 status after the token-refresh retry.

    Raises:
        requests.exceptions.RequestException: On transport/HTTP failure.
    """

    def make_request():
        headers = {
            'Content-Type': 'application/json',
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Id": str(self.project_id),
        }
        json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
        try:
            response = requests.post(
                f"{Dataset.BASE_URL}/v2/llm/dataset",
                headers=headers,
                json=json_data,
                timeout=Dataset.TIMEOUT,
            )
            # BUGFIX: an unconditional raise_for_status() here made the
            # caller's 401 -> get_token() -> retry branch unreachable
            # (the 401 raised before the caller could inspect it).
            # Let 401 through so the token refresh can happen.
            if response.status_code != 401:
                response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to list datasets: {e}")
            raise

    try:
        response = make_request()
        response_checker(response, "Dataset.list_datasets")
        if response.status_code == 401:
            get_token()  # Fetch a new token and set it in the environment
            response = make_request()  # Retry the request
        if response.status_code != 200:
            return {
                "status_code": response.status_code,
                "message": response.json(),
            }
        datasets = response.json()["data"]["content"]
        dataset_list = [dataset["name"] for dataset in datasets]
        return dataset_list
    except Exception as e:
        logger.error(f"Error in list_datasets: {e}")
        raise
100
+
101
def get_schema_mapping(self):
    """Return the schema elements supported for dataset column mapping."""
    return [
        "traceid",
        "prompt",
        "context",
        "response",
        "expected_response",
        "expected_context",
        "timestamp",
        "metadata",
        "pipeline",
        "cost",
        "feedBack",
        "latency",
        "sanitized_response",
        "system_prompt",
        "traceUri",
    ]
66
103
 
67
104
  def create_from_trace(self, dataset_name, filter_list):
68
105
  """
@@ -91,85 +128,88 @@ class Dataset:
91
128
  "subDatasetName": dataset_name,
92
129
  "filterList": filter_list,
93
130
  }
94
- response = requests.post(
95
- f"{Dataset.BASE_URL}/v1/llm/sub-dataset",
96
- headers=headers,
97
- json=json_data,
98
- timeout=Dataset.TIMEOUT,
99
- )
100
- return response
101
-
102
- response = request_trace_creation()
103
- response_checker(response, "Dataset.create_dataset")
104
- if response.status_code == 401:
105
- get_token() # Fetch a new token and set it in the environment
106
- response = request_trace_creation() # Retry the request
107
- if response.status_code != 200:
108
- return response.json()["message"]
109
- message = response.json()["message"]
110
- return f"{message} {dataset_name}"
111
-
131
+ try:
132
+ response = requests.post(
133
+ f"{Dataset.BASE_URL}/v1/llm/sub-dataset",
134
+ headers=headers,
135
+ json=json_data,
136
+ timeout=Dataset.TIMEOUT,
137
+ )
138
+ response.raise_for_status()
139
+ return response
140
+ except requests.exceptions.RequestException as e:
141
+ logger.error(f"Failed to create dataset from trace: {e}")
142
+ raise
112
143
 
144
+ try:
145
+ response = request_trace_creation()
146
+ response_checker(response, "Dataset.create_dataset")
147
+ if response.status_code == 401:
148
+ get_token() # Fetch a new token and set it in the environment
149
+ response = request_trace_creation() # Retry the request
150
+ if response.status_code != 200:
151
+ return response.json()["message"]
152
+ message = response.json()["message"]
153
+ return f"{message} {dataset_name}"
154
+ except Exception as e:
155
+ logger.error(f"Error in create_from_trace: {e}")
156
+ raise
113
157
 
114
- ###################### CSV Upload APIs ###################
158
+ ###################### CSV Upload APIs ###################
115
159
 
116
160
def get_csv_schema(self):
    """Fetch the schema elements available for CSV upload.

    Returns the parsed JSON payload from the backend; raises ValueError
    when the backend reports failure, and re-raises transport errors
    after logging them.
    """
    request_headers = {
        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
        # NOTE(review): this v1 endpoint still identifies the project by
        # name, unlike the v2 calls which send X-Project-Id — confirm.
        "X-Project-Name": self.project_name,
    }
    try:
        resp = requests.get(
            f"{Dataset.BASE_URL}/v1/llm/schema-elements",
            headers=request_headers,
            timeout=Dataset.TIMEOUT,
        )
        resp.raise_for_status()
        payload = resp.json()
        if not payload['success']:
            raise ValueError('Unable to fetch Schema Elements for the CSV')
        return payload
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get CSV schema: {e}")
        raise
134
179
 
135
180
def create_from_csv(self, csv_path, dataset_name, schema_mapping):
    """Create a new dataset in the project by uploading a CSV file.

    Flow: fetch a presigned upload URL, PUT the CSV bytes to it, then
    register the file (with its schema mapping) against the project.

    Args:
        csv_path: Path to the local CSV file to upload.
        dataset_name: Name for the new dataset; must be unique in the project.
        schema_mapping: Mapping of CSV column name -> schema element
            (see get_schema_mapping for valid elements).

    Raises:
        ValueError: If the dataset name already exists, the presigned URL
            cannot be fetched, the upload fails, or the backend rejects
            the registration.
        requests.exceptions.RequestException: On transport/HTTP failure.
    """
    list_dataset = self.list_datasets()
    if dataset_name in list_dataset:
        raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")

    #### get presigned URL
    def get_presignedUrl():
        headers = {
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Id": str(self.project_id),
        }
        try:
            response = requests.get(
                f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
                headers=headers,
                timeout=Dataset.TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get presigned URL: {e}")
            raise

    try:
        presignedUrl = get_presignedUrl()
        if presignedUrl['success']:
            url = presignedUrl['data']['presignedUrl']
            filename = presignedUrl['data']['fileName']
        else:
            raise ValueError('Unable to fetch presignedUrl')
    except Exception as e:
        logger.error(f"Error in get_presignedUrl: {e}")
        raise

    #### put csv to presigned URL
    def put_csv_to_presignedUrl(url):
        headers = {
            'Content-Type': 'text/csv',
            'x-ms-blob-type': 'BlockBlob',
        }
        try:
            with open(csv_path, 'rb') as file:
                response = requests.put(
                    url,
                    headers=headers,
                    data=file,
                    timeout=Dataset.TIMEOUT,
                )
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to put CSV to presigned URL: {e}")
            raise

    try:
        put_csv_response = put_csv_to_presignedUrl(url)
        # BUGFIX: the x-ms-blob-type header marks this as an Azure Put
        # Blob call, which answers 201 Created on success; checking
        # != 200 misreported a successful upload as a failure.
        # raise_for_status() above already rejected real errors, so
        # accept either success status.
        if put_csv_response.status_code not in (200, 201):
            raise ValueError('Unable to put csv to the presignedUrl')
    except Exception as e:
        logger.error(f"Error in put_csv_to_presignedUrl: {e}")
        raise

    ## Upload csv to elastic
    def upload_csv_to_elastic(data):
        header = {
            'Content-Type': 'application/json',
            'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Id": str(self.project_id)
        }
        try:
            response = requests.post(
                f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
                headers=header,
                json=data,
                timeout=Dataset.TIMEOUT,
            )
            # Surface the backend's own validation message on 400
            # instead of a generic HTTPError.
            if response.status_code == 400:
                raise ValueError(response.json()["message"])
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to upload CSV to elastic: {e}")
            raise

    def generate_schema(mapping):
        # Wrap each schema element in the payload shape the backend expects.
        return {column: {"columnType": schema_element} for column, schema_element in mapping.items()}

    try:
        schema_mapping = generate_schema(schema_mapping)
        data = {
            "projectId": str(self.project_id),
            "datasetName": dataset_name,
            "fileName": filename,
            "schemaMapping": schema_mapping,
            "opType": "insert",
            "description": ""
        }
        upload_csv_response = upload_csv_to_elastic(data)
        if not upload_csv_response['success']:
            raise ValueError('Unable to upload csv')
        else:
            print(upload_csv_response['message'])
    except Exception as e:
        logger.error(f"Error in create_from_csv: {e}")
        raise