ragaai-catalyst 2.2.4b5__py3-none-any.whl → 2.2.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. ragaai_catalyst/__init__.py +0 -2
  2. ragaai_catalyst/dataset.py +59 -1
  3. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +5 -285
  4. ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -2
  5. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -1
  6. ragaai_catalyst/tracers/exporters/__init__.py +1 -2
  7. ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -1
  8. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +23 -1
  9. ragaai_catalyst/tracers/tracer.py +6 -186
  10. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/METADATA +1 -1
  11. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/RECORD +14 -45
  12. ragaai_catalyst/experiment.py +0 -486
  13. ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -536
  14. ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -134
  15. ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -563
  16. ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
  17. ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -197
  18. ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -172
  19. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +0 -687
  20. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +0 -1319
  21. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +0 -347
  22. ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +0 -1182
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -288
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +0 -557
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -129
  27. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +0 -74
  28. ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -21
  29. ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -32
  30. ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -28
  31. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -133
  32. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +0 -34
  33. ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -467
  34. ragaai_catalyst/tracers/langchain_callback.py +0 -821
  35. ragaai_catalyst/tracers/llamaindex_callback.py +0 -361
  36. ragaai_catalyst/tracers/llamaindex_instrumentation.py +0 -424
  37. ragaai_catalyst/tracers/upload_traces.py +0 -170
  38. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +0 -62
  39. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +0 -69
  40. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +0 -74
  41. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +0 -82
  42. ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +0 -403
  43. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/WHEEL +0 -0
  44. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/licenses/LICENSE +0 -0
  45. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/top_level.txt +0 -0
@@ -1,467 +0,0 @@
1
- import os
2
- import json
3
- import asyncio
4
- import aiohttp
5
- import logging
6
- from tqdm import tqdm
7
- import requests
8
- from ...ragaai_catalyst import RagaAICatalyst
9
- import shutil
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- get_token = RagaAICatalyst.get_token
14
-
15
-
16
class RagaExporter:
    """Uploads RAG traces to the RagaAI Catalyst backend.

    On construction the exporter ensures the target dataset schema exists
    (``_create_schema``), then exposes async helpers to fetch presigned
    upload URLs, upload trace files (plain HTTP PUT or Azure blob
    storage), and register ("stream") each uploaded trace with the
    backend. Synchronous schema calls use ``requests``; the upload path
    is async and uses ``aiohttp``.
    """

    # Filled in from RagaAICatalyst.BASE_URL when an instance is created.
    BASE_URL = None

    # Legacy flat mapping of trace fields to Catalyst column types.
    SCHEMA_MAPPING = {
        "trace_id": "traceId",
        "trace_uri": "traceUri",
        "prompt": "prompt",
        "response": "response",
        "context": "context",
        "llm_model": "pipeline",
        "recorded_on": "metadata",
        "embed_model": "pipeline",
        "log_source": "metadata",
        "vector_store": "pipeline",
    }

    # Current schema format expected by the /v1/llm/dataset/logs endpoint.
    # NOTE: "feedBack" casing is what the backend expects — do not "fix" it.
    SCHEMA_MAPPING_NEW = {
        "trace_id": {"columnType": "traceId"},
        "trace_uri": {"columnType": "traceUri"},
        "prompt": {"columnType": "prompt"},
        "response": {"columnType": "response"},
        "context": {"columnType": "context"},
        "llm_model": {"columnType": "pipeline"},
        "recorded_on": {"columnType": "metadata"},
        "embed_model": {"columnType": "pipeline"},
        "log_source": {"columnType": "metadata"},
        "vector_store": {"columnType": "pipeline"},
        "feedback": {"columnType": "feedBack"},
    }

    # Timeout (seconds) applied to every HTTP request made by this class.
    TIMEOUT = 10

    def __init__(self, project_name, dataset_name):
        """Initialize the exporter and ensure the dataset schema exists.

        Args:
            project_name (str): The name of the Catalyst project.
            dataset_name (str): The dataset the traces will be logged to.

        Side effects:
            Reads RAGAAI_CATALYST_ACCESS_KEY / RAGAAI_CATALYST_SECRET_KEY
            from the environment (logs an error if missing), fetches a
            token if RAGAAI_CATALYST_TOKEN is unset, and calls
            ``_create_schema`` (logs an error on non-200).
        """
        self.project_name = project_name
        self.dataset_name = dataset_name
        RagaExporter.BASE_URL = RagaAICatalyst.BASE_URL
        self.access_key = os.getenv("RAGAAI_CATALYST_ACCESS_KEY")
        self.secret_key = os.getenv("RAGAAI_CATALYST_SECRET_KEY")
        # Presigned URLs are fetched in batches of at most this many.
        self.max_urls = 20
        if not self.access_key or not self.secret_key:
            logger.error(
                "RAGAAI_CATALYST_ACCESS_KEY and RAGAAI_CATALYST_SECRET_KEY environment variables must be set"
            )
        if not os.getenv("RAGAAI_CATALYST_TOKEN"):
            get_token()

        create_status_code = self._create_schema()
        if create_status_code != 200:
            logger.error(
                "Failed to create schema. Please consider raising an issue."
            )

    def _check_schema(self):
        """Check whether the master-dataset schema exists for the project.

        Makes a GET request to the schema endpoint; on a 401 the token is
        refreshed once and the request retried.

        Returns:
            int: The HTTP status code of the (possibly retried) response.
                 200 means the schema exists.
        """

        def make_request():
            headers = {
                "authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "X-Project-Name": self.project_name,
            }
            return requests.get(
                f"{RagaExporter.BASE_URL}/v1/llm/master-dataset/schema/{self.project_name}",
                headers=headers,
                timeout=RagaExporter.TIMEOUT,
            )

        response = make_request()
        if response.status_code == 401:
            get_token()  # Fetch a new token and set it in the environment
            response = make_request()  # Retry the request
        # Schema comparison against SCHEMA_MAPPING was removed here: the
        # previous implementation defined a compare_schemas helper that was
        # only referenced by commented-out code and never executed.
        return response.status_code

    def _create_schema(self):
        """Create the dataset schema for the project.

        POSTs ``SCHEMA_MAPPING_NEW`` to the dataset-logs endpoint; on a
        401 the token is refreshed once and the request retried.

        Returns:
            int: The HTTP status code of the (possibly retried) response.
                 200 means the schema was created successfully.
        """

        def make_request():
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "X-Project-Name": self.project_name,
            }
            json_data = {
                "datasetName": self.dataset_name,
                "schemaMapping": RagaExporter.SCHEMA_MAPPING_NEW,
                "traceFolderUrl": None,
            }
            return requests.post(
                f"{RagaExporter.BASE_URL}/v1/llm/dataset/logs",
                headers=headers,
                json=json_data,
                timeout=RagaExporter.TIMEOUT,
            )

        response = make_request()
        if response.status_code == 401:
            get_token()  # Fetch a new token and set it in the environment
            response = make_request()  # Retry the request
        return response.status_code

    async def response_checker_async(self, response, context=""):
        """Log an aiohttp response for debugging and return its status."""
        logger.debug(f"Function: {context} - Response: {response}")
        return response.status

    async def get_presigned_url(self, session, num_files):
        """Retrieve presigned upload URLs from the Catalyst API.

        Args:
            session (aiohttp.ClientSession): Session to use for the request.
            num_files (int): Number of presigned URLs requested.

        Returns:
            dict: The JSON response containing the presigned URLs, or a
                ``{"status": ..., "message": ...}`` dict on failure.
        """

        async def make_request():
            json_data = {
                "datasetName": self.dataset_name,
                "numFiles": num_files,
            }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "X-Project-Name": self.project_name,
            }
            # NOTE: a GET with a JSON body is unusual but is what this
            # endpoint expects; do not change to POST without backend input.
            async with session.get(
                f"{RagaExporter.BASE_URL}/v1/llm/presigned-url",
                headers=headers,
                json=json_data,
                timeout=RagaExporter.TIMEOUT,
            ) as response:
                json_data = await response.json()
                return response, json_data

        response, json_data = await make_request()
        await self.response_checker_async(response, "RagaExporter.get_presigned_url")
        if response.status == 401:
            await get_token()  # Fetch a new token and set it in the environment
            response, json_data = await make_request()  # Retry the request

        if response.status != 200:
            return {"status": response.status, "message": "Failed to get presigned URL"}

        return json_data

    async def stream_trace(self, session, trace_uri):
        """Register an uploaded trace with the backend.

        Args:
            session (aiohttp.ClientSession): Session to use for the request.
            trace_uri (str): The presigned URL the trace was uploaded to.

        Returns:
            int: The HTTP status code of the (possibly retried) response.
        """

        async def make_request():
            headers = {
                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "Content-Type": "application/json",
                "X-Project-Name": self.project_name,
            }
            json_data = {
                "datasetName": self.dataset_name,
                "presignedUrl": trace_uri,
            }
            async with session.post(
                f"{RagaExporter.BASE_URL}/v1/llm/insert/trace",
                headers=headers,
                json=json_data,
                timeout=RagaExporter.TIMEOUT,
            ) as response:
                return response, response.status

        response, status = await make_request()
        await self.response_checker_async(response, "RagaExporter.upload_file")
        if response.status == 401:
            await get_token()  # Fetch a new token and set it in the environment
            response, status = await make_request()  # Retry the request

        return response.status

    async def upload_file(self, session, url, file_path):
        """Upload a trace file to a presigned URL.

        Supports both regular presigned URLs and Azure blob storage
        (detected by hostname, which needs an extra header).

        Args:
            session (aiohttp.ClientSession): Session to use for the request.
            url (str): Presigned URL to PUT the file to.
            file_path (str): Path of the local trace file.

        Returns:
            int: The HTTP status code of the (possibly retried) response.
        """

        async def make_request():
            headers = {
                "Content-Type": "application/json",
            }
            if "blob.core.windows.net" in url:  # Azure requires blob type
                headers["x-ms-blob-type"] = "BlockBlob"
            print("Uploading traces...")
            logger.debug(f"Uploading file:{file_path} with url {url}")

            # The payload is newline-stripped so the backend receives a
            # single-line JSON document.
            with open(file_path) as f:
                data = f.read().replace("\n", "").replace("\r", "").encode()

            async with session.put(
                url, headers=headers, data=data, timeout=RagaExporter.TIMEOUT
            ) as response:
                return response, response.status

        response, status = await make_request()
        await self.response_checker_async(response, "RagaExporter.upload_file")

        if response.status == 401:
            await get_token()  # Fetch a new token and set it in the environment
            response, status = await make_request()  # Retry the request

        # The previous implementation checked
        # `if response.status != 200 or response.status != 201:` — a
        # tautology — and returned the same value on both paths; a single
        # return is equivalent and honest.
        return response.status

    async def check_and_upload_files(self, session, file_paths):
        """Fetch presigned URLs, upload each file, and stream it on success.

        Args:
            session (aiohttp.ClientSession): Session to use for the requests.
            file_paths (list): Local trace file paths to upload.

        Returns:
            str | None: ``"upload successful"`` when uploads were attempted,
                ``None`` when there was nothing to do or URLs/token could
                not be obtained.
        """
        # Nothing to do for an empty batch.
        if len(file_paths) == 0:
            print("No files to be uploaded.")
            return None

        # Ensure a token is available; if not, attempt to obtain it once.
        if os.getenv("RAGAAI_CATALYST_TOKEN") is None:
            await get_token()
            if os.getenv("RAGAAI_CATALYST_TOKEN") is None:
                print("Failed to obtain token.")
                return None

        presigned_urls = []
        trace_folder_urls = []
        num_files = len(file_paths)

        # Fetch URLs in ceil(num_files / max_urls) batches, requesting only
        # as many URLs as remain. The previous implementation used
        # `range((num_files // max_urls) + 1)` with a fixed batch size,
        # which fetched a full extra batch whenever num_files was an exact
        # multiple of max_urls.
        num_batches = (num_files + self.max_urls - 1) // self.max_urls
        for batch_index in range(num_batches):
            remaining = num_files - batch_index * self.max_urls
            batch_size = min(self.max_urls, remaining)
            presigned_url_response = await self.get_presigned_url(
                session, batch_size
            )
            if presigned_url_response.get("success") == True:
                data = presigned_url_response.get("data", {})
                presigned_urls += data.get("presignedUrls", [])
                trace_folder_urls.append(data.get("traceFolderUrl", []))

        if presigned_urls != []:
            for file_path, presigned_url in tqdm(
                zip(file_paths, presigned_urls), desc="Uploading traces"
            ):
                if not os.path.isfile(file_path):
                    print(f"The file '{file_path}' does not exist.")
                    continue

                # Upload, and only stream (register) the trace on success.
                upload_status = await self.upload_file(
                    session, presigned_url, file_path
                )
                if upload_status == 200 or upload_status == 201:
                    logger.debug(
                        f"File '{os.path.basename(file_path)}' uploaded successfully."
                    )
                    stream_status = await self.stream_trace(
                        session, trace_uri=presigned_url
                    )
                    if stream_status == 200 or stream_status == 201:
                        logger.debug(
                            f"File '{os.path.basename(file_path)}' streamed successfully."
                        )
                        # Archive the uploaded file. The backup directory
                        # is created on demand — previously shutil.move
                        # raised if it did not already exist.
                        backup_dir = os.path.join(
                            os.path.dirname(file_path), "backup"
                        )
                        os.makedirs(backup_dir, exist_ok=True)
                        shutil.move(
                            file_path,
                            os.path.join(
                                backup_dir,
                                os.path.basename(file_path).split(".")[0]
                                + "_backup.json",
                            ),
                        )
                    else:
                        logger.error(
                            f"Failed to stream the file '{os.path.basename(file_path)}'."
                        )
                else:
                    logger.error(
                        f"Failed to upload the file '{os.path.basename(file_path)}'."
                    )

            return "upload successful"

        else:
            # No presigned URLs could be obtained — nothing was uploaded.
            print("Failed to get presigned URLs.")
            return None

    async def tracer_stopsession(self, file_names):
        """Stop the tracing session and upload any pending trace files.

        Args:
            file_names (list): Trace file paths to upload.

        Returns:
            None
        """
        async with aiohttp.ClientSession() as session:
            if os.getenv("RAGAAI_CATALYST_TOKEN"):
                print("Token obtained successfully.")
                await self.check_and_upload_files(session, file_paths=file_names)
            else:
                print("Failed to obtain token.")