ragaai-catalyst 2.1.6.4b1__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,7 +69,7 @@ class Dataset:
69
69
  "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
70
70
  "X-Project-Id": str(self.project_id),
71
71
  }
72
- json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
72
+ json_data = {"size": 99999, "page": "0", "projectId": str(self.project_id), "search": ""}
73
73
  try:
74
74
  response = requests.post(
75
75
  f"{Dataset.BASE_URL}/v2/llm/dataset",
@@ -85,6 +85,9 @@ class BaseTracer:
85
85
  self.system_monitor = None
86
86
  self.gt = None
87
87
 
88
+ # For post processing of tracing file before uploading
89
+ self.post_processor = None
90
+
88
91
  # For upload tracking
89
92
  self.upload_task_id = None
90
93
 
@@ -142,6 +145,21 @@ class BaseTracer:
142
145
  except Exception as e:
143
146
  logger.warning(f"Sleep interrupted in network tracking: {str(e)}")
144
147
 
148
+ def register_post_processor(self, post_processor_func):
149
+ """
150
+ Register a post-processing function that will be called after trace generation.
151
+
152
+ Args:
153
+ post_processor_func (callable): A function that takes a trace JSON file path as input
154
+ and returns a processed trace JSON file path.
155
+ The function signature should be:
156
+ def post_processor_func(original_trace_json_path: os.PathLike) -> os.PathLike
157
+ """
158
+ if not callable(post_processor_func):
159
+ raise TypeError("post_processor_func must be a callable")
160
+ self.post_processor = post_processor_func
161
+ logger.debug("Post-processor function registered successfully in BaseTracer")
162
+
145
163
  def start(self):
146
164
  """Initialize a new trace"""
147
165
  self.tracking = True
@@ -301,12 +319,19 @@ class BaseTracer:
301
319
 
302
320
  logger.info("Traces saved successfully.")
303
321
  logger.debug(f"Trace saved to {filepath}")
322
+
323
+ # Apply post-processor if registered
324
+ if self.post_processor is not None:
325
+ try:
326
+ filepath = self.post_processor(filepath)
327
+ logger.debug(f"Post-processor applied successfully in BaseTracer, new path: {filepath}")
328
+ except Exception as e:
329
+ logger.error(f"Error in post-processing in BaseTracer: {e}")
304
330
 
305
331
  # Make sure uploader process is available
306
332
  ensure_uploader_running()
307
333
 
308
334
  logger.debug("Base URL used for uploading: {}".format(self.base_url))
309
-
310
335
  # Submit to background process for uploading using futures
311
336
  self.upload_task_id = submit_upload_task(
312
337
  filepath=filepath,
@@ -156,6 +156,12 @@ class AgenticTracing(
156
156
  self.current_component_id.set(None)
157
157
  self.user_interaction_tracer.component_id.set(None)
158
158
 
159
+ def register_post_processor(self, post_processor_func):
160
+ """
161
+ Pass through the post-processor registration to the BaseTracer
162
+ """
163
+ super().register_post_processor(post_processor_func)
164
+
159
165
  def start(self):
160
166
  """Start tracing"""
161
167
  self.is_active = True
@@ -257,180 +257,196 @@ def format_interactions(trace) -> dict:
257
257
  interactions = []
258
258
  interaction_id = 1
259
259
 
260
- if 'data' not in trace or not trace['data'][0]["spans"]:
261
- return {"workflow": []}
262
-
263
- for span in trace['data'][0]["spans"]:
264
- # Process agent spans
265
- if span['type'] == "agent":
266
- # Add agent_start interaction
267
- interactions.append(
268
- {
269
- "id": str(interaction_id),
270
- "span_id": span['id'],
271
- "interaction_type": "agent_call_start",
272
- "name": span['name'],
273
- "content": None,
274
- "timestamp": span['start_time'],
275
- "error": span['error'],
276
- }
277
- )
278
- interaction_id += 1
279
-
280
- # Process children of agent recursively
281
- if "children" in span['data']:
282
- for child in span['data']["children"]:
283
- interaction_id = process_child_interactions(
284
- child, interaction_id, interactions
285
- )
286
-
287
- # Add agent_end interaction
288
- interactions.append(
289
- {
290
- "id": str(interaction_id),
291
- "span_id": span['id'],
292
- "interaction_type": "agent_call_end",
293
- "name": span['name'],
294
- "content": span['data'].get("output"),
295
- "timestamp": span['end_time'],
296
- "error": span['error'],
297
- }
298
- )
299
- interaction_id += 1
300
-
301
- elif span['type'] == "tool":
302
- interactions.append(
303
- {
304
- "id": str(interaction_id),
305
- "span_id": span['id'],
306
- "interaction_type": "tool_call_start",
307
- "name": span['name'],
308
- "content": {
309
- "prompt": span['data'].get("input"),
310
- "response": span['data'].get("output"),
311
- },
312
- "timestamp": span['start_time'],
313
- "error": span['error'],
314
- }
315
- )
316
- interaction_id += 1
260
+ try:
261
+ if 'data' not in trace or not trace['data'][0].get("spans"):
262
+ return {"workflow": []}
263
+ except Exception as e:
264
+ print(f"Error in checking data or spans: {str(e)}")
265
+
266
+
267
+ for span in trace['data'][0].get("spans", []):
268
+ try:
269
+ # Process agent spans
270
+ if span.get('type') == "agent":
271
+ # Add agent_start interaction
272
+ interactions.append(
273
+ {
274
+ "id": str(interaction_id),
275
+ "span_id": span.get('id'),
276
+ "interaction_type": "agent_call_start",
277
+ "name": span.get('name'),
278
+ "content": None,
279
+ "timestamp": span.get('start_time'),
280
+ "error": span.get('error'),
281
+ }
282
+ )
283
+ interaction_id += 1
317
284
 
318
- interactions.append(
319
- {
320
- "id": str(interaction_id),
321
- "span_id": span['id'],
322
- "interaction_type": "tool_call_end",
323
- "name": span['name'],
324
- "content": {
325
- "prompt": span['data'].get("input"),
326
- "response": span['data'].get("output"),
327
- },
328
- "timestamp": span['end_time'],
329
- "error": span['error'],
330
- }
331
- )
332
- interaction_id += 1
285
+ # Process children of agent recursively
286
+ if "children" in span.get('data', {}):
287
+ for child in span['data'].get("children", []):
288
+ interaction_id = process_child_interactions(
289
+ child, interaction_id, interactions
290
+ )
291
+
292
+ # Add agent_end interaction
293
+ interactions.append(
294
+ {
295
+ "id": str(interaction_id),
296
+ "span_id": span.get('id'),
297
+ "interaction_type": "agent_call_end",
298
+ "name": span.get('name'),
299
+ "content": span.get('data', {}).get("output"),
300
+ "timestamp": span.get('end_time'),
301
+ "error": span.get('error'),
302
+ }
303
+ )
304
+ interaction_id += 1
333
305
 
334
- elif span['type'] == "llm":
335
- interactions.append(
336
- {
337
- "id": str(interaction_id),
338
- "span_id": span['id'],
339
- "interaction_type": "llm_call_start",
340
- "name": span['name'],
341
- "content": {
342
- "prompt": span['data'].get("input"),
343
- },
344
- "timestamp": span['start_time'],
345
- "error": span['error']
346
- }
347
- )
348
- interaction_id += 1
306
+ elif span.get('type') == "tool":
307
+ interactions.append(
308
+ {
309
+ "id": str(interaction_id),
310
+ "span_id": span.get('id'),
311
+ "interaction_type": "tool_call_start",
312
+ "name": span.get('name'),
313
+ "content": {
314
+ "prompt": span.get('data', {}).get("input"),
315
+ "response": span.get('data', {}).get("output"),
316
+ },
317
+ "timestamp": span.get('start_time'),
318
+ "error": span.get('error'),
319
+ }
320
+ )
321
+ interaction_id += 1
349
322
 
350
- interactions.append(
351
- {
352
- "id": str(interaction_id),
353
- "span_id": span['id'],
354
- "interaction_type": "llm_call_end",
355
- "name": span['name'],
356
- "content": {"response": span['data'].get("output")},
357
- "timestamp": span['end_time'],
358
- "error": span['error'],
359
- }
360
- )
361
- interaction_id += 1
323
+ interactions.append(
324
+ {
325
+ "id": str(interaction_id),
326
+ "span_id": span.get('id'),
327
+ "interaction_type": "tool_call_end",
328
+ "name": span.get('name'),
329
+ "content": {
330
+ "prompt": span.get('data', {}).get("input"),
331
+ "response": span.get('data', {}).get("output"),
332
+ },
333
+ "timestamp": span.get('end_time'),
334
+ "error": span.get('error'),
335
+ }
336
+ )
337
+ interaction_id += 1
362
338
 
363
- else:
364
- interactions.append(
365
- {
366
- "id": str(interaction_id),
367
- "span_id": span['id'],
368
- "interaction_type": f"{span['type']}_call_start",
369
- "name": span['name'],
370
- "content": span['data'],
371
- "timestamp": span['start_time'],
372
- "error": span['error'],
373
- }
374
- )
375
- interaction_id += 1
339
+ elif span.get('type') == "llm":
340
+ interactions.append(
341
+ {
342
+ "id": str(interaction_id),
343
+ "span_id": span.get('id'),
344
+ "interaction_type": "llm_call_start",
345
+ "name": span.get('name'),
346
+ "content": {
347
+ "prompt": span.get('data', {}).get("input"),
348
+ },
349
+ "timestamp": span.get('start_time'),
350
+ "error": span.get('error')
351
+ }
352
+ )
353
+ interaction_id += 1
376
354
 
377
- interactions.append(
378
- {
379
- "id": str(interaction_id),
380
- "span_id": span['id'],
381
- "interaction_type": f"{span['type']}_call_end",
382
- "name": span['name'],
383
- "content": span['data'],
384
- "timestamp": span['end_time'],
385
- "error": span['error'],
386
- }
387
- )
388
- interaction_id += 1
355
+ interactions.append(
356
+ {
357
+ "id": str(interaction_id),
358
+ "span_id": span.get('id'),
359
+ "interaction_type": "llm_call_end",
360
+ "name": span.get('name'),
361
+ "content": {"response": span.get('data', {}).get("output")},
362
+ "timestamp": span.get('end_time'),
363
+ "error": span.get('error'),
364
+ }
365
+ )
366
+ interaction_id += 1
389
367
 
390
- # Process interactions from span.data if they exist
391
- if 'interactions' in span:
392
- for span_interaction in span['interactions']:
393
- interaction = {}
394
- interaction["id"] = str(interaction_id)
395
- interaction["span_id"] = span['id']
396
- interaction["interaction_type"] = span_interaction['type']
397
- interaction["content"] = span_interaction['content']
398
- interaction["timestamp"] = span_interaction['timestamp']
399
- interaction["error"] = span['error']
400
- interactions.append(interaction)
368
+ else:
369
+ interactions.append(
370
+ {
371
+ "id": str(interaction_id),
372
+ "span_id": span.get('id'),
373
+ "interaction_type": f"{span.get('type')}_call_start",
374
+ "name": span.get('name'),
375
+ "content": span.get('data'),
376
+ "timestamp": span.get('start_time'),
377
+ "error": span.get('error'),
378
+ }
379
+ )
401
380
  interaction_id += 1
402
381
 
403
- if 'network_calls' in span:
404
- for span_network_call in span['network_calls']:
405
- network_call = {}
406
- network_call["id"] = str(interaction_id)
407
- network_call["span_id"] = span['id']
408
- network_call["interaction_type"] = "network_call"
409
- network_call["name"] = None
410
- network_call["content"] = {
411
- "request": {
412
- "url": span_network_call.get("url"),
413
- "method": span_network_call.get("method"),
414
- "headers": span_network_call.get("headers"),
415
- },
416
- "response": {
417
- "status_code": span_network_call.get("status_code"),
418
- "headers": span_network_call.get("response_headers"),
419
- "body": span_network_call.get("response_body"),
420
- },
421
- }
422
- network_call["timestamp"] = span_network_call.get("timestamp")
423
- network_call["error"] = span_network_call.get("error")
424
- interactions.append(network_call)
382
+ interactions.append(
383
+ {
384
+ "id": str(interaction_id),
385
+ "span_id": span.get('id'),
386
+ "interaction_type": f"{span.get('type')}_call_end",
387
+ "name": span.get('name'),
388
+ "content": span.get('data'),
389
+ "timestamp": span.get('end_time'),
390
+ "error": span.get('error'),
391
+ }
392
+ )
425
393
  interaction_id += 1
426
394
 
427
- # Sort interactions by timestamp
428
- sorted_interactions = sorted(
429
- interactions, key=lambda x: x["timestamp"] if x["timestamp"] else ""
430
- )
395
+ # Process interactions from span.data if they exist
396
+ if 'interactions' in span:
397
+ for span_interaction in span['interactions']:
398
+ interaction = {}
399
+ interaction["id"] = str(interaction_id)
400
+ interaction["span_id"] = span.get('id')
401
+ interaction["interaction_type"] = span_interaction.get('type')
402
+ interaction["content"] = span_interaction.get('content')
403
+ interaction["timestamp"] = span_interaction.get('timestamp')
404
+ interaction["error"] = span.get('error')
405
+ interactions.append(interaction)
406
+ interaction_id += 1
407
+
408
+ if 'network_calls' in span:
409
+ for span_network_call in span['network_calls']:
410
+ network_call = {}
411
+ network_call["id"] = str(interaction_id)
412
+ network_call["span_id"] = span.get('id')
413
+ network_call["interaction_type"] = "network_call"
414
+ network_call["name"] = None
415
+ network_call["content"] = {
416
+ "request": {
417
+ "url": span_network_call.get("url"),
418
+ "method": span_network_call.get("method"),
419
+ "headers": span_network_call.get("headers"),
420
+ },
421
+ "response": {
422
+ "status_code": span_network_call.get("status_code"),
423
+ "headers": span_network_call.get("response_headers"),
424
+ "body": span_network_call.get("response_body"),
425
+ },
426
+ }
427
+ network_call["timestamp"] = span_network_call.get("timestamp")
428
+ network_call["error"] = span_network_call.get("error")
429
+ interactions.append(network_call)
430
+ interaction_id += 1
431
+
432
+ except Exception as e:
433
+ logger.warning(f"Found issue processing span, skipping")
434
+ continue
435
+ try:
436
+ # Sort interactions by timestamp
437
+ sorted_interactions = sorted(
438
+ interactions, key=lambda x: x.get("timestamp") if x.get("timestamp") else ""
439
+ )
440
+ except Exception as e:
441
+ print(f"Error in sorting interactions: {str(e)}")
442
+
431
443
 
432
- # Reassign IDs to maintain sequential order after sorting
433
- for idx, interaction in enumerate(sorted_interactions, 1):
434
- interaction["id"] = str(idx)
444
+ try:
445
+ # Reassign IDs to maintain sequential order after sorting
446
+ for idx, interaction in enumerate(sorted_interactions, 1):
447
+ interaction["id"] = str(idx)
448
+ except Exception as e:
449
+ print(f"Error in reassigning IDs: {str(e)}")
450
+
435
451
 
436
452
  return {"workflow": sorted_interactions}
@@ -14,19 +14,23 @@ class DynamicTraceExporter(SpanExporter):
14
14
  certain properties to be updated dynamically during execution.
15
15
  """
16
16
 
17
- def __init__(self, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120):
17
+ def __init__(self, tracer_type, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120, post_processor = None, max_upload_workers = 30):
18
18
  """
19
19
  Initialize the DynamicTraceExporter.
20
20
 
21
21
  Args:
22
+ tracer_type: Type of tracer
22
23
  files_to_zip: List of files to zip
23
24
  project_name: Project name
24
25
  project_id: Project ID
25
26
  dataset_name: Dataset name
26
27
  user_details: User details
27
28
  base_url: Base URL for API
29
+ post_processor: Post processing function before uploading trace
30
+ max_upload_workers: Maximum number of upload workers
28
31
  """
29
32
  self._exporter = RAGATraceExporter(
33
+ tracer_type=tracer_type,
30
34
  files_to_zip=files_to_zip,
31
35
  project_name=project_name,
32
36
  project_id=project_id,
@@ -34,7 +38,9 @@ class DynamicTraceExporter(SpanExporter):
34
38
  user_details=user_details,
35
39
  base_url=base_url,
36
40
  custom_model_cost=custom_model_cost,
37
- timeout=timeout
41
+ timeout=timeout,
42
+ post_processor= post_processor,
43
+ max_upload_workers = max_upload_workers
38
44
  )
39
45
 
40
46
  # Store the initial values
@@ -45,6 +51,8 @@ class DynamicTraceExporter(SpanExporter):
45
51
  self._user_details = user_details
46
52
  self._base_url = base_url
47
53
  self._custom_model_cost = custom_model_cost
54
+ self._post_processor = post_processor
55
+ self._max_upload_workers = max_upload_workers
48
56
 
49
57
 
50
58
  def export(self, spans):
@@ -101,6 +109,8 @@ class DynamicTraceExporter(SpanExporter):
101
109
  self._exporter.user_details = self._user_details
102
110
  self._exporter.base_url = self._base_url
103
111
  self._exporter.custom_model_cost = self._custom_model_cost
112
+ self._exporter.post_processor = self._post_processor
113
+ self._exporter.max_upload_workers = self._max_upload_workers
104
114
 
105
115
  # Getter and setter methods for dynamic properties
106
116
 
@@ -159,3 +169,11 @@ class DynamicTraceExporter(SpanExporter):
159
169
  @custom_model_cost.setter
160
170
  def custom_model_cost(self, value):
161
171
  self._custom_model_cost = value
172
+
173
+ @property
174
+ def max_upload_workers(self):
175
+ return self._max_upload_workers
176
+
177
+ @max_upload_workers.setter
178
+ def max_upload_workers(self, value):
179
+ self._max_upload_workers = value