ragaai-catalyst 2.1.6.4b0__py3-none-any.whl → 2.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +26 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +6 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +180 -164
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +20 -2
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +169 -50
- ragaai_catalyst/tracers/tracer.py +128 -115
- ragaai_catalyst/tracers/upload_traces.py +3 -3
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +1 -1
- ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +243 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +1 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/RECORD +16 -15
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/licenses/LICENSE +0 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/top_level.txt +0 -0
ragaai_catalyst/dataset.py
CHANGED
@@ -69,7 +69,7 @@ class Dataset:
|
|
69
69
|
"Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
|
70
70
|
"X-Project-Id": str(self.project_id),
|
71
71
|
}
|
72
|
-
json_data = {"size":
|
72
|
+
json_data = {"size": 99999, "page": "0", "projectId": str(self.project_id), "search": ""}
|
73
73
|
try:
|
74
74
|
response = requests.post(
|
75
75
|
f"{Dataset.BASE_URL}/v2/llm/dataset",
|
@@ -85,6 +85,9 @@ class BaseTracer:
|
|
85
85
|
self.system_monitor = None
|
86
86
|
self.gt = None
|
87
87
|
|
88
|
+
# For post processing of tracing file before uploading
|
89
|
+
self.post_processor = None
|
90
|
+
|
88
91
|
# For upload tracking
|
89
92
|
self.upload_task_id = None
|
90
93
|
|
@@ -142,6 +145,21 @@ class BaseTracer:
|
|
142
145
|
except Exception as e:
|
143
146
|
logger.warning(f"Sleep interrupted in network tracking: {str(e)}")
|
144
147
|
|
148
|
+
def register_post_processor(self, post_processor_func):
|
149
|
+
"""
|
150
|
+
Register a post-processing function that will be called after trace generation.
|
151
|
+
|
152
|
+
Args:
|
153
|
+
post_processor_func (callable): A function that takes a trace JSON file path as input
|
154
|
+
and returns a processed trace JSON file path.
|
155
|
+
The function signature should be:
|
156
|
+
def post_processor_func(original_trace_json_path: os.PathLike) -> os.PathLike
|
157
|
+
"""
|
158
|
+
if not callable(post_processor_func):
|
159
|
+
raise TypeError("post_processor_func must be a callable")
|
160
|
+
self.post_processor = post_processor_func
|
161
|
+
logger.debug("Post-processor function registered successfully in BaseTracer")
|
162
|
+
|
145
163
|
def start(self):
|
146
164
|
"""Initialize a new trace"""
|
147
165
|
self.tracking = True
|
@@ -301,12 +319,19 @@ class BaseTracer:
|
|
301
319
|
|
302
320
|
logger.info("Traces saved successfully.")
|
303
321
|
logger.debug(f"Trace saved to {filepath}")
|
322
|
+
|
323
|
+
# Apply post-processor if registered
|
324
|
+
if self.post_processor is not None:
|
325
|
+
try:
|
326
|
+
filepath = self.post_processor(filepath)
|
327
|
+
logger.debug(f"Post-processor applied successfully in BaseTracer, new path: {filepath}")
|
328
|
+
except Exception as e:
|
329
|
+
logger.error(f"Error in post-processing in BaseTracer: {e}")
|
304
330
|
|
305
331
|
# Make sure uploader process is available
|
306
332
|
ensure_uploader_running()
|
307
333
|
|
308
334
|
logger.debug("Base URL used for uploading: {}".format(self.base_url))
|
309
|
-
|
310
335
|
# Submit to background process for uploading using futures
|
311
336
|
self.upload_task_id = submit_upload_task(
|
312
337
|
filepath=filepath,
|
@@ -156,6 +156,12 @@ class AgenticTracing(
|
|
156
156
|
self.current_component_id.set(None)
|
157
157
|
self.user_interaction_tracer.component_id.set(None)
|
158
158
|
|
159
|
+
def register_post_processor(self, post_processor_func):
|
160
|
+
"""
|
161
|
+
Pass through the post-processor registration to the BaseTracer
|
162
|
+
"""
|
163
|
+
super().register_post_processor(post_processor_func)
|
164
|
+
|
159
165
|
def start(self):
|
160
166
|
"""Start tracing"""
|
161
167
|
self.is_active = True
|
@@ -257,180 +257,196 @@ def format_interactions(trace) -> dict:
|
|
257
257
|
interactions = []
|
258
258
|
interaction_id = 1
|
259
259
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
child, interaction_id, interactions
|
285
|
-
)
|
286
|
-
|
287
|
-
# Add agent_end interaction
|
288
|
-
interactions.append(
|
289
|
-
{
|
290
|
-
"id": str(interaction_id),
|
291
|
-
"span_id": span['id'],
|
292
|
-
"interaction_type": "agent_call_end",
|
293
|
-
"name": span['name'],
|
294
|
-
"content": span['data'].get("output"),
|
295
|
-
"timestamp": span['end_time'],
|
296
|
-
"error": span['error'],
|
297
|
-
}
|
298
|
-
)
|
299
|
-
interaction_id += 1
|
300
|
-
|
301
|
-
elif span['type'] == "tool":
|
302
|
-
interactions.append(
|
303
|
-
{
|
304
|
-
"id": str(interaction_id),
|
305
|
-
"span_id": span['id'],
|
306
|
-
"interaction_type": "tool_call_start",
|
307
|
-
"name": span['name'],
|
308
|
-
"content": {
|
309
|
-
"prompt": span['data'].get("input"),
|
310
|
-
"response": span['data'].get("output"),
|
311
|
-
},
|
312
|
-
"timestamp": span['start_time'],
|
313
|
-
"error": span['error'],
|
314
|
-
}
|
315
|
-
)
|
316
|
-
interaction_id += 1
|
260
|
+
try:
|
261
|
+
if 'data' not in trace or not trace['data'][0].get("spans"):
|
262
|
+
return {"workflow": []}
|
263
|
+
except Exception as e:
|
264
|
+
print(f"Error in checking data or spans: {str(e)}")
|
265
|
+
|
266
|
+
|
267
|
+
for span in trace['data'][0].get("spans", []):
|
268
|
+
try:
|
269
|
+
# Process agent spans
|
270
|
+
if span.get('type') == "agent":
|
271
|
+
# Add agent_start interaction
|
272
|
+
interactions.append(
|
273
|
+
{
|
274
|
+
"id": str(interaction_id),
|
275
|
+
"span_id": span.get('id'),
|
276
|
+
"interaction_type": "agent_call_start",
|
277
|
+
"name": span.get('name'),
|
278
|
+
"content": None,
|
279
|
+
"timestamp": span.get('start_time'),
|
280
|
+
"error": span.get('error'),
|
281
|
+
}
|
282
|
+
)
|
283
|
+
interaction_id += 1
|
317
284
|
|
318
|
-
|
319
|
-
{
|
320
|
-
"
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
285
|
+
# Process children of agent recursively
|
286
|
+
if "children" in span.get('data', {}):
|
287
|
+
for child in span['data'].get("children", []):
|
288
|
+
interaction_id = process_child_interactions(
|
289
|
+
child, interaction_id, interactions
|
290
|
+
)
|
291
|
+
|
292
|
+
# Add agent_end interaction
|
293
|
+
interactions.append(
|
294
|
+
{
|
295
|
+
"id": str(interaction_id),
|
296
|
+
"span_id": span.get('id'),
|
297
|
+
"interaction_type": "agent_call_end",
|
298
|
+
"name": span.get('name'),
|
299
|
+
"content": span.get('data', {}).get("output"),
|
300
|
+
"timestamp": span.get('end_time'),
|
301
|
+
"error": span.get('error'),
|
302
|
+
}
|
303
|
+
)
|
304
|
+
interaction_id += 1
|
333
305
|
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
306
|
+
elif span.get('type') == "tool":
|
307
|
+
interactions.append(
|
308
|
+
{
|
309
|
+
"id": str(interaction_id),
|
310
|
+
"span_id": span.get('id'),
|
311
|
+
"interaction_type": "tool_call_start",
|
312
|
+
"name": span.get('name'),
|
313
|
+
"content": {
|
314
|
+
"prompt": span.get('data', {}).get("input"),
|
315
|
+
"response": span.get('data', {}).get("output"),
|
316
|
+
},
|
317
|
+
"timestamp": span.get('start_time'),
|
318
|
+
"error": span.get('error'),
|
319
|
+
}
|
320
|
+
)
|
321
|
+
interaction_id += 1
|
349
322
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
323
|
+
interactions.append(
|
324
|
+
{
|
325
|
+
"id": str(interaction_id),
|
326
|
+
"span_id": span.get('id'),
|
327
|
+
"interaction_type": "tool_call_end",
|
328
|
+
"name": span.get('name'),
|
329
|
+
"content": {
|
330
|
+
"prompt": span.get('data', {}).get("input"),
|
331
|
+
"response": span.get('data', {}).get("output"),
|
332
|
+
},
|
333
|
+
"timestamp": span.get('end_time'),
|
334
|
+
"error": span.get('error'),
|
335
|
+
}
|
336
|
+
)
|
337
|
+
interaction_id += 1
|
362
338
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
339
|
+
elif span.get('type') == "llm":
|
340
|
+
interactions.append(
|
341
|
+
{
|
342
|
+
"id": str(interaction_id),
|
343
|
+
"span_id": span.get('id'),
|
344
|
+
"interaction_type": "llm_call_start",
|
345
|
+
"name": span.get('name'),
|
346
|
+
"content": {
|
347
|
+
"prompt": span.get('data', {}).get("input"),
|
348
|
+
},
|
349
|
+
"timestamp": span.get('start_time'),
|
350
|
+
"error": span.get('error')
|
351
|
+
}
|
352
|
+
)
|
353
|
+
interaction_id += 1
|
376
354
|
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
355
|
+
interactions.append(
|
356
|
+
{
|
357
|
+
"id": str(interaction_id),
|
358
|
+
"span_id": span.get('id'),
|
359
|
+
"interaction_type": "llm_call_end",
|
360
|
+
"name": span.get('name'),
|
361
|
+
"content": {"response": span.get('data', {}).get("output")},
|
362
|
+
"timestamp": span.get('end_time'),
|
363
|
+
"error": span.get('error'),
|
364
|
+
}
|
365
|
+
)
|
366
|
+
interaction_id += 1
|
389
367
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
368
|
+
else:
|
369
|
+
interactions.append(
|
370
|
+
{
|
371
|
+
"id": str(interaction_id),
|
372
|
+
"span_id": span.get('id'),
|
373
|
+
"interaction_type": f"{span.get('type')}_call_start",
|
374
|
+
"name": span.get('name'),
|
375
|
+
"content": span.get('data'),
|
376
|
+
"timestamp": span.get('start_time'),
|
377
|
+
"error": span.get('error'),
|
378
|
+
}
|
379
|
+
)
|
401
380
|
interaction_id += 1
|
402
381
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
"headers": span_network_call.get("headers"),
|
415
|
-
},
|
416
|
-
"response": {
|
417
|
-
"status_code": span_network_call.get("status_code"),
|
418
|
-
"headers": span_network_call.get("response_headers"),
|
419
|
-
"body": span_network_call.get("response_body"),
|
420
|
-
},
|
421
|
-
}
|
422
|
-
network_call["timestamp"] = span_network_call.get("timestamp")
|
423
|
-
network_call["error"] = span_network_call.get("error")
|
424
|
-
interactions.append(network_call)
|
382
|
+
interactions.append(
|
383
|
+
{
|
384
|
+
"id": str(interaction_id),
|
385
|
+
"span_id": span.get('id'),
|
386
|
+
"interaction_type": f"{span.get('type')}_call_end",
|
387
|
+
"name": span.get('name'),
|
388
|
+
"content": span.get('data'),
|
389
|
+
"timestamp": span.get('end_time'),
|
390
|
+
"error": span.get('error'),
|
391
|
+
}
|
392
|
+
)
|
425
393
|
interaction_id += 1
|
426
394
|
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
395
|
+
# Process interactions from span.data if they exist
|
396
|
+
if 'interactions' in span:
|
397
|
+
for span_interaction in span['interactions']:
|
398
|
+
interaction = {}
|
399
|
+
interaction["id"] = str(interaction_id)
|
400
|
+
interaction["span_id"] = span.get('id')
|
401
|
+
interaction["interaction_type"] = span_interaction.get('type')
|
402
|
+
interaction["content"] = span_interaction.get('content')
|
403
|
+
interaction["timestamp"] = span_interaction.get('timestamp')
|
404
|
+
interaction["error"] = span.get('error')
|
405
|
+
interactions.append(interaction)
|
406
|
+
interaction_id += 1
|
407
|
+
|
408
|
+
if 'network_calls' in span:
|
409
|
+
for span_network_call in span['network_calls']:
|
410
|
+
network_call = {}
|
411
|
+
network_call["id"] = str(interaction_id)
|
412
|
+
network_call["span_id"] = span.get('id')
|
413
|
+
network_call["interaction_type"] = "network_call"
|
414
|
+
network_call["name"] = None
|
415
|
+
network_call["content"] = {
|
416
|
+
"request": {
|
417
|
+
"url": span_network_call.get("url"),
|
418
|
+
"method": span_network_call.get("method"),
|
419
|
+
"headers": span_network_call.get("headers"),
|
420
|
+
},
|
421
|
+
"response": {
|
422
|
+
"status_code": span_network_call.get("status_code"),
|
423
|
+
"headers": span_network_call.get("response_headers"),
|
424
|
+
"body": span_network_call.get("response_body"),
|
425
|
+
},
|
426
|
+
}
|
427
|
+
network_call["timestamp"] = span_network_call.get("timestamp")
|
428
|
+
network_call["error"] = span_network_call.get("error")
|
429
|
+
interactions.append(network_call)
|
430
|
+
interaction_id += 1
|
431
|
+
|
432
|
+
except Exception as e:
|
433
|
+
logger.warning(f"Found issue processing span, skipping")
|
434
|
+
continue
|
435
|
+
try:
|
436
|
+
# Sort interactions by timestamp
|
437
|
+
sorted_interactions = sorted(
|
438
|
+
interactions, key=lambda x: x.get("timestamp") if x.get("timestamp") else ""
|
439
|
+
)
|
440
|
+
except Exception as e:
|
441
|
+
print(f"Error in sorting interactions: {str(e)}")
|
442
|
+
|
431
443
|
|
432
|
-
|
433
|
-
|
434
|
-
interaction
|
444
|
+
try:
|
445
|
+
# Reassign IDs to maintain sequential order after sorting
|
446
|
+
for idx, interaction in enumerate(sorted_interactions, 1):
|
447
|
+
interaction["id"] = str(idx)
|
448
|
+
except Exception as e:
|
449
|
+
print(f"Error in reassigning IDs: {str(e)}")
|
450
|
+
|
435
451
|
|
436
452
|
return {"workflow": sorted_interactions}
|
@@ -14,19 +14,23 @@ class DynamicTraceExporter(SpanExporter):
|
|
14
14
|
certain properties to be updated dynamically during execution.
|
15
15
|
"""
|
16
16
|
|
17
|
-
def __init__(self, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120):
|
17
|
+
def __init__(self, tracer_type, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120, post_processor = None, max_upload_workers = 30):
|
18
18
|
"""
|
19
19
|
Initialize the DynamicTraceExporter.
|
20
20
|
|
21
21
|
Args:
|
22
|
+
tracer_type: Type of tracer
|
22
23
|
files_to_zip: List of files to zip
|
23
24
|
project_name: Project name
|
24
25
|
project_id: Project ID
|
25
26
|
dataset_name: Dataset name
|
26
27
|
user_details: User details
|
27
28
|
base_url: Base URL for API
|
29
|
+
post_processor: Post processing function before uploading trace
|
30
|
+
max_upload_workers: Maximum number of upload workers
|
28
31
|
"""
|
29
32
|
self._exporter = RAGATraceExporter(
|
33
|
+
tracer_type=tracer_type,
|
30
34
|
files_to_zip=files_to_zip,
|
31
35
|
project_name=project_name,
|
32
36
|
project_id=project_id,
|
@@ -34,7 +38,9 @@ class DynamicTraceExporter(SpanExporter):
|
|
34
38
|
user_details=user_details,
|
35
39
|
base_url=base_url,
|
36
40
|
custom_model_cost=custom_model_cost,
|
37
|
-
timeout=timeout
|
41
|
+
timeout=timeout,
|
42
|
+
post_processor= post_processor,
|
43
|
+
max_upload_workers = max_upload_workers
|
38
44
|
)
|
39
45
|
|
40
46
|
# Store the initial values
|
@@ -45,6 +51,8 @@ class DynamicTraceExporter(SpanExporter):
|
|
45
51
|
self._user_details = user_details
|
46
52
|
self._base_url = base_url
|
47
53
|
self._custom_model_cost = custom_model_cost
|
54
|
+
self._post_processor = post_processor
|
55
|
+
self._max_upload_workers = max_upload_workers
|
48
56
|
|
49
57
|
|
50
58
|
def export(self, spans):
|
@@ -101,6 +109,8 @@ class DynamicTraceExporter(SpanExporter):
|
|
101
109
|
self._exporter.user_details = self._user_details
|
102
110
|
self._exporter.base_url = self._base_url
|
103
111
|
self._exporter.custom_model_cost = self._custom_model_cost
|
112
|
+
self._exporter.post_processor = self._post_processor
|
113
|
+
self._exporter.max_upload_workers = self._max_upload_workers
|
104
114
|
|
105
115
|
# Getter and setter methods for dynamic properties
|
106
116
|
|
@@ -159,3 +169,11 @@ class DynamicTraceExporter(SpanExporter):
|
|
159
169
|
@custom_model_cost.setter
|
160
170
|
def custom_model_cost(self, value):
|
161
171
|
self._custom_model_cost = value
|
172
|
+
|
173
|
+
@property
|
174
|
+
def max_upload_workers(self):
|
175
|
+
return self._max_upload_workers
|
176
|
+
|
177
|
+
@max_upload_workers.setter
|
178
|
+
def max_upload_workers(self, value):
|
179
|
+
self._max_upload_workers = value
|