ragaai-catalyst 2.1.5.1b2__py3-none-any.whl → 2.1.6b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import ast
3
3
  import csv
4
4
  import json
5
5
  import random
6
- import PyPDF2
6
+ import pypdf
7
7
  import markdown
8
8
  import pandas as pd
9
9
  from tqdm import tqdm
@@ -11,7 +11,6 @@ from tqdm import tqdm
11
11
  import openai
12
12
  import tiktoken
13
13
  import litellm
14
- import google.generativeai as genai
15
14
  from groq import Groq
16
15
  from litellm import completion
17
16
 
@@ -105,6 +104,9 @@ class SyntheticDataGeneration:
105
104
  raise Exception(f"{e}")
106
105
 
107
106
  else:
107
+ if "'utf-8' codec can't encode characters" in str(e):
108
+ print('Encountered non utf charactes, retrying with processed text')
109
+ text = str(text.encode('utf-8',errors='ignore'))
108
110
  print(f"Retrying...")
109
111
  continue
110
112
 
@@ -163,7 +165,9 @@ class SyntheticDataGeneration:
163
165
  elif provider == "gemini":
164
166
  if api_key is None and os.getenv("GEMINI_API_KEY") is None and api_base is None and internal_llm_proxy is None:
165
167
  raise ValueError("API key must be provided for Gemini.")
166
- genai.configure(api_key=api_key or os.getenv("GEMINI_API_KEY"))
168
+ if api_key:
169
+ os.environ["GEMINI_API_KEY"] = api_key
170
+ # genai.configure(api_key=api_key or os.getenv("GEMINI_API_KEY"))
167
171
 
168
172
  elif provider == "openai":
169
173
  if api_key is None and os.getenv("OPENAI_API_KEY") is None and internal_llm_proxy is None:
@@ -469,7 +473,7 @@ class SyntheticDataGeneration:
469
473
  """
470
474
  text = ""
471
475
  with open(file_path, 'rb') as file:
472
- pdf_reader = PyPDF2.PdfReader(file)
476
+ pdf_reader = pypdf.PdfReader(file)
473
477
  for page in pdf_reader.pages:
474
478
  text += page.extract_text()
475
479
  return text
@@ -323,7 +323,7 @@ def shutdown():
323
323
  global _executor
324
324
  if _executor:
325
325
  logger.info("Shutting down executor")
326
- _executor.shutdown(wait=False)
326
+ _executor.shutdown(wait=True)
327
327
  _executor = None
328
328
 
329
329
  # Register shutdown handler
@@ -63,3 +63,374 @@ def log_event(event_data, log_file_path):
63
63
  event_data = asdict(event_data)
64
64
  with open(log_file_path, "a") as f:
65
65
  f.write(json.dumps(event_data) + "\n")
66
+
67
+
68
def _dict_or_empty(value):
    """Return ``value`` if it is a dict, otherwise a new empty dict."""
    return value if isinstance(value, dict) else {}


def process_child_interactions(child, interaction_id, interactions):
    """
    Flatten one child span, and everything nested under it, into interactions.

    A start/end pair is appended for the span itself (``tool``, ``llm``,
    ``agent`` or ``<type>_call_start``/``<type>_call_end`` for any other type),
    followed by one entry per item in ``child["interactions"]`` and one
    ``network_call`` entry per item in ``child["network_calls"]``. For agent
    spans the nested ``data["children"]`` are processed recursively between
    the agent's start and end entries.

    Args:
        child (dict): The child span to process.
        interaction_id (int): ID to assign to the first interaction emitted.
        interactions (list): Output list; new interactions are appended to it.

    Returns:
        int: Next unused interaction ID.
    """
    child_type = child.get("type")
    span_id = child.get("id")
    start_time = child.get("start_time")
    end_time = child.get("end_time")
    # A missing or non-dict "data" is treated as empty so a malformed span
    # still yields its start/end pair instead of raising AttributeError.
    data = _dict_or_empty(child.get("data"))

    def emit(interaction_type, content, timestamp):
        """Append one span-level interaction and advance the running ID."""
        nonlocal interaction_id
        interactions.append(
            {
                "id": str(interaction_id),
                "span_id": span_id,
                "interaction_type": interaction_type,
                "name": child.get("name"),
                "content": content,
                "timestamp": timestamp,
                "error": child.get("error"),
            }
        )
        interaction_id += 1

    if child_type == "tool":
        # Only a dict input can carry "args"/"kwargs"; anything else (for
        # example a raw string) is reported as [None, None].
        tool_input = _dict_or_empty(data.get("input"))
        emit(
            "tool_call_start",
            {"parameters": [tool_input.get("args"), tool_input.get("kwargs")]},
            start_time,
        )
        emit("tool_call_end", {"returns": data.get("output")}, end_time)

    elif child_type == "llm":
        emit("llm_call_start", {"prompt": data.get("input")}, start_time)
        emit("llm_call_end", {"response": data.get("output")}, end_time)

    elif child_type == "agent":
        emit("agent_call_start", None, start_time)

        # Nested children are emitted between the agent's start and end.
        for nested_child in data.get("children") or ():
            interaction_id = process_child_interactions(
                nested_child, interaction_id, interactions
            )

        emit("agent_call_end", data.get("output"), end_time)

    else:
        # Unknown span types expose their raw "data" payload on both entries.
        emit(f"{child_type}_call_start", child.get("data", {}), start_time)
        emit(f"{child_type}_call_end", child.get("data", {}), end_time)

    # Process additional interactions and network calls
    for recorded in child.get("interactions") or ():
        # Work on a shallow copy so the span stored in the trace is not
        # rewritten as a side effect of building the workflow.
        entry = dict(recorded)
        entry["id"] = str(interaction_id)
        entry["span_id"] = span_id
        entry["error"] = None
        interactions.append(entry)
        interaction_id += 1

    for child_network_call in child.get("network_calls") or ():
        interactions.append(
            {
                "id": str(interaction_id),
                "span_id": span_id,
                "interaction_type": "network_call",
                "name": None,
                "content": {
                    "request": {
                        "url": child_network_call.get("url"),
                        "method": child_network_call.get("method"),
                        "headers": child_network_call.get("headers"),
                    },
                    "response": {
                        "status_code": child_network_call.get("status_code"),
                        "headers": child_network_call.get("response_headers"),
                        "body": child_network_call.get("response_body"),
                    },
                },
                "timestamp": child_network_call.get("start_time"),
                "error": child_network_call.get("error"),
            }
        )
        interaction_id += 1

    return interaction_id
242
+
243
+
244
def format_interactions(trace) -> dict:
    """
    Format the spans of a trace into a flat, time-ordered list of interactions.

    Only the spans of the first entry in ``trace["data"]`` are read. Each span
    yields a start/end pair (``agent_call_*``, ``tool_call_*``, ``llm_call_*``
    or ``<type>_call_*``), plus one entry per item in ``span["interactions"]``
    and one ``network_call`` entry per item in ``span["network_calls"]``.
    Children of agent spans are expanded through ``process_child_interactions``.

    Args:
        trace (dict): Trace whose ``data[0]["spans"]`` holds the span dicts.

    Returns:
        dict: ``{"workflow": [...]}`` with the interactions sorted by
            timestamp and their ``id`` renumbered from "1" in that order.
            The list is empty when the trace has no spans.
    """
    # Guard every level: a missing/empty "data" list or a missing/empty
    # "spans" entry all mean "nothing to format".
    trace_data = trace.get("data")
    spans = trace_data[0].get("spans") if trace_data else None
    if not spans:
        return {"workflow": []}

    interactions = []
    interaction_id = 1

    def emit(span, interaction_type, content, timestamp):
        """Append one span-level interaction and advance the running ID."""
        nonlocal interaction_id
        interactions.append(
            {
                "id": str(interaction_id),
                "span_id": span['id'],
                "interaction_type": interaction_type,
                "name": span['name'],
                "content": content,
                "timestamp": timestamp,
                "error": span['error'],
            }
        )
        interaction_id += 1

    for span in spans:
        span_type = span['type']
        span_data = span['data']

        if span_type == "agent":
            emit(span, "agent_call_start", None, span['start_time'])

            # Children are emitted between the agent's start and end entries.
            if "children" in span_data:
                for child in span_data["children"]:
                    interaction_id = process_child_interactions(
                        child, interaction_id, interactions
                    )

            emit(span, "agent_call_end", span_data.get("output"), span['end_time'])

        elif span_type == "tool":
            # Top-level tool spans carry both input and output on the start
            # and the end entry. NOTE(review): process_child_interactions uses
            # "parameters"/"returns" for nested tools - confirm which schema
            # consumers expect before unifying.
            emit(
                span,
                "tool_call_start",
                {"prompt": span_data.get("input"), "response": span_data.get("output")},
                span['start_time'],
            )
            emit(
                span,
                "tool_call_end",
                {"prompt": span_data.get("input"), "response": span_data.get("output")},
                span['end_time'],
            )

        elif span_type == "llm":
            emit(span, "llm_call_start", {"prompt": span_data.get("input")}, span['start_time'])
            emit(span, "llm_call_end", {"response": span_data.get("output")}, span['end_time'])

        else:
            # Unknown span types expose their raw data on both entries.
            emit(span, f"{span_type}_call_start", span_data, span['start_time'])
            emit(span, f"{span_type}_call_end", span_data, span['end_time'])

        # Interactions recorded directly on the span (no "name" field).
        if 'interactions' in span:
            for span_interaction in span['interactions']:
                interactions.append(
                    {
                        "id": str(interaction_id),
                        "span_id": span['id'],
                        "interaction_type": span_interaction['type'],
                        "content": span_interaction['content'],
                        "timestamp": span_interaction['timestamp'],
                        "error": span['error'],
                    }
                )
                interaction_id += 1

        if 'network_calls' in span:
            for span_network_call in span['network_calls']:
                interactions.append(
                    {
                        "id": str(interaction_id),
                        "span_id": span['id'],
                        "interaction_type": "network_call",
                        "name": None,
                        "content": {
                            "request": {
                                "url": span_network_call.get("url"),
                                "method": span_network_call.get("method"),
                                "headers": span_network_call.get("headers"),
                            },
                            "response": {
                                "status_code": span_network_call.get("status_code"),
                                "headers": span_network_call.get("response_headers"),
                                "body": span_network_call.get("response_body"),
                            },
                        },
                        # NOTE(review): process_child_interactions reads
                        # "start_time" for nested network calls - confirm
                        # which key the network tracer actually emits.
                        "timestamp": span_network_call.get("timestamp"),
                        "error": span_network_call.get("error"),
                    }
                )
                interaction_id += 1

    # Sort by timestamp; entries with a missing or empty timestamp sort first.
    # .get() is required because interactions forwarded from child spans are
    # appended as recorded and may not carry a "timestamp" key at all.
    sorted_interactions = sorted(
        interactions, key=lambda entry: entry.get("timestamp") or ""
    )

    # Reassign IDs to maintain sequential order after sorting
    for idx, interaction in enumerate(sorted_interactions, 1):
        interaction["id"] = str(idx)

    return {"workflow": sorted_interactions}
@@ -3,13 +3,13 @@ import json
3
3
  import tempfile
4
4
  from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
5
5
  import logging
6
- from datetime import datetime
7
6
  from dataclasses import asdict
8
7
  from ragaai_catalyst.tracers.utils.trace_json_converter import convert_json_format
9
8
  from ragaai_catalyst.tracers.agentic_tracing.tracers.base import TracerJSONEncoder
10
9
  from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
11
10
  from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import submit_upload_task
12
11
  from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
12
+ from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import format_interactions
13
13
 
14
14
 
15
15
  logger = logging.getLogger("RagaAICatalyst")
@@ -77,8 +77,9 @@ class RAGATraceExporter(SpanExporter):
77
77
 
78
78
  def prepare_trace(self, spans, trace_id):
79
79
  try:
80
- ragaai_trace = convert_json_format(spans, self.custom_model_cost)
81
- ragaai_trace["workflow"] = []
80
+ ragaai_trace = convert_json_format(spans, self.custom_model_cost)
81
+ interactions = format_interactions(ragaai_trace)
82
+ ragaai_trace["workflow"] = interactions['workflow']
82
83
 
83
84
  # Add source code hash
84
85
  hash_id, zip_path = zip_list_of_unique_files(
@@ -111,9 +112,7 @@ class RAGATraceExporter(SpanExporter):
111
112
  def upload_trace(self, ragaai_trace_details, trace_id):
112
113
  filepath = ragaai_trace_details['trace_file_path']
113
114
  hash_id = ragaai_trace_details['hash_id']
114
- zip_path = ragaai_trace_details['code_zip_path']
115
-
116
-
115
+ zip_path = ragaai_trace_details['code_zip_path']
117
116
 
118
117
  self.upload_task_id = submit_upload_task(
119
118
  filepath=filepath,
@@ -1,4 +1,3 @@
1
- from audioop import add
2
1
  import os
3
2
  import uuid
4
3
  import datetime
@@ -37,6 +36,9 @@ from ragaai_catalyst.tracers.exporters.ragaai_trace_exporter import RAGATraceExp
37
36
  from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
38
37
 
39
38
logger = logging.getLogger(__name__)
# Logger.setLevel() returns None, so the level constant and the side effect
# are kept separate: logging_level always holds a real level, and the logger
# is only forced to DEBUG when the DEBUG=1 environment flag is set (otherwise
# it keeps inheriting its effective level from its parent logger).
logging_level = logging.DEBUG if os.getenv("DEBUG") == "1" else logging.INFO
if logging_level == logging.DEBUG:
    logger.setLevel(logging_level)
40
42
 
41
43
  class Tracer(AgenticTracing):
42
44
  NUM_PROJECTS = 99999
@@ -90,7 +92,7 @@ class Tracer(AgenticTracing):
90
92
 
91
93
  # take care of auto_instrumentation
92
94
  if isinstance(auto_instrumentation, bool):
93
- if tracer_type == "agentic/llamaindex":
95
+ if tracer_type.startswith("agentic/"):
94
96
  auto_instrumentation = {
95
97
  "llm": False,
96
98
  "tool": False,
@@ -184,30 +186,148 @@ class Tracer(AgenticTracing):
184
186
  elif tracer_type == "llamaindex":
185
187
  self._upload_task = None
186
188
  self.llamaindex_tracer = None
187
- elif tracer_type == "agentic/llamaindex":
188
- from opentelemetry.sdk import trace as trace_sdk
189
- from opentelemetry.sdk.trace.export import SimpleSpanProcessor
190
- from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
191
- from ragaai_catalyst.tracers.exporters.dynamic_trace_exporter import DynamicTraceExporter
192
-
193
- # Get the code_files
194
- self.file_tracker.trace_main_file()
195
- list_of_unique_files = self.file_tracker.get_unique_files()
196
-
197
- # Create a dynamic exporter that allows property updates
198
- self.dynamic_exporter = DynamicTraceExporter(
199
- files_to_zip=list_of_unique_files,
200
- project_name=self.project_name,
201
- project_id=self.project_id,
202
- dataset_name=self.dataset_name,
203
- user_details=self.user_details,
204
- base_url=self.base_url,
205
- custom_model_cost=self.model_custom_cost
206
- )
189
+ # Handle agentic tracers
190
+ elif tracer_type == "agentic" or tracer_type.startswith("agentic/"):
191
+
192
+ # Setup instrumentors based on tracer type
193
+ instrumentors = []
194
+
195
+ # Add LLM Instrumentors
196
+ if tracer_type in ['agentic/crewai']:
197
+ try:
198
+ from openinference.instrumentation.vertexai import VertexAIInstrumentor
199
+ instrumentors.append((VertexAIInstrumentor, []))
200
+ except (ImportError, ModuleNotFoundError):
201
+ logger.debug("VertexAI not available in environment")
202
+ try:
203
+ from openinference.instrumentation.anthropic import AnthropicInstrumentor
204
+ instrumentors.append((AnthropicInstrumentor, []))
205
+ except (ImportError, ModuleNotFoundError):
206
+ logger.debug("Anthropic not available in environment")
207
+ try:
208
+ from openinference.instrumentation.groq import GroqInstrumentor
209
+ instrumentors.append((GroqInstrumentor, []))
210
+ except (ImportError, ModuleNotFoundError):
211
+ logger.debug("Groq not available in environment")
212
+ try:
213
+ from openinference.instrumentation.litellm import LiteLLMInstrumentor
214
+ instrumentors.append((LiteLLMInstrumentor, []))
215
+ except (ImportError, ModuleNotFoundError):
216
+ logger.debug("LiteLLM not available in environment")
217
+ try:
218
+ from openinference.instrumentation.mistralai import MistralAIInstrumentor
219
+ instrumentors.append((MistralAIInstrumentor, []))
220
+ except (ImportError, ModuleNotFoundError):
221
+ logger.debug("MistralAI not available in environment")
222
+ try:
223
+ from openinference.instrumentation.openai import OpenAIInstrumentor
224
+ instrumentors.append((OpenAIInstrumentor, []))
225
+ except (ImportError, ModuleNotFoundError):
226
+ logger.debug("OpenAI not available in environment")
227
+ try:
228
+ from openinference.instrumentation.bedrock import BedrockInstrumentor
229
+ instrumentors.append((BedrockInstrumentor, []))
230
+ except (ImportError, ModuleNotFoundError):
231
+ logger.debug("Bedrock not available in environment")
232
+
233
+ # If tracer_type is just "agentic", try to instrument all available packages
234
+ if tracer_type == "agentic":
235
+ logger.info("Attempting to instrument all available agentic packages")
236
+
237
+ # Try to import and add all known instrumentors
238
+ try:
239
+ # LlamaIndex
240
+ try:
241
+ from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
242
+ instrumentors.append((LlamaIndexInstrumentor, []))
243
+ logger.info("Instrumenting LlamaIndex...")
244
+ except (ImportError, ModuleNotFoundError):
245
+ logger.debug("LlamaIndex not available in environment")
246
+
247
+ # LangChain
248
+ try:
249
+ from openinference.instrumentation.langchain import LangChainInstrumentor
250
+ instrumentors.append((LangChainInstrumentor, []))
251
+ logger.info("Instrumenting LangChain...")
252
+ except (ImportError, ModuleNotFoundError):
253
+ logger.debug("LangChain not available in environment")
254
+
255
+ # CrewAI
256
+ try:
257
+ from openinference.instrumentation.crewai import CrewAIInstrumentor
258
+ instrumentors.append((CrewAIInstrumentor, []))
259
+ logger.info("Instrumenting CrewAI...")
260
+ except (ImportError, ModuleNotFoundError):
261
+ logger.debug("CrewAI not available in environment")
262
+
263
+ # Haystack
264
+ try:
265
+ from openinference.instrumentation.haystack import HaystackInstrumentor
266
+ instrumentors.append((HaystackInstrumentor, []))
267
+ logger.info("Instrumenting Haystack...")
268
+ except (ImportError, ModuleNotFoundError):
269
+ logger.debug("Haystack not available in environment")
270
+
271
+ # AutoGen
272
+ try:
273
+ from openinference.instrumentation.autogen import AutogenInstrumentor
274
+ instrumentors.append((AutogenInstrumentor, []))
275
+ logger.info("Instrumenting AutoGen...")
276
+ except (ImportError, ModuleNotFoundError):
277
+ logger.debug("AutoGen not available in environment")
278
+
279
+ # Smolagents
280
+ try:
281
+ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
282
+ instrumentors.append((SmolagentsInstrumentor, []))
283
+ logger.info("Instrumenting Smolagents...")
284
+ except (ImportError, ModuleNotFoundError):
285
+ logger.debug("Smolagents not available in environment")
286
+
287
+ if not instrumentors:
288
+ logger.warning("No agentic packages found in environment to instrument")
289
+ self._upload_task = None
290
+ return
291
+
292
+ except Exception as e:
293
+ logger.error(f"Error during auto-instrumentation: {str(e)}")
294
+ self._upload_task = None
295
+ return
296
+
297
+ # Handle specific framework instrumentation
298
+ elif tracer_type == "agentic/llamaindex":
299
+ from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
300
+ instrumentors += [(LlamaIndexInstrumentor, [])]
301
+
302
+ elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
303
+ from openinference.instrumentation.langchain import LangChainInstrumentor
304
+ instrumentors += [(LangChainInstrumentor, [])]
305
+
306
+ elif tracer_type == "agentic/crewai":
307
+ from openinference.instrumentation.crewai import CrewAIInstrumentor
308
+ from openinference.instrumentation.langchain import LangChainInstrumentor
309
+ instrumentors += [(CrewAIInstrumentor, []), (LangChainInstrumentor, [])]
310
+
311
+ elif tracer_type == "agentic/haystack":
312
+ from openinference.instrumentation.haystack import HaystackInstrumentor
313
+ instrumentors += [(HaystackInstrumentor, [])]
207
314
 
208
- tracer_provider = trace_sdk.TracerProvider()
209
- tracer_provider.add_span_processor(SimpleSpanProcessor(self.dynamic_exporter))
210
- LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
315
+ elif tracer_type == "agentic/autogen":
316
+ from openinference.instrumentation.autogen import AutogenInstrumentor
317
+ instrumentors += [(AutogenInstrumentor, [])]
318
+
319
+ elif tracer_type == "agentic/smolagents":
320
+ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
321
+ instrumentors += [(SmolagentsInstrumentor, [])]
322
+
323
+ else:
324
+ # Unknown agentic tracer type
325
+ logger.warning(f"Unknown agentic tracer type: {tracer_type}")
326
+ self._upload_task = None
327
+ return
328
+
329
+ # Common setup for all agentic tracers
330
+ self._setup_agentic_tracer(instrumentors)
211
331
  else:
212
332
  self._upload_task = None
213
333
  # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
@@ -576,12 +696,13 @@ class Tracer(AgenticTracing):
576
696
  - dataset_name: Dataset name
577
697
  - user_details: User details
578
698
  - base_url: Base URL for API
699
+ - custom_model_cost: Dictionary of custom model costs
579
700
 
580
701
  Raises:
581
- AttributeError: If the tracer_type is not 'agentic/llamaindex' or if the dynamic_exporter is not initialized.
702
+ AttributeError: If the tracer_type is not an agentic tracer or if the dynamic_exporter is not initialized.
582
703
  """
583
- if self.tracer_type != "agentic/llamaindex" or not hasattr(self, "dynamic_exporter"):
584
- raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")
704
+ if not self.tracer_type.startswith("agentic/") or not hasattr(self, "dynamic_exporter"):
705
+ raise AttributeError("This method is only available for agentic tracers with a dynamic exporter.")
585
706
 
586
707
  for key, value in kwargs.items():
587
708
  if hasattr(self.dynamic_exporter, key):
@@ -590,6 +711,40 @@ class Tracer(AgenticTracing):
590
711
  else:
591
712
  logger.warning(f"Dynamic exporter has no attribute '{key}'")
592
713
 
714
def _setup_agentic_tracer(self, instrumentors):
    """
    Build the dynamic exporter and tracer provider used by agentic tracers.

    The exporter is stored on ``self.dynamic_exporter`` and attached to a new
    tracer provider through a ``SimpleSpanProcessor``; every instrumentor is
    then instantiated and instrumented against that provider.

    Args:
        instrumentors (list): ``(instrumentor_class, args)`` tuples. ``args``
            is forwarded positionally to ``instrumentor_class().instrument``.
    """
    from opentelemetry.sdk import trace as trace_sdk
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from ragaai_catalyst.tracers.exporters.dynamic_trace_exporter import DynamicTraceExporter

    # Collect the code files known to the file tracker.
    self.file_tracker.trace_main_file()
    tracked_files = self.file_tracker.get_unique_files()

    # The dynamic exporter allows these properties to be updated later.
    exporter = DynamicTraceExporter(
        files_to_zip=tracked_files,
        project_name=self.project_name,
        project_id=self.project_id,
        dataset_name=self.dataset_name,
        user_details=self.user_details,
        base_url=self.base_url,
        custom_model_cost=self.model_custom_cost,
    )
    self.dynamic_exporter = exporter

    # One provider, shared by all instrumentors, feeding the exporter.
    provider = trace_sdk.TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))

    for instrumentor_cls, positional_args in instrumentors:
        instrumentor = instrumentor_cls()
        instrumentor.instrument(*positional_args, tracer_provider=provider)
747
+
593
748
  def update_file_list(self):
594
749
  """
595
750
  Update the file list in the dynamic exporter with the latest tracked files.
@@ -598,8 +753,8 @@ class Tracer(AgenticTracing):
598
753
  Raises:
599
754
  AttributeError: If the tracer_type is not an agentic tracer or if the dynamic_exporter is not initialized.
600
755
  """
601
- if self.tracer_type != "agentic/llamaindex" or not hasattr(self, "dynamic_exporter"):
602
- raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")
756
+ if not self.tracer_type.startswith("agentic/") or not hasattr(self, "dynamic_exporter"):
757
+ raise AttributeError("This method is only available for agentic tracers with a dynamic exporter.")
603
758
 
604
759
  # Get the latest list of unique files
605
760
  list_of_unique_files = self.file_tracker.get_unique_files()
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import sys
3
3
  from datetime import datetime
4
- from typing import final
4
+ from typing import final, List, Dict, Any, Optional
5
5
  import pytz
6
6
  import uuid
7
7
  from ragaai_catalyst.tracers.agentic_tracing.utils.llm_utils import calculate_llm_cost, get_model_cost
@@ -35,14 +35,29 @@ def get_uuid(name):
35
35
  """Generate a deterministic (version 5, DNS-namespace) UUID derived from ``name``."""
36
36
  return str(uuid.uuid5(uuid.NAMESPACE_DNS, name))
37
37
 
38
def get_ordered_family(parent_children_mapping: Dict[Optional[str], Any]) -> List[Optional[str]]:
    """
    Return the IDs of all parent spans, deepest descendants first.

    Starting from the root key ``None``, the mapping is walked depth-first in
    pre-order, keeping only IDs that are themselves keys of the mapping (that
    is, spans that have children). The visit order is then reversed, so every
    parent appears after all of its descendant parents and ``None`` is last.

    Args:
        parent_children_mapping: Maps a parent span ID (``None`` for the root
            level) to the list of its child span dicts; each child dict must
            have an ``"id"`` key.

    Returns:
        A list of parent IDs ending with ``None``. A real list is returned
        (not a one-shot ``reversed`` iterator) so it can be iterated more
        than once, indexed, and measured.
    """
    ordered_family: List[Optional[str]] = [None]
    # Set mirror of ordered_family for O(1) "already visited" checks.
    visited = {None}
    # Explicit stack of child iterators instead of recursion, so deeply
    # nested traces cannot hit the interpreter's recursion limit.
    pending = [iter(parent_children_mapping.get(None, []))]
    while pending:
        for child in pending[-1]:
            child_id = child['id']
            if child_id in parent_children_mapping and child_id not in visited:
                visited.add(child_id)
                ordered_family.append(child_id)
                # Descend into this child before resuming its siblings.
                pending.append(iter(parent_children_mapping.get(child_id, [])))
                break
        else:
            # Current level exhausted: go back up to the parent's siblings.
            pending.pop()
    ordered_family.reverse()
    return ordered_family
49
+
38
50
  def get_spans(input_trace, custom_model_cost):
39
- data=[]
51
+ span_map = {}
52
+ parent_children_mapping = {}
40
53
  span_type_mapping={"AGENT":"agent","LLM":"llm","TOOL":"tool"}
41
54
  span_name_occurrence = {}
42
55
  for span in input_trace:
43
56
  final_span = {}
44
57
  span_type=span_type_mapping.get(span["attributes"]["openinference.span.kind"],"custom")
45
- final_span["id"] = span["context"]["span_id"]
58
+ span_id = span["context"]["span_id"]
59
+ parent_id = span["parent_id"]
60
+ final_span["id"] = span_id
46
61
  if span["name"] not in span_name_occurrence:
47
62
  span_name_occurrence[span['name']]=0
48
63
  else:
@@ -53,7 +68,7 @@ def get_spans(input_trace, custom_model_cost):
53
68
  final_span["type"] = span_type
54
69
  final_span["start_time"] = convert_time_format(span['start_time'])
55
70
  final_span["end_time"] = convert_time_format(span['end_time'])
56
- final_span["parent_id"] = span["parent_id"]
71
+ final_span["parent_id"] = parent_id
57
72
  final_span["extra_info"] = None
58
73
  '''Handle Error if any'''
59
74
  if span["status"]["status_code"].lower() == "error":
@@ -82,6 +97,7 @@ def get_spans(input_trace, custom_model_cost):
82
97
  final_span["data"]["output"] = span["attributes"]["output.value"]
83
98
  else:
84
99
  final_span["data"]["output"] = ""
100
+ final_span["data"]['children'] = []
85
101
 
86
102
  elif span_type=="tool":
87
103
  available_fields = list(span['attributes'].keys())
@@ -189,7 +205,23 @@ def get_spans(input_trace, custom_model_cost):
189
205
  "total_tokens": final_span["info"]["tokens"]["total_tokens"]
190
206
  }
191
207
  final_span["info"]["cost"] = calculate_llm_cost(token_usage=token_usage, model_name=model_name, model_costs=model_costs, model_custom_cost=custom_model_cost)
192
- data.append(final_span)
208
+ span_map[span_id] = final_span
209
+ if parent_id not in parent_children_mapping:
210
+ parent_children_mapping[parent_id] = []
211
+ parent_children_mapping[parent_id].append(final_span)
212
+ ordered_family = get_ordered_family(parent_children_mapping)
213
+ data = []
214
+ for parent_id in ordered_family:
215
+ children = parent_children_mapping[parent_id]
216
+ if parent_id in span_map:
217
+ parent_type = span_map[parent_id]["type"]
218
+ if parent_type == 'agent':
219
+ span_map[parent_id]['data']["children"] = children
220
+ else:
221
+ grand_parent_id = span_map[parent_id]["parent_id"]
222
+ parent_children_mapping[grand_parent_id].extend(children)
223
+ else:
224
+ data = children
193
225
  return data
194
226
 
195
227
  def convert_json_format(input_trace, custom_model_cost):
@@ -1,29 +1,18 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ragaai_catalyst
3
- Version: 2.1.5.1b2
3
+ Version: 2.1.6b0
4
4
  Summary: RAGA AI CATALYST
5
5
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>, Tushar Kumar <tushar.kumar@raga.ai>
6
6
  Requires-Python: <3.13,>=3.9
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: aiohttp>=3.10.2
10
- Requires-Dist: opentelemetry-api==1.25.0
11
- Requires-Dist: opentelemetry-sdk==1.25.0
12
- Requires-Dist: opentelemetry-exporter-otlp-proto-grpc==1.25.0
13
- Requires-Dist: opentelemetry-instrumentation==0.46b0
14
- Requires-Dist: opentelemetry-instrumentation-fastapi==0.46b0
15
- Requires-Dist: opentelemetry-instrumentation-asgi==0.46b0
16
- Requires-Dist: opentelemetry-semantic-conventions==0.46b0
17
- Requires-Dist: opentelemetry-util-http==0.46b0
18
- Requires-Dist: opentelemetry-instrumentation-langchain~=0.24.0
19
- Requires-Dist: opentelemetry-instrumentation-openai~=0.24.0
20
10
  Requires-Dist: langchain-core>=0.2.11
21
11
  Requires-Dist: langchain>=0.2.11
22
12
  Requires-Dist: openai>=1.57.0
23
13
  Requires-Dist: pandas
24
14
  Requires-Dist: groq>=0.11.0
25
- Requires-Dist: PyPDF2>=3.0.1
26
- Requires-Dist: google-generativeai>=0.8.2
15
+ Requires-Dist: pypdf>=5.3.1
27
16
  Requires-Dist: google-genai>=1.3.0
28
17
  Requires-Dist: Markdown>=3.7
29
18
  Requires-Dist: litellm==1.51.1
@@ -10,7 +10,7 @@ ragaai_catalyst/prompt_manager.py,sha256=W8ypramzOprrJ7-22d5vkBXIuIQ8v9XAzKDGxKs
10
10
  ragaai_catalyst/proxy_call.py,sha256=CHxldeceZUaLU-to_hs_Kf1z_b2vHMssLS_cOBedu78,5499
11
11
  ragaai_catalyst/ragaai_catalyst.py,sha256=1FaeK_VZpJLQ1ZqEWpMyI8J8M2MI0abLLLDFWY9W-4A,19580
12
12
  ragaai_catalyst/redteaming_old.py,sha256=W2d89Ok8W-C8g7TBM3fDIFLof3q9FuYSr0jcryH2XQo,7097
13
- ragaai_catalyst/synthetic_data_generation.py,sha256=oWxV36wewwrggxXgvOZtzf-M5IOYalP1h5XjF6IOyY8,37597
13
+ ragaai_catalyst/synthetic_data_generation.py,sha256=RsaT2sJ4MxvwYU0t4tOTm9lAcsJJEPR3Z_YhG-Lo39g,37880
14
14
  ragaai_catalyst/utils.py,sha256=TlhEFwLyRU690HvANbyoRycR3nQ67lxVUQoUOfTPYQ0,3772
15
15
  ragaai_catalyst/redteaming/__init__.py,sha256=TJdvZpaZGFsg9qKONdjTosSVLZGadYFpHG6KE0xapKU,155
16
16
  ragaai_catalyst/redteaming/evaluator.py,sha256=C50SAc3RsR7PZnz-VQ7wQfDpiVEb7T3W3KV4Lj0tWYE,4599
@@ -31,7 +31,7 @@ ragaai_catalyst/tracers/distributed.py,sha256=MwlBwIxCAng-OI-7Ove_rkE1mTLeuW4Jw-
31
31
  ragaai_catalyst/tracers/langchain_callback.py,sha256=CB75zzG3-DkYTELj0vI1MOHQTY0MuQJfoHIXz9Cl8S8,34568
32
32
  ragaai_catalyst/tracers/llamaindex_callback.py,sha256=ZY0BJrrlz-P9Mg2dX-ZkVKG3gSvzwqBtk7JL_05MiYA,14028
33
33
  ragaai_catalyst/tracers/llamaindex_instrumentation.py,sha256=Ys_jLkvVqo12bKgXDmkp4TxJu9HkBATrFE8cIcTYxWw,14329
34
- ragaai_catalyst/tracers/tracer.py,sha256=oaag7-VdUufR5LygnKcUgjTvlAEcxToVxNYkQCWEhkg,27827
34
+ ragaai_catalyst/tracers/tracer.py,sha256=NFrDbko4xw7QYEWUfi98LyKlxROUyAbDktf0y3DCARg,35996
35
35
  ragaai_catalyst/tracers/upload_traces.py,sha256=OKsc-Obf8bJvKBprt3dqj8GQQNkoX3kT_t8TBDi9YDQ,5670
36
36
  ragaai_catalyst/tracers/agentic_tracing/README.md,sha256=X4QwLb7-Jg7GQMIXj-SerZIgDETfw-7VgYlczOR8ZeQ,4508
37
37
  ragaai_catalyst/tracers/agentic_tracing/__init__.py,sha256=yf6SKvOPSpH-9LiKaoLKXwqj5sez8F_5wkOb91yp0oE,260
@@ -54,7 +54,7 @@ ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py,sha256=m8CxYkl
54
54
  ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py,sha256=xxrliKPfdfbIZRZqMnUewsaTD8_Hv0dbuoBivNZGD4U,21674
55
55
  ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py,sha256=bhSUhNQCuJXKjgJAXhjKEYjnHMpYN90FSZdR84fNIKU,4614
56
56
  ragaai_catalyst/tracers/agentic_tracing/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py,sha256=Buk0OXjdkku0tuuFzGeqKRtwSeIBe3LpA1oa14qS7v4,12380
57
+ ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py,sha256=UnGpcMpRbntUrYsIU11r-gMHtzNkDGSGCbepiL_XTFA,12379
58
58
  ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py,sha256=icycLgfA0734xxoM1rTMG_iIrI3iM94th8RQggJ7sSw,8541
59
59
  ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py,sha256=aw_eHhUYRbR_9IbIkNjYb7NOsmETD3k1p4a6gxaGI7Q,6462
60
60
  ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py,sha256=m1O8lKpxKwtHofXLW3fTHX5yfqDW5GxoveARlg5cTw4,2571
@@ -70,14 +70,14 @@ ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json,sha256=2tzGw_cKCT
70
70
  ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py,sha256=qmODERcFZhc8MX24boFCXkkh6sJ-vZngRHPvxhyWFeE,4347
71
71
  ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml,sha256=LvFDivDIE96Zasp-fgDEqUJ5GEQZUawQucR3aOcSUTY,926
72
72
  ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py,sha256=H8WNsk4v_5T6OUw4TFOzlDLjQhJwjh1nAMyMAoqMEi4,6946
73
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py,sha256=go7FVnofviATDph-j8sk2juv09CGSRt1Vq4U868Fhd8,2259
73
+ ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py,sha256=rssHolDvKxZ9V6-4VTFAqC65o6-CG924hA0CnG3smSc,15902
74
74
  ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py,sha256=G027toV-Km20JjKrc-Y_PilQ8ABEKrBvvzgLTnqVg7I,5819
75
75
  ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py,sha256=4TeCGsFF26249fV6dJHLTZDrRa93SG9oer4rudoF8Y4,19443
76
76
  ragaai_catalyst/tracers/exporters/__init__.py,sha256=wQbaqyeIjVZxYprHCKZ9BeiqxeXYBKjzEgP79LWNxCU,293
77
77
  ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py,sha256=w9U8UTxvTbGTDUoMtsgy2BsdpYp-APTKFdGV4o5JPaM,5051
78
78
  ragaai_catalyst/tracers/exporters/file_span_exporter.py,sha256=RgGteu-NVGprXKkynvyIO5yOjpbtA41R3W_NzCjnkwE,6445
79
79
  ragaai_catalyst/tracers/exporters/raga_exporter.py,sha256=6xvjWXyh8XPkHKSLLmAZUQSvwuyY17ov8pv2VdfI0qA,17875
80
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py,sha256=s8zIUMrUKhtGrg-32XZnlJPKXWSyHo_WiJ1DoTcHVRg,5170
80
+ ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py,sha256=HZG1UjcipgQOHkeqQHVGxenIab2mHqcVmWqtOXlMt6Q,5305
81
81
  ragaai_catalyst/tracers/instrumentators/__init__.py,sha256=FgnMQupoRTzmVsG9YKsLQera2Pfs-AluZv8CxwavoyQ,253
82
82
  ragaai_catalyst/tracers/instrumentators/langchain.py,sha256=yMN0qVF0pUVk6R5M1vJoUXezDo1ejs4klCFRlE8x4vE,574
83
83
  ragaai_catalyst/tracers/instrumentators/llamaindex.py,sha256=SMrRlR4xM7k9HK43hakE8rkrWHxMlmtmWD-AX6TeByc,416
@@ -88,10 +88,10 @@ ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py,sha256=8qLo7x4Zsn
88
88
  ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py,sha256=ZhPs0YhVtB82-Pq9o1BvCinKE_WPvVxPTEcZjlJbFYM,2371
89
89
  ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py,sha256=XS2_x2qneqEx9oAighLg-LRiueWcESLwIC2r7eJT-Ww,3117
90
90
  ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json,sha256=C3uwkibJ08C9sOX-54kulZYmJlIpZ-SQpfE6HNGrjbM,343502
91
- ragaai_catalyst/tracers/utils/trace_json_converter.py,sha256=qXSYKr4JMUpGQsB3mnr9_2qH6FqzUhCynNqlDp1IWTs,12440
91
+ ragaai_catalyst/tracers/utils/trace_json_converter.py,sha256=06oTKZHtKL9ylBybpYNU8AfT9xIVMekUBOhlUnRvzB8,13969
92
92
  ragaai_catalyst/tracers/utils/utils.py,sha256=ViygfJ7vZ7U0CTSA1lbxVloHp4NSlmfDzBRNCJuMhis,2374
93
- ragaai_catalyst-2.1.5.1b2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
94
- ragaai_catalyst-2.1.5.1b2.dist-info/METADATA,sha256=Nv0jgHG5lZLvef0tdbH7msv7Wb2nkPVEH1GgK_JH-xQ,22057
95
- ragaai_catalyst-2.1.5.1b2.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
96
- ragaai_catalyst-2.1.5.1b2.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
97
- ragaai_catalyst-2.1.5.1b2.dist-info/RECORD,,
93
+ ragaai_catalyst-2.1.6b0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
94
+ ragaai_catalyst-2.1.6b0.dist-info/METADATA,sha256=9jXmItgYw2N3g013dK16qpjvA8jGq_Q_jza3o0lqJi0,21468
95
+ ragaai_catalyst-2.1.6b0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
96
+ ragaai_catalyst-2.1.6b0.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
97
+ ragaai_catalyst-2.1.6b0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (76.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5