ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents changes between publicly available package versions as released to their public registries. It is provided for informational purposes only.
Files changed (64)
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/tracer.py

@@ -1,12 +1,23 @@
+ from audioop import add
  import os
+ import uuid
  import datetime
  import logging
  import asyncio
  import aiohttp
  import requests
+ from litellm import model_cost
+
  from contextlib import contextmanager
  from concurrent.futures import ThreadPoolExecutor
-
+ from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
+ from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
+
+ from ragaai_catalyst.tracers.utils.langchain_tracer_extraction_logic import langchain_tracer_extraction
+ from ragaai_catalyst.tracers.upload_traces import UploadTraces
+ import tempfile
+ import json
+ import numpy as np
  from opentelemetry.sdk import trace as trace_sdk
  from opentelemetry.sdk.trace.export import SimpleSpanProcessor
  from ragaai_catalyst.tracers.exporters.file_span_exporter import FileSpanExporter
@@ -18,15 +29,17 @@ from ragaai_catalyst.tracers.instrumentators import (
  )
  from ragaai_catalyst.tracers.utils import get_unique_key
  # from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+ from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer
  from ragaai_catalyst import RagaAICatalyst
- from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
+ from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing
  from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
- from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import load_model_costs, update_model_costs_from_github
+ from ragaai_catalyst.tracers.exporters.ragaai_trace_exporter import RAGATraceExporter
+ from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName

  logger = logging.getLogger(__name__)

  class Tracer(AgenticTracing):
-     NUM_PROJECTS = 100
+     NUM_PROJECTS = 99999
      TIMEOUT = 10
      def __init__(
          self,
@@ -77,7 +90,17 @@ class Tracer(AgenticTracing):

          # take care of auto_instrumentation
          if isinstance(auto_instrumentation, bool):
-             if auto_instrumentation:
+             if tracer_type == "agentic/llamaindex":
+                 auto_instrumentation = {
+                     "llm": False,
+                     "tool": False,
+                     "agent": False,
+                     "user_interaction": False,
+                     "file_io": False,
+                     "network": False,
+                     "custom": False
+                 }
+             elif auto_instrumentation:
                  auto_instrumentation = {
                      "llm": True,
                      "tool": True,
@@ -98,11 +121,11 @@
                      "custom": False
                  }
          elif isinstance(auto_instrumentation, dict):
-             auto_instrumentation = {k: v for k, v in auto_instrumentation.items() if v}
+             auto_instrumentation = {k: v for k, v in auto_instrumentation.items()}
              for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
                  if key not in auto_instrumentation:
-                     auto_instrumentation[key] = False
-
+                     auto_instrumentation[key] = True
+         self.model_custom_cost = {}
          super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)

          self.project_name = project_name
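The dict branch above changes behavior in 2.1.5: falsy entries are no longer filtered out, and components missing from the dict now default to True instead of False. A minimal sketch of the effect, with an illustrative partial config:

    auto_instrumentation = {"llm": True, "network": False}  # illustrative user input

    # 2.1.5 logic from the hunk above: keep all entries, default missing keys to True
    auto_instrumentation = {k: v for k, v in auto_instrumentation.items()}
    for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
        if key not in auto_instrumentation:
            auto_instrumentation[key] = True

    assert auto_instrumentation["network"] is False  # explicit False now survives
    assert auto_instrumentation["agent"] is True     # unspecified keys now default to True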
@@ -116,12 +139,11 @@
          self.upload_timeout = upload_timeout
          self.base_url = f"{RagaAICatalyst.BASE_URL}"
          self.timeout = 30
-         self.num_projects = 100
+         self.num_projects = 99999
          self.start_time = datetime.datetime.now().astimezone().isoformat()
-
-         if update_llm_cost:
-             # First update the model costs file from GitHub
-             update_model_costs_from_github()
+         self.model_cost_dict = model_cost
+         self.user_context = ""  # Initialize user_context to store context from add_context
+         self.file_tracker = TrackName()

          try:
              response = requests.get(
@@ -152,43 +174,112 @@
              raise

          if tracer_type == "langchain":
-             self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
+             # self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)

-             self._tracer_provider = self._setup_provider()
-             self._instrumentor = self._setup_instrumentor(tracer_type)
-             self.is_instrumented = False
+             # self._tracer_provider = self._setup_provider()
+             # self._instrumentor = self._setup_instrumentor(tracer_type)
+             # self.is_instrumented = False
+             # self._upload_task = None
              self._upload_task = None
          elif tracer_type == "llamaindex":
              self._upload_task = None
-             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
-
+             self.llamaindex_tracer = None
+         elif tracer_type == "agentic/llamaindex":
+             from opentelemetry.sdk import trace as trace_sdk
+             from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+             from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
+             from ragaai_catalyst.tracers.exporters.dynamic_trace_exporter import DynamicTraceExporter
+
+             # Get the code_files
+             self.file_tracker.trace_main_file()
+             list_of_unique_files = self.file_tracker.get_unique_files()
+
+             # Create a dynamic exporter that allows property updates
+             self.dynamic_exporter = DynamicTraceExporter(
+                 files_to_zip=list_of_unique_files,
+                 project_name=self.project_name,
+                 project_id=self.project_id,
+                 dataset_name=self.dataset_name,
+                 user_details=self.user_details,
+                 base_url=self.base_url,
+                 custom_model_cost=self.model_custom_cost
+             )
+
+             tracer_provider = trace_sdk.TracerProvider()
+             tracer_provider.add_span_processor(SimpleSpanProcessor(self.dynamic_exporter))
+             LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
          else:
              self._upload_task = None
              # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")

-
+     def set_model_cost(self, cost_config):
+         """
+         Set custom cost values for a specific model.
+
+         Args:
+             cost_config (dict): Dictionary containing model cost configuration with keys:
+                 - model_name (str): Name of the model
+                 - input_cost_per_token (float): Cost per input token
+                 - output_cost_per_token (float): Cost per output token
+
+         Example:
+             tracer.set_model_cost({
+                 "model_name": "gpt-4",
+                 "input_cost_per_million_token": 6,
+                 "output_cost_per_million_token": 2.40
+             })
+         """
+         if not isinstance(cost_config, dict):
+             raise TypeError("cost_config must be a dictionary")
+
+         required_keys = {"model_name", "input_cost_per_million_token", "output_cost_per_million_token"}
+         if not all(key in cost_config for key in required_keys):
+             raise ValueError(f"cost_config must contain all required keys: {required_keys}")
+
+         model_name = cost_config["model_name"]
+         self.model_custom_cost[model_name] = {
+             "input_cost_per_token": float(cost_config["input_cost_per_million_token"]) / 1000000,
+             "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) / 1000000
+         }
+
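A usage sketch of the new API (model name and prices illustrative). Note that the docstring's Args section lists per-token keys, while the validation and the stored conversion both expect the per-million keys shown in its Example:

    tracer.set_model_cost({
        "model_name": "my-custom-model",        # placeholder name
        "input_cost_per_million_token": 6,      # $6 per 1M input tokens
        "output_cost_per_million_token": 2.40,  # $2.40 per 1M output tokens
    })
    # Stored per-token rates: input 6 / 1_000_000 = 0.000006,
    # output 2.40 / 1_000_000 = 0.0000024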
      def set_dataset_name(self, dataset_name):
          """
          Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
+         If using agentic/llamaindex tracer with dynamic exporter, update the exporter's dataset_name property.

          Args:
              dataset_name (str): The new dataset name to set
          """
-         # Store current parameters
-         current_params = {
-             'project_name': self.project_name,
-             'tracer_type': self.tracer_type,
-             'pipeline': self.pipeline,
-             'metadata': self.metadata,
-             'description': self.description,
-             'upload_timeout': self.upload_timeout
-         }
-
-         # Reinitialize self with new dataset_name and stored parameters
-         self.__init__(
-             dataset_name=dataset_name,
-             **current_params
-         )
+         # If we have a dynamic exporter, update its dataset_name property
+         if self.tracer_type == "agentic/llamaindex" and hasattr(self, "dynamic_exporter"):
+             # Update the dataset name in the dynamic exporter
+             self.dynamic_exporter.dataset_name = dataset_name
+             logger.debug(f"Updated dynamic exporter's dataset_name to {dataset_name}")
+
+             # Update the instance variable
+             self.dataset_name = dataset_name
+
+             # Update user_details with new dataset_name
+             self.user_details = self._pass_user_data()
+
+             # Also update the user_details in the dynamic exporter
+             self.dynamic_exporter.user_details = self.user_details
+         else:
+             # Store current parameters
+             current_params = {
+                 'project_name': self.project_name,
+                 'tracer_type': self.tracer_type,
+                 'pipeline': self.pipeline,
+                 'metadata': self.metadata,
+                 'description': self.description,
+                 'upload_timeout': self.upload_timeout
+             }
+
+             # Reinitialize self with new dataset_name and stored parameters
+             self.__init__(
+                 dataset_name=dataset_name,
+                 **current_params
+             )

      def _improve_metadata(self, metadata, tracer_type):
          if metadata is None:
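A brief usage sketch (dataset name illustrative), assuming a tracer constructed as above:

    tracer.set_dataset_name("experiment_2")
    # agentic/llamaindex: updates the live DynamicTraceExporter in place
    # all other tracer types: re-runs __init__ with the stored parameters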
@@ -239,14 +330,15 @@ class Tracer(AgenticTracing):
      def start(self):
          """Start the tracer."""
          if self.tracer_type == "langchain":
-             if not self.is_instrumented:
-                 self._instrumentor().instrument(tracer_provider=self._tracer_provider)
-                 self.is_instrumented = True
-             print(f"Tracer started for project: {self.project_name}")
-             return self
+             # if not self.is_instrumented:
+             # self._instrumentor().instrument(tracer_provider=self._tracer_provider)
+             # self.is_instrumented = True
+             # print(f"Tracer started for project: {self.project_name}")
+             self.langchain_tracer = LangchainTracer()
+             return self.langchain_tracer.start()
          elif self.tracer_type == "llamaindex":
-             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
-             return LlamaIndexTracer(self._pass_user_data()).start()
+             self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
+             return self.llamaindex_tracer.start()
          else:
              super().start()
              return self
@@ -254,20 +346,111 @@
      def stop(self):
          """Stop the tracer and initiate trace upload."""
          if self.tracer_type == "langchain":
-             if not self.is_instrumented:
-                 logger.warning("Tracer was not started. No traces to upload.")
-                 return "No traces to upload"
-
-             print("Stopping tracer and initiating trace upload...")
-             self._cleanup()
-             self._upload_task = self._run_async(self._upload_traces())
-             self.is_active = False
-             self.dataset_name = None
+             # if not self.is_instrumented:
+             # logger.warning("Tracer was not started. No traces to upload.")
+             # return "No traces to upload"
+
+             # print("Stopping tracer and initiating trace upload...")
+             # self._cleanup()
+             # self._upload_task = self._run_async(self._upload_traces())
+             # self.is_active = False
+             # self.dataset_name = None

-             return "Trace upload initiated. Use get_upload_status() to check the status."
+             user_detail = self._pass_user_data()
+             data, additional_metadata = self.langchain_tracer.stop()
+
+             # Add cost if possible
+             if additional_metadata.get('model_name'):
+                 try:
+                     model_cost_data = self.model_cost_dict[additional_metadata['model_name']]
+                     if 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
+                         prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
+                         completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
+                         additional_metadata["cost"] = prompt_cost + completion_cost
+
+                         additional_metadata["prompt_tokens"] = float(additional_metadata["tokens"].get("prompt", 0.0))
+                         additional_metadata["completion_tokens"] = float(additional_metadata["tokens"].get("completion", 0.0))
+
+                         logger.debug("Metadata added successfully")
+                     else:
+                         logger.warning("Token information missing in additional_metadata")
+
+                     if 'cost' in additional_metadata:
+                         additional_metadata["cost"] = float(additional_metadata["cost"])
+                     else:
+                         additional_metadata["cost"] = 0.0
+                         logger.warning("Total cost information not available")
+
+
+                 except Exception as e:
+                     logger.warning(f"Error adding cost: {e}")
+             else:
+                 logger.debug("Model name not available in additional_metadata, skipping cost calculation")
+
+
+             # Safely remove tokens and cost dictionaries if they exist
+             additional_metadata.pop("tokens", None)
+             # additional_metadata.pop("cost", None)
+
+             # Safely merge metadata
+             combined_metadata = {}
+             if user_detail.get('trace_user_detail', {}).get('metadata'):
+                 combined_metadata.update(user_detail['trace_user_detail']['metadata'])
+             if additional_metadata:
+                 combined_metadata.update(additional_metadata)
+
+             langchain_traces = langchain_tracer_extraction(data, self.user_context)
+             final_result = convert_langchain_callbacks_output(langchain_traces)
+
+             # Safely set required fields in final_result
+             if final_result and isinstance(final_result, list) and len(final_result) > 0:
+                 final_result[0]['project_name'] = user_detail.get('project_name', '')
+                 final_result[0]['trace_id'] = str(uuid.uuid4())
+                 final_result[0]['session_id'] = None
+                 final_result[0]['metadata'] = combined_metadata
+                 final_result[0]['pipeline'] = user_detail.get('trace_user_detail', {}).get('pipeline')
+
+                 filepath_3 = os.path.join(os.getcwd(), "final_result.json")
+                 with open(filepath_3, 'w') as f:
+                     json.dump(final_result, f, indent=2)
+
+                 print(filepath_3)
+             else:
+                 logger.warning("No valid langchain traces found in final_result")
+
+             # additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
+             additional_metadata_dict = additional_metadata if additional_metadata else {}
+
+             UploadTraces(json_file_path=filepath_3,
+                          project_name=self.project_name,
+                          project_id=self.project_id,
+                          dataset_name=self.dataset_name,
+                          user_detail=self._pass_user_data(),
+                          base_url=self.base_url
+                          ).upload_traces(additional_metadata_keys=additional_metadata_dict)
+
+             return
+
          elif self.tracer_type == "llamaindex":
-             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
-             return LlamaIndexTracer(self._pass_user_data()).stop()
+             if self.llamaindex_tracer is None:
+                 raise ValueError("LlamaIndex tracer was not started")
+
+             user_detail = self._pass_user_data()
+             converted_back_to_callback = self.llamaindex_tracer.stop()
+
+             filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
+             with open(filepath_3, 'w') as f:
+                 json.dump(converted_back_to_callback, f, default=str, indent=2)
+
+             if converted_back_to_callback:
+                 UploadTraces(json_file_path=filepath_3,
+                              project_name=self.project_name,
+                              project_id=self.project_id,
+                              dataset_name=self.dataset_name,
+                              user_detail=user_detail,
+                              base_url=self.base_url
+                              ).upload_traces()
+             return
          else:
              super().stop()
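To make the cost step in the langchain branch concrete, a small worked example with assumed token counts and litellm-style per-token rates (all values illustrative):

    model_cost_data = {"input_cost_per_token": 0.000006, "output_cost_per_token": 0.0000024}
    tokens = {"prompt": 1500, "completion": 300}

    prompt_cost = tokens["prompt"] * model_cost_data["input_cost_per_token"]           # 0.009
    completion_cost = tokens["completion"] * model_cost_data["output_cost_per_token"]  # 0.00072
    cost = prompt_cost + completion_cost                                               # 0.00972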

@@ -379,4 +562,67 @@ class Tracer(AgenticTracing):
                  }
              }
          }
-         return user_detail
+         return user_detail
+
+     def update_dynamic_exporter(self, **kwargs):
+         """
+         Update the dynamic exporter's properties.
+
+         Args:
+             **kwargs: Keyword arguments to update. Can include any of the following:
+                 - files_to_zip: List of files to zip
+                 - project_name: Project name
+                 - project_id: Project ID
+                 - dataset_name: Dataset name
+                 - user_details: User details
+                 - base_url: Base URL for API
+
+         Raises:
+             AttributeError: If the tracer_type is not 'agentic/llamaindex' or if the dynamic_exporter is not initialized.
+         """
+         if self.tracer_type != "agentic/llamaindex" or not hasattr(self, "dynamic_exporter"):
+             raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")
+
+         for key, value in kwargs.items():
+             if hasattr(self.dynamic_exporter, key):
+                 setattr(self.dynamic_exporter, key, value)
+                 logger.debug(f"Updated dynamic exporter's {key} to {value}")
+             else:
+                 logger.warning(f"Dynamic exporter has no attribute '{key}'")
+
+     def update_file_list(self):
+         """
+         Update the file list in the dynamic exporter with the latest tracked files.
+         This is useful when new files are added to the project during execution.
+
+         Raises:
+             AttributeError: If the tracer_type is not 'agentic/llamaindex' or if the dynamic_exporter is not initialized.
+         """
+         if self.tracer_type != "agentic/llamaindex" or not hasattr(self, "dynamic_exporter"):
+             raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")
+
+         # Get the latest list of unique files
+         list_of_unique_files = self.file_tracker.get_unique_files()
+
+         # Update the dynamic exporter's files_to_zip property
+         self.dynamic_exporter.files_to_zip = list_of_unique_files
+         logger.debug(f"Updated dynamic exporter's files_to_zip with {len(list_of_unique_files)} files")
+
+     def add_context(self, context):
+         """
+         Add context information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
+
+         Args:
+             context: Additional context information to be added to the trace. Can be a string.
+
+         Raises:
+             ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
+         """
+         if self.tracer_type not in ["langchain", "llamaindex"]:
+             raise ValueError("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
+
+         # Convert string context to string if needed
+         if isinstance(context, str):
+             self.user_context = context
+         else:
+             raise TypeError("context must be a string")
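A usage sketch of the three new public methods; the dataset name, URL, and context string are illustrative. The first two assume a tracer constructed with tracer_type="agentic/llamaindex", while add_context assumes "langchain" or "llamaindex":

    # agentic/llamaindex: retarget the live exporter without reinitializing the tracer
    tracer.update_dynamic_exporter(dataset_name="run_42",
                                   base_url="https://catalyst.example.com/api")
    tracer.update_file_list()  # re-sync files_to_zip with the file tracker

    # langchain / llamaindex: attach retrieval context to the trace before stop()
    tracer.add_context("Support KB, 2024-06 snapshot")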
ragaai_catalyst/tracers/upload_traces.py

@@ -20,7 +20,7 @@ class UploadTraces:
          self.base_url = base_url
          self.timeout = 10

-     def _create_dataset_schema_with_trace(self):
+     def _create_dataset_schema_with_trace(self, additional_metadata_keys=None, additional_pipeline_keys=None):
          SCHEMA_MAPPING_NEW = {
              "trace_id": {"columnType": "traceId"},
              "trace_uri": {"columnType": "traceUri"},
@@ -34,6 +34,18 @@ class UploadTraces:
              "vector_store":{"columnType":"pipeline"},
              "feedback": {"columnType":"feedBack"}
          }
+
+         if additional_metadata_keys:
+             for key in additional_metadata_keys:
+                 if key == "model_name":
+                     SCHEMA_MAPPING_NEW['response']["modelName"] = additional_metadata_keys[key]
+                 else:
+                     SCHEMA_MAPPING_NEW[key] = {"columnType": key, "parentColumn": "response"}
+
+         if additional_pipeline_keys:
+             for key in additional_pipeline_keys:
+                 SCHEMA_MAPPING_NEW[key] = {"columnType": "pipeline"}
+
          def make_request():
              headers = {
                  "Content-Type": "application/json",
@@ -119,9 +131,14 @@ class UploadTraces:
                                   data=payload,
                                   timeout=self.timeout)

-     def upload_traces(self):
-         self._create_dataset_schema_with_trace()
-         presignedUrl = self._get_presigned_url()
-         self._put_presigned_url(presignedUrl, self.json_file_path)
-         self._insert_traces(presignedUrl)
-         print("Traces uploaded")
+     def upload_traces(self, additional_metadata_keys=None, additional_pipeline_keys=None):
+         try:
+             self._create_dataset_schema_with_trace(additional_metadata_keys, additional_pipeline_keys)
+             presignedUrl = self._get_presigned_url()
+             if presignedUrl is None:
+                 return
+             self._put_presigned_url(presignedUrl, self.json_file_path)
+             self._insert_traces(presignedUrl)
+             print("Traces uploaded")
+         except Exception as e:
+             print(f"Error while uploading agentic traces: {e}")
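For illustration, a hypothetical call mirroring what Tracer.stop() now does for langchain traces, and the schema entries it would produce. This assumes the elided middle of SCHEMA_MAPPING_NEW defines a "response" entry, which the model_name branch indexes into:

    uploader.upload_traces(additional_metadata_keys={"model_name": "gpt-4", "cost": 0.00972})

    # Inside _create_dataset_schema_with_trace this adds:
    #   SCHEMA_MAPPING_NEW["response"]["modelName"] = "gpt-4"
    #   SCHEMA_MAPPING_NEW["cost"] = {"columnType": "cost", "parentColumn": "response"}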
ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py

@@ -0,0 +1,61 @@
+ import json
+
+ def convert_langchain_callbacks_output(result, project_name="", metadata="", pipeline=""):
+     initial_struc = [{
+         "project_name": project_name,
+         "trace_id": "NA",
+         "session_id": "NA",
+         "metadata" : metadata,
+         "pipeline" : pipeline,
+         "traces" : []
+     }]
+     traces_data = []
+
+     prompt = result["data"]["prompt"]
+     response = result["data"]["response"]
+     context = result["data"]["context"]
+     final_prompt = ""
+
+     prompt_structured_data = {
+         "traceloop.entity.input": json.dumps({
+             "kwargs": {
+                 "input": prompt,
+             }
+         })
+     }
+     prompt_data = {
+         "name": "retrieve_documents.langchain.workflow",
+         "attributes": prompt_structured_data,
+     }
+
+     traces_data.append(prompt_data)
+
+     context_structured_data = {
+         "traceloop.entity.input": json.dumps({
+             "kwargs": {
+                 "context": context
+             }
+         }),
+         "traceloop.entity.output": json.dumps({
+             "kwargs": {
+                 "text": prompt
+             }
+         })
+     }
+     context_data = {
+         "name": "PromptTemplate.langchain.task",
+         "attributes": context_structured_data,
+     }
+     traces_data.append(context_data)
+
+     response_structured_data = {"gen_ai.completion.0.content": response,
+                                 "gen_ai.prompt.0.content": prompt}
+     response_data = {
+         "name": "ChatOpenAI.langchain.task",
+         "attributes" : response_structured_data
+     }
+     traces_data.append(response_data)
+
+     initial_struc[0]["traces"] = traces_data
+
+     return initial_struc
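A quick sketch of the expected input and output shape, using placeholder strings:

    result = {"data": {"prompt": "What is RagaAI?",
                       "response": "An AI observability platform.",
                       "context": "retrieved documents..."}}
    out = convert_langchain_callbacks_output(result, project_name="demo")
    # out is a one-element list; out[0]["traces"] holds three spans named
    # "retrieve_documents.langchain.workflow", "PromptTemplate.langchain.task",
    # and "ChatOpenAI.langchain.task".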
ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py

@@ -0,0 +1,69 @@
+ def convert_llamaindex_instrumentation_to_callback(data):
+     data = data[0]
+     initial_struc = [{
+         "trace_id": data["trace_id"],
+         "project_id": data["project_id"],
+         "session_id": data["session_id"],
+         "trace_type": data["trace_type"],
+         "metadata" : data["metadata"],
+         "pipeline" : data["pipeline"],
+         "traces" : []
+     }]
+
+     traces_data = []
+
+     prompt = data["data"]["prompt"]
+     response = data["data"]["response"]
+     context = data["data"]["context"]
+     system_prompt = data["data"]["system_prompt"]
+
+     prompt_structured_data = {
+         "event_type": "query",
+         "payload": {
+             "query_str": prompt
+         }
+     }
+     traces_data.append(prompt_structured_data)
+
+     response_structured_data = {
+         "event_type": "llm",
+         "payload": {
+             "response": {
+                 "message": {
+                     "content": response,
+                 }
+             }
+         }
+     }
+     traces_data.append(response_structured_data)
+
+     context_structured_data = {
+         "event_type": "retrieve",
+         "payload": {
+             "nodes": [
+                 {
+                     "node": {
+                         "text": context
+                     }
+                 }
+             ]
+         }
+     }
+     traces_data.append(context_structured_data)
+
+     system_prompt_structured_data = {
+         "event_type": "llm",
+         "payload": {
+             "messages": [
+                 {
+                     "role": "system",
+                     "content": system_prompt
+                 }
+             ]
+         }
+     }
+     traces_data.append(system_prompt_structured_data)
+
+     initial_struc[0]["traces"] = traces_data
+
+     return initial_struc
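And a matching sketch for the llamaindex converter, again with placeholder values:

    data = [{"trace_id": "t-1", "project_id": 1, "session_id": None,
             "trace_type": "llamaindex", "metadata": {}, "pipeline": {},
             "data": {"prompt": "Q?", "response": "A.", "context": "ctx",
                      "system_prompt": "Answer briefly."}}]
    out = convert_llamaindex_instrumentation_to_callback(data)
    # out[0]["traces"] holds four callback-style events: a query, an llm response,
    # a retrieve event with context nodes, and an llm event carrying the system prompt.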