ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,23 @@
|
|
1
|
+
from audioop import add
|
1
2
|
import os
|
3
|
+
import uuid
|
2
4
|
import datetime
|
3
5
|
import logging
|
4
6
|
import asyncio
|
5
7
|
import aiohttp
|
6
8
|
import requests
|
9
|
+
from litellm import model_cost
|
10
|
+
|
7
11
|
from contextlib import contextmanager
|
8
12
|
from concurrent.futures import ThreadPoolExecutor
|
9
|
-
|
13
|
+
from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
|
14
|
+
from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
|
15
|
+
|
16
|
+
from ragaai_catalyst.tracers.utils.langchain_tracer_extraction_logic import langchain_tracer_extraction
|
17
|
+
from ragaai_catalyst.tracers.upload_traces import UploadTraces
|
18
|
+
import tempfile
|
19
|
+
import json
|
20
|
+
import numpy as np
|
10
21
|
from opentelemetry.sdk import trace as trace_sdk
|
11
22
|
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
12
23
|
from ragaai_catalyst.tracers.exporters.file_span_exporter import FileSpanExporter
|
@@ -18,15 +29,17 @@ from ragaai_catalyst.tracers.instrumentators import (
|
|
18
29
|
)
|
19
30
|
from ragaai_catalyst.tracers.utils import get_unique_key
|
20
31
|
# from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
|
32
|
+
from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer
|
21
33
|
from ragaai_catalyst import RagaAICatalyst
|
22
|
-
from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing
|
34
|
+
from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing
|
23
35
|
from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
|
24
|
-
from ragaai_catalyst.tracers.
|
36
|
+
from ragaai_catalyst.tracers.exporters.ragaai_trace_exporter import RAGATraceExporter
|
37
|
+
from ragaai_catalyst.tracers.agentic_tracing.utils.file_name_tracker import TrackName
|
25
38
|
|
26
39
|
logger = logging.getLogger(__name__)
|
27
40
|
|
28
41
|
class Tracer(AgenticTracing):
|
29
|
-
NUM_PROJECTS =
|
42
|
+
NUM_PROJECTS = 99999
|
30
43
|
TIMEOUT = 10
|
31
44
|
def __init__(
|
32
45
|
self,
|
@@ -77,7 +90,17 @@ class Tracer(AgenticTracing):
|
|
77
90
|
|
78
91
|
# take care of auto_instrumentation
|
79
92
|
if isinstance(auto_instrumentation, bool):
|
80
|
-
if
|
93
|
+
if tracer_type == "agentic/llamaindex":
|
94
|
+
auto_instrumentation = {
|
95
|
+
"llm": False,
|
96
|
+
"tool": False,
|
97
|
+
"agent": False,
|
98
|
+
"user_interaction": False,
|
99
|
+
"file_io": False,
|
100
|
+
"network": False,
|
101
|
+
"custom": False
|
102
|
+
}
|
103
|
+
elif auto_instrumentation:
|
81
104
|
auto_instrumentation = {
|
82
105
|
"llm": True,
|
83
106
|
"tool": True,
|
@@ -98,11 +121,11 @@ class Tracer(AgenticTracing):
|
|
98
121
|
"custom": False
|
99
122
|
}
|
100
123
|
elif isinstance(auto_instrumentation, dict):
|
101
|
-
auto_instrumentation = {k: v for k, v in auto_instrumentation.items()
|
124
|
+
auto_instrumentation = {k: v for k, v in auto_instrumentation.items()}
|
102
125
|
for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
|
103
126
|
if key not in auto_instrumentation:
|
104
|
-
auto_instrumentation[key] =
|
105
|
-
|
127
|
+
auto_instrumentation[key] = True
|
128
|
+
self.model_custom_cost = {}
|
106
129
|
super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)
|
107
130
|
|
108
131
|
self.project_name = project_name
|
@@ -116,12 +139,11 @@ class Tracer(AgenticTracing):
|
|
116
139
|
self.upload_timeout = upload_timeout
|
117
140
|
self.base_url = f"{RagaAICatalyst.BASE_URL}"
|
118
141
|
self.timeout = 30
|
119
|
-
self.num_projects =
|
142
|
+
self.num_projects = 99999
|
120
143
|
self.start_time = datetime.datetime.now().astimezone().isoformat()
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
update_model_costs_from_github()
|
144
|
+
self.model_cost_dict = model_cost
|
145
|
+
self.user_context = "" # Initialize user_context to store context from add_context
|
146
|
+
self.file_tracker = TrackName()
|
125
147
|
|
126
148
|
try:
|
127
149
|
response = requests.get(
|
@@ -152,43 +174,112 @@ class Tracer(AgenticTracing):
|
|
152
174
|
raise
|
153
175
|
|
154
176
|
if tracer_type == "langchain":
|
155
|
-
self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
|
177
|
+
# self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
|
156
178
|
|
157
|
-
self._tracer_provider = self._setup_provider()
|
158
|
-
self._instrumentor = self._setup_instrumentor(tracer_type)
|
159
|
-
self.is_instrumented = False
|
179
|
+
# self._tracer_provider = self._setup_provider()
|
180
|
+
# self._instrumentor = self._setup_instrumentor(tracer_type)
|
181
|
+
# self.is_instrumented = False
|
182
|
+
# self._upload_task = None
|
160
183
|
self._upload_task = None
|
161
184
|
elif tracer_type == "llamaindex":
|
162
185
|
self._upload_task = None
|
163
|
-
|
164
|
-
|
186
|
+
self.llamaindex_tracer = None
|
187
|
+
elif tracer_type == "agentic/llamaindex":
|
188
|
+
from opentelemetry.sdk import trace as trace_sdk
|
189
|
+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
190
|
+
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
|
191
|
+
from ragaai_catalyst.tracers.exporters.dynamic_trace_exporter import DynamicTraceExporter
|
192
|
+
|
193
|
+
# Get the code_files
|
194
|
+
self.file_tracker.trace_main_file()
|
195
|
+
list_of_unique_files = self.file_tracker.get_unique_files()
|
196
|
+
|
197
|
+
# Create a dynamic exporter that allows property updates
|
198
|
+
self.dynamic_exporter = DynamicTraceExporter(
|
199
|
+
files_to_zip=list_of_unique_files,
|
200
|
+
project_name=self.project_name,
|
201
|
+
project_id=self.project_id,
|
202
|
+
dataset_name=self.dataset_name,
|
203
|
+
user_details=self.user_details,
|
204
|
+
base_url=self.base_url,
|
205
|
+
custom_model_cost=self.model_custom_cost
|
206
|
+
)
|
207
|
+
|
208
|
+
tracer_provider = trace_sdk.TracerProvider()
|
209
|
+
tracer_provider.add_span_processor(SimpleSpanProcessor(self.dynamic_exporter))
|
210
|
+
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)
|
165
211
|
else:
|
166
212
|
self._upload_task = None
|
167
213
|
# raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
|
168
214
|
|
169
|
-
|
215
|
+
def set_model_cost(self, cost_config):
    """
    Register custom pricing for a single model.

    Prices are supplied per one million tokens and stored per token in
    ``self.model_custom_cost`` under the model's name. Calling this again
    for the same model name overwrites the earlier entry.

    Args:
        cost_config (dict): Dictionary containing model cost configuration with keys:
            - model_name (str): Name of the model
            - input_cost_per_million_token (float): Cost per one million input tokens
            - output_cost_per_million_token (float): Cost per one million output tokens

    Raises:
        TypeError: If cost_config is not a dictionary.
        ValueError: If any required key is missing from cost_config.

    Example:
        tracer.set_model_cost({
            "model_name": "gpt-4",
            "input_cost_per_million_token": 6,
            "output_cost_per_million_token": 2.40
        })
    """
    if not isinstance(cost_config, dict):
        raise TypeError("cost_config must be a dictionary")

    # A tuple (not a set) keeps the error message deterministic across runs.
    required_keys = (
        "model_name",
        "input_cost_per_million_token",
        "output_cost_per_million_token",
    )
    missing_keys = [key for key in required_keys if key not in cost_config]
    if missing_keys:
        raise ValueError(
            f"cost_config must contain all required keys: {list(required_keys)} "
            f"(missing: {missing_keys})"
        )

    tokens_per_million = 1000000
    model_name = cost_config["model_name"]
    # Both conversions happen before the assignment, so a bad value leaves
    # any previously registered cost for this model untouched.
    self.model_custom_cost[model_name] = {
        "input_cost_per_token": float(cost_config["input_cost_per_million_token"]) / tokens_per_million,
        "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) / tokens_per_million,
    }
|
244
|
+
|
170
245
|
def set_dataset_name(self, dataset_name):
    """
    Point this tracer at a different dataset.

    For an 'agentic/llamaindex' tracer that owns a dynamic exporter, the
    exporter and this instance are updated in place. In every other case
    the tracer re-runs ``__init__`` with the new dataset name and its
    current project name, tracer type, pipeline, metadata, description
    and upload timeout.

    Args:
        dataset_name (str): The new dataset name to set
    """
    has_dynamic_exporter = (
        self.tracer_type == "agentic/llamaindex" and hasattr(self, "dynamic_exporter")
    )

    if not has_dynamic_exporter:
        # No live exporter to patch: rebuild the tracer around the new name.
        self.__init__(
            dataset_name=dataset_name,
            project_name=self.project_name,
            tracer_type=self.tracer_type,
            pipeline=self.pipeline,
            metadata=self.metadata,
            description=self.description,
            upload_timeout=self.upload_timeout,
        )
        return

    # Patch the exporter first, then mirror the change on the tracer itself.
    self.dynamic_exporter.dataset_name = dataset_name
    logger.debug(f"Updated dynamic exporter's dataset_name to {dataset_name}")
    self.dataset_name = dataset_name

    # user_details is rebuilt only after self.dataset_name has been updated.
    refreshed_details = self._pass_user_data()
    self.user_details = refreshed_details
    self.dynamic_exporter.user_details = refreshed_details
|
192
283
|
|
193
284
|
def _improve_metadata(self, metadata, tracer_type):
|
194
285
|
if metadata is None:
|
@@ -239,14 +330,15 @@ class Tracer(AgenticTracing):
|
|
239
330
|
def start(self):
    """Start tracing with the backend selected by ``self.tracer_type``.

    - 'langchain': creates a LangchainTracer, keeps it on
      ``self.langchain_tracer`` and returns the result of its ``start()``.
    - 'llamaindex': creates a LlamaIndexInstrumentationTracer from the
      current user data, keeps it on ``self.llamaindex_tracer`` and returns
      the result of its ``start()``.
    - anything else: delegates to the parent class and returns ``self``.
    """
    tracer_kind = self.tracer_type

    if tracer_kind == "langchain":
        callback_tracer = LangchainTracer()
        self.langchain_tracer = callback_tracer
        return callback_tracer.start()

    if tracer_kind == "llamaindex":
        instrumentation_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
        self.llamaindex_tracer = instrumentation_tracer
        return instrumentation_tracer.start()

    super().start()
    return self
|
@@ -254,20 +346,111 @@ class Tracer(AgenticTracing):
|
|
254
346
|
def stop(self):
    """Stop the tracer and initiate trace upload.

    Behaviour depends on ``self.tracer_type``:

    - 'langchain': stops the LangchainTracer, adds cost and token counts to
      the metadata it returned (when the model is present in
      ``self.model_cost_dict``), converts the collected data to the upload
      format, writes it to ``final_result.json`` in the current working
      directory and uploads it. If the conversion yields no traces, a
      warning is logged and nothing is written or uploaded.
    - 'llamaindex': stops the LlamaIndex tracer, writes its output to
      ``llama_final_result.json`` in the current working directory and
      uploads it when the output is non-empty.
    - anything else: delegates to the parent class.

    Raises:
        ValueError: If the langchain or llamaindex tracer was never started.
    """
    if self.tracer_type == "langchain":
        # The attribute only exists after start(); fail with a clear message
        # (mirroring the llamaindex branch) instead of an AttributeError.
        langchain_tracer = getattr(self, "langchain_tracer", None)
        if langchain_tracer is None:
            raise ValueError("Langchain tracer was not started")

        user_detail = self._pass_user_data()
        data, additional_metadata = langchain_tracer.stop()
        if additional_metadata is None:
            additional_metadata = {}

        # Add cost if possible
        model_name = additional_metadata.get('model_name')
        if model_name:
            # Best effort: a model missing from the price table or malformed
            # token data must not prevent the traces from being uploaded.
            try:
                model_cost_data = self.model_cost_dict[model_name]
                if 'tokens' in additional_metadata and all(
                    k in additional_metadata['tokens'] for k in ['prompt', 'completion']
                ):
                    tokens = additional_metadata["tokens"]
                    prompt_cost = tokens["prompt"] * model_cost_data["input_cost_per_token"]
                    completion_cost = tokens["completion"] * model_cost_data["output_cost_per_token"]
                    additional_metadata["cost"] = prompt_cost + completion_cost

                    additional_metadata["prompt_tokens"] = float(tokens.get("prompt", 0.0))
                    additional_metadata["completion_tokens"] = float(tokens.get("completion", 0.0))

                    logger.debug("Metadata added successfully")
                else:
                    logger.warning("Token information missing in additional_metadata")

                if 'cost' in additional_metadata:
                    additional_metadata["cost"] = float(additional_metadata["cost"])
                else:
                    additional_metadata["cost"] = 0.0
                    logger.warning("Total cost information not available")
            except Exception as e:
                logger.warning(f"Error adding cost: {e}")
        else:
            logger.debug("Model name not available in additional_metadata, skipping cost calculation")

        # The raw token dictionary is dropped once the flat *_tokens fields exist.
        additional_metadata.pop("tokens", None)

        # User-supplied metadata first; tracer-derived values win on key clashes.
        combined_metadata = {}
        if user_detail.get('trace_user_detail', {}).get('metadata'):
            combined_metadata.update(user_detail['trace_user_detail']['metadata'])
        if additional_metadata:
            combined_metadata.update(additional_metadata)

        langchain_traces = langchain_tracer_extraction(data, self.user_context)
        final_result = convert_langchain_callbacks_output(langchain_traces)

        if not (final_result and isinstance(final_result, list)):
            # Nothing was written to disk, so there is no file to upload.
            # Previously execution fell through to UploadTraces with
            # filepath_3 unbound and raised UnboundLocalError.
            logger.warning("No valid langchain traces found in final_result")
            return

        final_result[0]['project_name'] = user_detail.get('project_name', '')
        final_result[0]['trace_id'] = str(uuid.uuid4())
        final_result[0]['session_id'] = None
        final_result[0]['metadata'] = combined_metadata
        final_result[0]['pipeline'] = user_detail.get('trace_user_detail', {}).get('pipeline')

        filepath_3 = os.path.join(os.getcwd(), "final_result.json")
        with open(filepath_3, 'w') as f:
            json.dump(final_result, f, indent=2)

        print(filepath_3)

        UploadTraces(json_file_path=filepath_3,
                     project_name=self.project_name,
                     project_id=self.project_id,
                     dataset_name=self.dataset_name,
                     user_detail=self._pass_user_data(),
                     base_url=self.base_url
                     ).upload_traces(additional_metadata_keys=additional_metadata)

        return

    elif self.tracer_type == "llamaindex":
        if self.llamaindex_tracer is None:
            raise ValueError("LlamaIndex tracer was not started")

        user_detail = self._pass_user_data()
        converted_back_to_callback = self.llamaindex_tracer.stop()

        filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
        with open(filepath_3, 'w') as f:
            # default=str stringifies any value json cannot serialise natively.
            json.dump(converted_back_to_callback, f, default=str, indent=2)

        if converted_back_to_callback:
            UploadTraces(json_file_path=filepath_3,
                         project_name=self.project_name,
                         project_id=self.project_id,
                         dataset_name=self.dataset_name,
                         user_detail=user_detail,
                         base_url=self.base_url
                         ).upload_traces()
        return
    else:
        super().stop()
|
273
456
|
|
@@ -379,4 +562,67 @@ class Tracer(AgenticTracing):
|
|
379
562
|
}
|
380
563
|
}
|
381
564
|
}
|
382
|
-
return user_detail
|
565
|
+
return user_detail
|
566
|
+
|
567
|
+
def update_dynamic_exporter(self, **kwargs):
    """
    Set one or more attributes on the dynamic exporter.

    Each keyword whose name is already an attribute of the exporter is
    assigned onto it; unknown names are skipped with a warning.

    Args:
        **kwargs: Attribute names and their new values, for example:
            - files_to_zip: List of files to zip
            - project_name: Project name
            - project_id: Project ID
            - dataset_name: Dataset name
            - user_details: User details
            - base_url: Base URL for API

    Raises:
        AttributeError: If the tracer_type is not 'agentic/llamaindex' or if the dynamic_exporter is not initialized.
    """
    exporter_available = (
        self.tracer_type == "agentic/llamaindex" and hasattr(self, "dynamic_exporter")
    )
    if not exporter_available:
        raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")

    for name, new_value in kwargs.items():
        if not hasattr(self.dynamic_exporter, name):
            logger.warning(f"Dynamic exporter has no attribute '{name}'")
            continue
        setattr(self.dynamic_exporter, name, new_value)
        logger.debug(f"Updated dynamic exporter's {name} to {new_value}")
|
592
|
+
|
593
|
+
def update_file_list(self):
    """
    Refresh the dynamic exporter's ``files_to_zip`` from the file tracker.

    Call this after new files have been added to the project during
    execution so the exporter picks them up.

    Raises:
        AttributeError: If the tracer_type is not 'agentic/llamaindex' or if the dynamic_exporter is not initialized.
    """
    exporter_available = (
        self.tracer_type == "agentic/llamaindex" and hasattr(self, "dynamic_exporter")
    )
    if not exporter_available:
        raise AttributeError("Dynamic exporter is only available for 'agentic/llamaindex' tracer type")

    # Ask the tracker for its current set of files and hand it to the exporter.
    tracked_files = self.file_tracker.get_unique_files()
    self.dynamic_exporter.files_to_zip = tracked_files
    logger.debug(f"Updated dynamic exporter's files_to_zip with {len(tracked_files)} files")
|
610
|
+
|
611
|
+
def add_context(self, context):
    """
    Store user-supplied context on the tracer as ``self.user_context``.

    Only the 'langchain' and 'llamaindex' tracer types accept context.

    Args:
        context: The context text to attach to the trace. Must be a string.

    Raises:
        ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
        TypeError: If context is not a string.
    """
    supported_types = ("langchain", "llamaindex")
    if self.tracer_type not in supported_types:
        raise ValueError("add_context is only supported for 'langchain' and 'llamaindex' tracer types")

    # Only plain strings are accepted; nothing is converted.
    if not isinstance(context, str):
        raise TypeError("context must be a string")

    self.user_context = context
|
@@ -20,7 +20,7 @@ class UploadTraces:
|
|
20
20
|
self.base_url = base_url
|
21
21
|
self.timeout = 10
|
22
22
|
|
23
|
-
def _create_dataset_schema_with_trace(self):
|
23
|
+
def _create_dataset_schema_with_trace(self, additional_metadata_keys=None, additional_pipeline_keys=None):
|
24
24
|
SCHEMA_MAPPING_NEW = {
|
25
25
|
"trace_id": {"columnType": "traceId"},
|
26
26
|
"trace_uri": {"columnType": "traceUri"},
|
@@ -34,6 +34,18 @@ class UploadTraces:
|
|
34
34
|
"vector_store":{"columnType":"pipeline"},
|
35
35
|
"feedback": {"columnType":"feedBack"}
|
36
36
|
}
|
37
|
+
|
38
|
+
if additional_metadata_keys:
|
39
|
+
for key in additional_metadata_keys:
|
40
|
+
if key == "model_name":
|
41
|
+
SCHEMA_MAPPING_NEW['response']["modelName"] = additional_metadata_keys[key]
|
42
|
+
else:
|
43
|
+
SCHEMA_MAPPING_NEW[key] = {"columnType": key, "parentColumn": "response"}
|
44
|
+
|
45
|
+
if additional_pipeline_keys:
|
46
|
+
for key in additional_pipeline_keys:
|
47
|
+
SCHEMA_MAPPING_NEW[key] = {"columnType": "pipeline"}
|
48
|
+
|
37
49
|
def make_request():
|
38
50
|
headers = {
|
39
51
|
"Content-Type": "application/json",
|
@@ -119,9 +131,14 @@ class UploadTraces:
|
|
119
131
|
data=payload,
|
120
132
|
timeout=self.timeout)
|
121
133
|
|
122
|
-
def upload_traces(self):
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
134
|
+
def upload_traces(self, additional_metadata_keys=None, additional_pipeline_keys=None):
    """Create the dataset schema, then upload the trace file and register it.

    Steps, in order: create the dataset schema (forwarding both optional
    arguments), request a presigned URL, PUT ``self.json_file_path`` to that
    URL, and insert the traces. If no presigned URL is returned the method
    stops quietly after the schema step.

    Any exception raised along the way is caught and printed rather than
    propagated, so this method always returns None.

    Args:
        additional_metadata_keys: Forwarded to the schema-creation step.
        additional_pipeline_keys: Forwarded to the schema-creation step.
    """
    try:
        self._create_dataset_schema_with_trace(additional_metadata_keys, additional_pipeline_keys)
        upload_url = self._get_presigned_url()
        if upload_url is not None:
            self._put_presigned_url(upload_url, self.json_file_path)
            self._insert_traces(upload_url)
            print("Traces uploaded")
    except Exception as err:
        print(f"Error while uploading agentic traces: {err}")
|
@@ -0,0 +1,61 @@
|
|
1
|
+
import json
|
2
|
+
|
3
|
+
def convert_langchain_callbacks_output(result, project_name="", metadata="", pipeline=""):
    """Convert extracted langchain data into the trace-upload structure.

    Args:
        result: Mapping with a "data" entry that holds the "prompt",
            "response" and "context" values of the traced run.
        project_name: Value stored under "project_name".
        metadata: Value stored under "metadata".
        pipeline: Value stored under "pipeline".

    Returns:
        A one-element list. The element carries the project fields, the
        placeholder "NA" for "trace_id" and "session_id", and a "traces"
        list with three entries: the workflow input (prompt), the prompt
        template task (context in, prompt out) and the chat model task
        (prompt and response).

    Raises:
        KeyError: If "data" or one of its three expected keys is missing.
    """
    extracted = result["data"]
    prompt = extracted["prompt"]
    response = extracted["response"]
    context = extracted["context"]

    workflow_span = {
        "name": "retrieve_documents.langchain.workflow",
        "attributes": {
            "traceloop.entity.input": json.dumps({"kwargs": {"input": prompt}}),
        },
    }

    prompt_template_span = {
        "name": "PromptTemplate.langchain.task",
        "attributes": {
            "traceloop.entity.input": json.dumps({"kwargs": {"context": context}}),
            "traceloop.entity.output": json.dumps({"kwargs": {"text": prompt}}),
        },
    }

    chat_model_span = {
        "name": "ChatOpenAI.langchain.task",
        "attributes": {
            "gen_ai.completion.0.content": response,
            "gen_ai.prompt.0.content": prompt,
        },
    }

    return [{
        "project_name": project_name,
        "trace_id": "NA",
        "session_id": "NA",
        "metadata": metadata,
        "pipeline": pipeline,
        "traces": [workflow_span, prompt_template_span, chat_model_span],
    }]
|
@@ -0,0 +1,69 @@
|
|
1
|
+
def convert_llamaindex_instrumentation_to_callback(data):
    """Convert LlamaIndex instrumentation output into callback-style traces.

    Only the first record of ``data`` is converted.

    Args:
        data: Sequence of trace records. The first record must provide
            "trace_id", "project_id", "session_id", "trace_type",
            "metadata", "pipeline", and a "data" mapping with "prompt",
            "response", "context" and "system_prompt".

    Returns:
        A one-element list whose element repeats the record's identifying
        fields and carries a "traces" list of four events, in this order:
        query (prompt), llm (response), retrieve (context) and llm (system
        prompt). An empty list is returned when ``data`` is empty.

    Raises:
        KeyError: If the first record lacks one of the expected keys.
    """
    # No record means nothing to convert; return an empty result instead of
    # failing with IndexError on data[0].
    if not data:
        return []

    record = data[0]
    converted = {
        "trace_id": record["trace_id"],
        "project_id": record["project_id"],
        "session_id": record["session_id"],
        "trace_type": record["trace_type"],
        "metadata": record["metadata"],
        "pipeline": record["pipeline"],
        "traces": [],
    }

    payload = record["data"]
    prompt = payload["prompt"]
    response = payload["response"]
    context = payload["context"]
    system_prompt = payload["system_prompt"]

    query_event = {
        "event_type": "query",
        "payload": {"query_str": prompt},
    }
    response_event = {
        "event_type": "llm",
        "payload": {"response": {"message": {"content": response}}},
    }
    retrieve_event = {
        "event_type": "retrieve",
        "payload": {"nodes": [{"node": {"text": context}}]},
    }
    system_prompt_event = {
        "event_type": "llm",
        "payload": {"messages": [{"role": "system", "content": system_prompt}]},
    }

    converted["traces"] = [query_event, response_event, retrieve_event, system_prompt_event]
    return [converted]
|