ragaai-catalyst 2.1.6.4b0__py3-none-any.whl → 2.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +26 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +6 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +180 -164
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +20 -2
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +169 -50
- ragaai_catalyst/tracers/tracer.py +128 -115
- ragaai_catalyst/tracers/upload_traces.py +3 -3
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +1 -1
- ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +243 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +1 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/RECORD +16 -15
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/licenses/LICENSE +0 -0
- {ragaai_catalyst-2.1.6.4b0.dist-info → ragaai_catalyst-2.1.7.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py

```diff
@@ -10,7 +10,14 @@ from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemMonitor
 from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import submit_upload_task
 from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
 from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import format_interactions
-
+from ragaai_catalyst.tracers.utils.rag_trace_json_converter import rag_trace_json_converter
+from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
+from ragaai_catalyst.tracers.upload_traces import UploadTraces
+import datetime
+import logging
+import asyncio
+import concurrent.futures
+from functools import partial

 logger = logging.getLogger("RagaAICatalyst")
 logging_level = (
```
```diff
@@ -19,9 +26,10 @@ logging_level = (


 class RAGATraceExporter(SpanExporter):
-    def __init__(self, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120):
+    def __init__(self, tracer_type, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120, post_processor = None, max_upload_workers = 30):
         self.trace_spans = dict()
         self.tmp_dir = tempfile.gettempdir()
+        self.tracer_type = tracer_type
         self.files_to_zip = files_to_zip
         self.project_name = project_name
         self.project_id = project_id
```
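The constructor now takes `tracer_type` as its first argument, plus `post_processor` and `max_upload_workers` keywords. A hedged construction sketch follows; the argument values are illustrative placeholders, not package defaults, and in normal use the `Tracer` builds this exporter indirectly through `DynamicTraceExporter` (see the tracer.py hunks below):

```python
# Hypothetical direct construction; values below are placeholders.
from ragaai_catalyst.tracers.exporters.ragaai_trace_exporter import RAGATraceExporter

exporter = RAGATraceExporter(
    tracer_type="langchain",       # new required first argument
    files_to_zip=["app.py"],       # illustrative
    project_name="demo-project",
    project_id="proj-123",
    dataset_name="demo-dataset",
    user_details={},
    base_url="https://catalyst.example.com",  # assumed placeholder endpoint
    custom_model_cost={},
    timeout=120,
    post_processor=None,           # new: optional path-in/path-out callable
    max_upload_workers=30,         # new: thread pool size for RAG uploads
)
```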
```diff
@@ -31,29 +39,35 @@ class RAGATraceExporter(SpanExporter):
         self.custom_model_cost = custom_model_cost
         self.system_monitor = SystemMonitor(dataset_name)
         self.timeout = timeout
+        self.post_processor = post_processor
+        self.max_upload_workers = max_upload_workers

     def export(self, spans):
         for span in spans:
-            [... old lines 37-42 not captured in this diff view ...]
-            self.trace_spans [...]
-            [... old lines 44-56 not captured in this diff view ...]
+            try:
+                span_json = json.loads(span.to_json())
+                trace_id = span_json.get("context").get("trace_id")
+                if trace_id is None:
+                    raise Exception("Trace ID is None")
+
+                if trace_id not in self.trace_spans:
+                    self.trace_spans[trace_id] = list()
+
+                self.trace_spans[trace_id].append(span_json)
+
+                if span_json["parent_id"] is None:
+                    trace = self.trace_spans[trace_id]
+                    try:
+                        self.process_complete_trace(trace, trace_id)
+                    except Exception as e:
+                        raise Exception(f"Error processing complete trace: {e}")
+                    try:
+                        del self.trace_spans[trace_id]
+                    except Exception as e:
+                        raise Exception(f"Error deleting trace: {e}")
+            except Exception as e:
+                logger.warning(f"Error processing span: {e}")
+                continue

         return SpanExportResult.SUCCESS

```
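The rewritten `export` buffers spans per trace and flushes a trace once its root span, the one whose `parent_id` is `None`, arrives. The same buffering pattern in isolation, as a runnable sketch with plain dicts standing in for OpenTelemetry spans:

```python
# Standalone sketch of the pattern above: collect spans per trace_id and
# process the whole trace when the root span (parent_id None) is seen.
trace_spans = {}

def on_span(span_json):
    trace_id = span_json["context"]["trace_id"]
    trace_spans.setdefault(trace_id, []).append(span_json)
    if span_json["parent_id"] is None:  # the root span closes the trace
        complete = trace_spans.pop(trace_id)
        print(f"trace {trace_id} complete with {len(complete)} spans")

on_span({"context": {"trace_id": "t1"}, "parent_id": "span-0"})
on_span({"context": {"trace_id": "t1"}, "parent_id": None})  # flushes t1
```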
```diff
@@ -66,40 +80,81 @@ class RAGATraceExporter(SpanExporter):
     def process_complete_trace(self, spans, trace_id):
         # Convert the trace to ragaai trace format
         try:
-            [... old line 69 not captured in this diff view ...]
+            if self.tracer_type == "langchain":
+                ragaai_trace_details, additional_metadata = self.prepare_rag_trace(spans, trace_id)
+            else:
+                ragaai_trace_details = self.prepare_trace(spans, trace_id)
         except Exception as e:
             print(f"Error converting trace {trace_id}: {e}")
-
         # Upload the trace if upload_trace function is provided
         try:
-            self. [...]
+            if self.post_processor!=None:
+                ragaai_trace_details['trace_file_path'] = self.post_processor(ragaai_trace_details['trace_file_path'])
+            if self.tracer_type == "langchain":
+                asyncio.run(self.upload_rag_trace(ragaai_trace_details, additional_metadata, trace_id))
+            else:
+                self.upload_trace(ragaai_trace_details, trace_id)
         except Exception as e:
             print(f"Error uploading trace {trace_id}: {e}")

     def prepare_trace(self, spans, trace_id):
         try:
-            [... old lines 81-85 not captured in this diff view ...]
-            hash_id, zip_path = zip_list_of_unique_files(
-                self.files_to_zip, output_dir=self.tmp_dir
-            )
-
-            ragaai_trace["metadata"]["system_info"] = asdict(self.system_monitor.get_system_info())
-            ragaai_trace["metadata"]["resources"] = asdict(self.system_monitor.get_resources())
-            ragaai_trace["metadata"]["system_info"]["source_code"] = hash_id
-
-            ragaai_trace["data"][0]["start_time"] = ragaai_trace["start_time"]
-            ragaai_trace["data"][0]["end_time"] = ragaai_trace["end_time"]
-
-            ragaai_trace["project_name"] = self.project_name
+            try:
+                ragaai_trace = convert_json_format(spans, self.custom_model_cost)
+            except Exception as e:
+                print(f"Error in convert_json_format function: {trace_id}: {e}")
+                return None

-            [... old lines 99-102 not captured in this diff view ...]
+            try:
+                interactions = format_interactions(ragaai_trace)
+                ragaai_trace["workflow"] = interactions['workflow']
+            except Exception as e:
+                print(f"Error in format_interactions function: {trace_id}: {e}")
+                return None
+
+            try:
+                # Add source code hash
+                hash_id, zip_path = zip_list_of_unique_files(
+                    self.files_to_zip, output_dir=self.tmp_dir
+                )
+            except Exception as e:
+                print(f"Error in zip_list_of_unique_files function: {trace_id}: {e}")
+                return None
+
+            try:
+                ragaai_trace["metadata"]["system_info"] = asdict(self.system_monitor.get_system_info())
+                ragaai_trace["metadata"]["resources"] = asdict(self.system_monitor.get_resources())
+            except Exception as e:
+                print(f"Error in get_system_info or get_resources function: {trace_id}: {e}")
+                return None
+
+            try:
+                ragaai_trace["metadata"]["system_info"]["source_code"] = hash_id
+            except Exception as e:
+                print(f"Error in adding source code hash: {trace_id}: {e}")
+                return None
+
+            try:
+                ragaai_trace["data"][0]["start_time"] = ragaai_trace["start_time"]
+                ragaai_trace["data"][0]["end_time"] = ragaai_trace["end_time"]
+            except Exception as e:
+                print(f"Error in adding start_time or end_time: {trace_id}: {e}")
+                return None
+
+            try:
+                ragaai_trace["project_name"] = self.project_name
+            except Exception as e:
+                print(f"Error in adding project name: {trace_id}: {e}")
+                return None
+
+            try:
+                # Save the trace_json
+                trace_file_path = os.path.join(self.tmp_dir, f"{trace_id}.json")
+                with open(trace_file_path, "w") as file:
+                    json.dump(ragaai_trace, file, cls=TracerJSONEncoder, indent=2)
+            except Exception as e:
+                print(f"Error in saving trace json: {trace_id}: {e}")
+                return None

             return {
                 'trace_file_path': trace_file_path,
```
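`process_complete_trace` above routes the saved trace file through `self.post_processor` before upload, so a post-processor is simply a callable that maps a trace-JSON path to the path the uploader should use. A minimal conforming sketch (the `redacted` flag is an invented example field):

```python
import json

def redact_post_processor(trace_file_path):
    """Path-in/path-out post-processor: edit the trace JSON, then return
    the path the uploader should read. Rewrites in place for simplicity."""
    with open(trace_file_path) as f:
        trace = json.load(f)
    trace.setdefault("metadata", {})["redacted"] = True  # invented example field
    with open(trace_file_path, "w") as f:
        json.dump(trace, f, indent=2)
    return trace_file_path
```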
```diff
@@ -107,14 +162,13 @@ class RAGATraceExporter(SpanExporter):
                 'hash_id': hash_id
             }
         except Exception as e:
-            [... old line 110 not captured in this diff view ...]
+            print(f"Error converting trace {trace_id}: {str(e)}")
             return None

     def upload_trace(self, ragaai_trace_details, trace_id):
         filepath = ragaai_trace_details['trace_file_path']
         hash_id = ragaai_trace_details['hash_id']
-        zip_path = ragaai_trace_details['code_zip_path']
-
+        zip_path = ragaai_trace_details['code_zip_path']
         self.upload_task_id = submit_upload_task(
             filepath=filepath,
             hash_id=hash_id,
```
```diff
@@ -127,4 +181,69 @@ class RAGATraceExporter(SpanExporter):
             timeout=self.timeout
         )

-        logger.info(f"Submitted upload task with ID: {self.upload_task_id}")
+        logger.info(f"Submitted upload task with ID: {self.upload_task_id}")
+
+    async def upload_rag_trace(self, ragaai_trace, additional_metadata, trace_id):
+        try:
+            trace_file_path = os.path.join(self.tmp_dir, f"{trace_id}.json")
+            with open(trace_file_path, 'w') as f:
+                json.dump(ragaai_trace, f, indent=2)
+
+            # Create a ThreadPoolExecutor with max_workers=30
+            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_upload_workers) as executor:
+                # Create a partial function with all the necessary arguments
+                upload_func = partial(
+                    UploadTraces(
+                        json_file_path=trace_file_path,
+                        project_name=self.project_name,
+                        project_id=self.project_id,
+                        dataset_name=self.dataset_name,
+                        user_detail=self.user_details,
+                        base_url=self.base_url
+                    ).upload_traces,
+                    additional_metadata_keys=additional_metadata
+                )
+
+                # Implement retry logic - attempt upload up to 3 times
+                max_retries = 3
+                retry_count = 0
+                last_exception = None
+
+                while retry_count < max_retries:
+                    try:
+                        # Submit the task to the executor and get a future
+                        loop = asyncio.get_event_loop()
+                        await loop.run_in_executor(executor, upload_func)
+
+                        logger.info(f"Successfully uploaded rag trace {trace_id} on attempt {retry_count + 1}")
+                        return  # Exit the method if upload is successful
+                    except Exception as e:
+                        retry_count += 1
+                        last_exception = e
+                        logger.warning(f"Attempt {retry_count} to upload rag trace {trace_id} failed: {str(e)}")
+
+                        if retry_count < max_retries:
+                            # Add a small delay before retrying (exponential backoff)
+                            await asyncio.sleep(2 ** retry_count)  # 2, 4, 8 seconds
+
+                # If we've exhausted all retries, log the error
+                logger.error(f"Failed to upload rag trace {trace_id} after {max_retries} attempts. Last error: {str(last_exception)}")
+        except Exception as e:
+            logger.error(f"Error preparing rag trace {trace_id} for upload: {str(e)}")
+
+    def prepare_rag_trace(self, spans, trace_id):
+        try:
+            ragaai_trace, additional_metadata = rag_trace_json_converter(spans, self.custom_model_cost, trace_id, self.user_details, self.tracer_type)
+            ragaai_trace["metadata"]["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
+            ragaai_trace["metadata"]["log_source"] = "langchain_tracer"
+
+            if True:
+                converted_ragaai_trace = convert_langchain_callbacks_output(ragaai_trace, self.project_name, ragaai_trace["metadata"], ragaai_trace["pipeline"])
+            else:
+                converted_ragaai_trace = ragaai_trace
+
+            return converted_ragaai_trace, additional_metadata
+
+        except Exception as e:
+            logger.error(f"Error converting trace {trace_id}: {str(e)}")
+            return None
```
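`export` is a synchronous `SpanExporter` hook, so the code above bridges into the async `upload_rag_trace` with `asyncio.run`, then pushes the blocking `UploadTraces.upload_traces` call onto a thread pool via `run_in_executor`, retrying up to three times with exponential backoff. The bridge and retry loop in isolation, as a runnable sketch:

```python
import asyncio
import concurrent.futures

def blocking_upload():
    # Stand-in for UploadTraces(...).upload_traces, which performs blocking I/O.
    print("uploading...")

async def upload_with_retries(max_retries=3):
    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        loop = asyncio.get_running_loop()
        for attempt in range(1, max_retries + 1):
            try:
                # Run the blocking call on the pool without blocking the event loop.
                await loop.run_in_executor(executor, blocking_upload)
                return
            except Exception:
                if attempt < max_retries:
                    await asyncio.sleep(2 ** attempt)  # backoff: 2s, then 4s

asyncio.run(upload_with_retries())
```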
ragaai_catalyst/tracers/tracer.py

```diff
@@ -6,7 +6,7 @@ import asyncio
 import aiohttp
 import requests
 from litellm import model_cost
-
+from pathlib import Path
 from contextlib import contextmanager
 from concurrent.futures import ThreadPoolExecutor
 from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
```
```diff
@@ -59,6 +59,7 @@ class Tracer(AgenticTracing):
         },
         interval_time=2,
         # auto_instrumentation=True/False # to control automatic instrumentation of everything
+        max_upload_workers=30

     ):
         """
```
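Callers can now cap the upload thread pool when constructing the tracer. A hedged usage sketch (the import path and the project/dataset names are assumptions; other arguments keep the defaults visible in this hunk):

```python
from ragaai_catalyst import Tracer  # assumed public import path

tracer = Tracer(
    project_name="my-project",   # placeholder
    dataset_name="my-dataset",   # placeholder
    tracer_type="langchain",
    max_upload_workers=30,       # new in 2.1.7: size of the upload thread pool
)
```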
```diff
@@ -71,7 +72,7 @@ class Tracer(AgenticTracing):
             pipeline (dict, optional): The pipeline configuration. Defaults to None.
             metadata (dict, optional): The metadata. Defaults to None.
             description (str, optional): The description. Defaults to None.
-            timeout (int, optional): The upload timeout in seconds. Defaults to [...]
+            timeout (int, optional): The upload timeout in seconds. Defaults to 120.
             update_llm_cost (bool, optional): Whether to update model costs from GitHub. Defaults to True.
         """

```
```diff
@@ -134,11 +135,14 @@ class Tracer(AgenticTracing):
         self.description = description
         self.timeout = timeout
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
+        self.timeout = timeout
         self.num_projects = 99999
         self.start_time = datetime.datetime.now().astimezone().isoformat()
         self.model_cost_dict = model_cost
         self.user_context = ""  # Initialize user_context to store context from add_context
         self.file_tracker = TrackName()
+        self.post_processor = None
+        self.max_upload_workers = max_upload_workers

         try:
             response = requests.get(
```
```diff
@@ -169,16 +173,18 @@ class Tracer(AgenticTracing):
             raise

         if tracer_type == "langchain":
-            [... old lines 172-175 not captured in this diff view ...]
-            # self.is_instrumented = False
-            # self._upload_task = None
-            self._upload_task = None
+            instrumentors = []
+            from openinference.instrumentation.langchain import LangChainInstrumentor
+            instrumentors += [(LangChainInstrumentor, [])]
+            self._setup_agentic_tracer(instrumentors)
         elif tracer_type == "llamaindex":
             self._upload_task = None
             self.llamaindex_tracer = None
+        elif tracer_type == "rag/langchain":
+            instrumentors = []
+            from openinference.instrumentation.langchain import LangChainInstrumentor
+            instrumentors += [(LangChainInstrumentor, [])]
+            self._setup_agentic_tracer(instrumentors)
         # Handle agentic tracers
         elif tracer_type == "agentic" or tracer_type.startswith("agentic/"):

```
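Both the `langchain` and the new `rag/langchain` branches now hand `(InstrumentorClass, args)` tuples to `_setup_agentic_tracer` instead of the old callback-based `LangchainTracer`. The diff does not show `_setup_agentic_tracer` itself; presumably it instantiates and activates each instrumentor roughly like the standard openinference pattern:

```python
# Sketch only: the actual _setup_agentic_tracer implementation is not in this diff.
from openinference.instrumentation.langchain import LangChainInstrumentor

instrumentors = [(LangChainInstrumentor, [])]
for instrumentor_class, args in instrumentors:
    # BaseInstrumentor.instrument() hooks LangChain runs into OTel spans.
    instrumentor_class().instrument(*args)
```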
```diff
@@ -367,6 +373,95 @@ class Tracer(AgenticTracing):
                 "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) /1000000
             }

+    def register_masking_function(self, masking_func):
+        """
+        Register a masking function that will be used to transform values in the trace data.
+        This method handles all file operations internally and creates a post-processor
+        using the provided masking function.
+
+        Args:
+            masking_func (callable): A function that takes a value and returns the masked value.
+                The function should handle string transformations for masking sensitive data.
+
+        Example:
+            def masking_function(value):
+                if isinstance(value, str):
+                    value = re.sub(r'\b\d+\.\d+\b', 'x.x', value)
+                    value = re.sub(r'\b\d+\b', 'xxxx', value)
+                return value
+        """
+        if not callable(masking_func):
+            raise TypeError("masking_func must be a callable")
+
+        def recursive_mask_values(obj, parent_key=None):
+            """Apply masking to all values in nested structure."""
+            if isinstance(obj, dict):
+                return {k: recursive_mask_values(v, k) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [recursive_mask_values(item, parent_key) for item in obj]
+            elif isinstance(obj, str):
+                # List of keys that should NOT be masked
+                excluded_keys = {
+                    'start_time', 'end_time', 'name', 'id',
+                    'hash_id', 'parent_id', 'source_hash_id',
+                    'cost', 'type', 'feedback', 'error', 'ctx','telemetry.sdk.version',
+                    'telemetry.sdk.language','service.name'
+                }
+                # Apply masking only if the key is NOT in the excluded list
+                if parent_key and parent_key.lower() not in excluded_keys:
+                    return masking_func(obj)
+                return obj
+            else:
+                return obj
+
+        def file_post_processor(original_trace_json_path: os.PathLike) -> os.PathLike:
+            original_path = Path(original_trace_json_path)
+
+            # Read original JSON data
+            with open(original_path, 'r') as f:
+                data = json.load(f)
+
+            # Apply masking only to data['data']
+            data['data'] = recursive_mask_values(data['data'])
+
+            # Create new filename with 'processed_' prefix in /var/tmp/
+            new_filename = f"processed_{original_path.name}"
+            final_trace_json_path = Path("/var/tmp") / new_filename
+
+            # Write modified data to the new file
+            with open(final_trace_json_path, 'w') as f:
+                json.dump(data, f, indent=4)
+
+            logger.debug(f"Created masked trace file: {final_trace_json_path}")
+            return final_trace_json_path
+
+        # Register the created post-processor
+        self.register_post_processor(file_post_processor)
+        logger.debug("Masking function registered successfully as post-processor")
+
+
+    def register_post_processor(self, post_processor_func):
+        """
+        Register a post-processing function that will be called after trace generation.
+
+        Args:
+            post_processor_func (callable): A function that takes a trace JSON file path as input
+                and returns a processed trace JSON file path.
+                The function signature should be:
+                def post_processor_func(original_trace_json_path: os.PathLike) -> os.PathLike
+        """
+        if not callable(post_processor_func):
+            raise TypeError("post_processor_func must be a callable")
+        self.post_processor = post_processor_func
+        # Register in parent AgenticTracing class
+        super().register_post_processor(post_processor_func)
+        # Update DynamicTraceExporter's post-processor if it exists
+        if hasattr(self, 'dynamic_exporter'):
+            self.dynamic_exporter._exporter.post_processor = post_processor_func
+            self.dynamic_exporter._post_processor = post_processor_func
+        logger.info("Registered post process as: "+str(post_processor_func))
+
+
     def set_dataset_name(self, dataset_name):
         """
         Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
```
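Putting the two new hooks together: `register_masking_function` wraps a value-level masker in a file-level post-processor, while `register_post_processor` accepts a path-in/path-out callable directly. A usage sketch built from the docstring's own example (`tracer` is assumed to be an initialized `Tracer`):

```python
import re

def masking_function(value):
    # The docstring's example: blank out floats, then remaining integers.
    if isinstance(value, str):
        value = re.sub(r'\b\d+\.\d+\b', 'x.x', value)
        value = re.sub(r'\b\d+\b', 'xxxx', value)
    return value

tracer.register_masking_function(masking_function)

# Or supply a fully custom path-in/path-out post-processor:
def my_post_processor(original_trace_json_path):
    # ... read, transform, and rewrite the trace JSON here ...
    return original_trace_json_path

tracer.register_post_processor(my_post_processor)
```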
```diff
@@ -455,15 +550,14 @@ class Tracer(AgenticTracing):
     def start(self):
         """Start the tracer."""
         if self.tracer_type == "langchain":
-
-
-            # self.is_instrumented = True
-            # print(f"Tracer started for project: {self.project_name}")
-            self.langchain_tracer = LangchainTracer()
-            return self.langchain_tracer.start()
+            super().start()
+            return self
         elif self.tracer_type == "llamaindex":
             self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
             return self.llamaindex_tracer.start()
+        elif self.tracer_type == "rag/langchain":
+            super().start()
+            return self
         else:
             super().start()
             return self
```
```diff
@@ -471,104 +565,8 @@ class Tracer(AgenticTracing):
     def stop(self):
         """Stop the tracer and initiate trace upload."""
         if self.tracer_type == "langchain":
-
-
-            # return "No traces to upload"
-
-            # print("Stopping tracer and initiating trace upload...")
-            # self._cleanup()
-            # self._upload_task = self._run_async(self._upload_traces())
-            # self.is_active = False
-            # self.dataset_name = None
-
-            user_detail = self._pass_user_data()
-            data, additional_metadata = self.langchain_tracer.stop()
-
-            # Add cost if possible
-            if additional_metadata.get('model_name'):
-                try:
-                    if self.model_custom_cost.get(additional_metadata['model_name']):
-                        model_cost_data = self.model_custom_cost[additional_metadata['model_name']]
-                    else:
-                        model_cost_data = self.model_cost_dict[additional_metadata['model_name']]
-                    if 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
-                        prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
-                        completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
-                        additional_metadata["cost"] = prompt_cost + completion_cost
-
-                        additional_metadata["prompt_tokens"] = float(additional_metadata["tokens"].get("prompt", 0.0))
-                        additional_metadata["completion_tokens"] = float(additional_metadata["tokens"].get("completion", 0.0))
-
-                        logger.debug("Metadata added successfully")
-                    else:
-                        logger.warning("Token information missing in additional_metadata")
-
-                    if 'cost' in additional_metadata:
-                        additional_metadata["cost"] = float(additional_metadata["cost"])
-                    else:
-                        additional_metadata["cost"] = 0.0
-                        logger.warning("Total cost information not available")
-
-
-                except Exception as e:
-                    logger.warning(f"Error adding cost: {e}")
-            else:
-                logger.debug("Model name not available in additional_metadata, skipping cost calculation")
-
-
-            # Safely remove tokens and cost dictionaries if they exist
-            additional_metadata.pop("tokens", None)
-            # additional_metadata.pop("cost", None)
-
-            additional_metadata["model"] = additional_metadata.get("model_name", "")
-
-            # Safely merge metadata
-            combined_metadata = {}
-            if user_detail.get('trace_user_detail', {}).get('metadata'):
-                combined_metadata.update(user_detail['trace_user_detail']['metadata'])
-            if additional_metadata:
-                combined_metadata.update(additional_metadata)
-
-            model_cost_latency_metadata = {}
-            if additional_metadata:
-                model_cost_latency_metadata["model_name"] = additional_metadata["model_name"]
-                model_cost_latency_metadata["total_cost"] = additional_metadata["cost"]
-                model_cost_latency_metadata["total_latency"] = additional_metadata["latency"]
-                model_cost_latency_metadata["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
-                combined_metadata.update(model_cost_latency_metadata)
-
-            langchain_traces = langchain_tracer_extraction(data, self.user_context)
-            final_result = convert_langchain_callbacks_output(langchain_traces)
-
-            # Safely set required fields in final_result
-            if final_result and isinstance(final_result, list) and len(final_result) > 0:
-                final_result[0]['project_name'] = user_detail.get('project_name', '')
-                final_result[0]['trace_id'] = str(uuid.uuid4())
-                final_result[0]['session_id'] = None
-                final_result[0]['metadata'] = combined_metadata
-                final_result[0]['pipeline'] = user_detail.get('trace_user_detail', {}).get('pipeline')
-
-                filepath_3 = os.path.join(os.getcwd(), "final_result.json")
-                with open(filepath_3, 'w') as f:
-                    json.dump(final_result, f, indent=2)
-
-                # print(filepath_3)
-            else:
-                logger.warning("No valid langchain traces found in final_result")
-
-            # additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
-            additional_metadata_dict = additional_metadata if additional_metadata else {}
-
-            UploadTraces(json_file_path=filepath_3,
-                         project_name=self.project_name,
-                         project_id=self.project_id,
-                         dataset_name=self.dataset_name,
-                         user_detail=self._pass_user_data(),
-                         base_url=self.base_url
-                         ).upload_traces(additional_metadata_keys=additional_metadata_dict)
-
-            return
-
+            super().stop()
+            return self
         elif self.tracer_type == "llamaindex":
             if self.llamaindex_tracer is None:
                 raise ValueError("LlamaIndex tracer was not started")
```
```diff
@@ -580,8 +578,18 @@ class Tracer(AgenticTracing):
             with open(filepath_3, 'w') as f:
                 json.dump(converted_back_to_callback, f, default=str, indent=2)

+            # Apply post-processor if registered
+            if self.post_processor is not None:
+                try:
+                    final_trace_filepath = self.post_processor(filepath_3)
+                    logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
+                except Exception as e:
+                    logger.error(f"Error in post-processing: {e}")
+            else:
+                final_trace_filepath = filepath_3
+
             if converted_back_to_callback:
-                UploadTraces(json_file_path= [...]
+                UploadTraces(json_file_path=final_trace_filepath,
                              project_name=self.project_name,
                              project_id=self.project_id,
                              dataset_name=self.dataset_name,
```
```diff
@@ -589,6 +597,8 @@ class Tracer(AgenticTracing):
                              base_url=self.base_url
                              ).upload_traces()
             return
+        elif self.tracer_type == "rag/langchain":
+            super().stop()
         else:
             super().stop()

```
```diff
@@ -746,6 +756,7 @@ class Tracer(AgenticTracing):

         # Create a dynamic exporter that allows property updates
         self.dynamic_exporter = DynamicTraceExporter(
+            tracer_type=self.tracer_type,
             files_to_zip=list_of_unique_files,
             project_name=self.project_name,
             project_id=self.project_id,
```
```diff
@@ -753,7 +764,9 @@ class Tracer(AgenticTracing):
             user_details=self.user_details,
             base_url=self.base_url,
             custom_model_cost=self.model_custom_cost,
-            timeout=self.timeout
+            timeout = self.timeout,
+            post_processor= self.post_processor,
+            max_upload_workers = self.max_upload_workers
         )

         # Set up tracer provider
```
ragaai_catalyst/tracers/upload_traces.py

```diff
@@ -105,7 +105,7 @@ class UploadTraces:

         if "blob.core.windows.net" in presignedUrl: # Azure
             headers["x-ms-blob-type"] = "BlockBlob"
-        print(f"Uploading traces...")
+        # print(f"Uploading traces...")
         with open(filename) as f:
             payload = f.read().replace("\n", "").replace("\r", "").encode()

```
```diff
@@ -142,6 +142,6 @@ class UploadTraces:
                 return
             self._put_presigned_url(presignedUrl, self.json_file_path)
             self._insert_traces(presignedUrl)
-            print("Traces uploaded")
+            # print("Traces uploaded")
         except Exception as e:
-            print(f"Error while uploading [...]
+            print(f"Error while uploading rag traces: {e}")
```
ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py

```diff
@@ -3,7 +3,7 @@ import json
 def convert_langchain_callbacks_output(result, project_name="", metadata="", pipeline=""):
     initial_struc = [{
         "project_name": project_name,
-        "trace_id": " [...]
+        "trace_id": result["trace_id"],
         "session_id": "NA",
         "metadata" : metadata,
         "pipeline" : pipeline,
```
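With the hard-coded value replaced by `result["trace_id"]`, the converter's input must now carry a `trace_id` key; a caller built against 2.1.6.x that passes a dict without one will raise a `KeyError` here. An illustrative call (field values are placeholders, and whether further keys are required is not visible in this hunk):

```python
# Hypothetical minimal input; the real `result` dict is produced upstream
# by the RAG trace conversion and carries many more fields.
result = {"trace_id": "0123456789abcdef"}
payload = convert_langchain_callbacks_output(
    result, project_name="demo", metadata={}, pipeline={}
)
print(payload[0]["trace_id"])  # assuming the function returns `initial_struc`
```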