ragaai-catalyst 2.1.6.4b1__py3-none-any.whl → 2.1.7b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/dataset.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +26 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +6 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +180 -164
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +8 -2
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +168 -50
- ragaai_catalyst/tracers/tracer.py +125 -115
- ragaai_catalyst/tracers/upload_traces.py +3 -3
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +1 -1
- ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +243 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +1 -0
- {ragaai_catalyst-2.1.6.4b1.dist-info → ragaai_catalyst-2.1.7b0.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.1.6.4b1.dist-info → ragaai_catalyst-2.1.7b0.dist-info}/RECORD +16 -15
- {ragaai_catalyst-2.1.6.4b1.dist-info → ragaai_catalyst-2.1.7b0.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.6.4b1.dist-info → ragaai_catalyst-2.1.7b0.dist-info}/licenses/LICENSE +0 -0
- {ragaai_catalyst-2.1.6.4b1.dist-info → ragaai_catalyst-2.1.7b0.dist-info}/top_level.txt +0 -0
@@ -10,7 +10,14 @@ from ragaai_catalyst.tracers.agentic_tracing.utils.system_monitor import SystemM
|
|
10
10
|
from ragaai_catalyst.tracers.agentic_tracing.upload.trace_uploader import submit_upload_task
|
11
11
|
from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import zip_list_of_unique_files
|
12
12
|
from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import format_interactions
|
13
|
-
|
13
|
+
from ragaai_catalyst.tracers.utils.rag_trace_json_converter import rag_trace_json_converter
|
14
|
+
from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
|
15
|
+
from ragaai_catalyst.tracers.upload_traces import UploadTraces
|
16
|
+
import datetime
|
17
|
+
import logging
|
18
|
+
import asyncio
|
19
|
+
import concurrent.futures
|
20
|
+
from functools import partial
|
14
21
|
|
15
22
|
logger = logging.getLogger("RagaAICatalyst")
|
16
23
|
logging_level = (
|
@@ -19,9 +26,10 @@ logging_level = (
|
|
19
26
|
|
20
27
|
|
21
28
|
class RAGATraceExporter(SpanExporter):
|
22
|
-
def __init__(self, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120):
|
29
|
+
def __init__(self, tracer_type, files_to_zip, project_name, project_id, dataset_name, user_details, base_url, custom_model_cost, timeout=120, post_processor = None):
|
23
30
|
self.trace_spans = dict()
|
24
31
|
self.tmp_dir = tempfile.gettempdir()
|
32
|
+
self.tracer_type = tracer_type
|
25
33
|
self.files_to_zip = files_to_zip
|
26
34
|
self.project_name = project_name
|
27
35
|
self.project_id = project_id
|
@@ -31,29 +39,34 @@ class RAGATraceExporter(SpanExporter):
|
|
31
39
|
self.custom_model_cost = custom_model_cost
|
32
40
|
self.system_monitor = SystemMonitor(dataset_name)
|
33
41
|
self.timeout = timeout
|
42
|
+
self.post_processor = post_processor
|
34
43
|
|
35
44
|
def export(self, spans):
    """Group incoming spans by trace id and process each trace once it is complete.

    Every span is decoded from its JSON form and appended to the list kept in
    ``self.trace_spans`` for its trace id. A span whose ``parent_id`` is
    ``None`` is treated as the root span: its arrival marks the trace as
    complete and triggers ``process_complete_trace``.

    Args:
        spans: Iterable of span objects exposing ``to_json()``.

    Returns:
        ``SpanExportResult.SUCCESS`` in all cases; per-span failures are
        logged as warnings and the remaining spans are still handled.
    """
    for span in spans:
        try:
            span_json = json.loads(span.to_json())
            # Tolerate a missing/None "context" so the failure is reported
            # as a missing trace id rather than an opaque AttributeError.
            trace_id = (span_json.get("context") or {}).get("trace_id")
            if trace_id is None:
                raise Exception("Trace ID is None")

            self.trace_spans.setdefault(trace_id, []).append(span_json)

            if span_json["parent_id"] is None:
                # Remove the buffered spans *before* processing: if processing
                # fails the root span will never arrive again, so leaving the
                # entry behind would keep it in memory forever.
                trace = self.trace_spans.pop(trace_id)
                try:
                    self.process_complete_trace(trace, trace_id)
                except Exception as e:
                    raise Exception(f"Error processing complete trace: {e}") from e
        except Exception as e:
            logger.warning(f"Error processing span: {e}")
            continue

    return SpanExportResult.SUCCESS
|
59
72
|
|
@@ -66,40 +79,81 @@ class RAGATraceExporter(SpanExporter):
|
|
66
79
|
def process_complete_trace(self, spans, trace_id):
    """Convert a completed trace and pass it to the uploader for this tracer type.

    For ``tracer_type == "langchain"`` the spans go through
    ``prepare_rag_trace`` and are uploaded with ``upload_rag_trace`` (run to
    completion via ``asyncio.run``); every other tracer type uses
    ``prepare_trace`` / ``upload_trace``. Errors are printed, never raised.

    Args:
        spans: List of span dicts belonging to one trace.
        trace_id: Identifier of that trace.
    """
    is_langchain = self.tracer_type == "langchain"
    ragaai_trace_details = None
    additional_metadata = None

    # Convert the trace to ragaai trace format
    try:
        if is_langchain:
            prepared = self.prepare_rag_trace(spans, trace_id)
            # prepare_rag_trace returns a bare None (not a pair) on failure,
            # so only unpack when something was actually produced.
            if prepared is not None:
                ragaai_trace_details, additional_metadata = prepared
        else:
            ragaai_trace_details = self.prepare_trace(spans, trace_id)
    except Exception as e:
        print(f"Error converting trace {trace_id}: {e}")
        return

    if ragaai_trace_details is None:
        # Conversion failed and was already reported by the prepare helper;
        # attempting an upload would only produce a misleading upload error.
        return

    # Upload the trace if upload_trace function is provided
    try:
        if self.post_processor is not None:
            # NOTE(review): this assumes the details object is a mapping with a
            # 'trace_file_path' entry, which prepare_trace provides; confirm
            # the shape returned by prepare_rag_trace for the langchain path.
            ragaai_trace_details['trace_file_path'] = self.post_processor(ragaai_trace_details['trace_file_path'])
        if is_langchain:
            asyncio.run(self.upload_rag_trace(ragaai_trace_details, additional_metadata, trace_id))
        else:
            self.upload_trace(ragaai_trace_details, trace_id)
    except Exception as e:
        print(f"Error uploading trace {trace_id}: {e}")
|
78
98
|
|
79
99
|
def prepare_trace(self, spans, trace_id):
|
80
100
|
try:
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
hash_id, zip_path = zip_list_of_unique_files(
|
87
|
-
self.files_to_zip, output_dir=self.tmp_dir
|
88
|
-
)
|
89
|
-
|
90
|
-
ragaai_trace["metadata"]["system_info"] = asdict(self.system_monitor.get_system_info())
|
91
|
-
ragaai_trace["metadata"]["resources"] = asdict(self.system_monitor.get_resources())
|
92
|
-
ragaai_trace["metadata"]["system_info"]["source_code"] = hash_id
|
93
|
-
|
94
|
-
ragaai_trace["data"][0]["start_time"] = ragaai_trace["start_time"]
|
95
|
-
ragaai_trace["data"][0]["end_time"] = ragaai_trace["end_time"]
|
96
|
-
|
97
|
-
ragaai_trace["project_name"] = self.project_name
|
101
|
+
try:
|
102
|
+
ragaai_trace = convert_json_format(spans, self.custom_model_cost)
|
103
|
+
except Exception as e:
|
104
|
+
print(f"Error in convert_json_format function: {trace_id}: {e}")
|
105
|
+
return None
|
98
106
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
107
|
+
try:
|
108
|
+
interactions = format_interactions(ragaai_trace)
|
109
|
+
ragaai_trace["workflow"] = interactions['workflow']
|
110
|
+
except Exception as e:
|
111
|
+
print(f"Error in format_interactions function: {trace_id}: {e}")
|
112
|
+
return None
|
113
|
+
|
114
|
+
try:
|
115
|
+
# Add source code hash
|
116
|
+
hash_id, zip_path = zip_list_of_unique_files(
|
117
|
+
self.files_to_zip, output_dir=self.tmp_dir
|
118
|
+
)
|
119
|
+
except Exception as e:
|
120
|
+
print(f"Error in zip_list_of_unique_files function: {trace_id}: {e}")
|
121
|
+
return None
|
122
|
+
|
123
|
+
try:
|
124
|
+
ragaai_trace["metadata"]["system_info"] = asdict(self.system_monitor.get_system_info())
|
125
|
+
ragaai_trace["metadata"]["resources"] = asdict(self.system_monitor.get_resources())
|
126
|
+
except Exception as e:
|
127
|
+
print(f"Error in get_system_info or get_resources function: {trace_id}: {e}")
|
128
|
+
return None
|
129
|
+
|
130
|
+
try:
|
131
|
+
ragaai_trace["metadata"]["system_info"]["source_code"] = hash_id
|
132
|
+
except Exception as e:
|
133
|
+
print(f"Error in adding source code hash: {trace_id}: {e}")
|
134
|
+
return None
|
135
|
+
|
136
|
+
try:
|
137
|
+
ragaai_trace["data"][0]["start_time"] = ragaai_trace["start_time"]
|
138
|
+
ragaai_trace["data"][0]["end_time"] = ragaai_trace["end_time"]
|
139
|
+
except Exception as e:
|
140
|
+
print(f"Error in adding start_time or end_time: {trace_id}: {e}")
|
141
|
+
return None
|
142
|
+
|
143
|
+
try:
|
144
|
+
ragaai_trace["project_name"] = self.project_name
|
145
|
+
except Exception as e:
|
146
|
+
print(f"Error in adding project name: {trace_id}: {e}")
|
147
|
+
return None
|
148
|
+
|
149
|
+
try:
|
150
|
+
# Save the trace_json
|
151
|
+
trace_file_path = os.path.join(self.tmp_dir, f"{trace_id}.json")
|
152
|
+
with open(trace_file_path, "w") as file:
|
153
|
+
json.dump(ragaai_trace, file, cls=TracerJSONEncoder, indent=2)
|
154
|
+
except Exception as e:
|
155
|
+
print(f"Error in saving trace json: {trace_id}: {e}")
|
156
|
+
return None
|
103
157
|
|
104
158
|
return {
|
105
159
|
'trace_file_path': trace_file_path,
|
@@ -107,14 +161,13 @@ class RAGATraceExporter(SpanExporter):
|
|
107
161
|
'hash_id': hash_id
|
108
162
|
}
|
109
163
|
except Exception as e:
|
110
|
-
|
164
|
+
print(f"Error converting trace {trace_id}: {str(e)}")
|
111
165
|
return None
|
112
166
|
|
113
167
|
def upload_trace(self, ragaai_trace_details, trace_id):
|
114
168
|
filepath = ragaai_trace_details['trace_file_path']
|
115
169
|
hash_id = ragaai_trace_details['hash_id']
|
116
|
-
zip_path = ragaai_trace_details['code_zip_path']
|
117
|
-
|
170
|
+
zip_path = ragaai_trace_details['code_zip_path']
|
118
171
|
self.upload_task_id = submit_upload_task(
|
119
172
|
filepath=filepath,
|
120
173
|
hash_id=hash_id,
|
@@ -127,4 +180,69 @@ class RAGATraceExporter(SpanExporter):
|
|
127
180
|
timeout=self.timeout
|
128
181
|
)
|
129
182
|
|
130
|
-
logger.info(f"Submitted upload task with ID: {self.upload_task_id}")
|
183
|
+
logger.info(f"Submitted upload task with ID: {self.upload_task_id}")
|
184
|
+
|
185
|
+
async def upload_rag_trace(self, ragaai_trace, additional_metadata, trace_id):
    """Write a RAG trace to a temp JSON file and upload it, retrying on failure.

    The trace is dumped to ``<tmp_dir>/<trace_id>.json`` and uploaded through
    ``UploadTraces.upload_traces`` on a worker thread so the event loop is not
    blocked. Up to three attempts are made, sleeping 2 s and then 4 s between
    them. All failures are logged; nothing is raised to the caller.

    Args:
        ragaai_trace: JSON-serialisable trace payload.
        additional_metadata: Passed to ``upload_traces`` as
            ``additional_metadata_keys``.
        trace_id: Trace identifier, used for the file name and log messages.
    """
    try:
        trace_file_path = os.path.join(self.tmp_dir, f"{trace_id}.json")
        with open(trace_file_path, 'w') as f:
            json.dump(ragaai_trace, f, indent=2)

        # Bind every argument up front so each retry re-runs the same call.
        upload_func = partial(
            UploadTraces(
                json_file_path=trace_file_path,
                project_name=self.project_name,
                project_id=self.project_id,
                dataset_name=self.dataset_name,
                user_detail=self.user_details,
                base_url=self.base_url
            ).upload_traces,
            additional_metadata_keys=additional_metadata
        )

        max_retries = 3
        last_exception = None
        # Inside a coroutine the running loop is the one to schedule on.
        loop = asyncio.get_running_loop()

        # Only one blocking call is in flight at any time, so a single
        # worker thread is sufficient.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            for attempt in range(1, max_retries + 1):
                try:
                    await loop.run_in_executor(executor, upload_func)
                    logger.info(f"Successfully uploaded rag trace {trace_id} on attempt {attempt}")
                    return  # Upload succeeded; nothing more to do
                except Exception as e:
                    last_exception = e
                    logger.warning(f"Attempt {attempt} to upload rag trace {trace_id} failed: {str(e)}")

                    if attempt < max_retries:
                        # Exponential backoff between attempts: 2 s, then 4 s
                        await asyncio.sleep(2 ** attempt)

        # All attempts failed
        logger.error(f"Failed to upload rag trace {trace_id} after {max_retries} attempts. Last error: {str(last_exception)}")
    except Exception as e:
        logger.error(f"Error preparing rag trace {trace_id} for upload: {str(e)}")
|
232
|
+
|
233
|
+
def prepare_rag_trace(self, spans, trace_id):
    """Build the uploadable RAG trace for a langchain trace.

    The spans are converted with ``rag_trace_json_converter``, stamped with
    the current local time (``recorded_on``) and the ``langchain_tracer`` log
    source, and then reshaped by ``convert_langchain_callbacks_output``.

    Args:
        spans: List of span dicts belonging to one trace.
        trace_id: Identifier of that trace.

    Returns:
        A ``(converted_trace, additional_metadata)`` pair on success, or a
        bare ``None`` (not a pair) when any step fails; the error is logged.
    """
    try:
        ragaai_trace, additional_metadata = rag_trace_json_converter(spans, self.custom_model_cost, trace_id, self.user_details, self.tracer_type)
        ragaai_trace["metadata"]["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
        ragaai_trace["metadata"]["log_source"] = "langchain_tracer"

        converted_ragaai_trace = convert_langchain_callbacks_output(ragaai_trace, self.project_name, ragaai_trace["metadata"], ragaai_trace["pipeline"])

        return converted_ragaai_trace, additional_metadata

    except Exception as e:
        logger.error(f"Error converting trace {trace_id}: {str(e)}")
        return None
|
@@ -6,7 +6,7 @@ import asyncio
|
|
6
6
|
import aiohttp
|
7
7
|
import requests
|
8
8
|
from litellm import model_cost
|
9
|
-
|
9
|
+
from pathlib import Path
|
10
10
|
from contextlib import contextmanager
|
11
11
|
from concurrent.futures import ThreadPoolExecutor
|
12
12
|
from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
|
@@ -71,7 +71,7 @@ class Tracer(AgenticTracing):
|
|
71
71
|
pipeline (dict, optional): The pipeline configuration. Defaults to None.
|
72
72
|
metadata (dict, optional): The metadata. Defaults to None.
|
73
73
|
description (str, optional): The description. Defaults to None.
|
74
|
-
timeout (int, optional): The upload timeout in seconds. Defaults to
|
74
|
+
timeout (int, optional): The upload timeout in seconds. Defaults to 120.
|
75
75
|
update_llm_cost (bool, optional): Whether to update model costs from GitHub. Defaults to True.
|
76
76
|
"""
|
77
77
|
|
@@ -134,11 +134,13 @@ class Tracer(AgenticTracing):
|
|
134
134
|
self.description = description
|
135
135
|
self.timeout = timeout
|
136
136
|
self.base_url = f"{RagaAICatalyst.BASE_URL}"
|
137
|
+
self.timeout = timeout
|
137
138
|
self.num_projects = 99999
|
138
139
|
self.start_time = datetime.datetime.now().astimezone().isoformat()
|
139
140
|
self.model_cost_dict = model_cost
|
140
141
|
self.user_context = "" # Initialize user_context to store context from add_context
|
141
142
|
self.file_tracker = TrackName()
|
143
|
+
self.post_processor = None
|
142
144
|
|
143
145
|
try:
|
144
146
|
response = requests.get(
|
@@ -169,16 +171,18 @@ class Tracer(AgenticTracing):
|
|
169
171
|
raise
|
170
172
|
|
171
173
|
if tracer_type == "langchain":
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
# self.is_instrumented = False
|
177
|
-
# self._upload_task = None
|
178
|
-
self._upload_task = None
|
174
|
+
instrumentors = []
|
175
|
+
from openinference.instrumentation.langchain import LangChainInstrumentor
|
176
|
+
instrumentors += [(LangChainInstrumentor, [])]
|
177
|
+
self._setup_agentic_tracer(instrumentors)
|
179
178
|
elif tracer_type == "llamaindex":
|
180
179
|
self._upload_task = None
|
181
180
|
self.llamaindex_tracer = None
|
181
|
+
elif tracer_type == "rag/langchain":
|
182
|
+
instrumentors = []
|
183
|
+
from openinference.instrumentation.langchain import LangChainInstrumentor
|
184
|
+
instrumentors += [(LangChainInstrumentor, [])]
|
185
|
+
self._setup_agentic_tracer(instrumentors)
|
182
186
|
# Handle agentic tracers
|
183
187
|
elif tracer_type == "agentic" or tracer_type.startswith("agentic/"):
|
184
188
|
|
@@ -367,6 +371,95 @@ class Tracer(AgenticTracing):
|
|
367
371
|
"output_cost_per_token": float(cost_config["output_cost_per_million_token"]) /1000000
|
368
372
|
}
|
369
373
|
|
374
|
+
def register_masking_function(self, masking_func):
    r"""
    Register a masking function that will be used to transform values in the trace data.
    This method handles all file operations internally and creates a post-processor
    using the provided masking function.

    Only string values found under ``data['data']`` of the trace JSON are
    passed to ``masking_func``; strings stored under structural keys (ids,
    timestamps, names, ...) are left untouched. The masked trace is written
    to a new ``processed_<original name>`` file and that path is returned by
    the post-processor.

    Args:
        masking_func (callable): A function that takes a value and returns the masked value.
                                The function should handle string transformations for masking sensitive data.

    Raises:
        TypeError: If ``masking_func`` is not callable.

    Example:
        def masking_function(value):
            if isinstance(value, str):
                value = re.sub(r'\b\d+\.\d+\b', 'x.x', value)
                value = re.sub(r'\b\d+\b', 'xxxx', value)
            return value
    """
    import tempfile

    if not callable(masking_func):
        raise TypeError("masking_func must be a callable")

    # Keys whose string values must NOT be masked. Built once here rather
    # than on every string visited during the recursive walk.
    excluded_keys = frozenset({
        'start_time', 'end_time', 'name', 'id',
        'hash_id', 'parent_id', 'source_hash_id',
        'cost', 'type', 'feedback', 'error', 'ctx', 'telemetry.sdk.version',
        'telemetry.sdk.language', 'service.name'
    })

    def recursive_mask_values(obj, parent_key=None):
        """Apply masking to all values in nested structure."""
        if isinstance(obj, dict):
            return {k: recursive_mask_values(v, k) for k, v in obj.items()}
        if isinstance(obj, list):
            # List items inherit the key under which the list itself is stored.
            return [recursive_mask_values(item, parent_key) for item in obj]
        if isinstance(obj, str):
            # Apply masking only if the key is NOT in the excluded list
            if parent_key and parent_key.lower() not in excluded_keys:
                return masking_func(obj)
        return obj

    def file_post_processor(original_trace_json_path: os.PathLike) -> os.PathLike:
        original_path = Path(original_trace_json_path)

        # Read original JSON data
        with open(original_path, 'r') as f:
            data = json.load(f)

        # Apply masking only to data['data']
        data['data'] = recursive_mask_values(data['data'])

        # Prefer /var/tmp (the historical location); fall back to the
        # platform temp directory where /var/tmp is missing or read-only,
        # e.g. on Windows.
        output_dir = Path("/var/tmp")
        if not (output_dir.is_dir() and os.access(output_dir, os.W_OK)):
            output_dir = Path(tempfile.gettempdir())
        final_trace_json_path = output_dir / f"processed_{original_path.name}"

        # Write modified data to the new file
        with open(final_trace_json_path, 'w') as f:
            json.dump(data, f, indent=4)

        logger.debug(f"Created masked trace file: {final_trace_json_path}")
        return final_trace_json_path

    # Register the created post-processor
    self.register_post_processor(file_post_processor)
    logger.debug("Masking function registered successfully as post-processor")
|
439
|
+
|
440
|
+
|
441
|
+
def register_post_processor(self, post_processor_func):
    """
    Install a hook that is run on the trace file once a trace has been generated.

    Args:
        post_processor_func (callable): Receives the path of the trace JSON file and
                                      returns the path of the processed trace JSON file,
                                      i.e. it has the signature:
                                      def post_processor_func(original_trace_json_path: os.PathLike) -> os.PathLike

    Raises:
        TypeError: If ``post_processor_func`` is not callable.
    """
    if not callable(post_processor_func):
        raise TypeError("post_processor_func must be a callable")

    # Keep the hook on this tracer and hand it to the parent AgenticTracing class.
    self.post_processor = post_processor_func
    super().register_post_processor(post_processor_func)

    # An already-created DynamicTraceExporter (and the exporter it wraps)
    # must see the new hook as well.
    if hasattr(self, 'dynamic_exporter'):
        self.dynamic_exporter._exporter.post_processor = post_processor_func
        self.dynamic_exporter._post_processor = post_processor_func

    logger.info(f"Registered post process as: {post_processor_func!s}")
|
461
|
+
|
462
|
+
|
370
463
|
def set_dataset_name(self, dataset_name):
|
371
464
|
"""
|
372
465
|
Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
|
@@ -455,15 +548,14 @@ class Tracer(AgenticTracing):
|
|
455
548
|
def start(self):
    """Begin tracing and return the object that represents the running tracer."""
    # LlamaIndex is the only tracer type with its own tracer object; it is
    # created here from the user data and its start() result is returned.
    if self.tracer_type == "llamaindex":
        self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
        return self.llamaindex_tracer.start()

    # "langchain", "rag/langchain" and every remaining tracer type all start
    # through the parent class and return this tracer.
    super().start()
    return self
|
@@ -471,104 +563,8 @@ class Tracer(AgenticTracing):
|
|
471
563
|
def stop(self):
|
472
564
|
"""Stop the tracer and initiate trace upload."""
|
473
565
|
if self.tracer_type == "langchain":
|
474
|
-
|
475
|
-
|
476
|
-
# return "No traces to upload"
|
477
|
-
|
478
|
-
# print("Stopping tracer and initiating trace upload...")
|
479
|
-
# self._cleanup()
|
480
|
-
# self._upload_task = self._run_async(self._upload_traces())
|
481
|
-
# self.is_active = False
|
482
|
-
# self.dataset_name = None
|
483
|
-
|
484
|
-
user_detail = self._pass_user_data()
|
485
|
-
data, additional_metadata = self.langchain_tracer.stop()
|
486
|
-
|
487
|
-
# Add cost if possible
|
488
|
-
additional_metadata["cost"] = 0.0
|
489
|
-
if additional_metadata.get('model_name'):
|
490
|
-
try:
|
491
|
-
if self.model_custom_cost.get(additional_metadata['model_name']):
|
492
|
-
model_cost_data = self.model_custom_cost[additional_metadata['model_name']]
|
493
|
-
else:
|
494
|
-
model_cost_data = self.model_cost_dict[additional_metadata['model_name']]
|
495
|
-
if 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
|
496
|
-
prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
|
497
|
-
completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
|
498
|
-
additional_metadata["cost"] = prompt_cost + completion_cost
|
499
|
-
|
500
|
-
additional_metadata["prompt_tokens"] = float(additional_metadata["tokens"].get("prompt", 0.0))
|
501
|
-
additional_metadata["completion_tokens"] = float(additional_metadata["tokens"].get("completion", 0.0))
|
502
|
-
|
503
|
-
logger.debug("Metadata added successfully")
|
504
|
-
else:
|
505
|
-
logger.warning("Token information missing in additional_metadata")
|
506
|
-
|
507
|
-
if 'cost' in additional_metadata:
|
508
|
-
additional_metadata["cost"] = float(additional_metadata["cost"])
|
509
|
-
else:
|
510
|
-
additional_metadata["cost"] = 0.0
|
511
|
-
logger.warning("Total cost information not available")
|
512
|
-
|
513
|
-
|
514
|
-
except Exception as e:
|
515
|
-
logger.warning(f"Error adding cost: {e}")
|
516
|
-
else:
|
517
|
-
logger.debug("Model name not available in additional_metadata, skipping cost calculation")
|
518
|
-
|
519
|
-
|
520
|
-
# Safely remove tokens and cost dictionaries if they exist
|
521
|
-
additional_metadata.pop("tokens", None)
|
522
|
-
|
523
|
-
additional_metadata["model"] = additional_metadata.get("model_name", "")
|
524
|
-
|
525
|
-
# Safely merge metadata
|
526
|
-
combined_metadata = {}
|
527
|
-
if user_detail.get('trace_user_detail', {}).get('metadata'):
|
528
|
-
combined_metadata.update(user_detail['trace_user_detail']['metadata'])
|
529
|
-
if additional_metadata:
|
530
|
-
combined_metadata.update(additional_metadata)
|
531
|
-
|
532
|
-
model_cost_latency_metadata = {}
|
533
|
-
if additional_metadata:
|
534
|
-
model_cost_latency_metadata["model_name"] = additional_metadata.get("model_name", 0)
|
535
|
-
model_cost_latency_metadata["total_cost"] = additional_metadata.get("cost", 0)
|
536
|
-
model_cost_latency_metadata["total_latency"] = additional_metadata.get("latency", 0)
|
537
|
-
model_cost_latency_metadata["recorded_on"] = datetime.datetime.now().astimezone().isoformat()
|
538
|
-
combined_metadata.update(model_cost_latency_metadata)
|
539
|
-
|
540
|
-
langchain_traces = langchain_tracer_extraction(data, self.user_context)
|
541
|
-
final_result = convert_langchain_callbacks_output(langchain_traces)
|
542
|
-
|
543
|
-
# Safely set required fields in final_result
|
544
|
-
if final_result and isinstance(final_result, list) and len(final_result) > 0:
|
545
|
-
final_result[0]['project_name'] = user_detail.get('project_name', '')
|
546
|
-
final_result[0]['trace_id'] = str(uuid.uuid4())
|
547
|
-
final_result[0]['session_id'] = None
|
548
|
-
final_result[0]['metadata'] = combined_metadata
|
549
|
-
final_result[0]['pipeline'] = user_detail.get('trace_user_detail', {}).get('pipeline')
|
550
|
-
|
551
|
-
filepath_3 = os.path.join(os.getcwd(), "final_result.json")
|
552
|
-
with open(filepath_3, 'w') as f:
|
553
|
-
json.dump(final_result, f, indent=2)
|
554
|
-
|
555
|
-
# print(filepath_3)
|
556
|
-
else:
|
557
|
-
logger.warning("No valid langchain traces found in final_result")
|
558
|
-
|
559
|
-
# additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
|
560
|
-
additional_metadata_dict = additional_metadata if additional_metadata else {}
|
561
|
-
|
562
|
-
UploadTraces(json_file_path=filepath_3,
|
563
|
-
project_name=self.project_name,
|
564
|
-
project_id=self.project_id,
|
565
|
-
dataset_name=self.dataset_name,
|
566
|
-
user_detail=self._pass_user_data(),
|
567
|
-
base_url=self.base_url
|
568
|
-
).upload_traces(additional_metadata_keys=additional_metadata_dict)
|
569
|
-
|
570
|
-
return
|
571
|
-
|
566
|
+
super().stop()
|
567
|
+
return self
|
572
568
|
elif self.tracer_type == "llamaindex":
|
573
569
|
if self.llamaindex_tracer is None:
|
574
570
|
raise ValueError("LlamaIndex tracer was not started")
|
@@ -580,8 +576,18 @@ class Tracer(AgenticTracing):
|
|
580
576
|
with open(filepath_3, 'w') as f:
|
581
577
|
json.dump(converted_back_to_callback, f, default=str, indent=2)
|
582
578
|
|
579
|
+
# Apply post-processor if registered
|
580
|
+
if self.post_processor is not None:
|
581
|
+
try:
|
582
|
+
final_trace_filepath = self.post_processor(filepath_3)
|
583
|
+
logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
|
584
|
+
except Exception as e:
|
585
|
+
logger.error(f"Error in post-processing: {e}")
|
586
|
+
else:
|
587
|
+
final_trace_filepath = filepath_3
|
588
|
+
|
583
589
|
if converted_back_to_callback:
|
584
|
-
UploadTraces(json_file_path=
|
590
|
+
UploadTraces(json_file_path=final_trace_filepath,
|
585
591
|
project_name=self.project_name,
|
586
592
|
project_id=self.project_id,
|
587
593
|
dataset_name=self.dataset_name,
|
@@ -589,6 +595,8 @@ class Tracer(AgenticTracing):
|
|
589
595
|
base_url=self.base_url
|
590
596
|
).upload_traces()
|
591
597
|
return
|
598
|
+
elif self.tracer_type == "rag/langchain":
|
599
|
+
super().stop()
|
592
600
|
else:
|
593
601
|
super().stop()
|
594
602
|
|
@@ -746,6 +754,7 @@ class Tracer(AgenticTracing):
|
|
746
754
|
|
747
755
|
# Create a dynamic exporter that allows property updates
|
748
756
|
self.dynamic_exporter = DynamicTraceExporter(
|
757
|
+
tracer_type=self.tracer_type,
|
749
758
|
files_to_zip=list_of_unique_files,
|
750
759
|
project_name=self.project_name,
|
751
760
|
project_id=self.project_id,
|
@@ -753,7 +762,8 @@ class Tracer(AgenticTracing):
|
|
753
762
|
user_details=self.user_details,
|
754
763
|
base_url=self.base_url,
|
755
764
|
custom_model_cost=self.model_custom_cost,
|
756
|
-
timeout=self.timeout
|
765
|
+
timeout = self.timeout,
|
766
|
+
post_processor= self.post_processor
|
757
767
|
)
|
758
768
|
|
759
769
|
# Set up tracer provider
|
@@ -105,7 +105,7 @@ class UploadTraces:
|
|
105
105
|
|
106
106
|
if "blob.core.windows.net" in presignedUrl: # Azure
|
107
107
|
headers["x-ms-blob-type"] = "BlockBlob"
|
108
|
-
print(f"Uploading traces...")
|
108
|
+
# print(f"Uploading traces...")
|
109
109
|
with open(filename) as f:
|
110
110
|
payload = f.read().replace("\n", "").replace("\r", "").encode()
|
111
111
|
|
@@ -142,6 +142,6 @@ class UploadTraces:
|
|
142
142
|
return
|
143
143
|
self._put_presigned_url(presignedUrl, self.json_file_path)
|
144
144
|
self._insert_traces(presignedUrl)
|
145
|
-
print("Traces uploaded")
|
145
|
+
# print("Traces uploaded")
|
146
146
|
except Exception as e:
|
147
|
-
print(f"Error while uploading
|
147
|
+
print(f"Error while uploading rag traces: {e}")
|
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
def convert_langchain_callbacks_output(result, project_name="", metadata="", pipeline=""):
|
4
4
|
initial_struc = [{
|
5
5
|
"project_name": project_name,
|
6
|
-
"trace_id": "
|
6
|
+
"trace_id": result["trace_id"],
|
7
7
|
"session_id": "NA",
|
8
8
|
"metadata" : metadata,
|
9
9
|
"pipeline" : pipeline,
|