ragaai-catalyst 2.1.5b25__py3-none-any.whl → 2.1.5b27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/evaluation.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +137 -101
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +1 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +4 -6
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +193 -155
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +7 -60
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +4 -6
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +5 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +33 -13
- ragaai_catalyst/tracers/distributed.py +10 -27
- ragaai_catalyst/tracers/langchain_callback.py +59 -6
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +37 -20
- ragaai_catalyst/tracers/upload_traces.py +4 -1
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- {ragaai_catalyst-2.1.5b25.dist-info → ragaai_catalyst-2.1.5b27.dist-info}/METADATA +11 -4
- {ragaai_catalyst-2.1.5b25.dist-info → ragaai_catalyst-2.1.5b27.dist-info}/RECORD +21 -18
- {ragaai_catalyst-2.1.5b25.dist-info → ragaai_catalyst-2.1.5b27.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.5b25.dist-info → ragaai_catalyst-2.1.5b27.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.5b25.dist-info → ragaai_catalyst-2.1.5b27.dist-info}/top_level.txt +0 -0
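
The functional core of this release is the replacement of the callback-based `LlamaIndexTracer` with a new instrumentation-based `LlamaIndexInstrumentationTracer`, wired into `Tracer.start()`/`Tracer.stop()` together with two new conversion utilities. A minimal sketch of the user-facing flow this enables; the constructor argument names are inferred from attributes used in this diff (`self.project_name`, `self.dataset_name`, `self.tracer_type`), not from documented API:

```python
# Hedged sketch, not a verbatim usage from this package's docs.
from ragaai_catalyst.tracers.tracer import Tracer

tracer = Tracer(
    project_name="my-project",   # hypothetical values
    dataset_name="my-dataset",
    tracer_type="llamaindex",    # now routed to LlamaIndexInstrumentationTracer
)
tracer.start()                   # registers LlamaIndex instrumentation handlers

# ... run a LlamaIndex workload, e.g. query_engine.query("...") ...

tracer.stop()                    # converts events to callback format and uploads llama_final_result.json
```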
ragaai_catalyst/tracers/llamaindex_instrumentation.py (new file, +424)

@@ -0,0 +1,424 @@
+from configparser import InterpolationMissingOptionError
+import json
+from datetime import datetime
+from typing import Any, Optional, Dict, List, ClassVar
+from pydantic import Field
+# from treelib import Tree
+
+from llama_index.core.instrumentation.span import SimpleSpan
+from llama_index.core.instrumentation.span_handlers.base import BaseSpanHandler
+from llama_index.core.instrumentation.events import BaseEvent
+from llama_index.core.instrumentation.event_handlers import BaseEventHandler
+from llama_index.core.instrumentation import get_dispatcher
+from llama_index.core.instrumentation.span_handlers import SimpleSpanHandler
+
+from llama_index.core.instrumentation.events.agent import (
+    AgentChatWithStepStartEvent,
+    AgentChatWithStepEndEvent,
+    AgentRunStepStartEvent,
+    AgentRunStepEndEvent,
+    AgentToolCallEvent,
+)
+from llama_index.core.instrumentation.events.chat_engine import (
+    StreamChatErrorEvent,
+    StreamChatDeltaReceivedEvent,
+)
+from llama_index.core.instrumentation.events.embedding import (
+    EmbeddingStartEvent,
+    EmbeddingEndEvent,
+)
+from llama_index.core.instrumentation.events.llm import (
+    LLMPredictEndEvent,
+    LLMPredictStartEvent,
+    LLMStructuredPredictEndEvent,
+    LLMStructuredPredictStartEvent,
+    LLMCompletionEndEvent,
+    LLMCompletionStartEvent,
+    LLMChatEndEvent,
+    LLMChatStartEvent,
+    LLMChatInProgressEvent,
+)
+from llama_index.core.instrumentation.events.query import (
+    QueryStartEvent,
+    QueryEndEvent,
+)
+from llama_index.core.instrumentation.events.rerank import (
+    ReRankStartEvent,
+    ReRankEndEvent,
+)
+from llama_index.core.instrumentation.events.retrieval import (
+    RetrievalStartEvent,
+    RetrievalEndEvent,
+)
+from llama_index.core.instrumentation.events.span import (
+    SpanDropEvent,
+)
+from llama_index.core.instrumentation.events.synthesis import (
+    SynthesizeStartEvent,
+    SynthesizeEndEvent,
+    GetResponseEndEvent,
+    GetResponseStartEvent,
+)
+
+import uuid
+
+from .utils.extraction_logic_llama_index import extract_llama_index_data
+from .utils.convert_llama_instru_callback import convert_llamaindex_instrumentation_to_callback
+
+class EventHandler(BaseEventHandler):
+    """Example event handler.
+
+    This event handler is an example of how to create a custom event handler.
+
+    In general, logged events are treated as single events in a point in time,
+    that link to a span. The span is a collection of events that are related to
+    a single task. The span is identified by a unique span_id.
+
+    While events are independent, there is some hierarchy.
+    For example, in query_engine.query() call with a reranker attached:
+    - QueryStartEvent
+    - RetrievalStartEvent
+    - EmbeddingStartEvent
+    - EmbeddingEndEvent
+    - RetrievalEndEvent
+    - RerankStartEvent
+    - RerankEndEvent
+    - SynthesizeStartEvent
+    - GetResponseStartEvent
+    - LLMPredictStartEvent
+    - LLMChatStartEvent
+    - LLMChatEndEvent
+    - LLMPredictEndEvent
+    - GetResponseEndEvent
+    - SynthesizeEndEvent
+    - QueryEndEvent
+    """
+
+    events: List[BaseEvent] = []
+    current_trace: List[Dict[str, Any]] = []  # Store events for the current trace
+
+
+    @classmethod
+    def class_name(cls) -> str:
+        """Class name."""
+        return "EventHandler"
+
+    def handle(self, event: BaseEvent) -> None:
+        """Logic for handling event."""
+        # print("-----------------------")
+        # # all events have these attributes
+        # print(event.id_)
+        # print(event.timestamp)
+        # print(event.span_id)
+
+        # Prepare event details dictionary
+        event_details = {
+            "id": event.id_,
+            "timestamp": event.timestamp,
+            "span_id": event.span_id,
+            "event_type": event.class_name(),
+        }
+
+        # event specific attributes
+        # print(f"Event type: {event.class_name()}")
+        if isinstance(event, AgentRunStepStartEvent):
+            event_details.update({
+                "task_id": event.task_id,
+                "step": event.step,
+                "input": event.input,
+            })
+        if isinstance(event, AgentRunStepEndEvent):
+            event_details.update({
+                "step_output": event.step_output,
+            })
+        if isinstance(event, AgentChatWithStepStartEvent):
+            event_details.update({
+                "user_msg": event.user_msg,
+            })
+        if isinstance(event, AgentChatWithStepEndEvent):
+            event_details.update({
+                "response": event.response,
+            })
+        if isinstance(event, AgentToolCallEvent):
+            event_details.update({
+                "arguments": event.arguments,
+                "tool_name": event.tool.name,
+                "tool_description": event.tool.description,
+                "tool_openai": event.tool.to_openai_tool(),
+            })
+        if isinstance(event, StreamChatDeltaReceivedEvent):
+            event_details.update({
+                "delta": event.delta,
+            })
+        if isinstance(event, StreamChatErrorEvent):
+            event_details.update({
+                "exception": event.exception,
+            })
+        if isinstance(event, EmbeddingStartEvent):
+            event_details.update({
+                "model_dict": event.model_dict,
+            })
+        if isinstance(event, EmbeddingEndEvent):
+            event_details.update({
+                "chunks": event.chunks,
+                "embeddings": event.embeddings[0][:5],
+            })
+        if isinstance(event, LLMPredictStartEvent):
+            event_details.update({
+                "template": event.template,
+                "template_args": event.template_args,
+            })
+        if isinstance(event, LLMPredictEndEvent):
+            event_details.update({
+                "output": event.output,
+            })
+        if isinstance(event, LLMStructuredPredictStartEvent):
+            event_details.update({
+                "template": event.template,
+                "template_args": event.template_args,
+                "output_cls": event.output_cls,
+            })
+        if isinstance(event, LLMStructuredPredictEndEvent):
+            event_details.update({
+                "output": event.output,
+            })
+        if isinstance(event, LLMCompletionStartEvent):
+            event_details.update({
+                "model_dict": event.model_dict,
+                "prompt": event.prompt,
+                "additional_kwargs": event.additional_kwargs,
+            })
+        if isinstance(event, LLMCompletionEndEvent):
+            event_details.update({
+                "response": event.response,
+                "prompt": event.prompt,
+            })
+        if isinstance(event, LLMChatInProgressEvent):
+            event_details.update({
+                "messages": event.messages,
+                "response": event.response,
+            })
+        if isinstance(event, LLMChatStartEvent):
+            event_details.update({
+                "messages": event.messages,
+                "additional_kwargs": event.additional_kwargs,
+                "model_dict": event.model_dict,
+            })
+        if isinstance(event, LLMChatEndEvent):
+            event_details.update({
+                "messages": event.messages,
+                "response": event.response,
+            })
+        if isinstance(event, RetrievalStartEvent):
+            event_details.update({
+                "str_or_query_bundle": event.str_or_query_bundle,
+            })
+        if isinstance(event, RetrievalEndEvent):
+            event_details.update({
+                "str_or_query_bundle": event.str_or_query_bundle,
+                "nodes": event.nodes,
+                "text": event.nodes[0].text
+            })
+        if isinstance(event, ReRankStartEvent):
+            event_details.update({
+                "query": event.query,
+                "nodes": event.nodes,
+                "top_n": event.top_n,
+                "model_name": event.model_name,
+            })
+        if isinstance(event, ReRankEndEvent):
+            event_details.update({
+                "nodes": event.nodes,
+            })
+        if isinstance(event, QueryStartEvent):
+            event_details.update({
+                "query": event.query,
+            })
+        if isinstance(event, QueryEndEvent):
+            event_details.update({
+                "response": event.response,
+                "query": event.query,
+            })
+        if isinstance(event, SpanDropEvent):
+            event_details.update({
+                "err_str": event.err_str,
+            })
+        if isinstance(event, SynthesizeStartEvent):
+            event_details.update({
+                "query": event.query,
+            })
+        if isinstance(event, SynthesizeEndEvent):
+            event_details.update({
+                "response": event.response,
+                "query": event.query,
+            })
+        if isinstance(event, GetResponseStartEvent):
+            event_details.update({
+                "query_str": event.query_str,
+            })
+
+        # Append event details to current_trace
+        self.current_trace.append(event_details)
+
+        self.events.append(event)
+
+    def _get_events_by_span(self) -> Dict[str, List[BaseEvent]]:
+        events_by_span: Dict[str, List[BaseEvent]] = {}
+        for event in self.events:
+            if event.span_id in events_by_span:
+                events_by_span[event.span_id].append(event)
+            else:
+                events_by_span[event.span_id] = [event]
+        return events_by_span
+
+    # def _get_event_span_trees(self) -> List[Tree]:
+    #     events_by_span = self._get_events_by_span()
+
+    #     trees = []
+    #     tree = Tree()
+
+    #     for span, sorted_events in events_by_span.items():
+    #         # create root node i.e. span node
+    #         tree.create_node(
+    #             tag=f"{span} (SPAN)",
+    #             identifier=span,
+    #             parent=None,
+    #             data=sorted_events[0].timestamp,
+    #         )
+
+    #         for event in sorted_events:
+    #             tree.create_node(
+    #                 tag=f"{event.class_name()}: {event.id_}",
+    #                 identifier=event.id_,
+    #                 parent=event.span_id,
+    #                 data=event.timestamp,
+    #             )
+
+    #         trees.append(tree)
+    #         tree = Tree()
+    #     return trees
+
+    # def print_event_span_trees(self) -> None:
+    #     """Method for viewing trace trees."""
+    #     trees = self._get_event_span_trees()
+    #     for tree in trees:
+    #         print(
+    #             tree.show(
+    #                 stdout=False, sorting=True, key=lambda node: node.data
+    #             )
+    #         )
+    #         print("")
+
+
+
+class SpanHandler(BaseSpanHandler[SimpleSpan]):
+    # span_dict = {}
+    span_dict: ClassVar[Dict[str, List[SimpleSpan]]] = {}
+
+    @classmethod
+    def class_name(cls) -> str:
+        """Class name."""
+        return "SpanHandler"
+
+    def new_span(
+        self,
+        id_: str,
+        bound_args: Any,
+        instance: Optional[Any] = None,
+        parent_span_id: Optional[str] = None,
+        tags: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> Optional[SimpleSpan]:
+        """Create a span."""
+        # logic for creating a new MyCustomSpan
+        if id_ not in self.span_dict:
+            self.span_dict[id_] = []
+        self.span_dict[id_].append(
+            SimpleSpan(id_=id_, parent_id=parent_span_id)
+        )
+
+    def prepare_to_exit_span(
+        self,
+        id_: str,
+        bound_args: Any,
+        instance: Optional[Any] = None,
+        result: Optional[Any] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Logic for preparing to exit a span."""
+        pass
+        # if id in self.span_dict:
+        #     return self.span_dict[id].pop()
+
+    def prepare_to_drop_span(
+        self,
+        id_: str,
+        bound_args: Any,
+        instance: Optional[Any] = None,
+        err: Optional[BaseException] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Logic for preparing to drop a span."""
+        pass
+        # if id in self.span_dict:
+        #     return self.span_dict[id].pop()
+
+
+
+class LlamaIndexInstrumentationTracer:
+    def __init__(self, user_detail):
+        """Initialize the LlamaIndexTracer with handlers but don't start tracing yet."""
+        # Initialize the root dispatcher
+        self.root_dispatcher = get_dispatcher()
+
+        # Initialize handlers
+        self.json_event_handler = EventHandler()
+        self.span_handler = SpanHandler()
+        self.simple_span_handler = SimpleSpanHandler()
+
+        self.is_tracing = False  # Flag to check if tracing is active
+
+        self.user_detail = user_detail
+
+    def start(self):
+        """Start tracing by registering handlers."""
+        if self.is_tracing:
+            print("Tracing is already active.")
+            return
+
+        # Register handlers
+        self.root_dispatcher.add_span_handler(self.span_handler)
+        self.root_dispatcher.add_span_handler(self.simple_span_handler)
+        self.root_dispatcher.add_event_handler(self.json_event_handler)
+
+        self.is_tracing = True
+        print("Tracing started.")
+
+    def stop(self):
+        """Stop tracing by unregistering handlers."""
+        if not self.is_tracing:
+            print("Tracing is not active.")
+            return
+
+        # Write current_trace to a JSON file
+        final_traces = {
+            "project_id": self.user_detail["project_id"],
+            "trace_id": str(uuid.uuid4()),
+            "session_id": None,
+            "trace_type": "llamaindex",
+            "metadata": self.user_detail["trace_user_detail"]["metadata"],
+            "pipeline": self.user_detail["trace_user_detail"]["pipeline"],
+            "traces": self.json_event_handler.current_trace,
+
+        }
+
+        with open('new_llamaindex_traces.json', 'w') as f:
+            json.dump([final_traces], f, default=str, indent=4)
+
+        llamaindex_instrumentation_data = extract_llama_index_data([final_traces])
+        converted_back_to_callback = convert_llamaindex_instrumentation_to_callback(llamaindex_instrumentation_data)
+
+        # Just indicate tracing is stopped
+        self.is_tracing = False
+        print("Tracing stopped.")
+        return converted_back_to_callback
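
The new module registers three handlers on LlamaIndex's root dispatcher and, on `stop()`, funnels the accumulated events through the two new utility modules. A minimal standalone sketch; the `user_detail` shape is inferred from the keys `stop()` reads (`project_id`, `trace_user_detail.metadata`, `trace_user_detail.pipeline`) and the values are hypothetical:

```python
from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer

# Hypothetical user_detail; only the keys read by stop() are shown.
user_detail = {
    "project_id": "1234",
    "trace_user_detail": {"metadata": {}, "pipeline": []},
}

tracer = LlamaIndexInstrumentationTracer(user_detail)
tracer.start()   # add_span_handler/add_event_handler on the root dispatcher

# ... any LlamaIndex calls made here are captured as event dicts ...

callback_traces = tracer.stop()  # writes new_llamaindex_traces.json, returns callback-style traces
```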
ragaai_catalyst/tracers/tracer.py (+37 -20)

@@ -29,6 +29,7 @@ from ragaai_catalyst.tracers.instrumentators import (
 )
 from ragaai_catalyst.tracers.utils import get_unique_key
 # from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer
 from ragaai_catalyst import RagaAICatalyst
 from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
 from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
@@ -170,7 +171,6 @@ class Tracer(AgenticTracing):
         elif tracer_type == "llamaindex":
             self._upload_task = None
             self.llamaindex_tracer = None
-            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
 
         else:
             self._upload_task = None
@@ -256,8 +256,7 @@ class Tracer(AgenticTracing):
             self.langchain_tracer = LangchainTracer()
             return self.langchain_tracer.start()
         elif self.tracer_type == "llamaindex":
-
-            self.llamaindex_tracer = LlamaIndexTracer(self._pass_user_data())
+            self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
             return self.llamaindex_tracer.start()
         else:
             super().start()
@@ -286,30 +285,31 @@ class Tracer(AgenticTracing):
                 if 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
                     prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
                     completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
-                    additional_metadata
+                    additional_metadata["cost"] = prompt_cost + completion_cost
+
+                    additional_metadata["prompt_tokens"] = float(additional_metadata["tokens"].get("prompt", 0.0))
+                    additional_metadata["completion_tokens"] = float(additional_metadata["tokens"].get("completion", 0.0))
+
+                    logger.debug("Metadata added successfully")
                 else:
                     logger.warning("Token information missing in additional_metadata")
+
+                if 'cost' in additional_metadata:
+                    additional_metadata["cost"] = float(additional_metadata["cost"])
+                else:
+                    additional_metadata["cost"] = 0.0
+                    logger.warning("Total cost information not available")
+
+
             except Exception as e:
                 logger.warning(f"Error adding cost: {e}")
         else:
             logger.debug("Model name not available in additional_metadata, skipping cost calculation")
 
-        # Safely get total tokens and cost
-        if 'tokens' in additional_metadata and 'total' in additional_metadata['tokens']:
-            additional_metadata["total_tokens"] = float(additional_metadata["tokens"]["total"])
-        else:
-            additional_metadata["total_tokens"] = 0.0
-            logger.warning("Total tokens information not available")
-
-        if 'cost' in additional_metadata and 'total_cost' in additional_metadata['cost']:
-            additional_metadata["total_cost"] = float(additional_metadata["cost"]["total_cost"])
-        else:
-            additional_metadata["total_cost"] = 0.0
-            logger.warning("Total cost information not available")
 
         # Safely remove tokens and cost dictionaries if they exist
         additional_metadata.pop("tokens", None)
-        additional_metadata.pop("cost", None)
+        # additional_metadata.pop("cost", None)
 
         # Safely merge metadata
        combined_metadata = {}
@@ -337,7 +337,8 @@ class Tracer(AgenticTracing):
         else:
             logger.warning("No valid langchain traces found in final_result")
 
-        additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
+        # additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
+        additional_metadata_dict = additional_metadata if additional_metadata else {}
 
         UploadTraces(json_file_path=filepath_3,
                      project_name=self.project_name,
@@ -345,14 +346,30 @@ class Tracer(AgenticTracing):
                      dataset_name=self.dataset_name,
                      user_detail=user_detail,
                      base_url=self.base_url
-                     ).upload_traces(additional_metadata_keys=
+                     ).upload_traces(additional_metadata_keys=additional_metadata_dict)
 
         return
 
     elif self.tracer_type == "llamaindex":
         if self.llamaindex_tracer is None:
             raise ValueError("LlamaIndex tracer was not started")
-
+
+        user_detail = self._pass_user_data()
+        converted_back_to_callback = self.llamaindex_tracer.stop()
+
+        filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
+        with open(filepath_3, 'w') as f:
+            json.dump(converted_back_to_callback, f, default=str, indent=2)
+
+        if converted_back_to_callback:
+            UploadTraces(json_file_path=filepath_3,
+                         project_name=self.project_name,
+                         project_id=self.project_id,
+                         dataset_name=self.dataset_name,
+                         user_detail=user_detail,
+                         base_url=self.base_url
+                         ).upload_traces()
+        return
     else:
         super().stop()
 
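
The reworked cost block in `tracer.py` now stores a single `cost` figure plus explicit `prompt_tokens`/`completion_tokens`, replacing the old `total_tokens`/`total_cost` bookkeeping. A worked illustration of the arithmetic with hypothetical token counts and per-token rates:

```python
# Hypothetical values; mirrors the new branch in tracer.py.
tokens = {"prompt": 1200, "completion": 350}
model_cost_data = {"input_cost_per_token": 5e-7, "output_cost_per_token": 1.5e-6}

prompt_cost = tokens["prompt"] * model_cost_data["input_cost_per_token"]            # 0.000600
completion_cost = tokens["completion"] * model_cost_data["output_cost_per_token"]   # 0.000525
cost = prompt_cost + completion_cost  # stored as additional_metadata["cost"] -> 0.001125

prompt_tokens = float(tokens.get("prompt", 0.0))          # 1200.0
completion_tokens = float(tokens.get("completion", 0.0))  # 350.0
```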
ragaai_catalyst/tracers/upload_traces.py (+4 -1)

@@ -37,7 +37,10 @@ class UploadTraces:
 
         if additional_metadata_keys:
             for key in additional_metadata_keys:
-
+                if key == "model_name":
+                    SCHEMA_MAPPING_NEW['response']["modelName"] = additional_metadata_keys[key]
+                else:
+                    SCHEMA_MAPPING_NEW[key] = {"columnType": key, "parentColumn": "response"}
 
         if additional_pipeline_keys:
             for key in additional_pipeline_keys:
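
This change consumes the dict now passed in from `tracer.py` (`additional_metadata_dict`): `model_name` is folded into the existing `response` column, while every other metadata key becomes its own column parented to `response`. A standalone sketch of the resulting mapping, with the initial `SCHEMA_MAPPING_NEW` shape assumed rather than taken from the package:

```python
# Assumed starting shape; only the "response" entry matters for this branch.
SCHEMA_MAPPING_NEW = {"response": {"columnType": "response"}}
additional_metadata_keys = {"model_name": "gpt-4o-mini", "cost": 0.001125, "latency": 1.8}

for key in additional_metadata_keys:
    if key == "model_name":
        # the model name is attached to the response column itself
        SCHEMA_MAPPING_NEW["response"]["modelName"] = additional_metadata_keys[key]
    else:
        # every other metadata key becomes a child column of "response"
        SCHEMA_MAPPING_NEW[key] = {"columnType": key, "parentColumn": "response"}

# SCHEMA_MAPPING_NEW now carries "cost" and "latency" columns plus response.modelName.
```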
ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py (new file, +69)

@@ -0,0 +1,69 @@
+def convert_llamaindex_instrumentation_to_callback(data):
+    data = data[0]
+    initial_struc = [{
+        "trace_id": data["trace_id"],
+        "project_id": data["project_id"],
+        "session_id": data["session_id"],
+        "trace_type": data["trace_type"],
+        "metadata": data["metadata"],
+        "pipeline": data["pipeline"],
+        "traces": []
+    }]
+
+    traces_data = []
+
+    prompt = data["data"]["prompt"]
+    response = data["data"]["response"]
+    context = data["data"]["context"]
+    system_prompt = data["data"]["system_prompt"]
+
+    prompt_structured_data = {
+        "event_type": "query",
+        "payload": {
+            "query_str": prompt
+        }
+    }
+    traces_data.append(prompt_structured_data)
+
+    response_structured_data = {
+        "event_type": "llm",
+        "payload": {
+            "response": {
+                "message": {
+                    "content": response,
+                }
+            }
+        }
+    }
+    traces_data.append(response_structured_data)
+
+    context_structured_data = {
+        "event_type": "retrieve",
+        "payload": {
+            "nodes": [
+                {
+                    "node": {
+                        "text": context
+                    }
+                }
+            ]
+        }
+    }
+    traces_data.append(context_structured_data)
+
+    system_prompt_structured_data = {
+        "event_type": "llm",
+        "payload": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": system_prompt
+                }
+            ]
+        }
+    }
+    traces_data.append(system_prompt_structured_data)
+
+    initial_struc[0]["traces"] = traces_data
+
+    return initial_struc
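
Given the flat record produced by `extract_llama_index_data`, the converter emits four callback-style events (query, llm response, retrieve, system prompt). A quick check with hypothetical values:

```python
from ragaai_catalyst.tracers.utils.convert_llama_instru_callback import convert_llamaindex_instrumentation_to_callback

# Hypothetical extracted record matching the shape the converter expects.
extracted = [{
    "trace_id": "t-1", "project_id": "p-1", "session_id": None,
    "trace_type": "llamaindex", "metadata": {}, "pipeline": [],
    "data": {
        "prompt": "What is Catalyst?",
        "response": "An observability platform.",
        "context": "Catalyst is ...",
        "system_prompt": {"role": "system", "content": "Be concise."},
    },
}]

result = convert_llamaindex_instrumentation_to_callback(extracted)
assert result[0]["traces"][0] == {"event_type": "query", "payload": {"query_str": "What is Catalyst?"}}
assert len(result[0]["traces"]) == 4  # query, llm response, retrieve, system prompt
```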
ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py (new file, +74)

@@ -0,0 +1,74 @@
+import json
+from typing import Dict, Any, Optional
+
+
+def extract_llama_index_data(data):
+    """
+    Transform llama_index trace data into standardized format
+    """
+    data = data[0]
+
+    # Extract top-level metadata
+    trace_data = {
+        "project_id": data.get("project_id"),
+        "trace_id": data.get("trace_id"),
+        "session_id": data.get("session_id"),
+        "trace_type": data.get("trace_type"),
+        "pipeline": data.get("pipeline"),
+        "metadata": data.get("metadata"),
+        "prompt_length": 0,
+        "data": {
+            "prompt": None,
+            "context": None,
+            "response": None,
+            "system_prompt": None
+        }
+    }
+
+    def get_prompt(data):
+        for span in data:
+            if span["event_type"] == "QueryStartEvent":
+                prompt = span.get("query", "")
+                return prompt
+            if span["event_type"] == "QueryEndEvent":
+                prompt = span.get("query", "")
+                return prompt
+
+
+    def get_context(data):
+        for span in data:
+            if span["event_type"] == "RetrievalEndEvent":
+                context = span.get("text", "")
+                return context
+
+    def get_response(data):
+        for span in data:
+            if span["event_type"] == "QueryEndEvent":
+                response = span.get("response", "")
+                return response
+            # if span["event_type"] == "LLMPredictEndEvent":
+            #     response = span.get("output", "")
+            #     return response
+            # if span["event_type"] == "SynthesizeEndEvent":
+            #     response = span.get("response", "")
+            #     return response
+
+    def get_system_prompt(data):
+        for span in data:
+            if span["event_type"] == "LLMChatStartEvent":
+                response = span.get("messages", "")
+                response = response[0]
+                return response
+
+    # Process traces
+    if "traces" in data:
+        prompt = get_prompt(data["traces"])
+        context = get_context(data["traces"])
+        response = get_response(data["traces"])
+        system_prompt = get_system_prompt(data["traces"])
+
+        trace_data["data"]["prompt"] = prompt
+        trace_data["data"]["context"] = context
+        trace_data["data"]["response"] = response
+        trace_data["data"]["system_prompt"] = system_prompt
+    return [trace_data]
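
The extractor walks the raw event list once per field, keying off event types: `QueryStartEvent`/`QueryEndEvent` for the prompt, `RetrievalEndEvent` for the context, `QueryEndEvent` for the response, and the first message of `LLMChatStartEvent` for the system prompt. A hypothetical trace exercising all four lookups:

```python
from ragaai_catalyst.tracers.utils.extraction_logic_llama_index import extract_llama_index_data

# Hypothetical event stream shaped like EventHandler.current_trace output.
final_traces = {
    "project_id": "p-1", "trace_id": "t-1", "session_id": None,
    "trace_type": "llamaindex", "metadata": {}, "pipeline": [],
    "traces": [
        {"event_type": "QueryStartEvent", "query": "What is Catalyst?"},
        {"event_type": "RetrievalEndEvent", "text": "Catalyst is ..."},
        {"event_type": "LLMChatStartEvent", "messages": [{"role": "system", "content": "Be concise."}]},
        {"event_type": "QueryEndEvent", "query": "What is Catalyst?", "response": "An observability platform."},
    ],
}

extracted = extract_llama_index_data([final_traces])
assert extracted[0]["data"]["prompt"] == "What is Catalyst?"
assert extracted[0]["data"]["context"] == "Catalyst is ..."
assert extracted[0]["data"]["response"] == "An observability platform."
assert extracted[0]["data"]["system_prompt"] == {"role": "system", "content": "Be concise."}
```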