ragaai-catalyst 2.1.5b25__py3-none-any.whl → 2.1.5b27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,424 @@
+ import json
+ from typing import Any, Optional, Dict, List, ClassVar
+ # from treelib import Tree
+
+ from llama_index.core.instrumentation.span import SimpleSpan
+ from llama_index.core.instrumentation.span_handlers.base import BaseSpanHandler
+ from llama_index.core.instrumentation.events import BaseEvent
+ from llama_index.core.instrumentation.event_handlers import BaseEventHandler
+ from llama_index.core.instrumentation import get_dispatcher
+ from llama_index.core.instrumentation.span_handlers import SimpleSpanHandler
+
+ from llama_index.core.instrumentation.events.agent import (
+     AgentChatWithStepStartEvent,
+     AgentChatWithStepEndEvent,
+     AgentRunStepStartEvent,
+     AgentRunStepEndEvent,
+     AgentToolCallEvent,
+ )
+ from llama_index.core.instrumentation.events.chat_engine import (
+     StreamChatErrorEvent,
+     StreamChatDeltaReceivedEvent,
+ )
+ from llama_index.core.instrumentation.events.embedding import (
+     EmbeddingStartEvent,
+     EmbeddingEndEvent,
+ )
+ from llama_index.core.instrumentation.events.llm import (
+     LLMPredictEndEvent,
+     LLMPredictStartEvent,
+     LLMStructuredPredictEndEvent,
+     LLMStructuredPredictStartEvent,
+     LLMCompletionEndEvent,
+     LLMCompletionStartEvent,
+     LLMChatEndEvent,
+     LLMChatStartEvent,
+     LLMChatInProgressEvent,
+ )
+ from llama_index.core.instrumentation.events.query import (
+     QueryStartEvent,
+     QueryEndEvent,
+ )
+ from llama_index.core.instrumentation.events.rerank import (
+     ReRankStartEvent,
+     ReRankEndEvent,
+ )
+ from llama_index.core.instrumentation.events.retrieval import (
+     RetrievalStartEvent,
+     RetrievalEndEvent,
+ )
+ from llama_index.core.instrumentation.events.span import (
+     SpanDropEvent,
+ )
+ from llama_index.core.instrumentation.events.synthesis import (
+     SynthesizeStartEvent,
+     SynthesizeEndEvent,
+     GetResponseEndEvent,
+     GetResponseStartEvent,
+ )
+
+ import uuid
+
+ from .utils.extraction_logic_llama_index import extract_llama_index_data
+ from .utils.convert_llama_instru_callback import convert_llamaindex_instrumentation_to_callback
+
+ class EventHandler(BaseEventHandler):
+     """Example event handler.
+
+     This event handler is an example of how to create a custom event handler.
+
+     In general, logged events are treated as single events at a point in time
+     that link to a span. A span is a collection of events that are related to
+     a single task and is identified by a unique span_id.
+
+     While events are independent, there is some hierarchy.
+     For example, in a query_engine.query() call with a reranker attached:
+     - QueryStartEvent
+     - RetrievalStartEvent
+     - EmbeddingStartEvent
+     - EmbeddingEndEvent
+     - RetrievalEndEvent
+     - ReRankStartEvent
+     - ReRankEndEvent
+     - SynthesizeStartEvent
+     - GetResponseStartEvent
+     - LLMPredictStartEvent
+     - LLMChatStartEvent
+     - LLMChatEndEvent
+     - LLMPredictEndEvent
+     - GetResponseEndEvent
+     - SynthesizeEndEvent
+     - QueryEndEvent
+     """
+
+     events: List[BaseEvent] = []
+     current_trace: List[Dict[str, Any]] = []  # Store events for the current trace
+
+     @classmethod
+     def class_name(cls) -> str:
+         """Class name."""
+         return "EventHandler"
+
+     def handle(self, event: BaseEvent) -> None:
+         """Logic for handling event."""
+         # All events share these base attributes: event.id_, event.timestamp,
+         # and event.span_id.
+
+         # Prepare event details dictionary
+         event_details = {
+             "id": event.id_,
+             "timestamp": event.timestamp,
+             "span_id": event.span_id,
+             "event_type": event.class_name(),
+         }
+
+         # Event-specific attributes
+         if isinstance(event, AgentRunStepStartEvent):
+             event_details.update({
+                 "task_id": event.task_id,
+                 "step": event.step,
+                 "input": event.input,
+             })
+         if isinstance(event, AgentRunStepEndEvent):
+             event_details.update({
+                 "step_output": event.step_output,
+             })
+         if isinstance(event, AgentChatWithStepStartEvent):
+             event_details.update({
+                 "user_msg": event.user_msg,
+             })
+         if isinstance(event, AgentChatWithStepEndEvent):
+             event_details.update({
+                 "response": event.response,
+             })
+         if isinstance(event, AgentToolCallEvent):
+             event_details.update({
+                 "arguments": event.arguments,
+                 "tool_name": event.tool.name,
+                 "tool_description": event.tool.description,
+                 "tool_openai": event.tool.to_openai_tool(),
+             })
+         if isinstance(event, StreamChatDeltaReceivedEvent):
+             event_details.update({
+                 "delta": event.delta,
+             })
+         if isinstance(event, StreamChatErrorEvent):
+             event_details.update({
+                 "exception": event.exception,
+             })
+         if isinstance(event, EmbeddingStartEvent):
+             event_details.update({
+                 "model_dict": event.model_dict,
+             })
+         if isinstance(event, EmbeddingEndEvent):
+             event_details.update({
+                 "chunks": event.chunks,
+                 "embeddings": event.embeddings[0][:5],  # first five dims only
+             })
+         if isinstance(event, LLMPredictStartEvent):
+             event_details.update({
+                 "template": event.template,
+                 "template_args": event.template_args,
+             })
+         if isinstance(event, LLMPredictEndEvent):
+             event_details.update({
+                 "output": event.output,
+             })
+         if isinstance(event, LLMStructuredPredictStartEvent):
+             event_details.update({
+                 "template": event.template,
+                 "template_args": event.template_args,
+                 "output_cls": event.output_cls,
+             })
+         if isinstance(event, LLMStructuredPredictEndEvent):
+             event_details.update({
+                 "output": event.output,
+             })
+         if isinstance(event, LLMCompletionStartEvent):
+             event_details.update({
+                 "model_dict": event.model_dict,
+                 "prompt": event.prompt,
+                 "additional_kwargs": event.additional_kwargs,
+             })
+         if isinstance(event, LLMCompletionEndEvent):
+             event_details.update({
+                 "response": event.response,
+                 "prompt": event.prompt,
+             })
+         if isinstance(event, LLMChatInProgressEvent):
+             event_details.update({
+                 "messages": event.messages,
+                 "response": event.response,
+             })
+         if isinstance(event, LLMChatStartEvent):
+             event_details.update({
+                 "messages": event.messages,
+                 "additional_kwargs": event.additional_kwargs,
+                 "model_dict": event.model_dict,
+             })
+         if isinstance(event, LLMChatEndEvent):
+             event_details.update({
+                 "messages": event.messages,
+                 "response": event.response,
+             })
+         if isinstance(event, RetrievalStartEvent):
+             event_details.update({
+                 "str_or_query_bundle": event.str_or_query_bundle,
+             })
+         if isinstance(event, RetrievalEndEvent):
+             event_details.update({
+                 "str_or_query_bundle": event.str_or_query_bundle,
+                 "nodes": event.nodes,
+                 "text": event.nodes[0].text,
+             })
+         if isinstance(event, ReRankStartEvent):
+             event_details.update({
+                 "query": event.query,
+                 "nodes": event.nodes,
+                 "top_n": event.top_n,
+                 "model_name": event.model_name,
+             })
+         if isinstance(event, ReRankEndEvent):
+             event_details.update({
+                 "nodes": event.nodes,
+             })
+         if isinstance(event, QueryStartEvent):
+             event_details.update({
+                 "query": event.query,
+             })
+         if isinstance(event, QueryEndEvent):
+             event_details.update({
+                 "response": event.response,
+                 "query": event.query,
+             })
+         if isinstance(event, SpanDropEvent):
+             event_details.update({
+                 "err_str": event.err_str,
+             })
+         if isinstance(event, SynthesizeStartEvent):
+             event_details.update({
+                 "query": event.query,
+             })
+         if isinstance(event, SynthesizeEndEvent):
+             event_details.update({
+                 "response": event.response,
+                 "query": event.query,
+             })
+         if isinstance(event, GetResponseStartEvent):
+             event_details.update({
+                 "query_str": event.query_str,
+             })
+
+         # Append event details to current_trace
+         self.current_trace.append(event_details)
+
+         self.events.append(event)
+
+     def _get_events_by_span(self) -> Dict[str, List[BaseEvent]]:
+         events_by_span: Dict[str, List[BaseEvent]] = {}
+         for event in self.events:
+             if event.span_id in events_by_span:
+                 events_by_span[event.span_id].append(event)
+             else:
+                 events_by_span[event.span_id] = [event]
+         return events_by_span
+
+     # def _get_event_span_trees(self) -> List[Tree]:
+     #     events_by_span = self._get_events_by_span()
+
+     #     trees = []
+     #     tree = Tree()
+
+     #     for span, sorted_events in events_by_span.items():
+     #         # create root node i.e. span node
+     #         tree.create_node(
+     #             tag=f"{span} (SPAN)",
+     #             identifier=span,
+     #             parent=None,
+     #             data=sorted_events[0].timestamp,
+     #         )
+
+     #         for event in sorted_events:
+     #             tree.create_node(
+     #                 tag=f"{event.class_name()}: {event.id_}",
+     #                 identifier=event.id_,
+     #                 parent=event.span_id,
+     #                 data=event.timestamp,
+     #             )
+
+     #         trees.append(tree)
+     #         tree = Tree()
+     #     return trees
+
+     # def print_event_span_trees(self) -> None:
+     #     """Method for viewing trace trees."""
+     #     trees = self._get_event_span_trees()
+     #     for tree in trees:
+     #         print(
+     #             tree.show(
+     #                 stdout=False, sorting=True, key=lambda node: node.data
+     #             )
+     #         )
+     #         print("")
+
+
+ class SpanHandler(BaseSpanHandler[SimpleSpan]):
+     span_dict: ClassVar[Dict[str, List[SimpleSpan]]] = {}
+
+     @classmethod
+     def class_name(cls) -> str:
+         """Class name."""
+         return "SpanHandler"
+
+     def new_span(
+         self,
+         id_: str,
+         bound_args: Any,
+         instance: Optional[Any] = None,
+         parent_span_id: Optional[str] = None,
+         tags: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> Optional[SimpleSpan]:
+         """Create a span."""
+         # Logic for creating a new custom span
+         if id_ not in self.span_dict:
+             self.span_dict[id_] = []
+         self.span_dict[id_].append(
+             SimpleSpan(id_=id_, parent_id=parent_span_id)
+         )
+
+     def prepare_to_exit_span(
+         self,
+         id_: str,
+         bound_args: Any,
+         instance: Optional[Any] = None,
+         result: Optional[Any] = None,
+         **kwargs: Any,
+     ) -> Any:
+         """Logic for preparing to exit a span."""
+         pass
+         # if id_ in self.span_dict:
+         #     return self.span_dict[id_].pop()
+
+     def prepare_to_drop_span(
+         self,
+         id_: str,
+         bound_args: Any,
+         instance: Optional[Any] = None,
+         err: Optional[BaseException] = None,
+         **kwargs: Any,
+     ) -> Any:
+         """Logic for preparing to drop a span."""
+         pass
+         # if id_ in self.span_dict:
+         #     return self.span_dict[id_].pop()
+
+
+ class LlamaIndexInstrumentationTracer:
+     def __init__(self, user_detail):
+         """Initialize the LlamaIndexInstrumentationTracer with handlers, but don't start tracing yet."""
+         # Initialize the root dispatcher
+         self.root_dispatcher = get_dispatcher()
+
+         # Initialize handlers
+         self.json_event_handler = EventHandler()
+         self.span_handler = SpanHandler()
+         self.simple_span_handler = SimpleSpanHandler()
+
+         self.is_tracing = False  # Flag to check if tracing is active
+
+         self.user_detail = user_detail
+
+     def start(self):
+         """Start tracing by registering handlers."""
+         if self.is_tracing:
+             print("Tracing is already active.")
+             return
+
+         # Register handlers
+         self.root_dispatcher.add_span_handler(self.span_handler)
+         self.root_dispatcher.add_span_handler(self.simple_span_handler)
+         self.root_dispatcher.add_event_handler(self.json_event_handler)
+
+         self.is_tracing = True
+         print("Tracing started.")
+
+     def stop(self):
+         """Stop tracing and convert the collected events to callback-style traces."""
+         if not self.is_tracing:
+             print("Tracing is not active.")
+             return
+
+         # Write current_trace to a JSON file
+         final_traces = {
+             "project_id": self.user_detail["project_id"],
+             "trace_id": str(uuid.uuid4()),
+             "session_id": None,
+             "trace_type": "llamaindex",
+             "metadata": self.user_detail["trace_user_detail"]["metadata"],
+             "pipeline": self.user_detail["trace_user_detail"]["pipeline"],
+             "traces": self.json_event_handler.current_trace,
+         }
+
+         with open('new_llamaindex_traces.json', 'w') as f:
+             json.dump([final_traces], f, default=str, indent=4)
+
+         llamaindex_instrumentation_data = extract_llama_index_data([final_traces])
+         converted_back_to_callback = convert_llamaindex_instrumentation_to_callback(llamaindex_instrumentation_data)
+
+         # Just indicate tracing is stopped
+         self.is_tracing = False
+         print("Tracing stopped.")
+         return converted_back_to_callback
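
For orientation, a minimal usage sketch of the new tracer (illustrative only: the user_detail shape is inferred from the fields stop() reads, and the query call stands in for whatever LlamaIndex workload runs while tracing is active):

from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer

# Hypothetical user_detail; in the package it comes from Tracer._pass_user_data().
user_detail = {
    "project_id": "demo-project",
    "trace_user_detail": {"metadata": {}, "pipeline": []},
}

tracer = LlamaIndexInstrumentationTracer(user_detail)
tracer.start()  # registers the span/event handlers on the root dispatcher
# response = query_engine.query("What is RagaAI Catalyst?")  # events accumulate here
callback_traces = tracer.stop()  # writes new_llamaindex_traces.json, returns callback-style traces
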
@@ -29,6 +29,7 @@ from ragaai_catalyst.tracers.instrumentators import (
  )
  from ragaai_catalyst.tracers.utils import get_unique_key
  # from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+ from ragaai_catalyst.tracers.llamaindex_instrumentation import LlamaIndexInstrumentationTracer
  from ragaai_catalyst import RagaAICatalyst
  from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
  from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
@@ -170,7 +171,6 @@ class Tracer(AgenticTracing):
          elif tracer_type == "llamaindex":
              self._upload_task = None
              self.llamaindex_tracer = None
-             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer

          else:
              self._upload_task = None
@@ -256,8 +256,7 @@ class Tracer(AgenticTracing):
              self.langchain_tracer = LangchainTracer()
              return self.langchain_tracer.start()
          elif self.tracer_type == "llamaindex":
-             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
-             self.llamaindex_tracer = LlamaIndexTracer(self._pass_user_data())
+             self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
              return self.llamaindex_tracer.start()
          else:
              super().start()
@@ -286,30 +285,31 @@ class Tracer(AgenticTracing):
                      if 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
                          prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
                          completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
-                         additional_metadata.setdefault('cost', {})["total_cost"] = prompt_cost + completion_cost
+                         additional_metadata["cost"] = prompt_cost + completion_cost
+
+                         additional_metadata["prompt_tokens"] = float(additional_metadata["tokens"].get("prompt", 0.0))
+                         additional_metadata["completion_tokens"] = float(additional_metadata["tokens"].get("completion", 0.0))
+
+                         logger.debug("Metadata added successfully")
                      else:
                          logger.warning("Token information missing in additional_metadata")
+
+                     if 'cost' in additional_metadata:
+                         additional_metadata["cost"] = float(additional_metadata["cost"])
+                     else:
+                         additional_metadata["cost"] = 0.0
+                         logger.warning("Total cost information not available")
+
                  except Exception as e:
                      logger.warning(f"Error adding cost: {e}")
              else:
                  logger.debug("Model name not available in additional_metadata, skipping cost calculation")

-             # Safely get total tokens and cost
-             if 'tokens' in additional_metadata and 'total' in additional_metadata['tokens']:
-                 additional_metadata["total_tokens"] = float(additional_metadata["tokens"]["total"])
-             else:
-                 additional_metadata["total_tokens"] = 0.0
-                 logger.warning("Total tokens information not available")
-
-             if 'cost' in additional_metadata and 'total_cost' in additional_metadata['cost']:
-                 additional_metadata["total_cost"] = float(additional_metadata["cost"]["total_cost"])
-             else:
-                 additional_metadata["total_cost"] = 0.0
-                 logger.warning("Total cost information not available")

              # Safely remove tokens and cost dictionaries if they exist
              additional_metadata.pop("tokens", None)
-             additional_metadata.pop("cost", None)
+             # additional_metadata.pop("cost", None)

              # Safely merge metadata
              combined_metadata = {}
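
A worked example of the new cost bookkeeping (the per-token rates below are made up; in the package, model_cost_data comes from the model-cost table keyed by model name):

tokens = {"prompt": 1200, "completion": 300}
model_cost_data = {"input_cost_per_token": 0.5e-6, "output_cost_per_token": 1.5e-6}  # assumed rates

prompt_cost = tokens["prompt"] * model_cost_data["input_cost_per_token"]            # 0.0006
completion_cost = tokens["completion"] * model_cost_data["output_cost_per_token"]   # 0.00045
cost = prompt_cost + completion_cost                                                # 0.00105

Note the shape change: cost is now stored as a flat float on additional_metadata (alongside the new prompt_tokens and completion_tokens fields) rather than the previous {"total_cost": ...} sub-dict, and the old total_tokens/total_cost fallback block is removed.
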
@@ -337,7 +337,8 @@ class Tracer(AgenticTracing):
              else:
                  logger.warning("No valid langchain traces found in final_result")

-             additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
+             # additional_metadata_keys = list(additional_metadata.keys()) if additional_metadata else None
+             additional_metadata_dict = additional_metadata if additional_metadata else {}

              UploadTraces(json_file_path=filepath_3,
                           project_name=self.project_name,
@@ -345,14 +346,30 @@ class Tracer(AgenticTracing):
                           dataset_name=self.dataset_name,
                           user_detail=user_detail,
                           base_url=self.base_url
-                          ).upload_traces(additional_metadata_keys=additional_metadata_keys)
+                          ).upload_traces(additional_metadata_keys=additional_metadata_dict)

              return

          elif self.tracer_type == "llamaindex":
              if self.llamaindex_tracer is None:
                  raise ValueError("LlamaIndex tracer was not started")
-             return self.llamaindex_tracer.stop()
+
+             user_detail = self._pass_user_data()
+             converted_back_to_callback = self.llamaindex_tracer.stop()
+
+             filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
+             with open(filepath_3, 'w') as f:
+                 json.dump(converted_back_to_callback, f, default=str, indent=2)
+
+             if converted_back_to_callback:
+                 UploadTraces(json_file_path=filepath_3,
+                              project_name=self.project_name,
+                              project_id=self.project_id,
+                              dataset_name=self.dataset_name,
+                              user_detail=user_detail,
+                              base_url=self.base_url
+                              ).upload_traces()
+             return
          else:
              super().stop()

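From the caller's side, the llamaindex path now looks roughly like this (a sketch; the constructor parameters are assumptions inferred from the attributes used above, not a documented signature):

tracer = Tracer(project_name="demo", dataset_name="runs", tracer_type="llamaindex")
tracer.start()   # wires LlamaIndexInstrumentationTracer into the LlamaIndex dispatcher
# ... run LlamaIndex queries ...
tracer.stop()    # dumps llama_final_result.json to the working directory and uploads it
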
@@ -37,7 +37,10 @@ class UploadTraces:

          if additional_metadata_keys:
              for key in additional_metadata_keys:
-                 SCHEMA_MAPPING_NEW[key] = {"columnType": "metadata"}
+                 if key == "model_name":
+                     SCHEMA_MAPPING_NEW['response']["modelName"] = additional_metadata_keys[key]
+                 else:
+                     SCHEMA_MAPPING_NEW[key] = {"columnType": key, "parentColumn": "response"}

          if additional_pipeline_keys:
              for key in additional_pipeline_keys:
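
Given the metadata dict now passed as additional_metadata_keys (see additional_metadata_dict above), the loop produces entries like these; the pre-existing SCHEMA_MAPPING_NEW["response"] entry is assumed to be defined earlier in the file:

additional_metadata_keys = {
    "model_name": "gpt-4o-mini",  # illustrative value
    "cost": 0.00105,
    "prompt_tokens": 1200.0,
    "completion_tokens": 300.0,
}

# After the loop:
# SCHEMA_MAPPING_NEW["response"]["modelName"] == "gpt-4o-mini"
# SCHEMA_MAPPING_NEW["cost"] == {"columnType": "cost", "parentColumn": "response"}
# SCHEMA_MAPPING_NEW["prompt_tokens"] == {"columnType": "prompt_tokens", "parentColumn": "response"}
# SCHEMA_MAPPING_NEW["completion_tokens"] == {"columnType": "completion_tokens", "parentColumn": "response"}
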
@@ -0,0 +1,69 @@
+ def convert_llamaindex_instrumentation_to_callback(data):
+     data = data[0]
+     initial_struc = [{
+         "trace_id": data["trace_id"],
+         "project_id": data["project_id"],
+         "session_id": data["session_id"],
+         "trace_type": data["trace_type"],
+         "metadata": data["metadata"],
+         "pipeline": data["pipeline"],
+         "traces": []
+     }]
+
+     traces_data = []
+
+     prompt = data["data"]["prompt"]
+     response = data["data"]["response"]
+     context = data["data"]["context"]
+     system_prompt = data["data"]["system_prompt"]
+
+     prompt_structured_data = {
+         "event_type": "query",
+         "payload": {
+             "query_str": prompt
+         }
+     }
+     traces_data.append(prompt_structured_data)
+
+     response_structured_data = {
+         "event_type": "llm",
+         "payload": {
+             "response": {
+                 "message": {
+                     "content": response,
+                 }
+             }
+         }
+     }
+     traces_data.append(response_structured_data)
+
+     context_structured_data = {
+         "event_type": "retrieve",
+         "payload": {
+             "nodes": [
+                 {
+                     "node": {
+                         "text": context
+                     }
+                 }
+             ]
+         }
+     }
+     traces_data.append(context_structured_data)
+
+     system_prompt_structured_data = {
+         "event_type": "llm",
+         "payload": {
+             "messages": [
+                 {
+                     "role": "system",
+                     "content": system_prompt
+                 }
+             ]
+         }
+     }
+     traces_data.append(system_prompt_structured_data)
+
+     initial_struc[0]["traces"] = traces_data
+
+     return initial_struc
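
For a single extracted record (the shape produced by extract_llama_index_data in the next file), the converter emits one callback-style event per field. A sketch with illustrative values:

extracted = [{
    "trace_id": "t-1", "project_id": "p-1", "session_id": None,
    "trace_type": "llamaindex", "metadata": {}, "pipeline": [],
    "data": {
        "prompt": "What is Catalyst?",
        "response": "Catalyst is an observability platform.",
        "context": "Catalyst docs say ...",
        "system_prompt": "You are a helpful assistant.",
    },
}]

result = convert_llamaindex_instrumentation_to_callback(extracted)
# result[0]["traces"] holds four events, in order:
#   {"event_type": "query",    "payload": {"query_str": ...}}
#   {"event_type": "llm",      "payload": {"response": {"message": {"content": ...}}}}
#   {"event_type": "retrieve", "payload": {"nodes": [{"node": {"text": ...}}]}}
#   {"event_type": "llm",      "payload": {"messages": [{"role": "system", "content": ...}]}}
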
@@ -0,0 +1,74 @@
+ def extract_llama_index_data(data):
+     """
+     Transform llama_index trace data into a standardized format.
+     """
+     data = data[0]
+
+     # Extract top-level metadata
+     trace_data = {
+         "project_id": data.get("project_id"),
+         "trace_id": data.get("trace_id"),
+         "session_id": data.get("session_id"),
+         "trace_type": data.get("trace_type"),
+         "pipeline": data.get("pipeline"),
+         "metadata": data.get("metadata"),
+         "prompt_length": 0,
+         "data": {
+             "prompt": None,
+             "context": None,
+             "response": None,
+             "system_prompt": None
+         }
+     }
+
+     def get_prompt(traces):
+         for span in traces:
+             if span["event_type"] == "QueryStartEvent":
+                 return span.get("query", "")
+             if span["event_type"] == "QueryEndEvent":
+                 return span.get("query", "")
+
+     def get_context(traces):
+         for span in traces:
+             if span["event_type"] == "RetrievalEndEvent":
+                 return span.get("text", "")
+
+     def get_response(traces):
+         for span in traces:
+             if span["event_type"] == "QueryEndEvent":
+                 return span.get("response", "")
+             # if span["event_type"] == "LLMPredictEndEvent":
+             #     return span.get("output", "")
+             # if span["event_type"] == "SynthesizeEndEvent":
+             #     return span.get("response", "")
+
+     def get_system_prompt(traces):
+         for span in traces:
+             if span["event_type"] == "LLMChatStartEvent":
+                 messages = span.get("messages", "")
+                 return messages[0]
+
+     # Process traces
+     if "traces" in data:
+         trace_data["data"]["prompt"] = get_prompt(data["traces"])
+         trace_data["data"]["context"] = get_context(data["traces"])
+         trace_data["data"]["response"] = get_response(data["traces"])
+         trace_data["data"]["system_prompt"] = get_system_prompt(data["traces"])
+
+     return [trace_data]
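
Putting the two utilities together, the post-processing that LlamaIndexInstrumentationTracer.stop() performs is roughly the following (a sketch; final_traces mirrors the dict built in stop(), with illustrative event entries):

final_traces = {
    "project_id": "demo-project",
    "trace_id": "illustrative-trace-id",
    "session_id": None,
    "trace_type": "llamaindex",
    "metadata": {},
    "pipeline": [],
    "traces": [
        {"event_type": "QueryStartEvent", "query": "What is Catalyst?"},
        {"event_type": "RetrievalEndEvent", "text": "Catalyst docs say ..."},
        {"event_type": "QueryEndEvent", "query": "What is Catalyst?", "response": "Catalyst is ..."},
    ],
}

extracted = extract_llama_index_data([final_traces])
# extracted[0]["data"] == {"prompt": "What is Catalyst?",
#                          "context": "Catalyst docs say ...",
#                          "response": "Catalyst is ...",
#                          "system_prompt": None}   # no LLMChatStartEvent in this trace

callback_traces = convert_llamaindex_instrumentation_to_callback(extracted)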