ragaai-catalyst 2.2.4b5__py3-none-any.whl → 2.2.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. ragaai_catalyst/__init__.py +0 -2
  2. ragaai_catalyst/dataset.py +59 -1
  3. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +5 -285
  4. ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -2
  5. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -1
  6. ragaai_catalyst/tracers/exporters/__init__.py +1 -2
  7. ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -1
  8. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +23 -1
  9. ragaai_catalyst/tracers/tracer.py +6 -186
  10. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/METADATA +1 -1
  11. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/RECORD +14 -45
  12. ragaai_catalyst/experiment.py +0 -486
  13. ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -536
  14. ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -134
  15. ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -563
  16. ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
  17. ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -197
  18. ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -172
  19. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +0 -687
  20. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +0 -1319
  21. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +0 -347
  22. ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +0 -1182
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -288
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +0 -557
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -129
  27. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +0 -74
  28. ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -21
  29. ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -32
  30. ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -28
  31. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -133
  32. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +0 -34
  33. ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -467
  34. ragaai_catalyst/tracers/langchain_callback.py +0 -821
  35. ragaai_catalyst/tracers/llamaindex_callback.py +0 -361
  36. ragaai_catalyst/tracers/llamaindex_instrumentation.py +0 -424
  37. ragaai_catalyst/tracers/upload_traces.py +0 -170
  38. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +0 -62
  39. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +0 -69
  40. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +0 -74
  41. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +0 -82
  42. ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +0 -403
  43. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/WHEEL +0 -0
  44. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/licenses/LICENSE +0 -0
  45. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py
@@ -1,69 +0,0 @@
-def convert_llamaindex_instrumentation_to_callback(data):
-    data = data[0]
-    initial_struc = [{
-        "trace_id": data["trace_id"],
-        "project_id": data["project_id"],
-        "session_id": data["session_id"],
-        "trace_type": data["trace_type"],
-        "metadata" : data["metadata"],
-        "pipeline" : data["pipeline"],
-        "traces" : []
-    }]
-
-    traces_data = []
-
-    prompt = data["data"]["prompt"]
-    response = data["data"]["response"]
-    context = data["data"]["context"]
-    system_prompt = data["data"]["system_prompt"]
-
-    prompt_structured_data = {
-        "event_type": "query",
-        "payload": {
-            "query_str": prompt
-        }
-    }
-    traces_data.append(prompt_structured_data)
-
-    response_structured_data = {
-        "event_type": "llm",
-        "payload": {
-            "response": {
-                "message": {
-                    "content": response,
-                }
-            }
-        }
-    }
-    traces_data.append(response_structured_data)
-
-    context_structured_data = {
-        "event_type": "retrieve",
-        "payload": {
-            "nodes": [
-                {
-                    "node": {
-                        "text": context
-                    }
-                }
-            ]
-        }
-    }
-    traces_data.append(context_structured_data)
-
-    system_prompt_structured_data = {
-        "event_type": "llm",
-        "payload": {
-            "messages": [
-                {
-                    "role": "system",
-                    "content": system_prompt
-                }
-            ]
-        }
-    }
-    traces_data.append(system_prompt_structured_data)
-
-    initial_struc[0]["traces"] = traces_data
-
-    return initial_struc
ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py
@@ -1,74 +0,0 @@
-import json
-from typing import Dict, Any, Optional
-
-
-def extract_llama_index_data(data):
-    """
-    Transform llama_index trace data into standardized format
-    """
-    data = data[0]
-
-    # Extract top-level metadata
-    trace_data = {
-        "project_id": data.get("project_id"),
-        "trace_id": data.get("trace_id"),
-        "session_id": data.get("session_id"),
-        "trace_type": data.get("trace_type"),
-        "pipeline": data.get("pipeline"),
-        "metadata":data.get("metadata") ,
-        "prompt_length": 0,
-        "data": {
-            "prompt": None,
-            "context": None,
-            "response": None,
-            "system_prompt": None
-        }
-    }
-
-    def get_prompt(data):
-        for span in data:
-            if span["event_type"]=="QueryStartEvent":
-                prompt = span.get("query", "")
-                return prompt
-            if span["event_type"]=="QueryEndEvent":
-                prompt = span.get("query", "")
-                return prompt
-
-
-    def get_context(data):
-        for span in data:
-            if span["event_type"]=="RetrievalEndEvent":
-                context = span.get("text", "")
-                return context
-
-    def get_response(data):
-        for span in data:
-            if span["event_type"]=="QueryEndEvent":
-                response = span.get("response", "")
-                return response
-            # if span["event_type"]=="LLMPredictEndEvent":
-            #     response = span.get("output", "")
-            #     return response
-            # if span["event_type"]=="SynthesizeEndEvent":
-            #     response = span.get("response", "")
-            #     return response
-
-    def get_system_prompt(data):
-        for span in data:
-            if span["event_type"]=="LLMChatStartEvent":
-                response = span.get("messages", "")
-                response = response[0]
-                return response
-
-    # Process traces
-    if "traces" in data:
-        prompt = get_prompt(data["traces"])
-        context = get_context(data["traces"])
-        response = get_response(data["traces"])
-        system_prompt = get_system_prompt(data["traces"])
-
-        trace_data["data"]["prompt"] = prompt
-        trace_data["data"]["context"] = context
-        trace_data["data"]["response"] = response
-        trace_data["data"]["system_prompt"] = system_prompt
-    return [trace_data]
ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py
@@ -1,82 +0,0 @@
-import json
-import uuid
-
-def langchain_tracer_extraction(data, user_context=""):
-    trace_aggregate = {}
-    import uuid
-
-    def generate_trace_id():
-        """
-        Generate a random trace ID using UUID4.
-        Returns a string representation of the UUID with no hyphens.
-        """
-        return '0x'+str(uuid.uuid4()).replace('-', '')
-
-    trace_aggregate["tracer_type"] = "langchain"
-    trace_aggregate['trace_id'] = generate_trace_id()
-    trace_aggregate['session_id'] = None
-    trace_aggregate["pipeline"] = {
-        'llm_model': 'gpt-3.5-turbo',
-        'vector_store': 'faiss',
-        'embed_model': 'text-embedding-ada-002'
-    }
-    trace_aggregate["metadata"] = {
-        'key1': 'value1',
-        'key2': 'value2',
-        'log_source': 'langchain_tracer',
-        'recorded_on': '2024-06-14 08:57:27.324410'
-    }
-    trace_aggregate["prompt_length"] = 0
-    trace_aggregate["data"] = {}
-
-    def get_prompt(data):
-        # if "chain_starts" in data and data["chain_starts"] != []:
-        #     for item in data["chain_starts"]:
-
-        if "chat_model_calls" in data and data["chat_model_calls"] != []:
-            for item in data["chat_model_calls"]:
-                messages = item["messages"][0]
-                for message in messages:
-                    if message["type"]=="human":
-                        human_messages = message["content"].strip()
-                        return human_messages
-        if "llm_calls" in data and data["llm_calls"] != []:
-            if "llm_start" in data["llm_calls"][0]["event"]:
-                for item in data["llm_calls"]:
-                    prompt = item["prompts"]
-                return prompt[0].strip()
-
-    def get_response(data):
-        for item in data["llm_calls"]:
-            if item["event"] == "llm_end":
-                llm_end_responses = item["response"]["generations"][0]
-                for llm_end_response in llm_end_responses:
-                    response = llm_end_response["text"]
-                return response.strip()
-
-    def get_context(data, user_context):
-        if user_context:
-            return user_context
-        if "retriever_actions" in data and data["retriever_actions"] != []:
-            for item in data["retriever_actions"]:
-                if item["event"] == "retriever_end":
-                    context = item["documents"][0]["page_content"].replace('\n', ' ')
-                    return context
-        # if "chat_model_calls" in data and data["chat_model_calls"] != []:
-        #     for item in data["chat_model_calls"]:
-        #         messages = item["messages"][0]
-        #         for message in messages:
-        #             if message["type"]=="system":
-        #                 content = message["content"].strip().replace('\n', ' ')
-        #                 return content
-
-
-    prompt = get_prompt(data)
-    response = get_response(data)
-    context = get_context(data, user_context)
-
-    trace_aggregate["data"]["prompt"]=prompt
-    trace_aggregate["data"]["response"]=response
-    trace_aggregate["data"]["context"]=context
-
-    return trace_aggregate
ragaai_catalyst/tracers/utils/rag_trace_json_converter.py
@@ -1,403 +0,0 @@
-import json
-from litellm import model_cost
-import logging
-import os
-import re
-from datetime import datetime
-import tiktoken
-
-logger = logging.getLogger("RagaAICatalyst")
-logging_level = (
-    logger.setLevel(logging.DEBUG) if os.getenv("DEBUG") == "1" else logging.INFO
-)
-
-def rag_trace_json_converter(input_trace, custom_model_cost, trace_id, user_details, tracer_type, user_context):
-    trace_aggregate = {}
-    input_trace = add_span_hash_id(input_trace)
-    prompt = get_prompt(input_trace, tracer_type)
-    response = get_response(input_trace, tracer_type)
-    context = get_context(input_trace, tracer_type, user_context)
-    error = get_span_errors(input_trace, tracer_type)
-
-    if tracer_type == "langchain":
-        trace_aggregate["tracer_type"] = "langchain"
-    elif tracer_type == "llamaindex":
-        trace_aggregate["tracer_type"] = "llamaindex"
-
-    trace_aggregate['id'] = trace_id
-    trace_aggregate['trace_name'] = user_details.get("dataset_name", "")
-    trace_aggregate['project_name'] = user_details.get("project_name", "")
-    trace_aggregate["start_time"] = input_trace[0].get("start_time", "")
-    trace_aggregate["end_time"] = input_trace[-1].get("end_time", "")
-    trace_aggregate["metadata"] = user_details.get("trace_user_detail", {}).get("metadata")
-    trace_aggregate["pipeline"] = user_details.get("trace_user_detail", {}).get("pipeline")
-    trace_aggregate["replays"] = {"source": None}
-
-    trace_aggregate["data"] = [{"spans": input_trace, "start_time": trace_aggregate["start_time"], "end_time": trace_aggregate["end_time"]}]
-    if tracer_type == "langchain":
-        additional_metadata = get_additional_metadata(input_trace, custom_model_cost, model_cost, prompt, response)
-
-        trace_aggregate["metadata"].update(additional_metadata)
-        trace_aggregate["metadata"]["error"] = f"{error}"
-        additional_metadata["error"] = error if error else None
-
-        additional_metadata.pop("total_cost")
-        additional_metadata.pop("total_latency")
-    return trace_aggregate, additional_metadata
-
-def get_additional_metadata(spans, custom_model_cost, model_cost_dict, prompt="", response=""):
-    additional_metadata = {}
-    additional_metadata["cost"] = 0.0
-    additional_metadata["tokens"] = {}
-    try:
-        for span in spans:
-            if span["name"] in ["ChatOpenAI", "ChatAnthropic", "ChatGoogleGenerativeAI", "OpenAI", "ChatOpenAI_LangchainOpenAI", "ChatOpenAI_ChatModels",
-                                "ChatVertexAI", "VertexAI", "ChatLiteLLM", "ChatBedrock", "AzureChatOpenAI", "ChatAnthropicVertex"]:
-                start_time = datetime.fromisoformat(span.get("start_time", "")[:-1]) # Remove 'Z' and parse
-                end_time = datetime.fromisoformat(span.get("end_time", "")[:-1]) # Remove 'Z' and parse
-                additional_metadata["latency"] = (end_time - start_time).total_seconds()
-                additional_metadata["model_name"] = span["attributes"].get("llm.model_name", "").replace("models/", "")
-                additional_metadata["model"] = additional_metadata["model_name"]
-                try:
-                    additional_metadata["tokens"]["prompt"] = span["attributes"]["llm.token_count.prompt"]
-
-                except:
-                    logger.debug("Warning: prompt token not found. using fallback strategies to get tokens.")
-                    try:
-                        additional_metadata["tokens"]["prompt"] = num_tokens_from_messages(
-                            model=additional_metadata["model_name"],
-                            message=prompt
-                        )
-                    except Exception as e:
-                        logger.debug(f"Failed to count prompt tokens: {str(e)}. Using 'gpt-4o-mini' model count as fallback.")
-                        additional_metadata["tokens"]["prompt"] = num_tokens_from_messages(
-                            model="gpt-4o-mini",
-                            message=prompt
-                        )
-
-                try:
-                    additional_metadata["tokens"]["completion"] = span["attributes"]["llm.token_count.completion"]
-                except:
-                    logger.debug("Warning: completion token not found. using fallback strategies to get tokens.")
-                    try:
-                        additional_metadata["tokens"]["completion"] = num_tokens_from_messages(
-                            model=additional_metadata["model_name"],
-                            message=response
-                        )
-                    except Exception as e:
-                        logger.debug(f"Failed to count completion tokens: {str(e)}. Using 'gpt-4o-mini' model count as fallback.")
-                        additional_metadata["tokens"]["completion"] = num_tokens_from_messages(
-                            model="gpt-4o-mini",
-                            message=response
-                        )
-
-                # Ensure both values are not None before adding
-                prompt_tokens = additional_metadata["tokens"].get("prompt", 0) or 0
-                completion_tokens = additional_metadata["tokens"].get("completion", 0) or 0
-                additional_metadata["tokens"]["total"] = prompt_tokens + completion_tokens
-
-    except Exception as e:
-        logger.error(f"Error getting additional metadata: {str(e)}")
-
-    try:
-        if custom_model_cost.get(additional_metadata.get('model_name')):
-            model_cost_data = custom_model_cost[additional_metadata.get('model_name')]
-        else:
-            model_cost_data = model_cost_dict.get(additional_metadata.get('model_name'))
-
-        # Check if model_cost_data is None
-        if model_cost_data is None:
-            logger.warning(f"No cost data found for model: {additional_metadata.get('model_name')}")
-            # Set default values
-            additional_metadata["cost"] = 0.0
-            additional_metadata["total_cost"] = 0.0
-            additional_metadata["total_latency"] = additional_metadata.get("latency", 0)
-            additional_metadata["prompt_tokens"] = additional_metadata["tokens"].get("prompt", 0) or 0
-            additional_metadata["completion_tokens"] = additional_metadata["tokens"].get("completion", 0) or 0
-        elif 'tokens' in additional_metadata and all(k in additional_metadata['tokens'] for k in ['prompt', 'completion']):
-            # Get input and output costs, defaulting to 0 if not found
-            input_cost_per_token = model_cost_data.get("input_cost_per_token", 0) or 0
-            output_cost_per_token = model_cost_data.get("output_cost_per_token", 0) or 0
-
-            # Get token counts, defaulting to 0 if not found
-            prompt_tokens = additional_metadata["tokens"].get("prompt", 0) or 0
-            completion_tokens = additional_metadata["tokens"].get("completion", 0) or 0
-
-            # Calculate costs
-            prompt_cost = prompt_tokens * input_cost_per_token
-            completion_cost = completion_tokens * output_cost_per_token
-
-            additional_metadata["cost"] = prompt_cost + completion_cost
-            additional_metadata["total_cost"] = additional_metadata["cost"]
-            additional_metadata["total_latency"] = additional_metadata.get("latency", 0)
-            additional_metadata["prompt_tokens"] = prompt_tokens
-            additional_metadata["completion_tokens"] = completion_tokens
-    except Exception as e:
-        logger.warning(f"Error getting model cost data: {str(e)}")
-        # Set default values in case of error
-        additional_metadata["cost"] = 0.0
-        additional_metadata["total_cost"] = 0.0
-        additional_metadata["total_latency"] = additional_metadata.get("latency", 0)
-        additional_metadata["prompt_tokens"] = additional_metadata["tokens"].get("prompt", 0) or 0
-        additional_metadata["completion_tokens"] = additional_metadata["tokens"].get("completion", 0) or 0
-    try:
-        additional_metadata.pop("tokens", None)
-    except Exception as e:
-        logger.error(f"Error removing tokens from additional metadata: {str(e)}")
-
-    return additional_metadata
-
-def num_tokens_from_messages(model, message):
-    try:
-        if not message:
-            logger.error("Empty or None message provided to token counter")
-            return 0
-
-        def num_tokens_from_string(text_content: str, encoding_name: str) -> int:
-            """Returns the number of tokens in a text string."""
-            if isinstance(text_content, list):
-                list_str = str(text_content[0]) if text_content else ""
-                pattern = r"content=\'(.*?)\'(?:\s+additional_kwargs=|$)"
-                match = re.search(pattern, list_str, re.DOTALL)
-                if match:
-                    text_content = match.group(1) # Extract content and process it for tokens
-                else:
-                    text_content = list_str
-            try:
-                encoding = tiktoken.get_encoding(encoding_name)
-                return len(encoding.encode(text_content))
-            except Exception as e:
-                logger.warning(f"Error encoding with {encoding_name}: {str(e)}")
-                try:
-                    fallback_encoding = tiktoken.get_encoding("cl100k_base")
-                    return len(fallback_encoding.encode(text_content))
-                except:
-                    logger.debug("Failed to use fallback encoding")
-                    return 0
-
-        # Determine which encoding to use based on model name
-        encoding_name = "o200k_base"
-
-        if re.match(r'^gpt-', model):
-            if re.match(r'^gpt-(4o|4\.1).*', model):
-                # GPT-4o and GPT-4.1 models
-                encoding_name = "o200k_base"
-            elif re.match(r'^gpt-(4|3\.5).*', model):
-                # GPT-4 and GPT-3.5 models
-                encoding_name = "cl100k_base"
-        else:
-            logger.debug(f"Using default token counter for: {model}.")
-
-        return num_tokens_from_string(message, encoding_name)
-
-    except Exception as e:
-        logger.error(f"Unexpected error in token counting: {str(e)}")
-        return 0
-
-
-def get_prompt(input_trace, tracer_type):
-    try:
-        if tracer_type == "langchain":
-            for span in input_trace:
-                try:
-                    attributes = span.get("attributes", {})
-
-                    if attributes:
-                        for key, value in attributes.items():
-                            try:
-                                if key.startswith("llm.input_messages.") and key.endswith(".message.role") and value == "user":
-                                    message_num = key.split(".")[2]
-                                    content_key = f"llm.input_messages.{message_num}.message.content"
-                                    if content_key in attributes:
-                                        return attributes.get(content_key)
-                            except Exception as e:
-                                logger.warning(f"Error processing attribute key-value pair: {str(e)}")
-                                continue
-
-                        for key, value in attributes.items():
-                            try:
-                                if key.startswith("llm.prompts") and isinstance(value, list):
-                                    human_message = None
-                                    for message in value:
-                                        if isinstance(message, str):
-                                            human_index = message.find("Human:")
-                                            if human_index != -1:
-                                                human_message = message[human_index:].replace("Human:", "")
-                                                break
-                                    return human_message if human_message else value
-                            except Exception as e:
-                                logger.warning(f"Error processing attribute key-value pair for prompt: {str(e)}")
-                                continue
-                except Exception as e:
-                    logger.warning(f"Error processing span for prompt extraction: {str(e)}")
-                    continue
-
-            for span in input_trace:
-                try:
-                    if span["name"] == "LLMChain":
-                        try:
-                            input_value = span["attributes"].get("input.value", "{}")
-                            return json.loads(input_value).get("question", "")
-                        except json.JSONDecodeError:
-                            logger.warning(f"Invalid JSON in LLMChain input.value: {input_value}")
-                            continue
-                    elif span["name"] == "RetrievalQA":
-                        return span["attributes"].get("input.value", "")
-                    elif span["name"] == "VectorStoreRetriever":
-                        return span["attributes"].get("input.value", "")
-                except Exception as e:
-                    logger.warning(f"Error processing span for fallback prompt extraction: {str(e)}")
-                    continue
-
-            logger.warning("No user message found in any span")
-            logger.warning("Returning empty string for prompt.")
-            return ""
-        elif tracer_type == "llamaindex":
-            for span in input_trace:
-                if span["name"] == "BaseQueryEngine.query":
-                    return span["attributes"]["input.value"]
-                elif "query_bundle" in span["attributes"].get("input.value", ""):
-                    try:
-                        query_data = json.loads(span["attributes"]["input.value"])
-                        if "query_bundle" in query_data:
-                            return query_data["query_bundle"]["query_str"]
-                    except json.JSONDecodeError:
-                        logger.error("Failed to parse query_bundle JSON")
-        logger.error("Prompt not found in the trace")
-        return None
-    except Exception as e:
-        logger.error(f"Error while extracting prompt from trace: {str(e)}")
-        return None
-
-def get_response(input_trace, tracer_type):
-    try:
-        if tracer_type == "langchain":
-            for span in input_trace:
-                try:
-                    attributes = span.get("attributes", {})
-                    if attributes:
-                        for key, value in attributes.items():
-                            try:
-                                if key.startswith("llm.output_messages.") and key.endswith(".message.content"):
-                                    return value
-                            except Exception as e:
-                                logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
-                                continue
-
-                        for key, value in attributes.items():
-                            try:
-                                if key.startswith("output.value"):
-                                    try:
-                                        output_json = json.loads(value)
-                                        if "generations" in output_json and isinstance(output_json.get("generations"), list) and len(output_json.get("generations")) > 0:
-                                            if isinstance(output_json.get("generations")[0], list) and len(output_json.get("generations")[0]) > 0:
-                                                first_generation = output_json.get("generations")[0][0]
-                                                if "text" in first_generation:
-                                                    return first_generation["text"]
-                                    except json.JSONDecodeError:
-                                        logger.warning(f"Invalid JSON in output.value: {value}")
-                                        continue
-                            except Exception as e:
-                                logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
-                                continue
-                except Exception as e:
-                    logger.warning(f"Error processing span for response extraction: {str(e)}")
-                    continue
-
-            for span in input_trace:
-                try:
-                    if span["name"] == "LLMChain":
-                        try:
-                            output_value = span["attributes"].get("output.value", "")
-                            if output_value:
-                                return json.loads(output_value)
-                            return ""
-                        except json.JSONDecodeError:
-                            logger.warning(f"Invalid JSON in LLMChain output.value: {output_value}")
-                            continue
-                    elif span["name"] == "RetrievalQA":
-                        return span["attributes"].get("output.value", "")
-                    elif span["name"] == "VectorStoreRetriever":
-                        return span["attributes"].get("output.value", "")
-                except Exception as e:
-                    logger.warning(f"Error processing span for fallback response extraction: {str(e)}")
-                    continue
-
-            logger.warning("No response found in any span")
-            return ""
-        elif tracer_type == "llamaindex":
-            for span in input_trace:
-                if span["name"] == "BaseQueryEngine.query":
-                    return span["attributes"]["output.value"]
-        logger.error("Response not found in the trace")
-        return None
-    except Exception as e:
-        logger.error(f"Error while extracting response from trace: {str(e)}")
-        return None
-
-def get_context(input_trace, tracer_type, user_context):
-    try:
-        if user_context and user_context.strip():
-            return user_context
-        elif tracer_type == "langchain":
-            for span in input_trace:
-                try:
-                    if span["name"] == "VectorStoreRetriever":
-                        return span["attributes"].get("retrieval.documents.1.document.content", "")
-                except Exception as e:
-                    logger.warning(f"Error processing span for context extraction: {str(e)}")
-                    continue
-        elif tracer_type == "llamaindex":
-            for span in input_trace:
-                try:
-                    if span["name"] == "BaseRetriever.retrieve":
-                        return span["attributes"]["retrieval.documents.1.document.content"]
-                except Exception as e:
-                    logger.warning(f"Error processing span for context extraction: {str(e)}")
-                    continue
-        logger.warning("Context not found in the trace")
-        return ""
-    except Exception as e:
-        logger.error(f"Error while extracting context from trace: {str(e)}")
-        return ""
-
-def get_span_errors(input_trace, tracer_type):
-    try:
-        if tracer_type == "langchain":
-            span_errors = {}
-            for span in input_trace:
-                try:
-                    if "status" in span.keys() and span.get("status", {}).get("status_code", "").lower() == "error":
-                        span_errors[f"{span['name']}"] = span["status"]
-                except:
-                    logger.error(f"Error fetching status from span")
-            return span_errors
-    except:
-        logger.error(f"Error in get_span_errors")
-        return None
-
-def add_span_hash_id(input_trace):
-    """
-    Add hash IDs to spans and track name occurrences.
-
-    Args:
-        input_trace (dict): The input trace containing spans
-
-    Returns:
-        dict: Modified trace with hash IDs and name occurrences added to spans
-    """
-    import uuid
-    from collections import defaultdict
-
-    name_counts = defaultdict(int)
-
-    for span in input_trace:
-        if "name" in span:
-            # Add hash ID
-            span["hash_id"] = str(uuid.uuid4())
-
-            # Track and update name occurrences
-            span["name_occurrences"] = name_counts[span["name"]]
-            name_counts[span["name"]] += 1
-
-    return input_trace