ragaai-catalyst 2.2.4b5__py3-none-any.whl → 2.2.5b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +0 -2
- ragaai_catalyst/dataset.py +59 -1
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +5 -285
- ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -1
- ragaai_catalyst/tracers/exporters/__init__.py +1 -2
- ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -1
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +23 -1
- ragaai_catalyst/tracers/tracer.py +6 -186
- {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/RECORD +14 -45
- ragaai_catalyst/experiment.py +0 -486
- ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -536
- ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -134
- ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -563
- ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
- ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -197
- ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -172
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +0 -687
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +0 -1319
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +0 -347
- ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +0 -1182
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -288
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +0 -557
- ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -129
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +0 -74
- ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -21
- ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -28
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -133
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +0 -34
- ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -467
- ragaai_catalyst/tracers/langchain_callback.py +0 -821
- ragaai_catalyst/tracers/llamaindex_callback.py +0 -361
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +0 -424
- ragaai_catalyst/tracers/upload_traces.py +0 -170
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +0 -62
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +0 -69
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +0 -74
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +0 -82
- ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +0 -403
- {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/licenses/LICENSE +0 -0
- {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/top_level.txt +0 -0
@@ -1,361 +0,0 @@
|
|
1
|
-
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
|
2
|
-
from llama_index.core import Settings
|
3
|
-
from typing import List, Dict, Any, Optional
|
4
|
-
from datetime import datetime
|
5
|
-
from enum import Enum
|
6
|
-
import json
|
7
|
-
import uuid
|
8
|
-
import os
|
9
|
-
import requests
|
10
|
-
import tempfile
|
11
|
-
|
12
|
-
from ..ragaai_catalyst import RagaAICatalyst
|
13
|
-
|
14
|
-
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that tolerates Enums and arbitrary objects.

    Enum members are written as their ``value``; objects exposing a
    ``__dict__`` are serialized as that attribute dict; anything else falls
    back to ``str(obj)`` so serialization never raises on unknown types.
    """

    def default(self, obj):
        # Enums serialize to their underlying value.
        if isinstance(obj, Enum):
            return obj.value
        # Plain objects: use their attribute dict when available.
        attrs = getattr(obj, "__dict__", None)
        if attrs is not None:
            return attrs
        # Last resort: a readable string representation.
        return str(obj)
|
21
|
-
|
22
|
-
|
23
|
-
class LlamaIndexTracer:
    """Trace LlamaIndex query pipelines and upload the events to RagaAI Catalyst.

    The tracer installs a custom ``LlamaDebugHandler`` on the global LlamaIndex
    ``Settings.callback_manager`` and monkey-patches common LlamaIndex entry
    points (``VectorStoreIndex``, ``OpenAI``, ``ServiceContext``) so that
    user-constructed components pick up the callback manager automatically.
    Each completed ``query`` event is serialized to a temporary JSON file and
    uploaded through the Catalyst presigned-URL flow.
    """

    def __init__(self, user_detail):
        """
        Args:
            user_detail: dict with keys ``project_name``, ``project_id``,
                ``dataset_name`` and ``trace_user_detail`` (the per-trace
                payload that is enriched and uploaded with the traces).
        """
        self.trace_handler = None
        self.callback_manager = (
            CallbackManager()
        )  # Ensure callback manager is initialized
        self._original_inits = {}  # Store original __init__ methods
        self.project_name = user_detail["project_name"]
        self.project_id = user_detail["project_id"]
        self.dataset_name = user_detail["dataset_name"]
        self.user_detail = user_detail["trace_user_detail"]
        self.base_url = f"{RagaAICatalyst.BASE_URL}"
        self.timeout = 10  # seconds, applied to every Catalyst HTTP call
        self.query_count = 0
        self._upload_task = None

    def start(self):
        """Start tracing - call this before your LlamaIndex operations."""
        outer_self = self  # Capture outer self reference for inner class

        class CustomTraceHandler(LlamaDebugHandler):
            def __init__(self):
                super().__init__()
                self.traces: List[Dict[str, Any]] = []
                self.current_query_traces: List[Dict[str, Any]] = []
                self.in_query = False
                self.query_event_id = None

            def on_event_start(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                parent_id: str = "",
                **kwargs: Any
            ) -> None:
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().astimezone().isoformat(),
                    "payload": payload,
                    "status": "started",
                    "event_id": event_id,
                    "parent_id": parent_id,
                }
                # A top-level "query" event opens a new per-query trace buffer.
                if event_type == "query":
                    self.in_query = True
                    self.query_event_id = event_id
                    self.current_query_traces = []

                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)

            def on_event_end(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                **kwargs: Any
            ) -> None:
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().astimezone().isoformat(),
                    "payload": payload,
                    "status": "completed",
                    "event_id": event_id,
                }
                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)

                # If this is the end of a query event, automatically save the traces
                if event_type == "query" and event_id == self.query_event_id:
                    self.in_query = False
                    outer_self._save_current_query_traces(self.current_query_traces)
                    self.current_query_traces = []

        self.trace_handler = CustomTraceHandler()
        self.callback_manager.add_handler(self.trace_handler)
        Settings.callback_manager = self.callback_manager

        # Monkey-patch LlamaIndex components
        self._monkey_patch()
        return self  # Return self to allow method chaining

    def _save_current_query_traces(self, query_traces):
        """Serialize one finished query's traces to a temp file and upload them."""
        self.query_count += 1
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_query_{self.query_count}_{timestamp}.json"

        traces = self._add_traces_in_data(query_traces)

        # Write the tracer json files to a temporary directory.
        # Bug fix: ``filename`` was computed but never used, so every query
        # wrote to the same placeholder path and clobbered earlier traces.
        temp_dir = tempfile.gettempdir()
        temp_file_path = os.path.join(temp_dir, filename)

        with open(temp_file_path, "w") as f:
            json.dump([traces], f, indent=2, cls=CustomEncoder)

        # Upload the traces: ensure the dataset schema exists, then push the
        # file to object storage and register it with Catalyst.
        self._create_dataset_schema_with_trace()
        presignedUrl = self._get_presigned_url()
        self._put_presigned_url(presignedUrl, temp_file_path)
        self._insert_traces(presignedUrl)

    def _monkey_patch(self):
        """Monkey-patch LlamaIndex components to automatically include the callback manager"""
        from llama_index.core import VectorStoreIndex, ServiceContext
        from llama_index.llms.openai import OpenAI

        # Import any other classes you need to patch here

        def make_new_init(original_init, callback_manager):
            def new_init(self, *args, **kwargs):
                # If 'callback_manager' is not provided, inject our tracer's callback manager
                if "callback_manager" not in kwargs:
                    kwargs["callback_manager"] = callback_manager
                original_init(self, *args, **kwargs)

            return new_init

        # Monkey-patch VectorStoreIndex
        self._original_inits["VectorStoreIndex"] = VectorStoreIndex.__init__
        VectorStoreIndex.__init__ = make_new_init(
            VectorStoreIndex.__init__, self.callback_manager
        )

        # Monkey-patch OpenAI LLM
        self._original_inits["OpenAI"] = OpenAI.__init__
        OpenAI.__init__ = make_new_init(OpenAI.__init__, self.callback_manager)

        # Monkey-patch ServiceContext
        self._original_inits["ServiceContext"] = ServiceContext.__init__
        ServiceContext.__init__ = make_new_init(
            ServiceContext.__init__, self.callback_manager
        )

        # To monkey-patch additional classes:
        # 1. Import the class you want to patch
        # from llama_index.some_module import SomeOtherClass
        # 2. Store the original __init__ method
        # self._original_inits['SomeOtherClass'] = SomeOtherClass.__init__
        # 3. Replace the __init__ method with the new one that injects the callback manager
        # SomeOtherClass.__init__ = make_new_init(SomeOtherClass.__init__, self.callback_manager)
        # Repeat steps 1-3 for each additional class you wish to monkey-patch

    def stop(self):
        """Stop tracing and restore original methods"""
        # self._upload_traces(save_json_to_pwd=True)
        self.callback_manager.remove_handler(self.trace_handler)
        self._restore_original_inits()
        print("Traces uploaded")
        # NOTE(review): uploads actually happen per-query in
        # _save_current_query_traces; this flag only marks completion.
        self._upload_task = True

    def _restore_original_inits(self):
        """Restore the original __init__ methods of LlamaIndex components"""
        from llama_index.core import VectorStoreIndex, ServiceContext
        from llama_index.llms.openai import OpenAI

        # Import any other classes you patched

        # Restore VectorStoreIndex
        if "VectorStoreIndex" in self._original_inits:
            VectorStoreIndex.__init__ = self._original_inits["VectorStoreIndex"]

        # Restore OpenAI
        if "OpenAI" in self._original_inits:
            OpenAI.__init__ = self._original_inits["OpenAI"]

        # Restore ServiceContext
        if "ServiceContext" in self._original_inits:
            ServiceContext.__init__ = self._original_inits["ServiceContext"]

        # To restore additional classes:
        # Check if the class was patched, then restore the original __init__
        # if 'SomeOtherClass' in self._original_inits:
        #     SomeOtherClass.__init__ = self._original_inits['SomeOtherClass']

    def _generate_trace_id(self):
        """
        Generate a random trace ID using UUID4.
        Returns a string representation of the UUID with no hyphens.
        """
        return '0x'+str(uuid.uuid4()).replace('-', '')

    def _get_user_passed_detail(self):
        """Enrich the user-supplied detail with a fresh trace id and metadata."""
        # NOTE(review): this mutates self.user_detail in place, so repeated
        # calls overwrite trace_id/metadata on the shared dict — confirm that
        # is intended before changing.
        user_detail = self.user_detail
        user_detail["trace_id"] = self._generate_trace_id()
        metadata = user_detail["metadata"]
        metadata["log_source"] = "llamaindex_tracer"
        metadata["recorded_on"] = datetime.now().isoformat()
        user_detail["metadata"] = metadata
        return user_detail

    def _add_traces_in_data(self, traces=None):
        """Add traces to user detail"""
        user_detail = self._get_user_passed_detail()
        if traces is None:
            if not self.trace_handler:
                raise RuntimeError("No traces available. Did you call start()?")
            traces = self.trace_handler.traces
        user_detail["traces"] = traces
        return user_detail

    def _create_dataset_schema_with_trace(self):
        """Create (or confirm) the Catalyst dataset schema; returns the HTTP status code."""
        SCHEMA_MAPPING_NEW = {
            "trace_id": {"columnType": "traceId"},
            "trace_uri": {"columnType": "traceUri"},
            "prompt": {"columnType": "prompt"},
            "response":{"columnType": "response"},
            "context": {"columnType": "context"},
            "llm_model": {"columnType":"pipeline"},
            "recorded_on": {"columnType": "metadata"},
            "embed_model": {"columnType":"pipeline"},
            "log_source": {"columnType": "metadata"},
            "vector_store":{"columnType":"pipeline"},
            "feedback": {"columnType":"feedBack"}
        }
        def make_request():
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                "X-Project-Name": self.project_name,
            }
            payload = json.dumps({
                "datasetName": self.dataset_name,
                "schemaMapping": SCHEMA_MAPPING_NEW,
                "traceFolderUrl": None,
            })
            response = requests.request("POST",
                f"{self.base_url}/v1/llm/dataset/logs",
                headers=headers,
                data=payload,
                timeout=self.timeout
            )
            return response

        response = make_request()

        if response.status_code == 401:
            # get_token() # Fetch a new token and set it in the environment
            response = make_request()  # Retry the request
        if response.status_code != 200:
            return response.status_code
        return response.status_code

    def _get_presigned_url(self):
        """Request one presigned upload URL for the dataset; None on failure."""
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "numFiles": 1,
        })
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Name": self.project_name,
        }

        response = requests.request("GET",
            f"{self.base_url}/v1/llm/presigned-url",
            headers=headers,
            data=payload,
            timeout=self.timeout)
        if response.status_code == 200:
            presignedUrls = response.json()["data"]["presignedUrls"][0]
            return presignedUrls
        # NOTE(review): callers do not guard against a failed request; made the
        # implicit None explicit so the failure mode is at least visible here.
        return None

    def _put_presigned_url(self, presignedUrl, filename):
        """Upload the trace file body to the presigned URL (Azure-aware)."""
        headers = {
            "Content-Type": "application/json",
        }

        if "blob.core.windows.net" in presignedUrl:  # Azure
            headers["x-ms-blob-type"] = "BlockBlob"
        print("Uploading traces...")
        with open(filename) as f:
            # Strip newlines so the blob is a single-line JSON payload.
            payload = f.read().replace("\n", "").replace("\r", "").encode()

        response = requests.request("PUT",
            presignedUrl,
            headers=headers,
            data=payload,
            timeout=self.timeout)
        # Bug fix: the original condition used ``or`` (``!= 200 or != 201``),
        # which is true for every status code; report failure only when the
        # status is neither success code.
        if response.status_code not in (200, 201):
            return response, response.status_code

    def _insert_traces(self, presignedUrl):
        """Register the uploaded trace file with Catalyst."""
        headers = {
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "Content-Type": "application/json",
            "X-Project-Name": self.project_name,
        }
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "presignedUrl": presignedUrl,
        })
        response = requests.request("POST",
            f"{self.base_url}/v1/llm/insert/trace",
            headers=headers,
            data=payload,
            timeout=self.timeout)

    def _upload_traces(self, save_json_to_pwd=None):
        """Save traces to a file"""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_{timestamp}.json"

        traces = self._add_traces_in_data()

        if save_json_to_pwd:
            with open(filename, "w") as f:
                json.dump([traces], f, indent=2, cls=CustomEncoder)
            # Bug fix: the message printed a placeholder instead of the
            # actual file the traces were written to.
            print(f"tracer is saved to {filename}")

        self._create_dataset_schema_with_trace()
        presignedUrl = self._get_presigned_url()
        self._put_presigned_url(presignedUrl, filename)
        self._insert_traces(presignedUrl)
        print("Traces uploaded")

    def get_upload_status(self):
        """Check the status of the trace upload."""
        if self._upload_task is None:
            return "No upload task in progress."
        if self._upload_task:
            return "Upload completed"