ragaai-catalyst 2.0.5__py3-none-any.whl → 2.0.6b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,361 @@
1
+ from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
2
+ from llama_index.core import Settings
3
+ from typing import List, Dict, Any, Optional
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ import json
7
+ import uuid
8
+ import os
9
+ import requests
10
+ import tempfile
11
+
12
+ from ..ragaai_catalyst import RagaAICatalyst
13
+
14
class CustomEncoder(json.JSONEncoder):
    """JSON encoder with fallbacks for values the stock encoder rejects."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        * ``Enum`` members are replaced by their ``value``.
        * Any other object that exposes ``__dict__`` is replaced by that
          attribute mapping.
        * Everything else is rendered with ``str()``.
        """
        if isinstance(obj, Enum):
            return obj.value
        try:
            # Objects carrying instance attributes serialise as that mapping.
            return obj.__dict__
        except AttributeError:
            # No attribute mapping available: fall back to the text form.
            return str(obj)
21
+
22
+
23
class LlamaIndexTracer:
    """Record LlamaIndex callback events and upload them as trace files.

    ``start()`` registers a debug handler on a ``CallbackManager`` (also
    installed as ``Settings.callback_manager``) and monkey-patches a few
    LlamaIndex constructors so they receive that manager by default. Each time
    a ``"query"`` event ends, the events collected for that query are written
    to a JSON file in the temp directory and uploaded through the
    ``/v1/llm/...`` endpoints under ``RagaAICatalyst.BASE_URL``.
    """

    def __init__(self, user_detail):
        """Create a tracer.

        Args:
            user_detail: Mapping with the keys ``"project_name"``,
                ``"project_id"``, ``"dataset_name"`` and
                ``"trace_user_detail"`` (the template every uploaded trace
                record is built from).

        Raises:
            KeyError: If one of those keys is missing.
        """
        self.trace_handler = None
        self.callback_manager = CallbackManager()
        # name -> original __init__ of every class patched by _monkey_patch
        self._original_inits = {}
        self.project_name = user_detail["project_name"]
        self.project_id = user_detail["project_id"]
        self.dataset_name = user_detail["dataset_name"]
        self.user_detail = user_detail["trace_user_detail"]
        self.base_url = f"{RagaAICatalyst.BASE_URL}"
        self.timeout = 10  # seconds, applied to every HTTP request
        self.query_count = 0
        self._upload_task = None

    def start(self):
        """Start tracing - call this before your LlamaIndex operations.

        Returns:
            This tracer, to allow method chaining.
        """
        outer_self = self  # the handler below needs the tracer, not itself

        class CustomTraceHandler(LlamaDebugHandler):
            """Debug handler that groups events per query."""

            def __init__(self):
                super().__init__()
                self.traces: List[Dict[str, Any]] = []
                self.current_query_traces: List[Dict[str, Any]] = []
                self.in_query = False
                self.query_event_id = None

            def on_event_start(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                parent_id: str = "",
                **kwargs: Any
            ) -> str:
                """Record a "started" entry; a query event opens a new group."""
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().isoformat(),
                    "payload": payload,
                    "status": "started",
                    "event_id": event_id,
                    "parent_id": parent_id,
                }
                if event_type == "query":
                    self.in_query = True
                    self.query_event_id = event_id
                    self.current_query_traces = []

                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)
                # The base handler interface returns the event id.
                return event_id

            def on_event_end(
                self,
                event_type: Optional[str],
                payload: Optional[Dict[str, Any]] = None,
                event_id: str = "",
                **kwargs: Any
            ) -> None:
                """Record a "completed" entry; flush when the query ends."""
                trace = {
                    "event_type": event_type,
                    "timestamp": datetime.now().isoformat(),
                    "payload": payload,
                    "status": "completed",
                    "event_id": event_id,
                }
                if self.in_query:
                    self.current_query_traces.append(trace)
                self.traces.append(trace)

                # The end of the query event that opened the group triggers
                # an automatic save + upload of that group.
                if event_type == "query" and event_id == self.query_event_id:
                    self.in_query = False
                    outer_self._save_current_query_traces(
                        self.current_query_traces
                    )
                    self.current_query_traces = []

        self.trace_handler = CustomTraceHandler()
        self.callback_manager.add_handler(self.trace_handler)
        Settings.callback_manager = self.callback_manager

        # Monkey-patch LlamaIndex components
        self._monkey_patch()
        return self

    def _save_current_query_traces(self, query_traces):
        """Write one query's traces to a temp JSON file and upload it.

        Args:
            query_traces: Event dicts collected for the query.
        """
        self.query_count += 1
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_query_{self.query_count}_{timestamp}.json"

        traces = self._add_traces_in_data(query_traces)

        temp_file_path = os.path.join(tempfile.gettempdir(), filename)
        with open(temp_file_path, "w", encoding="utf-8") as f:
            json.dump([traces], f, indent=2, cls=CustomEncoder)

        self._upload_trace_file(temp_file_path)

    def _upload_trace_file(self, file_path):
        """Run the upload sequence for one trace file.

        Ensures the dataset schema exists, requests a presigned URL, PUTs the
        file there and registers the upload.

        Returns:
            True when the sequence ran, False when no presigned URL could be
            obtained (nothing is uploaded in that case).
        """
        self._create_dataset_schema_with_trace()
        presigned_url = self._get_presigned_url()
        if presigned_url is None:
            # Without a URL the PUT would fail with a confusing TypeError.
            print("Failed to obtain a presigned URL; traces were not uploaded.")
            return False
        self._put_presigned_url(presigned_url, file_path)
        self._insert_traces(presigned_url)
        return True

    def _patch_targets(self):
        """Return ``{name: class}`` for the LlamaIndex classes to patch."""
        from llama_index.core import VectorStoreIndex, ServiceContext
        from llama_index.llms.openai import OpenAI

        # To patch additional classes, import them here and add them below.
        return {
            "VectorStoreIndex": VectorStoreIndex,
            "OpenAI": OpenAI,
            "ServiceContext": ServiceContext,
        }

    def _monkey_patch(self):
        """Monkey-patch LlamaIndex components to automatically include the callback manager."""
        callback_manager = self.callback_manager

        def make_new_init(original_init):
            def new_init(instance, *args, **kwargs):
                # Inject our manager only when the caller did not pass one.
                kwargs.setdefault("callback_manager", callback_manager)
                original_init(instance, *args, **kwargs)

            return new_init

        for name, cls in self._patch_targets().items():
            if name in self._original_inits:
                # Already patched by this tracer: wrapping again would store
                # the wrapper as the "original" and make restore impossible.
                continue
            self._original_inits[name] = cls.__init__
            cls.__init__ = make_new_init(cls.__init__)

    def stop(self):
        """Stop tracing and restore original methods.

        Does nothing when ``start()`` was never called.
        """
        if self.trace_handler is None:
            return
        self.callback_manager.remove_handler(self.trace_handler)
        self._restore_original_inits()
        print("Traces uploaded")
        self._upload_task = True

    def _restore_original_inits(self):
        """Restore the original __init__ methods of LlamaIndex components."""
        for name, cls in self._patch_targets().items():
            # pop() so that a later start() patches the class afresh
            original = self._original_inits.pop(name, None)
            if original is not None:
                cls.__init__ = original

    def _generate_trace_id(self):
        """Return a random trace id: ``"0x"`` followed by the 32 hex digits of a UUID4."""
        return "0x" + uuid.uuid4().hex

    def _get_user_passed_detail(self):
        """Build a fresh trace record from the user-detail template.

        The template and its ``metadata`` mapping are copied, so neither
        ``self.user_detail`` nor the metadata object supplied by the caller is
        modified.

        Returns:
            A dict with a new ``trace_id`` and with ``log_source`` and
            ``recorded_on`` added to its ``metadata``.
        """
        user_detail = dict(self.user_detail)
        user_detail["trace_id"] = self._generate_trace_id()
        metadata = dict(user_detail.get("metadata") or {})
        metadata["log_source"] = "llamaindex_tracer"
        metadata["recorded_on"] = datetime.utcnow().isoformat().replace("T", " ")
        user_detail["metadata"] = metadata
        return user_detail

    def _add_traces_in_data(self, traces=None):
        """Return a trace record with ``traces`` attached.

        Args:
            traces: Event dicts to attach; when None, every event recorded by
                the current handler is used.

        Raises:
            RuntimeError: If ``traces`` is None and no handler exists yet.
        """
        user_detail = self._get_user_passed_detail()
        if traces is None:
            if not self.trace_handler:
                raise RuntimeError("No traces available. Did you call start()?")
            traces = self.trace_handler.traces
        user_detail["traces"] = traces
        return user_detail

    def _auth_headers(self):
        """Return the JSON/bearer-token headers shared by the API requests."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Name": self.project_name,
        }

    def _create_dataset_schema_with_trace(self):
        """POST the dataset schema mapping and return the HTTP status code."""
        SCHEMA_MAPPING_NEW = {
            "trace_id": {"columnType": "traceId"},
            "trace_uri": {"columnType": "traceUri"},
            "prompt": {"columnType": "prompt"},
            "response": {"columnType": "response"},
            "context": {"columnType": "context"},
            "llm_model": {"columnType": "pipeline"},
            "recorded_on": {"columnType": "metadata"},
            "embed_model": {"columnType": "pipeline"},
            "log_source": {"columnType": "metadata"},
            "vector_store": {"columnType": "pipeline"},
            "feedback": {"columnType": "feedBack"},
        }

        def make_request():
            payload = json.dumps({
                "datasetName": self.dataset_name,
                "schemaMapping": SCHEMA_MAPPING_NEW,
                "traceFolderUrl": None,
            })
            return requests.request(
                "POST",
                f"{self.base_url}/v1/llm/dataset/logs",
                headers=self._auth_headers(),
                data=payload,
                timeout=self.timeout,
            )

        response = make_request()
        if response.status_code == 401:
            # NOTE(review): no token refresh is wired in here, so this retry
            # re-reads RAGAAI_CATALYST_TOKEN from the environment as-is.
            response = make_request()
        return response.status_code

    def _get_presigned_url(self):
        """Request one presigned upload URL.

        Returns:
            The URL, or None when the server does not answer with 200.
        """
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "numFiles": 1,
        })
        response = requests.request(
            "GET",
            f"{self.base_url}/v1/llm/presigned-url",
            headers=self._auth_headers(),
            data=payload,
            timeout=self.timeout,
        )
        if response.status_code == 200:
            return response.json()["data"]["presignedUrls"][0]
        return None

    def _put_presigned_url(self, presignedUrl, filename):
        """PUT the contents of ``filename`` to ``presignedUrl``.

        Returns:
            ``(response, status_code)``. The original condition
            (``!= 200 or != 201``) was always true, so this tuple was returned
            unconditionally; that return shape is kept.
        """
        headers = {
            "Content-Type": "application/json",
        }
        if "blob.core.windows.net" in presignedUrl:  # Azure
            headers["x-ms-blob-type"] = "BlockBlob"
        print("Uploading traces...")
        with open(filename) as f:
            # Collapse the pretty-printed JSON onto one line before sending.
            payload = f.read().replace("\n", "").replace("\r", "").encode()

        response = requests.request(
            "PUT",
            presignedUrl,
            headers=headers,
            data=payload,
            timeout=self.timeout,
        )
        return response, response.status_code

    def _insert_traces(self, presignedUrl):
        """Register the file uploaded to ``presignedUrl`` with the dataset."""
        payload = json.dumps({
            "datasetName": self.dataset_name,
            "presignedUrl": presignedUrl,
        })
        requests.request(
            "POST",
            f"{self.base_url}/v1/llm/insert/trace",
            headers=self._auth_headers(),
            data=payload,
            timeout=self.timeout,
        )

    def _upload_traces(self, save_json_to_pwd=None):
        """Write every recorded trace to a JSON file and upload it.

        Args:
            save_json_to_pwd: When truthy the file is written to the current
                working directory; otherwise it goes to the temp directory.
                (Previously nothing was written in that case and the upload
                failed on the missing file.)
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"trace_{timestamp}.json"

        traces = self._add_traces_in_data()

        if save_json_to_pwd:
            file_path = filename
        else:
            file_path = os.path.join(tempfile.gettempdir(), filename)
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump([traces], f, indent=2, cls=CustomEncoder)
        if save_json_to_pwd:
            print(f"tracer is saved to {filename}")

        if self._upload_trace_file(file_path):
            print("Traces uploaded")

    def get_upload_status(self):
        """Check the status of the trace upload."""
        if self._upload_task is None:
            return "No upload task in progress."
        if self._upload_task:
            return "Upload completed"
        return None
@@ -17,7 +17,7 @@ from .instrumentators import (
17
17
  LlamaIndexInstrumentor,
18
18
  )
19
19
  from .utils import get_unique_key
20
-
20
+ # from .llamaindex_callback import LlamaIndexTracer
21
21
  from ..ragaai_catalyst import RagaAICatalyst
22
22
 
23
23
  logger = logging.getLogger(__name__)
@@ -86,13 +86,19 @@ class Tracer:
86
86
  logger.error(f"Failed to retrieve projects list: {e}")
87
87
  raise
88
88
 
89
+ if tracer_type == "langchain":
90
+ self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
89
91
 
90
- self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
92
+ self._tracer_provider = self._setup_provider()
93
+ self._instrumentor = self._setup_instrumentor(tracer_type)
94
+ self.is_instrumented = False
95
+ self._upload_task = None
96
+ elif tracer_type == "llamaindex":
97
+ self._upload_task = None
98
+ from .llamaindex_callback import LlamaIndexTracer
91
99
 
92
- self._tracer_provider = self._setup_provider()
93
- self._instrumentor = self._setup_instrumentor(tracer_type)
94
- self.is_instrumented = False
95
- self._upload_task = None
100
+ else:
101
+ raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
96
102
 
97
103
  def _improve_metadata(self, metadata, tracer_type):
98
104
  if metadata is None:
@@ -142,34 +148,44 @@ class Tracer:
142
148
 
143
149
def start(self):
    """Start the tracer.

    Returns:
        For ``"langchain"`` this ``Tracer``; for ``"llamaindex"`` the started
        ``LlamaIndexTracer``. That instance is also kept on the ``Tracer`` so
        that ``stop()`` and ``get_upload_status()`` can reach it.
    """
    if self.tracer_type == "langchain":
        if not self.is_instrumented:
            self._instrumentor().instrument(tracer_provider=self._tracer_provider)
            self.is_instrumented = True
        print(f"Tracer started for project: {self.project_name}")
        return self
    elif self.tracer_type == "llamaindex":
        from .llamaindex_callback import LlamaIndexTracer

        # Keep the instance: stop() must act on the tracer that was started,
        # not on a new one.
        self._llamaindex_tracer = LlamaIndexTracer(self._pass_user_data())
        return self._llamaindex_tracer.start()


def stop(self):
    """Stop the tracer and initiate trace upload.

    Returns:
        A status message for ``"langchain"``; for ``"llamaindex"`` whatever
        the stored tracer's ``stop()`` returns, or ``"No traces to upload"``
        when ``start()`` was never called.
    """
    if self.tracer_type == "langchain":
        if not self.is_instrumented:
            logger.warning("Tracer was not started. No traces to upload.")
            return "No traces to upload"

        print("Stopping tracer and initiating trace upload...")
        self._cleanup()
        self._upload_task = self._run_async(self._upload_traces())
        return "Trace upload initiated. Use get_upload_status() to check the status."
    elif self.tracer_type == "llamaindex":
        # getattr: the attribute only exists once start() has run.
        llamaindex_tracer = getattr(self, "_llamaindex_tracer", None)
        if llamaindex_tracer is None:
            logger.warning("Tracer was not started. No traces to upload.")
            return "No traces to upload"
        return llamaindex_tracer.stop()


def get_upload_status(self):
    """Check the status of the trace upload.

    For ``"llamaindex"`` the answer comes from the tracer created by
    ``start()``; before ``start()`` no upload can be in progress.
    """
    if self.tracer_type == "langchain":
        if self._upload_task is None:
            return "No upload task in progress."
        if self._upload_task.done():
            try:
                result = self._upload_task.result()
                return f"Upload completed: {result}"
            except Exception as e:
                return f"Upload failed: {str(e)}"
        return "Upload in progress..."
    elif self.tracer_type == "llamaindex":
        llamaindex_tracer = getattr(self, "_llamaindex_tracer", None)
        if llamaindex_tracer is None:
            return "No upload task in progress."
        return llamaindex_tracer.get_upload_status()
173
189
 
174
190
  def _run_async(self, coroutine):
175
191
  """Run an asynchronous coroutine in a separate thread."""
@@ -246,3 +262,21 @@ class Tracer:
246
262
  # Reset instrumentation flag
247
263
  self.is_instrumented = False
248
264
  # Note: We're not resetting all attributes here to allow for upload status checking
265
+ def _pass_user_data(self):
266
+ return {"project_name":self.project_name,
267
+ "project_id": self.project_id,
268
+ "dataset_name":self.dataset_name,
269
+ "trace_user_detail" : {
270
+ "project_id": self.project_id,
271
+ "trace_id": "",
272
+ "session_id": None,
273
+ "trace_type": self.tracer_type,
274
+ "traces": [],
275
+ "metadata": self.metadata,
276
+ "pipeline": {
277
+ "llm_model": self.pipeline["llm_model"],
278
+ "vector_store": self.pipeline["vector_store"],
279
+ "embed_model": self.pipeline["embed_model"]
280
+ }
281
+ }
282
+ }