ragaai-catalyst 2.1.5b2__py3-none-any.whl → 2.1.5b4__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
--- a/ragaai_catalyst/ragaai_catalyst.py
+++ b/ragaai_catalyst/ragaai_catalyst.py
@@ -2,7 +2,7 @@ import os
 import logging
 import requests
 from typing import Dict, Optional, Union
-
+import re
 logger = logging.getLogger("RagaAICatalyst")


@@ -55,10 +55,11 @@ class RagaAICatalyst:
         self.api_keys = api_keys or {}

         if base_url:
-            RagaAICatalyst.BASE_URL = base_url
+            RagaAICatalyst.BASE_URL = self._normalize_base_url(base_url)
             try:
+                # set os.environ["RAGAAI_CATALYST_BASE_URL"] before getting the token, as it is used in the get_token method
+                os.environ["RAGAAI_CATALYST_BASE_URL"] = RagaAICatalyst.BASE_URL
                 self.get_token()
-                os.environ["RAGAAI_CATALYST_BASE_URL"] = base_url
             except requests.exceptions.RequestException:
                 raise ConnectionError(
                     "The provided base_url is not accessible. Please re-check the base_url."
@@ -71,6 +72,14 @@
         if self.api_keys:
             self._upload_keys()

+    @staticmethod
+    def _normalize_base_url(url):
+        url = re.sub(r'(?<!:)//+', '/', url)  # collapse repeated slashes, ignoring the `://` of the scheme
+        url = url.rstrip("/")  # remove trailing slashes
+        if not url.endswith("/api"):  # ensure the URL ends with /api
+            url = f"{url}/api"
+        return url
+
     def _set_access_key_secret_key(self, access_key, secret_key):
         os.environ["RAGAAI_CATALYST_ACCESS_KEY"] = access_key
         os.environ["RAGAAI_CATALYST_SECRET_KEY"] = secret_key
--- a/ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py
+++ b/ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py
@@ -150,6 +150,8 @@ class LLMTracerMixin:
         beta_module = openai_module.beta

         # Patch openai.beta.threads
+        import openai
+        openai.api_type = "openai"
         if hasattr(beta_module, "threads"):
             threads_obj = beta_module.threads
             # Patch top-level methods on openai.beta.threads
--- /dev/null
+++ b/ragaai_catalyst/tracers/langchain_callback.py
@@ -0,0 +1,568 @@
+from typing import Any, Dict, List, Optional, Union, Sequence
+
+import attr
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.schema import LLMResult, AgentAction, AgentFinish, BaseMessage
+from datetime import datetime
+import json
+import os
+from uuid import UUID
+from functools import wraps
+import asyncio
+from langchain_core.documents import Document
+import logging
+import tempfile
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class LangchainTracer(BaseCallbackHandler):
+    """
+    An enhanced callback handler for LangChain that traces all actions and saves them to a JSON file.
+    Includes improved error handling, async support, and configuration options.
+    """
+
+    def __init__(
+        self,
+        output_path: str = tempfile.gettempdir(),
+        trace_all: bool = True,
+        save_interval: Optional[int] = None,
+        log_level: int = logging.INFO,
+    ):
+        """
+        Initialize the tracer with enhanced configuration options.
+
+        Args:
+            output_path (str): Directory where trace files will be saved
+            trace_all (bool): Whether to trace all components or only specific ones
+            save_interval (Optional[int]): Interval in seconds to auto-save traces
+            log_level (int): Logging level for the tracer
+        """
+        super().__init__()
+        self.output_path = output_path
+        self.trace_all = trace_all
+        self.save_interval = save_interval
+        self._active = False
+        self._original_inits = {}
+        self._original_methods = {}
+        self.additional_metadata = {}
+        self._save_task = None
+        self._current_query = None  # track the current query
+        self.filepath = None
+        logger.setLevel(log_level)
+
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+
+        self.reset_trace()
+
+
+    def __enter__(self):
+        """Context manager entry"""
+        self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit"""
+
+        self.stop()
+        if exc_type:
+            logger.error(f"Error in context manager: {exc_val}")
+            return False
+        return True
+
+    def reset_trace(self):
+        """Reset the current trace to initial state with enhanced structure"""
+        self.current_trace: Dict[str, Any] = {
+            "start_time": None,
+            "end_time": None,
+            "actions": [],
+            "llm_calls": [],
+            "chain_starts": [],
+            "chain_ends": [],
+            "agent_actions": [],
+            "chat_model_calls": [],
+            "retriever_actions": [],
+            "tokens": [],
+            "errors": [],
+            "query": self._current_query,  # include the current query in the trace
+            "metadata": {
+                "version": "2.0",
+                "trace_all": self.trace_all,
+                "save_interval": self.save_interval,
+            },
+        }
+
+    async def _periodic_save(self):
+        """Periodically save traces if save_interval is set"""
+        while self._active and self.save_interval:
+            await asyncio.sleep(self.save_interval)
+            await self._async_save_trace()
+
+    async def _async_save_trace(self, force: bool = False):
+        """Asynchronously save the current trace to a JSON file"""
+        if not self.current_trace["start_time"] and not force:
+            return
+
+        try:
+            self.current_trace["end_time"] = datetime.now()
+
+            # Use the query from the trace or fallback to a default
+            safe_query = self._current_query or "unknown"
+
+            # Sanitize the query for filename
+            safe_query = ''.join(c for c in safe_query if c.isalnum() or c.isspace())[:50].strip()
+
+            # Add a timestamp to ensure unique filenames
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"langchain_callback_traces.json"
+            filepath = os.path.join(self.output_path, filename)
+            self.filepath = filepath
+
+            trace_to_save = self.current_trace.copy()
+            trace_to_save["start_time"] = str(trace_to_save["start_time"])
+            trace_to_save["end_time"] = str(trace_to_save["end_time"])
+
+            # Save if there are meaningful events or if force is True
+            if (
+                len(trace_to_save["llm_calls"]) > 0
+                or len(trace_to_save["chain_starts"]) > 0
+                or len(trace_to_save["chain_ends"]) > 0
+                or len(trace_to_save["errors"]) > 0
+                or force
+            ):
+                async with asyncio.Lock():
+                    with open(filepath, "w", encoding="utf-8") as f:
+                        json.dump(trace_to_save, f, indent=2, default=str)
+
+                logger.info(f"Trace saved to: {filepath}")
+
+            # Reset the current query after saving
+            self._current_query = None
+
+            # Reset the trace
+            self.reset_trace()
+
+        except Exception as e:
+            logger.error(f"Error saving trace: {e}")
+            self.on_error(e, context="save_trace")
+
+    def _save_trace(self, force: bool = False):
+        """Synchronous version of trace saving"""
+        if asyncio.get_event_loop().is_running():
+            asyncio.create_task(self._async_save_trace(force))
+        else:
+            asyncio.run(self._async_save_trace(force))
+
+    def _create_safe_wrapper(self, original_func, component_name):
+        """Create a safely wrapped version of an original function with enhanced error handling"""
+
+        @wraps(original_func)
+        def wrapped(*args, **kwargs):
+            if not self._active:
+                return original_func(*args, **kwargs)
+
+            try:
+                # Deep copy kwargs to avoid modifying the original
+                kwargs_copy = kwargs.copy() if kwargs is not None else {}
+
+                # Handle different calling conventions
+                if 'callbacks' not in kwargs_copy:
+                    kwargs_copy['callbacks'] = [self]
+                elif self not in kwargs_copy['callbacks']:
+                    kwargs_copy['callbacks'].append(self)
+
+                # Try different method signatures
+                try:
+                    # First, try calling with modified kwargs
+                    return original_func(*args, **kwargs_copy)
+                except TypeError:
+                    # If that fails, try without kwargs
+                    try:
+                        return original_func(*args)
+                    except Exception as e:
+                        # If all else fails, use original call
+                        logger.error(f"Failed to invoke {component_name} with modified callbacks: {e}")
+                        return original_func(*args, **kwargs)
+
+            except Exception as e:
+                # Log any errors that occur during the function call
+                logger.error(f"Error in {component_name} wrapper: {e}")
+
+                # Record the error using the tracer's error handling method
+                self.on_error(e, context=f"wrapper_{component_name}")
+
+                # Fallback to calling the original function without modifications
+                return original_func(*args, **kwargs)
+
+        return wrapped
+
+
+    def _monkey_patch(self):
+        """Enhanced monkey-patching with comprehensive component support"""
+        from langchain.llms import OpenAI
+        # from langchain_groq import ChatGroq
+        # from langchain_google_genai import ChatGoogleGenerativeAI
+        # from langchain_anthropic import ChatAnthropic
+        from langchain_community.chat_models import ChatLiteLLM
+        # from langchain_cohere import ChatCohere
+        from langchain_openai import ChatOpenAI as ChatOpenAI_LangchainOpenAI
+        from langchain.chat_models import ChatOpenAI as ChatOpenAI_ChatModels
+        from langchain.chains import create_retrieval_chain, RetrievalQA
+
+        components_to_patch = {
+            "OpenAI": (OpenAI, "__init__"),
+            # "ChatGroq": (ChatGroq, "__init__"),
+            # "ChatGoogleGenerativeAI": (ChatGoogleGenerativeAI, "__init__"),
+            # "ChatAnthropic": (ChatAnthropic, "__init__"),
+            "ChatLiteLLM": (ChatLiteLLM, "__init__"),
+            # "ChatCohere": (ChatCohere, "__init__"),
+            "ChatOpenAI_LangchainOpenAI": (ChatOpenAI_LangchainOpenAI, "__init__"),
+            "ChatOpenAI_ChatModels": (ChatOpenAI_ChatModels, "__init__"),
+            "RetrievalQA": (RetrievalQA, "from_chain_type"),
+            "create_retrieval_chain": (create_retrieval_chain, None),
+        }
+
+        for name, (component, method_name) in components_to_patch.items():
+            try:
+                if method_name == "__init__":
+                    original = component.__init__
+                    self._original_inits[name] = original
+                    component.__init__ = self._create_safe_wrapper(original, name)
+                elif method_name:
+                    original = getattr(component, method_name)
+                    self._original_methods[name] = original
+                    if isinstance(original, classmethod):
+                        wrapped = classmethod(
+                            self._create_safe_wrapper(original.__func__, name)
+                        )
+                    else:
+                        wrapped = self._create_safe_wrapper(original, name)
+                    setattr(component, method_name, wrapped)
+                else:
+                    self._original_methods[name] = component
+                    globals()[name] = self._create_safe_wrapper(component, name)
+            except Exception as e:
+                logger.error(f"Error patching {name}: {e}")
+                self.on_error(e, context=f"patch_{name}")
+
+    def _restore_original_methods(self):
+        """Restore all original methods and functions with enhanced error handling"""
+        from langchain.llms import OpenAI
+        # from langchain_groq import ChatGroq
+        # from langchain_google_genai import ChatGoogleGenerativeAI
+        # from langchain_anthropic import ChatAnthropic
+        from langchain_community.chat_models import ChatLiteLLM
+        # from langchain_cohere import ChatCohere
+        from langchain_openai import ChatOpenAI as ChatOpenAI_LangchainOpenAI
+        from langchain.chat_models import ChatOpenAI as ChatOpenAI_ChatModels
+        from langchain.chains import create_retrieval_chain, RetrievalQA
+
+
+        for name, original in self._original_inits.items():
+            try:
+                component = eval(name)
+                component.__init__ = original
+            except Exception as e:
+                logger.error(f"Error restoring {name}: {e}")
+                self.on_error(e, context=f"restore_{name}")
+
+        for name, original in self._original_methods.items():
+            try:
+                if "." in name:
+                    module_name, method_name = name.rsplit(".", 1)
+                    module = eval(module_name)
+                    setattr(module, method_name, original)
+                else:
+                    globals()[name] = original
+            except Exception as e:
+                logger.error(f"Error restoring {name}: {e}")
+                self.on_error(e, context=f"restore_{name}")
+
+    def start(self):
+        """Start tracing with enhanced error handling and async support"""
+        try:
+            self.reset_trace()
+            self.current_trace["start_time"] = datetime.now()
+            self._active = True
+            self._monkey_patch()
+
+            if self.save_interval:
+                loop = asyncio.get_event_loop()
+                self._save_task = loop.create_task(self._periodic_save())
+
+            logger.info("Tracing started")
+        except Exception as e:
+            logger.error(f"Error starting tracer: {e}")
+            self.on_error(e, context="start")
+            raise
+
+    def stop(self):
+        """Stop tracing with enhanced cleanup"""
+        try:
+            self._active = False
+            if self._save_task:
+                self._save_task.cancel()
+            self._restore_original_methods()
+            # self._save_trace(force=True)
+
+            return self.current_trace.copy(), self.additional_metadata
+
+            logger.info("Tracing stopped")
+        except Exception as e:
+            logger.error(f"Error stopping tracer: {e}")
+            self.on_error(e, context="stop")
+            raise
+        finally:
+            self._original_inits.clear()
+            self._original_methods.clear()
+
+    def force_save(self):
+        """Force save the current trace"""
+        self._save_trace(force=True)
+
+    # Callback methods with enhanced error handling and logging
+    def on_llm_start(
+        self,
+        serialized: Dict[str, Any],
+        prompts: List[str],
+        run_id: UUID,
+        **kwargs: Any,
+    ) -> None:
+        try:
+            if not self.current_trace["start_time"]:
+                self.current_trace["start_time"] = datetime.now()
+
+            self.current_trace["llm_calls"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "event": "llm_start",
+                    "serialized": serialized,
+                    "prompts": prompts,
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="llm_start")
+
+    def on_llm_end(self, response: LLMResult, *, run_id: UUID, **kwargs: Any) -> None:
+        try:
+            self.current_trace["llm_calls"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "event": "llm_end",
+                    "response": response.dict(),
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+
+            end_time = datetime.now()
+            self.additional_metadata["latency"] = (end_time - self.current_trace["start_time"]).total_seconds()
+
+            if response and response.llm_output:
+                self.additional_metadata["model_name"] = response.llm_output.get("model_name", "")
+                self.additional_metadata["tokens"] = {}
+                if response.llm_output.get("token_usage"):
+                    self.additional_metadata["tokens"]["total"] = response.llm_output["token_usage"].get("total_tokens", 0)
+                    self.additional_metadata["tokens"]["prompt"] = response.llm_output["token_usage"].get("prompt_tokens", 0)
+                    self.additional_metadata["tokens"]["completion"] = response.llm_output["token_usage"].get("completion_tokens", 0)
+        except Exception as e:
+            self.on_error(e, context="llm_end")
+
+    def on_chat_model_start(
+        self,
+        serialized: Dict[str, Any],
+        messages: List[List[BaseMessage]],
+        *,
+        run_id: UUID,
+        **kwargs: Any,
+    ) -> None:
+        try:
+            messages_dict = [
+                [
+                    {
+                        "type": msg.type,
+                        "content": msg.content,
+                        "additional_kwargs": msg.additional_kwargs,
+                    }
+                    for msg in batch
+                ]
+                for batch in messages
+            ]
+
+            self.current_trace["chat_model_calls"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "event": "chat_model_start",
+                    "serialized": serialized,
+                    "messages": messages_dict,
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="chat_model_start")
+
+    def on_chain_start(
+        self,
+        serialized: Dict[str, Any],
+        inputs: Dict[str, Any],
+        *,
+        run_id: UUID,
+        **kwargs: Any,
+    ) -> None:
+        try:
+            context = ""
+            query = ""
+            if isinstance(inputs, dict):
+                if "context" in inputs:
+                    if isinstance(inputs["context"], Document):
+                        context = inputs["context"].page_content
+                    elif isinstance(inputs["context"], list):
+                        context = "\n".join(
+                            doc.page_content if isinstance(doc, Document) else str(doc)
+                            for doc in inputs["context"]
+                        )
+                    elif isinstance(inputs["context"], str):
+                        context = inputs["context"]
+
+                query = inputs.get("question", inputs.get("input", ""))
+
+                # Set the current query
+                self._current_query = query
+
+                chain_event = {
+                    "timestamp": datetime.now(),
+                    "serialized": serialized,
+                    "context": context,
+                    "query": inputs.get("question", inputs.get("input", "")),
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+
+                self.current_trace["chain_starts"].append(chain_event)
+        except Exception as e:
+            self.on_error(e, context="chain_start")
+
+    def on_chain_end(
+        self, outputs: Dict[str, Any], *, run_id: UUID, **kwargs: Any
+    ) -> None:
+        try:
+            self.current_trace["chain_ends"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "outputs": outputs,
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="chain_end")
+
+    def on_agent_action(self, action: AgentAction, run_id: UUID, **kwargs: Any) -> None:
+        try:
+            self.current_trace["agent_actions"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "action": action.dict(),
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="agent_action")
+
+    def on_agent_finish(self, finish: AgentFinish, run_id: UUID, **kwargs: Any) -> None:
+        try:
+            self.current_trace["agent_actions"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "event": "agent_finish",
+                    "finish": finish.dict(),
+                    "run_id": str(run_id),
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="agent_finish")
+
+    def on_retriever_start(
+        self, serialized: Dict[str, Any], query: str, *, run_id: UUID, **kwargs: Any
+    ) -> None:
+        try:
+            retriever_event = {
+                "timestamp": datetime.now(),
+                "event": "retriever_start",
+                "serialized": serialized,
+                "query": query,
+                "run_id": str(run_id),
+                "additional_kwargs": kwargs,
+            }
+
+            self.current_trace["retriever_actions"].append(retriever_event)
+        except Exception as e:
+            self.on_error(e, context="retriever_start")
+
+    def on_retriever_end(
+        self, documents: Sequence[Document], *, run_id: UUID, **kwargs: Any
+    ) -> None:
+        try:
+            processed_documents = [
+                {"page_content": doc.page_content, "metadata": doc.metadata}
+                for doc in documents
+            ]
+
+            retriever_event = {
+                "timestamp": datetime.now(),
+                "event": "retriever_end",
+                "documents": processed_documents,
+                "run_id": str(run_id),
+                "additional_kwargs": kwargs,
+            }
+
+            self.current_trace["retriever_actions"].append(retriever_event)
+        except Exception as e:
+            self.on_error(e, context="retriever_end")
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        try:
+            self.current_trace["tokens"].append(
+                {
+                    "timestamp": datetime.now(),
+                    "event": "new_token",
+                    "token": token,
+                    "additional_kwargs": kwargs,
+                }
+            )
+        except Exception as e:
+            self.on_error(e, context="llm_new_token")
+
+    def on_error(self, error: Exception, context: str = "", **kwargs: Any) -> None:
+        """Enhanced error handling with context"""
+        try:
+            error_event = {
+                "timestamp": datetime.now(),
+                "error": str(error),
+                "error_type": type(error).__name__,
+                "context": context,
+                "additional_kwargs": kwargs,
+            }
+            self.current_trace["errors"].append(error_event)
+            logger.error(f"Error in {context}: {error}")
+        except Exception as e:
+            logger.critical(f"Error in error handler: {e}")
+
+    def on_chain_error(self, error: Exception, **kwargs: Any) -> None:
+        self.on_error(error, context="chain", **kwargs)
+
+    def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
+        self.on_error(error, context="llm", **kwargs)
+
+    def on_tool_error(self, error: Exception, **kwargs: Any) -> None:
+        self.on_error(error, context="tool", **kwargs)
+
+    def on_retriever_error(self, error: Exception, **kwargs: Any) -> None:
+        self.on_error(error, context="retriever", **kwargs)
--- a/ragaai_catalyst/tracers/tracer.py
+++ b/ragaai_catalyst/tracers/tracer.py
@@ -1,4 +1,6 @@
+from audioop import add
 import os
+import uuid
 import datetime
 import logging
 import asyncio
@@ -6,6 +8,13 @@ import aiohttp
 import requests
 from contextlib import contextmanager
 from concurrent.futures import ThreadPoolExecutor
+from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
+from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
+
+from ragaai_catalyst.tracers.utils.langchain_tracer_extraction_logic import langchain_tracer_extraction
+from ragaai_catalyst.tracers.upload_traces import UploadTraces
+import tempfile
+import json

 from opentelemetry.sdk import trace as trace_sdk
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
@@ -118,6 +127,7 @@ class Tracer(AgenticTracing):
         self.timeout = 30
         self.num_projects = 100
         self.start_time = datetime.datetime.now().astimezone().isoformat()
+        self.model_cost_dict = load_model_costs()

         if update_llm_cost:
             # First update the model costs file from GitHub
@@ -152,11 +162,12 @@ class Tracer(AgenticTracing):
             raise

         if tracer_type == "langchain":
-            self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
+            # self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)

-            self._tracer_provider = self._setup_provider()
-            self._instrumentor = self._setup_instrumentor(tracer_type)
-            self.is_instrumented = False
+            # self._tracer_provider = self._setup_provider()
+            # self._instrumentor = self._setup_instrumentor(tracer_type)
+            # self.is_instrumented = False
+            # self._upload_task = None
             self._upload_task = None
         elif tracer_type == "llamaindex":
             self._upload_task = None
@@ -239,11 +250,12 @@
     def start(self):
         """Start the tracer."""
        if self.tracer_type == "langchain":
-            if not self.is_instrumented:
-                self._instrumentor().instrument(tracer_provider=self._tracer_provider)
-                self.is_instrumented = True
-            print(f"Tracer started for project: {self.project_name}")
-            return self
+            # if not self.is_instrumented:
+            #     self._instrumentor().instrument(tracer_provider=self._tracer_provider)
+            #     self.is_instrumented = True
+            # print(f"Tracer started for project: {self.project_name}")
+            self.langchain_tracer = LangchainTracer()
+            return self.langchain_tracer.start()
         elif self.tracer_type == "llamaindex":
             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
             return LlamaIndexTracer(self._pass_user_data()).start()
@@ -254,17 +266,74 @@
     def stop(self):
         """Stop the tracer and initiate trace upload."""
         if self.tracer_type == "langchain":
-            if not self.is_instrumented:
-                logger.warning("Tracer was not started. No traces to upload.")
-                return "No traces to upload"
-
-            print("Stopping tracer and initiating trace upload...")
-            self._cleanup()
-            self._upload_task = self._run_async(self._upload_traces())
-            self.is_active = False
-            self.dataset_name = None
+            # if not self.is_instrumented:
+            #     logger.warning("Tracer was not started. No traces to upload.")
+            #     return "No traces to upload"
+
+            # print("Stopping tracer and initiating trace upload...")
+            # self._cleanup()
+            # self._upload_task = self._run_async(self._upload_traces())
+            # self.is_active = False
+            # self.dataset_name = None
+
+            # filename = f"langchain_callback_traces.json"
+            # filepath = os.path.join(tempfile.gettempdir(), filename)
+
+            user_detail = self._pass_user_data()
+            data, additional_metadata = self.langchain_tracer.stop()
+
+            # Add cost if possible
+            # import pdb; pdb.set_trace()
+            if additional_metadata['model_name']:
+                try:
+                    model_cost_data = self.model_cost_dict[additional_metadata['model_name']]
+                    prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
+                    completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
+                    # additional_metadata.setdefault('cost', {})["prompt_cost"] = prompt_cost
+                    # additional_metadata.setdefault('cost', {})["completion_cost"] = completion_cost
+                    additional_metadata.setdefault('cost', {})["total_cost"] = prompt_cost + completion_cost
+                except Exception as e:
+                    logger.warning(f"Error adding cost: {e}")
+
+            # with open(filepath, 'r') as f:
+            #     data = json.load(f)
+            additional_metadata["total_tokens"] = additional_metadata["tokens"]["total"]
+            additional_metadata["total_cost"] = additional_metadata["cost"]["total_cost"]
+
+            del additional_metadata["tokens"]
+            del additional_metadata["cost"]
+
+            combined_metadata = user_detail['trace_user_detail']['metadata'].copy()
+            combined_metadata.update(additional_metadata)
+            combined_metadata
+
+            langchain_traces = langchain_tracer_extraction(data)
+            final_result = convert_langchain_callbacks_output(langchain_traces)
+            final_result[0]['project_name'] = user_detail['project_name']
+            final_result[0]['trace_id'] = str(uuid.uuid4())
+            final_result[0]['session_id'] = None
+            final_result[0]['metadata'] = combined_metadata
+            final_result[0]['pipeline'] = user_detail['trace_user_detail']['pipeline']
+
+            filepath_3 = os.path.join(os.getcwd(), "final_result.json")
+            with open(filepath_3, 'w') as f:
+                json.dump(final_result, f, indent=2)

-            return "Trace upload initiated. Use get_upload_status() to check the status."
+
+            print(filepath_3)
+
+            additional_metadata_keys = additional_metadata.keys() if additional_metadata else None
+
+            UploadTraces(json_file_path=filepath_3,
+                         project_name=self.project_name,
+                         project_id=self.project_id,
+                         dataset_name=self.dataset_name,
+                         user_detail=user_detail,
+                         base_url=self.base_url
+                         ).upload_traces(additional_metadata_keys=additional_metadata_keys)
+
+            return
+
         elif self.tracer_type == "llamaindex":
             from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
             return LlamaIndexTracer(self._pass_user_data()).stop()
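Taken together, the langchain path now works roughly as below. This is a sketch: the `Tracer` constructor arguments other than `tracer_type` are assumed, since its full signature is not part of this diff.

```python
from ragaai_catalyst.tracers.tracer import Tracer

# project_name/dataset_name kwargs are assumed here for illustration.
tracer = Tracer(project_name="demo", dataset_name="run-1", tracer_type="langchain")

tracer.start()   # delegates to LangchainTracer.start() instead of OpenTelemetry instrumentation
# ... run the LangChain pipeline ...
tracer.stop()    # extracts the trace, writes final_result.json to the CWD, uploads via UploadTraces
```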
--- a/ragaai_catalyst/tracers/upload_traces.py
+++ b/ragaai_catalyst/tracers/upload_traces.py
@@ -20,7 +20,7 @@ class UploadTraces:
         self.base_url = base_url
         self.timeout = 10

-    def _create_dataset_schema_with_trace(self):
+    def _create_dataset_schema_with_trace(self, additional_metadata_keys=None, additional_pipeline_keys=None):
         SCHEMA_MAPPING_NEW = {
             "trace_id": {"columnType": "traceId"},
             "trace_uri": {"columnType": "traceUri"},
@@ -34,6 +34,15 @@
             "vector_store":{"columnType":"pipeline"},
             "feedback": {"columnType":"feedBack"}
         }
+
+        if additional_metadata_keys:
+            for key in additional_metadata_keys:
+                SCHEMA_MAPPING_NEW[key] = {"columnType": "metadata"}
+
+        if additional_pipeline_keys:
+            for key in additional_pipeline_keys:
+                SCHEMA_MAPPING_NEW[key] = {"columnType": "pipeline"}
+
         def make_request():
             headers = {
                 "Content-Type": "application/json",
@@ -119,9 +128,14 @@
                                 data=payload,
                                 timeout=self.timeout)

-    def upload_traces(self):
-        self._create_dataset_schema_with_trace()
-        presignedUrl = self._get_presigned_url()
-        self._put_presigned_url(presignedUrl, self.json_file_path)
-        self._insert_traces(presignedUrl)
-        print("Traces uploaded")
+    def upload_traces(self, additional_metadata_keys=None, additional_pipeline_keys=None):
+        try:
+            self._create_dataset_schema_with_trace(additional_metadata_keys, additional_pipeline_keys)
+            presignedUrl = self._get_presigned_url()
+            if presignedUrl is None:
+                return
+            self._put_presigned_url(presignedUrl, self.json_file_path)
+            self._insert_traces(presignedUrl)
+            print("Traces uploaded")
+        except Exception as e:
+            print(f"Error while uploading agentic traces: {e}")
--- /dev/null
+++ b/ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py
@@ -0,0 +1,61 @@
+import json
+
+def convert_langchain_callbacks_output(result, project_name="", metadata="", pipeline=""):
+    initial_struc = [{
+        "project_name": project_name,
+        "trace_id": "NA",
+        "session_id": "NA",
+        "metadata" : metadata,
+        "pipeline" : pipeline,
+        "traces" : []
+    }]
+    traces_data = []
+
+    prompt = result["data"]["prompt"]
+    response = result["data"]["response"]
+    context = result["data"]["context"]
+    final_prompt = ""
+
+    prompt_structured_data = {
+        "traceloop.entity.input": json.dumps({
+            "kwargs": {
+                "input": prompt,
+            }
+        })
+    }
+    prompt_data = {
+        "name": "retrieve_documents.langchain.workflow",
+        "attributes": prompt_structured_data,
+    }
+
+    traces_data.append(prompt_data)
+
+    context_structured_data = {
+        "traceloop.entity.input": json.dumps({
+            "kwargs": {
+                "context": context
+            }
+        }),
+        "traceloop.entity.output": json.dumps({
+            "kwargs": {
+                "text": prompt
+            }
+        })
+    }
+    context_data = {
+        "name": "PromptTemplate.langchain.task",
+        "attributes": context_structured_data,
+    }
+    traces_data.append(context_data)
+
+    response_structured_data = {"gen_ai.completion.0.content": response,
+                                "gen_ai.prompt.0.content": prompt}
+    response_data = {
+        "name": "ChatOpenAI.langchain.task",
+        "attributes" : response_structured_data
+    }
+    traces_data.append(response_data)
+
+    initial_struc[0]["traces"] = traces_data
+
+    return initial_struc
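A quick illustration of the structure this helper emits (input values are invented):

```python
result = {"data": {"prompt": "What is RAG?",
                   "response": "Retrieval-augmented generation ...",
                   "context": "RAG pairs retrieval with generation."}}

out = convert_langchain_callbacks_output(result, project_name="demo")
print([t["name"] for t in out[0]["traces"]])
# ['retrieve_documents.langchain.workflow', 'PromptTemplate.langchain.task', 'ChatOpenAI.langchain.task']
```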
--- /dev/null
+++ b/ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py
@@ -0,0 +1,81 @@
+import json
+import uuid
+
+def langchain_tracer_extraction(data):
+    trace_aggregate = {}
+    import uuid
+
+    def generate_trace_id():
+        """
+        Generate a random trace ID using UUID4.
+        Returns a string representation of the UUID with no hyphens.
+        """
+        return '0x'+str(uuid.uuid4()).replace('-', '')
+
+    trace_aggregate["tracer_type"] = "langchain"
+    trace_aggregate['trace_id'] = generate_trace_id()
+    trace_aggregate['session_id'] = None
+    trace_aggregate["pipeline"] = {
+        'llm_model': 'gpt-3.5-turbo',
+        'vector_store': 'faiss',
+        'embed_model': 'text-embedding-ada-002'
+    }
+    trace_aggregate["metadata"] = {
+        'key1': 'value1',
+        'key2': 'value2',
+        'log_source': 'langchain_tracer',
+        'recorded_on': '2024-06-14 08:57:27.324410'
+    }
+    trace_aggregate["prompt_length"] = 0
+    trace_aggregate["data"] = {}
+
+    def get_prompt(data):
+        # if "chain_starts" in data and data["chain_starts"] != []:
+        #     for item in data["chain_starts"]:
+
+        if "chat_model_calls" in data and data["chat_model_calls"] != []:
+            for item in data["chat_model_calls"]:
+                messages = item["messages"][0]
+                for message in messages:
+                    if message["type"]=="human":
+                        human_messages = message["content"].strip()
+                        return human_messages
+        if "llm_calls" in data and data["llm_calls"] != []:
+            if "llm_start" in data["llm_calls"][0]["event"]:
+                for item in data["llm_calls"]:
+                    prompt = item["prompts"]
+                return prompt[0].strip()
+
+    def get_response(data):
+        for item in data["llm_calls"]:
+            if item["event"] == "llm_end":
+                # import pdb; pdb.set_trace()
+                llm_end_responses = item["response"]["generations"][0]
+                for llm_end_response in llm_end_responses:
+                    response = llm_end_response["text"]
+                return response.strip()
+
+    def get_context(data):
+        if "retriever_actions" in data and data["retriever_actions"] != []:
+            for item in data["retriever_actions"]:
+                if item["event"] == "retriever_end":
+                    context = item["documents"][0]["page_content"].replace('\n', ' ')
+                    return context
+        if "chat_model_calls" in data and data["chat_model_calls"] != []:
+            for item in data["chat_model_calls"]:
+                messages = item["messages"][0]
+                for message in messages:
+                    if message["type"]=="system":
+                        content = message["content"].strip().replace('\n', ' ')
+                        return content
+
+
+    prompt = get_prompt(data)
+    response = get_response(data)
+    context = get_context(data)
+
+    trace_aggregate["data"]["prompt"]=prompt
+    trace_aggregate["data"]["response"]=response
+    trace_aggregate["data"]["context"]=context
+
+    return trace_aggregate
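And a tiny, made-up callback trace showing what the extraction returns (only the keys the function actually reads are included):

```python
data = {
    "chat_model_calls": [{"messages": [[{"type": "human", "content": "What is RAG?"}]]}],
    "llm_calls": [{"event": "llm_end",
                   "response": {"generations": [[{"text": "RAG augments an LLM with retrieval."}]]}}],
    "retriever_actions": [{"event": "retriever_end",
                           "documents": [{"page_content": "RAG pairs retrieval\nwith generation."}]}],
}

trace = langchain_tracer_extraction(data)
print(trace["data"]["prompt"])    # "What is RAG?"
print(trace["data"]["response"])  # "RAG augments an LLM with retrieval."
```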
--- a/ragaai_catalyst-2.1.5b2.dist-info/METADATA
+++ b/ragaai_catalyst-2.1.5b4.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ragaai_catalyst
-Version: 2.1.5b2
+Version: 2.1.5b4
 Summary: RAGA AI CATALYST
 Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: <3.13,>=3.9
--- a/ragaai_catalyst-2.1.5b2.dist-info/RECORD
+++ b/ragaai_catalyst-2.1.5b4.dist-info/RECORD
@@ -8,14 +8,15 @@ ragaai_catalyst/guardrails_manager.py,sha256=DILMOAASK57FH9BLq_8yC1AQzRJ8McMFLwC
 ragaai_catalyst/internal_api_completion.py,sha256=DdICI5yfEudiOAIC8L4oxH0Qz7kX-BZCdo9IWsi2gNo,2965
 ragaai_catalyst/prompt_manager.py,sha256=W8ypramzOprrJ7-22d5vkBXIuIQ8v9XAzKDGxKsTK28,16550
 ragaai_catalyst/proxy_call.py,sha256=CHxldeceZUaLU-to_hs_Kf1z_b2vHMssLS_cOBedu78,5499
-ragaai_catalyst/ragaai_catalyst.py,sha256=FdqMzwuQLqS2-3JJDsTQ8uh2itllOxfPrRUjb8Kwmn0,17428
+ragaai_catalyst/ragaai_catalyst.py,sha256=5nVg3_-lcvhrXjNkPTeGhe3tdUjm_4ZIctOcqWXBkRA,17939
 ragaai_catalyst/synthetic_data_generation.py,sha256=uDV9tNwto2xSkWg5XHXUvjErW-4P34CTrxaJpRfezyA,19250
 ragaai_catalyst/utils.py,sha256=TlhEFwLyRU690HvANbyoRycR3nQ67lxVUQoUOfTPYQ0,3772
 ragaai_catalyst/tracers/__init__.py,sha256=LfgTes-nHpazssbGKnn8kyLZNr49kIPrlkrqqoTFTfc,301
 ragaai_catalyst/tracers/distributed.py,sha256=AIRvS5Ur4jbFDXsUkYuCTmtGoHHx3LOG4n5tWOh610U,10330
+ragaai_catalyst/tracers/langchain_callback.py,sha256=LvMBhgvAX8ftyBQ9Naeui46EoDa2nHQZq48Ra6nL-Qg,21991
 ragaai_catalyst/tracers/llamaindex_callback.py,sha256=ZY0BJrrlz-P9Mg2dX-ZkVKG3gSvzwqBtk7JL_05MiYA,14028
-ragaai_catalyst/tracers/tracer.py,sha256=S_ANRm5zSMvQiUyQTRwyUepFci_T3AN26wAOXoURfyc,15648
-ragaai_catalyst/tracers/upload_traces.py,sha256=mT5rverNUL5Rcal9VR5_c75wHBAUrm2pvYetTZqP3ok,4796
+ragaai_catalyst/tracers/tracer.py,sha256=k2HjH6ONaabbPvoX6xJRck-A2l-9GVW7Nueimuu-Ua8,19096
+ragaai_catalyst/tracers/upload_traces.py,sha256=2TWdRTN6FMaX-dqDv8BJWQS0xrCGYKkXEYOi2kK3Z3Y,5487
 ragaai_catalyst/tracers/agentic_tracing/README.md,sha256=X4QwLb7-Jg7GQMIXj-SerZIgDETfw-7VgYlczOR8ZeQ,4508
 ragaai_catalyst/tracers/agentic_tracing/__init__.py,sha256=yf6SKvOPSpH-9LiKaoLKXwqj5sez8F_5wkOb91yp0oE,260
 ragaai_catalyst/tracers/agentic_tracing/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -31,7 +32,7 @@ ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py,sha256=--wvhOJ-J
 ragaai_catalyst/tracers/agentic_tracing/tracers/base.py,sha256=88rX7OkOGEyVNECUrc4bYqODyulXve_-99d9ku5hBeQ,37373
 ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py,sha256=l3x3uFO5ov93I7UUrUX1M06WVGy2ug2jEZ1G7o315z4,13075
 ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py,sha256=91aWXJGb3GDfyDfJyA7Irnk3XSyfkQaQppW_NMORGJQ,31725
+ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py,sha256=s6BRoBteCRF8XrXGnmZ98ZWPrSONC5RObPXNaq-im3w,31782
 ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py,sha256=6hsg-Yw11v4qeELI1CWrdX8BXf-wJrTF5smBI5prgoo,15873
 ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py,sha256=m8CxYkl7iMiFya_lNwN1ykBc3Pmo-2pR_2HmpptwHWQ,10352
 ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py,sha256=4rWL7fIJE5wN0nwh6fMWyh3OrrenZHJkNzyQXikyzQI,13771
@@ -61,9 +62,11 @@ ragaai_catalyst/tracers/instrumentators/langchain.py,sha256=yMN0qVF0pUVk6R5M1vJo
 ragaai_catalyst/tracers/instrumentators/llamaindex.py,sha256=SMrRlR4xM7k9HK43hakE8rkrWHxMlmtmWD-AX6TeByc,416
 ragaai_catalyst/tracers/instrumentators/openai.py,sha256=14R4KW9wQCR1xysLfsP_nxS7cqXrTPoD8En4MBAaZUU,379
 ragaai_catalyst/tracers/utils/__init__.py,sha256=KeMaZtYaTojilpLv65qH08QmpYclfpacDA0U3wg6Ybw,64
+ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py,sha256=ofrNrxf2b1hpjDh_zeaxiYq86azn1MF3kW8-ViYPEg0,1641
+ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py,sha256=cghjCuUe8w-2MZdh9xgtRGe3y219u26GGzpnuY4Wt6Q,3047
 ragaai_catalyst/tracers/utils/utils.py,sha256=ViygfJ7vZ7U0CTSA1lbxVloHp4NSlmfDzBRNCJuMhis,2374
-ragaai_catalyst-2.1.5b2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ragaai_catalyst-2.1.5b2.dist-info/METADATA,sha256=OtU5W4jpT4K2GVF82w9jYeaVglZOWOIwpPfXJNN_SmM,12764
-ragaai_catalyst-2.1.5b2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-ragaai_catalyst-2.1.5b2.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
-ragaai_catalyst-2.1.5b2.dist-info/RECORD,,
+ragaai_catalyst-2.1.5b4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ragaai_catalyst-2.1.5b4.dist-info/METADATA,sha256=9KXrmDoxY6TV9BL3TXWha0rm4n4wXJy03sTW7dc4ZRU,12764
+ragaai_catalyst-2.1.5b4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ragaai_catalyst-2.1.5b4.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
+ragaai_catalyst-2.1.5b4.dist-info/RECORD,,