ragaai-catalyst 2.1.5b6__py3-none-any.whl → 2.1.5b8__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +0 -2
- ragaai_catalyst/tracers/langchain_callback.py +185 -46
- ragaai_catalyst/tracers/tracer.py +453 -0
- {ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/METADATA +1 -1
- {ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/RECORD +8 -7
- {ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py
@@ -150,8 +150,6 @@ class LLMTracerMixin:
             beta_module = openai_module.beta

             # Patch openai.beta.threads
-            import openai
-            openai.api_type = "openai"
             if hasattr(beta_module, "threads"):
                 threads_obj = beta_module.threads
                 # Patch top-level methods on openai.beta.threads
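The removed pair of lines re-imported `openai` and forced `openai.api_type = "openai"` every time the patcher ran. A minimal sketch of why a module-level override like that is risky (the `"azure"` value is an invented user setting, and `api_type` follows the pre-1.0 `openai` module convention):

```python
# Sketch only: module attributes are process-global, so a tracer that
# sets one as a side effect silently overwrites the user's configuration.
import openai

openai.api_type = "azure"    # hypothetical user configuration
openai.api_type = "openai"   # what the removed tracer lines used to do
print(openai.api_type)       # "openai" -- the user's "azure" choice is gone
```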
ragaai_catalyst/tracers/langchain_callback.py
@@ -48,8 +48,9 @@ class LangchainTracer(BaseCallbackHandler):
         self._original_methods = {}
         self.additional_metadata = {}
         self._save_task = None
-        self._current_query = None
+        self._current_query = None
         self.filepath = None
+        self.model_names = {}  # Store model names by component instance
         logger.setLevel(log_level)

         if not os.path.exists(output_path):
@@ -86,7 +87,7 @@ class LangchainTracer(BaseCallbackHandler):
             "retriever_actions": [],
             "tokens": [],
             "errors": [],
-            "query": self._current_query,
+            "query": self._current_query,
             "metadata": {
                 "version": "2.0",
                 "trace_all": self.trace_all,
@@ -172,6 +173,14 @@ class LangchainTracer(BaseCallbackHandler):
                 kwargs_copy['callbacks'] = [self]
             elif self not in kwargs_copy['callbacks']:
                 kwargs_copy['callbacks'].append(self)
+
+            # Store model name if available
+            if component_name in ["OpenAI", "ChatOpenAI_LangchainOpenAI", "ChatOpenAI_ChatModels",
+                                  "ChatVertexAI", "ChatGoogleGenerativeAI", "ChatAnthropic", "ChatLiteLLM"]:
+                instance = args[0] if args else None
+                model_name = kwargs.get('model_name') or kwargs.get('model')
+                if instance and model_name:
+                    self.model_names[id(instance)] = model_name

             # Try different method signatures
             try:
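The new block records each component's model name keyed by `id()` of the instance, so `on_llm_end` can fall back to it when the LLM response carries no model name. A standalone sketch of that bookkeeping (the `FakeChatModel` class is invented for illustration):

```python
# Sketch of the id()-keyed model-name registry; FakeChatModel stands in
# for a patched LangChain component and is not part of the real API.
class FakeChatModel:
    def __init__(self, **kwargs):
        self.kwargs = kwargs

model_names = {}

def record_model(instance, kwargs):
    # Same precedence as the patched __init__: 'model_name' first, then 'model'.
    model_name = kwargs.get("model_name") or kwargs.get("model")
    if instance and model_name:
        model_names[id(instance)] = model_name

llm = FakeChatModel(model="gpt-4o-mini")
record_model(llm, llm.kwargs)
print(model_names)  # {<id of llm>: 'gpt-4o-mini'}
```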
@@ -201,28 +210,56 @@ class LangchainTracer(BaseCallbackHandler):

     def _monkey_patch(self):
         """Enhanced monkey-patching with comprehensive component support"""
-        [22 removed lines not rendered in the source diff]
+        components_to_patch = {}
+
+        try:
+            from langchain.llms import OpenAI
+            components_to_patch["OpenAI"] = (OpenAI, "__init__")
+        except ImportError:
+            logger.debug("OpenAI not available for patching")
+
+        try:
+            from langchain_google_vertexai import ChatVertexAI
+            components_to_patch["ChatVertexAI"] = (ChatVertexAI, "__init__")
+        except ImportError:
+            logger.debug("ChatVertexAI not available for patching")
+
+        try:
+            from langchain_google_genai import ChatGoogleGenerativeAI
+            components_to_patch["ChatGoogleGenerativeAI"] = (ChatGoogleGenerativeAI, "__init__")
+        except ImportError:
+            logger.debug("ChatGoogleGenerativeAI not available for patching")
+
+        try:
+            from langchain_anthropic import ChatAnthropic
+            components_to_patch["ChatAnthropic"] = (ChatAnthropic, "__init__")
+        except ImportError:
+            logger.debug("ChatAnthropic not available for patching")
+
+        try:
+            from langchain_community.chat_models import ChatLiteLLM
+            components_to_patch["ChatLiteLLM"] = (ChatLiteLLM, "__init__")
+        except ImportError:
+            logger.debug("ChatLiteLLM not available for patching")
+
+        try:
+            from langchain_openai import ChatOpenAI as ChatOpenAI_LangchainOpenAI
+            components_to_patch["ChatOpenAI_LangchainOpenAI"] = (ChatOpenAI_LangchainOpenAI, "__init__")
+        except ImportError:
+            logger.debug("ChatOpenAI_LangchainOpenAI not available for patching")
+
+        try:
+            from langchain.chat_models import ChatOpenAI as ChatOpenAI_ChatModels
+            components_to_patch["ChatOpenAI_ChatModels"] = (ChatOpenAI_ChatModels, "__init__")
+        except ImportError:
+            logger.debug("ChatOpenAI_ChatModels not available for patching")
+
+        try:
+            from langchain.chains import create_retrieval_chain, RetrievalQA
+            components_to_patch["RetrievalQA"] = (RetrievalQA, "from_chain_type")
+            components_to_patch["create_retrieval_chain"] = (create_retrieval_chain, None)
+        except ImportError:
+            logger.debug("Langchain chains not available for patching")

         for name, (component, method_name) in components_to_patch.items():
             try:
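Each integration above is imported inside its own `try`/`except ImportError`, so one missing optional dependency only skips its own entry instead of disabling patching entirely. A self-contained sketch of the same guarded-import pattern (the package names are examples; `nonexistent_package` is deliberately absent):

```python
import importlib
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

components_to_patch = {}

# Probe each optional dependency independently; a missing package skips
# only its own entry instead of aborting the whole registry.
for module_path, attr in [
    ("langchain_anthropic", "ChatAnthropic"),
    ("nonexistent_package", "Whatever"),
]:
    try:
        module = importlib.import_module(module_path)
        components_to_patch[attr] = (getattr(module, attr), "__init__")
    except ImportError:
        logger.debug("%s not available for patching", attr)

print(sorted(components_to_patch))
```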
@@ -249,21 +286,45 @@ class LangchainTracer(BaseCallbackHandler):

     def _restore_original_methods(self):
         """Restore all original methods and functions with enhanced error handling"""
-        [10 removed lines not rendered in the source diff]
+        # Dynamically import only what we need based on what was patched
+        imported_components = {}
+
+        if self._original_inits or self._original_methods:
+            for name in list(self._original_inits.keys()) + list(self._original_methods.keys()):
+                try:
+                    if name == "OpenAI":
+                        from langchain.llms import OpenAI
+                        imported_components[name] = OpenAI
+                    elif name == "ChatVertexAI":
+                        from langchain_google_vertexai import ChatVertexAI
+                        imported_components[name] = ChatVertexAI
+                    elif name == "ChatGoogleGenerativeAI":
+                        from langchain_google_genai import ChatGoogleGenerativeAI
+                        imported_components[name] = ChatGoogleGenerativeAI
+                    elif name == "ChatAnthropic":
+                        from langchain_anthropic import ChatAnthropic
+                        imported_components[name] = ChatAnthropic
+                    elif name == "ChatLiteLLM":
+                        from langchain_community.chat_models import ChatLiteLLM
+                        imported_components[name] = ChatLiteLLM
+                    elif name == "ChatOpenAI_LangchainOpenAI":
+                        from langchain_openai import ChatOpenAI as ChatOpenAI_LangchainOpenAI
+                        imported_components[name] = ChatOpenAI_LangchainOpenAI
+                    elif name == "ChatOpenAI_ChatModels":
+                        from langchain.chat_models import ChatOpenAI as ChatOpenAI_ChatModels
+                        imported_components[name] = ChatOpenAI_ChatModels
+                    elif name in ["RetrievalQA", "create_retrieval_chain"]:
+                        from langchain.chains import create_retrieval_chain, RetrievalQA
+                        imported_components["RetrievalQA"] = RetrievalQA
+                        imported_components["create_retrieval_chain"] = create_retrieval_chain
+                except ImportError:
+                    logger.debug(f"{name} not available for restoration")

         for name, original in self._original_inits.items():
             try:
-                [2 removed lines not rendered in the source diff]
+                if name in imported_components:
+                    component = imported_components[name]
+                    component.__init__ = original
             except Exception as e:
                 logger.error(f"Error restoring {name}: {e}")
                 self.on_error(e, context=f"restore_{name}")
@@ -272,10 +333,12 @@ class LangchainTracer(BaseCallbackHandler):
             try:
                 if "." in name:
                     module_name, method_name = name.rsplit(".", 1)
-                    [2 removed lines not rendered in the source diff]
+                    if module_name in imported_components:
+                        module = imported_components[module_name]
+                        setattr(module, method_name, original)
                 else:
-                    [1 removed line not rendered in the source diff]
+                    if name in imported_components:
+                        globals()[name] = original
             except Exception as e:
                 logger.error(f"Error restoring {name}: {e}")
                 self.on_error(e, context=f"restore_{name}")
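The restore path mirrors the patch path: the original `__init__` is stashed under the component's name and reassigned only if the class can still be imported. A standalone sketch of that save/patch/restore cycle (`Widget` is a stand-in class, not part of the library):

```python
# Sketch of the save/patch/restore cycle; Widget stands in for a
# LangChain component class.
class Widget:
    def __init__(self):
        self.traced = False

original_inits = {}

def patch(cls, name):
    original_inits[name] = cls.__init__           # save the original
    def traced_init(self, *args, **kwargs):
        original_inits[name](self, *args, **kwargs)
        self.traced = True                        # tracer bookkeeping hook
    cls.__init__ = traced_init

def restore(cls, name):
    if name in original_inits:                    # restore only what was patched
        cls.__init__ = original_inits[name]

patch(Widget, "Widget")
assert Widget().traced is True
restore(Widget, "Widget")
assert Widget().traced is False
```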
@@ -359,16 +422,92 @@ class LangchainTracer(BaseCallbackHandler):
                 }
             )

+            # Calculate latency
             end_time = datetime.now()
-            [1 removed line not rendered in the source diff]
+            latency = (end_time - self.current_trace["start_time"]).total_seconds()

+            # Check if values are there in llm_output
+            model = ""
+            prompt_tokens = 0
+            completion_tokens = 0
+            total_tokens = 0
+
+            # Try to get model name from llm_output first
             if response and response.llm_output:
-                [6 removed lines not rendered in the source diff]
+                try:
+                    model = response.llm_output.get("model_name")
+                    if not model:
+                        model = response.llm_output.get("model", "")
+                except Exception as e:
+                    # logger.debug(f"Error getting model name: {e}")
+                    model = ""
+
+                # Add model name
+                if not model:
+                    try:
+                        model = response.llm_output.get("model_name")
+                        if not model:
+                            model = response.llm_output.get("model", "")
+                    except Exception as e:
+                        # logger.debug(f"Error getting model name: {e}")
+                        model = ""
+
+
+                # Add token usage
+                try:
+                    token_usage = response.llm_output.get("token_usage", {})
+                    if token_usage=={}:
+                        try:
+                            token_usage = response.llm_output.get("usage")
+                        except Exception as e:
+                            # logger.debug(f"Error getting token usage: {e}")
+                            token_usage = {}
+
+                    if token_usage !={}:
+                        prompt_tokens = token_usage.get("prompt_tokens", 0)
+                        if prompt_tokens==0:
+                            prompt_tokens = token_usage.get("input_tokens", 0)
+                        completion_tokens = token_usage.get("completion_tokens", 0)
+                        if completion_tokens==0:
+                            completion_tokens = token_usage.get("output_tokens", 0)
+
+                        total_tokens = prompt_tokens + completion_tokens
+                except Exception as e:
+                    # logger.debug(f"Error getting token usage: {e}")
+                    prompt_tokens = 0
+                    completion_tokens = 0
+                    total_tokens = 0
+
+            # Check if values are there in
+            if prompt_tokens == 0 and completion_tokens == 0:
+                try:
+                    usage_data = response.generations[0][0].message.usage_metadata
+                    prompt_tokens = usage_data.get("input_tokens", 0)
+                    completion_tokens = usage_data.get("output_tokens", 0)
+                    total_tokens = prompt_tokens + completion_tokens
+                except Exception as e:
+                    # logger.debug(f"Error getting usage data: {e}")
+                    prompt_tokens = 0
+                    completion_tokens = 0
+                    total_tokens = 0
+
+            # If no model name in llm_output, try to get it from stored model names
+            try:
+                if model == "":
+                    model = list(self.model_names.values())[0]
+            except Exception as e:
+                model=""
+
+            self.additional_metadata = {
+                'latency': latency,
+                'model_name': model,
+                'tokens': {
+                    'prompt': prompt_tokens,
+                    'completion': completion_tokens,
+                    'total': total_tokens
+                }
+            }
+
         except Exception as e:
             self.on_error(e, context="llm_end")

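The token accounting above tries `llm_output["token_usage"]`, then `llm_output["usage"]`, and accepts both the `prompt_tokens`/`completion_tokens` and `input_tokens`/`output_tokens` key styles before falling back to `usage_metadata` on the generation itself. A condensed sketch of that fallback chain on plain dicts (both payloads are invented to mimic the two vendor shapes):

```python
# Condensed version of the callback's fallback chain, on plain dicts.
def extract_tokens(llm_output):
    token_usage = llm_output.get("token_usage") or llm_output.get("usage") or {}
    prompt = token_usage.get("prompt_tokens", 0) or token_usage.get("input_tokens", 0)
    completion = token_usage.get("completion_tokens", 0) or token_usage.get("output_tokens", 0)
    return prompt, completion, prompt + completion

openai_style = {"token_usage": {"prompt_tokens": 12, "completion_tokens": 30}}
anthropic_style = {"usage": {"input_tokens": 8, "output_tokens": 21}}
print(extract_tokens(openai_style))     # (12, 30, 42)
print(extract_tokens(anthropic_style))  # (8, 21, 29)
```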
ragaai_catalyst/tracers/tracer.py (new file)
@@ -0,0 +1,453 @@
+from audioop import add
+import os
+import uuid
+import datetime
+import logging
+import asyncio
+import aiohttp
+import requests
+from contextlib import contextmanager
+from concurrent.futures import ThreadPoolExecutor
+from ragaai_catalyst.tracers.langchain_callback import LangchainTracer
+from ragaai_catalyst.tracers.utils.convert_langchain_callbacks_output import convert_langchain_callbacks_output
+
+from ragaai_catalyst.tracers.utils.langchain_tracer_extraction_logic import langchain_tracer_extraction
+from ragaai_catalyst.tracers.upload_traces import UploadTraces
+import tempfile
+import json
+
+from opentelemetry.sdk import trace as trace_sdk
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from ragaai_catalyst.tracers.exporters.file_span_exporter import FileSpanExporter
+from ragaai_catalyst.tracers.exporters.raga_exporter import RagaExporter
+from ragaai_catalyst.tracers.instrumentators import (
+    LangchainInstrumentor,
+    OpenAIInstrumentor,
+    LlamaIndexInstrumentor,
+)
+from ragaai_catalyst.tracers.utils import get_unique_key
+# from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+from ragaai_catalyst import RagaAICatalyst
+from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
+from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
+from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import load_model_costs, update_model_costs_from_github
+
+logger = logging.getLogger(__name__)
+
+class Tracer(AgenticTracing):
+    NUM_PROJECTS = 100
+    TIMEOUT = 10
+    def __init__(
+        self,
+        project_name,
+        dataset_name,
+        trace_name=None,
+        tracer_type=None,
+        pipeline=None,
+        metadata=None,
+        description=None,
+        upload_timeout=30,  # Default timeout of 30 seconds
+        update_llm_cost=True,  # Parameter to control model cost updates
+        auto_instrumentation={  # to control automatic instrumentation of different components
+            'llm':True,
+            'tool':True,
+            'agent':True,
+            'user_interaction':True,
+            'file_io':True,
+            'network':True,
+            'custom':True
+        },
+        interval_time=2,
+        # auto_instrumentation=True/False # to control automatic instrumentation of everything
+
+    ):
+        """
+        Initializes a Tracer object.
+
+        Args:
+            project_name (str): The name of the project.
+            dataset_name (str): The name of the dataset.
+            tracer_type (str, optional): The type of tracer. Defaults to None.
+            pipeline (dict, optional): The pipeline configuration. Defaults to None.
+            metadata (dict, optional): The metadata. Defaults to None.
+            description (str, optional): The description. Defaults to None.
+            upload_timeout (int, optional): The upload timeout in seconds. Defaults to 30.
+            update_llm_cost (bool, optional): Whether to update model costs from GitHub. Defaults to True.
+        """
+
+        user_detail = {
+            "project_name": project_name,
+            "project_id": None,  # Will be set after project validation
+            "dataset_name": dataset_name,
+            "interval_time": interval_time,
+            "trace_name": trace_name if trace_name else f"trace_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
+            "trace_user_detail": {"metadata": metadata} if metadata else {}
+        }
+
+        # take care of auto_instrumentation
+        if isinstance(auto_instrumentation, bool):
+            if auto_instrumentation:
+                auto_instrumentation = {
+                    "llm": True,
+                    "tool": True,
+                    "agent": True,
+                    "user_interaction": True,
+                    "file_io": True,
+                    "network": True,
+                    "custom": True
+                }
+            else:
+                auto_instrumentation = {
+                    "llm": False,
+                    "tool": False,
+                    "agent": False,
+                    "user_interaction": False,
+                    "file_io": False,
+                    "network": False,
+                    "custom": False
+                }
+        elif isinstance(auto_instrumentation, dict):
+            auto_instrumentation = {k: v for k, v in auto_instrumentation.items()}
+            for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
+                if key not in auto_instrumentation:
+                    auto_instrumentation[key] = True
+
+        super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)
+
+        self.project_name = project_name
+        self.dataset_name = dataset_name
+        self.tracer_type = tracer_type
+        self.metadata = self._improve_metadata(metadata, tracer_type)
+        # self.metadata["total_cost"] = 0.0
+        # self.metadata["total_tokens"] = 0
+        self.pipeline = pipeline
+        self.description = description
+        self.upload_timeout = upload_timeout
+        self.base_url = f"{RagaAICatalyst.BASE_URL}"
+        self.timeout = 30
+        self.num_projects = 100
+        self.start_time = datetime.datetime.now().astimezone().isoformat()
+        self.model_cost_dict = load_model_costs()
+
+        if update_llm_cost:
+            # First update the model costs file from GitHub
+            update_model_costs_from_github()
+
+        try:
+            response = requests.get(
+                f"{self.base_url}/v2/llm/projects?size={self.num_projects}",
+                headers={
+                    "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
+                },
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            logger.debug("Projects list retrieved successfully")
+
+            project_list = [
+                project["name"] for project in response.json()["data"]["content"]
+            ]
+            if project_name not in project_list:
+                raise ValueError("Project not found. Please enter a valid project name")
+
+            self.project_id = [
+                project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
+            ][0]
+            # super().__init__(user_detail=self._pass_user_data())
+            # self.file_tracker = TrackName()
+            self._pass_user_data()
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Failed to retrieve projects list: {e}")
+            raise
+
+        if tracer_type == "langchain":
+            # self.raga_client = RagaExporter(project_name=self.project_name, dataset_name=self.dataset_name)
+
+            # self._tracer_provider = self._setup_provider()
+            # self._instrumentor = self._setup_instrumentor(tracer_type)
+            # self.is_instrumented = False
+            # self._upload_task = None
+            self._upload_task = None
+        elif tracer_type == "llamaindex":
+            self._upload_task = None
+            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+
+        else:
+            self._upload_task = None
+            # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+
+
+    def set_dataset_name(self, dataset_name):
+        """
+        Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
+
+        Args:
+            dataset_name (str): The new dataset name to set
+        """
+        # Store current parameters
+        current_params = {
+            'project_name': self.project_name,
+            'tracer_type': self.tracer_type,
+            'pipeline': self.pipeline,
+            'metadata': self.metadata,
+            'description': self.description,
+            'upload_timeout': self.upload_timeout
+        }
+
+        # Reinitialize self with new dataset_name and stored parameters
+        self.__init__(
+            dataset_name=dataset_name,
+            **current_params
+        )
+
+    def _improve_metadata(self, metadata, tracer_type):
+        if metadata is None:
+            metadata = {}
+        metadata.setdefault("log_source", f"{tracer_type}_tracer")
+        metadata.setdefault("recorded_on", str(datetime.datetime.now()))
+        return metadata
+
+    def _add_unique_key(self, data, key_name):
+        data[key_name] = get_unique_key(data)
+        return data
+
+    def _setup_provider(self):
+        self.filespanx = FileSpanExporter(
+            project_name=self.project_name,
+            metadata=self.metadata,
+            pipeline=self.pipeline,
+            raga_client=self.raga_client,
+        )
+        tracer_provider = trace_sdk.TracerProvider()
+        tracer_provider.add_span_processor(SimpleSpanProcessor(self.filespanx))
+        return tracer_provider
+
+    def _setup_instrumentor(self, tracer_type):
+        instrumentors = {
+            "langchain": LangchainInstrumentor,
+            "openai": OpenAIInstrumentor,
+            "llama_index": LlamaIndexInstrumentor,
+        }
+        if tracer_type not in instrumentors:
+            raise ValueError(f"Invalid tracer type: {tracer_type}")
+        return instrumentors[tracer_type]().get()
+
+    @contextmanager
+    def trace(self):
+        """
+        Synchronous context manager for tracing.
+        Usage:
+            with tracer.trace():
+                # Your code here
+        """
+        self.start()
+        try:
+            yield self
+        finally:
+            self.stop()
+
+    def start(self):
+        """Start the tracer."""
+        if self.tracer_type == "langchain":
+            # if not self.is_instrumented:
+            #     self._instrumentor().instrument(tracer_provider=self._tracer_provider)
+            #     self.is_instrumented = True
+            # print(f"Tracer started for project: {self.project_name}")
+            self.langchain_tracer = LangchainTracer()
+            return self.langchain_tracer.start()
+        elif self.tracer_type == "llamaindex":
+            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+            return LlamaIndexTracer(self._pass_user_data()).start()
+        else:
+            super().start()
+            return self
+
+    def stop(self):
+        """Stop the tracer and initiate trace upload."""
+        if self.tracer_type == "langchain":
+            # if not self.is_instrumented:
+            #     logger.warning("Tracer was not started. No traces to upload.")
+            #     return "No traces to upload"
+
+            # print("Stopping tracer and initiating trace upload...")
+            # self._cleanup()
+            # self._upload_task = self._run_async(self._upload_traces())
+            # self.is_active = False
+            # self.dataset_name = None
+
+            # filename = f"langchain_callback_traces.json"
+            # filepath = os.path.join(tempfile.gettempdir(), filename)
+
+            user_detail = self._pass_user_data()
+            data, additional_metadata = self.langchain_tracer.stop()
+
+            # Add cost if possible
+            # import pdb; pdb.set_trace()
+            if additional_metadata['model_name']:
+                try:
+                    model_cost_data = self.model_cost_dict[additional_metadata['model_name']]
+                    prompt_cost = additional_metadata["tokens"]["prompt"]*model_cost_data["input_cost_per_token"]
+                    completion_cost = additional_metadata["tokens"]["completion"]*model_cost_data["output_cost_per_token"]
+                    # additional_metadata.setdefault('cost', {})["prompt_cost"] = prompt_cost
+                    # additional_metadata.setdefault('cost', {})["completion_cost"] = completion_cost
+                    additional_metadata.setdefault('cost', {})["total_cost"] = prompt_cost + completion_cost
+                except Exception as e:
+                    logger.warning(f"Error adding cost: {e}")
+
+            # with open(filepath, 'r') as f:
+            #     data = json.load(f)
+            additional_metadata["total_tokens"] = additional_metadata["tokens"]["total"]
+            del additional_metadata["tokens"]
+            if "cost" in additional_metadata:
+                additional_metadata["total_cost"] = additional_metadata["cost"]["total_cost"]
+                del additional_metadata["cost"]
+            else:
+                additional_metadata["total_cost"] = 0.0
+
+            combined_metadata = user_detail['trace_user_detail']['metadata'].copy()
+            combined_metadata.update(additional_metadata)
+            combined_metadata
+
+            langchain_traces = langchain_tracer_extraction(data)
+            final_result = convert_langchain_callbacks_output(langchain_traces)
+            final_result[0]['project_name'] = user_detail['project_name']
+            final_result[0]['trace_id'] = str(uuid.uuid4())
+            final_result[0]['session_id'] = None
+            final_result[0]['metadata'] = combined_metadata
+            final_result[0]['pipeline'] = user_detail['trace_user_detail']['pipeline']
+
+            filepath_3 = os.path.join(os.getcwd(), "final_result.json")
+            with open(filepath_3, 'w') as f:
+                json.dump(final_result, f, indent=2)
+
+
+            print(filepath_3)
+
+            additional_metadata_keys = additional_metadata.keys() if additional_metadata else None
+
+            UploadTraces(json_file_path=filepath_3,
+                         project_name=self.project_name,
+                         project_id=self.project_id,
+                         dataset_name=self.dataset_name,
+                         user_detail=user_detail,
+                         base_url=self.base_url
+                         ).upload_traces(additional_metadata_keys=additional_metadata_keys)
+
+            return
+
+        elif self.tracer_type == "llamaindex":
+            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
+            return LlamaIndexTracer(self._pass_user_data()).stop()
+        else:
+            super().stop()
+
+    def get_upload_status(self):
+        """Check the status of the trace upload."""
+        if self.tracer_type == "langchain":
+            if self._upload_task is None:
+                return "No upload task in progress."
+            if self._upload_task.done():
+                try:
+                    result = self._upload_task.result()
+                    return f"Upload completed: {result}"
+                except Exception as e:
+                    return f"Upload failed: {str(e)}"
+            return "Upload in progress..."
+
+    def _run_async(self, coroutine):
+        """Run an asynchronous coroutine in a separate thread."""
+        loop = asyncio.new_event_loop()
+        with ThreadPoolExecutor() as executor:
+            future = executor.submit(lambda: loop.run_until_complete(coroutine))
+            return future
+
+    async def _upload_traces(self):
+        """
+        Asynchronously uploads traces to the RagaAICatalyst server.
+
+        This function uploads the traces generated by the RagaAICatalyst client to the RagaAICatalyst server. It uses the `aiohttp` library to make an asynchronous HTTP request to the server. The function first checks if the `RAGAAI_CATALYST_TOKEN` environment variable is set. If not, it raises a `ValueError` with the message "RAGAAI_CATALYST_TOKEN not found. Cannot upload traces.".
+
+        The function then uses the `asyncio.wait_for` function to wait for the `check_and_upload_files` method of the `raga_client` object to complete. The `check_and_upload_files` method is called with the `session` object and a list of file paths to be uploaded. The `timeout` parameter is set to the value of the `upload_timeout` attribute of the `Tracer` object.
+
+        If the upload is successful, the function returns the string "Files uploaded successfully" if the `upload_stat` variable is truthy, otherwise it returns the string "No files to upload".
+
+        If the upload times out, the function returns a string with the message "Upload timed out after {self.upload_timeout} seconds".
+
+        If any other exception occurs during the upload, the function returns a string with the message "Upload failed: {str(e)}", where `{str(e)}` is the string representation of the exception.
+
+        Parameters:
+            None
+
+        Returns:
+            A string indicating the status of the upload.
+        """
+        async with aiohttp.ClientSession() as session:
+            if not os.getenv("RAGAAI_CATALYST_TOKEN"):
+                raise ValueError(
+                    "RAGAAI_CATALYST_TOKEN not found. Cannot upload traces."
+                )
+
+            try:
+                upload_stat = await asyncio.wait_for(
+                    self.raga_client.check_and_upload_files(
+                        session=session,
+                        file_paths=[self.filespanx.sync_file],
+                    ),
+                    timeout=self.upload_timeout,
+                )
+                return (
+                    "Files uploaded successfully"
+                    if upload_stat
+                    else "No files to upload"
+                )
+            except asyncio.TimeoutError:
+                return f"Upload timed out after {self.upload_timeout} seconds"
+            except Exception as e:
+                return f"Upload failed: {str(e)}"
+
+    def _cleanup(self):
+        """
+        Cleans up the tracer by uninstrumenting the instrumentor, shutting down the tracer provider,
+        and resetting the instrumentation flag. This function is called when the tracer is no longer
+        needed.
+
+        Parameters:
+            self (Tracer): The Tracer instance.
+
+        Returns:
+            None
+        """
+        if self.is_instrumented:
+            try:
+                self._instrumentor().uninstrument()
+                self._tracer_provider.shutdown()
+                self.is_instrumented = False
+                print("Tracer provider shut down successfully")
+            except Exception as e:
+                logger.error(f"Error during tracer shutdown: {str(e)}")
+
+        # Reset instrumentation flag
+        self.is_instrumented = False
+        # Note: We're not resetting all attributes here to allow for upload status checking
+
+    def _pass_user_data(self):
+        user_detail = {
+            "project_name":self.project_name,
+            "project_id": self.project_id,
+            "dataset_name":self.dataset_name,
+            "trace_user_detail" : {
+                "project_id": self.project_id,
+                "trace_id": "",
+                "session_id": None,
+                "trace_type": self.tracer_type,
+                "traces": [],
+                "metadata": self.metadata,
+                "pipeline": {
+                    "llm_model": (getattr(self, "pipeline", {}) or {}).get("llm_model", ""),
+                    "vector_store": (getattr(self, "pipeline", {}) or {}).get("vector_store", ""),
+                    "embed_model": (getattr(self, "pipeline", {}) or {}).get("embed_model", "")
+                }
+            }
+        }
+        return user_detail
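Given the signatures in the new `tracer.py`, end-to-end usage would look roughly like the sketch below. The project and dataset names are placeholders, and `RAGAAI_CATALYST_TOKEN` must be set or the project lookup in `__init__` fails:

```python
# Hypothetical usage sketch based on the new tracer.py; all names are
# placeholders, not values taken from this diff.
import os
from ragaai_catalyst.tracers.tracer import Tracer

os.environ.setdefault("RAGAAI_CATALYST_TOKEN", "<your-token>")

tracer = Tracer(
    project_name="my-project",       # must already exist in Catalyst
    dataset_name="experiments-v1",
    tracer_type="langchain",
)

with tracer.trace():                 # start() on entry, stop() + upload on exit
    pass                             # LangChain calls go here

tracer.set_dataset_name("experiments-v2")  # re-runs __init__ with the new dataset
```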
{ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: ragaai_catalyst
-Version: 2.1.5b6
+Version: 2.1.5b8
 Summary: RAGA AI CATALYST
 Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
 Requires-Python: <3.13,>=3.9
{ragaai_catalyst-2.1.5b6.dist-info → ragaai_catalyst-2.1.5b8.dist-info}/RECORD
@@ -13,8 +13,9 @@ ragaai_catalyst/synthetic_data_generation.py,sha256=uDV9tNwto2xSkWg5XHXUvjErW-4P
 ragaai_catalyst/utils.py,sha256=TlhEFwLyRU690HvANbyoRycR3nQ67lxVUQoUOfTPYQ0,3772
 ragaai_catalyst/tracers/__init__.py,sha256=LfgTes-nHpazssbGKnn8kyLZNr49kIPrlkrqqoTFTfc,301
 ragaai_catalyst/tracers/distributed.py,sha256=AIRvS5Ur4jbFDXsUkYuCTmtGoHHx3LOG4n5tWOh610U,10330
-ragaai_catalyst/tracers/langchain_callback.py,sha256=
+ragaai_catalyst/tracers/langchain_callback.py,sha256=lLeED0Eg2kT4-_O9IUw3pAyi_Hm4AaX57VfeSiOwaUw,28134
 ragaai_catalyst/tracers/llamaindex_callback.py,sha256=ZY0BJrrlz-P9Mg2dX-ZkVKG3gSvzwqBtk7JL_05MiYA,14028
+ragaai_catalyst/tracers/tracer.py,sha256=Yq2HhgT4785t9573kksJ7ngM3qCLPgZbZ0IpgOHdTTo,19223
 ragaai_catalyst/tracers/upload_traces.py,sha256=2TWdRTN6FMaX-dqDv8BJWQS0xrCGYKkXEYOi2kK3Z3Y,5487
 ragaai_catalyst/tracers/agentic_tracing/README.md,sha256=X4QwLb7-Jg7GQMIXj-SerZIgDETfw-7VgYlczOR8ZeQ,4508
 ragaai_catalyst/tracers/agentic_tracing/__init__.py,sha256=yf6SKvOPSpH-9LiKaoLKXwqj5sez8F_5wkOb91yp0oE,260
@@ -31,7 +32,7 @@ ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py,sha256=--wvhOJ-J
 ragaai_catalyst/tracers/agentic_tracing/tracers/base.py,sha256=88rX7OkOGEyVNECUrc4bYqODyulXve_-99d9ku5hBeQ,37373
 ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py,sha256=l3x3uFO5ov93I7UUrUX1M06WVGy2ug2jEZ1G7o315z4,13075
 ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py,sha256=
+ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py,sha256=91aWXJGb3GDfyDfJyA7Irnk3XSyfkQaQppW_NMORGJQ,31725
 ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py,sha256=6hsg-Yw11v4qeELI1CWrdX8BXf-wJrTF5smBI5prgoo,15873
 ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py,sha256=m8CxYkl7iMiFya_lNwN1ykBc3Pmo-2pR_2HmpptwHWQ,10352
 ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py,sha256=4rWL7fIJE5wN0nwh6fMWyh3OrrenZHJkNzyQXikyzQI,13771
@@ -64,8 +65,8 @@ ragaai_catalyst/tracers/utils/__init__.py,sha256=KeMaZtYaTojilpLv65qH08QmpYclfpa
 ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py,sha256=ofrNrxf2b1hpjDh_zeaxiYq86azn1MF3kW8-ViYPEg0,1641
 ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py,sha256=cghjCuUe8w-2MZdh9xgtRGe3y219u26GGzpnuY4Wt6Q,3047
 ragaai_catalyst/tracers/utils/utils.py,sha256=ViygfJ7vZ7U0CTSA1lbxVloHp4NSlmfDzBRNCJuMhis,2374
-ragaai_catalyst-2.1.5b6.dist-info/LICENSE,sha256=
-ragaai_catalyst-2.1.5b6.dist-info/METADATA,sha256=
-ragaai_catalyst-2.1.5b6.dist-info/WHEEL,sha256=
-ragaai_catalyst-2.1.5b6.dist-info/top_level.txt,sha256=
-ragaai_catalyst-2.1.5b6.dist-info/RECORD,,
+ragaai_catalyst-2.1.5b8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ragaai_catalyst-2.1.5b8.dist-info/METADATA,sha256=OaiEW7uA1wnQO562QbKGgtlZuue1PTGTjK9-AW5gkLQ,12764
+ragaai_catalyst-2.1.5b8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ragaai_catalyst-2.1.5b8.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
+ragaai_catalyst-2.1.5b8.dist-info/RECORD,,
LICENSE, WHEEL, and top_level.txt: file contents unchanged (moved to the renamed dist-info directory only).