ragaai-catalyst 2.1.3b0__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registry.
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +37 -11
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +240 -81
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +632 -114
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +316 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +229 -82
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +214 -59
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +16 -14
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +147 -28
- ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +88 -2
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +9 -51
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +83 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +26 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +28 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +45 -15
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2520 -2152
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +59 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +23 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +284 -15
- ragaai_catalyst/tracers/llamaindex_callback.py +5 -5
- ragaai_catalyst/tracers/tracer.py +83 -10
- ragaai_catalyst/tracers/upload_traces.py +1 -1
- ragaai_catalyst-2.1.4.dist-info/METADATA +431 -0
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/RECORD +26 -20
- ragaai_catalyst-2.1.3b0.dist-info/METADATA +0 -43
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.1.3b0.dist-info → ragaai_catalyst-2.1.4.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py
@@ -0,0 +1,59 @@
+import os
+from typing import List, Dict, Any
+import logging
+
+logger = logging.getLogger(__name__)
+logging_level = (
+    logger.setLevel(logging.DEBUG)
+    if os.getenv("DEBUG")
+    else logger.setLevel(logging.INFO)
+)
+
+
+class SpanAttributes:
+    def __init__(self, name):
+        self.name = name
+        self.tags = []
+        self.metadata = {}
+        self.metrics = []
+        self.feedback = None
+        self.trace_attributes = ["tags", "metadata", "metrics"]
+
+    def add_tags(self, tags: str | List[str]):
+        if isinstance(tags, str):
+            tags = [tags]
+        self.tags.extend(tags)
+        logger.debug(f"Added tags: {tags}")
+
+    def add_metadata(self, metadata):
+        self.metadata.update(metadata)
+        logger.debug(f"Added metadata: {metadata}")
+
+    def add_metrics(
+        self,
+        name: str,
+        score: float | int,
+        reasoning: str = "",
+        cost: float = None,
+        latency: float = None,
+        metadata: Dict[str, Any] = {},
+        config: Dict[str, Any] = {},
+    ):
+        self.metrics.append(
+            {
+                "name": name,
+                "score": score,
+                "reason": reasoning,
+                "source": "user",
+                "cost": cost,
+                "latency": latency,
+                "metadata": metadata,
+                "mappings": [],
+                "config": config,
+            }
+        )
+        logger.debug(f"Added metrics: {self.metrics}")
+
+    def add_feedback(self, feedback: Any):
+        self.feedback = feedback
+        logger.debug(f"Added feedback: {self.feedback}")
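
The new SpanAttributes class is a plain accumulator for user-supplied span annotations. A minimal usage sketch (the import path follows the changed-file list above; the names and values below are illustrative, not taken from the diff):

    from ragaai_catalyst.tracers.agentic_tracing.utils.span_attributes import SpanAttributes

    span = SpanAttributes("checkout_agent")    # placeholder span name
    span.add_tags("experiment-a")              # a bare string is wrapped into a list
    span.add_tags(["regression", "v2.1.4"])    # a list extends the existing tags
    span.add_metadata({"user_id": "u-123"})
    span.add_metrics(name="faithfulness", score=0.87, reasoning="grounded in retrieved context")
    span.add_feedback({"thumbs_up": True})
    print(span.metrics[0]["reason"])           # note: `reasoning` is stored under the "reason" key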
ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py
@@ -1,8 +1,11 @@
 import json
 import os
+import requests
+import logging
 from importlib import resources
 from dataclasses import asdict
 
+logger = logging.getLogger(__name__)
 
 def convert_usage_to_dict(usage):
     # Initialize the token_usage dictionary with default values
@@ -68,6 +71,26 @@ def load_model_costs():
         return json.load(file)
 
 
+def update_model_costs_from_github():
+    """Updates the model_costs.json file with latest costs from GitHub."""
+    try:
+        logger.debug("loading the latest model costs.")
+        response = requests.get(
+            "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+        )
+        if response.status_code == 200:
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            model_costs_path = os.path.join(current_dir, "model_costs.json")
+            with open(model_costs_path, "w") as file:
+                json.dump(response.json(), file, indent=4)
+            logger.debug("Model costs updated successfully.")
+            return True
+        return False
+    except Exception as e:
+        logger.error(f"Failed to update model costs from GitHub: {e}")
+        return False
+
+
 def log_event(event_data, log_file_path):
     event_data = asdict(event_data)
     with open(log_file_path, "a") as f:
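
update_model_costs_from_github() overwrites the bundled model_costs.json with litellm's published pricing file and returns True only on an HTTP 200 response. A usage sketch (the "gpt-4" key is illustrative; actual keys come from litellm's file and may change):

    from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import (
        load_model_costs,
        update_model_costs_from_github,
    )

    if update_model_costs_from_github():      # True only when the download succeeded
        costs = load_model_costs()            # re-read the refreshed model_costs.json
        print(costs.get("gpt-4", {}))         # per-model pricing entry, if present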
ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py
@@ -6,9 +6,22 @@ import ast
 import importlib.util
 import json
 import astor
+import ipynbname
+import sys
+
 from pathlib import Path
+from IPython import get_ipython
+
+
+if 'get_ipython' in locals():
+    ipython_instance = get_ipython()
+    if ipython_instance:
+        ipython_instance.run_line_magic('reset', '-f')
+
 import logging
 logger = logging.getLogger(__name__)
+logging_level = logger.setLevel(logging.DEBUG) if os.getenv("DEBUG") == "1" else logging.INFO
+
 
 # Define the PackageUsageRemover class
 class PackageUsageRemover(ast.NodeTransformer):
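
Both of the new module-level logging toggles read the DEBUG environment variable at import time, so it must be set before the module is first imported. A sketch of the intended switch (the consuming script is hypothetical):

    import os

    os.environ["DEBUG"] = "1"  # set before the first import; "1" satisfies both toggles

    # Importing afterwards routes the module logger to DEBUG; otherwise it stays at INFO.
    from ragaai_catalyst.tracers.agentic_tracing.utils import zip_list_of_unique_files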
@@ -68,12 +81,187 @@ def remove_package_code(source_code: str, package_name: str) -> str:
     except Exception as e:
         raise Exception(f"Error processing source code: {str(e)}")
 
-
+class JupyterNotebookHandler:
+    @staticmethod
+    def is_running_in_colab():
+        """Check if the code is running in Google Colab."""
+        try:
+            import google.colab
+            return True
+        except ImportError:
+            return False
+
+    @staticmethod
+    def is_running_in_notebook():
+        """Check if the code is running in a Jupyter notebook or Colab."""
+        try:
+            shell = get_ipython().__class__.__name__
+            if JupyterNotebookHandler.is_running_in_colab():
+                return True
+            return shell == 'ZMQInteractiveShell'
+        except:
+            return False
+
+    @staticmethod
+    def get_notebook_path():
+        """Get the path of the current executing notebook."""
+        try:
+            # First try using ipynbname
+            try:
+                notebook_path = ipynbname.path()
+                if notebook_path:
+                    # logger.info(f"Found notebook using ipynbname: {notebook_path}")
+                    return str(notebook_path)
+            except:
+                pass
+
+            # Check if running in Colab
+            if JupyterNotebookHandler.is_running_in_colab():
+                try:
+                    from google.colab import drive
+                    if not os.path.exists('/content/drive'):
+                        drive.mount('/content/drive')
+                        # logger.info("Google Drive mounted successfully")
+
+                    # Look for notebooks in /content first
+                    ipynb_files = list(Path('/content').glob('*.ipynb'))
+                    if ipynb_files:
+                        current_nb = max(ipynb_files, key=os.path.getmtime)
+                        # logger.info(f"Found current Colab notebook: {current_nb}")
+                        return str(current_nb)
+
+                    # Then check Drive if mounted
+                    if os.path.exists('/content/drive'):
+                        drive_ipynb_files = list(Path('/content/drive').rglob('*.ipynb'))
+                        if drive_ipynb_files:
+                            current_nb = max(drive_ipynb_files, key=os.path.getmtime)
+                            # logger.info(f"Found Colab notebook in Drive: {current_nb}")
+                            return str(current_nb)
+                except Exception as e:
+                    logger.warning(f"Error in Colab notebook detection: {str(e)}")
+
+            # Try getting notebook path for regular Jupyter
+            try:
+                import IPython
+                ipython = IPython.get_ipython()
+                if ipython is not None:
+                    # Try getting the notebook name from kernel
+                    if hasattr(ipython, 'kernel') and hasattr(ipython.kernel, 'session'):
+                        kernel_file = ipython.kernel.session.config.get('IPKernelApp', {}).get('connection_file', '')
+                        if kernel_file:
+                            kernel_id = Path(kernel_file).stem
+                            current_dir = Path.cwd()
+
+                            # Look for .ipynb files in current and parent directories
+                            for search_dir in [current_dir] + list(current_dir.parents):
+                                notebooks = list(search_dir.glob('*.ipynb'))
+                                recent_notebooks = [
+                                    nb for nb in notebooks
+                                    if '.ipynb_checkpoints' not in str(nb)
+                                ]
+
+                                if recent_notebooks:
+                                    notebook_path = str(max(recent_notebooks, key=os.path.getmtime))
+                                    # logger.info(f"Found Jupyter notebook: {notebook_path}")
+                                    return notebook_path
+
+                    # Try alternative method using notebook metadata
+                    try:
+                        notebook_path = ipython.kernel._parent_ident
+                        if notebook_path:
+                            # logger.info(f"Found notebook using kernel parent ident: {notebook_path}")
+                            return notebook_path
+                    except:
+                        pass
+
+            except Exception as e:
+                # logger.warning(f"Error in Jupyter notebook detection: {str(e)}")
+                return None
+
+        except Exception as e:
+            # logger.warning(f"Error getting notebook path: {str(e)}")
+            return None
+
+
+
+def comment_magic_commands(script_content: str) -> str:
+    """Comment out magic commands, shell commands, and direct execution commands in the script content."""
+    lines = script_content.splitlines()
+    commented_lines = []
+    for line in lines:
+        # Check for magic commands, shell commands, or direct execution commands
+        if re.match(r'^\s*(!|%|pip|apt-get|curl|conda)', line.strip()):
+            commented_lines.append(f"# {line}")  # Comment the line
+        else:
+            commented_lines.append(line)  # Keep the line unchanged
+    return "\n".join(commented_lines)
+
+
+
 class TraceDependencyTracker:
     def __init__(self, output_dir=None):
         self.tracked_files = set()
         self.python_imports = set()
-        self.
+        self.notebook_path = None
+        self.colab_content = None
+
+        # Set output directory with Colab handling
+        if JupyterNotebookHandler.is_running_in_colab():
+            self.output_dir = '/content'
+            if not os.path.exists(self.output_dir):
+                os.makedirs(self.output_dir)
+            logger.info("Using /content as output directory for Colab")
+        else:
+            self.output_dir = output_dir or os.getcwd()
+
+        self.jupyter_handler = JupyterNotebookHandler()
+
+
+    def check_environment_and_save(self):
+        """Check if running in Colab and get current cell content."""
+        try:
+            from IPython import get_ipython
+            ipython = get_ipython()
+            if 'google.colab' in sys.modules:
+                logger.info("Running on Google Colab.")
+
+                # Retrieve the current cell content dynamically in Colab
+                current_cell = ipython.history_manager.get_range()
+                script_content = "\n".join(input_line for _, _, input_line in current_cell if input_line.strip())
+                script_content = comment_magic_commands(script_content)  # Comment out magic commands
+
+                # Store the content in the class attribute instead of saving to file
+                self.colab_content = script_content
+                logger.info("Successfully retrieved Colab cell content")
+
+            else:
+                logger.info("Not running on Google Colab.")
+        except Exception as e:
+            logger.warning(f"Error retrieving the current cell content: {e}")
+
+
+    def track_jupyter_notebook(self):
+        """Track the current notebook and its dependencies."""
+        if self.jupyter_handler.is_running_in_notebook():
+            # Get notebook path using the enhanced handler
+            notebook_path = self.jupyter_handler.get_notebook_path()
+
+            if notebook_path:
+                self.notebook_path = notebook_path
+                self.track_file_access(notebook_path)
+
+                # Track notebook dependencies
+                try:
+                    with open(notebook_path, 'r', encoding='utf-8') as f:
+                        notebook_content = f.read()
+                        notebook_content = comment_magic_commands(notebook_content)
+                        # Find and track imported files
+                        self.find_config_files(notebook_content, notebook_path)
+                except Exception as e:
+                    pass
+        else:
+            pass
+
 
     def track_file_access(self, filepath):
         if os.path.exists(filepath):
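
JupyterNotebookHandler consists only of static methods, so it can be probed without instantiation, and every probe degrades gracefully outside an IPython kernel. A minimal sketch:

    from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import (
        JupyterNotebookHandler,
    )

    print(JupyterNotebookHandler.is_running_in_colab())     # False outside Colab
    print(JupyterNotebookHandler.is_running_in_notebook())  # False outside Jupyter/Colab
    print(JupyterNotebookHandler.get_notebook_path())       # None when no notebook is found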
@@ -122,65 +310,146 @@ class TraceDependencyTracker:
         except (ImportError, AttributeError):
             pass
         except Exception as e:
-
+            pass
 
     def create_zip(self, filepaths):
+        self.track_jupyter_notebook()
+        # logger.info("Tracked Jupyter notebook and its dependencies")
+
+        # Ensure output directory exists
+        os.makedirs(self.output_dir, exist_ok=True)
+        # logger.info(f"Using output directory: {self.output_dir}")
+
+        # Special handling for Colab
+        if self.jupyter_handler.is_running_in_colab():
+            # logger.info("Running in Google Colab environment")
+            # Try to get the Colab notebook path
+            colab_notebook = self.jupyter_handler.get_notebook_path()
+            if colab_notebook:
+                self.tracked_files.add(os.path.abspath(colab_notebook))
+                # logger.info(f"Added Colab notebook to tracked files: {colab_notebook}")
+
+            # Get current cell content
+            self.check_environment_and_save()
+
+        # Process all files (existing code)
         for filepath in filepaths:
             abs_path = os.path.abspath(filepath)
             self.track_file_access(abs_path)
             try:
                 with open(abs_path, 'r', encoding='utf-8') as file:
                     content = file.read()
+                    # Comment out magic commands before processing
+                    content = comment_magic_commands(content)
                 self.find_config_files(content, abs_path)
                 if filepath.endswith('.py'):
                     self.analyze_python_imports(abs_path)
             except Exception as e:
-
+                pass
+
+        notebook_content_str = None
+        if self.notebook_path and os.path.exists(self.notebook_path):
+            try:
+                with open(self.notebook_path, 'r', encoding='utf-8') as f:
+                    notebook_content = json.load(f)
+
+                cell_contents = []
+                for cell in notebook_content.get('cells', []):
+                    if cell['cell_type'] == 'code':
+                        # Comment out magic commands in the cell's source
+                        cell_source = ''.join(cell['source'])
+                        commented_source = comment_magic_commands(cell_source)
+                        cell_contents.append(commented_source)
 
+                notebook_content_str = '\n\n'.join(cell_contents)
+                notebook_abs_path = os.path.abspath(self.notebook_path)
+                if notebook_abs_path in self.tracked_files:
+                    self.tracked_files.remove(notebook_abs_path)
+
+            except Exception as e:
+                pass
+
+        # Calculate hash and create zip
         self.tracked_files.update(self.python_imports)
         hash_contents = []
+
         for filepath in sorted(self.tracked_files):
-            if '
+            if not filepath.endswith('.py'):
+                continue
+            elif '/envs' in filepath or '__init__' in filepath:
                 continue
             try:
                 with open(filepath, 'rb') as file:
                     content = file.read()
-
-                    # Temporarily remove raga_catalyst code for hash calculation
-                    content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
+                    content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
                    hash_contents.append(content)
             except Exception as e:
-
+                logger.warning(f"Could not read {filepath} for hash calculation: {str(e)}")
+                pass
+
+
+        if notebook_content_str:
+            hash_contents.append(notebook_content_str.encode('utf-8'))
+
+        if self.colab_content:
+            hash_contents.append(self.colab_content.encode('utf-8'))
+
 
         combined_content = b''.join(hash_contents)
         hash_id = hashlib.sha256(combined_content).hexdigest()
 
+        # Create zip in the appropriate location
         zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
         common_path = [os.path.abspath(p) for p in self.tracked_files if 'env' not in p]
 
-        if common_path
+        if common_path:
             base_path = os.path.commonpath(common_path)
+        else:
+            base_path = os.getcwd()
+
         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
             for filepath in sorted(self.tracked_files):
-                if 'env' in filepath:
+                if 'env' in filepath or 'ragaai_catalyst' in filepath:
                     continue
                 try:
                     relative_path = os.path.relpath(filepath, base_path)
                     zipf.write(filepath, relative_path)
-
+                    logger.debug(f"Added python script to zip: {relative_path}")
                 except Exception as e:
-
+                    pass
+
+            if notebook_content_str:
+                py_filename = os.path.splitext(os.path.basename(self.notebook_path))[0] + ".py"
+                zipf.writestr(py_filename, notebook_content_str)
+                logger.debug(f"Added notebook content to zip as: {py_filename}")
+
+            if self.colab_content:
+                colab_filename = "colab_file.py"
+                zipf.writestr(colab_filename, self.colab_content)
+                logger.debug(f"Added Colab cell content to zip as: {colab_filename}")
 
+
+        logger.info(" Zip file created successfully.")
+        logger.debug(f"Zip file created successfully at: {zip_filename}")
         return hash_id, zip_filename
 
-
-
+def zip_list_of_unique_files(filepaths, output_dir=None):
+    """Create a zip file containing all unique files and their dependencies."""
+    if output_dir is None:
+        # Set default output directory based on environment
+        if JupyterNotebookHandler.is_running_in_colab():
+            output_dir = '/content'
+        else:
+            output_dir = os.getcwd()
+
     tracker = TraceDependencyTracker(output_dir)
     return tracker.create_zip(filepaths)
 
+
 # Example usage
 if __name__ == "__main__":
     filepaths = ["script1.py", "script2.py"]
     hash_id, zip_path = zip_list_of_unique_files(filepaths)
     print(f"Created zip file: {zip_path}")
     print(f"Hash ID: {hash_id}")
+
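
Since comment_magic_commands() decides what ends up in the hash and the zip, its behavior is worth pinning down. A quick sketch with a made-up snippet:

    from ragaai_catalyst.tracers.agentic_tracing.utils.zip_list_of_unique_files import (
        comment_magic_commands,
    )

    src = "%load_ext autoreload\n!pip install requests\nimport requests"
    print(comment_magic_commands(src))
    # Lines starting with %, !, or bare pip/apt-get/curl/conda are commented out:
    #   # %load_ext autoreload
    #   # !pip install requests
    #   import requests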
ragaai_catalyst/tracers/llamaindex_callback.py
@@ -58,7 +58,7 @@ class LlamaIndexTracer:
     ) -> None:
         trace = {
             "event_type": event_type,
-            "timestamp": datetime.now().isoformat(),
+            "timestamp": datetime.now().astimezone().isoformat(),
             "payload": payload,
             "status": "started",
             "event_id": event_id,
@@ -82,7 +82,7 @@ class LlamaIndexTracer:
     ) -> None:
         trace = {
             "event_type": event_type,
-            "timestamp": datetime.now().isoformat(),
+            "timestamp": datetime.now().astimezone().isoformat(),
             "payload": payload,
             "status": "completed",
             "event_id": event_id,
@@ -181,7 +181,7 @@ class LlamaIndexTracer:
         # self._upload_traces(save_json_to_pwd=True)
         self.callback_manager.remove_handler(self.trace_handler)
         self._restore_original_inits()
-        print("Traces
+        print("Traces uploaded")
         self._upload_task = True
 
     def _restore_original_inits(self):
@@ -220,7 +220,7 @@ class LlamaIndexTracer:
         user_detail["trace_id"] = self._generate_trace_id()
         metadata = user_detail["metadata"]
         metadata["log_source"] = "llamaindex_tracer"
-        metadata["recorded_on"] = datetime.
+        metadata["recorded_on"] = datetime.now().isoformat()
         user_detail["metadata"] = metadata
         return user_detail
 
@@ -351,7 +351,7 @@ class LlamaIndexTracer:
         presignedUrl = self._get_presigned_url()
         self._put_presigned_url(presignedUrl, filename)
         self._insert_traces(presignedUrl)
-        print("Traces
+        print("Traces uploaded")
 
     def get_upload_status(self):
         """Check the status of the trace upload."""
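
The timestamp changes above replace naive local times with timezone-aware ones, so traces recorded on different machines carry their UTC offset. Illustrative output (values are examples only):

    from datetime import datetime

    print(datetime.now().isoformat())               # e.g. 2025-01-15T10:30:00.123456 (no offset)
    print(datetime.now().astimezone().isoformat())  # e.g. 2025-01-15T10:30:00.123456+05:30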
ragaai_catalyst/tracers/tracer.py
@@ -21,10 +21,10 @@ from ragaai_catalyst.tracers.utils import get_unique_key
 from ragaai_catalyst import RagaAICatalyst
 from ragaai_catalyst.tracers.agentic_tracing import AgenticTracing, TrackName
 from ragaai_catalyst.tracers.agentic_tracing.tracers.llm_tracer import LLMTracerMixin
+from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import load_model_costs, update_model_costs_from_github
 
 logger = logging.getLogger(__name__)
 
-
 class Tracer(AgenticTracing):
     NUM_PROJECTS = 100
     TIMEOUT = 10
@@ -32,14 +32,28 @@ class Tracer(AgenticTracing):
         self,
         project_name,
         dataset_name,
+        trace_name=None,
         tracer_type=None,
         pipeline=None,
         metadata=None,
         description=None,
         upload_timeout=30,  # Default timeout of 30 seconds
+        update_llm_cost=True,  # Parameter to control model cost updates
+        auto_instrumentation={  # to control automatic instrumentation of different components
+            'llm': True,
+            'tool': True,
+            'agent': True,
+            'user_interaction': True,
+            'file_io': True,
+            'network': True,
+            'custom': True
+        },
+        interval_time=2,
+        # auto_instrumentation=True/False  # to control automatic instrumentation of everything
+
     ):
         """
-        Initializes a Tracer object.
+        Initializes a Tracer object.
 
         Args:
             project_name (str): The name of the project.
@@ -49,19 +63,48 @@ class Tracer(AgenticTracing):
             metadata (dict, optional): The metadata. Defaults to None.
             description (str, optional): The description. Defaults to None.
             upload_timeout (int, optional): The upload timeout in seconds. Defaults to 30.
-
-        Returns:
-            None
+            update_llm_cost (bool, optional): Whether to update model costs from GitHub. Defaults to True.
         """
-
+
         user_detail = {
             "project_name": project_name,
             "project_id": None,  # Will be set after project validation
             "dataset_name": dataset_name,
+            "interval_time": interval_time,
+            "trace_name": trace_name if trace_name else f"trace_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}",
             "trace_user_detail": {"metadata": metadata} if metadata else {}
         }
-
-
+
+        # take care of auto_instrumentation
+        if isinstance(auto_instrumentation, bool):
+            if auto_instrumentation:
+                auto_instrumentation = {
+                    "llm": True,
+                    "tool": True,
+                    "agent": True,
+                    "user_interaction": True,
+                    "file_io": True,
+                    "network": True,
+                    "custom": True
+                }
+            else:
+                auto_instrumentation = {
+                    "llm": False,
+                    "tool": False,
+                    "agent": False,
+                    "user_interaction": False,
+                    "file_io": False,
+                    "network": False,
+                    "custom": False
+                }
+        elif isinstance(auto_instrumentation, dict):
+            auto_instrumentation = {k: v for k, v in auto_instrumentation.items() if v}
+            for key in ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]:
+                if key not in auto_instrumentation:
+                    auto_instrumentation[key] = False
+
+        super().__init__(user_detail=user_detail, auto_instrumentation=auto_instrumentation)
+
         self.project_name = project_name
         self.dataset_name = dataset_name
         self.tracer_type = tracer_type
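
The auto_instrumentation handling means a bool fans out to every component, and a partial dict keeps its truthy entries and fills the rest with False. A standalone sketch that mirrors the constructor logic above (for illustration only, not the packaged function):

    def normalize(auto_instrumentation):
        # Mirror of the normalization in Tracer.__init__ above.
        components = ["llm", "tool", "agent", "user_interaction", "file_io", "network", "custom"]
        if isinstance(auto_instrumentation, bool):
            return {k: auto_instrumentation for k in components}
        enabled = {k: v for k, v in auto_instrumentation.items() if v}
        return {k: enabled.get(k, False) for k in components}

    print(normalize(True))                          # all seven components enabled
    print(normalize({"llm": True, "tool": False}))  # llm stays True; everything else False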
@@ -72,10 +115,14 @@ class Tracer(AgenticTracing):
         self.description = description
         self.upload_timeout = upload_timeout
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
-        self.timeout =
+        self.timeout = 30
         self.num_projects = 100
-        self.start_time = datetime.datetime.now(
+        self.start_time = datetime.datetime.now().astimezone().isoformat()
 
+        if update_llm_cost:
+            # First update the model costs file from GitHub
+            update_model_costs_from_github()
+
         try:
             response = requests.get(
                 f"{self.base_url}/v2/llm/projects?size={self.num_projects}",
@@ -118,7 +165,30 @@ class Tracer(AgenticTracing):
         else:
             self._upload_task = None
             # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+
+
+    def set_dataset_name(self, dataset_name):
+        """
+        Reinitialize the Tracer with a new dataset name while keeping all other parameters the same.
+
+        Args:
+            dataset_name (str): The new dataset name to set
+        """
+        # Store current parameters
+        current_params = {
+            'project_name': self.project_name,
+            'tracer_type': self.tracer_type,
+            'pipeline': self.pipeline,
+            'metadata': self.metadata,
+            'description': self.description,
+            'upload_timeout': self.upload_timeout
+        }
 
+        # Reinitialize self with new dataset_name and stored parameters
+        self.__init__(
+            dataset_name=dataset_name,
+            **current_params
+        )
 
     def _improve_metadata(self, metadata, tracer_type):
         if metadata is None:
@@ -191,6 +261,9 @@ class Tracer(AgenticTracing):
             print("Stopping tracer and initiating trace upload...")
             self._cleanup()
             self._upload_task = self._run_async(self._upload_traces())
+            self.is_active = False
+            self.dataset_name = None
+
             return "Trace upload initiated. Use get_upload_status() to check the status."
         elif self.tracer_type == "llamaindex":
            from ragaai_catalyst.tracers.llamaindex_callback import LlamaIndexTracer
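
Putting the tracer.py changes together, a hypothetical caller might look like the sketch below. Parameter names come from the diff; the project, dataset, and tracer-type values are placeholders:

    from ragaai_catalyst.tracers.tracer import Tracer

    tracer = Tracer(
        project_name="my-project",                         # placeholder
        dataset_name="run-2025-01",                        # placeholder
        tracer_type="langchain",                           # placeholder
        update_llm_cost=True,                              # refresh pricing from litellm's file
        auto_instrumentation={"llm": True, "tool": True},  # unlisted components resolve to False
    )

    # ... run the instrumented workload ...

    tracer.set_dataset_name("run-2025-02")  # re-runs __init__ with the same stored parameters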
|