ragaai-catalyst 2.1b0-py3-none-any.whl → 2.1b1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +1 -0
- ragaai_catalyst/dataset.py +1 -4
- ragaai_catalyst/evaluation.py +4 -5
- ragaai_catalyst/guard_executor.py +97 -0
- ragaai_catalyst/guardrails_manager.py +41 -15
- ragaai_catalyst/internal_api_completion.py +1 -1
- ragaai_catalyst/prompt_manager.py +7 -2
- ragaai_catalyst/ragaai_catalyst.py +1 -1
- ragaai_catalyst/synthetic_data_generation.py +7 -0
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +422 -0
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +198 -0
- ragaai_catalyst/tracers/agentic_tracing/base.py +376 -0
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +248 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +808 -0
- ragaai_catalyst/tracers/agentic_tracing/network_tracer.py +286 -0
- ragaai_catalyst/tracers/agentic_tracing/sample.py +197 -0
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +247 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +165 -0
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator_test.py +172 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +43 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +3 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +18 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/data_classes.py +61 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +32 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +177 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +7823 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/tracer.py +30 -4
- ragaai_catalyst/tracers/upload_traces.py +127 -0
- ragaai_catalyst-2.1b1.dist-info/METADATA +43 -0
- ragaai_catalyst-2.1b1.dist-info/RECORD +56 -0
- {ragaai_catalyst-2.1b0.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1b0.dist-info/METADATA +0 -295
- ragaai_catalyst-2.1b0.dist-info/RECORD +0 -28
- {ragaai_catalyst-2.1b0.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py
@@ -0,0 +1,74 @@
+import json
+import os
+from importlib import resources
+from dataclasses import asdict
+
+
+def convert_usage_to_dict(usage):
+    # Initialize the token_usage dictionary with default values
+    token_usage = {
+        "input": 0,
+        "completion": 0,
+        "reasoning": 0,  # Default reasoning tokens to 0 unless specified
+    }
+
+    if usage:
+        if isinstance(usage, dict):
+            # Access usage data as dictionary keys
+            token_usage["input"] = usage.get("prompt_tokens", 0)
+            token_usage["completion"] = usage.get("completion_tokens", 0)
+            # If reasoning tokens are provided, adjust accordingly
+            token_usage["reasoning"] = usage.get("reasoning_tokens", 0)
+        else:
+            # Handle the case where usage is not a dictionary
+            # This could be an object with attributes, or something else
+            try:
+                token_usage["input"] = getattr(usage, "prompt_tokens", 0)
+                token_usage["completion"] = getattr(usage, "completion_tokens", 0)
+                token_usage["reasoning"] = getattr(usage, "reasoning_tokens", 0)
+            except AttributeError:
+                # If attributes are not found, log or handle the error as needed
+                print(f"Warning: Unexpected usage type: {type(usage)}")
+
+    return token_usage
+
+
+def calculate_cost(
+    token_usage,
+    input_cost_per_token=0.0,
+    output_cost_per_token=0.0,
+    reasoning_cost_per_token=0.0,
+):
+    input_tokens = token_usage.get("prompt_tokens", 0)
+    output_tokens = token_usage.get("completion_tokens", 0)
+    reasoning_tokens = token_usage.get("reasoning_tokens", 0)
+
+    input_cost = input_tokens * input_cost_per_token
+    output_cost = output_tokens * output_cost_per_token
+    reasoning_cost = reasoning_tokens * reasoning_cost_per_token
+
+    total_cost = input_cost + output_cost + reasoning_cost
+
+    return {
+        "input": input_cost,
+        "completion": output_cost,
+        "reasoning": reasoning_cost,
+        "total": total_cost,
+    }
+
+
+def load_model_costs():
+    try:
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        model_costs_path = os.path.join(current_dir, "model_costs.json")
+        with open(model_costs_path, "r") as file:
+            return json.load(file)
+    except FileNotFoundError:
+        with resources.open_text("utils", "model_costs.json") as file:
+            return json.load(file)
+
+
+def log_event(event_data, log_file_path):
+    event_data = asdict(event_data)
+    with open(log_file_path, "a") as f:
+        f.write(json.dumps(event_data) + "\n")
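These helpers are small enough to exercise directly. The snippet below is an illustrative sketch only: the import path is assumed from the file list above, and the per-token prices are arbitrary placeholders, not real model rates. Note that calculate_cost reads the raw provider-style keys (prompt_tokens, completion_tokens, reasoning_tokens), not the normalized dict returned by convert_usage_to_dict.

    from ragaai_catalyst.tracers.agentic_tracing.utils.trace_utils import (
        convert_usage_to_dict,
        calculate_cost,
    )

    # A provider-style usage payload, e.g. from an OpenAI-compatible response object.
    usage = {"prompt_tokens": 120, "completion_tokens": 45, "reasoning_tokens": 0}

    print(convert_usage_to_dict(usage))
    # {'input': 120, 'completion': 45, 'reasoning': 0}

    # Pass the original usage dict here (not the normalized one), since calculate_cost
    # looks up prompt_tokens / completion_tokens / reasoning_tokens directly.
    # The prices below are made-up values for illustration.
    print(calculate_cost(usage, input_cost_per_token=5e-7, output_cost_per_token=1.5e-6))
    # {'input': 6e-05, 'completion': 6.75e-05, 'reasoning': 0.0, 'total': 0.0001275}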
ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py
@@ -0,0 +1,342 @@
+# import os
+# import hashlib
+# import zipfile
+# import re
+# import ast
+# import importlib.util
+# import json
+# from pathlib import Path
+
+# class TraceDependencyTracker:
+#     def __init__(self, output_dir=None):
+#         self.tracked_files = set()
+#         self.python_imports = set()
+#         self.output_dir = output_dir or os.getcwd()
+
+#     def track_file_access(self, filepath):
+#         """Track a file that's been accessed."""
+#         if os.path.exists(filepath):
+#             self.tracked_files.add(os.path.abspath(filepath))
+
+#     def find_config_files(self, content, base_path):
+#         """Find configuration files referenced in the content."""
+#         patterns = [
+#             r'(?:open|read|load|with\s+open)\s*\([\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+#             r'(?:config|cfg|conf|settings|file|path)(?:_file|_path)?\s*=\s*[\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+#             r'[\'"]([^\'"]*\.txt)[\'"]',
+#             r'[\'"]([^\'"]*\.(?:yaml|yml))[\'"]',
+#             r'from\s+(\S+)\s+import',
+#             r'import\s+(\S+)'
+#         ]
+
+#         for pattern in patterns:
+#             matches = re.finditer(pattern, content)
+#             for match in matches:
+#                 filepath = match.group(1)
+#                 if not os.path.isabs(filepath):
+#                     full_path = os.path.join(os.path.dirname(base_path), filepath)
+#                 else:
+#                     full_path = filepath
+
+#                 if os.path.exists(full_path):
+#                     self.track_file_access(full_path)
+#                     try:
+#                         with open(full_path, 'r', encoding='utf-8') as f:
+#                             self.find_config_files(f.read(), full_path)
+#                     except (UnicodeDecodeError, IOError):
+#                         pass
+
+#     def analyze_python_imports(self, filepath):
+#         """Analyze Python file for imports and track imported files."""
+#         try:
+#             with open(filepath, 'r', encoding='utf-8') as file:
+#                 tree = ast.parse(file.read(), filename=filepath)
+
+#             for node in ast.walk(tree):
+#                 if isinstance(node, (ast.Import, ast.ImportFrom)):
+#                     if isinstance(node, ast.ImportFrom) and node.module:
+#                         module_name = node.module
+#                     else:
+#                         for name in node.names:
+#                             module_name = name.name.split('.')[0]
+
+#                     try:
+#                         spec = importlib.util.find_spec(module_name)
+#                         if spec and spec.origin and not spec.origin.startswith(os.path.dirname(importlib.__file__)):
+#                             self.python_imports.add(spec.origin)
+#                     except (ImportError, AttributeError):
+#                         pass
+#         except Exception as e:
+#             print(f"Warning: Could not analyze imports in {filepath}: {str(e)}")
+
+#     def create_zip(self, filepaths):
+#         """
+#         Process files and create a single zip with all dependencies.
+
+#         Args:
+#             filepaths (list): List of file paths to process.
+
+#         Returns:
+#             tuple: A tuple containing the hash ID (str) and the path to the saved .zip file (str).
+#         """
+#         # Process all files and their dependencies
+#         for filepath in filepaths:
+#             abs_path = os.path.abspath(filepath)
+#             self.track_file_access(abs_path)
+
+#             try:
+#                 with open(abs_path, 'r', encoding='utf-8') as file:
+#                     content = file.read()
+
+#                 self.find_config_files(content, abs_path)
+
+#                 if filepath.endswith('.py'):
+#                     self.analyze_python_imports(abs_path)
+#             except Exception as e:
+#                 print(f"Warning: Could not process {filepath}: {str(e)}")
+
+#         # Add Python imports to tracked files
+#         self.tracked_files.update(self.python_imports)
+
+#         # Generate hash from all files
+#         hash_contents = []
+#         for filepath in sorted(self.tracked_files):
+#             # Skip any file paths that contain 'env'
+#             if 'env' in filepath:
+#                 continue  # Skip env folder
+#             try:
+#                 with open(filepath, 'rb') as file:
+#                     content = file.read()
+#                     hash_contents.append(content)
+#             except Exception as e:
+#                 print(f"Warning: Could not read {filepath} for hash calculation: {str(e)}")
+
+#         combined_content = b''.join(hash_contents)
+#         hash_id = hashlib.sha256(combined_content).hexdigest()
+
+#         # Create zip file
+#         zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
+
+#         # Determine base path excluding 'env' folders
+#         base_path = os.path.commonpath([os.path.abspath(p) for p in self.tracked_files if 'env' not in p])
+
+#         with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+#             for filepath in sorted(self.tracked_files):
+#                 # Skip any file paths that contain 'env'
+#                 if 'env' in filepath:
+#                     continue  # Skip env folder
+#                 try:
+#                     relative_path = os.path.relpath(filepath, base_path)
+#                     zipf.write(filepath, relative_path)
+#                     print(f"Added to zip: {relative_path}")
+#                 except Exception as e:
+#                     print(f"Warning: Could not add {filepath} to zip: {str(e)}")
+
+#         return hash_id, zip_filename
+
+# def zip_list_of_unique_files(filepaths):
+#     """
+#     Enhanced version of the original function that tracks all dependencies.
+
+#     Args:
+#         filepaths (list): List of file paths to process.
+
+#     Returns:
+#         tuple: A tuple containing the hash ID (str) and the path to the saved .zip file (str).
+#     """
+#     tracker = TraceDependencyTracker()
+#     return tracker.create_zip(filepaths)
+
+# if __name__ == "__main__":
+#     filepaths = ["script1.py", "script2.py"]
+#     hash_id, zip_path = zip_list_of_unique_files(filepaths)
+#     print(f"Created zip file: {zip_path}")
+#     print(f"Hash ID: {hash_id}")
+
+
+
+
+import os
+import hashlib
+import zipfile
+import re
+import ast
+import importlib.util
+import json
+import astor
+from pathlib import Path
+
+# Define the PackageUsageRemover class
+class PackageUsageRemover(ast.NodeTransformer):
+    def __init__(self, package_name):
+        self.package_name = package_name
+        self.imported_names = set()
+
+    def visit_Import(self, node):
+        filtered_names = []
+        for name in node.names:
+            if not name.name.startswith(self.package_name):
+                filtered_names.append(name)
+            else:
+                self.imported_names.add(name.asname or name.name)
+
+        if not filtered_names:
+            return None
+        node.names = filtered_names
+        return node
+
+    def visit_ImportFrom(self, node):
+        if node.module and node.module.startswith(self.package_name):
+            self.imported_names.update(n.asname or n.name for n in node.names)
+            return None
+        return node
+
+    def visit_Assign(self, node):
+        if self._uses_package(node.value):
+            return None
+        return node
+
+    def visit_Call(self, node):
+        if isinstance(node.func, ast.Name) and node.func.id in self.imported_names:
+            return None
+        if isinstance(node.func, ast.Attribute):
+            if isinstance(node.func.value, ast.Name) and node.func.value.id in self.imported_names:
+                return None
+        return node
+
+    def _uses_package(self, node):
+        if isinstance(node, ast.Name) and node.id in self.imported_names:
+            return True
+        if isinstance(node, ast.Call):
+            return self._uses_package(node.func)
+        if isinstance(node, ast.Attribute):
+            return self._uses_package(node.value)
+        return False
+
+# Define the function to remove package code from a source code string
+def remove_package_code(source_code: str, package_name: str) -> str:
+    try:
+        tree = ast.parse(source_code)
+        transformer = PackageUsageRemover(package_name)
+        modified_tree = transformer.visit(tree)
+        modified_code = astor.to_source(modified_tree)
+        return modified_code
+    except Exception as e:
+        raise Exception(f"Error processing source code: {str(e)}")
+
+# TraceDependencyTracker class
+class TraceDependencyTracker:
+    def __init__(self, output_dir=None):
+        self.tracked_files = set()
+        self.python_imports = set()
+        self.output_dir = output_dir or os.getcwd()
+
+    def track_file_access(self, filepath):
+        if os.path.exists(filepath):
+            self.tracked_files.add(os.path.abspath(filepath))
+
+    def find_config_files(self, content, base_path):
+        patterns = [
+            r'(?:open|read|load|with\s+open)\s*\([\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+            r'(?:config|cfg|conf|settings|file|path)(?:_file|_path)?\s*=\s*[\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+            r'[\'"]([^\'"]*\.txt)[\'"]',
+            r'[\'"]([^\'"]*\.(?:yaml|yml))[\'"]',
+            r'from\s+(\S+)\s+import',
+            r'import\s+(\S+)'
+        ]
+        for pattern in patterns:
+            matches = re.finditer(pattern, content)
+            for match in matches:
+                filepath = match.group(1)
+                if not os.path.isabs(filepath):
+                    full_path = os.path.join(os.path.dirname(base_path), filepath)
+                else:
+                    full_path = filepath
+                if os.path.exists(full_path):
+                    self.track_file_access(full_path)
+                    try:
+                        with open(full_path, 'r', encoding='utf-8') as f:
+                            self.find_config_files(f.read(), full_path)
+                    except (UnicodeDecodeError, IOError):
+                        pass
+
+    def analyze_python_imports(self, filepath):
+        try:
+            with open(filepath, 'r', encoding='utf-8') as file:
+                tree = ast.parse(file.read(), filename=filepath)
+            for node in ast.walk(tree):
+                if isinstance(node, (ast.Import, ast.ImportFrom)):
+                    if isinstance(node, ast.ImportFrom) and node.module:
+                        module_name = node.module
+                    else:
+                        for name in node.names:
+                            module_name = name.name.split('.')[0]
+                    try:
+                        spec = importlib.util.find_spec(module_name)
+                        if spec and spec.origin and not spec.origin.startswith(os.path.dirname(importlib.__file__)):
+                            self.python_imports.add(spec.origin)
+                    except (ImportError, AttributeError):
+                        pass
+        except Exception as e:
+            print(f"Warning: Could not analyze imports in {filepath}: {str(e)}")
+
+    def create_zip(self, filepaths):
+        for filepath in filepaths:
+            abs_path = os.path.abspath(filepath)
+            self.track_file_access(abs_path)
+            try:
+                with open(abs_path, 'r', encoding='utf-8') as file:
+                    content = file.read()
+                self.find_config_files(content, abs_path)
+                if filepath.endswith('.py'):
+                    self.analyze_python_imports(abs_path)
+            except Exception as e:
+                print(f"Warning: Could not process {filepath}: {str(e)}")
+
+        self.tracked_files.update(self.python_imports)
+        hash_contents = []
+        for filepath in sorted(self.tracked_files):
+            if 'env' in filepath:
+                continue
+            try:
+                with open(filepath, 'rb') as file:
+                    content = file.read()
+                    if filepath.endswith('.py'):
+                        # Temporarily remove raga_catalyst code for hash calculation
+                        content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
+                    hash_contents.append(content)
+            except Exception as e:
+                print(f"Warning: Could not read {filepath} for hash calculation: {str(e)}")
+
+        combined_content = b''.join(hash_contents)
+        hash_id = hashlib.sha256(combined_content).hexdigest()
+
+        zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
+        common_path = [os.path.abspath(p) for p in self.tracked_files if 'env' not in p]
+
+        if common_path!=[]:
+            base_path = os.path.commonpath(common_path)
+        with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            for filepath in sorted(self.tracked_files):
+                if 'env' in filepath:
+                    continue
+                try:
+                    relative_path = os.path.relpath(filepath, base_path)
+                    zipf.write(filepath, relative_path)
+                    print(f"Added to zip: {relative_path}")
+                except Exception as e:
+                    print(f"Warning: Could not add {filepath} to zip: {str(e)}")
+
+        return hash_id, zip_filename
+
+# Main function for creating a zip of unique files
+def zip_list_of_unique_files(filepaths):
+    tracker = TraceDependencyTracker()
+    return tracker.create_zip(filepaths)
+
+# Example usage
+if __name__ == "__main__":
+    filepaths = ["script1.py", "script2.py"]
+    hash_id, zip_path = zip_list_of_unique_files(filepaths)
+    print(f"Created zip file: {zip_path}")
+    print(f"Hash ID: {hash_id}")
ragaai_catalyst/tracers/exporters/raga_exporter.py
@@ -7,7 +7,6 @@ from tqdm import tqdm
 import requests
 from ...ragaai_catalyst import RagaAICatalyst
 import shutil
-import pdb
 
 logger = logging.getLogger(__name__)
 
@@ -196,7 +195,6 @@ class RagaExporter:
         return status_code
 
     async def get_presigned_url(self, session, num_files):
-        # pdb.set_trace()
         """
         Asynchronously retrieves a presigned URL from the RagaExporter API.
 
@@ -213,7 +211,6 @@ class RagaExporter:
         """
 
         async def make_request():
-            # pdb.set_trace()
 
            json_data = {
                "datasetName": self.dataset_name,
@@ -296,8 +293,7 @@ class RagaExporter:
         return response.status
 
     async def upload_file(self, session, url, file_path):
-
-        # print('url', url)
+
         """
         Asynchronously uploads a file using the given session, url, and file path.
         Supports both regular and Azure blob storage URLs.
@@ -345,8 +341,6 @@ class RagaExporter:
         return response.status
 
     async def check_and_upload_files(self, session, file_paths):
-        # print(file_paths)
-        # pdb.set_trace()
         """
         Checks if there are files to upload, gets presigned URLs, uploads files, and streams them if successful.
 
ragaai_catalyst/tracers/tracer.py
@@ -19,11 +19,14 @@ from .instrumentators import (
 from .utils import get_unique_key
 # from .llamaindex_callback import LlamaIndexTracer
 from ..ragaai_catalyst import RagaAICatalyst
+from .agentic_tracing.agentic_tracing import AgenticTracing
+from .agentic_tracing.file_name_tracker import TrackName
+from .agentic_tracing.llm_tracer import LLMTracerMixin
 
 logger = logging.getLogger(__name__)
 
 
-class Tracer:
+class Tracer(AgenticTracing):
     NUM_PROJECTS = 100
     TIMEOUT = 10
     def __init__(
@@ -41,6 +44,7 @@ class Tracer:
 
         Args:
             project_name (str): The name of the project.
+            dataset_name (str): The name of the dataset.
             tracer_type (str, optional): The type of tracer. Defaults to None.
             pipeline (dict, optional): The pipeline configuration. Defaults to None.
             metadata (dict, optional): The metadata. Defaults to None.
@@ -50,16 +54,28 @@ class Tracer:
         Returns:
             None
         """
+        # Set auto_instrument_llm to True to enable automatic LLM tracing
+        user_detail = {
+            "project_name": project_name,
+            "project_id": None,  # Will be set after project validation
+            "dataset_name": dataset_name,
+            "trace_user_detail": {"metadata": metadata} if metadata else {}
+        }
+        super().__init__(user_detail=user_detail, auto_instrument_llm=True)
+        self.is_active = True
         self.project_name = project_name
         self.dataset_name = dataset_name
         self.tracer_type = tracer_type
         self.metadata = self._improve_metadata(metadata, tracer_type)
+        # self.metadata["total_cost"] = 0.0
+        # self.metadata["total_tokens"] = 0
         self.pipeline = pipeline
         self.description = description
         self.upload_timeout = upload_timeout
         self.base_url = f"{RagaAICatalyst.BASE_URL}"
         self.timeout = 10
         self.num_projects = 100
+        self.start_time = datetime.datetime.now(datetime.timezone.utc)
 
         try:
             response = requests.get(
@@ -81,6 +97,9 @@ class Tracer:
             self.project_id = [
                 project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
             ][0]
+            # super().__init__(user_detail=self._pass_user_data())
+            # self.file_tracker = TrackName()
+            self._pass_user_data()
 
         except requests.exceptions.RequestException as e:
             logger.error(f"Failed to retrieve projects list: {e}")
@@ -98,7 +117,9 @@ class Tracer:
             from .llamaindex_callback import LlamaIndexTracer
 
         else:
-
+            self._upload_task = None
+            # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+
 
     def _improve_metadata(self, metadata, tracer_type):
         if metadata is None:
@@ -157,7 +178,9 @@ class Tracer:
         elif self.tracer_type == "llamaindex":
             from .llamaindex_callback import LlamaIndexTracer
             return LlamaIndexTracer(self._pass_user_data()).start()
-
+        else:
+            super().start()
+            return self
 
     def stop(self):
         """Stop the tracer and initiate trace upload."""
@@ -172,7 +195,9 @@ class Tracer:
             return "Trace upload initiated. Use get_upload_status() to check the status."
         elif self.tracer_type == "llamaindex":
             from .llamaindex_callback import LlamaIndexTracer
-            return LlamaIndexTracer().stop()
+            return LlamaIndexTracer(self._pass_user_data()).stop()
+        else:
+            super().stop()
 
     def get_upload_status(self):
         """Check the status of the trace upload."""
@@ -262,6 +287,7 @@ class Tracer:
         # Reset instrumentation flag
         self.is_instrumented = False
         # Note: We're not resetting all attributes here to allow for upload status checking
+
     def _pass_user_data(self):
         return {"project_name":self.project_name,
                 "project_id": self.project_id,
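Taken together, these changes make Tracer subclass AgenticTracing and route any tracer_type other than "langchain" or "llamaindex" to the new agentic tracing stack instead of doing nothing. A rough sketch of the intended call pattern follows; the names and credentials are placeholders, and the RagaAICatalyst authentication step is assumed from the package README rather than shown in this diff:

    from ragaai_catalyst import RagaAICatalyst, Tracer

    # Placeholder credentials; authentication itself is unchanged in this release.
    RagaAICatalyst(access_key="<ACCESS_KEY>", secret_key="<SECRET_KEY>")

    tracer = Tracer(
        project_name="my_project",
        dataset_name="my_dataset",   # now documented and forwarded to AgenticTracing via user_detail
        tracer_type=None,            # anything other than "langchain"/"llamaindex" takes the agentic path
    )

    tracer.start()   # for the default tracer_type this delegates to AgenticTracing.start()
    # ... traced application code ...
    tracer.stop()    # likewise delegates to AgenticTracing.stop()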
ragaai_catalyst/tracers/upload_traces.py
@@ -0,0 +1,127 @@
+import requests
+import json
+import os
+from datetime import datetime
+
+
+class UploadTraces:
+    def __init__(self,
+                 json_file_path,
+                 project_name,
+                 project_id,
+                 dataset_name,
+                 user_detail,
+                 base_url):
+        self.json_file_path = json_file_path
+        self.project_name = project_name
+        self.project_id = project_id
+        self.dataset_name = dataset_name
+        self.user_detail = user_detail
+        self.base_url = base_url
+        self.timeout = 10
+
+    def _create_dataset_schema_with_trace(self):
+        SCHEMA_MAPPING_NEW = {
+            "trace_id": {"columnType": "traceId"},
+            "trace_uri": {"columnType": "traceUri"},
+            "prompt": {"columnType": "prompt"},
+            "response":{"columnType": "response"},
+            "context": {"columnType": "context"},
+            "llm_model": {"columnType":"pipeline"},
+            "recorded_on": {"columnType": "metadata"},
+            "embed_model": {"columnType":"pipeline"},
+            "log_source": {"columnType": "metadata"},
+            "vector_store":{"columnType":"pipeline"},
+            "feedback": {"columnType":"feedBack"}
+        }
+        def make_request():
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Name": self.project_name,
+            }
+            payload = json.dumps({
+                "datasetName": self.dataset_name,
+                "schemaMapping": SCHEMA_MAPPING_NEW,
+                "traceFolderUrl": None,
+            })
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/dataset/logs",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout
+                                        )
+
+            return response
+
+        response = make_request()
+
+        if response.status_code == 401:
+            # get_token()  # Fetch a new token and set it in the environment
+            response = make_request()  # Retry the request
+        if response.status_code != 200:
+            return response.status_code
+        return response.status_code
+
+    def _get_presigned_url(self):
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "numFiles": 1,
+        })
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": self.project_name,
+        }
+
+        response = requests.request("GET",
+                                    f"{self.base_url}/v1/llm/presigned-url",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+        if response.status_code == 200:
+            presignedUrls = response.json()["data"]["presignedUrls"][0]
+            return presignedUrls
+
+    def _put_presigned_url(self, presignedUrl, filename):
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        if "blob.core.windows.net" in presignedUrl:  # Azure
+            headers["x-ms-blob-type"] = "BlockBlob"
+        print(f"Uploading traces...")
+        with open(filename) as f:
+            payload = f.read().replace("\n", "").replace("\r", "").encode()
+
+
+        response = requests.request("PUT",
+                                    presignedUrl,
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+        if response.status_code != 200 or response.status_code != 201:
+            return response, response.status_code
+
+    def _insert_traces(self, presignedUrl):
+        headers = {
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "Content-Type": "application/json",
+            "X-Project-Name": self.project_name,
+        }
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "presignedUrl": presignedUrl,
+        })
+        response = requests.request("POST",
+                                    f"{self.base_url}/v1/llm/insert/trace",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=self.timeout)
+
+    def upload_traces(self):
+        self._create_dataset_schema_with_trace()
+        presignedUrl = self._get_presigned_url()
+        self._put_presigned_url(presignedUrl, self.json_file_path)
+        self._insert_traces(presignedUrl)
+        print("Traces uplaoded")