ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1b1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- ragaai_catalyst/dataset.py +0 -3
- ragaai_catalyst/evaluation.py +1 -2
- ragaai_catalyst/tracers/__init__.py +1 -1
- ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +217 -106
- ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +27 -41
- ragaai_catalyst/tracers/agentic_tracing/base.py +127 -21
- ragaai_catalyst/tracers/agentic_tracing/data_structure.py +88 -79
- ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
- ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
- ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
- ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +258 -356
- ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
- ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
- ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
- ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
- ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
- ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +342 -0
- ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
- ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
- ragaai_catalyst/tracers/tracer.py +6 -2
- ragaai_catalyst/tracers/upload_traces.py +46 -57
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/METADATA +6 -2
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/RECORD +28 -22
- ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/WHEEL +0 -0
- {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1b1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py

@@ -3,38 +3,57 @@ import uuid
 from datetime import datetime
 import psutil
 from typing import Optional, Any, Dict, List
-from .unique_decorator import
+from .unique_decorator import generate_unique_hash_simple
 import contextvars
 import asyncio
+from .file_name_tracker import TrackName
+
 
 class ToolTracerMixin:
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.file_tracker = TrackName()
         self.current_tool_name = contextvars.ContextVar("tool_name", default=None)
         self.current_tool_id = contextvars.ContextVar("tool_id", default=None)
         self.component_network_calls = {}
-        self.
-        self.
+        self.component_user_interaction = {}
+        self.gt = None
 
 
     def trace_tool(self, name: str, tool_type: str = "generic", version: str = "1.0.0"):
         def decorator(func):
+            # Add metadata attribute to the function
+            metadata = {
+                "name": name,
+                "tool_type": tool_type,
+                "version": version,
+                "is_active": True
+            }
+
             # Check if the function is async
             is_async = asyncio.iscoroutinefunction(func)
 
+            @self.file_tracker.trace_decorator
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
+                async_wrapper.metadata = metadata
+                self.gt = kwargs.get('gt', None) if kwargs else None
                 return await self._trace_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )
 
+            @self.file_tracker.trace_decorator
             @functools.wraps(func)
             def sync_wrapper(*args, **kwargs):
+                sync_wrapper.metadata = metadata
+                self.gt = kwargs.get('gt', None) if kwargs else None
                 return self._trace_sync_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )
 
-
+            wrapper = async_wrapper if is_async else sync_wrapper
+            wrapper.metadata = metadata
+            return wrapper
 
         return decorator
 
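The interesting mechanics here are the decoration-time choice between a sync and an async wrapper, and the metadata attached to the wrapper itself. A minimal, self-contained sketch of that pattern with the tracing calls stripped out (only the `trace_tool(name, tool_type, version)` signature and the metadata shape come from the diff; the rest is illustrative):

```python
import asyncio
import functools

def trace_tool(name, tool_type="generic", version="1.0.0"):
    def decorator(func):
        metadata = {"name": name, "tool_type": tool_type,
                    "version": version, "is_active": True}

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            return await func(*args, **kwargs)   # tracing elided

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            return func(*args, **kwargs)         # tracing elided

        # Pick the wrapper once, at decoration time, based on the wrapped function
        wrapper = async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
        wrapper.metadata = metadata              # inspectable after decoration
        return wrapper
    return decorator

@trace_tool(name="weather_lookup", tool_type="api")
def get_weather(city):
    return {"city": city, "temp_c": 21}

print(get_weather("Paris"), get_weather.metadata)
```

Note the diff also re-assigns `metadata` inside each wrapper on every call; the decoration-time assignment alone would suffice, since both names point at the same dict.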
@@ -46,7 +65,7 @@ class ToolTracerMixin:
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
-        hash_id =
+        hash_id = generate_unique_hash_simple(func)
 
         # Start tracking network calls for this component
         self.start_component(component_id)
@@ -118,7 +137,7 @@ class ToolTracerMixin:
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
-        hash_id =
+        hash_id = generate_unique_hash_simple(func)
 
         try:
             # Execute the tool
@@ -142,7 +161,6 @@ class ToolTracerMixin:
             input_data=self._sanitize_input(args, kwargs),
             output_data=self._sanitize_output(result)
         )
-
         self.add_component(tool_component)
         return result
 
@@ -169,11 +187,12 @@ class ToolTracerMixin:
             output_data=None,
             error=error_component
         )
-
         self.add_component(tool_component)
         raise
 
     def create_tool_component(self, **kwargs):
+
+
         """Create a tool component according to the data structure"""
         start_time = kwargs["start_time"]
         component = {
@@ -197,19 +216,12 @@ class ToolTracerMixin:
                 "memory_used": kwargs["memory_used"]
             },
             "network_calls": self.component_network_calls.get(kwargs["component_id"], []),
-            "interactions": [
-                "id": f"int_{uuid.uuid4()}",
-                "interaction_type": "input",
-                "timestamp": start_time.isoformat(),
-                "content": kwargs["input_data"]
-            }, {
-                "id": f"int_{uuid.uuid4()}",
-                "interaction_type": "output",
-                "timestamp": kwargs["end_time"].isoformat(),
-                "content": kwargs["output_data"]
-            }]
+            "interactions": self.component_user_interaction.get(kwargs["component_id"], [])
         }
 
+        if self.gt:
+            component["data"]["gt"] = self.gt
+
         return component
 
     def start_component(self, component_id):
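The visible effect on the serialized component: interactions are no longer a hard-coded input/output pair stamped at creation time, but whatever the user-interaction tracer collected for that component id, plus an optional ground-truth field. An illustrative sketch of the resulting shape (field names follow the diff; the values are invented):

```python
# Illustrative shape only; keys come from create_tool_component, values are made up.
component = {
    "data": {
        "gt": {"expected_temp_c": 20},    # present only when a `gt` kwarg was passed
    },
    "network_calls": [],                   # from component_network_calls[component_id]
    "interactions": [                      # from component_user_interaction[component_id]
        {"id": "int_...", "interaction_type": "input", "content": "Paris"},
    ],
}
```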
ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py

@@ -4,7 +4,7 @@ import functools
 import re
 import tokenize
 import io
-import
+import types
 
 def normalize_source_code(source):
     """
@@ -19,123 +19,33 @@ def normalize_source_code(source):
     Returns:
         str: Normalized source code
     """
-    # Use tokenize to carefully parse the source code
     normalized_tokens = []
 
     try:
-        # Convert source to a file-like object for tokenize
         token_source = io.StringIO(source).readline
 
         for token_type, token_string, _, _, _ in tokenize.generate_tokens(token_source):
-            # Preserve strings (including docstrings)
             if token_type == tokenize.STRING:
                 normalized_tokens.append(token_string.strip())
-
-            # Preserve code tokens
-            elif token_type in [
-                tokenize.NAME,
-                tokenize.NUMBER,
-                tokenize.OP
-            ]:
+            elif token_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                 normalized_tokens.append(token_string.strip())
 
     except tokenize.TokenError:
-        # Fallback to a simpler method if tokenization fails
         normalized_tokens = re.findall(r'\w+|[^\w\s]', source)
 
-
-    normalized_source = ''.join(normalized_tokens)
-
-    return normalized_source
-
-# def generate_unique_hash(obj, *call_args, **call_kwargs):
-#     print('#'*100,'hash id: ', '#'*100)
-#     print(obj)
-#     print(*call_args)
-#     # print(**call_kwargs)
-#     """
-#     Generate a unique, deterministic hash for a given object.
-
-#     Args:
-#         obj: The object (function or class) to generate hash for
-#         additional_salt: Optional additional salt to ensure uniqueness
-
-#     Returns:
-#         str: A unique hash_id meeting the specified requirements
-#     """
-#     # Handle different object types
-
-#     if inspect.isclass(obj):
-#         # For classes, use the class definition
-#         try:
-#             source = inspect.getsource(obj)
-#         except (IOError, TypeError):
-#             source = repr(obj)
-
-#         # Use class name in hash generation
-#         hash_input = f"{obj.__name__}{normalize_source_code(source)}"
-
-#     else:
-#         # For functions and methods
-#         # Get full signature information
-#         signature = inspect.signature(obj)
-
-#         # Capture parameter names and their default values
-#         params_info = []
-#         for name, param in signature.parameters.items():
-#             param_str = f"{name}:{param.kind}"
-#             if param.default != inspect.Parameter.empty:
-#                 param_str += f":default={param.default}"
-#             params_info.append(param_str)
-
-#         # Get source code
-#         try:
-#             source = inspect.getsource(obj)
-#         except (IOError, TypeError):
-#             source = repr(obj)
-
-#         # Combine method name, parameters, and normalized source
-#         hash_input = (
-#             f"{obj.__name__}"  # Method name
-#             f"{''.join(params_info)}"  # Parameter details
-#             f"{normalize_source_code(source)}"  # Normalized source code
-#         )
-
-#     # Add optional salt
-#     args_repr = str(call_args) + str(sorted(call_kwargs.items()))
-#     hash_input += args_repr
-#     # Use SHA-256 for generating the hash
-#     hash_object = hashlib.sha256(hash_input.encode('utf-8'))
-
-#     # Generate hash and truncate to 32 characters
-#     hash_id = hash_object.hexdigest()[:32]
-
-#     # Ensure the hash starts with a letter
-#     if not hash_id[0].isalpha():
-#         hash_id = 'a' + hash_id[1:]
-
-#     print(hash_id)
-#     return hash_id
-
+    return ''.join(normalized_tokens)
 
-
-
-
-    if inspect.ismethod(obj) or inspect.isfunction(obj):
+def generate_unique_hash(func, *args, **kwargs):
+    """Generate a unique hash based on the original function and its arguments"""
+    if inspect.ismethod(func) or inspect.isfunction(func):
         # Get function name and source code
-        func_name =
+        func_name = func.__name__
         try:
-
-            func_source = inspect.getsource(obj)
+            func_source = inspect.getsource(func)
             normalized_source = normalize_source_code(func_source)
         except (IOError, TypeError):
             normalized_source = ""
 
-        # Get function arguments
-        if args and hasattr(args[0], '__class__'):
-            # If it's a method, skip the 'self' argument
-            args = args[1:]
-
         # Normalize argument values
         def normalize_arg(arg):
             if isinstance(arg, (str, int, float, bool)):
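An aside on what survives the slimming above: `normalize_source_code` still reduces source to a whitespace-free token stream, which is why cosmetic reformatting does not change the hashes built on top of it. A self-contained demo using the function exactly as it now reads:

```python
import io
import re
import tokenize

def normalize_source_code(source):
    normalized_tokens = []
    try:
        token_source = io.StringIO(source).readline
        for token_type, token_string, _, _, _ in tokenize.generate_tokens(token_source):
            if token_type == tokenize.STRING:
                normalized_tokens.append(token_string.strip())
            elif token_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                normalized_tokens.append(token_string.strip())
    except tokenize.TokenError:
        normalized_tokens = re.findall(r'\w+|[^\w\s]', source)
    return ''.join(normalized_tokens)

# Two formattings of the same function collapse to the same token string,
# so a whitespace-only edit does not change any downstream hash.
a = "def f(x):\n    return x + 1\n"
b = "def f(x):\n    return x   +   1\n"
print(normalize_source_code(a) == normalize_source_code(b))  # True
```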
@@ -158,27 +68,62 @@ def generate_unique_hash(obj, *args, **kwargs):
         # Combine all components
         hash_input = f"{func_name}_{normalized_source}_{args_str}_{kwargs_str}"
 
-    elif inspect.isclass(
-        # For classes, normalize the class definition
+    elif inspect.isclass(func):
         try:
-            class_source = inspect.getsource(
+            class_source = inspect.getsource(func)
             normalized_source = normalize_source_code(class_source)
-            hash_input = f"{
+            hash_input = f"{func.__name__}_{normalized_source}"
         except (IOError, TypeError):
-            hash_input = f"{
+            hash_input = f"{func.__name__}_{str(func)}"
 
     else:
-
-        hash_input = str(obj)
+        hash_input = str(func)
 
-    # Create hash
    hash_obj = hashlib.md5(hash_input.encode('utf-8'))
    return hash_obj.hexdigest()
 
+def generate_unique_hash_simple(func):
+    """Generate a unique hash based on the function name and normalized source code.
+    Works for both standalone functions and class methods (where self would be passed)."""
+    import hashlib
+    import inspect
+
+    # Handle bound methods (instance methods of classes)
+    if hasattr(func, '__self__'):
+        # Get the underlying function from the bound method
+        func = func.__func__
+
+
+    # Get function name
+    func_name = func.__name__
+
+    # Get and normalize source code based on type
+    try:
+        if isinstance(func, (types.FunctionType, types.MethodType)):
+            source = inspect.getsource(func)
+            # Remove whitespace and normalize line endings
+            normalized_source = "\n".join(line.strip() for line in source.splitlines())
+        elif inspect.isclass(func):
+            source = inspect.getsource(func)
+            normalized_source = "\n".join(line.strip() for line in source.splitlines())
+        else:
+            normalized_source = str(func)
+    except (IOError, TypeError):
+        normalized_source = str(func)
+
+    # Use fixed timestamp for reproducibility
+    timestamp = "2025-01-03T18:15:16+05:30"
+
+    # Combine components
+    hash_input = f"{func_name}_{normalized_source}_{timestamp}"
+
+    # Generate MD5 hash
+    hash_obj = hashlib.md5(hash_input.encode('utf-8'))
+    return hash_obj.hexdigest()
 
 class UniqueIdentifier:
     _instance = None
-    _hash_cache = {}
+    _hash_cache = {}
 
     def __new__(cls, *args, **kwargs):
         if cls._instance is None:
@@ -186,7 +131,6 @@ class UniqueIdentifier:
         return cls._instance
 
     def __init__(self, salt=None):
-        # Initialize only once
         if not hasattr(self, 'salt'):
             self.salt = salt
 
@@ -198,15 +142,16 @@ class UniqueIdentifier:
 
         @functools.wraps(obj)
         def wrapper(*args, **kwargs):
-            # Generate
-
-
-
-
-
+            # Generate hash based on the original function and its arguments
+            if hasattr(args[0], 'original_func'):  # Check if it's a wrapped LLM call
+                original_func = args[0].original_func
+                func_args = args[1:]  # Skip the original_func argument
+                hash_id = generate_unique_hash(original_func, *func_args, **kwargs)
+            else:
+                hash_id = generate_unique_hash(obj, *args, **kwargs)
 
             # Store hash_id on the wrapper function
-            wrapper.hash_id =
+            wrapper.hash_id = hash_id
 
             return obj(*args, **kwargs)
 
@@ -217,5 +162,4 @@ class UniqueIdentifier:
         return wrapper
 
 # Create a single instance to be used across all mixins
-mydecorator = UniqueIdentifier()
-
+mydecorator = UniqueIdentifier()
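The practical consequence of `generate_unique_hash_simple` is that a span's `hash_id` now depends only on the function's name and stripped source plus a baked-in constant (the "fixed timestamp" string), not on call arguments, so repeated calls of an unchanged tool map to the same span hash. A short sketch, assuming the wheel is installed:

```python
from ragaai_catalyst.tracers.agentic_tracing.unique_decorator import generate_unique_hash_simple

def my_tool(x):
    return x * 2

# Deterministic across calls, and across runs, for identical source
assert generate_unique_hash_simple(my_tool) == generate_unique_hash_simple(my_tool)
print(generate_unique_hash_simple(my_tool))
```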
ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py (new file)

@@ -0,0 +1,187 @@
+import requests
+import json
+import os
+from datetime import datetime
+
+
+class UploadAgenticTraces:
+    def __init__(self,
+                 json_file_path,
+                 project_name,
+                 project_id,
+                 dataset_name,
+                 user_detail,
+                 base_url):
+        self.json_file_path = json_file_path
+        self.project_name = project_name
+        self.project_id = project_id
+        self.dataset_name = dataset_name
+        self.user_detail = user_detail
+        self.base_url = base_url
+        self.timeout = 99999
+
+    def _create_dataset_schema_with_trace(self):
+        SCHEMA_MAPPING_NEW = {
+            "trace_id": {"columnType": "traceId"},
+            "trace_uri": {"columnType": "traceUri"},
+            "prompt": {"columnType": "prompt"},
+            "response": {"columnType": "response"},
+            "context": {"columnType": "context"},
+            "llm_model": {"columnType": "pipeline"},
+            "recorded_on": {"columnType": "metadata"},
+            "embed_model": {"columnType": "pipeline"},
+            "log_source": {"columnType": "metadata"},
+            "vector_store": {"columnType": "pipeline"},
+            "feedback": {"columnType": "feedBack"}
+        }
+        def make_request():
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Name": self.project_name,
+            }
+            payload = json.dumps({
+                "datasetName": self.dataset_name,
+                # "schemaMapping": SCHEMA_MAPPING_NEW,
+                "traceFolderUrl": None,
+            })
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/dataset/logs",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout
+                                        )
+
+            return response
+
+        response = make_request()
+
+        if response.status_code == 401:
+            # get_token()  # Fetch a new token and set it in the environment
+            response = make_request()  # Retry the request
+        if response.status_code != 200:
+            return response.status_code
+        return response.status_code
+
+
+    def _get_presigned_url(self):
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "numFiles": 1,
+        })
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": self.project_name,
+        }
+
+        try:
+            response = requests.request("GET",
+                                        f"{self.base_url}/v1/llm/presigned-url",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+            if response.status_code == 200:
+                presignedUrls = response.json()["data"]["presignedUrls"][0]
+                return presignedUrls
+        except requests.exceptions.RequestException as e:
+            print(f"Error while getting presigned url: {e}")
+            return None
+
+    def _put_presigned_url(self, presignedUrl, filename):
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        if "blob.core.windows.net" in presignedUrl:  # Azure
+            headers["x-ms-blob-type"] = "BlockBlob"
+        print(f"Uploading agentic traces...")
+        try:
+            with open(filename) as f:
+                payload = f.read().replace("\n", "").replace("\r", "").encode()
+        except Exception as e:
+            print(f"Error while reading file: {e}")
+            return None
+        try:
+            response = requests.request("PUT",
+                                        presignedUrl,
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+            if response.status_code != 200 or response.status_code != 201:
+                return response, response.status_code
+        except requests.exceptions.RequestException as e:
+            print(f"Error while uploading to presigned url: {e}")
+            return None
+
+    def insert_traces(self, presignedUrl):
+        headers = {
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "Content-Type": "application/json",
+            "X-Project-Name": self.project_name,
+        }
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "presignedUrl": presignedUrl,
+            "datasetSpans": self._get_dataset_spans(),  # Extra key for agentic traces
+        })
+        try:
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/insert/trace",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+        except requests.exceptions.RequestException as e:
+            print(f"Error while inserting traces: {e}")
+            return None
+
+    def _get_dataset_spans(self):
+        try:
+            with open(self.json_file_path) as f:
+                data = json.load(f)
+        except Exception as e:
+            print(f"Error while reading file: {e}")
+            return None
+        try:
+            spans = data["data"][0]["spans"]
+            datasetSpans = []
+            for span in spans:
+                if span["type"] != "agent":
+                    existing_span = next((s for s in datasetSpans if s["spanHash"] == span["hash_id"]), None)
+                    if existing_span is None:
+                        datasetSpans.append({
+                            "spanId": span["id"],
+                            "spanName": span["name"],
+                            "spanHash": span["hash_id"],
+                            "spanType": span["type"],
+                        })
+                else:
+                    datasetSpans.append({
+                        "spanId": span["id"],
+                        "spanName": span["name"],
+                        "spanHash": span["hash_id"],
+                        "spanType": span["type"],
+                    })
+                    children = span["data"]["children"]
+                    for child in children:
+                        existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                        if existing_span is None:
+                            datasetSpans.append({
+                                "spanId": child["id"],
+                                "spanName": child["name"],
+                                "spanHash": child["hash_id"],
+                                "spanType": child["type"],
+                            })
+            return datasetSpans
+        except Exception as e:
+            print(f"Error while reading dataset spans: {e}")
+            return None
+
+    def upload_agentic_traces(self):
+        self._create_dataset_schema_with_trace()
+        presignedUrl = self._get_presigned_url()
+        if presignedUrl is None:
+            return
+        self._put_presigned_url(presignedUrl, self.json_file_path)
+        self.insert_traces(presignedUrl)
+        print("Agentic Traces uploaded")
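How this class is driven from tracer.py is not part of this diff, so the following is only a sketch with illustrative values; the constructor signature, the env-var token lookup, and the expected `data["data"][0]["spans"]` layout of the trace file are the parts taken from the code above:

```python
import os
from ragaai_catalyst.tracers.agentic_tracing.upload_agentic_traces import UploadAgenticTraces

os.environ.setdefault("RAGAAI_CATALYST_TOKEN", "...")  # read by every request

uploader = UploadAgenticTraces(
    json_file_path="traces/trace_123.json",   # must contain data["data"][0]["spans"]
    project_name="my_project",                # illustrative
    project_id="proj_123",                    # illustrative
    dataset_name="agentic_runs",              # illustrative
    user_detail={},                           # shape not pinned down in this diff
    base_url="https://example.catalyst/api",  # illustrative endpoint
)
uploader.upload_agentic_traces()
```

One wrinkle worth flagging: in `_put_presigned_url`, `response.status_code != 200 or response.status_code != 201` is true for every status code, so the method always takes the early-return branch; the pipeline still completes because `upload_agentic_traces` ignores that return value.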
ragaai_catalyst/tracers/agentic_tracing/upload_code.py (new file)

@@ -0,0 +1,115 @@
+from aiohttp import payload
+import requests
+import json
+import os
+import logging
+logger = logging.getLogger(__name__)
+
+def upload_code(hash_id, zip_path, project_name, dataset_name):
+    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+
+    if hash_id not in code_hashes_list:
+        presigned_url = _fetch_presigned_url(project_name, dataset_name)
+        _put_zip_presigned_url(project_name, presigned_url, zip_path)
+
+        response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+        return response
+    else:
+        return "Code already exists"
+
+def _fetch_dataset_code_hashes(project_name, dataset_name):
+    payload = {}
+    headers = {
+        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+        "X-Project-Name": project_name,
+    }
+
+    try:
+        response = requests.request("GET",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v2/llm/dataset/code?datasetName={dataset_name}",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+
+        if response.status_code == 200:
+            return response.json()["data"]["codeHashes"]
+        else:
+            raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to list datasets: {e}")
+        raise
+
+def _fetch_presigned_url(project_name, dataset_name):
+    payload = json.dumps({
+        "datasetName": dataset_name,
+        "numFiles": 1,
+        "contentType": "application/zip"
+    })
+
+    headers = {
+        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+        "Content-Type": "application/json",
+        "X-Project-Name": project_name,
+    }
+
+    try:
+        response = requests.request("GET",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v1/llm/presigned-url",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+
+        if response.status_code == 200:
+            return response.json()["data"]["presignedUrls"][0]
+        else:
+            raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to list datasets: {e}")
+        raise
+
+def _put_zip_presigned_url(project_name, presignedUrl, filename):
+    headers = {
+        "X-Project-Name": project_name,
+        "Content-Type": "application/zip",
+    }
+
+    if "blob.core.windows.net" in presignedUrl:  # Azure
+        headers["x-ms-blob-type"] = "BlockBlob"
+    print(f"Uploading code...")
+    with open(filename, 'rb') as f:
+        payload = f.read()
+
+    response = requests.request("PUT",
+                                presignedUrl,
+                                headers=headers,
+                                data=payload,
+                                timeout=99999)
+    if response.status_code != 200 or response.status_code != 201:
+        return response, response.status_code
+
+def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+    payload = json.dumps({
+        "datasetName": dataset_name,
+        "codeHash": hash_id,
+        "presignedUrl": presigned_url
+    })
+
+    headers = {
+        'X-Project-Name': project_name,
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}'
+    }
+
+    try:
+        response = requests.request("POST",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v2/llm/dataset/code",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+        if response.status_code == 200:
+            return response.json()["message"]
+        else:
+            raise Exception(f"Failed to insert code: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to insert code: {e}")
+        raise
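A sketch of the intended call, assuming the wheel is installed; the hash and zip would normally come from the new zip_list_of_unique_files.py added in this same release, and both environment variables are read inside the helpers (values here are illustrative):

```python
import os
from ragaai_catalyst.tracers.agentic_tracing.upload_code import upload_code

os.environ.setdefault("RAGAAI_CATALYST_BASE_URL", "https://example.catalyst/api")
os.environ.setdefault("RAGAAI_CATALYST_TOKEN", "...")

# hash_id/zip_path are hypothetical stand-ins for the output of
# zip_list_of_unique_files.py.
msg = upload_code(
    hash_id="a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6",
    zip_path="artifacts/source_a1b2c3.zip",
    project_name="my_project",
    dataset_name="agentic_runs",
)
print(msg)  # server message, or "Code already exists"
```

Two small observations: `from aiohttp import payload` is shadowed by the local `payload` variables in every function, so it only serves to make aiohttp a hard import dependency; and `_put_zip_presigned_url` has the same always-true `!= 200 or != 201` status check as the trace uploader.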