ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (29)
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +231 -74
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +32 -42
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +132 -30
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +91 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +262 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +186 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
  23. ragaai_catalyst/tracers/tracer.py +6 -2
  24. ragaai_catalyst/tracers/upload_traces.py +46 -57
  25. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/METADATA +8 -4
  26. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/RECORD +28 -22
  27. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/WHEEL +1 -1
  28. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  29. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py
@@ -3,38 +3,57 @@ import uuid
 from datetime import datetime
 import psutil
 from typing import Optional, Any, Dict, List
-from .unique_decorator import mydecorator
+from .unique_decorator import generate_unique_hash_simple
 import contextvars
 import asyncio
+from .file_name_tracker import TrackName
+

 class ToolTracerMixin:
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.file_tracker = TrackName()
         self.current_tool_name = contextvars.ContextVar("tool_name", default=None)
         self.current_tool_id = contextvars.ContextVar("tool_id", default=None)
         self.component_network_calls = {}
-        self._trace_sync_tool_execution = mydecorator(self._trace_sync_tool_execution)
-        self._trace_tool_execution = mydecorator(self._trace_tool_execution)
+        self.component_user_interaction = {}
+        self.gt = None


     def trace_tool(self, name: str, tool_type: str = "generic", version: str = "1.0.0"):
         def decorator(func):
+            # Add metadata attribute to the function
+            metadata = {
+                "name": name,
+                "tool_type": tool_type,
+                "version": version,
+                "is_active": True
+            }
+
             # Check if the function is async
             is_async = asyncio.iscoroutinefunction(func)

+            @self.file_tracker.trace_decorator
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
+                async_wrapper.metadata = metadata
+                self.gt = kwargs.get('gt', None) if kwargs else None
                 return await self._trace_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )

+            @self.file_tracker.trace_decorator
             @functools.wraps(func)
             def sync_wrapper(*args, **kwargs):
+                sync_wrapper.metadata = metadata
+                self.gt = kwargs.get('gt', None) if kwargs else None
                 return self._trace_sync_tool_execution(
                     func, name, tool_type, version, *args, **kwargs
                 )

-            return async_wrapper if is_async else sync_wrapper
+            wrapper = async_wrapper if is_async else sync_wrapper
+            wrapper.metadata = metadata
+            return wrapper

         return decorator

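Based on the hunk above, the decorator now stamps its registration metadata onto the returned wrapper and picks up an optional gt (ground truth) keyword from the call. A minimal sketch of how a decorated tool would look; the `tracer` object is hypothetical and stands in for whatever composes ToolTracerMixin in this package:

    @tracer.trace_tool(name="lookup_capital", tool_type="search")
    def lookup_capital(country, gt=None):
        return {"France": "Paris"}.get(country)

    # The wrapper now carries its registration metadata:
    print(lookup_capital.metadata)
    # {'name': 'lookup_capital', 'tool_type': 'search', 'version': '1.0.0', 'is_active': True}

    # gt is read from kwargs and stashed on the tracer for the recorded component:
    lookup_capital("France", gt="Paris")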
@@ -46,7 +65,7 @@ class ToolTracerMixin:
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
-        hash_id = self._trace_sync_tool_execution.hash_id
+        hash_id = generate_unique_hash_simple(func)

         # Start tracking network calls for this component
         self.start_component(component_id)
@@ -118,7 +137,7 @@ class ToolTracerMixin:
         start_time = datetime.now().astimezone()
         start_memory = psutil.Process().memory_info().rss
         component_id = str(uuid.uuid4())
-        hash_id = self._trace_tool_execution.hash_id
+        hash_id = generate_unique_hash_simple(func)

         try:
             # Execute the tool
@@ -142,7 +161,6 @@ class ToolTracerMixin:
             input_data=self._sanitize_input(args, kwargs),
             output_data=self._sanitize_output(result)
         )
-
         self.add_component(tool_component)
         return result

@@ -169,11 +187,12 @@ class ToolTracerMixin:
             output_data=None,
             error=error_component
         )
-
         self.add_component(tool_component)
         raise

     def create_tool_component(self, **kwargs):
+
+
         """Create a tool component according to the data structure"""
         start_time = kwargs["start_time"]
         component = {
@@ -197,19 +216,12 @@ class ToolTracerMixin:
                 "memory_used": kwargs["memory_used"]
             },
             "network_calls": self.component_network_calls.get(kwargs["component_id"], []),
-            "interactions": [{
-                "id": f"int_{uuid.uuid4()}",
-                "interaction_type": "input",
-                "timestamp": start_time.isoformat(),
-                "content": kwargs["input_data"]
-            }, {
-                "id": f"int_{uuid.uuid4()}",
-                "interaction_type": "output",
-                "timestamp": kwargs["end_time"].isoformat(),
-                "content": kwargs["output_data"]
-            }]
+            "interactions": self.component_user_interaction.get(kwargs["component_id"], [])
         }

+        if self.gt:
+            component["data"]["gt"] = self.gt
+
         return component

     def start_component(self, component_id):
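So interactions now come from whatever the user-interaction tracer recorded for the component, and ground truth is injected into the component's data block just before it is returned. A standalone sketch of that injection step; `component` and `gt` are hypothetical stand-ins for the dict built by create_tool_component and self.gt:

    component = {"data": {"output": "Paris"}, "interactions": []}
    gt = "Paris"   # would have been captured from kwargs.get('gt') at call time
    if gt:
        component["data"]["gt"] = gt
    print(component["data"])   # {'output': 'Paris', 'gt': 'Paris'}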
ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py
@@ -4,7 +4,7 @@ import functools
 import re
 import tokenize
 import io
-import uuid
+import types

 def normalize_source_code(source):
     """
@@ -19,123 +19,33 @@ def normalize_source_code(source):
     Returns:
         str: Normalized source code
     """
-    # Use tokenize to carefully parse the source code
     normalized_tokens = []

     try:
-        # Convert source to a file-like object for tokenize
        token_source = io.StringIO(source).readline

        for token_type, token_string, _, _, _ in tokenize.generate_tokens(token_source):
-            # Preserve strings (including docstrings)
             if token_type == tokenize.STRING:
                 normalized_tokens.append(token_string.strip())
-
-            # Preserve code tokens
-            elif token_type in [
-                tokenize.NAME,
-                tokenize.NUMBER,
-                tokenize.OP
-            ]:
+            elif token_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                 normalized_tokens.append(token_string.strip())

     except tokenize.TokenError:
-        # Fallback to a simpler method if tokenization fails
         normalized_tokens = re.findall(r'\w+|[^\w\s]', source)

-    # Remove extra spaces and join
-    normalized_source = ''.join(normalized_tokens)
-
-    return normalized_source
-
-# def generate_unique_hash(obj, *call_args, **call_kwargs):
-#     print('#'*100,'hash id: ', '#'*100)
-#     print(obj)
-#     print(*call_args)
-#     # print(**call_kwargs)
-#     """
-#     Generate a unique, deterministic hash for a given object.
-
-#     Args:
-#         obj: The object (function or class) to generate hash for
-#         additional_salt: Optional additional salt to ensure uniqueness
-
-#     Returns:
-#         str: A unique hash_id meeting the specified requirements
-#     """
-#     # Handle different object types
-
-#     if inspect.isclass(obj):
-#         # For classes, use the class definition
-#         try:
-#             source = inspect.getsource(obj)
-#         except (IOError, TypeError):
-#             source = repr(obj)
-
-#         # Use class name in hash generation
-#         hash_input = f"{obj.__name__}{normalize_source_code(source)}"
-
-#     else:
-#         # For functions and methods
-#         # Get full signature information
-#         signature = inspect.signature(obj)
-
-#         # Capture parameter names and their default values
-#         params_info = []
-#         for name, param in signature.parameters.items():
-#             param_str = f"{name}:{param.kind}"
-#             if param.default != inspect.Parameter.empty:
-#                 param_str += f":default={param.default}"
-#             params_info.append(param_str)
-
-#         # Get source code
-#         try:
-#             source = inspect.getsource(obj)
-#         except (IOError, TypeError):
-#             source = repr(obj)
-
-#         # Combine method name, parameters, and normalized source
-#         hash_input = (
-#             f"{obj.__name__}"  # Method name
-#             f"{''.join(params_info)}"  # Parameter details
-#             f"{normalize_source_code(source)}"  # Normalized source code
-#         )
-
-#     # Add optional salt
-#     args_repr = str(call_args) + str(sorted(call_kwargs.items()))
-#     hash_input += args_repr
-#     # Use SHA-256 for generating the hash
-#     hash_object = hashlib.sha256(hash_input.encode('utf-8'))
-
-#     # Generate hash and truncate to 32 characters
-#     hash_id = hash_object.hexdigest()[:32]
-
-#     # Ensure the hash starts with a letter
-#     if not hash_id[0].isalpha():
-#         hash_id = 'a' + hash_id[1:]
-
-#     print(hash_id)
-#     return hash_id
-
+    return ''.join(normalized_tokens)

-
-def generate_unique_hash(obj, *args, **kwargs):
-    """Generate a unique hash based on the normalized function definition and its arguments"""
-    if inspect.ismethod(obj) or inspect.isfunction(obj):
+def generate_unique_hash(func, *args, **kwargs):
+    """Generate a unique hash based on the original function and its arguments"""
+    if inspect.ismethod(func) or inspect.isfunction(func):
         # Get function name and source code
-        func_name = obj.__name__
+        func_name = func.__name__
         try:
-            # Get the source code and normalize it
-            func_source = inspect.getsource(obj)
+            func_source = inspect.getsource(func)
             normalized_source = normalize_source_code(func_source)
         except (IOError, TypeError):
             normalized_source = ""

-        # Get function arguments
-        if args and hasattr(args[0], '__class__'):
-            # If it's a method, skip the 'self' argument
-            args = args[1:]
-
         # Normalize argument values
         def normalize_arg(arg):
             if isinstance(arg, (str, int, float, bool)):
@@ -158,27 +68,62 @@ def generate_unique_hash(obj, *args, **kwargs):
         # Combine all components
         hash_input = f"{func_name}_{normalized_source}_{args_str}_{kwargs_str}"

-    elif inspect.isclass(obj):
-        # For classes, normalize the class definition
+    elif inspect.isclass(func):
         try:
-            class_source = inspect.getsource(obj)
+            class_source = inspect.getsource(func)
             normalized_source = normalize_source_code(class_source)
-            hash_input = f"{obj.__name__}_{normalized_source}"
+            hash_input = f"{func.__name__}_{normalized_source}"
         except (IOError, TypeError):
-            hash_input = f"{obj.__name__}_{str(obj)}"
+            hash_input = f"{func.__name__}_{str(func)}"

     else:
-        # For other objects, use their string representation
-        hash_input = str(obj)
+        hash_input = str(func)

-    # Create hash
     hash_obj = hashlib.md5(hash_input.encode('utf-8'))
     return hash_obj.hexdigest()

+def generate_unique_hash_simple(func):
+    """Generate a unique hash based on the function name and normalized source code.
+    Works for both standalone functions and class methods (where self would be passed)."""
+    import hashlib
+    import inspect
+
+    # Handle bound methods (instance methods of classes)
+    if hasattr(func, '__self__'):
+        # Get the underlying function from the bound method
+        func = func.__func__
+
+
+    # Get function name
+    func_name = func.__name__
+
+    # Get and normalize source code based on type
+    try:
+        if isinstance(func, (types.FunctionType, types.MethodType)):
+            source = inspect.getsource(func)
+            # Remove whitespace and normalize line endings
+            normalized_source = "\n".join(line.strip() for line in source.splitlines())
+        elif inspect.isclass(func):
+            source = inspect.getsource(func)
+            normalized_source = "\n".join(line.strip() for line in source.splitlines())
+        else:
+            normalized_source = str(func)
+    except (IOError, TypeError):
+        normalized_source = str(func)
+
+    # Use fixed timestamp for reproducibility
+    timestamp = "2025-01-03T18:15:16+05:30"
+
+    # Combine components
+    hash_input = f"{func_name}_{normalized_source}_{timestamp}"
+
+    # Generate MD5 hash
+    hash_obj = hashlib.md5(hash_input.encode('utf-8'))
+    return hash_obj.hexdigest()

 class UniqueIdentifier:
     _instance = None
-    _hash_cache = {}  # Class-level cache for storing hashes
+    _hash_cache = {}

     def __new__(cls, *args, **kwargs):
         if cls._instance is None:
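Unlike generate_unique_hash, the new generate_unique_hash_simple ignores call arguments and hashes only the function name and whitespace-normalized source (plus a constant timestamp string), so a tool keeps one stable span hash across calls and runs. A small illustration of that property, run from a source file so inspect.getsource can see the definition:

    from ragaai_catalyst.tracers.agentic_tracing.unique_decorator import generate_unique_hash_simple

    def add(a, b):
        return a + b

    h1 = generate_unique_hash_simple(add)
    h2 = generate_unique_hash_simple(add)
    assert h1 == h2   # same definition, same hash -- call arguments play no part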
@@ -186,7 +131,6 @@ class UniqueIdentifier:
         return cls._instance

     def __init__(self, salt=None):
-        # Initialize only once
         if not hasattr(self, 'salt'):
             self.salt = salt
@@ -198,15 +142,16 @@ class UniqueIdentifier:

         @functools.wraps(obj)
         def wrapper(*args, **kwargs):
-            # Generate cache key based on function and arguments
-            cache_key = (obj.__name__, str(args), str(kwargs))
-
-            # Use cached hash if available, otherwise generate new one
-            if cache_key not in self._hash_cache:
-                self._hash_cache[cache_key] = generate_unique_hash(obj, *args, **kwargs)
+            # Generate hash based on the original function and its arguments
+            if hasattr(args[0], 'original_func'):  # Check if it's a wrapped LLM call
+                original_func = args[0].original_func
+                func_args = args[1:]  # Skip the original_func argument
+                hash_id = generate_unique_hash(original_func, *func_args, **kwargs)
+            else:
+                hash_id = generate_unique_hash(obj, *args, **kwargs)

             # Store hash_id on the wrapper function
-            wrapper.hash_id = self._hash_cache[cache_key]
+            wrapper.hash_id = hash_id

             return obj(*args, **kwargs)

@@ -217,5 +162,4 @@ class UniqueIdentifier:
         return wrapper

 # Create a single instance to be used across all mixins
-mydecorator = UniqueIdentifier()
-
+mydecorator = UniqueIdentifier()
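With the cache removed, the shared mydecorator instance recomputes an argument-sensitive hash on every call and exposes it on the wrapper. A brief sketch of that behavior, assuming the import path from this package:

    from ragaai_catalyst.tracers.agentic_tracing.unique_decorator import mydecorator

    @mydecorator
    def summarize(text):
        return text[:10]

    summarize("hello world")   # hash is computed per call now (cache removed)
    print(summarize.hash_id)   # md5 over name + normalized source + call arguments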
ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py (new file)
@@ -0,0 +1,187 @@
+import requests
+import json
+import os
+from datetime import datetime
+
+
+class UploadAgenticTraces:
+    def __init__(self,
+                 json_file_path,
+                 project_name,
+                 project_id,
+                 dataset_name,
+                 user_detail,
+                 base_url):
+        self.json_file_path = json_file_path
+        self.project_name = project_name
+        self.project_id = project_id
+        self.dataset_name = dataset_name
+        self.user_detail = user_detail
+        self.base_url = base_url
+        self.timeout = 99999
+
+    def _create_dataset_schema_with_trace(self):
+        SCHEMA_MAPPING_NEW = {
+            "trace_id": {"columnType": "traceId"},
+            "trace_uri": {"columnType": "traceUri"},
+            "prompt": {"columnType": "prompt"},
+            "response": {"columnType": "response"},
+            "context": {"columnType": "context"},
+            "llm_model": {"columnType": "pipeline"},
+            "recorded_on": {"columnType": "metadata"},
+            "embed_model": {"columnType": "pipeline"},
+            "log_source": {"columnType": "metadata"},
+            "vector_store": {"columnType": "pipeline"},
+            "feedback": {"columnType": "feedBack"}
+        }
+        def make_request():
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Name": self.project_name,
+            }
+            payload = json.dumps({
+                "datasetName": self.dataset_name,
+                # "schemaMapping": SCHEMA_MAPPING_NEW,
+                "traceFolderUrl": None,
+            })
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/dataset/logs",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout
+                                        )
+
+            return response
+
+        response = make_request()
+
+        if response.status_code == 401:
+            # get_token()  # Fetch a new token and set it in the environment
+            response = make_request()  # Retry the request
+        if response.status_code != 200:
+            return response.status_code
+        return response.status_code
+
+
+    def _get_presigned_url(self):
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "numFiles": 1,
+        })
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Name": self.project_name,
+        }
+
+        try:
+            response = requests.request("GET",
+                                        f"{self.base_url}/v1/llm/presigned-url",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+            if response.status_code == 200:
+                presignedUrls = response.json()["data"]["presignedUrls"][0]
+                return presignedUrls
+        except requests.exceptions.RequestException as e:
+            print(f"Error while getting presigned url: {e}")
+            return None
+
+    def _put_presigned_url(self, presignedUrl, filename):
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        if "blob.core.windows.net" in presignedUrl:  # Azure
+            headers["x-ms-blob-type"] = "BlockBlob"
+        print(f"Uploading agentic traces...")
+        try:
+            with open(filename) as f:
+                payload = f.read().replace("\n", "").replace("\r", "").encode()
+        except Exception as e:
+            print(f"Error while reading file: {e}")
+            return None
+        try:
+            response = requests.request("PUT",
+                                        presignedUrl,
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+            if response.status_code != 200 or response.status_code != 201:
+                return response, response.status_code
+        except requests.exceptions.RequestException as e:
+            print(f"Error while uploading to presigned url: {e}")
+            return None
+
+    def insert_traces(self, presignedUrl):
+        headers = {
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "Content-Type": "application/json",
+            "X-Project-Name": self.project_name,
+        }
+        payload = json.dumps({
+            "datasetName": self.dataset_name,
+            "presignedUrl": presignedUrl,
+            "datasetSpans": self._get_dataset_spans(),  # Extra key for agentic traces
+        })
+        try:
+            response = requests.request("POST",
+                                        f"{self.base_url}/v1/llm/insert/trace",
+                                        headers=headers,
+                                        data=payload,
+                                        timeout=self.timeout)
+        except requests.exceptions.RequestException as e:
+            print(f"Error while inserting traces: {e}")
+            return None
+
+    def _get_dataset_spans(self):
+        try:
+            with open(self.json_file_path) as f:
+                data = json.load(f)
+        except Exception as e:
+            print(f"Error while reading file: {e}")
+            return None
+        try:
+            spans = data["data"][0]["spans"]
+            datasetSpans = []
+            for span in spans:
+                if span["type"] != "agent":
+                    existing_span = next((s for s in datasetSpans if s["spanHash"] == span["hash_id"]), None)
+                    if existing_span is None:
+                        datasetSpans.append({
+                            "spanId": span["id"],
+                            "spanName": span["name"],
+                            "spanHash": span["hash_id"],
+                            "spanType": span["type"],
+                        })
+                else:
+                    datasetSpans.append({
+                        "spanId": span["id"],
+                        "spanName": span["name"],
+                        "spanHash": span["hash_id"],
+                        "spanType": span["type"],
+                    })
+                    children = span["data"]["children"]
+                    for child in children:
+                        existing_span = next((s for s in datasetSpans if s["spanHash"] == child["hash_id"]), None)
+                        if existing_span is None:
+                            datasetSpans.append({
+                                "spanId": child["id"],
+                                "spanName": child["name"],
+                                "spanHash": child["hash_id"],
+                                "spanType": child["type"],
+                            })
+            return datasetSpans
+        except Exception as e:
+            print(f"Error while reading dataset spans: {e}")
+            return None
+
+    def upload_agentic_traces(self):
+        self._create_dataset_schema_with_trace()
+        presignedUrl = self._get_presigned_url()
+        if presignedUrl is None:
+            return
+        self._put_presigned_url(presignedUrl, self.json_file_path)
+        self.insert_traces(presignedUrl)
+        print("Agentic Traces uploaded")
ragaai_catalyst/tracers/agentic_tracing/upload_code.py (new file)
@@ -0,0 +1,115 @@
+from aiohttp import payload
+import requests
+import json
+import os
+import logging
+logger = logging.getLogger(__name__)
+
+def upload_code(hash_id, zip_path, project_name, dataset_name):
+    code_hashes_list = _fetch_dataset_code_hashes(project_name, dataset_name)
+
+    if hash_id not in code_hashes_list:
+        presigned_url = _fetch_presigned_url(project_name, dataset_name)
+        _put_zip_presigned_url(project_name, presigned_url, zip_path)
+
+        response = _insert_code(dataset_name, hash_id, presigned_url, project_name)
+        return response
+    else:
+        return "Code already exists"
+
+def _fetch_dataset_code_hashes(project_name, dataset_name):
+    payload = {}
+    headers = {
+        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+        "X-Project-Name": project_name,
+    }
+
+    try:
+        response = requests.request("GET",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v2/llm/dataset/code?datasetName={dataset_name}",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+
+        if response.status_code == 200:
+            return response.json()["data"]["codeHashes"]
+        else:
+            raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to list datasets: {e}")
+        raise
+
+def _fetch_presigned_url(project_name, dataset_name):
+    payload = json.dumps({
+        "datasetName": dataset_name,
+        "numFiles": 1,
+        "contentType": "application/zip"
+    })
+
+    headers = {
+        "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+        "Content-Type": "application/json",
+        "X-Project-Name": project_name,
+    }
+
+    try:
+        response = requests.request("GET",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v1/llm/presigned-url",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+
+        if response.status_code == 200:
+            return response.json()["data"]["presignedUrls"][0]
+        else:
+            raise Exception(f"Failed to fetch code hashes: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to list datasets: {e}")
+        raise
+
+def _put_zip_presigned_url(project_name, presignedUrl, filename):
+    headers = {
+        "X-Project-Name": project_name,
+        "Content-Type": "application/zip",
+    }
+
+    if "blob.core.windows.net" in presignedUrl:  # Azure
+        headers["x-ms-blob-type"] = "BlockBlob"
+    print(f"Uploading code...")
+    with open(filename, 'rb') as f:
+        payload = f.read()
+
+    response = requests.request("PUT",
+                                presignedUrl,
+                                headers=headers,
+                                data=payload,
+                                timeout=99999)
+    if response.status_code != 200 or response.status_code != 201:
+        return response, response.status_code
+
+def _insert_code(dataset_name, hash_id, presigned_url, project_name):
+    payload = json.dumps({
+        "datasetName": dataset_name,
+        "codeHash": hash_id,
+        "presignedUrl": presigned_url
+    })
+
+    headers = {
+        'X-Project-Name': project_name,
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}'
+    }
+
+    try:
+        response = requests.request("POST",
+                                    f"{os.getenv('RAGAAI_CATALYST_BASE_URL')}/v2/llm/dataset/code",
+                                    headers=headers,
+                                    data=payload,
+                                    timeout=99999)
+        if response.status_code == 200:
+            return response.json()["message"]
+        else:
+            raise Exception(f"Failed to insert code: {response.json()['message']}")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to insert code: {e}")
+        raise
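And a matching sketch for upload_code, which skips the upload when the zip's hash is already registered for the dataset; it depends on the RAGAAI_CATALYST_BASE_URL and RAGAAI_CATALYST_TOKEN environment variables, and every argument value below is hypothetical:

    import os
    from ragaai_catalyst.tracers.agentic_tracing.upload_code import upload_code

    os.environ["RAGAAI_CATALYST_BASE_URL"] = "https://catalyst.example.com/api"  # hypothetical
    os.environ["RAGAAI_CATALYST_TOKEN"] = "token"                                # hypothetical

    message = upload_code(
        hash_id="9f86d081884c7d65",             # hash computed for the zipped source (hypothetical)
        zip_path="traces/source_9f86d081.zip",  # hypothetical
        project_name="my-project",
        dataset_name="agentic-runs",
    )
    print(message)   # server message on insert, or "Code already exists"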