ragaai-catalyst 2.2.4b4__py3-none-any.whl → 2.2.4.1b1__py3-none-any.whl

This diff shows the content changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their public registries.
@@ -0,0 +1,92 @@
+ import logging
+ import threading
+
+ from requests.adapters import HTTPAdapter
+ from urllib3.util.retry import Retry
+ from urllib3.exceptions import PoolError, MaxRetryError, NewConnectionError
+ from requests.exceptions import ConnectionError, Timeout, RequestException
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+
+ class SessionManager:
+     """Shared session manager with connection pooling for HTTP requests"""
+     _instance = None
+     _session = None
+     _lock = threading.Lock()
+
+     def __new__(cls):
+         if cls._instance is None:
+             with cls._lock: # Thread-safe singleton
+                 if cls._instance is None: # Double-check locking
+                     logger.info("Creating new SessionManager singleton instance")
+                     cls._instance = super(SessionManager, cls).__new__(cls)
+                     cls._instance._initialize_session()
+                 else:
+                     logger.debug("SessionManager instance already exists, returning existing instance")
+         else:
+             logger.debug("SessionManager instance exists, returning existing instance")
+         return cls._instance
+
+     def _initialize_session(self):
+         """Initialize session with connection pooling and retry strategy"""
+         logger.info("Initializing HTTP session with connection pooling and retry strategy")
+         self._session = requests.Session()
+
+         retry_strategy = Retry(
+             total=3, # number of retries
+             backoff_factor=0.5, # wait 0.5, 1, 2... seconds between retries
+             status_forcelist=[500, 502, 503, 504] # HTTP status codes to retry on
+         )
+
+         adapter = HTTPAdapter(
+             max_retries=retry_strategy,
+             pool_connections=2, # number of connections to keep in the pool
+             pool_maxsize=50, # maximum number of connections in the pool
+             pool_block=True
+         )
+         logger.debug(f"Configured HTTP adapter: pool_connections={adapter.config.get('pool_connections', 1)}, "
+                      f"pool_maxsize={adapter.config.get('pool_maxsize', 50)}, "
+                      f"pool_block={adapter.config.get('pool_block', False)}")
+
+         self._session.mount("http://", adapter)
+         self._session.mount("https://", adapter)
+         logger.info("HTTP session initialized successfully with adapters mounted for http:// and https://")
+
+     @property
+     def session(self):
+         if self._session is None:
+             logger.warning("Session accessed but not initialized, reinitializing...")
+             self._initialize_session()
+         return self._session
+
+     def close(self):
+         """Close the session"""
+         if self._session:
+             logger.info("Closing HTTP session")
+             self._session.close()
+             self._session = None
+             logger.info("HTTP session closed successfully")
+         else:
+             logger.debug("Close called but session was already None")
+
+     def handle_request_exceptions(self, e, operation_name):
+         """Handle common request exceptions with appropriate logging"""
+         logger.error(f"Exception occurred during {operation_name}")
+         if isinstance(e, (PoolError, MaxRetryError)):
+             logger.error(f"Connection pool exhausted during {operation_name}: {e}")
+         elif isinstance(e, NewConnectionError):
+             logger.error(f"Failed to establish new connection during {operation_name}: {e}")
+         elif isinstance(e, ConnectionError):
+             logger.error(f"Connection error during {operation_name}: {e}")
+         elif isinstance(e, Timeout):
+             logger.error(f"Request timeout during {operation_name}: {e}")
+         else:
+             logger.error(f"Unexpected error during {operation_name}: {e}")
+
+
+ # Global session manager instance
+ logger.info("Creating global SessionManager instance")
+ session_manager = SessionManager()
+ logger.info(f"Global SessionManager instance created with ID: {id(session_manager)}")
@@ -22,6 +22,7 @@ from typing import Dict, Any, Optional
  import threading
  import uuid

+
  # Set up logging
  log_dir = os.path.join(tempfile.gettempdir(), "ragaai_logs")
  os.makedirs(log_dir, exist_ok=True)
@@ -49,11 +50,13 @@ try:
      from ragaai_catalyst.tracers.agentic_tracing.upload.upload_code import upload_code
      # from ragaai_catalyst.tracers.agentic_tracing.upload.upload_trace_metric import upload_trace_metric
      from ragaai_catalyst.tracers.agentic_tracing.utils.create_dataset_schema import create_dataset_schema_with_trace
+     from ragaai_catalyst.tracers.agentic_tracing.upload.session_manager import session_manager
      from ragaai_catalyst import RagaAICatalyst
      IMPORTS_AVAILABLE = True
  except ImportError:
      logger.warning("RagaAI Catalyst imports not available - running in test mode")
      IMPORTS_AVAILABLE = False
+     session_manager = None

  # Define task queue directory
  QUEUE_DIR = os.path.join(tempfile.gettempdir(), "ragaai_tasks")
@@ -72,6 +75,10 @@ _executor_lock = threading.Lock()
  _futures: Dict[str, Any] = {}
  _futures_lock = threading.Lock()

+ # Dataset creation cache to avoid redundant API calls
+ _dataset_cache: Dict[str, Dict[str, Any]] = {}
+ _dataset_cache_lock = threading.Lock()
+ DATASET_CACHE_DURATION = 600 # 10 minutes in seconds

  _cleanup_lock = threading.Lock()
  _last_cleanup = 0
@@ -88,7 +95,7 @@ def get_executor(max_workers=None):
          if _executor is None:
              # Calculate optimal worker count
              if max_workers is None:
-                 max_workers = min(32, (os.cpu_count() or 1) * 4)
+                 max_workers = min(8, (os.cpu_count() or 1) * 4)

              logger.info(f"Creating ThreadPoolExecutor with {max_workers} workers")
              _executor = concurrent.futures.ThreadPoolExecutor(
@@ -110,9 +117,57 @@ def generate_unique_task_id():
      unique_id = str(uuid.uuid4())[:8] # Short UUID
      return f"task_{int(time.time())}_{os.getpid()}_{counter}_{unique_id}"

+ def _generate_dataset_cache_key(dataset_name: str, project_name: str, base_url: str) -> str:
+     """Generate a unique cache key for dataset creation"""
+     return f"{dataset_name}#{project_name}#{base_url}"
+
+ def _is_dataset_cached(cache_key: str) -> bool:
+     """Check if dataset creation is cached and still valid"""
+     with _dataset_cache_lock:
+         if cache_key not in _dataset_cache:
+             return False
+
+         cache_entry = _dataset_cache[cache_key]
+         cache_time = cache_entry.get('timestamp', 0)
+         current_time = time.time()
+
+         # Check if cache is still valid (within 10 minutes)
+         if current_time - cache_time <= DATASET_CACHE_DURATION:
+             logger.info(f"Dataset creation cache hit for key: {cache_key}")
+             return True
+         else:
+             # Cache expired, remove it
+             logger.info(f"Dataset creation cache expired for key: {cache_key}")
+             del _dataset_cache[cache_key]
+             return False
+
+ def _cache_dataset_creation(cache_key: str, response: Any) -> None:
+     """Cache successful dataset creation"""
+     with _dataset_cache_lock:
+         _dataset_cache[cache_key] = {
+             'timestamp': time.time(),
+             'response': response
+         }
+
+ def _cleanup_expired_cache_entries() -> None:
+     """Remove expired cache entries"""
+     current_time = time.time()
+     with _dataset_cache_lock:
+         expired_keys = []
+         for cache_key, cache_entry in _dataset_cache.items():
+             cache_time = cache_entry.get('timestamp', 0)
+             if current_time - cache_time > DATASET_CACHE_DURATION:
+                 expired_keys.append(cache_key)
+
+         for key in expired_keys:
+             del _dataset_cache[key]
+
+         if expired_keys:
+             logger.info(f"Cleaned up {len(expired_keys)} expired dataset cache entries")
+
  def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
                     project_name: str, project_id: str, dataset_name: str,
-                    user_details: Dict[str, Any], base_url: str, timeout=120, fail_on_trace_error=True) -> Dict[str, Any]:
+                    user_details: Dict[str, Any], base_url: str, tracer_type, timeout=120, fail_on_trace_error=True) -> Dict[str, Any]:
      """
      Process a single upload task

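For context, the caching helpers added above amount to a TTL-guarded dict keyed by dataset, project and base URL. A minimal sketch of the same idea in isolation (the names here are illustrative, not the module's own):

import threading
import time

TTL_SECONDS = 600  # mirrors DATASET_CACHE_DURATION above
_cache = {}
_cache_lock = threading.Lock()


def cache_key(dataset_name: str, project_name: str, base_url: str) -> str:
    # Same "dataset#project#base_url" shape as _generate_dataset_cache_key
    return f"{dataset_name}#{project_name}#{base_url}"


def is_cached(key: str) -> bool:
    with _cache_lock:
        entry = _cache.get(key)
        if entry is None:
            return False
        if time.time() - entry["timestamp"] <= TTL_SECONDS:
            return True
        del _cache[key]  # expired entries are evicted lazily on lookup
        return False


def remember(key: str, response) -> None:
    with _cache_lock:
        _cache[key] = {"timestamp": time.time(), "response": response}

In the hunk below, process_upload consults this cache before calling create_dataset_schema_with_trace and only records a hit when the schema call returns 200/201, so a failed creation is retried on the next trace.
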
@@ -165,20 +220,36 @@ def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
              save_task_status(result)
              return result

-         # Step 1: Create dataset schema
+         # Step 1: Create dataset schema (with caching)
          logger.info(f"Creating dataset schema for {dataset_name} with base_url: {base_url} and timeout: {timeout}")
-         try:
-             response = create_dataset_schema_with_trace(
-                 dataset_name=dataset_name,
-                 project_name=project_name,
-                 base_url=base_url,
-                 user_details=user_details,
-                 timeout=timeout
-             )
-             logger.info(f"Dataset schema created: {response}")
-         except Exception as e:
-             logger.error(f"Error creating dataset schema: {e}")
-             # Continue with other steps
+
+         # Generate cache key and check if dataset creation is already cached
+         cache_key = _generate_dataset_cache_key(dataset_name, project_name, base_url)
+
+         if _is_dataset_cached(cache_key):
+             logger.info(f"Dataset schema creation skipped (cached) for {dataset_name}")
+         else:
+             try:
+                 # Clean up expired cache entries periodically
+                 # _cleanup_expired_cache_entries()
+
+                 response = create_dataset_schema_with_trace(
+                     dataset_name=dataset_name,
+                     project_name=project_name,
+                     base_url=base_url,
+                     user_details=user_details,
+                     timeout=timeout
+                 )
+                 logger.info(f"Dataset schema created: {response}")
+
+                 # Cache the response only if status code is 200
+                 if response and hasattr(response, 'status_code') and response.status_code in [200, 201]:
+                     _cache_dataset_creation(cache_key, response)
+                     logger.info(f"Response cached successfully for dataset: {dataset_name} and key: {cache_key}")
+
+             except Exception as e:
+                 logger.error(f"Error creating dataset schema: {e}")
+                 # Continue with other steps

          # Step 2: Upload trace metrics
          # if filepath and os.path.exists(filepath):
@@ -238,28 +309,34 @@ def process_upload(task_id: str, filepath: str, hash_id: str, zip_path: str,
                  logger.error(error_msg)

          # Step 4: Upload code hash
-         if hash_id and zip_path and os.path.exists(zip_path):
-             logger.info(f"Uploading code hash {hash_id} with base_url: {base_url} and timeout: {timeout}")
-             try:
-                 response = upload_code(
-                     hash_id=hash_id,
-                     zip_path=zip_path,
-                     project_name=project_name,
-                     dataset_name=dataset_name,
-                     base_url=base_url,
-                     timeout=timeout
-                 )
-                 logger.info(f"Code hash uploaded: {response}")
-             except Exception as e:
-                 logger.error(f"Error uploading code hash: {e}")
-         else:
-             logger.warning(f"Code zip {zip_path} not found, skipping code upload")
-
+         if tracer_type.startswith("agentic/"):
+             logger.info(f"Tracer type '{tracer_type}' matches agentic pattern, proceeding with code upload")
+             if hash_id and zip_path and os.path.exists(zip_path):
+                 logger.info(f"Uploading code hash {hash_id} with base_url: {base_url} and timeout: {timeout}")
+                 try:
+                     response = upload_code(
+                         hash_id=hash_id,
+                         zip_path=zip_path,
+                         project_name=project_name,
+                         dataset_name=dataset_name,
+                         base_url=base_url,
+                         timeout=timeout
+                     )
+                     if response is None:
+                         error_msg = "Code hash not uploaded"
+                         logger.error(error_msg)
+                     else:
+                         logger.info(f"Code hash uploaded successfully: {response}")
+                 except Exception as e:
+                     logger.error(f"Error uploading code hash: {e}")
+             else:
+                 logger.warning(f"Code zip {zip_path} not found, skipping code upload")
+
          # Mark task as completed
          result["status"] = STATUS_COMPLETED
          result["end_time"] = datetime.now().isoformat()
          logger.info(f"Task {task_id} completed successfully")
-
+
      except Exception as e:
          logger.error(f"Error processing task {task_id}: {e}")
          result["status"] = STATUS_FAILED
@@ -302,7 +379,8 @@ def save_task_status(task_status: Dict[str, Any]):
      with open(status_path, "w") as f:
          json.dump(task_status, f, indent=2)

- def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, dataset_name, user_details, base_url, timeout=120):
+ def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, dataset_name, user_details, base_url,
+                        tracer_type, timeout=120):
      """
      Submit a new upload task using futures.

@@ -349,6 +427,7 @@ def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, da
                  dataset_name=dataset_name,
                  user_details=user_details,
                  base_url=base_url,
+                 tracer_type = tracer_type,
                  timeout=timeout,
                  fail_on_trace_error=True
              )
@@ -379,6 +458,7 @@ def submit_upload_task(filepath, hash_id, zip_path, project_name, project_id, da
                  dataset_name=dataset_name,
                  user_details=user_details,
                  base_url=base_url,
+                 tracer_type=tracer_type,
                  timeout=timeout,
                  fail_on_trace_error=True
              )
@@ -550,6 +630,14 @@ def shutdown(timeout=120):

          _executor = None

+     # Close the session manager to clean up HTTP connections
+     if session_manager is not None:
+         try:
+             session_manager.close()
+             logger.info("Session manager closed successfully")
+         except Exception as e:
+             logger.error(f"Error closing session manager: {e}")
+
  # Register shutdown handler
  atexit.register(shutdown)

@@ -4,6 +4,9 @@ import os
  import re
  import time
  from urllib.parse import urlparse, urlunparse
+ from urllib3.exceptions import PoolError, MaxRetryError, NewConnectionError
+ from requests.exceptions import ConnectionError, Timeout, RequestException
+ from .session_manager import session_manager

  import requests

@@ -48,7 +51,7 @@ class UploadAgenticTraces:
              start_time = time.time()
              endpoint = f"{self.base_url}/v1/llm/presigned-url"
              # Changed to POST from GET
-             response = requests.request(
+             response = session_manager.session.request(
                  "POST", endpoint, headers=headers, data=payload, timeout=self.timeout
              )
              elapsed_ms = (time.time() - start_time) * 1000
@@ -62,7 +65,7 @@ class UploadAgenticTraces:
                  return presignedurl
              else:
                  # If POST fails, try GET
-                 response = requests.request(
+                 response = session_manager.session.request(
                      "GET", endpoint, headers=headers, data=payload, timeout=self.timeout
                  )
                  elapsed_ms = (time.time() - start_time) * 1000
@@ -83,7 +86,7 @@ class UploadAgenticTraces:
                      "Authorization": f"Bearer {token}",
                      "X-Project-Name": self.project_name,
                  }
-                 response = requests.request(
+                 response = session_manager.session.request(
                      "POST",
                      endpoint,
                      headers=headers,
@@ -110,8 +113,10 @@ class UploadAgenticTraces:
                      f"Error while getting presigned url: {response.json()['message']}"
                  )
                  return None
-
-         except requests.exceptions.RequestException as e:
+         except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+             session_manager.handle_request_exceptions(e, "getting presigned URL")
+             return None
+         except RequestException as e:
              logger.error(f"Error while getting presigned url: {e}")
              return None

@@ -138,16 +143,16 @@ class UploadAgenticTraces:

          if "blob.core.windows.net" in presignedUrl: # Azure
              headers["x-ms-blob-type"] = "BlockBlob"
-         print("Uploading agentic traces...")
+         logger.info("Uploading agentic traces to presigned URL...")
          try:
              with open(filename) as f:
                  payload = f.read().replace("\n", "").replace("\r", "").encode()
          except Exception as e:
-             print(f"Error while reading file: {e}")
+             logger.error(f"Error while reading file: {e}")
              return False
          try:
              start_time = time.time()
-             response = requests.request(
+             response = session_manager.session.request(
                  "PUT", presignedUrl, headers=headers, data=payload, timeout=self.timeout
              )
              elapsed_ms = (time.time() - start_time) * 1000
@@ -157,8 +162,11 @@ class UploadAgenticTraces:
              if response.status_code != 200 or response.status_code != 201:
                  return response, response.status_code
              return True
-         except requests.exceptions.RequestException as e:
-             print(f"Error while uploading to presigned url: {e}")
+         except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+             session_manager.handle_request_exceptions(e, "uploading trace to presigned URL")
+             return False
+         except RequestException as e:
+             logger.error(f"Error while uploading trace to presigned url: {e}")
              return False

      def insert_traces(self, presignedUrl):
@@ -177,16 +185,16 @@ class UploadAgenticTraces:
          try:
              start_time = time.time()
              endpoint = f"{self.base_url}/v1/llm/insert/trace"
-             response = requests.request(
+             response = session_manager.session.request(
                  "POST", endpoint, headers=headers, data=payload, timeout=self.timeout
              )
              elapsed_ms = (time.time() - start_time) * 1000
              logger.debug(
                  f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
              )
-             if response.status_code != 200:
-                 print(f"Error inserting traces: {response.json()['message']}")
-                 return False
+             if response.status_code in [200, 201]:
+                 logger.info(f"Traces inserted successfully: {response.json()['message']}")
+                 return True
              elif response.status_code == 401:
                  logger.warning("Received 401 error. Attempting to refresh token.")
                  token = RagaAICatalyst.get_token(force_refresh=True)
@@ -195,7 +203,7 @@ class UploadAgenticTraces:
                      "Content-Type": "application/json",
                      "X-Project-Name": self.project_name,
                  }
-                 response = requests.request(
+                 response = session_manager.session.request(
                      "POST",
                      endpoint,
                      headers=headers,
@@ -206,17 +214,21 @@ class UploadAgenticTraces:
                  logger.debug(
                      f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
                  )
-                 if response.status_code != 200:
-                     print(f"Error inserting traces: {response.json()['message']}")
-                     return False
+                 if response.status_code in [200, 201]:
+                     logger.info(f"Traces inserted successfully: {response.json()['message']}")
+                     return True
                  else:
-                     print("Error while inserting traces")
+                     logger.error(f"Error while inserting traces after 401: {response.json()['message']}")
                      return False
              else:
-                 return True
-         except requests.exceptions.RequestException as e:
-             print(f"Error while inserting traces: {e}")
-             return None
+                 logger.error(f"Error while inserting traces: {response.json()['message']}")
+                 return False
+         except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+             session_manager.handle_request_exceptions(e, "inserting traces")
+             return False
+         except RequestException as e:
+             logger.error(f"Error while inserting traces: {e}")
+             return False

      def _get_dataset_spans(self):
          try:
@@ -245,26 +257,26 @@ class UploadAgenticTraces:
                      continue
              return dataset_spans
          except Exception as e:
-             print(f"Error while reading dataset spans: {e}")
+             logger.error(f"Error while reading dataset spans: {e}")
              return None

      def upload_agentic_traces(self):
          try:
              presigned_url = self._get_presigned_url()
              if presigned_url is None:
-                 print("Warning: Failed to obtain presigned URL")
+                 logger.warning("Warning: Failed to obtain presigned URL")
                  return False

              # Upload the file using the presigned URL
              upload_result = self._put_presigned_url(presigned_url, self.json_file_path)
              if not upload_result:
-                 print("Error: Failed to upload file to presigned URL")
+                 logger.error("Error: Failed to upload file to presigned URL")
                  return False
              elif isinstance(upload_result, tuple):
                  response, status_code = upload_result
                  if status_code not in [200, 201]:
-                     print(
-                         f"Error: Upload failed with status code {status_code}: {response.text if hasattr(response, 'text') else 'Unknown error'}")
+                     logger.error(
+                         f"Error: Uploading agentic traces failed with status code {status_code}: {response.text if hasattr(response, 'text') else 'Unknown error'}")
                      return False
              # Insert trace records
              insert_success = self.insert_traces(presigned_url)
@@ -272,13 +284,14 @@ class UploadAgenticTraces:
                  print("Error: Failed to insert trace records")
                  return False

-             print("Successfully uploaded agentic traces")
+             logger.info("Successfully uploaded agentic traces")
              return True
          except FileNotFoundError:
-             print(f"Error: Trace file not found at {self.json_file_path}")
+             logger.error(f"Error: Trace file not found at {self.json_file_path}")
              return False
          except ConnectionError as e:
-             print(f"Error: Network connection failed while uploading traces: {e}")
+             logger.error(f"Error: Network connection failed while uploading traces: {e}")
              return False
          except Exception as e:
-             print(f"Error while uploading agentic traces: {e}")
+             logger.error(f"Error while uploading agentic traces: {e}")
+             return False
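
For context, the rewritten request paths above share one pattern: send through the pooled session, and on a 401 refresh the token once and replay the request. A minimal sketch of that pattern, where get_token and session_manager are stand-ins for RagaAICatalyst.get_token and the shared singleton:

def post_with_refresh(session_manager, get_token, endpoint, payload, timeout=30):
    """Send a POST; on 401, refresh the bearer token once and retry the same call."""
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {get_token()}"}
    response = session_manager.session.request("POST", endpoint, headers=headers, data=payload, timeout=timeout)
    if response.status_code == 401:
        # Token likely expired: refresh and replay exactly once
        headers["Authorization"] = f"Bearer {get_token(force_refresh=True)}"
        response = session_manager.session.request("POST", endpoint, headers=headers, data=payload, timeout=timeout)
    return response.status_code in (200, 201)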
@@ -1,15 +1,18 @@
- import json
  import logging
  import os
  import time
+ import re
+ import json
+ from urllib.parse import urlparse, urlunparse
+ from urllib3.exceptions import PoolError, MaxRetryError, NewConnectionError
+ from requests.exceptions import ConnectionError, Timeout, RequestException

  import requests

  from ragaai_catalyst.ragaai_catalyst import RagaAICatalyst
+ from .session_manager import session_manager

  logger = logging.getLogger(__name__)
- import re
- from urllib.parse import urlparse, urlunparse


  def upload_code(
@@ -19,11 +22,26 @@ def upload_code(
          project_name, dataset_name, base_url, timeout=timeout
      )

+     # Handle None case during exceptions - do not proceed
+     if code_hashes_list is None:
+         logger.error("Failed to fetch existing code hashes, cannot proceed with upload")
+         return None
+
      if hash_id not in code_hashes_list:
          presigned_url = _fetch_presigned_url(
              project_name, dataset_name, base_url, timeout=timeout
          )
-         _put_zip_presigned_url(project_name, presigned_url, zip_path, timeout=timeout)
+         # Handle None case for presigned URL
+         if presigned_url is None:
+             logger.error("Failed to fetch presigned URL, cannot proceed with upload")
+             return None
+
+         upload_result = _put_zip_presigned_url(project_name, presigned_url, zip_path, timeout=timeout)
+
+         # Handle upload failure
+         if upload_result is False or (isinstance(upload_result, tuple) and upload_result[1] not in [200, 201]):
+             logger.error("Failed to upload zip file")
+             return None

          response = _insert_code(
              dataset_name,
@@ -33,6 +51,10 @@ def upload_code(
              base_url,
              timeout=timeout,
          )
+         # Handle None response from insert_code
+         if response is None:
+             logger.error("Failed to insert code metadata")
+             return None
          return response
      else:
          return "Code already exists"
@@ -49,7 +71,7 @@ def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None, timeou
          url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
          start_time = time.time()
          endpoint = f"{url_base}/v2/llm/dataset/code?datasetName={dataset_name}"
-         response = requests.request(
+         response = session_manager.session.request(
              "GET", endpoint, headers=headers, data=payload, timeout=timeout
          )
          elapsed_ms = (time.time() - start_time) * 1000
@@ -57,7 +79,7 @@ def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None, timeou
              f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
          )

-         if response.status_code == 200:
+         if response.status_code in [200, 201]:
              return response.json()["data"]["codeHashes"]
          elif response.status_code == 401:
              logger.warning("Received 401 error. Attempting to refresh token.")
@@ -66,22 +88,25 @@ def _fetch_dataset_code_hashes(project_name, dataset_name, base_url=None, timeou
                  "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                  "X-Project-Name": project_name,
              }
-             response = requests.request(
+             response = session_manager.session.request(
                  "GET", endpoint, headers=headers, data=payload, timeout=timeout
              )
              elapsed_ms = (time.time() - start_time) * 1000
-             logger.debug(
-                 f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
-             )
-             if response.status_code == 200:
+             logger.debug(f"API Call: [GET] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms")
+             if response.status_code in [200, 201]:
                  return response.json()["data"]["codeHashes"]
              else:
-                 logger.error(
-                     f"Failed to fetch code hashes: {response.json()['message']}"
-                 )
-     except requests.exceptions.RequestException as e:
+                 logger.error(f"Failed to fetch code hashes: {response.json()['message']}")
+                 return None
+         else:
+             logger.error(f"Error while inserting traces: {response.json()['message']}")
+             return None
+     except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+         session_manager.handle_request_exceptions(e, "fetching dataset code hashes")
+         return None
+     except RequestException as e:
          logger.error(f"Failed to list datasets: {e}")
-         pass
+         return None


  def update_presigned_url(presigned_url, base_url):
@@ -156,7 +181,7 @@ def _fetch_presigned_url(project_name, dataset_name, base_url=None, timeout=120)
          logger.debug(
              f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
          )
-         if response.status_code == 200:
+         if response.status_code in [200, 201]:
              presigned_url = response.json()["data"]["presignedUrls"][0]
              presigned_url = update_presigned_url(presigned_url, url_base)
              return presigned_url
@@ -168,9 +193,10 @@ def _fetch_presigned_url(project_name, dataset_name, base_url=None, timeout=120)
              logger.error(
                  f"Failed to fetch code hashes: {response.json()['message']}"
              )
+             return None
      except requests.exceptions.RequestException as e:
          logger.error(f"Failed to list datasets: {e}")
-         pass
+         return None


  def _put_zip_presigned_url(project_name, presignedUrl, filename, timeout=120):
@@ -181,21 +207,28 @@ def _put_zip_presigned_url(project_name, presignedUrl, filename, timeout=120):

      if "blob.core.windows.net" in presignedUrl: # Azure
          headers["x-ms-blob-type"] = "BlockBlob"
-     print("Uploading code...")
-     with open(filename, "rb") as f:
-         payload = f.read()
-
-     start_time = time.time()
-     response = requests.request(
-         "PUT", presignedUrl, headers=headers, data=payload, timeout=timeout
-     )
-     elapsed_ms = (time.time() - start_time) * 1000
-     logger.debug(
-         f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
-     )
-     if response.status_code != 200 or response.status_code != 201:
-         return response, response.status_code
+     logger.info("Uploading code to presigned URL...")
+     try:
+         with open(filename, "rb") as f:
+             payload = f.read()

+         start_time = time.time()
+         response = session_manager.session.request(
+             "PUT", presignedUrl, headers=headers, data=payload, timeout=timeout
+         )
+         elapsed_ms = (time.time() - start_time) * 1000
+         logger.debug(
+             f"API Call: [PUT] {presignedUrl} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
+         )
+         if response.status_code not in [200, 201]:
+             return response, response.status_code
+         return True
+     except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+         session_manager.handle_request_exceptions(e, "uploading zip to presigned URL")
+         return False
+     except RequestException as e:
+         logger.error(f"Failed to upload zip: {e}")
+         return False

  def _insert_code(
      dataset_name, hash_id, presigned_url, project_name, base_url=None, timeout=120
@@ -218,39 +251,43 @@ def _insert_code(
          url_base = base_url if base_url is not None else RagaAICatalyst.BASE_URL
          start_time = time.time()
          endpoint = f"{url_base}/v2/llm/dataset/code"
-         response = requests.request(
+         response = session_manager.session.request(
              "POST", endpoint, headers=headers, data=payload, timeout=timeout
          )
          elapsed_ms = (time.time() - start_time) * 1000
          logger.debug(
              f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
          )
-         if response.status_code == 200:
+         if response.status_code in [200, 201]:
              return response.json()["message"]

          elif response.status_code == 401:
-             logger.warning("Received 401 error. Attempting to refresh token.")
+             logger.warning("Received 401 error during inserting code. Attempting to refresh token.")
              token = RagaAICatalyst.get_token(force_refresh=True)
              headers = {
                  "X-Project-Name": project_name,
                  "Content-Type": "application/json",
                  "Authorization": f"Bearer {token}",
              }
-             response = requests.request(
+             response = session_manager.session.request(
                  "POST", endpoint, headers=headers, data=payload, timeout=timeout
              )
              elapsed_ms = (time.time() - start_time) * 1000
              logger.debug(
                  f"API Call: [POST] {endpoint} | Status: {response.status_code} | Time: {elapsed_ms:.2f}ms"
              )
-             if response.status_code == 200:
+             if response.status_code in [200, 201]:
+                 logger.info(f"Code inserted successfully after 401: {response.json()['message']}")
                  return response.json()["message"]
              else:
-                 logger.error(f"Failed to insert code: {response.json()['message']}")
-                 pass
+                 logger.error(f"Failed to insert code after 401: {response.json()['message']}")
+                 return None
          else:
              logger.error(f"Failed to insert code: {response.json()['message']}")
-             pass
-     except requests.exceptions.RequestException as e:
+             return None
+     except (PoolError, MaxRetryError, NewConnectionError, ConnectionError, Timeout) as e:
+         session_manager.handle_request_exceptions(e, "inserting code")
+         return None
+     except RequestException as e:
          logger.error(f"Failed to insert code: {e}")
-         pass
+         return None
@@ -203,6 +203,7 @@ class RAGATraceExporter(SpanExporter):
                  dataset_name=self.dataset_name,
                  user_details=self.user_details,
                  base_url=self.base_url,
+                 tracer_type=self.tracer_type,
                  timeout=self.timeout
              )

@@ -156,14 +156,14 @@ def convert_json_format(

      # If prompt tokens or/and completion tokens are not present, will calculate it using tiktoken
      try:
-         if prompt_tokens == 0:
+         if prompt_tokens == 0 and span["attributes"].get("openinference.span.kind") == "LLM" and span["status"].get("status_code") != "ERROR":
              prompt_value = span["attributes"].get("input.value")
              if prompt_value:
                  prompt_tokens = count_tokens(prompt_value)
                  logger.debug(
                      f"Prompt tokens not present, calculated it: {prompt_tokens}"
                  )
-         if completion_tokens == 0:
+         if completion_tokens == 0 and span["attributes"].get("openinference.span.kind") == "LLM" and span["status"].get("status_code") != "ERROR" :
              completion_value = span["attributes"].get("output.value")
              if completion_value:
                  completion_tokens = count_tokens(completion_value)
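
For context, the token-count fallback above now only fires for LLM spans that did not error. A minimal sketch of the guard, where count_tokens stands in for the converter's tiktoken-based helper:

def fallback_prompt_tokens(span: dict, prompt_tokens: int, count_tokens) -> int:
    """Recompute prompt tokens only for successful LLM spans that report zero tokens."""
    attributes = span.get("attributes", {})
    status = span.get("status", {})
    if (
        prompt_tokens == 0
        and attributes.get("openinference.span.kind") == "LLM"
        and status.get("status_code") != "ERROR"
    ):
        prompt_value = attributes.get("input.value")
        if prompt_value:
            return count_tokens(prompt_value)
    return prompt_tokens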
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ragaai_catalyst
- Version: 2.2.4b4
+ Version: 2.2.4.1b1
  Summary: RAGA AI CATALYST
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>, Tushar Kumar <tushar.kumar@raga.ai>, Rishabh Pandey <rishabh.pandey@raga.ai>, Jyotsana C G <jyotsana@raga.ai>
  Requires-Python: <=3.13.2,>=3.10
@@ -54,9 +54,10 @@ ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py,sha256=m8CxYkl
  ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py,sha256=xxrliKPfdfbIZRZqMnUewsaTD8_Hv0dbuoBivNZGD4U,21674
  ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py,sha256=bhSUhNQCuJXKjgJAXhjKEYjnHMpYN90FSZdR84fNIKU,4614
  ragaai_catalyst/tracers/agentic_tracing/upload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py,sha256=iMUMFR9XVipCBunpv8_No8bCoP3lqG47M5dg-ugibWo,21006
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py,sha256=t3spo5w7TyfR0Zeqm1h5Z-bJ-BlZ3EPGTvRdK5lpFpE,11705
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py,sha256=2mxdi7k_SoDqQUFo1oQ__28CpmSIvVugYcbuRltUK9Q,9920
+ ragaai_catalyst/tracers/agentic_tracing/upload/session_manager.py,sha256=XZih2aV8OAcRkjFPISFM1MecW04d0psBw6KQwuaqMZE,3937
+ ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py,sha256=Ujbu0KDl7oDr-cFtLwrQK_i7ghMuPV92mFnRfobJ1aI,24822
+ ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py,sha256=vH9at3012iNrNIA30TMr7qLyvWtKyZn9wpd5esmBg0A,12866
+ ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py,sha256=hJv-LST4rGbldG9k075otjfHZRRurQKyUHClD5HXs4s,12015
  ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py,sha256=m1O8lKpxKwtHofXLW3fTHX5yfqDW5GxoveARlg5cTw4,2571
  ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py,sha256=XdB3X_ufe4RVvGorxSqAiB9dYv4UD7Hvvuw3bsDUppY,60
  ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py,sha256=ZduFA7MmTnWfQ2FzSD0hxMAAfNNTgBs4CXcHZdXJv6k,749
@@ -76,7 +77,7 @@ ragaai_catalyst/tracers/exporters/__init__.py,sha256=wQbaqyeIjVZxYprHCKZ9BeiqxeX
  ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py,sha256=Rm-QaLv1qMAKpHKcFOcK_HWaKHwFBoUH45_4QYipE-g,6843
  ragaai_catalyst/tracers/exporters/file_span_exporter.py,sha256=NZsD3rShUiC3rO9y3Y2vqEtS3MO51FXZy0p3q9cdDNY,6403
  ragaai_catalyst/tracers/exporters/raga_exporter.py,sha256=l-RfysTIXYxtvYkVlJbRvg-AzJbT4Fdb-YiZh0mfuDs,17868
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py,sha256=VLvlWFRFPhE32WrF-_J_vCczduz13WAcOW8MKDgDYJc,8979
+ ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py,sha256=VxO96ldBpG5mCncrN5mXErIZMlxQ1ewhNoMLfCrzegM,9025
  ragaai_catalyst/tracers/instrumentators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ragaai_catalyst/tracers/utils/__init__.py,sha256=KeMaZtYaTojilpLv65qH08QmpYclfpacDA0U3wg6Ybw,64
  ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py,sha256=SehrD7q8ytAiUYoWr406b4mWs3Lk0Rcy6Ekkihh22TI,1703
@@ -86,10 +87,10 @@ ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py,sha256=XS2_x2
  ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json,sha256=WlZCZeOQ54aMVjYS8BAeka2uaFC3ftBTMZ8zzzA8TAI,495947
  ragaai_catalyst/tracers/utils/rag_extraction_logic_final.py,sha256=3ygkRT__lLDRflRttjzPu28tIA8cTCiGQVMQjqMItqQ,11309
  ragaai_catalyst/tracers/utils/rag_trace_json_converter.py,sha256=54IEZO-YRjUAahV5nw8KClXqTF1LhfDry_TsZ4KGow4,20467
- ragaai_catalyst/tracers/utils/trace_json_converter.py,sha256=U9GFVDCWRQvmBSMTDIZoMerJCnH8Gijw95r2oQbuFdQ,11560
+ ragaai_catalyst/tracers/utils/trace_json_converter.py,sha256=-HZVmijeUFLO7e9OAvi1RJdWVTxPRUHPd1MkKQlCD54,11785
  ragaai_catalyst/tracers/utils/utils.py,sha256=o-p9n2ZuophdrV0wrixu-BqRHCkovup_klc3mS8mU8g,2374
- ragaai_catalyst-2.2.4b4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- ragaai_catalyst-2.2.4b4.dist-info/METADATA,sha256=sOTQ0RA2Ao4UevDTayNNcYJhJ8OBza9sKJXpJ7lHg-0,17679
- ragaai_catalyst-2.2.4b4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ragaai_catalyst-2.2.4b4.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
- ragaai_catalyst-2.2.4b4.dist-info/RECORD,,
+ ragaai_catalyst-2.2.4.1b1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ ragaai_catalyst-2.2.4.1b1.dist-info/METADATA,sha256=AMVTTjxVzSdSmDIXQrIgZmUVOXxi7o43k0mNMGJfzJQ,17681
+ ragaai_catalyst-2.2.4.1b1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ragaai_catalyst-2.2.4.1b1.dist-info/top_level.txt,sha256=HpgsdRgEJMk8nqrU6qdCYk3di7MJkDL0B19lkc7dLfM,16
+ ragaai_catalyst-2.2.4.1b1.dist-info/RECORD,,