ragaai-catalyst 2.0.7.2b1__py3-none-any.whl → 2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
Files changed (29)
  1. ragaai_catalyst/dataset.py +0 -3
  2. ragaai_catalyst/evaluation.py +1 -2
  3. ragaai_catalyst/tracers/__init__.py +1 -1
  4. ragaai_catalyst/tracers/agentic_tracing/agent_tracer.py +231 -74
  5. ragaai_catalyst/tracers/agentic_tracing/agentic_tracing.py +32 -42
  6. ragaai_catalyst/tracers/agentic_tracing/base.py +132 -30
  7. ragaai_catalyst/tracers/agentic_tracing/data_structure.py +91 -79
  8. ragaai_catalyst/tracers/agentic_tracing/examples/FinancialAnalysisSystem.ipynb +536 -0
  9. ragaai_catalyst/tracers/agentic_tracing/examples/GameActivityEventPlanner.ipynb +134 -0
  10. ragaai_catalyst/tracers/agentic_tracing/examples/TravelPlanner.ipynb +563 -0
  11. ragaai_catalyst/tracers/agentic_tracing/file_name_tracker.py +46 -0
  12. ragaai_catalyst/tracers/agentic_tracing/llm_tracer.py +262 -356
  13. ragaai_catalyst/tracers/agentic_tracing/tool_tracer.py +31 -19
  14. ragaai_catalyst/tracers/agentic_tracing/unique_decorator.py +61 -117
  15. ragaai_catalyst/tracers/agentic_tracing/upload_agentic_traces.py +187 -0
  16. ragaai_catalyst/tracers/agentic_tracing/upload_code.py +115 -0
  17. ragaai_catalyst/tracers/agentic_tracing/user_interaction_tracer.py +35 -59
  18. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -4
  19. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +2201 -324
  20. ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py +186 -0
  21. ragaai_catalyst/tracers/exporters/raga_exporter.py +1 -7
  22. ragaai_catalyst/tracers/llamaindex_callback.py +56 -60
  23. ragaai_catalyst/tracers/tracer.py +6 -2
  24. ragaai_catalyst/tracers/upload_traces.py +46 -57
  25. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/METADATA +8 -4
  26. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/RECORD +28 -22
  27. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/WHEEL +1 -1
  28. ragaai_catalyst/tracers/agentic_tracing/Untitled-1.json +0 -660
  29. {ragaai_catalyst-2.0.7.2b1.dist-info → ragaai_catalyst-2.1.dist-info}/top_level.txt +0 -0
ragaai_catalyst/tracers/agentic_tracing/zip_list_of_unique_files.py
@@ -0,0 +1,186 @@
+ import os
+ import hashlib
+ import zipfile
+ import re
+ import ast
+ import importlib.util
+ import json
+ import astor
+ from pathlib import Path
+ import logging
+ logger = logging.getLogger(__name__)
+
+ # Define the PackageUsageRemover class
+ class PackageUsageRemover(ast.NodeTransformer):
+     def __init__(self, package_name):
+         self.package_name = package_name
+         self.imported_names = set()
+
+     def visit_Import(self, node):
+         filtered_names = []
+         for name in node.names:
+             if not name.name.startswith(self.package_name):
+                 filtered_names.append(name)
+             else:
+                 self.imported_names.add(name.asname or name.name)
+
+         if not filtered_names:
+             return None
+         node.names = filtered_names
+         return node
+
+     def visit_ImportFrom(self, node):
+         if node.module and node.module.startswith(self.package_name):
+             self.imported_names.update(n.asname or n.name for n in node.names)
+             return None
+         return node
+
+     def visit_Assign(self, node):
+         if self._uses_package(node.value):
+             return None
+         return node
+
+     def visit_Call(self, node):
+         if isinstance(node.func, ast.Name) and node.func.id in self.imported_names:
+             return None
+         if isinstance(node.func, ast.Attribute):
+             if isinstance(node.func.value, ast.Name) and node.func.value.id in self.imported_names:
+                 return None
+         return node
+
+     def _uses_package(self, node):
+         if isinstance(node, ast.Name) and node.id in self.imported_names:
+             return True
+         if isinstance(node, ast.Call):
+             return self._uses_package(node.func)
+         if isinstance(node, ast.Attribute):
+             return self._uses_package(node.value)
+         return False
+
+ # Define the function to remove package code from a source code string
+ def remove_package_code(source_code: str, package_name: str) -> str:
+     try:
+         tree = ast.parse(source_code)
+         transformer = PackageUsageRemover(package_name)
+         modified_tree = transformer.visit(tree)
+         modified_code = astor.to_source(modified_tree)
+         return modified_code
+     except Exception as e:
+         raise Exception(f"Error processing source code: {str(e)}")
+
+ # TraceDependencyTracker class
+ class TraceDependencyTracker:
+     def __init__(self, output_dir=None):
+         self.tracked_files = set()
+         self.python_imports = set()
+         self.output_dir = output_dir or os.getcwd()
+
+     def track_file_access(self, filepath):
+         if os.path.exists(filepath):
+             self.tracked_files.add(os.path.abspath(filepath))
+
+     def find_config_files(self, content, base_path):
+         patterns = [
+             r'(?:open|read|load|with\s+open)\s*\([\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+             r'(?:config|cfg|conf|settings|file|path)(?:_file|_path)?\s*=\s*[\'"]([^\'"]*\.(?:json|yaml|yml|txt|cfg|config|ini))[\'"]',
+             r'[\'"]([^\'"]*\.txt)[\'"]',
+             r'[\'"]([^\'"]*\.(?:yaml|yml))[\'"]',
+             r'from\s+(\S+)\s+import',
+             r'import\s+(\S+)'
+         ]
+         for pattern in patterns:
+             matches = re.finditer(pattern, content)
+             for match in matches:
+                 filepath = match.group(1)
+                 if not os.path.isabs(filepath):
+                     full_path = os.path.join(os.path.dirname(base_path), filepath)
+                 else:
+                     full_path = filepath
+                 if os.path.exists(full_path):
+                     self.track_file_access(full_path)
+                     try:
+                         with open(full_path, 'r', encoding='utf-8') as f:
+                             self.find_config_files(f.read(), full_path)
+                     except (UnicodeDecodeError, IOError):
+                         pass
+
+     def analyze_python_imports(self, filepath):
+         try:
+             with open(filepath, 'r', encoding='utf-8') as file:
+                 tree = ast.parse(file.read(), filename=filepath)
+                 for node in ast.walk(tree):
+                     if isinstance(node, (ast.Import, ast.ImportFrom)):
+                         if isinstance(node, ast.ImportFrom) and node.module:
+                             module_name = node.module
+                         else:
+                             for name in node.names:
+                                 module_name = name.name.split('.')[0]
+                         try:
+                             spec = importlib.util.find_spec(module_name)
+                             if spec and spec.origin and not spec.origin.startswith(os.path.dirname(importlib.__file__)):
+                                 self.python_imports.add(spec.origin)
+                         except (ImportError, AttributeError):
+                             pass
+         except Exception as e:
+             print(f"Warning: Could not analyze imports in {filepath}: {str(e)}")
+
+     def create_zip(self, filepaths):
+         for filepath in filepaths:
+             abs_path = os.path.abspath(filepath)
+             self.track_file_access(abs_path)
+             try:
+                 with open(abs_path, 'r', encoding='utf-8') as file:
+                     content = file.read()
+                 self.find_config_files(content, abs_path)
+                 if filepath.endswith('.py'):
+                     self.analyze_python_imports(abs_path)
+             except Exception as e:
+                 print(f"Warning: Could not process {filepath}: {str(e)}")
+
+         self.tracked_files.update(self.python_imports)
+         hash_contents = []
+         for filepath in sorted(self.tracked_files):
+             if 'env' in filepath:
+                 continue
+             try:
+                 with open(filepath, 'rb') as file:
+                     content = file.read()
+                 if filepath.endswith('.py'):
+                     # Temporarily remove ragaai_catalyst code for hash calculation
+                     content = remove_package_code(content.decode('utf-8'), 'ragaai_catalyst').encode('utf-8')
+                 hash_contents.append(content)
+             except Exception as e:
+                 print(f"Warning: Could not read {filepath} for hash calculation: {str(e)}")
+
+         combined_content = b''.join(hash_contents)
+         hash_id = hashlib.sha256(combined_content).hexdigest()
+
+         zip_filename = os.path.join(self.output_dir, f'{hash_id}.zip')
+         common_path = [os.path.abspath(p) for p in self.tracked_files if 'env' not in p]
+
+         if common_path != []:
+             base_path = os.path.commonpath(common_path)
+             with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                 for filepath in sorted(self.tracked_files):
+                     if 'env' in filepath:
+                         continue
+                     try:
+                         relative_path = os.path.relpath(filepath, base_path)
+                         zipf.write(filepath, relative_path)
+                         # logger.info(f"Added to zip: {relative_path}")
+                     except Exception as e:
+                         print(f"Warning: Could not add {filepath} to zip: {str(e)}")
+
+         return hash_id, zip_filename
+
+ # Main function for creating a zip of unique files
+ def zip_list_of_unique_files(filepaths, output_dir):
+     tracker = TraceDependencyTracker(output_dir)
+     return tracker.create_zip(filepaths)
+
+ # Example usage
+ if __name__ == "__main__":
+     filepaths = ["script1.py", "script2.py"]
+     hash_id, zip_path = zip_list_of_unique_files(filepaths, os.getcwd())
+     print(f"Created zip file: {zip_path}")
+     print(f"Hash ID: {hash_id}")
ragaai_catalyst/tracers/exporters/raga_exporter.py
@@ -7,7 +7,6 @@ from tqdm import tqdm
  import requests
  from ...ragaai_catalyst import RagaAICatalyst
  import shutil
- import pdb
 
  logger = logging.getLogger(__name__)
 
@@ -196,7 +195,6 @@ class RagaExporter:
          return status_code
 
      async def get_presigned_url(self, session, num_files):
-         # pdb.set_trace()
          """
          Asynchronously retrieves a presigned URL from the RagaExporter API.
 
@@ -213,7 +211,6 @@ class RagaExporter:
          """
 
          async def make_request():
-             # pdb.set_trace()
 
              json_data = {
                  "datasetName": self.dataset_name,
@@ -296,8 +293,7 @@ class RagaExporter:
          return response.status
 
      async def upload_file(self, session, url, file_path):
-         # pdb.set_trace()
-         # print('url', url)
+
          """
          Asynchronously uploads a file using the given session, url, and file path.
          Supports both regular and Azure blob storage URLs.
@@ -345,8 +341,6 @@ class RagaExporter:
          return response.status
 
      async def check_and_upload_files(self, session, file_paths):
-         # print(file_paths)
-         # pdb.set_trace()
          """
          Checks if there are files to upload, gets presigned URLs, uploads files, and streams them if successful.
 
ragaai_catalyst/tracers/llamaindex_callback.py
@@ -11,7 +11,6 @@ import tempfile
 
  from ..ragaai_catalyst import RagaAICatalyst
 
-
  class CustomEncoder(json.JSONEncoder):
      def default(self, obj):
          if isinstance(obj, Enum):
@@ -55,7 +54,7 @@ class LlamaIndexTracer:
              payload: Optional[Dict[str, Any]] = None,
              event_id: str = "",
              parent_id: str = "",
-             **kwargs: Any,
+             **kwargs: Any
          ) -> None:
              trace = {
                  "event_type": event_type,
@@ -69,7 +68,7 @@ class LlamaIndexTracer:
                  self.in_query = True
                  self.query_event_id = event_id
                  self.current_query_traces = []
-
+ 
              if self.in_query:
                  self.current_query_traces.append(trace)
              self.traces.append(trace)
@@ -79,7 +78,7 @@ class LlamaIndexTracer:
              event_type: Optional[str],
              payload: Optional[Dict[str, Any]] = None,
              event_id: str = "",
-             **kwargs: Any,
+             **kwargs: Any
          ) -> None:
              trace = {
                  "event_type": event_type,
@@ -91,21 +90,24 @@ class LlamaIndexTracer:
              if self.in_query:
                  self.current_query_traces.append(trace)
              self.traces.append(trace)
-
+ 
              # If this is the end of a query event, automatically save the traces
              if event_type == "query" and event_id == self.query_event_id:
                  self.in_query = False
                  outer_self._save_current_query_traces(self.current_query_traces)
                  self.current_query_traces = []
+ 
 
          self.trace_handler = CustomTraceHandler()
          self.callback_manager.add_handler(self.trace_handler)
          Settings.callback_manager = self.callback_manager
 
+ 
          # Monkey-patch LlamaIndex components
          self._monkey_patch()
          return self # Return self to allow method chaining
 
+ 
      def _save_current_query_traces(self, query_traces):
          """Save traces for the current query"""
          self.query_count += 1
@@ -129,6 +131,7 @@ class LlamaIndexTracer:
          self._insert_traces(presignedUrl)
          # print(f"Query {self.query_count} traces uploaded")
 
+ 
      def _monkey_patch(self):
          """Monkey-patch LlamaIndex components to automatically include the callback manager"""
          from llama_index.core import VectorStoreIndex, ServiceContext
@@ -178,7 +181,7 @@ class LlamaIndexTracer:
          # self._upload_traces(save_json_to_pwd=True)
          self.callback_manager.remove_handler(self.trace_handler)
          self._restore_original_inits()
-         print("Traces uploaded")
+         print("Traces uplaoded")
          self._upload_task = True
 
      def _restore_original_inits(self):
@@ -210,17 +213,17 @@ class LlamaIndexTracer:
          Generate a random trace ID using UUID4.
          Returns a string representation of the UUID with no hyphens.
          """
-         return "0x" + str(uuid.uuid4()).replace("-", "")
+         return '0x'+str(uuid.uuid4()).replace('-', '')
 
      def _get_user_passed_detail(self):
          user_detail = self.user_detail
          user_detail["trace_id"] = self._generate_trace_id()
          metadata = user_detail["metadata"]
          metadata["log_source"] = "llamaindex_tracer"
-         metadata["recorded_on"] = datetime.utcnow().isoformat().replace("T", " ")
+         metadata["recorded_on"] = datetime.utcnow().isoformat().replace('T', ' ')
          user_detail["metadata"] = metadata
          return user_detail
-
+ 
      def _add_traces_in_data(self, traces=None):
          """Add traces to user detail"""
          user_detail = self._get_user_passed_detail()
@@ -231,40 +234,37 @@ class LlamaIndexTracer:
          user_detail["traces"] = traces
          return user_detail
 
+ 
      def _create_dataset_schema_with_trace(self):
          SCHEMA_MAPPING_NEW = {
              "trace_id": {"columnType": "traceId"},
              "trace_uri": {"columnType": "traceUri"},
              "prompt": {"columnType": "prompt"},
-             "response": {"columnType": "response"},
+             "response":{"columnType": "response"},
              "context": {"columnType": "context"},
-             "llm_model": {"columnType": "pipeline"},
+             "llm_model": {"columnType":"pipeline"},
              "recorded_on": {"columnType": "metadata"},
-             "embed_model": {"columnType": "pipeline"},
+             "embed_model": {"columnType":"pipeline"},
              "log_source": {"columnType": "metadata"},
-             "vector_store": {"columnType": "pipeline"},
-             "feedback": {"columnType": "feedBack"},
+             "vector_store":{"columnType":"pipeline"},
+             "feedback": {"columnType":"feedBack"}
          }
-
          def make_request():
              headers = {
                  "Content-Type": "application/json",
                  "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
                  "X-Project-Name": self.project_name,
              }
-             payload = json.dumps(
-                 {
-                     "datasetName": self.dataset_name,
-                     "schemaMapping": SCHEMA_MAPPING_NEW,
-                     "traceFolderUrl": None,
-                 }
-             )
-             response = requests.request(
-                 "POST",
+             payload = json.dumps({
+                 "datasetName": self.dataset_name,
+                 "schemaMapping": SCHEMA_MAPPING_NEW,
+                 "traceFolderUrl": None,
+             })
+             response = requests.request("POST",
                  f"{self.base_url}/v1/llm/dataset/logs",
                  headers=headers,
                  data=payload,
-                 timeout=self.timeout,
+                 timeout=self.timeout
              )
 
              return response
@@ -277,35 +277,31 @@ class LlamaIndexTracer:
          if response.status_code != 200:
              return response.status_code
          return response.status_code
-
+ 
      def _get_presigned_url(self):
-         payload = json.dumps(
-             {
+         payload = json.dumps({
              "datasetName": self.dataset_name,
              "numFiles": 1,
-         }
-         )
+         })
          headers = {
              "Content-Type": "application/json",
              "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
              "X-Project-Name": self.project_name,
          }
 
-         response = requests.request(
-             "GET",
-             f"{self.base_url}/v1/llm/presigned-url",
-             headers=headers,
-             data=payload,
-             timeout=self.timeout,
-         )
+         response = requests.request("GET",
+             f"{self.base_url}/v1/llm/presigned-url",
+             headers=headers,
+             data=payload,
+             timeout=self.timeout)
          if response.status_code == 200:
              presignedUrls = response.json()["data"]["presignedUrls"][0]
              return presignedUrls
-
+ 
      def _put_presigned_url(self, presignedUrl, filename):
          headers = {
-             "Content-Type": "application/json",
-         }
+                 "Content-Type": "application/json",
+             }
 
          if "blob.core.windows.net" in presignedUrl: # Azure
              headers["x-ms-blob-type"] = "BlockBlob"
@@ -313,31 +309,31 @@ class LlamaIndexTracer:
          with open(filename) as f:
              payload = f.read().replace("\n", "").replace("\r", "").encode()
 
-         response = requests.request(
-             "PUT", presignedUrl, headers=headers, data=payload, timeout=self.timeout
-         )
+ 
+         response = requests.request("PUT",
+             presignedUrl,
+             headers=headers,
+             data=payload,
+             timeout=self.timeout)
          if response.status_code != 200 or response.status_code != 201:
              return response, response.status_code
-
+ 
      def _insert_traces(self, presignedUrl):
          headers = {
-             "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
-             "Content-Type": "application/json",
-             "X-Project-Name": self.project_name,
-         }
-         payload = json.dumps(
-             {
+                 "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                 "Content-Type": "application/json",
+                 "X-Project-Name": self.project_name,
+             }
+         payload = json.dumps({
              "datasetName": self.dataset_name,
              "presignedUrl": presignedUrl,
-         }
-         )
-         response = requests.request(
-             "POST",
-             f"{self.base_url}/v1/llm/insert/trace",
-             headers=headers,
-             data=payload,
-             timeout=self.timeout,
-         )
+         })
+         response = requests.request("POST",
+             f"{self.base_url}/v1/llm/insert/trace",
+             headers=headers,
+             data=payload,
+             timeout=self.timeout)
+ 
 
      def _upload_traces(self, save_json_to_pwd=None):
          """Save traces to a file"""
@@ -355,7 +351,7 @@ class LlamaIndexTracer:
          presignedUrl = self._get_presigned_url()
          self._put_presigned_url(presignedUrl, filename)
          self._insert_traces(presignedUrl)
-         print("Traces uploaded")
+         print("Traces uplaoded")
 
      def get_upload_status(self):
          """Check the status of the trace upload."""
ragaai_catalyst/tracers/tracer.py
@@ -20,6 +20,7 @@ from .utils import get_unique_key
  # from .llamaindex_callback import LlamaIndexTracer
  from ..ragaai_catalyst import RagaAICatalyst
  from .agentic_tracing.agentic_tracing import AgenticTracing
+ from .agentic_tracing.file_name_tracker import TrackName
  from .agentic_tracing.llm_tracer import LLMTracerMixin
 
  logger = logging.getLogger(__name__)
@@ -66,8 +67,8 @@ class Tracer(AgenticTracing):
          self.dataset_name = dataset_name
          self.tracer_type = tracer_type
          self.metadata = self._improve_metadata(metadata, tracer_type)
-         self.metadata["total_cost"] = 0.0
-         self.metadata["total_tokens"] = 0
+         # self.metadata["total_cost"] = 0.0
+         # self.metadata["total_tokens"] = 0
          self.pipeline = pipeline
          self.description = description
          self.upload_timeout = upload_timeout
@@ -96,6 +97,8 @@ class Tracer(AgenticTracing):
              self.project_id = [
                  project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
              ][0]
+             # super().__init__(user_detail=self._pass_user_data())
+             # self.file_tracker = TrackName()
              self._pass_user_data()
 
          except requests.exceptions.RequestException as e:
@@ -116,6 +119,7 @@ class Tracer(AgenticTracing):
          else:
              self._upload_task = None
              # raise ValueError (f"Currently supported tracer types are 'langchain' and 'llamaindex'.")
+ 
 
      def _improve_metadata(self, metadata, tracer_type):
          if metadata is None: