ragaai-catalyst 2.1.5b0__py3-none-any.whl → 2.1.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
 import os
+import json
 import requests
 from .utils import response_checker
 from typing import Union
@@ -271,3 +272,332 @@ class Dataset:
         except Exception as e:
             logger.error(f"Error in create_from_csv: {e}")
             raise
+
+    def add_rows(self, csv_path, dataset_name):
+        """
+        Add rows to an existing dataset from a CSV file.
+
+        Args:
+            csv_path (str): Path to the CSV file to be added
+            dataset_name (str): Name of the existing dataset to add rows to
+
+        Raises:
+            ValueError: If dataset does not exist or columns are incompatible
+        """
+        # Get existing dataset columns
+        existing_columns = self.get_dataset_columns(dataset_name)
+
+        # Read the CSV file to check columns
+        try:
+            import pandas as pd
+            df = pd.read_csv(csv_path)
+            csv_columns = df.columns.tolist()
+        except Exception as e:
+            logger.error(f"Failed to read CSV file: {e}")
+            raise ValueError(f"Unable to read CSV file: {e}")
+
+        # Check column compatibility
+        for column in existing_columns:
+            if column not in csv_columns:
+                df[column] = None
+
+        # Get presigned URL for the CSV
+        def get_presignedUrl():
+            headers = {
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+            try:
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                return response.json()
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get presigned URL: {e}")
+                raise
+
+        try:
+            presignedUrl = get_presignedUrl()
+            if presignedUrl['success']:
+                url = presignedUrl['data']['presignedUrl']
+                filename = presignedUrl['data']['fileName']
+            else:
+                raise ValueError('Unable to fetch presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in get_presignedUrl: {e}")
+            raise
+
+        # Upload CSV to presigned URL
+        def put_csv_to_presignedUrl(url):
+            headers = {
+                'Content-Type': 'text/csv',
+                'x-ms-blob-type': 'BlockBlob',
+            }
+            try:
+                with open(csv_path, 'rb') as file:
+                    response = requests.put(
+                        url,
+                        headers=headers,
+                        data=file,
+                        timeout=Dataset.TIMEOUT,
+                    )
+                    response.raise_for_status()
+                    return response
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to put CSV to presigned URL: {e}")
+                raise
+
+        try:
+            put_csv_response = put_csv_to_presignedUrl(url)
+            if put_csv_response.status_code not in (200, 201):
+                raise ValueError('Unable to put csv to the presignedUrl')
+        except Exception as e:
+            logger.error(f"Error in put_csv_to_presignedUrl: {e}")
+            raise
+
+        # Prepare schema mapping (assuming same mapping as original dataset)
+        def generate_schema_mapping(dataset_name):
+            headers = {
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+            json_data = {
+                "size": 12,
+                "page": "0",
+                "projectId": str(self.project_id),
+                "search": ""
+            }
+            try:
+                # First get dataset details
+                response = requests.post(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset",
+                    headers=headers,
+                    json=json_data,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+                datasets = response.json()["data"]["content"]
+                dataset_id = [dataset["id"] for dataset in datasets if dataset["name"]==dataset_name][0]
+
+                # Get dataset details to extract schema mapping
+                response = requests.get(
+                    f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
+                    headers=headers,
+                    timeout=Dataset.TIMEOUT,
+                )
+                response.raise_for_status()
+
+                # Extract schema mapping
+                schema_mapping = {}
+                for col in response.json()["data"]["datasetColumnsResponses"]:
+                    schema_mapping[col["displayName"]] = {"columnType": col["columnType"]}
+
+                return schema_mapping
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to get schema mapping: {e}")
+                raise
+
+        # Upload CSV to elastic
+        try:
+            schema_mapping = generate_schema_mapping(dataset_name)
+
+            data = {
+                "projectId": str(self.project_id),
+                "datasetName": dataset_name,
+                "fileName": filename,
+                "schemaMapping": schema_mapping,
+                "opType": "update",  # Use update for adding rows
+                "description": "Adding new rows to dataset"
+            }
+
+            headers = {
+                'Content-Type': 'application/json',
+                'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id)
+            }
+
+            response = requests.post(
+                f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
+                headers=headers,
+                json=data,
+                timeout=Dataset.TIMEOUT,
+            )
+
+            if response.status_code == 400:
+                raise ValueError(response.json().get("message", "Failed to add rows"))
+
+            response.raise_for_status()
+
+            # Check response
+            response_data = response.json()
+            if response_data.get('success', False):
+                print(f"{response_data['message']}")
+            else:
+                raise ValueError(response_data.get('message', 'Failed to add rows'))
+
+        except Exception as e:
+            logger.error(f"Error in add_rows_to_dataset: {e}")
+            raise
+
+    def add_columns(self, text_fields, dataset_name, column_name, provider, model, variables={}):
+        """
+        Add a column to a dataset with dynamically fetched model parameters
+
+        Args:
+            project_id (int): Project ID
+            dataset_id (int): Dataset ID
+            column_name (str): Name of the new column
+            provider (str): Name of the model provider
+            model (str): Name of the model
+        """
+        # First, get model parameters
+
+        # Validate text_fields input
+        if not isinstance(text_fields, list):
+            raise ValueError("text_fields must be a list of dictionaries")
+
+        for field in text_fields:
+            if not isinstance(field, dict) or 'role' not in field or 'content' not in field:
+                raise ValueError("Each text field must be a dictionary with 'role' and 'content' keys")
+
+        # First, get the dataset ID
+        headers = {
+            'Content-Type': 'application/json',
+            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+            "X-Project-Id": str(self.project_id),
+        }
+        json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
+
+        try:
+            # Get dataset list
+            response = requests.post(
+                f"{Dataset.BASE_URL}/v2/llm/dataset",
+                headers=headers,
+                json=json_data,
+                timeout=Dataset.TIMEOUT,
+            )
+            response.raise_for_status()
+            datasets = response.json()["data"]["content"]
+
+            # Find dataset ID
+            dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+            if dataset_id is None:
+                raise ValueError(f"Dataset {dataset_name} not found")
+
+
+
+            parameters_url = f"{Dataset.BASE_URL}/playground/providers/models/parameters/list"
+
+            headers = {
+                'Content-Type': 'application/json',
+                "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                "X-Project-Id": str(self.project_id),
+            }
+
+            # Fetch model parameters
+            parameters_payload = {
+                "providerName": provider,
+                "modelName": model
+            }
+
+            # Get model parameters
+            params_response = requests.post(
+                parameters_url,
+                headers=headers,
+                json=parameters_payload,
+                timeout=30
+            )
+            params_response.raise_for_status()
+
+            # Extract parameters
+            all_parameters = params_response.json().get('data', [])
+
+            # Filter and transform parameters for add-column API
+            formatted_parameters = []
+            for param in all_parameters:
+                value = param.get('value')
+                param_type = param.get('type')
+
+                if value is None:
+                    formatted_param = {
+                        "name": param.get('name'),
+                        "value": None,  # Pass None if the value is null
+                        "type": param.get('type')
+                    }
+                else:
+                    # Improved type handling
+                    if param_type == "float":
+                        value = float(value)  # Ensure value is converted to float
+                    elif param_type == "int":
+                        value = int(value)  # Ensure value is converted to int
+                    elif param_type == "bool":
+                        value = bool(value)  # Ensure value is converted to bool
+                    elif param_type == "string":
+                        value = str(value)  # Ensure value is converted to string
+                    else:
+                        raise ValueError(f"Unsupported parameter type: {param_type}")  # Handle unsupported types
+
+                    formatted_param = {
+                        "name": param.get('name'),
+                        "value": value,
+                        "type": param.get('type')
+                    }
+                formatted_parameters.append(formatted_param)
+            dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
+
+            # Prepare payload for add column API
+            add_column_payload = {
+                "rowFilterList": [],
+                "columnName": column_name,
+                "datasetId": dataset_id,
+                "variables": variables,
+                "promptTemplate": {
+                    "textFields": text_fields,
+                    "modelSpecs": {
+                        "model": f"{provider}/{model}",
+                        "parameters": formatted_parameters
+                    }
+                }
+            }
+            if variables:
+                variable_specs = []
+                for key, values in variables.items():
+                    variable_specs.append({
+                        "name": key,
+                        "type": "string",
+                        "schema": "query"
+                    })
+                add_column_payload["promptTemplate"]["variableSpecs"] = variable_specs
+
+            # Make API call to add column
+            add_column_url = f"{Dataset.BASE_URL}/v2/llm/dataset/add-column"
+
+            response = requests.post(
+                add_column_url,
+                headers={
+                    'Content-Type': 'application/json',
+                    'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
+                    "X-Project-Id": str(self.project_id)
+                },
+                json=add_column_payload,
+                timeout=30
+            )
+
+            # Check response
+            response.raise_for_status()
+            response_data = response.json()
+
+            if response_data.get('success', False):
+                print(f"Column '{column_name}' added successfully to dataset '{dataset_name}'")
+            else:
+                raise ValueError(response_data.get('message', 'Failed to add column'))
+
+        except requests.exceptions.RequestException as e:
+            print(f"Error adding column: {e}")
+            raise
+
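For orientation, a minimal usage sketch of the two Dataset methods added above. This is not part of the diff: the project name, dataset name, CSV path, and provider/model values are placeholders, and it assumes a Dataset handle created with a project name, as elsewhere in ragaai_catalyst.

# Illustrative sketch only -- placeholder names, not part of the released package.
from ragaai_catalyst import Dataset

dataset_manager = Dataset(project_name="my_project")  # hypothetical project

# Append rows from a local CSV; columns the CSV is missing are filled with None
# before upload (see add_rows above).
dataset_manager.add_rows(csv_path="new_rows.csv", dataset_name="my_dataset")

# Add a model-generated column; add_columns itself fetches the parameter list
# for the provider/model pair from the playground parameters endpoint.
dataset_manager.add_columns(
    text_fields=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the context column."},
    ],
    dataset_name="my_dataset",
    column_name="summary",
    provider="openai",
    model="gpt-4o-mini",
    variables={},
)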
@@ -44,6 +44,7 @@ class AgentTracerMixin:
         # Add auto instrument flags
         self.auto_instrument_agent = False
         self.auto_instrument_user_interaction = False
+        self.auto_instrument_file_io = False
         self.auto_instrument_network = False
 
     def trace_agent(
@@ -512,10 +513,22 @@ class AgentTracerMixin:
         network_calls = self.component_network_calls.get(kwargs["component_id"], [])
         interactions = []
         if self.auto_instrument_user_interaction:
-            interactions = self.component_user_interaction.get(
-                kwargs["component_id"], []
-            )
-        start_time = kwargs["start_time"]
+            input_output_interactions = []
+            for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
+                if interaction["interaction_type"] in ["input", "output"]:
+                    input_output_interactions.append(interaction)
+            interactions.extend(input_output_interactions)
+        if self.auto_instrument_file_io:
+            file_io_interactions = []
+            for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
+                if interaction["interaction_type"] in ["file_read", "file_write"]:
+                    file_io_interactions.append(interaction)
+            interactions.extend(file_io_interactions)
+
+        # Get start time
+        start_time = None
+        if "start_time" in kwargs:
+            start_time = kwargs["start_time"]
 
         # Get tags, metrics
         name = kwargs["name"]
@@ -621,3 +634,6 @@ class AgentTracerMixin:
 
     def instrument_network_calls(self):
        self.auto_instrument_network = True
+
+    def instrument_file_io_calls(self):
+        self.auto_instrument_file_io = True
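The change above splits captured user interactions by interaction_type: input/output events are attached only when user-interaction instrumentation is on, and file_read/file_write events only when the new instrument_file_io_calls() toggle has been used. A standalone sketch of that filtering, with made-up sample events:

# Standalone illustration of the filtering logic added above; the sample events
# are invented, but the interaction_type values match those used in the diff.
recorded = [
    {"interaction_type": "input", "content": "user prompt"},
    {"interaction_type": "file_read", "file_path": "notes.txt"},
    {"interaction_type": "output", "content": "agent reply"},
    {"interaction_type": "file_write", "file_path": "result.json"},
]

auto_instrument_user_interaction = True
auto_instrument_file_io = False  # stays False unless instrument_file_io_calls() is invoked

interactions = []
if auto_instrument_user_interaction:
    interactions.extend(i for i in recorded if i["interaction_type"] in ["input", "output"])
if auto_instrument_file_io:
    interactions.extend(i for i in recorded if i["interaction_type"] in ["file_read", "file_write"])

# With these flags, only the "input" and "output" events are kept.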
@@ -1,8 +1,5 @@
 import json
 import os
-import platform
-import psutil
-import pkg_resources
 from datetime import datetime
 from pathlib import Path
 from typing import List, Any, Dict
@@ -16,20 +13,9 @@ from ..data.data_structure import (
     Trace,
     Metadata,
     SystemInfo,
-    OSInfo,
-    EnvironmentInfo,
     Resources,
-    CPUResource,
-    MemoryResource,
-    DiskResource,
-    NetworkResource,
-    ResourceInfo,
-    MemoryInfo,
-    DiskInfo,
-    NetworkInfo,
     Component,
 )
-
 from ..upload.upload_agentic_traces import UploadAgenticTraces
 from ..upload.upload_code import upload_code
 from ..upload.upload_trace_metric import upload_trace_metric
@@ -37,9 +23,8 @@ from ..utils.file_name_tracker import TrackName
 from ..utils.zip_list_of_unique_files import zip_list_of_unique_files
 from ..utils.span_attributes import SpanAttributes
 from ..utils.create_dataset_schema import create_dataset_schema_with_trace
+from ..utils.system_monitor import SystemMonitor
 
-
-# Configure logging to show debug messages (which includes info messages as well)
 import logging
 
 logger = logging.getLogger(__name__)
@@ -76,12 +61,12 @@ class TracerJSONEncoder(json.JSONEncoder):
 class BaseTracer:
     def __init__(self, user_details):
         self.user_details = user_details
-        self.project_name = self.user_details["project_name"]  # Access the project_name
-        self.dataset_name = self.user_details["dataset_name"]  # Access the dataset_name
-        self.project_id = self.user_details["project_id"]  # Access the project_id
-        self.trace_name = self.user_details["trace_name"]  # Access the trace_name
+        self.project_name = self.user_details["project_name"]
+        self.dataset_name = self.user_details["dataset_name"]
+        self.project_id = self.user_details["project_id"]
+        self.trace_name = self.user_details["trace_name"]
         self.visited_metrics = []
-        self.trace_metrics = []  # Store metrics here
+        self.trace_metrics = []
 
         # Initialize trace data
         self.trace_id = None
@@ -97,117 +82,60 @@ class BaseTracer:
         self.network_usage_list = []
         self.tracking_thread = None
         self.tracking = False
+        self.system_monitor = None
 
     def _get_system_info(self) -> SystemInfo:
-        # Get OS info
-        os_info = OSInfo(
-            name=platform.system(),
-            version=platform.version(),
-            platform=platform.machine(),
-            kernel_version=platform.release(),
-        )
-
-        # Get Python environment info
-        installed_packages = [
-            f"{pkg.key}=={pkg.version}" for pkg in pkg_resources.working_set
-        ]
-        env_info = EnvironmentInfo(
-            name="Python",
-            version=platform.python_version(),
-            packages=installed_packages,
-            env_path=sys.prefix,
-            command_to_run=f"python {sys.argv[0]}",
-        )
-
-        return SystemInfo(
-            id=f"sys_{self.trace_id}",
-            os=os_info,
-            environment=env_info,
-            source_code="Path to the source code .zip file in format hashid.zip",  # TODO: Implement source code archiving
-        )
+        return self.system_monitor.get_system_info()
 
     def _get_resources(self) -> Resources:
-        # CPU info
-        cpu_info = ResourceInfo(
-            name=platform.processor(),
-            cores=psutil.cpu_count(logical=False),
-            threads=psutil.cpu_count(logical=True),
-        )
-        cpu = CPUResource(info=cpu_info, interval="5s", values=[psutil.cpu_percent()])
-
-        # Memory info
-        memory = psutil.virtual_memory()
-        mem_info = MemoryInfo(
-            total=memory.total / (1024**3),  # Convert to GB
-            free=memory.available / (1024**3),
-        )
-        mem = MemoryResource(info=mem_info, interval="5s", values=[memory.percent])
-
-        # Disk info
-        disk = psutil.disk_usage("/")
-        disk_info = DiskInfo(total=disk.total / (1024**3), free=disk.free / (1024**3))
-        disk_io = psutil.disk_io_counters()
-        disk_resource = DiskResource(
-            info=disk_info,
-            interval="5s",
-            read=[disk_io.read_bytes / (1024**2)],  # MB
-            write=[disk_io.write_bytes / (1024**2)],
-        )
-
-        # Network info
-        net_io = psutil.net_io_counters()
-        net_info = NetworkInfo(
-            upload_speed=net_io.bytes_sent / (1024**2),  # MB
-            download_speed=net_io.bytes_recv / (1024**2),
-        )
-        net = NetworkResource(
-            info=net_info,
-            interval="5s",
-            uploads=[net_io.bytes_sent / (1024**2)],
-            downloads=[net_io.bytes_recv / (1024**2)],
-        )
-
-        return Resources(cpu=cpu, memory=mem, disk=disk_resource, network=net)
+        return self.system_monitor.get_resources()
 
     def _track_memory_usage(self):
         self.memory_usage_list = []
         while self.tracking:
-            memory_usage = psutil.Process().memory_info().rss
-            self.memory_usage_list.append(memory_usage / (1024 * 1024))  # Convert to MB and append to the list
-            time.sleep(self.interval_time)
+            usage = self.system_monitor.track_memory_usage()
+            self.memory_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in memory tracking: {str(e)}")
 
     def _track_cpu_usage(self):
         self.cpu_usage_list = []
         while self.tracking:
-            cpu_usage = psutil.cpu_percent(interval=self.interval_time)
-            self.cpu_usage_list.append(cpu_usage)
-            time.sleep(self.interval_time)
+            usage = self.system_monitor.track_cpu_usage(self.interval_time)
+            self.cpu_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in CPU tracking: {str(e)}")
 
     def _track_disk_usage(self):
         self.disk_usage_list = []
         while self.tracking:
-            disk_io = psutil.disk_io_counters()
-            self.disk_usage_list.append({
-                'disk_read': disk_io.read_bytes / (1024 * 1024),  # Convert to MB
-                'disk_write': disk_io.write_bytes / (1024 * 1024)  # Convert to MB
-            })
-            time.sleep(self.interval_time)
+            usage = self.system_monitor.track_disk_usage()
+            self.disk_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in disk tracking: {str(e)}")
 
     def _track_network_usage(self):
         self.network_usage_list = []
         while self.tracking:
-            net_io = psutil.net_io_counters()
-            self.network_usage_list.append({
-                'uploads': net_io.bytes_sent / (1024 * 1024),  # Convert to MB
-                'downloads': net_io.bytes_recv / (1024 * 1024)  # Convert to MB
-            })
-            time.sleep(self.interval_time)
+            usage = self.system_monitor.track_network_usage()
+            self.network_usage_list.append(usage)
+            try:
+                time.sleep(self.interval_time)
+            except Exception as e:
+                logger.warning(f"Sleep interrupted in network tracking: {str(e)}")
 
     def start(self):
         """Initialize a new trace"""
         self.tracking = True
-        self.tracking_thread = threading.Thread(target=self._track_memory_usage)
-        self.tracking_thread.start()
+        self.trace_id = str(uuid.uuid4())
+        self.system_monitor = SystemMonitor(self.trace_id)
+        threading.Thread(target=self._track_memory_usage).start()
         threading.Thread(target=self._track_cpu_usage).start()
         threading.Thread(target=self._track_disk_usage).start()
         threading.Thread(target=self._track_network_usage).start()
@@ -223,9 +151,6 @@ class BaseTracer:
             resources=self._get_resources(),
         )
 
-        # Generate a unique trace ID, when trace starts
-        self.trace_id = str(uuid.uuid4())
-
         # Get the start time
         self.start_time = datetime.now().astimezone().isoformat()
 
@@ -257,8 +182,6 @@ class BaseTracer:
 
         #track memory usage
         self.tracking = False
-        if self.tracking_thread is not None:
-            self.tracking_thread.join()
         self.trace.metadata.resources.memory.values = self.memory_usage_list
 
         #track cpu usage
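The BaseTracer changes above replace the inline platform/psutil collection with calls to a new SystemMonitor helper imported from ..utils.system_monitor. The call sites imply its shape: constructed with the trace id, returning SystemInfo/Resources objects and one sample per track_*() call. A rough sketch inferred from those call sites; the actual implementation in the package may differ:

# Sketch of the interface BaseTracer relies on above; not the package's actual
# system_monitor module. Per-sample logic mirrors the psutil code removed here.
import psutil

class SystemMonitor:
    def __init__(self, trace_id: str):
        self.trace_id = trace_id

    def get_system_info(self):  # returns a SystemInfo in the real code
        ...

    def get_resources(self):  # returns a Resources in the real code
        ...

    def track_memory_usage(self) -> float:
        # One RSS sample in MB, as the old inline BaseTracer logic did.
        return psutil.Process().memory_info().rss / (1024 * 1024)

    def track_cpu_usage(self, interval: float) -> float:
        return psutil.cpu_percent(interval=interval)

    def track_disk_usage(self) -> dict:
        io = psutil.disk_io_counters()
        return {"disk_read": io.read_bytes / (1024 * 1024), "disk_write": io.write_bytes / (1024 * 1024)}

    def track_network_usage(self) -> dict:
        io = psutil.net_io_counters()
        return {"uploads": io.bytes_sent / (1024 * 1024), "downloads": io.bytes_recv / (1024 * 1024)}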
@@ -25,6 +25,7 @@ class CustomTracerMixin:
         self.auto_instrument_custom = False
         self.auto_instrument_user_interaction = False
         self.auto_instrument_network = False
+        self.auto_instrument_file_io = False
 
     def trace_custom(self, name: str = None, custom_type: str = "generic", version: str = "1.0.0", trace_variables: bool = True):
         def decorator(func):
@@ -246,8 +247,18 @@ class CustomTracerMixin:
 
         interactions = []
         if self.auto_instrument_user_interaction:
-            interactions = self.component_user_interaction.get(kwargs["component_id"], [])
-
+            input_output_interactions = []
+            for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
+                if interaction["interaction_type"] in ["input", "output"]:
+                    input_output_interactions.append(interaction)
+            interactions.extend(input_output_interactions)
+        if self.auto_instrument_file_io:
+            file_io_interactions = []
+            for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
+                if interaction["interaction_type"] in ["file_read", "file_write"]:
+                    file_io_interactions.append(interaction)
+            interactions.extend(file_io_interactions)
+
         component = {
             "id": kwargs["component_id"],
             "hash_id": kwargs["hash_id"],
@@ -314,3 +325,7 @@ class CustomTracerMixin:
     def instrument_network_calls(self):
         """Enable auto-instrumentation for network calls"""
         self.auto_instrument_network = True
+
+    def instrument_file_io_calls(self):
+        """Enable auto-instrumentation for file IO calls"""
+        self.auto_instrument_file_io = True
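CustomTracerMixin gains the same file I/O toggle as AgentTracerMixin, so a tracer composed from these mixins can opt into file I/O capture explicitly. A hedged usage sketch; how the public Tracer class exposes these toggles is not shown in this diff:

# Sketch only: assumes `tracer` composes AgentTracerMixin / CustomTracerMixin.
tracer.instrument_network_calls()   # existing toggle
tracer.instrument_file_io_calls()   # new in 2.1.5b2: attach file_read/file_write interactions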