ragaai-catalyst 2.2.4b5__py3-none-any.whl → 2.2.5b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. ragaai_catalyst/__init__.py +0 -2
  2. ragaai_catalyst/dataset.py +59 -1
  3. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +5 -285
  4. ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -2
  5. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +1 -1
  6. ragaai_catalyst/tracers/exporters/__init__.py +1 -2
  7. ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -1
  8. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +23 -1
  9. ragaai_catalyst/tracers/tracer.py +6 -186
  10. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/METADATA +1 -1
  11. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/RECORD +14 -45
  12. ragaai_catalyst/experiment.py +0 -486
  13. ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -536
  14. ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -134
  15. ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -563
  16. ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
  17. ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -197
  18. ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -172
  19. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +0 -687
  20. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +0 -1319
  21. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +0 -347
  22. ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +0 -1182
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -288
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +0 -557
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -129
  27. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +0 -74
  28. ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -21
  29. ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -32
  30. ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -28
  31. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -133
  32. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +0 -34
  33. ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -467
  34. ragaai_catalyst/tracers/langchain_callback.py +0 -821
  35. ragaai_catalyst/tracers/llamaindex_callback.py +0 -361
  36. ragaai_catalyst/tracers/llamaindex_instrumentation.py +0 -424
  37. ragaai_catalyst/tracers/upload_traces.py +0 -170
  38. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +0 -62
  39. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +0 -69
  40. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +0 -74
  41. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +0 -82
  42. ragaai_catalyst/tracers/utils/rag_trace_json_converter.py +0 -403
  43. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/WHEEL +0 -0
  44. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/licenses/LICENSE +0 -0
  45. {ragaai_catalyst-2.2.4b5.dist-info → ragaai_catalyst-2.2.5b2.dist-info}/top_level.txt +0 -0
@@ -1,557 +0,0 @@
1
- import os
2
- import uuid
3
- from datetime import datetime
4
- from langchain_core.tools import tool
5
- import psutil
6
- import functools
7
- from typing import Optional, Any, Dict, List
8
-
9
- from pydantic import tools
10
- from .base import BaseTracer
11
- from ..utils.unique_decorator import generate_unique_hash_simple
12
- import contextvars
13
- import asyncio
14
- from ..utils.file_name_tracker import TrackName
15
- from ..utils.span_attributes import SpanAttributes
16
- import logging
17
- import wrapt
18
- import time
19
- import inspect
20
-
21
# Module-level logger for the tool tracer.
logger = logging.getLogger(__name__)

# Verbosity is controlled by the DEBUG environment variable: any non-empty
# value selects DEBUG, otherwise INFO.
# NOTE: Logger.setLevel() returns None, so the previous form
# `logging_level = (logger.setLevel(...) if ... else logger.setLevel(...))`
# always bound `logging_level` to None. Bind the real level and apply it once.
logging_level = logging.DEBUG if os.getenv("DEBUG") else logging.INFO
logger.setLevel(logging_level)
27
-
28
-
29
class ToolTracerMixin:
    """Mixin that records tool invocations as trace components.

    The mixin does not define everything it uses. The host class is expected
    to provide ``is_active``, ``span()``, ``span_attributes_dict``,
    ``add_component()``, ``current_agent_id``, ``visited_metrics`` and
    ``project_id``; all of them are read by the methods below.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.file_tracker = TrackName()
        self.current_tool_name = contextvars.ContextVar("tool_name", default=None)
        self.current_tool_id = contextvars.ContextVar("tool_id", default=None)
        self.component_network_calls = {}
        self.component_user_interaction = {}
        self.gt = None

        # Auto-instrumentation switches; each is enabled by its
        # ``instrument_*`` method.
        self.auto_instrument_tool = False
        self.auto_instrument_user_interaction = False
        self.auto_instrument_file_io = False
        self.auto_instrument_network = False
        self._instrumented_tools = set()  # Track which tools we've instrumented
        self._method_usage = {}  # Track which methods are actually used
        self._active_tool_calls = set()  # Track active tool calls to prevent duplicates

    def instrument_tool_calls(self):
        """Enable tool instrumentation.

        Patches the LangChain tool modules that are already imported and
        registers post-import hooks so the others are patched on import.
        """
        self.auto_instrument_tool = True

        # Handle modules that are already imported
        import sys

        if "langchain_community.tools" in sys.modules:
            self.patch_langchain_tools(sys.modules["langchain_community.tools"])

        if "langchain.tools" in sys.modules:
            self.patch_langchain_tools(sys.modules["langchain.tools"])

        if "langchain_core.tools" in sys.modules:
            self.patch_langchain_core_tools(sys.modules["langchain_core.tools"])

        # Register hooks for future imports
        wrapt.register_post_import_hook(
            self.patch_langchain_tools, "langchain_community.tools"
        )
        wrapt.register_post_import_hook(
            self.patch_langchain_tools, "langchain.tools"
        )
        wrapt.register_post_import_hook(
            self.patch_langchain_core_tools, "langchain_core.tools"
        )

    def patch_langchain_core_tools(self, module):
        """Install lazy proxies on the ``langchain_core.tools`` tool classes.

        ``module`` is accepted so the method can be used as a post-import
        hook; the classes themselves are imported by name.
        """
        from langchain_core.tools import BaseTool, StructuredTool, Tool

        # Process tool classes in order of inheritance (base class first).
        tool_classes = [BaseTool]
        # Only add derived classes that do not inherit from a class that is
        # already in the list.
        for tool_class in [StructuredTool, Tool]:
            if not any(issubclass(tool_class, processed) for processed in tool_classes):
                tool_classes.append(tool_class)

        for tool_class in tool_classes:
            if tool_class in self._instrumented_tools:
                continue
            # Create proxy instead of directly wrapping methods
            self.ToolMethodProxy(self, tool_class, tool_class.__name__)
            self._instrumented_tools.add(tool_class)

    def patch_langchain_tools(self, module):
        """Install lazy proxies on every tool exported by ``module``'s members.

        Each attribute of ``module`` that has an ``__all__`` list is scanned,
        and every name in that list is proxied once.
        """
        for member_name in dir(module):
            member = getattr(module, member_name)
            exported = getattr(member, "__all__", None)
            if exported is None:
                continue
            for tool_name in exported:
                tool_class = getattr(member, tool_name)
                # Skip if already instrumented
                if tool_class in self._instrumented_tools:
                    continue

                # Create proxy instead of directly wrapping methods
                self.ToolMethodProxy(self, tool_class, tool_name)
                self._instrumented_tools.add(tool_class)

    class ToolMethodProxy:
        """Replace a tool class's entry points with first-call proxies.

        On the first call through any proxy, all original methods are put
        back on the class and only the method that was called is wrapped
        with tracing (via ``tracer._wrap_specific_method``).
        """

        def __init__(self, tracer, tool_class, tool_name):
            self.tracer = tracer
            self.tool_class = tool_class
            self.tool_name = tool_name
            self._original_methods = {}
            self._wrapped = False

            # Remember each original method, then swap in its proxy.
            for method in ['run', 'arun', 'invoke', 'ainvoke']:
                if hasattr(tool_class, method):
                    self._original_methods[method] = getattr(tool_class, method)
                    setattr(tool_class, method, self._create_proxy_method(method))

        def _create_proxy_method(self, method_name):
            """Build the sync or async proxy for ``method_name``."""
            original_method = self._original_methods[method_name]

            def ensure_wrapped():
                # First call only: restore originals, then wrap this method.
                if not self._wrapped:
                    self._cleanup_proxy()
                    self.tracer._wrap_specific_method(
                        self.tool_class, method_name, self.tool_name
                    )
                    self._wrapped = True

            async def async_proxy_method(*args, **kwargs):
                ensure_wrapped()
                # Get the now-wrapped method
                wrapped_method = getattr(self.tool_class, method_name)
                return await wrapped_method(*args, **kwargs)

            def sync_proxy_method(*args, **kwargs):
                ensure_wrapped()
                # Get the now-wrapped method
                wrapped_method = getattr(self.tool_class, method_name)
                return wrapped_method(*args, **kwargs)

            # inspect.iscoroutinefunction replaces the asyncio variant, which
            # is deprecated as of Python 3.14.
            if inspect.iscoroutinefunction(original_method):
                proxy_method = async_proxy_method
            else:
                proxy_method = sync_proxy_method
            proxy_method.__name__ = method_name
            return proxy_method

        def _cleanup_proxy(self):
            """Put every original method back on the class (no-op once wrapped)."""
            if self._wrapped:
                return
            for method, original in self._original_methods.items():
                setattr(self.tool_class, method, original)

    def _wrap_specific_method(self, tool_class, method_name, tool_name):
        """Wrap only the specific method that is being used.

        Calls that carry a ``tool_call_id`` keyword which is already being
        traced go straight to the original method, so the same call is not
        traced twice.
        """
        original_method = getattr(tool_class, method_name)

        async def async_wrapper(*args, **kwargs):
            tool_call_id = kwargs.get('tool_call_id', None)
            if tool_call_id and tool_call_id in self._active_tool_calls:
                # Skip tracing if this tool call is already being traced
                return await original_method(*args, **kwargs)

            if tool_call_id:
                self._active_tool_calls.add(tool_call_id)
            try:
                return await self._trace_tool_execution(
                    original_method, tool_name, "langchain", None, *args, **kwargs
                )
            finally:
                if tool_call_id:
                    # discard(): a missing id must not raise KeyError here
                    # and mask the tool's own exception.
                    self._active_tool_calls.discard(tool_call_id)

        def sync_wrapper(*args, **kwargs):
            tool_call_id = kwargs.get('tool_call_id', None)
            if tool_call_id and tool_call_id in self._active_tool_calls:
                # Skip tracing if this tool call is already being traced
                return original_method(*args, **kwargs)

            if tool_call_id:
                self._active_tool_calls.add(tool_call_id)
            try:
                return self._trace_sync_tool_execution(
                    original_method, tool_name, "langchain", None, *args, **kwargs
                )
            finally:
                if tool_call_id:
                    self._active_tool_calls.discard(tool_call_id)

        wrapper = async_wrapper if inspect.iscoroutinefunction(original_method) else sync_wrapper
        wrapper.__name__ = method_name
        setattr(tool_class, method_name, wrapper)

    def instrument_user_interaction_calls(self):
        """Include input/output interactions in tool components."""
        self.auto_instrument_user_interaction = True

    def instrument_file_io_calls(self):
        """Include file read/write interactions in tool components."""
        self.auto_instrument_file_io = True

    def instrument_network_calls(self):
        """Include recorded network calls in tool components."""
        self.auto_instrument_network = True

    def trace_tool(
        self,
        name: str,
        tool_type: str = "generic",
        version: str = "1.0.0",
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        metrics: Optional[List[Dict[str, Any]]] = None,
        feedback: Optional[Any] = None,
    ):
        """Return a decorator that traces the decorated function as a tool.

        Args:
            name: Span/tool name; a ``SpanAttributes`` entry is created for
                it if none exists yet.
            tool_type: Stored in the wrapper metadata and the component info.
            version: Stored in the wrapper metadata and the component info.
            tags: Optional tags added to the span.
            metadata: Optional metadata added to the span.
            metrics: Optional metric dict, or list of metric dicts, each with
                at least ``name`` and ``score``. Errors while adding metrics
                are logged, not raised.
            feedback: Optional feedback added to the span.

        Returns:
            A decorator. The wrapped function keeps its sync/async nature and
            gets a ``metadata`` attribute. A ``gt`` keyword argument, when
            present and not None, is recorded on the span; it is still passed
            through to the function.
        """
        # Defaults are None instead of shared mutable literals.
        if name not in self.span_attributes_dict:
            self.span_attributes_dict[name] = SpanAttributes(name)
        if tags:
            self.span(name).add_tags(tags)
        if metadata:
            self.span(name).add_metadata(metadata)
        if metrics:
            if isinstance(metrics, dict):
                metrics = [metrics]
            try:
                for metric in metrics:
                    self.span(name).add_metrics(
                        name=metric["name"],
                        score=metric["score"],
                        reasoning=metric.get("reasoning", ""),
                        cost=metric.get("cost", None),
                        latency=metric.get("latency", None),
                        metadata=metric.get("metadata", {}),
                        config=metric.get("config", {}),
                    )
            except ValueError as e:
                logger.error(f"Validation Error: {e}")
            except Exception as e:
                logger.error(f"Error adding metric: {e}")

        if feedback:
            self.span(name).add_feedback(feedback)

        def decorator(func):
            # Metadata attached to the wrapper. Named differently from the
            # ``metadata`` parameter so the two are not confused.
            wrapper_metadata = {
                "name": name,
                "tool_type": tool_type,
                "version": version,
                "is_active": self.is_active,
            }

            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                async_wrapper.metadata = wrapper_metadata
                gt = kwargs.get("gt")
                if gt is not None:
                    self.span(name).add_gt(gt)
                return await self._trace_tool_execution(
                    func, name, tool_type, version, *args, **kwargs
                )

            @functools.wraps(func)
            def sync_wrapper(*args, **kwargs):
                sync_wrapper.metadata = wrapper_metadata
                gt = kwargs.get("gt")
                if gt is not None:
                    self.span(name).add_gt(gt)
                return self._trace_sync_tool_execution(
                    func, name, tool_type, version, *args, **kwargs
                )

            wrapper = async_wrapper if inspect.iscoroutinefunction(func) else sync_wrapper
            wrapper.metadata = wrapper_metadata
            return wrapper

        return decorator

    def _trace_sync_tool_execution(
        self, func, name, tool_type, version, *args, **kwargs
    ):
        """Synchronous version of tool tracing.

        Calls ``func`` directly when the tracer is inactive or tool
        instrumentation is off. Otherwise it records a tool component for
        both success and failure; exceptions from ``func`` are re-raised.
        """
        if not self.is_active or not self.auto_instrument_tool:
            return func(*args, **kwargs)

        start_time = datetime.now().astimezone()
        start_memory = psutil.Process().memory_info().rss
        component_id = str(uuid.uuid4())
        hash_id = generate_unique_hash_simple(func)

        # Set current tool name/id and keep the tokens for the reset below.
        name_token = self.current_tool_name.set(name)
        id_token = self.current_tool_id.set(component_id)

        # Start tracking network calls for this component
        self.start_component(component_id)

        try:
            # Execute the tool
            result = func(*args, **kwargs)

            # Resident-memory growth during the call, clamped at zero.
            end_memory = psutil.Process().memory_info().rss
            memory_used = max(0, end_memory - start_memory)

            # End tracking network calls for this component
            self.end_component(component_id)

            tool_component = self.create_tool_component(
                component_id=component_id,
                hash_id=hash_id,
                name=name,
                tool_type=tool_type,
                version=version,
                memory_used=memory_used,
                start_time=start_time,
                input_data=self._sanitize_input(args, kwargs),
                output_data=self._sanitize_output(result),
            )

            self.add_component(tool_component)

            return result

        except Exception as e:
            error_component = {
                "code": 500,
                "type": type(e).__name__,
                "message": str(e),
                "details": {},
            }

            # End tracking network calls for this component
            self.end_component(component_id)

            tool_component = self.create_tool_component(
                component_id=component_id,
                hash_id=hash_id,
                name=name,
                tool_type=tool_type,
                version=version,
                memory_used=0,
                start_time=start_time,
                input_data=self._sanitize_input(args, kwargs),
                output_data=None,
                error=error_component,
            )

            self.add_component(tool_component, is_error=True)

            raise
        finally:
            # Reset the tool name and id context
            self.current_tool_name.reset(name_token)
            self.current_tool_id.reset(id_token)

    async def _trace_tool_execution(
        self, func, name, tool_type, version, *args, **kwargs
    ):
        """Asynchronous version of tool tracing.

        Same contract as ``_trace_sync_tool_execution`` but awaits ``func``.
        """
        if not self.is_active or not self.auto_instrument_tool:
            return await func(*args, **kwargs)

        start_time = datetime.now().astimezone()
        start_memory = psutil.Process().memory_info().rss
        component_id = str(uuid.uuid4())
        hash_id = generate_unique_hash_simple(func)

        # Set current tool name/id and keep the tokens for the reset below.
        name_token = self.current_tool_name.set(name)
        id_token = self.current_tool_id.set(component_id)

        self.start_component(component_id)
        try:
            # Execute the tool
            result = await func(*args, **kwargs)

            # Resident-memory growth during the call, clamped at zero.
            end_memory = psutil.Process().memory_info().rss
            memory_used = max(0, end_memory - start_memory)
            self.end_component(component_id)

            tool_component = self.create_tool_component(
                component_id=component_id,
                hash_id=hash_id,
                name=name,
                tool_type=tool_type,
                version=version,
                start_time=start_time,
                memory_used=memory_used,
                input_data=self._sanitize_input(args, kwargs),
                output_data=self._sanitize_output(result),
            )
            self.add_component(tool_component)

            return result

        except Exception as e:
            error_component = {
                "code": 500,
                "type": type(e).__name__,
                "message": str(e),
                "details": {},
            }

            # Match the sync path: every start_component gets an
            # end_component, on failure as well as success.
            self.end_component(component_id)

            tool_component = self.create_tool_component(
                component_id=component_id,
                hash_id=hash_id,
                name=name,
                tool_type=tool_type,
                version=version,
                start_time=start_time,
                memory_used=0,
                input_data=self._sanitize_input(args, kwargs),
                output_data=None,
                error=error_component,
            )
            self.add_component(tool_component, is_error=True)

            raise
        finally:
            # Reset the tool name and id context
            self.current_tool_name.reset(name_token)
            self.current_tool_id.reset(id_token)

    def create_tool_component(self, **kwargs):
        """Create a tool component according to the data structure.

        Required keyword arguments: ``component_id``, ``hash_id``, ``name``,
        ``tool_type``, ``version``, ``memory_used``, ``start_time``,
        ``input_data`` and ``output_data``. ``error`` is optional.

        Side effects: appends to ``self.visited_metrics``, renames metrics in
        place, and resets the ``SpanAttributes`` entry for ``name``.
        """
        component_id = kwargs["component_id"]
        name = kwargs["name"]

        network_calls = []
        if self.auto_instrument_network:
            network_calls = self.component_network_calls.get(component_id, [])

        # Input/output interactions come first, then file interactions.
        interactions = []
        if self.auto_instrument_user_interaction:
            interactions.extend(
                interaction
                for interaction in self.component_user_interaction.get(component_id, [])
                if interaction["interaction_type"] in ["input", "output"]
            )
        if self.auto_instrument_file_io:
            interactions.extend(
                interaction
                for interaction in self.component_user_interaction.get(component_id, [])
                if interaction["interaction_type"] in ["file_read", "file_write"]
            )

        # tags
        tags = []
        if name in self.span_attributes_dict:
            tags = self.span_attributes_dict[name].tags or []

        # metrics
        metrics = []
        if name in self.span_attributes_dict:
            raw_metrics = self.span_attributes_dict[name].metrics or []
            for metric in raw_metrics:
                base_metric_name = metric["name"]
                # Add a numeric suffix when a metric name with this prefix
                # has already been emitted.
                counter = sum(1 for x in self.visited_metrics if x.startswith(base_metric_name))
                metric_name = f'{base_metric_name}_{counter}' if counter > 0 else base_metric_name
                self.visited_metrics.append(metric_name)
                metric["name"] = metric_name
                metrics.append(metric)

        formatted_metrics = BaseTracer.get_formatted_metric(self.span_attributes_dict, self.project_id, name)
        if formatted_metrics:
            metrics.extend(formatted_metrics)

        start_time = kwargs["start_time"]
        component = {
            "id": component_id,
            "hash_id": kwargs["hash_id"],
            "source_hash_id": None,
            "type": "tool",
            "name": name,
            "start_time": start_time.isoformat(),
            "end_time": datetime.now().astimezone().isoformat(),
            "error": kwargs.get("error"),
            "parent_id": self.current_agent_id.get(),
            "info": {
                "tool_type": kwargs["tool_type"],
                "version": kwargs["version"],
                "memory_used": kwargs["memory_used"],
                "tags": tags,
            },
            "data": {
                "input": kwargs["input_data"],
                "output": kwargs["output_data"],
                "memory_used": kwargs["memory_used"],
            },
            "metrics": metrics,
            "network_calls": network_calls,
            "interactions": interactions,
        }

        if name in self.span_attributes_dict:
            span_gt = self.span_attributes_dict[name].gt
            if span_gt is not None:
                component["data"]["gt"] = span_gt
            span_context = self.span_attributes_dict[name].context
            if span_context:
                component["data"]["context"] = span_context

        # Start the next call of this tool with fresh span attributes.
        self.span_attributes_dict[name] = SpanAttributes(name)

        return component

    def start_component(self, component_id):
        """Start an empty network-call list for ``component_id``."""
        self.component_network_calls[component_id] = []

    def end_component(self, component_id):
        """Hook called when a component finishes; does nothing here."""
        pass

    def _sanitize_input(self, args: tuple, kwargs: dict) -> dict:
        """Sanitize and format input data, including nested lists and dicts.

        ``int``/``float``/``bool``/``str`` values pass through, lists and
        dicts are processed recursively, and everything else (tuples and
        ``None`` included) is converted with ``str()``.
        """

        def sanitize_value(value):
            if isinstance(value, (int, float, bool, str)):
                return value
            if isinstance(value, list):
                return [sanitize_value(item) for item in value]
            if isinstance(value, dict):
                return {key: sanitize_value(val) for key, val in value.items()}
            return str(value)  # Convert non-standard types to string

        return {
            "args": [sanitize_value(arg) for arg in args],
            "kwargs": {key: sanitize_value(val) for key, val in kwargs.items()},
        }

    def _sanitize_output(self, output: Any) -> Any:
        """Return primitives, lists and dicts as-is; stringify anything else."""
        if isinstance(output, (int, float, bool, str, list, dict)):
            return output
        return str(output)
@@ -1,129 +0,0 @@
1
- import builtins
2
- from datetime import datetime
3
- import contextvars
4
- import inspect
5
- import uuid
6
- from typing import Optional, Any
7
-
8
class TracedFile:
    """File-object wrapper that reports ``read``/``write`` calls to a tracer.

    ``tracer`` must provide ``trace_file_operation(operation, file_path,
    **kwargs)``. Any attribute not defined here is delegated to the wrapped
    file object and is not traced.
    """

    def __init__(self, file_obj, file_path: str, tracer):
        self._file = file_obj
        self._file_path = file_path
        self._tracer = tracer

    def write(self, content: str) -> int:
        """Report the write to the tracer, then write to the file."""
        self._tracer.trace_file_operation("write", self._file_path, content=content)
        return self._file.write(content)

    def read(self, size: Optional[int] = None) -> str:
        """Read from the file and report what was read to the tracer."""
        content = self._file.read() if size is None else self._file.read(size)
        self._tracer.trace_file_operation("read", self._file_path, content=content)
        return content

    def close(self) -> None:
        return self._file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return None

    # Implicit special-method lookup (``for line in f``, ``next(f)``) goes
    # through the type and bypasses ``__getattr__``, so iteration must be
    # delegated explicitly or it fails with TypeError.
    def __iter__(self):
        return iter(self._file)

    def __next__(self):
        return next(self._file)

    def __getattr__(self, name: str) -> Any:
        # ``__getattr__`` only runs when normal lookup fails. If ``_file``
        # itself is missing (instance not initialised yet), delegating would
        # recurse forever, so raise instead.
        if name == "_file":
            raise AttributeError(name)
        return getattr(self._file, name)
35
-
36
class UserInteractionTracer:
    """Context manager that records ``input``, ``print`` and ``open`` calls.

    While the context is active, ``builtins.input``, ``builtins.print`` and
    ``builtins.open`` are replaced with the traced versions below. Each
    recorded event is a dict appended to ``self.interactions``.
    """

    def __init__(self, *args, **kwargs):
        self.project_id = contextvars.ContextVar("project_id", default=None)
        self.trace_id = contextvars.ContextVar("trace_id", default=None)
        self.tracer = contextvars.ContextVar("tracer", default=None)
        self.component_id = contextvars.ContextVar("component_id", default=None)
        # The builtins are captured at construction time, not at __enter__.
        self.original_input = builtins.input
        self.original_print = builtins.print
        self.original_open = builtins.open
        self.interactions = []

    def traced_input(self, prompt=""):
        """Read a line via the original ``input`` and record it.

        A non-empty prompt is emitted through ``traced_print``, so it is
        recorded as an "output" interaction. ``EOFError`` yields ``""``.
        """
        if prompt:
            self.traced_print(prompt, end="")
        try:
            content = self.original_input()
        except EOFError:
            content = ""  # Return empty string on EOF

        self.interactions.append({
            "id": str(uuid.uuid4()),
            "component_id": self.component_id.get(),
            "interaction_type": "input",
            "content": content,
            "timestamp": datetime.now().astimezone().isoformat()
        })
        return content

    def traced_print(self, *args, **kwargs):
        """Record the printed text, then call the original ``print``."""
        # Join with the caller's separator so the record matches the output.
        # A non-string ``sep`` falls back to " " for the record only; the
        # original print still gets it and raises its own error.
        separator = kwargs.get("sep")
        if not isinstance(separator, str):
            separator = " "
        content = separator.join(str(arg) for arg in args)

        self.interactions.append({
            "id": str(uuid.uuid4()),
            "component_id": self.component_id.get(),
            "interaction_type": "output",
            "content": content,
            "timestamp": datetime.now().astimezone().isoformat()
        })
        return self.original_print(*args, **kwargs)

    def traced_open(self, file: str, mode: str = 'r', *args, **kwargs):
        """Open ``file``; wrap the handle in ``TracedFile`` unless skipped.

        Paths containing any of the system / virtual-environment markers
        below are opened with the original ``open`` and are not traced.
        """
        system_paths = [
            'site-packages',
            'dist-packages',
            '/proc/',
            '/sys/',
            '/var/lib/',
            '/usr/lib/',
            '/System/Library'
        ]

        file_str = str(file)
        file_obj = self.original_open(file, mode, *args, **kwargs)
        if any(path in file_str for path in system_paths):
            return file_obj

        # Record the string form so ``file_path`` is a plain string even
        # when ``file`` is a path-like object or a file descriptor.
        return TracedFile(file_obj, file_str, self)

    def trace_file_operation(self, operation: str, file_path: str, **kwargs):
        """Record a file operation, merging consecutive content when possible.

        Finds the most recent interaction with the same file path, operation
        and component id. If both have ``content`` of the same immutable text
        kind (both ``str`` or both ``bytes``), the new content is appended to
        that record; otherwise a new interaction is appended.
        """
        interaction_type = f"file_{operation}"
        component_id = self.component_id.get()

        for existing in reversed(self.interactions):
            # Requiring the same component id stops one component's I/O
            # from being merged into another component's record.
            if (existing.get("file_path") == file_path and
                    existing.get("interaction_type") == interaction_type and
                    existing.get("component_id") == component_id):
                if "content" in kwargs and "content" in existing:
                    old, new = existing["content"], kwargs["content"]
                    # Only merge str+str or bytes+bytes. Mixed types would
                    # raise TypeError inside the caller's read()/write(),
                    # and ``+=`` on a mutable buffer would modify the
                    # caller's own object.
                    both_str = isinstance(old, str) and isinstance(new, str)
                    both_bytes = isinstance(old, bytes) and isinstance(new, bytes)
                    if both_str or both_bytes:
                        existing["content"] = old + new
                        return
                break

        # No matching interaction found, or it could not be merged.
        interaction = {
            "id": str(uuid.uuid4()),
            "component_id": component_id,
            "interaction_type": interaction_type,
            "file_path": file_path,
            "timestamp": datetime.now().astimezone().isoformat()
        }
        interaction.update(kwargs)
        self.interactions.append(interaction)

    def __enter__(self):
        builtins.input = self.traced_input
        builtins.print = self.traced_print
        builtins.open = self.traced_open
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        builtins.input = self.original_input
        builtins.print = self.original_print
        builtins.open = self.original_open