ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,22 @@
|
|
1
1
|
import os
|
2
2
|
import uuid
|
3
3
|
from datetime import datetime
|
4
|
+
from langchain_core.tools import tool
|
4
5
|
import psutil
|
5
6
|
import functools
|
6
7
|
from typing import Optional, Any, Dict, List
|
8
|
+
|
9
|
+
from pydantic import tools
|
10
|
+
from .base import BaseTracer
|
7
11
|
from ..utils.unique_decorator import generate_unique_hash_simple
|
8
12
|
import contextvars
|
9
13
|
import asyncio
|
10
14
|
from ..utils.file_name_tracker import TrackName
|
11
15
|
from ..utils.span_attributes import SpanAttributes
|
12
16
|
import logging
|
17
|
+
import wrapt
|
18
|
+
import time
|
19
|
+
import inspect
|
13
20
|
|
14
21
|
logger = logging.getLogger(__name__)
|
15
22
|
logging_level = (
|
@@ -32,14 +39,168 @@ class ToolTracerMixin:
|
|
32
39
|
# add auto_instrument option
|
33
40
|
self.auto_instrument_tool = False
|
34
41
|
self.auto_instrument_user_interaction = False
|
42
|
+
self.auto_instrument_file_io = False
|
35
43
|
self.auto_instrument_network = False
|
44
|
+
self._instrumented_tools = set() # Track which tools we've instrumented
|
45
|
+
self._method_usage = {} # Track which methods are actually used
|
46
|
+
self._active_tool_calls = set() # Track active tool calls to prevent duplicates
|
36
47
|
|
37
48
|
# take care of auto_instrument
|
38
49
|
def instrument_tool_calls(self):
    """Enable tool instrumentation.

    Sets ``auto_instrument_tool`` and arranges for the LangChain tool
    modules to be patched: modules already present in ``sys.modules`` are
    patched immediately, and a wrapt post-import hook is registered for
    each module name so the same patcher is handed the module on import.
    """
    import sys

    self.auto_instrument_tool = True

    # (module name, patcher) pairs, listed in the order they are processed.
    targets = (
        ("langchain_community.tools", self.patch_langchain_tools),
        ("langchain.tools", self.patch_langchain_tools),
        ("langchain_core.tools", self.patch_langchain_core_tools),
    )

    # Pass 1: handle modules that are already imported.
    for module_name, patcher in targets:
        if module_name in sys.modules:
            patcher(sys.modules[module_name])

    # Pass 2: register hooks for future imports.
    for module_name, patcher in targets:
        wrapt.register_post_import_hook(patcher, module_name)
|
75
|
+
|
76
|
+
def patch_langchain_core_tools(self, module):
    """Install lazy tracing proxies on the ``langchain_core`` tool classes.

    ``module`` is accepted so the method can be used as a post-import
    hook; the body itself imports the classes it needs and does not read
    the argument.
    """
    from langchain_core.tools import BaseTool, StructuredTool, Tool

    # The base class is always selected. A further class is only added when
    # it does not derive from a class that has already been selected.
    selected = [BaseTool]
    for candidate in (StructuredTool, Tool):
        covered = False
        for chosen in selected:
            if issubclass(candidate, chosen):
                covered = True
                break
        if not covered:
            selected.append(candidate)

    for cls in selected:
        if cls not in self._instrumented_tools:
            # Constructing the proxy swaps the class's entry points for
            # placeholders; the instance itself is not kept here.
            self.ToolMethodProxy(self, cls, cls.__name__)
            self._instrumented_tools.add(cls)
|
93
|
+
|
94
|
+
def patch_langchain_tools(self, module):
    """Install lazy tracing proxies on tool classes exported via ``module``.

    Every attribute of ``module`` that defines ``__all__`` is scanned, and
    each class named in that ``__all__`` gets a ``ToolMethodProxy`` unless
    it is already recorded in ``self._instrumented_tools``.

    This runs as an import hook, so it is deliberately tolerant: names
    that cannot be resolved and exports that are not classes (functions,
    constants, possibly unhashable objects) are skipped instead of
    raising in the middle of the user's import.

    Args:
        module: The module object whose members are inspected.
    """
    for member_name in dir(module):
        member = getattr(module, member_name, None)
        exported_names = getattr(member, "__all__", None)
        if exported_names is None:
            continue
        for exported_name in exported_names:
            # ``__all__`` may list names the member does not actually define.
            tool_class = getattr(member, exported_name, None)
            # Only classes can carry the proxied entry points; this also
            # keeps unhashable objects away from the set lookup below.
            if not inspect.isclass(tool_class):
                continue
            # Skip if already instrumented
            if tool_class in self._instrumented_tools:
                continue

            # Create proxy instead of directly wrapping methods
            self.ToolMethodProxy(self, tool_class, exported_name)
            self._instrumented_tools.add(tool_class)
|
110
|
+
|
111
|
+
class ToolMethodProxy:
    """Placeholder installed on a tool class until an entry point is used.

    On construction, each of ``run``, ``arun``, ``invoke`` and ``ainvoke``
    that the class exposes is remembered and replaced by a thin proxy.
    The first call through any proxy puts all remembered originals back
    and asks the tracer to wrap only the method that was called; the call
    is then forwarded to whatever the class exposes under that name.
    """

    def __init__(self, tracer, tool_class, tool_name):
        self.tracer = tracer
        self.tool_class = tool_class
        self.tool_name = tool_name
        self._original_methods = {}
        self._wrapped = False

        # Remember each available entry point, then swap in its proxy.
        for entry_point in ('run', 'arun', 'invoke', 'ainvoke'):
            if not hasattr(tool_class, entry_point):
                continue
            self._original_methods[entry_point] = getattr(tool_class, entry_point)
            setattr(tool_class, entry_point, self._create_proxy_method(entry_point))

    def _create_proxy_method(self, method_name):
        """Build the sync or async placeholder for ``method_name``."""
        remembered = self._original_methods[method_name]

        def resolve():
            # First use: undo the placeholders and wrap only this method.
            if not self._wrapped:
                self._cleanup_proxy()
                self.tracer._wrap_specific_method(
                    self.tool_class, method_name, self.tool_name
                )
                self._wrapped = True
            # Fetch whatever the class exposes under this name now.
            return getattr(self.tool_class, method_name)

        async def async_proxy_method(*args, **kwargs):
            return await resolve()(*args, **kwargs)

        def sync_proxy_method(*args, **kwargs):
            return resolve()(*args, **kwargs)

        # The placeholder mirrors the remembered method's sync/async nature.
        if asyncio.iscoroutinefunction(remembered):
            proxy_method = async_proxy_method
        else:
            proxy_method = sync_proxy_method
        proxy_method.__name__ = method_name
        return proxy_method

    def _cleanup_proxy(self):
        """Put every remembered original back on the tool class.

        Does nothing once the proxy has been marked as wrapped.
        """
        if self._wrapped:
            return
        for method, original in self._original_methods.items():
            setattr(self.tool_class, method, original)
|
156
|
+
|
157
|
+
def _wrap_specific_method(self, tool_class, method_name, tool_name):
|
158
|
+
"""Wrap only the specific method that is being used"""
|
159
|
+
original_method = getattr(tool_class, method_name)
|
160
|
+
|
161
|
+
async def async_wrapper(*args, **kwargs):
|
162
|
+
tool_call_id = kwargs.get('tool_call_id', None)
|
163
|
+
if tool_call_id and tool_call_id in self._active_tool_calls:
|
164
|
+
# Skip tracing if this tool call is already being traced
|
165
|
+
return await original_method(*args, **kwargs)
|
166
|
+
|
167
|
+
if tool_call_id:
|
168
|
+
self._active_tool_calls.add(tool_call_id)
|
169
|
+
try:
|
170
|
+
name = tool_name
|
171
|
+
tool_type = "langchain"
|
172
|
+
version = None
|
173
|
+
return await self._trace_tool_execution(original_method, name, tool_type, version, *args, **kwargs)
|
174
|
+
finally:
|
175
|
+
if tool_call_id:
|
176
|
+
self._active_tool_calls.remove(tool_call_id)
|
177
|
+
|
178
|
+
def sync_wrapper(*args, **kwargs):
|
179
|
+
tool_call_id = kwargs.get('tool_call_id', None)
|
180
|
+
if tool_call_id and tool_call_id in self._active_tool_calls:
|
181
|
+
# Skip tracing if this tool call is already being traced
|
182
|
+
return original_method(*args, **kwargs)
|
183
|
+
|
184
|
+
if tool_call_id:
|
185
|
+
self._active_tool_calls.add(tool_call_id)
|
186
|
+
try:
|
187
|
+
name = tool_name
|
188
|
+
tool_type = "langchain"
|
189
|
+
version = None
|
190
|
+
return self._trace_sync_tool_execution(original_method, name, tool_type, version, *args, **kwargs)
|
191
|
+
finally:
|
192
|
+
if tool_call_id:
|
193
|
+
self._active_tool_calls.remove(tool_call_id)
|
194
|
+
|
195
|
+
wrapper = async_wrapper if asyncio.iscoroutinefunction(original_method) else sync_wrapper
|
196
|
+
wrapper.__name__ = method_name
|
197
|
+
setattr(tool_class, method_name, wrapper)
|
40
198
|
|
41
199
|
def instrument_user_interaction_calls(self):
    """Set the ``auto_instrument_user_interaction`` flag to ``True``."""
    self.auto_instrument_user_interaction = True
|
201
|
+
|
202
|
+
def instrument_file_io_calls(self):
    """Set the ``auto_instrument_file_io`` flag to ``True``."""
    self.auto_instrument_file_io = True
|
43
204
|
|
44
205
|
def instrument_network_calls(self):
    """Set the ``auto_instrument_network`` flag to ``True``."""
    self.auto_instrument_network = True
|
@@ -94,20 +255,24 @@ class ToolTracerMixin:
|
|
94
255
|
# Check if the function is async
|
95
256
|
is_async = asyncio.iscoroutinefunction(func)
|
96
257
|
|
97
|
-
@self.file_tracker.trace_decorator
|
98
258
|
@functools.wraps(func)
|
99
259
|
async def async_wrapper(*args, **kwargs):
|
100
260
|
async_wrapper.metadata = metadata
|
101
|
-
|
261
|
+
gt = kwargs.get("gt") if kwargs else None
|
262
|
+
if gt is not None:
|
263
|
+
span = self.span(name)
|
264
|
+
span.add_gt(gt)
|
102
265
|
return await self._trace_tool_execution(
|
103
266
|
func, name, tool_type, version, *args, **kwargs
|
104
267
|
)
|
105
268
|
|
106
|
-
@self.file_tracker.trace_decorator
|
107
269
|
@functools.wraps(func)
|
108
270
|
def sync_wrapper(*args, **kwargs):
|
109
271
|
sync_wrapper.metadata = metadata
|
110
|
-
|
272
|
+
gt = kwargs.get("gt") if kwargs else None
|
273
|
+
if gt is not None:
|
274
|
+
span = self.span(name)
|
275
|
+
span.add_gt(gt)
|
111
276
|
return self._trace_sync_tool_execution(
|
112
277
|
func, name, tool_type, version, *args, **kwargs
|
113
278
|
)
|
@@ -133,6 +298,10 @@ class ToolTracerMixin:
|
|
133
298
|
component_id = str(uuid.uuid4())
|
134
299
|
hash_id = generate_unique_hash_simple(func)
|
135
300
|
|
301
|
+
# Set current tool name and store the token
|
302
|
+
name_token = self.current_tool_name.set(name)
|
303
|
+
id_token = self.current_tool_id.set(component_id)
|
304
|
+
|
136
305
|
# Start tracking network calls for this component
|
137
306
|
self.start_component(component_id)
|
138
307
|
|
@@ -188,9 +357,15 @@ class ToolTracerMixin:
|
|
188
357
|
error=error_component,
|
189
358
|
)
|
190
359
|
|
191
|
-
self.add_component(tool_component)
|
360
|
+
self.add_component(tool_component, is_error=True)
|
192
361
|
|
193
362
|
raise
|
363
|
+
finally:
|
364
|
+
# Reset the tool name and id context
|
365
|
+
if name_token:
|
366
|
+
self.current_tool_name.reset(name_token)
|
367
|
+
if id_token:
|
368
|
+
self.current_tool_id.reset(id_token)
|
194
369
|
|
195
370
|
async def _trace_tool_execution(
|
196
371
|
self, func, name, tool_type, version, *args, **kwargs
|
@@ -207,6 +382,10 @@ class ToolTracerMixin:
|
|
207
382
|
component_id = str(uuid.uuid4())
|
208
383
|
hash_id = generate_unique_hash_simple(func)
|
209
384
|
|
385
|
+
# Set current tool name and store the token
|
386
|
+
name_token = self.current_tool_name.set(name)
|
387
|
+
id_token = self.current_tool_id.set(component_id)
|
388
|
+
|
210
389
|
self.start_component(component_id)
|
211
390
|
try:
|
212
391
|
# Execute the tool
|
@@ -253,9 +432,15 @@ class ToolTracerMixin:
|
|
253
432
|
output_data=None,
|
254
433
|
error=error_component,
|
255
434
|
)
|
256
|
-
self.add_component(tool_component)
|
435
|
+
self.add_component(tool_component, is_error=True)
|
257
436
|
|
258
437
|
raise
|
438
|
+
finally:
|
439
|
+
# Reset the tool name and id context
|
440
|
+
if name_token:
|
441
|
+
self.current_tool_name.reset(name_token)
|
442
|
+
if id_token:
|
443
|
+
self.current_tool_id.reset(id_token)
|
259
444
|
|
260
445
|
def create_tool_component(self, **kwargs):
|
261
446
|
"""Create a tool component according to the data structure"""
|
@@ -264,9 +449,19 @@ class ToolTracerMixin:
|
|
264
449
|
network_calls = self.component_network_calls.get(kwargs["component_id"], [])
|
265
450
|
interactions = []
|
266
451
|
if self.auto_instrument_user_interaction:
|
267
|
-
|
268
|
-
|
269
|
-
|
452
|
+
input_output_interactions = []
|
453
|
+
for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
|
454
|
+
if interaction["interaction_type"] in ["input", "output"]:
|
455
|
+
input_output_interactions.append(interaction)
|
456
|
+
if input_output_interactions!=[]:
|
457
|
+
interactions.extend(input_output_interactions)
|
458
|
+
if self.auto_instrument_file_io:
|
459
|
+
file_io_interactions = []
|
460
|
+
for interaction in self.component_user_interaction.get(kwargs["component_id"], []):
|
461
|
+
if interaction["interaction_type"] in ["file_read", "file_write"]:
|
462
|
+
file_io_interactions.append(interaction)
|
463
|
+
if file_io_interactions!=[]:
|
464
|
+
interactions.extend(file_io_interactions)
|
270
465
|
|
271
466
|
# Get tags, metrics
|
272
467
|
name = kwargs["name"]
|
@@ -287,6 +482,10 @@ class ToolTracerMixin:
|
|
287
482
|
metric["name"] = metric_name
|
288
483
|
metrics.append(metric)
|
289
484
|
|
485
|
+
formatted_metrics = BaseTracer.get_formatted_metric(self.span_attributes_dict, self.project_id, name)
|
486
|
+
if formatted_metrics:
|
487
|
+
metrics.extend(formatted_metrics)
|
488
|
+
|
290
489
|
start_time = kwargs["start_time"]
|
291
490
|
component = {
|
292
491
|
"id": kwargs["component_id"],
|
@@ -314,8 +513,13 @@ class ToolTracerMixin:
|
|
314
513
|
"interactions": interactions,
|
315
514
|
}
|
316
515
|
|
317
|
-
if self.
|
318
|
-
|
516
|
+
if name in self.span_attributes_dict:
|
517
|
+
span_gt = self.span_attributes_dict[name].gt
|
518
|
+
if span_gt is not None:
|
519
|
+
component["data"]["gt"] = span_gt
|
520
|
+
span_context = self.span_attributes_dict[name].context
|
521
|
+
if span_context:
|
522
|
+
component["data"]["context"] = span_context
|
319
523
|
|
320
524
|
# Reset the SpanAttributes context variable
|
321
525
|
self.span_attributes_dict[kwargs["name"]] = SpanAttributes(kwargs["name"])
|
@@ -328,25 +532,22 @@ class ToolTracerMixin:
|
|
328
532
|
def end_component(self, component_id):
    """Do nothing; this implementation ignores ``component_id``."""
|
330
534
|
|
331
|
-
def _sanitize_input(self, args: tuple, kwargs: dict) ->
|
332
|
-
"""Sanitize and format input data"""
|
535
|
+
def _sanitize_input(self, args: tuple, kwargs: dict) -> dict:
|
536
|
+
"""Sanitize and format input data, including handling of nested lists and dictionaries."""
|
537
|
+
|
538
|
+
def sanitize_value(value):
|
539
|
+
if isinstance(value, (int, float, bool, str)):
|
540
|
+
return value
|
541
|
+
elif isinstance(value, list):
|
542
|
+
return [sanitize_value(item) for item in value]
|
543
|
+
elif isinstance(value, dict):
|
544
|
+
return {key: sanitize_value(val) for key, val in value.items()}
|
545
|
+
else:
|
546
|
+
return str(value) # Convert non-standard types to string
|
547
|
+
|
333
548
|
return {
|
334
|
-
"args": [
|
335
|
-
|
336
|
-
str(arg)
|
337
|
-
if not isinstance(arg, (int, float, bool, str, list, dict))
|
338
|
-
else arg
|
339
|
-
)
|
340
|
-
for arg in args
|
341
|
-
],
|
342
|
-
"kwargs": {
|
343
|
-
k: (
|
344
|
-
str(v)
|
345
|
-
if not isinstance(v, (int, float, bool, str, list, dict))
|
346
|
-
else v
|
347
|
-
)
|
348
|
-
for k, v in kwargs.items()
|
349
|
-
},
|
549
|
+
"args": [sanitize_value(arg) for arg in args],
|
550
|
+
"kwargs": {key: sanitize_value(val) for key, val in kwargs.items()},
|
350
551
|
}
|
351
552
|
|
352
553
|
def _sanitize_output(self, output: Any) -> Any:
|