ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,424 @@
1
+ from configparser import InterpolationMissingOptionError
2
+ import json
3
+ from datetime import datetime
4
+ from typing import Any, Optional, Dict, List, ClassVar
5
+ from pydantic import Field
6
+ # from treelib import Tree
7
+
8
+ from llama_index.core.instrumentation.span import SimpleSpan
9
+ from llama_index.core.instrumentation.span_handlers.base import BaseSpanHandler
10
+ from llama_index.core.instrumentation.events import BaseEvent
11
+ from llama_index.core.instrumentation.event_handlers import BaseEventHandler
12
+ from llama_index.core.instrumentation import get_dispatcher
13
+ from llama_index.core.instrumentation.span_handlers import SimpleSpanHandler
14
+
15
+ from llama_index.core.instrumentation.events.agent import (
16
+ AgentChatWithStepStartEvent,
17
+ AgentChatWithStepEndEvent,
18
+ AgentRunStepStartEvent,
19
+ AgentRunStepEndEvent,
20
+ AgentToolCallEvent,
21
+ )
22
+ from llama_index.core.instrumentation.events.chat_engine import (
23
+ StreamChatErrorEvent,
24
+ StreamChatDeltaReceivedEvent,
25
+ )
26
+ from llama_index.core.instrumentation.events.embedding import (
27
+ EmbeddingStartEvent,
28
+ EmbeddingEndEvent,
29
+ )
30
+ from llama_index.core.instrumentation.events.llm import (
31
+ LLMPredictEndEvent,
32
+ LLMPredictStartEvent,
33
+ LLMStructuredPredictEndEvent,
34
+ LLMStructuredPredictStartEvent,
35
+ LLMCompletionEndEvent,
36
+ LLMCompletionStartEvent,
37
+ LLMChatEndEvent,
38
+ LLMChatStartEvent,
39
+ LLMChatInProgressEvent,
40
+ )
41
+ from llama_index.core.instrumentation.events.query import (
42
+ QueryStartEvent,
43
+ QueryEndEvent,
44
+ )
45
+ from llama_index.core.instrumentation.events.rerank import (
46
+ ReRankStartEvent,
47
+ ReRankEndEvent,
48
+ )
49
+ from llama_index.core.instrumentation.events.retrieval import (
50
+ RetrievalStartEvent,
51
+ RetrievalEndEvent,
52
+ )
53
+ from llama_index.core.instrumentation.events.span import (
54
+ SpanDropEvent,
55
+ )
56
+ from llama_index.core.instrumentation.events.synthesis import (
57
+ SynthesizeStartEvent,
58
+ SynthesizeEndEvent,
59
+ GetResponseEndEvent,
60
+ GetResponseStartEvent,
61
+ )
62
+
63
+ import uuid
64
+
65
+ from .utils.extraction_logic_llama_index import extract_llama_index_data
66
+ from .utils.convert_llama_instru_callback import convert_llamaindex_instrumentation_to_callback
67
+
68
# Attributes copied verbatim from an event into its trace record, keyed by
# event type. Every entry whose type matches the event is applied, in this
# order, mirroring an independent ``if isinstance(...)`` check per type.
_EVENT_ATTRIBUTE_FIELDS = (
    (AgentRunStepStartEvent, ("task_id", "step", "input")),
    (AgentRunStepEndEvent, ("step_output",)),
    (AgentChatWithStepStartEvent, ("user_msg",)),
    (AgentChatWithStepEndEvent, ("response",)),
    (AgentToolCallEvent, ("arguments",)),
    (StreamChatDeltaReceivedEvent, ("delta",)),
    (StreamChatErrorEvent, ("exception",)),
    (EmbeddingStartEvent, ("model_dict",)),
    (EmbeddingEndEvent, ("chunks",)),
    (LLMPredictStartEvent, ("template", "template_args")),
    (LLMPredictEndEvent, ("output",)),
    (LLMStructuredPredictStartEvent, ("template", "template_args", "output_cls")),
    (LLMStructuredPredictEndEvent, ("output",)),
    (LLMCompletionStartEvent, ("model_dict", "prompt", "additional_kwargs")),
    (LLMCompletionEndEvent, ("response", "prompt")),
    (LLMChatInProgressEvent, ("messages", "response")),
    (LLMChatStartEvent, ("messages", "additional_kwargs", "model_dict")),
    (LLMChatEndEvent, ("messages", "response")),
    (RetrievalStartEvent, ("str_or_query_bundle",)),
    (RetrievalEndEvent, ("str_or_query_bundle", "nodes")),
    (ReRankStartEvent, ("query", "nodes", "top_n", "model_name")),
    (ReRankEndEvent, ("nodes",)),
    (QueryStartEvent, ("query",)),
    (QueryEndEvent, ("response", "query")),
    (SpanDropEvent, ("err_str",)),
    (SynthesizeStartEvent, ("query",)),
    (SynthesizeEndEvent, ("response", "query")),
    (GetResponseStartEvent, ("query_str",)),
)


class EventHandler(BaseEventHandler):
    """Collect LlamaIndex instrumentation events into a flat trace.

    Each handled event is stored twice: the raw event object is appended to
    ``events`` and a plain dictionary describing it is appended to
    ``current_trace``. Every dictionary carries ``id``, ``timestamp``,
    ``span_id`` and ``event_type``, plus the attributes specific to the
    event's type.

    Events are points in time that link to a span through ``span_id``. While
    events are independent, they nest; for example a ``query_engine.query()``
    call with a reranker attached emits:

    - QueryStartEvent
      - RetrievalStartEvent
        - EmbeddingStartEvent
        - EmbeddingEndEvent
      - RetrievalEndEvent
      - RerankStartEvent
      - RerankEndEvent
      - SynthesizeStartEvent
        - GetResponseStartEvent
          - LLMPredictStartEvent
            - LLMChatStartEvent
            - LLMChatEndEvent
          - LLMPredictEndEvent
        - GetResponseEndEvent
      - SynthesizeEndEvent
    - QueryEndEvent
    """

    # Raw event objects, in arrival order.
    # NOTE(review): these list defaults are only safe if the base model copies
    # mutable defaults per instance -- confirm against the installed
    # llama_index / pydantic versions.
    events: List[BaseEvent] = []
    # One dictionary per handled event, in arrival order.
    current_trace: List[Dict[str, Any]] = []

    @classmethod
    def class_name(cls) -> str:
        """Return the handler's class name."""
        return "EventHandler"

    def handle(self, event: BaseEvent, **kwargs: Any) -> None:
        """Record ``event`` in ``events`` and its details in ``current_trace``.

        Args:
            event: The instrumentation event to record.
            **kwargs: Accepted and ignored, so extra keyword arguments passed
                by a caller do not raise.
        """
        # Attributes read from every event regardless of its type.
        event_details: Dict[str, Any] = {
            "id": event.id_,
            "timestamp": event.timestamp,
            "span_id": event.span_id,
            "event_type": event.class_name(),
        }

        # Type-specific attributes that are copied without transformation.
        for event_type, attribute_names in _EVENT_ATTRIBUTE_FIELDS:
            if isinstance(event, event_type):
                for attribute_name in attribute_names:
                    event_details[attribute_name] = getattr(event, attribute_name)

        # Type-specific values that are derived rather than copied.
        if isinstance(event, AgentToolCallEvent):
            tool = event.tool
            event_details.update({
                "tool_name": tool.name,
                "tool_description": tool.description,
                "tool_openai": tool.to_openai_tool(),
            })
        if isinstance(event, EmbeddingEndEvent):
            embeddings = event.embeddings
            # Keep only the first five values of the first embedding. An empty
            # result used to raise IndexError here, before the event was
            # recorded.
            event_details["embeddings"] = embeddings[0][:5] if embeddings else []
        if isinstance(event, RetrievalEndEvent):
            nodes = event.nodes
            # A retrieval that returns no nodes used to raise IndexError here,
            # before the event was recorded.
            event_details["text"] = nodes[0].text if nodes else ""

        self.current_trace.append(event_details)
        self.events.append(event)

    def _get_events_by_span(self) -> Dict[str, List[BaseEvent]]:
        """Group the recorded events by ``span_id``, preserving arrival order."""
        events_by_span: Dict[str, List[BaseEvent]] = {}
        for event in self.events:
            events_by_span.setdefault(event.span_id, []).append(event)
        return events_by_span
273
+
274
+ # def _get_event_span_trees(self) -> List[Tree]:
275
+ # events_by_span = self._get_events_by_span()
276
+
277
+ # trees = []
278
+ # tree = Tree()
279
+
280
+ # for span, sorted_events in events_by_span.items():
281
+ # # create root node i.e. span node
282
+ # tree.create_node(
283
+ # tag=f"{span} (SPAN)",
284
+ # identifier=span,
285
+ # parent=None,
286
+ # data=sorted_events[0].timestamp,
287
+ # )
288
+
289
+ # for event in sorted_events:
290
+ # tree.create_node(
291
+ # tag=f"{event.class_name()}: {event.id_}",
292
+ # identifier=event.id_,
293
+ # parent=event.span_id,
294
+ # data=event.timestamp,
295
+ # )
296
+
297
+ # trees.append(tree)
298
+ # tree = Tree()
299
+ # return trees
300
+
301
+ # def print_event_span_trees(self) -> None:
302
+ # """Method for viewing trace trees."""
303
+ # trees = self._get_event_span_trees()
304
+ # for tree in trees:
305
+ # print(
306
+ # tree.show(
307
+ # stdout=False, sorting=True, key=lambda node: node.data
308
+ # )
309
+ # )
310
+ # print("")
311
+
312
+
313
+
314
class SpanHandler(BaseSpanHandler[SimpleSpan]):
    """Span handler that records every span it is asked to create.

    Created spans are stored in ``span_dict`` under their span id. The exit
    and drop hooks do nothing, so stored spans are never removed by this
    class.
    """

    # Class-level registry shared by all instances: span id -> spans created
    # under that id.
    span_dict: ClassVar[Dict[str, List[SimpleSpan]]] = {}

    @classmethod
    def class_name(cls) -> str:
        """Return the handler's class name."""
        return "SpanHandler"

    def new_span(
        self,
        id_: str,
        bound_args: Any,
        instance: Optional[Any] = None,
        parent_span_id: Optional[str] = None,
        tags: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Optional[SimpleSpan]:
        """Record a new ``SimpleSpan`` for ``id_`` in ``span_dict``.

        Only ``id_`` and ``parent_span_id`` are used; the remaining arguments
        are accepted and ignored. The created span is stored but not
        returned: this method always returns ``None``.
        """
        spans_for_id = self.span_dict.setdefault(id_, [])
        spans_for_id.append(SimpleSpan(id_=id_, parent_id=parent_span_id))
        return None

    def prepare_to_exit_span(
        self,
        id_: str,
        bound_args: Any,
        instance: Optional[Any] = None,
        result: Optional[Any] = None,
        **kwargs: Any,
    ) -> Any:
        """Hook called before a span exits; does nothing and returns ``None``."""
        # Spans recorded in span_dict are not popped here.
        return None

    def prepare_to_drop_span(
        self,
        id_: str,
        bound_args: Any,
        instance: Optional[Any] = None,
        err: Optional[BaseException] = None,
        **kwargs: Any,
    ) -> Any:
        """Hook called before a span is dropped; does nothing and returns ``None``."""
        # Spans recorded in span_dict are not popped here.
        return None
365
+
366
+
367
+
368
class LlamaIndexInstrumentationTracer:
    """Start and stop collection of LlamaIndex instrumentation events.

    ``start()`` registers an ``EventHandler``, a ``SpanHandler`` and a
    ``SimpleSpanHandler`` on the dispatcher returned by ``get_dispatcher()``.
    ``stop()`` packages the collected events into a trace, writes it to
    ``new_llamaindex_traces.json`` and returns the converted result.
    """

    def __init__(self, user_detail):
        """Create the handlers without registering them.

        Args:
            user_detail: Mapping read by ``stop()``; it must provide
                ``"project_id"`` and a ``"trace_user_detail"`` mapping with
                ``"metadata"`` and ``"pipeline"`` entries.
        """
        self.root_dispatcher = get_dispatcher()

        self.json_event_handler = EventHandler()
        self.span_handler = SpanHandler()
        self.simple_span_handler = SimpleSpanHandler()

        self.is_tracing = False  # True between start() and a successful stop()

        self.user_detail = user_detail

    def start(self):
        """Start tracing by registering the handlers on the dispatcher.

        Does nothing except print a notice when tracing is already active.
        """
        if self.is_tracing:
            print("Tracing is already active.")
            return

        self.root_dispatcher.add_span_handler(self.span_handler)
        self.root_dispatcher.add_span_handler(self.simple_span_handler)
        self.root_dispatcher.add_event_handler(self.json_event_handler)

        self.is_tracing = True
        print("Tracing started.")

    def _unregister_handlers(self):
        """Remove this tracer's handlers from the dispatcher's handler lists."""
        own_span_handlers = (self.span_handler, self.simple_span_handler)
        span_handlers = self.root_dispatcher.span_handlers
        event_handlers = self.root_dispatcher.event_handlers
        # Filter by identity and mutate in place: equality between handler
        # objects could match a handler that belongs to someone else.
        span_handlers[:] = [
            handler
            for handler in span_handlers
            if not any(handler is own for own in own_span_handlers)
        ]
        event_handlers[:] = [
            handler
            for handler in event_handlers
            if handler is not self.json_event_handler
        ]

    def stop(self):
        """Stop tracing, export the collected events and unregister handlers.

        The trace is written to ``new_llamaindex_traces.json`` in the current
        working directory, then passed through ``extract_llama_index_data``
        and ``convert_llamaindex_instrumentation_to_callback``.

        Returns:
            The converted trace, or ``None`` when tracing was not active.

        Raises:
            KeyError: If ``user_detail`` lacks one of the required entries.
        """
        if not self.is_tracing:
            print("Tracing is not active.")
            return

        trace_user_detail = self.user_detail["trace_user_detail"]
        # NOTE(review): current_trace is never cleared, so a second
        # start()/stop() cycle on the same tracer also exports the events of
        # the earlier cycle -- confirm whether that is intended.
        final_traces = {
            "project_id": self.user_detail["project_id"],
            "trace_id": str(uuid.uuid4()),
            "session_id": None,
            "trace_type": "llamaindex",
            "metadata": trace_user_detail["metadata"],
            "pipeline": trace_user_detail["pipeline"],
            "traces": self.json_event_handler.current_trace,
        }

        # default=str stringifies any value json cannot serialise natively.
        with open('new_llamaindex_traces.json', 'w') as f:
            json.dump([final_traces], f, default=str, indent=4)

        llamaindex_instrumentation_data = extract_llama_index_data([final_traces])
        converted_back_to_callback = convert_llamaindex_instrumentation_to_callback(
            llamaindex_instrumentation_data
        )

        # Previously only the flag was reset: the handlers kept recording and
        # a later start() registered them a second time, duplicating events.
        self._unregister_handlers()
        self.is_tracing = False
        print("Tracing stopped.")
        return converted_back_to_callback