ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. ragaai_catalyst/__init__.py +23 -2
  2. ragaai_catalyst/dataset.py +462 -1
  3. ragaai_catalyst/evaluation.py +76 -7
  4. ragaai_catalyst/ragaai_catalyst.py +52 -10
  5. ragaai_catalyst/redteaming/__init__.py +7 -0
  6. ragaai_catalyst/redteaming/config/detectors.toml +13 -0
  7. ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
  8. ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
  9. ragaai_catalyst/redteaming/evaluator.py +125 -0
  10. ragaai_catalyst/redteaming/llm_generator.py +136 -0
  11. ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
  12. ragaai_catalyst/redteaming/red_teaming.py +331 -0
  13. ragaai_catalyst/redteaming/requirements.txt +4 -0
  14. ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
  15. ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
  16. ragaai_catalyst/redteaming/upload_result.py +38 -0
  17. ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
  18. ragaai_catalyst/redteaming/utils/rt.png +0 -0
  19. ragaai_catalyst/redteaming_old.py +171 -0
  20. ragaai_catalyst/synthetic_data_generation.py +400 -22
  21. ragaai_catalyst/tracers/__init__.py +17 -1
  22. ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
  23. ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
  24. ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
  25. ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
  26. ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
  27. ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
  28. ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
  29. ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
  30. ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
  31. ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
  32. ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
  33. ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
  34. ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
  35. ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
  36. ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
  37. ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
  38. ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
  39. ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
  40. ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
  41. ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  42. ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
  43. ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
  44. ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
  45. ragaai_catalyst/tracers/distributed.py +300 -0
  46. ragaai_catalyst/tracers/exporters/__init__.py +3 -1
  47. ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
  48. ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
  49. ragaai_catalyst/tracers/langchain_callback.py +809 -0
  50. ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
  51. ragaai_catalyst/tracers/tracer.py +301 -55
  52. ragaai_catalyst/tracers/upload_traces.py +24 -7
  53. ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
  54. ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
  55. ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
  56. ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
  57. ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
  58. ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
  59. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
  60. ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
  61. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
  62. ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
  63. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
  64. {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
1
+ import json
2
+ from typing import Dict, Any, Optional
3
+
4
+
5
def _first_span_value(spans, event_types, key):
    """Return ``span.get(key, "")`` for the first span with a matching event type.

    Args:
        spans: Iterable of span dicts.
        event_types: Tuple of ``event_type`` strings that count as a match.
        key: Field to read from the matching span.

    Returns:
        The field value (``""`` when the matching span lacks ``key``), or
        ``None`` when no span matches.
    """
    for span in spans:
        # ``.get`` so a span without an "event_type" entry is skipped
        # instead of aborting the whole extraction with a KeyError.
        if span.get("event_type") in event_types:
            return span.get(key, "")
    return None


def extract_llama_index_data(data):
    """
    Transform llama_index trace data into standardized format

    Args:
        data: Sequence whose first element is the trace dict. Only
            ``data[0]`` is read; any further elements are ignored.

    Returns:
        A one-element list holding a dict with the top-level trace fields
        (``project_id``, ``trace_id``, ``session_id``, ``trace_type``,
        ``pipeline``, ``metadata``), ``prompt_length`` fixed at 0, and a
        ``data`` dict with ``prompt``, ``context``, ``response`` and
        ``system_prompt``. Each of those four stays ``None`` when the trace
        has no span that provides it.

    Raises:
        IndexError: If ``data`` is empty.
    """
    data = data[0]

    # Extract top-level metadata
    trace_data = {
        "project_id": data.get("project_id"),
        "trace_id": data.get("trace_id"),
        "session_id": data.get("session_id"),
        "trace_type": data.get("trace_type"),
        "pipeline": data.get("pipeline"),
        "metadata": data.get("metadata"),
        "prompt_length": 0,
        "data": {
            "prompt": None,
            "context": None,
            "response": None,
            "system_prompt": None,
        },
    }

    # Process traces
    spans = data.get("traces")
    if not spans:
        # Nothing to extract: keep the None defaults rather than touching
        # names that were never bound.
        return [trace_data]

    extracted = trace_data["data"]
    # The query text is carried by both the start and the end event;
    # whichever appears first in the span list wins.
    extracted["prompt"] = _first_span_value(
        spans, ("QueryStartEvent", "QueryEndEvent"), "query"
    )
    extracted["context"] = _first_span_value(spans, ("RetrievalEndEvent",), "text")
    extracted["response"] = _first_span_value(spans, ("QueryEndEvent",), "response")

    # The system prompt is taken to be the first message of the first chat
    # call. Guard against a missing or empty message list, which previously
    # raised IndexError.
    messages = _first_span_value(spans, ("LLMChatStartEvent",), "messages")
    extracted["system_prompt"] = messages[0] if messages else None

    return [trace_data]
@@ -0,0 +1,82 @@
1
+ import json
2
+ import uuid
3
+
4
def _generate_trace_id():
    """
    Generate a random trace ID using UUID4.
    Returns "0x" followed by the 32 hex digits of the UUID (no hyphens).
    """
    return "0x" + uuid.uuid4().hex


def _extract_prompt(data):
    """Return the stripped user prompt, or None when none is recorded.

    Chat-model calls are checked first (first message of type "human" in the
    first message batch of a call); otherwise the first prompt of the first
    ``llm_start`` entry in ``llm_calls`` is used.
    """
    for call in data.get("chat_model_calls") or []:
        batches = call.get("messages") or []
        first_batch = batches[0] if batches else []
        for message in first_batch:
            if message.get("type") == "human":
                return message["content"].strip()

    for call in data.get("llm_calls") or []:
        # Substring match mirrors the original check on the event name.
        if "llm_start" in (call.get("event") or "") and call.get("prompts"):
            return call["prompts"][0].strip()
    return None


def _extract_response(data):
    """Return the stripped text of the first generation of the first
    ``llm_end`` entry that has one, or None."""
    # ``.get`` so payloads without "llm_calls" yield None instead of KeyError.
    for call in data.get("llm_calls") or []:
        if call.get("event") != "llm_end":
            continue
        generations = (call.get("response") or {}).get("generations") or []
        first_batch = generations[0] if generations else []
        if first_batch:
            return first_batch[0]["text"].strip()
    return None


def _extract_context(data, user_context):
    """Return ``user_context`` when truthy; otherwise the first retrieved
    document's ``page_content`` with newlines replaced by spaces, or None."""
    if user_context:
        return user_context
    for action in data.get("retriever_actions") or []:
        if action.get("event") != "retriever_end":
            continue
        documents = action.get("documents") or []
        # A retriever that found nothing must not raise IndexError.
        if documents:
            return documents[0]["page_content"].replace('\n', ' ')
    return None


def langchain_tracer_extraction(data, user_context=""):
    """Build a trace record from collected langchain callback data.

    Args:
        data: Dict that may contain ``chat_model_calls``, ``llm_calls`` and
            ``retriever_actions`` lists; missing or empty lists are treated
            as "nothing recorded".
        user_context: Context supplied by the caller. When truthy it is used
            as the context instead of the retrieved documents.

    Returns:
        Dict with ``tracer_type``, a freshly generated ``trace_id``,
        ``session_id`` (None), ``pipeline``, ``metadata``, ``prompt_length``
        (0) and ``data`` holding ``prompt``, ``response`` and ``context``
        (each None when it cannot be found).
    """
    trace_aggregate = {}

    trace_aggregate["tracer_type"] = "langchain"
    trace_aggregate['trace_id'] = _generate_trace_id()
    trace_aggregate['session_id'] = None
    # NOTE(review): the pipeline and metadata values below look like
    # placeholders; presumably the caller overwrites them - confirm before
    # relying on them.
    trace_aggregate["pipeline"] = {
        'llm_model': 'gpt-3.5-turbo',
        'vector_store': 'faiss',
        'embed_model': 'text-embedding-ada-002'
    }
    trace_aggregate["metadata"] = {
        'key1': 'value1',
        'key2': 'value2',
        'log_source': 'langchain_tracer',
        'recorded_on': '2024-06-14 08:57:27.324410'
    }
    trace_aggregate["prompt_length"] = 0
    trace_aggregate["data"] = {}

    trace_aggregate["data"]["prompt"] = _extract_prompt(data)
    trace_aggregate["data"]["response"] = _extract_response(data)
    trace_aggregate["data"]["context"] = _extract_context(data, user_context)

    return trace_aggregate