ragaai-catalyst 2.1.4.1b0__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragaai_catalyst/__init__.py +23 -2
- ragaai_catalyst/dataset.py +462 -1
- ragaai_catalyst/evaluation.py +76 -7
- ragaai_catalyst/ragaai_catalyst.py +52 -10
- ragaai_catalyst/redteaming/__init__.py +7 -0
- ragaai_catalyst/redteaming/config/detectors.toml +13 -0
- ragaai_catalyst/redteaming/data_generator/scenario_generator.py +95 -0
- ragaai_catalyst/redteaming/data_generator/test_case_generator.py +120 -0
- ragaai_catalyst/redteaming/evaluator.py +125 -0
- ragaai_catalyst/redteaming/llm_generator.py +136 -0
- ragaai_catalyst/redteaming/llm_generator_old.py +83 -0
- ragaai_catalyst/redteaming/red_teaming.py +331 -0
- ragaai_catalyst/redteaming/requirements.txt +4 -0
- ragaai_catalyst/redteaming/tests/grok.ipynb +97 -0
- ragaai_catalyst/redteaming/tests/stereotype.ipynb +2258 -0
- ragaai_catalyst/redteaming/upload_result.py +38 -0
- ragaai_catalyst/redteaming/utils/issue_description.py +114 -0
- ragaai_catalyst/redteaming/utils/rt.png +0 -0
- ragaai_catalyst/redteaming_old.py +171 -0
- ragaai_catalyst/synthetic_data_generation.py +400 -22
- ragaai_catalyst/tracers/__init__.py +17 -1
- ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +212 -148
- ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +657 -247
- ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +50 -19
- ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +588 -177
- ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +99 -100
- ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +3 -3
- ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +230 -29
- ragaai_catalyst/tracers/agentic_tracing/upload/trace_uploader.py +358 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +75 -20
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +55 -11
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_local_metric.py +74 -0
- ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +47 -16
- ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +4 -2
- ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +26 -3
- ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +182 -17
- ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +1233 -497
- ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +81 -10
- ragaai_catalyst/tracers/agentic_tracing/utils/supported_llm_provider.toml +34 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
- ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -32
- ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +3 -1
- ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +73 -47
- ragaai_catalyst/tracers/distributed.py +300 -0
- ragaai_catalyst/tracers/exporters/__init__.py +3 -1
- ragaai_catalyst/tracers/exporters/dynamic_trace_exporter.py +160 -0
- ragaai_catalyst/tracers/exporters/ragaai_trace_exporter.py +129 -0
- ragaai_catalyst/tracers/langchain_callback.py +809 -0
- ragaai_catalyst/tracers/llamaindex_instrumentation.py +424 -0
- ragaai_catalyst/tracers/tracer.py +301 -55
- ragaai_catalyst/tracers/upload_traces.py +24 -7
- ragaai_catalyst/tracers/utils/convert_langchain_callbacks_output.py +61 -0
- ragaai_catalyst/tracers/utils/convert_llama_instru_callback.py +69 -0
- ragaai_catalyst/tracers/utils/extraction_logic_llama_index.py +74 -0
- ragaai_catalyst/tracers/utils/langchain_tracer_extraction_logic.py +82 -0
- ragaai_catalyst/tracers/utils/model_prices_and_context_window_backup.json +9365 -0
- ragaai_catalyst/tracers/utils/trace_json_converter.py +269 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/METADATA +367 -45
- ragaai_catalyst-2.1.5.dist-info/RECORD +97 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/WHEEL +1 -1
- ragaai_catalyst-2.1.4.1b0.dist-info/RECORD +0 -67
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/LICENSE +0 -0
- {ragaai_catalyst-2.1.4.1b0.dist-info → ragaai_catalyst-2.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
import json
|
2
|
+
from typing import Dict, Any, Optional
|
3
|
+
|
4
|
+
|
5
|
+
def extract_llama_index_data(data):
    """
    Transform llama_index trace data into a standardized trace format.

    Args:
        data: A list whose first element is a raw llama_index trace dict,
            expected to carry top-level metadata keys and (optionally) a
            "traces" list of event spans.

    Returns:
        A single-element list containing the standardized trace dict, with
        prompt/context/response/system_prompt extracted from the spans.
    """
    # Only the first trace in the batch is processed (original contract).
    data = data[0]

    # Extract top-level metadata into the standardized envelope.
    trace_data = {
        "project_id": data.get("project_id"),
        "trace_id": data.get("trace_id"),
        "session_id": data.get("session_id"),
        "trace_type": data.get("trace_type"),
        "pipeline": data.get("pipeline"),
        "metadata": data.get("metadata"),
        "prompt_length": 0,
        "data": {
            "prompt": None,
            "context": None,
            "response": None,
            "system_prompt": None,
        },
    }

    def get_prompt(spans):
        # The query text appears on either the start or the end event;
        # return whichever occurs first. Use .get() so spans without an
        # "event_type" key are skipped instead of raising KeyError.
        for span in spans:
            if span.get("event_type") in ("QueryStartEvent", "QueryEndEvent"):
                return span.get("query", "")
        return None

    def get_context(spans):
        # Retrieved context is carried on the retrieval-end event.
        for span in spans:
            if span.get("event_type") == "RetrievalEndEvent":
                return span.get("text", "")
        return None

    def get_response(spans):
        # The final answer is carried on the query-end event.
        for span in spans:
            if span.get("event_type") == "QueryEndEvent":
                return span.get("response", "")
        return None

    def get_system_prompt(spans):
        # The first chat message of the LLMChatStartEvent is treated as the
        # system prompt (original behavior preserved).
        for span in spans:
            if span.get("event_type") == "LLMChatStartEvent":
                messages = span.get("messages", "")
                return messages[0]
        return None

    # Populate the data section from the recorded spans, if any.
    if "traces" in data:
        spans = data["traces"]
        trace_data["data"]["prompt"] = get_prompt(spans)
        trace_data["data"]["context"] = get_context(spans)
        trace_data["data"]["response"] = get_response(spans)
        trace_data["data"]["system_prompt"] = get_system_prompt(spans)
    return [trace_data]
@@ -0,0 +1,82 @@
|
|
1
|
+
import json
|
2
|
+
import uuid
|
3
|
+
|
4
|
+
def langchain_tracer_extraction(data, user_context=""):
    """
    Transform raw langchain callback data into a standardized trace dict.

    Args:
        data: Dict of recorded langchain callback events (keys such as
            "llm_calls", "chat_model_calls", "retriever_actions").
        user_context: Optional caller-supplied context string; when non-empty
            it overrides any retrieved-document context.

    Returns:
        A trace dict carrying a fresh trace id plus the prompt, response and
        context extracted from the callback data.
    """

    def generate_trace_id():
        """Return a random '0x'-prefixed trace id (UUID4 with no hyphens)."""
        return '0x' + str(uuid.uuid4()).replace('-', '')

    trace_aggregate = {
        "tracer_type": "langchain",
        "trace_id": generate_trace_id(),
        "session_id": None,
        # NOTE(review): pipeline and metadata below are hard-coded
        # placeholders carried over unchanged from the original code.
        "pipeline": {
            'llm_model': 'gpt-3.5-turbo',
            'vector_store': 'faiss',
            'embed_model': 'text-embedding-ada-002'
        },
        "metadata": {
            'key1': 'value1',
            'key2': 'value2',
            'log_source': 'langchain_tracer',
            'recorded_on': '2024-06-14 08:57:27.324410'
        },
        "prompt_length": 0,
        "data": {},
    }

    def get_prompt(data):
        # Prefer the first human chat message; fall back to the first raw
        # LLM prompt when only plain llm_start events were recorded.
        if data.get("chat_model_calls"):
            for item in data["chat_model_calls"]:
                for message in item["messages"][0]:
                    if message["type"] == "human":
                        return message["content"].strip()
        if data.get("llm_calls"):
            if "llm_start" in data["llm_calls"][0]["event"]:
                for item in data["llm_calls"]:
                    return item["prompts"][0].strip()
        return None

    def get_response(data):
        # First generation text of the first llm_end event. Guarded with
        # .get() so a payload without "llm_calls" does not raise KeyError.
        for item in data.get("llm_calls", []):
            if item["event"] == "llm_end":
                for generation in item["response"]["generations"][0]:
                    return generation["text"].strip()
        return None

    def get_context(data, user_context):
        # A caller-supplied context wins over retrieved documents.
        if user_context:
            return user_context
        if data.get("retriever_actions"):
            for item in data["retriever_actions"]:
                if item["event"] == "retriever_end":
                    return item["documents"][0]["page_content"].replace('\n', ' ')
        return None

    trace_aggregate["data"]["prompt"] = get_prompt(data)
    trace_aggregate["data"]["response"] = get_response(data)
    trace_aggregate["data"]["context"] = get_context(data, user_context)

    return trace_aggregate