holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/core/truncation/compaction.py
ADDED
@@ -0,0 +1,59 @@
+import logging
+from typing import Optional
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+from litellm.types.utils import ModelResponse
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history. Unexpected LLM's response for compaction"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
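For orientation, here is a sketch (not part of the package) of the before/after shape that compact_conversation_history produces: the system prompt survives, the rest of the history is replaced by the model's summary message, and a fixed system notice is appended. All message contents below are invented for illustration.

    # Hypothetical input (contents invented for illustration):
    before = [
        {"role": "system", "content": "You are a DevOps assistant."},
        {"role": "user", "content": "Why is pod payments-7f9c crashlooping?"},
        {"role": "assistant", "content": "...long investigation..."},
        {"role": "tool", "name": "kubectl_logs", "content": "...thousands of log lines..."},
    ]
    # after = compact_conversation_history(before, llm)  # llm: any holmes.core.llm.LLM
    # The result keeps the system prompt, replaces everything else with the LLM's
    # summary message, and appends the fixed "history has been compacted" notice:
    # [
    #     {"role": "system", "content": "You are a DevOps assistant."},
    #     {"role": "assistant", "content": "<summary written by the model>"},
    #     {"role": "system", "content": "The conversation history has been compacted..."},
    # ]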
holmes/core/truncation/dal_truncation_utils.py
ADDED
@@ -0,0 +1,23 @@
+from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+
+
+def truncate_string(data_str: str) -> str:
+    if data_str and len(data_str) > MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION:
+        return (
+            data_str[:MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION]
+            + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
+        )
+    return data_str
+
+
+def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
+    if (
+        not MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+        or MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION <= 0
+    ):
+        return
+
+    for evidence in evidence_list:
+        data = evidence.get("data")
+        if data:
+            evidence["data"] = truncate_string(str(data))
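The capping rule is simple enough to verify in isolation. A self-contained sketch of the same logic, using an assumed limit of 10 characters (in the package the limit comes from the MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION environment variable):

    LIMIT = 10  # assumed for the example; normally read from the environment
    NOTICE = "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"

    def cap(data_str: str) -> str:
        # Keep the first LIMIT characters and append the truncation marker
        return data_str[:LIMIT] + NOTICE if len(data_str) > LIMIT else data_str

    assert cap("x" * 40) == "x" * 10 + NOTICE
    assert cap("short") == "short"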
holmes/core/truncation/input_context_window_limiter.py
ADDED
@@ -0,0 +1,218 @@
+import logging
+from typing import Any, Optional
+from pydantic import BaseModel
+import sentry_sdk
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+# We should fix this in the future
+# TODO: we truncate using character counts not token counts - this means we're overly agressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
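The allocation loop in truncate_messages_to_fit_context deserves a worked example. Tool outputs are sorted smallest-first and each may claim at most an equal share of the remaining budget, so small outputs get exactly what they need and their unused share rolls over to larger ones. A self-contained sketch of the same scheme, with made-up token counts:

    # Smallest-first fair-share allocation (same scheme as the loop above):
    def allocate(needs: list[int], budget: int) -> list[int]:
        allocations = []
        remaining = budget
        for i, needed in enumerate(sorted(needs)):
            # Each tool may take at most an equal share of what is left;
            # smaller tools usually take less, freeing budget for larger ones.
            max_allocation = remaining // (len(needs) - i)
            allocated = min(needed, max_allocation)
            allocations.append(allocated)
            remaining -= allocated
        return allocations

    # Three tool outputs needing 100, 1_000 and 10_000 tokens with 3_000 available:
    # the small ones fit untruncated, the largest absorbs the leftover budget.
    print(allocate([100, 1_000, 10_000], 3_000))  # [100, 1000, 1900]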
holmes/interactive.py
CHANGED
@@ -26,9 +26,16 @@ from prompt_toolkit.widgets import TextArea
 from pygments.lexers import guess_lexer
 from rich.console import Console
 from rich.markdown import Markdown, Panel
+from rich.markup import escape

 from holmes.common.env_vars import ENABLE_CLI_TOOL_APPROVAL
 from holmes.core.config import config_path_dir
+from holmes.core.feedback import (
+    PRIVACY_NOTICE_BANNER,
+    Feedback,
+    FeedbackCallback,
+    UserFeedback,
+)
 from holmes.core.prompt import build_initial_ask_messages
 from holmes.core.tool_calling_llm import ToolCallingLLM, ToolCallResult
 from holmes.core.tools import StructuredToolResult, pretty_print_toolset_status
@@ -43,6 +50,7 @@ from holmes.utils.colors import (
 )
 from holmes.utils.console.consts import agent_name
 from holmes.version import check_version_async
+import re


 class SlashCommands(Enum):
@@ -62,19 +70,25 @@
     )
     CONTEXT = ("/context", "Show conversation context size and token count")
     SHOW = ("/show", "Show specific tool output in scrollable view")
+    FEEDBACK = ("/feedback", "Provide feedback on the agent's response")

     def __init__(self, command, description):
         self.command = command
         self.description = description


-SLASH_COMMANDS_REFERENCE = {cmd.command: cmd.description for cmd in SlashCommands}
-ALL_SLASH_COMMANDS = [cmd.command for cmd in SlashCommands]
-
-
 class SlashCommandCompleter(Completer):
-    def __init__(self):
-        self.commands = SLASH_COMMANDS_REFERENCE
+    def __init__(self, unsupported_commands: Optional[List[str]] = None):
+        # Build commands dictionary, excluding unsupported commands
+        all_commands = {cmd.command: cmd.description for cmd in SlashCommands}
+        if unsupported_commands:
+            self.commands = {
+                cmd: desc
+                for cmd, desc in all_commands.items()
+                if cmd not in unsupported_commands
+            }
+        else:
+            self.commands = all_commands

     def get_completions(self, document, complete_event):
         text = document.text_before_cursor
@@ -233,6 +247,13 @@ def build_modal_title(tool_call: ToolCallResult, wrap_status: str) -> str:
     return f"{tool_call.description} (exit: q, nav: ↑↓/j/k/g/G/d/u/f/b/space, wrap: w [{wrap_status}])"


+def strip_ansi_codes(text: str) -> str:
+    ansi_escape_pattern = re.compile(
+        r"\x1b\[[0-9;]*[a-zA-Z]|\033\[[0-9;]*[a-zA-Z]|\^\[\[[0-9;]*[a-zA-Z]"
+    )
+    return ansi_escape_pattern.sub("", text)
+
+
 def detect_lexer(content: str) -> Optional[PygmentsLexer]:
     """
     Detect appropriate lexer for content using Pygments' built-in detection.
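The new strip_ansi_codes helper above is easy to sanity-check in isolation. A standalone copy of it, exercised against an invented colored string:

    import re

    def strip_ansi_codes(text: str) -> str:
        ansi_escape_pattern = re.compile(
            r"\x1b\[[0-9;]*[a-zA-Z]|\033\[[0-9;]*[a-zA-Z]|\^\[\[[0-9;]*[a-zA-Z]"
        )
        return ansi_escape_pattern.sub("", text)

    # ANSI color codes around "error" are stripped before output reaches the modal
    assert strip_ansi_codes("\x1b[31merror\x1b[0m: OOMKilled") == "error: OOMKilled"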
@@ -314,6 +335,7 @@ def show_tool_output_modal(tool_call: ToolCallResult, console: Console) -> None:
     try:
         # Get the full output
         output = tool_call.result.get_stringified_data()
+        output = strip_ansi_codes(output)
         title = build_modal_title(tool_call, "off")  # Word wrap starts disabled

         # Detect appropriate syntax highlighting
@@ -467,10 +489,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
         return

     # Calculate context statistics
-    total_tokens = ai.llm.count_tokens_for_message(messages)
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens = max_context_size - total_tokens - max_output_tokens
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )

     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -479,19 +505,21 @@

     for msg in messages:
         role = msg.get("role", "unknown")
-        message_tokens = ai.llm.count_tokens_for_message([msg])
-        role_token_usage[role] += message_tokens
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens

         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] += message_tokens
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1

     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f"  Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f"  Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f"  Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -502,7 +530,11 @@
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
@@ -811,6 +843,88 @@ def handle_last_command(
     )


+def handle_feedback_command(
+    style: Style,
+    console: Console,
+    feedback: Feedback,
+    feedback_callback: FeedbackCallback,
+) -> None:
+    """Handle the /feedback command to collect user feedback."""
+    try:
+        # Create a temporary session without history for feedback prompts
+        temp_session = PromptSession(history=InMemoryHistory())  # type: ignore
+        # Prominent privacy notice to users
+        console.print(
+            f"[bold {HELP_COLOR}]Privacy Notice:[/bold {HELP_COLOR}] {PRIVACY_NOTICE_BANNER}"
+        )
+        # A "Cancel" button of equal discoverability to "Sent" or "Submit" buttons must be made available
+        console.print(
+            "[bold yellow]💡 Tip: Press Ctrl+C at any time to cancel feedback[/bold yellow]"
+        )
+
+        # Ask for thumbs up/down rating with validation
+        while True:
+            rating_prompt = temp_session.prompt(
+                [("class:prompt", "Was this response useful to you? 👍(y)/👎(n): ")],
+                style=style,
+            )
+
+            rating_lower = rating_prompt.lower().strip()
+            if rating_lower in ["y", "n"]:
+                break
+            else:
+                console.print(
+                    "[bold red]Please enter only 'y' for yes or 'n' for no.[/bold red]"
+                )
+
+        # Determine rating
+        is_positive = rating_lower == "y"
+
+        # Ask for additional comments
+        comment_prompt = temp_session.prompt(
+            [
+                (
+                    "class:prompt",
+                    "Do you want to provide any additional comments for feedback? (press Enter to skip):\n",
+                )
+            ],
+            style=style,
+        )
+
+        comment = comment_prompt.strip() if comment_prompt.strip() else None
+
+        # Create UserFeedback object
+        user_feedback = UserFeedback(is_positive, comment)
+
+        if comment:
+            console.print(
+                f'[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, "{escape(comment)}")[/bold green]'
+            )
+        else:
+            console.print(
+                f"[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, no comment)[/bold green]"
+            )
+
+        # Final confirmation before submitting
+        final_confirmation = temp_session.prompt(
+            [("class:prompt", "\nDo you want to submit this feedback? (Y/n): ")],
+            style=style,
+        )
+
+        # If user says no, cancel the feedback
+        if final_confirmation.lower().strip().startswith("n"):
+            console.print("[dim]Feedback cancelled.[/dim]")
+            return
+
+        feedback.user_feedback = user_feedback
+        feedback_callback(feedback)
+        console.print("[bold green]Thank you for your feedback! 🙏[/bold green]")
+
+    except KeyboardInterrupt:
+        console.print("[dim]Feedback cancelled.[/dim]")
+        return
+
+
 def display_recent_tool_outputs(
     tool_calls: List[ToolCallResult],
     console: Console,
@@ -823,7 +937,10 @@
     for tool_call in tool_calls:
         tool_index = find_tool_index_in_history(tool_call, all_tool_calls_history)
         preview_output = format_tool_call_output(tool_call, tool_index)
-        title = f"{tool_call.result.status.to_emoji()} {tool_call.description} -> returned {tool_call.result.return_code}"
+        title = (
+            f"{tool_call.result.status.to_emoji()} {tool_call.description} -> "
+            f"returned {tool_call.result.return_code}"
+        )

         console.print(
             Panel(
@@ -846,6 +963,7 @@ def run_interactive_loop(
     runbooks=None,
     system_prompt_additions: Optional[str] = None,
     check_version: bool = True,
+    feedback_callback: Optional[FeedbackCallback] = None,
 ) -> None:
     # Initialize tracer - use DummyTracer if no tracer provided
     if tracer is None:
@@ -874,7 +992,11 @@
     ai.approval_callback = approval_handler

     # Create merged completer with slash commands, conditional executables, show command, and smart paths
-    slash_completer = SlashCommandCompleter()
+    # TODO: remove unsupported_commands support once we implement feedback callback
+    unsupported_commands = []
+    if feedback_callback is None:
+        unsupported_commands.append(SlashCommands.FEEDBACK.command)
+    slash_completer = SlashCommandCompleter(unsupported_commands)
     executable_completer = ConditionalExecutableCompleter()
     show_completer = ShowCommandCompleter()
     path_completer = SmartPathCompleter()
@@ -891,6 +1013,9 @@
     if initial_user_input:
         history.append_string(initial_user_input)

+    feedback = Feedback()
+    feedback.metadata.update_llm(ai.llm)
+
     # Create custom key bindings for Ctrl+C behavior
     bindings = KeyBindings()
     status_message = ""
@@ -963,7 +1088,15 @@

     input_prompt = [("class:prompt", "User: ")]

-    console.print(WELCOME_BANNER)
+    # TODO: merge the /feedback command description to WELCOME_BANNER once we implement feedback callback
+    welcome_banner = WELCOME_BANNER
+    if feedback_callback:
+        welcome_banner = (
+            welcome_banner.rstrip(".")
+            + f", '{SlashCommands.FEEDBACK.command}' to share your thoughts."
+        )
+    console.print(welcome_banner)
+
     if initial_user_input:
         console.print(
             f"[bold {USER_COLOR}]User:[/bold {USER_COLOR}] {initial_user_input}"
@@ -985,14 +1118,18 @@
             if user_input.startswith("/"):
                 original_input = user_input.strip()
                 command = original_input.lower()
-
                 # Handle prefix matching for slash commands
-                matches = [cmd for cmd in ALL_SLASH_COMMANDS if cmd.startswith(command)]
+                matches = [
+                    cmd
+                    for cmd in slash_completer.commands.keys()
+                    if cmd.startswith(command)
+                ]
                 if len(matches) == 1:
                     command = matches[0]
                 elif len(matches) > 1:
                     console.print(
-                        f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
+                        f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. "
+                        f"Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
                     )
                     continue

@@ -1002,13 +1139,20 @@
                 console.print(
                     f"[bold {HELP_COLOR}]Available commands:[/bold {HELP_COLOR}]"
                 )
-                for cmd, description in SLASH_COMMANDS_REFERENCE.items():
+                for cmd, description in slash_completer.commands.items():
+                    # Only show feedback command if callback is available
+                    if (
+                        cmd == SlashCommands.FEEDBACK.command
+                        and feedback_callback is None
+                    ):
+                        continue
                     console.print(f"  [bold]{cmd}[/bold] - {description}")
                 continue
             elif command == SlashCommands.CLEAR.command:
                 console.clear()
                 console.print(
-                    f"[bold {STATUS_COLOR}]Screen cleared and context reset. You can now ask a new question.[/bold {STATUS_COLOR}]"
+                    f"[bold {STATUS_COLOR}]Screen cleared and context reset. "
+                    f"You can now ask a new question.[/bold {STATUS_COLOR}]"
                 )
                 messages = None
                 last_response = None
@@ -1052,6 +1196,12 @@
                 if shared_input is None:
                     continue  # User chose not to share or no output, continue to next input
                 user_input = shared_input
+            elif (
+                command == SlashCommands.FEEDBACK.command
+                and feedback_callback is not None
+            ):
+                handle_feedback_command(style, console, feedback, feedback_callback)
+                continue
             else:
                 console.print(f"Unknown command: {command}")
                 continue
@@ -1091,6 +1241,7 @@

             messages = response.messages  # type: ignore
             last_response = response
+            feedback.metadata.add_llm_response(user_input, response.result)

             if response.tool_calls:
                 all_tool_calls_history.extend(response.tool_calls)
@@ -1111,9 +1262,6 @@
                 )
             )

-            if trace_url:
-                console.print(f"🔍 View trace: {trace_url}")
-
             console.print("")
         except typer.Abort:
             break
@@ -1122,6 +1270,11 @@
         except Exception as e:
             logging.error("An error occurred during interactive mode:", exc_info=e)
             console.print(f"[bold {ERROR_COLOR}]Error: {e}[/bold {ERROR_COLOR}]")
+        finally:
+            # Print trace URL for debugging (works for both success and error cases)
+            trace_url = tracer.get_trace_url()
+            if trace_url:
+                console.print(f"🔍 View trace: {trace_url}")

     console.print(
         f"[bold {STATUS_COLOR}]Exiting interactive mode.[/bold {STATUS_COLOR}]"