holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +17 -4
  3. holmes/common/env_vars.py +40 -1
  4. holmes/config.py +114 -144
  5. holmes/core/conversations.py +53 -14
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/investigation.py +18 -22
  8. holmes/core/llm.py +489 -88
  9. holmes/core/models.py +103 -1
  10. holmes/core/openai_formatting.py +13 -0
  11. holmes/core/prompt.py +1 -1
  12. holmes/core/safeguards.py +4 -4
  13. holmes/core/supabase_dal.py +293 -100
  14. holmes/core/tool_calling_llm.py +423 -323
  15. holmes/core/tools.py +311 -33
  16. holmes/core/tools_utils/token_counting.py +14 -0
  17. holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
  18. holmes/core/tools_utils/tool_executor.py +13 -8
  19. holmes/core/toolset_manager.py +155 -4
  20. holmes/core/tracing.py +6 -1
  21. holmes/core/transformers/__init__.py +23 -0
  22. holmes/core/transformers/base.py +62 -0
  23. holmes/core/transformers/llm_summarize.py +174 -0
  24. holmes/core/transformers/registry.py +122 -0
  25. holmes/core/transformers/transformer.py +31 -0
  26. holmes/core/truncation/compaction.py +59 -0
  27. holmes/core/truncation/dal_truncation_utils.py +23 -0
  28. holmes/core/truncation/input_context_window_limiter.py +218 -0
  29. holmes/interactive.py +177 -24
  30. holmes/main.py +7 -4
  31. holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
  32. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  33. holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
  34. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  35. holmes/plugins/prompts/generic_ask.jinja2 +2 -4
  36. holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
  37. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
  38. holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
  39. holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
  40. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
  41. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
  42. holmes/plugins/runbooks/__init__.py +117 -18
  43. holmes/plugins/runbooks/catalog.json +2 -0
  44. holmes/plugins/toolsets/__init__.py +21 -8
  45. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  46. holmes/plugins/toolsets/aks.yaml +64 -0
  47. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
  48. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  49. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
  50. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
  51. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
  52. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
  53. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
  54. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
  55. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
  56. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
  57. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
  58. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
  59. holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
  60. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  61. holmes/plugins/toolsets/cilium.yaml +284 -0
  62. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  63. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  64. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  65. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
  66. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  67. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
  68. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
  69. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
  70. holmes/plugins/toolsets/git.py +51 -46
  71. holmes/plugins/toolsets/grafana/common.py +15 -3
  72. holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
  73. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
  74. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
  75. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
  76. holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
  77. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
  78. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  79. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
  80. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  81. holmes/plugins/toolsets/internet/internet.py +6 -7
  82. holmes/plugins/toolsets/internet/notion.py +5 -6
  83. holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
  84. holmes/plugins/toolsets/kafka.py +25 -36
  85. holmes/plugins/toolsets/kubernetes.yaml +58 -84
  86. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  87. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  88. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  89. holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
  90. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  91. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  92. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  93. holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
  94. holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
  95. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  96. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  97. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  98. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  99. holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
  100. holmes/plugins/toolsets/openshift.yaml +283 -0
  101. holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
  102. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
  103. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  104. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
  105. holmes/plugins/toolsets/robusta/robusta.py +236 -65
  106. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  107. holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
  108. holmes/plugins/toolsets/service_discovery.py +1 -1
  109. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  110. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  111. holmes/plugins/toolsets/utils.py +88 -0
  112. holmes/utils/config_utils.py +91 -0
  113. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  114. holmes/utils/env.py +7 -0
  115. holmes/utils/global_instructions.py +75 -10
  116. holmes/utils/holmes_status.py +2 -1
  117. holmes/utils/holmes_sync_toolsets.py +0 -2
  118. holmes/utils/krr_utils.py +188 -0
  119. holmes/utils/sentry_helper.py +41 -0
  120. holmes/utils/stream.py +61 -7
  121. holmes/version.py +34 -14
  122. holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
  123. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
  124. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
  125. holmes/core/performance_timing.py +0 -72
  126. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  127. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  128. holmes/plugins/toolsets/newrelic.py +0 -231
  129. holmes/plugins/toolsets/servicenow/install.md +0 -37
  130. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  131. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  132. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  133. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
  134. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/core/truncation/compaction.py ADDED
@@ -0,0 +1,59 @@
+ import logging
+ from typing import Optional
+ from holmes.core.llm import LLM
+ from holmes.plugins.prompts import load_and_render_prompt
+ from litellm.types.utils import ModelResponse
+
+
+ def strip_system_prompt(
+     conversation_history: list[dict],
+ ) -> tuple[list[dict], Optional[dict]]:
+     if not conversation_history:
+         return conversation_history, None
+     first_message = conversation_history[0]
+     if first_message and first_message.get("role") == "system":
+         return conversation_history[1:], first_message
+     return conversation_history[:], None
+
+
+ def compact_conversation_history(
+     original_conversation_history: list[dict], llm: LLM
+ ) -> list[dict]:
+     conversation_history, system_prompt_message = strip_system_prompt(
+         original_conversation_history
+     )
+     compaction_instructions = load_and_render_prompt(
+         prompt="builtin://conversation_history_compaction.jinja2", context={}
+     )
+     conversation_history.append({"role": "user", "content": compaction_instructions})
+
+     response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+     response_message = None
+     if (
+         response
+         and response.choices
+         and response.choices[0]
+         and response.choices[0].message  # type:ignore
+     ):
+         response_message = response.choices[0].message  # type:ignore
+     else:
+         logging.error(
+             "Failed to compact conversation history. Unexpected LLM's response for compaction"
+         )
+         return original_conversation_history
+
+     compacted_conversation_history: list[dict] = []
+     if system_prompt_message:
+         compacted_conversation_history.append(system_prompt_message)
+     compacted_conversation_history.append(
+         response_message.model_dump(
+             exclude_defaults=True, exclude_unset=True, exclude_none=True
+         )
+     )
+     compacted_conversation_history.append(
+         {
+             "role": "system",
+             "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+         }
+     )
+     return compacted_conversation_history
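In short, the new module replaces everything after the system prompt with a single LLM-written summary, then appends a notice that compaction occurred. A minimal usage sketch (`my_llm` is a hypothetical stand-in for any `holmes.core.llm.LLM` implementation, not a fixture from the package):

    # Hypothetical usage sketch -- `my_llm` stands in for any LLM implementation
    from holmes.core.truncation.compaction import compact_conversation_history

    history = [
        {"role": "system", "content": "You are a troubleshooting assistant."},
        {"role": "user", "content": "Why is my pod crash-looping?"},
        {"role": "assistant", "content": "...long investigation transcript..."},
    ]

    # On success the result is [original system prompt, LLM summary, compaction
    # notice]; on an unexpected LLM response the original history is returned.
    compacted = compact_conversation_history(history, llm=my_llm)
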
holmes/core/truncation/dal_truncation_utils.py ADDED
@@ -0,0 +1,23 @@
+ from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+
+
+ def truncate_string(data_str: str) -> str:
+     if data_str and len(data_str) > MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION:
+         return (
+             data_str[:MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION]
+             + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
+         )
+     return data_str
+
+
+ def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
+     if (
+         not MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+         or MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION <= 0
+     ):
+         return
+
+     for evidence in evidence_list:
+         data = evidence.get("data")
+         if data:
+             evidence["data"] = truncate_string(str(data))
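The behavior is easiest to see on a toy evidence list. A quick sketch, assuming the `MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION` environment variable is set to a positive value:

    # Hypothetical sketch: oversized "data" fields are cut in place and suffixed
    # with the truncation marker; entries under the limit are left untouched.
    evidence = [{"data": "x" * 10_000_000}, {"data": "short"}]
    truncate_evidences_entities_if_necessary(evidence)
    # evidence[0]["data"] now ends with
    # "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
    # evidence[1]["data"] is unchanged
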
holmes/core/truncation/input_context_window_limiter.py ADDED
@@ -0,0 +1,218 @@
+ import logging
+ from typing import Any, Optional
+ from pydantic import BaseModel
+ import sentry_sdk
+ from holmes.common.env_vars import (
+     ENABLE_CONVERSATION_HISTORY_COMPACTION,
+     MAX_OUTPUT_TOKEN_RESERVATION,
+ )
+ from holmes.core.llm import (
+     LLM,
+     TokenCountMetadata,
+     get_context_window_compaction_threshold_pct,
+ )
+ from holmes.core.models import TruncationMetadata, TruncationResult
+ from holmes.core.truncation.compaction import compact_conversation_history
+ from holmes.utils import sentry_helper
+ from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+ TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+ def _truncate_tool_message(
+     msg: dict, allocated_space: int, needed_space: int
+ ) -> TruncationMetadata:
+     msg_content = msg["content"]
+     tool_call_id = msg["tool_call_id"]
+     tool_name = msg["name"]
+
+     # Ensure the indicator fits in the allocated space
+     if allocated_space > len(TRUNCATION_NOTICE):
+         original = msg_content if isinstance(msg_content, str) else str(msg_content)
+         msg["content"] = (
+             original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+         )
+         end_index = allocated_space - len(TRUNCATION_NOTICE)
+     else:
+         msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+         end_index = allocated_space
+
+     msg.pop("token_count", None)  # Remove token_count if present
+     logging.info(
+         f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+     )
+     truncation_metadata = TruncationMetadata(
+         tool_call_id=tool_call_id,
+         start_index=0,
+         end_index=end_index,
+         tool_name=tool_name,
+         original_token_count=needed_space,
+     )
+     return truncation_metadata
+
+
+ # TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+ # However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+ # We should fix this in the future
+ # TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
+ # token truncation and not character truncation
+ def truncate_messages_to_fit_context(
+     messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+ ) -> TruncationResult:
+     """
+     Helper function to truncate tool messages to fit within context limits.
+
+     Args:
+         messages: List of message dictionaries with roles and content
+         max_context_size: Maximum context window size for the model
+         maximum_output_token: Maximum tokens reserved for model output
+         count_tokens_fn: Function to count tokens for a list of messages
+
+     Returns:
+         Modified list of messages with truncated tool responses
+
+     Raises:
+         Exception: If non-tool messages exceed available context space
+     """
+     messages_except_tools = [
+         message for message in messages if message["role"] != "tool"
+     ]
+     tokens = count_tokens_fn(messages_except_tools)
+     message_size_without_tools = tokens.total_tokens
+
+     tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+     reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+     if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+         logging.error(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+         )
+         raise Exception(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+         )
+
+     if len(tool_call_messages) == 0:
+         return TruncationResult(truncated_messages=messages, truncations=[])
+
+     available_space = (
+         max_context_size - message_size_without_tools - reserved_for_output_tokens
+     )
+     remaining_space = available_space
+     tool_call_messages.sort(
+         key=lambda x: count_tokens_fn(
+             [{"role": "tool", "content": x["content"]}]
+         ).total_tokens
+     )
+
+     truncations = []
+
+     # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+     # Small tools can often get exactly what they need, while larger tools may need to be truncated
+     # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+     for i, msg in enumerate(tool_call_messages):
+         remaining_tools = len(tool_call_messages) - i
+         max_allocation = remaining_space // remaining_tools
+         needed_space = count_tokens_fn(
+             [{"role": "tool", "content": msg["content"]}]
+         ).total_tokens
+         allocated_space = min(needed_space, max_allocation)
+
+         if needed_space > allocated_space:
+             truncation_metadata = _truncate_tool_message(
+                 msg, allocated_space, needed_space
+             )
+             truncations.append(truncation_metadata)
+
+         remaining_space -= allocated_space
+
+     if truncations:
+         sentry_helper.capture_tool_truncations(truncations)
+
+     return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+ class ContextWindowLimiterOutput(BaseModel):
+     metadata: dict
+     messages: list[dict]
+     events: list[StreamMessage]
+     max_context_size: int
+     maximum_output_token: int
+     tokens: TokenCountMetadata
+     conversation_history_compacted: bool
+
+
+ @sentry_sdk.trace
+ def limit_input_context_window(
+     llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+ ) -> ContextWindowLimiterOutput:
+     events = []
+     metadata = {}
+     initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     max_context_size = llm.get_context_window_size()
+     maximum_output_token = llm.get_maximum_output_token()
+     conversation_history_compacted = False
+     if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+         initial_tokens.total_tokens + maximum_output_token
+     ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+         compacted_messages = compact_conversation_history(
+             original_conversation_history=messages, llm=llm
+         )
+         compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+         compacted_total_tokens = compacted_tokens.total_tokens
+
+         if compacted_total_tokens < initial_tokens.total_tokens:
+             messages = compacted_messages
+             compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+             logging.info(compaction_message)
+             conversation_history_compacted = True
+             events.append(
+                 StreamMessage(
+                     event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                     data={
+                         "content": compaction_message,
+                         "messages": compacted_messages,
+                         "metadata": {
+                             "initial_tokens": initial_tokens.total_tokens,
+                             "compacted_tokens": compacted_total_tokens,
+                         },
+                     },
+                 )
+             )
+             events.append(
+                 StreamMessage(
+                     event=StreamEvents.AI_MESSAGE,
+                     data={"content": compaction_message},
+                 )
+             )
+         else:
+             logging.debug(
+                 f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+             )
+
+     tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     if (tokens.total_tokens + maximum_output_token) > max_context_size:
+         # Compaction was not sufficient. Truncating messages.
+         truncated_res = truncate_messages_to_fit_context(
+             messages=messages,
+             max_context_size=max_context_size,
+             maximum_output_token=maximum_output_token,
+             count_tokens_fn=llm.count_tokens,
+         )
+         metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+         messages = truncated_res.truncated_messages
+
+         # recount after truncation
+         tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     else:
+         metadata["truncations"] = []
+
+     return ContextWindowLimiterOutput(
+         events=events,
+         messages=messages,
+         metadata=metadata,
+         max_context_size=max_context_size,
+         maximum_output_token=maximum_output_token,
+         tokens=tokens,
+         conversation_history_compacted=conversation_history_compacted,
+     )
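The limiter first tries compaction and only falls back to truncation when the messages still exceed the window. The subtle piece is the smallest-first allocation loop in `truncate_messages_to_fit_context`: each tool response may take at most an equal share of whatever budget remains, and anything a small response does not use rolls over to the larger ones sorted after it. A self-contained sketch of that strategy (standalone illustration, not code from the package; the token counts are made up):

    # Standalone sketch of the fair-share allocation used above (not package code)
    def allocate(needs: list[int], budget: int) -> list[int]:
        """Smallest-first fair allocation: each item may take at most an equal
        share of the remaining budget; unused share rolls over to larger items."""
        allocations = []
        remaining = budget
        for i, need in enumerate(sorted(needs)):
            share = remaining // (len(needs) - i)  # equal share of what is left
            granted = min(need, share)  # small items take only what they need
            allocations.append(granted)
            remaining -= granted  # surplus becomes available to larger items
        return allocations

    # allocate([100, 500, 4000], budget=2000) == [100, 500, 1400]
    # The two small tool responses fit untouched; only the largest is truncated.
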
holmes/interactive.py CHANGED
@@ -26,9 +26,16 @@ from prompt_toolkit.widgets import TextArea
  from pygments.lexers import guess_lexer
  from rich.console import Console
  from rich.markdown import Markdown, Panel
+ from rich.markup import escape

  from holmes.common.env_vars import ENABLE_CLI_TOOL_APPROVAL
  from holmes.core.config import config_path_dir
+ from holmes.core.feedback import (
+     PRIVACY_NOTICE_BANNER,
+     Feedback,
+     FeedbackCallback,
+     UserFeedback,
+ )
  from holmes.core.prompt import build_initial_ask_messages
  from holmes.core.tool_calling_llm import ToolCallingLLM, ToolCallResult
  from holmes.core.tools import StructuredToolResult, pretty_print_toolset_status
@@ -43,6 +50,7 @@ from holmes.utils.colors import (
  )
  from holmes.utils.console.consts import agent_name
  from holmes.version import check_version_async
+ import re


  class SlashCommands(Enum):
@@ -62,19 +70,25 @@ class SlashCommands(Enum):
      )
      CONTEXT = ("/context", "Show conversation context size and token count")
      SHOW = ("/show", "Show specific tool output in scrollable view")
+     FEEDBACK = ("/feedback", "Provide feedback on the agent's response")

      def __init__(self, command, description):
          self.command = command
          self.description = description


- SLASH_COMMANDS_REFERENCE = {cmd.command: cmd.description for cmd in SlashCommands}
- ALL_SLASH_COMMANDS = [cmd.command for cmd in SlashCommands]
-
-
  class SlashCommandCompleter(Completer):
-     def __init__(self):
-         self.commands = SLASH_COMMANDS_REFERENCE
+     def __init__(self, unsupported_commands: Optional[List[str]] = None):
+         # Build commands dictionary, excluding unsupported commands
+         all_commands = {cmd.command: cmd.description for cmd in SlashCommands}
+         if unsupported_commands:
+             self.commands = {
+                 cmd: desc
+                 for cmd, desc in all_commands.items()
+                 if cmd not in unsupported_commands
+             }
+         else:
+             self.commands = all_commands

      def get_completions(self, document, complete_event):
          text = document.text_before_cursor
@@ -233,6 +247,13 @@ def build_modal_title(tool_call: ToolCallResult, wrap_status: str) -> str:
      return f"{tool_call.description} (exit: q, nav: ↑↓/j/k/g/G/d/u/f/b/space, wrap: w [{wrap_status}])"


+ def strip_ansi_codes(text: str) -> str:
+     ansi_escape_pattern = re.compile(
+         r"\x1b\[[0-9;]*[a-zA-Z]|\033\[[0-9;]*[a-zA-Z]|\^\[\[[0-9;]*[a-zA-Z]"
+     )
+     return ansi_escape_pattern.sub("", text)
+
+
  def detect_lexer(content: str) -> Optional[PygmentsLexer]:
      """
      Detect appropriate lexer for content using Pygments' built-in detection.
@@ -314,6 +335,7 @@ def show_tool_output_modal(tool_call: ToolCallResult, console: Console) -> None:
      try:
          # Get the full output
          output = tool_call.result.get_stringified_data()
+         output = strip_ansi_codes(output)
          title = build_modal_title(tool_call, "off")  # Word wrap starts disabled

          # Detect appropriate syntax highlighting
@@ -467,10 +489,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
          return

      # Calculate context statistics
-     total_tokens = ai.llm.count_tokens_for_message(messages)
+     tokens_metadata = ai.llm.count_tokens(
+         messages
+     )  # TODO: pass tools to also count tokens used by input tools
      max_context_size = ai.llm.get_context_window_size()
      max_output_tokens = ai.llm.get_maximum_output_token()
-     available_tokens = max_context_size - total_tokens - max_output_tokens
+     available_tokens = (
+         max_context_size - tokens_metadata.total_tokens - max_output_tokens
+     )

      # Analyze token distribution by role and tool calls
      role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -479,19 +505,21 @@

      for msg in messages:
          role = msg.get("role", "unknown")
-         msg_tokens = ai.llm.count_tokens_for_message([msg])
-         role_token_usage[role] += msg_tokens
+         message_tokens = ai.llm.count_tokens(
+             [msg]
+         )  # TODO: pass tools to also count tokens used by input tools
+         role_token_usage[role] += message_tokens.total_tokens

          # Track individual tool usage
          if role == "tool":
              tool_name = msg.get("name", "unknown_tool")
-             tool_token_usage[tool_name] += msg_tokens
+             tool_token_usage[tool_name] += message_tokens.total_tokens
              tool_call_counts[tool_name] += 1

      # Display context information
      console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
      console.print(
-         f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+         f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
      )
      console.print(
          f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -502,7 +530,11 @@
      for role in ["system", "user", "assistant", "tool"]:
          if role in role_token_usage:
              tokens = role_token_usage[role]
-             percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
+             percentage = (
+                 (tokens / tokens_metadata.total_tokens) * 100
+                 if tokens_metadata.total_tokens > 0
+                 else 0
+             )
              role_name = {
                  "system": "system prompt",
                  "user": "user messages",
@@ -811,6 +843,88 @@ def handle_last_command(
      )


+ def handle_feedback_command(
+     style: Style,
+     console: Console,
+     feedback: Feedback,
+     feedback_callback: FeedbackCallback,
+ ) -> None:
+     """Handle the /feedback command to collect user feedback."""
+     try:
+         # Create a temporary session without history for feedback prompts
+         temp_session = PromptSession(history=InMemoryHistory())  # type: ignore
+         # Prominent privacy notice to users
+         console.print(
+             f"[bold {HELP_COLOR}]Privacy Notice:[/bold {HELP_COLOR}] {PRIVACY_NOTICE_BANNER}"
+         )
+         # A "Cancel" button of equal discoverability to "Sent" or "Submit" buttons must be made available
+         console.print(
+             "[bold yellow]💡 Tip: Press Ctrl+C at any time to cancel feedback[/bold yellow]"
+         )
+
+         # Ask for thumbs up/down rating with validation
+         while True:
+             rating_prompt = temp_session.prompt(
+                 [("class:prompt", "Was this response useful to you? 👍(y)/👎(n): ")],
+                 style=style,
+             )
+
+             rating_lower = rating_prompt.lower().strip()
+             if rating_lower in ["y", "n"]:
+                 break
+             else:
+                 console.print(
+                     "[bold red]Please enter only 'y' for yes or 'n' for no.[/bold red]"
+                 )
+
+         # Determine rating
+         is_positive = rating_lower == "y"
+
+         # Ask for additional comments
+         comment_prompt = temp_session.prompt(
+             [
+                 (
+                     "class:prompt",
+                     "Do you want to provide any additional comments for feedback? (press Enter to skip):\n",
+                 )
+             ],
+             style=style,
+         )
+
+         comment = comment_prompt.strip() if comment_prompt.strip() else None
+
+         # Create UserFeedback object
+         user_feedback = UserFeedback(is_positive, comment)
+
+         if comment:
+             console.print(
+                 f'[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, "{escape(comment)}")[/bold green]'
+             )
+         else:
+             console.print(
+                 f"[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, no comment)[/bold green]"
+             )
+
+         # Final confirmation before submitting
+         final_confirmation = temp_session.prompt(
+             [("class:prompt", "\nDo you want to submit this feedback? (Y/n): ")],
+             style=style,
+         )
+
+         # If user says no, cancel the feedback
+         if final_confirmation.lower().strip().startswith("n"):
+             console.print("[dim]Feedback cancelled.[/dim]")
+             return
+
+         feedback.user_feedback = user_feedback
+         feedback_callback(feedback)
+         console.print("[bold green]Thank you for your feedback! 🙏[/bold green]")
+
+     except KeyboardInterrupt:
+         console.print("[dim]Feedback cancelled.[/dim]")
+         return
+
+
  def display_recent_tool_outputs(
      tool_calls: List[ToolCallResult],
      console: Console,
@@ -823,7 +937,10 @@
      for tool_call in tool_calls:
          tool_index = find_tool_index_in_history(tool_call, all_tool_calls_history)
          preview_output = format_tool_call_output(tool_call, tool_index)
-         title = f"{tool_call.result.status.to_emoji()} {tool_call.description} -> returned {tool_call.result.return_code}"
+         title = (
+             f"{tool_call.result.status.to_emoji()} {tool_call.description} -> "
+             f"returned {tool_call.result.return_code}"
+         )

          console.print(
              Panel(
@@ -846,6 +963,7 @@ def run_interactive_loop(
      runbooks=None,
      system_prompt_additions: Optional[str] = None,
      check_version: bool = True,
+     feedback_callback: Optional[FeedbackCallback] = None,
  ) -> None:
      # Initialize tracer - use DummyTracer if no tracer provided
      if tracer is None:
@@ -874,7 +992,11 @@
          ai.approval_callback = approval_handler

      # Create merged completer with slash commands, conditional executables, show command, and smart paths
-     slash_completer = SlashCommandCompleter()
+     # TODO: remove unsupported_commands support once we implement feedback callback
+     unsupported_commands = []
+     if feedback_callback is None:
+         unsupported_commands.append(SlashCommands.FEEDBACK.command)
+     slash_completer = SlashCommandCompleter(unsupported_commands)
      executable_completer = ConditionalExecutableCompleter()
      show_completer = ShowCommandCompleter()
      path_completer = SmartPathCompleter()
@@ -891,6 +1013,9 @@
      if initial_user_input:
          history.append_string(initial_user_input)

+     feedback = Feedback()
+     feedback.metadata.update_llm(ai.llm)
+
      # Create custom key bindings for Ctrl+C behavior
      bindings = KeyBindings()
      status_message = ""
@@ -963,7 +1088,15 @@

      input_prompt = [("class:prompt", "User: ")]

-     console.print(WELCOME_BANNER)
+     # TODO: merge the /feedback command description to WELCOME_BANNER once we implement feedback callback
+     welcome_banner = WELCOME_BANNER
+     if feedback_callback:
+         welcome_banner = (
+             welcome_banner.rstrip(".")
+             + f", '{SlashCommands.FEEDBACK.command}' to share your thoughts."
+         )
+     console.print(welcome_banner)
+
      if initial_user_input:
          console.print(
              f"[bold {USER_COLOR}]User:[/bold {USER_COLOR}] {initial_user_input}"
@@ -985,14 +1118,18 @@
              if user_input.startswith("/"):
                  original_input = user_input.strip()
                  command = original_input.lower()
-
                  # Handle prefix matching for slash commands
-                 matches = [cmd for cmd in ALL_SLASH_COMMANDS if cmd.startswith(command)]
+                 matches = [
+                     cmd
+                     for cmd in slash_completer.commands.keys()
+                     if cmd.startswith(command)
+                 ]
                  if len(matches) == 1:
                      command = matches[0]
                  elif len(matches) > 1:
                      console.print(
-                         f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
+                         f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. "
+                         f"Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
                      )
                      continue

@@ -1002,13 +1139,20 @@
                      console.print(
                          f"[bold {HELP_COLOR}]Available commands:[/bold {HELP_COLOR}]"
                      )
-                     for cmd, description in SLASH_COMMANDS_REFERENCE.items():
+                     for cmd, description in slash_completer.commands.items():
+                         # Only show feedback command if callback is available
+                         if (
+                             cmd == SlashCommands.FEEDBACK.command
+                             and feedback_callback is None
+                         ):
+                             continue
                          console.print(f" [bold]{cmd}[/bold] - {description}")
                      continue
                  elif command == SlashCommands.CLEAR.command:
                      console.clear()
                      console.print(
-                         f"[bold {STATUS_COLOR}]Screen cleared and context reset. You can now ask a new question.[/bold {STATUS_COLOR}]"
+                         f"[bold {STATUS_COLOR}]Screen cleared and context reset. "
+                         f"You can now ask a new question.[/bold {STATUS_COLOR}]"
                      )
                      messages = None
                      last_response = None
@@ -1052,6 +1196,12 @@
                      if shared_input is None:
                          continue  # User chose not to share or no output, continue to next input
                      user_input = shared_input
+                 elif (
+                     command == SlashCommands.FEEDBACK.command
+                     and feedback_callback is not None
+                 ):
+                     handle_feedback_command(style, console, feedback, feedback_callback)
+                     continue
                  else:
                      console.print(f"Unknown command: {command}")
                      continue
@@ -1091,6 +1241,7 @@

              messages = response.messages  # type: ignore
              last_response = response
+             feedback.metadata.add_llm_response(user_input, response.result)

              if response.tool_calls:
                  all_tool_calls_history.extend(response.tool_calls)
@@ -1111,9 +1262,6 @@
                      )
                  )

-             if trace_url:
-                 console.print(f"🔍 View trace: {trace_url}")
-
              console.print("")
          except typer.Abort:
              break
@@ -1122,6 +1270,11 @@
          except Exception as e:
              logging.error("An error occurred during interactive mode:", exc_info=e)
              console.print(f"[bold {ERROR_COLOR}]Error: {e}[/bold {ERROR_COLOR}]")
+         finally:
+             # Print trace URL for debugging (works for both success and error cases)
+             trace_url = tracer.get_trace_url()
+             if trace_url:
+                 console.print(f"🔍 View trace: {trace_url}")

      console.print(
          f"[bold {STATUS_COLOR}]Exiting interactive mode.[/bold {STATUS_COLOR}]"