holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +17 -4
  3. holmes/common/env_vars.py +40 -1
  4. holmes/config.py +114 -144
  5. holmes/core/conversations.py +53 -14
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/investigation.py +18 -22
  8. holmes/core/llm.py +489 -88
  9. holmes/core/models.py +103 -1
  10. holmes/core/openai_formatting.py +13 -0
  11. holmes/core/prompt.py +1 -1
  12. holmes/core/safeguards.py +4 -4
  13. holmes/core/supabase_dal.py +293 -100
  14. holmes/core/tool_calling_llm.py +423 -323
  15. holmes/core/tools.py +311 -33
  16. holmes/core/tools_utils/token_counting.py +14 -0
  17. holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
  18. holmes/core/tools_utils/tool_executor.py +13 -8
  19. holmes/core/toolset_manager.py +155 -4
  20. holmes/core/tracing.py +6 -1
  21. holmes/core/transformers/__init__.py +23 -0
  22. holmes/core/transformers/base.py +62 -0
  23. holmes/core/transformers/llm_summarize.py +174 -0
  24. holmes/core/transformers/registry.py +122 -0
  25. holmes/core/transformers/transformer.py +31 -0
  26. holmes/core/truncation/compaction.py +59 -0
  27. holmes/core/truncation/dal_truncation_utils.py +23 -0
  28. holmes/core/truncation/input_context_window_limiter.py +218 -0
  29. holmes/interactive.py +177 -24
  30. holmes/main.py +7 -4
  31. holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
  32. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  33. holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
  34. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  35. holmes/plugins/prompts/generic_ask.jinja2 +2 -4
  36. holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
  37. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
  38. holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
  39. holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
  40. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
  41. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
  42. holmes/plugins/runbooks/__init__.py +117 -18
  43. holmes/plugins/runbooks/catalog.json +2 -0
  44. holmes/plugins/toolsets/__init__.py +21 -8
  45. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  46. holmes/plugins/toolsets/aks.yaml +64 -0
  47. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
  48. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  49. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
  50. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
  51. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
  52. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
  53. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
  54. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
  55. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
  56. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
  57. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
  58. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
  59. holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
  60. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  61. holmes/plugins/toolsets/cilium.yaml +284 -0
  62. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  63. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  64. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  65. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
  66. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  67. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
  68. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
  69. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
  70. holmes/plugins/toolsets/git.py +51 -46
  71. holmes/plugins/toolsets/grafana/common.py +15 -3
  72. holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
  73. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
  74. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
  75. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
  76. holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
  77. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
  78. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  79. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
  80. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  81. holmes/plugins/toolsets/internet/internet.py +6 -7
  82. holmes/plugins/toolsets/internet/notion.py +5 -6
  83. holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
  84. holmes/plugins/toolsets/kafka.py +25 -36
  85. holmes/plugins/toolsets/kubernetes.yaml +58 -84
  86. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  87. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  88. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  89. holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
  90. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  91. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  92. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  93. holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
  94. holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
  95. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  96. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  97. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  98. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  99. holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
  100. holmes/plugins/toolsets/openshift.yaml +283 -0
  101. holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
  102. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
  103. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  104. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
  105. holmes/plugins/toolsets/robusta/robusta.py +236 -65
  106. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  107. holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
  108. holmes/plugins/toolsets/service_discovery.py +1 -1
  109. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  110. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  111. holmes/plugins/toolsets/utils.py +88 -0
  112. holmes/utils/config_utils.py +91 -0
  113. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  114. holmes/utils/env.py +7 -0
  115. holmes/utils/global_instructions.py +75 -10
  116. holmes/utils/holmes_status.py +2 -1
  117. holmes/utils/holmes_sync_toolsets.py +0 -2
  118. holmes/utils/krr_utils.py +188 -0
  119. holmes/utils/sentry_helper.py +41 -0
  120. holmes/utils/stream.py +61 -7
  121. holmes/version.py +34 -14
  122. holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
  123. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
  124. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
  125. holmes/core/performance_timing.py +0 -72
  126. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  127. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  128. holmes/plugins/toolsets/newrelic.py +0 -231
  129. holmes/plugins/toolsets/servicenow/install.md +0 -37
  130. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  131. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  132. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  133. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
  134. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/core/truncation/compaction.py ADDED
@@ -0,0 +1,59 @@
+ import logging
+ from typing import Optional
+ from holmes.core.llm import LLM
+ from holmes.plugins.prompts import load_and_render_prompt
+ from litellm.types.utils import ModelResponse
+
+
+ def strip_system_prompt(
+     conversation_history: list[dict],
+ ) -> tuple[list[dict], Optional[dict]]:
+     if not conversation_history:
+         return conversation_history, None
+     first_message = conversation_history[0]
+     if first_message and first_message.get("role") == "system":
+         return conversation_history[1:], first_message
+     return conversation_history[:], None
+
+
+ def compact_conversation_history(
+     original_conversation_history: list[dict], llm: LLM
+ ) -> list[dict]:
+     conversation_history, system_prompt_message = strip_system_prompt(
+         original_conversation_history
+     )
+     compaction_instructions = load_and_render_prompt(
+         prompt="builtin://conversation_history_compaction.jinja2", context={}
+     )
+     conversation_history.append({"role": "user", "content": compaction_instructions})
+
+     response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+     response_message = None
+     if (
+         response
+         and response.choices
+         and response.choices[0]
+         and response.choices[0].message  # type:ignore
+     ):
+         response_message = response.choices[0].message  # type:ignore
+     else:
+         logging.error(
+             "Failed to compact conversation history. Unexpected LLM's response for compaction"
+         )
+         return original_conversation_history
+
+     compacted_conversation_history: list[dict] = []
+     if system_prompt_message:
+         compacted_conversation_history.append(system_prompt_message)
+     compacted_conversation_history.append(
+         response_message.model_dump(
+             exclude_defaults=True, exclude_unset=True, exclude_none=True
+         )
+     )
+     compacted_conversation_history.append(
+         {
+             "role": "system",
+             "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+         }
+     )
+     return compacted_conversation_history
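In short, the new module replaces everything after the system prompt with a single LLM-written summary, then appends a notice that compaction occurred. A minimal usage sketch (`my_llm` is a hypothetical stand-in for any `holmes.core.llm.LLM` implementation, not a fixture from the package):

    # Hypothetical usage sketch -- `my_llm` stands in for any LLM implementation
    from holmes.core.truncation.compaction import compact_conversation_history

    history = [
        {"role": "system", "content": "You are a troubleshooting assistant."},
        {"role": "user", "content": "Why is my pod crash-looping?"},
        {"role": "assistant", "content": "...long investigation transcript..."},
    ]

    # On success the result is [original system prompt, LLM summary, compaction
    # notice]; on an unexpected LLM response the original history is returned.
    compacted = compact_conversation_history(history, llm=my_llm)
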
holmes/core/truncation/dal_truncation_utils.py ADDED
@@ -0,0 +1,23 @@
+ from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+
+
+ def truncate_string(data_str: str) -> str:
+     if data_str and len(data_str) > MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION:
+         return (
+             data_str[:MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION]
+             + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
+         )
+     return data_str
+
+
+ def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
+     if (
+         not MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+         or MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION <= 0
+     ):
+         return
+
+     for evidence in evidence_list:
+         data = evidence.get("data")
+         if data:
+             evidence["data"] = truncate_string(str(data))
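The behavior is easiest to see on a toy evidence list. A quick sketch, assuming the `MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION` environment variable is set to a positive value:

    # Hypothetical sketch: oversized "data" fields are cut in place and suffixed
    # with the truncation marker; entries under the limit are left untouched.
    evidence = [{"data": "x" * 10_000_000}, {"data": "short"}]
    truncate_evidences_entities_if_necessary(evidence)
    # evidence[0]["data"] now ends with
    # "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
    # evidence[1]["data"] is unchanged
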
holmes/core/truncation/input_context_window_limiter.py ADDED
@@ -0,0 +1,218 @@
+ import logging
+ from typing import Any, Optional
+ from pydantic import BaseModel
+ import sentry_sdk
+ from holmes.common.env_vars import (
+     ENABLE_CONVERSATION_HISTORY_COMPACTION,
+     MAX_OUTPUT_TOKEN_RESERVATION,
+ )
+ from holmes.core.llm import (
+     LLM,
+     TokenCountMetadata,
+     get_context_window_compaction_threshold_pct,
+ )
+ from holmes.core.models import TruncationMetadata, TruncationResult
+ from holmes.core.truncation.compaction import compact_conversation_history
+ from holmes.utils import sentry_helper
+ from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+ TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+ def _truncate_tool_message(
+     msg: dict, allocated_space: int, needed_space: int
+ ) -> TruncationMetadata:
+     msg_content = msg["content"]
+     tool_call_id = msg["tool_call_id"]
+     tool_name = msg["name"]
+
+     # Ensure the indicator fits in the allocated space
+     if allocated_space > len(TRUNCATION_NOTICE):
+         original = msg_content if isinstance(msg_content, str) else str(msg_content)
+         msg["content"] = (
+             original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+         )
+         end_index = allocated_space - len(TRUNCATION_NOTICE)
+     else:
+         msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+         end_index = allocated_space
+
+     msg.pop("token_count", None)  # Remove token_count if present
+     logging.info(
+         f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+     )
+     truncation_metadata = TruncationMetadata(
+         tool_call_id=tool_call_id,
+         start_index=0,
+         end_index=end_index,
+         tool_name=tool_name,
+         original_token_count=needed_space,
+     )
+     return truncation_metadata
+
+
+ # TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+ # However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+ # We should fix this in the future
+ # TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
+ # token truncation and not character truncation
+ def truncate_messages_to_fit_context(
+     messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+ ) -> TruncationResult:
+     """
+     Helper function to truncate tool messages to fit within context limits.
+
+     Args:
+         messages: List of message dictionaries with roles and content
+         max_context_size: Maximum context window size for the model
+         maximum_output_token: Maximum tokens reserved for model output
+         count_tokens_fn: Function to count tokens for a list of messages
+
+     Returns:
+         Modified list of messages with truncated tool responses
+
+     Raises:
+         Exception: If non-tool messages exceed available context space
+     """
+     messages_except_tools = [
+         message for message in messages if message["role"] != "tool"
+     ]
+     tokens = count_tokens_fn(messages_except_tools)
+     message_size_without_tools = tokens.total_tokens
+
+     tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+     reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+     if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+         logging.error(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+         )
+         raise Exception(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+         )
+
+     if len(tool_call_messages) == 0:
+         return TruncationResult(truncated_messages=messages, truncations=[])
+
+     available_space = (
+         max_context_size - message_size_without_tools - reserved_for_output_tokens
+     )
+     remaining_space = available_space
+     tool_call_messages.sort(
+         key=lambda x: count_tokens_fn(
+             [{"role": "tool", "content": x["content"]}]
+         ).total_tokens
+     )
+
+     truncations = []
+
+     # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+     # Small tools can often get exactly what they need, while larger tools may need to be truncated
+     # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+     for i, msg in enumerate(tool_call_messages):
+         remaining_tools = len(tool_call_messages) - i
+         max_allocation = remaining_space // remaining_tools
+         needed_space = count_tokens_fn(
+             [{"role": "tool", "content": msg["content"]}]
+         ).total_tokens
+         allocated_space = min(needed_space, max_allocation)
+
+         if needed_space > allocated_space:
+             truncation_metadata = _truncate_tool_message(
+                 msg, allocated_space, needed_space
+             )
+             truncations.append(truncation_metadata)
+
+         remaining_space -= allocated_space
+
+     if truncations:
+         sentry_helper.capture_tool_truncations(truncations)
+
+     return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+ class ContextWindowLimiterOutput(BaseModel):
+     metadata: dict
+     messages: list[dict]
+     events: list[StreamMessage]
+     max_context_size: int
+     maximum_output_token: int
+     tokens: TokenCountMetadata
+     conversation_history_compacted: bool
+
+
+ @sentry_sdk.trace
+ def limit_input_context_window(
+     llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+ ) -> ContextWindowLimiterOutput:
+     events = []
+     metadata = {}
+     initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     max_context_size = llm.get_context_window_size()
+     maximum_output_token = llm.get_maximum_output_token()
+     conversation_history_compacted = False
+     if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+         initial_tokens.total_tokens + maximum_output_token
+     ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+         compacted_messages = compact_conversation_history(
+             original_conversation_history=messages, llm=llm
+         )
+         compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+         compacted_total_tokens = compacted_tokens.total_tokens
+
+         if compacted_total_tokens < initial_tokens.total_tokens:
+             messages = compacted_messages
+             compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+             logging.info(compaction_message)
+             conversation_history_compacted = True
+             events.append(
+                 StreamMessage(
+                     event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                     data={
+                         "content": compaction_message,
+                         "messages": compacted_messages,
+                         "metadata": {
+                             "initial_tokens": initial_tokens.total_tokens,
+                             "compacted_tokens": compacted_total_tokens,
+                         },
+                     },
+                 )
+             )
+             events.append(
+                 StreamMessage(
+                     event=StreamEvents.AI_MESSAGE,
+                     data={"content": compaction_message},
+                 )
+             )
+         else:
+             logging.debug(
+                 f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+             )
+
+     tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     if (tokens.total_tokens + maximum_output_token) > max_context_size:
+         # Compaction was not sufficient. Truncating messages.
+         truncated_res = truncate_messages_to_fit_context(
+             messages=messages,
+             max_context_size=max_context_size,
+             maximum_output_token=maximum_output_token,
+             count_tokens_fn=llm.count_tokens,
+         )
+         metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+         messages = truncated_res.truncated_messages
+
+         # recount after truncation
+         tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+     else:
+         metadata["truncations"] = []
+
+     return ContextWindowLimiterOutput(
+         events=events,
+         messages=messages,
+         metadata=metadata,
+         max_context_size=max_context_size,
+         maximum_output_token=maximum_output_token,
+         tokens=tokens,
+         conversation_history_compacted=conversation_history_compacted,
+     )
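The limiter first tries compaction and only falls back to truncation when the messages still exceed the window. The subtle piece is the smallest-first allocation loop in `truncate_messages_to_fit_context`: each tool response may take at most an equal share of whatever budget remains, and anything a small response does not use rolls over to the larger ones sorted after it. A self-contained sketch of that strategy (standalone illustration, not code from the package; the token counts are made up):

    # Standalone sketch of the fair-share allocation used above (not package code)
    def allocate(needs: list[int], budget: int) -> list[int]:
        """Smallest-first fair allocation: each item may take at most an equal
        share of the remaining budget; unused share rolls over to larger items."""
        allocations = []
        remaining = budget
        for i, need in enumerate(sorted(needs)):
            share = remaining // (len(needs) - i)  # equal share of what is left
            granted = min(need, share)  # small items take only what they need
            allocations.append(granted)
            remaining -= granted  # surplus becomes available to larger items
        return allocations

    # allocate([100, 500, 4000], budget=2000) == [100, 500, 1400]
    # The two small tool responses fit untouched; only the largest is truncated.
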
holmes/interactive.py CHANGED
@@ -26,9 +26,16 @@ from prompt_toolkit.widgets import TextArea
  from pygments.lexers import guess_lexer
  from rich.console import Console
  from rich.markdown import Markdown, Panel
+ from rich.markup import escape

  from holmes.common.env_vars import ENABLE_CLI_TOOL_APPROVAL
  from holmes.core.config import config_path_dir
+ from holmes.core.feedback import (
+     PRIVACY_NOTICE_BANNER,
+     Feedback,
+     FeedbackCallback,
+     UserFeedback,
+ )
  from holmes.core.prompt import build_initial_ask_messages
  from holmes.core.tool_calling_llm import ToolCallingLLM, ToolCallResult
  from holmes.core.tools import StructuredToolResult, pretty_print_toolset_status
@@ -43,6 +50,7 @@ from holmes.utils.colors import (
  )
  from holmes.utils.console.consts import agent_name
  from holmes.version import check_version_async
+ import re


  class SlashCommands(Enum):
@@ -62,19 +70,25 @@ class SlashCommands(Enum):
      )
      CONTEXT = ("/context", "Show conversation context size and token count")
      SHOW = ("/show", "Show specific tool output in scrollable view")
+     FEEDBACK = ("/feedback", "Provide feedback on the agent's response")

      def __init__(self, command, description):
          self.command = command
          self.description = description


- SLASH_COMMANDS_REFERENCE = {cmd.command: cmd.description for cmd in SlashCommands}
- ALL_SLASH_COMMANDS = [cmd.command for cmd in SlashCommands]
-
-
  class SlashCommandCompleter(Completer):
-     def __init__(self):
-         self.commands = SLASH_COMMANDS_REFERENCE
+     def __init__(self, unsupported_commands: Optional[List[str]] = None):
+         # Build commands dictionary, excluding unsupported commands
+         all_commands = {cmd.command: cmd.description for cmd in SlashCommands}
+         if unsupported_commands:
+             self.commands = {
+                 cmd: desc
+                 for cmd, desc in all_commands.items()
+                 if cmd not in unsupported_commands
+             }
+         else:
+             self.commands = all_commands

      def get_completions(self, document, complete_event):
          text = document.text_before_cursor
@@ -233,6 +247,13 @@ def build_modal_title(tool_call: ToolCallResult, wrap_status: str) -> str:
      return f"{tool_call.description} (exit: q, nav: ↑↓/j/k/g/G/d/u/f/b/space, wrap: w [{wrap_status}])"


+ def strip_ansi_codes(text: str) -> str:
+     ansi_escape_pattern = re.compile(
+         r"\x1b\[[0-9;]*[a-zA-Z]|\033\[[0-9;]*[a-zA-Z]|\^\[\[[0-9;]*[a-zA-Z]"
+     )
+     return ansi_escape_pattern.sub("", text)
+
+
  def detect_lexer(content: str) -> Optional[PygmentsLexer]:
      """
      Detect appropriate lexer for content using Pygments' built-in detection.
@@ -314,6 +335,7 @@ def show_tool_output_modal(tool_call: ToolCallResult, console: Console) -> None:
      try:
          # Get the full output
          output = tool_call.result.get_stringified_data()
+         output = strip_ansi_codes(output)
          title = build_modal_title(tool_call, "off")  # Word wrap starts disabled

          # Detect appropriate syntax highlighting
@@ -467,10 +489,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
          return

      # Calculate context statistics
-     total_tokens = ai.llm.count_tokens_for_message(messages)
+     tokens_metadata = ai.llm.count_tokens(
+         messages
+     )  # TODO: pass tools to also count tokens used by input tools
      max_context_size = ai.llm.get_context_window_size()
      max_output_tokens = ai.llm.get_maximum_output_token()
-     available_tokens = max_context_size - total_tokens - max_output_tokens
+     available_tokens = (
+         max_context_size - tokens_metadata.total_tokens - max_output_tokens
+     )

      # Analyze token distribution by role and tool calls
      role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -479,19 +505,21 @@

      for msg in messages:
          role = msg.get("role", "unknown")
-         msg_tokens = ai.llm.count_tokens_for_message([msg])
-         role_token_usage[role] += msg_tokens
+         message_tokens = ai.llm.count_tokens(
+             [msg]
+         )  # TODO: pass tools to also count tokens used by input tools
+         role_token_usage[role] += message_tokens.total_tokens

          # Track individual tool usage
          if role == "tool":
              tool_name = msg.get("name", "unknown_tool")
-             tool_token_usage[tool_name] += msg_tokens
+             tool_token_usage[tool_name] += message_tokens.total_tokens
              tool_call_counts[tool_name] += 1

      # Display context information
      console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
      console.print(
-         f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+         f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
      )
      console.print(
          f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -502,7 +530,11 @@
      for role in ["system", "user", "assistant", "tool"]:
          if role in role_token_usage:
              tokens = role_token_usage[role]
-             percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
+             percentage = (
+                 (tokens / tokens_metadata.total_tokens) * 100
+                 if tokens_metadata.total_tokens > 0
+                 else 0
+             )
              role_name = {
                  "system": "system prompt",
                  "user": "user messages",
@@ -811,6 +843,88 @@ def handle_last_command(
      )


+ def handle_feedback_command(
+     style: Style,
+     console: Console,
+     feedback: Feedback,
+     feedback_callback: FeedbackCallback,
+ ) -> None:
+     """Handle the /feedback command to collect user feedback."""
+     try:
+         # Create a temporary session without history for feedback prompts
+         temp_session = PromptSession(history=InMemoryHistory())  # type: ignore
+         # Prominent privacy notice to users
+         console.print(
+             f"[bold {HELP_COLOR}]Privacy Notice:[/bold {HELP_COLOR}] {PRIVACY_NOTICE_BANNER}"
+         )
+         # A "Cancel" button of equal discoverability to "Sent" or "Submit" buttons must be made available
+         console.print(
+             "[bold yellow]💡 Tip: Press Ctrl+C at any time to cancel feedback[/bold yellow]"
+         )
+
+         # Ask for thumbs up/down rating with validation
+         while True:
+             rating_prompt = temp_session.prompt(
+                 [("class:prompt", "Was this response useful to you? 👍(y)/👎(n): ")],
+                 style=style,
+             )
+
+             rating_lower = rating_prompt.lower().strip()
+             if rating_lower in ["y", "n"]:
+                 break
+             else:
+                 console.print(
+                     "[bold red]Please enter only 'y' for yes or 'n' for no.[/bold red]"
+                 )
+
+         # Determine rating
+         is_positive = rating_lower == "y"
+
+         # Ask for additional comments
+         comment_prompt = temp_session.prompt(
+             [
+                 (
+                     "class:prompt",
+                     "Do you want to provide any additional comments for feedback? (press Enter to skip):\n",
+                 )
+             ],
+             style=style,
+         )
+
+         comment = comment_prompt.strip() if comment_prompt.strip() else None
+
+         # Create UserFeedback object
+         user_feedback = UserFeedback(is_positive, comment)
+
+         if comment:
+             console.print(
+                 f'[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, "{escape(comment)}")[/bold green]'
+             )
+         else:
+             console.print(
+                 f"[bold green]✓ Feedback recorded (rating={user_feedback.rating_emoji}, no comment)[/bold green]"
+             )
+
+         # Final confirmation before submitting
+         final_confirmation = temp_session.prompt(
+             [("class:prompt", "\nDo you want to submit this feedback? (Y/n): ")],
+             style=style,
+         )
+
+         # If user says no, cancel the feedback
+         if final_confirmation.lower().strip().startswith("n"):
+             console.print("[dim]Feedback cancelled.[/dim]")
+             return
+
+         feedback.user_feedback = user_feedback
+         feedback_callback(feedback)
+         console.print("[bold green]Thank you for your feedback! 🙏[/bold green]")
+
+     except KeyboardInterrupt:
+         console.print("[dim]Feedback cancelled.[/dim]")
+         return
+
+
  def display_recent_tool_outputs(
      tool_calls: List[ToolCallResult],
      console: Console,
@@ -823,7 +937,10 @@
      for tool_call in tool_calls:
          tool_index = find_tool_index_in_history(tool_call, all_tool_calls_history)
          preview_output = format_tool_call_output(tool_call, tool_index)
-         title = f"{tool_call.result.status.to_emoji()} {tool_call.description} -> returned {tool_call.result.return_code}"
+         title = (
+             f"{tool_call.result.status.to_emoji()} {tool_call.description} -> "
+             f"returned {tool_call.result.return_code}"
+         )

          console.print(
              Panel(
@@ -846,6 +963,7 @@ def run_interactive_loop(
      runbooks=None,
      system_prompt_additions: Optional[str] = None,
      check_version: bool = True,
+     feedback_callback: Optional[FeedbackCallback] = None,
  ) -> None:
      # Initialize tracer - use DummyTracer if no tracer provided
      if tracer is None:
@@ -874,7 +992,11 @@
          ai.approval_callback = approval_handler

      # Create merged completer with slash commands, conditional executables, show command, and smart paths
-     slash_completer = SlashCommandCompleter()
+     # TODO: remove unsupported_commands support once we implement feedback callback
+     unsupported_commands = []
+     if feedback_callback is None:
+         unsupported_commands.append(SlashCommands.FEEDBACK.command)
+     slash_completer = SlashCommandCompleter(unsupported_commands)
      executable_completer = ConditionalExecutableCompleter()
      show_completer = ShowCommandCompleter()
      path_completer = SmartPathCompleter()
@@ -891,6 +1013,9 @@
      if initial_user_input:
          history.append_string(initial_user_input)

+     feedback = Feedback()
+     feedback.metadata.update_llm(ai.llm)
+
      # Create custom key bindings for Ctrl+C behavior
      bindings = KeyBindings()
      status_message = ""
@@ -963,7 +1088,15 @@

      input_prompt = [("class:prompt", "User: ")]

-     console.print(WELCOME_BANNER)
+     # TODO: merge the /feedback command description to WELCOME_BANNER once we implement feedback callback
+     welcome_banner = WELCOME_BANNER
+     if feedback_callback:
+         welcome_banner = (
+             welcome_banner.rstrip(".")
+             + f", '{SlashCommands.FEEDBACK.command}' to share your thoughts."
+         )
+     console.print(welcome_banner)
+
      if initial_user_input:
          console.print(
              f"[bold {USER_COLOR}]User:[/bold {USER_COLOR}] {initial_user_input}"
@@ -985,14 +1118,18 @@
              if user_input.startswith("/"):
                  original_input = user_input.strip()
                  command = original_input.lower()
-
                  # Handle prefix matching for slash commands
-                 matches = [cmd for cmd in ALL_SLASH_COMMANDS if cmd.startswith(command)]
+                 matches = [
+                     cmd
+                     for cmd in slash_completer.commands.keys()
+                     if cmd.startswith(command)
+                 ]
                  if len(matches) == 1:
                      command = matches[0]
                  elif len(matches) > 1:
                      console.print(
-                         f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
+                         f"[bold {ERROR_COLOR}]Ambiguous command '{command}'. "
+                         f"Matches: {', '.join(matches)}[/bold {ERROR_COLOR}]"
                      )
                      continue

@@ -1002,13 +1139,20 @@
                      console.print(
                          f"[bold {HELP_COLOR}]Available commands:[/bold {HELP_COLOR}]"
                      )
-                     for cmd, description in SLASH_COMMANDS_REFERENCE.items():
+                     for cmd, description in slash_completer.commands.items():
+                         # Only show feedback command if callback is available
+                         if (
+                             cmd == SlashCommands.FEEDBACK.command
+                             and feedback_callback is None
+                         ):
+                             continue
                          console.print(f" [bold]{cmd}[/bold] - {description}")
                      continue
                  elif command == SlashCommands.CLEAR.command:
                      console.clear()
                      console.print(
-                         f"[bold {STATUS_COLOR}]Screen cleared and context reset. You can now ask a new question.[/bold {STATUS_COLOR}]"
+                         f"[bold {STATUS_COLOR}]Screen cleared and context reset. "
+                         f"You can now ask a new question.[/bold {STATUS_COLOR}]"
                      )
                      messages = None
                      last_response = None
@@ -1052,6 +1196,12 @@
                      if shared_input is None:
                          continue  # User chose not to share or no output, continue to next input
                      user_input = shared_input
+                 elif (
+                     command == SlashCommands.FEEDBACK.command
+                     and feedback_callback is not None
+                 ):
+                     handle_feedback_command(style, console, feedback, feedback_callback)
+                     continue
                  else:
                      console.print(f"Unknown command: {command}")
                      continue
@@ -1091,6 +1241,7 @@

              messages = response.messages  # type: ignore
              last_response = response
+             feedback.metadata.add_llm_response(user_input, response.result)

              if response.tool_calls:
                  all_tool_calls_history.extend(response.tool_calls)
@@ -1111,9 +1262,6 @@
                      )
                  )

-             if trace_url:
-                 console.print(f"🔍 View trace: {trace_url}")
-
              console.print("")
          except typer.Abort:
              break
@@ -1122,6 +1270,11 @@
          except Exception as e:
              logging.error("An error occurred during interactive mode:", exc_info=e)
              console.print(f"[bold {ERROR_COLOR}]Error: {e}[/bold {ERROR_COLOR}]")
+         finally:
+             # Print trace URL for debugging (works for both success and error cases)
+             trace_url = tracer.get_trace_url()
+             if trace_url:
+                 console.print(f"🔍 View trace: {trace_url}")

      console.print(
          f"[bold {STATUS_COLOR}]Exiting interactive mode.[/bold {STATUS_COLOR}]"