holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic; see the security advisory for more details.

Files changed (125)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +17 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +13 -1
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +230 -157
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/toolset_manager.py +1 -5
  19. holmes/core/tracing.py +4 -3
  20. holmes/interactive.py +1 -0
  21. holmes/main.py +9 -2
  22. holmes/plugins/prompts/__init__.py +7 -1
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  32. holmes/plugins/runbooks/CLAUDE.md +85 -0
  33. holmes/plugins/runbooks/README.md +24 -0
  34. holmes/plugins/toolsets/__init__.py +19 -6
  35. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  44. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  45. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  46. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  47. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  48. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  49. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  50. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  51. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  52. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  53. holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
  54. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  55. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  56. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  57. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  58. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  59. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  60. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  61. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  62. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  63. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  64. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  65. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  66. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  67. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  68. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  69. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  70. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  71. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  72. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  73. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  74. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  75. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  76. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  77. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  78. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  79. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  80. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  81. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  82. holmes/plugins/toolsets/coralogix/api.py +6 -6
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  84. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  85. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  86. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  87. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  88. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  89. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  90. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  91. holmes/plugins/toolsets/git.py +15 -15
  92. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  93. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  94. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  95. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  96. holmes/plugins/toolsets/internet/internet.py +2 -1
  97. holmes/plugins/toolsets/internet/notion.py +2 -1
  98. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  99. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  100. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  101. holmes/plugins/toolsets/investigator/model.py +15 -0
  102. holmes/plugins/toolsets/kafka.py +14 -7
  103. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  104. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  105. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  106. holmes/plugins/toolsets/newrelic.py +8 -3
  107. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  108. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  109. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  110. holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
  111. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  112. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  113. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  114. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  115. holmes/plugins/toolsets/utils.py +8 -1
  116. holmes/utils/console/logging.py +6 -1
  117. holmes/utils/llms.py +20 -0
  118. holmes/utils/stream.py +90 -0
  119. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
  120. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
  121. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  122. holmes/utils/robusta.py +0 -9
  123. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
  124. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
  125. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,9 @@
1
1
  import logging
2
2
  import re
3
3
  import subprocess
4
- from typing import Optional, List, Tuple
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from datetime import datetime, timezone
6
+ from typing import Optional, List, Tuple, Set
5
7
  from pydantic import BaseModel
6
8
 
7
9
  from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
@@ -14,8 +16,10 @@ from holmes.core.tools import (
14
16
  from holmes.plugins.toolsets.logging_utils.logging_api import (
15
17
  BasePodLoggingToolset,
16
18
  FetchPodLogsParams,
19
+ LoggingCapability,
17
20
  LoggingConfig,
18
21
  PodLoggingTool,
22
+ DEFAULT_TIME_SPAN_SECONDS,
19
23
  )
20
24
  from holmes.plugins.toolsets.utils import process_timestamps_to_int, to_unix_ms
21
25
 
@@ -46,6 +50,14 @@ class LogResult(BaseModel):
46
50
  class KubernetesLogsToolset(BasePodLoggingToolset):
47
51
  """Implementation of the unified logging API for Kubernetes logs using kubectl commands"""
48
52
 
53
+ @property
54
+ def supported_capabilities(self) -> Set[LoggingCapability]:
55
+ """Kubernetes native logging supports regex and exclude filters"""
56
+ return {
57
+ LoggingCapability.REGEX_FILTER,
58
+ LoggingCapability.EXCLUDE_FILTER,
59
+ }
60
+
49
61
  def __init__(self):
50
62
  prerequisite = StaticPrerequisite(enabled=False, disabled_reason="Initializing")
51
63
  super().__init__(
@@ -91,17 +103,47 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
91
103
  try:
92
104
  all_logs: list[StructuredLog] = []
93
105
 
94
- # Fetch previous logs
95
- previous_logs_result = self._fetch_kubectl_logs(
96
- params=params,
97
- previous=True,
98
- )
106
+ # Fetch previous and current logs in parallel
107
+ with ThreadPoolExecutor(max_workers=2) as executor:
108
+ future_previous = executor.submit(
109
+ self._fetch_kubectl_logs, params, previous=True
110
+ )
111
+ future_current = executor.submit(
112
+ self._fetch_kubectl_logs, params, previous=False
113
+ )
99
114
 
100
- # Fetch current logs
101
- current_logs_result = self._fetch_kubectl_logs(
102
- params=params,
103
- previous=False,
104
- )
115
+ futures = {future_previous: "previous", future_current: "current"}
116
+ previous_logs_result = None
117
+ current_logs_result = None
118
+
119
+ for future in as_completed(futures):
120
+ log_type = futures[future]
121
+ try:
122
+ result = future.result()
123
+ if log_type == "previous":
124
+ previous_logs_result = result
125
+ else:
126
+ current_logs_result = result
127
+ except Exception as e:
128
+ logging.error(f"Error fetching {log_type} logs: {str(e)}")
129
+ error_result = LogResult(
130
+ logs=[],
131
+ error=f"Error fetching {log_type} logs: {str(e)}",
132
+ return_code=None,
133
+ has_multiple_containers=False,
134
+ )
135
+ if log_type == "previous":
136
+ previous_logs_result = error_result
137
+ else:
138
+ current_logs_result = error_result
139
+
140
+ # Ensure both results are not None (they should always be set by the loop)
141
+ if current_logs_result is None or previous_logs_result is None:
142
+ return StructuredToolResult(
143
+ status=ToolResultStatus.ERROR,
144
+ error="Internal error: Failed to fetch logs",
145
+ params=params.model_dump(),
146
+ )
105
147
 
106
148
  return_code: Optional[int] = current_logs_result.return_code
107
149
 
@@ -126,24 +168,58 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
126
168
  return_code=return_code,
127
169
  )
128
170
 
129
- all_logs = filter_logs(all_logs, params)
171
+ # Track counts for metadata
172
+ total_count = len(all_logs)
173
+ (
174
+ filtered_logs,
175
+ filtered_count_before_limit,
176
+ used_substring_fallback,
177
+ exclude_used_substring_fallback,
178
+ removed_by_include_filter,
179
+ removed_by_exclude_filter,
180
+ ) = filter_logs(all_logs, params)
181
+
182
+ has_multiple_containers = (
183
+ previous_logs_result.has_multiple_containers
184
+ or current_logs_result.has_multiple_containers
185
+ )
186
+
187
+ formatted_logs = format_logs(
188
+ logs=filtered_logs,
189
+ display_container_name=has_multiple_containers,
190
+ )
191
+
192
+ # Generate metadata
193
+ metadata_lines = add_metadata(
194
+ params=params,
195
+ total_count=total_count,
196
+ filtered_logs=filtered_logs,
197
+ filtered_count_before_limit=filtered_count_before_limit,
198
+ used_substring_fallback=used_substring_fallback,
199
+ exclude_used_substring_fallback=exclude_used_substring_fallback,
200
+ removed_by_include_filter=removed_by_include_filter,
201
+ removed_by_exclude_filter=removed_by_exclude_filter,
202
+ has_multiple_containers=has_multiple_containers,
203
+ )
130
204
 
131
- if not all_logs:
205
+ # Check if we have any logs to return
206
+ if len(filtered_logs) == 0:
207
+ # Return NO_DATA status when there are no logs
132
208
  return StructuredToolResult(
133
209
  status=ToolResultStatus.NO_DATA,
210
+ data="\n".join(
211
+ metadata_lines
212
+ ), # Still include metadata for context
134
213
  params=params.model_dump(),
135
214
  return_code=return_code,
136
215
  )
137
216
 
138
- formatted_logs = format_logs(
139
- logs=all_logs,
140
- display_container_name=previous_logs_result.has_multiple_containers
141
- or current_logs_result.has_multiple_containers,
142
- )
217
+ # Put metadata at the end
218
+ response_data = formatted_logs + "\n" + "\n".join(metadata_lines)
143
219
 
144
220
  return StructuredToolResult(
145
221
  status=ToolResultStatus.SUCCESS,
146
- data=formatted_logs,
222
+ data=response_data,
147
223
  params=params.model_dump(),
148
224
  return_code=return_code,
149
225
  )
@@ -318,6 +394,287 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
318
394
  )
319
395
 
320
396
 
397
+ # TODO: review this
398
+ def format_relative_time(timestamp_str: str, current_time: datetime) -> str:
399
+ """Format a timestamp as relative to current time (e.g., '2 hours 15 minutes ago')"""
400
+ try:
401
+ # Handle relative timestamps (negative numbers)
402
+ if timestamp_str and timestamp_str.startswith("-"):
403
+ seconds = abs(int(timestamp_str))
404
+ if seconds < 60:
405
+ return f"{seconds} second{'s' if seconds != 1 else ''} before end time"
406
+ minutes = seconds // 60
407
+ if minutes < 60:
408
+ return f"{minutes} minute{'s' if minutes != 1 else ''} before end time"
409
+ hours = minutes // 60
410
+ if hours < 24:
411
+ return f"{hours} hour{'s' if hours != 1 else ''} before end time"
412
+ days = hours // 24
413
+ return f"{days} day{'s' if days != 1 else ''} before end time"
414
+
415
+ # Parse the timestamp
416
+ timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
417
+
418
+ # Calculate the difference
419
+ diff = current_time - timestamp
420
+
421
+ # If in the future
422
+ if diff.total_seconds() < 0:
423
+ diff = timestamp - current_time
424
+ suffix = "from now"
425
+ else:
426
+ suffix = "ago"
427
+
428
+ # Format the difference
429
+ days = diff.days
430
+ hours = diff.seconds // 3600
431
+ minutes = (diff.seconds % 3600) // 60
432
+
433
+ parts = []
434
+ if days > 0:
435
+ parts.append(f"{days} day{'s' if days != 1 else ''}")
436
+ if hours > 0:
437
+ parts.append(f"{hours} hour{'s' if hours != 1 else ''}")
438
+ if minutes > 0 and days == 0: # Only show minutes if less than a day
439
+ parts.append(f"{minutes} minute{'s' if minutes != 1 else ''}")
440
+
441
+ if not parts:
442
+ if diff.seconds < 60:
443
+ return "just now" if suffix == "ago" else "right now"
444
+
445
+ return f"{' '.join(parts)} {suffix}"
446
+ except Exception:
447
+ # If we can't parse it, just return the original
448
+ return timestamp_str
449
+
450
+
451
+ # TODO: review this
452
+ def add_metadata(
453
+ params: FetchPodLogsParams,
454
+ total_count: int,
455
+ filtered_logs: List[StructuredLog],
456
+ filtered_count_before_limit: int,
457
+ used_substring_fallback: bool,
458
+ exclude_used_substring_fallback: bool,
459
+ removed_by_include_filter: int,
460
+ removed_by_exclude_filter: int,
461
+ has_multiple_containers: bool,
462
+ ) -> List[str]:
463
+ """Generate all metadata for the log query"""
464
+ metadata_lines = [
465
+ "\n" + "=" * 80,
466
+ "LOG QUERY METADATA",
467
+ "=" * 80,
468
+ ]
469
+
470
+ # Time Context section
471
+ current_time = datetime.now(timezone.utc)
472
+ current_time_str = current_time.strftime("%Y-%m-%dT%H:%M:%SZ")
473
+ metadata_lines.extend(
474
+ [
475
+ "Time Context:",
476
+ f"- Query executed at: {current_time_str} (UTC)",
477
+ "",
478
+ "Query Parameters:",
479
+ f"- Pod: {params.pod_name}",
480
+ f"- Namespace: {params.namespace}",
481
+ "- Log source: Current and previous container logs",
482
+ ]
483
+ )
484
+
485
+ # Always show time range info
486
+ if params.start_time or params.end_time:
487
+ start_str = params.start_time or "beginning"
488
+ end_str = params.end_time or "now"
489
+
490
+ # Calculate relative times and duration
491
+ relative_parts = []
492
+
493
+ # Parse timestamps for duration calculation
494
+ start_dt = None
495
+ end_dt = None
496
+
497
+ if params.start_time and params.start_time != "beginning":
498
+ start_relative = format_relative_time(params.start_time, current_time)
499
+ relative_parts.append(f"Started: {start_relative}")
500
+ try:
501
+ if not params.start_time.startswith("-"):
502
+ start_dt = datetime.fromisoformat(
503
+ params.start_time.replace("Z", "+00:00")
504
+ )
505
+ except Exception:
506
+ pass
507
+
508
+ if params.end_time and params.end_time != "now":
509
+ end_relative = format_relative_time(params.end_time, current_time)
510
+ relative_parts.append(f"Ended: {end_relative}")
511
+ try:
512
+ end_dt = datetime.fromisoformat(params.end_time.replace("Z", "+00:00"))
513
+ except Exception:
514
+ pass
515
+ else:
516
+ # If end_time is "now" or not specified, use current time
517
+ end_dt = current_time
518
+
519
+ # Calculate duration if we have both timestamps
520
+ if start_dt and end_dt:
521
+ duration = end_dt - start_dt
522
+ if duration.total_seconds() > 0:
523
+ days = duration.days
524
+ hours = duration.seconds // 3600
525
+ minutes = (duration.seconds % 3600) // 60
526
+
527
+ duration_parts = []
528
+ if days > 0:
529
+ duration_parts.append(f"{days} day{'s' if days != 1 else ''}")
530
+ if hours > 0:
531
+ duration_parts.append(f"{hours} hour{'s' if hours != 1 else ''}")
532
+ if minutes > 0:
533
+ duration_parts.append(
534
+ f"{minutes} minute{'s' if minutes != 1 else ''}"
535
+ )
536
+
537
+ if duration_parts:
538
+ duration_str = " ".join(duration_parts)
539
+ else:
540
+ duration_str = "less than 1 minute"
541
+
542
+ metadata_lines.append(
543
+ f"- Log time range: {start_str} (UTC) to {end_str} (UTC) ({duration_str})"
544
+ )
545
+ else:
546
+ metadata_lines.append(
547
+ f"- Log time range: {start_str} (UTC) to {end_str} (UTC)"
548
+ )
549
+ else:
550
+ metadata_lines.append(
551
+ f"- Log time range: {start_str} (UTC) to {end_str} (UTC)"
552
+ )
553
+
554
+ if relative_parts:
555
+ metadata_lines.append(f" {' | '.join(relative_parts)}")
556
+ else:
557
+ metadata_lines.append(
558
+ "- Log time range: None (fetching logs available via `kubectl logs`)"
559
+ )
560
+
561
+ # Add container info if multiple containers
562
+ if has_multiple_containers:
563
+ metadata_lines.append("- Container(s): Multiple containers")
564
+
565
+ metadata_lines.extend(
566
+ [
567
+ "",
568
+ f"Total logs found before filtering: {total_count:,}",
569
+ ]
570
+ )
571
+
572
+ # Only show filtering details if filters were applied
573
+ if params.filter or params.exclude_filter:
574
+ metadata_lines.append("")
575
+ metadata_lines.append("Filtering Applied:")
576
+
577
+ if params.filter:
578
+ if used_substring_fallback:
579
+ metadata_lines.append(
580
+ f" ⚠️ Filter '{params.filter}' is not valid regex, using substring match"
581
+ )
582
+ matched_by_filter = total_count - removed_by_include_filter
583
+ percentage = (matched_by_filter / total_count * 100) if total_count > 0 else 0
584
+ metadata_lines.append(f" 1. Include filter: '{params.filter}'")
585
+ metadata_lines.append(
586
+ f" → Matched: {matched_by_filter:,} logs ({percentage:.1f}% of total)"
587
+ )
588
+
589
+ if params.exclude_filter:
590
+ if exclude_used_substring_fallback:
591
+ metadata_lines.append(
592
+ f" ⚠️ Exclude filter '{params.exclude_filter}' is not valid regex, using substring match"
593
+ )
594
+ metadata_lines.append("")
595
+ metadata_lines.append(f" 2. Exclude filter: '{params.exclude_filter}'")
596
+ metadata_lines.append(f" → Excluded: {removed_by_exclude_filter:,} logs")
597
+ metadata_lines.append(f" → Remaining: {filtered_count_before_limit:,} logs")
598
+
599
+ # Display section
600
+ metadata_lines.append("")
601
+ hit_limit = params.limit is not None and params.limit < filtered_count_before_limit
602
+ if hit_limit and params.limit is not None:
603
+ logs_omitted = filtered_count_before_limit - params.limit
604
+ metadata_lines.append(
605
+ f"Display: Showing latest {params.limit:,} of {filtered_count_before_limit:,} filtered logs ({logs_omitted:,} omitted)"
606
+ )
607
+ else:
608
+ if filtered_count_before_limit == total_count:
609
+ metadata_lines.append(f"Display: Showing all {len(filtered_logs):,} logs")
610
+ else:
611
+ metadata_lines.append(
612
+ f"Display: Showing all {len(filtered_logs):,} filtered logs"
613
+ )
614
+
615
+ # Add contextual hints based on results
616
+ if len(filtered_logs) == 0:
617
+ metadata_lines.append("")
618
+ if params.filter and total_count > 0:
619
+ # Logs exist but none matched the filter
620
+ metadata_lines.append("Result: No logs matched your filters")
621
+ metadata_lines.append("")
622
+ metadata_lines.append("⚠️ Suggestions:")
623
+ metadata_lines.append(" - Try a broader filter pattern")
624
+ metadata_lines.append(
625
+ f" - Remove the filter to see all {total_count:,} available logs"
626
+ )
627
+ metadata_lines.append(
628
+ " - Your filter may be too specific for the log format used"
629
+ )
630
+ else:
631
+ # No logs exist at all
632
+ metadata_lines.append("Result: No logs found for this pod")
633
+ metadata_lines.append("")
634
+ metadata_lines.append("⚠️ Possible reasons:")
635
+ if params.start_time or params.end_time:
636
+ metadata_lines.append(" - Pod was not running during this time period")
637
+ else:
638
+ metadata_lines.append(
639
+ " - Pod may not exist or may have been recently created"
640
+ )
641
+ metadata_lines.append(" - Container might not be logging to stdout/stderr")
642
+ metadata_lines.append(
643
+ " - Logs might be going to a file instead of stdout/stderr"
644
+ )
645
+
646
+ # Only show time range suggestions if a time range was specified
647
+ if params.start_time or params.end_time:
648
+ metadata_lines.append("")
649
+ metadata_lines.append("⚠️ Try:")
650
+ metadata_lines.append(
651
+ " - Remove time range to see ALL available logs (recommended unless you need this specific timeframe)"
652
+ )
653
+ metadata_lines.append(" - Or expand time range (e.g., last 24 hours)")
654
+ else:
655
+ metadata_lines.append("")
656
+ metadata_lines.append("⚠️ Try:")
657
+ metadata_lines.append(
658
+ f" - Check if pod exists: kubectl get pods -n {params.namespace}"
659
+ )
660
+ metadata_lines.append(
661
+ f" - Check pod events: kubectl describe pod {params.pod_name} -n {params.namespace}"
662
+ )
663
+ elif hit_limit:
664
+ metadata_lines.append("")
665
+ metadata_lines.append("⚠️ Hit display limit! Suggestions:")
666
+ metadata_lines.append(
667
+ " - Add exclude_filter to remove noise: exclude_filter='<pattern1>|<pattern2>|<pattern3>'"
668
+ )
669
+ metadata_lines.append(" - Narrow time range to see fewer logs")
670
+ metadata_lines.append(
671
+ " - Use more specific filter: filter='<term1>.*<term2>|<exact-phrase>'"
672
+ )
673
+
674
+ metadata_lines.append("=" * 80)
675
+ return metadata_lines
676
+
677
+
321
678
  def format_logs(logs: List[StructuredLog], display_container_name: bool) -> str:
322
679
  if display_container_name:
323
680
  return "\n".join([f"{log.container or 'N/A'}: {log.content}" for log in logs])
@@ -332,23 +689,84 @@ class TimeFilter(BaseModel):
332
689
 
333
690
  def filter_logs(
334
691
  logs: List[StructuredLog], params: FetchPodLogsParams
335
- ) -> List[StructuredLog]:
692
+ ) -> Tuple[List[StructuredLog], int, bool, bool, int, int]:
336
693
  time_filter: Optional[TimeFilter] = None
337
694
  if params.start_time or params.end_time:
338
695
  start, end = process_timestamps_to_int(
339
696
  start=params.start_time,
340
697
  end=params.end_time,
341
- default_time_span_seconds=3600,
698
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
342
699
  )
343
700
  time_filter = TimeFilter(start_ms=start * 1000, end_ms=end * 1000)
344
701
 
345
702
  filtered_logs = []
703
+ # is this really needed? doesn't kubectl already sort logs for us
346
704
  logs.sort(key=lambda x: x.timestamp_ms or 0)
347
705
 
706
+ # Pre-compile regex patterns if provided
707
+ regex_pattern = None
708
+ exclude_regex_pattern = None
709
+ used_substring_fallback = False
710
+ exclude_used_substring_fallback = False
711
+
712
+ # Track filtering statistics
713
+ removed_by_include_filter = 0
714
+ removed_by_exclude_filter = 0
715
+
716
+ if params.filter:
717
+ try:
718
+ # Try to compile as regex first
719
+ regex_pattern = re.compile(params.filter, re.IGNORECASE)
720
+ except re.error:
721
+ # If not a valid regex, fall back to simple substring matching
722
+ logging.debug(
723
+ f"Filter '{params.filter}' is not a valid regex, using substring matching"
724
+ )
725
+ regex_pattern = None
726
+ used_substring_fallback = True
727
+
728
+ if params.exclude_filter:
729
+ try:
730
+ # Try to compile as regex first
731
+ exclude_regex_pattern = re.compile(params.exclude_filter, re.IGNORECASE)
732
+ except re.error:
733
+ # If not a valid regex, fall back to simple substring matching
734
+ logging.debug(
735
+ f"Exclude filter '{params.exclude_filter}' is not a valid regex, using substring matching"
736
+ )
737
+ exclude_regex_pattern = None
738
+ exclude_used_substring_fallback = True
739
+
348
740
  for log in logs:
349
- if params.filter and params.filter.lower() not in log.content.lower():
350
- # exclude this log
351
- continue
741
+ # Apply inclusion filter
742
+ if params.filter:
743
+ if regex_pattern:
744
+ # Use regex matching
745
+ if not regex_pattern.search(log.content):
746
+ # exclude this log
747
+ removed_by_include_filter += 1
748
+ continue
749
+ else:
750
+ # Fall back to simple substring matching (case-insensitive)
751
+ if params.filter.lower() not in log.content.lower():
752
+ # exclude this log
753
+ removed_by_include_filter += 1
754
+ continue
755
+
756
+ # Apply exclusion filter
757
+ if params.exclude_filter:
758
+ if exclude_regex_pattern:
759
+ # Use regex matching
760
+ if exclude_regex_pattern.search(log.content):
761
+ # exclude this log
762
+ removed_by_exclude_filter += 1
763
+ continue
764
+ else:
765
+ # Fall back to simple substring matching (case-insensitive)
766
+ if params.exclude_filter.lower() in log.content.lower():
767
+ # exclude this log
768
+ removed_by_exclude_filter += 1
769
+ continue
352
770
 
353
771
  if (
354
772
  time_filter
@@ -365,9 +783,20 @@ def filter_logs(
365
783
  else:
366
784
  filtered_logs.append(log)
367
785
 
786
+ # Track count before limiting
787
+ filtered_count_before_limit = len(filtered_logs)
788
+
368
789
  if params.limit and params.limit < len(filtered_logs):
369
790
  filtered_logs = filtered_logs[-params.limit :]
370
- return filtered_logs
791
+
792
+ return (
793
+ filtered_logs,
794
+ filtered_count_before_limit,
795
+ used_substring_fallback,
796
+ exclude_used_substring_fallback,
797
+ removed_by_include_filter,
798
+ removed_by_exclude_filter,
799
+ )
371
800
 
372
801
 
373
802
  def parse_logs(