holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/transformers/registry.py
@@ -0,0 +1,123 @@
+"""
+Transformer registry for managing available transformers.
+"""
+
+from typing import Any, Dict, List, Optional, Type
+
+from .base import BaseTransformer, TransformerError
+
+
+class TransformerRegistry:
+    """
+    Registry for managing transformer types and creating transformer instances.
+
+    This registry provides a centralized way to register transformer classes
+    and create instances based on configuration.
+    """
+
+    def __init__(self):
+        self._transformers: Dict[str, Type[BaseTransformer]] = {}
+
+    def register(self, transformer_class: Type[BaseTransformer]) -> None:
+        """
+        Register a transformer class, using the transformer's name property.
+
+        Args:
+            transformer_class: The transformer class to register
+
+        Raises:
+            ValueError: If name is already registered or transformer_class is invalid
+        """
+        if not issubclass(transformer_class, BaseTransformer):
+            raise ValueError(
+                f"Transformer class must inherit from BaseTransformer, got {transformer_class}"
+            )
+
+        # Get name from the transformer class
+        try:
+            temp_instance = transformer_class()
+            name = temp_instance.name
+        except Exception:
+            # Fallback to class name if instantiation fails
+            name = transformer_class.__name__
+
+        if name in self._transformers:
+            raise ValueError(f"Transformer '{name}' is already registered")
+
+        self._transformers[name] = transformer_class
+
+    def unregister(self, name: str) -> None:
+        """
+        Unregister a transformer by name.
+
+        Args:
+            name: The name of the transformer to unregister
+
+        Raises:
+            KeyError: If transformer name is not registered
+        """
+        if name not in self._transformers:
+            raise KeyError(f"Transformer '{name}' is not registered")
+
+        del self._transformers[name]
+
+    def create_transformer(
+        self, name: str, config: Optional[Dict[str, Any]] = None
+    ) -> BaseTransformer:
+        """
+        Create a transformer instance by name.
+
+        Args:
+            name: The name of the transformer to create
+            config: Optional configuration for the transformer
+
+        Returns:
+            A new transformer instance
+
+        Raises:
+            KeyError: If transformer name is not registered
+            TransformerError: If transformer creation fails
+        """
+        if name not in self._transformers:
+            raise KeyError(f"Transformer '{name}' is not registered")
+
+        transformer_class = self._transformers[name]
+
+        try:
+            # Handle both old-style dict config and new Pydantic models
+            if config is None:
+                return transformer_class()
+            else:
+                # For Pydantic models, pass config as keyword arguments
+                return transformer_class(**config)
+        except Exception as e:
+            raise TransformerError(f"Failed to create transformer '{name}': {e}") from e
+
+    def is_registered(self, name: str) -> bool:
+        """
+        Check if a transformer is registered.
+
+        Args:
+            name: The name to check
+
+        Returns:
+            True if the transformer is registered, False otherwise
+        """
+        return name in self._transformers
+
+    def list_transformers(self) -> List[str]:
+        """
+        Get a list of all registered transformer names.
+
+        Returns:
+            List of registered transformer names
+        """
+        return list(self._transformers.keys())
+
+    def clear(self) -> None:
+        """Clear all registered transformers."""
+        self._transformers.clear()
+
+
+# Global transformer registry instance
+registry = TransformerRegistry()
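For readers skimming the diff, a sketch of how this registry might be used. The import path and method names come from the file above; the example assumes the built-in llm_summarize transformer (added in this release under holmes/core/transformers/llm_summarize.py) registers itself on import, and the config key shown is purely illustrative, not a documented option:

```python
from holmes.core.transformers.registry import registry

# Names of everything registered so far (populated when transformer
# modules are imported, e.g. via holmes/core/transformers/__init__.py).
print(registry.list_transformers())

# Look up and instantiate by name; the config dict is splatted into the
# transformer class as keyword arguments. "prompt" is a guessed key.
if registry.is_registered("llm_summarize"):
    summarizer = registry.create_transformer(
        "llm_summarize", {"prompt": "Summarize this tool output tersely"}
    )
```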
holmes/core/transformers/transformer.py
@@ -0,0 +1,32 @@
+"""
+Configuration class for tool transformers.
+"""
+
+import logging
+from typing import Any, Dict
+
+from pydantic import BaseModel, Field, model_validator
+
+from .registry import registry
+
+
+class Transformer(BaseModel):
+    """
+    Configuration for a tool transformer.
+
+    Each transformer config specifies a transformer type and its parameters.
+    This replaces the previous dict-based configuration with proper type safety.
+    """
+
+    name: str = Field(description="Name of the transformer (e.g., 'llm_summarize')")
+    config: Dict[str, Any] = Field(
+        default_factory=dict, description="Configuration parameters for the transformer"
+    )
+
+    @model_validator(mode="after")
+    def validate_transformer(self):
+        """Validate that the transformer name is known to the registry."""
+        if not registry.is_registered(self.name):
+            # Log warning but don't fail validation - allows for graceful degradation
+            logging.warning(f"Transformer '{self.name}' is not registered")
+        return self
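Because Transformer is a Pydantic model, transformer blocks in toolset configuration can now be validated up front. A minimal sketch (config keys illustrative); note the behavior deliberately encoded in the validator above: an unknown name only logs a warning instead of raising, so configs referencing unavailable transformers degrade gracefully:

```python
from holmes.core.transformers.transformer import Transformer

# Well-formed: the name is registered; config is later passed through
# to the transformer class by the registry.
t = Transformer(name="llm_summarize", config={"prompt": "Summarize tersely"})

# Still validates -- only logs "Transformer 'does_not_exist' is not registered".
u = Transformer(name="does_not_exist")
```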
holmes/core/truncation/compaction.py
@@ -0,0 +1,94 @@
+import logging
+from typing import Optional
+
+import litellm
+from litellm.types.utils import ModelResponse
+
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def find_last_user_prompt(conversation_history: list[dict]) -> Optional[dict]:
+    if not conversation_history:
+        return None
+    last_user_prompt: Optional[dict] = None
+    for message in conversation_history:
+        if message.get("role") == "user":
+            last_user_prompt = message
+    return last_user_prompt
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    """
+    The compacted conversation history contains:
+    1. Original system prompt, uncompacted (if present)
+    2. Last user prompt, uncompacted (if present)
+    3. Compacted conversation history (role=assistant)
+    4. Compaction message (role=system)
+    """
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    # Set modify_params to handle providers like Anthropic that require tools
+    # when conversation history contains tool calls
+    original_modify_params = litellm.modify_params
+    try:
+        litellm.modify_params = True  # necessary when using anthropic
+        response: ModelResponse = llm.completion(
+            messages=conversation_history, drop_params=True
+        )  # type: ignore
+    finally:
+        litellm.modify_params = original_modify_params
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history: unexpected LLM response for compaction"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+
+    last_user_prompt = find_last_user_prompt(original_conversation_history)
+    if last_user_prompt:
+        compacted_conversation_history.append(last_user_prompt)
+
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
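To make the reshaping concrete, here is a hypothetical before/after for compact_conversation_history (all message contents are placeholders, not real HolmesGPT output), following the four-part structure from the docstring above:

```python
before = [
    {"role": "system", "content": "You are an SRE assistant..."},
    {"role": "user", "content": "Why is my pod crash-looping?"},
    {"role": "assistant", "content": "Checking logs...", "tool_calls": [...]},
    {"role": "tool", "tool_call_id": "1", "name": "kubectl_logs",
     "content": "<large kubectl output>"},
    {"role": "user", "content": "What about the OOM events?"},
]

after = [
    before[0],   # 1. original system prompt, kept verbatim
    before[-1],  # 2. last user prompt, kept verbatim
    {"role": "assistant", "content": "<LLM-written summary of the dropped turns>"},
    {"role": "system", "content": "The conversation history has been compacted "
     "to preserve available space in the context window. Continue."},
]
```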
holmes/core/truncation/dal_truncation_utils.py
@@ -0,0 +1,23 @@
+from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+
+
+def truncate_string(data_str: str) -> str:
+    if data_str and len(data_str) > MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION:
+        return (
+            data_str[:MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION]
+            + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
+        )
+    return data_str
+
+
+def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
+    if (
+        not MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+        or MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION <= 0
+    ):
+        return
+
+    for evidence in evidence_list:
+        data = evidence.get("data")
+        if data:
+            evidence["data"] = truncate_string(str(data))
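A behavior sketch, assuming MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION is set to 100 (the real default lives in holmes/common/env_vars.py and is not shown in this hunk):

```python
from holmes.core.truncation.dal_truncation_utils import (
    truncate_evidences_entities_if_necessary,
)

evidence = [{"data": "x" * 500}, {"data": "short"}]
truncate_evidences_entities_if_necessary(evidence)

# evidence[0]["data"] -> first 100 characters of the original string, then
#   "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
# evidence[1]["data"] -> "short" (within the limit, left unchanged)
```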
holmes/core/truncation/input_context_window_limiter.py
@@ -0,0 +1,219 @@
+import logging
+from typing import Any, Optional
+
+import sentry_sdk
+from pydantic import BaseModel
+
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+# We should fix this in the future
+# TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
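The allocation loop above is the interesting part: tool results are processed smallest-first, and each is capped at an equal share of the budget that remains, so small outputs survive intact while the largest ones absorb the truncation and no reserved space goes unused. A standalone sketch of just that arithmetic, with made-up token counts (not HolmesGPT code):

```python
def allocate(needs: list[int], budget: int) -> list[int]:
    """Fair-share allocation: smallest requests first; each request is
    capped at remaining_budget // remaining_requests, and anything a
    small request does not use rolls forward to the larger ones."""
    allocations = []
    remaining = budget
    for i, need in enumerate(sorted(needs)):
        fair_cap = remaining // (len(needs) - i)
        granted = min(need, fair_cap)
        allocations.append(granted)
        remaining -= granted
    return allocations

# Three tool outputs needing 50, 400 and 2000 tokens with 900 tokens of
# available space: the small ones fit whole, the largest is truncated to 450.
print(allocate([50, 400, 2000], budget=900))  # -> [50, 400, 450]
```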