holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/transformers/registry.py
@@ -0,0 +1,123 @@
+"""
+Transformer registry for managing available transformers.
+"""
+
+from typing import Any, Dict, List, Optional, Type
+
+from .base import BaseTransformer, TransformerError
+
+
+class TransformerRegistry:
+    """
+    Registry for managing transformer types and creating transformer instances.
+
+    This registry provides a centralized way to register transformer classes
+    and create instances based on configuration.
+    """
+
+    def __init__(self):
+        self._transformers: Dict[str, Type[BaseTransformer]] = {}
+
+    def register(self, transformer_class: Type[BaseTransformer]) -> None:
+        """
+        Register a transformer class, using the transformer's name property.
+
+        Args:
+            transformer_class: The transformer class to register
+
+        Raises:
+            ValueError: If name is already registered or transformer_class is invalid
+        """
+        if not issubclass(transformer_class, BaseTransformer):
+            raise ValueError(
+                f"Transformer class must inherit from BaseTransformer, got {transformer_class}"
+            )
+
+        # Get name from the transformer class
+        try:
+            temp_instance = transformer_class()
+            name = temp_instance.name
+        except Exception:
+            # Fallback to class name if instantiation fails
+            name = transformer_class.__name__
+
+        if name in self._transformers:
+            raise ValueError(f"Transformer '{name}' is already registered")
+
+        self._transformers[name] = transformer_class
+
+    def unregister(self, name: str) -> None:
+        """
+        Unregister a transformer by name.
+
+        Args:
+            name: The name of the transformer to unregister
+
+        Raises:
+            KeyError: If transformer name is not registered
+        """
+        if name not in self._transformers:
+            raise KeyError(f"Transformer '{name}' is not registered")
+
+        del self._transformers[name]
+
+    def create_transformer(
+        self, name: str, config: Optional[Dict[str, Any]] = None
+    ) -> BaseTransformer:
+        """
+        Create a transformer instance by name.
+
+        Args:
+            name: The name of the transformer to create
+            config: Optional configuration for the transformer
+
+        Returns:
+            A new transformer instance
+
+        Raises:
+            KeyError: If transformer name is not registered
+            TransformerError: If transformer creation fails
+        """
+        if name not in self._transformers:
+            raise KeyError(f"Transformer '{name}' is not registered")
+
+        transformer_class = self._transformers[name]
+
+        try:
+            # Handle both old-style dict config and new Pydantic models
+            if config is None:
+                return transformer_class()
+            else:
+                # For Pydantic models, pass config as keyword arguments
+                return transformer_class(**config)
+        except Exception as e:
+            raise TransformerError(f"Failed to create transformer '{name}': {e}") from e
+
+    def is_registered(self, name: str) -> bool:
+        """
+        Check if a transformer is registered.
+
+        Args:
+            name: The name to check
+
+        Returns:
+            True if the transformer is registered, False otherwise
+        """
+        return name in self._transformers
+
+    def list_transformers(self) -> List[str]:
+        """
+        Get a list of all registered transformer names.
+
+        Returns:
+            List of registered transformer names
+        """
+        return list(self._transformers.keys())
+
+    def clear(self) -> None:
+        """Clear all registered transformers."""
+        self._transformers.clear()
+
+
+# Global transformer registry instance
+registry = TransformerRegistry()
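For orientation, here is a minimal usage sketch of the registry above. EchoTransformer is invented for illustration; the real interface is defined by BaseTransformer in holmes/core/transformers/base.py, which this release adds but whose hunk is not shown here. The sketch assumes it exposes a name property and a transform method.

# Hypothetical usage sketch (not part of the diff).
from holmes.core.transformers.base import BaseTransformer
from holmes.core.transformers.registry import registry


class EchoTransformer(BaseTransformer):
    @property
    def name(self) -> str:
        return "echo"

    def transform(self, input_text: str) -> str:  # assumed abstract method
        return input_text


registry.register(EchoTransformer)          # keyed by the instance's name property
assert registry.is_registered("echo")
echo = registry.create_transformer("echo")  # no config -> zero-arg construction
registry.unregister("echo")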
holmes/core/transformers/transformer.py
@@ -0,0 +1,32 @@
+"""
+Configuration class for tool transformers.
+"""
+
+import logging
+from typing import Any, Dict
+
+from pydantic import BaseModel, Field, model_validator
+
+from .registry import registry
+
+
+class Transformer(BaseModel):
+    """
+    Configuration for a tool transformer.
+
+    Each transformer config specifies a transformer type and its parameters.
+    This replaces the previous dict-based configuration with proper type safety.
+    """
+
+    name: str = Field(description="Name of the transformer (e.g., 'llm_summarize')")
+    config: Dict[str, Any] = Field(
+        default_factory=dict, description="Configuration parameters for the transformer"
+    )
+
+    @model_validator(mode="after")
+    def validate_transformer(self):
+        """Validate that the transformer name is known to the registry."""
+        if not registry.is_registered(self.name):
+            # Log warning but don't fail validation - allows for graceful degradation
+            logging.warning(f"Transformer '{self.name}' is not registered")
+        return self
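A short sketch of how this model might be used, based only on the fields above. The name "llm_summarize" comes from the field description; the config key is an invented placeholder, not a documented parameter.

# Illustrative only: construct a transformer config as a toolset might declare it.
from holmes.core.transformers.transformer import Transformer

summarize = Transformer(
    name="llm_summarize",                           # validated against the registry
    config={"prompt": "Summarize the tool output"},  # invented placeholder key
)

# An unknown name still validates - the model_validator only logs a warning,
# so configuration loading degrades gracefully instead of failing hard.
unknown = Transformer(name="not_a_real_transformer")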
holmes/core/truncation/compaction.py
@@ -0,0 +1,94 @@
+import logging
+from typing import Optional
+
+import litellm
+from litellm.types.utils import ModelResponse
+
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def find_last_user_prompt(conversation_history: list[dict]) -> Optional[dict]:
+    if not conversation_history:
+        return None
+    last_user_prompt: Optional[dict] = None
+    for message in conversation_history:
+        if message.get("role") == "user":
+            last_user_prompt = message
+    return last_user_prompt
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    """
+    The compacted conversation history contains:
+    1. Original system prompt, uncompacted (if present)
+    2. Last user prompt, uncompacted (if present)
+    3. Compacted conversation history (role=assistant)
+    4. Compaction message (role=system)
+    """
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    # Set modify_params to handle providers like Anthropic that require tools
+    # when conversation history contains tool calls
+    original_modify_params = litellm.modify_params
+    try:
+        litellm.modify_params = True  # necessary when using anthropic
+        response: ModelResponse = llm.completion(
+            messages=conversation_history, drop_params=True
+        )  # type: ignore
+    finally:
+        litellm.modify_params = original_modify_params
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history: unexpected LLM response to the compaction request"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+
+    last_user_prompt = find_last_user_prompt(original_conversation_history)
+    if last_user_prompt:
+        compacted_conversation_history.append(last_user_prompt)
+
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
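To make the docstring concrete, here is the approximate before/after shape of a compacted history; all message contents below are invented for illustration.

# Invented example of compact_conversation_history's effect.
history = [
    {"role": "system", "content": "You are a troubleshooting assistant."},
    {"role": "user", "content": "Why is my pod crash-looping?"},
    {"role": "assistant", "content": "Let me check the logs."},
    {"role": "tool", "name": "kubectl_logs", "content": "<thousands of log lines>"},
    {"role": "user", "content": "And what about the OOMKilled events?"},
]
# compact_conversation_history(history, llm) returns roughly:
# [
#     {"role": "system", "content": "You are a troubleshooting assistant."},  # 1. original system prompt
#     {"role": "user", "content": "And what about the OOMKilled events?"},    # 2. last user prompt
#     {"role": "assistant", "content": "<LLM-written summary of the rest>"},  # 3. compacted history
#     {"role": "system", "content": "The conversation history has been compacted ... Continue."},  # 4. notice
# ]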
holmes/core/truncation/dal_truncation_utils.py
@@ -0,0 +1,23 @@
+from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+
+
+def truncate_string(data_str: str) -> str:
+    if data_str and len(data_str) > MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION:
+        return (
+            data_str[:MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION]
+            + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
+        )
+    return data_str
+
+
+def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
+    if (
+        not MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
+        or MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION <= 0
+    ):
+        return
+
+    for evidence in evidence_list:
+        data = evidence.get("data")
+        if data:
+            evidence["data"] = truncate_string(str(data))
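A quick worked example of the helpers above, assuming the environment variable MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION is set to 100 (the actual default lives in holmes/common/env_vars.py and is not shown in this hunk):

# Assumes MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION == 100 for this example.
from holmes.core.truncation.dal_truncation_utils import (
    truncate_evidences_entities_if_necessary,
)

evidence_list = [{"data": "x" * 500}, {"data": "short"}]
truncate_evidences_entities_if_necessary(evidence_list)
# evidence_list[0]["data"] is now the first 100 characters followed by
# "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS";
# evidence_list[1]["data"] is left untouched.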
holmes/core/truncation/input_context_window_limiter.py
@@ -0,0 +1,219 @@
+import logging
+from typing import Any, Optional
+
+import sentry_sdk
+from pydantic import BaseModel
+
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens.
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter.
+# We should fix this in the future.
+# TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness.
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated.
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused).
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens: {initial_tokens.total_tokens}. Compacted tokens: {compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
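As a sanity check on the fair-allocation loop in truncate_messages_to_fit_context, here is a self-contained numeric walk-through of the same arithmetic with invented token counts:

# Standalone re-run of the allocation arithmetic; the numbers are illustrative.
tool_needs = [100, 500, 1000]  # token counts, sorted ascending as in the code above
remaining_space = 900          # space left after prompts and the output reservation

for i, needed_space in enumerate(tool_needs):
    remaining_tools = len(tool_needs) - i
    max_allocation = remaining_space // remaining_tools
    allocated_space = min(needed_space, max_allocation)
    print(f"tool {i}: needs {needed_space}, allocated {allocated_space}")
    remaining_space -= allocated_space

# Output:
# tool 0: needs 100, allocated 100    (small tools get exactly what they need)
# tool 1: needs 500, allocated 400    (fair share of what remains: 800 // 2)
# tool 2: needs 1000, allocated 400   (everything left: 400 // 1)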