holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
  # This is patched by github actions during release
- __version__ = "0.14.1-alpha"
+ __version__ = "0.14.2"

  # Re-export version functions from version module for backward compatibility
  from .version import (
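The only change in this module is the release version. A quick sanity check after upgrading (assumes the wheel is installed in the current environment):

```python
import holmes

print(holmes.__version__)  # expected: "0.14.2"
```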
@@ -1,8 +1,8 @@
  import logging
- from typing import List, Optional
+ from typing import List, Optional, Dict, Any
  import requests  # type: ignore
  from functools import cache
- from pydantic import BaseModel, ConfigDict
+ from pydantic import BaseModel, ConfigDict, Field
  from holmes.common.env_vars import ROBUSTA_API_ENDPOINT

  HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -17,6 +17,9 @@ class HolmesInfo(BaseModel):
  class RobustaModelsResponse(BaseModel):
      model_config = ConfigDict(extra="ignore")
      models: List[str]
+     models_args: Dict[str, Any] = Field(
+         default_factory=dict, alias="models_holmes_args"
+     )
      default_model: Optional[str] = None
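The new `models_args` field is filled from the payload key `models_holmes_args` (its pydantic alias) and falls back to an empty dict when that key is absent. A minimal sketch of the parsing behaviour; the payload and model names are illustrative, not taken from the package:

```python
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, ConfigDict, Field


class RobustaModelsResponse(BaseModel):
    model_config = ConfigDict(extra="ignore")
    models: List[str]
    models_args: Dict[str, Any] = Field(default_factory=dict, alias="models_holmes_args")
    default_model: Optional[str] = None


# Illustrative payload: per-model args arrive under the aliased key.
resp = RobustaModelsResponse.model_validate(
    {
        "models": ["model-a", "model-b"],
        "models_holmes_args": {"model-a": {"max_context_size": 200_000}},
    }
)
print(resp.models_args.get("model-a"))  # {'max_context_size': 200000}
print(resp.models_args.get("model-b"))  # None – "model-b" has no per-model args
```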
holmes/common/env_vars.py CHANGED
@@ -73,11 +73,11 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
  # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
  ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)

- MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 200))
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))

  # Limit each tool response to N% of the total context window.
  # Number between 0 and 100
  # Setting to either 0 or any number above 100 disables the logic that limits tool response size
  TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
-     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 10)
+     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
  )
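Two defaults change here: `MAX_GRAPH_POINTS` drops from 200 to 100, and `TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT` rises from 10 to 15. A minimal sketch of pinning the previous values through the environment, assuming the variables are set before `holmes.common.env_vars` is first imported (both are read once at import time):

```python
import os

# Pin the pre-0.14.2 defaults before Holmes reads its environment.
os.environ["MAX_GRAPH_POINTS"] = "200"
os.environ["TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT"] = "10"

import holmes.common.env_vars  # noqa: E402  (import only after setting the env)
```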
holmes/config.py CHANGED
@@ -131,7 +131,7 @@ class Config(RobustaBaseConfig):
      def log_useful_info(self):
          if self.llm_model_registry and self.llm_model_registry.models:
              logging.info(
-                 f"loaded models: {list(self.llm_model_registry.models.keys())}"
+                 f"Loaded models: {list(self.llm_model_registry.models.keys())}"
              )
          else:
              logging.warning("No llm models were loaded")
holmes/core/llm.py CHANGED
@@ -3,7 +3,7 @@ import logging
  from abc import abstractmethod
  from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

- from litellm.types.utils import ModelResponse
+ from litellm.types.utils import ModelResponse, TextCompletionResponse
  import sentry_sdk

  from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
@@ -90,9 +90,13 @@ class DefaultLLM(LLM):
          self.args = args or {}
          self.tracer = tracer
          self.name = name
-
+         self.update_custom_args()
          self.check_llm(self.model, self.api_key, self.api_base, self.api_version)

+     def update_custom_args(self):
+         self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
+         self.args.pop("custom_args", None)
+
      def check_llm(
          self,
          model: str,
@@ -178,6 +182,9 @@ class DefaultLLM(LLM):
          return list(dict.fromkeys(names_to_try))

      def get_context_window_size(self) -> int:
+         if self.max_context_size:
+             return self.max_context_size
+
          if OVERRIDE_MAX_CONTENT_SIZE:
              logging.debug(
                  f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}"
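Taken together, the two new blocks let a per-model `custom_args.max_context_size` short-circuit context-window detection: `update_custom_args()` pops `custom_args` out of the LiteLLM kwargs and keeps `max_context_size`, and `get_context_window_size()` returns it when set. A simplified, self-contained sketch of that flow (not the real class, which also consults `OVERRIDE_MAX_CONTENT_SIZE` and litellm's model metadata):

```python
from typing import Any, Dict, Optional


class ContextWindowSketch:
    """Simplified stand-in for DefaultLLM's custom_args handling."""

    def __init__(self, args: Optional[Dict[str, Any]] = None):
        self.args = args or {}
        # Mirrors update_custom_args(): remember max_context_size, then drop
        # custom_args so it is never forwarded to the completion call.
        self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
        self.args.pop("custom_args", None)

    def get_context_window_size(self) -> int:
        if self.max_context_size:
            return self.max_context_size
        return 128_000  # fallback; the real code looks up the model's metadata


llm = ContextWindowSketch({"custom_args": {"max_context_size": 32_000}, "temperature": 0})
assert llm.get_context_window_size() == 32_000
assert "custom_args" not in llm.args
```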
@@ -424,7 +431,8 @@

          for model in robusta_models.models:
              logging.info(f"Loading Robusta AI model: {model}")
-             self._llms[model] = self._create_robusta_model_entry(model)
+             args = robusta_models.models_args.get(model)
+             self._llms[model] = self._create_robusta_model_entry(model, args)

          if robusta_models.default_model:
              logging.info(
@@ -492,7 +500,7 @@
              )

          model_key, first_model_params = next(iter(self._llms.items()))
-         logging.info(f"Using first available model: {model_key}")
+         logging.debug(f"Using first available model: {model_key}")
          return first_model_params.copy()

      def get_llm(self, name: str) -> LLM:  # TODO: fix logic
@@ -509,12 +517,15 @@

          return models

-     def _create_robusta_model_entry(self, model_name: str) -> dict[str, Any]:
+     def _create_robusta_model_entry(
+         self, model_name: str, args: Optional[dict[str, Any]] = None
+     ) -> dict[str, Any]:
          return self._create_model_entry(
              model="gpt-4o",  # Robusta AI model is using openai like API.
              model_name=model_name,
              base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
              is_robusta_model=True,
+             args=args or {},
          )

      def _create_model_entry(
@@ -523,10 +534,37 @@
          model_name: str,
          base_url: Optional[str] = None,
          is_robusta_model: Optional[bool] = None,
+         args: Optional[dict[str, Any]] = None,
      ) -> dict[str, Any]:
-         return {
+         entry = {
              "name": model_name,
              "base_url": base_url,
              "is_robusta_model": is_robusta_model,
              "model": model,
          }
+         if args:
+             entry["custom_args"] = args  # type: ignore[assignment]
+
+         return entry
+
+
+ def get_llm_usage(
+     llm_response: Union[ModelResponse, CustomStreamWrapper, TextCompletionResponse],
+ ) -> dict:
+     usage: dict = {}
+     if (
+         (
+             isinstance(llm_response, ModelResponse)
+             or isinstance(llm_response, TextCompletionResponse)
+         )
+         and hasattr(llm_response, "usage")
+         and llm_response.usage
+     ):  # type: ignore
+         usage["prompt_tokens"] = llm_response.usage.prompt_tokens  # type: ignore
+         usage["completion_tokens"] = llm_response.usage.completion_tokens  # type: ignore
+         usage["total_tokens"] = llm_response.usage.total_tokens  # type: ignore
+     elif isinstance(llm_response, CustomStreamWrapper):
+         complete_response = litellm.stream_chunk_builder(chunks=llm_response)  # type: ignore
+         if complete_response:
+             return get_llm_usage(complete_response)
+     return usage
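`get_llm_usage` is a new module-level helper that normalises token accounting across the response types litellm can return: `ModelResponse` and `TextCompletionResponse` objects are read directly, while streaming `CustomStreamWrapper` responses are first collapsed with `litellm.stream_chunk_builder`. A hedged usage sketch; the model name and prompt are illustrative and the call assumes litellm is configured with a valid API key:

```python
import litellm
from holmes.core.llm import get_llm_usage

# Illustrative completion call; any litellm-supported model behaves the same.
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "ping"}],
)

usage = get_llm_usage(response)
# Expected shape: {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}
print(usage)
```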
@@ -27,7 +27,7 @@ from holmes.core.investigation_structured_output import (
      is_response_an_incorrect_tool_call,
  )
  from holmes.core.issue import Issue
- from holmes.core.llm import LLM
+ from holmes.core.llm import LLM, get_llm_usage
  from holmes.core.performance_timing import PerformanceTiming
  from holmes.core.resource_instruction import ResourceInstructions
  from holmes.core.runbooks import RunbookManager
@@ -422,7 +422,11 @@ class ToolCallingLLM:
              )
              costs.total_cost += post_processing_cost

+         self.llm.count_tokens_for_message(messages)
          perf_timing.end(f"- completed in {i} iterations -")
+         metadata["usage"] = get_llm_usage(full_response)
+         metadata["max_tokens"] = max_context_size
+         metadata["max_output_tokens"] = maximum_output_token
          return LLMResult(
              result=post_processed_response,
              unprocessed_result=raw_response,
@@ -863,6 +867,10 @@ class ToolCallingLLM:

          tools_to_call = getattr(response_message, "tool_calls", None)
          if not tools_to_call:
+             self.llm.count_tokens_for_message(messages)
+             metadata["usage"] = get_llm_usage(full_response)
+             metadata["max_tokens"] = max_context_size
+             metadata["max_output_tokens"] = maximum_output_token
              yield StreamMessage(
                  event=StreamEvents.ANSWER_END,
                  data={
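Both the blocking and the streaming paths now attach the same three keys to the response metadata just before finishing: token usage from `get_llm_usage`, the model's context-window size, and the maximum output tokens. A small sketch of the resulting dict shape as a consumer might inspect it (the numbers are illustrative, not from a real run):

```python
# Shape of the metadata recorded in 0.14.2; values are illustrative.
metadata = {
    "usage": {"prompt_tokens": 1850, "completion_tokens": 240, "total_tokens": 2090},
    "max_tokens": 128_000,       # context window (max_context_size)
    "max_output_tokens": 4096,   # maximum_output_token
}

pct_used = 100 * metadata["usage"]["total_tokens"] / metadata["max_tokens"]
print(f"Context window used: {pct_used:.1f}%")
```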
@@ -464,12 +464,12 @@ class ToolsetManager:

          logger = logging.getLogger(__name__)

-         logger.info(
+         logger.debug(
              f"Starting fast_model injection. global_fast_model={self.global_fast_model}"
          )

          if not self.global_fast_model:
-             logger.info("No global_fast_model configured, skipping injection")
+             logger.debug("No global_fast_model configured, skipping injection")
              return

          injected_count = 0
@@ -11,6 +11,7 @@
  * IMPORTANT: ALWAYS inform the user about what logs you fetched. For example: "Here are pod logs for ..."
  * IMPORTANT: If logs commands have limits mention them. For example: "Showing last 100 lines of logs:"
  * IMPORTANT: If a filter was used, mention the filter. For example: "Logs filtered for 'error':"
+ * IMPORTANT: If a date range was used (even if just the default one and you didn't specify the parameter), mention the date range. For example: "Logs from last 1 hour..."

  {% if loki_ts and loki_ts.status == "enabled" -%}
  * For any logs, including for investigating kubernetes problems, use Loki
@@ -34,7 +35,15 @@ Tools to search and fetch logs from Coralogix.
  ### datadog/logs
  #### Datadog Logs Toolset
  Tools to search and fetch logs from Datadog.
- {% include '_default_log_prompt.jinja2' %}
+ * Use the tool `fetch_pod_logs` to access an application's logs.
+ * Do fetch application logs yourself and DO NOT ask users to do so
+ * If you have an alert/monitor try to figure out the time it fired
+ ** Then, use `start_time=-300` (5 minutes before `end_time`) and `end_time=<time monitor started firing>` when calling `fetch_pod_logs`.
+ ** If there are too many logs, or not enough, narrow or widen the timestamps
+ * If the user did not explicitly ask about a given timeframe, ignore the `start_time` and `end_time` so it will use the default.
+ * IMPORTANT: ALWAYS inform the user about the actual time period fetched (e.g., "Looking at logs from the last <X> days")
+ * IMPORTANT: If a limit was applied, ALWAYS tell the user how many logs were shown vs total (e.g., "Showing latest <Y> of <Z> logs")
+ * IMPORTANT: If any filters were applied, ALWAYS mention them explicitly
  {%- elif k8s_yaml_ts and k8s_yaml_ts.status == "enabled" -%}
  ### kubernetes/logs
  #### Kubernetes Logs Toolset