holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of holmesgpt might be problematic.

Files changed (125)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +17 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +13 -1
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +230 -157
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/toolset_manager.py +1 -5
  19. holmes/core/tracing.py +4 -3
  20. holmes/interactive.py +1 -0
  21. holmes/main.py +9 -2
  22. holmes/plugins/prompts/__init__.py +7 -1
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  32. holmes/plugins/runbooks/CLAUDE.md +85 -0
  33. holmes/plugins/runbooks/README.md +24 -0
  34. holmes/plugins/toolsets/__init__.py +19 -6
  35. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  44. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  45. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  46. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  47. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  48. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  49. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  50. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  51. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  52. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  53. holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
  54. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  55. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  56. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  57. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  58. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  59. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  60. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  61. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  62. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  63. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  64. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  65. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  66. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  67. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  68. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  69. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  70. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  71. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  72. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  73. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  74. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  75. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  76. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  77. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  78. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  79. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  80. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  81. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  82. holmes/plugins/toolsets/coralogix/api.py +6 -6
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  84. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  85. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  86. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  87. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  88. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  89. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  90. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  91. holmes/plugins/toolsets/git.py +15 -15
  92. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  93. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  94. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  95. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  96. holmes/plugins/toolsets/internet/internet.py +2 -1
  97. holmes/plugins/toolsets/internet/notion.py +2 -1
  98. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  99. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  100. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  101. holmes/plugins/toolsets/investigator/model.py +15 -0
  102. holmes/plugins/toolsets/kafka.py +14 -7
  103. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  104. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  105. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  106. holmes/plugins/toolsets/newrelic.py +8 -3
  107. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  108. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  109. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  110. holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
  111. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  112. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  113. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  114. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  115. holmes/plugins/toolsets/utils.py +8 -1
  116. holmes/utils/console/logging.py +6 -1
  117. holmes/utils/llms.py +20 -0
  118. holmes/utils/stream.py +90 -0
  119. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
  120. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
  121. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  122. holmes/utils/robusta.py +0 -9
  123. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
  124. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
  125. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
@@ -2,32 +2,25 @@ import concurrent.futures
  import json
  import logging
  import textwrap
+ import uuid
  from typing import Dict, List, Optional, Type, Union

- import requests # type: ignore
  import sentry_sdk
- from litellm.types.utils import Message
  from openai import BadRequestError
  from openai.types.chat.chat_completion_message_tool_call import (
      ChatCompletionMessageToolCall,
  )
- from pydantic import BaseModel
- from pydantic_core import from_json
+ from pydantic import BaseModel, Field
  from rich.console import Console

- from holmes.common.env_vars import (
-     ROBUSTA_API_ENDPOINT,
-     STREAM_CHUNKS_PER_PARSE,
-     TEMPERATURE,
- )
+ from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
  from holmes.core.investigation_structured_output import (
      DEFAULT_SECTIONS,
      REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
      InputSectionsDataType,
      get_output_format_for_investigation,
      is_response_an_incorrect_tool_call,
-     parse_markdown_into_sections_from_hash_sign,
-     process_response_into_sections,
  )
  from holmes.core.issue import Issue
  from holmes.core.llm import LLM
@@ -45,6 +38,82 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
  from holmes.core.tools_utils.tool_executor import ToolExecutor
  from holmes.core.tracing import DummySpan
  from holmes.utils.colors import AI_COLOR
+ from holmes.utils.stream import StreamEvents, StreamMessage
+ from holmes.core.todo_manager import (
+     get_todo_manager,
+ )
+
+ # Create a named logger for cost tracking
+ cost_logger = logging.getLogger("holmes.costs")
+
+
+ class LLMCosts(BaseModel):
+     """Tracks cost and token usage for LLM calls."""
+
+     total_cost: float = 0.0
+     total_tokens: int = 0
+     prompt_tokens: int = 0
+     completion_tokens: int = 0
+
+
+ def _extract_cost_from_response(full_response) -> float:
+     """Extract cost value from LLM response.
+
+     Args:
+         full_response: The raw LLM response object
+
+     Returns:
+         The cost as a float, or 0.0 if not available
+     """
+     try:
+         cost_value = (
+             full_response._hidden_params.get("response_cost", 0)
+             if hasattr(full_response, "_hidden_params")
+             else 0
+         )
+         # Ensure cost is a float
+         return float(cost_value) if cost_value is not None else 0.0
+     except Exception:
+         return 0.0
+
+
+ def _process_cost_info(
+     full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+ ) -> None:
+     """Process cost and token information from LLM response.
+
+     Logs the cost information and optionally accumulates it into a costs object.
+
+     Args:
+         full_response: The raw LLM response object
+         costs: Optional LLMCosts object to accumulate costs into
+         log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+     """
+     try:
+         cost = _extract_cost_from_response(full_response)
+         usage = getattr(full_response, "usage", {})
+
+         if usage:
+             prompt_toks = usage.get("prompt_tokens", 0)
+             completion_toks = usage.get("completion_tokens", 0)
+             total_toks = usage.get("total_tokens", 0)
+             cost_logger.debug(
+                 f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+             )
+             # Accumulate costs and tokens if costs object provided
+             if costs:
+                 costs.total_cost += cost
+                 costs.prompt_tokens += prompt_toks
+                 costs.completion_tokens += completion_toks
+                 costs.total_tokens += total_toks
+         elif cost > 0:
+             cost_logger.debug(
+                 f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+             )
+             if costs:
+                 costs.total_cost += cost
+     except Exception as e:
+         logging.debug(f"Could not extract cost information: {e}")


  def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +163,13 @@ def truncate_messages_to_fit_context(

      tool_call_messages = [message for message in messages if message["role"] == "tool"]

-     if message_size_without_tools >= (max_context_size - maximum_output_token):
+     reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+     if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
          logging.error(
              f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
          )
          raise Exception(
-             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - maximum_output_token} tokens available for input."
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
          )

      if len(tool_call_messages) == 0:
@@ -188,11 +258,11 @@ class ToolCallResult(BaseModel):
          }


- class LLMResult(BaseModel):
+ class LLMResult(LLMCosts):
      tool_calls: Optional[List[ToolCallResult]] = None
      result: Optional[str] = None
      unprocessed_result: Optional[str] = None
-     instructions: List[str] = []
+     instructions: List[str] = Field(default_factory=list)
      # TODO: clean up these two
      prompt: Optional[str] = None
      messages: Optional[List[dict]] = None
@@ -213,6 +283,7 @@ class ToolCallingLLM:
          self.max_steps = max_steps
          self.tracer = tracer
          self.llm = llm
+         self.investigation_id = str(uuid.uuid4())

      def prompt_call(
          self,
@@ -221,6 +292,7 @@ class ToolCallingLLM:
          post_process_prompt: Optional[str] = None,
          response_format: Optional[Union[dict, Type[BaseModel]]] = None,
          sections: Optional[InputSectionsDataType] = None,
+         trace_span=DummySpan(),
      ) -> LLMResult:
          messages = [
              {"role": "system", "content": system_prompt},
@@ -232,6 +304,7 @@ class ToolCallingLLM:
              response_format,
              user_prompt=user_prompt,
              sections=sections,
+             trace_span=trace_span,
          )

      def messages_call(
@@ -258,7 +331,11 @@ class ToolCallingLLM:
      ) -> LLMResult:
          perf_timing = PerformanceTiming("tool_calling_llm.call")
          tool_calls = [] # type: ignore
-         tools = self.tool_executor.get_all_tools_openai_format()
+         costs = LLMCosts()
+
+         tools = self.tool_executor.get_all_tools_openai_format(
+             target_model=self.llm.model
+         )
          perf_timing.measure("get_all_tools_openai_format")
          max_steps = self.max_steps
          i = 0
@@ -296,6 +373,9 @@ class ToolCallingLLM:
                  )
                  logging.debug(f"got response {full_response.to_json()}") # type: ignore

+                 # Extract and accumulate cost information
+                 _process_cost_info(full_response, costs, "LLM call")
+
                  perf_timing.measure("llm.completion")
              # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
              except BadRequestError as e:
@@ -349,11 +429,14 @@ class ToolCallingLLM:
                  if post_process_prompt and user_prompt:
                      logging.info("Running post processing on investigation.")
                      raw_response = text_response
-                     post_processed_response = self._post_processing_call(
-                         prompt=user_prompt,
-                         investigation=raw_response,
-                         user_prompt=post_process_prompt,
+                     post_processed_response, post_processing_cost = (
+                         self._post_processing_call(
+                             prompt=user_prompt,
+                             investigation=raw_response,
+                             user_prompt=post_process_prompt,
+                         )
                      )
+                     costs.total_cost += post_processing_cost

                      perf_timing.end(f"- completed in {i} iterations -")
                      return LLMResult(
@@ -362,6 +445,7 @@ class ToolCallingLLM:
                          tool_calls=tool_calls,
                          prompt=json.dumps(messages, indent=2),
                          messages=messages,
+                         **costs.model_dump(), # Include all cost fields
                      )

                  perf_timing.end(f"- completed in {i} iterations -")
@@ -370,6 +454,7 @@ class ToolCallingLLM:
                      tool_calls=tool_calls,
                      prompt=json.dumps(messages, indent=2),
                      messages=messages,
+                     **costs.model_dump(), # Include all cost fields
                  )

              if text_response and text_response.strip():
@@ -400,6 +485,9 @@ class ToolCallingLLM:

                      perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

+             # Update the tool number offset for the next iteration
+             tool_number_offset += len(tools_to_call)
+
              # Add a blank line after all tools in this batch complete
              if tools_to_call:
                  logging.info("")
@@ -413,20 +501,41 @@ class ToolCallingLLM:
          trace_span=DummySpan(),
          tool_number=None,
      ) -> ToolCallResult:
-         tool_name = tool_to_call.function.name
+         # Handle the union type - ChatCompletionMessageToolCall can be either
+         # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+         # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+         # We use hasattr to check for the 'function' attribute as it's more flexible
+         # and doesn't require importing the specific type.
+         if hasattr(tool_to_call, "function"):
+             tool_name = tool_to_call.function.name
+             tool_arguments = tool_to_call.function.arguments
+         else:
+             # This is a custom tool call - we don't support these currently
+             logging.error(f"Unsupported custom tool call: {tool_to_call}")
+             return ToolCallResult(
+                 tool_call_id=tool_to_call.id,
+                 tool_name="unknown",
+                 description="NA",
+                 result=StructuredToolResult(
+                     status=ToolResultStatus.ERROR,
+                     error="Custom tool calls are not supported",
+                     params=None,
+                 ),
+             )
+
          tool_params = None
          try:
-             tool_params = json.loads(tool_to_call.function.arguments)
+             tool_params = json.loads(tool_arguments)
          except Exception:
              logging.warning(
-                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}"
+                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
              )
          tool_call_id = tool_to_call.id
          tool = self.tool_executor.get_tool_by_name(tool_name)

          if (not tool) or (tool_params is None):
              logging.warning(
-                 f"Skipping tool execution for {tool_name}: args: {tool_to_call.function.arguments}"
+                 f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
              )
              return ToolCallResult(
                  tool_call_id=tool_call_id,
@@ -516,7 +625,7 @@ class ToolCallingLLM:
          investigation,
          user_prompt: Optional[str] = None,
          system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-     ) -> Optional[str]:
+     ) -> tuple[Optional[str], float]:
          try:
              user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                  prompt, investigation, user_prompt
@@ -535,10 +644,18 @@ class ToolCallingLLM:
              ]
              full_response = self.llm.completion(messages=messages, temperature=0)
              logging.debug(f"Post processing response {full_response}")
-             return full_response.choices[0].message.content # type: ignore
+
+             # Extract and log cost information for post-processing
+             post_processing_cost = _extract_cost_from_response(full_response)
+             if post_processing_cost > 0:
+                 cost_logger.debug(
+                     f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                 )
+
+             return full_response.choices[0].message.content, post_processing_cost # type: ignore
          except Exception:
              logging.exception("Failed to run post processing", exc_info=True)
-             return investigation
+             return investigation, 0.0

      @sentry_sdk.trace
      def truncate_messages_to_fit_context(
@@ -553,61 +670,40 @@ class ToolCallingLLM:

      def call_stream(
          self,
-         system_prompt: str,
+         system_prompt: str = "",
          user_prompt: Optional[str] = None,
-         stream: bool = False,
          response_format: Optional[Union[dict, Type[BaseModel]]] = None,
          sections: Optional[InputSectionsDataType] = None,
-         runbooks: Optional[List[str]] = None,
+         msgs: Optional[list[dict]] = None,
      ):
-         def stream_analysis(it, peek_chunk):
-             buffer = peek_chunk.get("data", "")
-             yield create_sse_message(peek_chunk.get("event"), peek_chunk.get("data"))
-             chunk_counter = 0
-
-             for chunk in it:
-                 buffer += chunk
-                 chunk_counter += 1
-                 if chunk_counter == STREAM_CHUNKS_PER_PARSE:
-                     chunk_counter = 0
-                     yield create_sse_message(
-                         "ai_answer",
-                         {
-                             "sections": parse_markdown_into_sections_from_hash_sign(
-                                 buffer
-                             )
-                             or {},
-                             "analysis": buffer,
-                             "instructions": runbooks or [],
-                         },
-                     )
-
-             yield create_sse_message(
-                 "ai_answer_end",
-                 {
-                     "sections": parse_markdown_into_sections_from_hash_sign(buffer)
-                     or {},
-                     "analysis": buffer,
-                     "instructions": runbooks or [],
-                 },
-             )
-
-         messages = [
-             {"role": "system", "content": system_prompt},
-             {"role": "user", "content": user_prompt},
-         ]
+         """
+         This function DOES NOT call llm.completion(stream=true).
+         This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+         """
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         if user_prompt:
+             messages.append({"role": "user", "content": user_prompt})
+         if msgs:
+             messages.extend(msgs)
          perf_timing = PerformanceTiming("tool_calling_llm.call")
-         tools = self.tool_executor.get_all_tools_openai_format()
+         tool_calls: list[dict] = []
+         tools = self.tool_executor.get_all_tools_openai_format(
+             target_model=self.llm.model
+         )
          perf_timing.measure("get_all_tools_openai_format")
+         max_steps = self.max_steps
          i = 0
-         tool_calls: list[dict] = []
-         while i < self.max_steps:
+         tool_number_offset = 0
+
+         while i < max_steps:
              i += 1
              perf_timing.measure(f"start iteration {i}")
              logging.debug(f"running iteration {i}")

-             tools = [] if i == self.max_steps - 1 else tools
-             tool_choice = None if tools == [] else "auto"
+             tools = None if i == max_steps else tools
+             tool_choice = "auto" if tools else None

              total_tokens = self.llm.count_tokens_for_message(messages) # type: ignore
              max_context_size = self.llm.get_context_window_size()
@@ -623,90 +719,47 @@ class ToolCallingLLM:

              logging.debug(f"sending messages={messages}\n\ntools={tools}")
              try:
-                 if stream:
-                     response = requests.post(
-                         f"{ROBUSTA_API_ENDPOINT}/chat/completions",
-                         json={
-                             "messages": parse_messages_tags(messages), # type: ignore
-                             "tools": tools,
-                             "tool_choice": tool_choice,
-                             "temperature": TEMPERATURE,
-                             "response_format": response_format,
-                             "stream": True,
-                             "drop_param": True,
-                         },
-                         headers={"Authorization": f"Bearer {self.llm.api_key}"}, # type: ignore
-                         stream=True,
-                     )
-                     response.raise_for_status()
-                     it = response.iter_content(chunk_size=None, decode_unicode=True)
-                     peek_chunk = from_json(next(it))
-                     tools = peek_chunk.get("tool_calls")
-
-                     if not tools:
-                         yield from stream_analysis(it, peek_chunk)
-                         perf_timing.measure("llm.completion")
-                         return
-
-                     response_message = Message(**peek_chunk)
-                     tools_to_call = response_message.tool_calls
-                 else:
-                     full_response = self.llm.completion(
-                         messages=parse_messages_tags(messages), # type: ignore
-                         tools=tools,
-                         tool_choice=tool_choice,
-                         temperature=TEMPERATURE,
-                         response_format=response_format,
-                         stream=False,
-                         drop_params=True,
-                     )
-                     perf_timing.measure("llm.completion")
-
-                     response_message = full_response.choices[0].message # type: ignore
-                     if response_message and response_format:
-                         # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
-                         dict_response = json.loads(full_response.to_json()) # type: ignore
-                         incorrect_tool_call = is_response_an_incorrect_tool_call(
-                             sections, dict_response.get("choices", [{}])[0]
-                         )
+                 full_response = self.llm.completion(
+                     messages=parse_messages_tags(messages), # type: ignore
+                     tools=tools,
+                     tool_choice=tool_choice,
+                     response_format=response_format,
+                     temperature=TEMPERATURE,
+                     stream=False,
+                     drop_params=True,
+                 )

-                         if incorrect_tool_call:
-                             logging.warning(
-                                 "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
-                             )
-                             # disable structured output going forward and and retry
-                             response_format = None
-                             i -= 1
-                             continue
-
-                     tools_to_call = getattr(response_message, "tool_calls", None)
-                     if not tools_to_call:
-                         (text_response, sections) = process_response_into_sections( # type: ignore
-                             response_message.content
-                         )
+                 # Log cost information for this iteration (no accumulation in streaming)
+                 _process_cost_info(full_response, log_prefix="LLM iteration")

-                         yield create_sse_message(
-                             "ai_answer_end",
-                             {
-                                 "sections": sections or {},
-                                 "analysis": text_response,
-                                 "instructions": runbooks or [],
-                             },
-                         )
-                         return
+                 perf_timing.measure("llm.completion")
              # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
              except BadRequestError as e:
-                 logging.exception("Bad completion request")
                  if "Unrecognized request arguments supplied: tool_choice, tools" in str(
                      e
                  ):
                      raise Exception(
                          "The Azure model you chose is not supported. Model version 1106 and higher required."
+                     ) from e
+                 else:
+                     raise
+
+             response_message = full_response.choices[0].message # type: ignore
+             if response_message and response_format:
+                 # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+                 dict_response = json.loads(full_response.to_json()) # type: ignore
+                 incorrect_tool_call = is_response_an_incorrect_tool_call(
+                     sections, dict_response.get("choices", [{}])[0]
+                 )
+
+                 if incorrect_tool_call:
+                     logging.warning(
+                         "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                      )
-                 raise e
-             except Exception:
-                 logging.exception("Completion request exception")
-                 raise
+                     # disable structured output going forward and and retry
+                     response_format = None
+                     max_steps = max_steps + 1
+                     continue

              messages.append(
                  response_message.model_dump(
@@ -714,6 +767,22 @@ class ToolCallingLLM:
                  )
              )

+             tools_to_call = getattr(response_message, "tool_calls", None)
+             if not tools_to_call:
+                 yield StreamMessage(
+                     event=StreamEvents.ANSWER_END,
+                     data={"content": response_message.content, "messages": messages},
+                 )
+                 return
+
+             reasoning = getattr(response_message, "reasoning_content", None)
+             message = response_message.content
+             if reasoning or message:
+                 yield StreamMessage(
+                     event=StreamEvents.AI_MESSAGE,
+                     data={"content": message, "reasoning": reasoning},
+                 )
+
              perf_timing.measure("pre-tool-calls")
              with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                  futures = []
@@ -724,11 +793,12 @@ class ToolCallingLLM:
                              tool_to_call=t, # type: ignore
                              previous_tool_calls=tool_calls,
                              trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
-                             tool_number=tool_index,
+                             tool_number=tool_number_offset + tool_index,
                          )
                      )
-                     yield create_sse_message(
-                         "start_tool_calling", {"tool_name": t.function.name, "id": t.id}
+                     yield StreamMessage(
+                         event=StreamEvents.START_TOOL,
+                         data={"tool_name": t.function.name, "id": t.id},
                      )

                  for future in concurrent.futures.as_completed(futures):
@@ -739,13 +809,13 @@ class ToolCallingLLM:

                      perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

-                     streaming_result_dict = (
-                         tool_call_result.as_streaming_tool_result_response()
+                     yield StreamMessage(
+                         event=StreamEvents.TOOL_RESULT,
+                         data=tool_call_result.as_streaming_tool_result_response(),
                      )

-                     yield create_sse_message(
-                         "tool_calling_result", streaming_result_dict
-                     )
+             # Update the tool number offset for the next iteration
+             tool_number_offset += len(tools_to_call)

          raise Exception(
              f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
@@ -782,6 +852,7 @@ class IssueInvestigator(ToolCallingLLM):
          global_instructions: Optional[Instructions] = None,
          post_processing_prompt: Optional[str] = None,
          sections: Optional[InputSectionsDataType] = None,
+         trace_span=DummySpan(),
      ) -> LLMResult:
          runbooks = self.runbook_manager.get_instructions_for_issue(issue)

@@ -823,6 +894,9 @@ class IssueInvestigator(ToolCallingLLM):
                  "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
              )

+         todo_manager = get_todo_manager()
+         todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
          system_prompt = load_and_render_prompt(
              prompt,
              {
@@ -831,6 +905,8 @@ class IssueInvestigator(ToolCallingLLM):
                  "structured_output": request_structured_output_from_llm,
                  "toolsets": self.tool_executor.toolsets,
                  "cluster_name": self.cluster_name,
+                 "todo_list": todo_context,
+                 "investigation_id": self.investigation_id,
              },
          )

@@ -865,10 +941,7 @@ class IssueInvestigator(ToolCallingLLM):
              post_processing_prompt,
              response_format=response_format,
              sections=sections,
+             trace_span=trace_span,
          )
          res.instructions = runbooks
          return res
-
-
- def create_sse_message(event_type: str, data: dict = {}):
-     return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
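The call_stream refactor above drops the SSE helper (create_sse_message) and the requests-based streaming path; it now yields StreamMessage objects from holmes/utils/stream.py and leaves serialization to the caller. A minimal consumer sketch follows, assuming StreamMessage exposes event and data attributes (as the constructor calls above suggest) and an already-configured ToolCallingLLM instance named ai; both are assumptions not confirmed by this diff:

from holmes.utils.stream import StreamEvents

def stream_to_console(ai, system_prompt: str, user_prompt: str) -> str:
    """Drain the per-iteration event stream and return the final answer text."""
    final_answer = ""
    for msg in ai.call_stream(system_prompt=system_prompt, user_prompt=user_prompt):
        if msg.event == StreamEvents.START_TOOL:
            # Emitted once per tool the LLM asked to run in this iteration
            print(f"-> calling {msg.data['tool_name']} (id={msg.data['id']})")
        elif msg.event == StreamEvents.TOOL_RESULT:
            # data comes from ToolCallResult.as_streaming_tool_result_response(); its keys are not shown in this diff
            print(f"<- tool result: {msg.data}")
        elif msg.event == StreamEvents.AI_MESSAGE:
            # Intermediate assistant text/reasoning between tool-calling iterations
            print(msg.data.get("content") or "")
        elif msg.event == StreamEvents.ANSWER_END:
            final_answer = msg.data.get("content") or ""
    return final_answer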
holmes/core/tools.py CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
      url: Optional[str] = None
      invocation: Optional[str] = None
      params: Optional[Dict] = None
+     icon_url: Optional[str] = None

      def get_stringified_data(self) -> str:
          if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
      description: Optional[str] = None
      type: str = "string"
      required: bool = True
+     properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
+     items: Optional["ToolParameter"] = None # For array item schemas


  class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
          None # templated string to show to the user describing this tool invocation (not seen by llm)
      )
      additional_instructions: Optional[str] = None
+     icon_url: Optional[str] = Field(
+         default=None,
+         description="The URL of the icon for the tool, if None will get toolset icon",
+     )

-     def get_openai_format(self):
+     def get_openai_format(self, target_model: str):
          return format_tool_to_open_ai_standard(
              tool_name=self.name,
              tool_description=self.description,
              tool_parameters=self.parameters,
+             target_model=target_model,
          )

      def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
          )
          start_time = time.time()
          result = self._invoke(params)
+         result.icon_url = self.icon_url
          elapsed = time.time() - start_time
          output_str = (
              result.get_stringified_data()
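The new properties and items fields on ToolParameter allow nested object and array parameter schemas instead of only flat scalars. A hypothetical declaration under that assumption (how the schema is ultimately rendered per target_model is handled by format_tool_to_open_ai_standard, presumably in holmes/core/openai_formatting.py, and is not shown here):

from holmes.core.tools import ToolParameter

# Illustrative only: an array parameter whose items are objects with two string properties.
label_filters = ToolParameter(
    description="Label filters to apply",
    type="array",
    required=False,
    items=ToolParameter(
        type="object",
        properties={
            "key": ToolParameter(type="string", description="Label key"),
            "value": ToolParameter(type="string", description="Label value"),
        },
    ),
)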
holmes/core/tools_utils/tool_executor.py CHANGED
@@ -38,6 +38,8 @@ class ToolExecutor:
          self.tools_by_name: dict[str, Tool] = {}
          for ts in toolsets_by_name.values():
              for tool in ts.tools:
+                 if tool.icon_url is None and ts.icon_url is not None:
+                     tool.icon_url = ts.icon_url
                  if tool.name in self.tools_by_name:
                      logging.warning(
                          f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@ class ToolExecutor:
          return None

      @sentry_sdk.trace
-     def get_all_tools_openai_format(self):
-         return [tool.get_openai_format() for tool in self.tools_by_name.values()]
+     def get_all_tools_openai_format(self, target_model: str):
+         return [
+             tool.get_openai_format(target_model=target_model)
+             for tool in self.tools_by_name.values()
+         ]