holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic.

Files changed (82)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +15 -4
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +295 -52
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +110 -102
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/transformers/__init__.py +23 -0
  17. holmes/core/transformers/base.py +62 -0
  18. holmes/core/transformers/llm_summarize.py +174 -0
  19. holmes/core/transformers/registry.py +122 -0
  20. holmes/core/transformers/transformer.py +31 -0
  21. holmes/main.py +5 -0
  22. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  23. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  24. holmes/plugins/toolsets/aks.yaml +64 -0
  25. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  32. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  36. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  37. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  38. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  39. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  40. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  41. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +344 -205
  42. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +189 -17
  43. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +95 -30
  44. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
  45. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +20 -20
  46. holmes/plugins/toolsets/git.py +21 -21
  47. holmes/plugins/toolsets/grafana/common.py +2 -2
  48. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  49. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
  50. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
  51. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
  52. holmes/plugins/toolsets/internet/internet.py +3 -3
  53. holmes/plugins/toolsets/internet/notion.py +3 -3
  54. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  55. holmes/plugins/toolsets/kafka.py +18 -18
  56. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  57. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  58. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  59. holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
  60. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  61. holmes/plugins/toolsets/newrelic.py +5 -5
  62. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  63. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  64. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  65. holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
  66. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
  67. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  68. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  69. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  70. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  71. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  72. holmes/plugins/toolsets/utils.py +88 -0
  73. holmes/utils/config_utils.py +91 -0
  74. holmes/utils/env.py +7 -0
  75. holmes/utils/holmes_status.py +2 -1
  76. holmes/utils/sentry_helper.py +41 -0
  77. holmes/utils/stream.py +9 -0
  78. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +10 -14
  79. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +82 -72
  80. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
  81. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
  82. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
@@ -2,7 +2,7 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-from typing import Dict, List, Optional, Type, Union, Callable
+from typing import Dict, List, Optional, Type, Union, Callable, Any


 import sentry_sdk
@@ -27,19 +27,28 @@ from holmes.core.investigation_structured_output import (
     is_response_an_incorrect_tool_call,
 )
 from holmes.core.issue import Issue
-from holmes.core.llm import LLM
+from holmes.core.llm import LLM, get_llm_usage
 from holmes.core.performance_timing import PerformanceTiming
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.core.tools_utils.tool_context_window_limiter import (
+    prevent_overly_big_tool_response,
+)
 from holmes.plugins.prompts import load_and_render_prompt
+from holmes.utils import sentry_helper
 from holmes.utils.global_instructions import (
     Instructions,
     add_global_instructions_to_user_prompt,
 )
 from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
+from holmes.core.tools_utils.data_types import (
+    TruncationResult,
+    ToolCallResult,
+    TruncationMetadata,
+)
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
@@ -48,6 +57,9 @@ from holmes.utils.stream import StreamEvents, StreamMessage
 cost_logger = logging.getLogger("holmes.costs")


+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
 class LLMCosts(BaseModel):
     """Tracks cost and token usage for LLM calls."""

@@ -119,23 +131,6 @@ def _process_cost_info(
         logging.debug(f"Could not extract cost information: {e}")


-def format_tool_result_data(tool_result: StructuredToolResult) -> str:
-    tool_response = tool_result.data
-    if isinstance(tool_result.data, str):
-        tool_response = tool_result.data
-    else:
-        try:
-            if isinstance(tool_result.data, BaseModel):
-                tool_response = tool_result.data.model_dump_json(indent=2)
-            else:
-                tool_response = json.dumps(tool_result.data, indent=2)
-        except Exception:
-            tool_response = str(tool_result.data)
-    if tool_result.status == ToolResultStatus.ERROR:
-        tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
-    return tool_response
-
-
 # TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
 # However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
 # We should fix this in the future
@@ -143,7 +138,7 @@ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
 # token truncation and not character truncation
 def truncate_messages_to_fit_context(
     messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
-) -> list:
+) -> TruncationResult:
     """
     Helper function to truncate tool messages to fit within context limits.

@@ -176,13 +171,17 @@ def truncate_messages_to_fit_context(
     )

     if len(tool_call_messages) == 0:
-        return messages
+        return TruncationResult(truncated_messages=messages, truncations=[])

     available_space = (
-        max_context_size - message_size_without_tools - maximum_output_token
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
     )
     remaining_space = available_space
-    tool_call_messages.sort(key=lambda x: len(x["content"]))
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn([{"role": "tool", "content": x["content"]}])
+    )
+
+    truncations = []

     # Allocate space starting with small tools and going to larger tools, while maintaining fairness
     # Small tools can often get exactly what they need, while larger tools may need to be truncated
@@ -190,75 +189,49 @@ def truncate_messages_to_fit_context(
     for i, msg in enumerate(tool_call_messages):
         remaining_tools = len(tool_call_messages) - i
         max_allocation = remaining_space // remaining_tools
-        needed_space = len(msg["content"])
+        needed_space = count_tokens_fn([{"role": "tool", "content": msg["content"]}])
         allocated_space = min(needed_space, max_allocation)

         if needed_space > allocated_space:
-            truncation_notice = "\n\n[TRUNCATED]"
-            # Ensure the indicator fits in the allocated space
-            if allocated_space > len(truncation_notice):
-                msg["content"] = (
-                    msg["content"][: allocated_space - len(truncation_notice)]
-                    + truncation_notice
-                )
-                logging.info(
-                    f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space-len(truncation_notice)} tokens"
-                )
-            else:
-                msg["content"] = truncation_notice[:allocated_space]
-                logging.info(
-                    f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space} tokens"
-                )
-            msg.pop("token_count", None)  # Remove token_count if present
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)

         remaining_space -= allocated_space
-    return messages
-
-
-class ToolCallResult(BaseModel):
-    tool_call_id: str
-    tool_name: str
-    description: str
-    result: StructuredToolResult
-    size: Optional[int] = None
-
-    def as_tool_call_message(self):
-        content = format_tool_result_data(self.result)
-        if self.result.params:
-            content = (
-                f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
-                + content
-            )
-        return {
-            "tool_call_id": self.tool_call_id,
-            "role": "tool",
-            "name": self.tool_name,
-            "content": content,
-        }
-
-    def as_tool_result_response(self):
-        result_dump = self.result.model_dump()
-        result_dump["data"] = self.result.get_stringified_data()
-
-        return {
-            "tool_call_id": self.tool_call_id,
-            "tool_name": self.tool_name,
-            "description": self.description,
-            "role": "tool",
-            "result": result_dump,
-        }
-
-    def as_streaming_tool_result_response(self):
-        result_dump = self.result.model_dump()
-        result_dump["data"] = self.result.get_stringified_data()
-
-        return {
-            "tool_call_id": self.tool_call_id,
-            "role": "tool",
-            "description": self.description,
-            "name": self.tool_name,
-            "result": result_dump,
-        }
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata


 class LLMResult(LLMCosts):
@@ -269,6 +242,7 @@ class LLMResult(LLMCosts):
     # TODO: clean up these two
     prompt: Optional[str] = None
    messages: Optional[List[dict]] = None
+    metadata: Optional[Dict[Any, Any]] = None

     def get_tool_usage_summary(self):
         return "AI used info from issue and " + ",".join(
@@ -344,7 +318,7 @@ class ToolCallingLLM:
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
-
+        metadata: Dict[Any, Any] = {}
         while i < max_steps:
             i += 1
             perf_timing.measure(f"start iteration {i}")
@@ -360,9 +334,13 @@ class ToolCallingLLM:

             if (total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
-                messages = self.truncate_messages_to_fit_context(
+                truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
                 )
+                metadata["truncations"] = [
+                    t.model_dump() for t in truncated_res.truncations
+                ]
+                messages = truncated_res.truncated_messages
                 perf_timing.measure("truncate_messages_to_fit_context")

             logging.debug(f"sending messages={messages}\n\ntools={tools}")
@@ -408,6 +386,7 @@ class ToolCallingLLM:
                     "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -443,7 +422,11 @@ class ToolCallingLLM:
                     )
                     costs.total_cost += post_processing_cost

+                    self.llm.count_tokens_for_message(messages)
                     perf_timing.end(f"- completed in {i} iterations -")
+                    metadata["usage"] = get_llm_usage(full_response)
+                    metadata["max_tokens"] = max_context_size
+                    metadata["max_output_tokens"] = maximum_output_token
                     return LLMResult(
                         result=post_processed_response,
                         unprocessed_result=raw_response,
@@ -451,6 +434,7 @@ class ToolCallingLLM:
                         prompt=json.dumps(messages, indent=2),
                         messages=messages,
                         **costs.model_dump(),  # Include all cost fields
+                        metadata=metadata,
                     )

                 perf_timing.end(f"- completed in {i} iterations -")
@@ -460,6 +444,7 @@ class ToolCallingLLM:
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
                     **costs.model_dump(),  # Include all cost fields
+                    metadata=metadata,
                 )

             if text_response and text_response.strip():
@@ -498,7 +483,7 @@ class ToolCallingLLM:

                 if (
                     tool_call_result.result.status
-                    == ToolResultStatus.APPROVAL_REQUIRED
+                    == StructuredToolResultStatus.APPROVAL_REQUIRED
                 ):
                     with trace_span.start_span(type="tool") as tool_span:
                         tool_call_result = self._handle_tool_call_approval(
@@ -536,7 +521,7 @@ class ToolCallingLLM:
                 f"Skipping tool execution for {tool_name}: args: {tool_params}"
             )
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Failed to find tool {tool_name}",
                 params=tool_params,
             )
@@ -550,7 +535,7 @@ class ToolCallingLLM:
                 f"Tool call to {tool_name} failed with an Exception", exc_info=True
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Tool call failed: {e}",
                 params=tool_params,
             )
@@ -592,7 +577,7 @@ class ToolCallingLLM:
                 f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=tool_response,
                 params=tool_params,
             )
@@ -642,7 +627,7 @@ class ToolCallingLLM:
                 tool_name=tool_name,
                 description="NA",
                 result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
+                    status=StructuredToolResultStatus.ERROR,
                     error="Custom tool calls are not supported",
                     params=None,
                 ),
@@ -658,6 +643,11 @@ class ToolCallingLLM:
             previous_tool_calls=previous_tool_calls,
             tool_number=tool_number,
         )
+
+        prevent_overly_big_tool_response(
+            tool_call_result=tool_call_result, llm=self.llm
+        )
+
         ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
         return tool_call_result

@@ -679,7 +669,7 @@ class ToolCallingLLM:

         # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
         if not self.approval_callback:
-            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.status = StructuredToolResultStatus.ERROR
             return tool_call_result

         # Get approval from user
@@ -699,7 +689,7 @@ class ToolCallingLLM:
         else:
             # User denied - update to error
             feedback_text = f" User feedback: {feedback}" if feedback else ""
-            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.status = StructuredToolResultStatus.ERROR
             tool_call_result.result.error = (
                 f"User denied command execution.{feedback_text}"
             )
@@ -757,13 +747,16 @@ class ToolCallingLLM:
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
         self, messages: list, max_context_size: int, maximum_output_token: int
-    ) -> list:
-        return truncate_messages_to_fit_context(
+    ) -> TruncationResult:
+        truncated_res = truncate_messages_to_fit_context(
             messages,
             max_context_size,
             maximum_output_token,
             self.llm.count_tokens_for_message,
         )
+        if truncated_res.truncations:
+            sentry_helper.capture_tool_truncations(truncated_res.truncations)
+        return truncated_res

     def call_stream(
         self,
@@ -791,6 +784,7 @@ class ToolCallingLLM:
         )
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
+        metadata: Dict[Any, Any] = {}
         i = 0
         tool_number_offset = 0

@@ -809,10 +803,16 @@ class ToolCallingLLM:

             if (total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
-                messages = self.truncate_messages_to_fit_context(
+                truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
                 )
+                metadata["truncations"] = [
+                    t.model_dump() for t in truncated_res.truncations
+                ]
+                messages = truncated_res.truncated_messages
                 perf_timing.measure("truncate_messages_to_fit_context")
+            else:
+                metadata["truncations"] = []

             logging.debug(f"sending messages={messages}\n\ntools={tools}")
             try:
@@ -854,6 +854,7 @@ class ToolCallingLLM:
                     "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -866,9 +867,17 @@ class ToolCallingLLM:

             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
+                self.llm.count_tokens_for_message(messages)
+                metadata["usage"] = get_llm_usage(full_response)
+                metadata["max_tokens"] = max_context_size
+                metadata["max_output_tokens"] = maximum_output_token
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
-                    data={"content": response_message.content, "messages": messages},
+                    data={
+                        "content": response_message.content,
+                        "messages": messages,
+                        "metadata": metadata,
+                    },
                 )
                 return

@@ -900,7 +909,6 @@ class ToolCallingLLM:

             for future in concurrent.futures.as_completed(futures):
                 tool_call_result: ToolCallResult = future.result()
-
                 tool_calls.append(tool_call_result.as_tool_result_response())
                 messages.append(tool_call_result.as_tool_call_message())
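
For context, the truncation hunks above switch from character-based to token-based sizing and move the per-message cut into _truncate_tool_message, but they keep the same fair-share allocation: the remaining budget is split across the not-yet-processed tool messages, smallest first, and only messages that exceed their share get cut. Below is a minimal standalone sketch of that allocation idea, using character lengths as a stand-in for the package's token counting; it is illustrative only and not the package's code.

# Illustrative sketch (not part of holmesgpt): fair-share budget allocation
# across tool messages, smallest first, truncating whatever does not fit.
TRUNCATION_NOTICE = "\n\n[TRUNCATED]"


def allocate_and_truncate(contents, budget):
    # Process smallest messages first so they usually keep their full content
    # and the largest messages absorb most of the truncation.
    order = sorted(range(len(contents)), key=lambda i: len(contents[i]))
    results = list(contents)
    remaining = budget
    for processed, idx in enumerate(order):
        remaining_msgs = len(order) - processed
        max_allocation = remaining // remaining_msgs  # fair share of what is left
        needed = len(results[idx])
        allocated = min(needed, max_allocation)
        if needed > allocated:
            if allocated > len(TRUNCATION_NOTICE):
                results[idx] = (
                    results[idx][: allocated - len(TRUNCATION_NOTICE)]
                    + TRUNCATION_NOTICE
                )
            else:
                results[idx] = TRUNCATION_NOTICE[:allocated]
        remaining -= allocated
    return results


if __name__ == "__main__":
    msgs = ["tiny", "medium " * 20, "very large " * 500]
    for out in allocate_and_truncate(msgs, budget=300):
        print(len(out), repr(out[:40]))

Because allocation proceeds smallest first, small tool outputs typically survive intact while the largest outputs are cut, which matches the comment in the diff about small tools often getting exactly what they need.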