holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.3a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +5 -2
- holmes/common/env_vars.py +8 -2
- holmes/config.py +4 -7
- holmes/core/conversations.py +12 -2
- holmes/core/feedback.py +191 -0
- holmes/core/llm.py +52 -10
- holmes/core/models.py +101 -1
- holmes/core/supabase_dal.py +23 -9
- holmes/core/tool_calling_llm.py +206 -16
- holmes/core/tools.py +20 -7
- holmes/core/tools_utils/token_counting.py +13 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
- holmes/core/tools_utils/tool_executor.py +11 -6
- holmes/core/toolset_manager.py +7 -3
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/interactive.py +146 -14
- holmes/plugins/prompts/_fetch_logs.jinja2 +13 -1
- holmes/plugins/runbooks/__init__.py +6 -1
- holmes/plugins/toolsets/__init__.py +11 -4
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +333 -199
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +181 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +80 -22
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +5 -8
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +7 -12
- holmes/plugins/toolsets/git.py +14 -12
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
- holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +2 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +21 -39
- holmes/plugins/toolsets/internet/internet.py +2 -3
- holmes/plugins/toolsets/internet/notion.py +2 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
- holmes/plugins/toolsets/kafka.py +7 -18
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +808 -419
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +27 -11
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
- holmes/plugins/toolsets/robusta/robusta.py +4 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
- holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
- holmes/utils/sentry_helper.py +1 -1
- holmes/utils/stream.py +22 -7
- holmes/version.py +34 -14
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +7 -9
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +71 -65
- holmes/core/tools_utils/data_types.py +0 -81
- holmes/plugins/toolsets/newrelic.py +0 -231
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
holmes/core/supabase_dal.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import binascii
|
|
3
|
+
import gzip
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import os
|
|
@@ -7,7 +8,6 @@ import threading
|
|
|
7
8
|
from datetime import datetime, timedelta
|
|
8
9
|
from typing import Dict, List, Optional, Tuple
|
|
9
10
|
from uuid import uuid4
|
|
10
|
-
import gzip
|
|
11
11
|
|
|
12
12
|
import yaml # type: ignore
|
|
13
13
|
from cachetools import TTLCache # type: ignore
|
|
@@ -30,6 +30,9 @@ from holmes.core.resource_instruction import (
|
|
|
30
30
|
ResourceInstructionDocument,
|
|
31
31
|
ResourceInstructions,
|
|
32
32
|
)
|
|
33
|
+
from holmes.core.truncation.dal_truncation_utils import (
|
|
34
|
+
truncate_evidences_entities_if_necessary,
|
|
35
|
+
)
|
|
33
36
|
from holmes.utils.definitions import RobustaConfig
|
|
34
37
|
from holmes.utils.env import get_env_replacement
|
|
35
38
|
from holmes.utils.global_instructions import Instructions
|
|
@@ -46,6 +49,9 @@ HOLMES_TOOLSET = "HolmesToolsStatus"
|
|
|
46
49
|
SCANS_META_TABLE = "ScansMeta"
|
|
47
50
|
SCANS_RESULTS_TABLE = "ScansResults"
|
|
48
51
|
|
|
52
|
+
ENRICHMENT_BLACKLIST = ["text_file", "graph", "ai_analysis", "holmes"]
|
|
53
|
+
ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)
|
|
54
|
+
|
|
49
55
|
|
|
50
56
|
class RobustaToken(BaseModel):
|
|
51
57
|
store_url: str
|
|
@@ -60,7 +66,7 @@ class SupabaseDal:
|
|
|
60
66
|
self.enabled = self.__init_config()
|
|
61
67
|
self.cluster = cluster
|
|
62
68
|
if not self.enabled:
|
|
63
|
-
logging.
|
|
69
|
+
logging.debug(
|
|
64
70
|
"Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
|
|
65
71
|
)
|
|
66
72
|
return
|
|
@@ -118,7 +124,7 @@ class SupabaseDal:
|
|
|
118
124
|
)
|
|
119
125
|
|
|
120
126
|
if not os.path.exists(config_file_path):
|
|
121
|
-
logging.
|
|
127
|
+
logging.debug(f"No robusta config in {config_file_path}")
|
|
122
128
|
return None
|
|
123
129
|
|
|
124
130
|
logging.info(f"loading config {config_file_path}")
|
|
@@ -262,11 +268,14 @@ class SupabaseDal:
|
|
|
262
268
|
.select("*")
|
|
263
269
|
.eq("account_id", self.account_id)
|
|
264
270
|
.in_("issue_id", changes_ids)
|
|
271
|
+
.not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
|
|
265
272
|
.execute()
|
|
266
273
|
)
|
|
267
274
|
if not len(change_data_response.data):
|
|
268
275
|
return None
|
|
269
276
|
|
|
277
|
+
truncate_evidences_entities_if_necessary(change_data_response.data)
|
|
278
|
+
|
|
270
279
|
except Exception:
|
|
271
280
|
logging.exception("Supabase error while retrieving change content")
|
|
272
281
|
return None
|
|
@@ -323,17 +332,17 @@ class SupabaseDal:
|
|
|
323
332
|
return data
|
|
324
333
|
|
|
325
334
|
def extract_relevant_issues(self, evidence):
|
|
326
|
-
enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
|
|
327
335
|
data = [
|
|
328
336
|
enrich
|
|
329
337
|
for enrich in evidence.data
|
|
330
|
-
if enrich.get("enrichment_type") not in
|
|
338
|
+
if enrich.get("enrichment_type") not in ENRICHMENT_BLACKLIST_SET
|
|
331
339
|
]
|
|
332
340
|
|
|
333
341
|
unzipped_files = [
|
|
334
342
|
self.unzip_evidence_file(enrich)
|
|
335
343
|
for enrich in evidence.data
|
|
336
344
|
if enrich.get("enrichment_type") == "text_file"
|
|
345
|
+
or enrich.get("enrichment_type") == "alert_raw_data"
|
|
337
346
|
]
|
|
338
347
|
|
|
339
348
|
data.extend(unzipped_files)
|
|
@@ -370,12 +379,14 @@ class SupabaseDal:
|
|
|
370
379
|
evidence = (
|
|
371
380
|
self.client.table(EVIDENCE_TABLE)
|
|
372
381
|
.select("*")
|
|
373
|
-
.
|
|
382
|
+
.eq("issue_id", issue_id)
|
|
383
|
+
.not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
|
|
374
384
|
.execute()
|
|
375
385
|
)
|
|
376
|
-
|
|
386
|
+
relevant_evidence = self.extract_relevant_issues(evidence)
|
|
387
|
+
truncate_evidences_entities_if_necessary(relevant_evidence)
|
|
377
388
|
|
|
378
|
-
issue_data["evidence"] =
|
|
389
|
+
issue_data["evidence"] = relevant_evidence
|
|
379
390
|
|
|
380
391
|
# build issue investigation dates
|
|
381
392
|
started_at = issue_data.get("starts_at")
|
|
@@ -518,10 +529,13 @@ class SupabaseDal:
|
|
|
518
529
|
self.client.table(EVIDENCE_TABLE)
|
|
519
530
|
.select("data, enrichment_type")
|
|
520
531
|
.in_("issue_id", unique_issues)
|
|
532
|
+
.not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
|
|
521
533
|
.execute()
|
|
522
534
|
)
|
|
523
535
|
|
|
524
|
-
|
|
536
|
+
relevant_issues = self.extract_relevant_issues(res)
|
|
537
|
+
truncate_evidences_entities_if_necessary(relevant_issues)
|
|
538
|
+
return relevant_issues
|
|
525
539
|
|
|
526
540
|
except Exception:
|
|
527
541
|
logging.exception("failed to fetch workload issues data", exc_info=True)
|
holmes/core/tool_calling_llm.py
CHANGED
|
@@ -4,6 +4,13 @@ import logging
|
|
|
4
4
|
import textwrap
|
|
5
5
|
from typing import Dict, List, Optional, Type, Union, Callable, Any
|
|
6
6
|
|
|
7
|
+
from holmes.core.models import (
|
|
8
|
+
ToolApprovalDecision,
|
|
9
|
+
ToolCallResult,
|
|
10
|
+
TruncationResult,
|
|
11
|
+
TruncationMetadata,
|
|
12
|
+
PendingToolApproval,
|
|
13
|
+
)
|
|
7
14
|
|
|
8
15
|
import sentry_sdk
|
|
9
16
|
from openai import BadRequestError
|
|
@@ -27,13 +34,18 @@ from holmes.core.investigation_structured_output import (
|
|
|
27
34
|
is_response_an_incorrect_tool_call,
|
|
28
35
|
)
|
|
29
36
|
from holmes.core.issue import Issue
|
|
30
|
-
from holmes.core.llm import LLM
|
|
37
|
+
from holmes.core.llm import LLM, get_llm_usage
|
|
31
38
|
from holmes.core.performance_timing import PerformanceTiming
|
|
32
39
|
from holmes.core.resource_instruction import ResourceInstructions
|
|
33
40
|
from holmes.core.runbooks import RunbookManager
|
|
34
41
|
from holmes.core.safeguards import prevent_overly_repeated_tool_call
|
|
35
|
-
from holmes.core.tools import
|
|
42
|
+
from holmes.core.tools import (
|
|
43
|
+
StructuredToolResult,
|
|
44
|
+
StructuredToolResultStatus,
|
|
45
|
+
ToolInvokeContext,
|
|
46
|
+
)
|
|
36
47
|
from holmes.core.tools_utils.tool_context_window_limiter import (
|
|
48
|
+
get_max_token_count_for_single_tool,
|
|
37
49
|
prevent_overly_big_tool_response,
|
|
38
50
|
)
|
|
39
51
|
from holmes.plugins.prompts import load_and_render_prompt
|
|
@@ -44,11 +56,6 @@ from holmes.utils.global_instructions import (
|
|
|
44
56
|
)
|
|
45
57
|
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
|
|
46
58
|
from holmes.core.tools_utils.tool_executor import ToolExecutor
|
|
47
|
-
from holmes.core.tools_utils.data_types import (
|
|
48
|
-
TruncationResult,
|
|
49
|
-
ToolCallResult,
|
|
50
|
-
TruncationMetadata,
|
|
51
|
-
)
|
|
52
59
|
from holmes.core.tracing import DummySpan
|
|
53
60
|
from holmes.utils.colors import AI_COLOR
|
|
54
61
|
from holmes.utils.stream import StreamEvents, StreamMessage
|
|
@@ -264,6 +271,99 @@ class ToolCallingLLM:
|
|
|
264
271
|
Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
|
|
265
272
|
] = None
|
|
266
273
|
|
|
274
|
+
def process_tool_decisions(
    self, messages: List[Dict[str, Any]], tool_decisions: List[ToolApprovalDecision]
) -> List[Dict[str, Any]]:
    """
    Process tool approval decisions and execute approved tools.

    The most recent assistant message flagged with ``pending_approval`` is
    located and one ``role="tool"`` message is appended to ``messages`` for
    each of its tool calls: the tool's result when the call was approved, a
    failure notice when executing it raised, or a denial notice when it was
    rejected or no decision was supplied for it.

    Once the decisions have been applied the ``pending_approval`` flag is
    removed from that assistant message, so calling this method again on the
    same conversation does not append a second set of tool messages.

    Args:
        messages: Current conversation messages. Mutated in place.
        tool_decisions: List of ToolApprovalDecision objects; only their
            ``tool_call_id`` and ``approved`` attributes are read here.

    Returns:
        The same ``messages`` list, updated with tool execution results.
    """
    # Find the last assistant message that is still awaiting approval.
    pending_message = next(
        (
            msg
            for msg in reversed(messages)
            if msg.get("role") == "assistant" and msg.get("pending_approval")
        ),
        None,
    )
    pending_tool_calls = (
        pending_message.get("tool_calls", []) if pending_message is not None else None
    )

    if pending_message is None or not pending_tool_calls:
        # No pending approvals found
        if tool_decisions:
            logging.warning(
                f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
            )
        return messages

    # Create decision lookup
    decisions_by_id = {decision.tool_call_id: decision for decision in tool_decisions}

    # Decisions that do not match any pending tool call are reported and ignored.
    pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
    invalid_decisions = [
        decision.tool_call_id
        for decision in tool_decisions
        if decision.tool_call_id not in pending_tool_ids
    ]
    if invalid_decisions:
        logging.warning(
            f"Received decisions for non-pending tool calls: {invalid_decisions}"
        )

    # Process each tool call
    for tool_call in pending_tool_calls:
        tool_call_id = tool_call["id"]
        decision = decisions_by_id.get(tool_call_id)

        if not (decision and decision.approved):
            # Tool was rejected or no decision found, add rejection message
            messages.append(
                {
                    "tool_call_id": tool_call_id,
                    "role": "tool",
                    "name": tool_call["function"]["name"],
                    "content": "Tool execution was denied by the user.",
                }
            )
            continue

        try:
            tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
            llm_tool_result = self._invoke_llm_tool_call(
                tool_to_call=tool_call_obj,
                previous_tool_calls=[],
                trace_span=DummySpan(),
                tool_number=None,
            )
            messages.append(llm_tool_result.as_tool_call_message())
        except Exception as e:
            # Keep the traceback in the log; the conversation only gets the summary.
            logging.error(
                f"Failed to execute approved tool {tool_call_id}: {e}",
                exc_info=True,
            )
            messages.append(
                {
                    "tool_call_id": tool_call_id,
                    "role": "tool",
                    "name": tool_call["function"]["name"],
                    "content": f"Tool execution failed: {str(e)}",
                }
            )

    # Every pending call now has a tool message, so the approval request is
    # resolved. Dropping the marker prevents the same calls from being
    # processed again on a later invocation.
    pending_message.pop("pending_approval", None)

    return messages
|
|
366
|
+
|
|
267
367
|
def prompt_call(
|
|
268
368
|
self,
|
|
269
369
|
system_prompt: str,
|
|
@@ -422,7 +522,11 @@ class ToolCallingLLM:
|
|
|
422
522
|
)
|
|
423
523
|
costs.total_cost += post_processing_cost
|
|
424
524
|
|
|
525
|
+
self.llm.count_tokens_for_message(messages)
|
|
425
526
|
perf_timing.end(f"- completed in {i} iterations -")
|
|
527
|
+
metadata["usage"] = get_llm_usage(full_response)
|
|
528
|
+
metadata["max_tokens"] = max_context_size
|
|
529
|
+
metadata["max_output_tokens"] = maximum_output_token
|
|
426
530
|
return LLMResult(
|
|
427
531
|
result=post_processed_response,
|
|
428
532
|
unprocessed_result=raw_response,
|
|
@@ -523,9 +627,13 @@ class ToolCallingLLM:
|
|
|
523
627
|
)
|
|
524
628
|
|
|
525
629
|
try:
|
|
526
|
-
|
|
527
|
-
|
|
630
|
+
invoke_context = ToolInvokeContext(
|
|
631
|
+
tool_number=tool_number,
|
|
632
|
+
user_approved=user_approved,
|
|
633
|
+
llm=self.llm,
|
|
634
|
+
max_token_count=get_max_token_count_for_single_tool(self.llm),
|
|
528
635
|
)
|
|
636
|
+
tool_response = tool.invoke(tool_params, context=invoke_context)
|
|
529
637
|
except Exception as e:
|
|
530
638
|
logging.error(
|
|
531
639
|
f"Tool call to {tool_name} failed with an Exception", exc_info=True
|
|
@@ -583,7 +691,9 @@ class ToolCallingLLM:
|
|
|
583
691
|
return ToolCallResult(
|
|
584
692
|
tool_call_id=tool_call_id,
|
|
585
693
|
tool_name=tool_name,
|
|
586
|
-
description=tool.get_parameterized_one_liner(tool_params)
|
|
694
|
+
description=str(tool.get_parameterized_one_liner(tool_params))
|
|
695
|
+
if tool
|
|
696
|
+
else "",
|
|
587
697
|
result=tool_response,
|
|
588
698
|
)
|
|
589
699
|
|
|
@@ -761,12 +871,13 @@ class ToolCallingLLM:
|
|
|
761
871
|
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
|
|
762
872
|
sections: Optional[InputSectionsDataType] = None,
|
|
763
873
|
msgs: Optional[list[dict]] = None,
|
|
874
|
+
enable_tool_approval: bool = False,
|
|
764
875
|
):
|
|
765
876
|
"""
|
|
766
877
|
This function DOES NOT call llm.completion(stream=true).
|
|
767
878
|
This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
|
|
768
879
|
"""
|
|
769
|
-
messages = []
|
|
880
|
+
messages: list[dict] = []
|
|
770
881
|
if system_prompt:
|
|
771
882
|
messages.append({"role": "system", "content": system_prompt})
|
|
772
883
|
if user_prompt:
|
|
@@ -863,6 +974,10 @@ class ToolCallingLLM:
|
|
|
863
974
|
|
|
864
975
|
tools_to_call = getattr(response_message, "tool_calls", None)
|
|
865
976
|
if not tools_to_call:
|
|
977
|
+
self.llm.count_tokens_for_message(messages)
|
|
978
|
+
metadata["usage"] = get_llm_usage(full_response)
|
|
979
|
+
metadata["max_tokens"] = max_context_size
|
|
980
|
+
metadata["max_output_tokens"] = maximum_output_token
|
|
866
981
|
yield StreamMessage(
|
|
867
982
|
event=StreamEvents.ANSWER_END,
|
|
868
983
|
data={
|
|
@@ -882,6 +997,11 @@ class ToolCallingLLM:
|
|
|
882
997
|
)
|
|
883
998
|
|
|
884
999
|
perf_timing.measure("pre-tool-calls")
|
|
1000
|
+
|
|
1001
|
+
# Check if any tools require approval first
|
|
1002
|
+
pending_approvals = []
|
|
1003
|
+
approval_required_tools = []
|
|
1004
|
+
|
|
885
1005
|
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
|
|
886
1006
|
futures = []
|
|
887
1007
|
for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
|
|
@@ -901,15 +1021,85 @@ class ToolCallingLLM:
|
|
|
901
1021
|
|
|
902
1022
|
for future in concurrent.futures.as_completed(futures):
|
|
903
1023
|
tool_call_result: ToolCallResult = future.result()
|
|
904
|
-
tool_calls.append(tool_call_result.as_tool_result_response())
|
|
905
|
-
messages.append(tool_call_result.as_tool_call_message())
|
|
906
1024
|
|
|
907
|
-
|
|
1025
|
+
if (
|
|
1026
|
+
tool_call_result.result.status
|
|
1027
|
+
== StructuredToolResultStatus.APPROVAL_REQUIRED
|
|
1028
|
+
):
|
|
1029
|
+
if enable_tool_approval:
|
|
1030
|
+
pending_approvals.append(
|
|
1031
|
+
PendingToolApproval(
|
|
1032
|
+
tool_call_id=tool_call_result.tool_call_id,
|
|
1033
|
+
tool_name=tool_call_result.tool_name,
|
|
1034
|
+
description=tool_call_result.description,
|
|
1035
|
+
params=tool_call_result.result.params or {},
|
|
1036
|
+
)
|
|
1037
|
+
)
|
|
1038
|
+
approval_required_tools.append(tool_call_result)
|
|
1039
|
+
|
|
1040
|
+
yield StreamMessage(
|
|
1041
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1042
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1043
|
+
)
|
|
1044
|
+
else:
|
|
1045
|
+
tool_call_result.result.status = (
|
|
1046
|
+
StructuredToolResultStatus.ERROR
|
|
1047
|
+
)
|
|
1048
|
+
tool_call_result.result.error = f"Tool call rejected for security reasons: {tool_call_result.result.error}"
|
|
1049
|
+
|
|
1050
|
+
tool_calls.append(
|
|
1051
|
+
tool_call_result.as_tool_result_response()
|
|
1052
|
+
)
|
|
1053
|
+
messages.append(tool_call_result.as_tool_call_message())
|
|
908
1054
|
|
|
1055
|
+
yield StreamMessage(
|
|
1056
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1057
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1058
|
+
)
|
|
1059
|
+
|
|
1060
|
+
else:
|
|
1061
|
+
tool_calls.append(tool_call_result.as_tool_result_response())
|
|
1062
|
+
messages.append(tool_call_result.as_tool_call_message())
|
|
1063
|
+
|
|
1064
|
+
yield StreamMessage(
|
|
1065
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1066
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1067
|
+
)
|
|
1068
|
+
|
|
1069
|
+
# If we have approval required tools, end the stream with pending approvals
|
|
1070
|
+
if pending_approvals:
|
|
1071
|
+
# Add assistant message with pending tool calls
|
|
1072
|
+
assistant_msg = {
|
|
1073
|
+
"role": "assistant",
|
|
1074
|
+
"content": response_message.content,
|
|
1075
|
+
"tool_calls": [
|
|
1076
|
+
{
|
|
1077
|
+
"id": result.tool_call_id,
|
|
1078
|
+
"type": "function",
|
|
1079
|
+
"function": {
|
|
1080
|
+
"name": result.tool_name,
|
|
1081
|
+
"arguments": json.dumps(result.result.params or {}),
|
|
1082
|
+
},
|
|
1083
|
+
}
|
|
1084
|
+
for result in approval_required_tools
|
|
1085
|
+
],
|
|
1086
|
+
"pending_approval": True,
|
|
1087
|
+
}
|
|
1088
|
+
messages.append(assistant_msg)
|
|
1089
|
+
|
|
1090
|
+
# End stream with approvals required
|
|
909
1091
|
yield StreamMessage(
|
|
910
|
-
event=StreamEvents.
|
|
911
|
-
data=
|
|
1092
|
+
event=StreamEvents.APPROVAL_REQUIRED,
|
|
1093
|
+
data={
|
|
1094
|
+
"content": None,
|
|
1095
|
+
"messages": messages,
|
|
1096
|
+
"pending_approvals": [
|
|
1097
|
+
approval.model_dump() for approval in pending_approvals
|
|
1098
|
+
],
|
|
1099
|
+
"requires_approval": True,
|
|
1100
|
+
},
|
|
912
1101
|
)
|
|
1102
|
+
return
|
|
913
1103
|
|
|
914
1104
|
# Update the tool number offset for the next iteration
|
|
915
1105
|
tool_number_offset += len(tools_to_call)
|
holmes/core/tools.py
CHANGED
|
@@ -31,6 +31,7 @@ from pydantic import (
|
|
|
31
31
|
)
|
|
32
32
|
from rich.console import Console
|
|
33
33
|
|
|
34
|
+
from holmes.core.llm import LLM
|
|
34
35
|
from holmes.core.openai_formatting import format_tool_to_open_ai_standard
|
|
35
36
|
from holmes.plugins.prompts import load_and_render_prompt
|
|
36
37
|
from holmes.core.transformers import (
|
|
@@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
|
|
|
159
160
|
items: Optional["ToolParameter"] = None # For array item schemas
|
|
160
161
|
|
|
161
162
|
|
|
163
|
+
# Per-invocation context handed to Tool.invoke() / Tool._invoke() alongside the tool params.
class ToolInvokeContext(BaseModel):
    # presumably needed because LLM is not a pydantic-native type - TODO confirm
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Ordinal of the tool call; when set, Tool.invoke() uses it for the "#<n>" log prefix
    # and the "/show <n>" hint.
    tool_number: Optional[int] = None
    # NOTE(review): looks like this marks a call the user has already approved - confirm
    # against the tools that read it.
    user_approved: bool = False
    # The LLM instance supplied by the caller issuing the tool call.
    llm: LLM
    # Token budget for a single tool response; the caller computes it with
    # get_max_token_count_for_single_tool(llm).
    max_token_count: int
|
|
170
|
+
|
|
171
|
+
|
|
162
172
|
class Tool(ABC, BaseModel):
|
|
163
173
|
name: str
|
|
164
174
|
description: str
|
|
@@ -225,15 +235,14 @@ class Tool(ABC, BaseModel):
|
|
|
225
235
|
def invoke(
|
|
226
236
|
self,
|
|
227
237
|
params: Dict,
|
|
228
|
-
|
|
229
|
-
user_approved: bool = False,
|
|
238
|
+
context: ToolInvokeContext,
|
|
230
239
|
) -> StructuredToolResult:
|
|
231
|
-
tool_number_str = f"#{tool_number} " if tool_number else ""
|
|
240
|
+
tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
|
|
232
241
|
logger.info(
|
|
233
242
|
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
|
|
234
243
|
)
|
|
235
244
|
start_time = time.time()
|
|
236
|
-
result = self._invoke(params=params,
|
|
245
|
+
result = self._invoke(params=params, context=context)
|
|
237
246
|
result.icon_url = self.icon_url
|
|
238
247
|
|
|
239
248
|
# Apply transformers to the result
|
|
@@ -244,7 +253,7 @@ class Tool(ABC, BaseModel):
|
|
|
244
253
|
if hasattr(transformed_result, "get_stringified_data")
|
|
245
254
|
else str(transformed_result)
|
|
246
255
|
)
|
|
247
|
-
show_hint = f"/show {tool_number}" if tool_number else "/show"
|
|
256
|
+
show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
|
|
248
257
|
line_count = output_str.count("\n") + 1 if output_str else 0
|
|
249
258
|
logger.info(
|
|
250
259
|
f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
|
|
@@ -340,7 +349,9 @@ class Tool(ABC, BaseModel):
|
|
|
340
349
|
|
|
341
350
|
@abstractmethod
|
|
342
351
|
def _invoke(
|
|
343
|
-
self,
|
|
352
|
+
self,
|
|
353
|
+
params: dict,
|
|
354
|
+
context: ToolInvokeContext,
|
|
344
355
|
) -> StructuredToolResult:
|
|
345
356
|
"""
|
|
346
357
|
params: the tool params
|
|
@@ -400,7 +411,9 @@ class YAMLTool(Tool, BaseModel):
|
|
|
400
411
|
return StructuredToolResultStatus.SUCCESS
|
|
401
412
|
|
|
402
413
|
def _invoke(
|
|
403
|
-
self,
|
|
414
|
+
self,
|
|
415
|
+
params: dict,
|
|
416
|
+
context: ToolInvokeContext,
|
|
404
417
|
) -> StructuredToolResult:
|
|
405
418
|
if self.command is not None:
|
|
406
419
|
raw_output, return_code, invocation = self.__invoke_command(params)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from holmes.core.llm import LLM
|
|
2
|
+
from holmes.core.models import format_tool_result_data
|
|
3
|
+
from holmes.core.tools import StructuredToolResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def count_tool_response_tokens(
    llm: LLM, structured_tool_result: StructuredToolResult
) -> int:
    """Return the token count of a tool result once rendered as a tool message.

    The result is formatted with ``format_tool_result_data`` and wrapped in a
    single ``role="tool"`` message, which is then measured with
    ``llm.count_tokens_for_message``.
    """
    rendered_content = format_tool_result_data(structured_tool_result)
    tool_message = {"role": "tool", "content": rendered_content}
    return llm.count_tokens_for_message([tool_message])
|
|
@@ -1,33 +1,55 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
|
|
2
3
|
from holmes.core.llm import LLM
|
|
3
4
|
from holmes.core.tools import StructuredToolResultStatus
|
|
4
|
-
from holmes.core.
|
|
5
|
+
from holmes.core.models import ToolCallResult
|
|
5
6
|
from holmes.utils import sentry_helper
|
|
6
7
|
|
|
7
8
|
|
|
9
|
+
def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
    """Return the number of tokens covered by a percentage of the context window.

    Args:
        percent_of_total_context_window: Share of the context window, expressed
            as a percentage in the range (0, 100].
        llm: Object whose ``get_context_window_size()`` gives the window size.

    Returns:
        The floored token count for that percentage, or the full context window
        size when the percentage is outside (0, 100].
    """
    window_tokens = llm.get_context_window_size()

    pct_is_usable = 0 < percent_of_total_context_window <= 100
    if not pct_is_usable:
        return window_tokens

    return int(window_tokens * percent_of_total_context_window // 100)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_max_token_count_for_single_tool(llm: LLM) -> int:
    """Return the token budget for a single tool response.

    The budget is the TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT share of ``llm``'s
    context window, as computed by ``get_pct_token_count``.
    """
    single_tool_pct = TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
    return get_pct_token_count(
        percent_of_total_context_window=single_tool_pct, llm=llm
    )
|
|
22
|
+
|
|
23
|
+
|
|
8
24
|
def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
|
|
25
|
+
max_tokens_allowed = get_max_token_count_for_single_tool(llm)
|
|
26
|
+
|
|
27
|
+
message = tool_call_result.as_tool_call_message()
|
|
28
|
+
messages_token = llm.count_tokens_for_message(messages=[message])
|
|
29
|
+
|
|
30
|
+
if messages_token > max_tokens_allowed:
|
|
31
|
+
relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
|
|
32
|
+
|
|
33
|
+
error_message: Optional[str] = (
|
|
34
|
+
f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
|
|
20
35
|
)
|
|
21
36
|
|
|
22
|
-
if
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
sentry_helper.capture_toolcall_contains_too_many_tokens(
|
|
32
|
-
tool_call_result, messages_token, max_tokens_allowed
|
|
37
|
+
if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
|
|
38
|
+
error_message = None
|
|
39
|
+
# tool_call_result.result.data is set to None below which is expected to fix the issue
|
|
40
|
+
elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
|
|
41
|
+
original_error = (
|
|
42
|
+
tool_call_result.result.error
|
|
43
|
+
or tool_call_result.result.data
|
|
44
|
+
or "Unknown error"
|
|
33
45
|
)
|
|
46
|
+
truncated_error = str(original_error)[:100]
|
|
47
|
+
error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"
|
|
48
|
+
|
|
49
|
+
tool_call_result.result.status = StructuredToolResultStatus.ERROR
|
|
50
|
+
tool_call_result.result.data = None
|
|
51
|
+
tool_call_result.result.error = error_message
|
|
52
|
+
|
|
53
|
+
sentry_helper.capture_toolcall_contains_too_many_tokens(
|
|
54
|
+
tool_call_result, messages_token, max_tokens_allowed
|
|
55
|
+
)
|
|
@@ -9,6 +9,7 @@ from holmes.core.tools import (
|
|
|
9
9
|
StructuredToolResultStatus,
|
|
10
10
|
Toolset,
|
|
11
11
|
ToolsetStatusEnum,
|
|
12
|
+
ToolInvokeContext,
|
|
12
13
|
)
|
|
13
14
|
from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset
|
|
14
15
|
|
|
@@ -46,16 +47,20 @@ class ToolExecutor:
|
|
|
46
47
|
)
|
|
47
48
|
self.tools_by_name[tool.name] = tool
|
|
48
49
|
|
|
49
|
-
def invoke(
|
|
50
|
+
def invoke(
|
|
51
|
+
self, tool_name: str, params: dict, context: ToolInvokeContext
|
|
52
|
+
) -> StructuredToolResult:
|
|
53
|
+
"""TODO: remove this function as it seems unused.
|
|
54
|
+
We call tool_executor.get_tool_by_name() and then tool.invoke() directly instead of this invoke function
|
|
55
|
+
"""
|
|
50
56
|
tool = self.get_tool_by_name(tool_name)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if tool
|
|
54
|
-
else StructuredToolResult(
|
|
57
|
+
if not tool:
|
|
58
|
+
return StructuredToolResult(
|
|
55
59
|
status=StructuredToolResultStatus.ERROR,
|
|
56
60
|
error=f"Could not find tool named {tool_name}",
|
|
57
61
|
)
|
|
58
|
-
|
|
62
|
+
|
|
63
|
+
return tool.invoke(params, context)
|
|
59
64
|
|
|
60
65
|
def get_tool_by_name(self, name: str) -> Optional[Tool]:
|
|
61
66
|
if name in self.tools_by_name:
|
holmes/core/toolset_manager.py
CHANGED
|
@@ -275,7 +275,11 @@ class ToolsetManager:
|
|
|
275
275
|
toolset.path = cached_status.get("path", None)
|
|
276
276
|
# check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
|
|
277
277
|
# not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
|
|
278
|
-
if toolset.enabled and
|
|
278
|
+
if toolset.enabled and (
|
|
279
|
+
toolset.status == ToolsetStatusEnum.ENABLED
|
|
280
|
+
or toolset.type == ToolsetType.MCP
|
|
281
|
+
):
|
|
282
|
+
# MCP servers need to reload their tools even if previously failed, so rerun prerequisites
|
|
279
283
|
enabled_toolsets_from_cache.append(toolset)
|
|
280
284
|
self.check_toolset_prerequisites(enabled_toolsets_from_cache)
|
|
281
285
|
|
|
@@ -464,12 +468,12 @@ class ToolsetManager:
|
|
|
464
468
|
|
|
465
469
|
logger = logging.getLogger(__name__)
|
|
466
470
|
|
|
467
|
-
logger.
|
|
471
|
+
logger.debug(
|
|
468
472
|
f"Starting fast_model injection. global_fast_model={self.global_fast_model}"
|
|
469
473
|
)
|
|
470
474
|
|
|
471
475
|
if not self.global_fast_model:
|
|
472
|
-
logger.
|
|
476
|
+
logger.debug("No global_fast_model configured, skipping injection")
|
|
473
477
|
return
|
|
474
478
|
|
|
475
479
|
injected_count = 0
|