holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.3a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +5 -2
- holmes/common/env_vars.py +8 -2
- holmes/config.py +4 -7
- holmes/core/conversations.py +12 -2
- holmes/core/feedback.py +191 -0
- holmes/core/llm.py +52 -10
- holmes/core/models.py +101 -1
- holmes/core/supabase_dal.py +23 -9
- holmes/core/tool_calling_llm.py +206 -16
- holmes/core/tools.py +20 -7
- holmes/core/tools_utils/token_counting.py +13 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
- holmes/core/tools_utils/tool_executor.py +11 -6
- holmes/core/toolset_manager.py +7 -3
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/interactive.py +146 -14
- holmes/plugins/prompts/_fetch_logs.jinja2 +13 -1
- holmes/plugins/runbooks/__init__.py +6 -1
- holmes/plugins/toolsets/__init__.py +11 -4
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +333 -199
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +181 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +80 -22
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +5 -8
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +7 -12
- holmes/plugins/toolsets/git.py +14 -12
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
- holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +2 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +21 -39
- holmes/plugins/toolsets/internet/internet.py +2 -3
- holmes/plugins/toolsets/internet/notion.py +2 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
- holmes/plugins/toolsets/kafka.py +7 -18
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +808 -419
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +27 -11
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
- holmes/plugins/toolsets/robusta/robusta.py +4 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
- holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
- holmes/utils/sentry_helper.py +1 -1
- holmes/utils/stream.py +22 -7
- holmes/version.py +34 -14
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +7 -9
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +71 -65
- holmes/core/tools_utils/data_types.py +0 -81
- holmes/plugins/toolsets/newrelic.py +0 -231
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/clients/robusta_client.py
CHANGED
@@ -1,8 +1,8 @@
 import logging
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 import requests # type: ignore
 from functools import cache
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 from holmes.common.env_vars import ROBUSTA_API_ENDPOINT

 HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -17,6 +17,9 @@ class HolmesInfo(BaseModel):
 class RobustaModelsResponse(BaseModel):
     model_config = ConfigDict(extra="ignore")
     models: List[str]
+    models_args: Dict[str, Any] = Field(
+        default_factory=dict, alias="models_holmes_args"
+    )
     default_model: Optional[str] = None
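A minimal sketch of how the new models_args field gets populated: pydantic maps the models_holmes_args key of the get_info payload onto it via the alias above. The payload shape and the max_context_size key are assumptions inferred from how the values are later consumed in holmes/core/llm.py, not part of this diff.

from holmes.clients.robusta_client import RobustaModelsResponse

# Hypothetical get_info payload (shape assumed for illustration)
payload = {
    "models": ["example-model"],
    "models_holmes_args": {"example-model": {"max_context_size": 128000}},
    "default_model": "example-model",
}
resp = RobustaModelsResponse.model_validate(payload)
print(resp.models_args["example-model"])  # {'max_context_size': 128000}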
holmes/common/env_vars.py
CHANGED
@@ -73,11 +73,17 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
 # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
 ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)

-MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS",
+MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))

 # Limit each tool response to N% of the total context window.
 # Number between 0 and 100
 # Setting to either 0 or any number above 100 disables the logic that limits tool response size
 TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
-    os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT",
+    os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
 )
+
+MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
+    os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
+)
+
+DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
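These values are read once at import time, so they have to be set before holmes.common.env_vars is first imported. A small sketch with arbitrary example values; the string passed for the boolean flag is an assumption about what load_bool accepts.

import os

# Must happen before the first import of holmes.common.env_vars
os.environ["MAX_GRAPH_POINTS"] = "200"
os.environ["TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT"] = "10"
os.environ["MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION"] = "5000"
os.environ["DISABLE_PROMETHEUS_TOOLSET"] = "true"

from holmes.common import env_vars

print(env_vars.MAX_GRAPH_POINTS)            # 200.0
print(env_vars.DISABLE_PROMETHEUS_TOOLSET)  # True, assuming load_bool parses "true"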
holmes/config.py
CHANGED
@@ -9,11 +9,8 @@ import sentry_sdk
 import yaml # type: ignore
 from pydantic import BaseModel, ConfigDict, FilePath, PrivateAttr, SecretStr

-
+from holmes.common.env_vars import ROBUSTA_CONFIG_PATH
 from holmes.core.llm import DefaultLLM, LLMModelRegistry
-from holmes.common.env_vars import (
-    ROBUSTA_CONFIG_PATH,
-)
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.toolset_manager import ToolsetManager
 from holmes.plugins.runbooks import (
@@ -33,8 +30,8 @@ if TYPE_CHECKING:
     from holmes.plugins.sources.pagerduty import PagerDutySource
     from holmes.plugins.sources.prometheus.plugin import AlertManagerSource

-    from holmes.core.supabase_dal import SupabaseDal
     from holmes.core.config import config_path_dir
+    from holmes.core.supabase_dal import SupabaseDal
     from holmes.utils.definitions import RobustaConfig
     from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file

@@ -129,9 +126,9 @@ class Config(RobustaBaseConfig):
         return self._llm_model_registry

     def log_useful_info(self):
-        if self.llm_model_registry
+        if self.llm_model_registry.models:
             logging.info(
-                f"
+                f"Loaded models: {list(self.llm_model_registry.models.keys())}"
             )
         else:
             logging.warning("No llm models were loaded")
holmes/core/conversations.py
CHANGED
@@ -262,7 +262,10 @@ def build_issue_chat_messages(


 def add_or_update_system_prompt(
-    conversation_history: List[Dict[str, str]],
+    conversation_history: List[Dict[str, str]],
+    ai: ToolCallingLLM,
+    config: Config,
+    additional_system_prompt: Optional[str] = None,
 ):
     """Either add the system prompt or replace an existing system prompt.
     As a 'defensive' measure, this code will only replace an existing system prompt if it is the
@@ -278,6 +281,9 @@ def add_or_update_system_prompt(

     system_prompt = load_and_render_prompt(template_path, context)

+    if additional_system_prompt:
+        system_prompt = system_prompt + "\n" + additional_system_prompt
+
     if not conversation_history or len(conversation_history) == 0:
         conversation_history.append({"role": "system", "content": system_prompt})
     elif conversation_history[0]["role"] == "system":
@@ -303,6 +309,7 @@ def build_chat_messages(
     ai: ToolCallingLLM,
     config: Config,
     global_instructions: Optional[Instructions] = None,
+    additional_system_prompt: Optional[str] = None,
 ) -> List[dict]:
     """
     This function generates a list of messages for general chat conversation and ensures that the message sequence adheres to the model's context window limitations
@@ -358,7 +365,10 @@ def build_chat_messages(
     conversation_history = conversation_history.copy()

     conversation_history = add_or_update_system_prompt(
-        conversation_history=conversation_history,
+        conversation_history=conversation_history,
+        ai=ai,
+        config=config,
+        additional_system_prompt=additional_system_prompt,
     )

     ask = add_global_instructions_to_user_prompt(ask, global_instructions)
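The new additional_system_prompt is appended to the rendered system prompt with a newline, so callers can inject extra instructions without touching the template. A hedged usage sketch: ai and config stand for pre-existing ToolCallingLLM/Config objects, and the function is assumed to return the updated history, mirroring how it is used in build_chat_messages above.

# Hypothetical call; the ai/config names are placeholders
history = [{"role": "user", "content": "why is my pod crash-looping?"}]
history = add_or_update_system_prompt(
    conversation_history=history,
    ai=ai,
    config=config,
    additional_system_prompt="Keep the final answer under five sentences.",
)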
holmes/core/feedback.py
ADDED
@@ -0,0 +1,191 @@
+import os
+from abc import ABC, abstractmethod
+from typing import Callable, Optional
+
+from .llm import LLM
+
+DEFAULT_PRIVACY_NOTICE_BANNER = "Your feedback will be used to improve Holmesgpt's performance. Please avoid sharing sensitive personal information. By continuing, you consent to this data usage."
+PRIVACY_NOTICE_BANNER = os.environ.get(
+    "PRIVACY_NOTICE_BANNER", DEFAULT_PRIVACY_NOTICE_BANNER
+)
+
+
+class FeedbackInfoBase(ABC):
+    """Abstract base class for all feedback-related classes that must implement to_dict()."""
+
+    @abstractmethod
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation. Must be implemented by all subclasses."""
+        pass
+
+
+class FeedbackLLM(FeedbackInfoBase):
+    """Class to represent a LLM in the feedback."""
+
+    def __init__(self, model: str, max_context_size: int):
+        self.model = model
+        self.max_context_size = max_context_size
+
+    def update_from_llm(self, llm: LLM):
+        self.model = llm.model
+        self.max_context_size = llm.get_context_window_size()
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation."""
+        return self.__dict__
+
+
+# TODO: extend the FeedbackLLMResponse to include each tool call results details used for evaluate the overall response.
+# Currenlty tool call details in plan:
+# - toolcall parameter and success/failure, toolcall truncation size
+# - Holmes plan (todo list)
+# - Holmes intermediate output
+class FeedbackLLMResponse(FeedbackInfoBase):
+    """Class to represent a LLM response in the feedback"""
+
+    def __init__(self, user_ask: str, response: str):
+        self.user_ask = user_ask
+        self.response = response
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation."""
+        return self.__dict__
+
+
+class FeedbackMetadata(FeedbackInfoBase):
+    """Class to store feedback metadata."""
+
+    def __init__(self):
+        # In iteration mode, there can be multiple ask and response pairs.
+        self.llm_responses = []
+        self.llm = FeedbackLLM("", 0)
+
+    def add_llm_response(self, user_ask: str, response: str) -> None:
+        """Add a LLM response to the metadata."""
+        llm_response = FeedbackLLMResponse(user_ask, response)
+        self.llm_responses.append(llm_response)
+
+    def update_llm(self, llm: LLM) -> None:
+        """Update the LLM information in the metadata."""
+        self.llm.update_from_llm(llm)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation."""
+        return {
+            "llm_responses": [resp.to_dict() for resp in self.llm_responses],
+            "llm": self.llm.to_dict(),
+        }
+
+
+class UserFeedback(FeedbackInfoBase):
+    """Class to store user rate and comment to the AI response."""
+
+    def __init__(self, is_positive: bool, comment: Optional[str]):
+        self.is_positive = is_positive
+        self.comment = comment
+
+    @property
+    def rating_text(self) -> str:
+        """Return human-readable rating text."""
+        return "useful" if self.is_positive else "not useful"
+
+    @property
+    def rating_emoji(self) -> str:
+        """Return emoji representation of the rating."""
+        return "👍" if self.is_positive else "👎"
+
+    def __str__(self) -> str:
+        """Return string representation of the feedback."""
+        if self.comment:
+            return f"Rating: {self.rating_text}. Comment: {self.comment}"
+        else:
+            return f"Rating: {self.rating_text}. No additional comment."
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation."""
+        return {
+            "is_positive": self.is_positive,
+            "comment": self.comment,
+        }
+
+
+class Feedback(FeedbackInfoBase):
+    """Class to store overall feedback data used to evaluate the AI response."""
+
+    def __init__(self):
+        self.metadata = FeedbackMetadata()
+        self.user_feedback: Optional[UserFeedback] = None
+
+    def set_user_feedback(self, user_feedback: UserFeedback) -> None:
+        """Set the user feedback."""
+        self.user_feedback = user_feedback
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary representation."""
+        return {
+            "metadata": self.metadata.to_dict(),
+            "user_feedback": self.user_feedback.to_dict()
+            if self.user_feedback
+            else None,
+        }
+
+
+FeedbackCallback = Callable[[Feedback], None]
+
+
+def feedback_callback_example(feedback: Feedback) -> None:
+    """
+    Example implementation of a feedback callback function.
+
+    This function demonstrates how to process feedback data using to_dict() methods
+    and could be used for:
+    - Logging feedback to files or databases
+    - Sending feedback to analytics services
+    - Training data collection
+    - User satisfaction monitoring
+
+    Args:
+        feedback: Feedback object containing user feedback and metadata
+    """
+    print("\n=== Feedback Received ===")
+
+    # Convert entire feedback to dict first - this is the main data structure
+    feedback_dict = feedback.to_dict()
+    print(f"Complete feedback dictionary keys: {list(feedback_dict.keys())}")
+
+    # How to check user feedback using to_dict()
+    print("\n1. Checking User Feedback:")
+    user_feedback_dict = (
+        feedback.user_feedback.to_dict() if feedback.user_feedback else None
+    )
+    if user_feedback_dict:
+        print(f" User feedback dict: {user_feedback_dict}")
+        print(f" Is positive: {user_feedback_dict['is_positive']}")
+        print(f" Comment: {user_feedback_dict['comment'] or 'None'}")
+        # You can also access properties through the object:
+        print(f" Rating emoji: {feedback.user_feedback.rating_emoji}") # type: ignore
+        print(f" Rating text: {feedback.user_feedback.rating_text}") # type: ignore
+    else:
+        print(" No user feedback provided (user_feedback is None)")
+
+    # How to check LLM information using to_dict()
+    print("\n2. Checking LLM Information:")
+    metadata_dict = feedback.metadata.to_dict()
+    llm_dict = metadata_dict["llm"]
+    print(f" LLM dict: {llm_dict}")
+    print(f" Model: {llm_dict['model']}")
+    print(f" Max context size: {llm_dict['max_context_size']}")
+
+    # How to check ask and response pairs using to_dict()
+    print("\n3. Checking Ask and Response History:")
+    llm_responses_dict = metadata_dict["llm_responses"]
+    print(f" Number of exchanges: {len(llm_responses_dict)}")
+
+    for i, response_dict in enumerate(llm_responses_dict, 1):
+        print(f" Exchange {i} dict: {list(response_dict.keys())}")
+        user_ask = response_dict["user_ask"]
+        ai_response = response_dict["response"]
+        print(f" User ask: {user_ask}")
+        print(f" AI response: {ai_response}")
+
+    print("=== End Feedback ===\n")
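Any callable that accepts a Feedback satisfies FeedbackCallback, not just the printing example above. A minimal sketch that persists feedback as JSON lines; the file path and sample content are arbitrary examples, not values from this release.

import json

from holmes.core.feedback import Feedback, FeedbackCallback, UserFeedback


def save_feedback_to_jsonl(feedback: Feedback) -> None:
    """Append the feedback payload to a local JSONL file."""
    with open("holmes_feedback.jsonl", "a") as f:
        f.write(json.dumps(feedback.to_dict()) + "\n")


callback: FeedbackCallback = save_feedback_to_jsonl

# Building a Feedback object by hand, e.g. in a test
fb = Feedback()
fb.metadata.add_llm_response(
    user_ask="why is the pod pending?",
    response="The node has a taint the pod does not tolerate.",
)
fb.set_user_feedback(UserFeedback(is_positive=True, comment="helpful"))
callback(fb)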
holmes/core/llm.py
CHANGED
@@ -1,9 +1,10 @@
 import json
 import logging
 from abc import abstractmethod
+from math import floor
 from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

-from litellm.types.utils import ModelResponse
+from litellm.types.utils import ModelResponse, TextCompletionResponse
 import sentry_sdk

 from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
@@ -90,9 +91,13 @@ class DefaultLLM(LLM):
         self.args = args or {}
         self.tracer = tracer
         self.name = name
-
+        self.update_custom_args()
         self.check_llm(self.model, self.api_key, self.api_base, self.api_version)

+    def update_custom_args(self):
+        self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
+        self.args.pop("custom_args", None)
+
     def check_llm(
         self,
         model: str,
@@ -178,6 +183,9 @@ class DefaultLLM(LLM):
         return list(dict.fromkeys(names_to_try))

     def get_context_window_size(self) -> int:
+        if self.max_context_size:
+            return self.max_context_size
+
         if OVERRIDE_MAX_CONTENT_SIZE:
             logging.debug(
                 f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}"
@@ -285,6 +293,8 @@ class DefaultLLM(LLM):
         raise Exception(f"Unexpected type returned by the LLM {type(result)}")

     def get_maximum_output_token(self) -> int:
+        max_output_tokens = floor(min(64000, self.get_context_window_size() / 5))
+
         if OVERRIDE_MAX_OUTPUT_TOKEN:
             logging.debug(
                 f"Using OVERRIDE_MAX_OUTPUT_TOKEN {OVERRIDE_MAX_OUTPUT_TOKEN}"
@@ -294,17 +304,22 @@ class DefaultLLM(LLM):
         # Try each name variant
         for name in self._get_model_name_variants_for_lookup():
             try:
-
+                litellm_max_output_tokens = litellm.model_cost[name][
+                    "max_output_tokens"
+                ]
+                if litellm_max_output_tokens < max_output_tokens:
+                    max_output_tokens = litellm_max_output_tokens
+                return max_output_tokens
             except Exception:
                 continue

         # Log which lookups we tried
         logging.warning(
             f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
-            f"using
+            f"using {max_output_tokens} tokens for max_output_tokens. "
             f"To override, set OVERRIDE_MAX_OUTPUT_TOKEN environment variable to the correct value for your model."
         )
-        return
+        return max_output_tokens

     def _add_cache_control_to_last_message(
         self, messages: List[Dict[str, Any]]
@@ -342,7 +357,7 @@ class DefaultLLM(LLM):
         if content is None:
             return

-        if isinstance(content, str):
+        if isinstance(content, str) and content:
             # Convert string to structured format with cache_control
             target_msg["content"] = [
                 {
@@ -424,7 +439,8 @@ class LLMModelRegistry:

         for model in robusta_models.models:
             logging.info(f"Loading Robusta AI model: {model}")
-
+            args = robusta_models.models_args.get(model)
+            self._llms[model] = self._create_robusta_model_entry(model, args)

         if robusta_models.default_model:
             logging.info(
@@ -492,7 +508,7 @@ class LLMModelRegistry:
         )

         model_key, first_model_params = next(iter(self._llms.items()))
-        logging.
+        logging.debug(f"Using first available model: {model_key}")
         return first_model_params.copy()

     def get_llm(self, name: str) -> LLM: # TODO: fix logic
@@ -509,13 +525,17 @@ class LLMModelRegistry:

         return models

-    def _create_robusta_model_entry(
-
+    def _create_robusta_model_entry(
+        self, model_name: str, args: Optional[dict[str, Any]] = None
+    ) -> dict[str, Any]:
+        entry = self._create_model_entry(
             model="gpt-4o", # Robusta AI model is using openai like API.
             model_name=model_name,
             base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
             is_robusta_model=True,
         )
+        entry["custom_args"] = args or {} # type: ignore[assignment]
+        return entry

     def _create_model_entry(
         self,
@@ -530,3 +550,25 @@ class LLMModelRegistry:
             "is_robusta_model": is_robusta_model,
             "model": model,
         }
+
+
+def get_llm_usage(
+    llm_response: Union[ModelResponse, CustomStreamWrapper, TextCompletionResponse],
+) -> dict:
+    usage: dict = {}
+    if (
+        (
+            isinstance(llm_response, ModelResponse)
+            or isinstance(llm_response, TextCompletionResponse)
+        )
+        and hasattr(llm_response, "usage")
+        and llm_response.usage
+    ): # type: ignore
+        usage["prompt_tokens"] = llm_response.usage.prompt_tokens # type: ignore
+        usage["completion_tokens"] = llm_response.usage.completion_tokens # type: ignore
+        usage["total_tokens"] = llm_response.usage.total_tokens # type: ignore
+    elif isinstance(llm_response, CustomStreamWrapper):
+        complete_response = litellm.stream_chunk_builder(chunks=llm_response) # type: ignore
+        if complete_response:
+            return get_llm_usage(complete_response)
+    return usage
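The new module-level get_llm_usage() normalizes token accounting for both plain and streamed litellm responses. A hedged sketch of the non-streaming case; the model name and any credentials are placeholders, not values from this diff.

import litellm

from holmes.core.llm import get_llm_usage

response = litellm.completion(
    model="gpt-4o-mini",  # any model litellm can reach; placeholder
    messages=[{"role": "user", "content": "ping"}],
)
print(get_llm_usage(response))  # e.g. {'prompt_tokens': ..., 'completion_tokens': ..., 'total_tokens': ...}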
holmes/core/models.py
CHANGED
@@ -1,9 +1,87 @@
+import json
 from holmes.core.investigation_structured_output import InputSectionsDataType
-from holmes.core.tool_calling_llm import ToolCallResult
 from typing import Optional, List, Dict, Any, Union
 from pydantic import BaseModel, model_validator, Field
 from enum import Enum

+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+
+
+class TruncationMetadata(BaseModel):
+    tool_call_id: str
+    start_index: int
+    end_index: int
+    tool_name: str
+    original_token_count: int
+
+
+class TruncationResult(BaseModel):
+    truncated_messages: list[dict]
+    truncations: list[TruncationMetadata]
+
+
+class ToolCallResult(BaseModel):
+    tool_call_id: str
+    tool_name: str
+    description: str
+    result: StructuredToolResult
+    size: Optional[int] = None
+
+    def as_tool_call_message(self):
+        return {
+            "tool_call_id": self.tool_call_id,
+            "role": "tool",
+            "name": self.tool_name,
+            "content": format_tool_result_data(self.result),
+        }
+
+    def as_tool_result_response(self):
+        result_dump = self.result.model_dump()
+        result_dump["data"] = self.result.get_stringified_data()
+
+        return {
+            "tool_call_id": self.tool_call_id,
+            "tool_name": self.tool_name,
+            "description": self.description,
+            "role": "tool",
+            "result": result_dump,
+        }
+
+    def as_streaming_tool_result_response(self):
+        result_dump = self.result.model_dump()
+        result_dump["data"] = self.result.get_stringified_data()
+
+        return {
+            "tool_call_id": self.tool_call_id,
+            "role": "tool",
+            "description": self.description,
+            "name": self.tool_name,
+            "result": result_dump,
+        }
+
+
+def format_tool_result_data(tool_result: StructuredToolResult) -> str:
+    tool_response = tool_result.data
+    if isinstance(tool_result.data, str):
+        tool_response = tool_result.data
+    else:
+        try:
+            if isinstance(tool_result.data, BaseModel):
+                tool_response = tool_result.data.model_dump_json(indent=2)
+            else:
+                tool_response = json.dumps(tool_result.data, indent=2)
+        except Exception:
+            tool_response = str(tool_result.data)
+    if tool_result.status == StructuredToolResultStatus.ERROR:
+        tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
+
+    if tool_result.params:
+        tool_response = (
+            f"Params used for the tool call: {json.dumps(tool_result.params)}. The tool call output follows on the next line.\n"
+            + tool_response
+        )
+    return tool_response
+

 class InvestigationResult(BaseModel):
     analysis: Optional[str] = None
@@ -87,10 +165,31 @@ class ConversationRequest(BaseModel):
     include_tool_call_results: bool = False


+class PendingToolApproval(BaseModel):
+    """Represents a tool call that requires user approval."""
+
+    tool_call_id: str
+    tool_name: str
+    description: str
+    params: Dict[str, Any]
+
+
+class ToolApprovalDecision(BaseModel):
+    """Represents a user's decision on a tool approval."""
+
+    tool_call_id: str
+    approved: bool
+
+
 class ChatRequestBaseModel(BaseModel):
     conversation_history: Optional[list[dict]] = None
     model: Optional[str] = None
     stream: bool = Field(default=False)
+    enable_tool_approval: Optional[bool] = (
+        False  # Optional boolean for backwards compatibility
+    )
+    tool_decisions: Optional[List[ToolApprovalDecision]] = None
+    additional_system_prompt: Optional[str] = None

     # In our setup with litellm, the first message in conversation_history
     # should follow the structure [{"role": "system", "content": ...}],
@@ -146,6 +245,7 @@ class ChatResponse(BaseModel):
     conversation_history: list[dict]
     tool_calls: Optional[List[ToolCallResult]] = []
     follow_up_actions: Optional[List[FollowUpAction]] = []
+    pending_approvals: Optional[List[PendingToolApproval]] = None
     metadata: Optional[Dict[Any, Any]] = None
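The approval fields added to ChatRequestBaseModel and ChatResponse form a round trip: the server reports pending_approvals, and the client answers with tool_decisions on its next request. A hedged client-side sketch; the surrounding request/response transport is assumed and not part of this diff.

from holmes.core.models import ChatResponse, ToolApprovalDecision


def decisions_for(response: ChatResponse, approve: bool = True) -> list[dict]:
    """Build the tool_decisions payload for the follow-up request,
    approving (or rejecting) every pending tool call."""
    return [
        ToolApprovalDecision(
            tool_call_id=p.tool_call_id, approved=approve
        ).model_dump()
        for p in (response.pending_approvals or [])
    ]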