holmesgpt-0.15.0-py3-none-any.whl → holmesgpt-0.16.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +8 -0
- holmes/core/llm.py +28 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +92 -223
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -39
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +20 -11
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/utils/stream.py +1 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +4 -2
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +18 -16
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/common/env_vars.py
CHANGED
|
@@ -100,4 +100,12 @@ MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
|
|
|
100
100
|
os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
|
|
101
101
|
)
|
|
102
102
|
|
|
103
|
+
ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
|
|
104
|
+
"ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
|
|
105
|
+
)
|
|
106
|
+
|
|
103
107
|
DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
|
|
108
|
+
|
|
109
|
+
RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
|
|
110
|
+
"RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
|
|
111
|
+
)
|
holmes/core/llm.py
CHANGED
|
@@ -26,6 +26,8 @@ from holmes.common.env_vars import (
|
|
|
26
26
|
ROBUSTA_API_ENDPOINT,
|
|
27
27
|
THINKING,
|
|
28
28
|
EXTRA_HEADERS,
|
|
29
|
+
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
|
|
30
|
+
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
|
|
29
31
|
)
|
|
30
32
|
from holmes.core.supabase_dal import SupabaseDal
|
|
31
33
|
from holmes.utils.env import environ_get_safe_int, replace_env_vars_values
|
|
@@ -41,6 +43,13 @@ MODEL_LIST_FILE_LOCATION = os.environ.get(
|
|
|
41
43
|
|
|
42
44
|
OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN")
|
|
43
45
|
OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_context_window_compaction_threshold_pct() -> int:
|
|
49
|
+
"""Get the compaction threshold percentage at runtime to support test overrides."""
|
|
50
|
+
return environ_get_safe_int("CONTEXT_WINDOW_COMPACTION_THRESHOLD_PCT", default="95")
|
|
51
|
+
|
|
52
|
+
|
|
44
53
|
ROBUSTA_AI_MODEL_NAME = "Robusta"
|
|
45
54
|
|
|
46
55
|
|
|
@@ -50,6 +59,7 @@ class TokenCountMetadata(BaseModel):
|
|
|
50
59
|
system_tokens: int
|
|
51
60
|
user_tokens: int
|
|
52
61
|
tools_to_call_tokens: int
|
|
62
|
+
assistant_tokens: int
|
|
53
63
|
other_tokens: int
|
|
54
64
|
|
|
55
65
|
|
|
@@ -90,6 +100,19 @@ class LLM:
|
|
|
90
100
|
def get_maximum_output_token(self) -> int:
|
|
91
101
|
pass
|
|
92
102
|
|
|
103
|
+
def get_max_token_count_for_single_tool(self) -> int:
|
|
104
|
+
if (
|
|
105
|
+
0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
|
|
106
|
+
and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
|
|
107
|
+
):
|
|
108
|
+
context_window_size = self.get_context_window_size()
|
|
109
|
+
calculated_max_tokens = int(
|
|
110
|
+
context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
|
|
111
|
+
)
|
|
112
|
+
return min(calculated_max_tokens, TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS)
|
|
113
|
+
else:
|
|
114
|
+
return TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS
|
|
115
|
+
|
|
93
116
|
@abstractmethod
|
|
94
117
|
def count_tokens(
|
|
95
118
|
self, messages: list[dict], tools: Optional[list[dict[str, Any]]] = None
|
|
@@ -279,6 +302,7 @@ class DefaultLLM(LLM):
|
|
|
279
302
|
total_tokens = 0
|
|
280
303
|
tools_tokens = 0
|
|
281
304
|
system_tokens = 0
|
|
305
|
+
assistant_tokens = 0
|
|
282
306
|
user_tokens = 0
|
|
283
307
|
other_tokens = 0
|
|
284
308
|
tools_to_call_tokens = 0
|
|
@@ -296,6 +320,8 @@ class DefaultLLM(LLM):
|
|
|
296
320
|
user_tokens += token_count
|
|
297
321
|
elif role == "tool":
|
|
298
322
|
tools_tokens += token_count
|
|
323
|
+
elif role == "assistant":
|
|
324
|
+
assistant_tokens += token_count
|
|
299
325
|
else:
|
|
300
326
|
# although this should not be needed,
|
|
301
327
|
# it is defensive code so that all tokens are accounted for
|
|
@@ -320,6 +346,7 @@ class DefaultLLM(LLM):
|
|
|
320
346
|
tools_tokens=tools_tokens,
|
|
321
347
|
tools_to_call_tokens=tools_to_call_tokens,
|
|
322
348
|
other_tokens=other_tokens,
|
|
349
|
+
assistant_tokens=assistant_tokens,
|
|
323
350
|
)
|
|
324
351
|
|
|
325
352
|
def get_litellm_corrected_name_for_robusta_ai(self) -> str:
|
|
@@ -544,6 +571,7 @@ class LLMModelRegistry:
|
|
|
544
571
|
return
|
|
545
572
|
|
|
546
573
|
account_id, token = self.dal.get_ai_credentials()
|
|
574
|
+
|
|
547
575
|
robusta_models: RobustaModelsResponse | None = fetch_robusta_models(
|
|
548
576
|
account_id, token
|
|
549
577
|
)
|
holmes/core/supabase_dal.py
CHANGED
|
@@ -237,70 +237,61 @@ class SupabaseDal:
|
|
|
237
237
|
logging.exception("Supabase error while retrieving efficiency data")
|
|
238
238
|
return None
|
|
239
239
|
|
|
240
|
-
def
|
|
241
|
-
self,
|
|
240
|
+
def get_configuration_changes_metadata(
|
|
241
|
+
self,
|
|
242
|
+
start_datetime: str,
|
|
243
|
+
end_datetime: str,
|
|
244
|
+
limit: int = 100,
|
|
245
|
+
workload: Optional[str] = None,
|
|
246
|
+
ns: Optional[str] = None,
|
|
242
247
|
) -> Optional[List[Dict]]:
|
|
243
248
|
if not self.enabled:
|
|
244
249
|
return []
|
|
245
250
|
|
|
246
251
|
try:
|
|
247
|
-
|
|
252
|
+
query = (
|
|
248
253
|
self.client.table(ISSUES_TABLE)
|
|
249
|
-
.select(
|
|
254
|
+
.select(
|
|
255
|
+
"id",
|
|
256
|
+
"title",
|
|
257
|
+
"subject_name",
|
|
258
|
+
"subject_namespace",
|
|
259
|
+
"subject_type",
|
|
260
|
+
"description",
|
|
261
|
+
"starts_at",
|
|
262
|
+
"ends_at",
|
|
263
|
+
)
|
|
250
264
|
.eq("account_id", self.account_id)
|
|
251
265
|
.eq("cluster", self.cluster)
|
|
252
266
|
.eq("finding_type", "configuration_change")
|
|
253
267
|
.gte("creation_date", start_datetime)
|
|
254
268
|
.lte("creation_date", end_datetime)
|
|
255
|
-
.
|
|
269
|
+
.limit(limit)
|
|
256
270
|
)
|
|
257
|
-
if not len(changes_response.data):
|
|
258
|
-
return None
|
|
259
271
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
272
|
+
if workload:
|
|
273
|
+
query.eq("subject_name", workload)
|
|
274
|
+
if ns:
|
|
275
|
+
query.eq("subject_namespace", ns)
|
|
263
276
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
change_data_response = (
|
|
267
|
-
self.client.table(EVIDENCE_TABLE)
|
|
268
|
-
.select("*")
|
|
269
|
-
.eq("account_id", self.account_id)
|
|
270
|
-
.in_("issue_id", changes_ids)
|
|
271
|
-
.not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
|
|
272
|
-
.execute()
|
|
273
|
-
)
|
|
274
|
-
if not len(change_data_response.data):
|
|
277
|
+
res = query.execute()
|
|
278
|
+
if not res.data:
|
|
275
279
|
return None
|
|
276
280
|
|
|
277
|
-
truncate_evidences_entities_if_necessary(change_data_response.data)
|
|
278
|
-
|
|
279
281
|
except Exception:
|
|
280
|
-
logging.exception("Supabase error while retrieving change
|
|
282
|
+
logging.exception("Supabase error while retrieving change data")
|
|
281
283
|
return None
|
|
282
284
|
|
|
283
|
-
changes_data = []
|
|
284
|
-
change_data_map = {
|
|
285
|
-
change["issue_id"]: change for change in change_data_response.data
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
for change in changes_response.data:
|
|
289
|
-
change_content = change_data_map.get(change["id"])
|
|
290
|
-
if change_content:
|
|
291
|
-
changes_data.append(
|
|
292
|
-
{
|
|
293
|
-
"change": change_content["data"],
|
|
294
|
-
"evidence_id": change_content["id"],
|
|
295
|
-
**change,
|
|
296
|
-
}
|
|
297
|
-
)
|
|
298
|
-
|
|
299
285
|
logging.debug(
|
|
300
|
-
"Change history for %s-%s: %s",
|
|
286
|
+
"Change history metadata for %s-%s workload %s in ns %s: %s",
|
|
287
|
+
start_datetime,
|
|
288
|
+
end_datetime,
|
|
289
|
+
workload,
|
|
290
|
+
ns,
|
|
291
|
+
res.data,
|
|
301
292
|
)
|
|
302
293
|
|
|
303
|
-
return
|
|
294
|
+
return res.data
|
|
304
295
|
|
|
305
296
|
def unzip_evidence_file(self, data):
|
|
306
297
|
try:
|