holmesgpt-0.11.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of holmesgpt has been flagged as potentially problematic.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/core/investigation.py
ADDED

@@ -0,0 +1,152 @@
+import logging
+from typing import Optional
+
+from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT
+from holmes.config import Config
+from holmes.core.investigation_structured_output import process_response_into_sections
+from holmes.core.issue import Issue
+from holmes.core.models import InvestigateRequest, InvestigationResult
+from holmes.core.supabase_dal import SupabaseDal
+from holmes.utils.global_instructions import add_global_instructions_to_user_prompt
+from holmes.utils.robusta import load_robusta_api_key
+
+from holmes.core.investigation_structured_output import (
+    DEFAULT_SECTIONS,
+    REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
+    get_output_format_for_investigation,
+)
+
+from holmes.plugins.prompts import load_and_render_prompt
+
+
+def investigate_issues(
+    investigate_request: InvestigateRequest,
+    dal: SupabaseDal,
+    config: Config,
+    model: Optional[str] = None,
+) -> InvestigationResult:
+    load_robusta_api_key(dal=dal, config=config)
+    context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id"))
+
+    resource_instructions = dal.get_resource_instructions(
+        "alert", investigate_request.context.get("issue_type")
+    )
+    global_instructions = dal.get_global_instructions_for_account()
+
+    raw_data = investigate_request.model_dump()
+    if context:
+        raw_data["extra_context"] = context
+
+    ai = config.create_issue_investigator(dal=dal, model=model)
+
+    issue = Issue(
+        id=context["id"] if context else "",
+        name=investigate_request.title,
+        source_type=investigate_request.source,
+        source_instance_id=investigate_request.source_instance_id,
+        raw=raw_data,
+    )
+
+    investigation = ai.investigate(
+        issue,
+        prompt=investigate_request.prompt_template,
+        post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT,
+        instructions=resource_instructions,
+        global_instructions=global_instructions,
+        sections=investigate_request.sections,
+    )
+
+    (text_response, sections) = process_response_into_sections(investigation.result)
+
+    logging.debug(f"text response: {text_response}")
+    return InvestigationResult(
+        analysis=text_response,
+        sections=sections,
+        tool_calls=investigation.tool_calls or [],
+        instructions=investigation.instructions,
+    )
+
+
+def get_investigation_context(
+    investigate_request: InvestigateRequest,
+    dal: SupabaseDal,
+    config: Config,
+    request_structured_output_from_llm: Optional[bool] = None,
+):
+    load_robusta_api_key(dal=dal, config=config)
+    ai = config.create_issue_investigator(dal=dal, model=investigate_request.model)
+
+    raw_data = investigate_request.model_dump()
+    context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id"))
+    if context:
+        raw_data["extra_context"] = context
+
+    issue = Issue(
+        id=context["id"] if context else "",
+        name=investigate_request.title,
+        source_type=investigate_request.source,
+        source_instance_id=investigate_request.source_instance_id,
+        raw=raw_data,
+    )
+
+    runbooks = ai.runbook_manager.get_instructions_for_issue(issue)
+
+    instructions = dal.get_resource_instructions(
+        "alert", investigate_request.context.get("issue_type")
+    )
+    if instructions is not None and instructions.instructions:
+        runbooks.extend(instructions.instructions)
+    if instructions is not None and len(instructions.documents) > 0:
+        docPrompts = []
+        for document in instructions.documents:
+            docPrompts.append(f"* fetch information from this URL: {document.url}\n")
+        runbooks.extend(docPrompts)
+
+    # This section sets vars that request structured output from the LLM.
+    # It does not mean Holmes cannot return structured sections for the investigation,
+    # as it is also capable of splitting the markdown into sections.
+    if request_structured_output_from_llm is None:
+        request_structured_output_from_llm = REQUEST_STRUCTURED_OUTPUT_FROM_LLM
+    response_format = None
+    sections = investigate_request.sections
+    if not sections:
+        sections = DEFAULT_SECTIONS
+        request_structured_output_from_llm = False
+        logging.info(
+            "No section received from the client. Default sections will be used."
+        )
+    elif ai.llm.model and ai.llm.model.startswith(("bedrock", "gemini")):
+        # Structured output does not work well with Bedrock Anthropic Sonnet 3.5, or gemini through litellm
+        request_structured_output_from_llm = False
+
+    if request_structured_output_from_llm:
+        response_format = get_output_format_for_investigation(sections)
+        logging.info("Structured output is enabled for this request")
+    else:
+        logging.info("Structured output is disabled for this request")
+
+    system_prompt = load_and_render_prompt(
+        investigate_request.prompt_template,
+        {
+            "issue": issue,
+            "sections": sections,
+            "structured_output": request_structured_output_from_llm,
+            "toolsets": ai.tool_executor.toolsets,
+        },
+    )
+
+    user_prompt = ""
+    if runbooks:
+        for runbook_str in runbooks:
+            user_prompt += f"* {runbook_str}\n"
+
+        user_prompt = f'My instructions to check \n"""{user_prompt}"""'
+
+    global_instructions = dal.get_global_instructions_for_account()
+    user_prompt = add_global_instructions_to_user_prompt(
+        user_prompt, global_instructions
+    )
+
+    user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}"
+
+    return ai, system_prompt, user_prompt, response_format, sections, runbooks
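For orientation, here is a minimal sketch of driving `investigate_issues` from the diff above. Only the call signature and the request fields the function actually reads (`title`, `source`, `source_instance_id`, `prompt_template`, `context`, `sections`) are taken from the code itself; the field values, the template reference, and the construction of `Config` and `SupabaseDal` are hypothetical placeholders, and the real `InvestigateRequest` model in holmes/core/models.py may require additional fields.

```python
# Hypothetical sketch: values are assumptions; only the signature and the
# fields read by investigate_issues come from the diff above.
from holmes.core.investigation import investigate_issues
from holmes.core.models import InvestigateRequest

request = InvestigateRequest(
    title="KubePodCrashLooping",        # becomes Issue.name
    source="prometheus",                # becomes Issue.source_type
    source_instance_id="prod-cluster",  # becomes Issue.source_instance_id
    prompt_template="builtin://generic_investigation.jinja2",  # assumed template reference
    context={"robusta_issue_id": "abc-123", "issue_type": "KubePodCrashLooping"},
    sections=None,                      # optional; passed through to ai.investigate()
)

result = investigate_issues(
    investigate_request=request,
    dal=dal,        # a configured SupabaseDal (construction not shown)
    config=config,  # a loaded holmes Config (construction not shown)
)
print(result.analysis)  # markdown analysis
print(result.sections)  # parsed sections, when available
```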
holmes/core/investigation_structured_output.py
ADDED

@@ -0,0 +1,264 @@
+import logging
+from typing import Any, Dict, Optional, Tuple
+import json
+import re
+from contextlib import suppress
+from holmes.common.env_vars import load_bool
+
+
+REQUEST_STRUCTURED_OUTPUT_FROM_LLM = load_bool(
+    "REQUEST_STRUCTURED_OUTPUT_FROM_LLM", True
+)
+PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS = load_bool(
+    "PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS", True
+)
+
+
+InputSectionsDataType = Dict[str, str]
+
+DEFAULT_SECTIONS: InputSectionsDataType = {
+    "Alert Explanation": '1-2 sentences explaining the alert itself - note don\'t say "The alert indicates a warning event related to a Kubernetes pod doing blah" rather just say "The pod XYZ did blah" because that is what the user actually cares about',
+    "Key Findings": "What you checked and found",
+    "Conclusions and Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains. Don't say root cause but 'possible root causes'. Be clear to distinguish between what you know for certain and what is a possible explanation",
+    "Next Steps": "What you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)",
+    "Related logs": "Truncate and share the most relevant logs, especially if these explain the root cause. For example: \nLogs from pod robusta-holmes:\n```\n<logs>```\n. Always embed the surrounding +/- 5 log lines to any relevant logs. ",
+    "App or Infra?": "Explain whether the issue is more likely an infrastructure or an application level issue and why you think that.",
+    "External links": "Provide links to external sources and a short sentence describing each link. For example provide links to relevant runbooks, etc. This section is a markdown formatted string.",
+}
+
+
+def get_output_format_for_investigation(
+    sections: InputSectionsDataType,
+) -> Dict[str, Any]:
+    properties = {}
+    required_fields = []
+
+    for title, description in sections.items():
+        properties[title] = {"type": ["string", "null"], "description": description}
+        required_fields.append(title)
+
+    schema = {
+        "$schema": "http://json-schema.org/draft-07/schema#",
+        "type": "object",
+        "required": required_fields,
+        "properties": properties,
+        "additionalProperties": False,
+    }
+    output_format = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "InvestigationResult",
+            "schema": schema,
+            "strict": False,
+        },
+    }
+
+    return output_format
+
+
+def combine_sections(sections: Dict) -> str:
+    content = ""
+    for section_title, section_content in sections.items():
+        if section_content:
+            content = content + f"\n# {section_title}\n{section_content}\n"
+    return content
+
+
+def parse_markdown_into_sections_from_equal_sign(
+    markdown_content: str,
+) -> Optional[Dict[str, Optional[str]]]:
+    """Splits markdown into sections where the key is a top-level title underlined with `====` and the value is the content
+    ```
+    Header Title
+    ===========
+    Content here
+    ```
+    =>
+    {
+        "Header Title": "Content here"
+    }
+    """
+    matches = re.split(r"(?:^|\n)([^\n]+)\n=+\n", markdown_content.strip())
+
+    # Remove any empty first element if the text starts with a header
+    if matches[0].strip() == "":
+        matches = matches[1:]
+
+    sections = {}
+
+    for i in range(0, len(matches), 2):
+        if i + 1 < len(matches):
+            header = matches[i]
+            content = matches[i + 1].strip()
+            sections[header] = content
+
+    if len(sections) > 0:
+        return sections
+    else:
+        return None
+
+
+def parse_markdown_into_sections_from_hash_sign(
+    markdown_content: str,
+) -> Optional[Dict[str, Optional[str]]]:
+    """Splits markdown into sections where the key is a top-level title prefixed with `# ` and the value is the content
+    ```
+    # Header Title
+    Content here
+    ```
+    =>
+    {
+        "Header Title": "Content here"
+    }
+    """
+    # Split the text into sections based on headers (# Section)
+    matches = re.split(r"\n(?=# )", markdown_content.strip())
+
+    if not matches[0].startswith("#"):
+        matches = matches[1:]
+
+    sections = {}
+
+    for match in matches:
+        match = match.strip()
+        if match:
+            parts = match.split("\n", 1)
+
+            if len(parts) > 1:
+                # Remove the # from the title and use it as key
+                title = parts[0].replace("#", "").strip()
+                # Use the rest as content
+                content = parts[1].strip()
+                sections[title] = content
+            else:
+                # Handle case where section has no content
+                title = parts[0].replace("#", "").strip()
+                sections[title] = None
+
+    if len(sections) > 0:
+        return sections
+    else:
+        return None
+
+
+def extract_within(content: str, from_idx: int, to_idx: int) -> str:
+    with suppress(Exception):
+        extracted_content = content[from_idx:to_idx]
+        parsed = json.loads(
+            extracted_content
+        )  # if this parses as json, set the response as that.
+        if isinstance(parsed, dict):
+            logging.warning(
+                "The LLM did not return structured data but embedded the data into a markdown code block. This indicates the prompt is not optimised for that AI model."
+            )
+            content = extracted_content
+    return content
+
+
+def pre_format_sections(response: Any) -> Any:
+    """Pre-cleaning of the response for some known, specific use cases
+    prior to it being parsed for sections
+    """
+    if isinstance(response, dict):
+        # No matter if the result is already structured, we want to go through the code below to validate the JSON
+        response = json.dumps(response)
+
+    if not isinstance(response, str):
+        # if it's not a string, we make it so as it'll be parsed later
+        response = str(response)
+
+    # In some cases, the LLM will not return structured json but instead embed the JSON into a markdown code block
+    # This is not ideal and actually should not happen
+    if response.startswith("```json\n") and response.endswith("\n```"):
+        response = extract_within(response, 8, -3)
+
+    if response.startswith('"{') and response.endswith('}"'):
+        # Some Anthropic models embed the actual JSON dict inside a JSON string
+        # In that case it gets parsed once to get rid of the first level of marshalling
+        with suppress(Exception):
+            response = json.loads(response)
+    return response
+
+
+def parse_json_sections(
+    response: Any,
+) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
+    response = pre_format_sections(response)
+
+    with suppress(Exception):
+        parsed_json = json.loads(response)
+
+        if not isinstance(parsed_json, dict):
+            return (response, None)
+        sections = {}
+        for key, value in parsed_json.items():
+            if isinstance(value, list) and len(value) == 0:
+                value = None  # For links, LLM returns '[]' which is unsightly when converted to markdown
+
+            if isinstance(value, list):
+                sections[key] = "\n\n".join(f"{str(item)}" for item in value)
+            elif value is not None:
+                sections[key] = str(
+                    value
+                )  # force to strings. We only expect markdown and don't want to give anything but a string to the UI
+            else:
+                sections[key] = value  # type: ignore
+        if sections:
+            combined = combine_sections(sections)
+            return (combined, sections)  # type: ignore
+
+    return (response, None)
+
+
+def process_response_into_sections(
+    response: Any,
+) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
+    sections = None
+
+    if REQUEST_STRUCTURED_OUTPUT_FROM_LLM:
+        (response, sections) = parse_json_sections(response)
+
+    if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
+        sections = parse_markdown_into_sections_from_hash_sign(response)
+    if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
+        sections = parse_markdown_into_sections_from_equal_sign(response)
+
+    return (response, sections)
+
+
+def is_response_an_incorrect_tool_call(
+    sections: Optional[InputSectionsDataType], choice: dict
+) -> bool:
+    """Cf. https://github.com/BerriAI/litellm/issues/8241
+    This code detects when LiteLLM is incapable of handling both tool calls and structured output. This only happens when the LLM is returning a single tool call.
+    In that case the intention is to retry the LLM call without structured output.
+    Post processing may still try to generate a structured output from a monolithic markdown.
+    """
+    with suppress(Exception):
+        message = choice.get("message", {})
+        finish_reason = choice.get("finish_reason")
+        content = message.get("content")
+        tool_calls = message.get("tool_calls")
+        role = message.get("role")
+        if (
+            sections
+            and content
+            and (
+                # azure
+                finish_reason == "stop"
+                or
+                # bedrock
+                finish_reason == "tool_calls"
+            )
+            and role == "assistant"
+            and not tool_calls
+        ):
+            if not isinstance(content, dict):
+                content = json.loads(content)
+            if not isinstance(content, dict):
+                return False
+            for section_title in sections:
+                if section_title in content:
+                    return False
+            return True
+    return False
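A short, self-contained illustration of the parsing pipeline above, assuming the default environment flags (both parsing flags true). The sample markdown and section names are invented; both functions are taken directly from this file:

```python
from holmes.core.investigation_structured_output import (
    get_output_format_for_investigation,
    process_response_into_sections,
)

# Build a response_format for two sections; the wrapper matches the
# {"type": "json_schema", ...} structure produced above.
fmt = get_output_format_for_investigation(
    {"Key Findings": "What you checked and found", "Next Steps": "How to fix it"}
)
assert fmt["json_schema"]["schema"]["required"] == ["Key Findings", "Next Steps"]

# Plain markdown with '# ' headers fails the JSON parse and is then
# split by parse_markdown_into_sections_from_hash_sign.
markdown = "# Key Findings\nPod api-7f9 is CrashLooping.\n# Next Steps\nInspect the liveness probe."
text, sections = process_response_into_sections(markdown)
assert sections == {
    "Key Findings": "Pod api-7f9 is CrashLooping.",
    "Next Steps": "Inspect the liveness probe.",
}
```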
holmes/core/issue.py
ADDED

@@ -0,0 +1,54 @@
+from strenum import StrEnum
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict
+
+
+class IssueStatus(StrEnum):
+    OPEN = "open"
+    CLOSED = "closed"
+
+
+# TODO: look at finding in Robusta
+class Issue(BaseModel):
+    model_config = ConfigDict(extra="forbid", validate_default=True)
+
+    # Identifier for the issue - source + issue_id should be unique
+    id: str
+
+    # Name of the issue - not necessarily unique
+    name: str
+
+    # Source of the issue - e.g. jira
+    source_type: str
+
+    # Identifier for the instance of the source - e.g. Jira project key
+    source_instance_id: str
+
+    # Link to the issue, when available
+    url: Optional[str] = None
+
+    # Raw object from the source - e.g. a dict from the source's API
+    raw: Optional[dict] = None
+
+    # These fields are all optional and used for visual presentation of the issue.
+    # There may not be a 1:1 mapping between source fields and these fields, which is OK.
+    # E.g. Jira issues can have arbitrary statuses like 'closed' and 'resolved', whereas for presentation's sake
+    # we want to classify them as open/closed so we can color the issue red/green.
+    # If these fields are not present, an LLM may be used to guess them.
+    presentation_status: Optional[IssueStatus] = None
+
+    # Markdown with key metadata about the issue. Suggested format is several lines each styled as "*X*: Y" and separated by \n
+    presentation_key_metadata: Optional[str] = None
+
+    # Markdown with all metadata about the issue. Suggested to format this with presentation_utils.dict_to_markdown
+    presentation_all_metadata: Optional[str] = None
+
+    # title: Optional[str] = None  # Short title or summary of the issue
+    description: Optional[str] = None  # Detailed description of the issue
+    # status: Optional[str] = None  # Current status (e.g., 'open', 'closed', 'resolved')
+    # group_id: Optional[str] = None  # Grouping ID from the source (when relevant)
+    # priority: Optional[str] = None  # Priority level of the issue (e.g., 'high', 'medium', 'low')
+    # created_at: Optional[datetime] = None  # Timestamp of when the issue was created
+    # updated_at: Optional[datetime] = None  # Timestamp of when the issue was last updated
+    # metadata: Optional[dict] = None  # All additional metadata from the source (can be hierarchical - e.g. dicts in dicts)