holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt has been flagged as possibly problematic.

Files changed (183)
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/core/investigation.py ADDED
@@ -0,0 +1,152 @@
+import logging
+from typing import Optional
+
+from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT
+from holmes.config import Config
+from holmes.core.investigation_structured_output import process_response_into_sections
+from holmes.core.issue import Issue
+from holmes.core.models import InvestigateRequest, InvestigationResult
+from holmes.core.supabase_dal import SupabaseDal
+from holmes.utils.global_instructions import add_global_instructions_to_user_prompt
+from holmes.utils.robusta import load_robusta_api_key
+
+from holmes.core.investigation_structured_output import (
+    DEFAULT_SECTIONS,
+    REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
+    get_output_format_for_investigation,
+)
+
+from holmes.plugins.prompts import load_and_render_prompt
+
+
+def investigate_issues(
+    investigate_request: InvestigateRequest,
+    dal: SupabaseDal,
+    config: Config,
+    model: Optional[str] = None,
+) -> InvestigationResult:
+    load_robusta_api_key(dal=dal, config=config)
+    context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id"))
+
+    resource_instructions = dal.get_resource_instructions(
+        "alert", investigate_request.context.get("issue_type")
+    )
+    global_instructions = dal.get_global_instructions_for_account()
+
+    raw_data = investigate_request.model_dump()
+    if context:
+        raw_data["extra_context"] = context
+
+    ai = config.create_issue_investigator(dal=dal, model=model)
+
+    issue = Issue(
+        id=context["id"] if context else "",
+        name=investigate_request.title,
+        source_type=investigate_request.source,
+        source_instance_id=investigate_request.source_instance_id,
+        raw=raw_data,
+    )
+
+    investigation = ai.investigate(
+        issue,
+        prompt=investigate_request.prompt_template,
+        post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT,
+        instructions=resource_instructions,
+        global_instructions=global_instructions,
+        sections=investigate_request.sections,
+    )
+
+    (text_response, sections) = process_response_into_sections(investigation.result)
+
+    logging.debug(f"text response: {text_response}")
+    return InvestigationResult(
+        analysis=text_response,
+        sections=sections,
+        tool_calls=investigation.tool_calls or [],
+        instructions=investigation.instructions,
+    )
+
+
+def get_investigation_context(
+    investigate_request: InvestigateRequest,
+    dal: SupabaseDal,
+    config: Config,
+    request_structured_output_from_llm: Optional[bool] = None,
+):
+    load_robusta_api_key(dal=dal, config=config)
+    ai = config.create_issue_investigator(dal=dal, model=investigate_request.model)
+
+    raw_data = investigate_request.model_dump()
+    context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id"))
+    if context:
+        raw_data["extra_context"] = context
+
+    issue = Issue(
+        id=context["id"] if context else "",
+        name=investigate_request.title,
+        source_type=investigate_request.source,
+        source_instance_id=investigate_request.source_instance_id,
+        raw=raw_data,
+    )
+
+    runbooks = ai.runbook_manager.get_instructions_for_issue(issue)
+
+    instructions = dal.get_resource_instructions(
+        "alert", investigate_request.context.get("issue_type")
+    )
+    if instructions is not None and instructions.instructions:
+        runbooks.extend(instructions.instructions)
+    if instructions is not None and len(instructions.documents) > 0:
+        docPrompts = []
+        for document in instructions.documents:
+            docPrompts.append(f"* fetch information from this URL: {document.url}\n")
+        runbooks.extend(docPrompts)
+
+    # This section is about setting vars to request the LLM to return structured output.
+    # It does not mean that Holmes will not return structured sections for investigation as it is
+    # capable of splitting the markdown into sections
+    if request_structured_output_from_llm is None:
+        request_structured_output_from_llm = REQUEST_STRUCTURED_OUTPUT_FROM_LLM
+    response_format = None
+    sections = investigate_request.sections
+    if not sections:
+        sections = DEFAULT_SECTIONS
+        request_structured_output_from_llm = False
+        logging.info(
+            "No section received from the client. Default sections will be used."
+        )
+    elif ai.llm.model and ai.llm.model.startswith(("bedrock", "gemini")):
+        # Structured output does not work well with Bedrock Anthropic Sonnet 3.5, or gemini through litellm
+        request_structured_output_from_llm = False
+
+    if request_structured_output_from_llm:
+        response_format = get_output_format_for_investigation(sections)
+        logging.info("Structured output is enabled for this request")
+    else:
+        logging.info("Structured output is disabled for this request")
+
+    system_prompt = load_and_render_prompt(
+        investigate_request.prompt_template,
+        {
+            "issue": issue,
+            "sections": sections,
+            "structured_output": request_structured_output_from_llm,
+            "toolsets": ai.tool_executor.toolsets,
+        },
+    )
+
+    user_prompt = ""
+    if runbooks:
+        for runbook_str in runbooks:
+            user_prompt += f"* {runbook_str}\n"
+
+        user_prompt = f'My instructions to check \n"""{user_prompt}"""'
+
+    global_instructions = dal.get_global_instructions_for_account()
+    user_prompt = add_global_instructions_to_user_prompt(
+        user_prompt, global_instructions
+    )
+
+    user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}"
+
+    return ai, system_prompt, user_prompt, response_format, sections, runbooks
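
The prompt assembly at the end of get_investigation_context is plain string concatenation, so its output shape is easy to preview. Below is a small standalone sketch (plain Python; the runbook strings and issue dict are invented examples, not values from the package) that mirrors that logic:

```python
# Standalone sketch mirroring the user-prompt assembly in get_investigation_context.
# The runbook strings and issue context below are hypothetical examples.
runbooks = [
    "Check whether the pod was OOMKilled (kubectl describe pod <name>)",
    "fetch information from this URL: https://example.com/runbook",
]

user_prompt = ""
if runbooks:
    for runbook_str in runbooks:
        user_prompt += f"* {runbook_str}\n"  # one markdown bullet per runbook

    user_prompt = f'My instructions to check \n"""{user_prompt}"""'

# (global instructions would be merged in here via add_global_instructions_to_user_prompt)

issue_raw = {"title": "KubePodCrashLooping", "severity": "warning"}  # hypothetical
user_prompt = f"{user_prompt}\n This is context from the issue {issue_raw}"
print(user_prompt)
```

Note that the triple-quoted "My instructions to check" wrapper is only added when at least one runbook or resource instruction was found; otherwise the user prompt starts empty and only the global instructions and the issue context are appended.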
holmes/core/investigation_structured_output.py ADDED
@@ -0,0 +1,264 @@
+import logging
+from typing import Any, Dict, Optional, Tuple
+import json
+import re
+from contextlib import suppress
+from holmes.common.env_vars import load_bool
+
+
+REQUEST_STRUCTURED_OUTPUT_FROM_LLM = load_bool(
+    "REQUEST_STRUCTURED_OUTPUT_FROM_LLM", True
+)
+PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS = load_bool(
+    "PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS", True
+)
+
+
+InputSectionsDataType = Dict[str, str]
+
+DEFAULT_SECTIONS: InputSectionsDataType = {
+    "Alert Explanation": '1-2 sentences explaining the alert itself - note don\'t say "The alert indicates a warning event related to a Kubernetes pod doing blah" rather just say "The pod XYZ did blah" because that is what the user actually cares about',
+    "Key Findings": "What you checked and found",
+    "Conclusions and Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains. Don't say root cause but 'possible root causes'. Be clear to distinguish between what you know for certain and what is a possible explanation",
+    "Next Steps": "What you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)",
+    "Related logs": "Truncate and share the most relevant logs, especially if these explain the root cause. For example: \nLogs from pod robusta-holmes:\n```\n<logs>```\n. Always embed the surrounding +/- 5 log lines to any relevant logs. ",
+    "App or Infra?": "Explain whether the issue is more likely an infrastructure or an application level issue and why you think that.",
+    "External links": "Provide links to external sources and a short sentence describing each link. For example provide links to relevant runbooks, etc. This section is a markdown formatted string.",
+}
+
+
+def get_output_format_for_investigation(
+    sections: InputSectionsDataType,
+) -> Dict[str, Any]:
+    properties = {}
+    required_fields = []
+
+    for title, description in sections.items():
+        properties[title] = {"type": ["string", "null"], "description": description}
+        required_fields.append(title)
+
+    schema = {
+        "$schema": "http://json-schema.org/draft-07/schema#",
+        "type": "object",
+        "required": required_fields,
+        "properties": properties,
+        "additionalProperties": False,
+    }
+    output_format = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "InvestigationResult",
+            "schema": schema,
+            "strict": False,
+        },
+    }
+
+    return output_format
+
+
+def combine_sections(sections: Dict) -> str:
+    content = ""
+    for section_title, section_content in sections.items():
+        if section_content:
+            content = content + f"\n# {section_title}\n{section_content}\n"
+    return content
+
+
+def parse_markdown_into_sections_from_equal_sign(
+    markdown_content: str,
+) -> Optional[Dict[str, Optional[str]]]:
+    """Splits a markdown into different sections where the key is a top level title underlined with `====` and the value is the content
+    ```
+    Header Title
+    ===========
+    Content here
+    ```
+    =>
+    {
+        "Header Title": "Content here"
+    }
+    """
+    matches = re.split(r"(?:^|\n)([^\n]+)\n=+\n", markdown_content.strip())
+
+    # Remove any empty first element if the text starts with a header
+    if matches[0].strip() == "":
+        matches = matches[1:]
+
+    sections = {}
+
+    for i in range(0, len(matches), 2):
+        if i + 1 < len(matches):
+            header = matches[i]
+            content = matches[i + 1].strip()
+            sections[header] = content
+
+    if len(sections) > 0:
+        return sections
+    else:
+        return None
+
+
+def parse_markdown_into_sections_from_hash_sign(
+    markdown_content: str,
+) -> Optional[Dict[str, Optional[str]]]:
+    """Splits a markdown into different sections where the key is a top level title prefixed with `# ` and the value is the content
+    ```
+    # Header Title
+    Content here
+    ```
+    =>
+    {
+        "Header Title": "Content here"
+    }
+    """
+    # Split the text into sections based on headers (# Section)
+    matches = re.split(r"\n(?=# )", markdown_content.strip())
+
+    if not matches[0].startswith("#"):
+        matches = matches[1:]
+
+    sections = {}
+
+    for match in matches:
+        match = match.strip()
+        if match:
+            parts = match.split("\n", 1)
+
+            if len(parts) > 1:
+                # Remove the # from the title and use it as key
+                title = parts[0].replace("#", "").strip()
+                # Use the rest as content
+                content = parts[1].strip()
+                sections[title] = content
+            else:
+                # Handle case where section has no content
+                title = parts[0].replace("#", "").strip()
+                sections[title] = None
+
+    if len(sections) > 0:
+        return sections
+    else:
+        return None
+
+
+def extract_within(content: str, from_idx: int, to_idx: int) -> str:
+    with suppress(Exception):
+        extracted_content = content[from_idx:to_idx]
+        parsed = json.loads(
+            extracted_content
+        )  # if this parses as json, set the response as that.
+        if isinstance(parsed, dict):
+            logging.warning(
+                "The LLM did not return structured data but embedded the data into a markdown code block. This indicates the prompt is not optimised for that AI model."
+            )
+            content = extracted_content
+    return content
+
+
+def pre_format_sections(response: Any) -> Any:
+    """Pre-cleaning of the response for some known, specific use cases
+    prior to it being parsed for sections
+    """
+    if isinstance(response, dict):
+        # No matter if the result is already structured, we want to go through the code below to validate the JSON
+        response = json.dumps(response)
+
+    if not isinstance(response, str):
+        # if it's not a string, we make it so as it'll be parsed later
+        response = str(response)
+
+    # In some cases, the LLM will not return structured json but instead embed the JSON into a markdown code block
+    # This is not ideal and actually should not happen
+    if response.startswith("```json\n") and response.endswith("\n```"):
+        response = extract_within(response, 8, -3)
+
+    if response.startswith('"{') and response.endswith('}"'):
+        # Some Anthropic models embed the actual JSON dict inside a JSON string
+        # In that case it gets parsed once to get rid of the first level of marshalling
+        with suppress(Exception):
+            response = json.loads(response)
+    return response
+
+
+def parse_json_sections(
+    response: Any,
+) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
+    response = pre_format_sections(response)
+
+    with suppress(Exception):
+        parsed_json = json.loads(response)
+
+        if not isinstance(parsed_json, dict):
+            return (response, None)
+        sections = {}
+        for key, value in parsed_json.items():
+            if isinstance(value, list) and len(value) == 0:
+                value = None  # For links, LLM returns '[]' which is unsightly when converted to markdown
+
+            if isinstance(value, list):
+                sections[key] = "\n\n".join(f"{str(item)}" for item in value)
+            elif value is not None:
+                sections[key] = str(
+                    value
+                )  # force to strings. We only expect markdown and don't want to give anything but a string to the UI
+            else:
+                sections[key] = value  # type: ignore
+        if sections:
+            combined = combine_sections(sections)
+            return (combined, sections)  # type: ignore
+
+    return (response, None)
+
+
+def process_response_into_sections(
+    response: Any,
+) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
+    sections = None
+
+    if REQUEST_STRUCTURED_OUTPUT_FROM_LLM:
+        (response, sections) = parse_json_sections(response)
+
+    if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
+        sections = parse_markdown_into_sections_from_hash_sign(response)
+    if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
+        sections = parse_markdown_into_sections_from_equal_sign(response)
+
+    return (response, sections)
+
+
+def is_response_an_incorrect_tool_call(
+    sections: Optional[InputSectionsDataType], choice: dict
+) -> bool:
+    """Cf. https://github.com/BerriAI/litellm/issues/8241
+    This code detects when LiteLLM is incapable of handling both tool calls and structured output. This only happens when the LLM is returning a single tool call.
+    In that case the intention is to retry the LLM call without structured output.
+    Post processing may still try to generate a structured output from a monolithic markdown.
+    """
+    with suppress(Exception):
+        message = choice.get("message", {})
+        finish_reason = choice.get("finish_reason")
+        content = message.get("content")
+        tool_calls = message.get("tool_calls")
+        role = message.get("role")
+        if (
+            sections
+            and content
+            and (
+                # azure
+                finish_reason == "stop"
+                or
+                # bedrock
+                finish_reason == "tool_calls"
+            )
+            and role == "assistant"
+            and not tool_calls
+        ):
+            if not isinstance(content, dict):
+                content = json.loads(content)
+            if not isinstance(content, dict):
+                return False
+            for section_title in sections:
+                if section_title in content:
+                    return False
+            return True
+    return False
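
To make the fallback chain concrete, here is a short usage sketch of the module above (assuming the two env-driven flags keep their defaults of True; the sample responses are invented):

```python
import json

from holmes.core.investigation_structured_output import (
    get_output_format_for_investigation,
    process_response_into_sections,
)

# JSON path: a structured response is validated, empty lists are dropped,
# and the sections are re-combined into a single markdown string.
structured = json.dumps({"Key Findings": "Pod foo was OOMKilled", "External links": []})
text, sections = process_response_into_sections(structured)
# sections -> {"Key Findings": "Pod foo was OOMKilled", "External links": None}

# Markdown fallback: '# ' headers are parsed when the response is not valid JSON.
markdown = "# Key Findings\nPod foo was OOMKilled\n# Next Steps\nRaise the memory limit"
text, sections = process_response_into_sections(markdown)
# sections -> {"Key Findings": "Pod foo was OOMKilled",
#              "Next Steps": "Raise the memory limit"}

# Response format handed to the LLM for a custom section set:
fmt = get_output_format_for_investigation({"Summary": "A one-line summary"})
assert fmt["json_schema"]["schema"]["required"] == ["Summary"]
```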
holmes/core/issue.py ADDED
@@ -0,0 +1,54 @@
+from strenum import StrEnum
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict
+
+
+class IssueStatus(StrEnum):
+    OPEN = "open"
+    CLOSED = "closed"
+
+
+# TODO: look at finding in Robusta
+class Issue(BaseModel):
+    model_config = ConfigDict(extra="forbid", validate_default=True)
+
+    # Identifier for the issue - source + issue_id should be unique
+    id: str
+
+    # Name of the issue - not necessarily unique
+    name: str
+
+    # Source of the issue - e.g. jira
+    source_type: str
+
+    # Identifier for the instance of the source - e.g. Jira project key
+    source_instance_id: str
+
+    # Link to the issue, when available
+    url: Optional[str] = None
+
+    # Raw object from the source - e.g. a dict from the source's API
+    raw: Optional[dict] = None
+
+    # These fields are all optional and used for visual presentation of the issue.
+    # There may not be a 1:1 mapping between source fields and these fields, which is OK.
+    # E.g. jira issues can have arbitrary statuses like 'closed' and 'resolved', whereas for presentation's sake
+    # we want to classify as open/closed so we can color the issue red/green.
+    # If these fields are not present, an LLM may be used to guess them.
+    presentation_status: Optional[IssueStatus] = None
+
+    # Markdown with key metadata about the issue. Suggested format is several lines, each styled as "*X*: Y" and separated by \n
+    presentation_key_metadata: Optional[str] = None
+
+    # Markdown with all metadata about the issue. Suggested to format this with presentation_utils.dict_to_markdown
+    presentation_all_metadata: Optional[str] = None
+
+    # title: Optional[str] = None  # Short title or summary of the issue
+    description: Optional[str] = None  # Detailed description of the issue
+    # status: Optional[str] = None  # Current status (e.g., 'open', 'closed', 'resolved')
+    # group_id: Optional[str] = None  # Grouping ID from the source (when relevant)
+    # priority: Optional[str] = None  # Priority level of the issue (e.g., 'high', 'medium', 'low')
+    # created_at: Optional[datetime] = None  # Timestamp of when the issue was created
+    # updated_at: Optional[datetime] = None  # Timestamp of when the issue was last updated
+    # metadata: Optional[dict] = None  # All additional metadata from the source (can be hierarchical - e.g. dicts in dicts)
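
Since Issue is a pydantic model with extra="forbid", constructing one is straightforward but strict. A minimal sketch with invented values (only id, name, source_type and source_instance_id are required by the model above):

```python
from holmes.core.issue import Issue, IssueStatus

# Hypothetical example values for illustration only.
issue = Issue(
    id="PROJ-123",
    name="Checkout service returning 500s",
    source_type="jira",
    source_instance_id="PROJ",
    url="https://example.atlassian.net/browse/PROJ-123",
    presentation_status=IssueStatus.OPEN,
    raw={"priority": "high"},
)

# extra="forbid" means any undeclared field raises a pydantic ValidationError, e.g.:
# Issue(id="1", name="n", source_type="jira", source_instance_id="P", severity="high")
```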