holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/main.py CHANGED
@@ -1,9 +1,7 @@
  # ruff: noqa: E402
  import os
- import sys
 
  from holmes.utils.cert_utils import add_custom_certificate
- from holmes.utils.colors import USER_COLOR
 
  ADDITIONAL_CERTIFICATE: str = os.environ.get("CERTIFICATE", "")
  if add_custom_certificate(ADDITIONAL_CERTIFICATE):
@@ -11,8 +9,8 @@ if add_custom_certificate(ADDITIONAL_CERTIFICATE):
 
  # DO NOT ADD ANY IMPORTS OR CODE ABOVE THIS LINE
  # IMPORTING ABOVE MIGHT INITIALIZE AN HTTPS CLIENT THAT DOESN'T TRUST THE CUSTOM CERTIFICATE
-
-
+ import sys
+ from holmes.utils.colors import USER_COLOR
  import json
  import logging
  import socket
@@ -31,7 +29,7 @@ from holmes.config import (
  SourceFactory,
  SupportedTicketSources,
  )
- from holmes.core.prompt import build_initial_ask_messages
+ from holmes.core.prompt import build_initial_ask_messages, generate_user_prompt
  from holmes.core.resource_instruction import ResourceInstructionDocument
  from holmes.core.tools import pretty_print_toolset_status
  from holmes.core.tracing import SpanType, TracingFactory
@@ -76,6 +74,9 @@ opt_api_key: Optional[str] = typer.Option(
  help="API key to use for the LLM (if not given, uses environment variables OPENAI_API_KEY or AZURE_API_KEY)",
  )
  opt_model: Optional[str] = typer.Option(None, help="Model to use for the LLM")
+ opt_fast_model: Optional[str] = typer.Option(
+ None, help="Optional fast model for summarization tasks"
+ )
  opt_config_file: Optional[Path] = typer.Option(
  DEFAULT_CONFIG_LOCATION, # type: ignore
  "--config",
@@ -136,13 +137,6 @@ opt_json_output_file: Optional[str] = typer.Option(
  envvar="HOLMES_JSON_OUTPUT_FILE",
  )
 
- opt_post_processing_prompt: Optional[str] = typer.Option(
- None,
- "--post-processing-prompt",
- help="Adds a prompt for post processing. (Preferable for chatty ai models)",
- envvar="HOLMES_POST_PROCESSING_PROMPT",
- )
-
  opt_documents: Optional[str] = typer.Option(
  None,
  "--documents",
@@ -177,6 +171,7 @@ def ask(
  # common options
  api_key: Optional[str] = opt_api_key,
  model: Optional[str] = opt_model,
+ fast_model: Optional[str] = opt_fast_model,
  config_file: Optional[Path] = opt_config_file,
  custom_toolsets: Optional[List[Path]] = opt_custom_toolsets,
  max_steps: Optional[int] = opt_max_steps,
@@ -199,7 +194,6 @@
  ),
  json_output_file: Optional[str] = opt_json_output_file,
  echo_request: bool = opt_echo_request,
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  interactive: bool = typer.Option(
  True,
  "--interactive/--no-interactive",
@@ -244,6 +238,7 @@
  config_file,
  api_key=api_key,
  model=model,
+ fast_model=fast_model,
  max_steps=max_steps,
  custom_toolsets_from_cli=custom_toolsets,
  slack_token=slack_token,
@@ -258,6 +253,7 @@
  dal=None, # type: ignore
  refresh_toolsets=refresh_toolsets, # flag to refresh the toolset status
  tracer=tracer,
+ model_name=model,
  )
 
  if prompt_file and prompt:
@@ -295,11 +291,11 @@ def ask(
  console,
  prompt,
  include_file,
- post_processing_prompt,
  show_tool_output,
  tracer,
  config.get_runbook_catalog(),
  system_prompt_additions,
+ json_output_file=json_output_file,
  )
  return
 
@@ -316,7 +312,7 @@ def ask(
  f'holmes ask "{prompt}"', span_type=SpanType.TASK
  ) as trace_span:
  trace_span.log(input=prompt, metadata={"type": "user_question"})
- response = ai.call(messages, post_processing_prompt, trace_span=trace_span)
+ response = ai.call(messages, trace_span=trace_span)
  trace_span.log(
  output=response.result,
  )
@@ -342,6 +338,7 @@ def ask(
  issue,
  show_tool_output,
  False, # type: ignore
+ log_costs,
  )
 
  if trace_url:
@@ -387,7 +384,6 @@ def alertmanager(
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_investigation.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  ):
  """
  Investigate a Prometheus/Alertmanager alert
@@ -410,7 +406,7 @@
  custom_runbooks=custom_runbooks,
  )
 
- ai = config.create_console_issue_investigator() # type: ignore
+ ai = config.create_console_issue_investigator(model_name=model) # type: ignore
 
  source = config.create_alertmanager_source()
 
@@ -443,8 +439,6 @@
  issue=issue,
  prompt=system_prompt, # type: ignore
  console=console,
- instructions=None,
- post_processing_prompt=post_processing_prompt,
  )
  results.append({"issue": issue.model_dump(), "result": result.model_dump()})
  handle_result(result, console, destination, config, issue, False, True) # type: ignore
@@ -521,7 +515,6 @@ def jira(
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_investigation.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  ):
  """
  Investigate a Jira ticket
@@ -539,7 +532,7 @@
  custom_toolsets_from_cli=custom_toolsets,
  custom_runbooks=custom_runbooks,
  )
- ai = config.create_console_issue_investigator() # type: ignore
+ ai = config.create_console_issue_investigator(model_name=model) # type: ignore
  source = config.create_jira_source()
  try:
  issues = source.fetch_issues()
@@ -560,8 +553,6 @@
  issue=issue,
  prompt=system_prompt, # type: ignore
  console=console,
- instructions=None,
- post_processing_prompt=post_processing_prompt,
  )
 
  console.print(Rule())
@@ -614,7 +605,7 @@ def ticket(
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_ticket.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
+ model: Optional[str] = opt_model,
  ):
  """
  Fetch and print a Jira ticket from the specified source.
@@ -655,7 +646,7 @@
  },
  )
 
- ai = ticket_source.config.create_console_issue_investigator()
+ ai = ticket_source.config.create_console_issue_investigator(model_name=model)
  console.print(
  f"[bold yellow]Analyzing ticket: {issue_to_investigate.name}...[/bold yellow]"
  )
@@ -664,7 +655,8 @@
  + f" for issue '{issue_to_investigate.name}' with description:'{issue_to_investigate.description}'"
  )
 
- result = ai.prompt_call(system_prompt, prompt, post_processing_prompt)
+ ticket_user_prompt = generate_user_prompt(prompt, context={})
+ result = ai.prompt_call(system_prompt, ticket_user_prompt)
 
  console.print(Rule())
  console.print(
@@ -685,14 +677,14 @@ def github(
  ),
  github_owner: Optional[str] = typer.Option(
  None,
- help="The GitHub repository Owner, eg: if the repository url is https://github.com/robusta-dev/holmesgpt, the owner is robusta-dev",
+ help="The GitHub repository Owner, eg: if the repository url is https://github.com/HolmesGPT/holmesgpt, the owner is HolmesGPT",
  ),
  github_pat: str = typer.Option(
  None,
  ),
  github_repository: Optional[str] = typer.Option(
  None,
- help="The GitHub repository name, eg: if the repository url is https://github.com/robusta-dev/holmesgpt, the repository name is holmesgpt",
+ help="The GitHub repository name, eg: if the repository url is https://github.com/HolmesGPT/holmesgpt, the repository name is holmesgpt",
  ),
  update: Optional[bool] = typer.Option(False, help="Update GitHub with AI results"),
  github_query: Optional[str] = typer.Option(
@@ -711,7 +703,6 @@
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_investigation.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  ):
  """
  Investigate a GitHub issue
@@ -730,7 +721,7 @@
  custom_toolsets_from_cli=custom_toolsets,
  custom_runbooks=custom_runbooks,
  )
- ai = config.create_console_issue_investigator()
+ ai = config.create_console_issue_investigator(model_name=model)
  source = config.create_github_source()
  try:
  issues = source.fetch_issues()
@@ -750,8 +741,6 @@
  issue=issue,
  prompt=system_prompt, # type: ignore
  console=console,
- instructions=None,
- post_processing_prompt=post_processing_prompt,
  )
 
  console.print(Rule())
@@ -797,7 +786,6 @@ def pagerduty(
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_investigation.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  ):
  """
  Investigate a PagerDuty incident
@@ -814,7 +802,7 @@
  custom_toolsets_from_cli=custom_toolsets,
  custom_runbooks=custom_runbooks,
  )
- ai = config.create_console_issue_investigator()
+ ai = config.create_console_issue_investigator(model_name=model)
  source = config.create_pagerduty_source()
  try:
  issues = source.fetch_issues()
@@ -836,8 +824,6 @@
  issue=issue,
  prompt=system_prompt, # type: ignore
  console=console,
- instructions=None,
- post_processing_prompt=post_processing_prompt,
  )
 
  console.print(Rule())
@@ -882,7 +868,6 @@ def opsgenie(
  system_prompt: Optional[str] = typer.Option(
  "builtin://generic_investigation.jinja2", help=system_prompt_help
  ),
- post_processing_prompt: Optional[str] = opt_post_processing_prompt,
  documents: Optional[str] = opt_documents,
  ):
  """
@@ -900,7 +885,7 @@
  custom_toolsets_from_cli=custom_toolsets,
  custom_runbooks=custom_runbooks,
  )
- ai = config.create_console_issue_investigator()
+ ai = config.create_console_issue_investigator(model_name=model)
  source = config.create_opsgenie_source()
  try:
  issues = source.fetch_issues()
@@ -919,8 +904,6 @@
  issue=issue,
  prompt=system_prompt, # type: ignore
  console=console,
- instructions=None,
- post_processing_prompt=post_processing_prompt,
  )
 
  console.print(Rule())
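
Taken together, the `holmes/main.py` hunks above remove the `--post-processing-prompt` option and its `post_processing_prompt` plumbing, and add an optional fast model for summarization tasks that is passed into config loading alongside `model`. Since the new option is declared without an explicit flag name, typer would derive `--fast-model` from the `fast_model` parameter. A minimal usage sketch under that assumption (the question text and model identifiers are placeholders, not taken from the package):

  holmes ask "why is the checkout pod crash-looping?" --model gpt-4o --fast-model gpt-4o-mini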
holmes/plugins/interfaces.py CHANGED
@@ -1,4 +1,5 @@
- from typing import List, Iterable
+ from typing import Iterable, List
+
  from holmes.core.issue import Issue
  from holmes.core.tool_calling_llm import LLMResult
 
holmes/plugins/prompts/__init__.py CHANGED
@@ -1,8 +1,9 @@
  import os
  import os.path
+ from datetime import datetime, timezone
  from typing import Optional
+
  from jinja2 import Environment, FileSystemLoader
- from datetime import datetime, timezone
 
  THIS_DIR = os.path.abspath(os.path.dirname(__file__))
 
holmes/plugins/prompts/_fetch_logs.jinja2 CHANGED
@@ -1,9 +1,10 @@
  {%- set loki_ts = toolsets | selectattr("name", "equalto", "grafana/loki") | first -%}
- {%- set coralogix_ts = toolsets | selectattr("name", "equalto", "coralogix/logs") | first -%}
+ {%- set coralogix_ts = toolsets | selectattr("name", "equalto", "coralogix") | first -%}
  {%- set k8s_base_ts = toolsets | selectattr("name", "equalto", "kubernetes/logs") | selectattr("fetch_pod_logs", "defined") | first -%}
  {%- set k8s_yaml_ts = toolsets | selectattr("name", "equalto", "kubernetes/logs") | rejectattr("fetch_pod_logs", "defined") | first -%}
  {%- set opensearch_ts = toolsets | selectattr("name", "equalto", "opensearch/logs") | first -%}
  {%- set datadog_ts = toolsets | selectattr("name", "equalto", "datadog/logs") | first -%}
+ {%- set openshift_ts = toolsets | selectattr("name", "equalto", "openshift/logs") | first -%}
  {%- set bash_ts = toolsets | selectattr("name", "equalto", "bash") | first -%}
 
  ## Logs
@@ -11,6 +12,7 @@
  * IMPORTANT: ALWAYS inform the user about what logs you fetched. For example: "Here are pod logs for ..."
  * IMPORTANT: If logs commands have limits mention them. For example: "Showing last 100 lines of logs:"
  * IMPORTANT: If a filter was used, mention the filter. For example: "Logs filtered for 'error':"
+ * IMPORTANT: If a date range was used (even if just the default one and you didn't specify the parameter, mention the date range. For example: "Logs from last 1 hour..."
 
  {% if loki_ts and loki_ts.status == "enabled" -%}
  * For any logs, including for investigating kubernetes problems, use Loki
@@ -23,9 +25,9 @@
  ** If there are too many logs, or not enough, narrow or widen the timestamps
  * If you are not provided with time information. Ignore start_timestamp and end_timestamp.
  {%- elif coralogix_ts and coralogix_ts.status == "enabled" -%}
- ### coralogix/logs
- #### Coralogix Logs Toolset
- Tools to search and fetch logs from Coralogix.
+ ### coralogix
+ #### Coralogix Toolset
+ Tools to search and fetch logs, traces, metrics, and other telemetry data from Coralogix.
  {% include '_default_log_prompt.jinja2' %}
  {%- elif k8s_base_ts and k8s_base_ts.status == "enabled" -%}
  {% include '_default_log_prompt.jinja2' %}
@@ -34,8 +36,29 @@ Tools to search and fetch logs from Coralogix.
  ### datadog/logs
  #### Datadog Logs Toolset
  Tools to search and fetch logs from Datadog.
- {% include '_default_log_prompt.jinja2' %}
+ * Use the tool `fetch_pod_logs` to access an application's logs.
+ * Do fetch application logs yourself and DO not ask users to do so
+ * If you have an alert/monitor try to figure out the time it fired
+ ** Then, use `start_time=-300` (5 minutes before `end_time`) and `end_time=<time monitor started firing>` when calling `fetch_pod_logs`.
+ ** If there are too many logs, or not enough, narrow or widen the timestamps
+ * If the user did not explicitly ask about a given timeframe, ignore the `start_time` and `end_time` so it will use the default.
+ * IMPORTANT: ALWAYS inform the user about the actual time period fetched (e.g., "Looking at logs from the last <X> days")
+ * IMPORTANT: If a limit was applied, ALWAYS tell the user how many logs were shown vs total (e.g., "Showing latest <Y> of <Z> logs")
+ * IMPORTANT: If any filters were applied, ALWAYS mention them explicitly
+ {%- elif openshift_ts and openshift_ts.status == "enabled" -%}
+ ### openshift/logs
+ #### OpenShift Logs Toolset
+ Tools to search and fetch logs from OpenShift.
+ * Use the tool `oc_logs` to access an application's logs.
+ * Do fetch application logs yourself and DO not ask users to do so
+ * If you have an alert/monitor try to figure out the time it fired
+ ** If there are too many logs, or not enough, narrow or widen the timestamps
+ * IMPORTANT: ALWAYS inform the user about the actual time period fetched (e.g., "Looking at logs from the last <X> days")
+ * IMPORTANT: If a limit was applied, ALWAYS tell the user how many logs were shown vs total (e.g., "Showing latest <Y> of <Z> logs")
+ * IMPORTANT: If any filters were applied, ALWAYS mention them explicitly
  {%- elif k8s_yaml_ts and k8s_yaml_ts.status == "enabled" -%}
+ ### Logs from newrelic
+ * you can fetch logs from newrelic if this is toolset is enabled
  ### kubernetes/logs
  #### Kubernetes Logs Toolset
  Tools to search and fetch logs from Kubernetes.
@@ -54,6 +77,8 @@ DO NOT use `--tail` or `| tail` when calling `kubectl logs` because you may miss
  ** 'kubernetes/logs'
  ** 'grafana/loki'
  ** 'opensearch/logs'
- ** 'coralogix/logs'
+ ** 'coralogix'
  ** 'datadog/logs'
+ ** 'openshift/logs'
+ ** 'newrelic'
  {%- endif -%}
holmes/plugins/prompts/_general_instructions.jinja2 CHANGED
@@ -12,8 +12,7 @@
  * do not stop investigating until you are at the final root cause you are able to find.
  * use the "five whys" methodology to find the root cause.
  * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
- * if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and.
- * in this case, try to find substrings or search for the correct spellings
+ * if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
  * always provide detailed information like exact resource names, versions, labels, etc
  * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
  * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/prompts/_runbook_instructions.jinja2 CHANGED
@@ -1,21 +1,33 @@
- {% if runbooks and runbooks.catalog|length > 0 %}
+ {%- set sections = [
+ {'title': 'Runbook Catalog', 'content': runbook_catalog},
+ {'title': 'Subject/Issue Runbooks', 'content': custom_instructions},
+ {'title': 'Global Instructions', 'content': global_instructions}
+ ] -%}
+ {%- set available = sections | selectattr('content') | list -%}
+ {%- if available -%}
  # Runbook Selection
 
- You (HolmesGPT) have access to a set of runbooks that provide step-by-step troubleshooting instructions for various known issues.
- If one of the following runbooks relates to the user's issue, you MUST fetch it with the fetch_runbook tool.
+ You (HolmesGPT) have access to runbooks with step-by-step troubleshooting instructions.
+ If one of the following runbooks relates to the user's issue or match one of the alerts or symptoms listed in the runbook entry, you MUST fetch it with the fetch_runbook tool.
+ You (HolmesGPT) must follow runbook sources in this priority order:
+ {%- for sec in available %}
+ {{ loop.index }}) {{ sec.title }} (priority #{{ loop.index }})
+ {%- endfor %}
 
- ## Available Runbooks for fetch_runbook tool
- {% for runbook in runbooks.catalog %}
- ### description: {{ runbook.description }}
- link: {{ runbook.link }}
- {% endfor %}
+ {%- for sec in available %}
+ ## {{ sec.title }} (priority #{{ loop.index }})
 
- If there is a runbook that MIGHT match the user's issue, you MUST:
+ {%- set content = (sec.content|string) -%}
+ {{ content.replace('\n', '\n ') }}
+
+ {%- endfor %}
+
+
+ If a runbook might match the user's issue, you MUST:
  1. Fetch the runbook with the `fetch_runbook` tool.
  2. Decide based on the runbook's contents if it is relevant or not.
- 3. If it seems relevant, inform the user that you accesses a runbook and will use it to troubleshoot the issue.
+ 3. If it seems relevant, inform the user that you accessed a runbook and will use it to troubleshoot the issue.
  4. To the maximum extent possible, follow the runbook instructions step-by-step.
  5. Provide a detailed report of the steps you performed, including any findings or errors encountered.
- 6. If a runbook step requires tools or integrations you don't have access to tell the user that you cannot perform that step due to missing tools.
-
+ 6. If a runbook step requires tools or integrations you don't have access to, tell the user that you cannot perform that step due to missing tools.
  {%- endif -%}
holmes/plugins/prompts/base_user_prompt.jinja2 ADDED
@@ -0,0 +1,7 @@
+ {{ user_prompt }}
+
+ {% if runbooks_enabled %}
+ {% include '_runbook_instructions.jinja2' %}
+ {% endif %}
+
+ {% include '_current_date_time.jinja2' %}
holmes/plugins/prompts/conversation_history_compaction.jinja2 ADDED
@@ -0,0 +1,89 @@
+ This conversation history is too long and no longer fits your context window.
+ Your task is to create a detailed summary of the conversation so far. This will help continuing the investigation after summarisation is done.
+ This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.
+
+ Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
+
+ 1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
+ - The user's explicit requests and intents
+ - Your approach to addressing the user's requests
+ - Key decisions, technical concepts and code patterns
+ - Specific details like kubernetes resource names, namespaces, relevant logs extracts (verbatim), etc
+ - What tools were called and the outcome or analysis of the tool output
+ 2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.
+
+ Your summary should include the following sections:
+
+ 1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
+ 2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
+ 3. Resources: Enumerate specific kubernetes or cloud resources and logs extract examined. Pay special attention to the most recent messages and include logs or tool outputs where applicable and include a summary of why this resource is important.
+ 4. Tool calls: List all tool calls that were executed and whether they failed/succeeded. Make sure to mention the full arguments used. Only summarize the arguments if they are over 200 characters long
+ 5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
+ 6. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
+ 7. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include resource names and their namespace and log extracts where applicable.
+ 8. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
+ If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
+
+ Here's an example of how your output should be structured:
+
+ <example>
+ <analysis>
+ [Your thought process, ensuring all points are covered thoroughly and accurately]
+ </analysis>
+
+ <summary>
+ 1. Primary Request and Intent:
+ [Detailed description]
+
+ 2. Key Technical Concepts:
+ - [Concept 1]
+ - [Concept 2]
+ - [...]
+
+ 3. Infrastructure Resources:
+ - [Deployment name 1]
+ - [Summary of why this deployment is important]
+ - [Summary of the issues identified with this deployment, if any]
+ - [List of related pods/services or otyher resources and why they are relevant]
+ - [Pod name 2]
+ - [Summary of why this pod is important]
+ - [Summary of the issues identified with this pod, if any]
+ - [List of related pods/services or otyher resources and why they are relevant]
+ - [...]
+
+ 4. Tool Calls:
+ - [✅ function_name {args}]
+ - [✅ function_name {args}]
+ - [❌ function_name {args} - NO DATA]
+ - [❌ function_name {args} - Error message]
+ - [...]
+
+ 5. Problem Solving:
+ [Description of solved problems and ongoing troubleshooting]
+
+ 6. Pending Tasks:
+ - [Task 1]
+ - [Task 2]
+ - [...]
+
+ 7. Current Work:
+ [Precise description of current work]
+
+ 8. Optional Next Step:
+ [Optional Next step to take]
+
+ </summary>
+ </example>
+
+ Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
+
+ There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
+ <example>
+ ## Compact Instructions
+ When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
+ </example>
+
+ <example>
+ # Summary instructions
+ When you are using compact - please focus on test output and code changes. Include relevant logs verbatim.
+ </example>
holmes/plugins/prompts/generic_ask.jinja2 CHANGED
@@ -8,14 +8,10 @@ If you have a good and concrete suggestion for how the user can fix something, t
  If you are unsure about the answer to the user's request or how to satisfy their request, you should gather more information. This can be done by asking the user for more information.
  Bias towards not asking the user for help if you can find the answer yourself.
 
- {% include '_current_date_time.jinja2' %}
-
  Use conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.
 
  {% include '_general_instructions.jinja2' %}
 
- {% include '_runbook_instructions.jinja2' %}
-
  # Style guide
 
  * Reply with terse output.
holmes/plugins/prompts/generic_ask_conversation.jinja2 CHANGED
@@ -4,7 +4,6 @@ Ask for multiple tool calls at the same time as it saves time for the user.
  Do not say 'based on the tool output' or explicitly refer to tools at all.
  If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
  If you have a good and concrete suggestion for how the user can fix something, tell them even if not asked explicitly
- {% include '_current_date_time.jinja2' %}
 
  Use conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.
 
holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 CHANGED
@@ -3,7 +3,6 @@ Whenever possible you MUST first use tools to investigate then answer the questi
  Ask for multiple tool calls at the same time as it saves time for the user.
  Do not say 'based on the tool output' or explicitly refer to tools at all.
  If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
- {% include '_current_date_time.jinja2' %}
 
  ### Context Awareness:
  Be aware that this conversation is follow-up questions to a prior investigation conducted for the {{issue}}.
holmes/plugins/prompts/generic_investigation.jinja2 CHANGED
@@ -4,7 +4,6 @@ Ask for multiple tool calls at the same time as it saves time for the user.
  Do not say 'based on the tool output'
 
  Provide an terse analysis of the following {{ issue.source_type }} alert/issue and why it is firing.
- * {% include '_current_date_time.jinja2' %}
  * If the tool requires string format timestamps, query from 'start_timestamp' until 'end_timestamp'
  * If the tool requires timestamps in milliseconds, query from 'start_timestamp' until 'end_timestamp'
  * If you need timestamp in string format, query from 'start_timestamp_millis' until 'end_timestamp_millis'