holmesgpt 0.14.1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic.

holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
 # This is patched by github actions during release
- __version__ = "0.14.1"
+ __version__ = "0.14.2"
 
 # Re-export version functions from version module for backward compatibility
 from .version import (
holmes/common/env_vars.py CHANGED
@@ -73,11 +73,11 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
 # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
 ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
 
- MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 200))
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
 
 # Limit each tool response to N% of the total context window.
 # Number between 0 and 100
 # Setting to either 0 or any number above 100 disables the logic that limits tool response size
 TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
-     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 10)
+     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
 )
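
Both of these are plain environment-variable overrides, so the previous behaviour can be restored without code changes. A minimal sketch (mirroring the `os.environ.get` pattern in the hunk above; the surrounding script is illustrative, not from the package):

```python
import os

# Illustrative only: set an override before the module-level defaults are read,
# e.g. to restore the pre-0.14.2 graph-point limit.
os.environ.setdefault("MAX_GRAPH_POINTS", "200")

# Same resolution logic as env_vars.py above: env var wins, otherwise the new defaults.
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
    os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
)

print(MAX_GRAPH_POINTS, TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT)  # -> 200.0 15.0
```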
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py CHANGED
@@ -242,9 +242,9 @@ Examples:
 import logging
 
 logger = logging.getLogger(__name__)
- logger.info(f"Tempo query: {stats_query}")
+ logger.debug(f"Tempo query: {stats_query}")
 
- logger.info(f"start: {start}, end: {end}")
+ logger.debug(f"start: {start}, end: {end}")
 
 all_traces_response = api.search_traces_by_query(
     q=stats_query,
@@ -253,7 +253,7 @@ Examples:
     limit=1000,
 )
 
- logger.info(f"Response: {all_traces_response}")
+ logger.debug(f"Response: {all_traces_response}")
 
 traces = all_traces_response.get("traces", [])
 if not traces:
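
Since these messages now log at DEBUG instead of INFO, they no longer appear in default output. A minimal sketch of surfacing them again while troubleshooting, assuming the logger name follows the module path shown in the RECORD below (the `logging.getLogger(__name__)` call in the hunk suggests this, but it is an inference):

```python
import logging

# Make sure some handler actually emits DEBUG records (adjust to your setup).
logging.basicConfig(level=logging.DEBUG)

# Assumed logger name, derived from the module path in the wheel's RECORD.
logging.getLogger("holmes.plugins.toolsets.grafana.toolset_grafana_tempo").setLevel(
    logging.DEBUG
)
```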
holmes/plugins/toolsets/prometheus/prometheus.py CHANGED
@@ -43,14 +43,12 @@ PROMETHEUS_METADATA_API_LIMIT = 100 # Default limit for Prometheus metadata API
 DEFAULT_QUERY_TIMEOUT_SECONDS = 20
 MAX_QUERY_TIMEOUT_SECONDS = 180
 # Default character limit for query responses to prevent token limit issues
- DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 20000
+ DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 30000
 # Default timeout for metadata API calls (discovery endpoints)
 DEFAULT_METADATA_TIMEOUT_SECONDS = 20
 MAX_METADATA_TIMEOUT_SECONDS = 60
 # Default time window for metadata APIs (in hours)
 DEFAULT_METADATA_TIME_WINDOW_HRS = 1
- # Sample size for data summaries when results are too large
- DATA_SUMMARY_SAMPLE_SIZE = 10
 
 
 class PrometheusConfig(BaseModel):
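
Note that `DEFAULT_QUERY_RESPONSE_SIZE_LIMIT` is a character budget, not a token budget, so raising it from 20000 to 30000 allows larger raw query responses before any truncation applies. A rough sketch of the idea with a hypothetical helper (not HolmesGPT's actual truncation code):

```python
DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 30000  # characters, matching the new default above


def limit_response_size(text: str, limit: int = DEFAULT_QUERY_RESPONSE_SIZE_LIMIT) -> str:
    """Hypothetical illustration: cap a query response at `limit` characters."""
    if len(text) <= limit:
        return text
    omitted = len(text) - limit
    return text[:limit] + f"\n... [truncated, {omitted} characters omitted]"
```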
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 CHANGED
@@ -6,11 +6,20 @@
 * Combine multiple patterns with regex OR (|) to reduce API calls:
   - `{__name__=~"node_cpu.*|node_memory.*|node_disk.*"}` - get all node resource metrics in one call
   - `{__name__=~"container.*|pod.*|kube.*"}` - get all Kubernetes-related metrics
-   - `{namespace=~"default|kube-system|monitoring"}` - metrics from multiple namespaces
+   - `{namespace=~"example1|example2|example3"}` - metrics from multiple namespaces
 * Use `get_metric_metadata` after discovering names to get types/descriptions if needed
 * Use `get_label_values` to discover pods, namespaces, jobs: e.g., get_label_values(label="pod")
 * Only use `get_series` when you need full label sets (slower than other methods)
 
+ ## Retrying queries that return too much data
+ * When a Prometheus query returns too much data (e.g., truncation error), you MUST retry with a more specific query, fewer data points, or topk/bottomk
+ * NEVER EVER EVER answer a question based on Prometheus data that was truncated, as you might be missing important information and give a totally wrong answer
+ * Prefer telling the user you can't answer the question because of too much data rather than answering based on incomplete data
+ * You are also able to show graphs to the user (using the promql embed functionality mentioned below), so you can show users graphs and THEY can interpret the data themselves, even if you can't answer.
+ * Do NOT hesitate to try alternative queries and reduce the amount of data returned until you get a successful query
+ * Be extremely, extremely cautious when answering based on get_label_values because the existence of a label value says NOTHING about the metric value itself (is it high, low, or perhaps the label exists in Prometheus but it's an older series not present right now)
+ * DO NOT give answers about metrics based on what 'is typically the case' or 'common knowledge' - if you can't see the actual metric value, you MUST NEVER EVER answer about it - just tell the user your limitations due to the size of the data
+ 
 ## Alert Investigation & Query Execution
 * When investigating a Prometheus alert, ALWAYS call list_prometheus_rules to get the alert definition
 * Use Prometheus to query metrics from the alert promql
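
The discovery guidance in this hunk (regex-OR name matching, `get_label_values`) maps onto standard Prometheus HTTP API endpoints. A minimal sketch of what those calls look like directly against Prometheus, with a placeholder URL (this is not the toolset's own implementation):

```python
import requests

PROM_URL = "http://prometheus.example:9090"  # placeholder

# One call for several metric families, using a regex-OR series selector.
names = requests.get(
    f"{PROM_URL}/api/v1/label/__name__/values",
    params={"match[]": '{__name__=~"node_cpu.*|node_memory.*|node_disk.*"}'},
    timeout=20,
).json()["data"]

# Label-value discovery, roughly what get_label_values(label="pod") asks for.
pods = requests.get(f"{PROM_URL}/api/v1/label/pod/values", timeout=20).json()["data"]

print(f"{len(names)} metric names, {len(pods)} pods")
```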
@@ -37,24 +46,19 @@
 * ALWAYS use `topk()` or `bottomk()` to limit the number of series returned
 * Standard pattern for high-cardinality queries:
   - Use `topk(5, <your_query>)` to get the top 5 series
-   - Example: `topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m]))`
+   - Example: `topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m]))`
   - This prevents context overflow and focuses on the most relevant data
 * To also capture the aggregate of remaining series as "other":
   ```
-   topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m]))
-   or
-   label_replace(
-     (sum(rate(container_cpu_usage_seconds_total{namespace="default"}[5m])) - sum(topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m])))),
-     "pod", "other", "", ""
-   )
+   topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m])) or label_replace((sum(rate(container_cpu_usage_seconds_total{namespace="example"}[5m])) - sum(topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m])))), "pod", "other", "", "")
   ```
 * Common high-cardinality scenarios requiring topk():
   - Pod-level metrics in namespaces with many pods
   - Container-level CPU/memory metrics
   - HTTP metrics with many endpoints or status codes
   - Any query returning more than 10 time series
- * For initial exploration, use instant queries with `count()` to check cardinality:
-   - Example: `count(count by (pod) (container_cpu_usage_seconds_total{namespace="default"}))`
+ * For initial exploration, you may use instant queries with `count()` to check cardinality:
+   - Example: `count(count by (pod) (container_cpu_usage_seconds_total{namespace="example"}))`
   - If count > 10, use topk() in your range query
 * When doing queries, always extend the time range, to 15 min before and after the alert start time
 * ALWAYS embed the execution results into your answer
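
For reference, the `topk` pattern recommended above can be run directly as a Prometheus range query. A minimal sketch against the standard HTTP API, with placeholder URL and namespace (not the package's own query code):

```python
import time

import requests

PROM_URL = "http://prometheus.example:9090"  # placeholder
QUERY = 'topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m]))'

end = time.time()
start = end - 30 * 60  # roughly 15 minutes either side of the time of interest

resp = requests.get(
    f"{PROM_URL}/api/v1/query_range",
    params={"query": QUERY, "start": start, "end": end, "step": 60},
    timeout=20,
)
resp.raise_for_status()
for series in resp.json()["data"]["result"]:
    print(series["metric"].get("pod", "<no pod label>"), len(series["values"]), "samples")
```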
holmesgpt-0.14.1.dist-info/METADATA → holmesgpt-0.14.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: holmesgpt
- Version: 0.14.1
+ Version: 0.14.2
 Summary:
 Author: Natan Yellin
 Author-email: natan@robusta.dev
holmesgpt-0.14.1.dist-info/RECORD → holmesgpt-0.14.2.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
 holmes/.git_archival.json,sha256=PbwdO7rNhEJ4ALiO12DPPb81xNAIsVxCA0m8OrVoqsk,182
- holmes/__init__.py,sha256=jTatz8d7goRhTZfG4-8-aLMSIe0Pi2sDQJ5wtjRWGcg,257
+ holmes/__init__.py,sha256=aObdUvtSLEMRLcbzR1BLUnoN1nK0-PV8tAXzjOfEEh8,257
 holmes/clients/robusta_client.py,sha256=rWst1PANJaIsprp3jZ7RV5UpttM_YLBGQ8B5noZqvgg,1532
- holmes/common/env_vars.py,sha256=y61QXRmu80iAtUU7cp30wjCrs_rwHNRS-kZaSyuzJcI,3312
+ holmes/common/env_vars.py,sha256=3CKyDmPtEAfYFxWC5wEDq5ppn94BhzDbJA3k9Vtd_WU,3312
 holmes/common/openshift.py,sha256=akbQ0GpnmuzXOqTcotpTDQSDKIROypS9mgPOprUgkCw,407
 holmes/config.py,sha256=yu0kQox7tfeKc4kJLESH-eGa6w1-nNC9kxAOtHf_qhQ,21781
 holmes/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -172,7 +172,7 @@ holmes/plugins/toolsets/grafana/loki_api.py,sha256=f7oTzfhJ1LojsPoAfsKt32ADWffLE
 holmes/plugins/toolsets/grafana/toolset_grafana.py,sha256=_A3DUOyd2624I75BknsZhHpK1mzcf7JfACL7_ET6sPM,4922
 holmes/plugins/toolsets/grafana/toolset_grafana_loki.py,sha256=MK0mK5h8MZuULwAoQlng3UZS1xtxHzePwhEoJiroJSw,3912
 holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2,sha256=0HBYUXkGYWZbHwIvfQEF5oL9LFMYzjgcmL1U6RjgPSE,10417
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py,sha256=5lmWIVc8c4iSGwpvhhhxGPe5-LtpGMzprSdR8GmiuxQ,38416
+ holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py,sha256=4q9FCHZ2kuI4Kng_JOWipkHIUyfkH2zT5zSywnFie18,38419
 holmes/plugins/toolsets/grafana/trace_parser.py,sha256=8PjqPGDGo9uB2Z8WWWknMKdhcqlqZEVncQCCkl2F06A,7024
 holmes/plugins/toolsets/helm.yaml,sha256=-IPDChKMHcxGbzA0z9GKczRshL-mD24cHpBizfNM1jM,1604
 holmes/plugins/toolsets/internet/internet.py,sha256=cQi8R2rcttIZ49egSzi2y2UVt4tncqE8medxiXp8II8,7779
@@ -196,8 +196,8 @@ holmes/plugins/toolsets/opensearch/opensearch_logs.py,sha256=_j-JAhLWtxhBPafCvey
 holmes/plugins/toolsets/opensearch/opensearch_traces.py,sha256=FjDbkU-oI-spMdra0raSmiHZb6Cfbo_AsS_OKEt9coI,8876
 holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2,sha256=Xn8AW4XCMYV1VkBbF8nNB9fUpKQ1Vbm88iFczj-LQXo,1035
 holmes/plugins/toolsets/opensearch/opensearch_utils.py,sha256=mh9Wp22tOdJYmA9IaFS7tD3aEENljyeuPOsF-lEe5C0,5097
- holmes/plugins/toolsets/prometheus/prometheus.py,sha256=_13bVlcxVu0ezcZdyyauFIKPtp-ADLwSIOSXTy3dTag,65922
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2,sha256=ssokrP2TNMttRc4oH89BY3cHkTHNHrOUAi4rYcxuMak,5075
+ holmes/plugins/toolsets/prometheus/prometheus.py,sha256=H5sdiwk2nAWrnD23wR-8nkTuRLBOhrCZXc51EOgDqIQ,65832
+ holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2,sha256=taf5C-N9rdp1A7S__hETefcm2OaYHJLjs1ZbuqIsGtE,6383
 holmes/plugins/toolsets/prometheus/utils.py,sha256=ZenD354dP0sRmm0R-QBuAq1jyn40GjYf4wx15bXIYRc,775
 holmes/plugins/toolsets/rabbitmq/api.py,sha256=-BtqF7hQWtl_OamnQ521vYHhR8E2n2wcPNYxfI9r4kQ,14307
 holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2,sha256=qetmtJUMkx9LIihr2fSJ2EV9h2J-b-ZdUAvMtopXZYY,3105
@@ -237,8 +237,8 @@ holmes/utils/sentry_helper.py,sha256=_IbxqlqbsNb_ncvpZ-B5XxcauQphJStcwaVxRj18RpU
 holmes/utils/stream.py,sha256=L4vlu1xX5Ihtn-D0Mfml_HuQRfLhHFSkWNojcAJLi9g,3252
 holmes/utils/tags.py,sha256=SU4EZMBtLlIb7OlHsSpguFaypczRzOcuHYxDSanV3sQ,3364
 holmes/version.py,sha256=uDRPOvVaHreROj_9HPe81RVpTzHcG8ojpGTsnJIlQOM,5220
- holmesgpt-0.14.1.dist-info/LICENSE.txt,sha256=RdZMj8VXRQdVslr6PMYMbAEu5pOjOdjDqt3yAmWb9Ds,1072
- holmesgpt-0.14.1.dist-info/METADATA,sha256=vm-GB1-srkFxyYzIbo73gxxamIN5nfkQaaHJ7lNYYuE,16184
- holmesgpt-0.14.1.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
- holmesgpt-0.14.1.dist-info/entry_points.txt,sha256=JdzEyZhpaYr7Boo4uy4UZgzY1VsAEbzMgGmHZtx9KFY,42
- holmesgpt-0.14.1.dist-info/RECORD,,
+ holmesgpt-0.14.2.dist-info/LICENSE.txt,sha256=RdZMj8VXRQdVslr6PMYMbAEu5pOjOdjDqt3yAmWb9Ds,1072
+ holmesgpt-0.14.2.dist-info/METADATA,sha256=_-DXRD2oFoAqxL5uMxMeds-RItenYjrE2aRKnmV0DHQ,16184
+ holmesgpt-0.14.2.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
+ holmesgpt-0.14.2.dist-info/entry_points.txt,sha256=JdzEyZhpaYr7Boo4uy4UZgzY1VsAEbzMgGmHZtx9KFY,42
+ holmesgpt-0.14.2.dist-info/RECORD,,