holmesgpt 0.14.1__py3-none-any.whl → 0.14.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +2 -2
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +3 -3
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -3
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +14 -10
- {holmesgpt-0.14.1.dist-info → holmesgpt-0.14.2.dist-info}/METADATA +1 -1
- {holmesgpt-0.14.1.dist-info → holmesgpt-0.14.2.dist-info}/RECORD +10 -10
- {holmesgpt-0.14.1.dist-info → holmesgpt-0.14.2.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.1.dist-info → holmesgpt-0.14.2.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.1.dist-info → holmesgpt-0.14.2.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/common/env_vars.py
CHANGED
|
@@ -73,11 +73,11 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
|
|
|
73
73
|
# For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
|
|
74
74
|
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
|
|
75
75
|
|
|
76
|
-
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS",
|
|
76
|
+
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
|
|
77
77
|
|
|
78
78
|
# Limit each tool response to N% of the total context window.
|
|
79
79
|
# Number between 0 and 100
|
|
80
80
|
# Setting to either 0 or any number above 100 disables the logic that limits tool response size
|
|
81
81
|
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
|
|
82
|
-
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT",
|
|
82
|
+
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
|
|
83
83
|
)
|
|
@@ -242,9 +242,9 @@ Examples:
|
|
|
242
242
|
import logging
|
|
243
243
|
|
|
244
244
|
logger = logging.getLogger(__name__)
|
|
245
|
-
logger.
|
|
245
|
+
logger.debug(f"Tempo query: {stats_query}")
|
|
246
246
|
|
|
247
|
-
logger.
|
|
247
|
+
logger.debug(f"start: {start}, end: {end}")
|
|
248
248
|
|
|
249
249
|
all_traces_response = api.search_traces_by_query(
|
|
250
250
|
q=stats_query,
|
|
@@ -253,7 +253,7 @@ Examples:
|
|
|
253
253
|
limit=1000,
|
|
254
254
|
)
|
|
255
255
|
|
|
256
|
-
logger.
|
|
256
|
+
logger.debug(f"Response: {all_traces_response}")
|
|
257
257
|
|
|
258
258
|
traces = all_traces_response.get("traces", [])
|
|
259
259
|
if not traces:
|
|
@@ -43,14 +43,12 @@ PROMETHEUS_METADATA_API_LIMIT = 100 # Default limit for Prometheus metadata API
|
|
|
43
43
|
DEFAULT_QUERY_TIMEOUT_SECONDS = 20
|
|
44
44
|
MAX_QUERY_TIMEOUT_SECONDS = 180
|
|
45
45
|
# Default character limit for query responses to prevent token limit issues
|
|
46
|
-
DEFAULT_QUERY_RESPONSE_SIZE_LIMIT =
|
|
46
|
+
DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 30000
|
|
47
47
|
# Default timeout for metadata API calls (discovery endpoints)
|
|
48
48
|
DEFAULT_METADATA_TIMEOUT_SECONDS = 20
|
|
49
49
|
MAX_METADATA_TIMEOUT_SECONDS = 60
|
|
50
50
|
# Default time window for metadata APIs (in hours)
|
|
51
51
|
DEFAULT_METADATA_TIME_WINDOW_HRS = 1
|
|
52
|
-
# Sample size for data summaries when results are too large
|
|
53
|
-
DATA_SUMMARY_SAMPLE_SIZE = 10
|
|
54
52
|
|
|
55
53
|
|
|
56
54
|
class PrometheusConfig(BaseModel):
|
|
@@ -6,11 +6,20 @@
|
|
|
6
6
|
* Combine multiple patterns with regex OR (|) to reduce API calls:
|
|
7
7
|
- `{__name__=~"node_cpu.*|node_memory.*|node_disk.*"}` - get all node resource metrics in one call
|
|
8
8
|
- `{__name__=~"container.*|pod.*|kube.*"}` - get all Kubernetes-related metrics
|
|
9
|
-
- `{namespace=~"
|
|
9
|
+
- `{namespace=~"example1|example2|example3"}` - metrics from multiple namespaces
|
|
10
10
|
* Use `get_metric_metadata` after discovering names to get types/descriptions if needed
|
|
11
11
|
* Use `get_label_values` to discover pods, namespaces, jobs: e.g., get_label_values(label="pod")
|
|
12
12
|
* Only use `get_series` when you need full label sets (slower than other methods)
|
|
13
13
|
|
|
14
|
+
## Retrying queries that return too much data
|
|
15
|
+
* When a Prometheus query returns too much data (e.g., truncation error), you MUST retry with a more specific query, fewer data points, or topk/bottomk
|
|
16
|
+
* NEVER EVER EVER answer a question based on Prometheus data that was truncated as you might be missing important information and give the totally wrong answer
|
|
17
|
+
* Prefer telling the user you can't answer the question because of too much data rather than answering based on incomplete data
|
|
18
|
+
* You are also able to show graphs to the user (using the promql embed functionality mentioned below) so you can show users graphs and THEY can interpret the data themselves, even if you can't answer.
|
|
19
|
+
* Do NOT hesitate to try alternative queries and try to reduce the amount of data returned until you get a successful query
|
|
20
|
+
* Be extremely, extremely cautious when answering based on get_label_values because the existence of a label value says NOTHING about the metric value itself (is it high, low, or perhaps the label exists in Prometheus but it's an older series not present right now)
|
|
21
|
+
* DO NOT give answers about metrics based on what 'is typically the case' or 'common knowledge' - if you can't see the actual metric value, you MUST NEVER EVER answer about it - just tell the user your limitations due to the size of the data
|
|
22
|
+
|
|
14
23
|
## Alert Investigation & Query Execution
|
|
15
24
|
* When investigating a Prometheus alert, ALWAYS call list_prometheus_rules to get the alert definition
|
|
16
25
|
* Use Prometheus to query metrics from the alert promql
|
|
@@ -37,24 +46,19 @@
|
|
|
37
46
|
* ALWAYS use `topk()` or `bottomk()` to limit the number of series returned
|
|
38
47
|
* Standard pattern for high-cardinality queries:
|
|
39
48
|
- Use `topk(5, <your_query>)` to get the top 5 series
|
|
40
|
-
- Example: `topk(5, rate(container_cpu_usage_seconds_total{namespace="
|
|
49
|
+
- Example: `topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m]))`
|
|
41
50
|
- This prevents context overflow and focuses on the most relevant data
|
|
42
51
|
* To also capture the aggregate of remaining series as "other":
|
|
43
52
|
```
|
|
44
|
-
topk(5, rate(container_cpu_usage_seconds_total{namespace="
|
|
45
|
-
or
|
|
46
|
-
label_replace(
|
|
47
|
-
(sum(rate(container_cpu_usage_seconds_total{namespace="default"}[5m])) - sum(topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m])))),
|
|
48
|
-
"pod", "other", "", ""
|
|
49
|
-
)
|
|
53
|
+
topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m])) or label_replace((sum(rate(container_cpu_usage_seconds_total{namespace="example"}[5m])) - sum(topk(5, rate(container_cpu_usage_seconds_total{namespace="example"}[5m])))), "pod", "other", "", "")
|
|
50
54
|
```
|
|
51
55
|
* Common high-cardinality scenarios requiring topk():
|
|
52
56
|
- Pod-level metrics in namespaces with many pods
|
|
53
57
|
- Container-level CPU/memory metrics
|
|
54
58
|
- HTTP metrics with many endpoints or status codes
|
|
55
59
|
- Any query returning more than 10 time series
|
|
56
|
-
* For initial exploration, use instant queries with `count()` to check cardinality:
|
|
57
|
-
- Example: `count(count by (pod) (container_cpu_usage_seconds_total{namespace="
|
|
60
|
+
* For initial exploration, you may use instant queries with `count()` to check cardinality:
|
|
61
|
+
- Example: `count(count by (pod) (container_cpu_usage_seconds_total{namespace="example"}))`
|
|
58
62
|
- If count > 10, use topk() in your range query
|
|
59
63
|
* When doing queries, always extend the time range, to 15 min before and after the alert start time
|
|
60
64
|
* ALWAYS embed the execution results into your answer
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
holmes/.git_archival.json,sha256=PbwdO7rNhEJ4ALiO12DPPb81xNAIsVxCA0m8OrVoqsk,182
|
|
2
|
-
holmes/__init__.py,sha256=
|
|
2
|
+
holmes/__init__.py,sha256=aObdUvtSLEMRLcbzR1BLUnoN1nK0-PV8tAXzjOfEEh8,257
|
|
3
3
|
holmes/clients/robusta_client.py,sha256=rWst1PANJaIsprp3jZ7RV5UpttM_YLBGQ8B5noZqvgg,1532
|
|
4
|
-
holmes/common/env_vars.py,sha256=
|
|
4
|
+
holmes/common/env_vars.py,sha256=3CKyDmPtEAfYFxWC5wEDq5ppn94BhzDbJA3k9Vtd_WU,3312
|
|
5
5
|
holmes/common/openshift.py,sha256=akbQ0GpnmuzXOqTcotpTDQSDKIROypS9mgPOprUgkCw,407
|
|
6
6
|
holmes/config.py,sha256=yu0kQox7tfeKc4kJLESH-eGa6w1-nNC9kxAOtHf_qhQ,21781
|
|
7
7
|
holmes/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -172,7 +172,7 @@ holmes/plugins/toolsets/grafana/loki_api.py,sha256=f7oTzfhJ1LojsPoAfsKt32ADWffLE
|
|
|
172
172
|
holmes/plugins/toolsets/grafana/toolset_grafana.py,sha256=_A3DUOyd2624I75BknsZhHpK1mzcf7JfACL7_ET6sPM,4922
|
|
173
173
|
holmes/plugins/toolsets/grafana/toolset_grafana_loki.py,sha256=MK0mK5h8MZuULwAoQlng3UZS1xtxHzePwhEoJiroJSw,3912
|
|
174
174
|
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2,sha256=0HBYUXkGYWZbHwIvfQEF5oL9LFMYzjgcmL1U6RjgPSE,10417
|
|
175
|
-
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py,sha256=
|
|
175
|
+
holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py,sha256=4q9FCHZ2kuI4Kng_JOWipkHIUyfkH2zT5zSywnFie18,38419
|
|
176
176
|
holmes/plugins/toolsets/grafana/trace_parser.py,sha256=8PjqPGDGo9uB2Z8WWWknMKdhcqlqZEVncQCCkl2F06A,7024
|
|
177
177
|
holmes/plugins/toolsets/helm.yaml,sha256=-IPDChKMHcxGbzA0z9GKczRshL-mD24cHpBizfNM1jM,1604
|
|
178
178
|
holmes/plugins/toolsets/internet/internet.py,sha256=cQi8R2rcttIZ49egSzi2y2UVt4tncqE8medxiXp8II8,7779
|
|
@@ -196,8 +196,8 @@ holmes/plugins/toolsets/opensearch/opensearch_logs.py,sha256=_j-JAhLWtxhBPafCvey
|
|
|
196
196
|
holmes/plugins/toolsets/opensearch/opensearch_traces.py,sha256=FjDbkU-oI-spMdra0raSmiHZb6Cfbo_AsS_OKEt9coI,8876
|
|
197
197
|
holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2,sha256=Xn8AW4XCMYV1VkBbF8nNB9fUpKQ1Vbm88iFczj-LQXo,1035
|
|
198
198
|
holmes/plugins/toolsets/opensearch/opensearch_utils.py,sha256=mh9Wp22tOdJYmA9IaFS7tD3aEENljyeuPOsF-lEe5C0,5097
|
|
199
|
-
holmes/plugins/toolsets/prometheus/prometheus.py,sha256=
|
|
200
|
-
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2,sha256=
|
|
199
|
+
holmes/plugins/toolsets/prometheus/prometheus.py,sha256=H5sdiwk2nAWrnD23wR-8nkTuRLBOhrCZXc51EOgDqIQ,65832
|
|
200
|
+
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2,sha256=taf5C-N9rdp1A7S__hETefcm2OaYHJLjs1ZbuqIsGtE,6383
|
|
201
201
|
holmes/plugins/toolsets/prometheus/utils.py,sha256=ZenD354dP0sRmm0R-QBuAq1jyn40GjYf4wx15bXIYRc,775
|
|
202
202
|
holmes/plugins/toolsets/rabbitmq/api.py,sha256=-BtqF7hQWtl_OamnQ521vYHhR8E2n2wcPNYxfI9r4kQ,14307
|
|
203
203
|
holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2,sha256=qetmtJUMkx9LIihr2fSJ2EV9h2J-b-ZdUAvMtopXZYY,3105
|
|
@@ -237,8 +237,8 @@ holmes/utils/sentry_helper.py,sha256=_IbxqlqbsNb_ncvpZ-B5XxcauQphJStcwaVxRj18RpU
|
|
|
237
237
|
holmes/utils/stream.py,sha256=L4vlu1xX5Ihtn-D0Mfml_HuQRfLhHFSkWNojcAJLi9g,3252
|
|
238
238
|
holmes/utils/tags.py,sha256=SU4EZMBtLlIb7OlHsSpguFaypczRzOcuHYxDSanV3sQ,3364
|
|
239
239
|
holmes/version.py,sha256=uDRPOvVaHreROj_9HPe81RVpTzHcG8ojpGTsnJIlQOM,5220
|
|
240
|
-
holmesgpt-0.14.
|
|
241
|
-
holmesgpt-0.14.
|
|
242
|
-
holmesgpt-0.14.
|
|
243
|
-
holmesgpt-0.14.
|
|
244
|
-
holmesgpt-0.14.
|
|
240
|
+
holmesgpt-0.14.2.dist-info/LICENSE.txt,sha256=RdZMj8VXRQdVslr6PMYMbAEu5pOjOdjDqt3yAmWb9Ds,1072
|
|
241
|
+
holmesgpt-0.14.2.dist-info/METADATA,sha256=_-DXRD2oFoAqxL5uMxMeds-RItenYjrE2aRKnmV0DHQ,16184
|
|
242
|
+
holmesgpt-0.14.2.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
|
|
243
|
+
holmesgpt-0.14.2.dist-info/entry_points.txt,sha256=JdzEyZhpaYr7Boo4uy4UZgzY1VsAEbzMgGmHZtx9KFY,42
|
|
244
|
+
holmesgpt-0.14.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|