@techwavedev/agi-agent-kit 1.1.7 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @techwavedev/agi-agent-kit might be problematic. Click here for more details.

Files changed (111) hide show
  1. package/CHANGELOG.md +82 -1
  2. package/README.md +190 -12
  3. package/bin/init.js +30 -2
  4. package/package.json +6 -3
  5. package/templates/base/AGENTS.md +54 -23
  6. package/templates/base/README.md +325 -0
  7. package/templates/base/directives/memory_integration.md +95 -0
  8. package/templates/base/execution/memory_manager.py +309 -0
  9. package/templates/base/execution/session_boot.py +218 -0
  10. package/templates/base/execution/session_init.py +320 -0
  11. package/templates/base/skill-creator/SKILL_skillcreator.md +23 -36
  12. package/templates/base/skill-creator/scripts/init_skill.py +18 -135
  13. package/templates/skills/ec/README.md +31 -0
  14. package/templates/skills/ec/aws/SKILL.md +1020 -0
  15. package/templates/skills/ec/aws/defaults.yaml +13 -0
  16. package/templates/skills/ec/aws/references/common_patterns.md +80 -0
  17. package/templates/skills/ec/aws/references/mcp_servers.md +98 -0
  18. package/templates/skills/ec/aws-terraform/SKILL.md +349 -0
  19. package/templates/skills/ec/aws-terraform/references/best_practices.md +394 -0
  20. package/templates/skills/ec/aws-terraform/references/checkov_reference.md +337 -0
  21. package/templates/skills/ec/aws-terraform/scripts/configure_mcp.py +150 -0
  22. package/templates/skills/ec/confluent-kafka/SKILL.md +655 -0
  23. package/templates/skills/ec/confluent-kafka/references/ansible_playbooks.md +792 -0
  24. package/templates/skills/ec/confluent-kafka/references/ec_deployment.md +579 -0
  25. package/templates/skills/ec/confluent-kafka/references/kraft_migration.md +490 -0
  26. package/templates/skills/ec/confluent-kafka/references/troubleshooting.md +778 -0
  27. package/templates/skills/ec/confluent-kafka/references/upgrade_7x_to_8x.md +488 -0
  28. package/templates/skills/ec/confluent-kafka/scripts/kafka_health_check.py +435 -0
  29. package/templates/skills/ec/confluent-kafka/scripts/upgrade_preflight.py +568 -0
  30. package/templates/skills/ec/confluent-kafka/scripts/validate_config.py +455 -0
  31. package/templates/skills/ec/consul/SKILL.md +427 -0
  32. package/templates/skills/ec/consul/references/acl_setup.md +168 -0
  33. package/templates/skills/ec/consul/references/ha_config.md +196 -0
  34. package/templates/skills/ec/consul/references/troubleshooting.md +267 -0
  35. package/templates/skills/ec/consul/references/upgrades.md +213 -0
  36. package/templates/skills/ec/consul/scripts/consul_health_report.py +530 -0
  37. package/templates/skills/ec/consul/scripts/consul_status.py +264 -0
  38. package/templates/skills/ec/consul/scripts/generate_values.py +170 -0
  39. package/templates/skills/ec/documentation/SKILL.md +351 -0
  40. package/templates/skills/ec/documentation/references/best_practices.md +201 -0
  41. package/templates/skills/ec/documentation/scripts/analyze_code.py +307 -0
  42. package/templates/skills/ec/documentation/scripts/detect_changes.py +460 -0
  43. package/templates/skills/ec/documentation/scripts/generate_changelog.py +312 -0
  44. package/templates/skills/ec/documentation/scripts/sync_docs.py +272 -0
  45. package/templates/skills/ec/documentation/scripts/update_skill_docs.py +366 -0
  46. package/templates/skills/ec/gitlab/SKILL.md +529 -0
  47. package/templates/skills/ec/gitlab/references/agent_installation.md +416 -0
  48. package/templates/skills/ec/gitlab/references/api_reference.md +508 -0
  49. package/templates/skills/ec/gitlab/references/gitops_flux.md +465 -0
  50. package/templates/skills/ec/gitlab/references/troubleshooting.md +518 -0
  51. package/templates/skills/ec/gitlab/scripts/generate_agent_values.py +329 -0
  52. package/templates/skills/ec/gitlab/scripts/gitlab_agent_status.py +414 -0
  53. package/templates/skills/ec/jira/SKILL.md +484 -0
  54. package/templates/skills/ec/jira/references/jql_reference.md +148 -0
  55. package/templates/skills/ec/jira/scripts/add_comment.py +91 -0
  56. package/templates/skills/ec/jira/scripts/bulk_log_work.py +124 -0
  57. package/templates/skills/ec/jira/scripts/create_ticket.py +162 -0
  58. package/templates/skills/ec/jira/scripts/get_ticket.py +191 -0
  59. package/templates/skills/ec/jira/scripts/jira_client.py +383 -0
  60. package/templates/skills/ec/jira/scripts/log_work.py +154 -0
  61. package/templates/skills/ec/jira/scripts/search_tickets.py +104 -0
  62. package/templates/skills/ec/jira/scripts/update_comment.py +67 -0
  63. package/templates/skills/ec/jira/scripts/update_ticket.py +161 -0
  64. package/templates/skills/ec/karpenter/SKILL.md +301 -0
  65. package/templates/skills/ec/karpenter/references/ec2nodeclasses.md +421 -0
  66. package/templates/skills/ec/karpenter/references/migration.md +396 -0
  67. package/templates/skills/ec/karpenter/references/nodepools.md +400 -0
  68. package/templates/skills/ec/karpenter/references/troubleshooting.md +359 -0
  69. package/templates/skills/ec/karpenter/scripts/generate_ec2nodeclass.py +187 -0
  70. package/templates/skills/ec/karpenter/scripts/generate_nodepool.py +245 -0
  71. package/templates/skills/ec/karpenter/scripts/karpenter_status.py +359 -0
  72. package/templates/skills/ec/opensearch/SKILL.md +720 -0
  73. package/templates/skills/ec/opensearch/references/ml_neural_search.md +576 -0
  74. package/templates/skills/ec/opensearch/references/operator.md +532 -0
  75. package/templates/skills/ec/opensearch/references/query_dsl.md +532 -0
  76. package/templates/skills/ec/opensearch/scripts/configure_mcp.py +148 -0
  77. package/templates/skills/ec/victoriametrics/SKILL.md +598 -0
  78. package/templates/skills/ec/victoriametrics/references/kubernetes.md +531 -0
  79. package/templates/skills/ec/victoriametrics/references/prometheus_migration.md +333 -0
  80. package/templates/skills/ec/victoriametrics/references/troubleshooting.md +442 -0
  81. package/templates/skills/knowledge/SKILLS_CATALOG.md +274 -4
  82. package/templates/skills/knowledge/intelligent-routing/SKILL.md +237 -164
  83. package/templates/skills/knowledge/parallel-agents/SKILL.md +345 -73
  84. package/templates/skills/knowledge/plugin-discovery/SKILL.md +582 -0
  85. package/templates/skills/knowledge/plugin-discovery/scripts/platform_setup.py +1083 -0
  86. package/templates/skills/knowledge/design-md/README.md +0 -34
  87. package/templates/skills/knowledge/design-md/SKILL.md +0 -193
  88. package/templates/skills/knowledge/design-md/examples/DESIGN.md +0 -154
  89. package/templates/skills/knowledge/notebooklm-mcp/SKILL.md +0 -71
  90. package/templates/skills/knowledge/notebooklm-mcp/assets/example_asset.txt +0 -24
  91. package/templates/skills/knowledge/notebooklm-mcp/references/api_reference.md +0 -34
  92. package/templates/skills/knowledge/notebooklm-mcp/scripts/example.py +0 -19
  93. package/templates/skills/knowledge/react-components/README.md +0 -36
  94. package/templates/skills/knowledge/react-components/SKILL.md +0 -53
  95. package/templates/skills/knowledge/react-components/examples/gold-standard-card.tsx +0 -80
  96. package/templates/skills/knowledge/react-components/package-lock.json +0 -231
  97. package/templates/skills/knowledge/react-components/package.json +0 -16
  98. package/templates/skills/knowledge/react-components/resources/architecture-checklist.md +0 -15
  99. package/templates/skills/knowledge/react-components/resources/component-template.tsx +0 -37
  100. package/templates/skills/knowledge/react-components/resources/stitch-api-reference.md +0 -14
  101. package/templates/skills/knowledge/react-components/resources/style-guide.json +0 -27
  102. package/templates/skills/knowledge/react-components/scripts/fetch-stitch.sh +0 -30
  103. package/templates/skills/knowledge/react-components/scripts/validate.js +0 -68
  104. package/templates/skills/knowledge/self-update/SKILL.md +0 -60
  105. package/templates/skills/knowledge/self-update/scripts/update_kit.py +0 -103
  106. package/templates/skills/knowledge/stitch-loop/README.md +0 -54
  107. package/templates/skills/knowledge/stitch-loop/SKILL.md +0 -235
  108. package/templates/skills/knowledge/stitch-loop/examples/SITE.md +0 -73
  109. package/templates/skills/knowledge/stitch-loop/examples/next-prompt.md +0 -25
  110. package/templates/skills/knowledge/stitch-loop/resources/baton-schema.md +0 -61
  111. package/templates/skills/knowledge/stitch-loop/resources/site-template.md +0 -104
@@ -0,0 +1,598 @@
1
+ ---
2
+ name: victoriametrics
3
+ description: VictoriaMetrics time-series database specialist covering deployment (bare metal, Docker, EKS/Kubernetes), cluster architecture (vminsert/vmselect/vmstorage), vmagent configuration, performance optimization, capacity planning, troubleshooting, monitoring, and Prometheus migration/compatibility. Use for any task involving: (1) Installing or upgrading VictoriaMetrics (single-node or cluster), (2) vmagent scraping and remote write configuration, (3) Capacity planning and resource optimization, (4) Prometheus to VictoriaMetrics migration with vmctl, (5) High availability and replication setup, (6) Kubernetes/EKS deployments with Helm or Operator, (7) MetricsQL queries and optimization, (8) Troubleshooting performance issues.
4
+ ---
5
+
6
+ # VictoriaMetrics Skill
7
+
8
+ Expert-level guidance for VictoriaMetrics time-series database deployment, optimization, and operations.
9
+
10
+ ## Quick Reference
11
+
12
+ | Component | Purpose |
13
+ | -------------------- | -------------------------------------------------- |
14
+ | `victoria-metrics` | Single-node all-in-one TSDB |
15
+ | `vmagent` | Metrics collection agent (scraping + remote write) |
16
+ | `vminsert` | Cluster write path (accepts data) |
17
+ | `vmselect` | Cluster read path (queries) |
18
+ | `vmstorage` | Cluster storage nodes |
19
+ | `vmalert` | Alerting and recording rules |
20
+ | `vmauth` | Auth proxy and load balancer |
21
+ | `vmctl` | Data migration tool |
22
+ | `vmbackup/vmrestore` | Backup and restore tools |
23
+
24
+ ---
25
+
26
+ ## Deployment Options
27
+
28
+ ### Single-Node (Bare Metal/VM)
29
+
30
+ ```bash
31
+ # Download a pinned release (v1.102.0 shown here; check the GitHub releases page for the latest version)
32
+ wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.102.0/victoria-metrics-linux-amd64-v1.102.0.tar.gz
33
+ tar -xzf victoria-metrics-linux-amd64-v1.102.0.tar.gz
34
+
35
+ # Start with common flags
36
+ ./victoria-metrics-prod \
37
+ -storageDataPath=/var/lib/victoriametrics \
38
+ -retentionPeriod=90d \
39
+ -httpListenAddr=:8428 \
40
+ -selfScrapeInterval=10s
41
+ ```
42
+
43
+ ### Docker
44
+
45
+ ```bash
46
+ docker run -d \
47
+ --name victoriametrics \
48
+ -p 8428:8428 \
49
+ -v /data/victoriametrics:/victoria-metrics-data \
50
+ victoriametrics/victoria-metrics:v1.102.0 \
51
+ -storageDataPath=/victoria-metrics-data \
52
+ -retentionPeriod=90d
53
+ ```
54
+
55
+ ### Docker Compose
56
+
57
+ ```yaml
58
+ version: "3.8"
59
+ services:
60
+ victoriametrics:
61
+ image: victoriametrics/victoria-metrics:v1.102.0
62
+ ports:
63
+ - "8428:8428"
64
+ volumes:
65
+ - vmdata:/victoria-metrics-data
66
+ command:
67
+ - "-storageDataPath=/victoria-metrics-data"
68
+ - "-retentionPeriod=90d"
69
+ - "-selfScrapeInterval=10s"
70
+ restart: unless-stopped
71
+
72
+ vmagent:
73
+ image: victoriametrics/vmagent:v1.102.0
74
+ ports:
75
+ - "8429:8429"
76
+ volumes:
77
+ - ./prometheus.yml:/etc/prometheus/prometheus.yml
78
+ command:
79
+ - "-promscrape.config=/etc/prometheus/prometheus.yml"
80
+ - "-remoteWrite.url=http://victoriametrics:8428/api/v1/write"
81
+ restart: unless-stopped
82
+
83
+ volumes:
84
+ vmdata:
85
+ ```
86
+
87
+ ### Systemd Service
88
+
89
+ ```ini
90
+ # /etc/systemd/system/victoriametrics.service
91
+ [Unit]
92
+ Description=VictoriaMetrics
93
+ After=network.target
94
+
95
+ [Service]
96
+ Type=simple
97
+ User=victoriametrics
98
+ ExecStart=/usr/local/bin/victoria-metrics-prod \
99
+ -storageDataPath=/var/lib/victoriametrics \
100
+ -retentionPeriod=90d \
101
+ -httpListenAddr=:8428
102
+ Restart=always
103
+ RestartSec=5
104
+
105
+ [Install]
106
+ WantedBy=multi-user.target
107
+ ```
108
+
109
+ See `references/bare_metal.md` (if present; otherwise the official VictoriaMetrics docs) for the complete installation guide.
110
+
111
+ ---
112
+
113
+ ## Kubernetes/EKS Deployment
114
+
115
+ ### Helm Installation
116
+
117
+ ```bash
118
+ # Add VictoriaMetrics Helm repo
119
+ helm repo add vm https://victoriametrics.github.io/helm-charts/
120
+ helm repo update
121
+
122
+ # Single-node deployment
123
+ helm install victoria-metrics vm/victoria-metrics-single \
124
+ --namespace monitoring \
125
+ --create-namespace \
126
+ --set server.persistentVolume.size=100Gi \
127
+ --set server.retentionPeriod=90d
128
+
129
+ # Cluster deployment
130
+ helm install victoria-metrics-cluster vm/victoria-metrics-cluster \
131
+ --namespace monitoring \
132
+ --create-namespace \
133
+ -f values-cluster.yaml
134
+ ```
135
+
136
+ ### Cluster Values (EKS)
137
+
138
+ ```yaml
139
+ # values-cluster.yaml
140
+ vminsert:
141
+ replicaCount: 2
142
+ resources:
143
+ requests:
144
+ cpu: "500m"
145
+ memory: "512Mi"
146
+ limits:
147
+ cpu: "2000m"
148
+ memory: "2Gi"
149
+
150
+ vmselect:
151
+ replicaCount: 2
152
+ resources:
153
+ requests:
154
+ cpu: "500m"
155
+ memory: "512Mi"
156
+ limits:
157
+ cpu: "2000m"
158
+ memory: "4Gi"
159
+ extraArgs:
160
+ search.maxConcurrentRequests: "16"
161
+
162
+ vmstorage:
163
+ replicaCount: 3
164
+ persistentVolume:
165
+ enabled: true
166
+ size: 200Gi
167
+ storageClass: gp3
168
+ resources:
169
+ requests:
170
+ cpu: "1000m"
171
+ memory: "4Gi"
172
+ limits:
173
+ cpu: "4000m"
174
+ memory: "8Gi"
175
+ extraArgs:
176
+ retentionPeriod: "90d"
177
+ dedup.minScrapeInterval: "15s"
178
+ ```
179
+
180
+ ### VictoriaMetrics Operator (Kubernetes)
181
+
182
+ ```bash
183
+ # Install operator
184
+ helm install vm-operator vm/victoria-metrics-operator \
185
+ --namespace monitoring \
186
+ --create-namespace
187
+ ```
188
+
189
+ ```yaml
190
+ # VMCluster CRD
191
+ apiVersion: operator.victoriametrics.com/v1beta1
192
+ kind: VMCluster
193
+ metadata:
194
+ name: production
195
+ namespace: monitoring
196
+ spec:
197
+ retentionPeriod: "90d"
198
+ replicationFactor: 2
199
+
200
+ vmstorage:
201
+ replicaCount: 3
202
+ storage:
203
+ volumeClaimTemplate:
204
+ spec:
205
+ storageClassName: gp3
206
+ resources:
207
+ requests:
208
+ storage: 200Gi
209
+ resources:
210
+ limits:
211
+ cpu: "4"
212
+ memory: "8Gi"
213
+ requests:
214
+ cpu: "1"
215
+ memory: "4Gi"
216
+
217
+ vmselect:
218
+ replicaCount: 2
219
+ resources:
220
+ limits:
221
+ cpu: "2"
222
+ memory: "4Gi"
223
+ requests:
224
+ cpu: "500m"
225
+ memory: "512Mi"
226
+
227
+ vminsert:
228
+ replicaCount: 2
229
+ resources:
230
+ limits:
231
+ cpu: "2"
232
+ memory: "2Gi"
233
+ requests:
234
+ cpu: "500m"
235
+ memory: "512Mi"
236
+ ```
237
+
238
+ See `references/kubernetes.md` for complete EKS patterns.
239
+
240
+ ---
241
+
242
+ ## vmagent Configuration
243
+
244
+ ### Basic Scrape Config
245
+
246
+ ```yaml
247
+ # prometheus.yml for vmagent
248
+ global:
249
+ scrape_interval: 15s
250
+ external_labels:
251
+ cluster: production
252
+ region: eu-west-1
253
+
254
+ scrape_configs:
255
+ - job_name: "kubernetes-pods"
256
+ kubernetes_sd_configs:
257
+ - role: pod
258
+ relabel_configs:
259
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
260
+ action: keep
261
+ regex: true
262
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
263
+ action: replace
264
+ target_label: __metrics_path__
265
+ regex: (.+)
266
+ - source_labels:
267
+ [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
268
+ action: replace
269
+ regex: ([^:]+)(?::\d+)?;(\d+)
270
+ replacement: $1:$2
271
+ target_label: __address__
272
+
273
+ - job_name: "node-exporter"
274
+ static_configs:
275
+ - targets: ["node-exporter:9100"]
276
+ ```
277
+
278
+ ### vmagent Flags
279
+
280
+ ```bash
281
+ vmagent \
282
+ -promscrape.config=/etc/prometheus/prometheus.yml \
283
+ -remoteWrite.url=http://vminsert:8480/insert/0/prometheus/api/v1/write \
284
+ -remoteWrite.tmpDataPath=/tmp/vmagent-remotewrite \
285
+ -remoteWrite.maxDiskUsagePerURL=1GB \
286
+ -remoteWrite.queues=8 \
287
+ -promscrape.cluster.membersCount=2 \
288
+ -promscrape.cluster.memberNum=0
289
+ ```
290
+
291
+ ### High Availability vmagent
292
+
293
+ ```yaml
294
+ # Deploy multiple vmagents with cluster mode
295
+ apiVersion: operator.victoriametrics.com/v1beta1
296
+ kind: VMAgent
297
+ metadata:
298
+ name: vmagent-ha
299
+ spec:
300
+ replicaCount: 2
301
+ extraArgs:
302
+ promscrape.cluster.membersCount: "2"
303
+ remoteWrite:
304
+ - url: http://vminsert:8480/insert/0/prometheus/api/v1/write
305
+ ```
306
+
307
+ ---
308
+
309
+ ## Prometheus Compatibility & Migration
310
+
311
+ ### Prometheus Remote Write to VM
312
+
313
+ ```yaml
314
+ # prometheus.yml
315
+ remote_write:
316
+ - url: http://victoriametrics:8428/api/v1/write
317
+ queue_config:
318
+ max_samples_per_send: 10000
319
+ capacity: 20000
320
+ max_shards: 30
321
+ ```
322
+
323
+ ### Migrate Historical Data with vmctl
324
+
325
+ ```bash
326
+ # From Prometheus to VictoriaMetrics
327
+ vmctl prometheus \
328
+ --prom-snapshot=/path/to/prometheus/data \
329
+ --vm-addr=http://victoriametrics:8428 \
330
+ --vm-concurrency=8
331
+
332
+ # From InfluxDB
333
+ vmctl influx \
334
+ --influx-addr=http://influxdb:8086 \
335
+ --influx-database=metrics \
336
+ --vm-addr=http://victoriametrics:8428
337
+
338
+ # Between VictoriaMetrics instances
339
+ vmctl vm-native \
340
+ --vm-native-src-addr=http://old-vm:8428 \
341
+ --vm-native-dst-addr=http://new-vm:8428 \
342
+ --vm-native-filter-time-start='2024-01-01T00:00:00Z'
343
+ ```
344
+
345
+ ### MetricsQL vs PromQL
346
+
347
+ | PromQL | MetricsQL Enhancement |
348
+ | ---------------------- | ------------------------------------------------- |
349
+ | `rate()` | `rate()` with auto-interval detection |
350
+ | `irate()` | `irate()` + `rollup_rate()` |
351
+ | `histogram_quantile()` | `histogram_quantile()` + `histogram_share()` |
352
+ | N/A | `rollup()` - aggregates multiple functions |
353
+ | N/A | `range_median()`, `range_first()`, `range_last()` |
354
+ | N/A | `label_set()`, `label_del()`, `label_keep()` |
355
+ | N/A | `limit_offset()` for pagination |
356
+
357
+ ### Query Compatibility
358
+
359
+ ```bash
360
+ # Enable Prometheus-compatible mode
361
+ victoria-metrics \
362
+ -search.latencyOffset=0s \
363
+ -search.disableCache=false
364
+ ```
365
+
366
+ See `references/prometheus_migration.md` for a detailed migration guide.
367
+
368
+ ---
369
+
370
+ ## Capacity Planning
371
+
372
+ ### Resource Guidelines
373
+
374
+ | Metric | CPU (cores) | RAM | Storage |
375
+ | ------------------ | ----------- | ------- | ------------ |
376
+ | 100K active series | 0.5 | 1-2GB | ~5GB/month |
377
+ | 1M active series | 2-4 | 8-16GB | ~50GB/month |
378
+ | 10M active series | 8-16 | 32-64GB | ~500GB/month |
379
+ | 50M active series | 32+ | 128GB+ | ~2.5TB/month |
380
+
381
+ ### Recommended Spare Resources
382
+
383
+ - **CPU:** 50% spare for spikes
384
+ - **RAM:** 50% spare to prevent OOM
385
+ - **Disk:** 20% minimum free space
386
+
387
+ ### Storage Calculation
388
+
389
+ ```
390
+ Storage = (active_series × samples_per_day × bytes_per_sample × retention_days) / compression_ratio
391
+
392
+ Example:
393
+ 1M series × 5760 samples/day × 2 bytes × 90 days / 10 (compression) = ~100GB
394
+ ```
395
+
396
+ ### Cluster Sizing
397
+
398
+ | Workload | vminsert | vmselect | vmstorage |
399
+ | --------------------- | ----------- | ----------- | ------------ |
400
+ | Small (<1M series) | 2 replicas | 2 replicas | 3 replicas |
401
+ | Medium (1-10M series) | 3 replicas | 3 replicas | 5 replicas |
402
+ | Large (10M+ series) | 5+ replicas | 5+ replicas | 10+ replicas |
403
+
404
+ ---
405
+
406
+ ## Performance Optimization
407
+
408
+ ### Key Flags
409
+
410
+ ```bash
411
+ # Deduplication (for HA setups)
412
+ -dedup.minScrapeInterval=15s
413
+
414
+ # Query optimization
415
+ -search.maxConcurrentRequests=16
416
+ -search.maxQueueDuration=30s
417
+ -search.maxQueryLen=16KB
418
+ -search.maxPointsPerTimeseries=30000
419
+
420
+ # Memory limits
421
+ -memory.allowedPercent=80
422
+
423
+ # Cache tuning
424
+ -search.cacheTimestampOffset=5m
425
+
426
+ # Cardinality limits
427
+ -storage.maxHourlySeries=5000000
428
+ -storage.maxDailySeries=10000000
429
+ ```
430
+
431
+ ### Filesystem Recommendations
432
+
433
+ ```bash
434
+ # For >1TB storage, use these ext4 options
435
+ mkfs.ext4 -O 64bit,huge_file,extent -T huge /dev/sdb
436
+
437
+ # Mount options
438
+ /dev/sdb /var/lib/victoriametrics ext4 defaults,noatime,nodiratime 0 0
439
+ ```
440
+
441
+ ### OS Tuning
442
+
443
+ ```bash
444
+ # Increase open files limit
445
+ echo "victoriametrics soft nofile 65535" >> /etc/security/limits.conf
446
+ echo "victoriametrics hard nofile 65535" >> /etc/security/limits.conf
447
+
448
+ # Disable THP (Transparent Huge Pages)
449
+ echo never > /sys/kernel/mm/transparent_hugepage/enabled
450
+ ```
451
+
452
+ See `references/optimization.md` (if present; otherwise the official VictoriaMetrics docs) for a detailed tuning guide.
453
+
454
+ ---
455
+
456
+ ## Monitoring & Alerting
457
+
458
+ ### Self-Monitoring
459
+
460
+ ```yaml
461
+ # Scrape VictoriaMetrics metrics
462
+ scrape_configs:
463
+ - job_name: "victoriametrics"
464
+ static_configs:
465
+ - targets: ["localhost:8428"]
466
+ ```
467
+
468
+ ### Key Metrics
469
+
470
+ | Metric | Alert Threshold |
471
+ | --------------------------------- | --------------- |
472
+ | `vm_free_disk_space_bytes` | < 20% of total |
473
+ | `process_resident_memory_bytes` | > 80% of limit |
474
+ | `vm_slow_row_inserts_total` | Rate > 0 |
475
+ | `vm_slow_metric_name_loads_total` | Rate > 0 |
476
+ | `vm_rows_inserted_total` | Sudden drops |
477
+ | `vm_http_request_errors_total` | Rate > 0 |
478
+
479
+ ### vmalert Rules
480
+
481
+ ```yaml
482
+ # alerting_rules.yml
483
+ groups:
484
+ - name: victoriametrics
485
+ rules:
486
+ - alert: VMDiskSpaceLow
487
+ expr: vm_free_disk_space_bytes / vm_available_disk_space_bytes < 0.2
488
+ for: 5m
489
+ labels:
490
+ severity: warning
491
+ annotations:
492
+ summary: "VictoriaMetrics disk space low"
493
+
494
+ - alert: VMSlowInserts
495
+ expr: rate(vm_slow_row_inserts_total[5m]) > 0
496
+ for: 5m
497
+ labels:
498
+ severity: warning
499
+ annotations:
500
+ summary: "VictoriaMetrics experiencing slow inserts - low RAM"
501
+
502
+ - alert: VMHighCardinality
503
+ expr: vm_new_timeseries_created_total > 1000000
504
+ for: 1h
505
+ labels:
506
+ severity: warning
507
+ annotations:
508
+ summary: "High cardinality detected"
509
+ ```
510
+
511
+ ---
512
+
513
+ ## Troubleshooting
514
+
515
+ ### Common Issues
516
+
517
+ | Issue | Diagnosis | Solution |
518
+ | ---------------- | -------------------------------- | ---------------------------------------- |
519
+ | Slow queries | Check `vm_slow_*` metrics | Increase RAM, add vmselect replicas |
520
+ | OOM crashes | Check memory usage trends | Reduce `-memory.allowedPercent`, add RAM |
521
+ | Disk full | Check `vm_free_disk_space_bytes` | Reduce retention, add storage |
522
+ | High cardinality | Use cardinality explorer | Add relabeling rules to drop labels |
523
+ | Insert delays | Check `vm_rows_pending` | Increase vminsert replicas |
524
+
525
+ ### Diagnostic Commands
526
+
527
+ ```bash
528
+ # Check active queries
529
+ curl http://localhost:8428/api/v1/status/active_queries
530
+
531
+ # Check top queries
532
+ curl http://localhost:8428/api/v1/status/top_queries
533
+
534
+ # Check TSDB stats
535
+ curl http://localhost:8428/api/v1/status/tsdb
536
+
537
+ # Force flush in-memory data
538
+ curl http://localhost:8428/internal/force_flush
539
+
540
+ # Create snapshot
541
+ curl http://localhost:8428/snapshot/create
542
+
543
+ # Check cardinality
544
+ curl 'http://localhost:8428/api/v1/status/tsdb?topN=10'
545
+ ```
546
+
547
+ ### Logs Analysis
548
+
549
+ ```bash
550
+ # Look for warnings, errors, and slow-operation messages
551
+ grep -i "warn\|error\|slow" /var/log/victoriametrics.log
552
+
553
+ # Common issues to look for:
554
+ # - "too many open files" → increase ulimits
555
+ # - "cannot allocate memory" → OOM, increase RAM
556
+ # - "slow" → RAM too low for cardinality
557
+ ```
558
+
559
+ See `references/troubleshooting.md` for the complete diagnostic guide.
560
+
561
+ ---
562
+
563
+ ## Backup & Restore
564
+
565
+ ### Create Snapshot
566
+
567
+ ```bash
568
+ # Create snapshot
569
+ curl http://localhost:8428/snapshot/create
570
+ # Returns: {"status":"ok","snapshot":"20240120_123456"}
571
+
572
+ # Backup with vmbackup
573
+ vmbackup \
574
+ -storageDataPath=/var/lib/victoriametrics \
575
+ -snapshot.createURL=http://localhost:8428/snapshot/create \
576
+ -dst=s3://bucket/backups/$(date +%Y%m%d)
577
+ ```
578
+
579
+ ### Restore
580
+
581
+ ```bash
582
+ vmrestore \
583
+ -src=s3://bucket/backups/20240120 \
584
+ -storageDataPath=/var/lib/victoriametrics
585
+ ```
586
+
587
+ ---
588
+
589
+ ## References
590
+
591
+ - [VictoriaMetrics Docs](https://docs.victoriametrics.com/victoriametrics/)
592
+ - [Cluster Docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
593
+ - [vmagent Docs](https://docs.victoriametrics.com/victoriametrics/vmagent/)
594
+ - [vmctl Migration](https://docs.victoriametrics.com/victoriametrics/vmctl/)
595
+ - [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/)
596
+ - [Helm Charts](https://github.com/VictoriaMetrics/helm-charts)
597
+ - [Operator](https://github.com/VictoriaMetrics/operator)
598
+ - See `references/` for detailed guides